11 Commits
1_0 ... 1_1

24 changed files with 605 additions and 344 deletions

68
README.md Normal file
View File

@@ -0,0 +1,68 @@
# .NET library and tool to work with PDF files
## Usage
### VAR.PdfTools
Add the resulting assembly as a reference in your project, and add this line to your code:
using VAR.PdfTools;
Then extract the contents of a data column using:
var columnData = new List<string>();
PdfDocument doc = PdfDocument.Load("document.pdf");
foreach (PdfDocumentPage page in doc.Pages)
{
PdfTextExtractor extractor = new PdfTextExtractor(page);
columnData.AddRange(extractor.GetColumn("Column"));
}
Or the content of a field (the text to the right of the indicated text):
var fieldData = new List<string>();
PdfDocument doc = PdfDocument.Load("document.pdf");
foreach (PdfDocumentPage page in doc.Pages)
{
PdfTextExtractor extractor = new PdfTextExtractor(page);
fieldData.Add(extractor.GetField("Field"));
}
### VAR.PdfTools.Workbench
It is a simple Windows.Forms application to test the basic functionality of the library.
## Building
Visual Studio 2015 and 2010 solutions are provided. Simply click Build in the IDE.
## Contributing
1. Fork it!
2. Create your feature branch: `git checkout -b my-new-feature`
3. Commit your changes: `git commit -am 'Add some feature'`
4. Push to the branch: `git push origin my-new-feature`
5. Submit a pull request :D
## Credits
* Valeriano Alfonso Rodriguez.
## License
The MIT License (MIT)
Copyright (c) 2014-2015 Valeriano Alfonso Rodriguez
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

26
VAR.PdfTools.Net35.sln Normal file
View File

@@ -0,0 +1,26 @@
Microsoft Visual Studio Solution File, Format Version 11.00
# Visual Studio 2010
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "VAR.PdfTools.Net35", "VAR.PdfTools\VAR.PdfTools.Net35.csproj", "{EB7E003A-6A95-4002-809F-926C7C8A11E9}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "VAR.PdfTools.Workbench.Net35", "VAR.PdfTools.Workbench\VAR.PdfTools.Workbench.Net35.csproj", "{A5825D8E-9F81-49E0-B610-8AE5E46D02EA}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{EB7E003A-6A95-4002-809F-926C7C8A11E9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{EB7E003A-6A95-4002-809F-926C7C8A11E9}.Debug|Any CPU.Build.0 = Debug|Any CPU
{EB7E003A-6A95-4002-809F-926C7C8A11E9}.Release|Any CPU.ActiveCfg = Release|Any CPU
{EB7E003A-6A95-4002-809F-926C7C8A11E9}.Release|Any CPU.Build.0 = Release|Any CPU
{A5825D8E-9F81-49E0-B610-8AE5E46D02EA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{A5825D8E-9F81-49E0-B610-8AE5E46D02EA}.Debug|Any CPU.Build.0 = Debug|Any CPU
{A5825D8E-9F81-49E0-B610-8AE5E46D02EA}.Release|Any CPU.ActiveCfg = Release|Any CPU
{A5825D8E-9F81-49E0-B610-8AE5E46D02EA}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

View File

@@ -1,24 +0,0 @@
<?xml version="1.0" encoding="utf-8" ?>
<configuration>
<configSections>
<sectionGroup name="userSettings" type="System.Configuration.UserSettingsGroup, System, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089" >
<section name="VAR.PdfTools.Workbench.Properties.Settings" type="System.Configuration.ClientSettingsSection, System, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089" allowExeDefinition="MachineToLocalUser" requirePermission="false" />
</sectionGroup>
</configSections>
<startup>
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.6.1" />
</startup>
<userSettings>
<VAR.PdfTools.Workbench.Properties.Settings>
<setting name="LastPdfPath" serializeAs="String">
<value />
</setting>
<setting name="LastColumnName" serializeAs="String">
<value />
</setting>
<setting name="LastFieldName" serializeAs="String">
<value />
</setting>
</VAR.PdfTools.Workbench.Properties.Settings>
</userSettings>
</configuration>

View File

@@ -38,13 +38,15 @@
this.txtColumnName = new System.Windows.Forms.TextBox(); this.txtColumnName = new System.Windows.Forms.TextBox();
this.txtFieldName = new System.Windows.Forms.TextBox(); this.txtFieldName = new System.Windows.Forms.TextBox();
this.btnGetField = new System.Windows.Forms.Button(); this.btnGetField = new System.Windows.Forms.Button();
this.txtText = new System.Windows.Forms.TextBox();
this.btnHasText = new System.Windows.Forms.Button();
this.SuspendLayout(); this.SuspendLayout();
// //
// lblOutputs // lblOutputs
// //
this.lblOutputs.AutoSize = true; this.lblOutputs.AutoSize = true;
this.lblOutputs.Font = new System.Drawing.Font("Microsoft Sans Serif", 8.25F, System.Drawing.FontStyle.Bold, System.Drawing.GraphicsUnit.Point, ((byte)(0))); this.lblOutputs.Font = new System.Drawing.Font("Microsoft Sans Serif", 8.25F, System.Drawing.FontStyle.Bold, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
this.lblOutputs.Location = new System.Drawing.Point(12, 130); this.lblOutputs.Location = new System.Drawing.Point(12, 143);
this.lblOutputs.Name = "lblOutputs"; this.lblOutputs.Name = "lblOutputs";
this.lblOutputs.Size = new System.Drawing.Size(51, 13); this.lblOutputs.Size = new System.Drawing.Size(51, 13);
this.lblOutputs.TabIndex = 11; this.lblOutputs.TabIndex = 11;
@@ -88,11 +90,11 @@
| System.Windows.Forms.AnchorStyles.Left) | System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right))); | System.Windows.Forms.AnchorStyles.Right)));
this.txtOutput.Font = new System.Drawing.Font("Consolas", 8.25F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); this.txtOutput.Font = new System.Drawing.Font("Consolas", 8.25F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
this.txtOutput.Location = new System.Drawing.Point(15, 146); this.txtOutput.Location = new System.Drawing.Point(15, 159);
this.txtOutput.Multiline = true; this.txtOutput.Multiline = true;
this.txtOutput.Name = "txtOutput"; this.txtOutput.Name = "txtOutput";
this.txtOutput.ScrollBars = System.Windows.Forms.ScrollBars.Vertical; this.txtOutput.ScrollBars = System.Windows.Forms.ScrollBars.Vertical;
this.txtOutput.Size = new System.Drawing.Size(457, 303); this.txtOutput.Size = new System.Drawing.Size(457, 290);
this.txtOutput.TabIndex = 7; this.txtOutput.TabIndex = 7;
// //
// btnProcess // btnProcess
@@ -140,11 +142,30 @@
this.btnGetField.UseVisualStyleBackColor = true; this.btnGetField.UseVisualStyleBackColor = true;
this.btnGetField.Click += new System.EventHandler(this.btnGetField_Click); this.btnGetField.Click += new System.EventHandler(this.btnGetField_Click);
// //
// txtText
//
this.txtText.Location = new System.Drawing.Point(15, 111);
this.txtText.Name = "txtText";
this.txtText.Size = new System.Drawing.Size(142, 20);
this.txtText.TabIndex = 17;
//
// btnHasText
//
this.btnHasText.Location = new System.Drawing.Point(163, 109);
this.btnHasText.Name = "btnHasText";
this.btnHasText.Size = new System.Drawing.Size(75, 23);
this.btnHasText.TabIndex = 16;
this.btnHasText.Text = "HasText";
this.btnHasText.UseVisualStyleBackColor = true;
this.btnHasText.Click += new System.EventHandler(this.btnHasText_Click);
//
// FrmPdfInfo // FrmPdfInfo
// //
this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F); this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font; this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
this.ClientSize = new System.Drawing.Size(484, 461); this.ClientSize = new System.Drawing.Size(484, 461);
this.Controls.Add(this.txtText);
this.Controls.Add(this.btnHasText);
this.Controls.Add(this.txtFieldName); this.Controls.Add(this.txtFieldName);
this.Controls.Add(this.btnGetField); this.Controls.Add(this.btnGetField);
this.Controls.Add(this.txtColumnName); this.Controls.Add(this.txtColumnName);
@@ -176,5 +197,7 @@
private System.Windows.Forms.TextBox txtColumnName; private System.Windows.Forms.TextBox txtColumnName;
private System.Windows.Forms.TextBox txtFieldName; private System.Windows.Forms.TextBox txtFieldName;
private System.Windows.Forms.Button btnGetField; private System.Windows.Forms.Button btnGetField;
private System.Windows.Forms.TextBox txtText;
private System.Windows.Forms.Button btnHasText;
} }
} }

View File

@@ -1,6 +1,5 @@
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Data;
using System.Linq; using System.Linq;
using System.Windows.Forms; using System.Windows.Forms;
@@ -18,6 +17,7 @@ namespace VAR.PdfTools.Workbench
txtPdfPath.Text = Properties.Settings.Default.LastPdfPath; txtPdfPath.Text = Properties.Settings.Default.LastPdfPath;
txtColumnName.Text = Properties.Settings.Default.LastColumnName; txtColumnName.Text = Properties.Settings.Default.LastColumnName;
txtFieldName.Text = Properties.Settings.Default.LastFieldName; txtFieldName.Text = Properties.Settings.Default.LastFieldName;
txtText.Text = Properties.Settings.Default.LastText;
} }
private void FrmPdfInfo_FormClosing(object sender, FormClosingEventArgs e) private void FrmPdfInfo_FormClosing(object sender, FormClosingEventArgs e)
@@ -25,6 +25,7 @@ namespace VAR.PdfTools.Workbench
Properties.Settings.Default.LastPdfPath = txtPdfPath.Text; Properties.Settings.Default.LastPdfPath = txtPdfPath.Text;
Properties.Settings.Default.LastColumnName = txtColumnName.Text; Properties.Settings.Default.LastColumnName = txtColumnName.Text;
Properties.Settings.Default.LastFieldName = txtFieldName.Text; Properties.Settings.Default.LastFieldName = txtFieldName.Text;
Properties.Settings.Default.LastText = txtText.Text;
Properties.Settings.Default.Save(); Properties.Settings.Default.Save();
} }
@@ -86,8 +87,9 @@ namespace VAR.PdfTools.Workbench
PdfTextExtractor extractor = new PdfTextExtractor(page); PdfTextExtractor extractor = new PdfTextExtractor(page);
foreach (PdfTextElement textElement in extractor.Elements) foreach (PdfTextElement textElement in extractor.Elements)
{ {
lines.Add(string.Format("Text({0}, {1})({2}, {3}): \"{4}\"", lines.Add(string.Format("Text({0}, {1})({2}, {3})[{4}]: \"{5}\"",
textElement.Matrix.Matrix[0, 2], textElement.Matrix.Matrix[1, 2], textElement.VisibleWidth, textElement.VisibleHeight, textElement.Matrix.Matrix[0, 2], textElement.Matrix.Matrix[1, 2], textElement.VisibleWidth, textElement.VisibleHeight,
textElement.Font == null ? string.Empty : textElement.Font.Name,
textElement.VisibleText)); textElement.VisibleText));
} }
} }
@@ -132,5 +134,25 @@ namespace VAR.PdfTools.Workbench
} }
txtOutput.Lines = fieldData.ToArray(); txtOutput.Lines = fieldData.ToArray();
} }
/// <summary>
/// Click handler of the "HasText" button. Loads the PDF selected in
/// <c>txtPdfPath</c> and writes one line per page to <c>txtOutput</c> with the
/// 1-based page number and the result of <c>PdfTextExtractor.HasText</c> for
/// the text typed in <c>txtText</c>.
/// </summary>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnHasText_Click(object sender, EventArgs e)
{
    if (System.IO.File.Exists(txtPdfPath.Text) == false)
    {
        MessageBox.Show("File does not exist");
        return;
    }

    PdfDocument doc = PdfDocument.Load(txtPdfPath.Text);

    List<string> lines = new List<string>();
    int pageNum = 1;
    foreach (PdfDocumentPage page in doc.Pages)
    {
        PdfTextExtractor extractor = new PdfTextExtractor(page);
        lines.Add(string.Format("Page({0}) : {1}", pageNum, Convert.ToString(extractor.HasText(txtText.Text))));

        // Advance the counter so each output line carries its own page number;
        // without this every page was reported as "Page(1)".
        pageNum++;
    }
    txtOutput.Lines = lines.ToArray();
}
} }
} }

View File

@@ -112,9 +112,9 @@
<value>2.0</value> <value>2.0</value>
</resheader> </resheader>
<resheader name="reader"> <resheader name="reader">
<value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value> <value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader> </resheader>
<resheader name="writer"> <resheader name="writer">
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value> <value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader> </resheader>
</root> </root>

View File

@@ -1,7 +1,4 @@
using System; using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using System.Windows.Forms; using System.Windows.Forms;
namespace VAR.PdfTools.Workbench namespace VAR.PdfTools.Workbench

View File

@@ -11,4 +11,4 @@ using System.Runtime.InteropServices;
[assembly: AssemblyCulture("")] [assembly: AssemblyCulture("")]
[assembly: ComVisible(false)] [assembly: ComVisible(false)]
[assembly: Guid("a5825d8e-9f81-49e0-b610-8ae5e46d02ea")] [assembly: Guid("a5825d8e-9f81-49e0-b610-8ae5e46d02ea")]
[assembly: AssemblyVersion("1.0.*")] [assembly: AssemblyVersion("1.1.*")]

View File

@@ -1,71 +0,0 @@
//------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Runtime Version:4.0.30319.42000
//
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
// </auto-generated>
//------------------------------------------------------------------------------
namespace VAR.PdfTools.Workbench.Properties
{
/// <summary>
/// A strongly-typed resource class, for looking up localized strings, etc.
/// </summary>
// This class was auto-generated by the StronglyTypedResourceBuilder
// class via a tool like ResGen or Visual Studio.
// To add or remove a member, edit your .ResX file then rerun ResGen
// with the /str option, or rebuild your VS project.
[global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Resources.Tools.StronglyTypedResourceBuilder", "4.0.0.0")]
[global::System.Diagnostics.DebuggerNonUserCodeAttribute()]
[global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
internal class Resources
{
private static global::System.Resources.ResourceManager resourceMan;
private static global::System.Globalization.CultureInfo resourceCulture;
[global::System.Diagnostics.CodeAnalysis.SuppressMessageAttribute("Microsoft.Performance", "CA1811:AvoidUncalledPrivateCode")]
internal Resources()
{
}
/// <summary>
/// Returns the cached ResourceManager instance used by this class.
/// </summary>
[global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
internal static global::System.Resources.ResourceManager ResourceManager
{
get
{
if ((resourceMan == null))
{
global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("VAR.PdfTools.Workbench.Properties.Resources", typeof(Resources).Assembly);
resourceMan = temp;
}
return resourceMan;
}
}
/// <summary>
/// Overrides the current thread's CurrentUICulture property for all
/// resource lookups using this strongly typed resource class.
/// </summary>
[global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
internal static global::System.Globalization.CultureInfo Culture
{
get
{
return resourceCulture;
}
set
{
resourceCulture = value;
}
}
}
}

View File

@@ -1,117 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<root>
<!--
Microsoft ResX Schema
Version 2.0
The primary goals of this format is to allow a simple XML format
that is mostly human readable. The generation and parsing of the
various data types are done through the TypeConverter classes
associated with the data types.
Example:
... ado.net/XML headers & schema ...
<resheader name="resmimetype">text/microsoft-resx</resheader>
<resheader name="version">2.0</resheader>
<resheader name="reader">System.Resources.ResXResourceReader, System.Windows.Forms, ...</resheader>
<resheader name="writer">System.Resources.ResXResourceWriter, System.Windows.Forms, ...</resheader>
<data name="Name1"><value>this is my long string</value><comment>this is a comment</comment></data>
<data name="Color1" type="System.Drawing.Color, System.Drawing">Blue</data>
<data name="Bitmap1" mimetype="application/x-microsoft.net.object.binary.base64">
<value>[base64 mime encoded serialized .NET Framework object]</value>
</data>
<data name="Icon1" type="System.Drawing.Icon, System.Drawing" mimetype="application/x-microsoft.net.object.bytearray.base64">
<value>[base64 mime encoded string representing a byte array form of the .NET Framework object]</value>
<comment>This is a comment</comment>
</data>
There are any number of "resheader" rows that contain simple
name/value pairs.
Each data row contains a name, and value. The row also contains a
type or mimetype. Type corresponds to a .NET class that support
text/value conversion through the TypeConverter architecture.
Classes that don't support this are serialized and stored with the
mimetype set.
The mimetype is used for serialized objects, and tells the
ResXResourceReader how to depersist the object. This is currently not
extensible. For a given mimetype the value must be set accordingly:
Note - application/x-microsoft.net.object.binary.base64 is the format
that the ResXResourceWriter will generate, however the reader can
read any of the formats listed below.
mimetype: application/x-microsoft.net.object.binary.base64
value : The object must be serialized with
: System.Serialization.Formatters.Binary.BinaryFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.soap.base64
value : The object must be serialized with
: System.Runtime.Serialization.Formatters.Soap.SoapFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.bytearray.base64
value : The object must be serialized into a byte array
: using a System.ComponentModel.TypeConverter
: and then encoded with base64 encoding.
-->
<xsd:schema id="root" xmlns="" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata">
<xsd:element name="root" msdata:IsDataSet="true">
<xsd:complexType>
<xsd:choice maxOccurs="unbounded">
<xsd:element name="metadata">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" />
<xsd:attribute name="type" type="xsd:string" />
<xsd:attribute name="mimetype" type="xsd:string" />
</xsd:complexType>
</xsd:element>
<xsd:element name="assembly">
<xsd:complexType>
<xsd:attribute name="alias" type="xsd:string" />
<xsd:attribute name="name" type="xsd:string" />
</xsd:complexType>
</xsd:element>
<xsd:element name="data">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
<xsd:element name="comment" type="xsd:string" minOccurs="0" msdata:Ordinal="2" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" msdata:Ordinal="1" />
<xsd:attribute name="type" type="xsd:string" msdata:Ordinal="3" />
<xsd:attribute name="mimetype" type="xsd:string" msdata:Ordinal="4" />
</xsd:complexType>
</xsd:element>
<xsd:element name="resheader">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" use="required" />
</xsd:complexType>
</xsd:element>
</xsd:choice>
</xsd:complexType>
</xsd:element>
</xsd:schema>
<resheader name="resmimetype">
<value>text/microsoft-resx</value>
</resheader>
<resheader name="version">
<value>2.0</value>
</resheader>
<resheader name="reader">
<value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
<resheader name="writer">
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
</root>

View File

@@ -12,7 +12,7 @@ namespace VAR.PdfTools.Workbench.Properties {
[global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()] [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
[global::System.CodeDom.Compiler.GeneratedCodeAttribute("Microsoft.VisualStudio.Editors.SettingsDesigner.SettingsSingleFileGenerator", "14.0.0.0")] [global::System.CodeDom.Compiler.GeneratedCodeAttribute("Microsoft.VisualStudio.Editors.SettingsDesigner.SettingsSingleFileGenerator", "10.0.0.0")]
internal sealed partial class Settings : global::System.Configuration.ApplicationSettingsBase { internal sealed partial class Settings : global::System.Configuration.ApplicationSettingsBase {
private static Settings defaultInstance = ((Settings)(global::System.Configuration.ApplicationSettingsBase.Synchronized(new Settings()))); private static Settings defaultInstance = ((Settings)(global::System.Configuration.ApplicationSettingsBase.Synchronized(new Settings())));
@@ -58,5 +58,17 @@ namespace VAR.PdfTools.Workbench.Properties {
this["LastFieldName"] = value; this["LastFieldName"] = value;
} }
} }
[global::System.Configuration.UserScopedSettingAttribute()]
[global::System.Diagnostics.DebuggerNonUserCodeAttribute()]
[global::System.Configuration.DefaultSettingValueAttribute("")]
public string LastText {
get {
return ((string)(this["LastText"]));
}
set {
this["LastText"] = value;
}
}
} }
} }

View File

@@ -11,5 +11,8 @@
<Setting Name="LastFieldName" Type="System.String" Scope="User"> <Setting Name="LastFieldName" Type="System.String" Scope="User">
<Value Profile="(Default)" /> <Value Profile="(Default)" />
</Setting> </Setting>
<Setting Name="LastText" Type="System.String" Scope="User">
<Value Profile="(Default)" />
</Setting>
</Settings> </Settings>
</SettingsFile> </SettingsFile>

View File

@@ -0,0 +1,86 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<ProjectGuid>{A5825D8E-9F81-49E0-B610-8AE5E46D02EA}</ProjectGuid>
<OutputType>WinExe</OutputType>
<AppDesignerFolder>Properties</AppDesignerFolder>
<RootNamespace>VAR.PdfTools.Workbench</RootNamespace>
<AssemblyName>VAR.PdfTools.Workbench</AssemblyName>
<TargetFrameworkVersion>v3.5</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment>
<AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
<TargetFrameworkProfile />
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<PlatformTarget>AnyCPU</PlatformTarget>
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<PlatformTarget>AnyCPU</PlatformTarget>
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<PropertyGroup>
<StartupObject />
</PropertyGroup>
<ItemGroup>
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="System.Data" />
<Reference Include="System.Deployment" />
<Reference Include="System.Drawing" />
<Reference Include="System.Windows.Forms" />
<Reference Include="System.Xml" />
</ItemGroup>
<ItemGroup>
<Compile Include="FrmPdfInfo.cs">
<SubType>Form</SubType>
</Compile>
<Compile Include="FrmPdfInfo.Designer.cs">
<DependentUpon>FrmPdfInfo.cs</DependentUpon>
</Compile>
<Compile Include="Program.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<EmbeddedResource Include="FrmPdfInfo.resx">
<DependentUpon>FrmPdfInfo.cs</DependentUpon>
</EmbeddedResource>
<None Include="Properties\Settings.settings">
<Generator>SettingsSingleFileGenerator</Generator>
<LastGenOutput>Settings.Designer.cs</LastGenOutput>
</None>
<Compile Include="Properties\Settings.Designer.cs">
<AutoGen>True</AutoGen>
<DependentUpon>Settings.settings</DependentUpon>
<DesignTimeSharedInput>True</DesignTimeSharedInput>
</Compile>
</ItemGroup>
<ItemGroup>
<!-- This project targets .NET 3.5 and is loaded by VAR.PdfTools.Net35.sln, which pairs it
     with VAR.PdfTools.Net35.csproj (same project GUID). Reference that project rather than
     the default library project so the reference resolves inside the Net35 solution. -->
<ProjectReference Include="..\VAR.PdfTools\VAR.PdfTools.Net35.csproj">
<Project>{eb7e003a-6a95-4002-809f-926c7c8a11e9}</Project>
<Name>VAR.PdfTools.Net35</Name>
</ProjectReference>
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
Other similar extension points exist, see Microsoft.Common.targets.
<Target Name="BeforeBuild">
</Target>
<Target Name="AfterBuild">
</Target>
-->
</Project>

View File

@@ -12,6 +12,7 @@
<TargetFrameworkVersion>v4.6.1</TargetFrameworkVersion> <TargetFrameworkVersion>v4.6.1</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment> <FileAlignment>512</FileAlignment>
<AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects> <AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
<TargetFrameworkProfile />
</PropertyGroup> </PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "> <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<PlatformTarget>AnyCPU</PlatformTarget> <PlatformTarget>AnyCPU</PlatformTarget>
@@ -40,11 +41,9 @@
<Reference Include="System.Core" /> <Reference Include="System.Core" />
<Reference Include="System.Xml.Linq" /> <Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" /> <Reference Include="System.Data.DataSetExtensions" />
<Reference Include="Microsoft.CSharp" />
<Reference Include="System.Data" /> <Reference Include="System.Data" />
<Reference Include="System.Deployment" /> <Reference Include="System.Deployment" />
<Reference Include="System.Drawing" /> <Reference Include="System.Drawing" />
<Reference Include="System.Net.Http" />
<Reference Include="System.Windows.Forms" /> <Reference Include="System.Windows.Forms" />
<Reference Include="System.Xml" /> <Reference Include="System.Xml" />
</ItemGroup> </ItemGroup>
@@ -60,15 +59,6 @@
<EmbeddedResource Include="FrmPdfInfo.resx"> <EmbeddedResource Include="FrmPdfInfo.resx">
<DependentUpon>FrmPdfInfo.cs</DependentUpon> <DependentUpon>FrmPdfInfo.cs</DependentUpon>
</EmbeddedResource> </EmbeddedResource>
<EmbeddedResource Include="Properties\Resources.resx">
<Generator>ResXFileCodeGenerator</Generator>
<LastGenOutput>Resources.Designer.cs</LastGenOutput>
<SubType>Designer</SubType>
</EmbeddedResource>
<Compile Include="Properties\Resources.Designer.cs">
<AutoGen>True</AutoGen>
<DependentUpon>Resources.resx</DependentUpon>
</Compile>
<None Include="Properties\Settings.settings"> <None Include="Properties\Settings.settings">
<Generator>SettingsSingleFileGenerator</Generator> <Generator>SettingsSingleFileGenerator</Generator>
<LastGenOutput>Settings.Designer.cs</LastGenOutput> <LastGenOutput>Settings.Designer.cs</LastGenOutput>
@@ -79,9 +69,6 @@
<DesignTimeSharedInput>True</DesignTimeSharedInput> <DesignTimeSharedInput>True</DesignTimeSharedInput>
</Compile> </Compile>
</ItemGroup> </ItemGroup>
<ItemGroup>
<None Include="App.config" />
</ItemGroup>
<ItemGroup> <ItemGroup>
<ProjectReference Include="..\VAR.PdfTools\VAR.PdfTools.csproj"> <ProjectReference Include="..\VAR.PdfTools\VAR.PdfTools.csproj">
<Project>{eb7e003a-6a95-4002-809f-926c7c8a11e9}</Project> <Project>{eb7e003a-6a95-4002-809f-926c7c8a11e9}</Project>

View File

@@ -1,7 +1,6 @@
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.IO; using System.IO;
using System.IO.Compression;
using System.Linq; using System.Linq;
namespace VAR.PdfTools namespace VAR.PdfTools
@@ -36,49 +35,12 @@ namespace VAR.PdfTools
#region Private methods #region Private methods
private static byte[] DecodeFlateStreamData(byte[] streamData) private static void ApplyFilterToStream(PdfStream stream, string filter)
{ {
MemoryStream msInput = new MemoryStream(streamData);
MemoryStream msOutput = new MemoryStream();
// It seems to work when skipping the first two bytes.
byte header;
header = (byte)msInput.ReadByte();
header = (byte)msInput.ReadByte();
DeflateStream zip = new DeflateStream(msInput, CompressionMode.Decompress, true);
int cbRead;
byte[] abResult = new byte[1024];
do
{
cbRead = zip.Read(abResult, 0, abResult.Length);
if (cbRead > 0)
{
msOutput.Write(abResult, 0, cbRead);
}
}
while (cbRead > 0);
zip.Close();
msOutput.Flush();
if (msOutput.Length >= 0)
{
msOutput.Capacity = (int)msOutput.Length;
return msOutput.GetBuffer();
}
return null;
}
private static void ApplyFiltersToStreams(PdfStream stream)
{
string filter = stream.Dictionary.GetParamAsString("Filter");
if (filter == "FlateDecode") if (filter == "FlateDecode")
{ {
stream.OriginalData = stream.Data; byte[] decodedStreamData = PdfFilters.FlateDecode.Decode(stream.Data);
stream.OriginalFilter = stream.Dictionary.Values["Filter"];
byte[] decodedStreamData = DecodeFlateStreamData(stream.Data);
stream.Data = decodedStreamData; stream.Data = decodedStreamData;
stream.Dictionary.Values["Length"] = new PdfInteger { Value = decodedStreamData.Length };
stream.Dictionary.Values.Remove("Filter");
} }
else if (filter == "ASCII85Decode" || filter == "A85") else if (filter == "ASCII85Decode" || filter == "A85")
{ {
@@ -92,11 +54,55 @@ namespace VAR.PdfTools
{ {
// FIXME: Implement this filter // FIXME: Implement this filter
} }
else
{ {
// FIXME: Implement the rest of filters // FIXME: Implement the rest of filters
} }
} }
/// <summary>
/// Applies the filter(s) listed under the stream dictionary's "Filter" key to
/// the stream data. The entry may be a single PdfString/PdfName or a PdfArray
/// of them; array entries are applied in order. The untouched data and filter
/// entry are saved in OriginalData/OriginalFilter first. Afterwards "Length"
/// is set to the resulting data size and the "Filter" key is removed.
/// Streams without a "Filter" key are left untouched.
/// </summary>
/// <param name="stream">Stream whose data and dictionary are updated in place.</param>
/// <exception cref="Exception">
/// The "Filter" entry, or an element of a filter array, is neither a PdfString
/// nor a PdfName.
/// </exception>
private static void ApplyFiltersToStreams(PdfStream stream)
{
    if (!stream.Dictionary.Values.ContainsKey("Filter"))
    {
        return;
    }

    IPdfElement filterElement = stream.Dictionary.Values["Filter"];

    // Keep the undecoded payload and its filter entry before anything is changed.
    stream.OriginalData = stream.Data;
    stream.OriginalFilter = stream.Dictionary.Values["Filter"];

    string filterName;
    if (TryGetFilterName(filterElement, out filterName))
    {
        ApplyFilterToStream(stream, filterName);
    }
    else if (filterElement is PdfArray)
    {
        foreach (IPdfElement subFilterElement in ((PdfArray)filterElement).Values)
        {
            if (TryGetFilterName(subFilterElement, out filterName) == false)
            {
                throw new Exception("PdfFilter not correctly specified");
            }
            ApplyFilterToStream(stream, filterName);
        }
    }
    else
    {
        throw new Exception("PdfFilter not correctly specified");
    }

    stream.Dictionary.Values["Length"] = new PdfInteger { Value = stream.Data.Length };
    stream.Dictionary.Values.Remove("Filter");
}

/// <summary>
/// Reads the filter name out of a PdfString or PdfName element.
/// </summary>
/// <param name="element">Element taken from a "Filter" entry.</param>
/// <param name="filterName">The element's Value when it is a PdfString or PdfName; otherwise null.</param>
/// <returns>true when the element is a PdfString or PdfName; false otherwise.</returns>
private static bool TryGetFilterName(IPdfElement element, out string filterName)
{
    if (element is PdfString)
    {
        filterName = ((PdfString)element).Value;
        return true;
    }
    if (element is PdfName)
    {
        filterName = ((PdfName)element).Value;
        return true;
    }
    filterName = null;
    return false;
}
private static IPdfElement ResolveIndirectReferences(IPdfElement elem, Dictionary<int, PdfObject> dictReferences) private static IPdfElement ResolveIndirectReferences(IPdfElement elem, Dictionary<int, PdfObject> dictReferences)
{ {
if (elem is PdfObjectReference) if (elem is PdfObjectReference)
@@ -211,7 +217,7 @@ namespace VAR.PdfTools
do do
{ {
PdfObject obj = parser.ParseObject(doc.Objects); PdfObject obj = parser.ParseObject(doc.Objects);
if (obj != null) if (obj != null && obj.Data != null)
{ {
if (obj.Data is PdfStream) if (obj.Data is PdfStream)
{ {

View File

@@ -58,6 +58,7 @@ namespace VAR.PdfTools
foreach (KeyValuePair<string, IPdfElement> pair in fonts.Values) foreach (KeyValuePair<string, IPdfElement> pair in fonts.Values)
{ {
var font = new PdfFont(pair.Value as PdfDictionary); var font = new PdfFont(pair.Value as PdfDictionary);
font.Name = pair.Key;
_fonts.Add(pair.Key, font); _fonts.Add(pair.Key, font);
} }
} }

View File

@@ -26,44 +26,44 @@ namespace VAR.PdfTools
public class PdfBoolean : IPdfElement public class PdfBoolean : IPdfElement
{ {
public PdfElementTypes Type { get; private set; } = PdfElementTypes.Boolean; public PdfElementTypes Type { get { return PdfElementTypes.Boolean; } }
public bool Value { get; set; } public bool Value { get; set; }
} }
public class PdfInteger : IPdfElement public class PdfInteger : IPdfElement
{ {
public PdfElementTypes Type { get; private set; } = PdfElementTypes.Integer; public PdfElementTypes Type { get { return PdfElementTypes.Integer; } }
public long Value { get; set; } public long Value { get; set; }
} }
public class PdfReal : IPdfElement public class PdfReal : IPdfElement
{ {
public PdfElementTypes Type { get; private set; } = PdfElementTypes.Real; public PdfElementTypes Type { get { return PdfElementTypes.Real; } }
public double Value { get; set; } public double Value { get; set; }
} }
public class PdfString : IPdfElement public class PdfString : IPdfElement
{ {
public PdfElementTypes Type { get; private set; } = PdfElementTypes.String; public PdfElementTypes Type { get { return PdfElementTypes.String; } }
public string Value { get; set; } public string Value { get; set; }
} }
public class PdfName : IPdfElement public class PdfName : IPdfElement
{ {
public PdfElementTypes Type { get; private set; } = PdfElementTypes.Name; public PdfElementTypes Type { get { return PdfElementTypes.Name; } }
public string Value { get; set; } public string Value { get; set; }
} }
public class PdfArray : IPdfElement public class PdfArray : IPdfElement
{ {
public PdfElementTypes Type { get; private set; } = PdfElementTypes.Array; public PdfElementTypes Type { get { return PdfElementTypes.Array; } }
private List<IPdfElement> _values = new List<IPdfElement>(); private List<IPdfElement> _values = new List<IPdfElement>();
public List<IPdfElement> Values { get { return _values; } } public List<IPdfElement> Values { get { return _values; } }
} }
public class PdfDictionary : IPdfElement public class PdfDictionary : IPdfElement
{ {
public PdfElementTypes Type { get; private set; } = PdfElementTypes.Dictionary; public PdfElementTypes Type { get { return PdfElementTypes.Dictionary; } }
private Dictionary<string, IPdfElement> _values = new Dictionary<string, IPdfElement>(); private Dictionary<string, IPdfElement> _values = new Dictionary<string, IPdfElement>();
public Dictionary<string, IPdfElement> Values { get { return _values; } } public Dictionary<string, IPdfElement> Values { get { return _values; } }
@@ -135,19 +135,19 @@ namespace VAR.PdfTools
public class PdfNull : IPdfElement public class PdfNull : IPdfElement
{ {
public PdfElementTypes Type { get; private set; } = PdfElementTypes.Null; public PdfElementTypes Type { get { return PdfElementTypes.Null; } }
} }
public class PdfObjectReference : IPdfElement public class PdfObjectReference : IPdfElement
{ {
public PdfElementTypes Type { get; private set; } = PdfElementTypes.ObjectReference; public PdfElementTypes Type { get { return PdfElementTypes.ObjectReference; } }
public int ObjectID { get; set; } public int ObjectID { get; set; }
public int ObjectGeneration { get; set; } public int ObjectGeneration { get; set; }
} }
public class PdfStream : IPdfElement public class PdfStream : IPdfElement
{ {
public PdfElementTypes Type { get; private set; } = PdfElementTypes.Stream; public PdfElementTypes Type { get { return PdfElementTypes.Stream; } }
public PdfDictionary Dictionary { get; set; } public PdfDictionary Dictionary { get; set; }
public byte[] Data { get; set; } public byte[] Data { get; set; }
@@ -157,11 +157,11 @@ namespace VAR.PdfTools
public class PdfObject : IPdfElement public class PdfObject : IPdfElement
{ {
public PdfElementTypes Type { get; private set; } = PdfElementTypes.Object; public PdfElementTypes Type { get { return PdfElementTypes.Object; } }
public int ObjectID { get; set; } public int ObjectID { get; set; }
public int ObjectGeneration { get; set; } public int ObjectGeneration { get; set; }
public IPdfElement Data { get; set; } public IPdfElement Data { get; set; }
public int UsageCount { get; set; } = 0; public int UsageCount { get; set; }
} }
public static class PdfElementUtils public static class PdfElementUtils

View File

@@ -0,0 +1,50 @@
using System;
using System.IO;
using System.IO.Compression;
namespace VAR.PdfTools
{
/// <summary>
/// Stream filters used to encode and decode the data of PDF streams.
/// </summary>
public static class PdfFilters
{
    /// <summary>
    /// Flate filter, built on <see cref="DeflateStream"/>.
    /// </summary>
    public class FlateDecode
    {
        /// <summary>
        /// Compresses stream data. Not implemented.
        /// </summary>
        /// <param name="streamData">Data to compress.</param>
        /// <returns>Never returns.</returns>
        /// <exception cref="NotImplementedException">Always thrown.</exception>
        public byte[] Encode(byte[] streamData)
        {
            throw new NotImplementedException("FlateFilter.Encode: Not implemented");
        }

        /// <summary>
        /// Decompresses Flate-encoded stream data.
        /// </summary>
        /// <param name="streamData">Compressed data, including its two leading header bytes.</param>
        /// <returns>The decompressed bytes; an empty array when nothing could be decoded.</returns>
        public static byte[] Decode(byte[] streamData)
        {
            using (MemoryStream msInput = new MemoryStream(streamData))
            using (MemoryStream msOutput = new MemoryStream())
            {
                // Skip the first two bytes. Presumably these are the zlib
                // header (RFC 1950), which DeflateStream does not understand
                // because it expects a raw deflate stream.
                msInput.ReadByte();
                msInput.ReadByte();

                // The using blocks release the streams even when the
                // compressed data is corrupt and Read throws.
                using (DeflateStream zip = new DeflateStream(msInput, CompressionMode.Decompress, true))
                {
                    byte[] buffer = new byte[1024];
                    int cbRead;
                    while ((cbRead = zip.Read(buffer, 0, buffer.Length)) > 0)
                    {
                        msOutput.Write(buffer, 0, cbRead);
                    }
                }

                // ToArray copies exactly the bytes written.
                return msOutput.ToArray();
            }
        }
    }
}
}

View File

@@ -15,6 +15,8 @@ namespace VAR.PdfTools
private double _height = 1.0; private double _height = 1.0;
private string _name = string.Empty;
private bool _tainted = false; private bool _tainted = false;
#endregion #endregion
@@ -25,6 +27,8 @@ namespace VAR.PdfTools
public double Height { get { return _height; } } public double Height { get { return _height; } }
public string Name { get { return _name; } set { _name = value; } }
public bool Tainted { get { return _tainted; } } public bool Tainted { get { return _tainted; } }
#endregion #endregion

View File

@@ -23,13 +23,6 @@ namespace VAR.PdfTools
public PdfParser(byte[] stream) public PdfParser(byte[] stream)
{ {
_stream = stream; _stream = stream;
// Intentar usar el separador decimal de la cultura
try
{
_decimalSeparator = CultureInfo.CurrentUICulture.NumberFormat.NumberDecimalSeparator;
}
catch { }
} }
#endregion #endregion
@@ -436,7 +429,7 @@ namespace VAR.PdfTools
if (dotCount == 1) if (dotCount == 1)
{ {
PdfReal obj = new PdfReal(); PdfReal obj = new PdfReal();
obj.Value = Convert.ToDouble(sbNumber.ToString()); obj.Value = Convert.ToDouble(sbNumber.ToString(), CultureInfo.InvariantCulture);
return obj; return obj;
} }
} }

View File

@@ -139,6 +139,9 @@ namespace VAR.PdfTools
public double VisibleHeight { get; set; } public double VisibleHeight { get; set; }
private List<PdfTextElement> _childs = new List<PdfTextElement>();
public List<PdfTextElement> Childs { get { return _childs; } }
#endregion #endregion
#region Public methods #region Public methods
@@ -179,6 +182,8 @@ namespace VAR.PdfTools
private StringBuilder _sbText = new StringBuilder(); private StringBuilder _sbText = new StringBuilder();
private double _textWidth = 0; private double _textWidth = 0;
PdfTextElement _currentTextElement = null;
#endregion #endregion
#region Properties #region Properties
@@ -216,13 +221,8 @@ namespace VAR.PdfTools
return sbText.ToString(); return sbText.ToString();
} }
private void FlushTextElement() private PdfTextElement BuildTextElement()
{ {
if (_sbText.Length == 0)
{
return;
}
PdfTextElement textElem = new PdfTextElement(); PdfTextElement textElem = new PdfTextElement();
textElem.Font = _font; textElem.Font = _font;
textElem.FontSize = _fontSize; textElem.FontSize = _fontSize;
@@ -231,17 +231,98 @@ namespace VAR.PdfTools
textElem.VisibleText = PdfString_ToUnicode(textElem.RawText, _font); textElem.VisibleText = PdfString_ToUnicode(textElem.RawText, _font);
textElem.VisibleWidth = _textWidth * textElem.Matrix.Matrix[0, 0]; textElem.VisibleWidth = _textWidth * textElem.Matrix.Matrix[0, 0];
textElem.VisibleHeight = (_font.Height * _fontSize) * textElem.Matrix.Matrix[1, 1]; textElem.VisibleHeight = (_font.Height * _fontSize) * textElem.Matrix.Matrix[1, 1];
_textElements.Add(textElem); return textElem;
}
// Folds the text buffered in _sbText into the aggregate element
// _currentTextElement. The aggregate is not added to _textElements here.
private void FlushTextElementSoft()
{
// Nothing buffered: leave the aggregate as it is.
if (_sbText.Length == 0)
{
return;
}
PdfTextElement textElem = BuildTextElement();
// First fragment: create the aggregate, positioned with a copy of the
// fragment's matrix and with no font, raw text or dimensions of its own.
if (_currentTextElement == null)
{
_currentTextElement = new PdfTextElement();
_currentTextElement.Font = null;
_currentTextElement.FontSize = -1;
_currentTextElement.Matrix = textElem.Matrix.Copy();
_currentTextElement.RawText = string.Empty;
_currentTextElement.VisibleText = string.Empty;
_currentTextElement.VisibleWidth = 0;
_currentTextElement.VisibleHeight = 0;
}
// Append the fragment: text is concatenated, widths are summed and the
// height is the largest fragment height seen so far.
_currentTextElement.VisibleText += textElem.VisibleText;
_currentTextElement.VisibleWidth += textElem.VisibleWidth;
_currentTextElement.VisibleHeight = System.Math.Max(_currentTextElement.VisibleHeight, textElem.VisibleHeight);
// The fragment itself is kept as a child of the aggregate.
_currentTextElement.Childs.Add(textElem);
// Start a new, empty text buffer.
_sbText = new StringBuilder(); _sbText = new StringBuilder();
_textWidth = 0; _textWidth = 0;
} }
private PdfTextElement FindElementByText(string text) private void AddTextElement(PdfTextElement textElement)
{ {
if (string.IsNullOrEmpty(textElement.VisibleText.Trim()))
{
return;
}
_textElements.Add(textElement);
}
// Publishes the pending text to the list of text elements.
// An aggregate element under construction absorbs any buffered text first
// and is then published and cleared. Without an aggregate, the buffered
// text is published as a standalone element. The text buffer is reset only
// when it actually held text.
private void FlushTextElement()
{
    bool hasBufferedText = _sbText.Length != 0;

    if (_currentTextElement != null)
    {
        if (hasBufferedText)
        {
            FlushTextElementSoft();
        }
        AddTextElement(_currentTextElement);
        _currentTextElement = null;
    }
    else if (hasBufferedText)
    {
        AddTextElement(BuildTextElement());
    }

    if (hasBufferedText)
    {
        _sbText = new StringBuilder();
        _textWidth = 0;
    }
}
// Normalizes a text for fuzzy comparison: drops punctuation and whitespace
// ('.', ',', ':', ';', '-', '_', ' ', tab) and upper-cases the rest.
// Upper-casing uses the invariant culture so that the result does not depend
// on the culture of the current thread (e.g. 'i' under a Turkish culture).
private string SimplifyText(string text)
{
    StringBuilder sbResult = new StringBuilder(text.Length);
    foreach (char c in text)
    {
        if (c == '.' || c == ',' ||
            c == ':' || c == ';' ||
            c == '-' || c == '_' ||
            c == ' ' || c == '\t')
        {
            continue;
        }
        sbResult.Append(char.ToUpperInvariant(c));
    }
    return sbResult.ToString();
}
private PdfTextElement FindElementByText(string text, bool fuzzy)
{
string matchingText = fuzzy ? SimplifyText(text) : text;
foreach (PdfTextElement elem in _textElements) foreach (PdfTextElement elem in _textElements)
{ {
if (elem.VisibleText == text) string elemText = fuzzy ? SimplifyText(elem.VisibleText) : elem.VisibleText;
if (elemText == matchingText)
{ {
return elem; return elem;
} }
@@ -305,13 +386,13 @@ namespace VAR.PdfTools
private void OpEndText() private void OpEndText()
{ {
FlushTextElement(); FlushTextElementSoft();
inText = false; inText = false;
} }
private void OpTextFont(string fontName, double size) private void OpTextFont(string fontName, double size)
{ {
FlushTextElement(); FlushTextElementSoft();
_font = _page.Fonts[fontName]; _font = _page.Fonts[fontName];
_fontSize = size; _fontSize = size;
} }
@@ -321,7 +402,7 @@ namespace VAR.PdfTools
_textLeading = textLeading; _textLeading = textLeading;
} }
private void OpTesDisplace(double x, double y) private void OpTextDisplace(double x, double y)
{ {
FlushTextElement(); FlushTextElement();
var newMatrix = new Matrix3x3(); var newMatrix = new Matrix3x3();
@@ -332,7 +413,7 @@ namespace VAR.PdfTools
private void OpTextLineFeed() private void OpTextLineFeed()
{ {
OpTesDisplace(0, -_textLeading); OpTextDisplace(0, -_textLeading);
} }
private void OpSetTextMatrix(double a, double b, double c, double d, double e, double f) private void OpSetTextMatrix(double a, double b, double c, double d, double e, double f)
@@ -455,14 +536,14 @@ namespace VAR.PdfTools
{ {
double x = PdfElementUtils.GetReal(action.Parameters[0], 0); double x = PdfElementUtils.GetReal(action.Parameters[0], 0);
double y = PdfElementUtils.GetReal(action.Parameters[1], 0); double y = PdfElementUtils.GetReal(action.Parameters[1], 0);
OpTesDisplace(x, y); OpTextDisplace(x, y);
} }
else if (action.Token == "TD") else if (action.Token == "TD")
{ {
double x = PdfElementUtils.GetReal(action.Parameters[0], 0); double x = PdfElementUtils.GetReal(action.Parameters[0], 0);
double y = PdfElementUtils.GetReal(action.Parameters[1], 0); double y = PdfElementUtils.GetReal(action.Parameters[1], 0);
OpTextLeading(-y); OpTextLeading(-y);
OpTesDisplace(x, y); OpTextDisplace(x, y);
} }
else if (action.Token == "Tm") else if (action.Token == "Tm")
{ {
@@ -507,51 +588,89 @@ namespace VAR.PdfTools
public List<string> GetColumn(string column) public List<string> GetColumn(string column)
{ {
PdfTextElement columnHead = FindElementByText(column); return GetColumn(column, true);
}
public List<string> GetColumn(string column, bool fuzzy)
{
PdfTextElement columnHead = FindElementByText(column, fuzzy);
if(columnHead == null) if(columnHead == null)
{ {
return new List<string>(); return new List<string>();
} }
double headY = columnHead.GetY(); double headY = columnHead.GetY();
double headX1 = columnHead.GetX();
double headX2 = headX1 + columnHead.VisibleWidth;
// Get all the elements that intersects vertically and sort // Determine horizontal extent
var columnData = new List<PdfTextElement>(); double extentX1 = double.MinValue;
double extentX2 = double.MaxValue;
foreach (PdfTextElement elem in _textElements)
{
if(elem == columnHead){continue;}
if (TextElementHorizontalIntersection(columnHead, elem) == false) { continue; }
double elemX1 = elem.GetX();
double elemX2 = elemX1 + elem.VisibleWidth;
if (elemX2 < headX1)
{
if (elemX2 > extentX1)
{
extentX1 = elemX2;
}
}
if (elemX1 > headX2)
{
if (elemX1 < extentX2)
{
extentX2 = elemX1;
}
}
}
// Get all the elements that intersect vertically and are below the column head, then sort the results
var columnDataRaw = new List<PdfTextElement>();
foreach (PdfTextElement elem in _textElements) foreach (PdfTextElement elem in _textElements)
{ {
if (TextElementVerticalIntersection(columnHead, elem) == false) { continue; } if (TextElementVerticalIntersection(columnHead, elem) == false) { continue; }
// Only items below the column head
double elemY = elem.GetY(); double elemY = elem.GetY();
if (elemY >= headY) { continue; } if (elemY >= headY) { continue; }
columnDataRaw.Add(elem);
}
columnDataRaw = columnDataRaw.OrderByDescending(elem => elem.GetY()).ToList();
// Only items completely inside the extents, and break on the first element outside
var columnData = new List<PdfTextElement>();
foreach (PdfTextElement elem in columnDataRaw)
{
double elemX1 = elem.GetX();
double elemX2 = elemX1 + elem.VisibleWidth;
if (elemX1 < extentX1 || elemX2 > extentX2) { break; }
columnData.Add(elem); columnData.Add(elem);
} }
columnData = columnData.OrderByDescending(elem => elem.GetY()).ToList();
// Filter only nearest elements // Emit result
var result = new List<string>(); var result = new List<string>();
double prevY = headY;
double medDiff = 0;
bool first = true;
foreach (PdfTextElement elem in columnData) foreach (PdfTextElement elem in columnData)
{ {
double elemY = elem.GetY();
double diff = prevY - elemY;
prevY = elemY;
if (first)
{
first = false;
medDiff = diff;
}
if (diff > medDiff) { break; }
medDiff = (medDiff + diff) / 2;
result.Add(elem.VisibleText); result.Add(elem.VisibleText);
} }
return result; return result;
} }
public string GetField(string column) public string GetField(string field)
{ {
PdfTextElement fieldTitle = FindElementByText(column); return GetField(field, true);
}
public string GetField(string field, bool fuzzy)
{
PdfTextElement fieldTitle = FindElementByText(field, fuzzy);
if (fieldTitle == null) if (fieldTitle == null)
{ {
return null; return null;
@@ -577,6 +696,21 @@ namespace VAR.PdfTools
return fieldData.OrderBy(elem => elem.GetX()).FirstOrDefault().VisibleText; return fieldData.OrderBy(elem => elem.GetX()).FirstOrDefault().VisibleText;
} }
/// <summary>
/// Checks whether a text element matching the given text exists, using fuzzy matching.
/// </summary>
/// <param name="text">Text to look for.</param>
/// <returns>true when a matching text element is found; false otherwise.</returns>
public bool HasText(string text)
{
    const bool fuzzy = true;
    return HasText(text, fuzzy);
}

/// <summary>
/// Checks whether a text element matching the given text exists.
/// </summary>
/// <param name="text">Text to look for.</param>
/// <param name="fuzzy">Passed through to the element lookup to select fuzzy or exact matching.</param>
/// <returns>true when a matching text element is found; false otherwise.</returns>
public bool HasText(string text, bool fuzzy)
{
    return FindElementByText(text, fuzzy) != null;
}
#endregion #endregion
} }
} }

View File

@@ -11,4 +11,4 @@ using System.Runtime.InteropServices;
[assembly: AssemblyCulture("")] [assembly: AssemblyCulture("")]
[assembly: ComVisible(false)] [assembly: ComVisible(false)]
[assembly: Guid("eb7e003a-6a95-4002-809f-926c7c8a11e9")] [assembly: Guid("eb7e003a-6a95-4002-809f-926c7c8a11e9")]
[assembly: AssemblyVersion("1.0.*")] [assembly: AssemblyVersion("1.1.*")]

View File

@@ -0,0 +1,61 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<ProjectGuid>{EB7E003A-6A95-4002-809F-926C7C8A11E9}</ProjectGuid>
<OutputType>Library</OutputType>
<AppDesignerFolder>Properties</AppDesignerFolder>
<RootNamespace>VAR.PdfTools</RootNamespace>
<AssemblyName>VAR.PdfTools</AssemblyName>
<TargetFrameworkVersion>v3.5</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment>
<TargetFrameworkProfile />
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<ItemGroup>
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="System.Data" />
<Reference Include="System.Xml" />
</ItemGroup>
<ItemGroup>
<Compile Include="PdfContentAction.cs" />
<Compile Include="PdfDocument.cs" />
<Compile Include="PdfDocumentPage.cs" />
<Compile Include="PdfElements.cs" />
<Compile Include="PdfFilters.cs" />
<Compile Include="PdfFont.cs" />
<Compile Include="PdfParser.cs" />
<Compile Include="PdfStandar14FontMetrics.cs" />
<Compile Include="PdfTextExtractor.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
Other similar extension points exist, see Microsoft.Common.targets.
<Target Name="BeforeBuild">
</Target>
<Target Name="AfterBuild">
</Target>
-->
</Project>

View File

@@ -11,6 +11,7 @@
<AssemblyName>VAR.PdfTools</AssemblyName> <AssemblyName>VAR.PdfTools</AssemblyName>
<TargetFrameworkVersion>v4.6.1</TargetFrameworkVersion> <TargetFrameworkVersion>v4.6.1</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment> <FileAlignment>512</FileAlignment>
<TargetFrameworkProfile />
</PropertyGroup> </PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "> <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<DebugSymbols>true</DebugSymbols> <DebugSymbols>true</DebugSymbols>
@@ -34,9 +35,7 @@
<Reference Include="System.Core" /> <Reference Include="System.Core" />
<Reference Include="System.Xml.Linq" /> <Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" /> <Reference Include="System.Data.DataSetExtensions" />
<Reference Include="Microsoft.CSharp" />
<Reference Include="System.Data" /> <Reference Include="System.Data" />
<Reference Include="System.Net.Http" />
<Reference Include="System.Xml" /> <Reference Include="System.Xml" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
@@ -44,6 +43,7 @@
<Compile Include="PdfDocument.cs" /> <Compile Include="PdfDocument.cs" />
<Compile Include="PdfDocumentPage.cs" /> <Compile Include="PdfDocumentPage.cs" />
<Compile Include="PdfElements.cs" /> <Compile Include="PdfElements.cs" />
<Compile Include="PdfFilters.cs" />
<Compile Include="PdfFont.cs" /> <Compile Include="PdfFont.cs" />
<Compile Include="PdfParser.cs" /> <Compile Include="PdfParser.cs" />
<Compile Include="PdfStandar14FontMetrics.cs" /> <Compile Include="PdfStandar14FontMetrics.cs" />