Compare commits
11 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 29e49546fa | |||
| d46f8d2abe | |||
| 7d9b7981a8 | |||
| 9b2310ea96 | |||
| 58c09c1110 | |||
| 241b68bd0a | |||
| 85d998a8d3 | |||
| da908d0f36 | |||
| ed03166f6f | |||
| 17001d6c8a | |||
| 35ec8b478f |
68
README.md
Normal file
68
README.md
Normal file
@@ -0,0 +1,68 @@
|
||||
# .Net library and tool to work with PDF files
|
||||
|
||||
## Usage
|
||||
|
||||
### VAR.PdfTools
|
||||
Add the resulting assembly as a reference in your project, and add this line to your code:
|
||||
|
||||
using VAR.PdfTools;
|
||||
|
||||
Then extract the contents of a data column using:
|
||||
|
||||
var columnData = new List<string>();
|
||||
PdfDocument doc = PdfDocument.Load("document.pdf");
|
||||
foreach (PdfDocumentPage page in doc.Pages)
|
||||
{
|
||||
PdfTextExtractor extractor = new PdfTextExtractor(page);
|
||||
columnData.AddRange(extractor.GetColumn("Column"));
|
||||
}
|
||||
|
||||
Or the content of a field (text on the right of the indicated text):
|
||||
|
||||
var fieldData = new List<string>();
|
||||
PdfDocument doc = PdfDocument.Load("document.pdf");
|
||||
foreach (PdfDocumentPage page in doc.Pages)
|
||||
{
|
||||
PdfTextExtractor extractor = new PdfTextExtractor(page);
|
||||
fieldData.Add(extractor.GetField("Field"));
|
||||
}
|
||||
|
||||
### VAR.PdfTools.Workbench
|
||||
It is a simple Windows.Forms application to test the basic functionality of the library.
|
||||
|
||||
## Building
|
||||
Visual Studio 2015 and 2010 solutions are provided. Simply click Build in the IDE.
|
||||
|
||||
## Contributing
|
||||
1. Fork it!
|
||||
2. Create your feature branch: `git checkout -b my-new-feature`
|
||||
3. Commit your changes: `git commit -am 'Add some feature'`
|
||||
4. Push to the branch: `git push origin my-new-feature`
|
||||
5. Submit a pull request :D
|
||||
|
||||
## Credits
|
||||
* Valeriano Alfonso Rodriguez.
|
||||
|
||||
## License
|
||||
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2014-2015 Valeriano Alfonso Rodriguez
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
26
VAR.PdfTools.Net35.sln
Normal file
26
VAR.PdfTools.Net35.sln
Normal file
@@ -0,0 +1,26 @@
|
||||
|
||||
Microsoft Visual Studio Solution File, Format Version 11.00
|
||||
# Visual Studio 2010
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "VAR.PdfTools.Net35", "VAR.PdfTools\VAR.PdfTools.Net35.csproj", "{EB7E003A-6A95-4002-809F-926C7C8A11E9}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "VAR.PdfTools.Workbench.Net35", "VAR.PdfTools.Workbench\VAR.PdfTools.Workbench.Net35.csproj", "{A5825D8E-9F81-49E0-B610-8AE5E46D02EA}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
Release|Any CPU = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{EB7E003A-6A95-4002-809F-926C7C8A11E9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{EB7E003A-6A95-4002-809F-926C7C8A11E9}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{EB7E003A-6A95-4002-809F-926C7C8A11E9}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{EB7E003A-6A95-4002-809F-926C7C8A11E9}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{A5825D8E-9F81-49E0-B610-8AE5E46D02EA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{A5825D8E-9F81-49E0-B610-8AE5E46D02EA}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{A5825D8E-9F81-49E0-B610-8AE5E46D02EA}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{A5825D8E-9F81-49E0-B610-8AE5E46D02EA}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
@@ -1,24 +0,0 @@
|
||||
<?xml version="1.0" encoding="utf-8" ?>
|
||||
<configuration>
|
||||
<configSections>
|
||||
<sectionGroup name="userSettings" type="System.Configuration.UserSettingsGroup, System, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089" >
|
||||
<section name="VAR.PdfTools.Workbench.Properties.Settings" type="System.Configuration.ClientSettingsSection, System, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089" allowExeDefinition="MachineToLocalUser" requirePermission="false" />
|
||||
</sectionGroup>
|
||||
</configSections>
|
||||
<startup>
|
||||
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.6.1" />
|
||||
</startup>
|
||||
<userSettings>
|
||||
<VAR.PdfTools.Workbench.Properties.Settings>
|
||||
<setting name="LastPdfPath" serializeAs="String">
|
||||
<value />
|
||||
</setting>
|
||||
<setting name="LastColumnName" serializeAs="String">
|
||||
<value />
|
||||
</setting>
|
||||
<setting name="LastFieldName" serializeAs="String">
|
||||
<value />
|
||||
</setting>
|
||||
</VAR.PdfTools.Workbench.Properties.Settings>
|
||||
</userSettings>
|
||||
</configuration>
|
||||
29
VAR.PdfTools.Workbench/FrmPdfInfo.Designer.cs
generated
29
VAR.PdfTools.Workbench/FrmPdfInfo.Designer.cs
generated
@@ -38,13 +38,15 @@
|
||||
this.txtColumnName = new System.Windows.Forms.TextBox();
|
||||
this.txtFieldName = new System.Windows.Forms.TextBox();
|
||||
this.btnGetField = new System.Windows.Forms.Button();
|
||||
this.txtText = new System.Windows.Forms.TextBox();
|
||||
this.btnHasText = new System.Windows.Forms.Button();
|
||||
this.SuspendLayout();
|
||||
//
|
||||
// lblOutputs
|
||||
//
|
||||
this.lblOutputs.AutoSize = true;
|
||||
this.lblOutputs.Font = new System.Drawing.Font("Microsoft Sans Serif", 8.25F, System.Drawing.FontStyle.Bold, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
|
||||
this.lblOutputs.Location = new System.Drawing.Point(12, 130);
|
||||
this.lblOutputs.Location = new System.Drawing.Point(12, 143);
|
||||
this.lblOutputs.Name = "lblOutputs";
|
||||
this.lblOutputs.Size = new System.Drawing.Size(51, 13);
|
||||
this.lblOutputs.TabIndex = 11;
|
||||
@@ -88,11 +90,11 @@
|
||||
| System.Windows.Forms.AnchorStyles.Left)
|
||||
| System.Windows.Forms.AnchorStyles.Right)));
|
||||
this.txtOutput.Font = new System.Drawing.Font("Consolas", 8.25F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
|
||||
this.txtOutput.Location = new System.Drawing.Point(15, 146);
|
||||
this.txtOutput.Location = new System.Drawing.Point(15, 159);
|
||||
this.txtOutput.Multiline = true;
|
||||
this.txtOutput.Name = "txtOutput";
|
||||
this.txtOutput.ScrollBars = System.Windows.Forms.ScrollBars.Vertical;
|
||||
this.txtOutput.Size = new System.Drawing.Size(457, 303);
|
||||
this.txtOutput.Size = new System.Drawing.Size(457, 290);
|
||||
this.txtOutput.TabIndex = 7;
|
||||
//
|
||||
// btnProcess
|
||||
@@ -140,11 +142,30 @@
|
||||
this.btnGetField.UseVisualStyleBackColor = true;
|
||||
this.btnGetField.Click += new System.EventHandler(this.btnGetField_Click);
|
||||
//
|
||||
// txtText
|
||||
//
|
||||
this.txtText.Location = new System.Drawing.Point(15, 111);
|
||||
this.txtText.Name = "txtText";
|
||||
this.txtText.Size = new System.Drawing.Size(142, 20);
|
||||
this.txtText.TabIndex = 17;
|
||||
//
|
||||
// btnHasText
|
||||
//
|
||||
this.btnHasText.Location = new System.Drawing.Point(163, 109);
|
||||
this.btnHasText.Name = "btnHasText";
|
||||
this.btnHasText.Size = new System.Drawing.Size(75, 23);
|
||||
this.btnHasText.TabIndex = 16;
|
||||
this.btnHasText.Text = "HasText";
|
||||
this.btnHasText.UseVisualStyleBackColor = true;
|
||||
this.btnHasText.Click += new System.EventHandler(this.btnHasText_Click);
|
||||
//
|
||||
// FrmPdfInfo
|
||||
//
|
||||
this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
|
||||
this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
|
||||
this.ClientSize = new System.Drawing.Size(484, 461);
|
||||
this.Controls.Add(this.txtText);
|
||||
this.Controls.Add(this.btnHasText);
|
||||
this.Controls.Add(this.txtFieldName);
|
||||
this.Controls.Add(this.btnGetField);
|
||||
this.Controls.Add(this.txtColumnName);
|
||||
@@ -176,5 +197,7 @@
|
||||
private System.Windows.Forms.TextBox txtColumnName;
|
||||
private System.Windows.Forms.TextBox txtFieldName;
|
||||
private System.Windows.Forms.Button btnGetField;
|
||||
private System.Windows.Forms.TextBox txtText;
|
||||
private System.Windows.Forms.Button btnHasText;
|
||||
}
|
||||
}
|
||||
@@ -1,6 +1,5 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Data;
|
||||
using System.Linq;
|
||||
using System.Windows.Forms;
|
||||
|
||||
@@ -18,6 +17,7 @@ namespace VAR.PdfTools.Workbench
|
||||
txtPdfPath.Text = Properties.Settings.Default.LastPdfPath;
|
||||
txtColumnName.Text = Properties.Settings.Default.LastColumnName;
|
||||
txtFieldName.Text = Properties.Settings.Default.LastFieldName;
|
||||
txtText.Text = Properties.Settings.Default.LastText;
|
||||
}
|
||||
|
||||
private void FrmPdfInfo_FormClosing(object sender, FormClosingEventArgs e)
|
||||
@@ -25,6 +25,7 @@ namespace VAR.PdfTools.Workbench
|
||||
Properties.Settings.Default.LastPdfPath = txtPdfPath.Text;
|
||||
Properties.Settings.Default.LastColumnName = txtColumnName.Text;
|
||||
Properties.Settings.Default.LastFieldName = txtFieldName.Text;
|
||||
Properties.Settings.Default.LastText = txtText.Text;
|
||||
Properties.Settings.Default.Save();
|
||||
}
|
||||
|
||||
@@ -86,8 +87,9 @@ namespace VAR.PdfTools.Workbench
|
||||
PdfTextExtractor extractor = new PdfTextExtractor(page);
|
||||
foreach (PdfTextElement textElement in extractor.Elements)
|
||||
{
|
||||
lines.Add(string.Format("Text({0}, {1})({2}, {3}): \"{4}\"",
|
||||
lines.Add(string.Format("Text({0}, {1})({2}, {3})[{4}]: \"{5}\"",
|
||||
textElement.Matrix.Matrix[0, 2], textElement.Matrix.Matrix[1, 2], textElement.VisibleWidth, textElement.VisibleHeight,
|
||||
textElement.Font == null ? string.Empty : textElement.Font.Name,
|
||||
textElement.VisibleText));
|
||||
}
|
||||
}
|
||||
@@ -132,5 +134,25 @@ namespace VAR.PdfTools.Workbench
|
||||
}
|
||||
txtOutput.Lines = fieldData.ToArray();
|
||||
}
|
||||
|
||||
/// <summary>
/// Click handler for the "HasText" button. Loads the PDF whose path is in
/// <c>txtPdfPath</c> and writes one line per page to <c>txtOutput</c> stating
/// whether <c>PdfTextExtractor.HasText</c> reports the text in <c>txtText</c>.
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnHasText_Click(object sender, EventArgs e)
{
    // Bail out early with a message when the path does not point to a file.
    if (System.IO.File.Exists(txtPdfPath.Text) == false)
    {
        MessageBox.Show("File does not exist");
        return;
    }

    PdfDocument doc = PdfDocument.Load(txtPdfPath.Text);

    List<string> lines = new List<string>();
    int pageNum = 1;
    foreach (PdfDocumentPage page in doc.Pages)
    {
        PdfTextExtractor extractor = new PdfTextExtractor(page);
        lines.Add(string.Format("Page({0}) : {1}", pageNum, Convert.ToString(extractor.HasText(txtText.Text))));

        // Advance the 1-based page counter; without this every line was labelled "Page(1)".
        pageNum++;
    }
    txtOutput.Lines = lines.ToArray();
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -112,9 +112,9 @@
|
||||
<value>2.0</value>
|
||||
</resheader>
|
||||
<resheader name="reader">
|
||||
<value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
|
||||
<value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
|
||||
</resheader>
|
||||
<resheader name="writer">
|
||||
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
|
||||
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
|
||||
</resheader>
|
||||
</root>
|
||||
@@ -1,7 +1,4 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Threading.Tasks;
|
||||
using System.Windows.Forms;
|
||||
|
||||
namespace VAR.PdfTools.Workbench
|
||||
|
||||
@@ -11,4 +11,4 @@ using System.Runtime.InteropServices;
|
||||
[assembly: AssemblyCulture("")]
|
||||
[assembly: ComVisible(false)]
|
||||
[assembly: Guid("a5825d8e-9f81-49e0-b610-8ae5e46d02ea")]
|
||||
[assembly: AssemblyVersion("1.0.*")]
|
||||
[assembly: AssemblyVersion("1.1.*")]
|
||||
|
||||
@@ -1,71 +0,0 @@
|
||||
//------------------------------------------------------------------------------
|
||||
// <auto-generated>
|
||||
// This code was generated by a tool.
|
||||
// Runtime Version:4.0.30319.42000
|
||||
//
|
||||
// Changes to this file may cause incorrect behavior and will be lost if
|
||||
// the code is regenerated.
|
||||
// </auto-generated>
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
namespace VAR.PdfTools.Workbench.Properties
|
||||
{
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// A strongly-typed resource class, for looking up localized strings, etc.
|
||||
/// </summary>
|
||||
// This class was auto-generated by the StronglyTypedResourceBuilder
|
||||
// class via a tool like ResGen or Visual Studio.
|
||||
// To add or remove a member, edit your .ResX file then rerun ResGen
|
||||
// with the /str option, or rebuild your VS project.
|
||||
[global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Resources.Tools.StronglyTypedResourceBuilder", "4.0.0.0")]
|
||||
[global::System.Diagnostics.DebuggerNonUserCodeAttribute()]
|
||||
[global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
|
||||
internal class Resources
|
||||
{
|
||||
|
||||
private static global::System.Resources.ResourceManager resourceMan;
|
||||
|
||||
private static global::System.Globalization.CultureInfo resourceCulture;
|
||||
|
||||
[global::System.Diagnostics.CodeAnalysis.SuppressMessageAttribute("Microsoft.Performance", "CA1811:AvoidUncalledPrivateCode")]
|
||||
internal Resources()
|
||||
{
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Returns the cached ResourceManager instance used by this class.
|
||||
/// </summary>
|
||||
[global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
|
||||
internal static global::System.Resources.ResourceManager ResourceManager
|
||||
{
|
||||
get
|
||||
{
|
||||
if ((resourceMan == null))
|
||||
{
|
||||
global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("VAR.PdfTools.Workbench.Properties.Resources", typeof(Resources).Assembly);
|
||||
resourceMan = temp;
|
||||
}
|
||||
return resourceMan;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Overrides the current thread's CurrentUICulture property for all
|
||||
/// resource lookups using this strongly typed resource class.
|
||||
/// </summary>
|
||||
[global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
|
||||
internal static global::System.Globalization.CultureInfo Culture
|
||||
{
|
||||
get
|
||||
{
|
||||
return resourceCulture;
|
||||
}
|
||||
set
|
||||
{
|
||||
resourceCulture = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,117 +0,0 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<root>
|
||||
<!--
|
||||
Microsoft ResX Schema
|
||||
|
||||
Version 2.0
|
||||
|
||||
The primary goals of this format is to allow a simple XML format
|
||||
that is mostly human readable. The generation and parsing of the
|
||||
various data types are done through the TypeConverter classes
|
||||
associated with the data types.
|
||||
|
||||
Example:
|
||||
|
||||
... ado.net/XML headers & schema ...
|
||||
<resheader name="resmimetype">text/microsoft-resx</resheader>
|
||||
<resheader name="version">2.0</resheader>
|
||||
<resheader name="reader">System.Resources.ResXResourceReader, System.Windows.Forms, ...</resheader>
|
||||
<resheader name="writer">System.Resources.ResXResourceWriter, System.Windows.Forms, ...</resheader>
|
||||
<data name="Name1"><value>this is my long string</value><comment>this is a comment</comment></data>
|
||||
<data name="Color1" type="System.Drawing.Color, System.Drawing">Blue</data>
|
||||
<data name="Bitmap1" mimetype="application/x-microsoft.net.object.binary.base64">
|
||||
<value>[base64 mime encoded serialized .NET Framework object]</value>
|
||||
</data>
|
||||
<data name="Icon1" type="System.Drawing.Icon, System.Drawing" mimetype="application/x-microsoft.net.object.bytearray.base64">
|
||||
<value>[base64 mime encoded string representing a byte array form of the .NET Framework object]</value>
|
||||
<comment>This is a comment</comment>
|
||||
</data>
|
||||
|
||||
There are any number of "resheader" rows that contain simple
|
||||
name/value pairs.
|
||||
|
||||
Each data row contains a name, and value. The row also contains a
|
||||
type or mimetype. Type corresponds to a .NET class that support
|
||||
text/value conversion through the TypeConverter architecture.
|
||||
Classes that don't support this are serialized and stored with the
|
||||
mimetype set.
|
||||
|
||||
The mimetype is used for serialized objects, and tells the
|
||||
ResXResourceReader how to depersist the object. This is currently not
|
||||
extensible. For a given mimetype the value must be set accordingly:
|
||||
|
||||
Note - application/x-microsoft.net.object.binary.base64 is the format
|
||||
that the ResXResourceWriter will generate, however the reader can
|
||||
read any of the formats listed below.
|
||||
|
||||
mimetype: application/x-microsoft.net.object.binary.base64
|
||||
value : The object must be serialized with
|
||||
: System.Serialization.Formatters.Binary.BinaryFormatter
|
||||
: and then encoded with base64 encoding.
|
||||
|
||||
mimetype: application/x-microsoft.net.object.soap.base64
|
||||
value : The object must be serialized with
|
||||
: System.Runtime.Serialization.Formatters.Soap.SoapFormatter
|
||||
: and then encoded with base64 encoding.
|
||||
|
||||
mimetype: application/x-microsoft.net.object.bytearray.base64
|
||||
value : The object must be serialized into a byte array
|
||||
: using a System.ComponentModel.TypeConverter
|
||||
: and then encoded with base64 encoding.
|
||||
-->
|
||||
<xsd:schema id="root" xmlns="" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata">
|
||||
<xsd:element name="root" msdata:IsDataSet="true">
|
||||
<xsd:complexType>
|
||||
<xsd:choice maxOccurs="unbounded">
|
||||
<xsd:element name="metadata">
|
||||
<xsd:complexType>
|
||||
<xsd:sequence>
|
||||
<xsd:element name="value" type="xsd:string" minOccurs="0" />
|
||||
</xsd:sequence>
|
||||
<xsd:attribute name="name" type="xsd:string" />
|
||||
<xsd:attribute name="type" type="xsd:string" />
|
||||
<xsd:attribute name="mimetype" type="xsd:string" />
|
||||
</xsd:complexType>
|
||||
</xsd:element>
|
||||
<xsd:element name="assembly">
|
||||
<xsd:complexType>
|
||||
<xsd:attribute name="alias" type="xsd:string" />
|
||||
<xsd:attribute name="name" type="xsd:string" />
|
||||
</xsd:complexType>
|
||||
</xsd:element>
|
||||
<xsd:element name="data">
|
||||
<xsd:complexType>
|
||||
<xsd:sequence>
|
||||
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
|
||||
<xsd:element name="comment" type="xsd:string" minOccurs="0" msdata:Ordinal="2" />
|
||||
</xsd:sequence>
|
||||
<xsd:attribute name="name" type="xsd:string" msdata:Ordinal="1" />
|
||||
<xsd:attribute name="type" type="xsd:string" msdata:Ordinal="3" />
|
||||
<xsd:attribute name="mimetype" type="xsd:string" msdata:Ordinal="4" />
|
||||
</xsd:complexType>
|
||||
</xsd:element>
|
||||
<xsd:element name="resheader">
|
||||
<xsd:complexType>
|
||||
<xsd:sequence>
|
||||
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
|
||||
</xsd:sequence>
|
||||
<xsd:attribute name="name" type="xsd:string" use="required" />
|
||||
</xsd:complexType>
|
||||
</xsd:element>
|
||||
</xsd:choice>
|
||||
</xsd:complexType>
|
||||
</xsd:element>
|
||||
</xsd:schema>
|
||||
<resheader name="resmimetype">
|
||||
<value>text/microsoft-resx</value>
|
||||
</resheader>
|
||||
<resheader name="version">
|
||||
<value>2.0</value>
|
||||
</resheader>
|
||||
<resheader name="reader">
|
||||
<value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
|
||||
</resheader>
|
||||
<resheader name="writer">
|
||||
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
|
||||
</resheader>
|
||||
</root>
|
||||
@@ -12,7 +12,7 @@ namespace VAR.PdfTools.Workbench.Properties {
|
||||
|
||||
|
||||
[global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
|
||||
[global::System.CodeDom.Compiler.GeneratedCodeAttribute("Microsoft.VisualStudio.Editors.SettingsDesigner.SettingsSingleFileGenerator", "14.0.0.0")]
|
||||
[global::System.CodeDom.Compiler.GeneratedCodeAttribute("Microsoft.VisualStudio.Editors.SettingsDesigner.SettingsSingleFileGenerator", "10.0.0.0")]
|
||||
internal sealed partial class Settings : global::System.Configuration.ApplicationSettingsBase {
|
||||
|
||||
private static Settings defaultInstance = ((Settings)(global::System.Configuration.ApplicationSettingsBase.Synchronized(new Settings())));
|
||||
@@ -58,5 +58,17 @@ namespace VAR.PdfTools.Workbench.Properties {
|
||||
this["LastFieldName"] = value;
|
||||
}
|
||||
}
|
||||
|
||||
[global::System.Configuration.UserScopedSettingAttribute()]
|
||||
[global::System.Diagnostics.DebuggerNonUserCodeAttribute()]
|
||||
[global::System.Configuration.DefaultSettingValueAttribute("")]
|
||||
public string LastText {
|
||||
get {
|
||||
return ((string)(this["LastText"]));
|
||||
}
|
||||
set {
|
||||
this["LastText"] = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,5 +11,8 @@
|
||||
<Setting Name="LastFieldName" Type="System.String" Scope="User">
|
||||
<Value Profile="(Default)" />
|
||||
</Setting>
|
||||
<Setting Name="LastText" Type="System.String" Scope="User">
|
||||
<Value Profile="(Default)" />
|
||||
</Setting>
|
||||
</Settings>
|
||||
</SettingsFile>
|
||||
86
VAR.PdfTools.Workbench/VAR.PdfTools.Workbench.Net35.csproj
Normal file
86
VAR.PdfTools.Workbench/VAR.PdfTools.Workbench.Net35.csproj
Normal file
@@ -0,0 +1,86 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
|
||||
<PropertyGroup>
|
||||
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
|
||||
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
|
||||
<ProjectGuid>{A5825D8E-9F81-49E0-B610-8AE5E46D02EA}</ProjectGuid>
|
||||
<OutputType>WinExe</OutputType>
|
||||
<AppDesignerFolder>Properties</AppDesignerFolder>
|
||||
<RootNamespace>VAR.PdfTools.Workbench</RootNamespace>
|
||||
<AssemblyName>VAR.PdfTools.Workbench</AssemblyName>
|
||||
<TargetFrameworkVersion>v3.5</TargetFrameworkVersion>
|
||||
<FileAlignment>512</FileAlignment>
|
||||
<AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
|
||||
<TargetFrameworkProfile />
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
|
||||
<PlatformTarget>AnyCPU</PlatformTarget>
|
||||
<DebugSymbols>true</DebugSymbols>
|
||||
<DebugType>full</DebugType>
|
||||
<Optimize>false</Optimize>
|
||||
<OutputPath>bin\Debug\</OutputPath>
|
||||
<DefineConstants>DEBUG;TRACE</DefineConstants>
|
||||
<ErrorReport>prompt</ErrorReport>
|
||||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
|
||||
<PlatformTarget>AnyCPU</PlatformTarget>
|
||||
<DebugType>pdbonly</DebugType>
|
||||
<Optimize>true</Optimize>
|
||||
<OutputPath>bin\Release\</OutputPath>
|
||||
<DefineConstants>TRACE</DefineConstants>
|
||||
<ErrorReport>prompt</ErrorReport>
|
||||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup>
|
||||
<StartupObject />
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<Reference Include="System" />
|
||||
<Reference Include="System.Core" />
|
||||
<Reference Include="System.Xml.Linq" />
|
||||
<Reference Include="System.Data.DataSetExtensions" />
|
||||
<Reference Include="System.Data" />
|
||||
<Reference Include="System.Deployment" />
|
||||
<Reference Include="System.Drawing" />
|
||||
<Reference Include="System.Windows.Forms" />
|
||||
<Reference Include="System.Xml" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Compile Include="FrmPdfInfo.cs">
|
||||
<SubType>Form</SubType>
|
||||
</Compile>
|
||||
<Compile Include="FrmPdfInfo.Designer.cs">
|
||||
<DependentUpon>FrmPdfInfo.cs</DependentUpon>
|
||||
</Compile>
|
||||
<Compile Include="Program.cs" />
|
||||
<Compile Include="Properties\AssemblyInfo.cs" />
|
||||
<EmbeddedResource Include="FrmPdfInfo.resx">
|
||||
<DependentUpon>FrmPdfInfo.cs</DependentUpon>
|
||||
</EmbeddedResource>
|
||||
<None Include="Properties\Settings.settings">
|
||||
<Generator>SettingsSingleFileGenerator</Generator>
|
||||
<LastGenOutput>Settings.Designer.cs</LastGenOutput>
|
||||
</None>
|
||||
<Compile Include="Properties\Settings.Designer.cs">
|
||||
<AutoGen>True</AutoGen>
|
||||
<DependentUpon>Settings.settings</DependentUpon>
|
||||
<DesignTimeSharedInput>True</DesignTimeSharedInput>
|
||||
</Compile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\VAR.PdfTools\VAR.PdfTools.csproj">
|
||||
<Project>{eb7e003a-6a95-4002-809f-926c7c8a11e9}</Project>
|
||||
<Name>VAR.PdfTools</Name>
|
||||
</ProjectReference>
|
||||
</ItemGroup>
|
||||
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
|
||||
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
|
||||
Other similar extension points exist, see Microsoft.Common.targets.
|
||||
<Target Name="BeforeBuild">
|
||||
</Target>
|
||||
<Target Name="AfterBuild">
|
||||
</Target>
|
||||
-->
|
||||
</Project>
|
||||
@@ -12,6 +12,7 @@
|
||||
<TargetFrameworkVersion>v4.6.1</TargetFrameworkVersion>
|
||||
<FileAlignment>512</FileAlignment>
|
||||
<AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
|
||||
<TargetFrameworkProfile />
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
|
||||
<PlatformTarget>AnyCPU</PlatformTarget>
|
||||
@@ -40,11 +41,9 @@
|
||||
<Reference Include="System.Core" />
|
||||
<Reference Include="System.Xml.Linq" />
|
||||
<Reference Include="System.Data.DataSetExtensions" />
|
||||
<Reference Include="Microsoft.CSharp" />
|
||||
<Reference Include="System.Data" />
|
||||
<Reference Include="System.Deployment" />
|
||||
<Reference Include="System.Drawing" />
|
||||
<Reference Include="System.Net.Http" />
|
||||
<Reference Include="System.Windows.Forms" />
|
||||
<Reference Include="System.Xml" />
|
||||
</ItemGroup>
|
||||
@@ -60,15 +59,6 @@
|
||||
<EmbeddedResource Include="FrmPdfInfo.resx">
|
||||
<DependentUpon>FrmPdfInfo.cs</DependentUpon>
|
||||
</EmbeddedResource>
|
||||
<EmbeddedResource Include="Properties\Resources.resx">
|
||||
<Generator>ResXFileCodeGenerator</Generator>
|
||||
<LastGenOutput>Resources.Designer.cs</LastGenOutput>
|
||||
<SubType>Designer</SubType>
|
||||
</EmbeddedResource>
|
||||
<Compile Include="Properties\Resources.Designer.cs">
|
||||
<AutoGen>True</AutoGen>
|
||||
<DependentUpon>Resources.resx</DependentUpon>
|
||||
</Compile>
|
||||
<None Include="Properties\Settings.settings">
|
||||
<Generator>SettingsSingleFileGenerator</Generator>
|
||||
<LastGenOutput>Settings.Designer.cs</LastGenOutput>
|
||||
@@ -79,9 +69,6 @@
|
||||
<DesignTimeSharedInput>True</DesignTimeSharedInput>
|
||||
</Compile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="App.config" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\VAR.PdfTools\VAR.PdfTools.csproj">
|
||||
<Project>{eb7e003a-6a95-4002-809f-926c7c8a11e9}</Project>
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.IO.Compression;
|
||||
using System.Linq;
|
||||
|
||||
namespace VAR.PdfTools
|
||||
@@ -36,51 +35,14 @@ namespace VAR.PdfTools
|
||||
|
||||
#region Private methods
|
||||
|
||||
private static byte[] DecodeFlateStreamData(byte[] streamData)
|
||||
private static void ApplyFilterToStream(PdfStream stream, string filter)
|
||||
{
|
||||
MemoryStream msInput = new MemoryStream(streamData);
|
||||
MemoryStream msOutput = new MemoryStream();
|
||||
|
||||
// It seems to work when skipping the first two bytes.
|
||||
byte header;
|
||||
header = (byte)msInput.ReadByte();
|
||||
header = (byte)msInput.ReadByte();
|
||||
|
||||
DeflateStream zip = new DeflateStream(msInput, CompressionMode.Decompress, true);
|
||||
int cbRead;
|
||||
byte[] abResult = new byte[1024];
|
||||
do
|
||||
{
|
||||
cbRead = zip.Read(abResult, 0, abResult.Length);
|
||||
if (cbRead > 0)
|
||||
{
|
||||
msOutput.Write(abResult, 0, cbRead);
|
||||
}
|
||||
}
|
||||
while (cbRead > 0);
|
||||
zip.Close();
|
||||
msOutput.Flush();
|
||||
if (msOutput.Length >= 0)
|
||||
{
|
||||
msOutput.Capacity = (int)msOutput.Length;
|
||||
return msOutput.GetBuffer();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private static void ApplyFiltersToStreams(PdfStream stream)
|
||||
{
|
||||
string filter = stream.Dictionary.GetParamAsString("Filter");
|
||||
if (filter == "FlateDecode")
|
||||
{
|
||||
stream.OriginalData = stream.Data;
|
||||
stream.OriginalFilter = stream.Dictionary.Values["Filter"];
|
||||
byte[] decodedStreamData = DecodeFlateStreamData(stream.Data);
|
||||
byte[] decodedStreamData = PdfFilters.FlateDecode.Decode(stream.Data);
|
||||
stream.Data = decodedStreamData;
|
||||
stream.Dictionary.Values["Length"] = new PdfInteger { Value = decodedStreamData.Length };
|
||||
stream.Dictionary.Values.Remove("Filter");
|
||||
}
|
||||
else if(filter == "ASCII85Decode" || filter == "A85")
|
||||
else if (filter == "ASCII85Decode" || filter == "A85")
|
||||
{
|
||||
// FIXME: Implement this filter
|
||||
}
|
||||
@@ -92,11 +54,55 @@ namespace VAR.PdfTools
|
||||
{
|
||||
// FIXME: Implement this filter
|
||||
}
|
||||
else
|
||||
{
|
||||
// FIXME: Implement the rest of filters
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Decodes a stream in place by running every filter named in its "Filter" dictionary entry.
/// </summary>
/// <remarks>
/// The entry may be a single PdfString or PdfName, or a PdfArray of them; array items are applied
/// in array order. Before any filter runs, the current data and the filter entry are saved in
/// OriginalData and OriginalFilter. Afterwards "Length" is rewritten from the resulting data and
/// the "Filter" entry is removed. A stream without a "Filter" entry is left untouched.
/// </remarks>
/// <param name="stream">Stream whose Data and Dictionary are updated.</param>
/// <exception cref="Exception">
/// The filter entry, or an item of the filter array, is neither a string nor a name.
/// </exception>
private static void ApplyFiltersToStreams(PdfStream stream)
{
    IPdfElement filterSpec;
    if (!stream.Dictionary.Values.TryGetValue("Filter", out filterSpec))
    {
        return;
    }

    // Keep the undecoded form around before touching anything.
    stream.OriginalData = stream.Data;
    stream.OriginalFilter = filterSpec;

    // Treat a lone filter as a one-item chain so both shapes share the same loop.
    PdfArray filterArray = filterSpec as PdfArray;
    IEnumerable<IPdfElement> filterChain;
    if (filterArray != null)
    {
        filterChain = filterArray.Values;
    }
    else
    {
        filterChain = new IPdfElement[] { filterSpec };
    }

    foreach (IPdfElement entry in filterChain)
    {
        string filterName;
        PdfString asString = entry as PdfString;
        PdfName asName = entry as PdfName;
        if (asString != null)
        {
            filterName = asString.Value;
        }
        else if (asName != null)
        {
            filterName = asName.Value;
        }
        else
        {
            // Covers nested arrays and any other element type.
            throw new Exception("PdfFilter not correctly specified");
        }
        ApplyFilterToStream(stream, filterName);
    }

    stream.Dictionary.Values["Length"] = new PdfInteger { Value = stream.Data.Length };
    stream.Dictionary.Values.Remove("Filter");
}
|
||||
|
||||
private static IPdfElement ResolveIndirectReferences(IPdfElement elem, Dictionary<int, PdfObject> dictReferences)
|
||||
{
|
||||
if (elem is PdfObjectReference)
|
||||
@@ -211,7 +217,7 @@ namespace VAR.PdfTools
|
||||
do
|
||||
{
|
||||
PdfObject obj = parser.ParseObject(doc.Objects);
|
||||
if (obj != null)
|
||||
if (obj != null && obj.Data != null)
|
||||
{
|
||||
if (obj.Data is PdfStream)
|
||||
{
|
||||
|
||||
@@ -58,6 +58,7 @@ namespace VAR.PdfTools
|
||||
foreach (KeyValuePair<string, IPdfElement> pair in fonts.Values)
|
||||
{
|
||||
var font = new PdfFont(pair.Value as PdfDictionary);
|
||||
font.Name = pair.Key;
|
||||
_fonts.Add(pair.Key, font);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,44 +26,44 @@ namespace VAR.PdfTools
|
||||
|
||||
public class PdfBoolean : IPdfElement
|
||||
{
|
||||
public PdfElementTypes Type { get; private set; } = PdfElementTypes.Boolean;
|
||||
public PdfElementTypes Type { get { return PdfElementTypes.Boolean; } }
|
||||
public bool Value { get; set; }
|
||||
}
|
||||
|
||||
public class PdfInteger : IPdfElement
|
||||
{
|
||||
public PdfElementTypes Type { get; private set; } = PdfElementTypes.Integer;
|
||||
public PdfElementTypes Type { get { return PdfElementTypes.Integer; } }
|
||||
public long Value { get; set; }
|
||||
}
|
||||
|
||||
public class PdfReal : IPdfElement
|
||||
{
|
||||
public PdfElementTypes Type { get; private set; } = PdfElementTypes.Real;
|
||||
public PdfElementTypes Type { get { return PdfElementTypes.Real; } }
|
||||
public double Value { get; set; }
|
||||
}
|
||||
|
||||
public class PdfString : IPdfElement
|
||||
{
|
||||
public PdfElementTypes Type { get; private set; } = PdfElementTypes.String;
|
||||
public PdfElementTypes Type { get { return PdfElementTypes.String; } }
|
||||
public string Value { get; set; }
|
||||
}
|
||||
|
||||
public class PdfName : IPdfElement
|
||||
{
|
||||
public PdfElementTypes Type { get; private set; } = PdfElementTypes.Name;
|
||||
public PdfElementTypes Type { get { return PdfElementTypes.Name; } }
|
||||
public string Value { get; set; }
|
||||
}
|
||||
|
||||
public class PdfArray : IPdfElement
|
||||
{
|
||||
public PdfElementTypes Type { get; private set; } = PdfElementTypes.Array;
|
||||
public PdfElementTypes Type { get { return PdfElementTypes.Array; } }
|
||||
private List<IPdfElement> _values = new List<IPdfElement>();
|
||||
public List<IPdfElement> Values { get { return _values; } }
|
||||
}
|
||||
|
||||
public class PdfDictionary : IPdfElement
|
||||
{
|
||||
public PdfElementTypes Type { get; private set; } = PdfElementTypes.Dictionary;
|
||||
public PdfElementTypes Type { get { return PdfElementTypes.Dictionary; } }
|
||||
private Dictionary<string, IPdfElement> _values = new Dictionary<string, IPdfElement>();
|
||||
public Dictionary<string, IPdfElement> Values { get { return _values; } }
|
||||
|
||||
@@ -112,7 +112,7 @@ namespace VAR.PdfTools
|
||||
{
|
||||
PdfArray array = value as PdfArray;
|
||||
MemoryStream memStream = new MemoryStream();
|
||||
foreach(IPdfElement elem in array.Values)
|
||||
foreach (IPdfElement elem in array.Values)
|
||||
{
|
||||
PdfStream stream = elem as PdfStream;
|
||||
if (stream == null) { continue; }
|
||||
@@ -135,19 +135,19 @@ namespace VAR.PdfTools
|
||||
|
||||
public class PdfNull : IPdfElement
|
||||
{
|
||||
public PdfElementTypes Type { get; private set; } = PdfElementTypes.Null;
|
||||
public PdfElementTypes Type { get { return PdfElementTypes.Null; } }
|
||||
}
|
||||
|
||||
public class PdfObjectReference : IPdfElement
|
||||
{
|
||||
public PdfElementTypes Type { get; private set; } = PdfElementTypes.ObjectReference;
|
||||
public PdfElementTypes Type { get { return PdfElementTypes.ObjectReference; } }
|
||||
public int ObjectID { get; set; }
|
||||
public int ObjectGeneration { get; set; }
|
||||
}
|
||||
|
||||
public class PdfStream : IPdfElement
|
||||
{
|
||||
public PdfElementTypes Type { get; private set; } = PdfElementTypes.Stream;
|
||||
public PdfElementTypes Type { get { return PdfElementTypes.Stream; } }
|
||||
public PdfDictionary Dictionary { get; set; }
|
||||
public byte[] Data { get; set; }
|
||||
|
||||
@@ -157,18 +157,18 @@ namespace VAR.PdfTools
|
||||
|
||||
public class PdfObject : IPdfElement
|
||||
{
|
||||
public PdfElementTypes Type { get; private set; } = PdfElementTypes.Object;
|
||||
public PdfElementTypes Type { get { return PdfElementTypes.Object; } }
|
||||
public int ObjectID { get; set; }
|
||||
public int ObjectGeneration { get; set; }
|
||||
public IPdfElement Data { get; set; }
|
||||
public int UsageCount { get; set; } = 0;
|
||||
public int UsageCount { get; set; }
|
||||
}
|
||||
|
||||
public static class PdfElementUtils
|
||||
{
|
||||
public static double GetReal(IPdfElement elem, double defaultValue)
|
||||
{
|
||||
if(elem == null)
|
||||
if (elem == null)
|
||||
{
|
||||
return defaultValue;
|
||||
}
|
||||
|
||||
50
VAR.PdfTools/PdfFilters.cs
Normal file
50
VAR.PdfTools/PdfFilters.cs
Normal file
@@ -0,0 +1,50 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.IO.Compression;
|
||||
|
||||
namespace VAR.PdfTools
|
||||
{
|
||||
/// <summary>
/// Codecs for the filters that can be named in a PDF stream dictionary.
/// </summary>
public static class PdfFilters
{
    /// <summary>
    /// The "FlateDecode" filter.
    /// </summary>
    public class FlateDecode
    {
        /// <summary>
        /// Not implemented; always throws.
        /// </summary>
        /// <param name="streamData">Data that would be compressed.</param>
        /// <returns>Never returns.</returns>
        /// <exception cref="NotImplementedException">Always.</exception>
        public byte[] Encode(byte[] streamData)
        {
            throw new NotImplementedException("FlateFilter.Encode: Not implemented");
        }

        /// <summary>
        /// Inflates Flate-compressed stream data.
        /// </summary>
        /// <param name="streamData">
        /// Compressed bytes. The first two bytes are discarded unread and the rest is fed to
        /// <see cref="DeflateStream"/>.
        /// </param>
        /// <returns>The decompressed bytes; an empty array when nothing could be inflated.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="streamData"/> is null.</exception>
        /// <exception cref="InvalidDataException">The payload is not valid deflate data.</exception>
        public static byte[] Decode(byte[] streamData)
        {
            using (MemoryStream msInput = new MemoryStream(streamData))
            using (MemoryStream msOutput = new MemoryStream())
            {
                // DeflateStream understands raw deflate only, so step over the two-byte zlib
                // header (RFC 1950) that precedes the compressed payload.
                // NOTE(review): a zlib header with a preset dictionary is longer than two bytes
                // and is not handled here - confirm whether such streams can occur.
                msInput.ReadByte();
                msInput.ReadByte();

                // leaveOpen: msInput is disposed by the enclosing using.
                using (DeflateStream zip = new DeflateStream(msInput, CompressionMode.Decompress, true))
                {
                    byte[] abResult = new byte[1024];
                    int cbRead;
                    while ((cbRead = zip.Read(abResult, 0, abResult.Length)) > 0)
                    {
                        msOutput.Write(abResult, 0, cbRead);
                    }
                }

                // ToArray copies exactly Length bytes, so no capacity trimming is needed.
                return msOutput.ToArray();
            }
        }
    }
}
|
||||
}
|
||||
@@ -15,6 +15,8 @@ namespace VAR.PdfTools
|
||||
|
||||
private double _height = 1.0;
|
||||
|
||||
private string _name = string.Empty;
|
||||
|
||||
private bool _tainted = false;
|
||||
|
||||
#endregion
|
||||
@@ -25,6 +27,8 @@ namespace VAR.PdfTools
|
||||
|
||||
public double Height { get { return _height; } }
|
||||
|
||||
public string Name { get { return _name; } set { _name = value; } }
|
||||
|
||||
public bool Tainted { get { return _tainted; } }
|
||||
|
||||
#endregion
|
||||
|
||||
@@ -23,13 +23,6 @@ namespace VAR.PdfTools
|
||||
public PdfParser(byte[] stream)
|
||||
{
|
||||
_stream = stream;
|
||||
|
||||
// Intentar usar el separador decimal de la cultura
|
||||
try
|
||||
{
|
||||
_decimalSeparator = CultureInfo.CurrentUICulture.NumberFormat.NumberDecimalSeparator;
|
||||
}
|
||||
catch { }
|
||||
}
|
||||
|
||||
#endregion
|
||||
@@ -436,7 +429,7 @@ namespace VAR.PdfTools
|
||||
if (dotCount == 1)
|
||||
{
|
||||
PdfReal obj = new PdfReal();
|
||||
obj.Value = Convert.ToDouble(sbNumber.ToString());
|
||||
obj.Value = Convert.ToDouble(sbNumber.ToString(), CultureInfo.InvariantCulture);
|
||||
return obj;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -139,6 +139,9 @@ namespace VAR.PdfTools
|
||||
|
||||
public double VisibleHeight { get; set; }
|
||||
|
||||
private List<PdfTextElement> _childs = new List<PdfTextElement>();
|
||||
public List<PdfTextElement> Childs { get { return _childs; } }
|
||||
|
||||
#endregion
|
||||
|
||||
#region Public methods
|
||||
@@ -179,6 +182,8 @@ namespace VAR.PdfTools
|
||||
private StringBuilder _sbText = new StringBuilder();
|
||||
private double _textWidth = 0;
|
||||
|
||||
PdfTextElement _currentTextElement = null;
|
||||
|
||||
#endregion
|
||||
|
||||
#region Properties
|
||||
@@ -216,13 +221,8 @@ namespace VAR.PdfTools
|
||||
return sbText.ToString();
|
||||
}
|
||||
|
||||
private void FlushTextElement()
|
||||
private PdfTextElement BuildTextElement()
|
||||
{
|
||||
if (_sbText.Length == 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
PdfTextElement textElem = new PdfTextElement();
|
||||
textElem.Font = _font;
|
||||
textElem.FontSize = _fontSize;
|
||||
@@ -231,17 +231,98 @@ namespace VAR.PdfTools
|
||||
textElem.VisibleText = PdfString_ToUnicode(textElem.RawText, _font);
|
||||
textElem.VisibleWidth = _textWidth * textElem.Matrix.Matrix[0, 0];
|
||||
textElem.VisibleHeight = (_font.Height * _fontSize) * textElem.Matrix.Matrix[1, 1];
|
||||
_textElements.Add(textElem);
|
||||
return textElem;
|
||||
}
|
||||
|
||||
/// <summary>
/// Converts the buffered text into a text element and merges it, as a child, into the composite
/// element currently being accumulated. The composite itself is not added to the element list
/// by this method.
/// </summary>
private void FlushTextElementSoft()
{
    // Nothing buffered, nothing to merge.
    if (_sbText.Length == 0) { return; }

    PdfTextElement fragment = BuildTextElement();

    if (_currentTextElement == null)
    {
        // First fragment: open an empty composite placed at this fragment's matrix.
        _currentTextElement = new PdfTextElement();
        _currentTextElement.Font = null;
        _currentTextElement.FontSize = -1;
        _currentTextElement.Matrix = fragment.Matrix.Copy();
        _currentTextElement.RawText = string.Empty;
        _currentTextElement.VisibleText = string.Empty;
        _currentTextElement.VisibleWidth = 0;
        _currentTextElement.VisibleHeight = 0;
    }

    // Text and width accumulate; height is the tallest fragment seen so far.
    PdfTextElement composite = _currentTextElement;
    composite.VisibleText = composite.VisibleText + fragment.VisibleText;
    composite.VisibleWidth = composite.VisibleWidth + fragment.VisibleWidth;
    composite.VisibleHeight = System.Math.Max(composite.VisibleHeight, fragment.VisibleHeight);
    composite.Childs.Add(fragment);

    // Start a fresh buffer for whatever text comes next.
    _sbText = new StringBuilder();
    _textWidth = 0;
}
|
||||
|
||||
private PdfTextElement FindElementByText(string text)
|
||||
/// <summary>
/// Appends a text element to the list of extracted elements, unless its visible text is empty
/// once surrounding white space is trimmed.
/// </summary>
/// <param name="textElement">Element to store.</param>
private void AddTextElement(PdfTextElement textElement)
{
    bool hasVisibleContent = textElement.VisibleText.Trim().Length > 0;
    if (hasVisibleContent)
    {
        _textElements.Add(textElement);
    }
}
|
||||
|
||||
/// <summary>
/// Emits whatever text is pending: the composite element being accumulated (after merging any
/// buffered text into it) or, when there is no composite, a stand-alone element built from the
/// buffer. Emitted elements go through AddTextElement.
/// </summary>
private void FlushTextElement()
{
    bool hasBufferedText = _sbText.Length != 0;

    if (_currentTextElement != null)
    {
        if (hasBufferedText)
        {
            // Fold the remaining buffer into the composite before emitting it.
            FlushTextElementSoft();
        }
        AddTextElement(_currentTextElement);
        _currentTextElement = null;
    }
    else if (hasBufferedText)
    {
        AddTextElement(BuildTextElement());
    }

    if (!hasBufferedText)
    {
        // The buffer was already empty; leave it and the running width as they are.
        return;
    }

    _sbText = new StringBuilder();
    _textWidth = 0;
}
|
||||
|
||||
/// <summary>
/// Normalises text for fuzzy comparison: drops '.', ',', ':', ';', '-', '_', spaces and tabs,
/// and upper-cases every remaining character.
/// </summary>
/// <param name="text">Text to normalise; must not be null.</param>
/// <returns>The normalised text.</returns>
private string SimplifyText(string text)
{
    StringBuilder sbResult = new StringBuilder(text.Length);
    foreach (char c in text)
    {
        switch (c)
        {
            case '.':
            case ',':
            case ':':
            case ';':
            case '-':
            case '_':
            case ' ':
            case '\t':
                // Separators carry no meaning for matching.
                continue;
        }

        // Invariant casing keeps matching independent of the current culture
        // (culture-sensitive ToUpper maps 'i' to a dotted capital under Turkish cultures).
        sbResult.Append(char.ToUpperInvariant(c));
    }
    return sbResult.ToString();
}
|
||||
|
||||
private PdfTextElement FindElementByText(string text, bool fuzzy)
|
||||
{
|
||||
string matchingText = fuzzy ? SimplifyText(text) : text;
|
||||
foreach (PdfTextElement elem in _textElements)
|
||||
{
|
||||
if (elem.VisibleText == text)
|
||||
string elemText = fuzzy ? SimplifyText(elem.VisibleText) : elem.VisibleText;
|
||||
if (elemText == matchingText)
|
||||
{
|
||||
return elem;
|
||||
}
|
||||
@@ -305,13 +386,13 @@ namespace VAR.PdfTools
|
||||
|
||||
private void OpEndText()
|
||||
{
|
||||
FlushTextElement();
|
||||
FlushTextElementSoft();
|
||||
inText = false;
|
||||
}
|
||||
|
||||
private void OpTextFont(string fontName, double size)
|
||||
{
|
||||
FlushTextElement();
|
||||
FlushTextElementSoft();
|
||||
_font = _page.Fonts[fontName];
|
||||
_fontSize = size;
|
||||
}
|
||||
@@ -321,7 +402,7 @@ namespace VAR.PdfTools
|
||||
_textLeading = textLeading;
|
||||
}
|
||||
|
||||
private void OpTesDisplace(double x, double y)
|
||||
private void OpTextDisplace(double x, double y)
|
||||
{
|
||||
FlushTextElement();
|
||||
var newMatrix = new Matrix3x3();
|
||||
@@ -332,7 +413,7 @@ namespace VAR.PdfTools
|
||||
|
||||
private void OpTextLineFeed()
|
||||
{
|
||||
OpTesDisplace(0, -_textLeading);
|
||||
OpTextDisplace(0, -_textLeading);
|
||||
}
|
||||
|
||||
private void OpSetTextMatrix(double a, double b, double c, double d, double e, double f)
|
||||
@@ -455,14 +536,14 @@ namespace VAR.PdfTools
|
||||
{
|
||||
double x = PdfElementUtils.GetReal(action.Parameters[0], 0);
|
||||
double y = PdfElementUtils.GetReal(action.Parameters[1], 0);
|
||||
OpTesDisplace(x, y);
|
||||
OpTextDisplace(x, y);
|
||||
}
|
||||
else if (action.Token == "TD")
|
||||
{
|
||||
double x = PdfElementUtils.GetReal(action.Parameters[0], 0);
|
||||
double y = PdfElementUtils.GetReal(action.Parameters[1], 0);
|
||||
OpTextLeading(-y);
|
||||
OpTesDisplace(x, y);
|
||||
OpTextDisplace(x, y);
|
||||
}
|
||||
else if (action.Token == "Tm")
|
||||
{
|
||||
@@ -507,51 +588,89 @@ namespace VAR.PdfTools
|
||||
|
||||
public List<string> GetColumn(string column)
|
||||
{
|
||||
PdfTextElement columnHead = FindElementByText(column);
|
||||
return GetColumn(column, true);
|
||||
}
|
||||
|
||||
public List<string> GetColumn(string column, bool fuzzy)
|
||||
{
|
||||
PdfTextElement columnHead = FindElementByText(column, fuzzy);
|
||||
if(columnHead == null)
|
||||
{
|
||||
return new List<string>();
|
||||
}
|
||||
double headY = columnHead.GetY();
|
||||
double headX1 = columnHead.GetX();
|
||||
double headX2 = headX1 + columnHead.VisibleWidth;
|
||||
|
||||
// Get all the elements that intersects vertically and sort
|
||||
var columnData = new List<PdfTextElement>();
|
||||
// Determine horizontal extent
|
||||
double extentX1 = double.MinValue;
|
||||
double extentX2 = double.MaxValue;
|
||||
foreach (PdfTextElement elem in _textElements)
|
||||
{
|
||||
if(elem == columnHead){continue;}
|
||||
if (TextElementHorizontalIntersection(columnHead, elem) == false) { continue; }
|
||||
double elemX1 = elem.GetX();
|
||||
double elemX2 = elemX1 + elem.VisibleWidth;
|
||||
|
||||
if (elemX2 < headX1)
|
||||
{
|
||||
if (elemX2 > extentX1)
|
||||
{
|
||||
extentX1 = elemX2;
|
||||
}
|
||||
}
|
||||
if (elemX1 > headX2)
|
||||
{
|
||||
if (elemX1 < extentX2)
|
||||
{
|
||||
extentX2 = elemX1;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Get all the elements that intersects vertically, are down and sort results
|
||||
var columnDataRaw = new List<PdfTextElement>();
|
||||
foreach (PdfTextElement elem in _textElements)
|
||||
{
|
||||
if (TextElementVerticalIntersection(columnHead, elem) == false) { continue; }
|
||||
|
||||
// Only items below the column head
|
||||
double elemY = elem.GetY();
|
||||
if (elemY >= headY) { continue; }
|
||||
|
||||
columnDataRaw.Add(elem);
|
||||
}
|
||||
columnDataRaw = columnDataRaw.OrderByDescending(elem => elem.GetY()).ToList();
|
||||
|
||||
// Only items completely inside the extents, and break on the first element outside
|
||||
var columnData = new List<PdfTextElement>();
|
||||
foreach (PdfTextElement elem in columnDataRaw)
|
||||
{
|
||||
double elemX1 = elem.GetX();
|
||||
double elemX2 = elemX1 + elem.VisibleWidth;
|
||||
if (elemX1 < extentX1 || elemX2 > extentX2) { break; }
|
||||
|
||||
columnData.Add(elem);
|
||||
}
|
||||
columnData = columnData.OrderByDescending(elem => elem.GetY()).ToList();
|
||||
|
||||
// Filter only nearest elements
|
||||
// Emit result
|
||||
var result = new List<string>();
|
||||
double prevY = headY;
|
||||
double medDiff = 0;
|
||||
bool first = true;
|
||||
foreach (PdfTextElement elem in columnData)
|
||||
{
|
||||
double elemY = elem.GetY();
|
||||
double diff = prevY - elemY;
|
||||
prevY = elemY;
|
||||
if (first)
|
||||
{
|
||||
first = false;
|
||||
medDiff = diff;
|
||||
}
|
||||
if (diff > medDiff) { break; }
|
||||
medDiff = (medDiff + diff) / 2;
|
||||
|
||||
result.Add(elem.VisibleText);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public string GetField(string column)
|
||||
public string GetField(string field)
|
||||
{
|
||||
PdfTextElement fieldTitle = FindElementByText(column);
|
||||
return GetField(field, true);
|
||||
}
|
||||
|
||||
public string GetField(string field, bool fuzzy)
|
||||
{
|
||||
PdfTextElement fieldTitle = FindElementByText(field, fuzzy);
|
||||
if (fieldTitle == null)
|
||||
{
|
||||
return null;
|
||||
@@ -577,6 +696,21 @@ namespace VAR.PdfTools
|
||||
return fieldData.OrderBy(elem => elem.GetX()).FirstOrDefault().VisibleText;
|
||||
}
|
||||
|
||||
/// <summary>
/// Tells whether the page contains a text element matching the given text, using fuzzy matching.
/// </summary>
/// <param name="text">Text to look for.</param>
/// <returns>true when a matching element exists; otherwise false.</returns>
public bool HasText(string text)
{
    const bool useFuzzyMatch = true;
    bool found = HasText(text, useFuzzyMatch);
    return found;
}
|
||||
|
||||
/// <summary>
/// Tells whether the page contains a text element matching the given text.
/// </summary>
/// <param name="text">Text to look for.</param>
/// <param name="fuzzy">Passed through to FindElementByText to select fuzzy or exact matching.</param>
/// <returns>true when a matching element exists; otherwise false.</returns>
public bool HasText(string text, bool fuzzy)
{
    PdfTextElement match = FindElementByText(text, fuzzy);
    return match != null;
}
|
||||
|
||||
#endregion
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,4 +11,4 @@ using System.Runtime.InteropServices;
|
||||
[assembly: AssemblyCulture("")]
|
||||
[assembly: ComVisible(false)]
|
||||
[assembly: Guid("eb7e003a-6a95-4002-809f-926c7c8a11e9")]
|
||||
[assembly: AssemblyVersion("1.0.*")]
|
||||
[assembly: AssemblyVersion("1.1.*")]
|
||||
|
||||
61
VAR.PdfTools/VAR.PdfTools.Net35.csproj
Normal file
61
VAR.PdfTools/VAR.PdfTools.Net35.csproj
Normal file
@@ -0,0 +1,61 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
|
||||
<PropertyGroup>
|
||||
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
|
||||
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
|
||||
<ProjectGuid>{EB7E003A-6A95-4002-809F-926C7C8A11E9}</ProjectGuid>
|
||||
<OutputType>Library</OutputType>
|
||||
<AppDesignerFolder>Properties</AppDesignerFolder>
|
||||
<RootNamespace>VAR.PdfTools</RootNamespace>
|
||||
<AssemblyName>VAR.PdfTools</AssemblyName>
|
||||
<TargetFrameworkVersion>v3.5</TargetFrameworkVersion>
|
||||
<FileAlignment>512</FileAlignment>
|
||||
<TargetFrameworkProfile />
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
|
||||
<DebugSymbols>true</DebugSymbols>
|
||||
<DebugType>full</DebugType>
|
||||
<Optimize>false</Optimize>
|
||||
<OutputPath>bin\Debug\</OutputPath>
|
||||
<DefineConstants>DEBUG;TRACE</DefineConstants>
|
||||
<ErrorReport>prompt</ErrorReport>
|
||||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
|
||||
<DebugType>pdbonly</DebugType>
|
||||
<Optimize>true</Optimize>
|
||||
<OutputPath>bin\Release\</OutputPath>
|
||||
<DefineConstants>TRACE</DefineConstants>
|
||||
<ErrorReport>prompt</ErrorReport>
|
||||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<Reference Include="System" />
|
||||
<Reference Include="System.Core" />
|
||||
<Reference Include="System.Xml.Linq" />
|
||||
<Reference Include="System.Data.DataSetExtensions" />
|
||||
<Reference Include="System.Data" />
|
||||
<Reference Include="System.Xml" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Compile Include="PdfContentAction.cs" />
|
||||
<Compile Include="PdfDocument.cs" />
|
||||
<Compile Include="PdfDocumentPage.cs" />
|
||||
<Compile Include="PdfElements.cs" />
|
||||
<Compile Include="PdfFilters.cs" />
|
||||
<Compile Include="PdfFont.cs" />
|
||||
<Compile Include="PdfParser.cs" />
|
||||
<Compile Include="PdfStandar14FontMetrics.cs" />
|
||||
<Compile Include="PdfTextExtractor.cs" />
|
||||
<Compile Include="Properties\AssemblyInfo.cs" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
|
||||
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
|
||||
Other similar extension points exist, see Microsoft.Common.targets.
|
||||
<Target Name="BeforeBuild">
|
||||
</Target>
|
||||
<Target Name="AfterBuild">
|
||||
</Target>
|
||||
-->
|
||||
</Project>
|
||||
@@ -11,6 +11,7 @@
|
||||
<AssemblyName>VAR.PdfTools</AssemblyName>
|
||||
<TargetFrameworkVersion>v4.6.1</TargetFrameworkVersion>
|
||||
<FileAlignment>512</FileAlignment>
|
||||
<TargetFrameworkProfile />
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
|
||||
<DebugSymbols>true</DebugSymbols>
|
||||
@@ -34,9 +35,7 @@
|
||||
<Reference Include="System.Core" />
|
||||
<Reference Include="System.Xml.Linq" />
|
||||
<Reference Include="System.Data.DataSetExtensions" />
|
||||
<Reference Include="Microsoft.CSharp" />
|
||||
<Reference Include="System.Data" />
|
||||
<Reference Include="System.Net.Http" />
|
||||
<Reference Include="System.Xml" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
@@ -44,6 +43,7 @@
|
||||
<Compile Include="PdfDocument.cs" />
|
||||
<Compile Include="PdfDocumentPage.cs" />
|
||||
<Compile Include="PdfElements.cs" />
|
||||
<Compile Include="PdfFilters.cs" />
|
||||
<Compile Include="PdfFont.cs" />
|
||||
<Compile Include="PdfParser.cs" />
|
||||
<Compile Include="PdfStandar14FontMetrics.cs" />
|
||||
|
||||
Reference in New Issue
Block a user