25 Commits
1_1 ... 1_4

Author SHA1 Message Date
7ba320a22c Bump version: 1.4 2017-08-02 13:30:03 +02:00
1edddf17b1 Fix JoinTextElements to only join text elements near m-size. 2017-08-02 13:28:20 +02:00
62120898d2 Bump version: 1.3 2017-06-27 01:10:26 +02:00
dc1b9bc7ca PdfTextExtractor.JoinTextElements: Joins PdfTextElements when they are nearby. 2017-06-27 01:09:50 +02:00
d1ea41474b Reorder Code. 2017-06-27 01:03:27 +02:00
b11a2ac393 Simplify PdfFont.ParseSizes. 2017-06-26 22:17:46 +02:00
36fb20eb2e Remove VisualStudio2015 incompatibilities (Remove C#7.0-isms) 2017-06-26 08:25:30 +02:00
15fbec2470 FrmPdfInfo: Improve rendering, making more accurate the location of the glyphs. 2017-06-26 01:49:48 +02:00
52841de51b PdfFont: Convert "Zero" widths to default 0.5 2017-06-26 01:46:05 +02:00
d4c4615684 PdfTextExtractor: Rework text position calculations. 2017-06-26 01:45:34 +02:00
ae76cab45d PdfTextExtractor: Fix HasText method to match contained text, instead of full PdfTextElements. 2017-06-25 12:38:27 +02:00
8dc54105fd Refactorings 2017-06-25 12:03:41 +02:00
3469593a2a VAR.PdfTools.Workbench: Crude rendering of the parsed PDF. 2017-06-25 02:21:37 +02:00
ebff0c2028 Remove Visual Studio 2010 support 2017-06-11 16:29:24 +02:00
2fd074e041 Add Visual Studio 2017 support to NuGet Generation script. 2017-06-11 16:16:55 +02:00
4223619802 Set "Times-Roman" as default basefont. 2017-06-11 16:05:17 +02:00
771305f5d0 Refactor PdfFont creator. 2017-06-11 16:04:40 +02:00
90c7c5db92 Fix NuGet buid script 2017-04-13 08:23:19 +02:00
b474fc1257 Bump version 1.2 2017-04-12 22:51:12 +02:00
a5879ec9c2 PdfTextExtractor: Apply simple heuristics to join different text blocks checking matrix "collinearity". 2017-04-12 22:49:00 +02:00
0938553510 Add NuGet building files 2017-02-13 21:09:09 +01:00
c1fd18f355 PdfTextExtractor: Fix text size calculation 2016-09-07 09:06:20 +02:00
c0a8de2617 Merge branch 'master' of https://github.com/Kableado/VAR.PdfTools 2016-09-06 17:42:55 +02:00
4d92f144f8 PdfParser: Parse inline images 2016-09-06 17:42:13 +02:00
c388e9daae Fixes on project files to be compatible with Monodevelop 2016-07-04 07:15:09 +02:00
41 changed files with 1080 additions and 765 deletions

2
.gitignore vendored
View File

@@ -25,3 +25,5 @@ Thumbs.db
obj/ obj/
[Rr]elease*/ [Rr]elease*/
_ReSharper*/ _ReSharper*/
*.userprefs
*.nupkg

View File

@@ -1,6 +1,6 @@
The MIT License (MIT) The MIT License (MIT)
Copyright (c) 2014-2015 Valeriano Alfonso Rodriguez Copyright (c) 2016-2017 Valeriano Alfonso Rodriguez
Permission is hereby granted, free of charge, to any person obtaining a copy Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal of this software and associated documentation files (the "Software"), to deal

View File

@@ -33,6 +33,9 @@ It is a simple Windows.Forms application, to test basic funcitionallity of the l
## Building ## Building
A Visual Studio 2015 and 2010 solutions are provided. Simply, click build on the IDE. A Visual Studio 2015 and 2010 solutions are provided. Simply, click build on the IDE.
A NuGet package can be built using:
VAR.PdfTools\Build.NuGet.cmd
## Contributing ## Contributing
1. Fork it! 1. Fork it!
2. Create your feature branch: `git checkout -b my-new-feature` 2. Create your feature branch: `git checkout -b my-new-feature`
@@ -47,7 +50,7 @@ A Visual Studio 2015 and 2010 solutions are provided. Simply, click build on the
The MIT License (MIT) The MIT License (MIT)
Copyright (c) 2014-2015 Valeriano Alfonso Rodriguez Copyright (c) 2016-2017 Valeriano Alfonso Rodriguez
Permission is hereby granted, free of charge, to any person obtaining a copy Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal of this software and associated documentation files (the "Software"), to deal

View File

@@ -1,26 +0,0 @@
Microsoft Visual Studio Solution File, Format Version 11.00
# Visual Studio 2010
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "VAR.PdfTools.Net35", "VAR.PdfTools\VAR.PdfTools.Net35.csproj", "{EB7E003A-6A95-4002-809F-926C7C8A11E9}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "VAR.PdfTools.Workbench.Net35", "VAR.PdfTools.Workbench\VAR.PdfTools.Workbench.Net35.csproj", "{A5825D8E-9F81-49E0-B610-8AE5E46D02EA}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{EB7E003A-6A95-4002-809F-926C7C8A11E9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{EB7E003A-6A95-4002-809F-926C7C8A11E9}.Debug|Any CPU.Build.0 = Debug|Any CPU
{EB7E003A-6A95-4002-809F-926C7C8A11E9}.Release|Any CPU.ActiveCfg = Release|Any CPU
{EB7E003A-6A95-4002-809F-926C7C8A11E9}.Release|Any CPU.Build.0 = Release|Any CPU
{A5825D8E-9F81-49E0-B610-8AE5E46D02EA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{A5825D8E-9F81-49E0-B610-8AE5E46D02EA}.Debug|Any CPU.Build.0 = Debug|Any CPU
{A5825D8E-9F81-49E0-B610-8AE5E46D02EA}.Release|Any CPU.ActiveCfg = Release|Any CPU
{A5825D8E-9F81-49E0-B610-8AE5E46D02EA}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

View File

@@ -40,6 +40,7 @@
this.btnGetField = new System.Windows.Forms.Button(); this.btnGetField = new System.Windows.Forms.Button();
this.txtText = new System.Windows.Forms.TextBox(); this.txtText = new System.Windows.Forms.TextBox();
this.btnHasText = new System.Windows.Forms.Button(); this.btnHasText = new System.Windows.Forms.Button();
this.btnRender = new System.Windows.Forms.Button();
this.SuspendLayout(); this.SuspendLayout();
// //
// lblOutputs // lblOutputs
@@ -159,11 +160,23 @@
this.btnHasText.UseVisualStyleBackColor = true; this.btnHasText.UseVisualStyleBackColor = true;
this.btnHasText.Click += new System.EventHandler(this.btnHasText_Click); this.btnHasText.Click += new System.EventHandler(this.btnHasText_Click);
// //
// btnRender
//
this.btnRender.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Right)));
this.btnRender.Location = new System.Drawing.Point(397, 52);
this.btnRender.Name = "btnRender";
this.btnRender.Size = new System.Drawing.Size(75, 23);
this.btnRender.TabIndex = 18;
this.btnRender.Text = "Render";
this.btnRender.UseVisualStyleBackColor = true;
this.btnRender.Click += new System.EventHandler(this.btnRender_Click);
//
// FrmPdfInfo // FrmPdfInfo
// //
this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F); this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font; this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
this.ClientSize = new System.Drawing.Size(484, 461); this.ClientSize = new System.Drawing.Size(484, 461);
this.Controls.Add(this.btnRender);
this.Controls.Add(this.txtText); this.Controls.Add(this.txtText);
this.Controls.Add(this.btnHasText); this.Controls.Add(this.btnHasText);
this.Controls.Add(this.txtFieldName); this.Controls.Add(this.txtFieldName);
@@ -199,5 +212,6 @@
private System.Windows.Forms.Button btnGetField; private System.Windows.Forms.Button btnGetField;
private System.Windows.Forms.TextBox txtText; private System.Windows.Forms.TextBox txtText;
private System.Windows.Forms.Button btnHasText; private System.Windows.Forms.Button btnHasText;
private System.Windows.Forms.Button btnRender;
} }
} }

View File

@@ -1,7 +1,12 @@
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Drawing2D;
using System.Drawing.Imaging;
using System.IO;
using System.Linq; using System.Linq;
using System.Windows.Forms; using System.Windows.Forms;
using VAR.PdfTools.PdfElements;
namespace VAR.PdfTools.Workbench namespace VAR.PdfTools.Workbench
{ {
@@ -89,7 +94,7 @@ namespace VAR.PdfTools.Workbench
{ {
lines.Add(string.Format("Text({0}, {1})({2}, {3})[{4}]: \"{5}\"", lines.Add(string.Format("Text({0}, {1})({2}, {3})[{4}]: \"{5}\"",
textElement.Matrix.Matrix[0, 2], textElement.Matrix.Matrix[1, 2], textElement.VisibleWidth, textElement.VisibleHeight, textElement.Matrix.Matrix[0, 2], textElement.Matrix.Matrix[1, 2], textElement.VisibleWidth, textElement.VisibleHeight,
textElement.Font == null ? string.Empty : textElement.Font.Name, textElement.Font == null ? "#NULL#" : textElement.Font.Name,
textElement.VisibleText)); textElement.VisibleText));
} }
} }
@@ -154,5 +159,151 @@ namespace VAR.PdfTools.Workbench
} }
txtOutput.Lines = lines.ToArray(); txtOutput.Lines = lines.ToArray();
} }
/// <summary>
/// Renders every page of the PDF selected in txtPdfPath to a PNG file stored
/// next to the document ("{name}_{page:0000}.png") and writes a short summary
/// to txtOutput.
/// </summary>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnRender_Click(object sender, EventArgs e)
{
    if (File.Exists(txtPdfPath.Text) == false)
    {
        MessageBox.Show("File does not exist");
        return;
    }

    // Pixels drawn per unit of the extracted text coordinates.
    const int Scale = 5;

    PdfDocument doc = PdfDocument.Load(txtPdfPath.Text);
    string baseDocumentPath = Path.GetDirectoryName(txtPdfPath.Text);
    string baseDocumentFilename = Path.GetFileNameWithoutExtension(txtPdfPath.Text);

    List<string> lines = new List<string>();
    lines.Add(string.Format("Filename : {0}", baseDocumentFilename));
    lines.Add(string.Format("Number of Pages : {0}", doc.Pages.Count));

    int pageNumber = 0;
    foreach (PdfDocumentPage page in doc.Pages)
    {
        // Incremented up front so that skipped pages keep the numbering right.
        pageNumber++;

        double pageXMin = double.MaxValue;
        double pageYMin = double.MaxValue;
        double pageXMax = double.MinValue;
        double pageYMax = double.MinValue;

        // Preprocess page to get the bounding box of all its text elements
        PdfTextExtractor extractor = new PdfTextExtractor(page);
        foreach (PdfTextElement textElement in extractor.Elements)
        {
            double textElementXMin = textElement.GetX();
            double textElementYMax = textElement.GetY();
            double textElementXMax = textElementXMin + textElement.VisibleWidth;
            double textElementYMin = textElementYMax - textElement.VisibleHeight;
            if (textElementXMax > pageXMax) { pageXMax = textElementXMax; }
            if (textElementYMax > pageYMax) { pageYMax = textElementYMax; }
            if (textElementXMin < pageXMin) { pageXMin = textElementXMin; }
            if (textElementYMin < pageYMin) { pageYMin = textElementYMin; }
        }
        lines.Add(string.Format("Page {0:0000} TextElements : {1}", pageNumber, extractor.Elements.Count));

        // A page without text keeps the sentinel bounds above; computing a
        // size from them overflows, so there is nothing to render.
        if (extractor.Elements.Count == 0) { continue; }

        // Prepare page image
        int pageWidth = (int)Math.Ceiling(pageXMax - pageXMin);
        int pageHeight = (int)Math.Ceiling(pageYMax - pageYMin);

        // Bitmap rejects non-positive dimensions (e.g. every element has zero width).
        if (pageWidth <= 0 || pageHeight <= 0) { continue; }

        using (Bitmap bmp = new Bitmap(pageWidth * Scale, pageHeight * Scale, PixelFormat.Format32bppArgb))
        using (Graphics gc = Graphics.FromImage(bmp))
        using (Pen penTextElem = new Pen(Color.Blue))
        {
            gc.Clear(Color.White);

            // Draw text elements
            foreach (PdfTextElement textElement in extractor.Elements)
            {
                DrawTextElement(textElement, gc, penTextElem, Scale, pageHeight, pageXMin, pageYMin);
            }

            // Save image to disk
            string fileName = Path.Combine(baseDocumentPath, string.Format("{0}_{1:0000}.png", baseDocumentFilename, pageNumber));
            bmp.Save(fileName, ImageFormat.Png);
        }
    }
    txtOutput.Lines = lines.ToArray();
}
/// <summary>
/// Draws one text element on the page image: a rounded outline around its
/// visible box, each character at its displacement, and a small red marker
/// at each character origin.
/// </summary>
/// <param name="textElement">Element to draw.</param>
/// <param name="gc">Target graphics surface.</param>
/// <param name="penTextElem">Pen used for the outline.</param>
/// <param name="Scale">Pixels per unit of the element coordinates.</param>
/// <param name="pageHeight">Page height in element units; used to flip the Y axis.</param>
/// <param name="pageXMin">Smallest X among the page elements (left origin).</param>
/// <param name="pageYMin">Smallest Y among the page elements (bottom origin).</param>
private static void DrawTextElement(PdfTextElement textElement, Graphics gc, Pen penTextElem, int Scale, int pageHeight, double pageXMin, double pageYMin)
{
    double textElementX = textElement.GetX() - pageXMin;
    double textElementY = textElement.GetY() - pageYMin;
    double textElementWidth = textElement.VisibleWidth;
    double textElementHeight = textElement.VisibleHeight;

    // Nothing visible to draw
    if (textElementHeight < 0.0001) { return; }

    // Element Y grows upwards from pageYMin; image Y grows downwards.
    double textElementPageX = textElementX;
    double textElementPageY = pageHeight - textElementY;

    DrawRoundedRectangle(gc, penTextElem,
        (int)(textElementPageX * Scale),
        (int)(textElementPageY * Scale),
        (int)(textElementWidth * Scale),
        (int)(textElementHeight * Scale),
        Scale);

    // Very small text would truncate to a 0 px font, which Font rejects;
    // keep at least 1 px.
    int fontPixelSize = Math.Max(1, (int)(textElementHeight * Scale));
    using (Font font = new Font("Arial", fontPixelSize, GraphicsUnit.Pixel))
    {
        int charY = (int)(textElementPageY * Scale);
        foreach (PdfCharElement c in textElement.Characters)
        {
            int charX = (int)((textElementPageX + c.Displacement) * Scale);
            gc.DrawString(c.Char, font, Brushes.Black, charX, charY);
            gc.FillRectangle(Brushes.Red, charX, charY, 2, 2);
        }
    }
}
/// <summary>
/// Builds a closed path outlining a rectangle with rounded corners.
/// </summary>
/// <param name="x">Left edge.</param>
/// <param name="y">Top edge.</param>
/// <param name="width">Rectangle width.</param>
/// <param name="height">Rectangle height.</param>
/// <param name="radius">
/// Corner radius. It is limited so the corner arcs never exceed the
/// rectangle; when no rounding is possible a plain rectangle is produced.
/// </param>
/// <returns>A new GraphicsPath; the caller is responsible for disposing it.</returns>
public static GraphicsPath RoundedRect(int x, int y, int width, int height, int radius)
{
    GraphicsPath path = new GraphicsPath();

    // Arcs wider or taller than the rectangle would spill outside it.
    int diameter = Math.Min(radius * 2, Math.Min(width, height));
    if (diameter <= 0)
    {
        // AddArc does not accept an empty arc rectangle; fall back to sharp corners.
        path.AddRectangle(new Rectangle(x, y, Math.Max(0, width), Math.Max(0, height)));
        return path;
    }

    Rectangle arc = new Rectangle(x, y, diameter, diameter);

    // top left arc
    path.AddArc(arc, 180, 90);

    // top right arc
    arc.X = (x + width) - diameter;
    path.AddArc(arc, 270, 90);

    // bottom right arc
    arc.Y = (y + height) - diameter;
    path.AddArc(arc, 0, 90);

    // bottom left arc
    arc.X = x;
    path.AddArc(arc, 90, 90);

    path.CloseFigure();
    return path;
}
/// <summary>
/// Strokes the outline of a rounded rectangle on the given surface.
/// </summary>
/// <param name="graphics">Surface to draw on; must not be null.</param>
/// <param name="pen">Pen used for the outline; must not be null.</param>
/// <param name="x">Left edge.</param>
/// <param name="y">Top edge.</param>
/// <param name="width">Rectangle width.</param>
/// <param name="height">Rectangle height.</param>
/// <param name="cornerRadius">Radius passed on to RoundedRect.</param>
/// <exception cref="ArgumentNullException">graphics or pen is null.</exception>
public static void DrawRoundedRectangle(Graphics graphics, Pen pen, int x, int y, int width, int height, int cornerRadius)
{
    if (graphics == null) { throw new ArgumentNullException("graphics"); }
    if (pen == null) { throw new ArgumentNullException("pen"); }

    GraphicsPath outline = RoundedRect(x, y, width, height, cornerRadius);
    try
    {
        graphics.DrawPath(pen, outline);
    }
    finally
    {
        // The path wraps a native handle; release it as soon as it is drawn.
        if (outline != null) { outline.Dispose(); }
    }
}
} }
} }

View File

@@ -1,120 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<root>
<!--
Microsoft ResX Schema
Version 2.0
The primary goals of this format is to allow a simple XML format
that is mostly human readable. The generation and parsing of the
various data types are done through the TypeConverter classes
associated with the data types.
Example:
... ado.net/XML headers & schema ...
<resheader name="resmimetype">text/microsoft-resx</resheader>
<resheader name="version">2.0</resheader>
<resheader name="reader">System.Resources.ResXResourceReader, System.Windows.Forms, ...</resheader>
<resheader name="writer">System.Resources.ResXResourceWriter, System.Windows.Forms, ...</resheader>
<data name="Name1"><value>this is my long string</value><comment>this is a comment</comment></data>
<data name="Color1" type="System.Drawing.Color, System.Drawing">Blue</data>
<data name="Bitmap1" mimetype="application/x-microsoft.net.object.binary.base64">
<value>[base64 mime encoded serialized .NET Framework object]</value>
</data>
<data name="Icon1" type="System.Drawing.Icon, System.Drawing" mimetype="application/x-microsoft.net.object.bytearray.base64">
<value>[base64 mime encoded string representing a byte array form of the .NET Framework object]</value>
<comment>This is a comment</comment>
</data>
There are any number of "resheader" rows that contain simple
name/value pairs.
Each data row contains a name, and value. The row also contains a
type or mimetype. Type corresponds to a .NET class that support
text/value conversion through the TypeConverter architecture.
Classes that don't support this are serialized and stored with the
mimetype set.
The mimetype is used for serialized objects, and tells the
ResXResourceReader how to depersist the object. This is currently not
extensible. For a given mimetype the value must be set accordingly:
Note - application/x-microsoft.net.object.binary.base64 is the format
that the ResXResourceWriter will generate, however the reader can
read any of the formats listed below.
mimetype: application/x-microsoft.net.object.binary.base64
value : The object must be serialized with
: System.Runtime.Serialization.Formatters.Binary.BinaryFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.soap.base64
value : The object must be serialized with
: System.Runtime.Serialization.Formatters.Soap.SoapFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.bytearray.base64
value : The object must be serialized into a byte array
: using a System.ComponentModel.TypeConverter
: and then encoded with base64 encoding.
-->
<xsd:schema id="root" xmlns="" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata">
<xsd:import namespace="http://www.w3.org/XML/1998/namespace" />
<xsd:element name="root" msdata:IsDataSet="true">
<xsd:complexType>
<xsd:choice maxOccurs="unbounded">
<xsd:element name="metadata">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" />
</xsd:sequence>
<xsd:attribute name="name" use="required" type="xsd:string" />
<xsd:attribute name="type" type="xsd:string" />
<xsd:attribute name="mimetype" type="xsd:string" />
<xsd:attribute ref="xml:space" />
</xsd:complexType>
</xsd:element>
<xsd:element name="assembly">
<xsd:complexType>
<xsd:attribute name="alias" type="xsd:string" />
<xsd:attribute name="name" type="xsd:string" />
</xsd:complexType>
</xsd:element>
<xsd:element name="data">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
<xsd:element name="comment" type="xsd:string" minOccurs="0" msdata:Ordinal="2" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" use="required" msdata:Ordinal="1" />
<xsd:attribute name="type" type="xsd:string" msdata:Ordinal="3" />
<xsd:attribute name="mimetype" type="xsd:string" msdata:Ordinal="4" />
<xsd:attribute ref="xml:space" />
</xsd:complexType>
</xsd:element>
<xsd:element name="resheader">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" use="required" />
</xsd:complexType>
</xsd:element>
</xsd:choice>
</xsd:complexType>
</xsd:element>
</xsd:schema>
<resheader name="resmimetype">
<value>text/microsoft-resx</value>
</resheader>
<resheader name="version">
<value>2.0</value>
</resheader>
<resheader name="reader">
<value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
<resheader name="writer">
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
</root>

View File

@@ -11,4 +11,4 @@ using System.Runtime.InteropServices;
[assembly: AssemblyCulture("")] [assembly: AssemblyCulture("")]
[assembly: ComVisible(false)] [assembly: ComVisible(false)]
[assembly: Guid("a5825d8e-9f81-49e0-b610-8ae5e46d02ea")] [assembly: Guid("a5825d8e-9f81-49e0-b610-8ae5e46d02ea")]
[assembly: AssemblyVersion("1.1.*")] [assembly: AssemblyVersion("1.4.0.*")]

View File

@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <Project ToolsVersion="4.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" /> <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
<PropertyGroup> <PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration> <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
@@ -13,6 +13,8 @@
<FileAlignment>512</FileAlignment> <FileAlignment>512</FileAlignment>
<AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects> <AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
<TargetFrameworkProfile /> <TargetFrameworkProfile />
<ProductVersion>10.0.0</ProductVersion>
<SchemaVersion>2.0</SchemaVersion>
</PropertyGroup> </PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "> <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<PlatformTarget>AnyCPU</PlatformTarget> <PlatformTarget>AnyCPU</PlatformTarget>
@@ -33,16 +35,12 @@
<ErrorReport>prompt</ErrorReport> <ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel> <WarningLevel>4</WarningLevel>
</PropertyGroup> </PropertyGroup>
<PropertyGroup>
<StartupObject />
</PropertyGroup>
<ItemGroup> <ItemGroup>
<Reference Include="System" /> <Reference Include="System" />
<Reference Include="System.Core" /> <Reference Include="System.Core" />
<Reference Include="System.Xml.Linq" /> <Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" /> <Reference Include="System.Data.DataSetExtensions" />
<Reference Include="System.Data" /> <Reference Include="System.Data" />
<Reference Include="System.Deployment" />
<Reference Include="System.Drawing" /> <Reference Include="System.Drawing" />
<Reference Include="System.Windows.Forms" /> <Reference Include="System.Windows.Forms" />
<Reference Include="System.Xml" /> <Reference Include="System.Xml" />
@@ -56,9 +54,6 @@
</Compile> </Compile>
<Compile Include="Program.cs" /> <Compile Include="Program.cs" />
<Compile Include="Properties\AssemblyInfo.cs" /> <Compile Include="Properties\AssemblyInfo.cs" />
<EmbeddedResource Include="FrmPdfInfo.resx">
<DependentUpon>FrmPdfInfo.cs</DependentUpon>
</EmbeddedResource>
<None Include="Properties\Settings.settings"> <None Include="Properties\Settings.settings">
<Generator>SettingsSingleFileGenerator</Generator> <Generator>SettingsSingleFileGenerator</Generator>
<LastGenOutput>Settings.Designer.cs</LastGenOutput> <LastGenOutput>Settings.Designer.cs</LastGenOutput>
@@ -69,12 +64,6 @@
<DesignTimeSharedInput>True</DesignTimeSharedInput> <DesignTimeSharedInput>True</DesignTimeSharedInput>
</Compile> </Compile>
</ItemGroup> </ItemGroup>
<ItemGroup>
<ProjectReference Include="..\VAR.PdfTools\VAR.PdfTools.csproj">
<Project>{eb7e003a-6a95-4002-809f-926c7c8a11e9}</Project>
<Name>VAR.PdfTools</Name>
</ProjectReference>
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" /> <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<!-- To modify your build process, add your task inside one of the targets below and uncomment it. <!-- To modify your build process, add your task inside one of the targets below and uncomment it.
Other similar extension points exist, see Microsoft.Common.targets. Other similar extension points exist, see Microsoft.Common.targets.
@@ -83,4 +72,10 @@
<Target Name="AfterBuild"> <Target Name="AfterBuild">
</Target> </Target>
--> -->
<ItemGroup>
<ProjectReference Include="..\VAR.PdfTools\VAR.PdfTools.Net35.csproj">
<Project>{EB7E003A-6A95-4002-809F-926C7C8A11E9}</Project>
<Name>VAR.PdfTools.Net35</Name>
</ProjectReference>
</ItemGroup>
</Project> </Project>

View File

@@ -42,7 +42,6 @@
<Reference Include="System.Xml.Linq" /> <Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" /> <Reference Include="System.Data.DataSetExtensions" />
<Reference Include="System.Data" /> <Reference Include="System.Data" />
<Reference Include="System.Deployment" />
<Reference Include="System.Drawing" /> <Reference Include="System.Drawing" />
<Reference Include="System.Windows.Forms" /> <Reference Include="System.Windows.Forms" />
<Reference Include="System.Xml" /> <Reference Include="System.Xml" />
@@ -56,9 +55,6 @@
</Compile> </Compile>
<Compile Include="Program.cs" /> <Compile Include="Program.cs" />
<Compile Include="Properties\AssemblyInfo.cs" /> <Compile Include="Properties\AssemblyInfo.cs" />
<EmbeddedResource Include="FrmPdfInfo.resx">
<DependentUpon>FrmPdfInfo.cs</DependentUpon>
</EmbeddedResource>
<None Include="Properties\Settings.settings"> <None Include="Properties\Settings.settings">
<Generator>SettingsSingleFileGenerator</Generator> <Generator>SettingsSingleFileGenerator</Generator>
<LastGenOutput>Settings.Designer.cs</LastGenOutput> <LastGenOutput>Settings.Designer.cs</LastGenOutput>

View File

@@ -1,22 +1,28 @@
Microsoft Visual Studio Solution File, Format Version 12.00 Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 14 # Visual Studio 14
VisualStudioVersion = 14.0.25123.0 VisualStudioVersion = 14.0.25420.1
MinimumVisualStudioVersion = 10.0.40219.1 MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "VAR.PdfTools", "VAR.PdfTools\VAR.PdfTools.csproj", "{EB7E003A-6A95-4002-809F-926C7C8A11E9}" Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "VAR.PdfTools", "VAR.PdfTools\VAR.PdfTools.csproj", "{EB7E003A-6A95-4002-809F-926C7C8A11E9}"
EndProject EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "VAR.PdfTools.Workbench", "VAR.PdfTools.Workbench\VAR.PdfTools.Workbench.csproj", "{A5825D8E-9F81-49E0-B610-8AE5E46D02EA}" Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "VAR.PdfTools.Workbench", "VAR.PdfTools.Workbench\VAR.PdfTools.Workbench.csproj", "{A5825D8E-9F81-49E0-B610-8AE5E46D02EA}"
EndProject EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Notes", "Notes", "{CE2D7584-5D82-401E-9A88-A9961CBB6959}"
ProjectSection(SolutionItems) = preProject
LICENSE.txt = LICENSE.txt
README.md = README.md
EndProjectSection
EndProject
Global Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU Release|Any CPU = Release|Any CPU
EndGlobalSection EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution GlobalSection(ProjectConfigurationPlatforms) = postSolution
{EB7E003A-6A95-4002-809F-926C7C8A11E9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {EB7E003A-6A95-4002-809F-926C7C8A11E9}.Debug|Any CPU.ActiveCfg = Debug .Net 4.6.1|Any CPU
{EB7E003A-6A95-4002-809F-926C7C8A11E9}.Debug|Any CPU.Build.0 = Debug|Any CPU {EB7E003A-6A95-4002-809F-926C7C8A11E9}.Debug|Any CPU.Build.0 = Debug .Net 4.6.1|Any CPU
{EB7E003A-6A95-4002-809F-926C7C8A11E9}.Release|Any CPU.ActiveCfg = Release|Any CPU {EB7E003A-6A95-4002-809F-926C7C8A11E9}.Release|Any CPU.ActiveCfg = Release .Net 4.6.1|Any CPU
{EB7E003A-6A95-4002-809F-926C7C8A11E9}.Release|Any CPU.Build.0 = Release|Any CPU {EB7E003A-6A95-4002-809F-926C7C8A11E9}.Release|Any CPU.Build.0 = Release .Net 4.6.1|Any CPU
{A5825D8E-9F81-49E0-B610-8AE5E46D02EA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {A5825D8E-9F81-49E0-B610-8AE5E46D02EA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{A5825D8E-9F81-49E0-B610-8AE5E46D02EA}.Debug|Any CPU.Build.0 = Debug|Any CPU {A5825D8E-9F81-49E0-B610-8AE5E46D02EA}.Debug|Any CPU.Build.0 = Debug|Any CPU
{A5825D8E-9F81-49E0-B610-8AE5E46D02EA}.Release|Any CPU.ActiveCfg = Release|Any CPU {A5825D8E-9F81-49E0-B610-8AE5E46D02EA}.Release|Any CPU.ActiveCfg = Release|Any CPU

View File

@@ -0,0 +1,29 @@
@echo off
:: Builds both release configurations of VAR.PdfTools and packs the NuGet package.
:: Keep environment changes (PATH, variables) local to this script.
setlocal

:: The project path below is relative, so work from the folder holding this
:: script; this makes "VAR.PdfTools\Build.NuGet.cmd" work from the repo root.
pushd "%~dp0"

:: MSBuild and tools path (later matches override earlier ones, so the newest toolset found wins)
if exist "%windir%\Microsoft.Net\Framework\v4.0.30319" set "MsBuildPath=%windir%\Microsoft.NET\Framework\v4.0.30319"
if exist "%windir%\Microsoft.Net\Framework64\v4.0.30319" set "MsBuildPath=%windir%\Microsoft.NET\Framework64\v4.0.30319"
if exist "C:\Program Files (x86)\MSBuild\14.0\Bin" set "MsBuildPath=C:\Program Files (x86)\MSBuild\14.0\Bin"
if exist "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\MSBuild\15.0\Bin" set "MsBuildPath=C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\MSBuild\15.0\Bin"
set PATH=%MsBuildPath%;%PATH%
echo %MsBuildPath%

:: NuGet (prefer the copy restored under packages, otherwise rely on PATH)
set nuget="nuget"
if exist "%~dp0..\packages\NuGet.CommandLine.3.4.3\tools\NuGet.exe" set nuget="%~dp0..\packages\NuGet.CommandLine.3.4.3\tools\NuGet.exe"

:: Release .Net 3.5
Title Building Release .Net 3.5
msbuild VAR.PdfTools.csproj /t:Build /p:Configuration="Release .Net 3.5" /p:Platform="AnyCPU"
if errorlevel 1 goto failed

:: Release .Net 4.6.1
Title Building Release .Net 4.6.1
msbuild VAR.PdfTools.csproj /t:Build /p:Configuration="Release .Net 4.6.1" /p:Platform="AnyCPU"
if errorlevel 1 goto failed

:: Packing Nuget
Title Packing Nuget
%nuget% pack VAR.PdfTools.csproj -Verbosity detailed -OutputDir "NuGet" -Properties Configuration="Release .Net 4.6.1" -Prop Platform=AnyCPU
if errorlevel 1 goto failed

title Finished
goto done

:failed
:: Do not pack (or report success) when a previous step failed.
title Failed
echo Build or packaging failed.

:done
popd
pause

View File

@@ -0,0 +1,121 @@
using System;
namespace VAR.PdfTools.Maths
{
/// <summary>
/// 3x3 matrix of doubles stored as [row, column]. The six-value form
/// (a, b, c, d, e, f) fills the first two rows and fixes the last row to (0, 0, 1).
/// </summary>
public class Matrix3x3
{
    #region Declarations

    // Backing storage, indexed [row, column].
    public double[,] _matrix = new double[3, 3];

    #endregion

    #region Properties

    /// <summary>Direct access to the backing array (not a copy).</summary>
    public double[,] Matrix { get { return _matrix; } }

    #endregion

    #region Creator

    /// <summary>Creates an identity matrix.</summary>
    public Matrix3x3()
    {
        Idenity();
    }

    /// <summary>Creates a matrix from six values; see <see cref="Set"/>.</summary>
    public Matrix3x3(double a, double b, double c, double d, double e, double f)
    {
        Set(a, b, c, d, e, f);
    }

    #endregion

    #region Public methods

    /// <summary>Resets this matrix to the identity.</summary>
    public void Idenity()
    {
        for (int row = 0; row < 3; row++)
        {
            for (int col = 0; col < 3; col++)
            {
                _matrix[row, col] = (row == col) ? 1.0 : 0.0;
            }
        }
    }

    /// <summary>
    /// Overwrites all nine cells: row 0 = (a, c, e), row 1 = (b, d, f), row 2 = (0, 0, 1).
    /// </summary>
    public void Set(double a, double b, double c, double d, double e, double f)
    {
        // Row 0
        _matrix[0, 0] = a;
        _matrix[0, 1] = c;
        _matrix[0, 2] = e;

        // Row 1
        _matrix[1, 0] = b;
        _matrix[1, 1] = d;
        _matrix[1, 2] = f;

        // Row 2 is constant
        _matrix[2, 0] = 0;
        _matrix[2, 1] = 0;
        _matrix[2, 2] = 1;
    }

    /// <summary>
    /// Multiplies this matrix by a vector (each result component is the dot
    /// product of one matrix row with the vector).
    /// </summary>
    /// <param name="vect">Vector to transform; it is not modified.</param>
    /// <returns>A new vector holding the product.</returns>
    public Vector3D Multiply(Vector3D vect)
    {
        Vector3D product = new Vector3D();
        for (int row = 0; row < 3; row++)
        {
            product.Vector[row] =
                (vect.Vector[0] * _matrix[row, 0]) +
                (vect.Vector[1] * _matrix[row, 1]) +
                (vect.Vector[2] * _matrix[row, 2]);
        }
        return product;
    }

    /// <summary>
    /// Combines this matrix with another one. Cell [i, j] of the result is
    /// row i of <paramref name="matrix"/> times column j of this matrix.
    /// </summary>
    /// <param name="matrix">The other matrix; it is not modified.</param>
    /// <returns>A new matrix holding the product.</returns>
    public Matrix3x3 Multiply(Matrix3x3 matrix)
    {
        Matrix3x3 product = new Matrix3x3();
        for (int i = 0; i < 3; i++)
        {
            for (int j = 0; j < 3; j++)
            {
                product._matrix[i, j] =
                    (_matrix[0, j] * matrix._matrix[i, 0]) +
                    (_matrix[1, j] * matrix._matrix[i, 1]) +
                    (_matrix[2, j] * matrix._matrix[i, 2]);
            }
        }
        return product;
    }

    /// <summary>Returns a new matrix with the same nine values.</summary>
    public Matrix3x3 Copy()
    {
        Matrix3x3 clone = new Matrix3x3();
        for (int row = 0; row < 3; row++)
        {
            for (int col = 0; col < 3; col++)
            {
                clone._matrix[row, col] = _matrix[row, col];
            }
        }
        return clone;
    }

    /// <summary>
    /// Tells whether another matrix matches this one: the four cells of the
    /// upper-left 2x2 part must agree within a fixed 0.00001, and the two
    /// cells of the last column within the supplied tolerances.
    /// </summary>
    /// <param name="otherMatrix">Matrix to compare against.</param>
    /// <param name="horizontalDelta">Allowed difference for cell [0, 2].</param>
    /// <param name="verticalDelta">Allowed difference for cell [1, 2].</param>
    /// <returns>True when every compared cell is within its tolerance.</returns>
    public bool IsCollinear(Matrix3x3 otherMatrix, double horizontalDelta = 0.00001, double verticalDelta = 0.00001)
    {
        const double linearTolerance = 0.00001;
        return
            CellsWithin(otherMatrix, 0, 0, linearTolerance) &&
            CellsWithin(otherMatrix, 1, 0, linearTolerance) &&
            CellsWithin(otherMatrix, 0, 1, linearTolerance) &&
            CellsWithin(otherMatrix, 1, 1, linearTolerance) &&
            CellsWithin(otherMatrix, 0, 2, horizontalDelta) &&
            CellsWithin(otherMatrix, 1, 2, verticalDelta);
    }

    #endregion

    #region Private methods

    // True when cell [row, col] of both matrices differs by at most tolerance.
    private bool CellsWithin(Matrix3x3 other, int row, int col, double tolerance)
    {
        return Math.Abs(_matrix[row, col] - other.Matrix[row, col]) <= tolerance;
    }

    #endregion
}
}

View File

@@ -0,0 +1,33 @@
namespace VAR.PdfTools.Maths
{
/// <summary>
/// Three-component vector of doubles. A fresh or re-initialised vector is (0, 0, 1).
/// </summary>
public class Vector3D
{
    #region Declarations

    // Backing storage for the three components.
    public double[] _vector = new double[3];

    #endregion

    #region Properties

    /// <summary>Direct access to the backing array (not a copy).</summary>
    public double[] Vector { get { return _vector; } }

    #endregion

    #region Creator

    /// <summary>Creates the vector (0, 0, 1).</summary>
    public Vector3D()
    {
        Init();
    }

    /// <summary>Resets the components to (0, 0, 1).</summary>
    public void Init()
    {
        _vector[0] = _vector[1] = 0.0;
        _vector[2] = 1.0;
    }

    #endregion
}
}

View File

View File

@@ -1,4 +1,5 @@
using System.Collections.Generic; using System.Collections.Generic;
using VAR.PdfTools.PdfElements;
namespace VAR.PdfTools namespace VAR.PdfTools
{ {

View File

@@ -2,6 +2,7 @@
using System.Collections.Generic; using System.Collections.Generic;
using System.IO; using System.IO;
using System.Linq; using System.Linq;
using VAR.PdfTools.PdfElements;
namespace VAR.PdfTools namespace VAR.PdfTools
{ {

View File

@@ -1,5 +1,6 @@
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using VAR.PdfTools.PdfElements;
namespace VAR.PdfTools namespace VAR.PdfTools
{ {

View File

@@ -1,202 +0,0 @@
using System.Collections.Generic;
using System.IO;
namespace VAR.PdfTools
{
public enum PdfElementTypes
{
Undefined,
Boolean,
Integer,
Real,
String,
Name,
Array,
Dictionary,
Null,
ObjectReference,
Object,
Stream,
};
public interface IPdfElement
{
PdfElementTypes Type { get; }
}
public class PdfBoolean : IPdfElement
{
public PdfElementTypes Type { get { return PdfElementTypes.Boolean; } }
public bool Value { get; set; }
}
public class PdfInteger : IPdfElement
{
public PdfElementTypes Type { get { return PdfElementTypes.Integer; } }
public long Value { get; set; }
}
public class PdfReal : IPdfElement
{
public PdfElementTypes Type { get { return PdfElementTypes.Real; } }
public double Value { get; set; }
}
public class PdfString : IPdfElement
{
public PdfElementTypes Type { get { return PdfElementTypes.String; } }
public string Value { get; set; }
}
public class PdfName : IPdfElement
{
public PdfElementTypes Type { get { return PdfElementTypes.Name; } }
public string Value { get; set; }
}
public class PdfArray : IPdfElement
{
public PdfElementTypes Type { get { return PdfElementTypes.Array; } }
private List<IPdfElement> _values = new List<IPdfElement>();
public List<IPdfElement> Values { get { return _values; } }
}
public class PdfDictionary : IPdfElement
{
public PdfElementTypes Type { get { return PdfElementTypes.Dictionary; } }
private Dictionary<string, IPdfElement> _values = new Dictionary<string, IPdfElement>();
public Dictionary<string, IPdfElement> Values { get { return _values; } }
public string GetParamAsString(string name)
{
if (Values.ContainsKey(name) == false) { return null; }
IPdfElement value = Values[name];
if (value is PdfArray)
{
value = ((PdfArray)value).Values[0];
}
if (value is PdfName)
{
return ((PdfName)value).Value;
}
if (value is PdfString)
{
return ((PdfString)value).Value;
}
return null;
}
public long? GetParamAsInt(string name)
{
if (Values.ContainsKey(name) == false) { return null; }
IPdfElement value = Values[name];
if (value is PdfArray)
{
value = ((PdfArray)value).Values[0];
}
if (value is PdfInteger)
{
return ((PdfInteger)value).Value;
}
return null;
}
public byte[] GetParamAsStream(string name)
{
if (Values.ContainsKey(name) == false) { return null; }
IPdfElement value = Values[name];
if (value is PdfArray)
{
PdfArray array = value as PdfArray;
MemoryStream memStream = new MemoryStream();
foreach (IPdfElement elem in array.Values)
{
PdfStream stream = elem as PdfStream;
if (stream == null) { continue; }
memStream.Write(stream.Data, 0, stream.Data.Length);
}
if (memStream.Length > 0)
{
return memStream.ToArray();
}
return null;
}
if (value is PdfStream)
{
return ((PdfStream)value).Data;
}
return null;
}
}
public class PdfNull : IPdfElement
{
public PdfElementTypes Type { get { return PdfElementTypes.Null; } }
}
public class PdfObjectReference : IPdfElement
{
public PdfElementTypes Type { get { return PdfElementTypes.ObjectReference; } }
public int ObjectID { get; set; }
public int ObjectGeneration { get; set; }
}
public class PdfStream : IPdfElement
{
public PdfElementTypes Type { get { return PdfElementTypes.Stream; } }
public PdfDictionary Dictionary { get; set; }
public byte[] Data { get; set; }
public byte[] OriginalData { get; set; }
public IPdfElement OriginalFilter { get; set; }
}
public class PdfObject : IPdfElement
{
public PdfElementTypes Type { get { return PdfElementTypes.Object; } }
public int ObjectID { get; set; }
public int ObjectGeneration { get; set; }
public IPdfElement Data { get; set; }
public int UsageCount { get; set; }
}
public static class PdfElementUtils
{
public static double GetReal(IPdfElement elem, double defaultValue)
{
if (elem == null)
{
return defaultValue;
}
if (elem is PdfInteger)
{
return ((PdfInteger)elem).Value;
}
if (elem is PdfReal)
{
return ((PdfReal)elem).Value;
}
return defaultValue;
}
public static long GetInt(IPdfElement elem, long defaultValue)
{
if (elem == null)
{
return defaultValue;
}
if (elem is PdfInteger)
{
return ((PdfInteger)elem).Value;
}
if (elem is PdfReal)
{
return (long)((PdfReal)elem).Value;
}
return defaultValue;
}
}
}

View File

@@ -0,0 +1,7 @@
namespace VAR.PdfTools.PdfElements
{
/// <summary>
/// Common contract of every parsed PDF element class in this namespace.
/// </summary>
public interface IPdfElement
{
/// <summary>Kind tag identifying which element class this instance is.</summary>
PdfElementTypes Type { get; }
}
}

View File

@@ -0,0 +1,11 @@
using System.Collections.Generic;
namespace VAR.PdfTools.PdfElements
{
/// <summary>
/// PDF array element: a list of child elements.
/// </summary>
public class PdfArray : IPdfElement
{
/// <summary>Always <see cref="PdfElementTypes.Array"/>.</summary>
public PdfElementTypes Type { get { return PdfElementTypes.Array; } }
// Created once per instance; exposed directly through Values.
private List<IPdfElement> _values = new List<IPdfElement>();
/// <summary>The backing list itself (not a copy); callers read from and add to it directly.</summary>
public List<IPdfElement> Values { get { return _values; } }
}
}

View File

@@ -0,0 +1,8 @@
namespace VAR.PdfTools.PdfElements
{
/// <summary>
/// PDF boolean element.
/// </summary>
public class PdfBoolean : IPdfElement
{
/// <summary>Always <see cref="PdfElementTypes.Boolean"/>.</summary>
public PdfElementTypes Type { get { return PdfElementTypes.Boolean; } }
/// <summary>The wrapped boolean value.</summary>
public bool Value { get; set; }
}
}

View File

@@ -0,0 +1,77 @@
using System.Collections.Generic;
using System.IO;
namespace VAR.PdfTools.PdfElements
{
/// <summary>
/// PDF dictionary element: maps key names to child elements and offers typed lookups.
/// </summary>
public class PdfDictionary : IPdfElement
{
    /// <summary>Always <see cref="PdfElementTypes.Dictionary"/>.</summary>
    public PdfElementTypes Type { get { return PdfElementTypes.Dictionary; } }

    private Dictionary<string, IPdfElement> _values = new Dictionary<string, IPdfElement>();

    /// <summary>The backing key/element map itself (not a copy).</summary>
    public Dictionary<string, IPdfElement> Values { get { return _values; } }

    /// <summary>
    /// Reads an entry as text. When the entry is an array, its first item is used.
    /// </summary>
    /// <param name="name">Key to look up.</param>
    /// <returns>
    /// The value of a <see cref="PdfName"/> or <see cref="PdfString"/> entry; null when the key is
    /// missing, the array is empty, or the element is of any other kind.
    /// </returns>
    public string GetParamAsString(string name)
    {
        IPdfElement value = GetFirstValue(name);

        PdfName asName = value as PdfName;
        if (asName != null)
        {
            return asName.Value;
        }

        PdfString asString = value as PdfString;
        if (asString != null)
        {
            return asString.Value;
        }

        return null;
    }

    /// <summary>
    /// Reads an entry as an integer. When the entry is an array, its first item is used.
    /// </summary>
    /// <param name="name">Key to look up.</param>
    /// <returns>
    /// The value of a <see cref="PdfInteger"/> entry; null when the key is missing, the array is
    /// empty, or the element is of any other kind.
    /// </returns>
    public long? GetParamAsInt(string name)
    {
        PdfInteger asInteger = GetFirstValue(name) as PdfInteger;
        if (asInteger != null)
        {
            return asInteger.Value;
        }
        return null;
    }

    /// <summary>
    /// Reads an entry as stream bytes. An array entry yields the concatenation, in array order,
    /// of the <c>Data</c> of every <see cref="PdfStream"/> item it contains.
    /// </summary>
    /// <param name="name">Key to look up.</param>
    /// <returns>
    /// The stream bytes; null when the key is missing, the entry is neither a stream nor an array,
    /// or the array contributes no bytes at all.
    /// </returns>
    public byte[] GetParamAsStream(string name)
    {
        IPdfElement value;
        if (Values.TryGetValue(name, out value) == false)
        {
            return null;
        }

        PdfArray array = value as PdfArray;
        if (array != null)
        {
            using (MemoryStream memStream = new MemoryStream())
            {
                foreach (IPdfElement elem in array.Values)
                {
                    PdfStream stream = elem as PdfStream;
                    // Skip non-stream items and streams without data instead of failing on them.
                    if (stream == null || stream.Data == null) { continue; }
                    memStream.Write(stream.Data, 0, stream.Data.Length);
                }
                return (memStream.Length > 0) ? memStream.ToArray() : null;
            }
        }

        PdfStream single = value as PdfStream;
        return (single != null) ? single.Data : null;
    }

    /// <summary>
    /// Fetches the element stored under <paramref name="name"/>, unwrapping an array to its first
    /// item. Returns null for a missing key or an empty array.
    /// </summary>
    private IPdfElement GetFirstValue(string name)
    {
        IPdfElement value;
        if (Values.TryGetValue(name, out value) == false)
        {
            return null;
        }

        PdfArray array = value as PdfArray;
        if (array != null)
        {
            // An empty array has no first item; treat it like an absent value.
            return (array.Values.Count > 0) ? array.Values[0] : null;
        }
        return value;
    }
}
}

View File

@@ -0,0 +1,18 @@
namespace VAR.PdfTools.PdfElements
{
/// <summary>
/// Kind tags reported by <c>IPdfElement.Type</c>; each element class returns its own member.
/// </summary>
public enum PdfElementTypes
{
// First member, so it is also the enum's default (zero) value.
Undefined,
Boolean,
Integer,
Real,
String,
Name,
Array,
Dictionary,
Null,
ObjectReference,
Object,
Stream,
};
}

View File

@@ -0,0 +1,56 @@
namespace VAR.PdfTools.PdfElements
{
/// <summary>
/// Lenient converters from a PDF element to a plain value, falling back to a caller-supplied default.
/// </summary>
public static class PdfElementUtils
{
    /// <summary>
    /// Reads a number as a double.
    /// </summary>
    /// <param name="elem">Element to convert; may be null.</param>
    /// <param name="defaultValue">Returned when <paramref name="elem"/> is null or not numeric.</param>
    /// <returns>The value of a <see cref="PdfInteger"/> or <see cref="PdfReal"/>, otherwise the default.</returns>
    public static double GetReal(IPdfElement elem, double defaultValue)
    {
        PdfInteger integerElem = elem as PdfInteger;
        if (integerElem != null)
        {
            return integerElem.Value;
        }

        PdfReal realElem = elem as PdfReal;
        if (realElem != null)
        {
            return realElem.Value;
        }

        return defaultValue;
    }

    /// <summary>
    /// Reads a number as a long; a real value is converted with a plain cast.
    /// </summary>
    /// <param name="elem">Element to convert; may be null.</param>
    /// <param name="defaultValue">Returned when <paramref name="elem"/> is null or not numeric.</param>
    /// <returns>The value of a <see cref="PdfInteger"/> or <see cref="PdfReal"/>, otherwise the default.</returns>
    public static long GetInt(IPdfElement elem, long defaultValue)
    {
        PdfInteger integerElem = elem as PdfInteger;
        if (integerElem != null)
        {
            return integerElem.Value;
        }

        PdfReal realElem = elem as PdfReal;
        if (realElem != null)
        {
            return (long)realElem.Value;
        }

        return defaultValue;
    }

    /// <summary>
    /// Reads text from a string or name element.
    /// </summary>
    /// <param name="elem">Element to convert; may be null.</param>
    /// <param name="defaultValue">Returned when <paramref name="elem"/> is null or not textual.</param>
    /// <returns>The value of a <see cref="PdfString"/> or <see cref="PdfName"/>, otherwise the default.</returns>
    public static string GetString(IPdfElement elem, string defaultValue)
    {
        PdfString stringElem = elem as PdfString;
        if (stringElem != null)
        {
            return stringElem.Value;
        }

        PdfName nameElem = elem as PdfName;
        if (nameElem != null)
        {
            return nameElem.Value;
        }

        return defaultValue;
    }
}
}

View File

@@ -0,0 +1,8 @@
namespace VAR.PdfTools.PdfElements
{
/// <summary>
/// PDF integer element.
/// </summary>
public class PdfInteger : IPdfElement
{
/// <summary>Always <see cref="PdfElementTypes.Integer"/>.</summary>
public PdfElementTypes Type { get { return PdfElementTypes.Integer; } }
/// <summary>The wrapped integer value, held as a 64-bit long.</summary>
public long Value { get; set; }
}
}

View File

@@ -0,0 +1,8 @@
namespace VAR.PdfTools.PdfElements
{
/// <summary>
/// PDF name element.
/// </summary>
public class PdfName : IPdfElement
{
/// <summary>Always <see cref="PdfElementTypes.Name"/>.</summary>
public PdfElementTypes Type { get { return PdfElementTypes.Name; } }
/// <summary>The name text.</summary>
// NOTE(review): whether the leading '/' is stripped is decided by the parser — confirm there.
public string Value { get; set; }
}
}

View File

@@ -0,0 +1,7 @@
namespace VAR.PdfTools.PdfElements
{
/// <summary>
/// PDF null element; carries no data beyond its kind tag.
/// </summary>
public class PdfNull : IPdfElement
{
/// <summary>Always <see cref="PdfElementTypes.Null"/>.</summary>
public PdfElementTypes Type { get { return PdfElementTypes.Null; } }
}
}

View File

@@ -0,0 +1,11 @@
namespace VAR.PdfTools.PdfElements
{
/// <summary>
/// PDF object element: an identified wrapper (ID + generation) around another element.
/// </summary>
public class PdfObject : IPdfElement
{
/// <summary>Always <see cref="PdfElementTypes.Object"/>.</summary>
public PdfElementTypes Type { get { return PdfElementTypes.Object; } }
/// <summary>Object number.</summary>
public int ObjectID { get; set; }
/// <summary>Generation number paired with <see cref="ObjectID"/>.</summary>
public int ObjectGeneration { get; set; }
/// <summary>The element wrapped by this object.</summary>
public IPdfElement Data { get; set; }
// NOTE(review): presumably counts how many references resolve to this object — confirm against the code that sets it.
public int UsageCount { get; set; }
}
}

View File

@@ -0,0 +1,9 @@
namespace VAR.PdfTools.PdfElements
{
/// <summary>
/// PDF object-reference element: identifies another object by ID and generation.
/// </summary>
public class PdfObjectReference : IPdfElement
{
/// <summary>Always <see cref="PdfElementTypes.ObjectReference"/>.</summary>
public PdfElementTypes Type { get { return PdfElementTypes.ObjectReference; } }
/// <summary>Object number of the referenced object.</summary>
public int ObjectID { get; set; }
/// <summary>Generation number of the referenced object.</summary>
public int ObjectGeneration { get; set; }
}
}

View File

@@ -0,0 +1,8 @@
namespace VAR.PdfTools.PdfElements
{
/// <summary>
/// PDF real-number element.
/// </summary>
public class PdfReal : IPdfElement
{
/// <summary>Always <see cref="PdfElementTypes.Real"/>.</summary>
public PdfElementTypes Type { get { return PdfElementTypes.Real; } }
/// <summary>The wrapped value, held as a double.</summary>
public double Value { get; set; }
}
}

View File

@@ -0,0 +1,12 @@
namespace VAR.PdfTools.PdfElements
{
/// <summary>
/// PDF stream element: a dictionary plus byte payloads.
/// </summary>
public class PdfStream : IPdfElement
{
/// <summary>Always <see cref="PdfElementTypes.Stream"/>.</summary>
public PdfElementTypes Type { get { return PdfElementTypes.Stream; } }
/// <summary>Dictionary associated with this stream.</summary>
public PdfDictionary Dictionary { get; set; }
/// <summary>Stream bytes handed out to consumers.</summary>
// NOTE(review): presumably the payload after filter decoding — confirm against the parser.
public byte[] Data { get; set; }
// NOTE(review): presumably the bytes exactly as read from the file, before any decoding — confirm.
public byte[] OriginalData { get; set; }
// NOTE(review): presumably the filter entry that was in effect for OriginalData — confirm.
public IPdfElement OriginalFilter { get; set; }
}
}

View File

@@ -0,0 +1,8 @@
namespace VAR.PdfTools.PdfElements
{
/// <summary>
/// PDF string element.
/// </summary>
public class PdfString : IPdfElement
{
/// <summary>Always <see cref="PdfElementTypes.String"/>.</summary>
public PdfElementTypes Type { get { return PdfElementTypes.String; } }
/// <summary>The string content.</summary>
public string Value { get; set; }
}
}

View File

@@ -1,5 +1,5 @@
using System; using System.Collections.Generic;
using System.Collections.Generic; using VAR.PdfTools.PdfElements;
namespace VAR.PdfTools namespace VAR.PdfTools
{ {
@@ -45,6 +45,19 @@ namespace VAR.PdfTools
_tainted = true; _tainted = true;
} }
PrepareSizes(baseData);
}
#endregion
#region Private methods
private void PrepareSizes(PdfDictionary baseData)
{
// Set "Times-Roman" as default basefont sizes
_widths = PdfStandar14FontMetrics.Times_Roman.Widths;
_height = PdfStandar14FontMetrics.Times_Roman.ApproxHeight;
if (baseData.Values.ContainsKey("ToUnicode")) if (baseData.Values.ContainsKey("ToUnicode"))
{ {
byte[] toUnicodeStream = ((PdfStream)baseData.Values["ToUnicode"]).Data; byte[] toUnicodeStream = ((PdfStream)baseData.Values["ToUnicode"]).Data;
@@ -52,7 +65,19 @@ namespace VAR.PdfTools
_toUnicode = parser.ParseToUnicode(); _toUnicode = parser.ParseToUnicode();
} }
string baseFont = _baseData.GetParamAsString("BaseFont");
// Look up the per-font metrics only when a BaseFont name is actually present.
// (The test used to be inverted, so SetBaseFontSizes was only ever called with a
// null/empty name and the defaults set above were never replaced.)
if (string.IsNullOrEmpty(baseFont) == false)
{
    SetBaseFontSizes(baseFont);
}
if (_baseData.Values.ContainsKey("FirstChar") && _baseData.Values.ContainsKey("LastChar") && _baseData.Values.ContainsKey("Widths")) if (_baseData.Values.ContainsKey("FirstChar") && _baseData.Values.ContainsKey("LastChar") && _baseData.Values.ContainsKey("Widths"))
{
ParseSizes();
}
}
private void ParseSizes()
{ {
double glyphSpaceToTextSpace = 1000.0; // FIXME: SubType:Type3 Uses a FontMatrix that may not correspond to 1/1000th double glyphSpaceToTextSpace = 1000.0; // FIXME: SubType:Type3 Uses a FontMatrix that may not correspond to 1/1000th
_widths = new Dictionary<char, double>(); _widths = new Dictionary<char, double>();
@@ -62,26 +87,14 @@ namespace VAR.PdfTools
char actualChar = firstChar; char actualChar = firstChar;
foreach (IPdfElement elem in widths.Values) foreach (IPdfElement elem in widths.Values)
{ {
PdfReal widthReal = elem as PdfReal; _widths.Add(actualChar, PdfElementUtils.GetReal(elem, 500) / glyphSpaceToTextSpace);
if (widthReal != null)
{
_widths.Add(actualChar, widthReal.Value / glyphSpaceToTextSpace);
actualChar++; actualChar++;
continue;
}
PdfInteger widthInt = elem as PdfInteger;
if (widthInt != null)
{
_widths.Add(actualChar, widthInt.Value / glyphSpaceToTextSpace);
actualChar++;
continue;
}
} }
// FIXME: Calculate real height // FIXME: Calculate real height
} }
else
private void SetBaseFontSizes(string baseFont)
{ {
string baseFont = _baseData.GetParamAsString("BaseFont");
if (baseFont == "Times-Roman") if (baseFont == "Times-Roman")
{ {
_widths = PdfStandar14FontMetrics.Times_Roman.Widths; _widths = PdfStandar14FontMetrics.Times_Roman.Widths;
@@ -153,7 +166,6 @@ namespace VAR.PdfTools
_height = PdfStandar14FontMetrics.ZapfDingbats.ApproxHeight; _height = PdfStandar14FontMetrics.ZapfDingbats.ApproxHeight;
} }
} }
}
#endregion #endregion
@@ -177,15 +189,23 @@ namespace VAR.PdfTools
public double GetCharWidth(char character) public double GetCharWidth(char character)
{ {
double charWidth = 0;
if (_widths == null) if (_widths == null)
{ {
return 0; return charWidth;
} }
if (_widths.ContainsKey(character)) if (_widths.ContainsKey(character))
{ {
return _widths[character]; charWidth = _widths[character];
} }
return 0;
// NOTE: Convert "Zero" to default width of 0.5
if (charWidth <= 0.0001)
{
charWidth = 0.5;
}
return charWidth;
} }
#endregion #endregion

View File

@@ -4,6 +4,7 @@ using System.Globalization;
using System.IO; using System.IO;
using System.Linq; using System.Linq;
using System.Text; using System.Text;
using VAR.PdfTools.PdfElements;
namespace VAR.PdfTools namespace VAR.PdfTools
{ {
@@ -1021,6 +1022,20 @@ namespace VAR.PdfTools
PdfContentAction action = new PdfContentAction(token, elems); PdfContentAction action = new PdfContentAction(token, elems);
elems = new List<IPdfElement>(); elems = new List<IPdfElement>();
actions.Add(action); actions.Add(action);
if (action.Token == "ID")
{
// Embed inline image
byte lineFeed = 0x0A;
byte carriageReturn = 0x0D;
long distToObject = MeasureToMarkers(new char[][] {
new char[] {(char)lineFeed, 'E', 'I'},
new char[] {(char)carriageReturn, (char)lineFeed, 'E', 'I'},
});
byte[] imageBody = GetRawData(distToObject);
SkipEndOfLine();
string endToken = ParseToken();
action.Parameters.Add(new PdfStream { OriginalData = imageBody, });
}
} }
} while (IsEndOfStream() == false); } while (IsEndOfStream() == false);
return actions; return actions;

View File

@@ -1,124 +1,16 @@
using System.Collections.Generic; using System;
using System.Collections.Generic;
using System.Linq; using System.Linq;
using System.Text; using System.Text;
using VAR.PdfTools.Maths;
using VAR.PdfTools.PdfElements;
namespace VAR.PdfTools namespace VAR.PdfTools
{ {
public class Vector3D public struct PdfCharElement
{ {
#region Declarations public string Char;
public double Displacement;
public double[] _vector = new double[3];
#endregion
#region Properties
public double[] Vector { get { return _vector; } }
#endregion
#region Creator
public Vector3D()
{
Init();
}
public void Init()
{
_vector[0] = 0.0;
_vector[1] = 0.0;
_vector[2] = 1.0;
}
#endregion
}
public class Matrix3x3
{
#region Declarations
public double[,] _matrix = new double[3, 3];
#endregion
#region Properties
public double[,] Matrix { get { return _matrix; } }
#endregion
#region Creator
public Matrix3x3()
{
Idenity();
}
#endregion
#region Public methods
public void Idenity()
{
_matrix[0, 0] = 1.0;
_matrix[0, 1] = 0.0;
_matrix[0, 2] = 0.0;
_matrix[1, 0] = 0.0;
_matrix[1, 1] = 1.0;
_matrix[1, 2] = 0.0;
_matrix[2, 0] = 0.0;
_matrix[2, 1] = 0.0;
_matrix[2, 2] = 1.0;
}
public Vector3D Multiply(Vector3D vect)
{
Vector3D vectResult = new Vector3D();
vectResult.Vector[0] = (vect.Vector[0] * _matrix[0, 0]) + (vect.Vector[1] * _matrix[0, 1]) + (vect.Vector[2] * _matrix[0, 2]);
vectResult.Vector[1] = (vect.Vector[0] * _matrix[1, 0]) + (vect.Vector[1] * _matrix[1, 1]) + (vect.Vector[2] * _matrix[1, 2]);
vectResult.Vector[2] = (vect.Vector[0] * _matrix[2, 0]) + (vect.Vector[1] * _matrix[2, 1]) + (vect.Vector[2] * _matrix[2, 2]);
return vectResult;
}
public Matrix3x3 Multiply(Matrix3x3 matrix)
{
Matrix3x3 newMatrix = new Matrix3x3();
newMatrix._matrix[0, 0] = (_matrix[0, 0] * matrix._matrix[0, 0]) + (_matrix[1, 0] * matrix._matrix[0, 1]) + (_matrix[2, 0] * matrix._matrix[0, 2]);
newMatrix._matrix[0, 1] = (_matrix[0, 1] * matrix._matrix[0, 0]) + (_matrix[1, 1] * matrix._matrix[0, 1]) + (_matrix[2, 1] * matrix._matrix[0, 2]);
newMatrix._matrix[0, 2] = (_matrix[0, 2] * matrix._matrix[0, 0]) + (_matrix[1, 2] * matrix._matrix[0, 1]) + (_matrix[2, 2] * matrix._matrix[0, 2]);
newMatrix._matrix[1, 0] = (_matrix[0, 0] * matrix._matrix[1, 0]) + (_matrix[1, 0] * matrix._matrix[1, 1]) + (_matrix[2, 0] * matrix._matrix[1, 2]);
newMatrix._matrix[1, 1] = (_matrix[0, 1] * matrix._matrix[1, 0]) + (_matrix[1, 1] * matrix._matrix[1, 1]) + (_matrix[2, 1] * matrix._matrix[1, 2]);
newMatrix._matrix[1, 2] = (_matrix[0, 2] * matrix._matrix[1, 0]) + (_matrix[1, 2] * matrix._matrix[1, 1]) + (_matrix[2, 2] * matrix._matrix[1, 2]);
newMatrix._matrix[2, 0] = (_matrix[0, 0] * matrix._matrix[2, 0]) + (_matrix[1, 0] * matrix._matrix[2, 1]) + (_matrix[2, 0] * matrix._matrix[2, 2]);
newMatrix._matrix[2, 1] = (_matrix[0, 1] * matrix._matrix[2, 0]) + (_matrix[1, 1] * matrix._matrix[2, 1]) + (_matrix[2, 1] * matrix._matrix[2, 2]);
newMatrix._matrix[2, 2] = (_matrix[0, 2] * matrix._matrix[2, 0]) + (_matrix[1, 2] * matrix._matrix[2, 1]) + (_matrix[2, 2] * matrix._matrix[2, 2]);
return newMatrix;
}
public Matrix3x3 Copy()
{
Matrix3x3 newMatrix = new Matrix3x3();
newMatrix._matrix[0, 0] = _matrix[0, 0];
newMatrix._matrix[0, 1] = _matrix[0, 1];
newMatrix._matrix[0, 2] = _matrix[0, 2];
newMatrix._matrix[1, 0] = _matrix[1, 0];
newMatrix._matrix[1, 1] = _matrix[1, 1];
newMatrix._matrix[1, 2] = _matrix[1, 2];
newMatrix._matrix[2, 0] = _matrix[2, 0];
newMatrix._matrix[2, 1] = _matrix[2, 1];
newMatrix._matrix[2, 2] = _matrix[2, 2];
return newMatrix;
}
#endregion
} }
public class PdfTextElement public class PdfTextElement
@@ -139,8 +31,9 @@ namespace VAR.PdfTools
public double VisibleHeight { get; set; } public double VisibleHeight { get; set; }
private List<PdfTextElement> _childs = new List<PdfTextElement>(); public List<PdfCharElement> Characters { get; set; }
public List<PdfTextElement> Childs { get { return _childs; } }
public List<PdfTextElement> Childs { get; set; }
#endregion #endregion
@@ -174,15 +67,17 @@ namespace VAR.PdfTools
// Text state // Text state
private PdfFont _font = null; private PdfFont _font = null;
private double _fontSize = 1; private double _fontSize = 1;
private double _charSpacing = 0;
private double _wordSpacing = 0;
private double _textLeading = 0; private double _textLeading = 0;
// Text object state // Text object state
private bool inText = false; private bool inText = false;
private Matrix3x3 _textMatrix = new Matrix3x3(); private Matrix3x3 _textMatrix = new Matrix3x3();
private Matrix3x3 _textMatrixCurrent = new Matrix3x3();
private StringBuilder _sbText = new StringBuilder(); private StringBuilder _sbText = new StringBuilder();
private double _textWidth = 0; private double _textWidth = 0;
private List<PdfCharElement> _listCharacters = new List<PdfCharElement>();
PdfTextElement _currentTextElement = null;
#endregion #endregion
@@ -199,7 +94,8 @@ namespace VAR.PdfTools
public PdfTextExtractor(PdfDocumentPage page) public PdfTextExtractor(PdfDocumentPage page)
{ {
_page = page; _page = page;
ProcessPage(); ProcessPageContent();
JoinTextElements();
} }
#endregion #endregion
@@ -226,42 +122,24 @@ namespace VAR.PdfTools
PdfTextElement textElem = new PdfTextElement(); PdfTextElement textElem = new PdfTextElement();
textElem.Font = _font; textElem.Font = _font;
textElem.FontSize = _fontSize; textElem.FontSize = _fontSize;
textElem.Matrix = _textMatrix.Multiply(_graphicsMatrix); textElem.Matrix = _textMatrixCurrent.Multiply(_graphicsMatrix);
textElem.RawText = _sbText.ToString(); textElem.RawText = _sbText.ToString();
textElem.VisibleText = PdfString_ToUnicode(textElem.RawText, _font); textElem.VisibleText = PdfString_ToUnicode(textElem.RawText, _font);
textElem.VisibleWidth = _textWidth * textElem.Matrix.Matrix[0, 0]; textElem.VisibleWidth = _textWidth * textElem.Matrix.Matrix[0, 0];
textElem.VisibleHeight = (_font.Height * _fontSize) * textElem.Matrix.Matrix[1, 1]; textElem.VisibleHeight = (_font.Height * _fontSize) * textElem.Matrix.Matrix[1, 1];
textElem.Characters = new List<PdfCharElement>();
foreach (PdfCharElement c in _listCharacters)
{
textElem.Characters.Add(new PdfCharElement
{
Char = c.Char,
Displacement = (c.Displacement * textElem.Matrix.Matrix[0, 0]),
});
}
textElem.Childs = new List<PdfTextElement>();
return textElem; return textElem;
} }
private void FlushTextElementSoft()
{
if (_sbText.Length == 0)
{
return;
}
PdfTextElement textElem = BuildTextElement();
if (_currentTextElement == null)
{
_currentTextElement = new PdfTextElement();
_currentTextElement.Font = null;
_currentTextElement.FontSize = -1;
_currentTextElement.Matrix = textElem.Matrix.Copy();
_currentTextElement.RawText = string.Empty;
_currentTextElement.VisibleText = string.Empty;
_currentTextElement.VisibleWidth = 0;
_currentTextElement.VisibleHeight = 0;
}
_currentTextElement.VisibleText += textElem.VisibleText;
_currentTextElement.VisibleWidth += textElem.VisibleWidth;
_currentTextElement.VisibleHeight = System.Math.Max(_currentTextElement.VisibleHeight, textElem.VisibleHeight);
_currentTextElement.Childs.Add(textElem);
_sbText = new StringBuilder();
_textWidth = 0;
}
private void AddTextElement(PdfTextElement textElement) private void AddTextElement(PdfTextElement textElement)
{ {
if (string.IsNullOrEmpty(textElement.VisibleText.Trim())) if (string.IsNullOrEmpty(textElement.VisibleText.Trim()))
@@ -275,27 +153,16 @@ namespace VAR.PdfTools
{ {
if (_sbText.Length == 0) if (_sbText.Length == 0)
{ {
if (_currentTextElement != null) _textWidth = 0;
{
AddTextElement(_currentTextElement);
_currentTextElement = null;
}
return; return;
} }
if (_currentTextElement != null)
{
FlushTextElementSoft();
AddTextElement(_currentTextElement);
_currentTextElement = null;
}
else
{
PdfTextElement textElem = BuildTextElement(); PdfTextElement textElem = BuildTextElement();
AddTextElement(textElem); AddTextElement(textElem);
}
_textMatrixCurrent.Matrix[0, 2] += _textWidth;
_sbText = new StringBuilder(); _sbText = new StringBuilder();
_listCharacters.Clear();
_textWidth = 0; _textWidth = 0;
} }
@@ -330,6 +197,21 @@ namespace VAR.PdfTools
return null; return null;
} }
/// <summary>
/// Collects every text element whose visible text contains <paramref name="text"/>.
/// </summary>
/// <param name="text">Text to search for.</param>
/// <param name="fuzzy">When true, both the search text and each element's text go through SimplifyText before comparing.</param>
/// <returns>The matching elements, in the order they appear in the element list; empty when nothing matches.</returns>
private List<PdfTextElement> FindElementsContainingText(string text, bool fuzzy)
{
    string needle = fuzzy ? SimplifyText(text) : text;
    return _textElements
        .Where(candidate => (fuzzy ? SimplifyText(candidate.VisibleText) : candidate.VisibleText).Contains(needle))
        .ToList();
}
private bool TextElementVerticalIntersection(PdfTextElement elem1, PdfTextElement elem2) private bool TextElementVerticalIntersection(PdfTextElement elem1, PdfTextElement elem2)
{ {
double elem1X1 = elem1.GetX(); double elem1X1 = elem1.GetX();
@@ -359,44 +241,47 @@ namespace VAR.PdfTools
_graphicsMatrixStack.Add(_graphicsMatrix.Copy()); _graphicsMatrixStack.Add(_graphicsMatrix.Copy());
} }
/// <summary>
/// Passes the six values straight to <c>_graphicsMatrix.Set</c>, in the order received.
/// </summary>
// NOTE(review): invoked for the graphics-matrix content operator (presumably "cm") — confirm at the call site.
private void OpSetGraphMatrix(double a, double b, double c, double d, double e, double f)
{
_graphicsMatrix.Set(a, b, c, d, e, f);
}
private void OpPopGraphState() private void OpPopGraphState()
{ {
_graphicsMatrix = _graphicsMatrixStack[_graphicsMatrixStack.Count - 1]; _graphicsMatrix = _graphicsMatrixStack[_graphicsMatrixStack.Count - 1];
_graphicsMatrixStack.RemoveAt(_graphicsMatrixStack.Count - 1); _graphicsMatrixStack.RemoveAt(_graphicsMatrixStack.Count - 1);
} }
private void OpSetGraphMatrix(double a, double b, double c, double d, double e, double f)
{
_graphicsMatrix.Matrix[0, 0] = a;
_graphicsMatrix.Matrix[1, 0] = b;
_graphicsMatrix.Matrix[2, 0] = 0;
_graphicsMatrix.Matrix[0, 1] = c;
_graphicsMatrix.Matrix[1, 1] = d;
_graphicsMatrix.Matrix[2, 1] = 0;
_graphicsMatrix.Matrix[0, 2] = e;
_graphicsMatrix.Matrix[1, 2] = f;
_graphicsMatrix.Matrix[2, 2] = 1;
}
private void OpBeginText() private void OpBeginText()
{ {
_textMatrix.Idenity(); _textMatrix.Idenity();
_textMatrixCurrent.Idenity();
inText = true; inText = true;
} }
private void OpEndText() private void OpEndText()
{ {
FlushTextElementSoft(); FlushTextElement();
inText = false; inText = false;
} }
private void OpTextFont(string fontName, double size) private void OpTextFont(string fontName, double size)
{ {
FlushTextElementSoft(); FlushTextElement();
_font = _page.Fonts[fontName]; _font = _page.Fonts[fontName];
_fontSize = size; _fontSize = size;
} }
/// <summary>
/// Stores the character spacing; OpTextPut adds it to the running text width after each non-space character.
/// </summary>
private void OpTextCharSpacing(double charSpacing)
{
_charSpacing = charSpacing;
}
/// <summary>
/// Stores the word spacing; OpTextPut adds it to the running text width after each space (0x20) character.
/// </summary>
private void OpTextWordSpacing(double wordSpacing)
{
_wordSpacing = wordSpacing;
}
private void OpTextLeading(double textLeading) private void OpTextLeading(double textLeading)
{ {
_textLeading = textLeading; _textLeading = textLeading;
@@ -409,6 +294,7 @@ namespace VAR.PdfTools
newMatrix.Matrix[0, 2] = x; newMatrix.Matrix[0, 2] = x;
newMatrix.Matrix[1, 2] = y; newMatrix.Matrix[1, 2] = y;
_textMatrix = newMatrix.Multiply(_textMatrix); _textMatrix = newMatrix.Multiply(_textMatrix);
_textMatrixCurrent = _textMatrix.Copy();
} }
private void OpTextLineFeed() private void OpTextLineFeed()
@@ -418,16 +304,10 @@ namespace VAR.PdfTools
private void OpSetTextMatrix(double a, double b, double c, double d, double e, double f) private void OpSetTextMatrix(double a, double b, double c, double d, double e, double f)
{ {
Matrix3x3 newMatrix = new Matrix3x3(a, b, c, d, e, f);
FlushTextElement(); FlushTextElement();
_textMatrix.Matrix[0, 0] = a; _textMatrix = newMatrix;
_textMatrix.Matrix[1, 0] = b; _textMatrixCurrent = _textMatrix.Copy();
_textMatrix.Matrix[2, 0] = 0;
_textMatrix.Matrix[0, 1] = c;
_textMatrix.Matrix[1, 1] = d;
_textMatrix.Matrix[2, 1] = 0;
_textMatrix.Matrix[0, 2] = e;
_textMatrix.Matrix[1, 2] = f;
_textMatrix.Matrix[2, 2] = 1;
} }
private void OpTextPut(string text) private void OpTextPut(string text)
@@ -438,7 +318,12 @@ namespace VAR.PdfTools
{ {
foreach (char c in text) foreach (char c in text)
{ {
_textWidth += _font.GetCharWidth(c) * _fontSize; string realChar = _font.ToUnicode(c);
if (realChar == "\0") { continue; }
_listCharacters.Add(new PdfCharElement { Char = _font.ToUnicode(c), Displacement = _textWidth, });
double charWidth = _font.GetCharWidth(c) * _fontSize;
_textWidth += charWidth;
_textWidth += ((c == 0x20) ? _wordSpacing : _charSpacing);
} }
} }
} }
@@ -448,16 +333,16 @@ namespace VAR.PdfTools
if (inText == false) { return; } if (inText == false) { return; }
foreach (IPdfElement elem in array.Values) foreach (IPdfElement elem in array.Values)
{ {
if(elem is PdfString) if (elem is PdfString)
{ {
OpTextPut(((PdfString)elem).Value); OpTextPut(((PdfString)elem).Value);
} }
else if(elem is PdfInteger || elem is PdfReal) else if (elem is PdfInteger || elem is PdfReal)
{ {
double spacing = PdfElementUtils.GetReal(elem, 0); double spacing = PdfElementUtils.GetReal(elem, 0);
_textWidth += spacing; _textWidth -= (spacing / 1000) * _fontSize;
} }
else if(elem is PdfArray) else if (elem is PdfArray)
{ {
OpTextPutMultiple(((PdfArray)elem)); OpTextPutMultiple(((PdfArray)elem));
} }
@@ -468,11 +353,14 @@ namespace VAR.PdfTools
#region Private methods #region Private methods
private void ProcessPage() private void ProcessPageContent()
{ {
foreach (PdfContentAction action in _page.ContentActions) int unknowCount = 0;
for (int i = 0; i < _page.ContentActions.Count; i++)
{ {
// Graphics Operations PdfContentAction action = _page.ContentActions[i];
// Special graphics state
if (action.Token == "q") if (action.Token == "q")
{ {
OpPushGraphState(); OpPushGraphState();
@@ -503,11 +391,13 @@ namespace VAR.PdfTools
} }
else if (action.Token == "Tc") else if (action.Token == "Tc")
{ {
// FIXME: Char spacing double charSpacing = PdfElementUtils.GetReal(action.Parameters[0], 0);
OpTextCharSpacing(charSpacing);
} }
else if (action.Token == "Tw") else if (action.Token == "Tw")
{ {
// FIXME: Word spacing double wordSpacing = PdfElementUtils.GetReal(action.Parameters[0], 0);
OpTextWordSpacing(wordSpacing);
} }
else if (action.Token == "Tz") else if (action.Token == "Tz")
{ {
@@ -515,7 +405,7 @@ namespace VAR.PdfTools
} }
else if (action.Token == "Tf") else if (action.Token == "Tf")
{ {
string fontName = ((PdfName)action.Parameters[0]).Value; string fontName = PdfElementUtils.GetString(action.Parameters[0], string.Empty);
double fontSize = PdfElementUtils.GetReal(action.Parameters[1], 0); double fontSize = PdfElementUtils.GetReal(action.Parameters[1], 0);
OpTextFont(fontName, fontSize); OpTextFont(fontName, fontSize);
} }
@@ -561,27 +451,159 @@ namespace VAR.PdfTools
} }
else if (action.Token == "Tj") else if (action.Token == "Tj")
{ {
OpTextPut(((PdfString)action.Parameters[0]).Value); string text = PdfElementUtils.GetString(action.Parameters[0], string.Empty);
OpTextPut(text);
} }
else if (action.Token == "'") else if (action.Token == "'")
{ {
string text = PdfElementUtils.GetString(action.Parameters[0], string.Empty);
OpTextLineFeed(); OpTextLineFeed();
OpTextPut(((PdfString)action.Parameters[0]).Value); OpTextPut(text);
} }
else if (action.Token == "\"") else if (action.Token == "\"")
{ {
double wordSpacing = PdfElementUtils.GetReal(action.Parameters[0], 0); double wordSpacing = PdfElementUtils.GetReal(action.Parameters[0], 0);
double charSpacing = PdfElementUtils.GetReal(action.Parameters[1], 0); double charSpacing = PdfElementUtils.GetReal(action.Parameters[1], 0);
OpTextPut(((PdfString)action.Parameters[2]).Value); string text = PdfElementUtils.GetString(action.Parameters[2], string.Empty); // operands are: word spacing, char spacing, string — the text is the third one
OpTextCharSpacing(charSpacing);
OpTextWordSpacing(wordSpacing);
OpTextPut(text);
} }
else if (action.Token == "TJ") else if (action.Token == "TJ")
{ {
OpTextPutMultiple(((PdfArray)action.Parameters[0])); OpTextPutMultiple(((PdfArray)action.Parameters[0]));
} }
else if (action.Token == "re")
{
// FIXME: Interpret this
}
else if (action.Token == "f")
{
// FIXME: Interpret this
}
else if (action.Token == "g")
{
// FIXME: Interpret this
}
else if (action.Token == "rg")
{
// FIXME: Interpret this
}
else if (action.Token == "BI")
{
// FIXME: Interpret this
}
else if (action.Token == "ID")
{
// FIXME: Interpret this
}
else if (action.Token == "EI")
{
// FIXME: Interpret this
}
else if (action.Token == "W")
{
// FIXME: Interpret this
}
else if (action.Token == "n")
{
// FIXME: Interpret this
}
else if (action.Token == "Do")
{
// FIXME: Interpret this
}
else
{
unknowCount++;
}
} }
FlushTextElement(); FlushTextElement();
} }
/// <summary>
/// Condenses <c>_textElements</c>: elements on the same Y position (within 0.001) whose horizontal
/// extents touch or lie within one 'm' width of each other are merged into a single block element.
/// </summary>
/// <remarks>
/// Each pass takes the first remaining element as the seed of a block and keeps absorbing
/// neighbours until none is close enough. A merged block gets its characters re-based to the
/// block's left edge and sorted by displacement; the absorbed elements are kept in <c>Childs</c>.
/// An element that found no neighbour is passed through unchanged.
/// </remarks>
private void JoinTextElements()
{
    var textElementsCondensed = new List<PdfTextElement>();
    while (_textElements.Count > 0)
    {
        // Take the seed by index; no need to search the list for it.
        PdfTextElement elem = _textElements[0];
        _textElements.RemoveAt(0);

        double blockY = elem.GetY();
        double blockXMin = elem.GetX();
        double blockXMax = blockXMin + elem.VisibleWidth;

        // Joining tolerance: one 'm' of the seed element's font. It does not change while the
        // block grows, so compute it once. A seed without font gets no extra tolerance.
        double joinMargin = (elem.Font != null)
            ? elem.FontSize * elem.Font.GetCharWidth('m')
            : 0.0;

        // Prepare first neighbour
        var textElementNeighbours = new List<PdfTextElement>();
        textElementNeighbours.Add(elem);

        // Search Neighbours
        int i = 0;
        while (i < _textElements.Count)
        {
            PdfTextElement neighbour = _textElements[i];

            if (Math.Abs(neighbour.GetY() - blockY) > 0.001) { i++; continue; }

            double neighbourXMin = neighbour.GetX();
            double neighbourXMax = neighbourXMin + neighbour.VisibleWidth;
            bool closeEnough =
                (blockXMax + joinMargin) >= neighbourXMin &&
                neighbourXMax >= (blockXMin - joinMargin);
            if (closeEnough == false) { i++; continue; }

            _textElements.RemoveAt(i);
            textElementNeighbours.Add(neighbour);
            if (blockXMax < neighbourXMax) { blockXMax = neighbourXMax; }
            if (blockXMin > neighbourXMin) { blockXMin = neighbourXMin; }

            // The block grew, so elements skipped earlier may now be in reach: rescan.
            i = 0;
        }

        // Only the seed itself: nothing to join, keep the element as it is.
        // (The list always contains the seed, so the test must be against 1, not 0.)
        if (textElementNeighbours.Count == 1)
        {
            textElementsCondensed.Add(elem);
            continue;
        }

        // Join neighbours
        var chars = new List<PdfCharElement>();
        foreach (PdfTextElement neighbour in textElementNeighbours)
        {
            double neighbourXMin = neighbour.GetX();
            foreach (PdfCharElement c in neighbour.Characters)
            {
                chars.Add(new PdfCharElement
                {
                    Char = c.Char,
                    // Re-base the displacement from the neighbour's origin to the block's left edge.
                    Displacement = (c.Displacement + neighbourXMin) - blockXMin,
                });
            }
        }
        chars = chars.OrderBy(c => c.Displacement).ToList();

        var sbText = new StringBuilder();
        foreach (PdfCharElement c in chars)
        {
            sbText.Append(c.Char);
        }
        string blockText = sbText.ToString();

        PdfTextElement blockElem = new PdfTextElement
        {
            Font = null,
            FontSize = elem.FontSize,
            Matrix = elem.Matrix.Copy(),
            RawText = blockText,
            VisibleText = blockText,
            VisibleWidth = blockXMax - blockXMin,
            VisibleHeight = elem.VisibleHeight,
            Characters = chars,
            Childs = textElementNeighbours,
        };
        // Move the copied matrix so the block starts at its leftmost member.
        blockElem.Matrix.Matrix[0, 2] = blockXMin;
        textElementsCondensed.Add(blockElem);
    }
    _textElements = textElementsCondensed;
}
#endregion #endregion
#region Public methods #region Public methods
@@ -594,7 +616,7 @@ namespace VAR.PdfTools
public List<string> GetColumn(string column, bool fuzzy) public List<string> GetColumn(string column, bool fuzzy)
{ {
PdfTextElement columnHead = FindElementByText(column, fuzzy); PdfTextElement columnHead = FindElementByText(column, fuzzy);
if(columnHead == null) if (columnHead == null)
{ {
return new List<string>(); return new List<string>();
} }
@@ -607,7 +629,7 @@ namespace VAR.PdfTools
double extentX2 = double.MaxValue; double extentX2 = double.MaxValue;
foreach (PdfTextElement elem in _textElements) foreach (PdfTextElement elem in _textElements)
{ {
if(elem == columnHead){continue;} if (elem == columnHead) { continue; }
if (TextElementHorizontalIntersection(columnHead, elem) == false) { continue; } if (TextElementHorizontalIntersection(columnHead, elem) == false) { continue; }
double elemX1 = elem.GetX(); double elemX1 = elem.GetX();
double elemX2 = elemX1 + elem.VisibleWidth; double elemX2 = elemX1 + elem.VisibleWidth;
@@ -688,7 +710,7 @@ namespace VAR.PdfTools
fieldData.Add(elem); fieldData.Add(elem);
} }
if(fieldData.Count == 0) if (fieldData.Count == 0)
{ {
return null; return null;
} }
@@ -703,12 +725,8 @@ namespace VAR.PdfTools
public bool HasText(string text, bool fuzzy) public bool HasText(string text, bool fuzzy)
{ {
PdfTextElement fieldTitle = FindElementByText(text, fuzzy); List<PdfTextElement> list = FindElementsContainingText(text, fuzzy);
if (fieldTitle == null) return (list.Count > 0);
{
return false;
}
return true;
} }
#endregion #endregion

View File

@@ -6,9 +6,9 @@ using System.Runtime.InteropServices;
[assembly: AssemblyConfiguration("")] [assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("VAR")] [assembly: AssemblyCompany("VAR")]
[assembly: AssemblyProduct("VAR.PdfTools")] [assembly: AssemblyProduct("VAR.PdfTools")]
[assembly: AssemblyCopyright("Copyright © VAR 2016")] [assembly: AssemblyCopyright("Copyright © VAR 2016-2017")]
[assembly: AssemblyTrademark("")] [assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")] [assembly: AssemblyCulture("")]
[assembly: ComVisible(false)] [assembly: ComVisible(false)]
[assembly: Guid("eb7e003a-6a95-4002-809f-926c7c8a11e9")] [assembly: Guid("eb7e003a-6a95-4002-809f-926c7c8a11e9")]
[assembly: AssemblyVersion("1.1.*")] [assembly: AssemblyVersion("1.4.0.*")]

View File

@@ -1,61 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<ProjectGuid>{EB7E003A-6A95-4002-809F-926C7C8A11E9}</ProjectGuid>
<OutputType>Library</OutputType>
<AppDesignerFolder>Properties</AppDesignerFolder>
<RootNamespace>VAR.PdfTools</RootNamespace>
<AssemblyName>VAR.PdfTools</AssemblyName>
<TargetFrameworkVersion>v3.5</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment>
<TargetFrameworkProfile />
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<ItemGroup>
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="System.Data" />
<Reference Include="System.Xml" />
</ItemGroup>
<ItemGroup>
<Compile Include="PdfContentAction.cs" />
<Compile Include="PdfDocument.cs" />
<Compile Include="PdfDocumentPage.cs" />
<Compile Include="PdfElements.cs" />
<Compile Include="PdfFilters.cs" />
<Compile Include="PdfFont.cs" />
<Compile Include="PdfParser.cs" />
<Compile Include="PdfStandar14FontMetrics.cs" />
<Compile Include="PdfTextExtractor.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
Other similar extension points exist, see Microsoft.Common.targets.
<Target Name="BeforeBuild">
</Target>
<Target Name="AfterBuild">
</Target>
-->
</Project>

View File

@@ -13,22 +13,43 @@
<FileAlignment>512</FileAlignment> <FileAlignment>512</FileAlignment>
<TargetFrameworkProfile /> <TargetFrameworkProfile />
</PropertyGroup> </PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "> <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug .Net 4.6.1|AnyCPU' ">
<DebugSymbols>true</DebugSymbols> <DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType> <DebugType>full</DebugType>
<Optimize>false</Optimize> <Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath> <OutputPath>bin\Debug\net461</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants> <DefineConstants>DEBUG;TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport> <ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel> <WarningLevel>4</WarningLevel>
<TargetFrameworkVersion>v4.6.1</TargetFrameworkVersion>
</PropertyGroup> </PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' "> <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release .Net 4.6.1|AnyCPU' ">
<DebugType>pdbonly</DebugType> <DebugType>pdbonly</DebugType>
<Optimize>true</Optimize> <Optimize>true</Optimize>
<OutputPath>bin\Release\</OutputPath> <OutputPath>bin\Release\net461</OutputPath>
<DefineConstants>TRACE</DefineConstants> <DefineConstants>TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport> <ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel> <WarningLevel>4</WarningLevel>
<TargetFrameworkVersion>v4.6.1</TargetFrameworkVersion>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug .Net 3.5|AnyCPU' ">
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\net35</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
<TargetFrameworkVersion>v3.5</TargetFrameworkVersion>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release .Net 3.5|AnyCPU' ">
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release\net35</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
<TargetFrameworkVersion>v3.5</TargetFrameworkVersion>
</PropertyGroup> </PropertyGroup>
<ItemGroup> <ItemGroup>
<Reference Include="System" /> <Reference Include="System" />
@@ -39,16 +60,39 @@
<Reference Include="System.Xml" /> <Reference Include="System.Xml" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<Compile Include="Maths\Matrix3x3.cs" />
<Compile Include="PdfContentAction.cs" /> <Compile Include="PdfContentAction.cs" />
<Compile Include="PdfDocument.cs" /> <Compile Include="PdfDocument.cs" />
<Compile Include="PdfDocumentPage.cs" /> <Compile Include="PdfDocumentPage.cs" />
<Compile Include="PdfElements.cs" /> <Compile Include="PdfElements\IPdfElement.cs" />
<Compile Include="PdfElements\PdfArray.cs" />
<Compile Include="PdfElements\PdfBoolean.cs" />
<Compile Include="PdfElements\PdfDictionary.cs" />
<Compile Include="PdfElements\PdfElementTypes.cs" />
<Compile Include="PdfElements\PdfElementUtils.cs" />
<Compile Include="PdfFilters.cs" /> <Compile Include="PdfFilters.cs" />
<Compile Include="PdfFont.cs" /> <Compile Include="PdfFont.cs" />
<Compile Include="PdfElements\PdfInteger.cs" />
<Compile Include="PdfElements\PdfName.cs" />
<Compile Include="PdfElements\PdfNull.cs" />
<Compile Include="PdfElements\PdfObject.cs" />
<Compile Include="PdfElements\PdfObjectReference.cs" />
<Compile Include="PdfElements\PdfReal.cs" />
<Compile Include="PdfElements\PdfStream.cs" />
<Compile Include="PdfElements\PdfString.cs" />
<Compile Include="PdfParser.cs" /> <Compile Include="PdfParser.cs" />
<Compile Include="PdfStandar14FontMetrics.cs" /> <Compile Include="PdfStandar14FontMetrics.cs" />
<Compile Include="PdfTextExtractor.cs" /> <Compile Include="PdfTextExtractor.cs" />
<Compile Include="Properties\AssemblyInfo.cs" /> <Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="Maths\Vector3D.cs" />
</ItemGroup>
<ItemGroup>
<None Include="NuGet\keep.txt" />
</ItemGroup>
<ItemGroup>
<None Include="Build.NuGet.cmd" />
<None Include="packages.config" />
<None Include="VAR.PdfTools.nuspec" />
</ItemGroup> </ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" /> <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<!-- To modify your build process, add your task inside one of the targets below and uncomment it. <!-- To modify your build process, add your task inside one of the targets below and uncomment it.

View File

@@ -0,0 +1,22 @@
<?xml version="1.0"?>
<!--
  NuGet package specification for VAR.PdfTools.
  NOTE(review): the $id$, $version$, $title$, $author$ and $description$
  placeholders are presumably NuGet replacement tokens filled from the
  project/assembly metadata at pack time; confirm against Build.NuGet.cmd.
-->
<package >
<metadata>
<id>$id$</id>
<version>$version$</version>
<title>$title$</title>
<authors>$author$</authors>
<owners>$author$</owners>
<licenseUrl>https://github.com/Kableado/VAR.PdfTools/blob/master/LICENSE.txt</licenseUrl>
<projectUrl>https://github.com/Kableado/VAR.PdfTools</projectUrl>
<requireLicenseAcceptance>false</requireLicenseAcceptance>
<description>$description$</description>
<copyright>Copyright VAR 2016-2017</copyright>
<tags>PDF Library</tags>
</metadata>
<!--
  Packaged binaries: the Release outputs for each target framework
  (bin\Release\net461 and bin\Release\net35) are copied, together with their
  .pdb files, into the matching lib\<framework> folder of the package.
-->
<files>
<file src="bin\Release\net461\VAR.PdfTools.dll" target="lib\net461\VAR.PdfTools.dll" />
<file src="bin\Release\net461\VAR.PdfTools.pdb" target="lib\net461\VAR.PdfTools.pdb" />
<file src="bin\Release\net35\VAR.PdfTools.dll" target="lib\net35\VAR.PdfTools.dll" />
<file src="bin\Release\net35\VAR.PdfTools.pdb" target="lib\net35\VAR.PdfTools.pdb" />
</files>
</package>

View File

@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<!--
  NuGet packages referenced by the project. NuGet.CommandLine is marked as a
  development dependency (developmentDependency="true").
  NOTE(review): presumably it is only used to build the package; confirm.
-->
<packages>
<package id="NuGet.CommandLine" version="3.4.3" targetFramework="net461" developmentDependency="true" />
</packages>