Initialize issues branch

This commit is contained in:
2025-05-11 20:00:29 +02:00
parent 98a6b8e746
commit 668b816810
41 changed files with 0 additions and 7198 deletions

33
.gitignore vendored
View File

@@ -1,33 +0,0 @@
# Ignore thumbnails created by Windows
Thumbs.db
# Ignore files built by Visual Studio
*.obj
*.exe
*.pdb
*.user
*.aps
*.pch
*.vspscc
*_i.c
*_p.c
*.ncb
*.suo
*.tlb
*.tlh
*.bak
*.cache
*.ilk
*.log
[Bb]in
[Dd]ebug*/
*.lib
*.sbr
obj/
[Rr]elease*/
_ReSharper*/
*.userprefs
*.nupkg
.vs
PDFTests
Doc

View File

@@ -1,21 +0,0 @@
The MIT License (MIT)
Copyright (c) 2016-2019 Valeriano Alfonso Rodriguez
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -1,53 +0,0 @@
# .NET library and tool to work with PDF files
## Usage
### VAR.PdfTools
Add the resulting assembly as a reference in your project, and add this line to your code:
```csharp
using VAR.PdfTools;
```
Then extract the contents of a data column using:
```csharp
var columnData = new List<string>();
PdfDocument doc = PdfDocument.Load("document.pdf");
foreach (PdfDocumentPage page in doc.Pages)
{
PdfTextExtractor extractor = new PdfTextExtractor(page);
columnData.AddRange(extractor.GetColumnAsStrings("Column"));
}
```
Or extract the content of a field (the text to the right of the indicated text):
```csharp
var fieldData = new List<string>();
PdfDocument doc = PdfDocument.Load("document.pdf");
foreach (PdfDocumentPage page in doc.Pages)
{
PdfTextExtractor extractor = new PdfTextExtractor(page);
fieldData.Add(extractor.GetFieldAsString(txtFieldName.Text));
}
```
### VAR.PdfTools.Workbench
It is a simple Windows Forms application for testing the basic functionality of the library.
## Building
A Visual Studio solution is provided. Simply click Build in the IDE.
The build generates a DLL and a NuGet package.
## Contributing
1. Fork it!
2. Create your feature branch: `git checkout -b my-new-feature`
3. Commit your changes: `git commit -am 'Add some feature'`
4. Push to the branch: `git push origin my-new-feature`
5. Submit a pull request :D
## Credits
* Valeriano Alfonso Rodriguez.

View File

@@ -1,117 +0,0 @@
using System.Collections.Generic;
using System.IO;
using System.Text;
namespace VAR.PdfTools.Workbench
{
/// <summary>
/// Minimal key/value settings store persisted as one "key|value" line per
/// entry in a ".cfg" file placed next to the application executable.
/// </summary>
/// <remarks>
/// Lines are split on the first '|' when read back, so keys must not contain
/// '|', and neither keys nor values may contain line breaks.
/// </remarks>
public class Configuration
{
    private readonly Dictionary<string, string> _configItems = new Dictionary<string, string>();

    /// <summary>
    /// Builds the full path of the configuration file:
    /// "&lt;application directory&gt;/&lt;executable name&gt;.cfg".
    /// </summary>
    private static string GetConfigFileName()
    {
        System.Reflection.Assembly entryAssembly = System.Reflection.Assembly.GetEntryAssembly();
        string location = (entryAssembly == null) ? null : entryAssembly.Location;

        string directory;
        string baseName;
        if (string.IsNullOrEmpty(location))
        {
            // Assembly.Location is empty when the assembly is bundled into a
            // single-file executable, and there is no entry assembly at all when
            // the process is hosted by unmanaged code. Without this fallback the
            // path would degenerate to "/.cfg".
            directory = System.AppDomain.CurrentDomain.BaseDirectory;
            baseName = (entryAssembly == null)
                ? System.AppDomain.CurrentDomain.FriendlyName
                : entryAssembly.GetName().Name;
        }
        else
        {
            directory = Path.GetDirectoryName(location);
            baseName = Path.GetFileNameWithoutExtension(location);
        }

        return Path.Combine(directory, baseName + ".cfg");
    }

    /// <summary>
    /// Reads all lines of the configuration file, or returns an empty array
    /// when the file does not exist.
    /// </summary>
    private static string[] GetConfigurationLines()
    {
        string configFile = GetConfigFileName();
        return File.Exists(configFile) ? File.ReadAllLines(configFile) : new string[0];
    }

    /// <summary>
    /// Replaces the in-memory entries with the contents of the configuration file.
    /// Lines without a '|' separator are ignored; when a key appears more than
    /// once the last occurrence wins.
    /// </summary>
    public void Load()
    {
        _configItems.Clear();
        foreach (string configLine in GetConfigurationLines())
        {
            int idxSplit = configLine.IndexOf('|');
            if (idxSplit < 0) { continue; }

            string configName = configLine.Substring(0, idxSplit);
            string configData = configLine.Substring(idxSplit + 1);
            _configItems[configName] = configData;
        }
    }

    /// <summary>
    /// Writes every entry as a "key|value" line, overwriting the configuration file.
    /// </summary>
    public void Save()
    {
        StringBuilder sbConfig = new StringBuilder();
        foreach (KeyValuePair<string, string> pair in _configItems)
        {
            sbConfig.AppendFormat("{0}|{1}\n", pair.Key, pair.Value);
        }
        File.WriteAllText(GetConfigFileName(), sbConfig.ToString());
    }

    /// <summary>
    /// Gets the string stored under <paramref name="key"/>.
    /// </summary>
    /// <param name="key">Entry name.</param>
    /// <param name="defaultValue">Value returned when the key is not present.</param>
    /// <returns>The stored value, or <paramref name="defaultValue"/>.</returns>
    public string Get(string key, string defaultValue)
    {
        string value;
        return _configItems.TryGetValue(key, out value) ? value : defaultValue;
    }

    /// <summary>
    /// Gets the boolean stored under <paramref name="key"/>. A stored value equal
    /// to "true" (compared ignoring case, so a hand-edited "True" also works)
    /// yields <c>true</c>; any other stored value yields <c>false</c>.
    /// </summary>
    /// <param name="key">Entry name.</param>
    /// <param name="defaultValue">Value returned when the key is not present.</param>
    /// <returns>The stored flag, or <paramref name="defaultValue"/>.</returns>
    public bool Get(string key, bool defaultValue)
    {
        string value;
        if (_configItems.TryGetValue(key, out value) == false)
        {
            return defaultValue;
        }
        return string.Equals(value, "true", System.StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Adds or replaces the string stored under <paramref name="key"/>.
    /// </summary>
    /// <param name="key">Entry name.</param>
    /// <param name="value">Value to store.</param>
    public void Set(string key, string value)
    {
        _configItems[key] = value;
    }

    /// <summary>
    /// Adds or replaces the boolean stored under <paramref name="key"/>; it is
    /// persisted as the text "true" or "false".
    /// </summary>
    /// <param name="key">Entry name.</param>
    /// <param name="value">Flag to store.</param>
    public void Set(string key, bool value)
    {
        Set(key, value ? "true" : "false");
    }
}
}

View File

@@ -1,319 +0,0 @@
namespace VAR.PdfTools.Workbench
{
/// <summary>
/// Designer-generated part of the FrmPdfInfo form: declares the form's controls,
/// sets their layout properties and attaches their event handlers.
/// </summary>
partial class FrmPdfInfo
{
/// <summary>
/// Required designer variable.
/// </summary>
private System.ComponentModel.IContainer components = null;
/// <summary>
/// Clean up any resources being used.
/// </summary>
/// <param name="disposing">true if managed resources should be disposed; otherwise, false.</param>
protected override void Dispose(bool disposing)
{
if (disposing && (components != null))
{
components.Dispose();
}
base.Dispose(disposing);
}
#region Windows Form Designer generated code
/// <summary>
/// Required method for Designer support - do not modify
/// the contents of this method with the code editor.
/// </summary>
private void InitializeComponent()
{
this.lblOutputs = new System.Windows.Forms.Label();
this.lblInputs = new System.Windows.Forms.Label();
this.btnBrowse = new System.Windows.Forms.Button();
this.txtPdfPath = new System.Windows.Forms.TextBox();
this.txtOutput = new System.Windows.Forms.TextBox();
this.btnProcess = new System.Windows.Forms.Button();
this.btnGetColumn1 = new System.Windows.Forms.Button();
this.txtField1 = new System.Windows.Forms.TextBox();
this.btnGetField1 = new System.Windows.Forms.Button();
this.btnHasText1 = new System.Windows.Forms.Button();
this.btnRender = new System.Windows.Forms.Button();
this.btnHasText2 = new System.Windows.Forms.Button();
this.btnGetField2 = new System.Windows.Forms.Button();
this.txtField2 = new System.Windows.Forms.TextBox();
this.btnGetColumn2 = new System.Windows.Forms.Button();
this.btnHasText3 = new System.Windows.Forms.Button();
this.btnGetField3 = new System.Windows.Forms.Button();
this.txtField3 = new System.Windows.Forms.TextBox();
this.btnGetColumn3 = new System.Windows.Forms.Button();
this.txtPages = new System.Windows.Forms.TextBox();
this.chkRender = new System.Windows.Forms.CheckBox();
this.SuspendLayout();
//
// lblOutputs
//
this.lblOutputs.AutoSize = true;
this.lblOutputs.Font = new System.Drawing.Font("Microsoft Sans Serif", 8.25F, System.Drawing.FontStyle.Bold, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
this.lblOutputs.Location = new System.Drawing.Point(12, 143);
this.lblOutputs.Name = "lblOutputs";
this.lblOutputs.Size = new System.Drawing.Size(51, 13);
this.lblOutputs.TabIndex = 11;
this.lblOutputs.Text = "Outputs";
//
// lblInputs
//
this.lblInputs.AutoSize = true;
this.lblInputs.Font = new System.Drawing.Font("Microsoft Sans Serif", 8.25F, System.Drawing.FontStyle.Bold, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
this.lblInputs.Location = new System.Drawing.Point(12, 9);
this.lblInputs.Name = "lblInputs";
this.lblInputs.Size = new System.Drawing.Size(42, 13);
this.lblInputs.TabIndex = 10;
this.lblInputs.Text = "Inputs";
//
// btnBrowse
//
this.btnBrowse.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Right)));
this.btnBrowse.Location = new System.Drawing.Point(316, 23);
this.btnBrowse.Name = "btnBrowse";
this.btnBrowse.Size = new System.Drawing.Size(75, 23);
this.btnBrowse.TabIndex = 9;
this.btnBrowse.Text = "Browse";
this.btnBrowse.UseVisualStyleBackColor = true;
this.btnBrowse.Click += new System.EventHandler(this.btnBrowse_Click);
//
// txtPdfPath
//
this.txtPdfPath.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.txtPdfPath.Location = new System.Drawing.Point(15, 25);
this.txtPdfPath.Name = "txtPdfPath";
this.txtPdfPath.Size = new System.Drawing.Size(295, 20);
this.txtPdfPath.TabIndex = 8;
//
// txtOutput
//
this.txtOutput.AcceptsReturn = true;
this.txtOutput.AcceptsTab = true;
this.txtOutput.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom)
| System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.txtOutput.Font = new System.Drawing.Font("Consolas", 8.25F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
this.txtOutput.Location = new System.Drawing.Point(15, 159);
this.txtOutput.Multiline = true;
this.txtOutput.Name = "txtOutput";
this.txtOutput.ScrollBars = System.Windows.Forms.ScrollBars.Vertical;
this.txtOutput.Size = new System.Drawing.Size(457, 290);
this.txtOutput.TabIndex = 7;
//
// btnProcess
//
this.btnProcess.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Right)));
this.btnProcess.Location = new System.Drawing.Point(397, 23);
this.btnProcess.Name = "btnProcess";
this.btnProcess.Size = new System.Drawing.Size(75, 23);
this.btnProcess.TabIndex = 6;
this.btnProcess.Text = "Process";
this.btnProcess.UseVisualStyleBackColor = true;
this.btnProcess.Click += new System.EventHandler(this.btnProcess_Click);
//
// btnGetColumn1
//
this.btnGetColumn1.Location = new System.Drawing.Point(292, 51);
this.btnGetColumn1.Name = "btnGetColumn1";
this.btnGetColumn1.Size = new System.Drawing.Size(69, 23);
this.btnGetColumn1.TabIndex = 12;
this.btnGetColumn1.Text = "GetColumn";
this.btnGetColumn1.UseVisualStyleBackColor = true;
this.btnGetColumn1.Click += new System.EventHandler(this.btnGetColumn1_Click);
//
// txtField1
//
this.txtField1.Location = new System.Drawing.Point(15, 53);
this.txtField1.Name = "txtField1";
this.txtField1.Size = new System.Drawing.Size(142, 20);
this.txtField1.TabIndex = 13;
//
// btnGetField1
//
this.btnGetField1.Location = new System.Drawing.Point(226, 51);
this.btnGetField1.Name = "btnGetField1";
this.btnGetField1.Size = new System.Drawing.Size(60, 23);
this.btnGetField1.TabIndex = 14;
this.btnGetField1.Text = "GetField";
this.btnGetField1.UseVisualStyleBackColor = true;
this.btnGetField1.Click += new System.EventHandler(this.btnGetField1_Click);
//
// btnHasText1
//
this.btnHasText1.Location = new System.Drawing.Point(163, 51);
this.btnHasText1.Name = "btnHasText1";
this.btnHasText1.Size = new System.Drawing.Size(57, 23);
this.btnHasText1.TabIndex = 16;
this.btnHasText1.Text = "HasText";
this.btnHasText1.UseVisualStyleBackColor = true;
this.btnHasText1.Click += new System.EventHandler(this.btnHasText1_Click);
//
// btnRender
//
this.btnRender.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Right)));
this.btnRender.Location = new System.Drawing.Point(397, 52);
this.btnRender.Name = "btnRender";
this.btnRender.Size = new System.Drawing.Size(75, 23);
this.btnRender.TabIndex = 18;
this.btnRender.Text = "Render";
this.btnRender.UseVisualStyleBackColor = true;
this.btnRender.Click += new System.EventHandler(this.btnRender_Click);
//
// btnHasText2
//
this.btnHasText2.Location = new System.Drawing.Point(163, 80);
this.btnHasText2.Name = "btnHasText2";
this.btnHasText2.Size = new System.Drawing.Size(57, 23);
this.btnHasText2.TabIndex = 22;
this.btnHasText2.Text = "HasText";
this.btnHasText2.UseVisualStyleBackColor = true;
this.btnHasText2.Click += new System.EventHandler(this.btnHasText2_Click);
//
// btnGetField2
//
this.btnGetField2.Location = new System.Drawing.Point(226, 80);
this.btnGetField2.Name = "btnGetField2";
this.btnGetField2.Size = new System.Drawing.Size(60, 23);
this.btnGetField2.TabIndex = 21;
this.btnGetField2.Text = "GetField";
this.btnGetField2.UseVisualStyleBackColor = true;
this.btnGetField2.Click += new System.EventHandler(this.btnGetField2_Click);
//
// txtField2
//
this.txtField2.Location = new System.Drawing.Point(15, 82);
this.txtField2.Name = "txtField2";
this.txtField2.Size = new System.Drawing.Size(142, 20);
this.txtField2.TabIndex = 20;
//
// btnGetColumn2
//
this.btnGetColumn2.Location = new System.Drawing.Point(292, 80);
this.btnGetColumn2.Name = "btnGetColumn2";
this.btnGetColumn2.Size = new System.Drawing.Size(69, 23);
this.btnGetColumn2.TabIndex = 19;
this.btnGetColumn2.Text = "GetColumn";
this.btnGetColumn2.UseVisualStyleBackColor = true;
this.btnGetColumn2.Click += new System.EventHandler(this.btnGetColumn2_Click);
//
// btnHasText3
//
this.btnHasText3.Location = new System.Drawing.Point(163, 109);
this.btnHasText3.Name = "btnHasText3";
this.btnHasText3.Size = new System.Drawing.Size(57, 23);
this.btnHasText3.TabIndex = 26;
this.btnHasText3.Text = "HasText";
this.btnHasText3.UseVisualStyleBackColor = true;
this.btnHasText3.Click += new System.EventHandler(this.btnHasText3_Click);
//
// btnGetField3
//
this.btnGetField3.Location = new System.Drawing.Point(226, 109);
this.btnGetField3.Name = "btnGetField3";
this.btnGetField3.Size = new System.Drawing.Size(60, 23);
this.btnGetField3.TabIndex = 25;
this.btnGetField3.Text = "GetField";
this.btnGetField3.UseVisualStyleBackColor = true;
this.btnGetField3.Click += new System.EventHandler(this.btnGetField3_Click);
//
// txtField3
//
this.txtField3.Location = new System.Drawing.Point(15, 111);
this.txtField3.Name = "txtField3";
this.txtField3.Size = new System.Drawing.Size(142, 20);
this.txtField3.TabIndex = 24;
//
// btnGetColumn3
//
this.btnGetColumn3.Location = new System.Drawing.Point(292, 109);
this.btnGetColumn3.Name = "btnGetColumn3";
this.btnGetColumn3.Size = new System.Drawing.Size(69, 23);
this.btnGetColumn3.TabIndex = 23;
this.btnGetColumn3.Text = "GetColumn";
this.btnGetColumn3.UseVisualStyleBackColor = true;
this.btnGetColumn3.Click += new System.EventHandler(this.btnGetColumn3_Click);
//
// txtPages
//
this.txtPages.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Right)));
this.txtPages.Location = new System.Drawing.Point(397, 82);
this.txtPages.Name = "txtPages";
this.txtPages.Size = new System.Drawing.Size(75, 20);
this.txtPages.TabIndex = 27;
//
// chkRender
//
this.chkRender.AutoSize = true;
this.chkRender.Location = new System.Drawing.Point(292, 138);
this.chkRender.Name = "chkRender";
this.chkRender.Size = new System.Drawing.Size(61, 17);
this.chkRender.TabIndex = 28;
this.chkRender.Text = "Render";
this.chkRender.UseVisualStyleBackColor = true;
//
// FrmPdfInfo
//
this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
this.ClientSize = new System.Drawing.Size(484, 461);
this.Controls.Add(this.chkRender);
this.Controls.Add(this.txtPages);
this.Controls.Add(this.btnHasText3);
this.Controls.Add(this.btnGetField3);
this.Controls.Add(this.txtField3);
this.Controls.Add(this.btnGetColumn3);
this.Controls.Add(this.btnHasText2);
this.Controls.Add(this.btnGetField2);
this.Controls.Add(this.txtField2);
this.Controls.Add(this.btnGetColumn2);
this.Controls.Add(this.btnRender);
this.Controls.Add(this.btnHasText1);
this.Controls.Add(this.btnGetField1);
this.Controls.Add(this.txtField1);
this.Controls.Add(this.btnGetColumn1);
this.Controls.Add(this.lblOutputs);
this.Controls.Add(this.lblInputs);
this.Controls.Add(this.btnBrowse);
this.Controls.Add(this.txtPdfPath);
this.Controls.Add(this.txtOutput);
this.Controls.Add(this.btnProcess);
this.Name = "FrmPdfInfo";
this.Text = "PdfInfo";
this.FormClosing += new System.Windows.Forms.FormClosingEventHandler(this.FrmPdfInfo_FormClosing);
this.Load += new System.EventHandler(this.FrmPdfInfo_Load);
this.ResumeLayout(false);
this.PerformLayout();
}
#endregion
// Control fields; each one is instantiated and configured in InitializeComponent.
private System.Windows.Forms.Label lblOutputs;
private System.Windows.Forms.Label lblInputs;
private System.Windows.Forms.Button btnBrowse;
private System.Windows.Forms.TextBox txtPdfPath;
private System.Windows.Forms.TextBox txtOutput;
private System.Windows.Forms.Button btnProcess;
private System.Windows.Forms.Button btnGetColumn1;
private System.Windows.Forms.TextBox txtField1;
private System.Windows.Forms.Button btnGetField1;
private System.Windows.Forms.Button btnHasText1;
private System.Windows.Forms.Button btnRender;
private System.Windows.Forms.Button btnHasText2;
private System.Windows.Forms.Button btnGetField2;
private System.Windows.Forms.TextBox txtField2;
private System.Windows.Forms.Button btnGetColumn2;
private System.Windows.Forms.Button btnHasText3;
private System.Windows.Forms.Button btnGetField3;
private System.Windows.Forms.TextBox txtField3;
private System.Windows.Forms.Button btnGetColumn3;
private System.Windows.Forms.TextBox txtPages;
private System.Windows.Forms.CheckBox chkRender;
}
}

View File

@@ -1,382 +0,0 @@
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using VAR.PdfTools.PdfElements;
namespace VAR.PdfTools.Workbench
{
public partial class FrmPdfInfo : Form
{
/// <summary>
/// Creates the form; all controls are built by the designer-generated
/// InitializeComponent method.
/// </summary>
public FrmPdfInfo()
{
InitializeComponent();
}
/// <summary>
/// Form Load handler: reads the persisted settings and restores the PDF path,
/// the three field boxes, the page selection and the render check box.
/// Missing settings fall back to an empty string (or unchecked).
/// </summary>
private void FrmPdfInfo_Load(object sender, EventArgs e)
{
    Configuration settings = new Configuration();
    settings.Load();

    // Read every persisted value first, then push them into the controls.
    string lastPdfPath = settings.Get("LastPdfPath", string.Empty);
    string field1 = settings.Get("Field1", string.Empty);
    string field2 = settings.Get("Field2", string.Empty);
    string field3 = settings.Get("Field3", string.Empty);
    string pages = settings.Get("Pages", string.Empty);
    bool render = settings.Get("Render", false);

    txtPdfPath.Text = lastPdfPath;
    txtField1.Text = field1;
    txtField2.Text = field2;
    txtField3.Text = field3;
    txtPages.Text = pages;
    chkRender.Checked = render;
}
/// <summary>
/// FormClosing handler: persists the current contents of the input controls.
/// </summary>
/// <remarks>
/// A fresh <see cref="Configuration"/> is used without calling Load, so the
/// saved file contains exactly the six keys written here.
/// </remarks>
private void FrmPdfInfo_FormClosing(object sender, FormClosingEventArgs e)
{
    var configuration = new Configuration();
    configuration.Set("LastPdfPath", txtPdfPath.Text);
    configuration.Set("Field1", txtField1.Text);
    configuration.Set("Field2", txtField2.Text);
    configuration.Set("Field3", txtField3.Text);
    configuration.Set("Pages", txtPages.Text);
    configuration.Set("Render", chkRender.Checked);
    configuration.Save();
}
/// <summary>
/// Browse button handler: shows a file-open dialog and, when the user confirms,
/// copies the chosen file name into the PDF path box.
/// </summary>
private void btnBrowse_Click(object sender, EventArgs e)
{
    // The dialog is a disposable component; release it as soon as it closes.
    using (var dlgFile = new OpenFileDialog())
    {
        if (dlgFile.ShowDialog() == DialogResult.OK)
        {
            txtPdfPath.Text = dlgFile.FileName;
        }
    }
}
/// <summary>
/// Process button handler: loads the PDF named in the path box and writes a
/// report to the output box. The report holds the object, root, stream and
/// page counts, then for every page its CropBox (when present) and one line
/// per extracted text element with position, visible size, font and text.
/// </summary>
private void btnProcess_Click(object sender, EventArgs e)
{
    string pdfPath = txtPdfPath.Text;
    if (File.Exists(pdfPath) == false)
    {
        MessageBox.Show("File does not exist");
        return;
    }

    PdfDocument doc = PdfDocument.Load(pdfPath);
    int nObjects = doc.Objects.Count;
    // Objects that nothing else uses are reported as roots.
    int nRootObject = doc.Objects.Count(obj => obj.UsageCount == 0);
    int nStreams = doc.Objects.Count(obj => obj.Data.Type == PdfElementTypes.Stream);
    int nPages = doc.Pages.Count;

    List<string> lines = new List<string>();
    lines.Add(string.Format("Filename : {0}", Path.GetFileNameWithoutExtension(pdfPath)));
    lines.Add(string.Format("Number of Objects : {0}", nObjects));
    lines.Add(string.Format("Number of Roots : {0}", nRootObject));
    lines.Add(string.Format("Number of Streams : {0}", nStreams));
    lines.Add(string.Format("Number of Pages : {0}", nPages));

    int pageNumber = 1;
    foreach (PdfDocumentPage page in doc.Pages)
    {
        lines.Add("-----------------------------------------------------------------------------------------");

        // The "as" cast yields null when the CropBox entry is not a PdfArray;
        // such pages get the plain header instead of a NullReferenceException.
        // NOTE(review): assumes a CropBox array always has at least four entries.
        PdfArray cropBox = null;
        if (page.BaseData.Values.ContainsKey("CropBox"))
        {
            cropBox = page.BaseData.Values["CropBox"] as PdfArray;
        }
        if (cropBox != null)
        {
            lines.Add(string.Format("Page({0} of {1}): {2} {3} {4} {5}", pageNumber, nPages,
                PdfElementUtils.GetReal(cropBox.Values[0], 0),
                PdfElementUtils.GetReal(cropBox.Values[1], 0),
                PdfElementUtils.GetReal(cropBox.Values[2], 0),
                PdfElementUtils.GetReal(cropBox.Values[3], 0)));
        }
        else
        {
            lines.Add(string.Format("Page({0} of {1}): ", pageNumber, nPages));
        }
        pageNumber++;

        PdfTextExtractor extractor = new PdfTextExtractor(page);
        foreach (PdfTextElement textElement in extractor.Elements)
        {
            string fontName = textElement.Font == null ? "#NULL#" : textElement.Font.Name;
            if (fontName == "#NULL#" && textElement.Childs.Count > 0)
            {
                // An element without its own font reports the fonts of its
                // children, separated by ';'.
                fontName = string.Join(";", textElement.Childs
                    .Select(c => c.Font == null ? "#NULL#" : c.Font.Name)
                    .ToArray());
            }
            lines.Add(string.Format("Text({0}, {1})({2}, {3})[{4}]: \"{5}\"",
                Math.Round(textElement.Matrix.Matrix[0, 2], 2),
                Math.Round(textElement.Matrix.Matrix[1, 2], 2),
                Math.Round(textElement.VisibleWidth, 2),
                Math.Round(textElement.VisibleHeight, 2),
                fontName,
                textElement.VisibleText));
        }
    }
    txtOutput.Lines = lines.ToArray();
}
// Click handlers for the three rows of HasText / GetField / GetColumn buttons.
// Each one forwards the current PDF path and the text of its row's field box
// (txtField1, txtField2 or txtField3) to the matching Action_* method.

/// <summary>Runs HasText with the text of field box 1.</summary>
private void btnHasText1_Click(object sender, EventArgs e)
{
    Action_HasText(txtPdfPath.Text, txtField1.Text);
}

/// <summary>Runs GetField with the text of field box 1.</summary>
private void btnGetField1_Click(object sender, EventArgs e)
{
    Action_GetField(txtPdfPath.Text, txtField1.Text);
}

/// <summary>Runs GetColumn with the text of field box 1.</summary>
private void btnGetColumn1_Click(object sender, EventArgs e)
{
    Action_GetColumn(txtPdfPath.Text, txtField1.Text);
}

/// <summary>Runs HasText with the text of field box 2.</summary>
private void btnHasText2_Click(object sender, EventArgs e)
{
    Action_HasText(txtPdfPath.Text, txtField2.Text);
}

/// <summary>Runs GetField with the text of field box 2.</summary>
private void btnGetField2_Click(object sender, EventArgs e)
{
    Action_GetField(txtPdfPath.Text, txtField2.Text);
}

/// <summary>Runs GetColumn with the text of field box 2.</summary>
private void btnGetColumn2_Click(object sender, EventArgs e)
{
    Action_GetColumn(txtPdfPath.Text, txtField2.Text);
}

/// <summary>Runs HasText with the text of field box 3.</summary>
private void btnHasText3_Click(object sender, EventArgs e)
{
    Action_HasText(txtPdfPath.Text, txtField3.Text);
}

/// <summary>Runs GetField with the text of field box 3.</summary>
private void btnGetField3_Click(object sender, EventArgs e)
{
    Action_GetField(txtPdfPath.Text, txtField3.Text);
}

/// <summary>Runs GetColumn with the text of field box 3.</summary>
private void btnGetColumn3_Click(object sender, EventArgs e)
{
    Action_GetColumn(txtPdfPath.Text, txtField3.Text);
}
/// <summary>
/// Parses the page selection typed in the pages box into a set of 1-based
/// page numbers.
/// </summary>
/// <remarks>
/// The text is a comma-separated list of single pages ("3") and ranges
/// ("2-5"). Parts that cannot be parsed are ignored. A reversed range
/// ("5-2") is treated as "2-5" instead of throwing, and ranges are clipped
/// to 1..<paramref name="maxPages"/> so an oversized range cannot allocate
/// millions of entries. When the box is empty or nothing in it can be
/// parsed, every page is selected.
/// </remarks>
/// <param name="maxPages">Number of pages in the document.</param>
/// <returns>The selected page numbers; callers only test membership.</returns>
private IEnumerable<int> GetSelectedPages(int maxPages)
{
    // A set keeps the per-page Contains checks done by the callers cheap.
    HashSet<int> selected = new HashSet<int>();
    bool anyParsed = false;

    string pages = txtPages.Text;
    if (string.IsNullOrEmpty(pages) == false)
    {
        foreach (string part in pages.Split(','))
        {
            if (part.Contains("-"))
            {
                string[] range = part.Split('-');
                int pageStart;
                int pageEnd;
                if (range.Length != 2 ||
                    int.TryParse(range[0], out pageStart) == false ||
                    int.TryParse(range[1], out pageEnd) == false)
                {
                    continue;
                }

                anyParsed = true;
                int first = Math.Max(Math.Min(pageStart, pageEnd), 1);
                int last = Math.Min(Math.Max(pageStart, pageEnd), maxPages);
                for (int page = first; page <= last; page++)
                {
                    selected.Add(page);
                }
            }
            else
            {
                int pageNum;
                if (int.TryParse(part, out pageNum))
                {
                    anyParsed = true;
                    selected.Add(pageNum);
                }
            }
        }
    }

    if (anyParsed == false)
    {
        // Nothing usable was typed: fall back to the whole document.
        for (int page = 1; page <= maxPages; page++)
        {
            selected.Add(page);
        }
    }
    return selected;
}
/// <summary>
/// Reports, for every selected page of the PDF, whether the page contains
/// <paramref name="text"/>; one "Page(n) : result" line per page is written
/// to the output box.
/// </summary>
/// <param name="pdfPath">Path of the PDF file to inspect.</param>
/// <param name="text">Text to look for.</param>
private void Action_HasText(string pdfPath, string text)
{
    if (!System.IO.File.Exists(pdfPath))
    {
        MessageBox.Show("File does not exist");
        return;
    }

    PdfDocument document = PdfDocument.Load(pdfPath);
    IEnumerable<int> wantedPages = GetSelectedPages(document.Pages.Count);
    List<string> report = new List<string>();

    int nextPageNumber = 1;
    foreach (PdfDocumentPage page in document.Pages)
    {
        int currentPage = nextPageNumber++;
        if (wantedPages.Contains(currentPage))
        {
            var found = new PdfTextExtractor(page).HasText(text);
            report.Add(string.Format("Page({0}) : {1}", currentPage, Convert.ToString(found)));
        }
    }

    txtOutput.Lines = report.ToArray();
}
/// <summary>
/// Extracts the value of <paramref name="field"/> from every selected page
/// of the PDF and writes one line per page to the output box.
/// </summary>
/// <param name="pdfPath">Path of the PDF file to inspect.</param>
/// <param name="field">Field name passed to the text extractor.</param>
private void Action_GetField(string pdfPath, string field)
{
    if (!System.IO.File.Exists(pdfPath))
    {
        MessageBox.Show("File does not exist");
        return;
    }

    PdfDocument document = PdfDocument.Load(pdfPath);
    IEnumerable<int> wantedPages = GetSelectedPages(document.Pages.Count);
    List<string> values = new List<string>();

    int nextPageNumber = 1;
    foreach (PdfDocumentPage page in document.Pages)
    {
        int currentPage = nextPageNumber++;
        if (wantedPages.Contains(currentPage))
        {
            values.Add(new PdfTextExtractor(page).GetFieldAsString(field));
        }
    }

    txtOutput.Lines = values.ToArray();
}
/// <summary>
/// Extracts a column from every selected page of the PDF and writes the
/// visible text of its elements to the output box. When the render check
/// box is ticked, each processed page is also rendered with the column
/// highlighted and saved as "&lt;pdf name&gt;_&lt;page&gt;.png" next to the PDF.
/// </summary>
/// <param name="pdfPath">Path of the PDF file to inspect.</param>
/// <param name="column">
/// Either a column name, or "#y;x1;x2" to select the column by coordinates.
/// The numbers are parsed with the current culture.
/// </param>
private void Action_GetColumn(string pdfPath, string column)
{
    if (File.Exists(pdfPath) == false)
    {
        MessageBox.Show("File does not exist");
        return;
    }

    // The coordinate form is the same for every page, so parse and validate
    // it once up front instead of failing midway with an exception.
    bool byCoordinates = column.StartsWith("#", StringComparison.Ordinal);
    double y = 0;
    double x1 = 0;
    double x2 = 0;
    if (byCoordinates)
    {
        string[] columnParts = column.Substring(1).Split(';');
        if (columnParts.Length < 3 ||
            double.TryParse(columnParts[0], out y) == false ||
            double.TryParse(columnParts[1], out x1) == false ||
            double.TryParse(columnParts[2], out x2) == false)
        {
            MessageBox.Show("Invalid column coordinates, expected \"#y;x1;x2\"");
            return;
        }
    }

    PdfDocument doc = PdfDocument.Load(pdfPath);
    string baseDocumentPath = Path.GetDirectoryName(pdfPath);
    string baseDocumentFilename = Path.GetFileNameWithoutExtension(pdfPath);
    IEnumerable<int> selectedPages = GetSelectedPages(doc.Pages.Count);
    var columns = new List<string>();
    int pageNum = 0;
    foreach (PdfDocumentPage page in doc.Pages)
    {
        pageNum++;
        if (selectedPages.Contains(pageNum) == false) { continue; }

        PdfTextExtractor extractor = new PdfTextExtractor(page);
        PdfTextElementColumn columnData = byCoordinates
            ? extractor.GetColumn(null, y, x1, x2, x1, x2)
            : extractor.GetColumn(column);

        if (chkRender.Checked)
        {
            var pdfPageRenderer = new PdfPageRenderer(extractor);
            // "using" releases the bitmap even when rendering or saving throws.
            using (Bitmap bmp = pdfPageRenderer.Render())
            {
                pdfPageRenderer.RenderColumn(columnData, bmp);
                string fileName = Path.Combine(baseDocumentPath, string.Format("{0}_{1:0000}.png", baseDocumentFilename, pageNum));
                bmp.Save(fileName, ImageFormat.Png);
            }
            // NOTE(review): forced collection kept from the original code,
            // presumably to cap memory while rendering many pages - confirm it
            // is still needed now that the bitmap is disposed deterministically.
            GC.Collect();
        }
        columns.AddRange(columnData.Elements.Select(t => t.VisibleText));
    }
    txtOutput.Lines = columns.ToArray();
}
/// <summary>
/// Render button handler: renders every selected page of the PDF to
/// "&lt;pdf name&gt;_&lt;page&gt;.png" next to the PDF, and writes the file name,
/// page count and per-page text element count to the output box.
/// </summary>
private void btnRender_Click(object sender, EventArgs e)
{
    string pdfPath = txtPdfPath.Text;
    if (File.Exists(pdfPath) == false)
    {
        MessageBox.Show("File does not exist");
        return;
    }

    PdfDocument doc = PdfDocument.Load(pdfPath);
    string baseDocumentPath = Path.GetDirectoryName(pdfPath);
    string baseDocumentFilename = Path.GetFileNameWithoutExtension(pdfPath);

    List<string> lines = new List<string>();
    lines.Add(string.Format("Filename : {0}", baseDocumentFilename));
    lines.Add(string.Format("Number of Pages : {0}", doc.Pages.Count));

    IEnumerable<int> selectedPages = GetSelectedPages(doc.Pages.Count);
    int pageNum = 0;
    foreach (PdfDocumentPage page in doc.Pages)
    {
        pageNum++;
        if (selectedPages.Contains(pageNum) == false) { continue; }

        PdfPageRenderer pdfPageRenderer = new PdfPageRenderer(page);
        // "using" releases the bitmap even when saving the image throws.
        using (Bitmap bmp = pdfPageRenderer.Render())
        {
            lines.Add(string.Format("Page {0:0000} TextElements : {1}", pageNum, pdfPageRenderer.Extractor.Elements.Count));

            // Save image to disk
            string fileName = Path.Combine(baseDocumentPath, string.Format("{0}_{1:0000}.png", baseDocumentFilename, pageNum));
            bmp.Save(fileName, ImageFormat.Png);
        }
        // NOTE(review): forced collection kept from the original code,
        // presumably to cap memory while rendering many pages - confirm it is
        // still needed now that the bitmap is disposed deterministically.
        GC.Collect();
    }
    txtOutput.Lines = lines.ToArray();
}
}
}

View File

@@ -1,120 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<root>
<!--
Microsoft ResX Schema
Version 2.0
The primary goals of this format is to allow a simple XML format
that is mostly human readable. The generation and parsing of the
various data types are done through the TypeConverter classes
associated with the data types.
Example:
... ado.net/XML headers & schema ...
<resheader name="resmimetype">text/microsoft-resx</resheader>
<resheader name="version">2.0</resheader>
<resheader name="reader">System.Resources.ResXResourceReader, System.Windows.Forms, ...</resheader>
<resheader name="writer">System.Resources.ResXResourceWriter, System.Windows.Forms, ...</resheader>
<data name="Name1"><value>this is my long string</value><comment>this is a comment</comment></data>
<data name="Color1" type="System.Drawing.Color, System.Drawing">Blue</data>
<data name="Bitmap1" mimetype="application/x-microsoft.net.object.binary.base64">
<value>[base64 mime encoded serialized .NET Framework object]</value>
</data>
<data name="Icon1" type="System.Drawing.Icon, System.Drawing" mimetype="application/x-microsoft.net.object.bytearray.base64">
<value>[base64 mime encoded string representing a byte array form of the .NET Framework object]</value>
<comment>This is a comment</comment>
</data>
There are any number of "resheader" rows that contain simple
name/value pairs.
Each data row contains a name, and value. The row also contains a
type or mimetype. Type corresponds to a .NET class that support
text/value conversion through the TypeConverter architecture.
Classes that don't support this are serialized and stored with the
mimetype set.
The mimetype is used for serialized objects, and tells the
ResXResourceReader how to depersist the object. This is currently not
extensible. For a given mimetype the value must be set accordingly:
Note - application/x-microsoft.net.object.binary.base64 is the format
that the ResXResourceWriter will generate, however the reader can
read any of the formats listed below.
mimetype: application/x-microsoft.net.object.binary.base64
value : The object must be serialized with
: System.Runtime.Serialization.Formatters.Binary.BinaryFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.soap.base64
value : The object must be serialized with
: System.Runtime.Serialization.Formatters.Soap.SoapFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.bytearray.base64
value : The object must be serialized into a byte array
: using a System.ComponentModel.TypeConverter
: and then encoded with base64 encoding.
-->
<xsd:schema id="root" xmlns="" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata">
<xsd:import namespace="http://www.w3.org/XML/1998/namespace" />
<xsd:element name="root" msdata:IsDataSet="true">
<xsd:complexType>
<xsd:choice maxOccurs="unbounded">
<xsd:element name="metadata">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" />
</xsd:sequence>
<xsd:attribute name="name" use="required" type="xsd:string" />
<xsd:attribute name="type" type="xsd:string" />
<xsd:attribute name="mimetype" type="xsd:string" />
<xsd:attribute ref="xml:space" />
</xsd:complexType>
</xsd:element>
<xsd:element name="assembly">
<xsd:complexType>
<xsd:attribute name="alias" type="xsd:string" />
<xsd:attribute name="name" type="xsd:string" />
</xsd:complexType>
</xsd:element>
<xsd:element name="data">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
<xsd:element name="comment" type="xsd:string" minOccurs="0" msdata:Ordinal="2" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" use="required" msdata:Ordinal="1" />
<xsd:attribute name="type" type="xsd:string" msdata:Ordinal="3" />
<xsd:attribute name="mimetype" type="xsd:string" msdata:Ordinal="4" />
<xsd:attribute ref="xml:space" />
</xsd:complexType>
</xsd:element>
<xsd:element name="resheader">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" use="required" />
</xsd:complexType>
</xsd:element>
</xsd:choice>
</xsd:complexType>
</xsd:element>
</xsd:schema>
<resheader name="resmimetype">
<value>text/microsoft-resx</value>
</resheader>
<resheader name="version">
<value>2.0</value>
</resheader>
<resheader name="reader">
<value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
<resheader name="writer">
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
</root>

View File

@@ -1,19 +0,0 @@
using System;
using System.Windows.Forms;
namespace VAR.PdfTools.Workbench
{
/// <summary>
/// Hosts the entry point of the PdfTools Workbench Windows Forms application.
/// </summary>
internal static class Program
{
    /// <summary>
    /// The main entry point for the application: configures Windows Forms
    /// rendering and runs the message loop on a <see cref="FrmPdfInfo"/> form.
    /// </summary>
    [STAThread]
    private static void Main()
    {
        // Both calls are made before the first form is created.
        Application.EnableVisualStyles();
        Application.SetCompatibleTextRenderingDefault(false);

        FrmPdfInfo mainForm = new FrmPdfInfo();
        Application.Run(mainForm);
    }
}
}

View File

@@ -1,19 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<!--
Publish profile: this MSBuild file is read by the publish/package process of the project.
For more information see https://go.microsoft.com/fwlink/?LinkID=208121.
-->
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<!-- Configuration and platform that are built when publishing. -->
<Configuration>Release</Configuration>
<Platform>Any CPU</Platform>
<!-- Output goes to a folder on the local file system. -->
<PublishDir>bin\Release\net5.0-windows\publish\</PublishDir>
<PublishProtocol>FileSystem</PublishProtocol>
<!-- Self-contained publish for 64-bit Windows. -->
<TargetFramework>net5.0-windows</TargetFramework>
<RuntimeIdentifier>win-x64</RuntimeIdentifier>
<SelfContained>true</SelfContained>
<!-- Single-file bundle, without ReadyToRun precompilation; native libraries are bundled for self-extraction. -->
<PublishSingleFile>True</PublishSingleFile>
<PublishReadyToRun>False</PublishReadyToRun>
<IncludeNativeLibrariesForSelfExtract>True</IncludeNativeLibrariesForSelfExtract>
<!-- NOTE(review): trimming can remove code that is only reached through reflection; confirm the published application still starts and works. -->
<PublishTrimmed>True</PublishTrimmed>
</PropertyGroup>
</Project>

View File

@@ -1,81 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Classic (non SDK-style) MSBuild project for the Workbench executable, targeting .NET Framework v3.5. -->
<Project ToolsVersion="4.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
<!-- Settings shared by every configuration; Configuration and Platform default to Debug and AnyCPU when not supplied. -->
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<ProjectGuid>{A5825D8E-9F81-49E0-B610-8AE5E46D02EA}</ProjectGuid>
<OutputType>WinExe</OutputType>
<AppDesignerFolder>Properties</AppDesignerFolder>
<RootNamespace>VAR.PdfTools.Workbench</RootNamespace>
<AssemblyName>VAR.PdfTools.Workbench</AssemblyName>
<TargetFrameworkVersion>v3.5</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment>
<AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
<TargetFrameworkProfile />
<ProductVersion>10.0.0</ProductVersion>
<SchemaVersion>2.0</SchemaVersion>
</PropertyGroup>
<!-- Debug: full debug symbols, no optimization, DEBUG and TRACE defined. -->
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<PlatformTarget>AnyCPU</PlatformTarget>
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<!-- Release: pdb-only symbols, optimized, only TRACE defined. -->
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<PlatformTarget>AnyCPU</PlatformTarget>
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<!-- Framework assemblies referenced by the project. -->
<ItemGroup>
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="System.Data" />
<Reference Include="System.Drawing" />
<Reference Include="System.Windows.Forms" />
<Reference Include="System.Xml" />
</ItemGroup>
<!-- Source files: the FrmPdfInfo form with its designer part, the entry point, assembly info and generated settings. -->
<ItemGroup>
<Compile Include="FrmPdfInfo.cs">
<SubType>Form</SubType>
</Compile>
<Compile Include="FrmPdfInfo.Designer.cs">
<DependentUpon>FrmPdfInfo.cs</DependentUpon>
</Compile>
<Compile Include="Program.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<None Include="Properties\Settings.settings">
<Generator>SettingsSingleFileGenerator</Generator>
<LastGenOutput>Settings.Designer.cs</LastGenOutput>
</None>
<Compile Include="Properties\Settings.Designer.cs">
<AutoGen>True</AutoGen>
<DependentUpon>Settings.settings</DependentUpon>
<DesignTimeSharedInput>True</DesignTimeSharedInput>
</Compile>
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
Other similar extension points exist, see Microsoft.Common.targets.
<Target Name="BeforeBuild">
</Target>
<Target Name="AfterBuild">
</Target>
-->
<!-- Project-to-project reference to the .NET 3.5 build of the PdfTools library. -->
<ItemGroup>
<ProjectReference Include="..\VAR.PdfTools\VAR.PdfTools.Net35.csproj">
<Project>{EB7E003A-6A95-4002-809F-926C7C8A11E9}</Project>
<Name>VAR.PdfTools.Net35</Name>
</ProjectReference>
</ItemGroup>
</Project>

View File

@@ -1,26 +0,0 @@
<!-- SDK-style project for the Workbench: a Windows Forms executable targeting net5.0-windows. -->
<Project Sdk="Microsoft.NET.Sdk">
  <PropertyGroup>
    <TargetFramework>net5.0-windows</TargetFramework>
    <OutputType>WinExe</OutputType>
    <UseWindowsForms>true</UseWindowsForms>
  </PropertyGroup>
  <!-- Package metadata. -->
  <PropertyGroup>
    <PackageId>VAR.PdfTools.Workbench</PackageId>
    <Title>VAR.PdfTools.Workbench</Title>
    <Version>1.6.1</Version>
    <Description>PdfTools Workbench</Description>
    <Authors>VAR</Authors>
    <Company>VAR</Company>
    <Copyright>Copyright © VAR 2016-2019</Copyright>
    <!-- The pack targets read PackageRequireLicenseAcceptance; the bare
         RequireLicenseAcceptance name is the nuspec element and is ignored by MSBuild. -->
    <PackageRequireLicenseAcceptance>false</PackageRequireLicenseAcceptance>
    <PackageLicenseFile>LICENSE.txt</PackageLicenseFile>
    <PackageProjectUrl>https://github.com/Kableado/VAR.PdfTools</PackageProjectUrl>
    <PackageTags>PDF;PDF Tool</PackageTags>
  </PropertyGroup>
  <!-- The repository licence file is linked into the project and packed at the package root. -->
  <ItemGroup>
    <Content Include="..\LICENSE.txt" Link="LICENSE.txt" Pack="true" PackagePath="" />
  </ItemGroup>
  <ItemGroup>
    <ProjectReference Include="..\VAR.PdfTools\VAR.PdfTools.csproj" />
  </ItemGroup>
</Project>

View File

@@ -1,37 +0,0 @@
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 16
VisualStudioVersion = 16.0.31402.337
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "VAR.PdfTools", "VAR.PdfTools\VAR.PdfTools.csproj", "{EB7E003A-6A95-4002-809F-926C7C8A11E9}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "VAR.PdfTools.Workbench", "VAR.PdfTools.Workbench\VAR.PdfTools.Workbench.csproj", "{A5825D8E-9F81-49E0-B610-8AE5E46D02EA}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Notes", "Notes", "{CE2D7584-5D82-401E-9A88-A9961CBB6959}"
ProjectSection(SolutionItems) = preProject
LICENSE.txt = LICENSE.txt
README.md = README.md
EndProjectSection
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{EB7E003A-6A95-4002-809F-926C7C8A11E9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{EB7E003A-6A95-4002-809F-926C7C8A11E9}.Debug|Any CPU.Build.0 = Debug|Any CPU
{EB7E003A-6A95-4002-809F-926C7C8A11E9}.Release|Any CPU.ActiveCfg = Release|Any CPU
{EB7E003A-6A95-4002-809F-926C7C8A11E9}.Release|Any CPU.Build.0 = Release|Any CPU
{A5825D8E-9F81-49E0-B610-8AE5E46D02EA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{A5825D8E-9F81-49E0-B610-8AE5E46D02EA}.Debug|Any CPU.Build.0 = Debug|Any CPU
{A5825D8E-9F81-49E0-B610-8AE5E46D02EA}.Release|Any CPU.ActiveCfg = Release|Any CPU
{A5825D8E-9F81-49E0-B610-8AE5E46D02EA}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {7E5F981A-8918-4C9E-AC9C-A798E2F3DA69}
EndGlobalSection
EndGlobal

View File

@@ -1,121 +0,0 @@
using System;
namespace VAR.PdfTools.Maths
{
/// <summary>
/// 3x3 matrix of doubles stored as <c>[row, column]</c>, with helpers to
/// multiply it by a <see cref="Vector3D"/> or by another matrix.
/// </summary>
public class Matrix3x3
{
    #region Declarations

    /// <summary>Backing storage for the nine coefficients.</summary>
    public double[,] _matrix = new double[3, 3];

    #endregion

    #region Properties

    /// <summary>Gets the backing array itself (not a copy).</summary>
    public double[,] Matrix { get { return _matrix; } }

    #endregion

    #region Creator

    /// <summary>Creates an identity matrix.</summary>
    public Matrix3x3()
    {
        Idenity();
    }

    /// <summary>Creates a matrix from six coefficients; see <see cref="Set"/> for their placement.</summary>
    public Matrix3x3(double a, double b, double c, double d, double e, double f)
    {
        Set(a, b, c, d, e, f);
    }

    #endregion

    #region Public methods

    /// <summary>Resets this matrix to the identity: ones on the diagonal, zeros elsewhere.</summary>
    public void Idenity()
    {
        for (int row = 0; row < 3; row++)
        {
            for (int col = 0; col < 3; col++)
            {
                _matrix[row, col] = (row == col) ? 1.0 : 0.0;
            }
        }
    }

    /// <summary>
    /// Overwrites all nine coefficients so that the rows become
    /// <c>[a c e]</c>, <c>[b d f]</c> and <c>[0 0 1]</c>.
    /// </summary>
    public void Set(double a, double b, double c, double d, double e, double f)
    {
        // First row
        _matrix[0, 0] = a;
        _matrix[0, 1] = c;
        _matrix[0, 2] = e;

        // Second row
        _matrix[1, 0] = b;
        _matrix[1, 1] = d;
        _matrix[1, 2] = f;

        // Third row is fixed
        _matrix[2, 0] = 0;
        _matrix[2, 1] = 0;
        _matrix[2, 2] = 1;
    }

    /// <summary>
    /// Multiplies this matrix by <paramref name="vect"/>:
    /// <c>result[r] = v[0]*m[r,0] + v[1]*m[r,1] + v[2]*m[r,2]</c>.
    /// </summary>
    /// <returns>A new vector; neither operand is modified.</returns>
    public Vector3D Multiply(Vector3D vect)
    {
        Vector3D product = new Vector3D();
        double[] input = vect.Vector;
        double[] output = product.Vector;
        for (int row = 0; row < 3; row++)
        {
            output[row] = (input[0] * _matrix[row, 0]) + (input[1] * _matrix[row, 1]) + (input[2] * _matrix[row, 2]);
        }
        return product;
    }

    /// <summary>
    /// Combines this matrix with <paramref name="matrix"/>:
    /// <c>result[r,c] = this[0,c]*matrix[r,0] + this[1,c]*matrix[r,1] + this[2,c]*matrix[r,2]</c>.
    /// </summary>
    /// <returns>A new matrix; neither operand is modified.</returns>
    public Matrix3x3 Multiply(Matrix3x3 matrix)
    {
        Matrix3x3 product = new Matrix3x3();
        double[,] other = matrix._matrix;
        double[,] target = product._matrix;
        for (int row = 0; row < 3; row++)
        {
            for (int col = 0; col < 3; col++)
            {
                target[row, col] = (_matrix[0, col] * other[row, 0]) + (_matrix[1, col] * other[row, 1]) + (_matrix[2, col] * other[row, 2]);
            }
        }
        return product;
    }

    /// <summary>Returns a new matrix holding the same nine coefficients.</summary>
    public Matrix3x3 Copy()
    {
        Matrix3x3 clone = new Matrix3x3();
        for (int row = 0; row < 3; row++)
        {
            for (int col = 0; col < 3; col++)
            {
                clone._matrix[row, col] = _matrix[row, col];
            }
        }
        return clone;
    }

    /// <summary>
    /// Compares this matrix with <paramref name="otherMatrix"/> coefficient by coefficient.
    /// The entries [0,0], [1,0], [0,1] and [1,1] must agree within a fixed 0.00001;
    /// [0,2] must agree within <paramref name="horizontalDelta"/> and [1,2] within
    /// <paramref name="verticalDelta"/>. The third row is not compared.
    /// </summary>
    public bool IsCollinear(Matrix3x3 otherMatrix, double horizontalDelta = 0.00001, double verticalDelta = 0.00001)
    {
        const double epsilon = 0.00001;

        bool sameLinearPart =
            Math.Abs(_matrix[0, 0] - otherMatrix.Matrix[0, 0]) <= epsilon &&
            Math.Abs(_matrix[1, 0] - otherMatrix.Matrix[1, 0]) <= epsilon &&
            Math.Abs(_matrix[0, 1] - otherMatrix.Matrix[0, 1]) <= epsilon &&
            Math.Abs(_matrix[1, 1] - otherMatrix.Matrix[1, 1]) <= epsilon;

        return sameLinearPart &&
            Math.Abs(_matrix[0, 2] - otherMatrix.Matrix[0, 2]) <= horizontalDelta &&
            Math.Abs(_matrix[1, 2] - otherMatrix.Matrix[1, 2]) <= verticalDelta;
    }

    #endregion
}
}

View File

@@ -1,19 +0,0 @@
namespace VAR.PdfTools.Maths
{
/// <summary>
/// Axis-aligned rectangle described by its minimum and maximum X and Y values.
/// </summary>
public class Rect
{
    public double XMin { get; set; }
    public double XMax { get; set; }
    public double YMin { get; set; }
    public double YMax { get; set; }

    /// <summary>
    /// Grows this rectangle so that it also covers <paramref name="rect"/>.
    /// A bound only changes when the other rectangle's bound compares strictly
    /// beyond it, so NaN bounds in <paramref name="rect"/> are ignored.
    /// </summary>
    public void Add(Rect rect)
    {
        XMax = (rect.XMax > XMax) ? rect.XMax : XMax;
        YMax = (rect.YMax > YMax) ? rect.YMax : YMax;
        XMin = (rect.XMin < XMin) ? rect.XMin : XMin;
        YMin = (rect.YMin < YMin) ? rect.YMin : YMin;
    }
}
}

View File

@@ -1,33 +0,0 @@
namespace VAR.PdfTools.Maths
{
/// <summary>
/// Three-component vector of doubles, initialised to (0, 0, 1).
/// </summary>
public class Vector3D
{
    #region Declarations

    /// <summary>Backing storage for the three components.</summary>
    public double[] _vector = new double[3];

    #endregion

    #region Properties

    /// <summary>Gets the backing array itself (not a copy).</summary>
    public double[] Vector { get { return _vector; } }

    #endregion

    #region Creator

    /// <summary>Creates the vector (0, 0, 1).</summary>
    public Vector3D()
    {
        Init();
    }

    /// <summary>Resets the components in place to (0, 0, 1).</summary>
    public void Init()
    {
        for (int i = 0; i < 3; i++)
        {
            _vector[i] = (i == 2) ? 1.0 : 0.0;
        }
    }

    #endregion
}
}

View File

@@ -1,34 +0,0 @@
using System.Collections.Generic;
using VAR.PdfTools.PdfElements;
namespace VAR.PdfTools
{
/// <summary>
/// Pairs a token string with the list of elements supplied as its parameters.
/// </summary>
public class PdfContentAction
{
    #region Declarations

    private readonly string _token;

    private readonly List<IPdfElement> _parameters;

    #endregion

    #region Properties

    /// <summary>Gets the token passed to the constructor.</summary>
    public string Token { get { return this._token; } }

    /// <summary>Gets the parameter list passed to the constructor (same instance, not a copy).</summary>
    public List<IPdfElement> Parameters { get { return this._parameters; } }

    #endregion

    #region Life cycle

    /// <summary>Stores the token and its parameters as given.</summary>
    public PdfContentAction(string token, List<IPdfElement> parameters)
    {
        this._token = token;
        this._parameters = parameters;
    }

    #endregion
}
}

View File

@@ -1,337 +0,0 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using VAR.PdfTools.PdfElements;
namespace VAR.PdfTools
{
public class PdfDocument
{
#region Declarations
// Objects kept by Load: the parsed objects first, followed by the objects expanded from object streams.
private List<PdfObject> _objects = new List<PdfObject>();
// First dictionary found by Load whose "Type" is "Catalog".
private PdfDictionary _catalog = null;
// Pages appended by ExtractPages while walking the "Pages" tree.
private List<PdfDocumentPage> _pages = new List<PdfDocumentPage>();
#endregion
#region Properties
/// <summary>Gets the list of objects read from the document.</summary>
public List<PdfObject> Objects { get { return _objects; } }
/// <summary>Gets the catalog dictionary located by Load.</summary>
public PdfDictionary Catalog { get { return _catalog; } }
/// <summary>Gets the pages extracted by Load.</summary>
public List<PdfDocumentPage> Pages { get { return _pages; } }
#endregion
#region Life cycle
// Private constructor: instances are created through the static Load methods.
private PdfDocument() { }
#endregion
#region Private methods
/// <summary>
/// Applies one named filter to <paramref name="stream"/>. Only "FlateDecode"
/// is implemented; every other name, known or unknown, leaves the data untouched.
/// </summary>
/// <param name="stream">Stream whose <c>Data</c> is replaced by the decoded bytes.</param>
/// <param name="filter">Filter name as found in the stream dictionary.</param>
private static void ApplyFilterToStream(PdfStream stream, string filter)
{
    switch (filter)
    {
        case "FlateDecode":
            stream.Data = PdfFilters.FlateDecode.Decode(stream.Data);
            break;

        case "ASCIIHexDecode":
        case "ASCII85Decode":
        case "A85":
        case "LZWDecode":
        case "RunLengthDecode":
        case "CCITTFaxDecode":
        case "JBIG2Decode":
        case "DCTDecode":
        case "JPXDecode":
        case "Crypt":
            // TODO: Implement these filters
            break;

        default:
            // TODO: Handle unknown filters
            break;
    }
}
/// <summary>
/// Decodes <paramref name="stream"/> according to the "Filter" entry of its
/// dictionary, which may be a single name/string or an array of them applied
/// in order. The undecoded bytes and the filter element are kept in
/// <c>OriginalData</c> and <c>OriginalFilter</c>; afterwards "Length" is
/// rewritten and "Filter" is removed. Streams without "Filter" are left alone.
/// </summary>
/// <exception cref="Exception">The filter entry, or one item of a filter array, is neither a string nor a name.</exception>
private static void ApplyFiltersToStreams(PdfStream stream)
{
    IPdfElement filterElement;
    if (stream.Dictionary.Values.TryGetValue("Filter", out filterElement) == false)
    {
        return;
    }

    stream.OriginalData = stream.Data;
    stream.OriginalFilter = filterElement;

    PdfArray filterList = filterElement as PdfArray;
    if (filterList == null)
    {
        ApplyFilterToStream(stream, FilterNameOf(filterElement));
    }
    else
    {
        foreach (IPdfElement item in filterList.Values)
        {
            ApplyFilterToStream(stream, FilterNameOf(item));
        }
    }

    stream.Dictionary.Values["Length"] = new PdfInteger { Value = stream.Data.Length };
    stream.Dictionary.Values.Remove("Filter");
}

// Reads the filter name out of a string or name element; anything else is rejected.
private static string FilterNameOf(IPdfElement element)
{
    PdfString asString = element as PdfString;
    if (asString != null)
    {
        return asString.Value;
    }

    PdfName asName = element as PdfName;
    if (asName != null)
    {
        return asName.Value;
    }

    throw new Exception("PdfFilter not correctly specified");
}
/// <summary>
/// Replaces object references found directly inside <paramref name="elem"/>
/// (an object, array or dictionary, recursively) with the data of the object
/// they point to.
/// </summary>
/// <param name="elem">Element to process; containers are modified in place.</param>
/// <param name="dictReferences">Objects indexed by object id.</param>
/// <returns>
/// For a reference: the referenced object's data (its usage count is incremented),
/// or a new <see cref="PdfNull"/> when the id is unknown. For anything else: <paramref name="elem"/> itself.
/// </returns>
private static IPdfElement ResolveIndirectReferences(IPdfElement elem, Dictionary<int, PdfObject> dictReferences)
{
    PdfObjectReference reference = elem as PdfObjectReference;
    if (reference != null)
    {
        PdfObject target;
        if (dictReferences.TryGetValue(reference.ObjectID, out target) == false)
        {
            return new PdfNull();
        }
        target.UsageCount++;
        // The referenced data is returned as is; it is not descended into here.
        return target.Data;
    }

    PdfObject wrapper = elem as PdfObject;
    if (wrapper != null)
    {
        IPdfElement resolvedData = ResolveIndirectReferences(wrapper.Data, dictReferences);
        if (resolvedData != wrapper.Data)
        {
            wrapper.Data = resolvedData;
        }
        return elem;
    }

    PdfArray list = elem as PdfArray;
    if (list != null)
    {
        for (int index = 0; index < list.Values.Count; index++)
        {
            IPdfElement current = list.Values[index];
            IPdfElement resolvedItem = ResolveIndirectReferences(current, dictReferences);
            if (resolvedItem != current)
            {
                list.Values[index] = resolvedItem;
            }
        }
        return elem;
    }

    PdfDictionary map = elem as PdfDictionary;
    if (map != null)
    {
        // Iterate over a snapshot of the keys because entries are replaced while looping.
        List<string> keySnapshot = new List<string>(map.Values.Keys);
        foreach (string key in keySnapshot)
        {
            IPdfElement current = map.Values[key];
            IPdfElement resolvedValue = ResolveIndirectReferences(current, dictReferences);
            if (resolvedValue != current)
            {
                map.Values[key] = resolvedValue;
            }
        }
    }

    return elem;
}
/// <summary>
/// Walks a page-tree node and appends every leaf page to <paramref name="doc"/>.
/// </summary>
/// <param name="page">Dictionary whose "Type" is "Page" (leaf) or "Pages" (intermediate node).</param>
/// <param name="doc">Document receiving the extracted pages.</param>
/// <param name="resources">Resources inherited from the ancestors of <paramref name="page"/>; may be null.</param>
/// <exception cref="Exception">
/// The node is neither "Page" nor "Pages", or a "Pages" node has no "Kids" array.
/// </exception>
private static void ExtractPages(PdfDictionary page, PdfDocument doc, PdfDictionary resources)
{
    string type = page.GetParamAsString("Type");
    if (type == "Page")
    {
        doc._pages.Add(new PdfDocumentPage(page, resources));
        return;
    }
    if (type != "Pages")
    {
        throw new Exception(string.Format("PdfDocument: Unexpected page type, found: {0}", type));
    }

    IPdfElement kidsElement;
    page.Values.TryGetValue("Kids", out kidsElement);
    PdfArray kids = kidsElement as PdfArray;
    if (kids == null)
    {
        throw new Exception("PdfDocument: Pages \"Kids\" not found");
    }

    // Resources are inheritable: a node without its own "Resources" dictionary
    // passes on what it received, instead of resetting the children to null.
    PdfDictionary resourcesForKids = resources;
    IPdfElement ownResourcesElement;
    if (page.Values.TryGetValue("Resources", out ownResourcesElement))
    {
        PdfDictionary ownResources = ownResourcesElement as PdfDictionary;
        if (ownResources != null)
        {
            resourcesForKids = ownResources;
        }
    }

    foreach (IPdfElement elem in kids.Values)
    {
        // Kids that are not dictionaries (e.g. the PdfNull left by an
        // unresolved reference) are skipped rather than dereferenced.
        PdfDictionary childPage = elem as PdfDictionary;
        if (childPage == null) { continue; }

        ExtractPages(childPage, doc, resourcesForKids);
    }
}
#endregion
#region Public methods
/// <summary>
/// Reads the whole file at <paramref name="filename"/> into memory and loads it as a PDF document.
/// </summary>
/// <param name="filename">Path of the file to read.</param>
/// <returns>The loaded document.</returns>
public static PdfDocument Load(string filename)
{
    return Load(File.ReadAllBytes(filename));
}
/// <summary>
/// Builds a document from raw bytes: parses every object, decodes filtered
/// streams, expands object streams, resolves indirect references, then finds
/// the catalog and extracts the pages.
/// </summary>
/// <param name="data">Complete contents of the PDF file.</param>
/// <returns>The loaded document.</returns>
/// <exception cref="Exception">No catalog was found, or the catalog has no "Pages" dictionary.</exception>
public static PdfDocument Load(byte[] data)
{
    PdfDocument doc = new PdfDocument();

    // Parse data
    PdfParser parser = new PdfParser(data);
    do
    {
        PdfObject parsed = parser.ParseObject(doc.Objects);
        if (parsed != null && parsed.Data != null)
        {
            PdfStream parsedStream = parsed.Data as PdfStream;
            if (parsedStream != null)
            {
                ApplyFiltersToStreams(parsedStream);
            }
            doc.Objects.Add(parsed);
        }
    } while (!parser.IsEndOfStream());

    // Expand Object Streams
    List<PdfObject> embeddedObjects = new List<PdfObject>();
    foreach (PdfObject container in doc.Objects)
    {
        if (container.Data.Type != PdfElementTypes.Stream) { continue; }

        PdfStream stream = container.Data as PdfStream;
        PdfDictionary streamInfo = stream.Dictionary;
        string type = streamInfo.GetParamAsString("Type");
        long? number = streamInfo.GetParamAsInt("N");
        long? first = streamInfo.GetParamAsInt("First");
        if (type != "ObjStm" || number == null || first == null) { continue; }

        container.UsageCount++;
        PdfParser streamParser = new PdfParser(stream.Data);
        embeddedObjects.AddRange(streamParser.ParseObjectStream((int)number, (long)first));
    }
    doc.Objects.AddRange(embeddedObjects);

    // Build cross reference table; for a repeated id the highest generation wins
    Dictionary<int, PdfObject> objectsById = new Dictionary<int, PdfObject>();
    foreach (PdfObject candidate in doc.Objects)
    {
        PdfObject known;
        if (objectsById.TryGetValue(candidate.ObjectID, out known) == false)
        {
            objectsById.Add(candidate.ObjectID, candidate);
        }
        else if (known.ObjectGeneration < candidate.ObjectGeneration)
        {
            objectsById[candidate.ObjectID] = candidate;
        }
    }

    // Iterate full document to resolve all indirect references
    foreach (PdfObject obj in doc.Objects)
    {
        ResolveIndirectReferences(obj, objectsById);
    }

    // Search Catalog
    foreach (PdfObject obj in doc.Objects)
    {
        PdfDictionary candidate = obj.Data as PdfDictionary;
        if (candidate == null) { continue; }

        if (candidate.GetParamAsString("Type") == "Catalog")
        {
            doc._catalog = candidate;
            break;
        }
    }
    if (doc._catalog == null)
    {
        throw new Exception("PdfDocument: Catalog not found");
    }

    // Search pages
    IPdfElement pagesElement;
    doc.Catalog.Values.TryGetValue("Pages", out pagesElement);
    PdfDictionary pages = pagesElement as PdfDictionary;
    if (pages == null)
    {
        throw new Exception("PdfDocument: Pages not found");
    }

    IPdfElement resourcesElement;
    doc.Catalog.Values.TryGetValue("Resources", out resourcesElement);
    PdfDictionary resources = resourcesElement as PdfDictionary;

    ExtractPages(pages, doc, resources);
    return doc;
}
#endregion
}
}

View File

@@ -1,81 +0,0 @@
using System;
using System.Collections.Generic;
using VAR.PdfTools.PdfElements;
namespace VAR.PdfTools
{
/// <summary>
/// One page of a document: its dictionary, the bytes of its content, the fonts
/// declared in its resources and the parsed content actions.
/// </summary>
public class PdfDocumentPage
{
    #region Declarations

    private PdfDictionary _baseData = null;

    private byte[] _content = null;

    private PdfDictionary _resources = null;

    private Dictionary<string, PdfFont> _fonts = new Dictionary<string, PdfFont>();

    private List<PdfContentAction> _contentActions = null;

    #endregion

    #region Properties

    /// <summary>Gets the page dictionary passed to the constructor.</summary>
    public PdfDictionary BaseData { get { return _baseData; } }

    /// <summary>Gets the bytes read from the "Contents" entry, or null when there are none.</summary>
    public byte[] Content { get { return _content; } }

    /// <summary>Gets the fonts of the page, keyed by their name in the "Font" resource dictionary.</summary>
    public Dictionary<string, PdfFont> Fonts { get { return _fonts; } }

    /// <summary>Gets the actions parsed from the content; empty when the page has no content.</summary>
    public List<PdfContentAction> ContentActions { get { return _contentActions; } }

    #endregion

    #region Life cycle

    /// <summary>
    /// Builds the page from its dictionary.
    /// </summary>
    /// <param name="baseData">Dictionary whose "Type" must be "Page".</param>
    /// <param name="resources">Inherited resources, used only when the page has no "Resources" entry; may be null.</param>
    /// <exception cref="Exception"><paramref name="baseData"/> is not of type "Page".</exception>
    public PdfDocumentPage(PdfDictionary baseData, PdfDictionary resources)
    {
        _baseData = baseData;
        string type = baseData.GetParamAsString("Type");
        if (type != "Page")
        {
            throw new Exception(string.Format("PdfDocumentPage: Expected dictionary of type:\"Page\". Found: {0}", type));
        }

        // Get content, resources and fonts
        _content = _baseData.GetParamAsStream("Contents");

        IPdfElement ownResources;
        if (_baseData.Values.TryGetValue("Resources", out ownResources))
        {
            _resources = ownResources as PdfDictionary;
        }
        else
        {
            _resources = resources;
        }

        LoadFonts();

        // Parse content
        if (_content != null)
        {
            PdfParser parser = new PdfParser(_content);
            _contentActions = parser.ParseContent();
        }
        else
        {
            _contentActions = new List<PdfContentAction>();
        }
    }

    #endregion

    #region Private methods

    // Fills _fonts from the "Font" dictionary of the resources. Entries that
    // are not dictionaries (for instance the PdfNull left behind by an
    // unresolved reference) are skipped instead of failing with a
    // NullReferenceException.
    private void LoadFonts()
    {
        if (_resources == null) { return; }

        IPdfElement fontsElement;
        if (_resources.Values.TryGetValue("Font", out fontsElement) == false) { return; }

        PdfDictionary fonts = fontsElement as PdfDictionary;
        if (fonts == null) { return; }

        foreach (KeyValuePair<string, IPdfElement> pair in fonts.Values)
        {
            PdfDictionary fontData = pair.Value as PdfDictionary;
            if (fontData == null) { continue; }

            PdfFont font = new PdfFont(fontData);
            font.Name = pair.Key;
            _fonts.Add(pair.Key, font);
        }
    }

    #endregion
}
}

View File

@@ -1,7 +0,0 @@
namespace VAR.PdfTools.PdfElements
{
/// <summary>
/// Contract shared by the element classes of this namespace.
/// </summary>
public interface IPdfElement
{
/// <summary>Gets the <see cref="PdfElementTypes"/> value identifying the kind of element.</summary>
PdfElementTypes Type { get; }
}
}

View File

@@ -1,11 +0,0 @@
using System.Collections.Generic;
namespace VAR.PdfTools.PdfElements
{
/// <summary>
/// Element holding an ordered list of child elements.
/// </summary>
public class PdfArray : IPdfElement
{
/// <summary>Always <see cref="PdfElementTypes.Array"/>.</summary>
public PdfElementTypes Type { get { return PdfElementTypes.Array; } }
// Backing list, created empty.
private List<IPdfElement> _values = new List<IPdfElement>();
/// <summary>Gets the list of child elements (the live list, not a copy).</summary>
public List<IPdfElement> Values { get { return _values; } }
}
}

View File

@@ -1,8 +0,0 @@
namespace VAR.PdfTools.PdfElements
{
/// <summary>
/// Element holding a boolean value.
/// </summary>
public class PdfBoolean : IPdfElement
{
/// <summary>Always <see cref="PdfElementTypes.Boolean"/>.</summary>
public PdfElementTypes Type { get { return PdfElementTypes.Boolean; } }
/// <summary>Gets or sets the boolean value.</summary>
public bool Value { get; set; }
}
}

View File

@@ -1,77 +0,0 @@
using System.Collections.Generic;
using System.IO;
namespace VAR.PdfTools.PdfElements
{
/// <summary>
/// Element mapping string keys to child elements, with typed accessors for
/// the most common kinds of entry.
/// </summary>
public class PdfDictionary : IPdfElement
{
    /// <summary>Always <see cref="PdfElementTypes.Dictionary"/>.</summary>
    public PdfElementTypes Type { get { return PdfElementTypes.Dictionary; } }

    private Dictionary<string, IPdfElement> _values = new Dictionary<string, IPdfElement>();

    /// <summary>Gets the entries of the dictionary (the live map, not a copy).</summary>
    public Dictionary<string, IPdfElement> Values { get { return _values; } }

    /// <summary>
    /// Reads entry <paramref name="name"/> as text. When the entry is an array
    /// its first item is used.
    /// </summary>
    /// <returns>
    /// The value of a name or string element; null when the entry is missing,
    /// is an empty array, or has another type.
    /// </returns>
    public string GetParamAsString(string name)
    {
        IPdfElement value = GetScalarParam(name);

        PdfName asName = value as PdfName;
        if (asName != null)
        {
            return asName.Value;
        }

        PdfString asString = value as PdfString;
        if (asString != null)
        {
            return asString.Value;
        }

        return null;
    }

    /// <summary>
    /// Reads entry <paramref name="name"/> as an integer. When the entry is an
    /// array its first item is used.
    /// </summary>
    /// <returns>
    /// The value of an integer element; null when the entry is missing, is an
    /// empty array, or has another type.
    /// </returns>
    public long? GetParamAsInt(string name)
    {
        PdfInteger asInteger = GetScalarParam(name) as PdfInteger;
        if (asInteger != null)
        {
            return asInteger.Value;
        }
        return null;
    }

    /// <summary>
    /// Reads entry <paramref name="name"/> as stream bytes. For an array, the
    /// data of every stream item is concatenated in order; other items are ignored.
    /// </summary>
    /// <returns>
    /// The bytes; null when the entry is missing, is not a stream or array, or
    /// the array yields no bytes at all.
    /// </returns>
    public byte[] GetParamAsStream(string name)
    {
        IPdfElement value;
        if (Values.TryGetValue(name, out value) == false) { return null; }

        PdfArray array = value as PdfArray;
        if (array != null)
        {
            // NOTE(review): the pieces are joined without any separator; if the
            // caller tokenises the result, confirm that adjacent streams cannot
            // fuse two tokens into one.
            using (MemoryStream memStream = new MemoryStream())
            {
                foreach (IPdfElement elem in array.Values)
                {
                    PdfStream part = elem as PdfStream;
                    // Streams without data contribute nothing.
                    if (part == null || part.Data == null) { continue; }
                    memStream.Write(part.Data, 0, part.Data.Length);
                }
                return (memStream.Length > 0) ? memStream.ToArray() : null;
            }
        }

        PdfStream single = value as PdfStream;
        if (single != null)
        {
            return single.Data;
        }
        return null;
    }

    // Looks up an entry and, when it is an array, returns its first item
    // (null for an empty array, which previously threw ArgumentOutOfRangeException).
    private IPdfElement GetScalarParam(string name)
    {
        IPdfElement value;
        if (Values.TryGetValue(name, out value) == false) { return null; }

        PdfArray array = value as PdfArray;
        if (array == null) { return value; }

        return (array.Values.Count > 0) ? array.Values[0] : null;
    }
}
}

View File

@@ -1,18 +0,0 @@
namespace VAR.PdfTools.PdfElements
{
/// <summary>
/// Kinds of element, as reported by <c>IPdfElement.Type</c>.
/// </summary>
public enum PdfElementTypes
{
// Default value (0); no kind assigned.
Undefined,
Boolean,
Integer,
Real,
String,
Name,
Array,
Dictionary,
Null,
// Reference to an object by id and generation.
ObjectReference,
// Object carrying an id, a generation and its data.
Object,
// Dictionary plus a block of bytes.
Stream,
};
}

View File

@@ -1,56 +0,0 @@
namespace VAR.PdfTools.PdfElements
{
/// <summary>
/// Helpers that read a primitive value out of an element, falling back to a
/// caller-supplied default when the element is null or of another kind.
/// </summary>
public static class PdfElementUtils
{
    /// <summary>
    /// Returns the numeric value of an integer or real element as a double;
    /// otherwise <paramref name="defaultValue"/>.
    /// </summary>
    public static double GetReal(IPdfElement elem, double defaultValue)
    {
        PdfInteger integer = elem as PdfInteger;
        if (integer != null)
        {
            return integer.Value;
        }

        PdfReal real = elem as PdfReal;
        if (real != null)
        {
            return real.Value;
        }

        return defaultValue;
    }

    /// <summary>
    /// Returns the value of an integer element, or of a real element cast to
    /// <c>long</c>; otherwise <paramref name="defaultValue"/>.
    /// </summary>
    public static long GetInt(IPdfElement elem, long defaultValue)
    {
        PdfInteger integer = elem as PdfInteger;
        if (integer != null)
        {
            return integer.Value;
        }

        PdfReal real = elem as PdfReal;
        if (real != null)
        {
            return (long)real.Value;
        }

        return defaultValue;
    }

    /// <summary>
    /// Returns the value of a string or name element; otherwise
    /// <paramref name="defaultValue"/>.
    /// </summary>
    public static string GetString(IPdfElement elem, string defaultValue)
    {
        PdfString text = elem as PdfString;
        if (text != null)
        {
            return text.Value;
        }

        PdfName name = elem as PdfName;
        if (name != null)
        {
            return name.Value;
        }

        return defaultValue;
    }
}
}

View File

@@ -1,8 +0,0 @@
namespace VAR.PdfTools.PdfElements
{
/// <summary>
/// Element holding an integer value.
/// </summary>
public class PdfInteger : IPdfElement
{
/// <summary>Always <see cref="PdfElementTypes.Integer"/>.</summary>
public PdfElementTypes Type { get { return PdfElementTypes.Integer; } }
/// <summary>Gets or sets the value, stored as a 64-bit integer.</summary>
public long Value { get; set; }
}
}

View File

@@ -1,8 +0,0 @@
namespace VAR.PdfTools.PdfElements
{
/// <summary>
/// Element holding a name.
/// </summary>
public class PdfName : IPdfElement
{
/// <summary>Always <see cref="PdfElementTypes.Name"/>.</summary>
public PdfElementTypes Type { get { return PdfElementTypes.Name; } }
/// <summary>Gets or sets the text of the name.</summary>
public string Value { get; set; }
}
}

View File

@@ -1,7 +0,0 @@
namespace VAR.PdfTools.PdfElements
{
/// <summary>
/// Element representing a null value; it carries no data.
/// </summary>
public class PdfNull : IPdfElement
{
/// <summary>Always <see cref="PdfElementTypes.Null"/>.</summary>
public PdfElementTypes Type { get { return PdfElementTypes.Null; } }
}
}

View File

@@ -1,11 +0,0 @@
namespace VAR.PdfTools.PdfElements
{
/// <summary>
/// Object identified by an id and a generation, wrapping the element it contains.
/// </summary>
public class PdfObject : IPdfElement
{
/// <summary>Always <see cref="PdfElementTypes.Object"/>.</summary>
public PdfElementTypes Type { get { return PdfElementTypes.Object; } }
/// <summary>Gets or sets the object id.</summary>
public int ObjectID { get; set; }
/// <summary>Gets or sets the generation of the object.</summary>
public int ObjectGeneration { get; set; }
/// <summary>Gets or sets the element contained in the object.</summary>
public IPdfElement Data { get; set; }
/// <summary>Gets or sets a counter of how many times the object has been used; starts at zero.</summary>
public int UsageCount { get; set; }
}
}

View File

@@ -1,9 +0,0 @@
namespace VAR.PdfTools.PdfElements
{
/// <summary>
/// Element that refers to an object by its id and generation.
/// </summary>
public class PdfObjectReference : IPdfElement
{
/// <summary>Always <see cref="PdfElementTypes.ObjectReference"/>.</summary>
public PdfElementTypes Type { get { return PdfElementTypes.ObjectReference; } }
/// <summary>Gets or sets the id of the referenced object.</summary>
public int ObjectID { get; set; }
/// <summary>Gets or sets the generation of the referenced object.</summary>
public int ObjectGeneration { get; set; }
}
}

View File

@@ -1,8 +0,0 @@
namespace VAR.PdfTools.PdfElements
{
/// <summary>
/// Element holding a real (floating-point) value.
/// </summary>
public class PdfReal : IPdfElement
{
/// <summary>Always <see cref="PdfElementTypes.Real"/>.</summary>
public PdfElementTypes Type { get { return PdfElementTypes.Real; } }
/// <summary>Gets or sets the value, stored as a double.</summary>
public double Value { get; set; }
}
}

View File

@@ -1,12 +0,0 @@
namespace VAR.PdfTools.PdfElements
{
/// <summary>
/// Element made of a dictionary and a block of bytes.
/// </summary>
public class PdfStream : IPdfElement
{
/// <summary>Always <see cref="PdfElementTypes.Stream"/>.</summary>
public PdfElementTypes Type { get { return PdfElementTypes.Stream; } }
/// <summary>Gets or sets the dictionary that accompanies the data.</summary>
public PdfDictionary Dictionary { get; set; }
/// <summary>Gets or sets the current bytes of the stream.</summary>
public byte[] Data { get; set; }
/// <summary>Gets or sets the bytes as they were before any filter was applied.</summary>
public byte[] OriginalData { get; set; }
/// <summary>Gets or sets the filter element the stream had before filters were applied.</summary>
public IPdfElement OriginalFilter { get; set; }
}
}

View File

@@ -1,8 +0,0 @@
namespace VAR.PdfTools.PdfElements
{
/// <summary>
/// Element holding a string value.
/// </summary>
public class PdfString : IPdfElement
{
/// <summary>Always <see cref="PdfElementTypes.String"/>.</summary>
public PdfElementTypes Type { get { return PdfElementTypes.String; } }
/// <summary>Gets or sets the text of the string.</summary>
public string Value { get; set; }
}
}

View File

@@ -1,50 +0,0 @@
using System;
using System.IO;
using System.IO.Compression;
namespace VAR.PdfTools
{
/// <summary>
/// Implementations of stream filters.
/// </summary>
public static class PdfFilters
{
    /// <summary>
    /// "FlateDecode" filter: zlib-wrapped deflate data.
    /// </summary>
    public class FlateDecode
    {
        /// <summary>Not implemented.</summary>
        /// <exception cref="NotImplementedException">Always.</exception>
        public byte[] Encode(byte[] streamData)
        {
            throw new NotImplementedException("FlateFilter.Encode: Not implemented");
        }

        /// <summary>
        /// Inflates <paramref name="streamData"/>.
        /// </summary>
        /// <param name="streamData">Compressed bytes, starting with a two-byte header.</param>
        /// <returns>The decompressed bytes; an empty array when nothing was produced.</returns>
        public static byte[] Decode(byte[] streamData)
        {
            using (MemoryStream msInput = new MemoryStream(streamData))
            using (MemoryStream msOutput = new MemoryStream())
            {
                // DeflateStream understands raw deflate data only, so the two
                // header bytes of the zlib wrapper (RFC 1950: CMF and FLG) are
                // skipped first.
                // NOTE(review): a zlib stream with a preset dictionary has a
                // longer header; that case is not handled here.
                msInput.ReadByte();
                msInput.ReadByte();

                using (DeflateStream zip = new DeflateStream(msInput, CompressionMode.Decompress, true))
                {
                    byte[] buffer = new byte[1024];
                    int bytesRead;
                    while ((bytesRead = zip.Read(buffer, 0, buffer.Length)) > 0)
                    {
                        msOutput.Write(buffer, 0, bytesRead);
                    }
                }

                // ToArray returns exactly the bytes written, with no spare capacity.
                return msOutput.ToArray();
            }
        }
    }
}
}

View File

@@ -1,215 +0,0 @@
using System.Collections.Generic;
using VAR.PdfTools.PdfElements;
namespace VAR.PdfTools
{
public class PdfFont
{
#region Declarations
// Font dictionary passed to the constructor.
private PdfDictionary _baseData = null;
// Character mapping parsed from the "ToUnicode" stream; stays null when the font has none.
private Dictionary<char, string> _toUnicode = null;
// Width of each character, either from a standard font table or parsed from "Widths".
private Dictionary<char, double> _widths = null;
private double _height = 1.0;
private string _name = string.Empty;
// Set when the dictionary does not declare Type "Font".
private bool _tainted = false;
#endregion
#region Properties
/// <summary>Gets the font dictionary passed to the constructor.</summary>
public PdfDictionary BaseData { get { return _baseData; } }
/// <summary>Gets the height used for the font.</summary>
public double Height { get { return _height; } }
/// <summary>Gets or sets the name given to the font; empty by default.</summary>
public string Name { get { return _name; } set { _name = value; } }
/// <summary>Gets whether the font dictionary lacked the expected Type "Font".</summary>
public bool Tainted { get { return _tainted; } }
#endregion
#region Life cycle
/// <summary>
/// Creates a font from its dictionary and prepares its size tables.
/// A dictionary whose "Type" is not "Font" is accepted but marks the font as tainted.
/// </summary>
/// <param name="baseData">Font dictionary.</param>
public PdfFont(PdfDictionary baseData)
{
    _baseData = baseData;

    // NOTE: Type="Font" is Required by the standard, continuing anyway
    _tainted = baseData.GetParamAsString("Type") != "Font";

    PrepareSizes(baseData);
}
#endregion
#region Private methods
/// <summary>
/// Initialises the character mapping and the size tables of the font, in
/// increasing order of precedence: Times-Roman defaults, the table of the
/// named "BaseFont", and finally the explicit "Widths" array.
/// </summary>
/// <param name="baseData">Font dictionary.</param>
private void PrepareSizes(PdfDictionary baseData)
{
    // Set "Times-Roman" as default basefont sizes
    _widths = PdfStandar14FontMetrics.Times_Roman.Widths;
    _height = PdfStandar14FontMetrics.Times_Roman.ApproxHeight;

    // A "ToUnicode" entry that is not a stream with data is ignored instead
    // of aborting with an InvalidCastException.
    IPdfElement toUnicodeElement;
    if (baseData.Values.TryGetValue("ToUnicode", out toUnicodeElement))
    {
        PdfStream toUnicodeStream = toUnicodeElement as PdfStream;
        if (toUnicodeStream != null && toUnicodeStream.Data != null)
        {
            PdfParser parser = new PdfParser(toUnicodeStream.Data);
            _toUnicode = parser.ParseToUnicode();
        }
    }

    // The lookup must run when a base font IS named; the previous test was
    // inverted, so the standard font tables were never applied.
    string baseFont = _baseData.GetParamAsString("BaseFont");
    if (string.IsNullOrEmpty(baseFont) == false)
    {
        SetBaseFontSizes(baseFont);
    }

    if (_baseData.Values.ContainsKey("FirstChar") && _baseData.Values.ContainsKey("LastChar") && _baseData.Values.ContainsKey("Widths"))
    {
        ParseSizes();
    }
}
/// <summary>
/// Replaces the width table with the values of the "Widths" array, assigning
/// them to consecutive characters starting at "FirstChar". Each width is
/// divided by 1000; missing or (near) zero widths count as 500.
/// When "FirstChar" is not an integer or "Widths" is not an array, the
/// current table is kept unchanged.
/// </summary>
private void ParseSizes()
{
    double glyphSpaceToTextSpace = 1000.0; // TODO: PdfFont.ParseSizes: SubType:Type3 Uses a FontMatrix that may not correspond to 1/1000th

    long? firstCharCode = _baseData.GetParamAsInt("FirstChar");
    IPdfElement widthsElement;
    _baseData.Values.TryGetValue("Widths", out widthsElement);
    PdfArray widths = widthsElement as PdfArray;
    if (firstCharCode == null || widths == null)
    {
        // Malformed entries: keep the widths chosen so far rather than throwing.
        return;
    }

    Dictionary<char, double> parsedWidths = new Dictionary<char, double>();
    char actualChar = (char)firstCharCode.Value;
    foreach (IPdfElement elem in widths.Values)
    {
        double width = PdfElementUtils.GetReal(elem, 500);
        if (width < 0.0001f && width > -0.0001f) { width = 500; }

        // Indexer instead of Add so a repeated character code cannot throw.
        parsedWidths[actualChar] = width / glyphSpaceToTextSpace;
        actualChar++;
    }
    _widths = parsedWidths;

    // FIXME: Calculate real height
}
/// <summary>
/// Selects the width table and approximate height of one of the fourteen
/// standard fonts. Any other name leaves the current values untouched.
/// </summary>
/// <param name="baseFont">Font name to look up.</param>
private void SetBaseFontSizes(string baseFont)
{
    switch (baseFont)
    {
        case "Times-Roman":
            _widths = PdfStandar14FontMetrics.Times_Roman.Widths;
            _height = PdfStandar14FontMetrics.Times_Roman.ApproxHeight;
            break;

        case "Times-Bold":
            _widths = PdfStandar14FontMetrics.Times_Bold.Widths;
            _height = PdfStandar14FontMetrics.Times_Bold.ApproxHeight;
            break;

        case "Times-Italic":
            _widths = PdfStandar14FontMetrics.Times_Italic.Widths;
            _height = PdfStandar14FontMetrics.Times_Italic.ApproxHeight;
            break;

        case "Times-BoldItalic":
            _widths = PdfStandar14FontMetrics.Times_BoldItalic.Widths;
            _height = PdfStandar14FontMetrics.Times_BoldItalic.ApproxHeight;
            break;

        case "Helvetica":
            _widths = PdfStandar14FontMetrics.Helvetica.Widths;
            _height = PdfStandar14FontMetrics.Helvetica.ApproxHeight;
            break;

        case "Helvetica-Bold":
            _widths = PdfStandar14FontMetrics.Helvetica_Bold.Widths;
            _height = PdfStandar14FontMetrics.Helvetica_Bold.ApproxHeight;
            break;

        case "Helvetica-Oblique":
            _widths = PdfStandar14FontMetrics.Helvetica_Oblique.Widths;
            _height = PdfStandar14FontMetrics.Helvetica_Oblique.ApproxHeight;
            break;

        case "Helvetica-BoldOblique":
            _widths = PdfStandar14FontMetrics.Helvetica_BoldOblique.Widths;
            _height = PdfStandar14FontMetrics.Helvetica_BoldOblique.ApproxHeight;
            break;

        case "Courier":
            _widths = PdfStandar14FontMetrics.Courier.Widths;
            _height = PdfStandar14FontMetrics.Courier.ApproxHeight;
            break;

        case "Courier-Bold":
            _widths = PdfStandar14FontMetrics.Courier_Bold.Widths;
            _height = PdfStandar14FontMetrics.Courier_Bold.ApproxHeight;
            break;

        case "Courier-Oblique":
            _widths = PdfStandar14FontMetrics.Courier_Oblique.Widths;
            _height = PdfStandar14FontMetrics.Courier_Oblique.ApproxHeight;
            break;

        case "Courier-BoldOblique":
            _widths = PdfStandar14FontMetrics.Courier_BoldOblique.Widths;
            _height = PdfStandar14FontMetrics.Courier_BoldOblique.ApproxHeight;
            break;

        case "Symbol":
            _widths = PdfStandar14FontMetrics.Symbol.Widths;
            _height = PdfStandar14FontMetrics.Symbol.ApproxHeight;
            break;

        case "ZapfDingbats":
            _widths = PdfStandar14FontMetrics.ZapfDingbats.Widths;
            _height = PdfStandar14FontMetrics.ZapfDingbats.ApproxHeight;
            break;
    }
}
#endregion
#region Public methods
/// <summary>
/// Translates a character code of this font into its Unicode text.
/// </summary>
/// <param name="character">Character code as found in the content stream.</param>
/// <returns>
/// The mapped text when the font has a mapping for <paramref name="character"/>;
/// otherwise the character itself as a one-character string.
/// </returns>
public string ToUnicode(char character)
{
    // TODO: PdfFont.ToUnicode: use standar tables
    string mappedText;
    if (_toUnicode != null && _toUnicode.TryGetValue(character, out mappedText))
    {
        return mappedText;
    }

    // No table, or no entry for this code: pass the character through unchanged.
    return new string(character, 1);
}
/// <summary>
/// Gets the width recorded for a character of this font.
/// </summary>
/// <param name="character">Character code as found in the content stream.</param>
/// <returns>
/// 0 when the font has no width table at all; otherwise the recorded width, or 0.5
/// when the character is missing from the table or its width is (nearly) zero.
/// </returns>
public double GetCharWidth(char character)
{
    if (_widths == null)
    {
        return 0;
    }

    double width;
    bool known = _widths.TryGetValue(character, out width);

    // NOTE: Convert "Zero" to default width of 0.5
    if (!known || width <= 0.0001)
    {
        width = 0.5;
    }
    return width;
}
#endregion
}
}

View File

@@ -1,210 +0,0 @@
using System;
using System.Drawing;
using System.Drawing.Drawing2D;
using System.Drawing.Imaging;
using VAR.PdfTools.Maths;
namespace VAR.PdfTools
{
/// <summary>
/// Draws the text elements extracted from a PDF page onto a bitmap: every element
/// and every character is painted together with a rounded outline of its box.
/// </summary>
public class PdfPageRenderer
{
    /// <summary>Largest allowed bitmap side, in pixels; the scale is lowered to respect it.</summary>
    private const int MaxSize = 10000;

    /// <summary>Size of the placeholder bitmap returned when there is nothing to draw.</summary>
    private const int EmptyBitmapWidth = 100;
    private const int EmptyBitmapHeight = 200;

    /// <summary>Corner radius, in pixels, of the outlines drawn around elements and characters.</summary>
    private const int CornerRadius = 5;

    private PdfDocumentPage _page;
    private PdfTextExtractor _pdfTextExtractor;
    private Rect _pageRect;
    private int _pageWidth;
    private int _pageHeight;
    private int _scale = 10;

    /// <summary>Text extractor whose elements are rendered.</summary>
    public PdfTextExtractor Extractor { get { return _pdfTextExtractor; } }

    /// <summary>Creates a renderer for a page, extracting its text first.</summary>
    /// <param name="page">Page to render.</param>
    /// <exception cref="ArgumentNullException"><paramref name="page"/> is null.</exception>
    public PdfPageRenderer(PdfDocumentPage page)
    {
        if (page == null)
        {
            throw new ArgumentNullException(nameof(page));
        }
        _page = page;
        _pdfTextExtractor = new PdfTextExtractor(_page);
        InitPage();
    }

    /// <summary>Creates a renderer on top of an already built text extractor.</summary>
    /// <param name="pdfTextExtractor">Extractor holding the elements to render.</param>
    /// <exception cref="ArgumentNullException"><paramref name="pdfTextExtractor"/> is null.</exception>
    public PdfPageRenderer(PdfTextExtractor pdfTextExtractor)
    {
        if (pdfTextExtractor == null)
        {
            throw new ArgumentNullException(nameof(pdfTextExtractor));
        }
        _pdfTextExtractor = pdfTextExtractor;
        _page = pdfTextExtractor.Page;
        InitPage();
    }

    /// <summary>
    /// Measures the area covered by the extracted text and picks the largest scale
    /// (at most the initial one, at least 1) that keeps both sides within <see cref="MaxSize"/>.
    /// </summary>
    private void InitPage()
    {
        _pageRect = _pdfTextExtractor.GetRect();
        if (_pageRect == null)
        {
            // The extractor reports no rectangle when the page has no text elements.
            _pageWidth = 0;
            _pageHeight = 0;
            return;
        }

        _pageWidth = (int)Math.Ceiling(_pageRect.XMax - _pageRect.XMin);
        _pageHeight = (int)Math.Ceiling(_pageRect.YMax - _pageRect.YMin);
        while (_scale > 1 && ((_pageWidth * _scale) > MaxSize || (_pageHeight * _scale) > MaxSize))
        {
            _scale--;
        }
        if (_scale <= 0) { _scale = 1; }
    }

    /// <summary>Renders all the text elements of the page.</summary>
    /// <returns>
    /// A new bitmap owned by the caller. When there is nothing to render a small
    /// white placeholder bitmap is returned instead.
    /// </returns>
    public Bitmap Render()
    {
        if (_pageRect == null || _pdfTextExtractor.Elements.Count == 0)
        {
            // Nothing to render
            return CreateBlankBitmap(EmptyBitmapWidth, EmptyBitmapHeight);
        }

        Bitmap bmp = CreatePageBitmap();
        try
        {
            using (Graphics gc = Graphics.FromImage(bmp))
            using (Pen penTextElem = new Pen(Color.Blue))
            using (Pen penCharElem = new Pen(Color.Navy))
            {
                foreach (PdfTextElement textElement in _pdfTextExtractor.Elements)
                {
                    DrawTextElement(textElement, gc, penTextElem, penCharElem, _scale, _pageHeight, _pageRect.XMin, _pageRect.YMin, Brushes.Black);
                }
            }
        }
        catch
        {
            // The bitmap never reaches the caller, so release it here.
            bmp.Dispose();
            throw;
        }
        return bmp;
    }

    /// <summary>Renders a column (header, elements and extents) over a page image.</summary>
    /// <param name="columnData">Column to draw.</param>
    /// <param name="bmp">Bitmap to draw on; when null a new white page-sized bitmap is created.</param>
    /// <returns>The bitmap that was drawn on.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="columnData"/> is null.</exception>
    public Bitmap RenderColumn(PdfTextElementColumn columnData, Bitmap bmp = null)
    {
        if (columnData == null)
        {
            throw new ArgumentNullException(nameof(columnData));
        }
        if (_pageRect == null)
        {
            // No extracted text means no coordinate system to place the column in.
            return bmp ?? CreateBlankBitmap(EmptyBitmapWidth, EmptyBitmapHeight);
        }

        bool ownsBitmap = (bmp == null);
        if (ownsBitmap)
        {
            bmp = CreatePageBitmap();
        }

        try
        {
            using (Graphics gc = Graphics.FromImage(bmp))
            {
                // Draw text elements of the column header
                using (Pen penTextElem = new Pen(Color.Green))
                using (Pen penCharElem = new Pen(Color.DarkGreen))
                {
                    DrawTextElement(columnData.HeadTextElement, gc, penTextElem, penCharElem, _scale, _pageHeight, _pageRect.XMin, _pageRect.YMin, Brushes.Olive);
                }

                // Draw text elements of the column
                if (columnData.Elements != null)
                {
                    using (Pen penTextElem = new Pen(Color.Red))
                    using (Pen penCharElem = new Pen(Color.DarkRed))
                    {
                        foreach (PdfTextElement textElement in columnData.Elements)
                        {
                            DrawTextElement(textElement, gc, penTextElem, penCharElem, _scale, _pageHeight, _pageRect.XMin, _pageRect.YMin, Brushes.OrangeRed);
                        }
                    }
                }

                // Draw column extents: a top line plus both sides down to the page bottom
                using (Pen penColumn = new Pen(Color.Red))
                {
                    float y = (float)(_pageRect.YMax - columnData.Y);
                    float x1 = (float)(columnData.X1 - _pageRect.XMin);
                    float x2 = (float)(columnData.X2 - _pageRect.XMin);
                    gc.DrawLine(penColumn, x1 * _scale, y * _scale, x2 * _scale, y * _scale);
                    gc.DrawLine(penColumn, x1 * _scale, y * _scale, x1 * _scale, _pageHeight * _scale);
                    gc.DrawLine(penColumn, x2 * _scale, y * _scale, x2 * _scale, _pageHeight * _scale);
                }
            }
        }
        catch
        {
            // Only dispose what was created here; a caller-supplied bitmap stays theirs.
            if (ownsBitmap) { bmp.Dispose(); }
            throw;
        }
        return bmp;
    }

    /// <summary>Creates a white bitmap of the given size.</summary>
    private static Bitmap CreateBlankBitmap(int width, int height)
    {
        Bitmap bitmap = new Bitmap(width, height, PixelFormat.Format32bppArgb);
        try
        {
            using (Graphics gc = Graphics.FromImage(bitmap))
            {
                gc.Clear(Color.White);
            }
        }
        catch
        {
            bitmap.Dispose();
            throw;
        }
        return bitmap;
    }

    /// <summary>Creates a white bitmap sized to the scaled page (never smaller than 1x1).</summary>
    private Bitmap CreatePageBitmap()
    {
        // Bitmap rejects zero or negative sizes, which a degenerate text rectangle would produce.
        return CreateBlankBitmap(Math.Max(1, _pageWidth * _scale), Math.Max(1, _pageHeight * _scale));
    }

    /// <summary>
    /// Draws one text element: its outline, each character and each character outline.
    /// Elements that are null or have (nearly) no height are ignored.
    /// </summary>
    private static void DrawTextElement(PdfTextElement textElement, Graphics gc, Pen penTextElem, Pen penCharElem, int scale, int pageHeight, double pageXMin, double pageYMin, Brush brushText)
    {
        if (textElement == null) { return; }

        double textElementHeight = textElement.VisibleHeight;
        if (textElementHeight < 0.0001) { return; }

        double textElementWidth = textElement.VisibleWidth;
        double textElementPageX = textElement.GetX() - pageXMin;
        // Image rows grow downwards, so the vertical position is flipped against the page height.
        double textElementPageY = pageHeight - (textElement.GetY() - pageYMin);

        if (penTextElem != null)
        {
            DrawRoundedRectangle(gc, penTextElem,
                (int)(textElementPageX * scale),
                (int)(textElementPageY * scale),
                (int)(textElementWidth * scale),
                (int)(textElementHeight * scale),
                CornerRadius);
        }

        // Font rejects a size of zero, which small elements reach after truncation.
        int fontPixelSize = Math.Max(1, (int)(textElementHeight * scale));
        using (Font font = new Font("Arial", fontPixelSize, GraphicsUnit.Pixel))
        {
            foreach (PdfCharElement c in textElement.Characters)
            {
                int charX = (int)((textElementPageX + c.Displacement) * scale);
                int charY = (int)(textElementPageY * scale);
                gc.DrawString(c.Char, font, brushText, charX, charY);
                if (penCharElem != null)
                {
                    DrawRoundedRectangle(gc, penCharElem,
                        charX,
                        charY,
                        (int)(c.Width * scale),
                        (int)(textElementHeight * scale),
                        CornerRadius);
                }
            }
        }
    }

    /// <summary>Builds the outline of a rectangle with rounded corners.</summary>
    /// <param name="x">Left coordinate.</param>
    /// <param name="y">Top coordinate.</param>
    /// <param name="width">Rectangle width.</param>
    /// <param name="height">Rectangle height.</param>
    /// <param name="radius">Corner radius; zero or negative yields a plain rectangle.</param>
    /// <returns>A new path; the caller is responsible for disposing it.</returns>
    public static GraphicsPath RoundedRect(int x, int y, int width, int height, int radius)
    {
        int diameter = radius * 2;
        GraphicsPath path = new GraphicsPath();
        if (diameter <= 0)
        {
            // An arc needs a non-empty bounding box; fall back to square corners.
            path.AddRectangle(new Rectangle(x, y, width, height));
            return path;
        }

        Rectangle arc = new Rectangle(x, y, diameter, diameter);
        // top left arc
        path.AddArc(arc, 180, 90);
        // top right arc
        arc.X = (x + width) - diameter;
        path.AddArc(arc, 270, 90);
        // bottom right arc
        arc.Y = (y + height) - diameter;
        path.AddArc(arc, 0, 90);
        // bottom left arc
        arc.X = x;
        path.AddArc(arc, 90, 90);
        path.CloseFigure();
        return path;
    }

    /// <summary>Strokes a rectangle with rounded corners.</summary>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="graphics"/> or <paramref name="pen"/> is null.
    /// </exception>
    public static void DrawRoundedRectangle(Graphics graphics, Pen pen, int x, int y, int width, int height, int cornerRadius)
    {
        if (graphics == null)
        {
            throw new ArgumentNullException(nameof(graphics));
        }
        if (pen == null)
        {
            throw new ArgumentNullException(nameof(pen));
        }
        using (GraphicsPath path = RoundedRect(x, y, width, height, cornerRadius))
        {
            graphics.DrawPath(pen, path);
        }
    }
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,149 +0,0 @@
using System.Collections.Generic;
using System.Linq;
using VAR.PdfTools.Maths;
namespace VAR.PdfTools
{
/// <summary>
/// One character of a text element together with its horizontal placement.
/// </summary>
public struct PdfCharElement
{
/// <summary>Text of the character (a string, so it may hold more than one code unit).</summary>
public string Char;
/// <summary>Horizontal offset of the character from the start of its text element.</summary>
public double Displacement;
/// <summary>Horizontal width of the character.</summary>
public double Width;
}
/// <summary>
/// A run of text placed on a page: its text, transformation matrix, visible size
/// and the individual characters with their horizontal placement.
/// </summary>
public class PdfTextElement
{
    #region Properties

    /// <summary>Font of the text; null for elements built by joining or splitting.</summary>
    public PdfFont Font { get; set; }

    /// <summary>Font size the text was shown with.</summary>
    public double FontSize { get; set; }

    /// <summary>Matrix placing the element; X and Y are read from it.</summary>
    public Matrix3x3 Matrix { get; set; }

    /// <summary>Text as it was accumulated, before Unicode translation.</summary>
    public string RawText { get; set; }

    /// <summary>Text after Unicode translation.</summary>
    public string VisibleText { get; set; }

    /// <summary>Horizontal size of the element.</summary>
    public double VisibleWidth { get; set; }

    /// <summary>Vertical size of the element.</summary>
    public double VisibleHeight { get; set; }

    /// <summary>Characters of the element, with displacements relative to the element start.</summary>
    public List<PdfCharElement> Characters { get; set; }

    /// <summary>Elements this one was built from, if any.</summary>
    public List<PdfTextElement> Childs { get; set; }

    #endregion

    #region Public methods

    /// <summary>Gets the X position stored in <see cref="Matrix"/>.</summary>
    public double GetX()
    {
        return Matrix.Matrix[0, 2];
    }

    /// <summary>Gets the Y position stored in <see cref="Matrix"/>.</summary>
    public double GetY()
    {
        return Matrix.Matrix[1, 2];
    }

    /// <summary>
    /// Creates a new element from the characters in the range
    /// [<paramref name="startIndex"/>, <paramref name="endIndex"/>).
    /// </summary>
    /// <param name="startIndex">Index of the first character to include.</param>
    /// <param name="endIndex">Index one past the last character to include.</param>
    /// <returns>
    /// A new element positioned at the first included character, with displacements
    /// rebased to it. Its <see cref="Font"/> is null and it shares this element's children.
    /// </returns>
    /// <exception cref="System.ArgumentOutOfRangeException">
    /// The range is empty or falls outside <see cref="Characters"/>.
    /// </exception>
    public PdfTextElement SubPart(int startIndex, int endIndex)
    {
        if (startIndex < 0 || endIndex > Characters.Count || startIndex >= endIndex)
        {
            throw new System.ArgumentOutOfRangeException(
                nameof(startIndex),
                "The character range must be non-empty and inside the element.");
        }

        double displacement = Characters[startIndex].Displacement;
        var characters = new List<PdfCharElement>(endIndex - startIndex);
        for (int j = startIndex; j < endIndex; j++)
        {
            characters.Add(new PdfCharElement
            {
                Char = Characters[j].Char,
                Displacement = Characters[j].Displacement - displacement,
                Width = Characters[j].Width,
            });
        }
        PdfCharElement lastChar = characters[characters.Count - 1];

        PdfTextElement blockElem = new PdfTextElement
        {
            Font = null,
            FontSize = FontSize,
            Matrix = Matrix.Copy(),
            RawText = Slice(RawText, startIndex, endIndex - startIndex),
            // Character indexes and string indexes differ whenever one character holds
            // several code units, so the visible text is rebuilt from the characters.
            VisibleText = string.Concat(characters.Select(c => c.Char)),
            VisibleWidth = lastChar.Displacement + lastChar.Width,
            VisibleHeight = VisibleHeight,
            Characters = characters,
            Childs = new List<PdfTextElement>(),
        };
        blockElem.Matrix.Matrix[0, 2] += displacement;
        if (Childs != null)
        {
            blockElem.Childs.AddRange(Childs);
        }
        return blockElem;
    }

    /// <summary>
    /// Gets the average width of the characters of the element.
    /// </summary>
    /// <remarks>Despite its name this is the average, not the maximum.</remarks>
    /// <returns>The average character width, or 0 when there are no characters.</returns>
    public double MaxWidth()
    {
        if (Characters == null || Characters.Count == 0)
        {
            return 0;
        }
        return Characters.Average(c => c.Width);
    }

    /// <summary>
    /// Gets the rectangle of the element: from X to X + width horizontally and
    /// from Y - height to Y vertically.
    /// </summary>
    public Rect GetRect()
    {
        double x = GetX();
        double y = GetY();
        return new Rect
        {
            XMin = x,
            YMax = y,
            XMax = x + VisibleWidth,
            YMin = y - VisibleHeight,
        };
    }

    /// <summary>
    /// Gets the gap between the character at <paramref name="index"/> and the one before it.
    /// </summary>
    /// <returns>The gap, or 0 when there is no previous character or the index is out of range.</returns>
    public double GetCharacterPreviousSpacing(int index)
    {
        if (index <= 0 || index >= Characters.Count) { return 0; }
        double previousEnd = Characters[index - 1].Displacement + Characters[index - 1].Width;
        return Characters[index].Displacement - previousEnd;
    }

    /// <summary>
    /// Gets the gap between the character at <paramref name="index"/> and the one after it.
    /// </summary>
    /// <remarks>Despite its name this measures the space that follows the character.</remarks>
    /// <returns>The gap, or 0 when there is no next character or the index is out of range.</returns>
    public double GetCharacterPrecedingSpacing(int index)
    {
        if (index < 0 || index >= (Characters.Count - 1)) { return 0; }
        double currentEnd = Characters[index].Displacement + Characters[index].Width;
        return Characters[index + 1].Displacement - currentEnd;
    }

    #endregion

    /// <summary>Substring that clamps to the text length instead of throwing.</summary>
    private static string Slice(string text, int start, int length)
    {
        if (string.IsNullOrEmpty(text) || start >= text.Length)
        {
            return string.Empty;
        }
        return text.Substring(start, System.Math.Min(length, text.Length - start));
    }
}
/// <summary>
/// Result of a column search: the header element, the elements found under it
/// and the extents of the column.
/// </summary>
public class PdfTextElementColumn
{
    /// <summary>Header element of the column; null for <see cref="Empty"/>.</summary>
    public PdfTextElement HeadTextElement { get; private set; }

    /// <summary>Elements of the column; never null.</summary>
    public IEnumerable<PdfTextElement> Elements { get; private set; }

    /// <summary>Vertical position where the column starts.</summary>
    public double Y { get; private set; }

    /// <summary>Left extent of the column.</summary>
    public double X1 { get; private set; }

    /// <summary>Right extent of the column.</summary>
    public double X2 { get; private set; }

    /// <summary>Shared instance with no header and no elements.</summary>
    public static PdfTextElementColumn Empty { get; } = new PdfTextElementColumn();

    private PdfTextElementColumn()
    {
        Elements = new List<PdfTextElement>();
    }

    /// <summary>Creates a column.</summary>
    /// <param name="head">Header element.</param>
    /// <param name="elements">Elements of the column; null is treated as no elements.</param>
    /// <param name="y">Vertical position where the column starts.</param>
    /// <param name="x1">Left extent.</param>
    /// <param name="x2">Right extent.</param>
    public PdfTextElementColumn(PdfTextElement head, IEnumerable<PdfTextElement> elements, double y, double x1, double x2)
    {
        HeadTextElement = head;
        // Consumers enumerate Elements directly, so never expose null.
        Elements = elements ?? new List<PdfTextElement>();
        Y = y;
        X1 = x1;
        X2 = x2;
    }
}
}

View File

@@ -1,856 +0,0 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using VAR.PdfTools.Maths;
using VAR.PdfTools.PdfElements;
namespace VAR.PdfTools
{
public class PdfTextExtractor
{
#region Declarations
// Page whose content actions are interpreted.
private PdfDocumentPage _page = null;
// Text elements produced so far; replaced by the join and split passes.
private List<PdfTextElement> _textElements = new List<PdfTextElement>();
// Graphics state
// Saved copies of the graphics matrix (pushed by "q", restored by "Q").
private List<Matrix3x3> _graphicsMatrixStack = new List<Matrix3x3>();
// Current graphics matrix, combined with the text matrix when an element is built.
private Matrix3x3 _graphicsMatrix = new Matrix3x3();
// Text state
// Font selected by the last "Tf"; null until one is selected.
private PdfFont _font = null;
private double _fontSize = 1;
// Extra advance added after characters ("Tc") and after spaces ("Tw").
private double _charSpacing = 0;
private double _wordSpacing = 0;
// Vertical distance used by the line feed operation ("TL").
private double _textLeading = 0;
// Text object state
// True between "BT" and "ET"; text shown outside is ignored.
private bool inText = false;
// Matrix set at the start of the line, and its copy used for the pending text.
private Matrix3x3 _textMatrix = new Matrix3x3();
private Matrix3x3 _textMatrixCurrent = new Matrix3x3();
// Pending text, its accumulated width and its characters, until the next flush.
private StringBuilder _sbText = new StringBuilder();
private double _textWidth = 0;
private List<PdfCharElement> _listCharacters = new List<PdfCharElement>();
#endregion
#region Properties
/// <summary>Page the text was extracted from.</summary>
public PdfDocumentPage Page { get { return _page; } }
/// <summary>Text elements found on the page, after joining and splitting.</summary>
public List<PdfTextElement> Elements { get { return _textElements; } }
#endregion
#region lifecycle
/// <summary>
/// Extracts the text elements of a page: interprets its content, joins the
/// elements that sit next to each other and splits the ones with wide gaps.
/// </summary>
/// <param name="page">Page to extract the text from.</param>
/// <exception cref="ArgumentNullException"><paramref name="page"/> is null.</exception>
public PdfTextExtractor(PdfDocumentPage page)
{
    if (page == null)
    {
        throw new ArgumentNullException(nameof(page));
    }
    _page = page;
    ProcessPageContent();
    JoinTextElements();
    SplitTextElements();
}
#endregion
#region Utility methods
/// <summary>
/// Translates every character of a string through the font's Unicode mapping,
/// dropping any "\0" from the translated pieces.
/// </summary>
/// <param name="text">Text to translate.</param>
/// <param name="font">Font providing the mapping; when null the text is returned as is.</param>
/// <returns>The translated text.</returns>
private string PdfString_ToUnicode(string text, PdfFont font)
{
    if (font == null) { return text; }

    StringBuilder translated = new StringBuilder();
    foreach (char code in text)
    {
        string piece = font.ToUnicode(code);
        translated.Append(piece.Replace("\0", ""));
    }
    return translated.ToString();
}
/// <summary>
/// Builds a text element from the pending text, characters and current matrices.
/// Widths are scaled by the horizontal factor of the resulting matrix and the
/// height by its vertical factor.
/// </summary>
/// <returns>The new element, with no children.</returns>
private PdfTextElement BuildTextElement()
{
    Matrix3x3 matrix = _textMatrixCurrent.Multiply(_graphicsMatrix);
    string rawText = _sbText.ToString();
    string visibleText = PdfString_ToUnicode(rawText, _font);

    // The end of the last character gives the unscaled width of the whole run.
    PdfCharElement lastChar = _listCharacters[_listCharacters.Count - 1];
    double visibleWidth = (lastChar.Displacement + lastChar.Width) * matrix.Matrix[0, 0];
    double visibleHeight = (_font.Height * _fontSize) * matrix.Matrix[1, 1];

    var scaledCharacters = new List<PdfCharElement>();
    foreach (PdfCharElement source in _listCharacters)
    {
        scaledCharacters.Add(new PdfCharElement
        {
            Char = source.Char,
            Displacement = (source.Displacement * matrix.Matrix[0, 0]),
            Width = (source.Width * matrix.Matrix[0, 0]),
        });
    }

    return new PdfTextElement
    {
        Font = _font,
        FontSize = _fontSize,
        Matrix = matrix,
        RawText = rawText,
        VisibleText = visibleText,
        VisibleWidth = visibleWidth,
        VisibleHeight = visibleHeight,
        Characters = scaledCharacters,
        Childs = new List<PdfTextElement>(),
    };
}
/// <summary>
/// Adds an element to the list of text elements, unless its visible text is
/// null, empty or made only of white space.
/// </summary>
/// <param name="textElement">Element to add.</param>
private void AddTextElement(PdfTextElement textElement)
{
    if (string.IsNullOrWhiteSpace(textElement.VisibleText))
    {
        return;
    }
    _textElements.Add(textElement);
}
/// <summary>
/// Turns the pending text into a text element and resets the pending state,
/// advancing the current text matrix by the width that was accumulated.
/// </summary>
private void FlushTextElement()
{
    if (_sbText.Length == 0)
    {
        _textWidth = 0;
        return;
    }

    // Characters are only recorded while a font is selected, and characters mapped
    // to "\0" are skipped. Without any recorded character there is nothing to
    // measure, so the pending text is dropped instead of failing to build it.
    if (_font != null && _listCharacters.Count > 0)
    {
        AddTextElement(BuildTextElement());
    }

    _textMatrixCurrent.Matrix[0, 2] += _textWidth;
    _sbText = new StringBuilder();
    _listCharacters.Clear();
    _textWidth = 0;
}
/// <summary>
/// Reduces a text to the form used for fuzzy matching: punctuation
/// (. , : ; - _), spaces and tabs are removed and the rest is upper-cased.
/// </summary>
/// <param name="text">Text to simplify.</param>
/// <returns>The simplified text.</returns>
private string SimplifyText(string text)
{
    StringBuilder sbResult = new StringBuilder(text.Length);
    foreach (char c in text)
    {
        switch (c)
        {
            case '.':
            case ',':
            case ':':
            case ';':
            case '-':
            case '_':
            case ' ':
            case '\t':
                continue;
        }
        // Invariant casing keeps matches independent of the current culture
        // (for instance the Turkish dotted and dotless "i").
        sbResult.Append(char.ToUpperInvariant(c));
    }
    return sbResult.ToString();
}
/// <summary>
/// Finds the first text element whose visible text equals the given text.
/// </summary>
/// <param name="text">Text to look for.</param>
/// <param name="fuzzy">When true both texts are simplified before comparing.</param>
/// <returns>The first matching element, or null when there is none.</returns>
private PdfTextElement FindElementByText(string text, bool fuzzy)
{
    string wanted = fuzzy ? SimplifyText(text) : text;
    return _textElements.FirstOrDefault(
        candidate => (fuzzy ? SimplifyText(candidate.VisibleText) : candidate.VisibleText) == wanted);
}
/// <summary>
/// Finds every text element whose visible text contains the given text.
/// </summary>
/// <param name="text">Text to look for.</param>
/// <param name="fuzzy">When true both texts are simplified before comparing.</param>
/// <returns>The matching elements, in page order; empty when there is none.</returns>
private List<PdfTextElement> FindElementsContainingText(string text, bool fuzzy)
{
    string wanted = fuzzy ? SimplifyText(text) : text;
    return _textElements
        .Where(candidate => (fuzzy ? SimplifyText(candidate.VisibleText) : candidate.VisibleText).Contains(wanted))
        .ToList();
}
/// <summary>
/// Tells whether the horizontal span of an element overlaps the span
/// [<paramref name="elem2X1"/>, <paramref name="elem2X2"/>] (touching counts).
/// </summary>
private bool TextElementVerticalIntersection(PdfTextElement elem1, double elem2X1, double elem2X2)
{
    double left = elem1.GetX();
    double right = left + elem1.VisibleWidth;
    bool startsBeforeOtherEnds = elem2X2 >= left;
    bool endsAfterOtherStarts = right >= elem2X1;
    return endsAfterOtherStarts && startsBeforeOtherEnds;
}
/// <summary>
/// Tells whether the horizontal spans of two elements overlap (touching counts).
/// </summary>
private bool TextElementVerticalIntersection(PdfTextElement elem1, PdfTextElement elem2)
{
    double left1 = elem1.GetX();
    double right1 = left1 + elem1.VisibleWidth;
    double left2 = elem2.GetX();
    double right2 = left2 + elem2.VisibleWidth;
    if (right1 < left2) { return false; }
    return right2 >= left1;
}
/// <summary>
/// Tells whether the vertical spans of two elements overlap (touching counts).
/// Each span goes from the element's Y to its Y plus its visible height.
/// </summary>
private bool TextElementHorizontalIntersection(PdfTextElement elem1, PdfTextElement elem2)
{
    double start1 = elem1.GetY();
    double end1 = start1 + elem1.VisibleHeight;
    double start2 = elem2.GetY();
    double end2 = start2 + elem2.VisibleHeight;
    if (end1 < start2) { return false; }
    return end2 >= start1;
}
#endregion
#region Operations
/// <summary>Saves a copy of the current graphics matrix ("q").</summary>
private void OpPushGraphState()
{
    Matrix3x3 snapshot = _graphicsMatrix.Copy();
    _graphicsMatrixStack.Add(snapshot);
}
/// <summary>
/// Applies the "cm" operator: concatenates the given matrix with the current
/// graphics matrix. The operator modifies the current matrix, it does not
/// replace it, so nested transformations accumulate.
/// </summary>
private void OpSetGraphMatrix(double a, double b, double c, double d, double e, double f)
{
    // Same operand order used for the text matrices: the new matrix is applied first.
    Matrix3x3 change = new Matrix3x3(a, b, c, d, e, f);
    _graphicsMatrix = change.Multiply(_graphicsMatrix);
}
/// <summary>
/// Restores the most recently saved graphics matrix ("Q"). A restore with
/// nothing saved (unbalanced content) is ignored.
/// </summary>
private void OpPopGraphState()
{
    int last = _graphicsMatrixStack.Count - 1;
    if (last < 0)
    {
        return;
    }
    _graphicsMatrix = _graphicsMatrixStack[last];
    _graphicsMatrixStack.RemoveAt(last);
}
/// <summary>Starts a text object ("BT"): both text matrices are reset.</summary>
private void OpBeginText()
{
    // Reset the line matrix and the working matrix before accepting text.
    _textMatrix.Idenity();
    _textMatrixCurrent.Idenity();

    inText = true;
}
/// <summary>Ends a text object ("ET"), emitting whatever text is still pending.</summary>
private void OpEndText()
{
    // Pending text belongs to the object being closed.
    FlushTextElement();

    inText = false;
}
/// <summary>
/// Selects the font and size for the text that follows ("Tf"). Pending text is
/// emitted first so it keeps the font it was shown with.
/// </summary>
/// <param name="fontName">Name of the font in the page's font table.</param>
/// <param name="size">Font size.</param>
private void OpTextFont(string fontName, double size)
{
    FlushTextElement();

    PdfFont selected = _page.Fonts[fontName];
    _font = selected;
    _fontSize = size;
}
/// <summary>Sets the character spacing ("Tc").</summary>
private void OpTextCharSpacing(double charSpacing) => _charSpacing = charSpacing;
/// <summary>Sets the word spacing ("Tw").</summary>
private void OpTextWordSpacing(double wordSpacing) => _wordSpacing = wordSpacing;
/// <summary>Sets the text leading used by the line feed operation ("TL").</summary>
private void OpTextLeading(double textLeading) => _textLeading = textLeading;
/// <summary>
/// Moves the start of the text line by the given offsets ("Td"), after emitting
/// any pending text.
/// </summary>
/// <param name="x">Horizontal offset.</param>
/// <param name="y">Vertical offset.</param>
private void OpTextDisplace(double x, double y)
{
    FlushTextElement();

    Matrix3x3 offset = new Matrix3x3();
    offset.Matrix[0, 2] = x;
    offset.Matrix[1, 2] = y;

    // The offset is combined with the line matrix; the working matrix restarts from it.
    Matrix3x3 moved = offset.Multiply(_textMatrix);
    _textMatrix = moved;
    _textMatrixCurrent = moved.Copy();
}
/// <summary>Moves to the next line ("T*"): down by the current text leading.</summary>
private void OpTextLineFeed() => OpTextDisplace(0, -_textLeading);
/// <summary>
/// Replaces the text matrices with the given one ("Tm"), after emitting any
/// pending text.
/// </summary>
private void OpSetTextMatrix(double a, double b, double c, double d, double e, double f)
{
    Matrix3x3 replacement = new Matrix3x3(a, b, c, d, e, f);

    FlushTextElement();

    _textMatrix = replacement;
    _textMatrixCurrent = replacement.Copy();
}
/// <summary>
/// Appends text to the pending run ("Tj"), recording every character with its
/// displacement and width. Ignored outside a text object.
/// </summary>
/// <remarks>
/// Characters the font maps to "\0" are not recorded and do not advance the
/// position. Character spacing is added after every recorded character, and
/// word spacing is added on top of it after a space.
/// </remarks>
/// <param name="text">Text to show.</param>
private void OpTextPut(string text)
{
    if (inText == false) { return; }
    _sbText.Append(text);
    if (_font == null || text == null) { return; }

    foreach (char c in text)
    {
        string realChar = _font.ToUnicode(c);
        if (realChar == "\0") { continue; }

        double charWidth = _font.GetCharWidth(c) * _fontSize;
        _listCharacters.Add(new PdfCharElement { Char = realChar, Displacement = _textWidth, Width = charWidth });
        _textWidth += charWidth;
        _textWidth += _charSpacing;
        if (c == 0x20)
        {
            _textWidth += _wordSpacing;
        }
    }
}
/// <summary>
/// Shows the items of an array ("TJ"): strings are shown as text, numbers move
/// the position back by a thousandth of their value times the font size, and
/// nested arrays are processed the same way. Ignored outside a text object.
/// </summary>
/// <param name="array">Array of strings, numbers and nested arrays.</param>
private void OpTextPutMultiple(PdfArray array)
{
    if (!inText) { return; }

    foreach (IPdfElement item in array.Values)
    {
        PdfString textItem = item as PdfString;
        if (textItem != null)
        {
            OpTextPut(textItem.Value);
            continue;
        }

        if (item is PdfInteger || item is PdfReal)
        {
            double adjustment = PdfElementUtils.GetReal(item, 0);
            _textWidth -= (adjustment / 1000) * _fontSize;
            continue;
        }

        PdfArray nested = item as PdfArray;
        if (nested != null)
        {
            OpTextPutMultiple(nested);
        }
    }
}
#endregion
#region Private methods
/// <summary>
/// Interprets the content actions of the page, keeping track of the graphics and
/// text state and emitting a text element for every run of text. Operators that
/// do not affect text extraction are recognised and ignored.
/// </summary>
private void ProcessPageContent()
{
    for (int i = 0; i < _page.ContentActions.Count; i++)
    {
        PdfContentAction action = _page.ContentActions[i];
        switch (action.Token)
        {
            // Special graphics state
            case "q":
                OpPushGraphState();
                break;
            case "Q":
                OpPopGraphState();
                break;
            case "cm":
                OpSetGraphMatrix(
                    GetRealParameter(action, 0), GetRealParameter(action, 1),
                    GetRealParameter(action, 2), GetRealParameter(action, 3),
                    GetRealParameter(action, 4), GetRealParameter(action, 5));
                break;

            // Text objects
            case "BT":
                OpBeginText();
                break;
            case "ET":
                OpEndText();
                break;

            // Text state
            case "Tc":
                OpTextCharSpacing(GetRealParameter(action, 0));
                break;
            case "Tw":
                OpTextWordSpacing(GetRealParameter(action, 0));
                break;
            case "Tf":
                OpTextFont(GetStringParameter(action, 0), GetRealParameter(action, 1));
                break;
            case "TL":
                OpTextLeading(GetRealParameter(action, 0));
                break;

            // Text positioning
            case "Td":
                OpTextDisplace(GetRealParameter(action, 0), GetRealParameter(action, 1));
                break;
            case "TD":
            {
                // Same as "Td", but the vertical offset also becomes the (negated) leading.
                double x = GetRealParameter(action, 0);
                double y = GetRealParameter(action, 1);
                OpTextLeading(-y);
                OpTextDisplace(x, y);
                break;
            }
            case "Tm":
                OpSetTextMatrix(
                    GetRealParameter(action, 0), GetRealParameter(action, 1),
                    GetRealParameter(action, 2), GetRealParameter(action, 3),
                    GetRealParameter(action, 4), GetRealParameter(action, 5));
                break;
            case "T*":
                OpTextLineFeed();
                break;

            // Text showing
            case "Tj":
                OpTextPut(GetStringParameter(action, 0));
                break;
            case "'":
                OpTextLineFeed();
                OpTextPut(GetStringParameter(action, 0));
                break;
            case "\"":
                // Operands are: word spacing, character spacing, string. The operator
                // sets both spacings, then behaves like "'" (next line, show the string).
                OpTextWordSpacing(GetRealParameter(action, 0));
                OpTextCharSpacing(GetRealParameter(action, 1));
                OpTextLineFeed();
                OpTextPut(GetStringParameter(action, 2));
                break;
            case "TJ":
            {
                PdfArray items = action.Parameters[0] as PdfArray;
                if (items != null)
                {
                    OpTextPutMultiple(items);
                }
                break;
            }

            // TODO: PdfTextExtractor: Horizontal Scale, Rendering mode, Text rise
            case "Tz":
            case "Tr":
            case "Ts":
                break;

            // TODO: PdfTextExtractor: Interpret path construction and painting
            // (moveto, lineto, closepath, rectangle, fill, stroke, end path)
            case "m":
            case "l":
            case "h":
            case "re":
            case "f":
            case "S":
            case "n":
                break;

            // TODO: PdfTextExtractor: Interpret clipping (nonzero winding, even-odd)
            case "W":
            case "W*":
                break;

            // TODO: PdfTextExtractor: Interpret line width, miter limit and colors
            case "w":
            case "M":
            case "g":
            case "G":
            case "rg":
                break;

            // TODO: PdfTextExtractor: Interpret inline images and external objects
            case "BI":
            case "ID":
            case "EI":
            case "Do":
                break;

            default:
                // Unknown operator: ignored.
                break;
        }
    }
    FlushTextElement();
}

/// <summary>Reads the operand at the given position as a number (0 when it is not one).</summary>
private static double GetRealParameter(PdfContentAction action, int index)
{
    return PdfElementUtils.GetReal(action.Parameters[index], 0);
}

/// <summary>Reads the operand at the given position as a string (empty when it is not one).</summary>
private static string GetStringParameter(PdfContentAction action, int index)
{
    return PdfElementUtils.GetString(action.Parameters[index], string.Empty);
}
/// <summary>
/// Merges text elements that share font, font size and Y position and that sit
/// next to each other horizontally, replacing the element list with the result.
/// </summary>
private void JoinTextElements()
{
var textElementsCondensed = new List<PdfTextElement>();
// Each pass takes the first remaining element and grows a block around it.
while (_textElements.Count > 0)
{
PdfTextElement elem = _textElements[0];
_textElements.Remove(elem);
double blockY = elem.GetY();
double blockXMin = elem.GetX();
double blockXMax = blockXMin + elem.VisibleWidth;
// The block starts with the element itself
var textElementNeighbours = new List<PdfTextElement>();
textElementNeighbours.Add(elem);
// Search neighbours among the elements not yet assigned to a block
int i = 0;
while (i < _textElements.Count)
{
PdfTextElement neighbour = _textElements[i];
// Only elements with the same font and size can be joined
if (neighbour.Font != elem.Font || neighbour.FontSize != elem.FontSize)
{
i++;
continue;
}
// Only elements at (almost) the same Y position can be joined
double neighbourY = neighbour.GetY();
if (Math.Abs(neighbourY - blockY) > 0.001) { i++; continue; }
// The block is widened on both sides by the neighbour's average character
// width (MaxWidth returns the average) before testing for overlap.
double maxWidth = neighbour.MaxWidth();
double neighbourXMin = neighbour.GetX();
double neighbourXMax = neighbourXMin + neighbour.VisibleWidth;
double auxBlockXMin = blockXMin - maxWidth;
double auxBlockXMax = blockXMax + maxWidth;
if (auxBlockXMax >= neighbourXMin && neighbourXMax >= auxBlockXMin)
{
_textElements.Remove(neighbour);
textElementNeighbours.Add(neighbour);
if (blockXMax < neighbourXMax) { blockXMax = neighbourXMax; }
if (blockXMin > neighbourXMin) { blockXMin = neighbourXMin; }
// The block grew, so elements skipped earlier may now be in reach: rescan
i = 0;
continue;
}
i++;
}
// Nothing to join: keep the element untouched
if (textElementNeighbours.Count == 1)
{
textElementsCondensed.Add(elem);
continue;
}
// Join neighbours: rebase every character to the left edge of the block
var chars = new List<PdfCharElement>();
foreach (PdfTextElement neighbour in textElementNeighbours)
{
double neighbourXMin = neighbour.GetX();
foreach (PdfCharElement c in neighbour.Characters)
{
chars.Add(new PdfCharElement
{
Char = c.Char,
Displacement = (c.Displacement + neighbourXMin) - blockXMin,
Width = c.Width,
});
}
}
// Reading order is given by the horizontal position of each character
chars = chars.OrderBy(c => c.Displacement).ToList();
var sbText = new StringBuilder();
foreach (PdfCharElement c in chars)
{
sbText.Append(c.Char);
}
// The joined element has no font; raw and visible text are both the joined characters
PdfTextElement blockElem = new PdfTextElement
{
Font = null,
FontSize = elem.FontSize,
Matrix = elem.Matrix.Copy(),
RawText = sbText.ToString(),
VisibleText = sbText.ToString(),
VisibleWidth = blockXMax - blockXMin,
VisibleHeight = elem.VisibleHeight,
Characters = chars,
Childs = textElementNeighbours,
};
// Place the joined element at the left edge of the block
blockElem.Matrix.Matrix[0, 2] = blockXMin;
textElementsCondensed.Add(blockElem);
}
_textElements = textElementsCondensed;
}
/// <summary>
/// Splits every text element at the gaps between characters that are wider than
/// the element's average character width, replacing the element list with the
/// result. Elements without such gaps are kept untouched.
/// </summary>
private void SplitTextElements()
{
    var parts = new List<PdfTextElement>();
    foreach (PdfTextElement element in _textElements)
    {
        double gapLimit = element.MaxWidth();
        int partStart = 0;
        int characterCount = element.Characters.Count;
        for (int index = 1; index < characterCount; index++)
        {
            PdfCharElement previous = element.Characters[index - 1];
            double previousEnd = previous.Displacement + previous.Width;
            double gap = element.Characters[index].Displacement - previousEnd;
            if (gap <= gapLimit) { continue; }

            parts.Add(element.SubPart(partStart, index));
            partStart = index;
        }

        // No break found: the element stays whole; otherwise add the trailing part.
        parts.Add(partStart == 0
            ? element
            : element.SubPart(partStart, characterCount));
    }
    _textElements = parts;
}
#endregion
#region Public methods
/// <summary>
/// Gets the rectangle that results from adding the rectangles of all the text
/// elements together.
/// </summary>
/// <returns>The combined rectangle, or null when there are no text elements.</returns>
public Rect GetRect()
{
    Rect bounds = null;
    foreach (PdfTextElement element in _textElements)
    {
        Rect elementBounds = element.GetRect();
        // The first rectangle seeds the result; every rectangle is then added to it.
        bounds = bounds ?? elementBounds;
        bounds.Add(elementBounds);
    }
    return bounds;
}
/// <summary>
/// Gets the column found under the element whose text matches the given header.
/// The column extents are limited by the closest elements to the left and right
/// of the header that overlap it vertically.
/// </summary>
/// <param name="column">Text of the column header.</param>
/// <param name="fuzzy">When true texts are simplified before comparing.</param>
/// <returns>The column, or <see cref="PdfTextElementColumn.Empty"/> when the header is not found.</returns>
public PdfTextElementColumn GetColumn(string column, bool fuzzy = true)
{
    PdfTextElement header = FindElementByText(column, fuzzy);
    if (header == null)
    {
        return PdfTextElementColumn.Empty;
    }

    double headerY = header.GetY() - header.VisibleHeight;
    double headerLeft = header.GetX();
    double headerRight = headerLeft + header.VisibleWidth;

    // Determine horizontal extent
    double leftLimit = double.MinValue;
    double rightLimit = double.MaxValue;
    foreach (PdfTextElement other in _textElements)
    {
        if (other == header) { continue; }
        if (!TextElementHorizontalIntersection(header, other)) { continue; }

        double otherLeft = other.GetX();
        double otherRight = otherLeft + other.VisibleWidth;

        // Closest element ending before the header starts
        if (otherRight < headerLeft && otherRight > leftLimit)
        {
            leftLimit = otherRight;
        }
        // Closest element starting after the header ends
        if (otherLeft > headerRight && otherLeft < rightLimit)
        {
            rightLimit = otherLeft;
        }
    }

    return GetColumn(header, headerY, headerLeft, headerRight, leftLimit, rightLimit);
}
/// <summary>
/// Gets the elements of a column given its header and extents.
/// </summary>
/// <remarks>
/// Candidates are the elements below <paramref name="headY"/> that overlap the
/// header horizontally, from top to bottom. Elements completely inside the
/// extents are taken whole. Elements crossing an extent are cut at a gap between
/// characters when one is found; the column ends at the first element that
/// crosses both extents without a usable gap on either side.
/// </remarks>
/// <param name="columnHead">Header element of the column.</param>
/// <param name="headY">Vertical position below which elements belong to the column.</param>
/// <param name="headX1">Left edge of the header.</param>
/// <param name="headX2">Right edge of the header.</param>
/// <param name="extentX1">Left extent of the column.</param>
/// <param name="extentX2">Right extent of the column.</param>
/// <returns>The column with the elements found.</returns>
public PdfTextElementColumn GetColumn(PdfTextElement columnHead, double headY, double headX1, double headX2, double extentX1, double extentX2)
{
    // Get all the elements that intersect vertically, are below the header, sorted top to bottom
    List<PdfTextElement> columnDataRaw = _textElements
        .Where(elem => TextElementVerticalIntersection(elem, headX1, headX2) && !(elem.GetY() >= headY))
        .OrderByDescending(elem => elem.GetY())
        .ToList();

    // Only items completely inside extents; try splitting big elements and stop on
    // big elements that can't be split
    var columnElements = new List<PdfTextElement>();
    foreach (PdfTextElement elem in columnDataRaw)
    {
        double elemX1 = elem.GetX();
        double elemX2 = elemX1 + elem.VisibleWidth;

        // Add elements completely inside
        if (elemX1 > extentX1 && elemX2 < extentX2)
        {
            columnElements.Add(elem);
            continue;
        }

        // Try to split elements intersecting extents of the column.
        // A gap counts as a split point when it reaches a tenth of the average character width.
        double maxSpacing = elem.Characters.Average(c => c.Width) / 10;
        int lastIndex = elem.Characters.Count - 1;
        int indexStart = 0;
        int indexEnd = lastIndex;
        bool indexStartValid = true;
        bool indexEndValid = true;

        if (elemX1 < extentX1)
        {
            // Search best start: first character inside the extent, then the next gap
            int index = 0;
            double characterPosition = elemX1 + elem.Characters[index].Displacement;
            while (characterPosition < extentX1 && index < lastIndex)
            {
                index++;
                characterPosition = elemX1 + elem.Characters[index].Displacement;
            }
            double spacing = elem.GetCharacterPreviousSpacing(index);
            while (spacing < maxSpacing && index < lastIndex)
            {
                index++;
                spacing = elem.GetCharacterPreviousSpacing(index);
            }
            if (spacing < maxSpacing) { indexStartValid = false; }
            indexStart = index;
        }

        if (elemX2 > extentX2)
        {
            // Search best end: last character inside the extent, then the previous gap
            int index = lastIndex;
            double characterPosition = elemX1 + elem.Characters[index].Displacement + elem.Characters[index].Width;
            while (characterPosition > extentX2 && index > 0)
            {
                index--;
                characterPosition = elemX1 + elem.Characters[index].Displacement + elem.Characters[index].Width;
            }
            double spacing = elem.GetCharacterPrecedingSpacing(index);
            while (spacing < maxSpacing && index > 0)
            {
                index--;
                spacing = elem.GetCharacterPrecedingSpacing(index);
            }
            if (spacing < maxSpacing) { indexEndValid = false; }
            indexEnd = index;
        }

        // Break when there is no good split, spanning all extent
        if (indexStartValid == false && indexEndValid == false) { break; }

        // Continue when only one of the sides is invalid (outside elements intersecting extents of the column)
        if (indexStartValid == false || indexEndValid == false) { continue; }

        // Both searches can settle on the same gap from opposite sides, leaving no
        // character between them: there is nothing of this element inside the column.
        if (indexStart > indexEnd) { continue; }

        // Add split element
        columnElements.Add(elem.SubPart(indexStart, indexEnd + 1));
    }

    return new PdfTextElementColumn(columnHead, columnElements, headY, extentX1, extentX2);
}
/// <summary>
/// Gets the visible text of every element of the column under the given header.
/// </summary>
/// <param name="column">Text of the column header.</param>
/// <param name="fuzzy">When true texts are simplified before comparing.</param>
/// <returns>The texts of the column, in column order; empty when the header is not found.</returns>
public List<string> GetColumnAsStrings(string column, bool fuzzy = true)
{
    return GetColumn(column, fuzzy)
        .Elements
        .Select(element => element.VisibleText)
        .ToList();
}
/// <summary>
/// Gets the text of the closest element to the right of the element whose text
/// matches the given field title, among those overlapping it vertically.
/// </summary>
/// <param name="field">Text of the field title.</param>
/// <param name="fuzzy">When true texts are simplified before comparing.</param>
/// <returns>The text found, or null when the title or its value is not found.</returns>
public string GetFieldAsString(string field, bool fuzzy = true)
{
    PdfTextElement title = FindElementByText(field, fuzzy);
    if (title == null) { return null; }

    double titleX = title.GetX();
    PdfTextElement value = _textElements
        .Where(candidate => TextElementHorizontalIntersection(title, candidate))
        // Keep only what starts to the right of the title (the title itself is excluded)
        .Where(candidate => !(candidate.GetX() <= titleX))
        .OrderBy(candidate => candidate.GetX())
        .FirstOrDefault();

    return value == null ? null : value.VisibleText;
}
/// <summary>
/// Tells whether any text element of the page contains the given text.
/// </summary>
/// <param name="text">Text to look for.</param>
/// <param name="fuzzy">When true texts are simplified before comparing.</param>
public bool HasText(string text, bool fuzzy = true)
{
    int matches = FindElementsContainingText(text, fuzzy).Count;
    return matches > 0;
}
#endregion
}
}

View File

@@ -1,30 +0,0 @@
<Project Sdk="Microsoft.NET.Sdk">
  <!-- Build settings: a library that produces its NuGet package on every build. -->
  <PropertyGroup>
    <TargetFramework>netstandard2.0</TargetFramework>
    <OutputType>Library</OutputType>
    <IsPackable>true</IsPackable>
    <GeneratePackageOnBuild>true</GeneratePackageOnBuild>
  </PropertyGroup>
  <!-- NuGet package metadata. -->
  <PropertyGroup>
    <PackageId>VAR.PdfTools</PackageId>
    <Title>VAR.PdfTools</Title>
    <Version>1.6.1</Version>
    <Description>PdfTools Library</Description>
    <Authors>VAR</Authors>
    <Company>VAR</Company>
    <Copyright>Copyright © VAR 2016-2019</Copyright>
    <!-- The pack target reads the "Package"-prefixed property; the unprefixed name is ignored. -->
    <PackageRequireLicenseAcceptance>false</PackageRequireLicenseAcceptance>
    <PackageLicenseFile>LICENSE.txt</PackageLicenseFile>
    <PackageProjectUrl>https://github.com/Kableado/VAR.PdfTools</PackageProjectUrl>
    <PackageTags>PDF;PDF Library</PackageTags>
  </PropertyGroup>
  <!-- The license file lives one level up and is packed at the package root. -->
  <ItemGroup>
    <Content Include="..\LICENSE.txt" Link="LICENSE.txt" Pack="true" PackagePath="" />
  </ItemGroup>
  <ItemGroup>
    <PackageReference Include="System.Drawing.Common" Version="5.0.2" />
  </ItemGroup>
  <!-- After packing, copy the generated package into the local Nuget folder. -->
  <Target Name="CopyPackage" AfterTargets="Pack">
    <Copy SourceFiles="$(OutputPath)..\$(PackageId).$(PackageVersion).nupkg" DestinationFolder="Nuget\" />
  </Target>
</Project>