Compare commits
31 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| d5d843014a | |||
| b9750745bc | |||
| c8c7e32acc | |||
| 781f212289 | |||
| 8a966049f6 | |||
| 80ab9b9ff3 | |||
| 9af363529c | |||
| 386b38bd21 | |||
| 53d07db9c0 | |||
| 9bc7854b48 | |||
| 77a5cd1b0e | |||
| b6611b6285 | |||
| 7badc8e4b1 | |||
| 203f30e55c | |||
| c3967dd439 | |||
| da8b512c1b | |||
| beb3b931ea | |||
| 8806020036 | |||
| f3b7cd1b0d | |||
| 33f9723ac6 | |||
| 13ba41f851 | |||
| 06de734658 | |||
| 901d7e62ca | |||
| 631f8c34b2 | |||
| 7ac6b19331 | |||
| 34e7424273 | |||
| 6b8bbc367f | |||
| 6dfc248b9a | |||
| f3aca2ffa5 | |||
| 7ba320a22c | |||
| 1edddf17b1 |
2
.gitignore
vendored
2
.gitignore
vendored
@@ -27,3 +27,5 @@ obj/
|
||||
_ReSharper*/
|
||||
*.userprefs
|
||||
*.nupkg
|
||||
|
||||
.vs
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2016-2017 Valeriano Alfonso Rodriguez
|
||||
Copyright (c) 2016-2019 Valeriano Alfonso Rodriguez
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
||||
13
README.md
13
README.md
@@ -5,27 +5,33 @@
|
||||
### VAR.PdfTools
|
||||
Add the resulting assembly as reference in your projects, and this line on code:
|
||||
|
||||
```csharp
|
||||
using VAR.PdfTools;
|
||||
```
|
||||
|
||||
Then extract the contents of a data column using:
|
||||
|
||||
```csharp
|
||||
var columnData = new List<string>();
|
||||
PdfDocument doc = PdfDocument.Load("document.pdf");
|
||||
foreach (PdfDocumentPage page in doc.Pages)
|
||||
{
|
||||
PdfTextExtractor extractor = new PdfTextExtractor(page);
|
||||
columnData.AddRange(extractor.GetColumn("Column"));
|
||||
columnData.AddRange(extractor.GetColumnAsStrings("Column"));
|
||||
}
|
||||
```
|
||||
|
||||
Or the content of a field (text on the right of the indicated text):
|
||||
|
||||
```csharp
|
||||
var fieldData = new List<string>();
|
||||
PdfDocument doc = PdfDocument.Load("document.pdf");
|
||||
foreach (PdfDocumentPage page in doc.Pages)
|
||||
{
|
||||
PdfTextExtractor extractor = new PdfTextExtractor(page);
|
||||
fieldData.Add(extractor.GetField(txtFieldName.Text));
|
||||
fieldData.Add(extractor.GetFieldAsString(txtFieldName.Text));
|
||||
}
|
||||
```
|
||||
|
||||
### VAR.PdfTools.Workbench
|
||||
It is a simple Windows.Forms application to test the basic functionality of the library.
|
||||
@@ -34,6 +40,7 @@ It is a simple Windows.Forms application, to test basic funcitionallity of the l
|
||||
Visual Studio 2015 and 2010 solutions are provided. Simply click Build in the IDE.
|
||||
|
||||
A NuGet package can be built using:
|
||||
|
||||
VAR.PdfTools\Build.NuGet.cmd
|
||||
|
||||
## Contributing
|
||||
@@ -50,7 +57,7 @@ A .nuget package can be build using:
|
||||
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2016-2017 Valeriano Alfonso Rodriguez
|
||||
Copyright (c) 2016-2019 Valeriano Alfonso Rodriguez
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
||||
117
VAR.PdfTools.Workbench/Configuration.cs
Normal file
117
VAR.PdfTools.Workbench/Configuration.cs
Normal file
@@ -0,0 +1,117 @@
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Text;
|
||||
|
||||
namespace VAR.PdfTools.Workbench
|
||||
{
|
||||
/// <summary>
/// Minimal key/value configuration store persisted as a text file next to the
/// application executable. Each line of the file has the form "key|value";
/// the first '|' on a line separates the key from the value.
/// </summary>
public class Configuration
{
    // Always initialised, so the accessors below never need a null check.
    private readonly Dictionary<string, string> _configItems = new Dictionary<string, string>();

    /// <summary>
    /// Builds the configuration file path: the directory and base name of the
    /// running assembly, with a ".cfg" extension.
    /// </summary>
    private static string GetConfigFileName()
    {
        // GetEntryAssembly() returns null when the process was not started from a
        // managed executable; fall back to the assembly that contains this type.
        System.Reflection.Assembly assembly = System.Reflection.Assembly.GetEntryAssembly();
        if (assembly == null)
        {
            assembly = typeof(Configuration).Assembly;
        }

        string location = assembly.Location;
        string path = Path.GetDirectoryName(location);
        string filenameWithoutExtension = Path.GetFileNameWithoutExtension(location);

        return Path.Combine(path, filenameWithoutExtension + ".cfg");
    }

    /// <summary>
    /// Reads every line of the configuration file, or returns an empty array
    /// when the file does not exist.
    /// </summary>
    private static string[] GetConfigurationLines()
    {
        string configFile = GetConfigFileName();
        if (File.Exists(configFile) == false)
        {
            return new string[0];
        }
        return File.ReadAllLines(configFile);
    }

    /// <summary>
    /// Discards the items held in memory and reloads them from the configuration
    /// file. Lines without a '|' separator are ignored; when a key appears more
    /// than once the last occurrence wins.
    /// </summary>
    public void Load()
    {
        _configItems.Clear();
        foreach (string configLine in GetConfigurationLines())
        {
            int idxSplit = configLine.IndexOf('|');
            if (idxSplit < 0) { continue; }

            string configName = configLine.Substring(0, idxSplit);
            string configData = configLine.Substring(idxSplit + 1);

            // The indexer adds or overwrites in a single lookup.
            _configItems[configName] = configData;
        }
    }

    /// <summary>
    /// Writes every item to the configuration file as "key|value" lines,
    /// replacing any previous file content.
    /// </summary>
    public void Save()
    {
        StringBuilder sbConfig = new StringBuilder();
        foreach (KeyValuePair<string, string> pair in _configItems)
        {
            sbConfig.AppendFormat("{0}|{1}\n", pair.Key, pair.Value);
        }
        File.WriteAllText(GetConfigFileName(), sbConfig.ToString());
    }

    /// <summary>
    /// Returns the stored text for <paramref name="key"/>, or
    /// <paramref name="defaultValue"/> when the key is not present.
    /// </summary>
    public string Get(string key, string defaultValue)
    {
        string value;
        if (_configItems.TryGetValue(key, out value))
        {
            return value;
        }
        return defaultValue;
    }

    /// <summary>
    /// Returns the stored flag for <paramref name="key"/>, or
    /// <paramref name="defaultValue"/> when the key is not present.
    /// A stored value equal to "true" (compared ignoring case, so a hand-edited
    /// "True" is accepted) yields true; any other stored value yields false.
    /// </summary>
    public bool Get(string key, bool defaultValue)
    {
        string value;
        if (_configItems.TryGetValue(key, out value))
        {
            return string.Equals(value, "true", System.StringComparison.OrdinalIgnoreCase);
        }
        return defaultValue;
    }

    /// <summary>
    /// Stores <paramref name="value"/> under <paramref name="key"/>, replacing
    /// any existing entry. Nothing is written to disk until Save is called.
    /// </summary>
    public void Set(string key, string value)
    {
        _configItems[key] = value;
    }

    /// <summary>
    /// Stores a flag under <paramref name="key"/> as the text "true" or "false".
    /// </summary>
    public void Set(string key, bool value)
    {
        Set(key, value ? "true" : "false");
    }
}
|
||||
}
|
||||
224
VAR.PdfTools.Workbench/FrmPdfInfo.Designer.cs
generated
224
VAR.PdfTools.Workbench/FrmPdfInfo.Designer.cs
generated
@@ -34,13 +34,21 @@
|
||||
this.txtPdfPath = new System.Windows.Forms.TextBox();
|
||||
this.txtOutput = new System.Windows.Forms.TextBox();
|
||||
this.btnProcess = new System.Windows.Forms.Button();
|
||||
this.btnGetColumn = new System.Windows.Forms.Button();
|
||||
this.txtColumnName = new System.Windows.Forms.TextBox();
|
||||
this.txtFieldName = new System.Windows.Forms.TextBox();
|
||||
this.btnGetField = new System.Windows.Forms.Button();
|
||||
this.txtText = new System.Windows.Forms.TextBox();
|
||||
this.btnHasText = new System.Windows.Forms.Button();
|
||||
this.btnGetColumn1 = new System.Windows.Forms.Button();
|
||||
this.txtField1 = new System.Windows.Forms.TextBox();
|
||||
this.btnGetField1 = new System.Windows.Forms.Button();
|
||||
this.btnHasText1 = new System.Windows.Forms.Button();
|
||||
this.btnRender = new System.Windows.Forms.Button();
|
||||
this.btnHasText2 = new System.Windows.Forms.Button();
|
||||
this.btnGetField2 = new System.Windows.Forms.Button();
|
||||
this.txtField2 = new System.Windows.Forms.TextBox();
|
||||
this.btnGetColumn2 = new System.Windows.Forms.Button();
|
||||
this.btnHasText3 = new System.Windows.Forms.Button();
|
||||
this.btnGetField3 = new System.Windows.Forms.Button();
|
||||
this.txtField3 = new System.Windows.Forms.TextBox();
|
||||
this.btnGetColumn3 = new System.Windows.Forms.Button();
|
||||
this.txtPages = new System.Windows.Forms.TextBox();
|
||||
this.chkRender = new System.Windows.Forms.CheckBox();
|
||||
this.SuspendLayout();
|
||||
//
|
||||
// lblOutputs
|
||||
@@ -109,56 +117,42 @@
|
||||
this.btnProcess.UseVisualStyleBackColor = true;
|
||||
this.btnProcess.Click += new System.EventHandler(this.btnProcess_Click);
|
||||
//
|
||||
// btnGetColumn
|
||||
// btnGetColumn1
|
||||
//
|
||||
this.btnGetColumn.Location = new System.Drawing.Point(163, 51);
|
||||
this.btnGetColumn.Name = "btnGetColumn";
|
||||
this.btnGetColumn.Size = new System.Drawing.Size(75, 23);
|
||||
this.btnGetColumn.TabIndex = 12;
|
||||
this.btnGetColumn.Text = "GetColumn";
|
||||
this.btnGetColumn.UseVisualStyleBackColor = true;
|
||||
this.btnGetColumn.Click += new System.EventHandler(this.btnGetColumn_Click);
|
||||
this.btnGetColumn1.Location = new System.Drawing.Point(292, 51);
|
||||
this.btnGetColumn1.Name = "btnGetColumn1";
|
||||
this.btnGetColumn1.Size = new System.Drawing.Size(69, 23);
|
||||
this.btnGetColumn1.TabIndex = 12;
|
||||
this.btnGetColumn1.Text = "GetColumn";
|
||||
this.btnGetColumn1.UseVisualStyleBackColor = true;
|
||||
this.btnGetColumn1.Click += new System.EventHandler(this.btnGetColumn1_Click);
|
||||
//
|
||||
// txtColumnName
|
||||
// txtField1
|
||||
//
|
||||
this.txtColumnName.Location = new System.Drawing.Point(15, 53);
|
||||
this.txtColumnName.Name = "txtColumnName";
|
||||
this.txtColumnName.Size = new System.Drawing.Size(142, 20);
|
||||
this.txtColumnName.TabIndex = 13;
|
||||
this.txtField1.Location = new System.Drawing.Point(15, 53);
|
||||
this.txtField1.Name = "txtField1";
|
||||
this.txtField1.Size = new System.Drawing.Size(142, 20);
|
||||
this.txtField1.TabIndex = 13;
|
||||
//
|
||||
// txtFieldName
|
||||
// btnGetField1
|
||||
//
|
||||
this.txtFieldName.Location = new System.Drawing.Point(15, 82);
|
||||
this.txtFieldName.Name = "txtFieldName";
|
||||
this.txtFieldName.Size = new System.Drawing.Size(142, 20);
|
||||
this.txtFieldName.TabIndex = 15;
|
||||
this.btnGetField1.Location = new System.Drawing.Point(226, 51);
|
||||
this.btnGetField1.Name = "btnGetField1";
|
||||
this.btnGetField1.Size = new System.Drawing.Size(60, 23);
|
||||
this.btnGetField1.TabIndex = 14;
|
||||
this.btnGetField1.Text = "GetField";
|
||||
this.btnGetField1.UseVisualStyleBackColor = true;
|
||||
this.btnGetField1.Click += new System.EventHandler(this.btnGetField1_Click);
|
||||
//
|
||||
// btnGetField
|
||||
// btnHasText1
|
||||
//
|
||||
this.btnGetField.Location = new System.Drawing.Point(163, 80);
|
||||
this.btnGetField.Name = "btnGetField";
|
||||
this.btnGetField.Size = new System.Drawing.Size(75, 23);
|
||||
this.btnGetField.TabIndex = 14;
|
||||
this.btnGetField.Text = "GetField";
|
||||
this.btnGetField.UseVisualStyleBackColor = true;
|
||||
this.btnGetField.Click += new System.EventHandler(this.btnGetField_Click);
|
||||
//
|
||||
// txtText
|
||||
//
|
||||
this.txtText.Location = new System.Drawing.Point(15, 111);
|
||||
this.txtText.Name = "txtText";
|
||||
this.txtText.Size = new System.Drawing.Size(142, 20);
|
||||
this.txtText.TabIndex = 17;
|
||||
//
|
||||
// btnHasText
|
||||
//
|
||||
this.btnHasText.Location = new System.Drawing.Point(163, 109);
|
||||
this.btnHasText.Name = "btnHasText";
|
||||
this.btnHasText.Size = new System.Drawing.Size(75, 23);
|
||||
this.btnHasText.TabIndex = 16;
|
||||
this.btnHasText.Text = "HasText";
|
||||
this.btnHasText.UseVisualStyleBackColor = true;
|
||||
this.btnHasText.Click += new System.EventHandler(this.btnHasText_Click);
|
||||
this.btnHasText1.Location = new System.Drawing.Point(163, 51);
|
||||
this.btnHasText1.Name = "btnHasText1";
|
||||
this.btnHasText1.Size = new System.Drawing.Size(57, 23);
|
||||
this.btnHasText1.TabIndex = 16;
|
||||
this.btnHasText1.Text = "HasText";
|
||||
this.btnHasText1.UseVisualStyleBackColor = true;
|
||||
this.btnHasText1.Click += new System.EventHandler(this.btnHasText1_Click);
|
||||
//
|
||||
// btnRender
|
||||
//
|
||||
@@ -171,18 +165,118 @@
|
||||
this.btnRender.UseVisualStyleBackColor = true;
|
||||
this.btnRender.Click += new System.EventHandler(this.btnRender_Click);
|
||||
//
|
||||
// btnHasText2
|
||||
//
|
||||
this.btnHasText2.Location = new System.Drawing.Point(163, 80);
|
||||
this.btnHasText2.Name = "btnHasText2";
|
||||
this.btnHasText2.Size = new System.Drawing.Size(57, 23);
|
||||
this.btnHasText2.TabIndex = 22;
|
||||
this.btnHasText2.Text = "HasText";
|
||||
this.btnHasText2.UseVisualStyleBackColor = true;
|
||||
this.btnHasText2.Click += new System.EventHandler(this.btnHasText2_Click);
|
||||
//
|
||||
// btnGetField2
|
||||
//
|
||||
this.btnGetField2.Location = new System.Drawing.Point(226, 80);
|
||||
this.btnGetField2.Name = "btnGetField2";
|
||||
this.btnGetField2.Size = new System.Drawing.Size(60, 23);
|
||||
this.btnGetField2.TabIndex = 21;
|
||||
this.btnGetField2.Text = "GetField";
|
||||
this.btnGetField2.UseVisualStyleBackColor = true;
|
||||
this.btnGetField2.Click += new System.EventHandler(this.btnGetField2_Click);
|
||||
//
|
||||
// txtField2
|
||||
//
|
||||
this.txtField2.Location = new System.Drawing.Point(15, 82);
|
||||
this.txtField2.Name = "txtField2";
|
||||
this.txtField2.Size = new System.Drawing.Size(142, 20);
|
||||
this.txtField2.TabIndex = 20;
|
||||
//
|
||||
// btnGetColumn2
|
||||
//
|
||||
this.btnGetColumn2.Location = new System.Drawing.Point(292, 80);
|
||||
this.btnGetColumn2.Name = "btnGetColumn2";
|
||||
this.btnGetColumn2.Size = new System.Drawing.Size(69, 23);
|
||||
this.btnGetColumn2.TabIndex = 19;
|
||||
this.btnGetColumn2.Text = "GetColumn";
|
||||
this.btnGetColumn2.UseVisualStyleBackColor = true;
|
||||
this.btnGetColumn2.Click += new System.EventHandler(this.btnGetColumn2_Click);
|
||||
//
|
||||
// btnHasText3
|
||||
//
|
||||
this.btnHasText3.Location = new System.Drawing.Point(163, 109);
|
||||
this.btnHasText3.Name = "btnHasText3";
|
||||
this.btnHasText3.Size = new System.Drawing.Size(57, 23);
|
||||
this.btnHasText3.TabIndex = 26;
|
||||
this.btnHasText3.Text = "HasText";
|
||||
this.btnHasText3.UseVisualStyleBackColor = true;
|
||||
this.btnHasText3.Click += new System.EventHandler(this.btnHasText3_Click);
|
||||
//
|
||||
// btnGetField3
|
||||
//
|
||||
this.btnGetField3.Location = new System.Drawing.Point(226, 109);
|
||||
this.btnGetField3.Name = "btnGetField3";
|
||||
this.btnGetField3.Size = new System.Drawing.Size(60, 23);
|
||||
this.btnGetField3.TabIndex = 25;
|
||||
this.btnGetField3.Text = "GetField";
|
||||
this.btnGetField3.UseVisualStyleBackColor = true;
|
||||
this.btnGetField3.Click += new System.EventHandler(this.btnGetField3_Click);
|
||||
//
|
||||
// txtField3
|
||||
//
|
||||
this.txtField3.Location = new System.Drawing.Point(15, 111);
|
||||
this.txtField3.Name = "txtField3";
|
||||
this.txtField3.Size = new System.Drawing.Size(142, 20);
|
||||
this.txtField3.TabIndex = 24;
|
||||
//
|
||||
// btnGetColumn3
|
||||
//
|
||||
this.btnGetColumn3.Location = new System.Drawing.Point(292, 109);
|
||||
this.btnGetColumn3.Name = "btnGetColumn3";
|
||||
this.btnGetColumn3.Size = new System.Drawing.Size(69, 23);
|
||||
this.btnGetColumn3.TabIndex = 23;
|
||||
this.btnGetColumn3.Text = "GetColumn";
|
||||
this.btnGetColumn3.UseVisualStyleBackColor = true;
|
||||
this.btnGetColumn3.Click += new System.EventHandler(this.btnGetColumn3_Click);
|
||||
//
|
||||
// txtPages
|
||||
//
|
||||
this.txtPages.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Right)));
|
||||
this.txtPages.Location = new System.Drawing.Point(397, 82);
|
||||
this.txtPages.Name = "txtPages";
|
||||
this.txtPages.Size = new System.Drawing.Size(75, 20);
|
||||
this.txtPages.TabIndex = 27;
|
||||
//
|
||||
// chkRender
|
||||
//
|
||||
this.chkRender.AutoSize = true;
|
||||
this.chkRender.Location = new System.Drawing.Point(292, 138);
|
||||
this.chkRender.Name = "chkRender";
|
||||
this.chkRender.Size = new System.Drawing.Size(61, 17);
|
||||
this.chkRender.TabIndex = 28;
|
||||
this.chkRender.Text = "Render";
|
||||
this.chkRender.UseVisualStyleBackColor = true;
|
||||
//
|
||||
// FrmPdfInfo
|
||||
//
|
||||
this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
|
||||
this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
|
||||
this.ClientSize = new System.Drawing.Size(484, 461);
|
||||
this.Controls.Add(this.chkRender);
|
||||
this.Controls.Add(this.txtPages);
|
||||
this.Controls.Add(this.btnHasText3);
|
||||
this.Controls.Add(this.btnGetField3);
|
||||
this.Controls.Add(this.txtField3);
|
||||
this.Controls.Add(this.btnGetColumn3);
|
||||
this.Controls.Add(this.btnHasText2);
|
||||
this.Controls.Add(this.btnGetField2);
|
||||
this.Controls.Add(this.txtField2);
|
||||
this.Controls.Add(this.btnGetColumn2);
|
||||
this.Controls.Add(this.btnRender);
|
||||
this.Controls.Add(this.txtText);
|
||||
this.Controls.Add(this.btnHasText);
|
||||
this.Controls.Add(this.txtFieldName);
|
||||
this.Controls.Add(this.btnGetField);
|
||||
this.Controls.Add(this.txtColumnName);
|
||||
this.Controls.Add(this.btnGetColumn);
|
||||
this.Controls.Add(this.btnHasText1);
|
||||
this.Controls.Add(this.btnGetField1);
|
||||
this.Controls.Add(this.txtField1);
|
||||
this.Controls.Add(this.btnGetColumn1);
|
||||
this.Controls.Add(this.lblOutputs);
|
||||
this.Controls.Add(this.lblInputs);
|
||||
this.Controls.Add(this.btnBrowse);
|
||||
@@ -206,12 +300,20 @@
|
||||
private System.Windows.Forms.TextBox txtPdfPath;
|
||||
private System.Windows.Forms.TextBox txtOutput;
|
||||
private System.Windows.Forms.Button btnProcess;
|
||||
private System.Windows.Forms.Button btnGetColumn;
|
||||
private System.Windows.Forms.TextBox txtColumnName;
|
||||
private System.Windows.Forms.TextBox txtFieldName;
|
||||
private System.Windows.Forms.Button btnGetField;
|
||||
private System.Windows.Forms.TextBox txtText;
|
||||
private System.Windows.Forms.Button btnHasText;
|
||||
private System.Windows.Forms.Button btnGetColumn1;
|
||||
private System.Windows.Forms.TextBox txtField1;
|
||||
private System.Windows.Forms.Button btnGetField1;
|
||||
private System.Windows.Forms.Button btnHasText1;
|
||||
private System.Windows.Forms.Button btnRender;
|
||||
private System.Windows.Forms.Button btnHasText2;
|
||||
private System.Windows.Forms.Button btnGetField2;
|
||||
private System.Windows.Forms.TextBox txtField2;
|
||||
private System.Windows.Forms.Button btnGetColumn2;
|
||||
private System.Windows.Forms.Button btnHasText3;
|
||||
private System.Windows.Forms.Button btnGetField3;
|
||||
private System.Windows.Forms.TextBox txtField3;
|
||||
private System.Windows.Forms.Button btnGetColumn3;
|
||||
private System.Windows.Forms.TextBox txtPages;
|
||||
private System.Windows.Forms.CheckBox chkRender;
|
||||
}
|
||||
}
|
||||
@@ -1,10 +1,10 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Drawing;
|
||||
using System.Drawing.Drawing2D;
|
||||
using System.Drawing.Imaging;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Windows.Forms;
|
||||
using VAR.PdfTools.PdfElements;
|
||||
|
||||
@@ -19,19 +19,27 @@ namespace VAR.PdfTools.Workbench
|
||||
|
||||
private void FrmPdfInfo_Load(object sender, EventArgs e)
|
||||
{
|
||||
txtPdfPath.Text = Properties.Settings.Default.LastPdfPath;
|
||||
txtColumnName.Text = Properties.Settings.Default.LastColumnName;
|
||||
txtFieldName.Text = Properties.Settings.Default.LastFieldName;
|
||||
txtText.Text = Properties.Settings.Default.LastText;
|
||||
var configuration = new Configuration();
|
||||
configuration.Load();
|
||||
txtPdfPath.Text = configuration.Get("LastPdfPath", string.Empty);
|
||||
txtField1.Text = configuration.Get("Field1", string.Empty);
|
||||
txtField2.Text = configuration.Get("Field2", string.Empty);
|
||||
txtField3.Text = configuration.Get("Field3", string.Empty);
|
||||
txtPages.Text = configuration.Get("Pages", string.Empty);
|
||||
chkRender.Checked = configuration.Get("Render", false);
|
||||
}
|
||||
|
||||
private void FrmPdfInfo_FormClosing(object sender, FormClosingEventArgs e)
|
||||
{
|
||||
Properties.Settings.Default.LastPdfPath = txtPdfPath.Text;
|
||||
Properties.Settings.Default.LastColumnName = txtColumnName.Text;
|
||||
Properties.Settings.Default.LastFieldName = txtFieldName.Text;
|
||||
Properties.Settings.Default.LastText = txtText.Text;
|
||||
Properties.Settings.Default.Save();
|
||||
var configuration = new Configuration();
|
||||
var configItems = new Dictionary<string, string>();
|
||||
configuration.Set("LastPdfPath", txtPdfPath.Text);
|
||||
configuration.Set("Field1", txtField1.Text);
|
||||
configuration.Set("Field2", txtField2.Text);
|
||||
configuration.Set("Field3", txtField3.Text);
|
||||
configuration.Set("Pages", txtPages.Text);
|
||||
configuration.Set("Render", chkRender.Checked);
|
||||
configuration.Save();
|
||||
}
|
||||
|
||||
private void btnBrowse_Click(object sender, EventArgs e)
|
||||
@@ -92,9 +100,25 @@ namespace VAR.PdfTools.Workbench
|
||||
PdfTextExtractor extractor = new PdfTextExtractor(page);
|
||||
foreach (PdfTextElement textElement in extractor.Elements)
|
||||
{
|
||||
string fontName = textElement.Font == null ? "#NULL#" : textElement.Font.Name;
|
||||
if (fontName == "#NULL#" && textElement.Childs.Count > 0)
|
||||
{
|
||||
var fontNames = textElement.Childs.Select(c => c.Font == null ? "#NULL#" : c.Font.Name);
|
||||
StringBuilder sbFontName = new StringBuilder();
|
||||
foreach (string fontNameAux in fontNames)
|
||||
{
|
||||
if (sbFontName.Length > 0) { sbFontName.Append(";"); }
|
||||
sbFontName.Append(fontNameAux);
|
||||
}
|
||||
fontName = sbFontName.ToString();
|
||||
}
|
||||
|
||||
lines.Add(string.Format("Text({0}, {1})({2}, {3})[{4}]: \"{5}\"",
|
||||
textElement.Matrix.Matrix[0, 2], textElement.Matrix.Matrix[1, 2], textElement.VisibleWidth, textElement.VisibleHeight,
|
||||
textElement.Font == null ? "#NULL#" : textElement.Font.Name,
|
||||
Math.Round(textElement.Matrix.Matrix[0, 2], 2),
|
||||
Math.Round(textElement.Matrix.Matrix[1, 2], 2),
|
||||
Math.Round(textElement.VisibleWidth, 2),
|
||||
Math.Round(textElement.VisibleHeight, 2),
|
||||
fontName,
|
||||
textElement.VisibleText));
|
||||
}
|
||||
}
|
||||
@@ -102,62 +126,217 @@ namespace VAR.PdfTools.Workbench
|
||||
txtOutput.Lines = lines.ToArray();
|
||||
}
|
||||
|
||||
private void btnGetColumn_Click(object sender, EventArgs e)
|
||||
private void btnHasText1_Click(object sender, EventArgs e)
|
||||
{
|
||||
if (System.IO.File.Exists(txtPdfPath.Text) == false)
|
||||
string pdfPath = txtPdfPath.Text;
|
||||
string text = txtField1.Text;
|
||||
|
||||
Action_HasText(pdfPath, text);
|
||||
}
|
||||
|
||||
/// <summary>
/// Click handler for btnGetField1: runs Action_GetField with the PDF path
/// from txtPdfPath and the field name typed in txtField1.
/// </summary>
private void btnGetField1_Click(object sender, EventArgs e)
{
    Action_GetField(txtPdfPath.Text, txtField1.Text);
}
|
||||
|
||||
/// <summary>
/// Click handler for btnGetColumn1: runs Action_GetColumn with the PDF path
/// from txtPdfPath and the column name typed in txtField1.
/// </summary>
private void btnGetColumn1_Click(object sender, EventArgs e)
{
    Action_GetColumn(txtPdfPath.Text, txtField1.Text);
}
|
||||
|
||||
/// <summary>
/// Click handler for btnHasText2: runs Action_HasText with the PDF path
/// from txtPdfPath and the text typed in txtField2.
/// </summary>
private void btnHasText2_Click(object sender, EventArgs e)
{
    Action_HasText(txtPdfPath.Text, txtField2.Text);
}
|
||||
|
||||
/// <summary>
/// Click handler for btnGetField2: runs Action_GetField with the PDF path
/// from txtPdfPath and the field name typed in txtField2.
/// </summary>
private void btnGetField2_Click(object sender, EventArgs e)
{
    Action_GetField(txtPdfPath.Text, txtField2.Text);
}
|
||||
|
||||
/// <summary>
/// Click handler for btnGetColumn2: runs Action_GetColumn with the PDF path
/// from txtPdfPath and the column name typed in txtField2.
/// </summary>
private void btnGetColumn2_Click(object sender, EventArgs e)
{
    Action_GetColumn(txtPdfPath.Text, txtField2.Text);
}
|
||||
|
||||
/// <summary>
/// Click handler for btnHasText3: runs Action_HasText with the PDF path
/// from txtPdfPath and the text typed in txtField3.
/// </summary>
private void btnHasText3_Click(object sender, EventArgs e)
{
    Action_HasText(txtPdfPath.Text, txtField3.Text);
}
|
||||
|
||||
/// <summary>
/// Click handler for btnGetField3: runs Action_GetField with the PDF path
/// from txtPdfPath and the field name typed in txtField3.
/// </summary>
private void btnGetField3_Click(object sender, EventArgs e)
{
    Action_GetField(txtPdfPath.Text, txtField3.Text);
}
|
||||
|
||||
/// <summary>
/// Click handler for btnGetColumn3: runs Action_GetColumn with the PDF path
/// from txtPdfPath and the column name typed in txtField3.
/// </summary>
private void btnGetColumn3_Click(object sender, EventArgs e)
{
    Action_GetColumn(txtPdfPath.Text, txtField3.Text);
}
|
||||
|
||||
/// <summary>
/// Parses the page selection typed in txtPages into a list of 1-based page numbers.
/// The text is a comma-separated list whose items are either a single page ("3")
/// or an inclusive range ("2-5"). Items that cannot be parsed are ignored.
/// </summary>
/// <param name="maxPages">Number of pages in the document.</param>
/// <returns>
/// The selected page numbers, limited to 1..maxPages. Every page (1..maxPages) is
/// returned when the text box is empty or contains no parsable item.
/// </returns>
private IEnumerable<int> GetSelectedPages(int maxPages)
{
    string pages = txtPages.Text;
    if (string.IsNullOrEmpty(pages))
    {
        return Enumerable.Range(1, maxPages);
    }

    List<int> listPages = new List<int>();
    bool anyValidPart = false;

    // Split returns the whole string as a single item when there is no comma.
    foreach (string part in pages.Split(','))
    {
        if (part.Contains("-"))
        {
            string[] range = part.Split('-');
            if (range.Length != 2) { continue; }

            int pageStart;
            int pageEnd;
            if (int.TryParse(range[0], out pageStart) == false ||
                int.TryParse(range[1], out pageEnd) == false)
            {
                continue;
            }
            anyValidPart = true;

            // Accept reversed ranges ("5-3"); previously they produced a negative
            // count for Enumerable.Range, which throws ArgumentOutOfRangeException.
            int first = Math.Min(pageStart, pageEnd);
            int last = Math.Max(pageStart, pageEnd);

            // Clamp to the pages that actually exist so an oversized range such as
            // "1-2000000000" cannot allocate a huge list. Pages outside 1..maxPages
            // could never match a real page anyway.
            first = Math.Max(first, 1);
            last = Math.Min(last, maxPages);
            for (int page = first; page <= last; page++)
            {
                listPages.Add(page);
            }
        }
        else
        {
            int pageNum;
            if (int.TryParse(part, out pageNum))
            {
                anyValidPart = true;
                if (pageNum >= 1 && pageNum <= maxPages)
                {
                    listPages.Add(pageNum);
                }
            }
        }
    }

    // Nothing usable was typed: behave as if no filter was given.
    if (anyValidPart == false)
    {
        listPages.AddRange(Enumerable.Range(1, maxPages));
    }
    return listPages;
}
|
||||
|
||||
private void Action_HasText(string pdfPath, string text)
|
||||
{
|
||||
if (System.IO.File.Exists(pdfPath) == false)
|
||||
{
|
||||
MessageBox.Show("File does not exist");
|
||||
return;
|
||||
}
|
||||
|
||||
PdfDocument doc = PdfDocument.Load(txtPdfPath.Text);
|
||||
PdfDocument doc = PdfDocument.Load(pdfPath);
|
||||
|
||||
var columnData = new List<string>();
|
||||
IEnumerable<int> selectedPages = GetSelectedPages(doc.Pages.Count);
|
||||
List<string> lines = new List<string>();
|
||||
int pageNum = 0;
|
||||
foreach (PdfDocumentPage page in doc.Pages)
|
||||
{
|
||||
pageNum++;
|
||||
if (selectedPages.Contains(pageNum) == false) { continue; }
|
||||
PdfTextExtractor extractor = new PdfTextExtractor(page);
|
||||
columnData.AddRange(extractor.GetColumn(txtColumnName.Text));
|
||||
lines.Add(string.Format("Page({0}) : {1}", pageNum, Convert.ToString(extractor.HasText(text))));
|
||||
}
|
||||
txtOutput.Lines = columnData.ToArray();
|
||||
txtOutput.Lines = lines.ToArray();
|
||||
}
|
||||
|
||||
private void btnGetField_Click(object sender, EventArgs e)
|
||||
private void Action_GetField(string pdfPath, string field)
|
||||
{
|
||||
if (System.IO.File.Exists(txtPdfPath.Text) == false)
|
||||
if (System.IO.File.Exists(pdfPath) == false)
|
||||
{
|
||||
MessageBox.Show("File does not exist");
|
||||
return;
|
||||
}
|
||||
|
||||
PdfDocument doc = PdfDocument.Load(txtPdfPath.Text);
|
||||
PdfDocument doc = PdfDocument.Load(pdfPath);
|
||||
|
||||
IEnumerable<int> selectedPages = GetSelectedPages(doc.Pages.Count);
|
||||
var fieldData = new List<string>();
|
||||
int pageNum = 0;
|
||||
foreach (PdfDocumentPage page in doc.Pages)
|
||||
{
|
||||
pageNum++;
|
||||
if (selectedPages.Contains(pageNum) == false) { continue; }
|
||||
PdfTextExtractor extractor = new PdfTextExtractor(page);
|
||||
fieldData.Add(extractor.GetField(txtFieldName.Text));
|
||||
fieldData.Add(extractor.GetFieldAsString(field));
|
||||
}
|
||||
txtOutput.Lines = fieldData.ToArray();
|
||||
}
|
||||
|
||||
private void btnHasText_Click(object sender, EventArgs e)
|
||||
private void Action_GetColumn(string pdfPath, string column)
|
||||
{
|
||||
if (System.IO.File.Exists(txtPdfPath.Text) == false)
|
||||
if (System.IO.File.Exists(pdfPath) == false)
|
||||
{
|
||||
MessageBox.Show("File does not exist");
|
||||
return;
|
||||
}
|
||||
|
||||
PdfDocument doc = PdfDocument.Load(txtPdfPath.Text);
|
||||
PdfDocument doc = PdfDocument.Load(pdfPath);
|
||||
string baseDocumentPath = Path.GetDirectoryName(txtPdfPath.Text);
|
||||
string baseDocumentFilename = Path.GetFileNameWithoutExtension(txtPdfPath.Text);
|
||||
|
||||
List<string> lines = new List<string>();
|
||||
int pageNum = 1;
|
||||
IEnumerable<int> selectedPages = GetSelectedPages(doc.Pages.Count);
|
||||
var columns = new List<string>();
|
||||
int pageNum = 0;
|
||||
foreach (PdfDocumentPage page in doc.Pages)
|
||||
{
|
||||
pageNum++;
|
||||
if (selectedPages.Contains(pageNum) == false) { continue; }
|
||||
PdfTextExtractor extractor = new PdfTextExtractor(page);
|
||||
lines.Add(string.Format("Page({0}) : {1}", pageNum, Convert.ToString(extractor.HasText(txtText.Text))));
|
||||
PdfTextElementColumn columnData;
|
||||
if (column.StartsWith("#"))
|
||||
{
|
||||
string[] columnParts = column.Substring(1).Split(';');
|
||||
double y = Convert.ToDouble(columnParts[0]);
|
||||
double x1 = Convert.ToDouble(columnParts[1]);
|
||||
double x2 = Convert.ToDouble(columnParts[2]);
|
||||
columnData = extractor.GetColumn(null, y, x1, x2, x1, x2);
|
||||
}
|
||||
txtOutput.Lines = lines.ToArray();
|
||||
else
|
||||
{
|
||||
columnData = extractor.GetColumn(column);
|
||||
}
|
||||
if (chkRender.Checked)
|
||||
{
|
||||
var pdfPageRenderer = new PdfPageRenderer(extractor);
|
||||
Bitmap bmp = pdfPageRenderer.Render();
|
||||
pdfPageRenderer.RenderColumn(columnData, bmp);
|
||||
string fileName = Path.Combine(baseDocumentPath, string.Format("{0}_{1:0000}.png", baseDocumentFilename, pageNum));
|
||||
bmp.Save(fileName, ImageFormat.Png);
|
||||
}
|
||||
columns.AddRange(columnData.Elements.Select(t => t.VisibleText));
|
||||
}
|
||||
txtOutput.Lines = columns.ToArray();
|
||||
}
|
||||
|
||||
private void btnRender_Click(object sender, EventArgs e)
|
||||
@@ -168,8 +347,6 @@ namespace VAR.PdfTools.Workbench
|
||||
return;
|
||||
}
|
||||
|
||||
const int Scale = 5;
|
||||
|
||||
PdfDocument doc = PdfDocument.Load(txtPdfPath.Text);
|
||||
string baseDocumentPath = Path.GetDirectoryName(txtPdfPath.Text);
|
||||
string baseDocumentFilename = Path.GetFileNameWithoutExtension(txtPdfPath.Text);
|
||||
@@ -178,132 +355,24 @@ namespace VAR.PdfTools.Workbench
|
||||
lines.Add(string.Format("Filename : {0}", baseDocumentFilename));
|
||||
lines.Add(string.Format("Number of Pages : {0}", doc.Pages.Count));
|
||||
|
||||
int pageNumber = 1;
|
||||
IEnumerable<int> selectedPages = GetSelectedPages(doc.Pages.Count);
|
||||
int pageNum = 0;
|
||||
foreach (PdfDocumentPage page in doc.Pages)
|
||||
{
|
||||
double pageXMin = double.MaxValue;
|
||||
double pageYMin = double.MaxValue;
|
||||
double pageXMax = double.MinValue;
|
||||
double pageYMax = double.MinValue;
|
||||
pageNum++;
|
||||
if (selectedPages.Contains(pageNum) == false) { continue; }
|
||||
|
||||
// Preprocess page to get max size
|
||||
PdfTextExtractor extractor = new PdfTextExtractor(page);
|
||||
foreach (PdfTextElement textElement in extractor.Elements)
|
||||
{
|
||||
double textElementXMin = textElement.GetX();
|
||||
double textElementYMax = textElement.GetY();
|
||||
double textElementXMax = textElementXMin + textElement.VisibleWidth;
|
||||
double textElementYMin = textElementYMax - textElement.VisibleHeight;
|
||||
PdfPageRenderer pdfPageRenderer = new PdfPageRenderer(page);
|
||||
Bitmap bmp = pdfPageRenderer.Render();
|
||||
|
||||
if (textElementXMax > pageXMax) { pageXMax = textElementXMax; }
|
||||
if (textElementYMax > pageYMax) { pageYMax = textElementYMax; }
|
||||
if (textElementXMin < pageXMin) { pageXMin = textElementXMin; }
|
||||
if (textElementYMin < pageYMin) { pageYMin = textElementYMin; }
|
||||
}
|
||||
lines.Add(string.Format("Page {0:0000} TextElements : {1}", pageNumber, extractor.Elements.Count));
|
||||
|
||||
// Prepare page image
|
||||
int pageWidth = (int)Math.Ceiling(pageXMax - pageXMin);
|
||||
int pageHeight = (int)Math.Ceiling(pageYMax - pageYMin);
|
||||
using (Bitmap bmp = new Bitmap(pageWidth * Scale, pageHeight * Scale, PixelFormat.Format32bppArgb))
|
||||
using (Graphics gc = Graphics.FromImage(bmp))
|
||||
using (Pen penTextElem = new Pen(Color.Blue))
|
||||
{
|
||||
gc.Clear(Color.White);
|
||||
|
||||
// Draw text elements
|
||||
foreach (PdfTextElement textElement in extractor.Elements)
|
||||
{
|
||||
DrawTextElement(textElement, gc, penTextElem, Scale, pageHeight, pageXMin, pageYMin);
|
||||
}
|
||||
lines.Add(string.Format("Page {0:0000} TextElements : {1}", pageNum, pdfPageRenderer.Extractor.Elements.Count));
|
||||
|
||||
// Save image to disk
|
||||
string fileName = Path.Combine(baseDocumentPath, string.Format("{0}_{1:0000}.png", baseDocumentFilename, pageNumber));
|
||||
string fileName = Path.Combine(baseDocumentPath, string.Format("{0}_{1:0000}.png", baseDocumentFilename, pageNum));
|
||||
bmp.Save(fileName, ImageFormat.Png);
|
||||
}
|
||||
pageNumber++;
|
||||
}
|
||||
|
||||
txtOutput.Lines = lines.ToArray();
|
||||
}
|
||||
|
||||
/// <summary>
/// Draws one text element on the page image: a rounded outline around the whole element, every character
/// at its own horizontal displacement, and a small red marker at each character origin.
/// </summary>
/// <param name="textElement">Element to draw.</param>
/// <param name="gc">Graphics surface of the page image.</param>
/// <param name="penTextElem">Pen used for the outline of the element.</param>
/// <param name="Scale">Number of image pixels per page unit.</param>
/// <param name="pageHeight">Height of the page area in page units; used to flip the Y axis.</param>
/// <param name="pageXMin">Smallest X of the page area; becomes the left edge of the image.</param>
/// <param name="pageYMin">Smallest Y of the page area; becomes the bottom edge of the image.</param>
private static void DrawTextElement(PdfTextElement textElement, Graphics gc, Pen penTextElem, int Scale, int pageHeight, double pageXMin, double pageYMin)
{
    double textElementWidth = textElement.VisibleWidth;
    double textElementHeight = textElement.VisibleHeight;

    // Elements without a usable height are not drawn at all.
    if (textElementHeight < 0.0001) { return; }

    // Position relative to the page area. The Y axis is flipped: larger source Y values
    // end up nearer to the top of the image.
    double textElementPageX = textElement.GetX() - pageXMin;
    double textElementPageY = pageHeight - (textElement.GetY() - pageYMin);

    DrawRoundedRectangle(gc, penTextElem,
        (int)(textElementPageX * Scale),
        (int)(textElementPageY * Scale),
        (int)(textElementWidth * Scale),
        (int)(textElementHeight * Scale),
        Scale);

    // Font rejects an em-size of zero, which the truncation below would produce for
    // elements less than one pixel high; draw those with the smallest valid size instead.
    int fontPixelSize = Math.Max(1, (int)(textElementHeight * Scale));

    using (Font font = new Font("Arial", fontPixelSize, GraphicsUnit.Pixel))
    {
        foreach (PdfCharElement c in textElement.Characters)
        {
            int charX = (int)((textElementPageX + c.Displacement) * Scale);
            int charY = (int)(textElementPageY * Scale);

            gc.DrawString(c.Char, font, Brushes.Black, charX, charY);

            // 2x2 marker showing where the character starts.
            gc.FillRectangle(Brushes.Red, charX, charY, 2, 2);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Builds a closed path describing a rectangle with rounded corners.
/// </summary>
/// <param name="x">Left edge of the rectangle.</param>
/// <param name="y">Top edge of the rectangle.</param>
/// <param name="width">Width of the rectangle.</param>
/// <param name="height">Height of the rectangle.</param>
/// <param name="radius">Corner radius; zero or less yields a plain rectangle.</param>
/// <returns>A new path; the caller is responsible for disposing it.</returns>
public static GraphicsPath RoundedRect(int x, int y, int width, int height, int radius)
{
    GraphicsPath path = new GraphicsPath();

    // A non-positive radius would give the corner arcs a zero-sized bounding box,
    // which AddArc does not accept; there is nothing to round in that case anyway.
    if (radius <= 0)
    {
        path.AddRectangle(new Rectangle(x, y, width, height));
        return path;
    }

    int diameter = radius * 2;
    Rectangle arc = new Rectangle(x, y, diameter, diameter);

    // top left arc
    path.AddArc(arc, 180, 90);

    // top right arc
    arc.X = (x + width) - diameter;
    path.AddArc(arc, 270, 90);

    // bottom right arc
    arc.Y = (y + height) - diameter;
    path.AddArc(arc, 0, 90);

    // bottom left arc
    arc.X = x;
    path.AddArc(arc, 90, 90);

    path.CloseFigure();
    return path;
}
|
||||
|
||||
/// <summary>
/// Outlines a rectangle with rounded corners on the given graphics surface.
/// </summary>
/// <param name="graphics">Surface to draw on; must not be null.</param>
/// <param name="pen">Pen used for the outline; must not be null.</param>
/// <param name="x">Left edge of the rectangle.</param>
/// <param name="y">Top edge of the rectangle.</param>
/// <param name="width">Width of the rectangle.</param>
/// <param name="height">Height of the rectangle.</param>
/// <param name="cornerRadius">Radius of the rounded corners.</param>
/// <exception cref="ArgumentNullException">When the graphics surface or the pen is null.</exception>
public static void DrawRoundedRectangle(Graphics graphics, Pen pen, int x, int y, int width, int height, int cornerRadius)
{
    if (graphics == null) { throw new ArgumentNullException("graphics"); }
    if (pen == null) { throw new ArgumentNullException("pen"); }

    // The path is only needed for this single draw call, so release it right afterwards.
    GraphicsPath outline = RoundedRect(x, y, width, height, cornerRadius);
    try
    {
        graphics.DrawPath(pen, outline);
    }
    finally
    {
        if (outline != null) { outline.Dispose(); }
    }
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
120
VAR.PdfTools.Workbench/FrmPdfInfo.resx
Normal file
120
VAR.PdfTools.Workbench/FrmPdfInfo.resx
Normal file
@@ -0,0 +1,120 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<root>
|
||||
<!--
|
||||
Microsoft ResX Schema
|
||||
|
||||
Version 2.0
|
||||
|
||||
The primary goals of this format is to allow a simple XML format
|
||||
that is mostly human readable. The generation and parsing of the
|
||||
various data types are done through the TypeConverter classes
|
||||
associated with the data types.
|
||||
|
||||
Example:
|
||||
|
||||
... ado.net/XML headers & schema ...
|
||||
<resheader name="resmimetype">text/microsoft-resx</resheader>
|
||||
<resheader name="version">2.0</resheader>
|
||||
<resheader name="reader">System.Resources.ResXResourceReader, System.Windows.Forms, ...</resheader>
|
||||
<resheader name="writer">System.Resources.ResXResourceWriter, System.Windows.Forms, ...</resheader>
|
||||
<data name="Name1"><value>this is my long string</value><comment>this is a comment</comment></data>
|
||||
<data name="Color1" type="System.Drawing.Color, System.Drawing">Blue</data>
|
||||
<data name="Bitmap1" mimetype="application/x-microsoft.net.object.binary.base64">
|
||||
<value>[base64 mime encoded serialized .NET Framework object]</value>
|
||||
</data>
|
||||
<data name="Icon1" type="System.Drawing.Icon, System.Drawing" mimetype="application/x-microsoft.net.object.bytearray.base64">
|
||||
<value>[base64 mime encoded string representing a byte array form of the .NET Framework object]</value>
|
||||
<comment>This is a comment</comment>
|
||||
</data>
|
||||
|
||||
There are any number of "resheader" rows that contain simple
|
||||
name/value pairs.
|
||||
|
||||
Each data row contains a name, and value. The row also contains a
|
||||
type or mimetype. Type corresponds to a .NET class that support
|
||||
text/value conversion through the TypeConverter architecture.
|
||||
Classes that don't support this are serialized and stored with the
|
||||
mimetype set.
|
||||
|
||||
The mimetype is used for serialized objects, and tells the
|
||||
ResXResourceReader how to depersist the object. This is currently not
|
||||
extensible. For a given mimetype the value must be set accordingly:
|
||||
|
||||
Note - application/x-microsoft.net.object.binary.base64 is the format
|
||||
that the ResXResourceWriter will generate, however the reader can
|
||||
read any of the formats listed below.
|
||||
|
||||
mimetype: application/x-microsoft.net.object.binary.base64
|
||||
value : The object must be serialized with
|
||||
: System.Runtime.Serialization.Formatters.Binary.BinaryFormatter
|
||||
: and then encoded with base64 encoding.
|
||||
|
||||
mimetype: application/x-microsoft.net.object.soap.base64
|
||||
value : The object must be serialized with
|
||||
: System.Runtime.Serialization.Formatters.Soap.SoapFormatter
|
||||
: and then encoded with base64 encoding.
|
||||
|
||||
mimetype: application/x-microsoft.net.object.bytearray.base64
|
||||
value : The object must be serialized into a byte array
|
||||
: using a System.ComponentModel.TypeConverter
|
||||
: and then encoded with base64 encoding.
|
||||
-->
|
||||
<xsd:schema id="root" xmlns="" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata">
|
||||
<xsd:import namespace="http://www.w3.org/XML/1998/namespace" />
|
||||
<xsd:element name="root" msdata:IsDataSet="true">
|
||||
<xsd:complexType>
|
||||
<xsd:choice maxOccurs="unbounded">
|
||||
<xsd:element name="metadata">
|
||||
<xsd:complexType>
|
||||
<xsd:sequence>
|
||||
<xsd:element name="value" type="xsd:string" minOccurs="0" />
|
||||
</xsd:sequence>
|
||||
<xsd:attribute name="name" use="required" type="xsd:string" />
|
||||
<xsd:attribute name="type" type="xsd:string" />
|
||||
<xsd:attribute name="mimetype" type="xsd:string" />
|
||||
<xsd:attribute ref="xml:space" />
|
||||
</xsd:complexType>
|
||||
</xsd:element>
|
||||
<xsd:element name="assembly">
|
||||
<xsd:complexType>
|
||||
<xsd:attribute name="alias" type="xsd:string" />
|
||||
<xsd:attribute name="name" type="xsd:string" />
|
||||
</xsd:complexType>
|
||||
</xsd:element>
|
||||
<xsd:element name="data">
|
||||
<xsd:complexType>
|
||||
<xsd:sequence>
|
||||
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
|
||||
<xsd:element name="comment" type="xsd:string" minOccurs="0" msdata:Ordinal="2" />
|
||||
</xsd:sequence>
|
||||
<xsd:attribute name="name" type="xsd:string" use="required" msdata:Ordinal="1" />
|
||||
<xsd:attribute name="type" type="xsd:string" msdata:Ordinal="3" />
|
||||
<xsd:attribute name="mimetype" type="xsd:string" msdata:Ordinal="4" />
|
||||
<xsd:attribute ref="xml:space" />
|
||||
</xsd:complexType>
|
||||
</xsd:element>
|
||||
<xsd:element name="resheader">
|
||||
<xsd:complexType>
|
||||
<xsd:sequence>
|
||||
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
|
||||
</xsd:sequence>
|
||||
<xsd:attribute name="name" type="xsd:string" use="required" />
|
||||
</xsd:complexType>
|
||||
</xsd:element>
|
||||
</xsd:choice>
|
||||
</xsd:complexType>
|
||||
</xsd:element>
|
||||
</xsd:schema>
|
||||
<resheader name="resmimetype">
|
||||
<value>text/microsoft-resx</value>
|
||||
</resheader>
|
||||
<resheader name="version">
|
||||
<value>2.0</value>
|
||||
</resheader>
|
||||
<resheader name="reader">
|
||||
<value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
|
||||
</resheader>
|
||||
<resheader name="writer">
|
||||
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
|
||||
</resheader>
|
||||
</root>
|
||||
@@ -11,4 +11,4 @@ using System.Runtime.InteropServices;
|
||||
[assembly: AssemblyCulture("")]
|
||||
[assembly: ComVisible(false)]
|
||||
[assembly: Guid("a5825d8e-9f81-49e0-b610-8ae5e46d02ea")]
|
||||
[assembly: AssemblyVersion("1.3.*")]
|
||||
[assembly: AssemblyVersion("1.6.0.*")]
|
||||
|
||||
@@ -1,74 +0,0 @@
|
||||
//------------------------------------------------------------------------------
|
||||
// <auto-generated>
|
||||
// This code was generated by a tool.
|
||||
// Runtime Version:4.0.30319.42000
|
||||
//
|
||||
// Changes to this file may cause incorrect behavior and will be lost if
|
||||
// the code is regenerated.
|
||||
// </auto-generated>
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
namespace VAR.PdfTools.Workbench.Properties {
|
||||
|
||||
|
||||
[global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
|
||||
[global::System.CodeDom.Compiler.GeneratedCodeAttribute("Microsoft.VisualStudio.Editors.SettingsDesigner.SettingsSingleFileGenerator", "10.0.0.0")]
|
||||
internal sealed partial class Settings : global::System.Configuration.ApplicationSettingsBase {
|
||||
|
||||
private static Settings defaultInstance = ((Settings)(global::System.Configuration.ApplicationSettingsBase.Synchronized(new Settings())));
|
||||
|
||||
public static Settings Default {
|
||||
get {
|
||||
return defaultInstance;
|
||||
}
|
||||
}
|
||||
|
||||
[global::System.Configuration.UserScopedSettingAttribute()]
|
||||
[global::System.Diagnostics.DebuggerNonUserCodeAttribute()]
|
||||
[global::System.Configuration.DefaultSettingValueAttribute("")]
|
||||
public string LastPdfPath {
|
||||
get {
|
||||
return ((string)(this["LastPdfPath"]));
|
||||
}
|
||||
set {
|
||||
this["LastPdfPath"] = value;
|
||||
}
|
||||
}
|
||||
|
||||
[global::System.Configuration.UserScopedSettingAttribute()]
|
||||
[global::System.Diagnostics.DebuggerNonUserCodeAttribute()]
|
||||
[global::System.Configuration.DefaultSettingValueAttribute("")]
|
||||
public string LastColumnName {
|
||||
get {
|
||||
return ((string)(this["LastColumnName"]));
|
||||
}
|
||||
set {
|
||||
this["LastColumnName"] = value;
|
||||
}
|
||||
}
|
||||
|
||||
[global::System.Configuration.UserScopedSettingAttribute()]
|
||||
[global::System.Diagnostics.DebuggerNonUserCodeAttribute()]
|
||||
[global::System.Configuration.DefaultSettingValueAttribute("")]
|
||||
public string LastFieldName {
|
||||
get {
|
||||
return ((string)(this["LastFieldName"]));
|
||||
}
|
||||
set {
|
||||
this["LastFieldName"] = value;
|
||||
}
|
||||
}
|
||||
|
||||
[global::System.Configuration.UserScopedSettingAttribute()]
|
||||
[global::System.Diagnostics.DebuggerNonUserCodeAttribute()]
|
||||
[global::System.Configuration.DefaultSettingValueAttribute("")]
|
||||
public string LastText {
|
||||
get {
|
||||
return ((string)(this["LastText"]));
|
||||
}
|
||||
set {
|
||||
this["LastText"] = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,18 +0,0 @@
|
||||
<?xml version='1.0' encoding='utf-8'?>
|
||||
<SettingsFile xmlns="http://schemas.microsoft.com/VisualStudio/2004/01/settings" CurrentProfile="(Default)" GeneratedClassNamespace="VAR.PdfTools.Workbench.Properties" GeneratedClassName="Settings">
|
||||
<Profiles />
|
||||
<Settings>
|
||||
<Setting Name="LastPdfPath" Type="System.String" Scope="User">
|
||||
<Value Profile="(Default)" />
|
||||
</Setting>
|
||||
<Setting Name="LastColumnName" Type="System.String" Scope="User">
|
||||
<Value Profile="(Default)" />
|
||||
</Setting>
|
||||
<Setting Name="LastFieldName" Type="System.String" Scope="User">
|
||||
<Value Profile="(Default)" />
|
||||
</Setting>
|
||||
<Setting Name="LastText" Type="System.String" Scope="User">
|
||||
<Value Profile="(Default)" />
|
||||
</Setting>
|
||||
</Settings>
|
||||
</SettingsFile>
|
||||
@@ -23,6 +23,7 @@
|
||||
<DefineConstants>DEBUG;TRACE</DefineConstants>
|
||||
<ErrorReport>prompt</ErrorReport>
|
||||
<WarningLevel>4</WarningLevel>
|
||||
<LangVersion>6</LangVersion>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
|
||||
<PlatformTarget>AnyCPU</PlatformTarget>
|
||||
@@ -47,6 +48,7 @@
|
||||
<Reference Include="System.Xml" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Compile Include="Configuration.cs" />
|
||||
<Compile Include="FrmPdfInfo.cs">
|
||||
<SubType>Form</SubType>
|
||||
</Compile>
|
||||
@@ -55,15 +57,6 @@
|
||||
</Compile>
|
||||
<Compile Include="Program.cs" />
|
||||
<Compile Include="Properties\AssemblyInfo.cs" />
|
||||
<None Include="Properties\Settings.settings">
|
||||
<Generator>SettingsSingleFileGenerator</Generator>
|
||||
<LastGenOutput>Settings.Designer.cs</LastGenOutput>
|
||||
</None>
|
||||
<Compile Include="Properties\Settings.Designer.cs">
|
||||
<AutoGen>True</AutoGen>
|
||||
<DependentUpon>Settings.settings</DependentUpon>
|
||||
<DesignTimeSharedInput>True</DesignTimeSharedInput>
|
||||
</Compile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\VAR.PdfTools\VAR.PdfTools.csproj">
|
||||
@@ -71,6 +64,11 @@
|
||||
<Name>VAR.PdfTools</Name>
|
||||
</ProjectReference>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<EmbeddedResource Include="FrmPdfInfo.resx">
|
||||
<DependentUpon>FrmPdfInfo.cs</DependentUpon>
|
||||
</EmbeddedResource>
|
||||
</ItemGroup>
|
||||
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
|
||||
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
|
||||
Other similar extension points exist, see Microsoft.Common.targets.
|
||||
|
||||
25
VAR.PdfTools/Maths/Rect.cs
Normal file
25
VAR.PdfTools/Maths/Rect.cs
Normal file
@@ -0,0 +1,25 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace VAR.PdfTools.Maths
|
||||
{
|
||||
/// <summary>
/// Axis-aligned rectangle described by its minimum and maximum X and Y coordinates.
/// </summary>
public class Rect
{
    /// <summary>Smallest X coordinate covered by the rectangle.</summary>
    public double XMin { get; set; }

    /// <summary>Largest X coordinate covered by the rectangle.</summary>
    public double XMax { get; set; }

    /// <summary>Smallest Y coordinate covered by the rectangle.</summary>
    public double YMin { get; set; }

    /// <summary>Largest Y coordinate covered by the rectangle.</summary>
    public double YMax { get; set; }

    /// <summary>
    /// Grows this rectangle so that it also covers <paramref name="rect"/>.
    /// Bounds only ever move outwards; a bound that already covers the other rectangle is kept.
    /// </summary>
    /// <param name="rect">Rectangle to include.</param>
    public void Add(Rect rect)
    {
        XMin = (rect.XMin < XMin) ? rect.XMin : XMin;
        YMin = (rect.YMin < YMin) ? rect.YMin : YMin;
        XMax = (rect.XMax > XMax) ? rect.XMax : XMax;
        YMax = (rect.YMax > YMax) ? rect.YMax : YMax;
    }
}
|
||||
}
|
||||
@@ -69,7 +69,8 @@ namespace VAR.PdfTools
|
||||
{
|
||||
PdfParser parser = new PdfParser(_content);
|
||||
_contentActions = parser.ParseContent();
|
||||
}else
|
||||
}
|
||||
else
|
||||
{
|
||||
_contentActions = new List<PdfContentAction>();
|
||||
}
|
||||
|
||||
@@ -87,7 +87,9 @@ namespace VAR.PdfTools
|
||||
char actualChar = firstChar;
|
||||
foreach (IPdfElement elem in widths.Values)
|
||||
{
|
||||
_widths.Add(actualChar, PdfElementUtils.GetReal(elem, 500) / glyphSpaceToTextSpace);
|
||||
double width = PdfElementUtils.GetReal(elem, 500);
|
||||
if (width < 0.0001f && width > -0.0001f) { width = 500; }
|
||||
_widths.Add(actualChar, width / glyphSpaceToTextSpace);
|
||||
actualChar++;
|
||||
}
|
||||
// FIXME: Calculate real height
|
||||
|
||||
210
VAR.PdfTools/PdfPageRenderer.cs
Normal file
210
VAR.PdfTools/PdfPageRenderer.cs
Normal file
@@ -0,0 +1,210 @@
|
||||
using System;
|
||||
using System.Drawing;
|
||||
using System.Drawing.Drawing2D;
|
||||
using System.Drawing.Imaging;
|
||||
using VAR.PdfTools.Maths;
|
||||
|
||||
namespace VAR.PdfTools
|
||||
{
|
||||
/// <summary>
/// Renders the text elements found by a <see cref="PdfTextExtractor"/> into a bitmap, outlining every
/// text element and character. The image covers the rectangle reported by the extractor, scaled by an
/// integer factor that is lowered for large pages.
/// </summary>
public class PdfPageRenderer
{
    /// <summary>Target upper bound, in pixels, for each image dimension when choosing the scale.</summary>
    private const int MaxSize = 10000;

    /// <summary>Corner radius, in pixels, of the outlines drawn around elements and characters.</summary>
    private const int OutlineCornerRadius = 5;

    private readonly PdfDocumentPage _page;
    private readonly PdfTextExtractor _pdfTextExtractor;
    private Rect _pageRect;
    private int _pageWidth;
    private int _pageHeight;
    private int _scale = 10;

    /// <summary>Extractor whose text elements are rendered.</summary>
    public PdfTextExtractor Extractor { get { return _pdfTextExtractor; } }

    /// <summary>
    /// Creates a renderer for a page, extracting its text elements first.
    /// </summary>
    /// <param name="page">Page to render.</param>
    public PdfPageRenderer(PdfDocumentPage page)
    {
        _page = page;
        _pdfTextExtractor = new PdfTextExtractor(_page);
        InitPage();
    }

    /// <summary>
    /// Creates a renderer that reuses the elements of an existing extractor.
    /// </summary>
    /// <param name="pdfTextExtractor">Extractor to take the page and the text elements from.</param>
    /// <exception cref="ArgumentNullException">When <paramref name="pdfTextExtractor"/> is null.</exception>
    public PdfPageRenderer(PdfTextExtractor pdfTextExtractor)
    {
        if (pdfTextExtractor == null) { throw new ArgumentNullException(nameof(pdfTextExtractor)); }

        _pdfTextExtractor = pdfTextExtractor;
        _page = pdfTextExtractor.Page;
        InitPage();
    }

    /// <summary>
    /// Reads the page rectangle from the extractor and picks the largest scale, starting at the default
    /// and never below 1, for which both image dimensions stay within <see cref="MaxSize"/>.
    /// </summary>
    private void InitPage()
    {
        _pageRect = _pdfTextExtractor.GetRect();
        _pageWidth = (int)Math.Ceiling(_pageRect.XMax - _pageRect.XMin);
        _pageHeight = (int)Math.Ceiling(_pageRect.YMax - _pageRect.YMin);

        // The products are computed in 64 bits so that a very large page cannot overflow
        // into a negative number and slip past the size check.
        while (_scale > 1 &&
            ((long)_pageWidth * _scale > MaxSize || (long)_pageHeight * _scale > MaxSize))
        {
            _scale--;
        }
    }

    /// <summary>
    /// Creates the bitmap for the scaled page. Each dimension is at least one pixel, because
    /// Bitmap cannot be created with a zero or negative size.
    /// </summary>
    private Bitmap CreatePageBitmap()
    {
        int width = Math.Max(1, _pageWidth * _scale);
        int height = Math.Max(1, _pageHeight * _scale);
        return new Bitmap(width, height, PixelFormat.Format32bppArgb);
    }

    /// <summary>
    /// Renders all text elements of the page on a white background.
    /// </summary>
    /// <returns>
    /// A new bitmap owned by the caller. When the extractor has no elements a blank 100x200 image is returned.
    /// </returns>
    public Bitmap Render()
    {
        if (_pdfTextExtractor.Elements.Count == 0)
        {
            // Nothing to render
            Bitmap emptyBmp = new Bitmap(100, 200, PixelFormat.Format32bppArgb);
            using (Graphics gcEmpty = Graphics.FromImage(emptyBmp))
            {
                gcEmpty.Clear(Color.White);
            }
            return emptyBmp;
        }

        Bitmap bmp = CreatePageBitmap();
        try
        {
            using (Graphics gc = Graphics.FromImage(bmp))
            using (Pen penTextElem = new Pen(Color.Blue))
            using (Pen penCharElem = new Pen(Color.Navy))
            {
                gc.Clear(Color.White);

                // Draw text elements of the page
                foreach (PdfTextElement textElement in _pdfTextExtractor.Elements)
                {
                    DrawTextElement(textElement, gc, penTextElem, penCharElem, _scale, _pageHeight, _pageRect.XMin, _pageRect.YMin, Brushes.Black);
                }
            }
        }
        catch
        {
            // The bitmap never reaches the caller on failure, so it has to be released here.
            bmp.Dispose();
            throw;
        }

        return bmp;
    }

    /// <summary>
    /// Highlights a column: its header element, its elements and the lines marking its extents.
    /// </summary>
    /// <param name="columnData">Column to draw.</param>
    /// <param name="bmp">
    /// Image to draw on, for example the result of <see cref="Render"/>. When null a new white image
    /// of the page size is created.
    /// </param>
    /// <returns>The image that was drawn on: <paramref name="bmp"/> when given, otherwise the new image.</returns>
    /// <exception cref="ArgumentNullException">When <paramref name="columnData"/> is null.</exception>
    public Bitmap RenderColumn(PdfTextElementColumn columnData, Bitmap bmp = null)
    {
        if (columnData == null) { throw new ArgumentNullException(nameof(columnData)); }

        bool ownsBitmap = (bmp == null);
        if (ownsBitmap) { bmp = CreatePageBitmap(); }

        try
        {
            using (Graphics gc = Graphics.FromImage(bmp))
            {
                if (ownsBitmap) { gc.Clear(Color.White); }

                // Draw text elements of the column header
                using (Pen penTextElem = new Pen(Color.Green))
                using (Pen penCharElem = new Pen(Color.DarkGreen))
                {
                    DrawTextElement(columnData.HeadTextElement, gc, penTextElem, penCharElem, _scale, _pageHeight, _pageRect.XMin, _pageRect.YMin, Brushes.Olive);
                }

                // Draw text elements of the column
                if (columnData.Elements != null)
                {
                    using (Pen penTextElem = new Pen(Color.Red))
                    using (Pen penCharElem = new Pen(Color.DarkRed))
                    {
                        foreach (PdfTextElement textElement in columnData.Elements)
                        {
                            DrawTextElement(textElement, gc, penTextElem, penCharElem, _scale, _pageHeight, _pageRect.XMin, _pageRect.YMin, Brushes.OrangeRed);
                        }
                    }
                }

                // Draw column extents: one horizontal line at the column Y and two vertical
                // lines from there down to the bottom edge of the image.
                using (Pen penColumn = new Pen(Color.Red))
                {
                    float y = (float)(_pageRect.YMax - columnData.Y);
                    float x1 = (float)(columnData.X1 - _pageRect.XMin);
                    float x2 = (float)(columnData.X2 - _pageRect.XMin);

                    gc.DrawLine(penColumn, x1 * _scale, y * _scale, x2 * _scale, y * _scale);
                    gc.DrawLine(penColumn, x1 * _scale, y * _scale, x1 * _scale, _pageHeight * _scale);
                    gc.DrawLine(penColumn, x2 * _scale, y * _scale, x2 * _scale, _pageHeight * _scale);
                }
            }
        }
        catch
        {
            // Only release images created here; a caller-supplied bitmap stays under the caller's control.
            if (ownsBitmap) { bmp.Dispose(); }
            throw;
        }

        return bmp;
    }

    /// <summary>
    /// Draws one text element: an optional outline around the element, each character at its own
    /// displacement and an optional outline around each character. Null elements and elements
    /// without a usable height are ignored.
    /// </summary>
    /// <param name="textElement">Element to draw; may be null.</param>
    /// <param name="gc">Graphics surface of the page image.</param>
    /// <param name="penTextElem">Pen for the element outline, or null to skip it.</param>
    /// <param name="penCharElem">Pen for the character outlines, or null to skip them.</param>
    /// <param name="scale">Number of image pixels per page unit.</param>
    /// <param name="pageHeight">Height of the page area in page units; used to flip the Y axis.</param>
    /// <param name="pageXMin">Smallest X of the page area; becomes the left edge of the image.</param>
    /// <param name="pageYMin">Smallest Y of the page area; becomes the bottom edge of the image.</param>
    /// <param name="brushText">Brush used for the character glyphs.</param>
    private static void DrawTextElement(PdfTextElement textElement, Graphics gc, Pen penTextElem, Pen penCharElem, int scale, int pageHeight, double pageXMin, double pageYMin, Brush brushText)
    {
        if (textElement == null) { return; }

        double textElementWidth = textElement.VisibleWidth;
        double textElementHeight = textElement.VisibleHeight;
        if (textElementHeight < 0.0001) { return; }

        // Position relative to the page area. The Y axis is flipped: larger source Y values
        // end up nearer to the top of the image.
        double textElementPageX = textElement.GetX() - pageXMin;
        double textElementPageY = pageHeight - (textElement.GetY() - pageYMin);

        int elementTop = (int)(textElementPageY * scale);
        int elementHeight = (int)(textElementHeight * scale);

        if (penTextElem != null)
        {
            DrawRoundedRectangle(gc, penTextElem,
                (int)(textElementPageX * scale),
                elementTop,
                (int)(textElementWidth * scale),
                elementHeight,
                OutlineCornerRadius);
        }

        // Font rejects an em-size of zero, which the truncated height would be for elements
        // less than one pixel high; draw those with the smallest valid size instead.
        using (Font font = new Font("Arial", Math.Max(1, elementHeight), GraphicsUnit.Pixel))
        {
            foreach (PdfCharElement c in textElement.Characters)
            {
                int charLeft = (int)((textElementPageX + c.Displacement) * scale);

                gc.DrawString(c.Char, font, brushText, charLeft, elementTop);

                if (penCharElem != null)
                {
                    DrawRoundedRectangle(gc, penCharElem,
                        charLeft,
                        elementTop,
                        (int)(c.Width * scale),
                        elementHeight,
                        OutlineCornerRadius);
                }
            }
        }
    }

    /// <summary>
    /// Builds a closed path describing a rectangle with rounded corners.
    /// </summary>
    /// <param name="x">Left edge of the rectangle.</param>
    /// <param name="y">Top edge of the rectangle.</param>
    /// <param name="width">Width of the rectangle.</param>
    /// <param name="height">Height of the rectangle.</param>
    /// <param name="radius">Corner radius; zero or less yields a plain rectangle.</param>
    /// <returns>A new path; the caller is responsible for disposing it.</returns>
    public static GraphicsPath RoundedRect(int x, int y, int width, int height, int radius)
    {
        GraphicsPath path = new GraphicsPath();

        // A non-positive radius would give the corner arcs a zero-sized bounding box,
        // which AddArc does not accept; there is nothing to round in that case anyway.
        if (radius <= 0)
        {
            path.AddRectangle(new Rectangle(x, y, width, height));
            return path;
        }

        int diameter = radius * 2;
        Rectangle arc = new Rectangle(x, y, diameter, diameter);

        // top left arc
        path.AddArc(arc, 180, 90);

        // top right arc
        arc.X = (x + width) - diameter;
        path.AddArc(arc, 270, 90);

        // bottom right arc
        arc.Y = (y + height) - diameter;
        path.AddArc(arc, 0, 90);

        // bottom left arc
        arc.X = x;
        path.AddArc(arc, 90, 90);

        path.CloseFigure();
        return path;
    }

    /// <summary>
    /// Outlines a rectangle with rounded corners on the given graphics surface.
    /// </summary>
    /// <param name="graphics">Surface to draw on; must not be null.</param>
    /// <param name="pen">Pen used for the outline; must not be null.</param>
    /// <param name="x">Left edge of the rectangle.</param>
    /// <param name="y">Top edge of the rectangle.</param>
    /// <param name="width">Width of the rectangle.</param>
    /// <param name="height">Height of the rectangle.</param>
    /// <param name="cornerRadius">Radius of the rounded corners.</param>
    /// <exception cref="ArgumentNullException">When the graphics surface or the pen is null.</exception>
    public static void DrawRoundedRectangle(Graphics graphics, Pen pen, int x, int y, int width, int height, int cornerRadius)
    {
        if (graphics == null) { throw new ArgumentNullException(nameof(graphics)); }
        if (pen == null) { throw new ArgumentNullException(nameof(pen)); }

        using (GraphicsPath path = RoundedRect(x, y, width, height, cornerRadius))
        {
            graphics.DrawPath(pen, path);
        }
    }
}
|
||||
}
|
||||
149
VAR.PdfTools/PdfTextElement.cs
Normal file
149
VAR.PdfTools/PdfTextElement.cs
Normal file
@@ -0,0 +1,149 @@
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using VAR.PdfTools.Maths;
|
||||
|
||||
namespace VAR.PdfTools
|
||||
{
|
||||
/// <summary>
/// One character of a text element together with its horizontal placement inside that element.
/// </summary>
public struct PdfCharElement
{
    /// <summary>Text of the character.</summary>
    public string Char;

    /// <summary>Horizontal offset of the character from the start of its text element.</summary>
    public double Displacement;

    /// <summary>Horizontal size of the character.</summary>
    public double Width;
}
|
||||
|
||||
/// <summary>
/// A run of text taken from a PDF page, with its font, transformation matrix, visible size and
/// per-character layout.
/// </summary>
public class PdfTextElement
{
    #region Properties

    /// <summary>Font of the element. Elements created by <see cref="SubPart"/> have no font (null).</summary>
    public PdfFont Font { get; set; }

    /// <summary>Font size of the element.</summary>
    public double FontSize { get; set; }

    /// <summary>Transformation matrix; cells [0,2] and [1,2] are read as the X and Y position.</summary>
    public Matrix3x3 Matrix { get; set; }

    /// <summary>Raw text of the element.</summary>
    public string RawText { get; set; }

    /// <summary>Visible text of the element.</summary>
    public string VisibleText { get; set; }

    /// <summary>Width of the visible text.</summary>
    public double VisibleWidth { get; set; }

    /// <summary>Height of the visible text.</summary>
    public double VisibleHeight { get; set; }

    /// <summary>Characters of the element with their displacement and width.</summary>
    public List<PdfCharElement> Characters { get; set; }

    /// <summary>Child elements of this element.</summary>
    public List<PdfTextElement> Childs { get; set; }

    #endregion

    #region Public methods

    /// <summary>Gets the X position stored in the transformation matrix.</summary>
    public double GetX()
    {
        return Matrix.Matrix[0, 2];
    }

    /// <summary>Gets the Y position stored in the transformation matrix.</summary>
    public double GetY()
    {
        return Matrix.Matrix[1, 2];
    }

    /// <summary>
    /// Creates a new element holding the characters from <paramref name="startIndex"/> (inclusive)
    /// to <paramref name="endIndex"/> (exclusive). The new element is moved to the position of its
    /// first character, its character displacements are made relative to that position, it has no
    /// font, and it references the same child elements as this element.
    /// </summary>
    /// <param name="startIndex">Index of the first character to include.</param>
    /// <param name="endIndex">Index just past the last character to include.</param>
    /// <returns>The new element; for an empty range it has no characters and a width of zero.</returns>
    public PdfTextElement SubPart(int startIndex, int endIndex)
    {
        // NOTE(review): the same indices are applied to RawText, VisibleText and Characters, which
        // presumes the three hold one entry per character - confirm against the extractor.
        PdfTextElement blockElem = new PdfTextElement
        {
            Font = null,
            FontSize = FontSize,
            Matrix = Matrix.Copy(),
            RawText = RawText.Substring(startIndex, endIndex - startIndex),
            VisibleText = VisibleText.Substring(startIndex, endIndex - startIndex),
            VisibleWidth = 0,
            VisibleHeight = VisibleHeight,
            Characters = new List<PdfCharElement>(),
            Childs = new List<PdfTextElement>(),
        };

        // Move the new element to where its first character sits. An empty range placed at the
        // very end of the text has no such character and keeps the original position.
        double displacement = 0;
        if (startIndex < Characters.Count)
        {
            displacement = Characters[startIndex].Displacement;
            blockElem.Matrix.Matrix[0, 2] += displacement;
        }

        for (int j = startIndex; j < endIndex; j++)
        {
            blockElem.Characters.Add(new PdfCharElement
            {
                Char = Characters[j].Char,
                Displacement = Characters[j].Displacement - displacement,
                Width = Characters[j].Width,
            });
        }

        // The width reaches up to the end of the last character; with no characters it stays zero
        // instead of failing on the lookup of a last character that does not exist.
        if (blockElem.Characters.Count > 0)
        {
            PdfCharElement lastChar = blockElem.Characters[blockElem.Characters.Count - 1];
            blockElem.VisibleWidth = lastChar.Displacement + lastChar.Width;
        }

        if (Childs != null)
        {
            blockElem.Childs.AddRange(Childs);
        }

        return blockElem;
    }

    /// <summary>
    /// Gets the average width of the characters of this element. Despite the name this is the
    /// mean, not the maximum.
    /// </summary>
    /// <returns>The average character width, or zero when the element has no characters.</returns>
    public double MaxWidth()
    {
        // Average throws on an empty sequence, so elements without characters are handled here.
        if (Characters == null || Characters.Count == 0) { return 0; }
        return Characters.Average(c => c.Width);
    }

    /// <summary>
    /// Gets the rectangle of the element: it starts at the element position and extends
    /// <see cref="VisibleWidth"/> towards larger X and <see cref="VisibleHeight"/> towards smaller Y.
    /// </summary>
    public Rect GetRect()
    {
        double x = GetX();
        double y = GetY();
        return new Rect
        {
            XMin = x,
            YMax = y,
            XMax = x + VisibleWidth,
            YMin = y - VisibleHeight,
        };
    }

    /// <summary>
    /// Gets the gap between the end of the previous character and the start of the character at
    /// <paramref name="index"/>.
    /// </summary>
    /// <param name="index">Index of the character.</param>
    /// <returns>The gap, or zero for the first character.</returns>
    public double GetCharacterPreviousSpacing(int index)
    {
        if (index <= 0) { return 0; }
        double previousEnd = Characters[index - 1].Displacement + Characters[index - 1].Width;
        double spacing = Characters[index].Displacement - previousEnd;
        return spacing;
    }

    /// <summary>
    /// Gets the gap between the end of the character at <paramref name="index"/> and the start of
    /// the character that follows it (despite the name, this is the spacing after the character).
    /// </summary>
    /// <param name="index">Index of the character.</param>
    /// <returns>The gap, or zero for the last character.</returns>
    public double GetCharacterPrecedingSpacing(int index)
    {
        if (index >= (Characters.Count - 1)) { return 0; }
        double currentEnd = Characters[index].Displacement + Characters[index].Width;
        double spacing = Characters[index + 1].Displacement - currentEnd;
        return spacing;
    }

    #endregion
}
|
||||
|
||||
/// <summary>
/// Result of a column lookup: the header element, the elements found for the column and the
/// coordinates describing the column extents.
/// </summary>
public class PdfTextElementColumn
{
    /// <summary>Header element of the column; null for <see cref="Empty"/>.</summary>
    public PdfTextElement HeadTextElement { get; private set; }

    /// <summary>Elements of the column; never null.</summary>
    public IEnumerable<PdfTextElement> Elements { get; private set; }

    /// <summary>Y coordinate of the column.</summary>
    public double Y { get; private set; }

    /// <summary>First X coordinate of the column extents.</summary>
    public double X1 { get; private set; }

    /// <summary>Second X coordinate of the column extents.</summary>
    public double X2 { get; private set; }

    /// <summary>Shared column without header and without elements.</summary>
    public static PdfTextElementColumn Empty { get; } = new PdfTextElementColumn();

    /// <summary>Creates the empty column.</summary>
    private PdfTextElementColumn()
    {
        Elements = new List<PdfTextElement>();
    }

    /// <summary>
    /// Creates a column from its header, its elements and its extents.
    /// </summary>
    /// <param name="head">Header element of the column.</param>
    /// <param name="elements">Elements of the column; null is treated as no elements.</param>
    /// <param name="y">Y coordinate of the column.</param>
    /// <param name="x1">First X coordinate of the column extents.</param>
    /// <param name="x2">Second X coordinate of the column extents.</param>
    public PdfTextElementColumn(PdfTextElement head, IEnumerable<PdfTextElement> elements, double y, double x1, double x2)
    {
        HeadTextElement = head;

        // Consumers enumerate Elements directly, so a missing sequence is stored as an empty one.
        Elements = elements ?? new List<PdfTextElement>();
        Y = y;
        X1 = x1;
        X2 = x2;
    }
}
|
||||
}
|
||||
@@ -7,51 +7,6 @@ using VAR.PdfTools.PdfElements;
|
||||
|
||||
namespace VAR.PdfTools
|
||||
{
|
||||
/// <summary>
/// One character of a text element together with its horizontal placement inside that element.
/// </summary>
public struct PdfCharElement
{
    /// <summary>Text of the character.</summary>
    public string Char;

    /// <summary>Horizontal offset of the character from the start of its text element.</summary>
    public double Displacement;
}
|
||||
|
||||
/// <summary>
/// A run of text taken from a PDF page, with its font, transformation matrix, visible size and
/// per-character layout.
/// </summary>
public class PdfTextElement
{
    #region Properties

    /// <summary>Font of the element.</summary>
    public PdfFont Font { get; set; }

    /// <summary>Font size of the element.</summary>
    public double FontSize { get; set; }

    /// <summary>Transformation matrix; cells [0,2] and [1,2] are read as the X and Y position.</summary>
    public Matrix3x3 Matrix { get; set; }

    /// <summary>Raw text of the element.</summary>
    public string RawText { get; set; }

    /// <summary>Visible text of the element.</summary>
    public string VisibleText { get; set; }

    /// <summary>Width of the visible text.</summary>
    public double VisibleWidth { get; set; }

    /// <summary>Height of the visible text.</summary>
    public double VisibleHeight { get; set; }

    /// <summary>Characters of the element with their displacement.</summary>
    public List<PdfCharElement> Characters { get; set; }

    /// <summary>Child elements of this element.</summary>
    public List<PdfTextElement> Childs { get; set; }

    #endregion

    #region Public methods

    /// <summary>Gets the X position stored in the transformation matrix.</summary>
    public double GetX()
    {
        var cells = Matrix.Matrix;
        return cells[0, 2];
    }

    /// <summary>Gets the Y position stored in the transformation matrix.</summary>
    public double GetY()
    {
        var cells = Matrix.Matrix;
        return cells[1, 2];
    }

    #endregion
}
|
||||
|
||||
public class PdfTextExtractor
|
||||
{
|
||||
#region Declarations
|
||||
@@ -96,6 +51,7 @@ namespace VAR.PdfTools
|
||||
_page = page;
|
||||
ProcessPageContent();
|
||||
JoinTextElements();
|
||||
SplitTextElements();
|
||||
}
|
||||
|
||||
#endregion
|
||||
@@ -125,7 +81,8 @@ namespace VAR.PdfTools
|
||||
textElem.Matrix = _textMatrixCurrent.Multiply(_graphicsMatrix);
|
||||
textElem.RawText = _sbText.ToString();
|
||||
textElem.VisibleText = PdfString_ToUnicode(textElem.RawText, _font);
|
||||
textElem.VisibleWidth = _textWidth * textElem.Matrix.Matrix[0, 0];
|
||||
PdfCharElement lastChar = _listCharacters[_listCharacters.Count - 1];
|
||||
textElem.VisibleWidth = (lastChar.Displacement + lastChar.Width) * textElem.Matrix.Matrix[0, 0];
|
||||
textElem.VisibleHeight = (_font.Height * _fontSize) * textElem.Matrix.Matrix[1, 1];
|
||||
textElem.Characters = new List<PdfCharElement>();
|
||||
foreach (PdfCharElement c in _listCharacters)
|
||||
@@ -134,6 +91,7 @@ namespace VAR.PdfTools
|
||||
{
|
||||
Char = c.Char,
|
||||
Displacement = (c.Displacement * textElem.Matrix.Matrix[0, 0]),
|
||||
Width = (c.Width * textElem.Matrix.Matrix[0, 0]),
|
||||
});
|
||||
}
|
||||
textElem.Childs = new List<PdfTextElement>();
|
||||
@@ -212,6 +170,14 @@ namespace VAR.PdfTools
|
||||
return list;
|
||||
}
|
||||
|
||||
/// <summary>
/// Tells whether the horizontal extent of an element overlaps a range of X coordinates.
/// Extents that merely touch count as overlapping.
/// </summary>
/// <param name="elem1">Element whose extent runs from its X position over its visible width.</param>
/// <param name="elem2X1">Start of the X range to compare against.</param>
/// <param name="elem2X2">End of the X range to compare against.</param>
/// <returns>True when the element and the range share at least one X coordinate.</returns>
private bool TextElementVerticalIntersection(PdfTextElement elem1, double elem2X1, double elem2X2)
{
    double left = elem1.GetX();
    double right = left + elem1.VisibleWidth;

    // The element has to reach the start of the range ...
    if (right >= elem2X1)
    {
        // ... and the range has to reach the start of the element.
        return elem2X2 >= left;
    }
    return false;
}
|
||||
|
||||
private bool TextElementVerticalIntersection(PdfTextElement elem1, PdfTextElement elem2)
|
||||
{
|
||||
double elem1X1 = elem1.GetX();
|
||||
@@ -320,8 +286,8 @@ namespace VAR.PdfTools
|
||||
{
|
||||
string realChar = _font.ToUnicode(c);
|
||||
if (realChar == "\0") { continue; }
|
||||
_listCharacters.Add(new PdfCharElement { Char = _font.ToUnicode(c), Displacement = _textWidth, });
|
||||
double charWidth = _font.GetCharWidth(c) * _fontSize;
|
||||
_listCharacters.Add(new PdfCharElement { Char = _font.ToUnicode(c), Displacement = _textWidth, Width = charWidth });
|
||||
_textWidth += charWidth;
|
||||
_textWidth += ((c == 0x20) ? _wordSpacing : _charSpacing);
|
||||
}
|
||||
@@ -356,6 +322,9 @@ namespace VAR.PdfTools
|
||||
private void ProcessPageContent()
|
||||
{
|
||||
int unknowCount = 0;
|
||||
int lineCount = 0;
|
||||
int strokeCount = 0;
|
||||
int pathCount = 0;
|
||||
for (int i = 0; i < _page.ContentActions.Count; i++)
|
||||
{
|
||||
PdfContentAction action = _page.ContentActions[i];
|
||||
@@ -513,6 +482,45 @@ namespace VAR.PdfTools
|
||||
{
|
||||
// FIXME: Interpret this
|
||||
}
|
||||
else if (action.Token == "m")
|
||||
{
|
||||
// FIXME: Interpret this "moveto: Begin new subpath"
|
||||
}
|
||||
else if (action.Token == "l")
|
||||
{
|
||||
// FIXME: Interpret this "lineto: Append straight line segment to path"
|
||||
lineCount++;
|
||||
}
|
||||
else if (action.Token == "h")
|
||||
{
|
||||
// FIXME: Interpret this "closepath: Close subpath"
|
||||
pathCount++;
|
||||
}
|
||||
else if (action.Token == "W")
|
||||
{
|
||||
// FIXME: Interpret this "clip: Set clipping path using nonzero winding number rule"
|
||||
}
|
||||
else if (action.Token == "W*")
|
||||
{
|
||||
// FIXME: Interpret this "eoclip: Set clipping path using even-odd rule"
|
||||
}
|
||||
else if (action.Token == "w")
|
||||
{
|
||||
// FIXME: Interpret this "setlinewidth: Set line width"
|
||||
}
|
||||
else if (action.Token == "G")
|
||||
{
|
||||
// FIXME: Interpret this "setgray: Set gray level for stroking operations"
|
||||
}
|
||||
else if (action.Token == "S")
|
||||
{
|
||||
// FIXME: Interpret this "stroke: Stroke path"
|
||||
strokeCount++;
|
||||
}
|
||||
else if (action.Token == "M")
|
||||
{
|
||||
// FIXME: Interpret this "setmiterlimit: Set miter limit"
|
||||
}
|
||||
else
|
||||
{
|
||||
unknowCount++;
|
||||
@@ -541,13 +549,22 @@ namespace VAR.PdfTools
|
||||
while (i < _textElements.Count)
|
||||
{
|
||||
PdfTextElement neighbour = _textElements[i];
|
||||
|
||||
if (neighbour.Font != elem.Font || neighbour.FontSize != elem.FontSize)
|
||||
{
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
|
||||
double neighbourY = neighbour.GetY();
|
||||
if (Math.Abs(neighbourY - blockY) > 0.001) { i++; continue; }
|
||||
|
||||
double maxWidth = neighbour.MaxWidth();
|
||||
|
||||
double neighbourXMin = neighbour.GetX();
|
||||
double neighbourXMax = neighbourXMin + neighbour.VisibleWidth;
|
||||
double auxBlockXMin = blockXMin - elem.FontSize;
|
||||
double auxBlockXMax = blockXMax + elem.FontSize;
|
||||
double auxBlockXMin = blockXMin - maxWidth;
|
||||
double auxBlockXMax = blockXMax + maxWidth;
|
||||
if (auxBlockXMax >= neighbourXMin && neighbourXMax >= auxBlockXMin)
|
||||
{
|
||||
_textElements.Remove(neighbour);
|
||||
@@ -560,7 +577,7 @@ namespace VAR.PdfTools
|
||||
i++;
|
||||
}
|
||||
|
||||
if(textElementNeighbours.Count == 0)
|
||||
if (textElementNeighbours.Count == 1)
|
||||
{
|
||||
textElementsCondensed.Add(elem);
|
||||
continue;
|
||||
@@ -571,18 +588,19 @@ namespace VAR.PdfTools
|
||||
foreach (PdfTextElement neighbour in textElementNeighbours)
|
||||
{
|
||||
double neighbourXMin = neighbour.GetX();
|
||||
foreach(PdfCharElement c in neighbour.Characters)
|
||||
foreach (PdfCharElement c in neighbour.Characters)
|
||||
{
|
||||
chars.Add(new PdfCharElement
|
||||
{
|
||||
Char = c.Char,
|
||||
Displacement = (c.Displacement + neighbourXMin) - blockXMin,
|
||||
Width = c.Width,
|
||||
});
|
||||
}
|
||||
}
|
||||
chars = chars.OrderBy(c => c.Displacement).ToList();
|
||||
var sbText = new StringBuilder();
|
||||
foreach(PdfCharElement c in chars)
|
||||
foreach (PdfCharElement c in chars)
|
||||
{
|
||||
sbText.Append(c.Char);
|
||||
}
|
||||
@@ -604,23 +622,64 @@ namespace VAR.PdfTools
|
||||
_textElements = textElementsCondensed;
|
||||
}
|
||||
|
||||
/// <summary>
/// Splits every element of <c>_textElements</c> wherever the gap between two consecutive
/// characters is wider than the element's <c>MaxWidth()</c>, then replaces
/// <c>_textElements</c> with the resulting list.
/// </summary>
/// <remarks>
/// Elements with no such gap are carried over unchanged, and the relative order of
/// elements and of their parts is preserved.
/// </remarks>
private void SplitTextElements()
{
    var textElementsSplitted = new List<PdfTextElement>();

    // Single forward pass. The source list is replaced wholesale below, so there is no
    // need to pop elements off its front one by one (List<T>.Remove shifts the whole
    // tail on every call, which made the previous loop quadratic).
    foreach (PdfTextElement elem in _textElements)
    {
        double maxWidth = elem.MaxWidth();

        // Index of the first character of the part currently being accumulated.
        int prevBreak = 0;
        for (int i = 1; i < elem.Characters.Count; i++)
        {
            PdfCharElement prevChar = elem.Characters[i - 1];
            double prevCharEnd = prevChar.Displacement + prevChar.Width;
            double charSeparation = elem.Characters[i].Displacement - prevCharEnd;
            if (charSeparation > maxWidth)
            {
                // NOTE(review): SubPart's second argument appears to be end-exclusive
                // (the final call below passes Characters.Count) - confirm.
                textElementsSplitted.Add(elem.SubPart(prevBreak, i));
                prevBreak = i;
            }
        }

        if (prevBreak == 0)
        {
            // No oversized gap found: keep the original element as-is.
            textElementsSplitted.Add(elem);
            continue;
        }

        // Remaining characters after the last break form the final part.
        textElementsSplitted.Add(elem.SubPart(prevBreak, elem.Characters.Count));
    }

    _textElements = textElementsSplitted;
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region Public methods
|
||||
|
||||
public List<string> GetColumn(string column)
|
||||
public Rect GetRect()
|
||||
{
|
||||
return GetColumn(column, true);
|
||||
Rect rect = null;
|
||||
foreach (PdfTextElement textElement in _textElements)
|
||||
{
|
||||
Rect elementRect = textElement.GetRect();
|
||||
if (rect == null) { rect = elementRect; }
|
||||
rect.Add(elementRect);
|
||||
}
|
||||
return rect;
|
||||
}
|
||||
|
||||
public List<string> GetColumn(string column, bool fuzzy)
|
||||
public PdfTextElementColumn GetColumn(string column, bool fuzzy = true)
|
||||
{
|
||||
PdfTextElement columnHead = FindElementByText(column, fuzzy);
|
||||
if (columnHead == null)
|
||||
{
|
||||
return new List<string>();
|
||||
return PdfTextElementColumn.Empty;
|
||||
}
|
||||
double headY = columnHead.GetY();
|
||||
double headY = columnHead.GetY() - columnHead.VisibleHeight;
|
||||
double headX1 = columnHead.GetX();
|
||||
double headX2 = headX1 + columnHead.VisibleWidth;
|
||||
|
||||
@@ -648,14 +707,20 @@ namespace VAR.PdfTools
|
||||
extentX2 = elemX1;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
PdfTextElementColumn columnData = GetColumn(columnHead, headY, headX1, headX2, extentX1, extentX2);
|
||||
|
||||
return columnData;
|
||||
}
|
||||
|
||||
public PdfTextElementColumn GetColumn(PdfTextElement columnHead, double headY, double headX1, double headX2, double extentX1, double extentX2)
|
||||
{
|
||||
// Get all the elements that intersects vertically, are down and sort results
|
||||
var columnDataRaw = new List<PdfTextElement>();
|
||||
foreach (PdfTextElement elem in _textElements)
|
||||
{
|
||||
if (TextElementVerticalIntersection(columnHead, elem) == false) { continue; }
|
||||
if (TextElementVerticalIntersection(elem, headX1, headX2) == false) { continue; }
|
||||
|
||||
// Only items down the column
|
||||
double elemY = elem.GetY();
|
||||
@@ -665,32 +730,94 @@ namespace VAR.PdfTools
|
||||
}
|
||||
columnDataRaw = columnDataRaw.OrderByDescending(elem => elem.GetY()).ToList();
|
||||
|
||||
// Only items completelly inside extents, amd break on the first element outside
|
||||
var columnData = new List<PdfTextElement>();
|
||||
// Only items completely inside extents; try splitting big elements and break on big elements that can't be split
|
||||
var columnElements = new List<PdfTextElement>();
|
||||
foreach (PdfTextElement elem in columnDataRaw)
|
||||
{
|
||||
double elemX1 = elem.GetX();
|
||||
double elemX2 = elemX1 + elem.VisibleWidth;
|
||||
if (elemX1 < extentX1 || elemX2 > extentX2) { break; }
|
||||
|
||||
columnData.Add(elem);
|
||||
// Add elements completely inside
|
||||
if (elemX1 > extentX1 && elemX2 < extentX2)
|
||||
{
|
||||
columnElements.Add(elem);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Try to split elements intersecting extents of the column
|
||||
double maxSpacing = elem.Characters.Average(c => c.Width) / 10;
|
||||
int indexStart = 0;
|
||||
int indexEnd = elem.Characters.Count - 1;
|
||||
bool indexStartValid = true;
|
||||
bool indexEndValid = true;
|
||||
if (elemX1 < extentX1)
|
||||
{
|
||||
// Search best start
|
||||
int index = 0;
|
||||
double characterPosition = elemX1 + elem.Characters[index].Displacement;
|
||||
while (characterPosition < extentX1 && index < (elem.Characters.Count - 1))
|
||||
{
|
||||
index++;
|
||||
characterPosition = elemX1 + elem.Characters[index].Displacement;
|
||||
}
|
||||
double spacing = elem.GetCharacterPreviousSpacing(index);
|
||||
while (spacing < maxSpacing && index < (elem.Characters.Count - 1))
|
||||
{
|
||||
index++;
|
||||
spacing = elem.GetCharacterPreviousSpacing(index);
|
||||
}
|
||||
if (spacing < maxSpacing) { indexStartValid = false; }
|
||||
indexStart = index;
|
||||
}
|
||||
|
||||
if (elemX2 > extentX2)
|
||||
{
|
||||
// Search best end
|
||||
int index = elem.Characters.Count - 1;
|
||||
double characterPosition = elemX1 + elem.Characters[index].Displacement + elem.Characters[index].Width;
|
||||
while (characterPosition > extentX2 && index > 0)
|
||||
{
|
||||
index--;
|
||||
characterPosition = elemX1 + elem.Characters[index].Displacement + elem.Characters[index].Width;
|
||||
}
|
||||
double spacing = elem.GetCharacterPrecedingSpacing(index);
|
||||
while (spacing < maxSpacing && index > 0)
|
||||
{
|
||||
index--;
|
||||
spacing = elem.GetCharacterPrecedingSpacing(index);
|
||||
}
|
||||
if (spacing < maxSpacing) { indexEndValid = false; }
|
||||
indexEnd = index;
|
||||
}
|
||||
|
||||
// Break when there is no good split, spanning the whole extent
|
||||
if (indexStartValid == false && indexEndValid == false) { break; }
|
||||
|
||||
// Continue when only one of the sides is invalid. (outside elements intersecting extents of the column)
|
||||
if (indexStartValid == false || indexEndValid == false) { continue; }
|
||||
|
||||
// Add splitted element
|
||||
columnElements.Add(elem.SubPart(indexStart, indexEnd + 1));
|
||||
}
|
||||
|
||||
var columnData = new PdfTextElementColumn(columnHead, columnElements, headY, extentX1, extentX2);
|
||||
return columnData;
|
||||
}
|
||||
|
||||
public List<string> GetColumnAsStrings(string column, bool fuzzy = true)
|
||||
{
|
||||
PdfTextElementColumn columnData = GetColumn(column, fuzzy);
|
||||
|
||||
// Emit result
|
||||
var result = new List<string>();
|
||||
foreach (PdfTextElement elem in columnData)
|
||||
foreach (PdfTextElement elem in columnData.Elements)
|
||||
{
|
||||
result.Add(elem.VisibleText);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public string GetField(string field)
|
||||
{
|
||||
return GetField(field, true);
|
||||
}
|
||||
|
||||
public string GetField(string field, bool fuzzy)
|
||||
public string GetFieldAsString(string field, bool fuzzy = true)
|
||||
{
|
||||
PdfTextElement fieldTitle = FindElementByText(field, fuzzy);
|
||||
if (fieldTitle == null)
|
||||
@@ -718,12 +845,7 @@ namespace VAR.PdfTools
|
||||
return fieldData.OrderBy(elem => elem.GetX()).FirstOrDefault().VisibleText;
|
||||
}
|
||||
|
||||
public bool HasText(string text)
|
||||
{
|
||||
return HasText(text, true);
|
||||
}
|
||||
|
||||
public bool HasText(string text, bool fuzzy)
|
||||
public bool HasText(string text, bool fuzzy = true)
|
||||
{
|
||||
List<PdfTextElement> list = FindElementsContainingText(text, fuzzy);
|
||||
return (list.Count > 0);
|
||||
|
||||
@@ -6,9 +6,9 @@ using System.Runtime.InteropServices;
|
||||
[assembly: AssemblyConfiguration("")]
|
||||
[assembly: AssemblyCompany("VAR")]
|
||||
[assembly: AssemblyProduct("VAR.PdfTools")]
|
||||
[assembly: AssemblyCopyright("Copyright © VAR 2016-2017")]
|
||||
[assembly: AssemblyCopyright("Copyright © VAR 2016-2019")]
|
||||
[assembly: AssemblyTrademark("")]
|
||||
[assembly: AssemblyCulture("")]
|
||||
[assembly: ComVisible(false)]
|
||||
[assembly: Guid("eb7e003a-6a95-4002-809f-926c7c8a11e9")]
|
||||
[assembly: AssemblyVersion("1.3.*")]
|
||||
[assembly: AssemblyVersion("1.6.0.*")]
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
<ErrorReport>prompt</ErrorReport>
|
||||
<WarningLevel>4</WarningLevel>
|
||||
<TargetFrameworkVersion>v4.6.1</TargetFrameworkVersion>
|
||||
<LangVersion>6</LangVersion>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release .Net 4.6.1|AnyCPU' ">
|
||||
<DebugType>pdbonly</DebugType>
|
||||
@@ -54,6 +55,7 @@
|
||||
<ItemGroup>
|
||||
<Reference Include="System" />
|
||||
<Reference Include="System.Core" />
|
||||
<Reference Include="System.Drawing" />
|
||||
<Reference Include="System.Xml.Linq" />
|
||||
<Reference Include="System.Data.DataSetExtensions" />
|
||||
<Reference Include="System.Data" />
|
||||
@@ -61,6 +63,7 @@
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Compile Include="Maths\Matrix3x3.cs" />
|
||||
<Compile Include="Maths\Rect.cs" />
|
||||
<Compile Include="PdfContentAction.cs" />
|
||||
<Compile Include="PdfDocument.cs" />
|
||||
<Compile Include="PdfDocumentPage.cs" />
|
||||
@@ -81,7 +84,9 @@
|
||||
<Compile Include="PdfElements\PdfStream.cs" />
|
||||
<Compile Include="PdfElements\PdfString.cs" />
|
||||
<Compile Include="PdfParser.cs" />
|
||||
<Compile Include="PdfPageRenderer.cs" />
|
||||
<Compile Include="PdfStandar14FontMetrics.cs" />
|
||||
<Compile Include="PdfTextElement.cs" />
|
||||
<Compile Include="PdfTextExtractor.cs" />
|
||||
<Compile Include="Properties\AssemblyInfo.cs" />
|
||||
<Compile Include="Maths\Vector3D.cs" />
|
||||
|
||||
Reference in New Issue
Block a user