Compare commits
16 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| d5d843014a | |||
| b9750745bc | |||
| c8c7e32acc | |||
| 781f212289 | |||
| 8a966049f6 | |||
| 80ab9b9ff3 | |||
| 9af363529c | |||
| 386b38bd21 | |||
| 53d07db9c0 | |||
| 9bc7854b48 | |||
| 77a5cd1b0e | |||
| b6611b6285 | |||
| 7badc8e4b1 | |||
| 203f30e55c | |||
| c3967dd439 | |||
| da8b512c1b |
19
README.md
19
README.md
@@ -5,27 +5,33 @@
|
|||||||
### VAR.PdfTools
|
### VAR.PdfTools
|
||||||
Add the resulting assembly as a reference in your projects, and add this line to your code:
|
Add the resulting assembly as a reference in your projects, and add this line to your code:
|
||||||
|
|
||||||
using VAR.PdfTools;
|
```csharp
|
||||||
|
using VAR.PdfTools;
|
||||||
|
```
|
||||||
|
|
||||||
Then extract the contents of a data column using:
|
Then extract the contents of a data column using:
|
||||||
|
|
||||||
|
```csharp
|
||||||
var columnData = new List<string>();
|
var columnData = new List<string>();
|
||||||
PdfDocument doc = PdfDocument.Load("document.pdf");
|
PdfDocument doc = PdfDocument.Load("document.pdf");
|
||||||
foreach (PdfDocumentPage page in doc.Pages)
|
foreach (PdfDocumentPage page in doc.Pages)
|
||||||
{
|
{
|
||||||
PdfTextExtractor extractor = new PdfTextExtractor(page);
|
PdfTextExtractor extractor = new PdfTextExtractor(page);
|
||||||
columnData.AddRange(extractor.GetColumn("Column"));
|
columnData.AddRange(extractor.GetColumnAsStrings("Column"));
|
||||||
}
|
}
|
||||||
|
```
|
||||||
|
|
||||||
Or the content of a field (text on the right of the indicated text):
|
Or the content of a field (text on the right of the indicated text):
|
||||||
|
|
||||||
|
```csharp
|
||||||
var fieldData = new List<string>();
|
var fieldData = new List<string>();
|
||||||
PdfDocument doc = PdfDocument.Load("document.pdf");
|
PdfDocument doc = PdfDocument.Load("document.pdf");
|
||||||
foreach (PdfDocumentPage page in doc.Pages)
|
foreach (PdfDocumentPage page in doc.Pages)
|
||||||
{
|
{
|
||||||
PdfTextExtractor extractor = new PdfTextExtractor(page);
|
PdfTextExtractor extractor = new PdfTextExtractor(page);
|
||||||
fieldData.Add(extractor.GetField(txtFieldName.Text));
|
fieldData.Add(extractor.GetFieldAsString(txtFieldName.Text));
|
||||||
}
|
}
|
||||||
|
```
|
||||||
|
|
||||||
### VAR.PdfTools.Workbench
|
### VAR.PdfTools.Workbench
|
||||||
It is a simple Windows.Forms application to test the basic functionality of the library.
|
It is a simple Windows.Forms application to test the basic functionality of the library.
|
||||||
@@ -34,7 +40,8 @@ It is a simple Windows.Forms application, to test basic funcitionallity of the l
|
|||||||
A Visual Studio 2015 and 2010 solutions are provided. Simply, click build on the IDE.
|
A Visual Studio 2015 and 2010 solutions are provided. Simply, click build on the IDE.
|
||||||
|
|
||||||
A NuGet package can be built using:
|
A NuGet package can be built using:
|
||||||
VAR.PdfTools\Build.NuGet.cmd
|
|
||||||
|
VAR.PdfTools\Build.NuGet.cmd
|
||||||
|
|
||||||
## Contributing
|
## Contributing
|
||||||
1. Fork it!
|
1. Fork it!
|
||||||
@@ -50,7 +57,7 @@ A .nuget package can be build using:
|
|||||||
|
|
||||||
The MIT License (MIT)
|
The MIT License (MIT)
|
||||||
|
|
||||||
Copyright (c) 2016-2017 Valeriano Alfonso Rodriguez
|
Copyright (c) 2016-2019 Valeriano Alfonso Rodriguez
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
|||||||
117
VAR.PdfTools.Workbench/Configuration.cs
Normal file
117
VAR.PdfTools.Workbench/Configuration.cs
Normal file
@@ -0,0 +1,117 @@
|
|||||||
|
using System.Collections.Generic;
|
||||||
|
using System.IO;
|
||||||
|
using System.Text;
|
||||||
|
|
||||||
|
namespace VAR.PdfTools.Workbench
|
||||||
|
{
|
||||||
|
/// <summary>
/// Minimal key/value configuration store for the workbench application.
/// Items are kept in memory and persisted to a text file that sits next to the
/// application assembly, one <c>key|value</c> pair per line.
/// </summary>
/// <remarks>
/// Keys and values are written verbatim (no escaping). A key containing '|' or a
/// key/value containing a line break will therefore not round-trip through
/// <see cref="Save"/> and <see cref="Load"/>.
/// </remarks>
public class Configuration
{
    /// <summary>Character separating the key from the value on each line of the file.</summary>
    private const char KeyValueSeparator = '|';

    /// <summary>Text stored for a boolean <c>true</c>.</summary>
    private const string TrueText = "true";

    /// <summary>Text stored for a boolean <c>false</c>.</summary>
    private const string FalseText = "false";

    private readonly Dictionary<string, string> _configItems = new Dictionary<string, string>();

    /// <summary>
    /// Builds the configuration file path: same directory and base name as the
    /// application assembly, with the ".cfg" extension.
    /// </summary>
    private static string GetConfigFileName()
    {
        // GetEntryAssembly() may return null (for example when the managed code is
        // hosted by an unmanaged process); fall back to this assembly in that case.
        System.Reflection.Assembly assembly =
            System.Reflection.Assembly.GetEntryAssembly() ??
            System.Reflection.Assembly.GetExecutingAssembly();

        string location = assembly.Location;
        string path = Path.GetDirectoryName(location);
        string filenameWithoutExtension = Path.GetFileNameWithoutExtension(location);

        return Path.Combine(path, filenameWithoutExtension + ".cfg");
    }

    /// <summary>
    /// Reads every line of the configuration file.
    /// </summary>
    /// <returns>The file lines, or an empty array when the file does not exist.</returns>
    private static string[] GetConfigurationLines()
    {
        string configFile = GetConfigFileName();
        if (File.Exists(configFile) == false)
        {
            return new string[0];
        }
        return File.ReadAllLines(configFile);
    }

    /// <summary>
    /// Replaces the in-memory items with the contents of the configuration file.
    /// Lines without a '|' separator are ignored; when a key appears more than
    /// once, the last occurrence wins.
    /// </summary>
    public void Load()
    {
        _configItems.Clear();
        foreach (string configLine in GetConfigurationLines())
        {
            // Only the first separator splits the line, so values may contain '|'.
            int idxSplit = configLine.IndexOf(KeyValueSeparator);
            if (idxSplit < 0) { continue; }

            string configName = configLine.Substring(0, idxSplit);
            string configData = configLine.Substring(idxSplit + 1);
            _configItems[configName] = configData;
        }
    }

    /// <summary>
    /// Writes every in-memory item to the configuration file, overwriting it.
    /// </summary>
    public void Save()
    {
        StringBuilder sbConfig = new StringBuilder();
        foreach (KeyValuePair<string, string> pair in _configItems)
        {
            sbConfig.AppendFormat("{0}|{1}\n", pair.Key, pair.Value);
        }
        File.WriteAllText(GetConfigFileName(), sbConfig.ToString());
    }

    /// <summary>
    /// Gets the text stored under <paramref name="key"/>.
    /// </summary>
    /// <param name="key">Item name.</param>
    /// <param name="defaultValue">Value returned when the key is not present.</param>
    /// <returns>The stored text, or <paramref name="defaultValue"/>.</returns>
    public string Get(string key, string defaultValue)
    {
        string value;
        if (_configItems.TryGetValue(key, out value))
        {
            return value;
        }
        return defaultValue;
    }

    /// <summary>
    /// Gets the boolean stored under <paramref name="key"/>.
    /// </summary>
    /// <param name="key">Item name.</param>
    /// <param name="defaultValue">Value returned when the key is not present.</param>
    /// <returns>
    /// <c>true</c> only when the stored text is exactly "true" (case-sensitive);
    /// <c>false</c> for any other stored text; <paramref name="defaultValue"/>
    /// when the key is not present.
    /// </returns>
    public bool Get(string key, bool defaultValue)
    {
        string value;
        if (_configItems.TryGetValue(key, out value))
        {
            return (value == TrueText);
        }
        return defaultValue;
    }

    /// <summary>
    /// Stores a text value, replacing any previous value of the same key.
    /// </summary>
    /// <param name="key">Item name.</param>
    /// <param name="value">Text to store.</param>
    public void Set(string key, string value)
    {
        _configItems[key] = value;
    }

    /// <summary>
    /// Stores a boolean value as the text "true" or "false", replacing any
    /// previous value of the same key.
    /// </summary>
    /// <param name="key">Item name.</param>
    /// <param name="value">Boolean to store.</param>
    public void Set(string key, bool value)
    {
        Set(key, value ? TrueText : FalseText);
    }
}
|
||||||
|
}
|
||||||
30
VAR.PdfTools.Workbench/FrmPdfInfo.Designer.cs
generated
30
VAR.PdfTools.Workbench/FrmPdfInfo.Designer.cs
generated
@@ -47,6 +47,8 @@
|
|||||||
this.btnGetField3 = new System.Windows.Forms.Button();
|
this.btnGetField3 = new System.Windows.Forms.Button();
|
||||||
this.txtField3 = new System.Windows.Forms.TextBox();
|
this.txtField3 = new System.Windows.Forms.TextBox();
|
||||||
this.btnGetColumn3 = new System.Windows.Forms.Button();
|
this.btnGetColumn3 = new System.Windows.Forms.Button();
|
||||||
|
this.txtPages = new System.Windows.Forms.TextBox();
|
||||||
|
this.chkRender = new System.Windows.Forms.CheckBox();
|
||||||
this.SuspendLayout();
|
this.SuspendLayout();
|
||||||
//
|
//
|
||||||
// lblOutputs
|
// lblOutputs
|
||||||
@@ -119,7 +121,7 @@
|
|||||||
//
|
//
|
||||||
this.btnGetColumn1.Location = new System.Drawing.Point(292, 51);
|
this.btnGetColumn1.Location = new System.Drawing.Point(292, 51);
|
||||||
this.btnGetColumn1.Name = "btnGetColumn1";
|
this.btnGetColumn1.Name = "btnGetColumn1";
|
||||||
this.btnGetColumn1.Size = new System.Drawing.Size(60, 23);
|
this.btnGetColumn1.Size = new System.Drawing.Size(69, 23);
|
||||||
this.btnGetColumn1.TabIndex = 12;
|
this.btnGetColumn1.TabIndex = 12;
|
||||||
this.btnGetColumn1.Text = "GetColumn";
|
this.btnGetColumn1.Text = "GetColumn";
|
||||||
this.btnGetColumn1.UseVisualStyleBackColor = true;
|
this.btnGetColumn1.UseVisualStyleBackColor = true;
|
||||||
@@ -194,7 +196,7 @@
|
|||||||
//
|
//
|
||||||
this.btnGetColumn2.Location = new System.Drawing.Point(292, 80);
|
this.btnGetColumn2.Location = new System.Drawing.Point(292, 80);
|
||||||
this.btnGetColumn2.Name = "btnGetColumn2";
|
this.btnGetColumn2.Name = "btnGetColumn2";
|
||||||
this.btnGetColumn2.Size = new System.Drawing.Size(60, 23);
|
this.btnGetColumn2.Size = new System.Drawing.Size(69, 23);
|
||||||
this.btnGetColumn2.TabIndex = 19;
|
this.btnGetColumn2.TabIndex = 19;
|
||||||
this.btnGetColumn2.Text = "GetColumn";
|
this.btnGetColumn2.Text = "GetColumn";
|
||||||
this.btnGetColumn2.UseVisualStyleBackColor = true;
|
this.btnGetColumn2.UseVisualStyleBackColor = true;
|
||||||
@@ -231,17 +233,37 @@
|
|||||||
//
|
//
|
||||||
this.btnGetColumn3.Location = new System.Drawing.Point(292, 109);
|
this.btnGetColumn3.Location = new System.Drawing.Point(292, 109);
|
||||||
this.btnGetColumn3.Name = "btnGetColumn3";
|
this.btnGetColumn3.Name = "btnGetColumn3";
|
||||||
this.btnGetColumn3.Size = new System.Drawing.Size(60, 23);
|
this.btnGetColumn3.Size = new System.Drawing.Size(69, 23);
|
||||||
this.btnGetColumn3.TabIndex = 23;
|
this.btnGetColumn3.TabIndex = 23;
|
||||||
this.btnGetColumn3.Text = "GetColumn";
|
this.btnGetColumn3.Text = "GetColumn";
|
||||||
this.btnGetColumn3.UseVisualStyleBackColor = true;
|
this.btnGetColumn3.UseVisualStyleBackColor = true;
|
||||||
this.btnGetColumn3.Click += new System.EventHandler(this.btnGetColumn3_Click);
|
this.btnGetColumn3.Click += new System.EventHandler(this.btnGetColumn3_Click);
|
||||||
//
|
//
|
||||||
|
// txtPages
|
||||||
|
//
|
||||||
|
this.txtPages.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Right)));
|
||||||
|
this.txtPages.Location = new System.Drawing.Point(397, 82);
|
||||||
|
this.txtPages.Name = "txtPages";
|
||||||
|
this.txtPages.Size = new System.Drawing.Size(75, 20);
|
||||||
|
this.txtPages.TabIndex = 27;
|
||||||
|
//
|
||||||
|
// chkRender
|
||||||
|
//
|
||||||
|
this.chkRender.AutoSize = true;
|
||||||
|
this.chkRender.Location = new System.Drawing.Point(292, 138);
|
||||||
|
this.chkRender.Name = "chkRender";
|
||||||
|
this.chkRender.Size = new System.Drawing.Size(61, 17);
|
||||||
|
this.chkRender.TabIndex = 28;
|
||||||
|
this.chkRender.Text = "Render";
|
||||||
|
this.chkRender.UseVisualStyleBackColor = true;
|
||||||
|
//
|
||||||
// FrmPdfInfo
|
// FrmPdfInfo
|
||||||
//
|
//
|
||||||
this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
|
this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
|
||||||
this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
|
this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
|
||||||
this.ClientSize = new System.Drawing.Size(484, 461);
|
this.ClientSize = new System.Drawing.Size(484, 461);
|
||||||
|
this.Controls.Add(this.chkRender);
|
||||||
|
this.Controls.Add(this.txtPages);
|
||||||
this.Controls.Add(this.btnHasText3);
|
this.Controls.Add(this.btnHasText3);
|
||||||
this.Controls.Add(this.btnGetField3);
|
this.Controls.Add(this.btnGetField3);
|
||||||
this.Controls.Add(this.txtField3);
|
this.Controls.Add(this.txtField3);
|
||||||
@@ -291,5 +313,7 @@
|
|||||||
private System.Windows.Forms.Button btnGetField3;
|
private System.Windows.Forms.Button btnGetField3;
|
||||||
private System.Windows.Forms.TextBox txtField3;
|
private System.Windows.Forms.TextBox txtField3;
|
||||||
private System.Windows.Forms.Button btnGetColumn3;
|
private System.Windows.Forms.Button btnGetColumn3;
|
||||||
|
private System.Windows.Forms.TextBox txtPages;
|
||||||
|
private System.Windows.Forms.CheckBox chkRender;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1,7 +1,6 @@
|
|||||||
using System;
|
using System;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using System.Drawing;
|
using System.Drawing;
|
||||||
using System.Drawing.Drawing2D;
|
|
||||||
using System.Drawing.Imaging;
|
using System.Drawing.Imaging;
|
||||||
using System.IO;
|
using System.IO;
|
||||||
using System.Linq;
|
using System.Linq;
|
||||||
@@ -20,19 +19,27 @@ namespace VAR.PdfTools.Workbench
|
|||||||
|
|
||||||
private void FrmPdfInfo_Load(object sender, EventArgs e)
|
private void FrmPdfInfo_Load(object sender, EventArgs e)
|
||||||
{
|
{
|
||||||
txtPdfPath.Text = Properties.Settings.Default.LastPdfPath;
|
var configuration = new Configuration();
|
||||||
txtField1.Text = Properties.Settings.Default.Field1;
|
configuration.Load();
|
||||||
txtField2.Text = Properties.Settings.Default.Field2;
|
txtPdfPath.Text = configuration.Get("LastPdfPath", string.Empty);
|
||||||
txtField3.Text = Properties.Settings.Default.Field3;
|
txtField1.Text = configuration.Get("Field1", string.Empty);
|
||||||
|
txtField2.Text = configuration.Get("Field2", string.Empty);
|
||||||
|
txtField3.Text = configuration.Get("Field3", string.Empty);
|
||||||
|
txtPages.Text = configuration.Get("Pages", string.Empty);
|
||||||
|
chkRender.Checked = configuration.Get("Render", false);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void FrmPdfInfo_FormClosing(object sender, FormClosingEventArgs e)
|
private void FrmPdfInfo_FormClosing(object sender, FormClosingEventArgs e)
|
||||||
{
|
{
|
||||||
Properties.Settings.Default.LastPdfPath = txtPdfPath.Text;
|
var configuration = new Configuration();
|
||||||
Properties.Settings.Default.Field1 = txtField1.Text;
|
var configItems = new Dictionary<string, string>();
|
||||||
Properties.Settings.Default.Field2 = txtField2.Text;
|
configuration.Set("LastPdfPath", txtPdfPath.Text);
|
||||||
Properties.Settings.Default.Field3 = txtField3.Text;
|
configuration.Set("Field1", txtField1.Text);
|
||||||
Properties.Settings.Default.Save();
|
configuration.Set("Field2", txtField2.Text);
|
||||||
|
configuration.Set("Field3", txtField3.Text);
|
||||||
|
configuration.Set("Pages", txtPages.Text);
|
||||||
|
configuration.Set("Render", chkRender.Checked);
|
||||||
|
configuration.Save();
|
||||||
}
|
}
|
||||||
|
|
||||||
private void btnBrowse_Click(object sender, EventArgs e)
|
private void btnBrowse_Click(object sender, EventArgs e)
|
||||||
@@ -98,7 +105,7 @@ namespace VAR.PdfTools.Workbench
|
|||||||
{
|
{
|
||||||
var fontNames = textElement.Childs.Select(c => c.Font == null ? "#NULL#" : c.Font.Name);
|
var fontNames = textElement.Childs.Select(c => c.Font == null ? "#NULL#" : c.Font.Name);
|
||||||
StringBuilder sbFontName = new StringBuilder();
|
StringBuilder sbFontName = new StringBuilder();
|
||||||
foreach(string fontNameAux in fontNames)
|
foreach (string fontNameAux in fontNames)
|
||||||
{
|
{
|
||||||
if (sbFontName.Length > 0) { sbFontName.Append(";"); }
|
if (sbFontName.Length > 0) { sbFontName.Append(";"); }
|
||||||
sbFontName.Append(fontNameAux);
|
sbFontName.Append(fontNameAux);
|
||||||
@@ -191,6 +198,55 @@ namespace VAR.PdfTools.Workbench
|
|||||||
Action_GetColumn(pdfPath, column);
|
Action_GetColumn(pdfPath, column);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Parses the page selection typed in <c>txtPages</c> into 1-based page numbers.
/// </summary>
/// <remarks>
/// The text is a comma-separated list whose items are either a single page
/// ("3") or an inclusive range ("2-5"). Items that cannot be parsed are ignored.
/// A range written backwards ("5-2") is treated as "2-5", and ranges are clipped
/// to 1..<paramref name="maxPages"/> so that a mistyped bound can neither throw
/// nor allocate a huge list. When the text is empty, or none of its items could
/// be parsed, every page of the document is selected.
/// </remarks>
/// <param name="maxPages">Number of pages in the document.</param>
/// <returns>The selected page numbers, in the order they were written.</returns>
private IEnumerable<int> GetSelectedPages(int maxPages)
{
    string pages = txtPages.Text;
    if (string.IsNullOrEmpty(pages))
    {
        return Enumerable.Range(1, maxPages);
    }

    List<int> listPages = new List<int>();

    // Tracks whether at least one item was understood, so that a valid selection
    // lying entirely outside the document yields "no pages" instead of "all pages".
    bool anyParsed = false;

    // Split(',') returns the whole text as a single item when there is no comma.
    foreach (string part in pages.Split(','))
    {
        string[] range = part.Split('-');
        if (range.Length == 1)
        {
            // Single page number.
            int pageNum;
            if (int.TryParse(part, out pageNum))
            {
                listPages.Add(pageNum);
                anyParsed = true;
            }
        }
        else if (range.Length == 2)
        {
            // Inclusive range "start-end".
            int pageStart;
            int pageEnd;
            if (int.TryParse(range[0], out pageStart) && int.TryParse(range[1], out pageEnd))
            {
                anyParsed = true;
                int first = Math.Max(1, Math.Min(pageStart, pageEnd));
                int last = Math.Min(maxPages, Math.Max(pageStart, pageEnd));
                if (last >= first)
                {
                    listPages.AddRange(Enumerable.Range(first, (last - first) + 1));
                }
            }
        }
    }

    if (anyParsed == false)
    {
        listPages.AddRange(Enumerable.Range(1, maxPages));
    }
    return listPages;
}
|
||||||
|
|
||||||
private void Action_HasText(string pdfPath, string text)
|
private void Action_HasText(string pdfPath, string text)
|
||||||
{
|
{
|
||||||
if (System.IO.File.Exists(pdfPath) == false)
|
if (System.IO.File.Exists(pdfPath) == false)
|
||||||
@@ -201,10 +257,13 @@ namespace VAR.PdfTools.Workbench
|
|||||||
|
|
||||||
PdfDocument doc = PdfDocument.Load(pdfPath);
|
PdfDocument doc = PdfDocument.Load(pdfPath);
|
||||||
|
|
||||||
|
IEnumerable<int> selectedPages = GetSelectedPages(doc.Pages.Count);
|
||||||
List<string> lines = new List<string>();
|
List<string> lines = new List<string>();
|
||||||
int pageNum = 1;
|
int pageNum = 0;
|
||||||
foreach (PdfDocumentPage page in doc.Pages)
|
foreach (PdfDocumentPage page in doc.Pages)
|
||||||
{
|
{
|
||||||
|
pageNum++;
|
||||||
|
if (selectedPages.Contains(pageNum) == false) { continue; }
|
||||||
PdfTextExtractor extractor = new PdfTextExtractor(page);
|
PdfTextExtractor extractor = new PdfTextExtractor(page);
|
||||||
lines.Add(string.Format("Page({0}) : {1}", pageNum, Convert.ToString(extractor.HasText(text))));
|
lines.Add(string.Format("Page({0}) : {1}", pageNum, Convert.ToString(extractor.HasText(text))));
|
||||||
}
|
}
|
||||||
@@ -221,11 +280,15 @@ namespace VAR.PdfTools.Workbench
|
|||||||
|
|
||||||
PdfDocument doc = PdfDocument.Load(pdfPath);
|
PdfDocument doc = PdfDocument.Load(pdfPath);
|
||||||
|
|
||||||
|
IEnumerable<int> selectedPages = GetSelectedPages(doc.Pages.Count);
|
||||||
var fieldData = new List<string>();
|
var fieldData = new List<string>();
|
||||||
|
int pageNum = 0;
|
||||||
foreach (PdfDocumentPage page in doc.Pages)
|
foreach (PdfDocumentPage page in doc.Pages)
|
||||||
{
|
{
|
||||||
|
pageNum++;
|
||||||
|
if (selectedPages.Contains(pageNum) == false) { continue; }
|
||||||
PdfTextExtractor extractor = new PdfTextExtractor(page);
|
PdfTextExtractor extractor = new PdfTextExtractor(page);
|
||||||
fieldData.Add(extractor.GetField(field));
|
fieldData.Add(extractor.GetFieldAsString(field));
|
||||||
}
|
}
|
||||||
txtOutput.Lines = fieldData.ToArray();
|
txtOutput.Lines = fieldData.ToArray();
|
||||||
}
|
}
|
||||||
@@ -239,16 +302,43 @@ namespace VAR.PdfTools.Workbench
|
|||||||
}
|
}
|
||||||
|
|
||||||
PdfDocument doc = PdfDocument.Load(pdfPath);
|
PdfDocument doc = PdfDocument.Load(pdfPath);
|
||||||
|
string baseDocumentPath = Path.GetDirectoryName(txtPdfPath.Text);
|
||||||
|
string baseDocumentFilename = Path.GetFileNameWithoutExtension(txtPdfPath.Text);
|
||||||
|
|
||||||
var columnData = new List<string>();
|
IEnumerable<int> selectedPages = GetSelectedPages(doc.Pages.Count);
|
||||||
|
var columns = new List<string>();
|
||||||
|
int pageNum = 0;
|
||||||
foreach (PdfDocumentPage page in doc.Pages)
|
foreach (PdfDocumentPage page in doc.Pages)
|
||||||
{
|
{
|
||||||
|
pageNum++;
|
||||||
|
if (selectedPages.Contains(pageNum) == false) { continue; }
|
||||||
PdfTextExtractor extractor = new PdfTextExtractor(page);
|
PdfTextExtractor extractor = new PdfTextExtractor(page);
|
||||||
columnData.AddRange(extractor.GetColumn(column));
|
PdfTextElementColumn columnData;
|
||||||
|
if (column.StartsWith("#"))
|
||||||
|
{
|
||||||
|
string[] columnParts = column.Substring(1).Split(';');
|
||||||
|
double y = Convert.ToDouble(columnParts[0]);
|
||||||
|
double x1 = Convert.ToDouble(columnParts[1]);
|
||||||
|
double x2 = Convert.ToDouble(columnParts[2]);
|
||||||
|
columnData = extractor.GetColumn(null, y, x1, x2, x1, x2);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
columnData = extractor.GetColumn(column);
|
||||||
|
}
|
||||||
|
if (chkRender.Checked)
|
||||||
|
{
|
||||||
|
var pdfPageRenderer = new PdfPageRenderer(extractor);
|
||||||
|
Bitmap bmp = pdfPageRenderer.Render();
|
||||||
|
pdfPageRenderer.RenderColumn(columnData, bmp);
|
||||||
|
string fileName = Path.Combine(baseDocumentPath, string.Format("{0}_{1:0000}.png", baseDocumentFilename, pageNum));
|
||||||
|
bmp.Save(fileName, ImageFormat.Png);
|
||||||
|
}
|
||||||
|
columns.AddRange(columnData.Elements.Select(t => t.VisibleText));
|
||||||
}
|
}
|
||||||
txtOutput.Lines = columnData.ToArray();
|
txtOutput.Lines = columns.ToArray();
|
||||||
}
|
}
|
||||||
|
|
||||||
private void btnRender_Click(object sender, EventArgs e)
|
private void btnRender_Click(object sender, EventArgs e)
|
||||||
{
|
{
|
||||||
if (File.Exists(txtPdfPath.Text) == false)
|
if (File.Exists(txtPdfPath.Text) == false)
|
||||||
@@ -257,8 +347,6 @@ namespace VAR.PdfTools.Workbench
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
int MaxSize = 10000;
|
|
||||||
|
|
||||||
PdfDocument doc = PdfDocument.Load(txtPdfPath.Text);
|
PdfDocument doc = PdfDocument.Load(txtPdfPath.Text);
|
||||||
string baseDocumentPath = Path.GetDirectoryName(txtPdfPath.Text);
|
string baseDocumentPath = Path.GetDirectoryName(txtPdfPath.Text);
|
||||||
string baseDocumentFilename = Path.GetFileNameWithoutExtension(txtPdfPath.Text);
|
string baseDocumentFilename = Path.GetFileNameWithoutExtension(txtPdfPath.Text);
|
||||||
@@ -267,139 +355,24 @@ namespace VAR.PdfTools.Workbench
|
|||||||
lines.Add(string.Format("Filename : {0}", baseDocumentFilename));
|
lines.Add(string.Format("Filename : {0}", baseDocumentFilename));
|
||||||
lines.Add(string.Format("Number of Pages : {0}", doc.Pages.Count));
|
lines.Add(string.Format("Number of Pages : {0}", doc.Pages.Count));
|
||||||
|
|
||||||
int pageNumber = 1;
|
IEnumerable<int> selectedPages = GetSelectedPages(doc.Pages.Count);
|
||||||
|
int pageNum = 0;
|
||||||
foreach (PdfDocumentPage page in doc.Pages)
|
foreach (PdfDocumentPage page in doc.Pages)
|
||||||
{
|
{
|
||||||
double pageXMin = double.MaxValue;
|
pageNum++;
|
||||||
double pageYMin = double.MaxValue;
|
if (selectedPages.Contains(pageNum) == false) { continue; }
|
||||||
double pageXMax = double.MinValue;
|
|
||||||
double pageYMax = double.MinValue;
|
|
||||||
|
|
||||||
// Preprocess page to get max size
|
PdfPageRenderer pdfPageRenderer = new PdfPageRenderer(page);
|
||||||
PdfTextExtractor extractor = new PdfTextExtractor(page);
|
Bitmap bmp = pdfPageRenderer.Render();
|
||||||
foreach (PdfTextElement textElement in extractor.Elements)
|
|
||||||
{
|
|
||||||
double textElementXMin = textElement.GetX();
|
|
||||||
double textElementYMax = textElement.GetY();
|
|
||||||
double textElementXMax = textElementXMin + textElement.VisibleWidth;
|
|
||||||
double textElementYMin = textElementYMax - textElement.VisibleHeight;
|
|
||||||
|
|
||||||
if (textElementXMax > pageXMax) { pageXMax = textElementXMax; }
|
lines.Add(string.Format("Page {0:0000} TextElements : {1}", pageNum, pdfPageRenderer.Extractor.Elements.Count));
|
||||||
if (textElementYMax > pageYMax) { pageYMax = textElementYMax; }
|
|
||||||
if (textElementXMin < pageXMin) { pageXMin = textElementXMin; }
|
|
||||||
if (textElementYMin < pageYMin) { pageYMin = textElementYMin; }
|
|
||||||
}
|
|
||||||
lines.Add(string.Format("Page {0:0000} TextElements : {1}", pageNumber, extractor.Elements.Count));
|
|
||||||
|
|
||||||
// Prepare page image
|
// Save image to disk
|
||||||
int pageWidth = (int)Math.Ceiling(pageXMax - pageXMin);
|
string fileName = Path.Combine(baseDocumentPath, string.Format("{0}_{1:0000}.png", baseDocumentFilename, pageNum));
|
||||||
int pageHeight = (int)Math.Ceiling(pageYMax - pageYMin);
|
bmp.Save(fileName, ImageFormat.Png);
|
||||||
int Scale = 10;
|
|
||||||
while ((pageWidth * Scale) > MaxSize) { Scale--; }
|
|
||||||
while ((pageHeight * Scale) > MaxSize) { Scale--; }
|
|
||||||
if (Scale <= 0) { Scale = 1; }
|
|
||||||
using (Bitmap bmp = new Bitmap(pageWidth * Scale, pageHeight * Scale, PixelFormat.Format32bppArgb))
|
|
||||||
using (Graphics gc = Graphics.FromImage(bmp))
|
|
||||||
using (Pen penTextElem = new Pen(Color.Blue))
|
|
||||||
{
|
|
||||||
gc.Clear(Color.White);
|
|
||||||
|
|
||||||
// Draw text elements
|
|
||||||
foreach (PdfTextElement textElement in extractor.Elements)
|
|
||||||
{
|
|
||||||
DrawTextElement(textElement, gc, penTextElem, Scale, pageHeight, pageXMin, pageYMin);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Save image to disk
|
|
||||||
string fileName = Path.Combine(baseDocumentPath, string.Format("{0}_{1:0000}.png", baseDocumentFilename, pageNumber));
|
|
||||||
bmp.Save(fileName, ImageFormat.Png);
|
|
||||||
}
|
|
||||||
pageNumber++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
txtOutput.Lines = lines.ToArray();
|
txtOutput.Lines = lines.ToArray();
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void DrawTextElement(PdfTextElement textElement, Graphics gc, Pen penTextElem, int Scale, int pageHeight, double pageXMin, double pageYMin)
|
|
||||||
{
|
|
||||||
double textElementX = textElement.GetX() - pageXMin;
|
|
||||||
double textElementY = textElement.GetY() - pageYMin;
|
|
||||||
double textElementWidth = textElement.VisibleWidth;
|
|
||||||
double textElementHeight = textElement.VisibleHeight;
|
|
||||||
string textElementText = textElement.VisibleText;
|
|
||||||
string textElementFontName = (textElement.Font == null ? string.Empty : textElement.Font.Name);
|
|
||||||
|
|
||||||
if (textElementHeight < 0.0001) { return; }
|
|
||||||
|
|
||||||
double textElementPageX = textElementX;
|
|
||||||
double textElementPageY = pageHeight - textElementY;
|
|
||||||
|
|
||||||
DrawRoundedRectangle(gc, penTextElem,
|
|
||||||
(int)(textElementPageX * Scale),
|
|
||||||
(int)(textElementPageY * Scale),
|
|
||||||
(int)(textElementWidth * Scale),
|
|
||||||
(int)(textElementHeight * Scale),
|
|
||||||
5);
|
|
||||||
|
|
||||||
using (Font font = new Font("Arial", (int)(textElementHeight * Scale), GraphicsUnit.Pixel))
|
|
||||||
{
|
|
||||||
foreach (PdfCharElement c in textElement.Characters)
|
|
||||||
{
|
|
||||||
gc.DrawString(c.Char,
|
|
||||||
font,
|
|
||||||
Brushes.Black,
|
|
||||||
(int)((textElementPageX + c.Displacement) * Scale),
|
|
||||||
(int)(textElementPageY * Scale));
|
|
||||||
gc.FillRectangle(Brushes.Red,
|
|
||||||
(int)((textElementPageX + c.Displacement) * Scale),
|
|
||||||
(int)(textElementPageY * Scale),
|
|
||||||
2, 2);
|
|
||||||
gc.FillRectangle(Brushes.Green,
|
|
||||||
(int)((textElementPageX + c.Displacement + c.Width) * Scale),
|
|
||||||
(int)(textElementPageY * Scale),
|
|
||||||
2, 2);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static GraphicsPath RoundedRect(int x, int y, int width, int height, int radius)
|
|
||||||
{
|
|
||||||
int diameter = radius * 2;
|
|
||||||
Size size = new Size(diameter, diameter);
|
|
||||||
Rectangle arc = new Rectangle(x, y, diameter, diameter);
|
|
||||||
GraphicsPath path = new GraphicsPath();
|
|
||||||
|
|
||||||
// top left arc
|
|
||||||
path.AddArc(arc, 180, 90);
|
|
||||||
|
|
||||||
// top right arc
|
|
||||||
arc.X = (x + width) - diameter;
|
|
||||||
path.AddArc(arc, 270, 90);
|
|
||||||
|
|
||||||
// bottom right arc
|
|
||||||
arc.Y = (y + height) - diameter;
|
|
||||||
path.AddArc(arc, 0, 90);
|
|
||||||
|
|
||||||
// bottom left arc
|
|
||||||
arc.X = x;
|
|
||||||
path.AddArc(arc, 90, 90);
|
|
||||||
|
|
||||||
path.CloseFigure();
|
|
||||||
return path;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static void DrawRoundedRectangle(Graphics graphics, Pen pen, int x, int y, int width, int height, int cornerRadius)
|
|
||||||
{
|
|
||||||
if (graphics == null)
|
|
||||||
throw new ArgumentNullException("graphics");
|
|
||||||
if (pen == null)
|
|
||||||
throw new ArgumentNullException("pen");
|
|
||||||
|
|
||||||
using (GraphicsPath path = RoundedRect(x, y, width, height, cornerRadius))
|
|
||||||
{
|
|
||||||
graphics.DrawPath(pen, path);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -11,4 +11,4 @@ using System.Runtime.InteropServices;
|
|||||||
[assembly: AssemblyCulture("")]
|
[assembly: AssemblyCulture("")]
|
||||||
[assembly: ComVisible(false)]
|
[assembly: ComVisible(false)]
|
||||||
[assembly: Guid("a5825d8e-9f81-49e0-b610-8ae5e46d02ea")]
|
[assembly: Guid("a5825d8e-9f81-49e0-b610-8ae5e46d02ea")]
|
||||||
[assembly: AssemblyVersion("1.5.1.*")]
|
[assembly: AssemblyVersion("1.6.0.*")]
|
||||||
|
|||||||
@@ -1,74 +0,0 @@
|
|||||||
//------------------------------------------------------------------------------
|
|
||||||
// <auto-generated>
|
|
||||||
// This code was generated by a tool.
|
|
||||||
// Runtime Version:4.0.30319.42000
|
|
||||||
//
|
|
||||||
// Changes to this file may cause incorrect behavior and will be lost if
|
|
||||||
// the code is regenerated.
|
|
||||||
// </auto-generated>
|
|
||||||
//------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
namespace VAR.PdfTools.Workbench.Properties {
|
|
||||||
|
|
||||||
|
|
||||||
[global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
|
|
||||||
[global::System.CodeDom.Compiler.GeneratedCodeAttribute("Microsoft.VisualStudio.Editors.SettingsDesigner.SettingsSingleFileGenerator", "14.0.0.0")]
|
|
||||||
internal sealed partial class Settings : global::System.Configuration.ApplicationSettingsBase {
|
|
||||||
|
|
||||||
private static Settings defaultInstance = ((Settings)(global::System.Configuration.ApplicationSettingsBase.Synchronized(new Settings())));
|
|
||||||
|
|
||||||
public static Settings Default {
|
|
||||||
get {
|
|
||||||
return defaultInstance;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
[global::System.Configuration.UserScopedSettingAttribute()]
|
|
||||||
[global::System.Diagnostics.DebuggerNonUserCodeAttribute()]
|
|
||||||
[global::System.Configuration.DefaultSettingValueAttribute("")]
|
|
||||||
public string LastPdfPath {
|
|
||||||
get {
|
|
||||||
return ((string)(this["LastPdfPath"]));
|
|
||||||
}
|
|
||||||
set {
|
|
||||||
this["LastPdfPath"] = value;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
[global::System.Configuration.UserScopedSettingAttribute()]
|
|
||||||
[global::System.Diagnostics.DebuggerNonUserCodeAttribute()]
|
|
||||||
[global::System.Configuration.DefaultSettingValueAttribute("")]
|
|
||||||
public string Field1 {
|
|
||||||
get {
|
|
||||||
return ((string)(this["Field1"]));
|
|
||||||
}
|
|
||||||
set {
|
|
||||||
this["Field1"] = value;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
[global::System.Configuration.UserScopedSettingAttribute()]
|
|
||||||
[global::System.Diagnostics.DebuggerNonUserCodeAttribute()]
|
|
||||||
[global::System.Configuration.DefaultSettingValueAttribute("")]
|
|
||||||
public string Field2 {
|
|
||||||
get {
|
|
||||||
return ((string)(this["Field2"]));
|
|
||||||
}
|
|
||||||
set {
|
|
||||||
this["Field2"] = value;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
[global::System.Configuration.UserScopedSettingAttribute()]
|
|
||||||
[global::System.Diagnostics.DebuggerNonUserCodeAttribute()]
|
|
||||||
[global::System.Configuration.DefaultSettingValueAttribute("")]
|
|
||||||
public string Field3 {
|
|
||||||
get {
|
|
||||||
return ((string)(this["Field3"]));
|
|
||||||
}
|
|
||||||
set {
|
|
||||||
this["Field3"] = value;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,18 +0,0 @@
|
|||||||
<?xml version='1.0' encoding='utf-8'?>
|
|
||||||
<SettingsFile xmlns="http://schemas.microsoft.com/VisualStudio/2004/01/settings" CurrentProfile="(Default)" GeneratedClassNamespace="VAR.PdfTools.Workbench.Properties" GeneratedClassName="Settings">
|
|
||||||
<Profiles />
|
|
||||||
<Settings>
|
|
||||||
<Setting Name="LastPdfPath" Type="System.String" Scope="User">
|
|
||||||
<Value Profile="(Default)" />
|
|
||||||
</Setting>
|
|
||||||
<Setting Name="Field1" Type="System.String" Scope="User">
|
|
||||||
<Value Profile="(Default)" />
|
|
||||||
</Setting>
|
|
||||||
<Setting Name="Field2" Type="System.String" Scope="User">
|
|
||||||
<Value Profile="(Default)" />
|
|
||||||
</Setting>
|
|
||||||
<Setting Name="Field3" Type="System.String" Scope="User">
|
|
||||||
<Value Profile="(Default)" />
|
|
||||||
</Setting>
|
|
||||||
</Settings>
|
|
||||||
</SettingsFile>
|
|
||||||
@@ -23,6 +23,7 @@
|
|||||||
<DefineConstants>DEBUG;TRACE</DefineConstants>
|
<DefineConstants>DEBUG;TRACE</DefineConstants>
|
||||||
<ErrorReport>prompt</ErrorReport>
|
<ErrorReport>prompt</ErrorReport>
|
||||||
<WarningLevel>4</WarningLevel>
|
<WarningLevel>4</WarningLevel>
|
||||||
|
<LangVersion>6</LangVersion>
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
|
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
|
||||||
<PlatformTarget>AnyCPU</PlatformTarget>
|
<PlatformTarget>AnyCPU</PlatformTarget>
|
||||||
@@ -47,6 +48,7 @@
|
|||||||
<Reference Include="System.Xml" />
|
<Reference Include="System.Xml" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
|
<Compile Include="Configuration.cs" />
|
||||||
<Compile Include="FrmPdfInfo.cs">
|
<Compile Include="FrmPdfInfo.cs">
|
||||||
<SubType>Form</SubType>
|
<SubType>Form</SubType>
|
||||||
</Compile>
|
</Compile>
|
||||||
@@ -55,16 +57,6 @@
|
|||||||
</Compile>
|
</Compile>
|
||||||
<Compile Include="Program.cs" />
|
<Compile Include="Program.cs" />
|
||||||
<Compile Include="Properties\AssemblyInfo.cs" />
|
<Compile Include="Properties\AssemblyInfo.cs" />
|
||||||
<None Include="app.config" />
|
|
||||||
<None Include="Properties\Settings.settings">
|
|
||||||
<Generator>SettingsSingleFileGenerator</Generator>
|
|
||||||
<LastGenOutput>Settings.Designer.cs</LastGenOutput>
|
|
||||||
</None>
|
|
||||||
<Compile Include="Properties\Settings.Designer.cs">
|
|
||||||
<AutoGen>True</AutoGen>
|
|
||||||
<DependentUpon>Settings.settings</DependentUpon>
|
|
||||||
<DesignTimeSharedInput>True</DesignTimeSharedInput>
|
|
||||||
</Compile>
|
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ProjectReference Include="..\VAR.PdfTools\VAR.PdfTools.csproj">
|
<ProjectReference Include="..\VAR.PdfTools\VAR.PdfTools.csproj">
|
||||||
|
|||||||
@@ -1,24 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="utf-8" ?>
|
|
||||||
<configuration>
|
|
||||||
<configSections>
|
|
||||||
<sectionGroup name="userSettings" type="System.Configuration.UserSettingsGroup, System, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089" >
|
|
||||||
<section name="VAR.PdfTools.Workbench.Properties.Settings" type="System.Configuration.ClientSettingsSection, System, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089" allowExeDefinition="MachineToLocalUser" requirePermission="false" />
|
|
||||||
</sectionGroup>
|
|
||||||
</configSections>
|
|
||||||
<userSettings>
|
|
||||||
<VAR.PdfTools.Workbench.Properties.Settings>
|
|
||||||
<setting name="LastPdfPath" serializeAs="String">
|
|
||||||
<value />
|
|
||||||
</setting>
|
|
||||||
<setting name="Field1" serializeAs="String">
|
|
||||||
<value />
|
|
||||||
</setting>
|
|
||||||
<setting name="Field2" serializeAs="String">
|
|
||||||
<value />
|
|
||||||
</setting>
|
|
||||||
<setting name="Field3" serializeAs="String">
|
|
||||||
<value />
|
|
||||||
</setting>
|
|
||||||
</VAR.PdfTools.Workbench.Properties.Settings>
|
|
||||||
</userSettings>
|
|
||||||
</configuration>
|
|
||||||
25
VAR.PdfTools/Maths/Rect.cs
Normal file
25
VAR.PdfTools/Maths/Rect.cs
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace VAR.PdfTools.Maths
|
||||||
|
{
|
||||||
|
/// <summary>
/// Axis-aligned rectangle described by its minimum and maximum X/Y coordinates.
/// </summary>
public class Rect
{
    /// <summary>Smallest X coordinate covered by the rectangle.</summary>
    public double XMin { get; set; }

    /// <summary>Largest X coordinate covered by the rectangle.</summary>
    public double XMax { get; set; }

    /// <summary>Smallest Y coordinate covered by the rectangle.</summary>
    public double YMin { get; set; }

    /// <summary>Largest Y coordinate covered by the rectangle.</summary>
    public double YMax { get; set; }

    /// <summary>
    /// Grows this rectangle in place so that it also covers <paramref name="rect"/>.
    /// Bounds are only ever widened, never shrunk.
    /// </summary>
    /// <param name="rect">Rectangle to include.</param>
    /// <exception cref="ArgumentNullException"><paramref name="rect"/> is null.</exception>
    public void Add(Rect rect)
    {
        if (rect == null) { throw new ArgumentNullException(nameof(rect)); }

        // Plain comparisons (rather than Math.Min/Max) so a NaN bound in "rect" never
        // overwrites a valid bound of this rectangle.
        if (rect.XMax > XMax) { XMax = rect.XMax; }
        if (rect.YMax > YMax) { YMax = rect.YMax; }
        if (rect.XMin < XMin) { XMin = rect.XMin; }
        if (rect.YMin < YMin) { YMin = rect.YMin; }
    }
}
|
||||||
|
}
|
||||||
@@ -21,11 +21,11 @@ namespace VAR.PdfTools
|
|||||||
#endregion
|
#endregion
|
||||||
|
|
||||||
#region Properties
|
#region Properties
|
||||||
|
|
||||||
public PdfDictionary BaseData { get { return _baseData; } }
|
public PdfDictionary BaseData { get { return _baseData; } }
|
||||||
|
|
||||||
public byte[] Content { get { return _content; } }
|
public byte[] Content { get { return _content; } }
|
||||||
|
|
||||||
public Dictionary<string, PdfFont> Fonts { get { return _fonts; } }
|
public Dictionary<string, PdfFont> Fonts { get { return _fonts; } }
|
||||||
|
|
||||||
public List<PdfContentAction> ContentActions { get { return _contentActions; } }
|
public List<PdfContentAction> ContentActions { get { return _contentActions; } }
|
||||||
@@ -69,7 +69,8 @@ namespace VAR.PdfTools
|
|||||||
{
|
{
|
||||||
PdfParser parser = new PdfParser(_content);
|
PdfParser parser = new PdfParser(_content);
|
||||||
_contentActions = parser.ParseContent();
|
_contentActions = parser.ParseContent();
|
||||||
}else
|
}
|
||||||
|
else
|
||||||
{
|
{
|
||||||
_contentActions = new List<PdfContentAction>();
|
_contentActions = new List<PdfContentAction>();
|
||||||
}
|
}
|
||||||
|
|||||||
210
VAR.PdfTools/PdfPageRenderer.cs
Normal file
210
VAR.PdfTools/PdfPageRenderer.cs
Normal file
@@ -0,0 +1,210 @@
|
|||||||
|
using System;
|
||||||
|
using System.Drawing;
|
||||||
|
using System.Drawing.Drawing2D;
|
||||||
|
using System.Drawing.Imaging;
|
||||||
|
using VAR.PdfTools.Maths;
|
||||||
|
|
||||||
|
namespace VAR.PdfTools
|
||||||
|
{
|
||||||
|
/// <summary>
/// Draws a diagnostic picture of the text elements found on a PDF page: every text element
/// and every character is outlined with a rounded box and its text is painted with a
/// stand-in "Arial" font. Column extraction results can be painted on top.
/// </summary>
public class PdfPageRenderer
{
    private PdfDocumentPage _page;
    private PdfTextExtractor _pdfTextExtractor;
    private Rect _pageRect;
    private int _pageWidth;
    private int _pageHeight;

    // Pixels per page unit; lowered by InitPage until the bitmap fits within MaxSize.
    private int _scale = 10;

    // Upper bound, in pixels, for each side of the rendered bitmap.
    private const int MaxSize = 10000;

    // Corner radius, in pixels, of the outline boxes.
    private const int CornerRadius = 5;

    /// <summary>Text extractor whose elements are rendered.</summary>
    public PdfTextExtractor Extractor { get { return _pdfTextExtractor; } }

    /// <summary>Creates a renderer for <paramref name="page"/>, building a new text extractor for it.</summary>
    /// <param name="page">Page to render.</param>
    public PdfPageRenderer(PdfDocumentPage page)
    {
        _page = page;
        _pdfTextExtractor = new PdfTextExtractor(_page);
        InitPage();
    }

    /// <summary>Creates a renderer that reuses an existing text extractor (and its page).</summary>
    /// <param name="pdfTextExtractor">Extractor providing the text elements.</param>
    /// <exception cref="ArgumentNullException"><paramref name="pdfTextExtractor"/> is null.</exception>
    public PdfPageRenderer(PdfTextExtractor pdfTextExtractor)
    {
        if (pdfTextExtractor == null) { throw new ArgumentNullException(nameof(pdfTextExtractor)); }

        _pdfTextExtractor = pdfTextExtractor;
        _page = pdfTextExtractor.Page;
        InitPage();
    }

    /// <summary>
    /// Computes the page extents from the extractor and picks the largest scale
    /// (at most the initial value, at least 1) at which both sides stay within MaxSize.
    /// </summary>
    private void InitPage()
    {
        // The extractor reports no rectangle (null) when it holds no text elements; use an
        // empty one so construction succeeds and Render() can return its blank bitmap.
        _pageRect = _pdfTextExtractor.GetRect() ?? new Rect();
        _pageWidth = (int)Math.Ceiling(_pageRect.XMax - _pageRect.XMin);
        _pageHeight = (int)Math.Ceiling(_pageRect.YMax - _pageRect.YMin);

        while ((_pageWidth * _scale) > MaxSize) { _scale--; }
        while ((_pageHeight * _scale) > MaxSize) { _scale--; }
        if (_scale <= 0) { _scale = 1; }
    }

    /// <summary>
    /// Allocates the page-sized bitmap. Each side is clamped to at least one pixel because
    /// the Bitmap constructor rejects zero-sized images.
    /// </summary>
    private Bitmap CreatePageBitmap()
    {
        int width = Math.Max(1, _pageWidth * _scale);
        int height = Math.Max(1, _pageHeight * _scale);
        return new Bitmap(width, height, PixelFormat.Format32bppArgb);
    }

    /// <summary>
    /// Renders all the text elements of the page on a white background.
    /// </summary>
    /// <returns>
    /// A new bitmap owned by the caller. When the page has no text elements a blank
    /// 100x200 white bitmap is returned.
    /// </returns>
    public Bitmap Render()
    {
        if (_pdfTextExtractor.Elements.Count == 0)
        {
            // Nothing to render
            Bitmap emptyBmp = new Bitmap(100, 200, PixelFormat.Format32bppArgb);
            using (Graphics gcEmpty = Graphics.FromImage(emptyBmp))
            {
                gcEmpty.Clear(Color.White);
            }
            return emptyBmp;
        }

        Bitmap bmp = CreatePageBitmap();

        // "using" guarantees the Graphics and pens are released even if drawing throws.
        using (Graphics gc = Graphics.FromImage(bmp))
        using (Pen penTextElem = new Pen(Color.Blue))
        using (Pen penCharElem = new Pen(Color.Navy))
        {
            gc.Clear(Color.White);

            // Draw text elements of the page
            foreach (PdfTextElement textElement in _pdfTextExtractor.Elements)
            {
                DrawTextElement(textElement, gc, penTextElem, penCharElem, _scale, _pageHeight, _pageRect.XMin, _pageRect.YMin, Brushes.Black);
            }
        }

        return bmp;
    }

    /// <summary>
    /// Paints a column (header, elements and extents) either on a new white page-sized
    /// bitmap or on top of <paramref name="bmp"/>.
    /// </summary>
    /// <param name="columnData">Column to paint.</param>
    /// <param name="bmp">Bitmap to draw on; when null a new one is created.</param>
    /// <returns>The bitmap that was drawn on (<paramref name="bmp"/> when it was supplied).</returns>
    /// <exception cref="ArgumentNullException"><paramref name="columnData"/> is null.</exception>
    public Bitmap RenderColumn(PdfTextElementColumn columnData, Bitmap bmp = null)
    {
        if (columnData == null) { throw new ArgumentNullException(nameof(columnData)); }

        bool createdHere = (bmp == null);
        if (createdHere) { bmp = CreatePageBitmap(); }

        using (Graphics gc = Graphics.FromImage(bmp))
        {
            // Only a bitmap created here is cleared; a supplied one keeps its content.
            if (createdHere) { gc.Clear(Color.White); }

            // Draw text elements of the column header
            using (Pen penTextElem = new Pen(Color.Green))
            using (Pen penCharElem = new Pen(Color.DarkGreen))
            {
                DrawTextElement(columnData.HeadTextElement, gc, penTextElem, penCharElem, _scale, _pageHeight, _pageRect.XMin, _pageRect.YMin, Brushes.Olive);
            }

            // Draw text elements of the column
            if (columnData.Elements != null)
            {
                using (Pen penTextElem = new Pen(Color.Red))
                using (Pen penCharElem = new Pen(Color.DarkRed))
                {
                    foreach (PdfTextElement textElement in columnData.Elements)
                    {
                        DrawTextElement(textElement, gc, penTextElem, penCharElem, _scale, _pageHeight, _pageRect.XMin, _pageRect.YMin, Brushes.OrangeRed);
                    }
                }
            }

            // Draw column extents: a horizontal line at the column Y plus two vertical
            // lines running from there to the bottom of the image.
            using (Pen penColumn = new Pen(Color.Red))
            {
                float y = (float)(_pageRect.YMax - columnData.Y);
                float x1 = (float)(columnData.X1 - _pageRect.XMin);
                float x2 = (float)(columnData.X2 - _pageRect.XMin);

                gc.DrawLine(penColumn, x1 * _scale, y * _scale, x2 * _scale, y * _scale);
                gc.DrawLine(penColumn, x1 * _scale, y * _scale, x1 * _scale, _pageHeight * _scale);
                gc.DrawLine(penColumn, x2 * _scale, y * _scale, x2 * _scale, _pageHeight * _scale);
            }
        }

        return bmp;
    }

    /// <summary>
    /// Draws one text element: an optional outline around the element, each character
    /// string at its displacement, and an optional outline around every character.
    /// Null elements and elements with a visible height below 0.0001 are skipped.
    /// </summary>
    private static void DrawTextElement(PdfTextElement textElement, Graphics gc, Pen penTextElem, Pen penCharElem, int scale, int pageHeight, double pageXMin, double pageYMin, Brush brushText)
    {
        if (textElement == null) { return; }

        double textElementWidth = textElement.VisibleWidth;
        double textElementHeight = textElement.VisibleHeight;
        if (textElementHeight < 0.0001) { return; }

        // Element position relative to the page rectangle; Y is flipped because image
        // coordinates grow downwards.
        double textElementPageX = textElement.GetX() - pageXMin;
        double textElementPageY = pageHeight - (textElement.GetY() - pageYMin);

        if (penTextElem != null)
        {
            DrawRoundedRectangle(gc, penTextElem,
                (int)(textElementPageX * scale),
                (int)(textElementPageY * scale),
                (int)(textElementWidth * scale),
                (int)(textElementHeight * scale),
                CornerRadius);
        }

        // Font rejects a size of zero, which the truncation below produces for very
        // small elements; keep at least one pixel.
        int fontSize = Math.Max(1, (int)(textElementHeight * scale));
        using (Font font = new Font("Arial", fontSize, GraphicsUnit.Pixel))
        {
            foreach (PdfCharElement c in textElement.Characters)
            {
                int charX = (int)((textElementPageX + c.Displacement) * scale);
                int charY = (int)(textElementPageY * scale);

                gc.DrawString(c.Char, font, brushText, charX, charY);

                if (penCharElem != null)
                {
                    DrawRoundedRectangle(gc, penCharElem,
                        charX,
                        charY,
                        (int)(c.Width * scale),
                        (int)(textElementHeight * scale),
                        CornerRadius);
                }
            }
        }
    }

    /// <summary>
    /// Builds a closed path for a rectangle with rounded corners.
    /// </summary>
    /// <param name="x">Left coordinate.</param>
    /// <param name="y">Top coordinate.</param>
    /// <param name="width">Rectangle width.</param>
    /// <param name="height">Rectangle height.</param>
    /// <param name="radius">Corner radius; zero or negative yields a plain rectangle.</param>
    /// <returns>A new path; the caller is responsible for disposing it.</returns>
    public static GraphicsPath RoundedRect(int x, int y, int width, int height, int radius)
    {
        GraphicsPath path = new GraphicsPath();

        if (radius <= 0)
        {
            // An arc needs a non-empty bounding box, so fall back to square corners.
            path.AddRectangle(new Rectangle(x, y, width, height));
            return path;
        }

        int diameter = radius * 2;
        Rectangle arc = new Rectangle(x, y, diameter, diameter);

        // top left arc
        path.AddArc(arc, 180, 90);

        // top right arc
        arc.X = (x + width) - diameter;
        path.AddArc(arc, 270, 90);

        // bottom right arc
        arc.Y = (y + height) - diameter;
        path.AddArc(arc, 0, 90);

        // bottom left arc
        arc.X = x;
        path.AddArc(arc, 90, 90);

        path.CloseFigure();
        return path;
    }

    /// <summary>
    /// Strokes a rounded rectangle with the given pen.
    /// </summary>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="graphics"/> or <paramref name="pen"/> is null.
    /// </exception>
    public static void DrawRoundedRectangle(Graphics graphics, Pen pen, int x, int y, int width, int height, int cornerRadius)
    {
        if (graphics == null) { throw new ArgumentNullException(nameof(graphics)); }
        if (pen == null) { throw new ArgumentNullException(nameof(pen)); }

        using (GraphicsPath path = RoundedRect(x, y, width, height, cornerRadius))
        {
            graphics.DrawPath(pen, path);
        }
    }
}
|
||||||
|
}
|
||||||
149
VAR.PdfTools/PdfTextElement.cs
Normal file
149
VAR.PdfTools/PdfTextElement.cs
Normal file
@@ -0,0 +1,149 @@
|
|||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using VAR.PdfTools.Maths;
|
||||||
|
|
||||||
|
namespace VAR.PdfTools
|
||||||
|
{
|
||||||
|
/// <summary>
/// One character of a <see cref="PdfTextElement"/>, with its horizontal placement.
/// </summary>
public struct PdfCharElement
{
    /// <summary>Text of the character, stored as a string.</summary>
    public string Char;

    /// <summary>
    /// Horizontal offset of the character, measured from the X position of the
    /// text element that owns it.
    /// </summary>
    public double Displacement;

    /// <summary>Width of the character, in the same units as <see cref="Displacement"/>.</summary>
    public double Width;
}
|
||||||
|
|
||||||
|
/// <summary>
/// A piece of text placed on a page: its font information, placement matrix, raw and
/// visible text, visible size, and the individual characters that compose it.
/// </summary>
public class PdfTextElement
{
    #region Properties

    /// <summary>Font of the text; may be null (elements created by <see cref="SubPart"/> carry no font).</summary>
    public PdfFont Font { get; set; }

    /// <summary>Font size of the text.</summary>
    public double FontSize { get; set; }

    /// <summary>Placement matrix; cells [0, 2] and [1, 2] hold the X and Y position.</summary>
    public Matrix3x3 Matrix { get; set; }

    /// <summary>Text as stored, before conversion to its visible form.</summary>
    public string RawText { get; set; }

    /// <summary>Text as displayed.</summary>
    public string VisibleText { get; set; }

    /// <summary>Width of the displayed text.</summary>
    public double VisibleWidth { get; set; }

    /// <summary>Height of the displayed text.</summary>
    public double VisibleHeight { get; set; }

    /// <summary>Characters of the element, each with its displacement and width.</summary>
    public List<PdfCharElement> Characters { get; set; }

    /// <summary>Child elements attached to this element.</summary>
    public List<PdfTextElement> Childs { get; set; }

    #endregion

    #region Public methods

    /// <summary>Gets the X position of the element (matrix cell [0, 2]).</summary>
    public double GetX()
    {
        return Matrix.Matrix[0, 2];
    }

    /// <summary>Gets the Y position of the element (matrix cell [1, 2]).</summary>
    public double GetY()
    {
        return Matrix.Matrix[1, 2];
    }

    /// <summary>
    /// Creates a new element covering the characters in [<paramref name="startIndex"/>,
    /// <paramref name="endIndex"/>). The copy is shifted right by the displacement of its
    /// first character, its character displacements are rebased to start at zero, and its
    /// visible width is recomputed. The font is not carried over and the child elements are
    /// shared (shallow copy of the list).
    /// </summary>
    /// <param name="startIndex">Index of the first character to include.</param>
    /// <param name="endIndex">Index one past the last character to include.</param>
    /// <returns>The new element; this instance is left untouched.</returns>
    /// <exception cref="System.ArgumentOutOfRangeException">
    /// The range is empty or does not lie within <see cref="Characters"/>.
    /// </exception>
    public PdfTextElement SubPart(int startIndex, int endIndex)
    {
        // Validate before building anything: an empty or out-of-range span would otherwise
        // surface later as an unexplained index error on the character list.
        if (startIndex < 0 || startIndex >= Characters.Count)
        {
            throw new System.ArgumentOutOfRangeException(nameof(startIndex), startIndex, "Start index must refer to an existing character.");
        }
        if (endIndex <= startIndex || endIndex > Characters.Count)
        {
            throw new System.ArgumentOutOfRangeException(nameof(endIndex), endIndex, "End index must be greater than the start index and no greater than the character count.");
        }

        int length = endIndex - startIndex;
        double displacement = Characters[startIndex].Displacement;

        PdfTextElement blockElem = new PdfTextElement
        {
            Font = null,
            FontSize = FontSize,
            Matrix = Matrix.Copy(),
            RawText = RawText.Substring(startIndex, length),
            VisibleText = VisibleText.Substring(startIndex, length),
            VisibleWidth = 0,
            VisibleHeight = VisibleHeight,
            Characters = new List<PdfCharElement>(length),
            Childs = (Childs == null ? new List<PdfTextElement>() : new List<PdfTextElement>(Childs)),
        };

        // Move the copy to where its first character starts.
        blockElem.Matrix.Matrix[0, 2] += displacement;

        for (int j = startIndex; j < endIndex; j++)
        {
            blockElem.Characters.Add(new PdfCharElement
            {
                Char = Characters[j].Char,
                Displacement = Characters[j].Displacement - displacement,
                Width = Characters[j].Width,
            });
        }

        // The visible width ends where the last copied character ends.
        PdfCharElement lastChar = blockElem.Characters[blockElem.Characters.Count - 1];
        blockElem.VisibleWidth = lastChar.Displacement + lastChar.Width;

        return blockElem;
    }

    /// <summary>
    /// Returns the average width of the characters of the element.
    /// NOTE: despite its name this is an average, not a maximum.
    /// </summary>
    /// <exception cref="System.InvalidOperationException">The element has no characters.</exception>
    public double MaxWidth()
    {
        return Characters.Average(c => c.Width);
    }

    /// <summary>
    /// Gets the rectangle of the element: it spans from <see cref="GetX"/> to the right by
    /// <see cref="VisibleWidth"/>, and from <see cref="GetY"/> down by <see cref="VisibleHeight"/>.
    /// </summary>
    /// <returns>A new rectangle instance.</returns>
    public Rect GetRect()
    {
        double x = GetX();
        double y = GetY();
        return new Rect
        {
            XMin = x,
            YMax = y,
            XMax = x + VisibleWidth,
            YMin = y - VisibleHeight,
        };
    }

    /// <summary>
    /// Gets the gap between the end of the previous character and the start of the
    /// character at <paramref name="index"/>.
    /// </summary>
    /// <param name="index">Character index.</param>
    /// <returns>The gap, or 0 when there is no previous character (index of 0 or less).</returns>
    public double GetCharacterPreviousSpacing(int index)
    {
        if (index <= 0) { return 0; }
        double previousEnd = Characters[index - 1].Displacement + Characters[index - 1].Width;
        double spacing = Characters[index].Displacement - previousEnd;
        return spacing;
    }

    /// <summary>
    /// Gets the gap between the end of the character at <paramref name="index"/> and the
    /// start of the next one.
    /// NOTE: despite its name this measures the spacing that follows the character.
    /// </summary>
    /// <param name="index">Character index.</param>
    /// <returns>The gap, or 0 when there is no next character (last index or beyond).</returns>
    public double GetCharacterPrecedingSpacing(int index)
    {
        if (index >= (Characters.Count - 1)) { return 0; }
        double currentEnd = Characters[index].Displacement + Characters[index].Width;
        double spacing = Characters[index + 1].Displacement - currentEnd;
        return spacing;
    }

    #endregion
}
|
||||||
|
|
||||||
|
/// <summary>
/// Result of a column lookup: the header element, the elements found for the column and
/// the extents (one Y and two X coordinates) that delimit it.
/// </summary>
public class PdfTextElementColumn
{
    /// <summary>Shared instance with no header, no elements and all coordinates at zero.</summary>
    public static PdfTextElementColumn Empty { get; } = new PdfTextElementColumn();

    /// <summary>Header element of the column; null for <see cref="Empty"/>.</summary>
    public PdfTextElement HeadTextElement { get; private set; }

    /// <summary>Elements that belong to the column.</summary>
    public IEnumerable<PdfTextElement> Elements { get; private set; }

    /// <summary>Y coordinate associated with the column.</summary>
    public double Y { get; private set; }

    /// <summary>First X extent of the column.</summary>
    public double X1 { get; private set; }

    /// <summary>Second X extent of the column.</summary>
    public double X2 { get; private set; }

    /// <summary>Creates a column from its header, elements and extents.</summary>
    /// <param name="head">Header element.</param>
    /// <param name="elements">Elements of the column; stored as given, not copied.</param>
    /// <param name="y">Y coordinate of the column.</param>
    /// <param name="x1">First X extent.</param>
    /// <param name="x2">Second X extent.</param>
    public PdfTextElementColumn(PdfTextElement head, IEnumerable<PdfTextElement> elements, double y, double x1, double x2)
    {
        X1 = x1;
        X2 = x2;
        Y = y;
        Elements = elements;
        HeadTextElement = head;
    }

    // Used only for Empty: no header, an empty element list, zeroed coordinates.
    private PdfTextElementColumn()
        : this(null, new List<PdfTextElement>(), 0, 0, 0)
    {
    }
}
|
||||||
|
}
|
||||||
@@ -7,92 +7,6 @@ using VAR.PdfTools.PdfElements;
|
|||||||
|
|
||||||
namespace VAR.PdfTools
|
namespace VAR.PdfTools
|
||||||
{
|
{
|
||||||
public struct PdfCharElement
|
|
||||||
{
|
|
||||||
public string Char;
|
|
||||||
public double Displacement;
|
|
||||||
public double Width;
|
|
||||||
}
|
|
||||||
|
|
||||||
public class PdfTextElement
|
|
||||||
{
|
|
||||||
#region Properties
|
|
||||||
|
|
||||||
public PdfFont Font { get; set; }
|
|
||||||
|
|
||||||
public double FontSize { get; set; }
|
|
||||||
|
|
||||||
public Matrix3x3 Matrix { get; set; }
|
|
||||||
|
|
||||||
public string RawText { get; set; }
|
|
||||||
|
|
||||||
public string VisibleText { get; set; }
|
|
||||||
|
|
||||||
public double VisibleWidth { get; set; }
|
|
||||||
|
|
||||||
public double VisibleHeight { get; set; }
|
|
||||||
|
|
||||||
public List<PdfCharElement> Characters { get; set; }
|
|
||||||
|
|
||||||
public List<PdfTextElement> Childs { get; set; }
|
|
||||||
|
|
||||||
#endregion
|
|
||||||
|
|
||||||
#region Public methods
|
|
||||||
|
|
||||||
public double GetX()
|
|
||||||
{
|
|
||||||
return Matrix.Matrix[0, 2];
|
|
||||||
}
|
|
||||||
|
|
||||||
public double GetY()
|
|
||||||
{
|
|
||||||
return Matrix.Matrix[1, 2];
|
|
||||||
}
|
|
||||||
|
|
||||||
public PdfTextElement SubPart(int startIndex, int endIndex)
|
|
||||||
{
|
|
||||||
PdfTextElement blockElem = new PdfTextElement
|
|
||||||
{
|
|
||||||
Font = null,
|
|
||||||
FontSize = FontSize,
|
|
||||||
Matrix = Matrix.Copy(),
|
|
||||||
RawText = RawText.Substring(startIndex, endIndex - startIndex),
|
|
||||||
VisibleText = VisibleText.Substring(startIndex, endIndex - startIndex),
|
|
||||||
VisibleWidth = 0,
|
|
||||||
VisibleHeight = VisibleHeight,
|
|
||||||
Characters = new List<PdfCharElement>(),
|
|
||||||
Childs = new List<PdfTextElement>(),
|
|
||||||
};
|
|
||||||
double displacement = Characters[startIndex].Displacement;
|
|
||||||
blockElem.Matrix.Matrix[0, 2] += displacement;
|
|
||||||
for (int j = startIndex; j < endIndex; j++)
|
|
||||||
{
|
|
||||||
blockElem.Characters.Add(new PdfCharElement
|
|
||||||
{
|
|
||||||
Char = Characters[j].Char,
|
|
||||||
Displacement = Characters[j].Displacement - displacement,
|
|
||||||
Width = Characters[j].Width,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
PdfCharElement lastChar = blockElem.Characters[blockElem.Characters.Count - 1];
|
|
||||||
blockElem.VisibleWidth = lastChar.Displacement + lastChar.Width;
|
|
||||||
foreach (PdfTextElement elem in Childs)
|
|
||||||
{
|
|
||||||
blockElem.Childs.Add(elem);
|
|
||||||
}
|
|
||||||
|
|
||||||
return blockElem;
|
|
||||||
}
|
|
||||||
|
|
||||||
public double MaxWidth()
|
|
||||||
{
|
|
||||||
return Characters.Average(c => c.Width) / 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endregion
|
|
||||||
}
|
|
||||||
|
|
||||||
public class PdfTextExtractor
|
public class PdfTextExtractor
|
||||||
{
|
{
|
||||||
#region Declarations
|
#region Declarations
|
||||||
@@ -256,6 +170,14 @@ namespace VAR.PdfTools
|
|||||||
return list;
|
return list;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Tells whether the horizontal span of <paramref name="elem1"/> (from its X position to
/// X plus its visible width) overlaps or touches the range
/// [<paramref name="elem2X1"/>, <paramref name="elem2X2"/>].
/// </summary>
private bool TextElementVerticalIntersection(PdfTextElement elem1, double elem2X1, double elem2X2)
{
    double left = elem1.GetX();
    double right = left + elem1.VisibleWidth;

    bool endsAtOrAfterRangeStart = right >= elem2X1;
    bool startsAtOrBeforeRangeEnd = elem2X2 >= left;
    return endsAtOrAfterRangeStart && startsAtOrBeforeRangeEnd;
}
|
||||||
|
|
||||||
private bool TextElementVerticalIntersection(PdfTextElement elem1, PdfTextElement elem2)
|
private bool TextElementVerticalIntersection(PdfTextElement elem1, PdfTextElement elem2)
|
||||||
{
|
{
|
||||||
double elem1X1 = elem1.GetX();
|
double elem1X1 = elem1.GetX();
|
||||||
@@ -738,19 +660,26 @@ namespace VAR.PdfTools
|
|||||||
|
|
||||||
#region Public methods
|
#region Public methods
|
||||||
|
|
||||||
public List<string> GetColumn(string column)
|
public Rect GetRect()
|
||||||
{
|
{
|
||||||
return GetColumn(column, true);
|
Rect rect = null;
|
||||||
|
foreach (PdfTextElement textElement in _textElements)
|
||||||
|
{
|
||||||
|
Rect elementRect = textElement.GetRect();
|
||||||
|
if (rect == null) { rect = elementRect; }
|
||||||
|
rect.Add(elementRect);
|
||||||
|
}
|
||||||
|
return rect;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<string> GetColumn(string column, bool fuzzy)
|
public PdfTextElementColumn GetColumn(string column, bool fuzzy = true)
|
||||||
{
|
{
|
||||||
PdfTextElement columnHead = FindElementByText(column, fuzzy);
|
PdfTextElement columnHead = FindElementByText(column, fuzzy);
|
||||||
if (columnHead == null)
|
if (columnHead == null)
|
||||||
{
|
{
|
||||||
return new List<string>();
|
return PdfTextElementColumn.Empty;
|
||||||
}
|
}
|
||||||
double headY = columnHead.GetY();
|
double headY = columnHead.GetY() - columnHead.VisibleHeight;
|
||||||
double headX1 = columnHead.GetX();
|
double headX1 = columnHead.GetX();
|
||||||
double headX2 = headX1 + columnHead.VisibleWidth;
|
double headX2 = headX1 + columnHead.VisibleWidth;
|
||||||
|
|
||||||
@@ -778,14 +707,20 @@ namespace VAR.PdfTools
|
|||||||
extentX2 = elemX1;
|
extentX2 = elemX1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PdfTextElementColumn columnData = GetColumn(columnHead, headY, headX1, headX2, extentX1, extentX2);
|
||||||
|
|
||||||
|
return columnData;
|
||||||
|
}
|
||||||
|
|
||||||
|
public PdfTextElementColumn GetColumn(PdfTextElement columnHead, double headY, double headX1, double headX2, double extentX1, double extentX2)
|
||||||
|
{
|
||||||
// Get all the elements that intersects vertically, are down and sort results
|
// Get all the elements that intersects vertically, are down and sort results
|
||||||
var columnDataRaw = new List<PdfTextElement>();
|
var columnDataRaw = new List<PdfTextElement>();
|
||||||
foreach (PdfTextElement elem in _textElements)
|
foreach (PdfTextElement elem in _textElements)
|
||||||
{
|
{
|
||||||
if (TextElementVerticalIntersection(columnHead, elem) == false) { continue; }
|
if (TextElementVerticalIntersection(elem, headX1, headX2) == false) { continue; }
|
||||||
|
|
||||||
// Only intems down the column
|
// Only intems down the column
|
||||||
double elemY = elem.GetY();
|
double elemY = elem.GetY();
|
||||||
@@ -795,32 +730,94 @@ namespace VAR.PdfTools
|
|||||||
}
|
}
|
||||||
columnDataRaw = columnDataRaw.OrderByDescending(elem => elem.GetY()).ToList();
|
columnDataRaw = columnDataRaw.OrderByDescending(elem => elem.GetY()).ToList();
|
||||||
|
|
||||||
// Only items completelly inside extents, amd break on the first element outside
|
// Only items completelly inside extents, try spliting big elements and break on big elements that can't be splitted
|
||||||
var columnData = new List<PdfTextElement>();
|
var columnElements = new List<PdfTextElement>();
|
||||||
foreach (PdfTextElement elem in columnDataRaw)
|
foreach (PdfTextElement elem in columnDataRaw)
|
||||||
{
|
{
|
||||||
double elemX1 = elem.GetX();
|
double elemX1 = elem.GetX();
|
||||||
double elemX2 = elemX1 + elem.VisibleWidth;
|
double elemX2 = elemX1 + elem.VisibleWidth;
|
||||||
if (elemX1 < extentX1 || elemX2 > extentX2) { break; }
|
|
||||||
|
|
||||||
columnData.Add(elem);
|
// Add elements completely inside
|
||||||
|
if (elemX1 > extentX1 && elemX2 < extentX2)
|
||||||
|
{
|
||||||
|
columnElements.Add(elem);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try to split elements intersecting extents of the column
|
||||||
|
double maxSpacing = elem.Characters.Average(c => c.Width) / 10;
|
||||||
|
int indexStart = 0;
|
||||||
|
int indexEnd = elem.Characters.Count - 1;
|
||||||
|
bool indexStartValid = true;
|
||||||
|
bool indexEndValid = true;
|
||||||
|
if (elemX1 < extentX1)
|
||||||
|
{
|
||||||
|
// Search best start
|
||||||
|
int index = 0;
|
||||||
|
double characterPosition = elemX1 + elem.Characters[index].Displacement;
|
||||||
|
while (characterPosition < extentX1 && index < (elem.Characters.Count - 1))
|
||||||
|
{
|
||||||
|
index++;
|
||||||
|
characterPosition = elemX1 + elem.Characters[index].Displacement;
|
||||||
|
}
|
||||||
|
double spacing = elem.GetCharacterPreviousSpacing(index);
|
||||||
|
while (spacing < maxSpacing && index < (elem.Characters.Count - 1))
|
||||||
|
{
|
||||||
|
index++;
|
||||||
|
spacing = elem.GetCharacterPreviousSpacing(index);
|
||||||
|
}
|
||||||
|
if (spacing < maxSpacing) { indexStartValid = false; }
|
||||||
|
indexStart = index;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (elemX2 > extentX2)
|
||||||
|
{
|
||||||
|
// Search best end
|
||||||
|
int index = elem.Characters.Count - 1;
|
||||||
|
double characterPosition = elemX1 + elem.Characters[index].Displacement + elem.Characters[index].Width;
|
||||||
|
while (characterPosition > extentX2 && index > 0)
|
||||||
|
{
|
||||||
|
index--;
|
||||||
|
characterPosition = elemX1 + elem.Characters[index].Displacement + elem.Characters[index].Width;
|
||||||
|
}
|
||||||
|
double spacing = elem.GetCharacterPrecedingSpacing(index);
|
||||||
|
while (spacing < maxSpacing && index > 0)
|
||||||
|
{
|
||||||
|
index--;
|
||||||
|
spacing = elem.GetCharacterPrecedingSpacing(index);
|
||||||
|
}
|
||||||
|
if (spacing < maxSpacing) { indexEndValid = false; }
|
||||||
|
indexEnd = index;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Break when there is no good split, spaning all extent
|
||||||
|
if (indexStartValid == false && indexEndValid == false) { break; }
|
||||||
|
|
||||||
|
// Continue when only one of the sides is invalid. (outside elements intersecting extents of the column)
|
||||||
|
if (indexStartValid == false || indexEndValid == false) { continue; }
|
||||||
|
|
||||||
|
// Add splitted element
|
||||||
|
columnElements.Add(elem.SubPart(indexStart, indexEnd + 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var columnData = new PdfTextElementColumn(columnHead, columnElements, headY, extentX1, extentX2);
|
||||||
|
return columnData;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<string> GetColumnAsStrings(string column, bool fuzzy = true)
|
||||||
|
{
|
||||||
|
PdfTextElementColumn columnData = GetColumn(column, fuzzy);
|
||||||
|
|
||||||
// Emit result
|
// Emit result
|
||||||
var result = new List<string>();
|
var result = new List<string>();
|
||||||
foreach (PdfTextElement elem in columnData)
|
foreach (PdfTextElement elem in columnData.Elements)
|
||||||
{
|
{
|
||||||
result.Add(elem.VisibleText);
|
result.Add(elem.VisibleText);
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
public string GetField(string field)
|
public string GetFieldAsString(string field, bool fuzzy = true)
|
||||||
{
|
|
||||||
return GetField(field, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
public string GetField(string field, bool fuzzy)
|
|
||||||
{
|
{
|
||||||
PdfTextElement fieldTitle = FindElementByText(field, fuzzy);
|
PdfTextElement fieldTitle = FindElementByText(field, fuzzy);
|
||||||
if (fieldTitle == null)
|
if (fieldTitle == null)
|
||||||
@@ -848,12 +845,7 @@ namespace VAR.PdfTools
|
|||||||
return fieldData.OrderBy(elem => elem.GetX()).FirstOrDefault().VisibleText;
|
return fieldData.OrderBy(elem => elem.GetX()).FirstOrDefault().VisibleText;
|
||||||
}
|
}
|
||||||
|
|
||||||
public bool HasText(string text)
|
public bool HasText(string text, bool fuzzy = true)
|
||||||
{
|
|
||||||
return HasText(text, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
public bool HasText(string text, bool fuzzy)
|
|
||||||
{
|
{
|
||||||
List<PdfTextElement> list = FindElementsContainingText(text, fuzzy);
|
List<PdfTextElement> list = FindElementsContainingText(text, fuzzy);
|
||||||
return (list.Count > 0);
|
return (list.Count > 0);
|
||||||
|
|||||||
@@ -11,4 +11,4 @@ using System.Runtime.InteropServices;
|
|||||||
[assembly: AssemblyCulture("")]
|
[assembly: AssemblyCulture("")]
|
||||||
[assembly: ComVisible(false)]
|
[assembly: ComVisible(false)]
|
||||||
[assembly: Guid("eb7e003a-6a95-4002-809f-926c7c8a11e9")]
|
[assembly: Guid("eb7e003a-6a95-4002-809f-926c7c8a11e9")]
|
||||||
[assembly: AssemblyVersion("1.5.2.*")]
|
[assembly: AssemblyVersion("1.6.0.*")]
|
||||||
|
|||||||
@@ -22,6 +22,7 @@
|
|||||||
<ErrorReport>prompt</ErrorReport>
|
<ErrorReport>prompt</ErrorReport>
|
||||||
<WarningLevel>4</WarningLevel>
|
<WarningLevel>4</WarningLevel>
|
||||||
<TargetFrameworkVersion>v4.6.1</TargetFrameworkVersion>
|
<TargetFrameworkVersion>v4.6.1</TargetFrameworkVersion>
|
||||||
|
<LangVersion>6</LangVersion>
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release .Net 4.6.1|AnyCPU' ">
|
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release .Net 4.6.1|AnyCPU' ">
|
||||||
<DebugType>pdbonly</DebugType>
|
<DebugType>pdbonly</DebugType>
|
||||||
@@ -54,6 +55,7 @@
|
|||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<Reference Include="System" />
|
<Reference Include="System" />
|
||||||
<Reference Include="System.Core" />
|
<Reference Include="System.Core" />
|
||||||
|
<Reference Include="System.Drawing" />
|
||||||
<Reference Include="System.Xml.Linq" />
|
<Reference Include="System.Xml.Linq" />
|
||||||
<Reference Include="System.Data.DataSetExtensions" />
|
<Reference Include="System.Data.DataSetExtensions" />
|
||||||
<Reference Include="System.Data" />
|
<Reference Include="System.Data" />
|
||||||
@@ -61,6 +63,7 @@
|
|||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<Compile Include="Maths\Matrix3x3.cs" />
|
<Compile Include="Maths\Matrix3x3.cs" />
|
||||||
|
<Compile Include="Maths\Rect.cs" />
|
||||||
<Compile Include="PdfContentAction.cs" />
|
<Compile Include="PdfContentAction.cs" />
|
||||||
<Compile Include="PdfDocument.cs" />
|
<Compile Include="PdfDocument.cs" />
|
||||||
<Compile Include="PdfDocumentPage.cs" />
|
<Compile Include="PdfDocumentPage.cs" />
|
||||||
@@ -81,7 +84,9 @@
|
|||||||
<Compile Include="PdfElements\PdfStream.cs" />
|
<Compile Include="PdfElements\PdfStream.cs" />
|
||||||
<Compile Include="PdfElements\PdfString.cs" />
|
<Compile Include="PdfElements\PdfString.cs" />
|
||||||
<Compile Include="PdfParser.cs" />
|
<Compile Include="PdfParser.cs" />
|
||||||
|
<Compile Include="PdfPageRenderer.cs" />
|
||||||
<Compile Include="PdfStandar14FontMetrics.cs" />
|
<Compile Include="PdfStandar14FontMetrics.cs" />
|
||||||
|
<Compile Include="PdfTextElement.cs" />
|
||||||
<Compile Include="PdfTextExtractor.cs" />
|
<Compile Include="PdfTextExtractor.cs" />
|
||||||
<Compile Include="Properties\AssemblyInfo.cs" />
|
<Compile Include="Properties\AssemblyInfo.cs" />
|
||||||
<Compile Include="Maths\Vector3D.cs" />
|
<Compile Include="Maths\Vector3D.cs" />
|
||||||
|
|||||||
Reference in New Issue
Block a user