PdfTextExtractor: Get results as PdfTextElementColumn, for debugging purposes.
This commit is contained in:
19
VAR.PdfTools.Workbench/FrmPdfInfo.Designer.cs
generated
19
VAR.PdfTools.Workbench/FrmPdfInfo.Designer.cs
generated
@@ -48,6 +48,7 @@
|
|||||||
this.txtField3 = new System.Windows.Forms.TextBox();
|
this.txtField3 = new System.Windows.Forms.TextBox();
|
||||||
this.btnGetColumn3 = new System.Windows.Forms.Button();
|
this.btnGetColumn3 = new System.Windows.Forms.Button();
|
||||||
this.txtPages = new System.Windows.Forms.TextBox();
|
this.txtPages = new System.Windows.Forms.TextBox();
|
||||||
|
this.chkRender = new System.Windows.Forms.CheckBox();
|
||||||
this.SuspendLayout();
|
this.SuspendLayout();
|
||||||
//
|
//
|
||||||
// lblOutputs
|
// lblOutputs
|
||||||
@@ -120,7 +121,7 @@
|
|||||||
//
|
//
|
||||||
this.btnGetColumn1.Location = new System.Drawing.Point(292, 51);
|
this.btnGetColumn1.Location = new System.Drawing.Point(292, 51);
|
||||||
this.btnGetColumn1.Name = "btnGetColumn1";
|
this.btnGetColumn1.Name = "btnGetColumn1";
|
||||||
this.btnGetColumn1.Size = new System.Drawing.Size(60, 23);
|
this.btnGetColumn1.Size = new System.Drawing.Size(69, 23);
|
||||||
this.btnGetColumn1.TabIndex = 12;
|
this.btnGetColumn1.TabIndex = 12;
|
||||||
this.btnGetColumn1.Text = "GetColumn";
|
this.btnGetColumn1.Text = "GetColumn";
|
||||||
this.btnGetColumn1.UseVisualStyleBackColor = true;
|
this.btnGetColumn1.UseVisualStyleBackColor = true;
|
||||||
@@ -195,7 +196,7 @@
|
|||||||
//
|
//
|
||||||
this.btnGetColumn2.Location = new System.Drawing.Point(292, 80);
|
this.btnGetColumn2.Location = new System.Drawing.Point(292, 80);
|
||||||
this.btnGetColumn2.Name = "btnGetColumn2";
|
this.btnGetColumn2.Name = "btnGetColumn2";
|
||||||
this.btnGetColumn2.Size = new System.Drawing.Size(60, 23);
|
this.btnGetColumn2.Size = new System.Drawing.Size(69, 23);
|
||||||
this.btnGetColumn2.TabIndex = 19;
|
this.btnGetColumn2.TabIndex = 19;
|
||||||
this.btnGetColumn2.Text = "GetColumn";
|
this.btnGetColumn2.Text = "GetColumn";
|
||||||
this.btnGetColumn2.UseVisualStyleBackColor = true;
|
this.btnGetColumn2.UseVisualStyleBackColor = true;
|
||||||
@@ -232,7 +233,7 @@
|
|||||||
//
|
//
|
||||||
this.btnGetColumn3.Location = new System.Drawing.Point(292, 109);
|
this.btnGetColumn3.Location = new System.Drawing.Point(292, 109);
|
||||||
this.btnGetColumn3.Name = "btnGetColumn3";
|
this.btnGetColumn3.Name = "btnGetColumn3";
|
||||||
this.btnGetColumn3.Size = new System.Drawing.Size(60, 23);
|
this.btnGetColumn3.Size = new System.Drawing.Size(69, 23);
|
||||||
this.btnGetColumn3.TabIndex = 23;
|
this.btnGetColumn3.TabIndex = 23;
|
||||||
this.btnGetColumn3.Text = "GetColumn";
|
this.btnGetColumn3.Text = "GetColumn";
|
||||||
this.btnGetColumn3.UseVisualStyleBackColor = true;
|
this.btnGetColumn3.UseVisualStyleBackColor = true;
|
||||||
@@ -246,11 +247,22 @@
|
|||||||
this.txtPages.Size = new System.Drawing.Size(75, 20);
|
this.txtPages.Size = new System.Drawing.Size(75, 20);
|
||||||
this.txtPages.TabIndex = 27;
|
this.txtPages.TabIndex = 27;
|
||||||
//
|
//
|
||||||
|
// chkRender
|
||||||
|
//
|
||||||
|
this.chkRender.AutoSize = true;
|
||||||
|
this.chkRender.Location = new System.Drawing.Point(292, 138);
|
||||||
|
this.chkRender.Name = "chkRender";
|
||||||
|
this.chkRender.Size = new System.Drawing.Size(61, 17);
|
||||||
|
this.chkRender.TabIndex = 28;
|
||||||
|
this.chkRender.Text = "Render";
|
||||||
|
this.chkRender.UseVisualStyleBackColor = true;
|
||||||
|
//
|
||||||
// FrmPdfInfo
|
// FrmPdfInfo
|
||||||
//
|
//
|
||||||
this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
|
this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
|
||||||
this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
|
this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
|
||||||
this.ClientSize = new System.Drawing.Size(484, 461);
|
this.ClientSize = new System.Drawing.Size(484, 461);
|
||||||
|
this.Controls.Add(this.chkRender);
|
||||||
this.Controls.Add(this.txtPages);
|
this.Controls.Add(this.txtPages);
|
||||||
this.Controls.Add(this.btnHasText3);
|
this.Controls.Add(this.btnHasText3);
|
||||||
this.Controls.Add(this.btnGetField3);
|
this.Controls.Add(this.btnGetField3);
|
||||||
@@ -302,5 +314,6 @@
|
|||||||
private System.Windows.Forms.TextBox txtField3;
|
private System.Windows.Forms.TextBox txtField3;
|
||||||
private System.Windows.Forms.Button btnGetColumn3;
|
private System.Windows.Forms.Button btnGetColumn3;
|
||||||
private System.Windows.Forms.TextBox txtPages;
|
private System.Windows.Forms.TextBox txtPages;
|
||||||
|
private System.Windows.Forms.CheckBox chkRender;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1,7 +1,6 @@
|
|||||||
using System;
|
using System;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using System.Drawing;
|
using System.Drawing;
|
||||||
using System.Drawing.Drawing2D;
|
|
||||||
using System.Drawing.Imaging;
|
using System.Drawing.Imaging;
|
||||||
using System.IO;
|
using System.IO;
|
||||||
using System.Linq;
|
using System.Linq;
|
||||||
@@ -214,10 +213,11 @@ namespace VAR.PdfTools.Workbench
|
|||||||
if (part.Contains("-"))
|
if (part.Contains("-"))
|
||||||
{
|
{
|
||||||
string[] range = part.Split('-');
|
string[] range = part.Split('-');
|
||||||
if (range.Length == 2) {
|
if (range.Length == 2)
|
||||||
|
{
|
||||||
int pageStart;
|
int pageStart;
|
||||||
int pageEnd;
|
int pageEnd;
|
||||||
if(int.TryParse(range[0], out pageStart) && int.TryParse(range[1], out pageEnd))
|
if (int.TryParse(range[0], out pageStart) && int.TryParse(range[1], out pageEnd))
|
||||||
{
|
{
|
||||||
listPages.AddRange(Enumerable.Range(pageStart, (pageEnd - pageStart) + 1));
|
listPages.AddRange(Enumerable.Range(pageStart, (pageEnd - pageStart) + 1));
|
||||||
}
|
}
|
||||||
@@ -226,13 +226,13 @@ namespace VAR.PdfTools.Workbench
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
int pageNum;
|
int pageNum;
|
||||||
if(int.TryParse(part, out pageNum))
|
if (int.TryParse(part, out pageNum))
|
||||||
{
|
{
|
||||||
listPages.Add(pageNum);
|
listPages.Add(pageNum);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if(listPages.Count == 0)
|
if (listPages.Count == 0)
|
||||||
{
|
{
|
||||||
listPages.AddRange(Enumerable.Range(1, maxPages));
|
listPages.AddRange(Enumerable.Range(1, maxPages));
|
||||||
}
|
}
|
||||||
@@ -294,18 +294,29 @@ namespace VAR.PdfTools.Workbench
|
|||||||
}
|
}
|
||||||
|
|
||||||
PdfDocument doc = PdfDocument.Load(pdfPath);
|
PdfDocument doc = PdfDocument.Load(pdfPath);
|
||||||
|
string baseDocumentPath = Path.GetDirectoryName(txtPdfPath.Text);
|
||||||
|
string baseDocumentFilename = Path.GetFileNameWithoutExtension(txtPdfPath.Text);
|
||||||
|
|
||||||
IEnumerable<int> selectedPages = GetSelectedPages(doc.Pages.Count);
|
IEnumerable<int> selectedPages = GetSelectedPages(doc.Pages.Count);
|
||||||
var columnData = new List<string>();
|
var columns = new List<string>();
|
||||||
int pageNum = 0;
|
int pageNum = 0;
|
||||||
foreach (PdfDocumentPage page in doc.Pages)
|
foreach (PdfDocumentPage page in doc.Pages)
|
||||||
{
|
{
|
||||||
pageNum++;
|
pageNum++;
|
||||||
if (selectedPages.Contains(pageNum) == false) { continue; }
|
if (selectedPages.Contains(pageNum) == false) { continue; }
|
||||||
PdfTextExtractor extractor = new PdfTextExtractor(page);
|
PdfTextExtractor extractor = new PdfTextExtractor(page);
|
||||||
columnData.AddRange(extractor.GetColumnAsStrings(column));
|
PdfTextElementColumn columnData = extractor.GetColumn(column);
|
||||||
|
if (chkRender.Checked)
|
||||||
|
{
|
||||||
|
var pdfPageRenderer = new PdfPageRenderer(extractor);
|
||||||
|
Bitmap bmp = pdfPageRenderer.Render();
|
||||||
|
pdfPageRenderer.RenderColumn(columnData, bmp);
|
||||||
|
string fileName = Path.Combine(baseDocumentPath, string.Format("{0}_{1:0000}.png", baseDocumentFilename, pageNum));
|
||||||
|
bmp.Save(fileName, ImageFormat.Png);
|
||||||
|
}
|
||||||
|
columns.AddRange(columnData.Elements.Select(t => t.VisibleText));
|
||||||
}
|
}
|
||||||
txtOutput.Lines = columnData.ToArray();
|
txtOutput.Lines = columns.ToArray();
|
||||||
}
|
}
|
||||||
|
|
||||||
private void btnRender_Click(object sender, EventArgs e)
|
private void btnRender_Click(object sender, EventArgs e)
|
||||||
@@ -325,19 +336,19 @@ namespace VAR.PdfTools.Workbench
|
|||||||
lines.Add(string.Format("Number of Pages : {0}", doc.Pages.Count));
|
lines.Add(string.Format("Number of Pages : {0}", doc.Pages.Count));
|
||||||
|
|
||||||
IEnumerable<int> selectedPages = GetSelectedPages(doc.Pages.Count);
|
IEnumerable<int> selectedPages = GetSelectedPages(doc.Pages.Count);
|
||||||
int pageNumber = 0;
|
int pageNum = 0;
|
||||||
foreach (PdfDocumentPage page in doc.Pages)
|
foreach (PdfDocumentPage page in doc.Pages)
|
||||||
{
|
{
|
||||||
pageNumber++;
|
pageNum++;
|
||||||
if (selectedPages.Contains(pageNumber) == false) { continue; }
|
if (selectedPages.Contains(pageNum) == false) { continue; }
|
||||||
|
|
||||||
PdfPageRenderer pdfPageRenderer = new PdfPageRenderer(page);
|
PdfPageRenderer pdfPageRenderer = new PdfPageRenderer(page);
|
||||||
Bitmap bmp = pdfPageRenderer.Render();
|
Bitmap bmp = pdfPageRenderer.Render();
|
||||||
|
|
||||||
lines.Add(string.Format("Page {0:0000} TextElements : {1}", pageNumber, pdfPageRenderer.Extractor.Elements.Count));
|
lines.Add(string.Format("Page {0:0000} TextElements : {1}", pageNum, pdfPageRenderer.Extractor.Elements.Count));
|
||||||
|
|
||||||
// Save image to disk
|
// Save image to disk
|
||||||
string fileName = Path.Combine(baseDocumentPath, string.Format("{0}_{1:0000}.png", baseDocumentFilename, pageNumber));
|
string fileName = Path.Combine(baseDocumentPath, string.Format("{0}_{1:0000}.png", baseDocumentFilename, pageNum));
|
||||||
bmp.Save(fileName, ImageFormat.Png);
|
bmp.Save(fileName, ImageFormat.Png);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -10,7 +10,14 @@ namespace VAR.PdfTools
|
|||||||
{
|
{
|
||||||
private PdfDocumentPage _page;
|
private PdfDocumentPage _page;
|
||||||
private PdfTextExtractor _pdfTextExtractor;
|
private PdfTextExtractor _pdfTextExtractor;
|
||||||
|
private Rect _pageRect;
|
||||||
|
private int _pageWidth;
|
||||||
|
private int _pageHeight;
|
||||||
|
private int _scale = 10;
|
||||||
|
|
||||||
|
private const int MaxSize = 10000;
|
||||||
|
|
||||||
|
|
||||||
public PdfTextExtractor Extractor { get { return _pdfTextExtractor; } }
|
public PdfTextExtractor Extractor { get { return _pdfTextExtractor; } }
|
||||||
|
|
||||||
public PdfPageRenderer(PdfDocumentPage page)
|
public PdfPageRenderer(PdfDocumentPage page)
|
||||||
@@ -19,45 +26,92 @@ namespace VAR.PdfTools
|
|||||||
_pdfTextExtractor = new PdfTextExtractor(_page);
|
_pdfTextExtractor = new PdfTextExtractor(_page);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Creates a renderer on top of an existing text extractor, so the text
/// elements it already holds are reused instead of being extracted again.
/// </summary>
/// <param name="pdfTextExtractor">Extractor whose page and elements will be rendered.</param>
/// <exception cref="ArgumentNullException"><paramref name="pdfTextExtractor"/> is null.</exception>
public PdfPageRenderer(PdfTextExtractor pdfTextExtractor)
{
    if (pdfTextExtractor == null) { throw new ArgumentNullException(nameof(pdfTextExtractor)); }

    _pdfTextExtractor = pdfTextExtractor;
    _page = pdfTextExtractor.Page;

    // Calculate page size and scale
    _pageRect = _pdfTextExtractor.GetRect();
    if (_pageRect == null)
    {
        // GetRect() starts from a null rectangle, so it may hand back null
        // (presumably when the page has no text elements -- confirm against GetRect()).
        // Leave the metrics untouched; Render() deals with the empty page by itself.
        return;
    }
    _pageWidth = (int)Math.Ceiling(_pageRect.XMax - _pageRect.XMin);
    _pageHeight = (int)Math.Ceiling(_pageRect.YMax - _pageRect.YMin);

    // Shrink the scale until both image dimensions fit into MaxSize, never going below 1.
    while (_scale > 1 && ((_pageWidth * _scale) > MaxSize || (_pageHeight * _scale) > MaxSize))
    {
        _scale--;
    }
    if (_scale <= 0) { _scale = 1; }
}
|
||||||
|
|
||||||
/// <summary>
/// Renders every text element of the page into a new bitmap.
/// </summary>
/// <returns>
/// A white 100x200 bitmap when the page has no text elements; otherwise a bitmap
/// of the scaled page size with all text elements drawn on it. The caller owns
/// the returned bitmap.
/// </returns>
public Bitmap Render()
{
    if (_pdfTextExtractor.Elements.Count == 0)
    {
        // Nothing to render
        Bitmap emptyBmp = new Bitmap(100, 200, PixelFormat.Format32bppArgb);
        using (Graphics gcEmpty = Graphics.FromImage(emptyBmp))
        {
            gcEmpty.Clear(Color.White);
        }
        return emptyBmp;
    }

    // The page metrics may not have been calculated by the constructor that
    // created this instance; calculate them on first use so the bitmap below
    // is never created with a zero size.
    if (_pageRect == null)
    {
        _pageRect = _pdfTextExtractor.GetRect();
        _pageWidth = (int)Math.Ceiling(_pageRect.XMax - _pageRect.XMin);
        _pageHeight = (int)Math.Ceiling(_pageRect.YMax - _pageRect.YMin);
        while (_scale > 1 && ((_pageWidth * _scale) > MaxSize || (_pageHeight * _scale) > MaxSize))
        {
            _scale--;
        }
        if (_scale <= 0) { _scale = 1; }
    }

    // Prepare image
    Bitmap bmp = new Bitmap(_pageWidth * _scale, _pageHeight * _scale, PixelFormat.Format32bppArgb);

    // Draw text elements of the page. The using block releases the Graphics
    // object even when drawing throws.
    using (Graphics gc = Graphics.FromImage(bmp))
    using (Pen penTextElem = new Pen(Color.Blue))
    using (Pen penCharElem = new Pen(Color.Navy))
    {
        gc.Clear(Color.White);
        foreach (PdfTextElement textElement in _pdfTextExtractor.Elements)
        {
            DrawTextElement(textElement, gc, penTextElem, penCharElem, _scale, _pageHeight, _pageRect.XMin, _pageRect.YMin, Brushes.Black);
        }
    }

    return bmp;
}
|
||||||
|
|
||||||
private static void DrawTextElement(PdfTextElement textElement, Graphics gc, Pen penTextElem, Pen penCharElem, int Scale, int pageHeight, double pageXMin, double pageYMin, Brush brushText)
|
/// <summary>
/// Draws the elements and the extents of a column, highlighted in red tones.
/// </summary>
/// <param name="columnData">Column to draw. A null column, or one without elements, draws nothing.</param>
/// <param name="bmp">
/// Bitmap to draw on top of, typically the result of <see cref="Render"/>.
/// When null, a new white bitmap of the scaled page size is created.
/// </param>
/// <returns>The bitmap that was drawn on: <paramref name="bmp"/> itself when one was supplied.</returns>
public Bitmap RenderColumn(PdfTextElementColumn columnData, Bitmap bmp = null)
{
    bool createdHere = (bmp == null);
    if (createdHere)
    {
        bmp = new Bitmap(_pageWidth * _scale, _pageHeight * _scale, PixelFormat.Format32bppArgb);
    }

    // The using block releases the Graphics object even when drawing throws.
    using (Graphics gc = Graphics.FromImage(bmp))
    {
        // Only a bitmap created here is cleared; a supplied one keeps its content.
        if (createdHere) { gc.Clear(Color.White); }

        if (columnData == null) { return bmp; }

        // Draw text elements of the column.
        // PdfTextElementColumn.Empty carries no element list, so guard against null.
        if (columnData.Elements != null)
        {
            using (Pen penTextElem = new Pen(Color.Red))
            using (Pen penCharElem = new Pen(Color.DarkRed))
            {
                foreach (PdfTextElement textElement in columnData.Elements)
                {
                    DrawTextElement(textElement, gc, penTextElem, penCharElem, _scale, _pageHeight, _pageRect.XMin, _pageRect.YMin, Brushes.OrangeRed);
                }
            }
        }

        // Draw column extents.
        // A column without a head element (such as PdfTextElementColumn.Empty)
        // has no meaningful extents, so nothing is drawn for it.
        if (columnData.HeadTextElement != null)
        {
            using (Pen penColumn = new Pen(Color.Red))
            {
                // Flip the Y axis: image rows are measured from the top of the page rectangle.
                float y = (float)(_pageRect.YMax - columnData.Y);
                float x1 = (float)(columnData.X1 - _pageRect.XMin);
                float x2 = (float)(columnData.X2 - _pageRect.XMin);

                // One horizontal line at the column Y, and two vertical lines
                // from there to the bottom edge of the image.
                gc.DrawLine(penColumn, x1 * _scale, y * _scale, x2 * _scale, y * _scale);
                gc.DrawLine(penColumn, x1 * _scale, y * _scale, x1 * _scale, _pageHeight * _scale);
                gc.DrawLine(penColumn, x2 * _scale, y * _scale, x2 * _scale, _pageHeight * _scale);
            }
        }
    }

    return bmp;
}
|
||||||
|
|
||||||
|
private static void DrawTextElement(PdfTextElement textElement, Graphics gc, Pen penTextElem, Pen penCharElem, int scale, int pageHeight, double pageXMin, double pageYMin, Brush brushText)
|
||||||
{
|
{
|
||||||
double textElementX = textElement.GetX() - pageXMin;
|
double textElementX = textElement.GetX() - pageXMin;
|
||||||
double textElementY = textElement.GetY() - pageYMin;
|
double textElementY = textElement.GetY() - pageYMin;
|
||||||
@@ -74,29 +128,29 @@ namespace VAR.PdfTools
|
|||||||
if (penTextElem != null)
|
if (penTextElem != null)
|
||||||
{
|
{
|
||||||
DrawRoundedRectangle(gc, penTextElem,
|
DrawRoundedRectangle(gc, penTextElem,
|
||||||
(int)(textElementPageX * Scale),
|
(int)(textElementPageX * scale),
|
||||||
(int)(textElementPageY * Scale),
|
(int)(textElementPageY * scale),
|
||||||
(int)(textElementWidth * Scale),
|
(int)(textElementWidth * scale),
|
||||||
(int)(textElementHeight * Scale),
|
(int)(textElementHeight * scale),
|
||||||
5);
|
5);
|
||||||
}
|
}
|
||||||
|
|
||||||
using (Font font = new Font("Arial", (int)(textElementHeight * Scale), GraphicsUnit.Pixel))
|
using (Font font = new Font("Arial", (int)(textElementHeight * scale), GraphicsUnit.Pixel))
|
||||||
{
|
{
|
||||||
foreach (PdfCharElement c in textElement.Characters)
|
foreach (PdfCharElement c in textElement.Characters)
|
||||||
{
|
{
|
||||||
gc.DrawString(c.Char,
|
gc.DrawString(c.Char,
|
||||||
font,
|
font,
|
||||||
brushText,
|
brushText,
|
||||||
(int)((textElementPageX + c.Displacement) * Scale),
|
(int)((textElementPageX + c.Displacement) * scale),
|
||||||
(int)(textElementPageY * Scale));
|
(int)(textElementPageY * scale));
|
||||||
if (penCharElem != null)
|
if (penCharElem != null)
|
||||||
{
|
{
|
||||||
DrawRoundedRectangle(gc, penCharElem,
|
DrawRoundedRectangle(gc, penCharElem,
|
||||||
(int)((textElementPageX + c.Displacement) * Scale),
|
(int)((textElementPageX + c.Displacement) * scale),
|
||||||
(int)(textElementPageY * Scale),
|
(int)(textElementPageY * scale),
|
||||||
(int)(c.Width * Scale),
|
(int)(c.Width * scale),
|
||||||
(int)(textElementHeight * Scale),
|
(int)(textElementHeight * scale),
|
||||||
5);
|
5);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -103,4 +103,28 @@ namespace VAR.PdfTools
|
|||||||
#endregion
|
#endregion
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Result of a column lookup: the head element, the elements that belong to
/// the column and the extents the lookup worked with.
/// </summary>
public class PdfTextElementColumn
{
    /// <summary>Element used as the head of the column; null for <see cref="Empty"/>.</summary>
    public PdfTextElement HeadTextElement { get; private set; }

    /// <summary>Elements of the column. Never null; empty when there are none.</summary>
    public IEnumerable<PdfTextElement> Elements { get; private set; }

    /// <summary>Y coordinate supplied for the column.</summary>
    public double Y { get; private set; }

    /// <summary>First horizontal extent supplied for the column.</summary>
    public double X1 { get; private set; }

    /// <summary>Second horizontal extent supplied for the column.</summary>
    public double X2 { get; private set; }

    /// <summary>Shared instance representing "no column": no head, no elements, zeroed extents.</summary>
    public static PdfTextElementColumn Empty { get; } = new PdfTextElementColumn();

    private PdfTextElementColumn()
    {
        // Callers enumerate Elements without a null check, so the empty
        // column must expose an empty sequence rather than null.
        Elements = new PdfTextElement[0];
    }

    /// <summary>
    /// Creates a column from its head element, its elements and its extents.
    /// </summary>
    /// <param name="head">Head element of the column.</param>
    /// <param name="elements">Elements of the column; null is treated as an empty sequence.</param>
    /// <param name="y">Y coordinate of the column.</param>
    /// <param name="x1">First horizontal extent.</param>
    /// <param name="x2">Second horizontal extent.</param>
    public PdfTextElementColumn(PdfTextElement head, IEnumerable<PdfTextElement> elements, double y, double x1, double x2)
    {
        HeadTextElement = head;
        Elements = elements ?? new PdfTextElement[0];
        Y = y;
        X1 = x1;
        X2 = x2;
    }
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -651,7 +651,7 @@ namespace VAR.PdfTools
|
|||||||
#endregion
|
#endregion
|
||||||
|
|
||||||
#region Public methods
|
#region Public methods
|
||||||
|
|
||||||
public Rect GetRect()
|
public Rect GetRect()
|
||||||
{
|
{
|
||||||
Rect rect = null;
|
Rect rect = null;
|
||||||
@@ -664,12 +664,12 @@ namespace VAR.PdfTools
|
|||||||
return rect;
|
return rect;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<string> GetColumnAsStrings(string column, bool fuzzy =true)
|
public PdfTextElementColumn GetColumn(string column, bool fuzzy = true)
|
||||||
{
|
{
|
||||||
PdfTextElement columnHead = FindElementByText(column, fuzzy);
|
PdfTextElement columnHead = FindElementByText(column, fuzzy);
|
||||||
if (columnHead == null)
|
if (columnHead == null)
|
||||||
{
|
{
|
||||||
return new List<string>();
|
return PdfTextElementColumn.Empty;
|
||||||
}
|
}
|
||||||
double headY = columnHead.GetY();
|
double headY = columnHead.GetY();
|
||||||
double headX1 = columnHead.GetX();
|
double headX1 = columnHead.GetX();
|
||||||
@@ -717,25 +717,34 @@ namespace VAR.PdfTools
|
|||||||
columnDataRaw = columnDataRaw.OrderByDescending(elem => elem.GetY()).ToList();
|
columnDataRaw = columnDataRaw.OrderByDescending(elem => elem.GetY()).ToList();
|
||||||
|
|
||||||
// Only items completely inside extents, and break on the first element outside
|
// Only items completely inside extents, and break on the first element outside
|
||||||
var columnData = new List<PdfTextElement>();
|
var columnElements = new List<PdfTextElement>();
|
||||||
foreach (PdfTextElement elem in columnDataRaw)
|
foreach (PdfTextElement elem in columnDataRaw)
|
||||||
{
|
{
|
||||||
double elemX1 = elem.GetX();
|
double elemX1 = elem.GetX();
|
||||||
double elemX2 = elemX1 + elem.VisibleWidth;
|
double elemX2 = elemX1 + elem.VisibleWidth;
|
||||||
if (elemX1 < extentX1 || elemX2 > extentX2) { break; }
|
if (elemX1 < extentX1 || elemX2 > extentX2) { break; }
|
||||||
|
|
||||||
columnData.Add(elem);
|
columnElements.Add(elem);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var columnData = new PdfTextElementColumn(columnHead, columnElements, headY, extentX1, extentX2);
|
||||||
|
|
||||||
|
return columnData;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Gets the visible text of every element of the column headed by the given text.
/// </summary>
/// <param name="column">Text of the column head to look for.</param>
/// <param name="fuzzy">Passed through to <see cref="GetColumn"/>.</param>
/// <returns>
/// The visible text of each column element, in the order <see cref="GetColumn"/>
/// yields them; an empty list when no column is found.
/// </returns>
public List<string> GetColumnAsStrings(string column, bool fuzzy = true)
{
    PdfTextElementColumn columnData = GetColumn(column, fuzzy);

    // GetColumn returns PdfTextElementColumn.Empty when the head is not found,
    // and that instance may carry no element list; answer with an empty result
    // instead of failing on a null sequence.
    if (columnData == null || columnData.Elements == null)
    {
        return new List<string>();
    }

    // Emit result
    return columnData.Elements.Select(elem => elem.VisibleText).ToList();
}
|
||||||
|
|
||||||
public string GetFieldAsString(string field, bool fuzzy = true)
|
public string GetFieldAsString(string field, bool fuzzy = true)
|
||||||
{
|
{
|
||||||
PdfTextElement fieldTitle = FindElementByText(field, fuzzy);
|
PdfTextElement fieldTitle = FindElementByText(field, fuzzy);
|
||||||
@@ -763,7 +772,7 @@ namespace VAR.PdfTools
|
|||||||
|
|
||||||
return fieldData.OrderBy(elem => elem.GetX()).FirstOrDefault().VisibleText;
|
return fieldData.OrderBy(elem => elem.GetX()).FirstOrDefault().VisibleText;
|
||||||
}
|
}
|
||||||
|
|
||||||
public bool HasText(string text, bool fuzzy = true)
|
public bool HasText(string text, bool fuzzy = true)
|
||||||
{
|
{
|
||||||
List<PdfTextElement> list = FindElementsContainingText(text, fuzzy);
|
List<PdfTextElement> list = FindElementsContainingText(text, fuzzy);
|
||||||
|
|||||||
Reference in New Issue
Block a user