PdfTextExtractor: Get results as PdfTextElementColumn, for debugging purposes.
This commit is contained in:
19
VAR.PdfTools.Workbench/FrmPdfInfo.Designer.cs
generated
19
VAR.PdfTools.Workbench/FrmPdfInfo.Designer.cs
generated
@@ -48,6 +48,7 @@
|
||||
this.txtField3 = new System.Windows.Forms.TextBox();
|
||||
this.btnGetColumn3 = new System.Windows.Forms.Button();
|
||||
this.txtPages = new System.Windows.Forms.TextBox();
|
||||
this.chkRender = new System.Windows.Forms.CheckBox();
|
||||
this.SuspendLayout();
|
||||
//
|
||||
// lblOutputs
|
||||
@@ -120,7 +121,7 @@
|
||||
//
|
||||
this.btnGetColumn1.Location = new System.Drawing.Point(292, 51);
|
||||
this.btnGetColumn1.Name = "btnGetColumn1";
|
||||
this.btnGetColumn1.Size = new System.Drawing.Size(60, 23);
|
||||
this.btnGetColumn1.Size = new System.Drawing.Size(69, 23);
|
||||
this.btnGetColumn1.TabIndex = 12;
|
||||
this.btnGetColumn1.Text = "GetColumn";
|
||||
this.btnGetColumn1.UseVisualStyleBackColor = true;
|
||||
@@ -195,7 +196,7 @@
|
||||
//
|
||||
this.btnGetColumn2.Location = new System.Drawing.Point(292, 80);
|
||||
this.btnGetColumn2.Name = "btnGetColumn2";
|
||||
this.btnGetColumn2.Size = new System.Drawing.Size(60, 23);
|
||||
this.btnGetColumn2.Size = new System.Drawing.Size(69, 23);
|
||||
this.btnGetColumn2.TabIndex = 19;
|
||||
this.btnGetColumn2.Text = "GetColumn";
|
||||
this.btnGetColumn2.UseVisualStyleBackColor = true;
|
||||
@@ -232,7 +233,7 @@
|
||||
//
|
||||
this.btnGetColumn3.Location = new System.Drawing.Point(292, 109);
|
||||
this.btnGetColumn3.Name = "btnGetColumn3";
|
||||
this.btnGetColumn3.Size = new System.Drawing.Size(60, 23);
|
||||
this.btnGetColumn3.Size = new System.Drawing.Size(69, 23);
|
||||
this.btnGetColumn3.TabIndex = 23;
|
||||
this.btnGetColumn3.Text = "GetColumn";
|
||||
this.btnGetColumn3.UseVisualStyleBackColor = true;
|
||||
@@ -246,11 +247,22 @@
|
||||
this.txtPages.Size = new System.Drawing.Size(75, 20);
|
||||
this.txtPages.TabIndex = 27;
|
||||
//
|
||||
// chkRender
|
||||
//
|
||||
this.chkRender.AutoSize = true;
|
||||
this.chkRender.Location = new System.Drawing.Point(292, 138);
|
||||
this.chkRender.Name = "chkRender";
|
||||
this.chkRender.Size = new System.Drawing.Size(61, 17);
|
||||
this.chkRender.TabIndex = 28;
|
||||
this.chkRender.Text = "Render";
|
||||
this.chkRender.UseVisualStyleBackColor = true;
|
||||
//
|
||||
// FrmPdfInfo
|
||||
//
|
||||
this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
|
||||
this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
|
||||
this.ClientSize = new System.Drawing.Size(484, 461);
|
||||
this.Controls.Add(this.chkRender);
|
||||
this.Controls.Add(this.txtPages);
|
||||
this.Controls.Add(this.btnHasText3);
|
||||
this.Controls.Add(this.btnGetField3);
|
||||
@@ -302,5 +314,6 @@
|
||||
private System.Windows.Forms.TextBox txtField3;
|
||||
private System.Windows.Forms.Button btnGetColumn3;
|
||||
private System.Windows.Forms.TextBox txtPages;
|
||||
private System.Windows.Forms.CheckBox chkRender;
|
||||
}
|
||||
}
|
||||
@@ -1,7 +1,6 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Drawing;
|
||||
using System.Drawing.Drawing2D;
|
||||
using System.Drawing.Imaging;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
@@ -214,10 +213,11 @@ namespace VAR.PdfTools.Workbench
|
||||
if (part.Contains("-"))
|
||||
{
|
||||
string[] range = part.Split('-');
|
||||
if (range.Length == 2) {
|
||||
if (range.Length == 2)
|
||||
{
|
||||
int pageStart;
|
||||
int pageEnd;
|
||||
if(int.TryParse(range[0], out pageStart) && int.TryParse(range[1], out pageEnd))
|
||||
if (int.TryParse(range[0], out pageStart) && int.TryParse(range[1], out pageEnd))
|
||||
{
|
||||
listPages.AddRange(Enumerable.Range(pageStart, (pageEnd - pageStart) + 1));
|
||||
}
|
||||
@@ -226,13 +226,13 @@ namespace VAR.PdfTools.Workbench
|
||||
else
|
||||
{
|
||||
int pageNum;
|
||||
if(int.TryParse(part, out pageNum))
|
||||
if (int.TryParse(part, out pageNum))
|
||||
{
|
||||
listPages.Add(pageNum);
|
||||
}
|
||||
}
|
||||
}
|
||||
if(listPages.Count == 0)
|
||||
if (listPages.Count == 0)
|
||||
{
|
||||
listPages.AddRange(Enumerable.Range(1, maxPages));
|
||||
}
|
||||
@@ -294,18 +294,29 @@ namespace VAR.PdfTools.Workbench
|
||||
}
|
||||
|
||||
PdfDocument doc = PdfDocument.Load(pdfPath);
|
||||
string baseDocumentPath = Path.GetDirectoryName(txtPdfPath.Text);
|
||||
string baseDocumentFilename = Path.GetFileNameWithoutExtension(txtPdfPath.Text);
|
||||
|
||||
IEnumerable<int> selectedPages = GetSelectedPages(doc.Pages.Count);
|
||||
var columnData = new List<string>();
|
||||
var columns = new List<string>();
|
||||
int pageNum = 0;
|
||||
foreach (PdfDocumentPage page in doc.Pages)
|
||||
{
|
||||
pageNum++;
|
||||
if (selectedPages.Contains(pageNum) == false) { continue; }
|
||||
PdfTextExtractor extractor = new PdfTextExtractor(page);
|
||||
columnData.AddRange(extractor.GetColumnAsStrings(column));
|
||||
PdfTextElementColumn columnData = extractor.GetColumn(column);
|
||||
if (chkRender.Checked)
|
||||
{
|
||||
var pdfPageRenderer = new PdfPageRenderer(extractor);
|
||||
Bitmap bmp = pdfPageRenderer.Render();
|
||||
pdfPageRenderer.RenderColumn(columnData, bmp);
|
||||
string fileName = Path.Combine(baseDocumentPath, string.Format("{0}_{1:0000}.png", baseDocumentFilename, pageNum));
|
||||
bmp.Save(fileName, ImageFormat.Png);
|
||||
}
|
||||
columns.AddRange(columnData.Elements.Select(t => t.VisibleText));
|
||||
}
|
||||
txtOutput.Lines = columnData.ToArray();
|
||||
txtOutput.Lines = columns.ToArray();
|
||||
}
|
||||
|
||||
private void btnRender_Click(object sender, EventArgs e)
|
||||
@@ -325,19 +336,19 @@ namespace VAR.PdfTools.Workbench
|
||||
lines.Add(string.Format("Number of Pages : {0}", doc.Pages.Count));
|
||||
|
||||
IEnumerable<int> selectedPages = GetSelectedPages(doc.Pages.Count);
|
||||
int pageNumber = 0;
|
||||
int pageNum = 0;
|
||||
foreach (PdfDocumentPage page in doc.Pages)
|
||||
{
|
||||
pageNumber++;
|
||||
if (selectedPages.Contains(pageNumber) == false) { continue; }
|
||||
pageNum++;
|
||||
if (selectedPages.Contains(pageNum) == false) { continue; }
|
||||
|
||||
PdfPageRenderer pdfPageRenderer = new PdfPageRenderer(page);
|
||||
Bitmap bmp = pdfPageRenderer.Render();
|
||||
|
||||
lines.Add(string.Format("Page {0:0000} TextElements : {1}", pageNumber, pdfPageRenderer.Extractor.Elements.Count));
|
||||
lines.Add(string.Format("Page {0:0000} TextElements : {1}", pageNum, pdfPageRenderer.Extractor.Elements.Count));
|
||||
|
||||
// Save image to disk
|
||||
string fileName = Path.Combine(baseDocumentPath, string.Format("{0}_{1:0000}.png", baseDocumentFilename, pageNumber));
|
||||
string fileName = Path.Combine(baseDocumentPath, string.Format("{0}_{1:0000}.png", baseDocumentFilename, pageNum));
|
||||
bmp.Save(fileName, ImageFormat.Png);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user