From d46f8d2abecd6bc1fd4b6c08cb944a55c597db32 Mon Sep 17 00:00:00 2001 From: "Valeriano A.R" Date: Mon, 27 Jun 2016 12:20:24 +0200 Subject: [PATCH] PdfTextExtractor: New method HasText to determine if there is some text in the document. --- VAR.PdfTools.Workbench/FrmPdfInfo.Designer.cs | 29 +++++++- VAR.PdfTools.Workbench/FrmPdfInfo.cs | 22 ++++++ VAR.PdfTools.Workbench/FrmPdfInfo.resx | 4 +- .../Properties/Settings.Designer.cs | 67 +++++++++---------- .../Properties/Settings.settings | 3 + VAR.PdfTools/PdfTextExtractor.cs | 15 +++++ 6 files changed, 101 insertions(+), 39 deletions(-) diff --git a/VAR.PdfTools.Workbench/FrmPdfInfo.Designer.cs b/VAR.PdfTools.Workbench/FrmPdfInfo.Designer.cs index 3ebbebf..cb53e9c 100644 --- a/VAR.PdfTools.Workbench/FrmPdfInfo.Designer.cs +++ b/VAR.PdfTools.Workbench/FrmPdfInfo.Designer.cs @@ -38,13 +38,15 @@ this.txtColumnName = new System.Windows.Forms.TextBox(); this.txtFieldName = new System.Windows.Forms.TextBox(); this.btnGetField = new System.Windows.Forms.Button(); + this.txtText = new System.Windows.Forms.TextBox(); + this.btnHasText = new System.Windows.Forms.Button(); this.SuspendLayout(); // // lblOutputs // this.lblOutputs.AutoSize = true; this.lblOutputs.Font = new System.Drawing.Font("Microsoft Sans Serif", 8.25F, System.Drawing.FontStyle.Bold, System.Drawing.GraphicsUnit.Point, ((byte)(0))); - this.lblOutputs.Location = new System.Drawing.Point(12, 130); + this.lblOutputs.Location = new System.Drawing.Point(12, 143); this.lblOutputs.Name = "lblOutputs"; this.lblOutputs.Size = new System.Drawing.Size(51, 13); this.lblOutputs.TabIndex = 11; @@ -88,11 +90,11 @@ | System.Windows.Forms.AnchorStyles.Left) | System.Windows.Forms.AnchorStyles.Right))); this.txtOutput.Font = new System.Drawing.Font("Consolas", 8.25F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); - this.txtOutput.Location = new System.Drawing.Point(15, 146); + this.txtOutput.Location = new System.Drawing.Point(15, 159); 
this.txtOutput.Multiline = true; this.txtOutput.Name = "txtOutput"; this.txtOutput.ScrollBars = System.Windows.Forms.ScrollBars.Vertical; - this.txtOutput.Size = new System.Drawing.Size(457, 303); + this.txtOutput.Size = new System.Drawing.Size(457, 290); this.txtOutput.TabIndex = 7; // // btnProcess @@ -140,11 +142,30 @@ this.btnGetField.UseVisualStyleBackColor = true; this.btnGetField.Click += new System.EventHandler(this.btnGetField_Click); // + // txtText + // + this.txtText.Location = new System.Drawing.Point(15, 111); + this.txtText.Name = "txtText"; + this.txtText.Size = new System.Drawing.Size(142, 20); + this.txtText.TabIndex = 17; + // + // btnHasText + // + this.btnHasText.Location = new System.Drawing.Point(163, 109); + this.btnHasText.Name = "btnHasText"; + this.btnHasText.Size = new System.Drawing.Size(75, 23); + this.btnHasText.TabIndex = 16; + this.btnHasText.Text = "HasText"; + this.btnHasText.UseVisualStyleBackColor = true; + this.btnHasText.Click += new System.EventHandler(this.btnHasText_Click); + // // FrmPdfInfo // this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F); this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font; this.ClientSize = new System.Drawing.Size(484, 461); + this.Controls.Add(this.txtText); + this.Controls.Add(this.btnHasText); this.Controls.Add(this.txtFieldName); this.Controls.Add(this.btnGetField); this.Controls.Add(this.txtColumnName); @@ -176,5 +197,7 @@ private System.Windows.Forms.TextBox txtColumnName; private System.Windows.Forms.TextBox txtFieldName; private System.Windows.Forms.Button btnGetField; + private System.Windows.Forms.TextBox txtText; + private System.Windows.Forms.Button btnHasText; } } \ No newline at end of file diff --git a/VAR.PdfTools.Workbench/FrmPdfInfo.cs b/VAR.PdfTools.Workbench/FrmPdfInfo.cs index 7fb2c8f..f2064be 100644 --- a/VAR.PdfTools.Workbench/FrmPdfInfo.cs +++ b/VAR.PdfTools.Workbench/FrmPdfInfo.cs @@ -17,6 +17,7 @@ namespace VAR.PdfTools.Workbench txtPdfPath.Text = 
Properties.Settings.Default.LastPdfPath; txtColumnName.Text = Properties.Settings.Default.LastColumnName; txtFieldName.Text = Properties.Settings.Default.LastFieldName; + txtText.Text = Properties.Settings.Default.LastText; } private void FrmPdfInfo_FormClosing(object sender, FormClosingEventArgs e) @@ -24,6 +25,7 @@ namespace VAR.PdfTools.Workbench Properties.Settings.Default.LastPdfPath = txtPdfPath.Text; Properties.Settings.Default.LastColumnName = txtColumnName.Text; Properties.Settings.Default.LastFieldName = txtFieldName.Text; + Properties.Settings.Default.LastText = txtText.Text; Properties.Settings.Default.Save(); } @@ -132,5 +134,25 @@ namespace VAR.PdfTools.Workbench } txtOutput.Lines = fieldData.ToArray(); } + + /* Writes one line per page to txtOutput, stating whether HasText finds the text typed in txtText on that page of the PDF at txtPdfPath; shows a message box and returns when the file does not exist. */ private void btnHasText_Click(object sender, EventArgs e) + { + if (System.IO.File.Exists(txtPdfPath.Text) == false) + { + MessageBox.Show("File does not exist"); + return; + } + + PdfDocument doc = PdfDocument.Load(txtPdfPath.Text); + + List<string> lines = new List<string>(); + int pageNum = 1; + foreach (PdfDocumentPage page in doc.Pages) + { + PdfTextExtractor extractor = new PdfTextExtractor(page); + /* pageNum++ formats the current 1-based page number and then advances it, so each page is reported under its own number */ lines.Add(string.Format("Page({0}) : {1}", pageNum++, Convert.ToString(extractor.HasText(txtText.Text)))); + } + txtOutput.Lines = lines.ToArray(); + } } } diff --git a/VAR.PdfTools.Workbench/FrmPdfInfo.resx b/VAR.PdfTools.Workbench/FrmPdfInfo.resx index 1af7de1..7080a7d 100644 --- a/VAR.PdfTools.Workbench/FrmPdfInfo.resx +++ b/VAR.PdfTools.Workbench/FrmPdfInfo.resx @@ -112,9 +112,9 @@ 2.0 - System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + System.Resources.ResXResourceReader, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 - System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + System.Resources.ResXResourceWriter, System.Windows.Forms, Version=2.0.0.0, Culture=neutral,
PublicKeyToken=b77a5c561934e089 \ No newline at end of file diff --git a/VAR.PdfTools.Workbench/Properties/Settings.Designer.cs b/VAR.PdfTools.Workbench/Properties/Settings.Designer.cs index 63098fa..07d72bb 100644 --- a/VAR.PdfTools.Workbench/Properties/Settings.Designer.cs +++ b/VAR.PdfTools.Workbench/Properties/Settings.Designer.cs @@ -8,68 +8,67 @@ // //------------------------------------------------------------------------------ -namespace VAR.PdfTools.Workbench.Properties -{ - - +namespace VAR.PdfTools.Workbench.Properties { + + [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()] - [global::System.CodeDom.Compiler.GeneratedCodeAttribute("Microsoft.VisualStudio.Editors.SettingsDesigner.SettingsSingleFileGenerator", "14.0.0.0")] - internal sealed partial class Settings : global::System.Configuration.ApplicationSettingsBase - { - + [global::System.CodeDom.Compiler.GeneratedCodeAttribute("Microsoft.VisualStudio.Editors.SettingsDesigner.SettingsSingleFileGenerator", "10.0.0.0")] + internal sealed partial class Settings : global::System.Configuration.ApplicationSettingsBase { + private static Settings defaultInstance = ((Settings)(global::System.Configuration.ApplicationSettingsBase.Synchronized(new Settings()))); - - public static Settings Default - { - get - { + + public static Settings Default { + get { return defaultInstance; } } - + [global::System.Configuration.UserScopedSettingAttribute()] [global::System.Diagnostics.DebuggerNonUserCodeAttribute()] [global::System.Configuration.DefaultSettingValueAttribute("")] - public string LastPdfPath - { - get - { + public string LastPdfPath { + get { return ((string)(this["LastPdfPath"])); } - set - { + set { this["LastPdfPath"] = value; } } - + [global::System.Configuration.UserScopedSettingAttribute()] [global::System.Diagnostics.DebuggerNonUserCodeAttribute()] [global::System.Configuration.DefaultSettingValueAttribute("")] - public string LastColumnName - { - get - { + public string 
LastColumnName { + get { return ((string)(this["LastColumnName"])); } - set - { + set { this["LastColumnName"] = value; } } - + [global::System.Configuration.UserScopedSettingAttribute()] [global::System.Diagnostics.DebuggerNonUserCodeAttribute()] [global::System.Configuration.DefaultSettingValueAttribute("")] - public string LastFieldName - { - get - { + public string LastFieldName { + get { return ((string)(this["LastFieldName"])); } - set - { + set { this["LastFieldName"] = value; } } + + [global::System.Configuration.UserScopedSettingAttribute()] + [global::System.Diagnostics.DebuggerNonUserCodeAttribute()] + [global::System.Configuration.DefaultSettingValueAttribute("")] + public string LastText { + get { + return ((string)(this["LastText"])); + } + set { + this["LastText"] = value; + } + } } } diff --git a/VAR.PdfTools.Workbench/Properties/Settings.settings b/VAR.PdfTools.Workbench/Properties/Settings.settings index a2e8457..efe1137 100644 --- a/VAR.PdfTools.Workbench/Properties/Settings.settings +++ b/VAR.PdfTools.Workbench/Properties/Settings.settings @@ -11,5 +11,8 @@ + + + \ No newline at end of file diff --git a/VAR.PdfTools/PdfTextExtractor.cs b/VAR.PdfTools/PdfTextExtractor.cs index d53ebeb..9b4f8f7 100644 --- a/VAR.PdfTools/PdfTextExtractor.cs +++ b/VAR.PdfTools/PdfTextExtractor.cs @@ -696,6 +696,21 @@ namespace VAR.PdfTools return fieldData.OrderBy(elem => elem.GetX()).FirstOrDefault().VisibleText; } + /* Convenience overload: forwards to HasText(text, fuzzy) with fuzzy set to true. */ public bool HasText(string text) + { + return HasText(text, true); + } + + /* Returns true when FindElementByText(text, fuzzy) yields an element, false when it yields null. */ public bool HasText(string text, bool fuzzy) + { + PdfTextElement match = FindElementByText(text, fuzzy); + if (match != null) + { + return true; + } + return false; + } + #endregion } }