PdfTextExtractor: New method HasText to determine if there is some text in the document.

This commit is contained in:
2016-06-27 12:20:24 +02:00
parent 7d9b7981a8
commit d46f8d2abe
6 changed files with 101 additions and 39 deletions

View File

@@ -38,13 +38,15 @@
this.txtColumnName = new System.Windows.Forms.TextBox(); this.txtColumnName = new System.Windows.Forms.TextBox();
this.txtFieldName = new System.Windows.Forms.TextBox(); this.txtFieldName = new System.Windows.Forms.TextBox();
this.btnGetField = new System.Windows.Forms.Button(); this.btnGetField = new System.Windows.Forms.Button();
this.txtText = new System.Windows.Forms.TextBox();
this.btnHasText = new System.Windows.Forms.Button();
this.SuspendLayout(); this.SuspendLayout();
// //
// lblOutputs // lblOutputs
// //
this.lblOutputs.AutoSize = true; this.lblOutputs.AutoSize = true;
this.lblOutputs.Font = new System.Drawing.Font("Microsoft Sans Serif", 8.25F, System.Drawing.FontStyle.Bold, System.Drawing.GraphicsUnit.Point, ((byte)(0))); this.lblOutputs.Font = new System.Drawing.Font("Microsoft Sans Serif", 8.25F, System.Drawing.FontStyle.Bold, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
this.lblOutputs.Location = new System.Drawing.Point(12, 130); this.lblOutputs.Location = new System.Drawing.Point(12, 143);
this.lblOutputs.Name = "lblOutputs"; this.lblOutputs.Name = "lblOutputs";
this.lblOutputs.Size = new System.Drawing.Size(51, 13); this.lblOutputs.Size = new System.Drawing.Size(51, 13);
this.lblOutputs.TabIndex = 11; this.lblOutputs.TabIndex = 11;
@@ -88,11 +90,11 @@
| System.Windows.Forms.AnchorStyles.Left) | System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right))); | System.Windows.Forms.AnchorStyles.Right)));
this.txtOutput.Font = new System.Drawing.Font("Consolas", 8.25F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); this.txtOutput.Font = new System.Drawing.Font("Consolas", 8.25F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
this.txtOutput.Location = new System.Drawing.Point(15, 146); this.txtOutput.Location = new System.Drawing.Point(15, 159);
this.txtOutput.Multiline = true; this.txtOutput.Multiline = true;
this.txtOutput.Name = "txtOutput"; this.txtOutput.Name = "txtOutput";
this.txtOutput.ScrollBars = System.Windows.Forms.ScrollBars.Vertical; this.txtOutput.ScrollBars = System.Windows.Forms.ScrollBars.Vertical;
this.txtOutput.Size = new System.Drawing.Size(457, 303); this.txtOutput.Size = new System.Drawing.Size(457, 290);
this.txtOutput.TabIndex = 7; this.txtOutput.TabIndex = 7;
// //
// btnProcess // btnProcess
@@ -140,11 +142,30 @@
this.btnGetField.UseVisualStyleBackColor = true; this.btnGetField.UseVisualStyleBackColor = true;
this.btnGetField.Click += new System.EventHandler(this.btnGetField_Click); this.btnGetField.Click += new System.EventHandler(this.btnGetField_Click);
// //
// txtText
//
this.txtText.Location = new System.Drawing.Point(15, 111);
this.txtText.Name = "txtText";
this.txtText.Size = new System.Drawing.Size(142, 20);
this.txtText.TabIndex = 17;
//
// btnHasText
//
this.btnHasText.Location = new System.Drawing.Point(163, 109);
this.btnHasText.Name = "btnHasText";
this.btnHasText.Size = new System.Drawing.Size(75, 23);
this.btnHasText.TabIndex = 16;
this.btnHasText.Text = "HasText";
this.btnHasText.UseVisualStyleBackColor = true;
this.btnHasText.Click += new System.EventHandler(this.btnHasText_Click);
//
// FrmPdfInfo // FrmPdfInfo
// //
this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F); this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font; this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
this.ClientSize = new System.Drawing.Size(484, 461); this.ClientSize = new System.Drawing.Size(484, 461);
this.Controls.Add(this.txtText);
this.Controls.Add(this.btnHasText);
this.Controls.Add(this.txtFieldName); this.Controls.Add(this.txtFieldName);
this.Controls.Add(this.btnGetField); this.Controls.Add(this.btnGetField);
this.Controls.Add(this.txtColumnName); this.Controls.Add(this.txtColumnName);
@@ -176,5 +197,7 @@
private System.Windows.Forms.TextBox txtColumnName; private System.Windows.Forms.TextBox txtColumnName;
private System.Windows.Forms.TextBox txtFieldName; private System.Windows.Forms.TextBox txtFieldName;
private System.Windows.Forms.Button btnGetField; private System.Windows.Forms.Button btnGetField;
private System.Windows.Forms.TextBox txtText;
private System.Windows.Forms.Button btnHasText;
} }
} }

View File

@@ -17,6 +17,7 @@ namespace VAR.PdfTools.Workbench
txtPdfPath.Text = Properties.Settings.Default.LastPdfPath; txtPdfPath.Text = Properties.Settings.Default.LastPdfPath;
txtColumnName.Text = Properties.Settings.Default.LastColumnName; txtColumnName.Text = Properties.Settings.Default.LastColumnName;
txtFieldName.Text = Properties.Settings.Default.LastFieldName; txtFieldName.Text = Properties.Settings.Default.LastFieldName;
txtText.Text = Properties.Settings.Default.LastText;
} }
private void FrmPdfInfo_FormClosing(object sender, FormClosingEventArgs e) private void FrmPdfInfo_FormClosing(object sender, FormClosingEventArgs e)
@@ -24,6 +25,7 @@ namespace VAR.PdfTools.Workbench
Properties.Settings.Default.LastPdfPath = txtPdfPath.Text; Properties.Settings.Default.LastPdfPath = txtPdfPath.Text;
Properties.Settings.Default.LastColumnName = txtColumnName.Text; Properties.Settings.Default.LastColumnName = txtColumnName.Text;
Properties.Settings.Default.LastFieldName = txtFieldName.Text; Properties.Settings.Default.LastFieldName = txtFieldName.Text;
Properties.Settings.Default.LastText = txtText.Text;
Properties.Settings.Default.Save(); Properties.Settings.Default.Save();
} }
@@ -132,5 +134,25 @@ namespace VAR.PdfTools.Workbench
} }
txtOutput.Lines = fieldData.ToArray(); txtOutput.Lines = fieldData.ToArray();
} }
/// <summary>
/// Click handler for the "HasText" button: loads the PDF at the path typed in
/// <c>txtPdfPath</c> and writes one line per page to <c>txtOutput</c> stating
/// whether the text typed in <c>txtText</c> was found on that page.
/// </summary>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnHasText_Click(object sender, EventArgs e)
{
    if (System.IO.File.Exists(txtPdfPath.Text) == false)
    {
        MessageBox.Show("File does not exist");
        return;
    }

    PdfDocument doc = PdfDocument.Load(txtPdfPath.Text);

    List<string> lines = new List<string>();
    int pageNum = 1;
    foreach (PdfDocumentPage page in doc.Pages)
    {
        PdfTextExtractor extractor = new PdfTextExtractor(page);
        lines.Add(string.Format("Page({0}) : {1}", pageNum, Convert.ToString(extractor.HasText(txtText.Text))));

        // Advance the 1-based page counter; without this every line was labelled "Page(1)".
        pageNum++;
    }
    txtOutput.Lines = lines.ToArray();
}
} }
} }

View File

@@ -112,9 +112,9 @@
<value>2.0</value> <value>2.0</value>
</resheader> </resheader>
<resheader name="reader"> <resheader name="reader">
<value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value> <value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader> </resheader>
<resheader name="writer"> <resheader name="writer">
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value> <value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader> </resheader>
</root> </root>

View File

@@ -8,21 +8,17 @@
// </auto-generated> // </auto-generated>
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
namespace VAR.PdfTools.Workbench.Properties namespace VAR.PdfTools.Workbench.Properties {
{
[global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()] [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
[global::System.CodeDom.Compiler.GeneratedCodeAttribute("Microsoft.VisualStudio.Editors.SettingsDesigner.SettingsSingleFileGenerator", "14.0.0.0")] [global::System.CodeDom.Compiler.GeneratedCodeAttribute("Microsoft.VisualStudio.Editors.SettingsDesigner.SettingsSingleFileGenerator", "10.0.0.0")]
internal sealed partial class Settings : global::System.Configuration.ApplicationSettingsBase internal sealed partial class Settings : global::System.Configuration.ApplicationSettingsBase {
{
private static Settings defaultInstance = ((Settings)(global::System.Configuration.ApplicationSettingsBase.Synchronized(new Settings()))); private static Settings defaultInstance = ((Settings)(global::System.Configuration.ApplicationSettingsBase.Synchronized(new Settings())));
public static Settings Default public static Settings Default {
{ get {
get
{
return defaultInstance; return defaultInstance;
} }
} }
@@ -30,14 +26,11 @@ namespace VAR.PdfTools.Workbench.Properties
[global::System.Configuration.UserScopedSettingAttribute()] [global::System.Configuration.UserScopedSettingAttribute()]
[global::System.Diagnostics.DebuggerNonUserCodeAttribute()] [global::System.Diagnostics.DebuggerNonUserCodeAttribute()]
[global::System.Configuration.DefaultSettingValueAttribute("")] [global::System.Configuration.DefaultSettingValueAttribute("")]
public string LastPdfPath public string LastPdfPath {
{ get {
get
{
return ((string)(this["LastPdfPath"])); return ((string)(this["LastPdfPath"]));
} }
set set {
{
this["LastPdfPath"] = value; this["LastPdfPath"] = value;
} }
} }
@@ -45,14 +38,11 @@ namespace VAR.PdfTools.Workbench.Properties
[global::System.Configuration.UserScopedSettingAttribute()] [global::System.Configuration.UserScopedSettingAttribute()]
[global::System.Diagnostics.DebuggerNonUserCodeAttribute()] [global::System.Diagnostics.DebuggerNonUserCodeAttribute()]
[global::System.Configuration.DefaultSettingValueAttribute("")] [global::System.Configuration.DefaultSettingValueAttribute("")]
public string LastColumnName public string LastColumnName {
{ get {
get
{
return ((string)(this["LastColumnName"])); return ((string)(this["LastColumnName"]));
} }
set set {
{
this["LastColumnName"] = value; this["LastColumnName"] = value;
} }
} }
@@ -60,16 +50,25 @@ namespace VAR.PdfTools.Workbench.Properties
[global::System.Configuration.UserScopedSettingAttribute()] [global::System.Configuration.UserScopedSettingAttribute()]
[global::System.Diagnostics.DebuggerNonUserCodeAttribute()] [global::System.Diagnostics.DebuggerNonUserCodeAttribute()]
[global::System.Configuration.DefaultSettingValueAttribute("")] [global::System.Configuration.DefaultSettingValueAttribute("")]
public string LastFieldName public string LastFieldName {
{ get {
get
{
return ((string)(this["LastFieldName"])); return ((string)(this["LastFieldName"]));
} }
set set {
{
this["LastFieldName"] = value; this["LastFieldName"] = value;
} }
} }
/// <summary>
/// User-scoped setting stored under the key "LastText"; its declared default value is the empty string.
/// </summary>
[global::System.Configuration.UserScopedSettingAttribute()]
[global::System.Diagnostics.DebuggerNonUserCodeAttribute()]
[global::System.Configuration.DefaultSettingValueAttribute("")]
public string LastText {
get {
return ((string)(this["LastText"]));
}
set {
this["LastText"] = value;
}
}
} }
} }

View File

@@ -11,5 +11,8 @@
<Setting Name="LastFieldName" Type="System.String" Scope="User"> <Setting Name="LastFieldName" Type="System.String" Scope="User">
<Value Profile="(Default)" /> <Value Profile="(Default)" />
</Setting> </Setting>
<Setting Name="LastText" Type="System.String" Scope="User">
<Value Profile="(Default)" />
</Setting>
</Settings> </Settings>
</SettingsFile> </SettingsFile>

View File

@@ -696,6 +696,21 @@ namespace VAR.PdfTools
return fieldData.OrderBy(elem => elem.GetX()).FirstOrDefault().VisibleText; return fieldData.OrderBy(elem => elem.GetX()).FirstOrDefault().VisibleText;
} }
/// <summary>
/// Checks whether the given text can be found, delegating to
/// <see cref="HasText(string, bool)"/> with <c>fuzzy</c> set to <c>true</c>.
/// </summary>
/// <param name="text">Text to look for.</param>
/// <returns><c>true</c> when a matching element is found; otherwise <c>false</c>.</returns>
public bool HasText(string text)
{
    const bool fuzzyByDefault = true;
    return HasText(text, fuzzyByDefault);
}
/// <summary>
/// Checks whether the given text can be found by <c>FindElementByText</c>.
/// </summary>
/// <param name="text">Text to look for.</param>
/// <param name="fuzzy">Forwarded unchanged to <c>FindElementByText</c> as its matching-mode flag.</param>
/// <returns><c>true</c> when <c>FindElementByText</c> returns an element; <c>false</c> when it returns <c>null</c>.</returns>
public bool HasText(string text, bool fuzzy)
{
    PdfTextElement match = FindElementByText(text, fuzzy);
    bool found = !(match == null);
    return found;
}
#endregion #endregion
} }
} }