From 77a5cd1b0ea44b5e656ecaaf4698445f55d70135 Mon Sep 17 00:00:00 2001 From: "Valeriano A.R" Date: Sun, 27 Oct 2019 12:40:17 +0100 Subject: [PATCH] PdfTextExtractor: Adjust public method names. --- README.md | 17 +++++++++++----- VAR.PdfTools.Workbench/FrmPdfInfo.cs | 4 ++-- VAR.PdfTools/PdfTextElement.cs | 5 +---- VAR.PdfTools/PdfTextExtractor.cs | 29 +++++++--------------------- 4 files changed, 22 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index e1dfe97..f905865 100644 --- a/README.md +++ b/README.md @@ -5,27 +5,33 @@ ### VAR.PdfTools Add the resulting assembly as reference in your projects, and this line on code: - using VAR.PdfTools; +```csharp + using VAR.PdfTools; +``` Then extract the contents of a data column using: +```csharp var columnData = new List(); PdfDocument doc = PdfDocument.Load("document.pdf"); foreach (PdfDocumentPage page in doc.Pages) { PdfTextExtractor extractor = new PdfTextExtractor(page); - columnData.AddRange(extractor.GetColumn("Column")); + columnData.AddRange(extractor.GetColumnAsStrings("Column")); } - +``` + Or the content of a field (text on the right of the indicated text): +```csharp var fieldData = new List(); PdfDocument doc = PdfDocument.Load("document.pdf"); foreach (PdfDocumentPage page in doc.Pages) { PdfTextExtractor extractor = new PdfTextExtractor(page); - fieldData.Add(extractor.GetField(txtFieldName.Text)); + fieldData.Add(extractor.GetFieldAsString(txtFieldName.Text)); } +``` ### VAR.PdfTools.Workbench It is a simple Windows.Forms application, to test basic funcitionallity of the library. @@ -34,7 +40,8 @@ It is a simple Windows.Forms application, to test basic funcitionallity of the l A Visual Studio 2015 and 2010 solutions are provided. Simply, click build on the IDE. A .nuget package can be build using: - VAR.PdfTools\Build.NuGet.cmd + + VAR.PdfTools\Build.NuGet.cmd ## Contributing 1. Fork it! 
diff --git a/VAR.PdfTools.Workbench/FrmPdfInfo.cs b/VAR.PdfTools.Workbench/FrmPdfInfo.cs index 62ddcdd..76743fb 100644 --- a/VAR.PdfTools.Workbench/FrmPdfInfo.cs +++ b/VAR.PdfTools.Workbench/FrmPdfInfo.cs @@ -280,7 +280,7 @@ namespace VAR.PdfTools.Workbench pageNum++; if (selectedPages.Contains(pageNum) == false) { continue; } PdfTextExtractor extractor = new PdfTextExtractor(page); - fieldData.Add(extractor.GetField(field)); + fieldData.Add(extractor.GetFieldAsString(field)); } txtOutput.Lines = fieldData.ToArray(); } @@ -303,7 +303,7 @@ namespace VAR.PdfTools.Workbench pageNum++; if (selectedPages.Contains(pageNum) == false) { continue; } PdfTextExtractor extractor = new PdfTextExtractor(page); - columnData.AddRange(extractor.GetColumn(column)); + columnData.AddRange(extractor.GetColumnAsStrings(column)); } txtOutput.Lines = columnData.ToArray(); } diff --git a/VAR.PdfTools/PdfTextElement.cs b/VAR.PdfTools/PdfTextElement.cs index 8d970cd..916c246 100644 --- a/VAR.PdfTools/PdfTextElement.cs +++ b/VAR.PdfTools/PdfTextElement.cs @@ -1,8 +1,5 @@ -using System; -using System.Collections.Generic; +using System.Collections.Generic; using System.Linq; -using System.Text; -using System.Threading.Tasks; using VAR.PdfTools.Maths; namespace VAR.PdfTools diff --git a/VAR.PdfTools/PdfTextExtractor.cs b/VAR.PdfTools/PdfTextExtractor.cs index 6b4ce32..2df7519 100644 --- a/VAR.PdfTools/PdfTextExtractor.cs +++ b/VAR.PdfTools/PdfTextExtractor.cs @@ -651,13 +651,8 @@ namespace VAR.PdfTools #endregion #region Public methods - - public List GetColumn(string column) - { - return GetColumn(column, true); - } - - public List GetColumn(string column, bool fuzzy) + + public List GetColumnAsStrings(string column, bool fuzzy =true) { PdfTextElement columnHead = FindElementByText(column, fuzzy); if (columnHead == null) @@ -709,7 +704,7 @@ namespace VAR.PdfTools } columnDataRaw = columnDataRaw.OrderByDescending(elem => elem.GetY()).ToList(); - // Only items completelly inside extents, amd 
break on the first element outside + // Only items completely inside extents, and break on the first element outside var columnData = new List(); foreach (PdfTextElement elem in columnDataRaw) { @@ -728,13 +723,8 @@ namespace VAR.PdfTools } return result; } - - public string GetField(string field) - { - return GetField(field, true); - } - - public string GetField(string field, bool fuzzy) + + public string GetFieldAsString(string field, bool fuzzy = true) { PdfTextElement fieldTitle = FindElementByText(field, fuzzy); if (fieldTitle == null) @@ -761,13 +751,8 @@ namespace VAR.PdfTools return fieldData.OrderBy(elem => elem.GetX()).FirstOrDefault().VisibleText; } - - public bool HasText(string text) - { - return HasText(text, true); - } - - public bool HasText(string text, bool fuzzy) + + public bool HasText(string text, bool fuzzy = true) { List list = FindElementsContainingText(text, fuzzy); return (list.Count > 0);