PdfTextExtractor: Adjust public method names.

This commit is contained in:
2019-10-27 12:40:17 +01:00
parent b6611b6285
commit 77a5cd1b0e
4 changed files with 22 additions and 33 deletions

View File

@@ -5,27 +5,33 @@
### VAR.PdfTools
Add the resulting assembly as a reference in your projects, and add this line to your code:
using VAR.PdfTools;
```csharp
using VAR.PdfTools;
```
Then extract the contents of a data column using:
```csharp
var columnData = new List<string>();
PdfDocument doc = PdfDocument.Load("document.pdf");
foreach (PdfDocumentPage page in doc.Pages)
{
PdfTextExtractor extractor = new PdfTextExtractor(page);
columnData.AddRange(extractor.GetColumn("Column"));
columnData.AddRange(extractor.GetColumnAsStrings("Column"));
}
```
Or the content of a field (text on the right of the indicated text):
```csharp
var fieldData = new List<string>();
PdfDocument doc = PdfDocument.Load("document.pdf");
foreach (PdfDocumentPage page in doc.Pages)
{
PdfTextExtractor extractor = new PdfTextExtractor(page);
fieldData.Add(extractor.GetField(txtFieldName.Text));
fieldData.Add(extractor.GetFieldAsString(txtFieldName.Text));
}
```
### VAR.PdfTools.Workbench
It is a simple Windows.Forms application to test the basic functionality of the library.
@@ -34,7 +40,8 @@ It is a simple Windows.Forms application, to test basic funcitionallity of the l
Visual Studio 2015 and 2010 solutions are provided. Simply click Build in the IDE.
A NuGet package can be built using:
VAR.PdfTools\Build.NuGet.cmd
VAR.PdfTools\Build.NuGet.cmd
## Contributing
1. Fork it!

View File

@@ -280,7 +280,7 @@ namespace VAR.PdfTools.Workbench
pageNum++;
if (selectedPages.Contains(pageNum) == false) { continue; }
PdfTextExtractor extractor = new PdfTextExtractor(page);
fieldData.Add(extractor.GetField(field));
fieldData.Add(extractor.GetFieldAsString(field));
}
txtOutput.Lines = fieldData.ToArray();
}
@@ -303,7 +303,7 @@ namespace VAR.PdfTools.Workbench
pageNum++;
if (selectedPages.Contains(pageNum) == false) { continue; }
PdfTextExtractor extractor = new PdfTextExtractor(page);
columnData.AddRange(extractor.GetColumn(column));
columnData.AddRange(extractor.GetColumnAsStrings(column));
}
txtOutput.Lines = columnData.ToArray();
}

View File

@@ -1,8 +1,5 @@
using System;
using System.Collections.Generic;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using VAR.PdfTools.Maths;
namespace VAR.PdfTools

View File

@@ -652,12 +652,7 @@ namespace VAR.PdfTools
#region Public methods
public List<string> GetColumn(string column)
{
return GetColumn(column, true);
}
public List<string> GetColumn(string column, bool fuzzy)
public List<string> GetColumnAsStrings(string column, bool fuzzy =true)
{
PdfTextElement columnHead = FindElementByText(column, fuzzy);
if (columnHead == null)
@@ -709,7 +704,7 @@ namespace VAR.PdfTools
}
columnDataRaw = columnDataRaw.OrderByDescending(elem => elem.GetY()).ToList();
// Only items completelly inside extents, amd break on the first element outside
// Only items completely inside extents, and break on the first element outside
var columnData = new List<PdfTextElement>();
foreach (PdfTextElement elem in columnDataRaw)
{
@@ -729,12 +724,7 @@ namespace VAR.PdfTools
return result;
}
public string GetField(string field)
{
return GetField(field, true);
}
public string GetField(string field, bool fuzzy)
public string GetFieldAsString(string field, bool fuzzy = true)
{
PdfTextElement fieldTitle = FindElementByText(field, fuzzy);
if (fieldTitle == null)
@@ -762,12 +752,7 @@ namespace VAR.PdfTools
return fieldData.OrderBy(elem => elem.GetX()).FirstOrDefault().VisibleText;
}
public bool HasText(string text)
{
return HasText(text, true);
}
public bool HasText(string text, bool fuzzy)
public bool HasText(string text, bool fuzzy = true)
{
List<PdfTextElement> list = FindElementsContainingText(text, fuzzy);
return (list.Count > 0);