From b6611b62853587ba2968ee42751d4db5f836b2a0 Mon Sep 17 00:00:00 2001 From: "Valeriano A.R" Date: Sun, 27 Oct 2019 12:37:16 +0100 Subject: [PATCH] Put class PdfTextElement in its own file. --- VAR.PdfTools/PdfTextElement.cs | 96 ++++++++++++++++++++++++++++++++ VAR.PdfTools/PdfTextExtractor.cs | 86 ---------------------------- VAR.PdfTools/VAR.PdfTools.csproj | 1 + 3 files changed, 97 insertions(+), 86 deletions(-) create mode 100644 VAR.PdfTools/PdfTextElement.cs diff --git a/VAR.PdfTools/PdfTextElement.cs b/VAR.PdfTools/PdfTextElement.cs new file mode 100644 index 0000000..8d970cd --- /dev/null +++ b/VAR.PdfTools/PdfTextElement.cs @@ -0,0 +1,96 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using VAR.PdfTools.Maths; + +namespace VAR.PdfTools +{ + public struct PdfCharElement + { + public string Char; + public double Displacement; + public double Width; + } + + public class PdfTextElement + { + #region Properties + + public PdfFont Font { get; set; } + + public double FontSize { get; set; } + + public Matrix3x3 Matrix { get; set; } + + public string RawText { get; set; } + + public string VisibleText { get; set; } + + public double VisibleWidth { get; set; } + + public double VisibleHeight { get; set; } + + public List Characters { get; set; } + + public List Childs { get; set; } + + #endregion + + #region Public methods + + public double GetX() + { + return Matrix.Matrix[0, 2]; + } + + public double GetY() + { + return Matrix.Matrix[1, 2]; + } + + public PdfTextElement SubPart(int startIndex, int endIndex) + { + PdfTextElement blockElem = new PdfTextElement + { + Font = null, + FontSize = FontSize, + Matrix = Matrix.Copy(), + RawText = RawText.Substring(startIndex, endIndex - startIndex), + VisibleText = VisibleText.Substring(startIndex, endIndex - startIndex), + VisibleWidth = 0, + VisibleHeight = VisibleHeight, + Characters = new List(), + Childs = new List(), + }; + double
displacement = Characters[startIndex].Displacement; + blockElem.Matrix.Matrix[0, 2] += displacement; + for (int j = startIndex; j < endIndex; j++) + { + blockElem.Characters.Add(new PdfCharElement + { + Char = Characters[j].Char, + Displacement = Characters[j].Displacement - displacement, + Width = Characters[j].Width, + }); + } + PdfCharElement lastChar = blockElem.Characters[blockElem.Characters.Count - 1]; + blockElem.VisibleWidth = lastChar.Displacement + lastChar.Width; + foreach (PdfTextElement elem in Childs) + { + blockElem.Childs.Add(elem); + } + + return blockElem; + } + + public double MaxWidth() + { + return Characters.Average(c => c.Width); + } + + #endregion + } + +} diff --git a/VAR.PdfTools/PdfTextExtractor.cs b/VAR.PdfTools/PdfTextExtractor.cs index 01813d5..6b4ce32 100644 --- a/VAR.PdfTools/PdfTextExtractor.cs +++ b/VAR.PdfTools/PdfTextExtractor.cs @@ -7,92 +7,6 @@ using VAR.PdfTools.PdfElements; namespace VAR.PdfTools { - public struct PdfCharElement - { - public string Char; - public double Displacement; - public double Width; - } - - public class PdfTextElement - { - #region Properties - - public PdfFont Font { get; set; } - - public double FontSize { get; set; } - - public Matrix3x3 Matrix { get; set; } - - public string RawText { get; set; } - - public string VisibleText { get; set; } - - public double VisibleWidth { get; set; } - - public double VisibleHeight { get; set; } - - public List Characters { get; set; } - - public List Childs { get; set; } - - #endregion - - #region Public methods - - public double GetX() - { - return Matrix.Matrix[0, 2]; - } - - public double GetY() - { - return Matrix.Matrix[1, 2]; - } - - public PdfTextElement SubPart(int startIndex, int endIndex) - { - PdfTextElement blockElem = new PdfTextElement - { - Font = null, - FontSize = FontSize, - Matrix = Matrix.Copy(), - RawText = RawText.Substring(startIndex, endIndex - startIndex), - VisibleText = VisibleText.Substring(startIndex, endIndex - startIndex), - 
VisibleWidth = 0, - VisibleHeight = VisibleHeight, - Characters = new List(), - Childs = new List(), - }; - double displacement = Characters[startIndex].Displacement; - blockElem.Matrix.Matrix[0, 2] += displacement; - for (int j = startIndex; j < endIndex; j++) - { - blockElem.Characters.Add(new PdfCharElement - { - Char = Characters[j].Char, - Displacement = Characters[j].Displacement - displacement, - Width = Characters[j].Width, - }); - } - PdfCharElement lastChar = blockElem.Characters[blockElem.Characters.Count - 1]; - blockElem.VisibleWidth = lastChar.Displacement + lastChar.Width; - foreach (PdfTextElement elem in Childs) - { - blockElem.Childs.Add(elem); - } - - return blockElem; - } - - public double MaxWidth() - { - return Characters.Average(c => c.Width) / 2; - } - - #endregion - } - public class PdfTextExtractor { #region Declarations diff --git a/VAR.PdfTools/VAR.PdfTools.csproj b/VAR.PdfTools/VAR.PdfTools.csproj index d15310b..fa528c5 100644 --- a/VAR.PdfTools/VAR.PdfTools.csproj +++ b/VAR.PdfTools/VAR.PdfTools.csproj @@ -85,6 +85,7 @@ +