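PdfTextExtractor: Change the Join and Split logic to use the maximum character width of each element as the gap threshold, replacing the font's 'm' width (Join) and twice the previous character's width (Split).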

This commit is contained in:
2017-11-02 13:27:38 +01:00
parent 06de734658
commit 13ba41f851

View File

@@ -625,10 +625,12 @@ namespace VAR.PdfTools
double neighbourY = neighbour.GetY(); double neighbourY = neighbour.GetY();
if (Math.Abs(neighbourY - blockY) > 0.001) { i++; continue; } if (Math.Abs(neighbourY - blockY) > 0.001) { i++; continue; }
double maxWidth = neighbour.Characters.Max(c => c.Width);
double neighbourXMin = neighbour.GetX(); double neighbourXMin = neighbour.GetX();
double neighbourXMax = neighbourXMin + neighbour.VisibleWidth; double neighbourXMax = neighbourXMin + neighbour.VisibleWidth;
double auxBlockXMin = blockXMin - (elem.FontSize * elem.Font.GetCharWidth('m')); double auxBlockXMin = blockXMin - maxWidth;
double auxBlockXMax = blockXMax + (elem.FontSize * elem.Font.GetCharWidth('m')); double auxBlockXMax = blockXMax + maxWidth;
if (auxBlockXMax >= neighbourXMin && neighbourXMax >= auxBlockXMin) if (auxBlockXMax >= neighbourXMin && neighbourXMax >= auxBlockXMin)
{ {
_textElements.Remove(neighbour); _textElements.Remove(neighbour);
@@ -694,12 +696,14 @@ namespace VAR.PdfTools
PdfTextElement elem = _textElements[0]; PdfTextElement elem = _textElements[0];
_textElements.Remove(elem); _textElements.Remove(elem);
double maxWidth = elem.Characters.Max(c => c.Width);
int prevBreak = 0; int prevBreak = 0;
for (int i = 1; i < elem.Characters.Count; i++) for (int i = 1; i < elem.Characters.Count; i++)
{ {
double prevCharEnd = elem.Characters[i - 1].Displacement + elem.Characters[i - 1].Width; double prevCharEnd = elem.Characters[i - 1].Displacement + elem.Characters[i - 1].Width;
double charSeparation = elem.Characters[i].Displacement - prevCharEnd; double charSeparation = elem.Characters[i].Displacement - prevCharEnd;
if (charSeparation > (elem.Characters[i - 1].Width * 2)) if (charSeparation > maxWidth)
{ {
PdfTextElement partElem = elem.SubPart(prevBreak, i); PdfTextElement partElem = elem.SubPart(prevBreak, i);
textElementsSplitted.Add(partElem); textElementsSplitted.Add(partElem);