From a326d7e9d978fa679737cc133a0cbeb244aa9f0b Mon Sep 17 00:00:00 2001 From: BobLd Date: Fri, 17 Jan 2020 11:33:59 +0000 Subject: [PATCH] TextDirection.Unknown -> TextDirection.Other Improve NearestNeighbourWordExtractor for TextDirection.Other --- src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextLine.cs | 2 +- .../WordExtractor/NearestNeighbourWordExtractor.cs | 6 +++--- src/UglyToad.PdfPig/Content/Letter.cs | 2 +- src/UglyToad.PdfPig/Content/TextDirection.cs | 4 ++-- src/UglyToad.PdfPig/Content/Word.cs | 8 ++++---- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextLine.cs b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextLine.cs index 7bb4100b..96e5c88e 100644 --- a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextLine.cs +++ b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextLine.cs @@ -65,7 +65,7 @@ } else { - TextDirection = TextDirection.Unknown; + TextDirection = TextDirection.Other; } } diff --git a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/WordExtractor/NearestNeighbourWordExtractor.cs b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/WordExtractor/NearestNeighbourWordExtractor.cs index 25a43245..25256511 100644 --- a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/WordExtractor/NearestNeighbourWordExtractor.cs +++ b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/WordExtractor/NearestNeighbourWordExtractor.cs @@ -63,9 +63,9 @@ wordsH.AddRange(words90); List wordsU = GetWords( - letters.Where(l => l.TextDirection == TextDirection.Unknown), - (l1, l2) => Math.Max(l1.GlyphRectangle.Width, l2.GlyphRectangle.Width) * 0.2, - Distances.Manhattan, MaxDegreeOfParallelism) + letters.Where(l => l.TextDirection == TextDirection.Other), + (l1, l2) => Math.Max(Math.Abs(l1.GlyphRectangle.Width), Math.Abs(l2.GlyphRectangle.Width)) * 0.2, + Distances.Euclidean, MaxDegreeOfParallelism) .OrderByDescending(x => x.BoundingBox.Bottom) .ThenBy(x => x.BoundingBox.Left).ToList(); wordsH.AddRange(wordsU); diff --git 
a/src/UglyToad.PdfPig/Content/Letter.cs b/src/UglyToad.PdfPig/Content/Letter.cs index 37199891..0213d978 100644 --- a/src/UglyToad.PdfPig/Content/Letter.cs +++ b/src/UglyToad.PdfPig/Content/Letter.cs @@ -118,7 +118,7 @@ return TextDirection.Rotate270; } - return TextDirection.Unknown; + return TextDirection.Other; } /// diff --git a/src/UglyToad.PdfPig/Content/TextDirection.cs b/src/UglyToad.PdfPig/Content/TextDirection.cs index 0c56a942..2f4ae4f9 100644 --- a/src/UglyToad.PdfPig/Content/TextDirection.cs +++ b/src/UglyToad.PdfPig/Content/TextDirection.cs @@ -6,9 +6,9 @@ public enum TextDirection { /// - /// Text direction not known. + /// Other text direction. /// - Unknown, + Other, /// /// Usual text direction (Left to Right). diff --git a/src/UglyToad.PdfPig/Content/Word.cs b/src/UglyToad.PdfPig/Content/Word.cs index 445d2467..ae94d0eb 100644 --- a/src/UglyToad.PdfPig/Content/Word.cs +++ b/src/UglyToad.PdfPig/Content/Word.cs @@ -57,7 +57,7 @@ var tempTextDirection = letters[0].TextDirection; if (letters.Any(l => l.TextDirection != tempTextDirection)) { - tempTextDirection = TextDirection.Unknown; + tempTextDirection = TextDirection.Other; } Tuple data; @@ -80,9 +80,9 @@ data = GetBoundingBox270(letters); break; - case TextDirection.Unknown: + case TextDirection.Other: default: - data = GetBoundingBoxUnknown(letters); + data = GetBoundingBoxOther(letters); break; } @@ -256,7 +256,7 @@ new PdfPoint(maxX, maxY))); } - private Tuple GetBoundingBoxUnknown(IReadOnlyList letters) + private Tuple GetBoundingBoxOther(IReadOnlyList letters) { var builder = new StringBuilder(); for (var i = 0; i < letters.Count; i++)