From b5a0faaa3bb8da4d888e1203f99d2d1fb61a6bd6 Mon Sep 17 00:00:00 2001 From: BobLd Date: Fri, 6 Dec 2019 16:02:30 +0000 Subject: [PATCH 1/3] Improving clustering algorithm --- .../ClusteringAlgorithms.cs | 191 +++++++++--------- .../DocumentLayoutAnalysis/Distances.cs | 81 +++----- .../DocstrumBoundingBoxes.cs | 28 +-- .../NearestNeighbourWordExtractor .cs | 2 +- 4 files changed, 138 insertions(+), 164 deletions(-) diff --git a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/ClusteringAlgorithms.cs b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/ClusteringAlgorithms.cs index de016723..0f60779c 100644 --- a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/ClusteringAlgorithms.cs +++ b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/ClusteringAlgorithms.cs @@ -23,7 +23,7 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis /// The candidates' point to use for pairing, e.g. BottomLeft, TopLeft. /// Filter to apply to the pivot point. If false, point will not be paired at all, e.g. is white space. /// Filter to apply to both the pivot and the paired point. If false, point will not be paired at all, e.g. pivot and paired point have same font. - internal static IEnumerable> SimpleTransitiveClosure(List elements, + internal static IEnumerable> ClusterNearestNeighbours(List elements, Func distMeasure, Func maxDistanceFunction, Func pivotPoint, Func candidatesPoint, @@ -41,7 +41,7 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis * that if indexes[i] = j then indexes[j] != i. * * 2. Group indexes - * Group indexes if share neighbours in common - Transitive closure + * Group indexes if share neighbours in common - Depth-first search * e.g. if we have indexes[i] = j, indexes[j] = k, indexes[m] = n and indexes[n] = -1 * (i,j,k) will form a group and (m,n) will form another group. *************************************************************************************/ @@ -56,12 +56,15 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis if (filterPivot(pivot)) { - int index = pivotPoint(pivot).FindIndexNearest(candidatesPoints, distMeasure, out double dist); - var paired = elements[index]; + int index = pivot.FindIndexNearest(elements, candidatesPoint, pivotPoint, distMeasure, out double dist); - if (filterFinal(pivot, paired) && dist < maxDistanceFunction(pivot, paired)) + if (index != -1) { - indexes[e] = index; + var paired = elements[index]; + if (filterFinal(pivot, paired) && dist < maxDistanceFunction(pivot, paired)) + { + indexes[e] = index; + } } } }); @@ -84,7 +87,7 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis /// The candidates' point to use for pairing, e.g. BottomLeft, TopLeft. /// Filter to apply to the pivot point. If false, point will not be paired at all, e.g. is white space. /// Filter to apply to both the pivot and the paired point. If false, point will not be paired at all, e.g. pivot and paired point have same font. - internal static IEnumerable> SimpleTransitiveClosure(T[] elements, + internal static IEnumerable> ClusterNearestNeighbours(T[] elements, Func distMeasure, Func maxDistanceFunction, Func pivotPoint, Func candidatesPoint, @@ -102,7 +105,7 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis * that if indexes[i] = j then indexes[j] != i. * * 2. Group indexes - * Group indexes if share neighbours in common - Transitive closure + * Group indexes if share neighbours in common - Depth-first search * e.g. if we have indexes[i] = j, indexes[j] = k, indexes[m] = n and indexes[n] = -1 * (i,j,k) will form a group and (m,n) will form another group. *************************************************************************************/ @@ -117,12 +120,15 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis if (filterPivot(pivot)) { - int index = pivotPoint(pivot).FindIndexNearest(candidatesPoints, distMeasure, out double dist); - var paired = elements[index]; + int index = pivot.FindIndexNearest(elements, candidatesPoint, pivotPoint, distMeasure, out double dist); - if (filterFinal(pivot, paired) && dist < maxDistanceFunction(pivot, paired)) + if (index != -1) { - indexes[e] = index; + var paired = elements[index]; + if (filterFinal(pivot, paired) && dist < maxDistanceFunction(pivot, paired)) + { + indexes[e] = index; + } } } }); @@ -145,7 +151,7 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis /// The candidates' line to use for pairing. /// Filter to apply to the pivot point. If false, point will not be paired at all, e.g. is white space. /// Filter to apply to both the pivot and the paired point. If false, point will not be paired at all, e.g. pivot and paired point have same font. - internal static IEnumerable> SimpleTransitiveClosure(T[] elements, + internal static IEnumerable> ClusterNearestNeighbours(T[] elements, Func distMeasure, Func maxDistanceFunction, Func pivotLine, Func candidatesLine, @@ -163,7 +169,7 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis * that if indexes[i] = j then indexes[j] != i. * * 2. Group indexes - * Group indexes if share neighbours in common - Transitive closure + * Group indexes if share neighbours in common - Depth-first search * e.g. if we have indexes[i] = j, indexes[j] = k, indexes[m] = n and indexes[n] = -1 * (i,j,k) will form a group and (m,n) will form another group. *************************************************************************************/ @@ -178,12 +184,15 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis if (filterPivot(pivot)) { - int index = pivotLine(pivot).FindIndexNearest(candidatesLines, distMeasure, out double dist); - var paired = elements[index]; + int index = pivot.FindIndexNearest(elements, candidatesLine, pivotLine, distMeasure, out double dist); - if (filterFinal(pivot, paired) && dist < maxDistanceFunction(pivot, paired)) + if (index != -1) { - indexes[e] = index; + var paired = elements[index]; + if (filterFinal(pivot, paired) && dist < maxDistanceFunction(pivot, paired)) + { + indexes[e] = index; + } } } }); @@ -195,104 +204,98 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis } /// - /// Group elements via transitive closure. Each element has only one connected neighbour. - /// https://en.wikipedia.org/wiki/Transitive_closure + /// Group elements using Depth-first search. + /// https://en.wikipedia.org/wiki/Depth-first_search /// - /// Array of paired elements index. - /// - private static List> GroupIndexes(int[] indexes) + /// The graph. edges[i] = j indicates that there is an edge between i and j. + /// A List of HashSets containing containing the grouped indexes. + internal static List> GroupIndexes(int[] edges) { - int[][] adjacency = new int[indexes.Length][]; - for (int i = 0; i < indexes.Length; i++) + int[][] adjacency = new int[edges.Length][]; + for (int i = 0; i < edges.Length; i++) { HashSet matches = new HashSet(); - for (int j = 0; j < indexes.Length; ++j) + if (edges[i] != -1) matches.Add(edges[i]); + for (int j = 0; j < edges.Length; j++) { - if (indexes[j] == i) matches.Add(j); + if (edges[j] == i) matches.Add(j); } adjacency[i] = matches.ToArray(); } List> groupedIndexes = new List>(); - bool[] isDone = new bool[indexes.Length]; + bool[] isDone = new bool[edges.Length]; - for (int p = 0; p < indexes.Length; p++) + for (int p = 0; p < edges.Length; p++) { if (isDone[p]) continue; + groupedIndexes.Add(DfsIterative(p, adjacency, ref isDone)); + } + return groupedIndexes; + } - LinkedList L = new LinkedList(); - HashSet grouped = new HashSet(); - L.AddLast(new[] { p, indexes[p] }); - - while (L.Any()) + /// + /// Group elements using Depth-first search. + /// https://en.wikipedia.org/wiki/Depth-first_search + /// + /// The graph. edges[i] = [j, k, l, ...] indicates that there is an edge between i and each element j, k, l, ... + /// A List of HashSets containing containing the grouped indexes. + internal static List> GroupIndexes(int[][] edges) + { + int[][] adjacency = new int[edges.Length][]; + for (int i = 0; i < edges.Length; i++) + { + HashSet matches = new HashSet(); + for (int j = 0; j < edges[i].Length; j++) { - var current = L.First.Value; - L.RemoveFirst(); - var current0 = current[0]; - var current1 = current[1]; + if (edges[i][j] != -1) matches.Add(edges[i][j]); + } - if (current0 != -1 && !isDone[current0]) + for (int j = 0; j < edges.Length; j++) + { + for (int k = 0; k < edges[j].Length; k++) { - var adjs = adjacency[current0]; - foreach (var k in adjs) - { - if (isDone[k]) continue; - L.AddLast(new[] { k, current0 }); - } - - int current0P = indexes[current0]; - if (current0P != -1) - { - var adjsP = adjacency[current0P]; - foreach (var k in adjsP) - { - if (isDone[k]) continue; - L.AddLast(new[] { k, current0P }); - isDone[k] = true; - grouped.Add(k); - } - } - else - { - L.AddLast(new[] { current0, current0P }); - isDone[current0] = true; - grouped.Add(current0); - } - } - - if (current1 != -1 && !isDone[current1]) - { - var adjs = adjacency[current1]; - foreach (var k in adjs) - { - if (isDone[k]) continue; - L.AddLast(new[] { k, current1 }); - } - - int current1P = indexes[current1]; - if (current1P != -1) - { - var adjsP = adjacency[current1P]; - foreach (var k in adjsP) - { - if (isDone[k]) continue; - L.AddLast(new[] { k, current1P }); - isDone[k] = true; - grouped.Add(k); - } - } - else - { - L.AddLast(new[] { current1, current1P }); - isDone[current1] = true; - grouped.Add(current1); - } + if (edges[j][k] == i) matches.Add(j); } } - groupedIndexes.Add(grouped); + adjacency[i] = matches.ToArray(); } + List> groupedIndexes = new List>(); + bool[] isDone = new bool[edges.Length]; + + for (int p = 0; p < edges.Length; p++) + { + if (isDone[p]) continue; + groupedIndexes.Add(DfsIterative(p, adjacency, ref isDone)); + } return groupedIndexes; } + + /// + /// Depth-first search + /// https://en.wikipedia.org/wiki/Depth-first_search + /// + private static HashSet DfsIterative(int c, int[][] adj, ref bool[] isDone) + { + HashSet group = new HashSet(); + Stack S = new Stack(); + S.Push(c); + + while (S.Any()) + { + var v = S.Pop(); + if (!isDone[v]) + { + group.Add(v); + isDone[v] = true; + foreach (var w in adj[v]) + { + S.Push(w); + } + } + } + return group; + } } } diff --git a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/Distances.cs b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/Distances.cs index f099c175..84535e62 100644 --- a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/Distances.cs +++ b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/Distances.cs @@ -81,52 +81,21 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis } /// - /// Find the nearest point. + /// Find the index of the nearest point, excluding itself. /// - /// The reference point, for which to find the nearest neighbour. - /// The list of neighbours candidates. + /// + /// The reference point, for which to find the nearest neighbour. + /// The list of neighbours candidates. + /// + /// /// The distance measure to use. /// The distance between reference point, and its nearest neighbour. - public static PdfPoint FindNearest(this PdfPoint pdfPoint, IReadOnlyList points, + /// + public static int FindIndexNearest(this T element, IReadOnlyList candidates, + Func candidatesPoint, Func pivotPoint, Func distanceMeasure, out double distance) { - if (points == null || points.Count == 0) - { - throw new ArgumentException("Distances.FindNearest(): The list of neighbours candidates is either null or empty.", "points"); - } - - if (distanceMeasure == null) - { - throw new ArgumentException("Distances.FindNearest(): The distance measure must not be null.", "distanceMeasure"); - } - - distance = double.MaxValue; - PdfPoint closestPoint = default; - - for (var i = 0; i < points.Count; i++) - { - double currentDistance = distanceMeasure(points[i], pdfPoint); - if (currentDistance < distance) - { - distance = currentDistance; - closestPoint = points[i]; - } - } - - return closestPoint; - } - - /// - /// Find the index of the nearest point. - /// - /// The reference point, for which to find the nearest neighbour. - /// The list of neighbours candidates. - /// The distance measure to use. - /// The distance between reference point, and its nearest neighbour. - public static int FindIndexNearest(this PdfPoint pdfPoint, IReadOnlyList points, - Func distanceMeasure, out double distance) - { - if (points == null || points.Count == 0) + if (candidates == null || candidates.Count == 0) { throw new ArgumentException("Distances.FindIndexNearest(): The list of neighbours candidates is either null or empty.", "points"); } @@ -138,11 +107,13 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis distance = double.MaxValue; int closestPointIndex = -1; + var candidatesPoints = candidates.Select(candidatesPoint).ToList(); + var pivot = pivotPoint(element); - for (var i = 0; i < points.Count; i++) + for (var i = 0; i < candidates.Count; i++) { - double currentDistance = distanceMeasure(points[i], pdfPoint); - if (currentDistance < distance) + double currentDistance = distanceMeasure(candidatesPoints[i], pivot); + if (currentDistance < distance && !candidates[i].Equals(element)) { distance = currentDistance; closestPointIndex = i; @@ -153,16 +124,20 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis } /// - /// Find the index of the nearest line. + /// Find the index of the nearest line, excluding itself. /// - /// The reference line, for which to find the nearest neighbour. - /// The list of neighbours candidates. + /// + /// The reference line, for which to find the nearest neighbour. + /// The list of neighbours candidates. + /// + /// /// The distance measure between two lines to use. /// The distance between reference line, and its nearest neighbour. - public static int FindIndexNearest(this PdfLine pdfLine, IReadOnlyList lines, + public static int FindIndexNearest(this T element, IReadOnlyList candidates, + Func candidatesLine, Func pivotLine, Func distanceMeasure, out double distance) { - if (lines == null || lines.Count == 0) + if (candidates == null || candidates.Count == 0) { throw new ArgumentException("Distances.FindIndexNearest(): The list of neighbours candidates is either null or empty.", "lines"); } @@ -174,11 +149,13 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis distance = double.MaxValue; int closestLineIndex = -1; + var candidatesLines = candidates.Select(candidatesLine).ToList(); + var pivot = pivotLine(element); - for (var i = 0; i < lines.Count; i++) + for (var i = 0; i < candidates.Count; i++) { - double currentDistance = distanceMeasure(lines[i], pdfLine); - if (currentDistance < distance) + double currentDistance = distanceMeasure(candidatesLines[i], pivot); + if (currentDistance < distance && !candidates[i].Equals(element)) { distance = currentDistance; closestLineIndex = i; diff --git a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/DocstrumBoundingBoxes.cs b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/DocstrumBoundingBoxes.cs index fd80cc25..05168b30 100644 --- a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/DocstrumBoundingBoxes.cs +++ b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/DocstrumBoundingBoxes.cs @@ -126,6 +126,10 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis continue; } + // Merge all lines (words) + blocks[b] = new TextBlock(GetLines(blocks[b].TextLines.SelectMany(l => l.Words).ToList(), + double.MaxValue, withinLine).ToList()); + for (var c = 0; c < blocks.Count; c++) { if (b == c || blocks[c] == null) @@ -142,8 +146,9 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis // 2. Rebuild lines, using max distance = +Inf as we know all words will be in the // same block. Filtering will still be done based on angle. - var mergedLines = GetLines(mergedWords, double.MaxValue, withinLine); - blocks[b] = new TextBlock(mergedLines.ToList()); + // Merge all lines (words) sharing same bottom (baseline) + var mergedLines = GetLines(mergedWords, double.MaxValue, withinLine).ToList(); + blocks[b] = new TextBlock(mergedLines.OrderByDescending(l => l.BoundingBox.Bottom).ToList()); // Remove blocks[c] = null; @@ -191,7 +196,8 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis return null; } - var closestWordIndex = pointR.FindIndexNearest(wordsWithinAngleBoundDistancePoints, Distances.Euclidean, out _); + var closestWordIndex = pointR.FindIndexNearest(wordsWithinAngleBoundDistancePoints, p => p, + p => p, Distances.Euclidean, out _); if (closestWordIndex < 0 || closestWordIndex >= wordsWithinAngleBoundDistancePoints.Count) { @@ -206,15 +212,8 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis /// private static IEnumerable GetLines(List words, double maxDist, AngleBounds withinLine) { - /*************************************************************************************************** - * /!\ WARNING: Given how FindIndexNearest() works, if 'maxDist' > 'word Width', the algo might not - * work as the FindIndexNearest() function might pair the pivot with itself (the pivot's right point - * (distance = width) is closer than other words' left point). - * -> Solution would be to find more than one nearest neighbours. Use KDTree? - ***************************************************************************************************/ - TextDirection textDirection = words[0].TextDirection; - var groupedIndexes = ClusteringAlgorithms.SimpleTransitiveClosure(words, Distances.Euclidean, + var groupedIndexes = ClusteringAlgorithms.ClusterNearestNeighbours(words, Distances.Euclidean, (pivot, candidate) => maxDist, pivot => pivot.BoundingBox.BottomRight, candidate => candidate.BoundingBox.BottomLeft, pivot => true, @@ -257,11 +256,6 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis * If they are overlapping, we compute the middle point (new X coordinate) of the overlapping area. * We finally compute the Euclidean distance between these two middle points. * If the two lines are not overlapping, the distance is set to the max distance. - * - * /!\ WARNING: Given how FindIndexNearest() works, if 'maxDist' > 'line Height', the algo won't - * work as the FindIndexNearest() function will always pair the pivot with itself (the pivot's top - * point (distance = height) is closer than other lines' top point). - * -> Solution would be to find more than one nearest neighbours. Use KDTree? **************************************************************************************************/ Func euclidianOverlappingMiddleDistance = (l1, l2) => @@ -276,7 +270,7 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis new PdfPoint(left + d / 2, l2.Point1.Y)); }; - var groupedIndexes = ClusteringAlgorithms.SimpleTransitiveClosure(lines, + var groupedIndexes = ClusteringAlgorithms.ClusterNearestNeighbours(lines, euclidianOverlappingMiddleDistance, (pivot, candidate) => maxDist, pivot => new PdfLine(pivot.BoundingBox.BottomLeft, pivot.BoundingBox.BottomRight), diff --git a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/NearestNeighbourWordExtractor .cs b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/NearestNeighbourWordExtractor .cs index 1038530d..62e212da 100644 --- a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/NearestNeighbourWordExtractor .cs +++ b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/NearestNeighbourWordExtractor .cs @@ -102,7 +102,7 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis Letter[] letters = pageLetters.ToArray(); - var groupedIndexes = ClusteringAlgorithms.SimpleTransitiveClosure(letters, + var groupedIndexes = ClusteringAlgorithms.ClusterNearestNeighbours(letters, distMeasure, maxDistanceFunction, l => l.EndBaseLine, l => l.StartBaseLine, l => !string.IsNullOrWhiteSpace(l.Value), From fd9efb5b5de649f06afc6e2d4bd35203d28e4a60 Mon Sep 17 00:00:00 2001 From: BobLd Date: Fri, 6 Dec 2019 17:29:33 +0000 Subject: [PATCH 2/3] making FindIndexNearest() internal --- src/UglyToad.PdfPig/DocumentLayoutAnalysis/Distances.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/Distances.cs b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/Distances.cs index 84535e62..b1a3cdd0 100644 --- a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/Distances.cs +++ b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/Distances.cs @@ -91,7 +91,7 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis /// The distance measure to use. /// The distance between reference point, and its nearest neighbour. /// - public static int FindIndexNearest(this T element, IReadOnlyList candidates, + internal static int FindIndexNearest(this T element, IReadOnlyList candidates, Func candidatesPoint, Func pivotPoint, Func distanceMeasure, out double distance) { @@ -133,7 +133,7 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis /// /// The distance measure between two lines to use. /// The distance between reference line, and its nearest neighbour. - public static int FindIndexNearest(this T element, IReadOnlyList candidates, + internal static int FindIndexNearest(this T element, IReadOnlyList candidates, Func candidatesLine, Func pivotLine, Func distanceMeasure, out double distance) { From b69c00454876908f4b558e9cc60f89e4f828b3cf Mon Sep 17 00:00:00 2001 From: BobLd Date: Sat, 7 Dec 2019 22:49:10 +0000 Subject: [PATCH 3/3] Changing functions description to reflect changes --- .../DocumentLayoutAnalysis/ClusteringAlgorithms.cs | 9 +++------ .../DocumentLayoutAnalysis/DocstrumBoundingBoxes.cs | 6 ------ 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/ClusteringAlgorithms.cs b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/ClusteringAlgorithms.cs index 0f60779c..97231d9f 100644 --- a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/ClusteringAlgorithms.cs +++ b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/ClusteringAlgorithms.cs @@ -12,8 +12,7 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis internal class ClusteringAlgorithms { /// - /// Algorithm to group elements via transitive closure, using nearest neighbours and maximum distance. - /// https://en.wikipedia.org/wiki/Transitive_closure + /// Algorithm to group elements using nearest neighbours. /// /// Letter, Word, TextLine, etc. /// List of elements to group. @@ -76,8 +75,7 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis } /// - /// Algorithm to group elements via transitive closure, using nearest neighbours and maximum distance. - /// https://en.wikipedia.org/wiki/Transitive_closure + /// Algorithm to group elements using nearest neighbours. /// /// Letter, Word, TextLine, etc. /// Array of elements to group. @@ -140,8 +138,7 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis } /// - /// Algorithm to group elements via transitive closure, using nearest neighbours and maximum distance. - /// https://en.wikipedia.org/wiki/Transitive_closure + /// Algorithm to group elements using nearest neighbours. /// /// Letter, Word, TextLine, etc. /// Array of elements to group. diff --git a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/DocstrumBoundingBoxes.cs b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/DocstrumBoundingBoxes.cs index 05168b30..4dc02c1e 100644 --- a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/DocstrumBoundingBoxes.cs +++ b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/DocstrumBoundingBoxes.cs @@ -207,9 +207,6 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis return finalDistanceMeasure(pointR, wordsWithinAngleBoundDistancePoints[closestWordIndex]); } - /// - /// Build lines via transitive closure. - /// private static IEnumerable GetLines(List words, double maxDist, AngleBounds withinLine) { TextDirection textDirection = words[0].TextDirection; @@ -245,9 +242,6 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis } } - /// - /// Build blocks via transitive closure. - /// private static IEnumerable GetLinesGroups(TextLine[] lines, double maxDist) { /**************************************************************************************************