diff --git a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/ClusteringAlgorithms.cs b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/ClusteringAlgorithms.cs index 2d53a710..a60c5411 100644 --- a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/ClusteringAlgorithms.cs +++ b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/ClusteringAlgorithms.cs @@ -292,26 +292,33 @@ /// Depth-first search /// https://en.wikipedia.org/wiki/Depth-first_search /// - private static HashSet DfsIterative(int c, int[][] adj, ref bool[] isDone) + private static HashSet DfsIterative(int s, int[][] adj, ref bool[] isDone) { HashSet group = new HashSet(); Stack S = new Stack(); - S.Push(c); + S.Push(s); while (S.Any()) { - var v = S.Pop(); - if (!isDone[v]) + var u = S.Pop(); + if (!isDone[u]) { - group.Add(v); - isDone[v] = true; - foreach (var w in adj[v]) + group.Add(u); + isDone[u] = true; + foreach (var v in adj[u]) { - S.Push(w); + S.Push(v); } } } return group; } + + public class DfsEdge : Tuple + { + public DfsEdge(int item1, int item2) : base(item1, item2) + { + } + } } } diff --git a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/WordExtractor/NearestNeighbourWordExtractor.cs b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/WordExtractor/NearestNeighbourWordExtractor.cs index a9c65ee7..c0c44353 100644 --- a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/WordExtractor/NearestNeighbourWordExtractor.cs +++ b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/WordExtractor/NearestNeighbourWordExtractor.cs @@ -97,21 +97,6 @@ throw new ArgumentException("NearestNeighbourWordExtractor.GetWords(): Mixed Text Direction."); } - // TO DO: orderFunc should also take in account the edge relationships found by 'ClusterNearestNeighbours' - Func, IReadOnlyList> orderFunc = l => l.OrderBy(x => x.GlyphRectangle.Left).ToList(); - if (textDirection == TextDirection.Rotate180) - { - orderFunc = l => l.OrderByDescending(x => x.GlyphRectangle.Right).ToList(); - } - else if (textDirection == TextDirection.Rotate90) - { - orderFunc = l => l.OrderByDescending(x => x.GlyphRectangle.Top).ToList(); - } - else if (textDirection == TextDirection.Rotate270) - { - orderFunc = l => l.OrderBy(x => x.GlyphRectangle.Bottom).ToList(); - } - Letter[] letters = pageLetters.ToArray(); var groupedIndexes = ClusteringAlgorithms.ClusterNearestNeighbours(letters, @@ -124,7 +109,7 @@ List words = new List(); for (int a = 0; a < groupedIndexes.Count(); a++) { - words.Add(new Word(orderFunc(groupedIndexes[a].Select(i => letters[i])))); + words.Add(new Word(groupedIndexes[a].Select(i => letters[i]).ToList())); } return words;