mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-09-18 18:27:55 +08:00
Remove 'orderFunc' from 'NearestNeighbourWordExtractor' to use the order found by the clustering algorithm
This commit is contained in:
@@ -292,26 +292,33 @@
|
||||
/// Depth-first search
|
||||
/// <para>https://en.wikipedia.org/wiki/Depth-first_search</para>
|
||||
/// </summary>
|
||||
// NOTE(review): this span is a rendered diff hunk, so each modified line appears twice,
// back to back, once per side of the change. One variant names the start vertex 'c' and uses
// 'v'/'w' for the popped vertex and its neighbour; the other names them 's' and 'u'/'v'.
// Presumably the 'c' variant is the pre-change side and the 's' variant the post-change side
// (usual diff ordering) - confirm against the repository. Both variants have the same logic.
//
// Iterative depth-first traversal over the adjacency lists in 'adj', starting from one vertex.
// Returns the set of vertex indexes that this call marked as done: the start vertex and every
// vertex reached from it through 'adj' that was not already flagged in 'isDone'.
// 'isDone' is passed by ref, but only its elements are written here; the array itself is never
// reassigned in this method.
private static HashSet<int> DfsIterative(int c, int[][] adj, ref bool[] isDone)
|
||||
private static HashSet<int> DfsIterative(int s, int[][] adj, ref bool[] isDone)
|
||||
{
|
||||
// Vertices collected by this traversal.
HashSet<int> group = new HashSet<int>();
|
||||
// Explicit stack of vertices still to be examined (used instead of recursion).
Stack<int> S = new Stack<int>();
|
||||
// Seed the stack with the start vertex.
S.Push(c);
|
||||
S.Push(s);
|
||||
|
||||
// Keep going until no vertex is left to examine.
while (S.Any())
|
||||
{
|
||||
// Take the most recently pushed vertex; skip it if it was already handled
// (the same vertex can be pushed more than once before it is popped).
var v = S.Pop();
|
||||
if (!isDone[v])
|
||||
var u = S.Pop();
|
||||
if (!isDone[u])
|
||||
{
|
||||
// Record the vertex, flag it as done, then schedule every entry of its adjacency list.
group.Add(v);
|
||||
isDone[v] = true;
|
||||
foreach (var w in adj[v])
|
||||
group.Add(u);
|
||||
isDone[u] = true;
|
||||
foreach (var v in adj[u])
|
||||
{
|
||||
// Neighbours are pushed unconditionally; the isDone check above filters them on pop.
S.Push(w);
|
||||
S.Push(v);
|
||||
}
|
||||
}
|
||||
}
|
||||
return group;
|
||||
}
|
||||
|
||||
/// <summary>
/// A pair of integers, implemented as a <see cref="Tuple{T1, T2}"/> of two <see cref="int"/> values.
/// Adds no members of its own beyond the constructor.
/// <para>NOTE(review): presumably represents an edge between two vertex indexes for the
/// depth-first search; its usage is not visible in this hunk - confirm.</para>
/// </summary>
public class DfsEdge : Tuple<int, int>
|
||||
{
|
||||
/// <summary>
/// Creates the pair by forwarding both values to the base tuple constructor.
/// </summary>
/// <param name="item1">Value stored as the tuple's first item.</param>
/// <param name="item2">Value stored as the tuple's second item.</param>
public DfsEdge(int item1, int item2) : base(item1, item2)
|
||||
{
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -97,21 +97,6 @@
|
||||
throw new ArgumentException("NearestNeighbourWordExtractor.GetWords(): Mixed Text Direction.");
|
||||
}
|
||||
|
||||
// TO DO: orderFunc should also take in account the edge relationships found by 'ClusterNearestNeighbours'
|
||||
Func<IEnumerable<Letter>, IReadOnlyList<Letter>> orderFunc = l => l.OrderBy(x => x.GlyphRectangle.Left).ToList();
|
||||
if (textDirection == TextDirection.Rotate180)
|
||||
{
|
||||
orderFunc = l => l.OrderByDescending(x => x.GlyphRectangle.Right).ToList();
|
||||
}
|
||||
else if (textDirection == TextDirection.Rotate90)
|
||||
{
|
||||
orderFunc = l => l.OrderByDescending(x => x.GlyphRectangle.Top).ToList();
|
||||
}
|
||||
else if (textDirection == TextDirection.Rotate270)
|
||||
{
|
||||
orderFunc = l => l.OrderBy(x => x.GlyphRectangle.Bottom).ToList();
|
||||
}
|
||||
|
||||
Letter[] letters = pageLetters.ToArray();
|
||||
|
||||
var groupedIndexes = ClusteringAlgorithms.ClusterNearestNeighbours(letters,
|
||||
@@ -124,7 +109,7 @@
|
||||
List<Word> words = new List<Word>();
|
||||
for (int a = 0; a < groupedIndexes.Count(); a++)
|
||||
{
|
||||
words.Add(new Word(orderFunc(groupedIndexes[a].Select(i => letters[i]))));
|
||||
words.Add(new Word(groupedIndexes[a].Select(i => letters[i]).ToList()));
|
||||
}
|
||||
|
||||
return words;
|
||||
|
Reference in New Issue
Block a user