Improve default max function

This commit is contained in:
BobLd
2020-02-28 11:41:32 +00:00
committed by Eliot Jones
parent c864fa512c
commit 5ae38f1bad

View File

@@ -31,43 +31,52 @@
/// <param name="letters">The letters in the page.</param> /// <param name="letters">The letters in the page.</param>
public IEnumerable<Word> GetWords(IReadOnlyList<Letter> letters) public IEnumerable<Word> GetWords(IReadOnlyList<Letter> letters)
{ {
Func<Letter, Letter, double> baseMaxFunc = (l1, l2) =>
{
return Math.Max(Math.Max(Math.Max(
Math.Abs(l1.GlyphRectangle.Width),
Math.Abs(l2.GlyphRectangle.Width)),
Math.Abs(l1.Width)),
Math.Abs(l2.Width));
};
List<Word> wordsH = GetWords( List<Word> wordsH = GetWords(
letters.Where(l => l.TextDirection == TextDirection.Horizontal), letters.Where(l => l.TextDirection == TextDirection.Horizontal).ToList(),
(l1, l2) => Math.Max(l1.GlyphRectangle.Width, l2.GlyphRectangle.Width) * 0.2, (l1, l2) => baseMaxFunc(l1, l2) * 0.2,
Distances.Manhattan, MaxDegreeOfParallelism) Distances.Manhattan, MaxDegreeOfParallelism)
.OrderByDescending(x => x.BoundingBox.Bottom) .OrderByDescending(x => x.BoundingBox.Bottom)
.ThenBy(x => x.BoundingBox.Left).ToList(); .ThenBy(x => x.BoundingBox.Left).ToList();
List<Word> words270 = GetWords( var words270 = GetWords(
letters.Where(l => l.TextDirection == TextDirection.Rotate270), letters.Where(l => l.TextDirection == TextDirection.Rotate270).ToList(),
(l1, l2) => Math.Max(l1.GlyphRectangle.Width, l2.GlyphRectangle.Width) * 0.2, (l1, l2) => baseMaxFunc(l1, l2) * 0.2,
Distances.Manhattan, MaxDegreeOfParallelism) Distances.Manhattan, MaxDegreeOfParallelism)
.OrderBy(x => x.BoundingBox.Right) .OrderBy(x => x.BoundingBox.Right)
.ThenByDescending(x => x.BoundingBox.Bottom).ToList(); .ThenByDescending(x => x.BoundingBox.Bottom);
wordsH.AddRange(words270); wordsH.AddRange(words270);
List<Word> words180 = GetWords( var words180 = GetWords(
letters.Where(l => l.TextDirection == TextDirection.Rotate180), letters.Where(l => l.TextDirection == TextDirection.Rotate180).ToList(),
(l1, l2) => Math.Max(Math.Abs(l1.GlyphRectangle.Width), Math.Abs(l2.GlyphRectangle.Width)) * 0.2, (l1, l2) => baseMaxFunc(l1, l2) * 0.2,
Distances.Manhattan, MaxDegreeOfParallelism) Distances.Manhattan, MaxDegreeOfParallelism)
.OrderBy(x => x.BoundingBox.Top) .OrderBy(x => x.BoundingBox.Top)
.ThenByDescending(x => x.BoundingBox.Right).ToList(); .ThenByDescending(x => x.BoundingBox.Right);
wordsH.AddRange(words180); wordsH.AddRange(words180);
List<Word> words90 = GetWords( var words90 = GetWords(
letters.Where(l => l.TextDirection == TextDirection.Rotate90), letters.Where(l => l.TextDirection == TextDirection.Rotate90).ToList(),
(l1, l2) => Math.Max(Math.Abs(l1.GlyphRectangle.Width), Math.Abs(l2.GlyphRectangle.Width)) * 0.2, (l1, l2) => baseMaxFunc(l1, l2) * 0.2,
Distances.Manhattan, MaxDegreeOfParallelism) Distances.Manhattan, MaxDegreeOfParallelism)
.OrderByDescending(x => x.BoundingBox.Left) .OrderByDescending(x => x.BoundingBox.Left)
.ThenBy(x => x.BoundingBox.Top).ToList(); .ThenBy(x => x.BoundingBox.Top);
wordsH.AddRange(words90); wordsH.AddRange(words90);
List<Word> wordsU = GetWords( var wordsU = GetWords(
letters.Where(l => l.TextDirection == TextDirection.Other), letters.Where(l => l.TextDirection == TextDirection.Other).ToList(),
(l1, l2) => Math.Max(Math.Abs(l1.GlyphRectangle.Width), Math.Abs(l2.GlyphRectangle.Width)) * 0.5, (l1, l2) => baseMaxFunc(l1, l2) * 0.5,
Distances.Euclidean, MaxDegreeOfParallelism) Distances.Euclidean, MaxDegreeOfParallelism)
.OrderByDescending(x => x.BoundingBox.Bottom) .OrderByDescending(x => x.BoundingBox.Bottom)
.ThenBy(x => x.BoundingBox.Left).ToList(); .ThenBy(x => x.BoundingBox.Left);
wordsH.AddRange(wordsU); wordsH.AddRange(wordsU);
return wordsH; return wordsH;
@@ -85,7 +94,7 @@
/// <param name="maxDegreeOfParallelism">Sets the maximum number of concurrent tasks enabled. /// <param name="maxDegreeOfParallelism">Sets the maximum number of concurrent tasks enabled.
/// <para>A positive property value limits the number of concurrent operations to the set value. /// <para>A positive property value limits the number of concurrent operations to the set value.
/// If it is -1, there is no limit on the number of concurrently running operations.</para></param> /// If it is -1, there is no limit on the number of concurrently running operations.</para></param>
public List<Word> GetWords(IEnumerable<Letter> pageLetters, public List<Word> GetWords(IReadOnlyList<Letter> pageLetters,
Func<Letter, Letter, double> maxDistanceFunction, Func<PdfPoint, PdfPoint, double> distMeasure, Func<Letter, Letter, double> maxDistanceFunction, Func<PdfPoint, PdfPoint, double> distMeasure,
int maxDegreeOfParallelism) int maxDegreeOfParallelism)
{ {
@@ -97,9 +106,7 @@
throw new ArgumentException("NearestNeighbourWordExtractor.GetWords(): Mixed Text Direction."); throw new ArgumentException("NearestNeighbourWordExtractor.GetWords(): Mixed Text Direction.");
} }
Letter[] letters = pageLetters.ToArray(); var groupedIndexes = ClusteringAlgorithms.ClusterNearestNeighbours(pageLetters,
var groupedIndexes = ClusteringAlgorithms.ClusterNearestNeighbours(letters,
distMeasure, maxDistanceFunction, distMeasure, maxDistanceFunction,
l => l.EndBaseLine, l => l.StartBaseLine, l => l.EndBaseLine, l => l.StartBaseLine,
l => !string.IsNullOrWhiteSpace(l.Value), l => !string.IsNullOrWhiteSpace(l.Value),
@@ -107,9 +114,9 @@
maxDegreeOfParallelism).ToList(); maxDegreeOfParallelism).ToList();
List<Word> words = new List<Word>(); List<Word> words = new List<Word>();
for (int a = 0; a < groupedIndexes.Count(); a++) for (int a = 0; a < groupedIndexes.Count; a++)
{ {
words.Add(new Word(groupedIndexes[a].Select(i => letters[i]).ToList())); words.Add(new Word(groupedIndexes[a].Select(i => pageLetters[i]).ToList()));
} }
return words; return words;