Improve default RecursiveXYCut dominant font width and height functions

This commit is contained in:
BobLd
2020-04-12 19:49:30 +01:00
parent 3833fff28f
commit 395c5a7fd9
2 changed files with 39 additions and 18 deletions

View File

@@ -9,9 +9,10 @@
public static class MathExtensions
{
/// <summary>
/// Computes the mode of a sequence of float values.
/// Computes the mode of a sequence of <see cref="float"/> values.
/// </summary>
/// <param name="array">The array of floats.</param>
/// <param name="array">The sequence of floats.</param>
/// <returns>The mode of the sequence. Returns <see cref="float.NaN"/> if the sequence has no mode or if it is not unique.</returns>
public static float Mode(this IEnumerable<float> array)
{
if (array == null || array.Count() == 0) return float.NaN;
@@ -22,9 +23,10 @@
}
/// <summary>
/// Computes the mode of a sequence of decimal values.
/// Computes the mode of a sequence of <see cref="double"/> values.
/// </summary>
/// <param name="array">The array of decimal.</param>
/// <param name="array">The sequence of doubles.</param>
/// <returns>The mode of the sequence. Returns <see cref="double.NaN"/> if the sequence has no mode or if it is not unique.</returns>
public static double Mode(this IEnumerable<double> array)
{
if (array == null || array.Count() == 0) return double.NaN;

View File

@@ -39,7 +39,28 @@
/// <param name="minimumWidth">The minimum width for a block.</param>
public IReadOnlyList<TextBlock> GetBlocks(IEnumerable<Word> pageWords, double minimumWidth)
{
    // Delegates to the overload taking explicit dominant-font functions, supplying the defaults below.
    return GetBlocks(pageWords, minimumWidth,
        // Default dominant font width: the mode of the letters' widths, where each letter's width is
        // the larger of its Width and its glyph rectangle width (both rounded to 3 decimals).
        (letters) =>
        {
            // Materialise once: the values are read by Mode() and possibly again by Average(),
            // so a deferred query would re-run the projection on every pass.
            var widths = letters
                .Select(x => Math.Max(Math.Round(x.Width, 3), Math.Round(x.GlyphRectangle.Width, 3)))
                .ToArray();

            var mode = widths.Mode();
            if (double.IsNaN(mode) || mode == 0)
            {
                // Mode() yields NaN when there is no unique mode; fall back to the average width.
                // A mode of exactly 0 is treated as unusable as well.
                mode = widths.Average();
            }

            return mode;
        },
        // Default dominant font height: the mode of the glyph rectangle heights (rounded to
        // 3 decimals), scaled by 1.5.
        (letters) =>
        {
            // Materialised for the same reason as the widths above.
            var heights = letters
                .Select(x => Math.Round(x.GlyphRectangle.Height, 3))
                .ToArray();

            var mode = heights.Mode();
            if (double.IsNaN(mode) || mode == 0)
            {
                // Same fallback as for widths: use the average when the mode is NaN or 0.
                mode = heights.Average();
            }

            return mode * 1.5;
        }
    );
}
/// <summary>
@@ -63,8 +84,8 @@
/// <param name="dominantFontWidthFunc">The function that determines the dominant font width.</param>
/// <param name="dominantFontHeightFunc">The function that determines the dominant font height.</param>
public IReadOnlyList<TextBlock> GetBlocks(IEnumerable<Word> pageWords, double minimumWidth,
Func<IEnumerable<double>, double> dominantFontWidthFunc,
Func<IEnumerable<double>, double> dominantFontHeightFunc)
Func<IEnumerable<Letter>, double> dominantFontWidthFunc,
Func<IEnumerable<Letter>, double> dominantFontHeightFunc)
{
if (pageWords.Count() == 0) return EmptyArray<TextBlock>.Instance;
@@ -89,13 +110,13 @@
}
private XYNode VerticalCut(XYLeaf leaf, double minimumWidth,
Func<IEnumerable<double>, double> dominantFontWidthFunc,
Func<IEnumerable<double>, double> dominantFontHeightFunc, int level = 0)
Func<IEnumerable<Letter>, double> dominantFontWidthFunc,
Func<IEnumerable<Letter>, double> dominantFontHeightFunc, int level = 0)
{
// Order words left to right
var words = leaf.Words.Where(w => !string.IsNullOrWhiteSpace(w.Text)).OrderBy(w => w.BoundingBox.Normalise().Left).ToArray();
if (!words.Any())
if (words.Length == 0)
{
return new XYNode(null);
}
@@ -112,8 +133,7 @@
}
// Determine dominant font width
double dominantFontWidth = dominantFontWidthFunc(words.SelectMany(x => x.Letters)
.Select(x => x.GlyphRectangle.Normalise().Width));
double dominantFontWidth = dominantFontWidthFunc(words.SelectMany(x => x.Letters));
List<Projection> projectionProfile = new List<Projection>();
@@ -197,13 +217,13 @@
}
private XYNode HorizontalCut(XYLeaf leaf, double minimumWidth,
Func<IEnumerable<double>, double> dominantFontWidthFunc,
Func<IEnumerable<double>, double> dominantFontHeightFunc, int level = 0)
Func<IEnumerable<Letter>, double> dominantFontWidthFunc,
Func<IEnumerable<Letter>, double> dominantFontHeightFunc, int level = 0)
{
// Order words bottom to top
var words = leaf.Words.Where(w => !string.IsNullOrWhiteSpace(w.Text)).OrderBy(w => w.BoundingBox.Normalise().Bottom).ToArray();
if (!words.Any())
if (words.Length == 0)
{
return new XYNode(null);
}
@@ -219,14 +239,13 @@
}
// Determine dominant font height
double dominantFontHeight = dominantFontHeightFunc(words.SelectMany(x => x.Letters)
.Select(x => x.GlyphRectangle.Normalise().Height));
double dominantFontHeight = dominantFontHeightFunc(words.SelectMany(x => x.Letters));
List<Projection> projectionProfile = new List<Projection>();
var firstWordBound = words[0].BoundingBox.Normalise();
Projection currentProjection = new Projection(firstWordBound.Bottom, firstWordBound.Top);
int wordsCount = words.Count();
int wordsCount = words.Length;
for (int i = 1; i < wordsCount; i++)
{