mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-09-19 19:07:56 +08:00
Fix the bug that occurs when every word in the current leaf passed to VerticalCut/HorizontalCut is whitespace.
This commit is contained in:
@@ -70,11 +70,18 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis
|
||||
XYLeaf root = new XYLeaf(pageWords); // Create a root node.
|
||||
XYNode node = VerticalCut(root, minimumWidth, dominantFontWidthFunc, dominantFontHeightFunc);
|
||||
|
||||
var leafs = node.GetLeafs();
|
||||
|
||||
if (leafs.Count > 0)
|
||||
if (node.IsLeaf)
|
||||
{
|
||||
return leafs.Select(l => new TextBlock(l.GetLines())).ToList();
|
||||
return new List<TextBlock>{ new TextBlock((node as XYLeaf).GetLines())};
|
||||
}
|
||||
else
|
||||
{
|
||||
var leafs = node.GetLeafs();
|
||||
|
||||
if (leafs.Count > 0)
|
||||
{
|
||||
return leafs.Select(l => new TextBlock(l.GetLines())).ToList();
|
||||
}
|
||||
}
|
||||
|
||||
return new List<TextBlock>();
|
||||
@@ -84,6 +91,18 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis
|
||||
Func<IEnumerable<decimal>, decimal> dominantFontWidthFunc,
|
||||
Func<IEnumerable<decimal>, decimal> dominantFontHeightFunc, int level = 0)
|
||||
{
|
||||
// order words left to right
|
||||
var words = leaf.Words.Where(w => !string.IsNullOrWhiteSpace(w.Text)).OrderBy(w => w.BoundingBox.Left).ToArray();
|
||||
|
||||
if(!words.Any())
|
||||
{
|
||||
return new XYNode(null);
|
||||
}
|
||||
else
|
||||
{
|
||||
//Create new leaf with non-whitespace words.
|
||||
leaf = new XYLeaf(words);
|
||||
}
|
||||
if (leaf.CountWords() <= 1 || leaf.BoundingBox.Width <= minimumWidth)
|
||||
{
|
||||
// we stop cutting if
|
||||
@@ -92,9 +111,6 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis
|
||||
return leaf;
|
||||
}
|
||||
|
||||
// order words left to right
|
||||
var words = leaf.Words.Where(w => !string.IsNullOrWhiteSpace(w.Text)).OrderBy(w => w.BoundingBox.Left).ToArray();
|
||||
|
||||
// determine dominantFontWidth and dominantFontHeight
|
||||
decimal domFontWidth = dominantFontWidthFunc(words.SelectMany(x => x.Letters)
|
||||
.Select(x => Math.Abs(x.GlyphRectangle.Width)));
|
||||
@@ -177,6 +193,18 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis
|
||||
Func<IEnumerable<decimal>, decimal> dominantFontWidthFunc,
|
||||
Func<IEnumerable<decimal>, decimal> dominantFontHeightFunc, int level = 0)
|
||||
{
|
||||
var words = leaf.Words.Where(w => !string.IsNullOrWhiteSpace(w.Text)).OrderBy(w => w.BoundingBox.Bottom).ToArray(); // order bottom to top
|
||||
|
||||
if (!words.Any())
|
||||
{
|
||||
return new XYNode(null);
|
||||
}
|
||||
else
|
||||
{
|
||||
//Create new leaf with non-whitespace words.
|
||||
leaf = new XYLeaf(words);
|
||||
}
|
||||
|
||||
if (leaf.CountWords() <= 1)
|
||||
{
|
||||
// we stop cutting if
|
||||
@@ -184,7 +212,6 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis
|
||||
return leaf;
|
||||
}
|
||||
|
||||
var words = leaf.Words.Where(w => !string.IsNullOrWhiteSpace(w.Text)).OrderBy(w => w.BoundingBox.Bottom).ToArray(); // order bottom to top
|
||||
|
||||
// determine dominantFontWidth and dominantFontHeight
|
||||
decimal domFontWidth = dominantFontWidthFunc(words.SelectMany(x => x.Letters)
|
||||
|
Reference in New Issue
Block a user