mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-06-28 15:30:17 +08:00
commit
f822ad48ea
@ -5,6 +5,7 @@ using System.Linq;
|
|||||||
using System.Threading.Tasks;
|
using System.Threading.Tasks;
|
||||||
using UglyToad.PdfPig.Content;
|
using UglyToad.PdfPig.Content;
|
||||||
using UglyToad.PdfPig.Geometry;
|
using UglyToad.PdfPig.Geometry;
|
||||||
|
using UglyToad.PdfPig.Util;
|
||||||
|
|
||||||
namespace UglyToad.PdfPig.DocumentLayoutAnalysis
|
namespace UglyToad.PdfPig.DocumentLayoutAnalysis
|
||||||
{
|
{
|
||||||
@ -47,6 +48,8 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis
|
|||||||
public IReadOnlyList<TextBlock> GetBlocks(IEnumerable<Word> pageWords, double wlAngleLB, double wlAngleUB,
|
public IReadOnlyList<TextBlock> GetBlocks(IEnumerable<Word> pageWords, double wlAngleLB, double wlAngleUB,
|
||||||
double blAngleLB, double blAngleUB, double blMultiplier)
|
double blAngleLB, double blAngleUB, double blMultiplier)
|
||||||
{
|
{
|
||||||
|
if (pageWords.Count() == 0) return EmptyArray<TextBlock>.Instance;
|
||||||
|
|
||||||
var pageWordsArr = pageWords.Where(w => !string.IsNullOrWhiteSpace(w.Text)).ToArray(); // remove white spaces
|
var pageWordsArr = pageWords.Where(w => !string.IsNullOrWhiteSpace(w.Text)).ToArray(); // remove white spaces
|
||||||
|
|
||||||
var withinLineDistList = new ConcurrentBag<double[]>();
|
var withinLineDistList = new ConcurrentBag<double[]>();
|
||||||
@ -102,7 +105,7 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis
|
|||||||
|
|
||||||
// 2. Rebuild lines, using max distance = +Inf as we know all words will be in the
|
// 2. Rebuild lines, using max distance = +Inf as we know all words will be in the
|
||||||
// same block. Filtering will still be done based on angle.
|
// same block. Filtering will still be done based on angle.
|
||||||
var mergedLines = GetLines(mergedWords.ToArray(), wlAngleLB, wlAngleUB, double.MaxValue);
|
var mergedLines = GetLines(mergedWords.ToArray(), double.MaxValue, wlAngleLB, wlAngleUB);
|
||||||
blocks[b] = new TextBlock(mergedLines.ToList());
|
blocks[b] = new TextBlock(mergedLines.ToList());
|
||||||
|
|
||||||
// Remove
|
// Remove
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using System.Linq;
|
using System.Linq;
|
||||||
using UglyToad.PdfPig.Content;
|
using UglyToad.PdfPig.Content;
|
||||||
|
using UglyToad.PdfPig.Util;
|
||||||
|
|
||||||
namespace UglyToad.PdfPig.DocumentLayoutAnalysis
|
namespace UglyToad.PdfPig.DocumentLayoutAnalysis
|
||||||
{
|
{
|
||||||
@ -64,6 +65,8 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis
|
|||||||
Func<IEnumerable<decimal>, decimal> dominantFontWidthFunc,
|
Func<IEnumerable<decimal>, decimal> dominantFontWidthFunc,
|
||||||
Func<IEnumerable<decimal>, decimal> dominantFontHeightFunc)
|
Func<IEnumerable<decimal>, decimal> dominantFontHeightFunc)
|
||||||
{
|
{
|
||||||
|
if (pageWords.Count() == 0) return EmptyArray<TextBlock>.Instance;
|
||||||
|
|
||||||
XYLeaf root = new XYLeaf(pageWords); // Create a root node.
|
XYLeaf root = new XYLeaf(pageWords); // Create a root node.
|
||||||
XYNode node = VerticalCut(root, minimumWidth, dominantFontWidthFunc, dominantFontHeightFunc);
|
XYNode node = VerticalCut(root, minimumWidth, dominantFontWidthFunc, dominantFontHeightFunc);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user