add content order text extractor and example of use

This commit is contained in:
Eliot Jones
2020-04-19 17:06:34 +01:00
parent f18bc0766a
commit 407ee5ca51
8 changed files with 202 additions and 3 deletions

View File

@@ -1,4 +1,6 @@
namespace UglyToad.PdfPig.Tests.Integration
using UglyToad.PdfPig.DocumentLayoutAnalysis.TextExtractor;
namespace UglyToad.PdfPig.Tests.Integration
{
using System;
using System.Collections.Generic;
@@ -159,6 +161,19 @@ used per estimate, we introduce a “complement class” Naive Bayes is often us
Assert.NotNull(svg);
}
}
// Smoke test: opens the document named by GetFilename() and checks that
// ContentOrderTextExtractor.GetText returns a non-null result for each page.
[Fact]
public void CanExtractContentOrderText()
{
    using (var pdf = PdfDocument.Open(GetFilename()))
    {
        foreach (var currentPage in pdf.GetPages())
        {
            // Only non-nullness is asserted; the extracted content itself is not inspected.
            Assert.NotNull(ContentOrderTextExtractor.GetText(currentPage));
        }
    }
}
private static IReadOnlyList<AssertablePositionData> GetPdfBoxPositionData()
{