Added integration test to see if we can detect the presence of invisible text (text rendering mode = Neither),

visible text, the presence of images and the presence of paths.
Pages containing certain combinations of these may need to be run through OCR.
This commit is contained in:
mvantzet
2023-01-13 14:11:13 +01:00
parent 06253966e4
commit 2acca32987
2 changed files with 24 additions and 0 deletions

View File

@@ -0,0 +1,24 @@
namespace UglyToad.PdfPig.Tests.Integration
{
    using PdfPig.Core;
    using Xunit;

    /// <summary>
    /// Integration tests that check which kinds of content can be detected on a page.
    /// </summary>
    public class PageContentTests
    {
        /// <summary>
        /// Opens the "Various Content Types" document with lenient parsing off and verifies
        /// that page 1 exposes letters with the <see cref="TextRenderingMode.Stroke"/> and
        /// <see cref="TextRenderingMode.Neither"/> rendering modes, at least one image and
        /// at least one path.
        /// </summary>
        [Fact]
        public void DetectPageContents()
        {
            var path = IntegrationHelpers.GetDocumentPath("Various Content Types");

            using (var pdf = PdfDocument.Open(path, ParsingOptions.LenientParsingOff))
            {
                var firstPage = pdf.GetPage(1);
                var pageLetters = firstPage.Letters;

                // "REGULAR TEXT"
                Assert.Contains(pageLetters, letter => letter.RenderingMode == TextRenderingMode.Stroke);

                // "INVISIBLE TEXT" (rendering mode Neither)
                Assert.Contains(pageLetters, letter => letter.RenderingMode == TextRenderingMode.Neither);

                // The page must also carry at least one image...
                var images = firstPage.Content.GetImages();
                Assert.NotEmpty(images);

                // ...and at least one path.
                var paths = firstPage.Content.Paths;
                Assert.NotEmpty(paths);
            }
        }
    }
}