Added an integration test to check whether we can detect the presence of invisible text (text rendering mode = Neither), visible text, images and paths.

Certain combinations of these content types may require the page to be run through OCR.
2026-01-18 19:51:24 +08:00 · 2023-01-13 14:11:13 +01:00
parent 06253966e4
commit 2acca32987
2 changed files with 24 additions and 0 deletions
--- a/src/UglyToad.PdfPig.Tests/Integration/Documents/Various
+++ b/src/UglyToad.PdfPig.Tests/Integration/Documents/Various
--- a/src/UglyToad.PdfPig.Tests/Integration/PageContentTests.cs
+++ b/src/UglyToad.PdfPig.Tests/Integration/PageContentTests.cs
@@ -0,0 +1,24 @@
+namespace UglyToad.PdfPig.Tests.Integration
+{
+    using PdfPig.Core;
+    using Xunit;
+
+    public class PageContentTests
+    {
+        [Fact]
+        public void DetectPageContents()
+        {
+            // Page 1 of the fixture must expose filled (visible) text, invisible text, images and paths.
+            var file = IntegrationHelpers.GetDocumentPath("Various Content Types");
+            using (var document = PdfDocument.Open(file, ParsingOptions.LenientParsingOff))
+            {
+                var page = document.GetPage(1);
+                var letters = page.Letters;
+                Assert.Contains(letters, l => l.RenderingMode == TextRenderingMode.Fill); // "REGULAR TEXT": default mode 0 (Fill), not Stroke
+                Assert.Contains(letters, l => l.RenderingMode == TextRenderingMode.Neither); // "INVISIBLE TEXT": neither filled nor stroked
+                Assert.NotEmpty(page.Content.GetImages());
+                Assert.NotEmpty(page.Content.Paths);
+            }
+        }
+    }
+}