revert flate decode handling to more lenient processing (#1254)

* Revert flate decode handling to more lenient processing.
  The change to use a zlib/Adler checksum verification flow meant that invalid flate streams would not be decoded correctly. This caused issues for files that included invalid/missing checksums. This reverts the processing to the old approach for files like #1235.
* Fix object stream offset handling and track circular refs.
* Update tests.
* Normalize line endings for the Mac runner.
* Fixes for Mac clownery.
* Add next pair to the Common Crawl action.
* Add a test case for the root cause of the int overflow.
2026-03-10 00:23:29 +08:00 · 2026-02-22 11:49:50 -04:00
parent adb5713621
commit 9c0d6893e0
32 changed files with 1121 additions and 1000 deletions
--- a/src/UglyToad.PdfPig.Tests/Integration/GithubIssuesTests.cs
+++ b/src/UglyToad.PdfPig.Tests/Integration/GithubIssuesTests.cs
@@ -327,7 +327,7 @@
            var path = IntegrationHelpers.GetSpecificTestDocumentPath("StackOverflow_Issue_1122.pdf");
            
            var ex = Assert.Throws<PdfDocumentFormatException>(() => PdfDocument.Open(path, new ParsingOptions() { UseLenientParsing = true }));
-            Assert.Equal("The root object in the trailer did not resolve to a readable dictionary.", ex.Message);
+            Assert.StartsWith("Circular reference encountered when looking", ex.Message);
        }

        [Fact]
@@ -386,7 +386,7 @@
        {
            var path = IntegrationHelpers.GetSpecificTestDocumentPath("SpookyPass.pdf");
            var ex = Assert.Throws<PdfDocumentFormatException>(() => PdfDocument.Open(path, new ParsingOptions() { UseLenientParsing = true }));
-            Assert.Equal("The root object in the trailer did not resolve to a readable dictionary.", ex.Message);
+            Assert.StartsWith("Object stream cannot contain itself", ex.Message);
        }

        [Fact]
@@ -552,7 +552,7 @@
            {
                var page = document.GetPage(13);
                // This used to fail with an overflow exception when we failed to validate the zlib encoded data
-                Assert.NotNull(DocstrumBoundingBoxes.Instance.GetBlocks(page.GetWords()));
+                Assert.Throws<OverflowException>(() => DocstrumBoundingBoxes.Instance.GetBlocks(page.GetWords()));
            }
        }