revert flate decode handling to more lenient processing (#1254)

* revert flate decode handling to more lenient processing

the change to use a zlib/Adler-32 checksum verification flow meant that
invalid flate streams would not be decoded correctly. this caused
issues for files that included invalid/missing checksums. this reverts
the processing to the old approach for files like #1235.

* fix object stream offset handling and track circular refs

* update tests

* normalize line endings for mac runner

* fixes for mac clownery

* add next pair to common crawl action

* add a test case for the root cause of the int overflow
This commit is contained in:
Eliot Jones
2026-02-22 11:49:50 -04:00
committed by GitHub
parent adb5713621
commit 9c0d6893e0
32 changed files with 1121 additions and 1000 deletions

View File

@@ -327,7 +327,7 @@
var path = IntegrationHelpers.GetSpecificTestDocumentPath("StackOverflow_Issue_1122.pdf");
var ex = Assert.Throws<PdfDocumentFormatException>(() => PdfDocument.Open(path, new ParsingOptions() { UseLenientParsing = true }));
Assert.Equal("The root object in the trailer did not resolve to a readable dictionary.", ex.Message);
Assert.StartsWith("Circular reference encountered when looking", ex.Message);
}
[Fact]
@@ -386,7 +386,7 @@
{
var path = IntegrationHelpers.GetSpecificTestDocumentPath("SpookyPass.pdf");
var ex = Assert.Throws<PdfDocumentFormatException>(() => PdfDocument.Open(path, new ParsingOptions() { UseLenientParsing = true }));
Assert.Equal("The root object in the trailer did not resolve to a readable dictionary.", ex.Message);
Assert.StartsWith("Object stream cannot contain itself", ex.Message);
}
[Fact]
@@ -552,7 +552,7 @@
{
var page = document.GetPage(13);
// This used to fail with an overflow exception when we failed to validate the zlib encoded data
Assert.NotNull(DocstrumBoundingBoxes.Instance.GetBlocks(page.GetWords()));
Assert.Throws<OverflowException>(() => DocstrumBoundingBoxes.Instance.GetBlocks(page.GetWords()));
}
}