read last line of ignore file (#1155)

* read last line of ignore file

- do not cancel other matrix jobs if one test fails
- read all lines of the ignore list even if it doesn't end with a newline
- add ignore list for 0008 and 0009

* support missing object numbers when brute-forcing

The file 10404 (ironically) contains a reference to an object (43 0) that
cannot be found, for its info dictionary. This changes the brute-force code
so that referenced objects can be entirely missing.

* fix test, since the document now opens successfully but its MediaBox is broken
This commit is contained in:
Eliot Jones
2025-09-13 16:57:35 +02:00
committed by GitHub
parent c96880ac61
commit 07df6fd740
4 changed files with 27 additions and 9 deletions

View File

@@ -10,6 +10,7 @@ jobs:
build:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
pair: ["0000-0001", "0002-0003", "0004-0005", "0006-0007"]
@@ -45,7 +46,7 @@ jobs:
- name: Remove unwanted test files
run: |
while read f; do
while read f || [ -n "$f" ]; do
full="corpus/extracted/$f"
if [ -f "$full" ]; then
echo "Removing $full"

View File

@@ -102,8 +102,10 @@
{
var path = IntegrationHelpers.GetSpecificTestDocumentPath("Hang.pdf");
var ex = Assert.Throws<PdfDocumentFormatException>(() => PdfDocument.Open(path, new ParsingOptions() { UseLenientParsing = true }));
Assert.StartsWith("Could not locate object with reference:", ex.Message);
using var doc = PdfDocument.Open(path, new ParsingOptions { UseLenientParsing = true });
var ex = Assert.Throws<PdfDocumentFormatException>(() => doc.GetPage(1));
Assert.StartsWith("Could not find", ex.Message);
}
[Fact]

View File

@@ -770,7 +770,8 @@
if (!MoveNext())
{
return BruteForceFileToFindReference(reference);
TryBruteForceFileToFindReference(reference, out var bfObjectToken);
return bfObjectToken;
}
var found = (ObjectToken)CurrentToken!;
@@ -780,7 +781,9 @@
return found;
}
return BruteForceFileToFindReference(reference);
TryBruteForceFileToFindReference(reference, out var bfToken);
return bfToken;
}
public void ReplaceToken(IndirectReference reference, IToken token)
@@ -790,8 +793,9 @@
overwrittenTokens[reference] = new ObjectToken(0, reference, token);
}
private ObjectToken BruteForceFileToFindReference(IndirectReference reference)
private bool TryBruteForceFileToFindReference(IndirectReference reference, [NotNullWhen(true)] out ObjectToken? result)
{
result = null;
try
{
// Brute force read the entire file
@@ -806,10 +810,12 @@
if (!objectLocationProvider.TryGetCached(reference, out var objectToken))
{
throw new PdfDocumentFormatException($"Could not locate object with reference: {reference} despite a full document search.");
return false;
}
return objectToken;
result = objectToken;
return true;
}
finally
{

View File

@@ -37,4 +37,13 @@
0006766.pdf
0006844.pdf
0007159.pdf
0007559.pdf
0007559.pdf
0008404.pdf
0008443.pdf
0008674.pdf
0008978.pdf
0009290.pdf
0009309.pdf
0009464.pdf
0009706.pdf
0009944.pdf