mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-09-18 09:57:56 +08:00
read last line of ignore file (#1155)
* read last line of ignore file
  - do not cancel other matrix jobs if one test fails
  - read all lines of the ignore list even if it doesn't end with a newline
  - add ignore list for 0008 and 0009
* support missing object numbers when brute-forcing the file
  File 10404 (ironically) contains a not-found reference, number 43 0, for its info dictionary. This changes the brute-force code so that objects can be entirely missing.
* fix test, since the document is now opened successfully but its mediabox is broken
This commit is contained in:
3
.github/workflows/run_common_crawl_tests.yml
vendored
3
.github/workflows/run_common_crawl_tests.yml
vendored
@@ -10,6 +10,7 @@ jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
pair: ["0000-0001", "0002-0003", "0004-0005", "0006-0007"]
|
||||
|
||||
@@ -45,7 +46,7 @@ jobs:
|
||||
|
||||
- name: Remove unwanted test files
|
||||
run: |
|
||||
while read f; do
|
||||
while read f || [ -n "$f" ]; do
|
||||
full="corpus/extracted/$f"
|
||||
if [ -f "$full" ]; then
|
||||
echo "Removing $full"
|
||||
|
@@ -102,8 +102,10 @@
|
||||
{
|
||||
var path = IntegrationHelpers.GetSpecificTestDocumentPath("Hang.pdf");
|
||||
|
||||
var ex = Assert.Throws<PdfDocumentFormatException>(() => PdfDocument.Open(path, new ParsingOptions() { UseLenientParsing = true }));
|
||||
Assert.StartsWith("Could not locate object with reference:", ex.Message);
|
||||
using var doc = PdfDocument.Open(path, new ParsingOptions { UseLenientParsing = true });
|
||||
|
||||
var ex = Assert.Throws<PdfDocumentFormatException>(() => doc.GetPage(1));
|
||||
Assert.StartsWith("Could not find", ex.Message);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
|
@@ -770,7 +770,8 @@
|
||||
|
||||
if (!MoveNext())
|
||||
{
|
||||
return BruteForceFileToFindReference(reference);
|
||||
TryBruteForceFileToFindReference(reference, out var bfObjectToken);
|
||||
return bfObjectToken;
|
||||
}
|
||||
|
||||
var found = (ObjectToken)CurrentToken!;
|
||||
@@ -780,7 +781,9 @@
|
||||
return found;
|
||||
}
|
||||
|
||||
return BruteForceFileToFindReference(reference);
|
||||
TryBruteForceFileToFindReference(reference, out var bfToken);
|
||||
|
||||
return bfToken;
|
||||
}
|
||||
|
||||
public void ReplaceToken(IndirectReference reference, IToken token)
|
||||
@@ -790,8 +793,9 @@
|
||||
overwrittenTokens[reference] = new ObjectToken(0, reference, token);
|
||||
}
|
||||
|
||||
private ObjectToken BruteForceFileToFindReference(IndirectReference reference)
|
||||
private bool TryBruteForceFileToFindReference(IndirectReference reference, [NotNullWhen(true)] out ObjectToken? result)
|
||||
{
|
||||
result = null;
|
||||
try
|
||||
{
|
||||
// Brute force read the entire file
|
||||
@@ -806,10 +810,12 @@
|
||||
|
||||
if (!objectLocationProvider.TryGetCached(reference, out var objectToken))
|
||||
{
|
||||
throw new PdfDocumentFormatException($"Could not locate object with reference: {reference} despite a full document search.");
|
||||
return false;
|
||||
}
|
||||
|
||||
return objectToken;
|
||||
result = objectToken;
|
||||
|
||||
return true;
|
||||
}
|
||||
finally
|
||||
{
|
||||
|
@@ -37,4 +37,13 @@
|
||||
0006766.pdf
|
||||
0006844.pdf
|
||||
0007159.pdf
|
||||
0007559.pdf
|
||||
0007559.pdf
|
||||
0008404.pdf
|
||||
0008443.pdf
|
||||
0008674.pdf
|
||||
0008978.pdf
|
||||
0009290.pdf
|
||||
0009309.pdf
|
||||
0009464.pdf
|
||||
0009706.pdf
|
||||
0009944.pdf
|
Reference in New Issue
Block a user