diff --git a/src/UglyToad.PdfPig.Tests/Parser/Parts/FileStructure/CrossReferenceTableParserTests.cs b/src/UglyToad.PdfPig.Tests/Parser/Parts/FileStructure/CrossReferenceTableParserTests.cs index 3494cef1..d0e5efb1 100644 --- a/src/UglyToad.PdfPig.Tests/Parser/Parts/FileStructure/CrossReferenceTableParserTests.cs +++ b/src/UglyToad.PdfPig.Tests/Parser/Parts/FileStructure/CrossReferenceTableParserTests.cs @@ -227,7 +227,7 @@ trailer } [Fact] - public void EntryPointingAtOffsetInTableThrows() + public void EntryPointingAtOffsetInTableDoesNotThrow() { var input = GetReader(@"xref 0 2 @@ -236,9 +236,10 @@ trailer trailer <<>>"); - Action action = () => parser.Parse(input, 0, false); + var result = parser.Parse(input, 0, false); - Assert.Throws(action); + var offset = Assert.Single(result.ObjectOffsets); + Assert.Equal(10, offset.Value); } [Fact] diff --git a/src/UglyToad.PdfPig/Parser/FileStructure/CrossReferenceTableParser.cs b/src/UglyToad.PdfPig/Parser/FileStructure/CrossReferenceTableParser.cs index cbe108a3..2d553854 100644 --- a/src/UglyToad.PdfPig/Parser/FileStructure/CrossReferenceTableParser.cs +++ b/src/UglyToad.PdfPig/Parser/FileStructure/CrossReferenceTableParser.cs @@ -69,7 +69,7 @@ readingLine = false; - count = ProcessTokens(tokens, scanner, builder, isLenientParsing, count, ref definition); + count = ProcessTokens(tokens, builder, isLenientParsing, count, ref definition); tokens.Clear(); @@ -94,7 +94,7 @@ if (tokens.Count > 0) { - ProcessTokens(tokens, scanner, builder, isLenientParsing, count, ref definition); + ProcessTokens(tokens, builder, isLenientParsing, count, ref definition); } scanner.DeregisterCustomTokenizer(tokenizer); @@ -105,7 +105,7 @@ return builder.Build(); } - private static int ProcessTokens(List tokens, ISeekableTokenScanner scanner, CrossReferenceTablePartBuilder builder, bool isLenientParsing, + private static int ProcessTokens(List tokens, CrossReferenceTablePartBuilder builder, bool isLenientParsing, int objectCount, ref TableSubsectionDefinition definition) { string GetErrorMessage() @@ -161,11 +161,6 @@ if (tokens[0] is NumericToken offset && tokens[1] is NumericToken generationNumber) { - if (offset.Long >= builder.Offset && offset.Long <= scanner.CurrentPosition) - { - throw new PdfDocumentFormatException($"Object offset {offset} is within its own cross-reference table for object {definition.FirstNumber + objectCount}"); - } - builder.Add(definition.FirstNumber + objectCount, generationNumber.Int, offset.Long); return objectCount + 1; diff --git a/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs b/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs index eeec8dc4..0a747bb2 100644 --- a/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs +++ b/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs @@ -29,6 +29,7 @@ private IEncryptionHandler encryptionHandler; private bool isDisposed; + private bool isBruteForcing; /// /// Stores tokens encountered between obj - endobj markers for each call. @@ -153,7 +154,7 @@ var streamIdentifier = new IndirectReference(objectNumber.Long, generation.Int); // Prevent an infinite loop where a stream's length references the stream or the stream's offset. - var getLengthFromFile = !(callingObject.HasValue && callingObject.Value.Equals(streamIdentifier)); + var getLengthFromFile = !isBruteForcing && !(callingObject.HasValue && callingObject.Value.Equals(streamIdentifier)); var outerCallingObject = callingObject; @@ -673,7 +674,7 @@ if (!MoveNext()) { - throw new PdfDocumentFormatException($"Could not parse the object with reference: {reference}."); + return BruteForceFileToFindReference(reference); } var found = (ObjectToken)CurrentToken; @@ -683,20 +684,34 @@ return found; } - // Brute force read the entire file - Seek(0); + return BruteForceFileToFindReference(reference); + } - while (MoveNext()) + private ObjectToken BruteForceFileToFindReference(IndirectReference reference) + { + try { - objectLocationProvider.Cache((ObjectToken)CurrentToken, true); - } + // Brute force read the entire file + isBruteForcing = true; - if (!objectLocationProvider.TryGetCached(reference, out objectToken)) + Seek(0); + + while (MoveNext()) + { + objectLocationProvider.Cache((ObjectToken)CurrentToken, true); + } + + if (!objectLocationProvider.TryGetCached(reference, out var objectToken)) + { + throw new PdfDocumentFormatException($"Could not locate object with reference: {reference} despite a full document search."); + } + + return objectToken; + } + finally { - throw new PdfDocumentFormatException($"Could not locate object with reference: {reference} despite a full document search."); + isBruteForcing = false; } - - return objectToken; } private ObjectToken GetObjectFromStream(IndirectReference reference, long offset)