diff --git a/src/UglyToad.PdfPig.Tests/Integration/CrossReferenceParserTests.cs b/src/UglyToad.PdfPig.Tests/Integration/CrossReferenceParserTests.cs index beab121b..71165e59 100644 --- a/src/UglyToad.PdfPig.Tests/Integration/CrossReferenceParserTests.cs +++ b/src/UglyToad.PdfPig.Tests/Integration/CrossReferenceParserTests.cs @@ -9,5 +9,17 @@ using var document = PdfDocument.Open(path); Assert.Equal(3, document.NumberOfPages); } + + [Fact] + public void CanReadDocumentWithCircularXRef() + { + string path = IntegrationHelpers.GetSpecificTestDocumentPath("B17-2000-transportation-fuels.pdf"); + + // If parser can't deal with xrefs that have circular references then + // opening the document will loop forever + using var document = PdfDocument.Open(path); + + Assert.Equal(1, document.NumberOfPages); + } } } diff --git a/src/UglyToad.PdfPig.Tests/Integration/SpecificTestDocuments/B17-2000-transportation-fuels.pdf b/src/UglyToad.PdfPig.Tests/Integration/SpecificTestDocuments/B17-2000-transportation-fuels.pdf new file mode 100644 index 00000000..fb1315f2 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Integration/SpecificTestDocuments/B17-2000-transportation-fuels.pdf differ diff --git a/src/UglyToad.PdfPig/Parser/FileStructure/XrefBruteForcer.cs b/src/UglyToad.PdfPig/Parser/FileStructure/XrefBruteForcer.cs index d7b46448..b038f162 100644 --- a/src/UglyToad.PdfPig/Parser/FileStructure/XrefBruteForcer.cs +++ b/src/UglyToad.PdfPig/Parser/FileStructure/XrefBruteForcer.cs @@ -16,6 +16,9 @@ internal static class XrefBruteForcer { var results = new List(); + // Guard against circular references; only read xref at each offset once + var xrefOffsetSeen = new HashSet(); + var bruteForceObjPositions = new Dictionary(); DictionaryToken? trailer = null; @@ -131,6 +134,14 @@ internal static class XrefBruteForcer ClearQueues(); var potentialTableOffset = bytes.CurrentOffset - 4; + + if (xrefOffsetSeen.Contains(potentialTableOffset)) + { + log.Debug($"Skipping circular xref reference at {potentialTableOffset}"); + continue; + } + xrefOffsetSeen.Add(potentialTableOffset); + var table = XrefTableParser.TryReadTableAtOffset( new FileHeaderOffset(0), potentialTableOffset, @@ -152,15 +163,22 @@ internal static class XrefBruteForcer { ClearQueues(); - if (!lastObjPosition.HasValue) + if (lastObjPosition is not long offset) { log.Error("Found an /XRef without having encountered an object first"); continue; } + if (xrefOffsetSeen.Contains(offset)) + { + log.Debug($"Skipping circular /XRef reference at {offset}"); + continue; + } + xrefOffsetSeen.Add(offset); + var stream = XrefStreamParser.TryReadStreamAtOffset( new FileHeaderOffset(0), - lastObjPosition.Value, + offset, bytes, scanner, log);