Guard against circular references in XRef tables/streams
Some checks failed
Build, test and publish draft / build (push) Has been cancelled
Build and test [MacOS] / build (push) Has been cancelled
Run Common Crawl Tests / build (0000-0001) (push) Has been cancelled
Run Common Crawl Tests / build (0002-0003) (push) Has been cancelled
Run Common Crawl Tests / build (0004-0005) (push) Has been cancelled
Run Common Crawl Tests / build (0006-0007) (push) Has been cancelled
Run Integration Tests / build (push) Has been cancelled
Nightly Release / Check if this commit has already been published (push) Has been cancelled
Nightly Release / tests (push) Has been cancelled
Nightly Release / build_and_publish_nightly (push) Has been cancelled

- Detect and prevent an xref table/stream at a certain offset from being read twice; malformed xref tables with circular references could otherwise cause the table-reading to loop forever.
- An alternative approach would be to stop TryReadTableAtOffset from moving the bytes' CurrentOffset to lastObjPosition while it attempts to read a table (e.g. by restoring CurrentOffset after the attempt), so that the outer loop over the bytes can continue its search through the entire input unaffected.
This commit is contained in:
ricflams
2025-10-01 01:25:36 +02:00
committed by BobLd
parent d7d01f842e
commit c28d114b79
3 changed files with 32 additions and 2 deletions

View File

@@ -9,5 +9,17 @@
using var document = PdfDocument.Open(path);
Assert.Equal(3, document.NumberOfPages);
}
[Fact]
public void CanReadDocumentWithCircularXRef()
{
    // Regression test for xrefs that contain circular references: a parser
    // that cannot cope with them never returns from PdfDocument.Open, so
    // simply reaching the assertion shows that the cycle was handled.
    string circularXrefPdf = IntegrationHelpers.GetSpecificTestDocumentPath("B17-2000-transportation-fuels.pdf");

    using (var opened = PdfDocument.Open(circularXrefPdf))
    {
        Assert.Equal(1, opened.NumberOfPages);
    }
}
}
}

View File

@@ -16,6 +16,9 @@ internal static class XrefBruteForcer
{
var results = new List<IXrefSection>();
// Guard against circular references; only read xref at each offset once
var xrefOffsetSeen = new HashSet<long>();
var bruteForceObjPositions = new Dictionary<IndirectReference, long>();
DictionaryToken? trailer = null;
@@ -131,6 +134,14 @@ internal static class XrefBruteForcer
ClearQueues();
var potentialTableOffset = bytes.CurrentOffset - 4;
if (xrefOffsetSeen.Contains(potentialTableOffset))
{
log.Debug($"Skipping circular xref reference at {potentialTableOffset}");
continue;
}
xrefOffsetSeen.Add(potentialTableOffset);
var table = XrefTableParser.TryReadTableAtOffset(
new FileHeaderOffset(0),
potentialTableOffset,
@@ -152,15 +163,22 @@ internal static class XrefBruteForcer
{
ClearQueues();
if (!lastObjPosition.HasValue)
if (lastObjPosition is not long offset)
{
log.Error("Found an /XRef without having encountered an object first");
continue;
}
if (xrefOffsetSeen.Contains(offset))
{
log.Debug($"Skipping circular /XRef reference at {offset}");
continue;
}
xrefOffsetSeen.Add(offset);
var stream = XrefStreamParser.TryReadStreamAtOffset(
new FileHeaderOffset(0),
lastObjPosition.Value,
offset,
bytes,
scanner,
log);