mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-10-07 07:34:36 +08:00
Guard against circular references in XRef tables/streams
Some checks failed
Build, test and publish draft / build (push) Has been cancelled
Build and test [MacOS] / build (push) Has been cancelled
Run Common Crawl Tests / build (0000-0001) (push) Has been cancelled
Run Common Crawl Tests / build (0002-0003) (push) Has been cancelled
Run Common Crawl Tests / build (0004-0005) (push) Has been cancelled
Run Common Crawl Tests / build (0006-0007) (push) Has been cancelled
Run Integration Tests / build (push) Has been cancelled
Nightly Release / Check if this commit has already been published (push) Has been cancelled
Nightly Release / tests (push) Has been cancelled
Nightly Release / build_and_publish_nightly (push) Has been cancelled
Some checks failed
Build, test and publish draft / build (push) Has been cancelled
Build and test [MacOS] / build (push) Has been cancelled
Run Common Crawl Tests / build (0000-0001) (push) Has been cancelled
Run Common Crawl Tests / build (0002-0003) (push) Has been cancelled
Run Common Crawl Tests / build (0004-0005) (push) Has been cancelled
Run Common Crawl Tests / build (0006-0007) (push) Has been cancelled
Run Integration Tests / build (push) Has been cancelled
Nightly Release / Check if this commit has already been published (push) Has been cancelled
Nightly Release / tests (push) Has been cancelled
Nightly Release / build_and_publish_nightly (push) Has been cancelled
- Detect and prevent an xref table/stream at a given offset from being read twice; malformed xref tables with circular references could otherwise cause the table reading to loop forever.
- Another approach could be to prevent TryReadTableAtOffset from changing the bytes' CurrentOffset to the lastObjPosition in its attempt to read a table (e.g. by restoring CurrentOffset after the attempt to read a table), so that the outer loop over the bytes could continue its search through the entire input unaffected.
This commit is contained in:
@@ -9,5 +9,17 @@
|
||||
using var document = PdfDocument.Open(path);
|
||||
Assert.Equal(3, document.NumberOfPages);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void CanReadDocumentWithCircularXRef()
|
||||
{
|
||||
string path = IntegrationHelpers.GetSpecificTestDocumentPath("B17-2000-transportation-fuels.pdf");
|
||||
|
||||
// If parser can't deal with xrefs that have circular references then
|
||||
// opening the document will loop forever
|
||||
using var document = PdfDocument.Open(path);
|
||||
|
||||
Assert.Equal(1, document.NumberOfPages);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Binary file not shown.
@@ -16,6 +16,9 @@ internal static class XrefBruteForcer
|
||||
{
|
||||
var results = new List<IXrefSection>();
|
||||
|
||||
// Guard against circular references; only read xref at each offset once
|
||||
var xrefOffsetSeen = new HashSet<long>();
|
||||
|
||||
var bruteForceObjPositions = new Dictionary<IndirectReference, long>();
|
||||
|
||||
DictionaryToken? trailer = null;
|
||||
@@ -131,6 +134,14 @@ internal static class XrefBruteForcer
|
||||
ClearQueues();
|
||||
|
||||
var potentialTableOffset = bytes.CurrentOffset - 4;
|
||||
|
||||
if (xrefOffsetSeen.Contains(potentialTableOffset))
|
||||
{
|
||||
log.Debug($"Skipping circular xref reference at {potentialTableOffset}");
|
||||
continue;
|
||||
}
|
||||
xrefOffsetSeen.Add(potentialTableOffset);
|
||||
|
||||
var table = XrefTableParser.TryReadTableAtOffset(
|
||||
new FileHeaderOffset(0),
|
||||
potentialTableOffset,
|
||||
@@ -152,15 +163,22 @@ internal static class XrefBruteForcer
|
||||
{
|
||||
ClearQueues();
|
||||
|
||||
if (!lastObjPosition.HasValue)
|
||||
if (lastObjPosition is not long offset)
|
||||
{
|
||||
log.Error("Found an /XRef without having encountered an object first");
|
||||
continue;
|
||||
}
|
||||
|
||||
if (xrefOffsetSeen.Contains(offset))
|
||||
{
|
||||
log.Debug($"Skipping circular /XRef reference at {offset}");
|
||||
continue;
|
||||
}
|
||||
xrefOffsetSeen.Add(offset);
|
||||
|
||||
var stream = XrefStreamParser.TryReadStreamAtOffset(
|
||||
new FileHeaderOffset(0),
|
||||
lastObjPosition.Value,
|
||||
offset,
|
||||
bytes,
|
||||
scanner,
|
||||
log);
|
||||
|
Reference in New Issue
Block a user