mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-10-07 15:56:36 +08:00
Guard against circular references in XRef tables/streams
Some checks failed
Build, test and publish draft / build (push) Has been cancelled
Build and test [MacOS] / build (push) Has been cancelled
Run Common Crawl Tests / build (0000-0001) (push) Has been cancelled
Run Common Crawl Tests / build (0002-0003) (push) Has been cancelled
Run Common Crawl Tests / build (0004-0005) (push) Has been cancelled
Run Common Crawl Tests / build (0006-0007) (push) Has been cancelled
Run Integration Tests / build (push) Has been cancelled
Nightly Release / Check if this commit has already been published (push) Has been cancelled
Nightly Release / tests (push) Has been cancelled
Nightly Release / build_and_publish_nightly (push) Has been cancelled
Some checks failed
Build, test and publish draft / build (push) Has been cancelled
Build and test [MacOS] / build (push) Has been cancelled
Run Common Crawl Tests / build (0000-0001) (push) Has been cancelled
Run Common Crawl Tests / build (0002-0003) (push) Has been cancelled
Run Common Crawl Tests / build (0004-0005) (push) Has been cancelled
Run Common Crawl Tests / build (0006-0007) (push) Has been cancelled
Run Integration Tests / build (push) Has been cancelled
Nightly Release / Check if this commit has already been published (push) Has been cancelled
Nightly Release / tests (push) Has been cancelled
Nightly Release / build_and_publish_nightly (push) Has been cancelled
- Detect and prevent an xref table/stream at a given offset from being read more than once; malformed xref tables with circular references could otherwise cause the table reading to loop forever.
- An alternative approach would be to stop TryReadTableAtOffset from changing the bytes' CurrentOffset to the lastObjPosition while it attempts to read a table (e.g. by restoring CurrentOffset after the attempt to read a table), so that the outer loop over the bytes could continue its search through the entire input unaffected.
This commit is contained in:
@@ -9,5 +9,17 @@
|
|||||||
using var document = PdfDocument.Open(path);
|
using var document = PdfDocument.Open(path);
|
||||||
Assert.Equal(3, document.NumberOfPages);
|
Assert.Equal(3, document.NumberOfPages);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[Fact]
|
||||||
|
public void CanReadDocumentWithCircularXRef()
|
||||||
|
{
|
||||||
|
string path = IntegrationHelpers.GetSpecificTestDocumentPath("B17-2000-transportation-fuels.pdf");
|
||||||
|
|
||||||
|
// If parser can't deal with xrefs that have circular references then
|
||||||
|
// opening the document will loop forever
|
||||||
|
using var document = PdfDocument.Open(path);
|
||||||
|
|
||||||
|
Assert.Equal(1, document.NumberOfPages);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Binary file not shown.
@@ -16,6 +16,9 @@ internal static class XrefBruteForcer
|
|||||||
{
|
{
|
||||||
var results = new List<IXrefSection>();
|
var results = new List<IXrefSection>();
|
||||||
|
|
||||||
|
// Guard against circular references; only read xref at each offset once
|
||||||
|
var xrefOffsetSeen = new HashSet<long>();
|
||||||
|
|
||||||
var bruteForceObjPositions = new Dictionary<IndirectReference, long>();
|
var bruteForceObjPositions = new Dictionary<IndirectReference, long>();
|
||||||
|
|
||||||
DictionaryToken? trailer = null;
|
DictionaryToken? trailer = null;
|
||||||
@@ -131,6 +134,14 @@ internal static class XrefBruteForcer
|
|||||||
ClearQueues();
|
ClearQueues();
|
||||||
|
|
||||||
var potentialTableOffset = bytes.CurrentOffset - 4;
|
var potentialTableOffset = bytes.CurrentOffset - 4;
|
||||||
|
|
||||||
|
if (xrefOffsetSeen.Contains(potentialTableOffset))
|
||||||
|
{
|
||||||
|
log.Debug($"Skipping circular xref reference at {potentialTableOffset}");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
xrefOffsetSeen.Add(potentialTableOffset);
|
||||||
|
|
||||||
var table = XrefTableParser.TryReadTableAtOffset(
|
var table = XrefTableParser.TryReadTableAtOffset(
|
||||||
new FileHeaderOffset(0),
|
new FileHeaderOffset(0),
|
||||||
potentialTableOffset,
|
potentialTableOffset,
|
||||||
@@ -152,15 +163,22 @@ internal static class XrefBruteForcer
|
|||||||
{
|
{
|
||||||
ClearQueues();
|
ClearQueues();
|
||||||
|
|
||||||
if (!lastObjPosition.HasValue)
|
if (lastObjPosition is not long offset)
|
||||||
{
|
{
|
||||||
log.Error("Found an /XRef without having encountered an object first");
|
log.Error("Found an /XRef without having encountered an object first");
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (xrefOffsetSeen.Contains(offset))
|
||||||
|
{
|
||||||
|
log.Debug($"Skipping circular /XRef reference at {offset}");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
xrefOffsetSeen.Add(offset);
|
||||||
|
|
||||||
var stream = XrefStreamParser.TryReadStreamAtOffset(
|
var stream = XrefStreamParser.TryReadStreamAtOffset(
|
||||||
new FileHeaderOffset(0),
|
new FileHeaderOffset(0),
|
||||||
lastObjPosition.Value,
|
offset,
|
||||||
bytes,
|
bytes,
|
||||||
scanner,
|
scanner,
|
||||||
log);
|
log);
|
||||||
|
Reference in New Issue
Block a user