mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-10-07 16:14:03 +08:00
Compare commits
6 Commits
ca284e0cb9
...
master
Author | SHA1 | Date | |
---|---|---|---|
![]() |
c28d114b79 | ||
![]() |
d7d01f842e | ||
![]() |
33a8d829ee | ||
![]() |
57921c7e9b | ||
![]() |
5a6b3970f0 | ||
![]() |
397ccb15d6 |
@@ -9,5 +9,17 @@
|
||||
using var document = PdfDocument.Open(path);
|
||||
Assert.Equal(3, document.NumberOfPages);
|
||||
}
|
||||
|
||||
[Fact]
public void CanReadDocumentWithCircularXRef()
{
    // Regression guard: a parser that cannot cope with cross-reference
    // sections pointing back at each other never returns from Open.
    var documentPath = IntegrationHelpers.GetSpecificTestDocumentPath("B17-2000-transportation-fuels.pdf");

    using (var document = PdfDocument.Open(documentPath))
    {
        Assert.Equal(1, document.NumberOfPages);
    }
}
|
||||
}
|
||||
}
|
||||
|
@@ -472,19 +472,13 @@
|
||||
{
|
||||
var doc = IntegrationHelpers.GetDocumentPath("ErcotFacts.pdf");
|
||||
|
||||
using (var document = PdfDocument.Open(doc, new ParsingOptions() { UseLenientParsing = true, SkipMissingFonts = true }))
|
||||
{
|
||||
var page1 = document.GetPage(1);
|
||||
Assert.Equal(1788, page1.Letters.Count);
|
||||
|
||||
var page2 = document.GetPage(2);
|
||||
Assert.Equal(2430, page2.Letters.Count);
|
||||
}
|
||||
|
||||
using (var document = PdfDocument.Open(doc, new ParsingOptions() { UseLenientParsing = true, SkipMissingFonts = false }))
|
||||
{
|
||||
var ex = Assert.Throws<ArgumentNullException>(() => document.GetPage(1));
|
||||
Assert.StartsWith("Value cannot be null.", ex.Message);
|
||||
var page1 = document.GetPage(1);
|
||||
Assert.Equal(1939, page1.Letters.Count);
|
||||
|
||||
var page2 = document.GetPage(2);
|
||||
Assert.Equal(2434, page2.Letters.Count);
|
||||
}
|
||||
}
|
||||
|
||||
|
Binary file not shown.
@@ -56,16 +56,16 @@
|
||||
// add this and follow chain defined by 'Prev' keys
|
||||
xrefPartToBytePositionOrder.Add(firstCrossReferenceOffset);
|
||||
|
||||
// Get any streams that are tied to this table.
|
||||
var activePart = currentPart;
|
||||
var dependents = parts.Where(x => x.TiedToXrefAtOffset == activePart.Offset);
|
||||
foreach (var dependent in dependents)
|
||||
{
|
||||
xrefPartToBytePositionOrder.Add(dependent.Offset);
|
||||
}
|
||||
|
||||
while (currentPart.Dictionary != null)
|
||||
{
|
||||
// Get any streams that are tied to this table.
|
||||
var activePart = currentPart;
|
||||
var dependents = parts.Where(x => x.TiedToXrefAtOffset == activePart.Offset);
|
||||
foreach (var dependent in dependents)
|
||||
{
|
||||
xrefPartToBytePositionOrder.Add(dependent.Offset);
|
||||
}
|
||||
|
||||
long prevBytePos = currentPart.GetPreviousOffset();
|
||||
if (prevBytePos == -1)
|
||||
{
|
||||
|
@@ -153,6 +153,23 @@ internal static partial class FirstPassParser
|
||||
{
|
||||
results.Add(table);
|
||||
nextLocation = table.GetPrevious();
|
||||
|
||||
// Also add any optional associated Stream
|
||||
var xRefStm = table.GetXRefStm();
|
||||
if (xRefStm is long xRefStmValue)
|
||||
{
|
||||
var stream = GetXrefStreamOrTable(
|
||||
offset,
|
||||
input,
|
||||
scanner,
|
||||
xRefStmValue,
|
||||
log);
|
||||
|
||||
if (stream != null)
|
||||
{
|
||||
results.Add(stream);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (streamOrTable is XrefStream stream)
|
||||
{
|
||||
|
@@ -16,6 +16,9 @@ internal static class XrefBruteForcer
|
||||
{
|
||||
var results = new List<IXrefSection>();
|
||||
|
||||
// Guard against circular references; only read xref at each offset once
|
||||
var xrefOffsetSeen = new HashSet<long>();
|
||||
|
||||
var bruteForceObjPositions = new Dictionary<IndirectReference, long>();
|
||||
|
||||
DictionaryToken? trailer = null;
|
||||
@@ -131,6 +134,14 @@ internal static class XrefBruteForcer
|
||||
ClearQueues();
|
||||
|
||||
var potentialTableOffset = bytes.CurrentOffset - 4;
|
||||
|
||||
if (xrefOffsetSeen.Contains(potentialTableOffset))
|
||||
{
|
||||
log.Debug($"Skipping circular xref reference at {potentialTableOffset}");
|
||||
continue;
|
||||
}
|
||||
xrefOffsetSeen.Add(potentialTableOffset);
|
||||
|
||||
var table = XrefTableParser.TryReadTableAtOffset(
|
||||
new FileHeaderOffset(0),
|
||||
potentialTableOffset,
|
||||
@@ -152,15 +163,22 @@ internal static class XrefBruteForcer
|
||||
{
|
||||
ClearQueues();
|
||||
|
||||
if (!lastObjPosition.HasValue)
|
||||
if (lastObjPosition is not long offset)
|
||||
{
|
||||
log.Error("Found an /XRef without having encountered an object first");
|
||||
continue;
|
||||
}
|
||||
|
||||
if (xrefOffsetSeen.Contains(offset))
|
||||
{
|
||||
log.Debug($"Skipping circular /XRef reference at {offset}");
|
||||
continue;
|
||||
}
|
||||
xrefOffsetSeen.Add(offset);
|
||||
|
||||
var stream = XrefStreamParser.TryReadStreamAtOffset(
|
||||
new FileHeaderOffset(0),
|
||||
lastObjPosition.Value,
|
||||
offset,
|
||||
bytes,
|
||||
scanner,
|
||||
log);
|
||||
|
@@ -44,4 +44,14 @@ internal sealed class XrefTable : IXrefSection
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/// <summary>
/// Reads the <see cref="NameToken.XrefStm"/> entry from this table's dictionary.
/// </summary>
/// <returns>
/// The numeric value of the entry as a <see cref="long"/>, or <c>null</c> when there is
/// no dictionary or the lookup of a numeric <see cref="NameToken.XrefStm"/> entry fails.
/// </returns>
public long? GetXRefStm()
{
    if (Dictionary == null)
    {
        return null;
    }

    return Dictionary.TryGet(NameToken.XrefStm, out NumericToken streamOffset)
        ? streamOffset.Long
        : (long?)null;
}
|
||||
}
|
Reference in New Issue
Block a user