mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-09-19 19:07:56 +08:00
Prevent infinite loops where a stream token's length entry references itself. Perform brute-force scans of the file in case of a faulty xref table (#33).
This commit is contained in:
@@ -45,9 +45,11 @@
|
|||||||
|
|
||||||
public IToken CurrentToken { get; private set; }
|
public IToken CurrentToken { get; private set; }
|
||||||
|
|
||||||
|
private IndirectReference? callingObject;
|
||||||
|
|
||||||
public long CurrentPosition => coreTokenScanner.CurrentPosition;
|
public long CurrentPosition => coreTokenScanner.CurrentPosition;
|
||||||
|
|
||||||
public PdfTokenScanner(IInputBytes inputBytes, IObjectLocationProvider objectLocationProvider, IFilterProvider filterProvider,
|
public PdfTokenScanner(IInputBytes inputBytes, IObjectLocationProvider objectLocationProvider, IFilterProvider filterProvider,
|
||||||
IEncryptionHandler encryptionHandler)
|
IEncryptionHandler encryptionHandler)
|
||||||
{
|
{
|
||||||
this.inputBytes = inputBytes;
|
this.inputBytes = inputBytes;
|
||||||
@@ -94,8 +96,7 @@
|
|||||||
|
|
||||||
if (objectNumber == null || generation == null)
|
if (objectNumber == null || generation == null)
|
||||||
{
|
{
|
||||||
throw new PdfDocumentFormatException("The obj operator (start object) was not preceded by a 2 numbers." +
|
return false;
|
||||||
$"Instead got: {previousTokens[0]} {previousTokens[1]} obj");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read all tokens between obj and endobj.
|
// Read all tokens between obj and endobj.
|
||||||
@@ -115,11 +116,27 @@
|
|||||||
|
|
||||||
if (ReferenceEquals(coreTokenScanner.CurrentToken, OperatorToken.StartStream))
|
if (ReferenceEquals(coreTokenScanner.CurrentToken, OperatorToken.StartStream))
|
||||||
{
|
{
|
||||||
// Read stream: special case.
|
var streamIdentifier = new IndirectReference(objectNumber.Long, generation.Int);
|
||||||
if (TryReadStream(coreTokenScanner.CurrentTokenStart, out var stream))
|
|
||||||
|
// Prevent an infinite loop where a stream's length references the stream or the stream's offset.
|
||||||
|
var getLengthFromFile = !(callingObject.HasValue && callingObject.Value.Equals(streamIdentifier));
|
||||||
|
|
||||||
|
var outerCallingObject = callingObject;
|
||||||
|
|
||||||
|
try
|
||||||
{
|
{
|
||||||
readTokens.Clear();
|
callingObject = streamIdentifier;
|
||||||
readTokens.Add(stream);
|
|
||||||
|
// Read stream: special case.
|
||||||
|
if (TryReadStream(coreTokenScanner.CurrentTokenStart, getLengthFromFile, out var stream))
|
||||||
|
{
|
||||||
|
readTokens.Clear();
|
||||||
|
readTokens.Add(stream);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
finally
|
||||||
|
{
|
||||||
|
callingObject = outerCallingObject;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@@ -168,14 +185,14 @@
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
private bool TryReadStream(long startStreamTokenOffset, out StreamToken stream)
|
private bool TryReadStream(long startStreamTokenOffset, bool getLength, out StreamToken stream)
|
||||||
{
|
{
|
||||||
stream = null;
|
stream = null;
|
||||||
|
|
||||||
DictionaryToken streamDictionaryToken = GetStreamDictionary();
|
DictionaryToken streamDictionaryToken = GetStreamDictionary();
|
||||||
|
|
||||||
// Get the expected length from the stream dictionary if present.
|
// Get the expected length from the stream dictionary if present.
|
||||||
long? length = GetStreamLength(streamDictionaryToken);
|
long? length = getLength ? GetStreamLength(streamDictionaryToken) : default(long?);
|
||||||
|
|
||||||
// Verify again that we start with "stream"
|
// Verify again that we start with "stream"
|
||||||
var hasStartStreamToken = ReadStreamTokenStart(inputBytes, startStreamTokenOffset);
|
var hasStartStreamToken = ReadStreamTokenStart(inputBytes, startStreamTokenOffset);
|
||||||
@@ -218,7 +235,7 @@
|
|||||||
|
|
||||||
// Track any 'endobj' or 'endstream' operators we see.
|
// Track any 'endobj' or 'endstream' operators we see.
|
||||||
var observedEndLocations = new List<PossibleStreamEndLocation>();
|
var observedEndLocations = new List<PossibleStreamEndLocation>();
|
||||||
|
|
||||||
// Begin reading the stream.
|
// Begin reading the stream.
|
||||||
using (var memoryStream = new MemoryStream())
|
using (var memoryStream = new MemoryStream())
|
||||||
using (var binaryWrite = new BinaryWriter(memoryStream))
|
using (var binaryWrite = new BinaryWriter(memoryStream))
|
||||||
@@ -304,9 +321,9 @@
|
|||||||
|
|
||||||
endStreamPosition = 0;
|
endStreamPosition = 0;
|
||||||
endObjPosition = 0;
|
endObjPosition = 0;
|
||||||
commonPartPosition = (inputBytes.CurrentByte == commonPart[0]) ? 1 : 0;
|
commonPartPosition = (inputBytes.CurrentByte == commonPart[0]) ? 1 : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
binaryWrite.Write(inputBytes.CurrentByte);
|
binaryWrite.Write(inputBytes.CurrentByte);
|
||||||
|
|
||||||
read++;
|
read++;
|
||||||
@@ -491,10 +508,30 @@
|
|||||||
|
|
||||||
if (!MoveNext())
|
if (!MoveNext())
|
||||||
{
|
{
|
||||||
throw new InvalidOperationException($"Could not parse the object with reference: {reference}.");
|
throw new PdfDocumentFormatException($"Could not parse the object with reference: {reference}.");
|
||||||
}
|
}
|
||||||
|
|
||||||
return (ObjectToken)CurrentToken;
|
var found = (ObjectToken)CurrentToken;
|
||||||
|
|
||||||
|
if (found.Number.Equals(reference))
|
||||||
|
{
|
||||||
|
return found;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Brute force read the entire file
|
||||||
|
Seek(0);
|
||||||
|
|
||||||
|
while (MoveNext())
|
||||||
|
{
|
||||||
|
objectLocationProvider.Cache((ObjectToken)CurrentToken);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!objectLocationProvider.TryGetCached(reference, out objectToken))
|
||||||
|
{
|
||||||
|
throw new PdfDocumentFormatException($"Could not locate object with reference: {reference} despite a full document search.");
|
||||||
|
}
|
||||||
|
|
||||||
|
return objectToken;
|
||||||
}
|
}
|
||||||
|
|
||||||
private ObjectToken GetObjectFromStream(IndirectReference reference, long offset)
|
private ObjectToken GetObjectFromStream(IndirectReference reference, long offset)
|
||||||
@@ -548,9 +585,9 @@
|
|||||||
for (var i = 0; i < numberOfObjects.Int; i++)
|
for (var i = 0; i < numberOfObjects.Int; i++)
|
||||||
{
|
{
|
||||||
scanner.MoveNext();
|
scanner.MoveNext();
|
||||||
var objectNumber = (NumericToken) scanner.CurrentToken;
|
var objectNumber = (NumericToken)scanner.CurrentToken;
|
||||||
scanner.MoveNext();
|
scanner.MoveNext();
|
||||||
var byteOffset = (NumericToken) scanner.CurrentToken;
|
var byteOffset = (NumericToken)scanner.CurrentToken;
|
||||||
|
|
||||||
objects.Add(Tuple.Create(objectNumber.Long, byteOffset.Long));
|
objects.Add(Tuple.Create(objectNumber.Long, byteOffset.Long));
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user