cross reference offset is in the xref table we ignore the error

previously we checked the offset was not inside the table (correct thing to check), however this is only a special case of the more general issue (cross reference offsets are wrong). we move handling for this into the pdf token scanner. if we attempt to read an object at an offset and it fails we brute force the entire file to find correct offsets. we also needed to add handling to make sure we don't attempt to use stream length tokens if we're brute-forcing since we can't look up indirect references for length.
This commit is contained in:
Eliot Jones
2020-02-26 14:03:46 +00:00
parent 7d0d5806a9
commit f415c3116e
3 changed files with 33 additions and 22 deletions

View File

@@ -29,6 +29,7 @@
private IEncryptionHandler encryptionHandler;
private bool isDisposed;
private bool isBruteForcing;
/// <summary>
/// Stores tokens encountered between obj - endobj markers for each <see cref="MoveNext"/> call.
@@ -153,7 +154,7 @@
var streamIdentifier = new IndirectReference(objectNumber.Long, generation.Int);
// Prevent an infinite loop where a stream's length references the stream or the stream's offset.
var getLengthFromFile = !(callingObject.HasValue && callingObject.Value.Equals(streamIdentifier));
var getLengthFromFile = !isBruteForcing && !(callingObject.HasValue && callingObject.Value.Equals(streamIdentifier));
var outerCallingObject = callingObject;
@@ -673,7 +674,7 @@
if (!MoveNext())
{
throw new PdfDocumentFormatException($"Could not parse the object with reference: {reference}.");
return BruteForceFileToFindReference(reference);
}
var found = (ObjectToken)CurrentToken;
@@ -683,20 +684,34 @@
return found;
}
// Brute force read the entire file
Seek(0);
return BruteForceFileToFindReference(reference);
}
while (MoveNext())
private ObjectToken BruteForceFileToFindReference(IndirectReference reference)
{
try
{
objectLocationProvider.Cache((ObjectToken)CurrentToken, true);
}
// Brute force read the entire file
isBruteForcing = true;
if (!objectLocationProvider.TryGetCached(reference, out objectToken))
Seek(0);
while (MoveNext())
{
objectLocationProvider.Cache((ObjectToken)CurrentToken, true);
}
if (!objectLocationProvider.TryGetCached(reference, out var objectToken))
{
throw new PdfDocumentFormatException($"Could not locate object with reference: {reference} despite a full document search.");
}
return objectToken;
}
finally
{
throw new PdfDocumentFormatException($"Could not locate object with reference: {reference} despite a full document search.");
isBruteForcing = false;
}
return objectToken;
}
private ObjectToken GetObjectFromStream(IndirectReference reference, long offset)