Do not fail when trying to recognize a cross reference stream

This commit is contained in:
Arnaud TAMAILLON 2024-09-02 23:30:15 +02:00
parent f4d1456489
commit 79be356ee0
3 changed files with 99 additions and 15 deletions

View File

@ -0,0 +1,14 @@
namespace UglyToad.PdfPig.Tests.Integration
{
    /// <summary>
    /// Integration tests covering how cross reference data is located when a document is opened.
    /// </summary>
    public class CrossReferenceParserTests
    {
        /// <summary>
        /// Opening the "startxref-jpeg-stream" test document must complete without throwing.
        /// </summary>
        /// <remarks>
        /// NOTE(review): judging by its name, the fixture's startxref entry leads the parser to a
        /// stream object that is not a cross reference stream; confirm against the fixture file.
        /// </remarks>
        [Fact]
        public void ParseMustNotFailWhenTryingToRecognizeUnexpectedStructures()
        {
            var pdf = IntegrationHelpers.GetSpecificTestDocumentPath("startxref-jpeg-stream");

            // Dispose the document so the file handle it holds is released once the test finishes.
            using (var doc = PdfDocument.Open(pdf))
            {
                // Reaching this line means Open did not throw, which is the behavior under test.
                Assert.NotNull(doc);
            }
        }
    }
}

View File

@ -0,0 +1,56 @@
%PDF-1.4
1 0 obj
<< /Type /XObject
/Subtype /Image
/Width 2362
/Height 3504
/Filter [/DCTDecode]
/ColorSpace /DeviceRGB
/BitsPerComponent 8
/Length 4
>>
stream
aaaa
endstream
endobj
2 0 obj
<</Type/Catalog/Pages 3 0 R>>
endobj
3 0 obj
<</Kids[4 0 R]/Count 1/Type/Pages/MediaBox[0 0 595 792]>>
endobj
4 0 obj
<</Type/Page/Parent 3 0 R/Contents 5 0 R/Resources<<>>>>
endobj
6 0 obj
<</Length 58>>
stream
q
BT
/ 96 Tf
1 0 0 1 36 684 Tm
(Hello World!) Tj
ET
Q
endstream
endobj
xref
0 6
0000000000 65536 f
0000000010 00000 n
0000000229 00000 n
0000000279 00000 n
0000000357 00000 n
0000000434 00000 n
trailer
<</Size 6/Root 2 0 R>>
startxref
0
%%EOF

View File

@ -113,7 +113,7 @@
{
try
{
TryParseCrossReferenceStream(streamOffset, pdfScanner, tiedToTableAtOffset, out streamPart);
TryParseCrossReferenceStream(streamOffset, pdfScanner, tiedToTableAtOffset, true, out streamPart);
}
catch (InvalidOperationException ex)
{
@ -157,7 +157,7 @@
tokenScanner.Seek(previousCrossReferenceLocation);
// parse xref stream
if (!TryParseCrossReferenceStream(previousCrossReferenceLocation, pdfScanner, null, out var tablePart))
if (!TryParseCrossReferenceStream(previousCrossReferenceLocation, pdfScanner, null, false, out var tablePart))
{
if (!TryBruteForceXrefTableLocate(bytes, previousCrossReferenceLocation, out var actualOffset))
{
@ -253,26 +253,40 @@
long objByteOffset,
IPdfTokenScanner pdfScanner,
long? fromTableAtOffset,
bool throwIfFailed,
[NotNullWhen(true)] out CrossReferenceTablePart? xrefTablePart)
{
xrefTablePart = null;
pdfScanner.Seek(objByteOffset);
pdfScanner.MoveNext();
var streamObjectToken = (ObjectToken)pdfScanner.CurrentToken;
if (streamObjectToken is null || !(streamObjectToken.Data is StreamToken objectStream))
try
{
log.Error($"When reading a cross reference stream object found a non-stream object: {streamObjectToken?.Data}");
pdfScanner.Seek(objByteOffset);
pdfScanner.MoveNext();
var streamObjectToken = (ObjectToken)pdfScanner.CurrentToken;
if (streamObjectToken is null || !(streamObjectToken.Data is StreamToken objectStream))
{
log.Error($"When reading a cross reference stream object found a non-stream object: {streamObjectToken?.Data}");
return false;
}
xrefTablePart = crossReferenceStreamParser.Parse(objByteOffset, fromTableAtOffset, objectStream);
return true;
}
catch (Exception e)
{
if (throwIfFailed)
{
throw;
}
log.Debug($"Failed trying to parse cross reference stream object due to an unexpected error: {e.Message}");
return false;
}
xrefTablePart = crossReferenceStreamParser.Parse(objByteOffset, fromTableAtOffset, objectStream);
return true;
}
private bool TryBruteForceXrefTableLocate(IInputBytes bytes, long expectedOffset,