mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-09-20 03:17:57 +08:00
Support reading files with missing white space after xref in lenient mode (#906)
Support missing white space after xref. Co-authored-by: Arnaud TAMAILLON <arnaud.tamaillon@younited-credit.fr>
This commit is contained in:
@@ -0,0 +1,13 @@
|
|||||||
|
namespace UglyToad.PdfPig.Tests.Integration
{
    /// <summary>
    /// Integration tests that open whole documents whose cross reference data is malformed.
    /// </summary>
    public class CrossReferenceParserTests
    {
        /// <summary>
        /// Opens the "xref-with-no-whitespace.pdf" test document and checks that all three pages are found.
        /// </summary>
        [Fact]
        public void CanReadDocumentWithMissingWhitespaceAfterXRef()
        {
            var path = IntegrationHelpers.GetSpecificTestDocumentPath("xref-with-no-whitespace.pdf");

            // The document is disposed as soon as the page count has been verified.
            using (var document = PdfDocument.Open(path))
            {
                Assert.Equal(3, document.NumberOfPages);
            }
        }
    }
}
|
Binary file not shown.
@@ -300,18 +300,42 @@ trailer
|
|||||||
trailer
|
trailer
|
||||||
<< >>";
|
<< >>";
|
||||||
// Strict parsing
|
// Strict parsing
|
||||||
var input = StringBytesTestConverter.Scanner(data);
|
var input = GetReader(data);
|
||||||
var act = () => CrossReferenceTableParser.Parse(input.scanner, 0, false);
|
var act = () => CrossReferenceTableParser.Parse(input, 0, false);
|
||||||
var ex = Assert.Throws<PdfDocumentFormatException>(act);
|
var ex = Assert.Throws<PdfDocumentFormatException>(act);
|
||||||
Assert.Equal("Found a line with 2 unexpected entries in the cross reference table: 127, 0.", ex.Message);
|
Assert.Equal("Found a line with 2 unexpected entries in the cross reference table: 127, 0.", ex.Message);
|
||||||
|
|
||||||
// Lenient Parsing
|
// Lenient Parsing
|
||||||
input = StringBytesTestConverter.Scanner(data);
|
input = GetReader(data);
|
||||||
var result = CrossReferenceTableParser.Parse(input.scanner, 0, true);
|
var result = CrossReferenceTableParser.Parse(input, 0, true);
|
||||||
|
|
||||||
Assert.Equal(6, result.ObjectOffsets.Count);
|
Assert.Equal(6, result.ObjectOffsets.Count);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// A table whose "xref" keyword is fused with the first object number ("xref15") must be
/// rejected by strict parsing and read successfully by lenient parsing.
/// </summary>
[Fact]
public void ParsesMissingWhitespaceAfterXref()
{
    const string data = @"xref15 2
0000000190 00000 n
0000000250 00032 n

trailer
<<>>";

    // Strict parsing
    var strictScanner = GetReader(data);

    var failure = Assert.Throws<PdfDocumentFormatException>(
        () => CrossReferenceTableParser.Parse(strictScanner, 0, false));
    Assert.Equal("Unexpected operator in xref position: xref15.", failure.Message);

    // Lenient Parsing
    // A fresh scanner is used so the lenient run starts from the beginning of the data.
    var lenientScanner = GetReader(data);

    var table = CrossReferenceTableParser.Parse(lenientScanner, 0, true);

    Assert.Equal(2, table.ObjectOffsets.Count);
}
|
||||||
|
|
||||||
private static CoreTokenScanner GetReader(string input)
|
private static CoreTokenScanner GetReader(string input)
|
||||||
{
|
{
|
||||||
return StringBytesTestConverter.Scanner(input).scanner;
|
return StringBytesTestConverter.Scanner(input).scanner;
|
||||||
|
@@ -59,7 +59,7 @@
|
|||||||
|
|
||||||
tokenScanner.MoveNext();
|
tokenScanner.MoveNext();
|
||||||
|
|
||||||
if (tokenScanner.CurrentToken is OperatorToken tableToken && tableToken.Data == "xref")
|
if (CrossReferenceTableParser.IsCrossReferenceMarker(tokenScanner, isLenientParsing))
|
||||||
{
|
{
|
||||||
missedAttempts = 0;
|
missedAttempts = 0;
|
||||||
log.Debug("Element was cross reference table.");
|
log.Debug("Element was cross reference table.");
|
||||||
|
@@ -31,10 +31,22 @@
|
|||||||
|
|
||||||
if (scanner.CurrentToken is OperatorToken operatorToken)
|
if (scanner.CurrentToken is OperatorToken operatorToken)
|
||||||
{
|
{
|
||||||
if (operatorToken.Data == "xref")
|
if (operatorToken.Data == OperatorToken.Xref.Data)
|
||||||
{
|
{
|
||||||
scanner.MoveNext();
|
scanner.MoveNext();
|
||||||
}
|
}
|
||||||
|
else if (isLenientParsing)
|
||||||
|
{
|
||||||
|
if (operatorToken.Data.StartsWith(OperatorToken.Xref.Data))
|
||||||
|
{
|
||||||
|
scanner.Seek(scanner.CurrentPosition - operatorToken.Data.Length + OperatorToken.Xref.Data.Length);
|
||||||
|
scanner.MoveNext();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
throw new PdfDocumentFormatException($"Unexpected operator in xref position: {operatorToken}.");
|
||||||
|
}
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
throw new PdfDocumentFormatException($"Unexpected operator in xref position: {operatorToken}.");
|
throw new PdfDocumentFormatException($"Unexpected operator in xref position: {operatorToken}.");
|
||||||
@@ -106,6 +118,15 @@
|
|||||||
return builder.Build();
|
return builder.Build();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Checks whether the scanner's current token marks the start of a cross reference table.
/// </summary>
/// <param name="scanner">The scanner whose current token is inspected. The scanner is not advanced.</param>
/// <param name="isLenientParsing">
/// When <see langword="true"/>, an operator made of the xref keyword immediately followed by
/// decimal digits (for example <c>xref15</c>) is also accepted as a marker.
/// </param>
/// <returns>
/// <see langword="true"/> if the current token is an operator equal to the xref keyword, or, in lenient
/// mode only, the xref keyword directly followed by an unsigned integer; otherwise <see langword="false"/>.
/// </returns>
public static bool IsCrossReferenceMarker(ISeekableTokenScanner scanner, bool isLenientParsing)
{
    if (!(scanner.CurrentToken is OperatorToken operatorToken))
    {
        return false;
    }

    var keyword = OperatorToken.Xref.Data;
    var data = operatorToken.Data;

    if (data == keyword)
    {
        return true;
    }

    // PDF keywords are plain bytes, so compare ordinally rather than with culture-aware rules.
    if (!isLenientParsing || !data.StartsWith(keyword, StringComparison.Ordinal))
    {
        return false;
    }

    // Only bare decimal digits may follow the keyword: NumberStyles.None rejects signs and
    // surrounding white space, and the invariant culture keeps the result machine-independent.
    return int.TryParse(
        data.Substring(keyword.Length),
        System.Globalization.NumberStyles.None,
        System.Globalization.CultureInfo.InvariantCulture,
        out _);
}
|
||||||
|
|
||||||
private static int ProcessTokens(ReadOnlySpan<IToken> tokens, CrossReferenceTablePartBuilder builder, bool isLenientParsing,
|
private static int ProcessTokens(ReadOnlySpan<IToken> tokens, CrossReferenceTablePartBuilder builder, bool isLenientParsing,
|
||||||
int objectCount, ref TableSubsectionDefinition definition)
|
int objectCount, ref TableSubsectionDefinition definition)
|
||||||
{
|
{
|
||||||
|
Reference in New Issue
Block a user