Support missing white space after xref

This commit is contained in:
Arnaud TAMAILLON 2024-09-02 11:13:25 +02:00
parent 09bddba778
commit 78b2edaf83
2 changed files with 25 additions and 2 deletions

View File

@ -59,7 +59,7 @@
tokenScanner.MoveNext();
if (tokenScanner.CurrentToken is OperatorToken tableToken && tableToken.Data == "xref")
if (CrossReferenceTableParser.IsCrossReferenceMarker(tokenScanner, isLenientParsing))
{
missedAttempts = 0;
log.Debug("Element was cross reference table.");

View File

@ -5,6 +5,7 @@
using CrossReference;
using Core;
using Parts.CrossReference;
using System.Text.RegularExpressions;
using Tokenization;
using Tokenization.Scanner;
using Tokens;
@ -14,6 +15,8 @@
private const string InUseEntry = "n";
private const string FreeEntry = "f";
// Matches an operator made of the xref keyword immediately followed by digits (missing white space, e.g. "xref0").
// Anchored at the start so operators that merely contain the keyword (e.g. "startxref123") do not match,
// and so callers may assume the keyword occupies the first characters of the token.
private static readonly Regex XrefWithNumberRegex = new Regex(@$"^{OperatorToken.Xref.Data}(\d+)");
public static CrossReferenceTablePart Parse(ISeekableTokenScanner scanner, long offset, bool isLenientParsing)
{
var builder = new CrossReferenceTablePartBuilder
@ -31,10 +34,23 @@
if (scanner.CurrentToken is OperatorToken operatorToken)
{
if (operatorToken.Data == "xref")
if (operatorToken.Data == OperatorToken.Xref.Data)
{
scanner.MoveNext();
}
else if (isLenientParsing)
{
var match = XrefWithNumberRegex.Match(operatorToken.Data);
if (match.Success)
{
scanner.Seek(scanner.CurrentPosition - operatorToken.Data.Length + "xref".Length);
scanner.MoveNext();
}
else
{
throw new PdfDocumentFormatException($"Unexpected operator in xref position: {operatorToken}.");
}
}
else
{
throw new PdfDocumentFormatException($"Unexpected operator in xref position: {operatorToken}.");
@ -106,6 +122,13 @@
return builder.Build();
}
/// <summary>
/// Determines whether the scanner's current token marks the start of a cross reference table.
/// </summary>
/// <param name="scanner">The scanner whose current token is inspected. The scanner is not advanced.</param>
/// <param name="isLenientParsing">
/// When <see langword="true"/>, an operator consisting of the xref keyword immediately followed by digits
/// (i.e. the separating white space is missing) is also accepted.
/// </param>
/// <returns><see langword="true"/> if the current token is an xref marker; otherwise <see langword="false"/>.</returns>
public static bool IsCrossReferenceMarker(ISeekableTokenScanner scanner, bool isLenientParsing)
{
    if (!(scanner.CurrentToken is OperatorToken operatorToken))
    {
        return false;
    }

    if (operatorToken.Data == OperatorToken.Xref.Data)
    {
        return true;
    }

    if (!isLenientParsing)
    {
        return false;
    }

    // Only accept the keyword at the very start of the token: an operator such as "startxref123"
    // contains "xref" followed by digits but is not a cross reference table marker.
    var match = XrefWithNumberRegex.Match(operatorToken.Data);
    return match.Success && match.Index == 0;
}
private static int ProcessTokens(ReadOnlySpan<IToken> tokens, CrossReferenceTablePartBuilder builder, bool isLenientParsing,
int objectCount, ref TableSubsectionDefinition definition)
{