mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-09-20 20:07:57 +08:00
Create a PDF object scanner which sits on top of the core token scanner to provide complete object parsing.
This commit is contained in:
@@ -2,6 +2,7 @@
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using Exceptions;
|
||||
using IO;
|
||||
using Parser.Parts;
|
||||
using Tokens;
|
||||
@@ -22,6 +23,7 @@
|
||||
private readonly List<byte> currentBuffer = new List<byte>();
|
||||
private readonly List<(byte firstByte, ITokenizer tokenizer)> customTokenizers = new List<(byte, ITokenizer)>();
|
||||
|
||||
internal long CurrentTokenStart { get; private set; }
|
||||
public IToken CurrentToken { get; private set; }
|
||||
public bool TryReadToken<T>(out T token) where T : class, IToken
|
||||
{
|
||||
@@ -150,6 +152,8 @@
|
||||
}
|
||||
}
|
||||
|
||||
CurrentTokenStart = inputBytes.CurrentOffset - 1;
|
||||
|
||||
if (tokenizer == null || !tokenizer.TryTokenize(currentByte, inputBytes, out var token))
|
||||
{
|
||||
isSkippingSymbol = true;
|
||||
|
120
src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs
Normal file
120
src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs
Normal file
@@ -0,0 +1,120 @@
|
||||
namespace UglyToad.PdfPig.Tokenization.Scanner
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using ContentStream;
|
||||
using Cos;
|
||||
using Exceptions;
|
||||
using IO;
|
||||
using Tokens;
|
||||
|
||||
/// <summary>
/// Scans for indirect objects (<c>number generation obj ... endobj</c>) by layering
/// object-level parsing on top of a <see cref="CoreTokenScanner"/>.
/// </summary>
internal class PdfTokenScanner : ISeekableTokenScanner
{
    // Stored from the constructor; not read anywhere else in this class at present.
    private readonly IInputBytes inputBytes;
    private readonly CrossReferenceTable crossReferenceTable;

    // The underlying scanner which produces the individual tokens.
    private readonly CoreTokenScanner coreTokenScanner;

    // The start offsets and values of the two most recently read non-comment tokens,
    // index 0 being the older of the two.
    private readonly long[] previousTokenPositions = new long[2];
    private readonly IToken[] previousTokens = new IToken[2];

    // Not read or written anywhere in this class at present.
    private readonly Dictionary<IndirectReference, long> objectOffsets = new Dictionary<IndirectReference, long>();

    /// <summary>
    /// The object produced by the most recent successful call to <see cref="MoveNext"/>.
    /// </summary>
    public IToken CurrentToken { get; private set; }

    /// <summary>
    /// The current position reported by the underlying core token scanner.
    /// </summary>
    public long CurrentPosition => coreTokenScanner.CurrentPosition;

    /// <summary>
    /// Create a new <see cref="PdfTokenScanner"/> reading from the provided bytes.
    /// </summary>
    /// <param name="inputBytes">The bytes to scan; also used to create the underlying core token scanner.</param>
    /// <param name="crossReferenceTable">The cross reference table for the document.</param>
    public PdfTokenScanner(IInputBytes inputBytes, CrossReferenceTable crossReferenceTable)
    {
        this.inputBytes = inputBytes;
        this.crossReferenceTable = crossReferenceTable;
        coreTokenScanner = new CoreTokenScanner(inputBytes);
    }

    /// <summary>
    /// Advance to the next indirect object and expose it through <see cref="CurrentToken"/>.
    /// </summary>
    /// <returns>
    /// <see langword="true"/> if a complete object was read; <see langword="false"/> if the input ended
    /// before an <c>obj</c> operator preceded by at least two tokens, or before the matching <c>endobj</c>.
    /// </returns>
    /// <exception cref="PdfDocumentFormatException">
    /// The <c>obj</c> operator was not preceded by two numbers, or the object contained no tokens.
    /// </exception>
    public bool MoveNext()
    {
        var tokensRead = 0;
        var foundStartObject = false;

        // Skip forward to the next 'obj' operator, remembering the two tokens in front of it.
        while (coreTokenScanner.MoveNext())
        {
            if (coreTokenScanner.CurrentToken == OperatorToken.StartObject)
            {
                foundStartObject = true;
                break;
            }

            if (coreTokenScanner.CurrentToken is CommentToken)
            {
                continue;
            }

            tokensRead++;
            RememberToken(coreTokenScanner.CurrentToken, coreTokenScanner.CurrentTokenStart);
        }

        // Running out of input is not the same as finding 'obj': without this check any trailing
        // non-numeric tokens at the end of the input would be reported as a malformed object header.
        if (!foundStartObject || tokensRead < 2)
        {
            return false;
        }

        var startPosition = previousTokenPositions[0];
        var objectNumber = previousTokens[0] as NumericToken;
        var generation = previousTokens[1] as NumericToken;

        if (objectNumber == null || generation == null)
        {
            throw new PdfDocumentFormatException("The obj operator (start object) was not preceded by 2 numbers. " +
                                                 $"Instead got: {previousTokens[0]} {previousTokens[1]} obj");
        }

        var data = new List<IToken>();
        var foundEndObject = false;

        // Collect every non-comment token up to the matching 'endobj'.
        while (coreTokenScanner.MoveNext())
        {
            if (coreTokenScanner.CurrentToken == OperatorToken.EndObject)
            {
                foundEndObject = true;
                break;
            }

            if (coreTokenScanner.CurrentToken is CommentToken)
            {
                continue;
            }

            if (coreTokenScanner.CurrentToken == OperatorToken.StartStream)
            {
                // TODO: read stream. Not implemented yet, the operator is stored like any other token.
            }

            data.Add(coreTokenScanner.CurrentToken);

            // Record the token start, matching what the header loop above records.
            RememberToken(coreTokenScanner.CurrentToken, coreTokenScanner.CurrentTokenStart);
        }

        if (!foundEndObject)
        {
            return false;
        }

        if (data.Count == 0)
        {
            // Indexing the last element of an empty list would otherwise throw ArgumentOutOfRangeException.
            throw new PdfDocumentFormatException($"The object {objectNumber} {generation} obj contained no tokens before endobj.");
        }

        CurrentToken = new ObjectToken(startPosition, new IndirectReference(objectNumber.Long, generation.Int), data[data.Count - 1]);

        return true;
    }

    /// <summary>
    /// Try to read a token of the given type directly from the underlying core token scanner.
    /// </summary>
    /// <typeparam name="T">The type of token expected.</typeparam>
    /// <param name="token">The token produced by the underlying scanner.</param>
    /// <returns>The result reported by the underlying scanner.</returns>
    public bool TryReadToken<T>(out T token) where T : class, IToken
    {
        return coreTokenScanner.TryReadToken(out token);
    }

    /// <summary>
    /// Move the underlying core token scanner to the given position.
    /// </summary>
    /// <param name="position">The position to seek to.</param>
    public void Seek(long position)
    {
        coreTokenScanner.Seek(position);
    }

    /// <summary>
    /// Register a custom tokenizer with the underlying core token scanner.
    /// </summary>
    /// <param name="firstByte">The first byte the tokenizer is registered against.</param>
    /// <param name="tokenizer">The tokenizer to register.</param>
    public void RegisterCustomTokenizer(byte firstByte, ITokenizer tokenizer)
    {
        coreTokenScanner.RegisterCustomTokenizer(firstByte, tokenizer);
    }

    /// <summary>
    /// Remove a previously registered custom tokenizer from the underlying core token scanner.
    /// </summary>
    /// <param name="tokenizer">The tokenizer to remove.</param>
    public void DeregisterCustomTokenizer(ITokenizer tokenizer)
    {
        coreTokenScanner.DeregisterCustomTokenizer(tokenizer);
    }

    // Shift the two-token history along by one and store the newest token with its start offset.
    private void RememberToken(IToken token, long position)
    {
        previousTokens[0] = previousTokens[1];
        previousTokenPositions[0] = previousTokenPositions[1];

        previousTokens[1] = token;
        previousTokenPositions[1] = position;
    }
}
|
||||
}
|
Reference in New Issue
Block a user