mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-08-20 08:25:09 +08:00
change pdf stream end token to use queue
This commit is contained in:
parent
bf664c3f0b
commit
282146f389
@ -1,36 +1,25 @@
|
|||||||
namespace UglyToad.PdfPig.Tests.Integration
|
namespace UglyToad.PdfPig.Tests.Integration
|
||||||
{
|
{
|
||||||
//using System.Diagnostics;
|
using System.Diagnostics;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// A class for testing files which are not checked in to source control.
|
/// A class for testing files which are not checked in to source control.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public class LocalTests
|
public class LocalTests
|
||||||
{
|
{
|
||||||
//[Fact]
|
[Fact]
|
||||||
//public void Tests()
|
public void Tests()
|
||||||
//{
|
{
|
||||||
// var files = Directory.GetFiles(@"C:\temp\pdfs", "*.pdf");
|
var file = File.ReadAllBytes(@"D:\temp\200708170550023.pdf");
|
||||||
|
using (var document = PdfDocument.Open(file, new ParsingOptions { UseLenientParsing = false }))
|
||||||
// foreach (var file in files)
|
{
|
||||||
// {
|
for (var i = 1; i <= document.NumberOfPages; i++)
|
||||||
// try
|
{
|
||||||
// {
|
var page = document.GetPage(i);
|
||||||
// using (var document = PdfDocument.Open(file, new ParsingOptions { UseLenientParsing = false }))
|
var text = page.Text;
|
||||||
// {
|
Trace.WriteLine(text);
|
||||||
// for (var i = 1; i <= document.NumberOfPages; i++)
|
}
|
||||||
// {
|
}
|
||||||
// var page = document.GetPage(i);
|
}
|
||||||
// var text = page.Text;
|
|
||||||
// Trace.WriteLine(text);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// catch (Exception ex)
|
|
||||||
// {
|
|
||||||
// throw new InvalidOperationException($"Error parsing: {Path.GetFileName(file)}.", ex);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -406,6 +406,49 @@ endobj";
|
|||||||
Assert.Equal(7, token.Number.ObjectNumber);
|
Assert.Equal(7, token.Number.ObjectNumber);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[Fact]
public void ReadsStreamWithDoubleEndstreamSimple()
{
    // A stream object whose data is directly followed by "endstream" and then
    // by a second "endstream" keyword on its own line.
    // NOTE(review): this test contains no assertions; it only checks that
    // scanning the input to the end does not throw - confirm this is intended.
    const string input =
        """
        250 0 obj
        << /Filter /FlateDecode >>
        stream
        012endstream
        endstream
        endobj
        """;

    var tokenScanner = GetScanner(input);

    _ = ReadToEnd(tokenScanner);
}
|
||||||
|
|
||||||
|
[Fact]
|
||||||
|
public void ReadsStreamWithDoubleEndstream()
|
||||||
|
{
|
||||||
|
const string s =
|
||||||
|
"""
|
||||||
|
1974 0 obj
|
||||||
|
<<
|
||||||
|
/Filter /FlateDecode
|
||||||
|
/Length 1975 0 R
|
||||||
|
>>
|
||||||
|
stream
|
||||||
|
xœ]ÔÏnÚ@€ñ'ð;øØ"Œg !Ué…Cÿ¨´ ö:B*Æ2äÀÛw¿™MZõ'þ°½ë›]<ï>ïÆÓ^|Ÿ/Ý>Ýêá4ösº^^ç.ÕÇôr«e[÷§îVÎüØSµÈ7ïï×[:ïÆáRm6ÕâGþðz›ïõ‡Oýå˜>V‹osŸæÓøRøõ¼Ïçû×iúÎi¼ÕMµÝÖ}òƒ¾¦¯‡sª~ÛîϟŸn÷‡|Ïß+~Þ§T·~¾ŒÉt—>]§C—æÃø’ªM»ÜÖ›U³ÒØÿ÷ÙJã–ãðïµ~†&msh Y„ –K‚4BK0‚yÈ¿rXVzš°Žà}$<zÐðDxò`þÐáAGÂ1‚:BÏða{B{$$Bа& „!ÂSÒä¿ýCC€B£e…PHx´x-Ã
|
||||||
|
R<˜º@!á!>,âW@!á!¼œ@!áÑ2uBÂC=@!á¡þP(¤xðU
|
||||||
|
R< (¤xø°PHx(SW(4<”—S(4<´#@¡á¡ÌT¡Ð²><@¡á¡Œ¢PhxSW(4<”õ¡Phxè‘ …†Ç’£PhY|Q
|
||||||
|
…†GëÃB¡e}à¡Phx˜¿ †‡B¡áÑú°Phx´ÆÔ
|
||||||
|
+,ƒÂÂ#/× °²>3(¬xð.……‡¡nPXx˜_……‡ùC¡°²>x}ƒÂÂCx9ƒÂНoPXxˆ…š&ùPø!ÙÚ¯€ÂŠÿ•……‡ ¶jbky y‡yÛJØlØßw±îužó曦ï\ìY§1½ï«Óeâ.ÿùz°gAendstream
|
||||||
|
endstream
|
||||||
|
endobj
|
||||||
|
""";
|
||||||
|
|
||||||
|
var scanner = GetScanner(s);
|
||||||
|
|
||||||
|
var tokens = ReadToEnd(scanner);
|
||||||
|
}
|
||||||
|
|
||||||
[Fact]
|
[Fact]
|
||||||
public void ReadsStringsWithMissingEndBracket()
|
public void ReadsStringsWithMissingEndBracket()
|
||||||
{
|
{
|
||||||
|
|||||||
46
src/UglyToad.PdfPig.Tests/Util/CircularByteBufferTests.cs
Normal file
46
src/UglyToad.PdfPig.Tests/Util/CircularByteBufferTests.cs
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
namespace UglyToad.PdfPig.Tests.Util;
|
||||||
|
|
||||||
|
using PdfPig.Util;
|
||||||
|
|
||||||
|
/// <summary>
/// Tests for <see cref="CircularByteBuffer"/> covering the cases where more bytes
/// than the capacity are added and where fewer bytes than the capacity are added.
/// </summary>
public class CircularByteBufferTests
{
    [Fact]
    public void CanExceedCapacity()
    {
        // Six bytes pushed into a buffer of three: only the last three remain.
        var circular = CreateFilled(3, "123456"u8);

        Assert.True(circular.IsCurrentlyEqual("456"));

        Assert.True("456"u8.SequenceEqual(circular.AsSpan()));

        Assert.True(circular.EndsWith("6"));
        Assert.True(circular.EndsWith("56"));
        Assert.True(circular.EndsWith("456"));
        Assert.False(circular.EndsWith("3456"));
    }

    [Fact]
    public void CanUndershootCapacity()
    {
        // Six bytes pushed into a buffer of nine: everything is retained.
        var circular = CreateFilled(9, "123456"u8);

        Assert.True(circular.IsCurrentlyEqual("123456"));

        Assert.True(circular.EndsWith("3456"));
        Assert.False(circular.EndsWith("123"));

        Assert.True("123456"u8.SequenceEqual(circular.AsSpan()));
    }

    // Builds a buffer with the given capacity and adds every byte of 'content' in order.
    private static CircularByteBuffer CreateFilled(int capacity, ReadOnlySpan<byte> content)
    {
        var result = new CircularByteBuffer(capacity);

        foreach (var value in content)
        {
            result.Add(value);
        }

        return result;
    }
}
|
||||||
@ -10,11 +10,15 @@
|
|||||||
using Core;
|
using Core;
|
||||||
using Encryption;
|
using Encryption;
|
||||||
using Filters;
|
using Filters;
|
||||||
|
using System.Text;
|
||||||
using Tokens;
|
using Tokens;
|
||||||
|
using Util;
|
||||||
|
|
||||||
internal class PdfTokenScanner : IPdfTokenScanner
|
internal class PdfTokenScanner : IPdfTokenScanner
|
||||||
{
|
{
|
||||||
private static ReadOnlySpan<byte> EndstreamBytes => "endstream"u8;
|
private static ReadOnlySpan<byte> EndstreamBytes => "endstream"u8;
|
||||||
|
private static ReadOnlySpan<byte> EndObjBytes => "endobj"u8;
|
||||||
|
private static ReadOnlySpan<byte> StartstreamBytes => "stream"u8;
|
||||||
|
|
||||||
private static readonly Regex EndsWithNumberRegex = new Regex(@"(?<=^[^\s\d]+)\d+$");
|
private static readonly Regex EndsWithNumberRegex = new Regex(@"(?<=^[^\s\d]+)\d+$");
|
||||||
|
|
||||||
@ -178,20 +182,20 @@
|
|||||||
coreTokenScanner.Seek(previousTokenPositions[2]);
|
coreTokenScanner.Seek(previousTokenPositions[2]);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (readTokens.Count == 1)
|
if (readTokens.Count == 1)
|
||||||
{
|
{
|
||||||
// An obj was encountered after reading the actual token and the object and generation number of the following token.
|
// An obj was encountered after reading the actual token and the object and generation number of the following token.
|
||||||
var actualReference = new IndirectReference(objectNumber.Int, generation.Int);
|
var actualReference = new IndirectReference(objectNumber.Int, generation.Int);
|
||||||
var actualToken = encryptionHandler.Decrypt(actualReference, readTokens[0]);
|
var actualToken = encryptionHandler.Decrypt(actualReference, readTokens[0]);
|
||||||
|
|
||||||
CurrentToken = new ObjectToken(startPosition, actualReference, actualToken);
|
CurrentToken = new ObjectToken(startPosition, actualReference, actualToken);
|
||||||
readTokens.Clear();
|
readTokens.Clear();
|
||||||
coreTokenScanner.Seek(previousTokenPositions[2]);
|
coreTokenScanner.Seek(previousTokenPositions[2]);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// This should never happen.
|
// This should never happen.
|
||||||
Debug.Assert(false, $"Encountered a '{coreTokenScanner.CurrentToken}' operator before the end of the previous object.");
|
Debug.Assert(false, $"Encountered a '{coreTokenScanner.CurrentToken}' operator before the end of the previous object.");
|
||||||
return false;
|
return false;
|
||||||
@ -311,6 +315,9 @@
|
|||||||
{
|
{
|
||||||
stream = null;
|
stream = null;
|
||||||
|
|
||||||
|
// Used for shared reading of "stream", "endstream" and "endobj" candidates.
|
||||||
|
var buffer = new byte[EndstreamBytes.Length];
|
||||||
|
|
||||||
DictionaryToken streamDictionaryToken = GetStreamDictionary();
|
DictionaryToken streamDictionaryToken = GetStreamDictionary();
|
||||||
|
|
||||||
// Get the expected length from the stream dictionary if present.
|
// Get the expected length from the stream dictionary if present.
|
||||||
@ -322,7 +329,7 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Verify again that we start with "stream"
|
// Verify again that we start with "stream"
|
||||||
var hasStartStreamToken = ReadStreamTokenStart(inputBytes, startStreamTokenOffset);
|
var hasStartStreamToken = ReadStreamTokenStart(inputBytes, startStreamTokenOffset, buffer);
|
||||||
if (!hasStartStreamToken)
|
if (!hasStartStreamToken)
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
@ -349,9 +356,9 @@
|
|||||||
{
|
{
|
||||||
inputBytes.Seek(inputBytes.CurrentOffset - 1);
|
inputBytes.Seek(inputBytes.CurrentOffset - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
} while ((char)inputBytes.CurrentByte != '\n');
|
} while ((char)inputBytes.CurrentByte != '\n');
|
||||||
|
|
||||||
// Store where we started reading the first byte of data.
|
// Store where we started reading the first byte of data.
|
||||||
@ -360,16 +367,7 @@
|
|||||||
// Store how many bytes we have read for checking against Length.
|
// Store how many bytes we have read for checking against Length.
|
||||||
long read = 0;
|
long read = 0;
|
||||||
|
|
||||||
// We want to check if we ever read 'endobj' or 'endstream'.
|
if (TryReadUsingLength(inputBytes, length, startDataOffset, buffer, out var streamData))
|
||||||
int endObjPosition = 0;
|
|
||||||
int endStreamPosition = 0;
|
|
||||||
int commonPartPosition = 0;
|
|
||||||
|
|
||||||
const string endWordPart = "end";
|
|
||||||
const string streamPart = "stream";
|
|
||||||
const string objPart = "obj";
|
|
||||||
|
|
||||||
if (TryReadUsingLength(inputBytes, length, startDataOffset, out var streamData))
|
|
||||||
{
|
{
|
||||||
stream = new StreamToken(streamDictionaryToken, streamData);
|
stream = new StreamToken(streamDictionaryToken, streamData);
|
||||||
return true;
|
return true;
|
||||||
@ -379,99 +377,100 @@
|
|||||||
|
|
||||||
PossibleStreamEndLocation? possibleEndLocation = null;
|
PossibleStreamEndLocation? possibleEndLocation = null;
|
||||||
|
|
||||||
|
// We're looking for either 'endobj' or 'endstream', so we look at every 'e'.
|
||||||
|
const byte sentinelByte = (byte)'e';
|
||||||
|
var queue = new CircularByteBuffer(EndstreamBytes.Length + 1);
|
||||||
|
var sentinelPosQueue = new Queue<long>();
|
||||||
|
var endLocations = new Stack<long>();
|
||||||
|
|
||||||
while (inputBytes.MoveNext())
|
while (inputBytes.MoveNext())
|
||||||
{
|
{
|
||||||
if (length.HasValue && read == length)
|
if (inputBytes.CurrentByte == sentinelByte)
|
||||||
{
|
{
|
||||||
// TODO: read ahead and check we're at the end...
|
sentinelPosQueue.Enqueue(inputBytes.CurrentOffset);
|
||||||
// break;
|
queue.Add(inputBytes.CurrentByte);
|
||||||
}
|
}
|
||||||
|
else if (sentinelPosQueue.Count > 0)
|
||||||
// We are reading 'end' (possibly).
|
|
||||||
if (commonPartPosition < endWordPart.Length && inputBytes.CurrentByte == endWordPart[commonPartPosition])
|
|
||||||
{
|
{
|
||||||
commonPartPosition++;
|
if (ReadHelper.IsWhitespace(inputBytes.CurrentByte))
|
||||||
}
|
|
||||||
else if (commonPartPosition == endWordPart.Length)
|
|
||||||
{
|
|
||||||
// We are reading 'stream' after 'end'
|
|
||||||
if (inputBytes.CurrentByte == streamPart[endStreamPosition])
|
|
||||||
{
|
{
|
||||||
endObjPosition = 0;
|
// Normalize whitespace
|
||||||
endStreamPosition++;
|
queue.Add((byte)' ');
|
||||||
|
|
||||||
// We've finished reading 'endstream', add it to the end tokens we've seen.
|
|
||||||
if (endStreamPosition == streamPart.Length && (!inputBytes.MoveNext() || ReadHelper.IsWhitespace(inputBytes.CurrentByte)))
|
|
||||||
{
|
|
||||||
var token = new PossibleStreamEndLocation(inputBytes.CurrentOffset - OperatorToken.EndStream.Data.Length, OperatorToken.EndStream);
|
|
||||||
|
|
||||||
possibleEndLocation = token;
|
|
||||||
|
|
||||||
if (length.HasValue && read > length)
|
|
||||||
{
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
endStreamPosition = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (inputBytes.CurrentByte == objPart[endObjPosition])
|
|
||||||
{
|
|
||||||
// We are reading 'obj' after 'end'
|
|
||||||
|
|
||||||
endStreamPosition = 0;
|
|
||||||
endObjPosition++;
|
|
||||||
|
|
||||||
// We have finished reading 'endobj'.
|
|
||||||
if (endObjPosition == objPart.Length)
|
|
||||||
{
|
|
||||||
// If we saw an 'endstream' or 'endobj' previously we've definitely hit the end now.
|
|
||||||
if (possibleEndLocation != null)
|
|
||||||
{
|
|
||||||
var lastEndToken = possibleEndLocation.Value;
|
|
||||||
|
|
||||||
inputBytes.Seek(lastEndToken.Offset + lastEndToken.Type.Data.Length + 1);
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
var token = new PossibleStreamEndLocation(inputBytes.CurrentOffset - OperatorToken.EndObject.Data.Length, OperatorToken.EndObject);
|
|
||||||
|
|
||||||
possibleEndLocation = token;
|
|
||||||
|
|
||||||
if (read > length)
|
|
||||||
{
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// We were reading 'end' but then we had a character mismatch.
|
queue.Add(inputBytes.CurrentByte);
|
||||||
// Reset all the counters.
|
|
||||||
|
|
||||||
endStreamPosition = 0;
|
|
||||||
endObjPosition = 0;
|
|
||||||
commonPartPosition = 0;
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// For safety reset every counter in case we had a partial read.
|
|
||||||
|
|
||||||
endStreamPosition = 0;
|
bool hasDequeuePotential;
|
||||||
endObjPosition = 0;
|
do
|
||||||
commonPartPosition = (inputBytes.CurrentByte == endWordPart[0]) ? 1 : 0;
|
{
|
||||||
|
hasDequeuePotential = false;
|
||||||
|
var currPos = sentinelPosQueue.Peek();
|
||||||
|
var distanceFromSentinel = inputBytes.CurrentOffset - currPos;
|
||||||
|
if (distanceFromSentinel > EndstreamBytes.Length)
|
||||||
|
{
|
||||||
|
sentinelPosQueue.Dequeue();
|
||||||
|
hasDequeuePotential = sentinelPosQueue.Count > 0;
|
||||||
|
}
|
||||||
|
if (distanceFromSentinel == EndstreamBytes.Length)
|
||||||
|
{
|
||||||
|
var isEndStream = queue.EndsWith("endstream ");
|
||||||
|
|
||||||
|
if (isEndStream)
|
||||||
|
{
|
||||||
|
endLocations.Push(currPos);
|
||||||
|
sentinelPosQueue.Clear();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
sentinelPosQueue.Dequeue();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (distanceFromSentinel == EndObjBytes.Length)
|
||||||
|
{
|
||||||
|
var isEndObj = queue.EndsWith("endobj ");
|
||||||
|
|
||||||
|
if (isEndObj)
|
||||||
|
{
|
||||||
|
endLocations.Push(-currPos);
|
||||||
|
sentinelPosQueue.Clear();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
sentinelPosQueue.Dequeue();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} while (hasDequeuePotential);
|
||||||
}
|
}
|
||||||
|
|
||||||
read++;
|
read++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (sentinelPosQueue.Count > 0)
|
||||||
|
{
|
||||||
|
var isEndObj = queue.EndsWith("endobj");
|
||||||
|
if (isEndObj)
|
||||||
|
{
|
||||||
|
var location = inputBytes.CurrentOffset - EndObjBytes.Length + 1;
|
||||||
|
endLocations.Push(-location);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
var isEndStr = queue.EndsWith("endstream");
|
||||||
|
if (isEndStr)
|
||||||
|
{
|
||||||
|
endLocations.Push(inputBytes.CurrentOffset - EndstreamBytes.Length + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
long streamDataEnd = inputBytes.CurrentOffset + 1;
|
long streamDataEnd = inputBytes.CurrentOffset + 1;
|
||||||
|
|
||||||
if (possibleEndLocation == null)
|
if (possibleEndLocation == null)
|
||||||
|
{
|
||||||
return false;
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
var lastEnd = possibleEndLocation;
|
var lastEnd = possibleEndLocation;
|
||||||
|
|
||||||
@ -502,7 +501,12 @@
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static bool TryReadUsingLength(IInputBytes inputBytes, long? length, long startDataOffset, [NotNullWhen(true)] out byte[]? data)
|
private static bool TryReadUsingLength(
|
||||||
|
IInputBytes inputBytes,
|
||||||
|
long? length,
|
||||||
|
long startDataOffset,
|
||||||
|
byte[] buffer,
|
||||||
|
[NotNullWhen(true)] out byte[]? data)
|
||||||
{
|
{
|
||||||
data = null;
|
data = null;
|
||||||
|
|
||||||
@ -511,8 +515,6 @@
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
var readBuffer = new byte[EndstreamBytes.Length];
|
|
||||||
|
|
||||||
var newlineCount = 0;
|
var newlineCount = 0;
|
||||||
|
|
||||||
inputBytes.Seek(length.Value + startDataOffset);
|
inputBytes.Seek(length.Value + startDataOffset);
|
||||||
@ -533,20 +535,17 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var readLength = inputBytes.Read(readBuffer);
|
var readLength = inputBytes.Read(buffer);
|
||||||
|
|
||||||
if (readLength != readBuffer.Length)
|
if (readLength != EndstreamBytes.Length)
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (var i = 0; i < EndstreamBytes.Length; i++)
|
if (!ByteArraysEqual(buffer, EndstreamBytes))
|
||||||
{
|
{
|
||||||
if (readBuffer[i] != EndstreamBytes[i])
|
inputBytes.Seek(startDataOffset);
|
||||||
{
|
return false;
|
||||||
inputBytes.Seek(startDataOffset);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
inputBytes.Seek(startDataOffset);
|
inputBytes.Seek(startDataOffset);
|
||||||
@ -560,7 +559,7 @@
|
|||||||
throw new InvalidOperationException($"Reading using the stream length failed to read as many bytes as the stream specified. Wanted {length.Value}, got {countRead} at {startDataOffset + 1}.");
|
throw new InvalidOperationException($"Reading using the stream length failed to read as many bytes as the stream specified. Wanted {length.Value}, got {countRead} at {startDataOffset + 1}.");
|
||||||
}
|
}
|
||||||
|
|
||||||
inputBytes.Read(readBuffer);
|
inputBytes.Read(buffer);
|
||||||
// Skip for the line break before 'endstream'.
|
// Skip for the line break before 'endstream'.
|
||||||
for (var i = 0; i < newlineCount; i++)
|
for (var i = 0; i < newlineCount; i++)
|
||||||
{
|
{
|
||||||
@ -657,22 +656,29 @@
|
|||||||
return length;
|
return length;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static bool ReadStreamTokenStart(IInputBytes input, long tokenStart)
|
private static bool ReadStreamTokenStart(IInputBytes input, long tokenStart, byte[] buffer)
|
||||||
{
|
{
|
||||||
input.Seek(tokenStart);
|
input.Seek(tokenStart);
|
||||||
|
|
||||||
for (var i = 0; i < OperatorToken.StartStream.Data.Length; i++)
|
var readCount = input.Read(buffer);
|
||||||
|
|
||||||
|
if (readCount < StartstreamBytes.Length
|
||||||
|
|| !ByteArraysEqual(buffer.AsSpan(0, StartstreamBytes.Length), StartstreamBytes))
|
||||||
{
|
{
|
||||||
if (!input.MoveNext() || input.CurrentByte != OperatorToken.StartStream.Data[i])
|
input.Seek(tokenStart);
|
||||||
{
|
return false;
|
||||||
input.Seek(tokenStart);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
input.Seek(tokenStart + StartstreamBytes.Length);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Returns <c>true</c> when both spans hold the same bytes in the same order
/// (delegates to <c>SequenceEqual</c>).
/// </summary>
private static bool ByteArraysEqual(ReadOnlySpan<byte> array1, ReadOnlySpan<byte> array2)
    => array1.SequenceEqual(array2);
|
||||||
|
|
||||||
public bool TryReadToken<T>(out T token) where T : class, IToken
|
public bool TryReadToken<T>(out T token) where T : class, IToken
|
||||||
{
|
{
|
||||||
if (isDisposed)
|
if (isDisposed)
|
||||||
@ -712,7 +718,7 @@
|
|||||||
|
|
||||||
coreTokenScanner.DeregisterCustomTokenizer(tokenizer);
|
coreTokenScanner.DeregisterCustomTokenizer(tokenizer);
|
||||||
}
|
}
|
||||||
|
|
||||||
public ObjectToken? Get(IndirectReference reference)
|
public ObjectToken? Get(IndirectReference reference)
|
||||||
{
|
{
|
||||||
if (isDisposed)
|
if (isDisposed)
|
||||||
|
|||||||
89
src/UglyToad.PdfPig/Util/CircularByteBuffer.cs
Normal file
89
src/UglyToad.PdfPig/Util/CircularByteBuffer.cs
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
namespace UglyToad.PdfPig.Util;
|
||||||
|
|
||||||
|
using System.Text;
|
||||||
|
|
||||||
|
/// <summary>
/// A fixed-capacity byte buffer that overwrites its oldest entry once it is full,
/// so it always holds the most recently added bytes (at most <c>size</c> of them).
/// </summary>
internal class CircularByteBuffer(int size)
{
    private readonly byte[] buffer = new byte[size];

    // Physical index in 'buffer' of the oldest retained byte.
    private int start;

    // Number of bytes currently retained; never exceeds buffer.Length.
    private int count;

    /// <summary>
    /// Appends a byte. When the buffer is already full the oldest byte is overwritten.
    /// </summary>
    public void Add(byte b)
    {
        // When full, (start + count) wraps around to 'start', i.e. the oldest slot.
        var insertionPosition = (start + count) % buffer.Length;

        buffer[insertionPosition] = b;

        if (count < buffer.Length)
        {
            count++;
        }
        else
        {
            // The oldest byte was just overwritten, so the logical start moves on by one.
            start = (start + 1) % buffer.Length;
        }
    }

    /// <summary>
    /// Returns <c>true</c> if the most recently added bytes match the characters of
    /// <paramref name="s"/>, compared code unit to byte value.
    /// A string longer than the current content never matches.
    /// </summary>
    public bool EndsWith(string s)
    {
        if (s.Length > count)
        {
            return false;
        }

        // Logical index of the first byte of the candidate suffix.
        var offset = count - s.Length;

        for (var i = 0; i < s.Length; i++)
        {
            if (buffer[IndexToBufferIndex(offset + i)] != s[i])
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>
    /// Returns <c>true</c> only if the buffer currently holds exactly the characters of
    /// <paramref name="s"/>: the same number of bytes, in the same order, oldest first.
    /// </summary>
    public bool IsCurrentlyEqual(string s)
    {
        // Comparing against anything other than the full logical content would either
        // accept a mere prefix or read slots that were never written.
        if (s.Length != count)
        {
            return false;
        }

        for (var i = 0; i < s.Length; i++)
        {
            // Compare without narrowing the char, consistent with EndsWith, so that
            // characters above 255 can never alias a byte value.
            if (buffer[IndexToBufferIndex(i)] != s[i])
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>
    /// Copies the current content, oldest byte first, into a newly allocated array
    /// and returns it as a span.
    /// </summary>
    public ReadOnlySpan<byte> AsSpan()
    {
        Span<byte> tmp = new byte[count];
        for (int i = 0; i < count; i++)
        {
            tmp[i] = buffer[IndexToBufferIndex(i)];
        }

        return tmp;
    }

    /// <summary>
    /// Decodes the current content as ASCII text.
    /// </summary>
    public override string ToString()
    {
        return Encoding.ASCII.GetString(AsSpan());
    }

    // Maps a logical index (0 = oldest retained byte) to a physical index in 'buffer'.
    private int IndexToBufferIndex(int i) => (start + i) % buffer.Length;
}
|
||||||
Loading…
Reference in New Issue
Block a user