mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-09-21 04:17:57 +08:00
#62 use length value of stream directly to read the full stream once
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
namespace UglyToad.PdfPig.Tests.IO
|
||||
{
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using PdfPig.IO;
|
||||
using PdfPig.Util;
|
||||
using Xunit;
|
||||
@@ -72,5 +73,158 @@
|
||||
Assert.False(array.IsAtEnd());
|
||||
}
|
||||
}
|
||||
|
||||
[Fact]
public void ReadFromBeginningIsCorrect()
{
    // A fresh input from which nothing has been consumed yet.
    var input = StringToBytes("endstream and then <</go[]>>");
    var target = new byte["endstream".Length];

    var count = input.Read(target);

    // The whole buffer is filled with the leading keyword.
    Assert.Equal(target.Length, count);
    Assert.Equal("endstream", OtherEncodings.BytesAsLatin1String(target));

    // The input is left on the last byte read; two moves pass the space and land on 'a'.
    Assert.Equal((byte)'m', input.CurrentByte);
    Assert.True(input.MoveNext());
    Assert.True(input.MoveNext());
    Assert.Equal((byte)'a', input.CurrentByte);
}
|
||||
|
||||
[Fact]
public void ReadMatchesMoveBehaviour()
{
    const int readLength = 3;

    // Two inputs sharing the same prefix: one is advanced by Read, the other by MoveNext.
    var viaRead = StringToBytes("cows in the south");
    var viaMove = StringToBytes("cows in the north");

    var scratch = new byte[readLength];

    var count = viaRead.Read(scratch);

    var steps = 0;
    while (steps < readLength)
    {
        viaMove.MoveNext();
        steps++;
    }

    Assert.Equal(readLength, count);

    // Both inputs must agree on position and current byte...
    Assert.Equal(viaRead.CurrentOffset, viaMove.CurrentOffset);
    Assert.Equal(viaRead.CurrentByte, viaMove.CurrentByte);

    // ...and keep agreeing after one further step.
    Assert.Equal(viaRead.MoveNext(), viaMove.MoveNext());
    Assert.Equal(viaRead.CurrentOffset, viaMove.CurrentOffset);
    Assert.Equal(viaRead.CurrentByte, viaMove.CurrentByte);
}
|
||||
|
||||
[Fact]
public void ReadFromMiddleIsCorrect()
{
    var input = StringToBytes("aa stream <<>>");

    // Step over "aa " so the current byte is the space before the keyword.
    Assert.True(input.MoveNext());
    Assert.True(input.MoveNext());
    Assert.True(input.MoveNext());
    Assert.Equal((byte)' ', input.CurrentByte);

    var target = new byte["stream".Length];

    var count = input.Read(target);

    // Reading picks up from the byte after the current one.
    Assert.Equal(target.Length, count);
    Assert.Equal("stream", OtherEncodings.BytesAsLatin1String(target));

    // Left on the final 'm'; two moves pass the space and land on '<'.
    Assert.Equal((byte)'m', input.CurrentByte);
    Assert.True(input.MoveNext());
    Assert.True(input.MoveNext());
    Assert.Equal((byte)'<', input.CurrentByte);
}
|
||||
|
||||
[Fact]
public void ReadPastEndIsCorrect()
{
    var input = StringToBytes("stream");

    // Consume "st" so that only four bytes remain.
    Assert.True(input.MoveNext());
    Assert.True(input.MoveNext());

    // Deliberately ask for more bytes than are left.
    var target = new byte["stream".Length];

    var count = input.Read(target);

    // Only the remaining bytes are reported and copied to the front of the buffer.
    Assert.Equal(target.Length - 2, count);
    var filled = target.Take(target.Length - 2).ToArray();
    Assert.Equal("ream", OtherEncodings.BytesAsLatin1String(filled));

    // The input ends on the last byte and cannot advance further.
    Assert.Equal((byte)'m', input.CurrentByte);
    Assert.True(input.IsAtEnd());
    Assert.False(input.MoveNext());
}
|
||||
|
||||
[Fact]
public void ReadFromStreamBeginningIsCorrect()
{
    // Stream-backed counterpart of the byte-array test: nothing consumed yet.
    var input = StringToStream("endstream and then <</go[]>>");
    var target = new byte["endstream".Length];

    var count = input.Read(target);

    Assert.Equal(target.Length, count);
    Assert.Equal("endstream", OtherEncodings.BytesAsLatin1String(target));

    // Left on the last byte read; two moves pass the space and land on 'a'.
    Assert.Equal((byte)'m', input.CurrentByte);
    Assert.True(input.MoveNext());
    Assert.True(input.MoveNext());
    Assert.Equal((byte)'a', input.CurrentByte);
}
|
||||
|
||||
[Fact]
public void ReadFromStreamMiddleIsCorrect()
{
    var input = StringToStream("aa stream <<>>");

    // Step over "aa " so the current byte is the space before the keyword.
    Assert.True(input.MoveNext());
    Assert.True(input.MoveNext());
    Assert.True(input.MoveNext());
    Assert.Equal((byte)' ', input.CurrentByte);

    var target = new byte["stream".Length];

    var count = input.Read(target);

    Assert.Equal(target.Length, count);
    Assert.Equal("stream", OtherEncodings.BytesAsLatin1String(target));

    // Left on the final 'm'; two moves pass the space and land on '<'.
    Assert.Equal((byte)'m', input.CurrentByte);
    Assert.True(input.MoveNext());
    Assert.True(input.MoveNext());
    Assert.Equal((byte)'<', input.CurrentByte);
}
|
||||
|
||||
[Fact]
public void ReadPastStreamEndIsCorrect()
{
    var input = StringToStream("stream");

    // Consume "st" so that only four bytes remain.
    Assert.True(input.MoveNext());
    Assert.True(input.MoveNext());

    // Deliberately ask for more bytes than are left.
    var target = new byte["stream".Length];

    var count = input.Read(target);

    // Only the remaining bytes are reported and copied to the front of the buffer.
    Assert.Equal(target.Length - 2, count);
    var filled = target.Take(target.Length - 2).ToArray();
    Assert.Equal("ream", OtherEncodings.BytesAsLatin1String(filled));

    // The input ends on the last byte and cannot advance further.
    Assert.Equal((byte)'m', input.CurrentByte);
    Assert.True(input.IsAtEnd());
    Assert.False(input.MoveNext());
}
|
||||
|
||||
// Builds an in-memory input from the Latin-1 encoding of the given text.
private static ByteArrayInputBytes StringToBytes(string str)
{
    var latin1 = OtherEncodings.StringAsLatin1Bytes(str);
    return new ByteArrayInputBytes(latin1);
}
|
||||
// Builds a stream-backed input over a MemoryStream holding the Latin-1 encoding of the given text.
private static StreamInputBytes StringToStream(string str)
{
    var latin1 = OtherEncodings.StringAsLatin1Bytes(str);
    var memory = new MemoryStream(latin1);
    return new StreamInputBytes(memory);
}
|
||||
}
|
||||
}
|
||||
|
@@ -1,5 +1,6 @@
|
||||
namespace UglyToad.PdfPig.IO
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
|
||||
@@ -54,6 +55,46 @@
|
||||
CurrentByte = currentOffset < 0 ? (byte)0 : bytes[(int)currentOffset];
|
||||
}
|
||||
|
||||
/// <summary>
/// Copies the bytes which follow the current position into <paramref name="buffer"/> (starting at index 0)
/// and advances the current position to the last byte copied.
/// </summary>
/// <param name="buffer">The destination for the bytes read.</param>
/// <param name="length">Optional number of bytes to read; when omitted the buffer's length is used.</param>
/// <returns>
/// The number of bytes copied. This is smaller than the number requested when fewer bytes remain after the current position.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="length"/> is negative or larger than the buffer.
/// </exception>
public int Read(byte[] buffer, int? length = null)
{
    if (buffer == null)
    {
        throw new ArgumentNullException(nameof(buffer));
    }

    var bytesToRead = buffer.Length;
    if (length.HasValue)
    {
        // The single-string constructor of ArgumentOutOfRangeException takes the parameter name,
        // so the two-argument overload is used to report the message as a message.
        if (length.Value < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(length), $"Cannot use a negative length: {length.Value}.");
        }

        if (length.Value > bytesToRead)
        {
            throw new ArgumentOutOfRangeException(nameof(length), $"Cannot read more bytes {length.Value} than there is space in the buffer {buffer.Length}.");
        }

        bytesToRead = length.Value;
    }

    if (bytesToRead == 0)
    {
        return 0;
    }

    // Number of bytes which lie after the current offset.
    var viableLength = (bytes.Count - currentOffset - 1);
    var readLength = (int)(viableLength < bytesToRead ? viableLength : bytesToRead);
    var startFrom = (int)currentOffset;

    // Copying begins at the byte after the current offset (hence the + 1).
    for (var i = 0; i < readLength; i++)
    {
        buffer[i] = bytes[startFrom + i + 1];
    }

    // Only move when something was copied, leaving the current byte as the last one read.
    if (readLength > 0)
    {
        currentOffset += readLength;
        CurrentByte = buffer[readLength - 1];
    }

    return readLength;
}
|
||||
|
||||
/// <summary>
/// Does nothing: the body is empty, so no resources are released by this call.
/// </summary>
public void Dispose()
|
||||
{
|
||||
// Intentionally left empty.
}
|
||||
|
@@ -42,5 +42,13 @@
|
||||
/// Move to a given position.
|
||||
/// </summary>
|
||||
void Seek(long position);
|
||||
|
||||
/// <summary>
|
||||
/// Fill the buffer with the bytes which follow the current position, moving the current position to the last byte read.
|
||||
/// </summary>
|
||||
/// <param name="buffer">A buffer with a length corresponding to the number of bytes to read, it is filled from index 0.</param>
|
||||
/// <param name="length">Optional override for the number of bytes to read, this must not be negative or exceed the length of the buffer.</param>
|
||||
/// <returns>The number of bytes successfully read, this may be fewer than requested if the end of the input is reached.</returns>
/// <exception cref="System.ArgumentOutOfRangeException">The <paramref name="length"/> is negative or larger than the buffer.</exception>
|
||||
int Read(byte[] buffer, int? length = null);
|
||||
}
|
||||
}
|
@@ -89,6 +89,40 @@
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Reads bytes from the underlying stream into <paramref name="buffer"/> (starting at index 0),
/// updating the current byte to the last byte read and refreshing the end-of-input flag.
/// </summary>
/// <param name="buffer">The destination for the bytes read.</param>
/// <param name="length">Optional number of bytes to read; when omitted the buffer's length is used.</param>
/// <returns>
/// The number of bytes read. This is smaller than the number requested only when the stream has no more data.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="length"/> is negative or larger than the buffer.
/// </exception>
public int Read(byte[] buffer, int? length = null)
{
    if (buffer == null)
    {
        throw new ArgumentNullException(nameof(buffer));
    }

    var bytesToRead = buffer.Length;
    if (length.HasValue)
    {
        // The single-string constructor of ArgumentOutOfRangeException takes the parameter name,
        // so the two-argument overload is used to report the message as a message.
        if (length.Value < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(length), $"Cannot use a negative length: {length.Value}.");
        }

        if (length.Value > bytesToRead)
        {
            throw new ArgumentOutOfRangeException(nameof(length), $"Cannot read more bytes {length.Value} than there is space in the buffer {buffer.Length}.");
        }

        bytesToRead = length.Value;
    }

    if (bytesToRead == 0)
    {
        return 0;
    }

    // Stream.Read is allowed to return fewer bytes than requested even when more data is available,
    // so keep reading until the request is satisfied or the stream reports no more data.
    var read = 0;
    while (read < bytesToRead)
    {
        var count = stream.Read(buffer, read, bytesToRead - read);
        if (count <= 0)
        {
            break;
        }

        read += count;
    }

    if (read > 0)
    {
        CurrentByte = buffer[read - 1];
    }

    isAtEnd = stream.Position == stream.Length;

    return read;
}
|
||||
|
||||
public void Dispose()
|
||||
{
|
||||
if (shouldDispose)
|
||||
|
@@ -28,6 +28,11 @@
|
||||
|
||||
internal class PdfTokenScanner : IPdfTokenScanner
|
||||
{
|
||||
private static readonly byte[] EndstreamBytes =
|
||||
{
|
||||
(byte)'e', (byte)'n', (byte)'d', (byte)'s', (byte)'t', (byte)'r', (byte)'e', (byte)'a', (byte)'m'
|
||||
};
|
||||
|
||||
private static readonly Regex EndsWithNumberRegex = new Regex(@"(?<=^[^\s\d]+)\d+$");
|
||||
|
||||
private readonly IInputBytes inputBytes;
|
||||
@@ -256,6 +261,12 @@
|
||||
const string streamPart = "stream";
|
||||
const string objPart = "obj";
|
||||
|
||||
if (TryReadUsingLength(inputBytes, length, startDataOffset, out var streamData))
|
||||
{
|
||||
stream = new StreamToken(streamDictionaryToken, streamData);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Track any 'endobj' or 'endstream' operators we see.
|
||||
var observedEndLocations = new List<PossibleStreamEndLocation>();
|
||||
|
||||
@@ -404,6 +415,82 @@
|
||||
return true;
|
||||
}
|
||||
|
||||
/// <summary>
/// Attempts to read the stream data in a single read using the declared length of the stream.
/// </summary>
/// <remarks>
/// The declared length is only trusted if the bytes found immediately after it, once up to two
/// end-of-line bytes have been skipped, are 'endstream'. Whenever the input has been moved and the attempt
/// fails, the input is returned to <paramref name="startDataOffset"/> before returning false.
/// </remarks>
/// <param name="inputBytes">The input to read from.</param>
/// <param name="length">The declared length of the stream data, if known.</param>
/// <param name="startDataOffset">The position to seek to in order to read the stream data.</param>
/// <param name="data">The stream data if the read succeeded, otherwise null.</param>
/// <returns>
/// True if the data was read and the input was moved past the 'endstream' keyword, otherwise false.
/// </returns>
/// <exception cref="InvalidOperationException">
/// The length was validated but fewer bytes than the length could then be read.
/// </exception>
private static bool TryReadUsingLength(IInputBytes inputBytes, long? length, long startDataOffset, out byte[] data)
{
    data = null;

    // A negative length, or one which does not fit in an int, cannot be used to size the data array.
    if (!length.HasValue
        || length.Value < 0
        || length.Value > int.MaxValue
        || length.Value + startDataOffset >= inputBytes.Length)
    {
        return false;
    }

    var readBuffer = new byte[EndstreamBytes.Length];

    var newlineCount = 0;

    // Jump to where the data should end according to the declared length.
    inputBytes.Seek(length.Value + startDataOffset);

    // Allow for up to two end-of-line bytes between the data and 'endstream'.
    var next = inputBytes.Peek();

    if (next.HasValue && ReadHelper.IsEndOfLine(next.Value))
    {
        newlineCount++;
        inputBytes.MoveNext();

        next = inputBytes.Peek();

        if (next.HasValue && ReadHelper.IsEndOfLine(next.Value))
        {
            newlineCount++;
            inputBytes.MoveNext();
        }
    }

    var readLength = inputBytes.Read(readBuffer);

    if (readLength != readBuffer.Length)
    {
        // Restore the position here too, as the other failure paths do, since the input has already been moved.
        inputBytes.Seek(startDataOffset);
        return false;
    }

    for (var i = 0; i < EndstreamBytes.Length; i++)
    {
        if (readBuffer[i] != EndstreamBytes[i])
        {
            inputBytes.Seek(startDataOffset);
            return false;
        }
    }

    // The length is valid, go back and read the actual data.
    inputBytes.Seek(startDataOffset);

    var streamData = new byte[(int)length.Value];

    var countRead = inputBytes.Read(streamData);

    if (countRead != streamData.Length)
    {
        throw new InvalidOperationException($"Reading using the stream length failed to read as many bytes as the stream specified. Wanted {length.Value}, got {countRead} at {startDataOffset + 1}.");
    }

    inputBytes.Read(readBuffer);

    // Skip for the line break before 'endstream'.
    for (var i = 0; i < newlineCount; i++)
    {
        var read = inputBytes.MoveNext();
        if (!read)
        {
            inputBytes.Seek(startDataOffset);
            return false;
        }
    }

    // 1 skip to move past the 'm' in 'endstream'
    inputBytes.MoveNext();

    // Only expose the data once every step has succeeded.
    data = streamData;

    return true;
}
|
||||
|
||||
private DictionaryToken GetStreamDictionary()
|
||||
{
|
||||
DictionaryToken streamDictionaryToken;
|
||||
|
Reference in New Issue
Block a user