mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-10-14 10:55:04 +08:00
fix issue with newlines in object start tokens #88
When we brute-force search the file and it contains newlines between object tokens, the parsing is now fixed to prevent pseudo-infinite loops.
This commit is contained in:
@@ -123,7 +123,7 @@
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Determines if a character is whitespace or not.
|
||||
/// Determines if a character is whitespace or not, this includes newlines.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// These values are specified in table 1 (page 12) of ISO 32000-1:2008.
|
||||
|
@@ -36,7 +36,7 @@ startxref
|
||||
216
|
||||
%%EOF";
|
||||
|
||||
private static readonly long[] TestDataOffsets =
|
||||
private static readonly long[] TestDataOffsets =
|
||||
{
|
||||
TestData.IndexOf("2 17 obj", StringComparison.OrdinalIgnoreCase),
|
||||
TestData.IndexOf("3 0 obj", StringComparison.OrdinalIgnoreCase),
|
||||
@@ -57,7 +57,7 @@ startxref
|
||||
public void SearcherFindsCorrectObjects()
|
||||
{
|
||||
var input = new ByteArrayInputBytes(OtherEncodings.StringAsLatin1Bytes(TestData));
|
||||
|
||||
|
||||
var locations = BruteForceSearcher.GetObjectLocations(input);
|
||||
|
||||
Assert.Equal(4, locations.Count);
|
||||
@@ -69,11 +69,11 @@ startxref
|
||||
public void ReaderOnlyCallsOnce()
|
||||
{
|
||||
var reader = StringBytesTestConverter.Convert(TestData, false);
|
||||
|
||||
|
||||
var locations = BruteForceSearcher.GetObjectLocations(reader.Bytes);
|
||||
|
||||
Assert.Equal(4, locations.Count);
|
||||
|
||||
|
||||
var newLocations = BruteForceSearcher.GetObjectLocations(reader.Bytes);
|
||||
|
||||
Assert.Equal(4, locations.Count);
|
||||
@@ -132,7 +132,7 @@ endobj
|
||||
5 0 obj
|
||||
<< /IsEmpty false >>
|
||||
endobj";
|
||||
|
||||
|
||||
var bytes = new ByteArrayInputBytes(OtherEncodings.StringAsLatin1Bytes(s));
|
||||
|
||||
var locations = BruteForceSearcher.GetObjectLocations(bytes);
|
||||
@@ -168,12 +168,35 @@ endobj";
|
||||
Assert.Equal(581, locations[new IndirectReference(7, 0)]);
|
||||
Assert.Equal(5068, locations[new IndirectReference(8, 0)]);
|
||||
Assert.Equal(5091, locations[new IndirectReference(9, 0)]);
|
||||
|
||||
|
||||
var s = GetStringAt(bytes, locations[new IndirectReference(3, 0)]);
|
||||
Assert.StartsWith("3 0 obj", s);
|
||||
}
|
||||
}
|
||||
|
||||
[Fact]
public void BruteForceSearcherBytesFileOffsetsCorrect()
{
    // Load the whole document into memory and search the in-memory bytes.
    var documentPath = IntegrationHelpers.GetDocumentPath("Single Page Simple - from inkscape.pdf");
    var bytes = new ByteArrayInputBytes(File.ReadAllBytes(documentPath));

    var locations = BruteForceSearcher.GetObjectLocations(bytes);

    Assert.Equal(13, locations.Count);

    // Expected offsets for objects 1 through 9 (generation 0); index i holds object number i + 1.
    long[] expectedOffsets = { 6183, 244, 15, 222, 5766, 353, 581, 5068, 5091 };

    for (var objectNumber = 1; objectNumber <= expectedOffsets.Length; objectNumber++)
    {
        Assert.Equal(expectedOffsets[objectNumber - 1], locations[new IndirectReference(objectNumber, 0)]);
    }

    // The text found at the reported offset for object 3 should begin with its object header.
    var objectText = GetStringAt(bytes, locations[new IndirectReference(3, 0)]);
    Assert.StartsWith("3 0 obj", objectText);
}
|
||||
|
||||
[Fact]
|
||||
public void BruteForceSearcherFileOffsetsCorrectOpenOffice()
|
||||
{
|
||||
|
@@ -42,6 +42,8 @@
|
||||
|
||||
var currentlyInObject = false;
|
||||
|
||||
var objBuffer = new byte[4];
|
||||
|
||||
do
|
||||
{
|
||||
if (loopProtection > 1_000_000)
|
||||
@@ -94,17 +96,24 @@
|
||||
|
||||
bytes.Seek(currentOffset);
|
||||
|
||||
if (!ReadHelper.IsString(bytes, " obj"))
|
||||
bytes.Read(objBuffer);
|
||||
|
||||
if (!IsStartObjMarker(objBuffer))
|
||||
{
|
||||
currentOffset++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Current byte is ' '[obj]
|
||||
var offset = currentOffset - 1;
|
||||
var offset = currentOffset + 1;
|
||||
|
||||
bytes.Seek(offset);
|
||||
|
||||
while (ReadHelper.IsWhitespace(bytes.CurrentByte) && offset >= MinimumSearchOffset)
|
||||
{
|
||||
bytes.Seek(--offset);
|
||||
}
|
||||
|
||||
while (ReadHelper.IsDigit(bytes.CurrentByte) && offset >= MinimumSearchOffset)
|
||||
{
|
||||
generationBytes.Insert(0, (char)bytes.CurrentByte);
|
||||
@@ -113,13 +122,16 @@
|
||||
}
|
||||
|
||||
// We should now be at the space between object and generation number.
|
||||
if (!ReadHelper.IsSpace(bytes.CurrentByte))
|
||||
if (!ReadHelper.IsWhitespace(bytes.CurrentByte))
|
||||
{
|
||||
currentOffset++;
|
||||
continue;
|
||||
}
|
||||
|
||||
bytes.Seek(--offset);
|
||||
while (ReadHelper.IsWhitespace(bytes.CurrentByte))
|
||||
{
|
||||
bytes.Seek(--offset);
|
||||
}
|
||||
|
||||
while (ReadHelper.IsDigit(bytes.CurrentByte) && offset >= MinimumSearchOffset)
|
||||
{
|
||||
@@ -185,5 +197,17 @@
|
||||
bytes.Seek(originalOffset);
|
||||
return long.MaxValue;
|
||||
}
|
||||
|
||||
/// <summary>
/// Determines whether the buffer begins with an object start marker: a whitespace byte
/// followed by the letters "obj" in either case.
/// </summary>
/// <param name="data">The bytes read at the candidate offset; a match needs at least four bytes.</param>
/// <returns>
/// <c>true</c> if the first byte is whitespace and the next three bytes spell "obj" (case-insensitive);
/// otherwise <c>false</c>, including when the buffer is missing or shorter than four bytes.
/// </returns>
private static bool IsStartObjMarker(byte[] data)
{
    // A missing or short buffer cannot hold the four-byte marker; report "no match" instead of throwing.
    if (data == null || data.Length < 4)
    {
        return false;
    }

    if (!ReadHelper.IsWhitespace(data[0]))
    {
        return false;
    }

    return (data[1] == 'o' || data[1] == 'O')
        && (data[2] == 'b' || data[2] == 'B')
        && (data[3] == 'j' || data[3] == 'J');
}
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user