mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-10-15 19:54:52 +08:00
fix issue with newlines in object start tokens #88
When we brute-force search the file and it contains newlines between object tokens, the parsing is now fixed to prevent pseudo-infinite loops.
This commit is contained in:
@@ -123,7 +123,7 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Determines if a character is whitespace or not.
|
/// Determines if a character is whitespace or not, this includes newlines.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
/// <remarks>
|
/// <remarks>
|
||||||
/// These values are specified in table 1 (page 12) of ISO 32000-1:2008.
|
/// These values are specified in table 1 (page 12) of ISO 32000-1:2008.
|
||||||
|
@@ -36,7 +36,7 @@ startxref
|
|||||||
216
|
216
|
||||||
%%EOF";
|
%%EOF";
|
||||||
|
|
||||||
private static readonly long[] TestDataOffsets =
|
private static readonly long[] TestDataOffsets =
|
||||||
{
|
{
|
||||||
TestData.IndexOf("2 17 obj", StringComparison.OrdinalIgnoreCase),
|
TestData.IndexOf("2 17 obj", StringComparison.OrdinalIgnoreCase),
|
||||||
TestData.IndexOf("3 0 obj", StringComparison.OrdinalIgnoreCase),
|
TestData.IndexOf("3 0 obj", StringComparison.OrdinalIgnoreCase),
|
||||||
@@ -57,7 +57,7 @@ startxref
|
|||||||
public void SearcherFindsCorrectObjects()
|
public void SearcherFindsCorrectObjects()
|
||||||
{
|
{
|
||||||
var input = new ByteArrayInputBytes(OtherEncodings.StringAsLatin1Bytes(TestData));
|
var input = new ByteArrayInputBytes(OtherEncodings.StringAsLatin1Bytes(TestData));
|
||||||
|
|
||||||
var locations = BruteForceSearcher.GetObjectLocations(input);
|
var locations = BruteForceSearcher.GetObjectLocations(input);
|
||||||
|
|
||||||
Assert.Equal(4, locations.Count);
|
Assert.Equal(4, locations.Count);
|
||||||
@@ -69,11 +69,11 @@ startxref
|
|||||||
public void ReaderOnlyCallsOnce()
|
public void ReaderOnlyCallsOnce()
|
||||||
{
|
{
|
||||||
var reader = StringBytesTestConverter.Convert(TestData, false);
|
var reader = StringBytesTestConverter.Convert(TestData, false);
|
||||||
|
|
||||||
var locations = BruteForceSearcher.GetObjectLocations(reader.Bytes);
|
var locations = BruteForceSearcher.GetObjectLocations(reader.Bytes);
|
||||||
|
|
||||||
Assert.Equal(4, locations.Count);
|
Assert.Equal(4, locations.Count);
|
||||||
|
|
||||||
var newLocations = BruteForceSearcher.GetObjectLocations(reader.Bytes);
|
var newLocations = BruteForceSearcher.GetObjectLocations(reader.Bytes);
|
||||||
|
|
||||||
Assert.Equal(4, locations.Count);
|
Assert.Equal(4, locations.Count);
|
||||||
@@ -132,7 +132,7 @@ endobj
|
|||||||
5 0 obj
|
5 0 obj
|
||||||
<< /IsEmpty false >>
|
<< /IsEmpty false >>
|
||||||
endobj";
|
endobj";
|
||||||
|
|
||||||
var bytes = new ByteArrayInputBytes(OtherEncodings.StringAsLatin1Bytes(s));
|
var bytes = new ByteArrayInputBytes(OtherEncodings.StringAsLatin1Bytes(s));
|
||||||
|
|
||||||
var locations = BruteForceSearcher.GetObjectLocations(bytes);
|
var locations = BruteForceSearcher.GetObjectLocations(bytes);
|
||||||
@@ -168,12 +168,35 @@ endobj";
|
|||||||
Assert.Equal(581, locations[new IndirectReference(7, 0)]);
|
Assert.Equal(581, locations[new IndirectReference(7, 0)]);
|
||||||
Assert.Equal(5068, locations[new IndirectReference(8, 0)]);
|
Assert.Equal(5068, locations[new IndirectReference(8, 0)]);
|
||||||
Assert.Equal(5091, locations[new IndirectReference(9, 0)]);
|
Assert.Equal(5091, locations[new IndirectReference(9, 0)]);
|
||||||
|
|
||||||
var s = GetStringAt(bytes, locations[new IndirectReference(3, 0)]);
|
var s = GetStringAt(bytes, locations[new IndirectReference(3, 0)]);
|
||||||
Assert.StartsWith("3 0 obj", s);
|
Assert.StartsWith("3 0 obj", s);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[Fact]
public void BruteForceSearcherBytesFileOffsetsCorrect()
{
    // Load the whole document into memory so the search runs over a byte-array backed input.
    var documentPath = IntegrationHelpers.GetDocumentPath("Single Page Simple - from inkscape.pdf");
    var bytes = new ByteArrayInputBytes(File.ReadAllBytes(documentPath));

    var locations = BruteForceSearcher.GetObjectLocations(bytes);

    Assert.Equal(13, locations.Count);

    // Expected offsets for object numbers 1 through 9 (generation 0), in object-number order.
    var expectedOffsets = new long[] { 6183, 244, 15, 222, 5766, 353, 581, 5068, 5091 };

    for (var index = 0; index < expectedOffsets.Length; index++)
    {
        var reference = new IndirectReference(index + 1, 0);

        Assert.Equal(expectedOffsets[index], locations[reference]);
    }

    // The text found at the recorded offset must begin with the object header itself.
    var textAtThirdObject = GetStringAt(bytes, locations[new IndirectReference(3, 0)]);

    Assert.StartsWith("3 0 obj", textAtThirdObject);
}
|
||||||
|
|
||||||
[Fact]
|
[Fact]
|
||||||
public void BruteForceSearcherFileOffsetsCorrectOpenOffice()
|
public void BruteForceSearcherFileOffsetsCorrectOpenOffice()
|
||||||
{
|
{
|
||||||
|
@@ -42,6 +42,8 @@
|
|||||||
|
|
||||||
var currentlyInObject = false;
|
var currentlyInObject = false;
|
||||||
|
|
||||||
|
var objBuffer = new byte[4];
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
if (loopProtection > 1_000_000)
|
if (loopProtection > 1_000_000)
|
||||||
@@ -94,17 +96,24 @@
|
|||||||
|
|
||||||
bytes.Seek(currentOffset);
|
bytes.Seek(currentOffset);
|
||||||
|
|
||||||
if (!ReadHelper.IsString(bytes, " obj"))
|
bytes.Read(objBuffer);
|
||||||
|
|
||||||
|
if (!IsStartObjMarker(objBuffer))
|
||||||
{
|
{
|
||||||
currentOffset++;
|
currentOffset++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Current byte is ' '[obj]
|
// Current byte is ' '[obj]
|
||||||
var offset = currentOffset - 1;
|
var offset = currentOffset + 1;
|
||||||
|
|
||||||
bytes.Seek(offset);
|
bytes.Seek(offset);
|
||||||
|
|
||||||
|
while (ReadHelper.IsWhitespace(bytes.CurrentByte) && offset >= MinimumSearchOffset)
|
||||||
|
{
|
||||||
|
bytes.Seek(--offset);
|
||||||
|
}
|
||||||
|
|
||||||
while (ReadHelper.IsDigit(bytes.CurrentByte) && offset >= MinimumSearchOffset)
|
while (ReadHelper.IsDigit(bytes.CurrentByte) && offset >= MinimumSearchOffset)
|
||||||
{
|
{
|
||||||
generationBytes.Insert(0, (char)bytes.CurrentByte);
|
generationBytes.Insert(0, (char)bytes.CurrentByte);
|
||||||
@@ -113,13 +122,16 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
// We should now be at the space between object and generation number.
|
// We should now be at the space between object and generation number.
|
||||||
if (!ReadHelper.IsSpace(bytes.CurrentByte))
|
if (!ReadHelper.IsWhitespace(bytes.CurrentByte))
|
||||||
{
|
{
|
||||||
currentOffset++;
|
currentOffset++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
bytes.Seek(--offset);
|
while (ReadHelper.IsWhitespace(bytes.CurrentByte))
|
||||||
|
{
|
||||||
|
bytes.Seek(--offset);
|
||||||
|
}
|
||||||
|
|
||||||
while (ReadHelper.IsDigit(bytes.CurrentByte) && offset >= MinimumSearchOffset)
|
while (ReadHelper.IsDigit(bytes.CurrentByte) && offset >= MinimumSearchOffset)
|
||||||
{
|
{
|
||||||
@@ -185,5 +197,17 @@
|
|||||||
bytes.Seek(originalOffset);
|
bytes.Seek(originalOffset);
|
||||||
return long.MaxValue;
|
return long.MaxValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Checks whether the buffer holds a whitespace byte followed by the letters
/// 'o', 'b', 'j', each accepted in either upper or lower case.
/// </summary>
/// <param name="data">The bytes to inspect; indices 0 to 3 are read.</param>
/// <returns><c>true</c> if the first four bytes form the marker, otherwise <c>false</c>.</returns>
private static bool IsStartObjMarker(byte[] data)
{
    var startsWithWhitespace = ReadHelper.IsWhitespace(data[0]);

    if (!startsWithWhitespace)
    {
        return false;
    }

    if (data[1] != 'o' && data[1] != 'O')
    {
        return false;
    }

    if (data[2] != 'b' && data[2] != 'B')
    {
        return false;
    }

    return data[3] == 'j' || data[3] == 'J';
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user