fix issue with newlines in object start tokens #88

When we brute-force search the file and it contains newlines between object tokens, the parsing is now fixed to prevent pseudo-infinite loops.
This commit is contained in:
Eliot Jones
2020-03-17 20:09:47 +00:00
parent 5094f9d9d0
commit 0f91017613
3 changed files with 58 additions and 11 deletions

View File

@@ -123,7 +123,7 @@
} }
/// <summary> /// <summary>
/// Determines if a character is whitespace or not. /// Determines if a character is whitespace or not, this includes newlines.
/// </summary> /// </summary>
/// <remarks> /// <remarks>
/// These values are specified in table 1 (page 12) of ISO 32000-1:2008. /// These values are specified in table 1 (page 12) of ISO 32000-1:2008.

View File

@@ -174,6 +174,29 @@ endobj";
} }
} }
[Fact]
public void BruteForceSearcherBytesFileOffsetsCorrect()
{
    // Load the whole document into memory so the searcher runs against a byte array input.
    var documentPath = IntegrationHelpers.GetDocumentPath("Single Page Simple - from inkscape.pdf");
    var input = new ByteArrayInputBytes(File.ReadAllBytes(documentPath));

    var objectOffsets = BruteForceSearcher.GetObjectLocations(input);

    // The searcher is expected to report 13 objects for this document.
    Assert.Equal(13, objectOffsets.Count);

    // Check the reported offset for the first nine object numbers (generation 0).
    Assert.Equal(6183, objectOffsets[new IndirectReference(1, 0)]);
    Assert.Equal(244, objectOffsets[new IndirectReference(2, 0)]);
    Assert.Equal(15, objectOffsets[new IndirectReference(3, 0)]);
    Assert.Equal(222, objectOffsets[new IndirectReference(4, 0)]);
    Assert.Equal(5766, objectOffsets[new IndirectReference(5, 0)]);
    Assert.Equal(353, objectOffsets[new IndirectReference(6, 0)]);
    Assert.Equal(581, objectOffsets[new IndirectReference(7, 0)]);
    Assert.Equal(5068, objectOffsets[new IndirectReference(8, 0)]);
    Assert.Equal(5091, objectOffsets[new IndirectReference(9, 0)]);

    // The text found at the offset reported for object 3 must begin with its object header.
    var thirdObjectOffset = objectOffsets[new IndirectReference(3, 0)];
    var textAtOffset = GetStringAt(input, thirdObjectOffset);

    Assert.StartsWith("3 0 obj", textAtOffset);
}
[Fact] [Fact]
public void BruteForceSearcherFileOffsetsCorrectOpenOffice() public void BruteForceSearcherFileOffsetsCorrectOpenOffice()
{ {

View File

@@ -42,6 +42,8 @@
var currentlyInObject = false; var currentlyInObject = false;
var objBuffer = new byte[4];
do do
{ {
if (loopProtection > 1_000_000) if (loopProtection > 1_000_000)
@@ -94,17 +96,24 @@
bytes.Seek(currentOffset); bytes.Seek(currentOffset);
if (!ReadHelper.IsString(bytes, " obj")) bytes.Read(objBuffer);
if (!IsStartObjMarker(objBuffer))
{ {
currentOffset++; currentOffset++;
continue; continue;
} }
// Current byte is ' '[obj] // Current byte is ' '[obj]
var offset = currentOffset - 1; var offset = currentOffset + 1;
bytes.Seek(offset); bytes.Seek(offset);
while (ReadHelper.IsWhitespace(bytes.CurrentByte) && offset >= MinimumSearchOffset)
{
bytes.Seek(--offset);
}
while (ReadHelper.IsDigit(bytes.CurrentByte) && offset >= MinimumSearchOffset) while (ReadHelper.IsDigit(bytes.CurrentByte) && offset >= MinimumSearchOffset)
{ {
generationBytes.Insert(0, (char)bytes.CurrentByte); generationBytes.Insert(0, (char)bytes.CurrentByte);
@@ -113,13 +122,16 @@
} }
// We should now be at the space between object and generation number. // We should now be at the space between object and generation number.
if (!ReadHelper.IsSpace(bytes.CurrentByte)) if (!ReadHelper.IsWhitespace(bytes.CurrentByte))
{ {
currentOffset++; currentOffset++;
continue; continue;
} }
while (ReadHelper.IsWhitespace(bytes.CurrentByte))
{
bytes.Seek(--offset); bytes.Seek(--offset);
}
while (ReadHelper.IsDigit(bytes.CurrentByte) && offset >= MinimumSearchOffset) while (ReadHelper.IsDigit(bytes.CurrentByte) && offset >= MinimumSearchOffset)
{ {
@@ -185,5 +197,17 @@
bytes.Seek(originalOffset); bytes.Seek(originalOffset);
return long.MaxValue; return long.MaxValue;
} }
/// <summary>
/// Checks whether the buffer holds a whitespace byte followed by the letters 'o', 'b', 'j' in either case.
/// </summary>
/// <param name="data">The buffer to inspect; indices 0 to 3 are read.</param>
/// <returns><c>true</c> if the first four bytes form the marker, otherwise <c>false</c>.</returns>
private static bool IsStartObjMarker(byte[] data)
{
    // The keyword must be introduced by a whitespace byte.
    var hasLeadingWhitespace = ReadHelper.IsWhitespace(data[0]);

    // Each letter is compared against both its upper and lower case form.
    return hasLeadingWhitespace
        && (data[1] == 'O' || data[1] == 'o')
        && (data[2] == 'B' || data[2] == 'b')
        && (data[3] == 'J' || data[3] == 'j');
}
} }
} }