avoid infinite loop in brute-force searcher #88

Fixes the case where the brute-force searcher becomes stuck in an infinite loop. The problem PDF from #88 may have a newline or some other whitespace character between its object number and generation number, so it may still cause a failure elsewhere.
This commit is contained in:
Eliot Jones
2020-03-03 15:49:17 +00:00
parent 58972de7cb
commit bef68a0654
2 changed files with 80 additions and 3 deletions

View File

@@ -84,6 +84,70 @@ startxref
}
}
/// <summary>
/// Feeds the brute-force searcher an input containing the line "%AZ 0 obj", where the
/// text before the generation number is not a numeric object number, and checks that
/// only the two well-formed objects ("1 0 obj" and "11 0 obj") are reported, each at
/// its offset in the input.
/// </summary>
[Fact]
public void ReaderEscapesUnexpectedObject()
{
    const string s = @"%PDF-1.7
abcd
1 0 obj
<< /Type /Any >>
endobj
%AZ 0 obj
11 0 obj
769
endobj
%%EOF";

    var bytes = new ByteArrayInputBytes(OtherEncodings.StringAsLatin1Bytes(s));

    var locations = BruteForceSearcher.GetObjectLocations(bytes);

    // The input is pure ASCII, so the test treats character indices in s as offsets.
    // PDF keywords are case-sensitive, hence the ordinal (case-sensitive) search.
    long firstObjectOffset = s.IndexOf("1 0 obj", StringComparison.Ordinal);
    long secondObjectOffset = s.IndexOf("11 0 obj", StringComparison.Ordinal);

    // Exactly two entries, each matching one of the two distinct expected offsets.
    // Checked without relying on the enumeration order of the dictionary's values,
    // which is not a documented guarantee.
    Assert.Equal(2, locations.Count);
    Assert.Contains(firstObjectOffset, locations.Values);
    Assert.Contains(secondObjectOffset, locations.Values);
}
/// <summary>
/// Feeds the brute-force searcher an input containing the line "16-0 obj", where the
/// character between the object number and the generation number is '-' rather than
/// a space, and checks that only the two well-formed objects ("1 0 obj" and "5 0 obj")
/// are reported, each at its offset in the input.
/// </summary>
[Fact]
public void ReaderEscapesUnexpectedGenerationNumber()
{
    const string s = @"%PDF-2.0
abcdefghijklmnop
1 0 obj
256
endobj
16-0 obj
5 0 obj
<< /IsEmpty false >>
endobj";

    var bytes = new ByteArrayInputBytes(OtherEncodings.StringAsLatin1Bytes(s));

    var locations = BruteForceSearcher.GetObjectLocations(bytes);

    // The input is pure ASCII, so the test treats character indices in s as offsets.
    // PDF keywords are case-sensitive, hence the ordinal (case-sensitive) search.
    long firstObjectOffset = s.IndexOf("1 0 obj", StringComparison.Ordinal);
    long secondObjectOffset = s.IndexOf("5 0 obj", StringComparison.Ordinal);

    // Exactly two entries, each matching one of the two distinct expected offsets.
    // Checked without relying on the enumeration order of the dictionary's values,
    // which is not a documented guarantee.
    Assert.Equal(2, locations.Count);
    Assert.Contains(firstObjectOffset, locations.Values);
    Assert.Contains(secondObjectOffset, locations.Values);
}
[Fact]
public void BruteForceSearcherFileOffsetsCorrect()
{

View File

@@ -33,6 +33,9 @@
var results = new Dictionary<IndirectReference, long>();
var generationBytes = new StringBuilder();
var objectNumberBytes = new StringBuilder();
var originPosition = bytes.CurrentOffset;
var currentOffset = (long)MinimumSearchOffset;
@@ -61,7 +64,7 @@
currentlyInObject = false;
loopProtection = 0;
for (int i = 0; i < "endobj".Length; i++)
for (var i = 0; i < "endobj".Length; i++)
{
bytes.MoveNext();
currentOffset++;
@@ -102,7 +105,6 @@
bytes.Seek(offset);
var generationBytes = new StringBuilder();
while (ReadHelper.IsDigit(bytes.CurrentByte) && offset >= MinimumSearchOffset)
{
generationBytes.Insert(0, (char)bytes.CurrentByte);
@@ -113,12 +115,12 @@
// We should now be at the space between object and generation number.
if (!ReadHelper.IsSpace(bytes.CurrentByte))
{
currentOffset++;
continue;
}
bytes.Seek(--offset);
var objectNumberBytes = new StringBuilder();
while (ReadHelper.IsDigit(bytes.CurrentByte) && offset >= MinimumSearchOffset)
{
objectNumberBytes.Insert(0, (char)bytes.CurrentByte);
@@ -126,11 +128,22 @@
bytes.Seek(offset);
}
if (objectNumberBytes.Length == 0 || generationBytes.Length == 0)
{
generationBytes.Clear();
objectNumberBytes.Clear();
currentOffset++;
continue;
}
var obj = long.Parse(objectNumberBytes.ToString(), CultureInfo.InvariantCulture);
var generation = int.Parse(generationBytes.ToString(), CultureInfo.InvariantCulture);
results[new IndirectReference(obj, generation)] = bytes.CurrentOffset;
generationBytes.Clear();
objectNumberBytes.Clear();
currentlyInObject = true;
currentOffset++;