avoid infinite loop in brute-force searcher #88

Fixes the case where the brute-force searcher becomes stuck in an infinite loop. The problem PDF from #88 may have a newline or some other whitespace between its object and generation numbers, so this may cause a failure elsewhere.
This commit is contained in:
Eliot Jones
2020-03-03 15:49:17 +00:00
parent 58972de7cb
commit bef68a0654
2 changed files with 80 additions and 3 deletions

View File

@@ -84,6 +84,70 @@ startxref
} }
} }
[Fact]
public void ReaderEscapesUnexpectedObject()
{
    // The "%AZ 0 obj" line ends with an object marker but has no digits where the
    // object number would be. Only the two well-formed headers ("1 0 obj" and
    // "11 0 obj") are expected to be reported.
    const string input = @"%PDF-1.7
abcd
1 0 obj
<< /Type /Any >>
endobj
%AZ 0 obj
11 0 obj
769
endobj
%%EOF";

    // Expected offsets are the first occurrences of each header in the raw text.
    long firstHeaderOffset = input.IndexOf("1 0 obj", StringComparison.OrdinalIgnoreCase);
    long secondHeaderOffset = input.IndexOf("11 0 obj", StringComparison.OrdinalIgnoreCase);
    var expectedOffsets = new[] { firstHeaderOffset, secondHeaderOffset };

    var source = new ByteArrayInputBytes(OtherEncodings.StringAsLatin1Bytes(input));
    var found = BruteForceSearcher.GetObjectLocations(source);

    Assert.Equal(2, found.Count);
    Assert.Equal(expectedOffsets, found.Values);
}
[Fact]
public void ReaderEscapesUnexpectedGenerationNumber()
{
    // The "16-0 obj" line has a '-' where a space would normally separate the two
    // numbers in front of the object marker. Only "1 0 obj" and "5 0 obj" are
    // expected to be reported.
    const string input = @"%PDF-2.0
abcdefghijklmnop
1 0 obj
256
endobj
16-0 obj
5 0 obj
<< /IsEmpty false >>
endobj";

    // Expected offsets are the first occurrences of each header in the raw text.
    long firstHeaderOffset = input.IndexOf("1 0 obj", StringComparison.OrdinalIgnoreCase);
    long secondHeaderOffset = input.IndexOf("5 0 obj", StringComparison.OrdinalIgnoreCase);
    var expectedOffsets = new[] { firstHeaderOffset, secondHeaderOffset };

    var source = new ByteArrayInputBytes(OtherEncodings.StringAsLatin1Bytes(input));
    var found = BruteForceSearcher.GetObjectLocations(source);

    Assert.Equal(2, found.Count);
    Assert.Equal(expectedOffsets, found.Values);
}
[Fact] [Fact]
public void BruteForceSearcherFileOffsetsCorrect() public void BruteForceSearcherFileOffsetsCorrect()
{ {

View File

@@ -33,6 +33,9 @@
var results = new Dictionary<IndirectReference, long>(); var results = new Dictionary<IndirectReference, long>();
var generationBytes = new StringBuilder();
var objectNumberBytes = new StringBuilder();
var originPosition = bytes.CurrentOffset; var originPosition = bytes.CurrentOffset;
var currentOffset = (long)MinimumSearchOffset; var currentOffset = (long)MinimumSearchOffset;
@@ -61,7 +64,7 @@
currentlyInObject = false; currentlyInObject = false;
loopProtection = 0; loopProtection = 0;
for (int i = 0; i < "endobj".Length; i++) for (var i = 0; i < "endobj".Length; i++)
{ {
bytes.MoveNext(); bytes.MoveNext();
currentOffset++; currentOffset++;
@@ -102,7 +105,6 @@
bytes.Seek(offset); bytes.Seek(offset);
var generationBytes = new StringBuilder();
while (ReadHelper.IsDigit(bytes.CurrentByte) && offset >= MinimumSearchOffset) while (ReadHelper.IsDigit(bytes.CurrentByte) && offset >= MinimumSearchOffset)
{ {
generationBytes.Insert(0, (char)bytes.CurrentByte); generationBytes.Insert(0, (char)bytes.CurrentByte);
@@ -113,12 +115,12 @@
// We should now be at the space between object and generation number. // We should now be at the space between object and generation number.
if (!ReadHelper.IsSpace(bytes.CurrentByte)) if (!ReadHelper.IsSpace(bytes.CurrentByte))
{ {
currentOffset++;
continue; continue;
} }
bytes.Seek(--offset); bytes.Seek(--offset);
var objectNumberBytes = new StringBuilder();
while (ReadHelper.IsDigit(bytes.CurrentByte) && offset >= MinimumSearchOffset) while (ReadHelper.IsDigit(bytes.CurrentByte) && offset >= MinimumSearchOffset)
{ {
objectNumberBytes.Insert(0, (char)bytes.CurrentByte); objectNumberBytes.Insert(0, (char)bytes.CurrentByte);
@@ -126,11 +128,22 @@
bytes.Seek(offset); bytes.Seek(offset);
} }
if (objectNumberBytes.Length == 0 || generationBytes.Length == 0)
{
generationBytes.Clear();
objectNumberBytes.Clear();
currentOffset++;
continue;
}
var obj = long.Parse(objectNumberBytes.ToString(), CultureInfo.InvariantCulture); var obj = long.Parse(objectNumberBytes.ToString(), CultureInfo.InvariantCulture);
var generation = int.Parse(generationBytes.ToString(), CultureInfo.InvariantCulture); var generation = int.Parse(generationBytes.ToString(), CultureInfo.InvariantCulture);
results[new IndirectReference(obj, generation)] = bytes.CurrentOffset; results[new IndirectReference(obj, generation)] = bytes.CurrentOffset;
generationBytes.Clear();
objectNumberBytes.Clear();
currentlyInObject = true; currentlyInObject = true;
currentOffset++; currentOffset++;