Support streams where only a carriage return follows the stream operator. Handle comments in arrays and dictionaries

* While the PDF specification says stream data should begin after a newline following the stream operator, some files have only a carriage return after the stream operator.
* Comment tokens may appear inside an array or dictionary; we now ignore them when they occur there, because they would otherwise break interpretation of the array or dictionary contents.
This commit is contained in:
Eliot Jones
2019-12-20 14:04:58 +00:00
parent 3e6fa4b694
commit 3084a9aab6
3 changed files with 19 additions and 1 deletions

View File

@@ -26,6 +26,11 @@
while (!CurrentByteEndsCurrentArray(inputBytes, previousToken) && scanner.MoveNext()) while (!CurrentByteEndsCurrentArray(inputBytes, previousToken) && scanner.MoveNext())
{ {
previousToken = scanner.CurrentToken; previousToken = scanner.CurrentToken;
if (scanner.CurrentToken is CommentToken)
{
continue;
}
contents.Add(scanner.CurrentToken); contents.Add(scanner.CurrentToken);
} }

View File

@@ -48,6 +48,11 @@
while (coreScanner.MoveNext()) while (coreScanner.MoveNext())
{ {
if (coreScanner.CurrentToken is CommentToken)
{
continue;
}
tokens.Add(coreScanner.CurrentToken); tokens.Add(coreScanner.CurrentToken);
} }

View File

@@ -229,14 +229,22 @@
return false; return false;
} }
// While the specification demands a \n we have seen files with \r only in the wild.
var hadWhiteSpace = false;
if (inputBytes.CurrentByte == '\r') if (inputBytes.CurrentByte == '\r')
{ {
hadWhiteSpace = true;
inputBytes.MoveNext(); inputBytes.MoveNext();
} }
if (inputBytes.CurrentByte != '\n') if (inputBytes.CurrentByte != '\n')
{ {
return false; if (!hadWhiteSpace)
{
return false;
}
inputBytes.Seek(inputBytes.CurrentOffset - 1);
} }
// Store where we started reading the first byte of data. // Store where we started reading the first byte of data.