handle unbalanced parentheses for string tokenization

When a close parenthesis is immediately followed by a line break and then '/' or '>', we assume the parentheses are unbalanced and finish reading the string.
This commit is contained in:
Eliot Jones
2020-02-27 17:01:15 +00:00
parent f7cabe5d12
commit 420daaac6e
2 changed files with 66 additions and 13 deletions

View File

@@ -298,17 +298,36 @@ endobj
<< /S 1245 >>
stream
%¥×³®í»š}%§X{{tøNåÝž¶ö¢ÖÞgrehtyyy$&%&£$££(*¾–~´¼
%¥×³®í»š}%§X{{tøNåÝž¶ö¢ÖÞgrehtyyy$&%&£$££(*¾–~´¼
endobj";
endstream
endobj";
var scanner = GetScanner(s);
var token = ReadToEnd(scanner)[0];
var stream = Assert.IsType<StreamToken>(token.Data);
Assert.Equal(12655, token.Number.ObjectNumber);
var stream = Assert.IsType<StreamToken>(token.Data);
Assert.Equal("1245", stream.StreamDictionary.Data["S"].ToString());
Assert.Equal("%¥×³®í»š}%§X{{tøNåÝž¶ö¢ÖÞgrehtyyy$&%&£$££(*¾–~´¼", Encoding.UTF8.GetString(stream.Data.ToArray()));
}
[Fact]
public void ReadsStreamWithoutBreakBeforeEndstream()
{
const string s = @"
1 0 obj
12
endobj
7 0 obj
<< /Length 288
/Filter /FlateDecode >>
stream
xœ]ËjÃ0ÿÃ,ÓEð#NÒ€1¤N^ôA~€-]A- YYøï+Ï4¡t#qfîFWQY*­Dïv5:è”–§ñjB½Òa¤ •p7¤K  ƒÈûëyr8Tº!Ïà úð‚ÉÙVG9¶ø@Å7+Ñ*ÝÃ곬¹T_ùƵƒ8 Š$vË̗Ƽ6BDöu%½B¹yí$—Ù ¤\Hx71JœL#Ð6ºÇ0È㸀ü|. µüßõÏ""WÛ‰¯Æ.êÄ«ã8; ¤iL°!Ø %É`K°ßì¸ÃöÜáÜ)  [#CFðİ#(yƒg^ÿ¶æò
ÿž“¸Zë#¢?¢hP”Æû?šÑï÷ø¯‰Šendstream
endobj

View File

@@ -23,14 +23,14 @@
return false;
}
int numberOfBrackets = 1;
bool isEscapeActive = false;
bool isLineBreaking = false;
var numberOfBrackets = 1;
var isEscapeActive = false;
var isLineBreaking = false;
bool octalModeActive = false;
var octalModeActive = false;
short[] octal = { 0, 0, 0 };
int octalsRead = 0;
var octalsRead = 0;
while (inputBytes.MoveNext())
{
@@ -85,8 +85,7 @@
}
// TODO: Check for other ends of string where the string is improperly formatted. See commented method
// numberOfBrackets = CheckForEndOfString(inputBytes, numberOfBrackets);
numberOfBrackets = CheckForEndOfString(numberOfBrackets, inputBytes);
break;
case '(':
@@ -185,7 +184,7 @@
private static void LeftShiftOctal(char nextOctalChar, int octalsRead, short[] octals)
{
for (int i = octalsRead; i > 0; i--)
for (var i = octalsRead; i > 0; i--)
{
octals[i] = octals[i - 1];
}
@@ -240,5 +239,40 @@
break;
}
}
/// <summary>
/// Peeks at the bytes following a close parenthesis to decide whether the string should be
/// treated as finished even though its parentheses are unbalanced.
/// </summary>
/// <param name="numberOfBrackets">The current count of unmatched open parentheses.</param>
/// <param name="bytes">The input, positioned just after the close parenthesis. Its position is restored before returning.</param>
/// <returns>
/// 0 if the following bytes indicate the end of the string, otherwise <paramref name="numberOfBrackets"/> unchanged.
/// </returns>
private static int CheckForEndOfString(int numberOfBrackets, IInputBytes bytes)
{
    const byte lineFeed = 10;
    const byte carriageReturn = 13;

    var braces = numberOfBrackets;
    var nextThreeBytes = new byte[3];

    // Remember where we started so the look-ahead does not consume any input.
    var startAt = bytes.CurrentOffset;
    var amountRead = bytes.Read(nextThreeBytes);

    // Check the next 3 bytes if available
    // The following cases are valid indicators for the end of the string
    // 1. Next line contains another COSObject: CR + LF + '/'
    // 2. COSDictionary ends in the next line: CR + LF + '>'
    // 3. Next line contains another COSObject: CR + '/'
    // 4. COSDictionary ends in the next line: CR + '>'
    if (amountRead == 3 && nextThreeBytes[0] == carriageReturn)
    {
        // The '/' and '>' checks on the third byte only apply when the second byte is LF;
        // otherwise CR + <any byte> + '>' would wrongly terminate the string.
        var endsAfterCrLf = nextThreeBytes[1] == lineFeed
                            && (nextThreeBytes[2] == '/' || nextThreeBytes[2] == '>');
        var endsAfterCr = nextThreeBytes[1] == '/' || nextThreeBytes[1] == '>';

        if (endsAfterCrLf || endsAfterCr)
        {
            braces = 0;
        }
    }

    // Rewind whatever the look-ahead consumed.
    if (amountRead > 0)
    {
        bytes.Seek(startAt);
    }

    return braces;
}
}
}