mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-10-15 03:34:52 +08:00
handle unbalanced parentheses for string tokenization
When a close parenthesis appears to be unbalanced and is immediately followed by a line break and then '/' or '>', we assume the brackets are unbalanced and finish reading the string.
This commit is contained in:
@@ -298,17 +298,36 @@ endobj
|
||||
<< /S 1245 >>
|
||||
|
||||
stream
|
||||
%¥×³®í»š}%§X{{tøNåÝž¶ö¢ÖÞgrehtyyy$&%&£$££(*¾–~´¼
|
||||
%¥×³®í»š}%§X{{tøNåÝž¶ö¢ÖÞgrehtyyy$&%&£$££(*¾–~´¼
|
||||
endobj";
|
||||
endstream
|
||||
endobj";
|
||||
|
||||
var scanner = GetScanner(s);
|
||||
|
||||
|
||||
var token = ReadToEnd(scanner)[0];
|
||||
|
||||
var stream = Assert.IsType<StreamToken>(token.Data);
|
||||
Assert.Equal(12655, token.Number.ObjectNumber);
|
||||
|
||||
var stream = Assert.IsType<StreamToken>(token.Data);
|
||||
|
||||
Assert.Equal("1245", stream.StreamDictionary.Data["S"].ToString());
|
||||
|
||||
Assert.Equal("%¥×³®í»š}%§X{{tøNåÝž¶ö¢ÖÞgrehtyyy$&%&£$££(*¾–~´¼", Encoding.UTF8.GetString(stream.Data.ToArray()));
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ReadsStreamWithoutBreakBeforeEndstream()
|
||||
{
|
||||
const string s = @"
|
||||
1 0 obj
|
||||
12
|
||||
endobj
|
||||
|
||||
7 0 obj
|
||||
<< /Length 288
|
||||
/Filter /FlateDecode >>
|
||||
stream
|
||||
xœ]‘ËjÃ0E÷ÿÃ,ÓEð#NÒ€1¤N^ôA~€-]A-YYøï+Ï4¡t#qfîFWQY*Dïv5:è”–§ñjB‹½Òa¤ •p7¤K ƒÈûëyr8Tº!Ïà úð‚ÉÙVG9¶ø@Å7+Ñ*ÝÃ곬¹T_ùƵƒ8Š$vË̗Ƽ6BDöu%½B¹yí$—Ù ¤\Hx71JœL#Ð6ºÇ0È㸀ü|. µüßõÏ""WÛ‰¯Æ.êÄ«ã8;¤iL°!Ø %É`K°ßì¸ÃöÜáÜ) [‚#CFðİ#(yƒg^ÿ¶æò
|
||||
ÿž“¸Zë#¢?¢h–P”Æû?šÑï÷ø¯‰Šendstream
|
||||
endobj
|
||||
|
||||
|
@@ -23,14 +23,14 @@
|
||||
return false;
|
||||
}
|
||||
|
||||
int numberOfBrackets = 1;
|
||||
bool isEscapeActive = false;
|
||||
bool isLineBreaking = false;
|
||||
var numberOfBrackets = 1;
|
||||
var isEscapeActive = false;
|
||||
var isLineBreaking = false;
|
||||
|
||||
bool octalModeActive = false;
|
||||
var octalModeActive = false;
|
||||
|
||||
short[] octal = { 0, 0, 0 };
|
||||
int octalsRead = 0;
|
||||
var octalsRead = 0;
|
||||
|
||||
while (inputBytes.MoveNext())
|
||||
{
|
||||
@@ -85,8 +85,7 @@
|
||||
}
|
||||
|
||||
// TODO: Check for other ends of string where the string is improperly formatted. See commented method
|
||||
// numberOfBrackets = CheckForEndOfString(inputBytes, numberOfBrackets);
|
||||
|
||||
numberOfBrackets = CheckForEndOfString(numberOfBrackets, inputBytes);
|
||||
|
||||
break;
|
||||
case '(':
|
||||
@@ -185,7 +184,7 @@
|
||||
|
||||
private static void LeftShiftOctal(char nextOctalChar, int octalsRead, short[] octals)
|
||||
{
|
||||
for (int i = octalsRead; i > 0; i--)
|
||||
for (var i = octalsRead; i > 0; i--)
|
||||
{
|
||||
octals[i] = octals[i - 1];
|
||||
}
|
||||
@@ -240,5 +239,40 @@
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Peeks at the bytes following a closing parenthesis to decide whether the string should be
/// treated as finished even though its parentheses are unbalanced.
/// </summary>
/// <param name="numberOfBrackets">The bracket count to return when no end-of-string indicator is found.</param>
/// <param name="bytes">The input to peek into; its position is restored to where it was on entry.</param>
/// <returns>
/// 0 if the following bytes indicate the end of the string, otherwise <paramref name="numberOfBrackets"/> unchanged.
/// </returns>
private static int CheckForEndOfString(int numberOfBrackets, IInputBytes bytes)
{
    const byte lineFeed = 10;
    const byte carriageReturn = 13;

    var braces = numberOfBrackets;
    var nextThreeBytes = new byte[3];

    // Remember where we are so the look-ahead below does not consume any input.
    var startAt = bytes.CurrentOffset;

    var amountRead = bytes.Read(nextThreeBytes);

    // Check the next 3 bytes if available.
    // The following sequences are treated as indicators for the end of the string:
    // 1. Next line contains another object: CR + LF + '/'
    // 2. Dictionary ends in the next line:  CR + LF + '>'
    // 3. Next line contains another object: CR + '/'
    // 4. Dictionary ends in the next line:  CR + '>'
    if (amountRead == 3 && nextThreeBytes[0] == carriageReturn)
    {
        // The LF check must guard BOTH third-byte alternatives; otherwise any
        // CR + <arbitrary byte> + '>' sequence would wrongly terminate the string.
        var crLfThenDelimiter = nextThreeBytes[1] == lineFeed
                                && (nextThreeBytes[2] == '/' || nextThreeBytes[2] == '>');

        var crThenDelimiter = nextThreeBytes[1] == '/' || nextThreeBytes[1] == '>';

        if (crLfThenDelimiter || crThenDelimiter)
        {
            braces = 0;
        }
    }

    // Rewind to the position recorded on entry so the caller resumes from there.
    if (amountRead > 0)
    {
        bytes.Seek(startAt);
    }

    return braces;
}
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user