mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-10-15 19:54:52 +08:00
Handle unbalanced parentheses during string tokenization
When a close parenthesis is immediately followed by a line break and then '/' or '>', we assume the parentheses are unbalanced and finish reading the string.
This commit is contained in:
@@ -298,17 +298,36 @@ endobj
|
|||||||
<< /S 1245 >>
|
<< /S 1245 >>
|
||||||
|
|
||||||
stream
|
stream
|
||||||
%¥×³®í»š}%§X{{tøNåÝž¶ö¢ÖÞgrehtyyy$&%&£$££(*¾–~´¼
|
|
||||||
%¥×³®í»š}%§X{{tøNåÝž¶ö¢ÖÞgrehtyyy$&%&£$££(*¾–~´¼
|
%¥×³®í»š}%§X{{tøNåÝž¶ö¢ÖÞgrehtyyy$&%&£$££(*¾–~´¼
|
||||||
endobj";
|
endstream
|
||||||
endobj";
|
endobj";
|
||||||
|
|
||||||
var scanner = GetScanner(s);
|
var scanner = GetScanner(s);
|
||||||
|
|
||||||
|
|
||||||
var token = ReadToEnd(scanner)[0];
|
var token = ReadToEnd(scanner)[0];
|
||||||
|
|
||||||
var stream = Assert.IsType<StreamToken>(token.Data);
|
Assert.Equal(12655, token.Number.ObjectNumber);
|
||||||
|
|
||||||
|
var stream = Assert.IsType<StreamToken>(token.Data);
|
||||||
|
|
||||||
|
Assert.Equal("1245", stream.StreamDictionary.Data["S"].ToString());
|
||||||
|
|
||||||
|
Assert.Equal("%¥×³®í»š}%§X{{tøNåÝž¶ö¢ÖÞgrehtyyy$&%&£$££(*¾–~´¼", Encoding.UTF8.GetString(stream.Data.ToArray()));
|
||||||
|
}
|
||||||
|
|
||||||
|
[Fact]
|
||||||
|
public void ReadsStreamWithoutBreakBeforeEndstream()
|
||||||
|
{
|
||||||
|
const string s = @"
|
||||||
|
1 0 obj
|
||||||
|
12
|
||||||
|
endobj
|
||||||
|
|
||||||
|
7 0 obj
|
||||||
|
<< /Length 288
|
||||||
|
/Filter /FlateDecode >>
|
||||||
|
stream
|
||||||
|
xœ]‘ËjÃ0E÷ÿÃ,ÓEð#NÒ€1¤N^ôA~€-]A-YYøï+Ï4¡t#qfîFWQY*Dïv5:è”–§ñjB‹½Òa¤ •p7¤K ƒÈûëyr8Tº!Ïà úð‚ÉÙVG9¶ø@Å7+Ñ*ÝÃ곬¹T_ùƵƒ8Š$vË̗Ƽ6BDöu%½B¹yí$—Ù ¤\Hx71JœL#Ð6ºÇ0È㸀ü|. µüßõÏ""WÛ‰¯Æ.êÄ«ã8;¤iL°!Ø %É`K°ßì¸ÃöÜáÜ) [‚#CFðİ#(yƒg^ÿ¶æò
|
||||||
ÿž“¸Zë#¢?¢h–P”Æû?šÑï÷ø¯‰Šendstream
|
ÿž“¸Zë#¢?¢h–P”Æû?šÑï÷ø¯‰Šendstream
|
||||||
endobj
|
endobj
|
||||||
|
|
||||||
|
@@ -23,14 +23,14 @@
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
int numberOfBrackets = 1;
|
var numberOfBrackets = 1;
|
||||||
bool isEscapeActive = false;
|
var isEscapeActive = false;
|
||||||
bool isLineBreaking = false;
|
var isLineBreaking = false;
|
||||||
|
|
||||||
bool octalModeActive = false;
|
var octalModeActive = false;
|
||||||
|
|
||||||
short[] octal = { 0, 0, 0 };
|
short[] octal = { 0, 0, 0 };
|
||||||
int octalsRead = 0;
|
var octalsRead = 0;
|
||||||
|
|
||||||
while (inputBytes.MoveNext())
|
while (inputBytes.MoveNext())
|
||||||
{
|
{
|
||||||
@@ -85,8 +85,7 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Check for other ends of string where the string is improperly formatted. See commented method
|
// TODO: Check for other ends of string where the string is improperly formatted. See commented method
|
||||||
// numberOfBrackets = CheckForEndOfString(inputBytes, numberOfBrackets);
|
numberOfBrackets = CheckForEndOfString(numberOfBrackets, inputBytes);
|
||||||
|
|
||||||
|
|
||||||
break;
|
break;
|
||||||
case '(':
|
case '(':
|
||||||
@@ -185,7 +184,7 @@
|
|||||||
|
|
||||||
private static void LeftShiftOctal(char nextOctalChar, int octalsRead, short[] octals)
|
private static void LeftShiftOctal(char nextOctalChar, int octalsRead, short[] octals)
|
||||||
{
|
{
|
||||||
for (int i = octalsRead; i > 0; i--)
|
for (var i = octalsRead; i > 0; i--)
|
||||||
{
|
{
|
||||||
octals[i] = octals[i - 1];
|
octals[i] = octals[i - 1];
|
||||||
}
|
}
|
||||||
@@ -240,5 +239,40 @@
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Heuristic for literal strings whose parentheses are unbalanced: peeks at the bytes that
/// follow the current position and decides whether the string should be treated as finished
/// even though <paramref name="numberOfBrackets"/> has not reached zero.
/// </summary>
/// <param name="numberOfBrackets">The bracket depth tracked by the caller.</param>
/// <param name="bytes">
/// The input to peek into. Whenever any bytes were read, the input is sought back to the
/// offset it had on entry, so the caller's position is left where it was.
/// </param>
/// <returns>
/// <c>0</c> when the look-ahead matches one of the end-of-string patterns described below;
/// otherwise <paramref name="numberOfBrackets"/> unchanged.
/// </returns>
private static int CheckForEndOfString(int numberOfBrackets, IInputBytes bytes)
{
    const byte lineFeed = 10;
    const byte carriageReturn = 13;

    var braces = numberOfBrackets;
    var nextThreeBytes = new byte[3];

    // Remember where we started so the look-ahead can be undone afterwards.
    var startAt = bytes.CurrentOffset;

    var amountRead = bytes.Read(nextThreeBytes);

    // Check the next 3 bytes if available.
    // The following cases are treated as indicators for the end of the string:
    // 1. Next line starts a name object:        CR + LF + '/'
    // 2. The dictionary ends on the next line:  CR + LF + '>'
    // 3. Next line starts a name object:        CR + '/'
    // 4. The dictionary ends on the next line:  CR + '>'
    if (amountRead == 3 && nextThreeBytes[0] == carriageReturn)
    {
        var second = nextThreeBytes[1];
        var third = nextThreeBytes[2];

        // The '/' and '>' alternatives for the third byte must BOTH be guarded by the
        // line-feed check; otherwise CR + <any byte> + '>' would wrongly end the string.
        var endsAfterCrLf = second == lineFeed && (third == '/' || third == '>');
        var endsAfterCr = second == '/' || second == '>';

        if (endsAfterCrLf || endsAfterCr)
        {
            braces = 0;
        }
    }

    // Undo the look-ahead; nothing to undo if no bytes were consumed.
    if (amountRead > 0)
    {
        bytes.Seek(startAt);
    }

    return braces;
}
|
||||||
}
|
}
|
||||||
}
|
}
|
Reference in New Issue
Block a user