address bugs and add tests

This commit is contained in:
EliotJones
2025-07-06 17:42:47 -05:00
parent 7134032188
commit 8176903b4e
2 changed files with 884 additions and 803 deletions

View File

@@ -252,6 +252,7 @@ A¡¬àð‰É©ˆ°¼×s³®í»š}%§X{{tøNåÝž¶ö¢ÖÞ¾
var str = Encoding.UTF8.GetString(stream.Data.ToArray());
Assert.StartsWith("H‰œUkLSgþÚh¹IÝÅl", str);
Assert.EndsWith("oäO ôkÆ)", str);
Assert.Equal(2, locationProvider.Offsets[new IndirectReference(352, 0)]);
}
@@ -289,7 +290,7 @@ endobj";
var str = Encoding.UTF8.GetString(data);
Assert.Equal(data.Length, invalidLengthStream.Length);
Assert.StartsWith("ABCDeeeee", str);
Assert.Equal(invalidLengthStream, str);
Assert.Equal(2, locationProvider.Offsets[new IndirectReference(352, 0)]);
}
@@ -297,47 +298,108 @@ endobj";
[Fact]
public void ReadsSimpleStreamObject()
{
// Length of the bytes as found by Encoding.UTF8.GetBytes is 45
const string s = @"
574387 0 obj
<< /Length 45 >>
stream
À“Éððr¥8»P£ØêÁi½®Û(éhŽú
endstream
endobj";
const string s =
"""
574387 0 obj
<< /Length 45 >>
stream
ÀÉððr¥8»P£ØêÁi½®Û(éú
endstream
endobj
""";
var scanner = GetScanner(s);
var token = ReadToEnd(scanner)[0];
var tokens = ReadToEnd(scanner);
var str = GetStreamDataString(tokens);
Assert.Equal("À“Éððr¥8»P£ØêÁi½®Û(éhŽú", str);
}
[Fact]
public void ReadsSimpleStreamContent()
{
const string s =
"""
1 0 obj
<< /Name /Bob >>
stream
123456
endstream
endobj
""";
var scanner = GetScanner(s);
var tokens = ReadToEnd(scanner);
var token = Assert.Single(tokens);
var stream = Assert.IsType<StreamToken>(token.Data);
var bytes = stream.Data.ToArray();
Assert.Equal(45, bytes.Length);
Assert.Equal(6, bytes.Length);
var outputString = Encoding.UTF8.GetString(bytes);
var outputString = Encoding.ASCII.GetString(bytes);
Assert.Equal("À“Éððr¥8»P£ØêÁi½®Û(éhŽú", outputString);
Assert.Equal("123456", outputString);
}
[Fact]
public void ReadsStreamContentWithNoLinebreak()
{
const string s =
"""
1 0 obj
<< /Name /Bob >>
stream
123456endstream
endobj
""";
var scanner = GetScanner(s);
var tokens = ReadToEnd(scanner);
var token = Assert.Single(tokens);
var stream = Assert.IsType<StreamToken>(token.Data);
var bytes = stream.Data.ToArray();
Assert.Equal(6, bytes.Length);
var outputString = Encoding.ASCII.GetString(bytes);
Assert.Equal("123456", outputString);
}
[Fact]
public void ReadsStreamWithIndirectLength()
{
const string s = @"5 0 obj 52 endobj
const string s =
"""
5 0 obj
52
endobj
12 0 obj
<< /Length 5 0 R /S 1245 >>
12 0 obj
stream
%¥×³®í»š}%§X{{tøNåÝžö¢ÖÞ¾~´¼
endstream
endobj
""";
<< /Length 5 0 R /S 1245 >>
stream
%¥×³®í»š}%§X{{tøNåÝž¶ö¢ÖÞ¾~´¼
endstream
endobj";
var locationProvider = new TestObjectLocationProvider();
locationProvider.Offsets[new IndirectReference(5, 0)] = 0;
var locationProvider = new TestObjectLocationProvider
{
Offsets =
{
[new IndirectReference(5, 0)] = 0
}
};
var scanner = GetScanner(s, locationProvider);
@@ -356,54 +418,44 @@ endobj";
[Fact]
public void ReadsStreamWithMissingLength()
{
const string s = @"
12655 0 obj
<< /S 1245 >>
stream
%¥×³®í»š}%§X{{tøNåÝž¶ö¢ÖÞgrehtyyy$&%&£$££(*¾–~´¼
endstream
endobj";
const string s =
"""
12655 0 obj
<< /S 1245 >>
stream
%¥×³®í»š}%§X{{tøNåendÝžö¢ÖÞgrehtyyy$&%&£$££(*¾~´¼
endstream
endobj
""";
var scanner = GetScanner(s);
var token = ReadToEnd(scanner)[0];
var tokens = ReadToEnd(scanner);
Assert.Equal(12655, token.Number.ObjectNumber);
var str = GetStreamDataString(tokens);
var stream = Assert.IsType<StreamToken>(token.Data);
Assert.Equal("1245", stream.StreamDictionary.Data["S"].ToString());
Assert.Equal("%¥×³®í»š}%§X{{tøNåÝž¶ö¢ÖÞgrehtyyy$&%&£$££(*¾–~´¼", Encoding.UTF8.GetString(stream.Data.ToArray()));
Assert.Equal("%¥×³®í»š}%§X{{tøNåendÝž¶ö¢ÖÞgrehtyyy$&%&£$££(*¾–~´¼", str);
}
[Fact]
public void ReadsStreamWithoutBreakBeforeEndstream()
{
const string s = @"
1 0 obj
12
endobj
7 0 obj
<< /Length 288
/Filter /FlateDecode >>
stream
xœ]ËjÃ0ÿÃ,ÓEð#NÒ€1¤N^ôA~€-]A- YYøï+Ï4¡t#qfîFWQY*­Dïv5:è”–§ñjB½Òa¤ •p7¤K  ƒÈûëyr8Tº!Ïà úð‚ÉÙVG9¶ø@Å7+Ñ*ÝÃ곬¹T_ùƵƒ8 Š$vË̗Ƽ6BDöu%½B¹yí$—Ù ¤\Hx71JœL#Ð6ºÇ0È㸀ü|. µüßõÏ""WÛ‰¯Æ.êÄ«ã8; ¤iL°!Ø %É`K°ßì¸ÃöÜáÜ)  [#CFðİ#(yƒg^ÿ¶æò
ÿž“¸Zë#¢?¢hP”Æû?šÑï÷ø¯‰Šendstream
endobj
9 0 obj
16
endobj";
const string s =
"""
7 0 obj
<< /Filter 0 >>
stream
ABCendcow233endendstream
endobj
""";
var scanner = GetScanner(s);
var token = ReadToEnd(scanner)[1];
var tokens = ReadToEnd(scanner);
Assert.Equal(7, token.Number.ObjectNumber);
var str = GetStreamDataString(tokens);
Assert.Equal("ABCendcow233end", str);
}
[Fact]
@@ -422,6 +474,10 @@ endobj";
var scanner = GetScanner(s);
var tokens = ReadToEnd(scanner);
var dataStr = GetStreamDataString(tokens);
Assert.Equal("012", dataStr);
}
[Fact]
@@ -432,17 +488,11 @@ endobj";
1974 0 obj
<<
/Filter /FlateDecode
/Length 1975 0 R
>>
stream
]ÔÏ@ñ'ð;øØ"Œg !Ué…Cÿ¨´ ö:B*Æ2äÀÛw¿™MZõ'þ°½ë]<ï>ïÆÓ­^|Ÿ/Ý>Ýêá4ösº^^ç.ÕÇôr«e[÷§îVÎü؝SµÈ7ïï×[:ïÆáRm6ÕâGþðzïõ‡Oýå˜>VosŸæÓøRøõ¼ÏçûםÎi¼ÕMµÝÖ}òƒ¾¦¯‡sª~ÛîϟŸn÷‡|Ïß+~Þ§T·~¾ŒÉt—>]§C—æÃøªM»ÜÖ­ÒØÿ÷ÙJããðïµ~†&msh ­YK4BK0yÈ¿rXVzš°Žà}$<zЁðDxò`þÐáAGÂ1:BÏða{B{$$Bа&
!ÂSÒä¿ýCC£ePHx´x-Ã
R<˜º@!á!>,âW@!á!¼œ@!áÑ2uBÂC=@!á¡þP(¤xðU
R< (¤°PHx(SW(4<S(4<´#@¡á¡ÌT¡Ð²><@¡á¡Œ¢PhxSW(4<õ¡Phxè Ç£PhY|Q
GëÃB¡e}à¡Phx˜¿
B¡áÑú°Phx´ÆÔ
+,ƒÂÂ#/× °²>3(¬.¡nPXx˜_ùC¡°²>x}ƒÂÂCx9ƒÂНoPXxˆš&ù!ÙگŠÿ jbky
yyÛJØlØßw±îužóæ¦ï\ìY§1½ï«Ó.ÿùz°gAendstream
ABC123endstream33093872end337772A
3093AAendstream
endstream
endobj
""";
@@ -450,6 +500,25 @@ endobj";
var scanner = GetScanner(s);
var tokens = ReadToEnd(scanner);
var str = GetStreamDataString(tokens);
Assert.Equal(
"""
ABC123endstream33093872end337772A
3093AA
""",
str);
}
private string GetStreamDataString(IReadOnlyList<ObjectToken> tokens)
{
var token = Assert.Single(tokens);
var stream = Assert.IsType<StreamToken>(token.Data);
return Encoding.UTF8.GetString(stream.Data.ToArray());
}
[Fact]

View File

@@ -400,7 +400,7 @@
// Token is at end of stream or is followed by whitespace
if (!inputBytes.MoveNext() || ReadHelper.IsWhitespace(inputBytes.CurrentByte))
{
var location = inputBytes.CurrentOffset - EndstreamBytes.Length;
var location = inputBytes.CurrentOffset - EndstreamBytes.Length - 1;
endLocations.Push(new EndLoc(true, location, !isEndData));
isEndData = true;
@@ -412,6 +412,12 @@
endStreamPosition = 0;
commonPartPosition = 0;
}
else
{
endStreamPosition = 0;
commonPartPosition = 0;
isEndData = false;
}
}
}
else if (inputBytes.CurrentByte == objPart[endObjPosition])
@@ -444,6 +450,10 @@
}
}
}
else if (inputBytes.CurrentByte == endWordPart[0])
{
commonPartPosition = 1;
}
else
{
// We were reading 'end' but then we had a character mismatch.
@@ -488,20 +498,22 @@
var dataLength = endLoc.Offset - startDataOffset;
// 3 characters, 'e', '\n' and possibly '\r'
inputBytes.Seek(endLoc.Offset - 3);
inputBytes.Seek(endLoc.Offset - 1);
var adjustment = 0;
bool isWhitespace;
do
{
inputBytes.MoveNext();
if (inputBytes.CurrentByte == '\r')
isWhitespace = ReadHelper.IsWhitespace(inputBytes.CurrentByte);
if (isWhitespace)
{
dataLength -= 3;
}
else
{
dataLength -= 2;
adjustment++;
}
inputBytes.Seek(endLoc.Offset - 1 - adjustment);
} while (isWhitespace);
Memory<byte> data = new byte[dataLength];
Memory<byte> data = new byte[dataLength - adjustment];
inputBytes.Seek(streamDataStart);
inputBytes.Read(data.Span);