mirror of
https://github.com/UglyToad/PdfPig.git
synced 2026-03-10 00:23:29 +08:00
remove byte order marks from unicode strings #32
This commit is contained in:
@@ -273,7 +273,23 @@ are the same.)";
|
||||
|
||||
Assert.True(result);
|
||||
|
||||
Assert.Equal(@"Mic", AssertStringToken(token).Data);
|
||||
Assert.Equal(@"Mic", AssertStringToken(token).Data);
|
||||
}
|
||||
|
||||
/// <summary>
/// Checks that a string whose bytes start with the 0xFF 0xFE byte order mark is
/// tokenized successfully and that the resulting token data is exactly "Mic",
/// i.e. the byte order mark does not appear in the decoded text.
/// </summary>
/// <remarks>
/// NOTE(review): 0xFF 0xFE followed by low-byte-first code units (0x4D 0x00, ...)
/// is the little-endian form of UTF-16; the big-endian mark is 0xFE 0xFF. The
/// method name says "BigEndian" but this fixture exercises the little-endian
/// input - consider renaming once nothing filters tests by this name.
/// </remarks>
[Fact]
public void HandlesUtf16BigEndianStrings()
{
    // Byte order mark, then 'M', 'i', 'c' as two-byte code units, then 0x29 (ASCII ')').
    byte[] encoded =
    {
        0xFF, 0xFE,
        0x4D, 0x00,
        0x69, 0x00,
        0x63, 0x00,
        0x29
    };
    var input = new ByteArrayInputBytes(encoded);

    // 0x28 is ASCII '(' and is handed to the tokenizer as the current byte.
    var result = tokenizer.TryTokenize(0x28, input, out var token);

    Assert.True(result);

    var stringToken = AssertStringToken(token);
    Assert.Equal(@"Mic", stringToken.Data);
}
|
||||
|
||||
private static StringToken AssertStringToken(IToken token)
|
||||
|
||||
@@ -153,13 +153,13 @@
|
||||
{
|
||||
var rawBytes = OtherEncodings.StringAsLatin1Bytes(builder.ToString());
|
||||
|
||||
tokenStr = Encoding.BigEndianUnicode.GetString(rawBytes);
|
||||
tokenStr = Encoding.BigEndianUnicode.GetString(rawBytes).Substring(1);
|
||||
}
|
||||
else if (builder[0] == 0xFF && builder[1] == 0xFE)
|
||||
{
|
||||
var rawBytes = OtherEncodings.StringAsLatin1Bytes(builder.ToString());
|
||||
|
||||
tokenStr = Encoding.Unicode.GetString(rawBytes);
|
||||
tokenStr = Encoding.Unicode.GetString(rawBytes).Substring(1);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -188,34 +188,6 @@
|
||||
octals[0] = value;
|
||||
}
|
||||
|
||||
//private static int CheckForEndOfString(IRandomAccessRead reader, int bracesParameter)
|
||||
//{
|
||||
// int braces = bracesParameter;
|
||||
// byte[] nextThreeBytes = new byte[3];
|
||||
// int amountRead = reader.Read(nextThreeBytes);
|
||||
|
||||
// // Check the next 3 bytes if available
|
||||
// // The following cases are valid indicators for the end of the string
|
||||
// // 1. Next line contains another COSObject: CR + LF + '/'
|
||||
// // 2. CosDictionary ends in the next line: CR + LF + '>'
|
||||
// // 3. Next line contains another COSObject: CR + '/'
|
||||
// // 4. CosDictionary ends in the next line: CR + '>'
|
||||
// if (amountRead == 3 && nextThreeBytes[0] == ReadHelper.AsciiCarriageReturn)
|
||||
// {
|
||||
// if (nextThreeBytes[1] == ReadHelper.AsciiLineFeed && nextThreeBytes[2] == '/' || nextThreeBytes[2] == '>'
|
||||
// || nextThreeBytes[1] == '/' || nextThreeBytes[1] == '>')
|
||||
// {
|
||||
// braces = 0;
|
||||
// }
|
||||
// }
|
||||
// if (amountRead > 0)
|
||||
// {
|
||||
// reader.Unread(nextThreeBytes, 0, amountRead);
|
||||
// }
|
||||
// return braces;
|
||||
//}
|
||||
//}
|
||||
|
||||
private static void ProcessEscapedCharacter(char c, StringBuilder builder, short[] octal, ref bool isOctalActive,
|
||||
ref int octalsRead, ref bool isLineBreaking)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user