Support big-endian and little-endian UTF-16 in string tokens #32

This commit is contained in:
Eliot Jones
2019-06-05 18:02:24 +01:00
parent f375cb6f04
commit 39d05e6a47
2 changed files with 42 additions and 2 deletions

View File

@@ -18,7 +18,6 @@
} }
[Theory] [Theory]
[InlineData(')')]
[InlineData('<')] [InlineData('<')]
[InlineData('\\')] [InlineData('\\')]
[InlineData('A')] [InlineData('A')]
@@ -261,6 +260,22 @@ are the same.)";
Assert.Equal(@" (sleep 1; printf ""QUIT\r\n"") | ", AssertStringToken(token).Data); Assert.Equal(@" (sleep 1; printf ""QUIT\r\n"") | ", AssertStringToken(token).Data);
} }
[Fact]
public void HandlesUtf16Strings()
{
    // Bytes following the opening bracket: 0xFE 0xFF (the big-endian UTF-16
    // byte order mark), three 2-byte code units, then 0x29 (ASCII ')').
    byte[] content =
    {
        0xFE, 0xFF,
        0x00, 0x4D,
        0x00, 0x69,
        0x00, 0x63,
        0x29
    };
    var bytes = new ByteArrayInputBytes(content);

    // 0x28 (ASCII '(') is passed as the byte that was read before the input.
    var succeeded = tokenizer.TryTokenize(0x28, bytes, out var parsed);

    Assert.True(succeeded);

    var stringToken = AssertStringToken(parsed);
    Assert.Equal(@"Mic", stringToken.Data);
}
private static StringToken AssertStringToken(IToken token) private static StringToken AssertStringToken(IToken token)
{ {
Assert.NotNull(token); Assert.NotNull(token);

View File

@@ -146,7 +146,32 @@
} }
} }
token = new StringToken(builder.ToString()); string tokenStr;
if (builder.Length >= 2)
{
if (builder[0] == 0xFE && builder[1] == 0xFF)
{
var rawBytes = OtherEncodings.StringAsLatin1Bytes(builder.ToString());
tokenStr = Encoding.BigEndianUnicode.GetString(rawBytes);
}
else if (builder[0] == 0xFF && builder[1] == 0xFE)
{
var rawBytes = OtherEncodings.StringAsLatin1Bytes(builder.ToString());
tokenStr = Encoding.Unicode.GetString(rawBytes);
}
else
{
tokenStr = builder.ToString();
}
}
else
{
tokenStr = builder.ToString();
}
token = new StringToken(tokenStr);
return true; return true;
} }