Merge pull request #401 from plaisted/master

adjust string serialization to handle raw byte data properly
This commit is contained in:
Eliot Jones
2021-12-30 12:22:08 +00:00
committed by GitHub

View File

@@ -306,7 +306,8 @@
if (pair.Value == null) if (pair.Value == null)
{ {
WriteToken(NullToken.Instance, outputStream); WriteToken(NullToken.Instance, outputStream);
} else }
else
{ {
WriteToken(pair.Value, outputStream); WriteToken(pair.Value, outputStream);
} }
@@ -407,40 +408,70 @@
WriteLineBreak(outputStream); WriteLineBreak(outputStream);
outputStream.Write(StreamEnd, 0, StreamEnd.Length); outputStream.Write(StreamEnd, 0, StreamEnd.Length);
} }
/// <summary>
/// Character codes that are written inside a literal string as a backslash followed by a
/// single letter. Parallel to <see cref="Escaped"/>: the entry at index <c>i</c> here is
/// emitted as a backslash followed by <c>Escaped[i]</c>.
/// Kept as <c>int[]</c> because lookups are performed with an <c>int</c> character code
/// via <c>Array.IndexOf</c>.
/// </summary>
private static readonly int[] EscapeNeeded = new int[]
{
    '\r', '\n', '\t', '\b', '\f', '\\'
};

/// <summary>
/// The character written after the backslash for the entry at the same index in
/// <see cref="EscapeNeeded"/>.
/// </summary>
private static readonly int[] Escaped = new int[]
{
    'r', 'n', 't', 'b', 'f', '\\'
};
/// <summary>
/// Writes <paramref name="stringToken"/> to <paramref name="outputStream"/> as a PDF literal
/// string: the opening delimiter, the escaped content bytes, the closing delimiter and then
/// trailing whitespace.
/// </summary>
/// <param name="stringToken">The string token to serialize.</param>
/// <param name="outputStream">The stream the serialized bytes are appended to.</param>
private static void WriteString(StringToken stringToken, Stream outputStream)
{
    outputStream.WriteByte(StringStart);

    if (stringToken.EncodedWith == StringToken.Encoding.Iso88591
        && !stringToken.Data.Any(x => x > 255))
    {
        // Every char fits in a single byte, so each one is written (escaped) directly.
        // Non-printable values are emitted as three-digit octal escapes to keep the
        // output free of raw control and high bytes.
        for (var i = 0; i < stringToken.Data.Length; i++)
        {
            WriteEscapedStringByte((byte)stringToken.Data[i], outputStream, true);
        }
    }
    else
    {
        // ISO 8859-1 (or really PdfDocEncoding in non-content-stream circumstances) should
        // not contain chars above 255, but internally this is not always obeyed (see the
        // CanCreateDocumentInformationDictionaryWithNonAsciiCharacters test) and it may
        // happen during parsing as well, so such tokens are switched to UTF-16BE.
        var source = stringToken.EncodedWith == StringToken.Encoding.Iso88591
            ? new StringToken(stringToken.Data, StringToken.Encoding.Utf16BE)
            : stringToken;

        // The encoded bytes can still collide with literal-string syntax: a byte equal to
        // '(' , ')' , '\' or an end-of-line character would unbalance, terminate or alter
        // the string if written raw, so those bytes are escaped. All other bytes are
        // written unchanged to avoid inflating the output.
        var bytes = source.GetBytes();
        for (var i = 0; i < bytes.Length; i++)
        {
            WriteEscapedStringByte(bytes[i], outputStream, false);
        }
    }

    outputStream.WriteByte(StringEnd);
    WriteWhitespace(outputStream);
}

/// <summary>
/// Writes a single content byte of a literal string, escaping it where required.
/// </summary>
/// <param name="value">The byte to write.</param>
/// <param name="outputStream">The stream the byte (or its escape sequence) is appended to.</param>
/// <param name="octalEscapeNonPrintable">
/// When <c>true</c>, bytes below 32 or above 126 that have no single-letter escape are
/// written as a backslash followed by three octal digits; when <c>false</c> they are written raw.
/// </param>
private static void WriteEscapedStringByte(byte value, Stream outputStream, bool octalEscapeNonPrintable)
{
    int escapeIndex;

    if (value == (byte)'(' || value == (byte)')')
    {
        // Always escaping parentheses wastes a little space when they happen to be
        // balanced, but avoids having to search forward for the matching one.
        outputStream.WriteByte((byte)'\\');
        outputStream.WriteByte(value);
    }
    else if ((escapeIndex = Array.IndexOf(EscapeNeeded, (int)value)) > -1)
    {
        outputStream.WriteByte((byte)'\\');
        outputStream.WriteByte((byte)Escaped[escapeIndex]);
    }
    else if (octalEscapeNonPrintable && (value < 32 || value > 126))
    {
        // Three octal digits, most significant first (value is at most 255 => at most \377).
        outputStream.WriteByte((byte)'\\');
        outputStream.WriteByte((byte)('0' + (value >> 6)));
        outputStream.WriteByte((byte)('0' + ((value >> 3) & 7)));
        outputStream.WriteByte((byte)('0' + (value & 7)));
    }
    else
    {
        outputStream.WriteByte(value);
    }
}