Merge pull request #401 from plaisted/master

adjust string serialization to handle raw byte data properly
This commit is contained in:
Eliot Jones 2021-12-30 12:22:08 +00:00 committed by GitHub
commit a57e5f39ad
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -306,7 +306,8 @@
if (pair.Value == null)
{
WriteToken(NullToken.Instance, outputStream);
} else
}
else
{
WriteToken(pair.Value, outputStream);
}
@ -407,40 +408,70 @@
WriteLineBreak(outputStream);
outputStream.Write(StreamEnd, 0, StreamEnd.Length);
}
// Characters that WriteString emits as a two-byte backslash escape instead of a raw byte.
// Index-aligned with Escaped: EscapeNeeded[i] is written as '\' followed by Escaped[i].
// Stored as int so the int-cast character can be looked up with Array.IndexOf.
private static int[] EscapeNeeded = new int[]
{
'\r', '\n', '\t', '\b', '\f', '\\'
};
// Character written after the backslash for the entry at the same index in EscapeNeeded.
private static int[] Escaped = new int[]
{
'r', 'n', 't', 'b', 'f', '\\'
};
// Writes a string token to outputStream as StringStart, the string content, StringEnd and
// trailing whitespace. For Iso88591-encoded tokens the content is either re-encoded as
// UTF-16BE (when any character is above 255) or written byte by byte with escaping; tokens
// with any other encoding are written via stringToken.GetBytes() unchanged.
//
// NOTE(review): this span is a rendered diff hunk without +/- markers, so lines of the
// previous implementation and of the new one are interleaved and the text does not compile
// as shown. The isUtf16 scan and the second StringStart / GetBytes / StringEnd sequence
// look like the removed pre-change body -- confirm against the actual commit.
private static void WriteString(StringToken stringToken, Stream outputStream)
{
{
outputStream.WriteByte(StringStart);
if (stringToken.EncodedWith == StringToken.Encoding.Iso88591)
{
// NOTE(review): presumably the removed implementation starts here -- it escaped by
// rebuilding the token with an inserted backslash and treated any char > 250 as UTF-16.
var isUtf16 = false;
for (var i = 0; i < stringToken.Data.Length; i++)
{
var c = stringToken.Data[i];
if (c == (char) StringStart || c == (char)StringEnd || c == (char) Backslash)
{
stringToken = new StringToken(stringToken.Data.Insert(i++, "\\"), stringToken.EncodedWith);
}
// Close enough.
if (c > 250)
{
isUtf16 = true;
break;
}
}
if (isUtf16)
{
stringToken = new StringToken(stringToken.Data, StringToken.Encoding.Utf16BE);
}
}
outputStream.WriteByte(StringStart);
var bytes = stringToken.GetBytes();
outputStream.Write(bytes, 0, bytes.Length);
outputStream.WriteByte(StringEnd);
{
// ISO 8859-1 (or really PdfDocEncoding outside of content streams) should not contain
// characters above 255, but internally this does not seem to be obeyed (see the
// CanCreateDocumentInformationDictionaryWithNonAsciiCharacters test) and it may also
// happen during parsing, so switch to Unicode (UTF-16BE) when such a character is present.
if (stringToken.Data.Any(x => x > 255))
{
var data = new StringToken(stringToken.Data, StringToken.Encoding.Utf16BE).GetBytes();
outputStream.Write(data, 0, data.Length);
}
else
{
// Every character is <= 255 here, so each one is written as a single byte or an escape.
int ei;
for (var i = 0; i < stringToken.Data.Length; i++)
{
var c = (int)stringToken.Data[i];
// Parentheses are always written with a preceding backslash.
if (c == (int)'(' || c == (int)')') // wastes a little space if escaping not needed but better than forward searching
{
outputStream.WriteByte((byte)'\\');
outputStream.WriteByte((byte)c);
}
// Characters listed in EscapeNeeded become a backslash plus the entry at the same index in Escaped.
else if ((ei = Array.IndexOf(EscapeNeeded, c)) > -1)
{
outputStream.WriteByte((byte)'\\');
outputStream.WriteByte((byte)Escaped[ei]);
}
// Anything else outside 32..126 is written as a backslash followed by three octal digits.
else if (c < 32 || c > 126) // non printable
{
// b3, b2, b1 are the 64s, 8s and 1s octal digits of c (c <= 255, so b3 <= 3).
var b3 = c / 64;
var b2 = (c - b3 * 64) / 8;
var b1 = c % 8;
outputStream.WriteByte((byte)'\\');
outputStream.WriteByte((byte)(b3 + '0'));
outputStream.WriteByte((byte)(b2 + '0'));
outputStream.WriteByte((byte)(b1 + '0'));
}
else
{
// Printable character that needs no escaping: written as-is.
outputStream.WriteByte((byte)c);
}
}
}
}
else
{
// Tokens that are not Iso88591-encoded are written using their own byte representation.
var bytes = stringToken.GetBytes();
outputStream.Write(bytes, 0, bytes.Length);
}
outputStream.WriteByte(StringEnd);
WriteWhitespace(outputStream);
}