mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-07-16 17:49:51 +08:00
Merge pull request #401 from plaisted/master
adjust string serialization to handle raw byte data properly
This commit is contained in:
commit
a57e5f39ad
@ -306,7 +306,8 @@
|
||||
if (pair.Value == null)
|
||||
{
|
||||
WriteToken(NullToken.Instance, outputStream);
|
||||
} else
|
||||
}
|
||||
else
|
||||
{
|
||||
WriteToken(pair.Value, outputStream);
|
||||
}
|
||||
@ -407,40 +408,70 @@
|
||||
WriteLineBreak(outputStream);
|
||||
outputStream.Write(StreamEnd, 0, StreamEnd.Length);
|
||||
}
|
||||
|
||||
|
||||
/// <summary>
/// Character codes that are written as a two-character backslash escape inside a literal string.
/// Each entry is paired by index with the entry at the same position in <c>Escaped</c>,
/// so the two tables must keep the same length and order.
/// </summary>
private static readonly int[] EscapeNeeded =
{
    '\r',
    '\n',
    '\t',
    '\b',
    '\f',
    '\\'
};
|
||||
/// <summary>
/// The character written after the backslash for each entry of <c>EscapeNeeded</c>.
/// The entry at a given index is the replacement for the code at the same index in
/// <c>EscapeNeeded</c>, so the two tables must keep the same length and order.
/// </summary>
private static readonly int[] Escaped =
{
    'r',
    'n',
    't',
    'b',
    'f',
    '\\'
};
|
||||
/// <summary>
/// Writes a string token as a literal string: the opening delimiter, the escaped content,
/// the closing delimiter and then trailing whitespace.
/// </summary>
/// <param name="stringToken">The token to write; its <c>Data</c> and <c>EncodedWith</c> are read.</param>
/// <param name="outputStream">The stream that receives the serialized bytes.</param>
private static void WriteString(StringToken stringToken, Stream outputStream)
{
    outputStream.WriteByte(StringStart);

    // ISO 8859-1 (or really PdfDocEncoding outside of content streams) should never contain
    // characters above 255, but this is not enforced internally (see the
    // CanCreateDocumentInformationDictionaryWithNonAsciiCharacters test) and it may happen
    // during parsing as well, so such strings are switched to UTF-16BE.
    var isIso88591 = stringToken.EncodedWith == StringToken.Encoding.Iso88591;
    var fitsInSingleBytes = isIso88591 && !stringToken.Data.Any(x => x > 255);

    if (fitsInSingleBytes)
    {
        // Every character is at most 255 here, so each one maps to exactly one output byte.
        foreach (var character in stringToken.Data)
        {
            WriteEscapedStringByte(character, outputStream, true);
        }
    }
    else
    {
        var encodedToken = isIso88591
            ? new StringToken(stringToken.Data, StringToken.Encoding.Utf16BE)
            : stringToken;

        // The encoded bytes can coincide with '(', ')', '\' or an end-of-line byte
        // (for example U+0129 is 0x01 0x29 in UTF-16BE). Written raw, such a byte would
        // unbalance or corrupt the literal string, so they are escaped as well.
        // NOTE(review): assumes GetBytes() returns the complete encoded form of the token,
        // including any byte order mark - confirm against StringToken.
        foreach (var value in encodedToken.GetBytes())
        {
            WriteEscapedStringByte(value, outputStream, false);
        }
    }

    outputStream.WriteByte(StringEnd);
    WriteWhitespace(outputStream);
}

/// <summary>
/// Writes one byte of literal string content, escaping it where required.
/// </summary>
/// <param name="value">The byte value to write; must be in the range 0 to 255.</param>
/// <param name="outputStream">The stream that receives the bytes.</param>
/// <param name="escapeNonPrintable">
/// When <c>true</c>, values outside 32 to 126 are written as a three-digit octal escape;
/// when <c>false</c>, they are written unchanged to keep binary payloads compact.
/// </param>
private static void WriteEscapedStringByte(int value, Stream outputStream, bool escapeNonPrintable)
{
    if (value == '(' || value == ')')
    {
        // Wastes a little space when the parentheses are balanced, but it is always valid
        // and avoids searching forward for the matching delimiter.
        outputStream.WriteByte((byte)'\\');
        outputStream.WriteByte((byte)value);
        return;
    }

    var escapeIndex = Array.IndexOf(EscapeNeeded, value);
    if (escapeIndex > -1)
    {
        outputStream.WriteByte((byte)'\\');
        outputStream.WriteByte((byte)Escaped[escapeIndex]);
        return;
    }

    if (escapeNonPrintable && (value < 32 || value > 126))
    {
        // Three octal digits, most significant first; three digits cover 0 to 255.
        outputStream.WriteByte((byte)'\\');
        outputStream.WriteByte((byte)('0' + ((value >> 6) & 7)));
        outputStream.WriteByte((byte)('0' + ((value >> 3) & 7)));
        outputStream.WriteByte((byte)('0' + (value & 7)));
        return;
    }

    outputStream.WriteByte((byte)value);
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user