Merge pull request #401 from plaisted/master

adjust string serialization to handle raw byte data properly
2025-09-20 11:37:57 +08:00 · 2021-12-30 12:22:08 +00:00
parent 99c284c719 2146466a9a
commit a57e5f39ad
1 changed files with 64 additions and 33 deletions
--- a/src/UglyToad.PdfPig/Writer/TokenWriter.cs
+++ b/src/UglyToad.PdfPig/Writer/TokenWriter.cs
@@ -306,7 +306,8 @@
                if (pair.Value == null)
                {
                    WriteToken(NullToken.Instance, outputStream);
-                } else
+                }
                else
                {
                    WriteToken(pair.Value, outputStream);
                }
@@ -407,40 +408,70 @@
            WriteLineBreak(outputStream);
            outputStream.Write(StreamEnd, 0, StreamEnd.Length);
        }
-
+
        private static int[] EscapeNeeded = new int[]
        {
            '\r', '\n', '\t', '\b', '\f', '\\'
        };
        private static int[] Escaped = new int[]
        {
            'r', 'n', 't', 'b', 'f', '\\'
        };
        private static void WriteString(StringToken stringToken, Stream outputStream)
-        {
+        {
            outputStream.WriteByte(StringStart);
            if (stringToken.EncodedWith == StringToken.Encoding.Iso88591)
-            {
+            {
-                var isUtf16 = false;
+                // ISO 8859-1 (or really PDFDocEncoding in non-content-stream circumstances) shouldn't
-                for (var i = 0; i < stringToken.Data.Length; i++)
+                // have these chars but seems like internally this isn't obeyed (see:
-                {
+                // CanCreateDocumentInformationDictionaryWithNonAsciiCharacters test) and it may
-                    var c = stringToken.Data[i];
+                // happen during parsing as well -> switch to unicode
-
+                if (stringToken.Data.Any(x => x > 255))
-                    if (c == (char) StringStart || c == (char)StringEnd || c == (char) Backslash)
+                {
-                    {
+                    var data = new StringToken(stringToken.Data, StringToken.Encoding.Utf16BE).GetBytes();
-                        stringToken = new StringToken(stringToken.Data.Insert(i++, "\\"), stringToken.EncodedWith);
+                    outputStream.Write(data, 0, data.Length);
-                    }
+                }
-
+                else
-                    // Close enough.
+                {
-                    if (c > 250)
+                    int ei;
-                    {
+                    for (var i = 0; i < stringToken.Data.Length; i++)
-                        isUtf16 = true;
+                    {
-                        break;
+                        var c = (int)stringToken.Data[i];
-                    }
+                        if (c == (int)'(' || c == (int)')') // wastes a little space if escaping not needed but better than forward searching
-                }
+                        {
-
+                            outputStream.WriteByte((byte)'\\');
-                if (isUtf16)
+                            outputStream.WriteByte((byte)c);
-                {
+                        }
-                    stringToken = new StringToken(stringToken.Data, StringToken.Encoding.Utf16BE);
+                        else if ((ei = Array.IndexOf(EscapeNeeded, c)) > -1)
-                }
+                        {
-            }
+                            outputStream.WriteByte((byte)'\\');
-
+                            outputStream.WriteByte((byte)Escaped[ei]);
-            outputStream.WriteByte(StringStart);
+                        }
-            var bytes = stringToken.GetBytes();
+                        else if (c < 32 || c > 126) // non printable
-            outputStream.Write(bytes, 0, bytes.Length);
+                        {
-            outputStream.WriteByte(StringEnd);
+                            var b3 = c / 64;
-
+                            var b2 = (c - b3 * 64) / 8;
                            var b1 = c % 8;
                            outputStream.WriteByte((byte)'\\');
                            outputStream.WriteByte((byte)(b3 + '0'));
                            outputStream.WriteByte((byte)(b2 + '0'));
                            outputStream.WriteByte((byte)(b1 + '0'));
                        }
                        else
                        {
                            outputStream.WriteByte((byte)c);
                        }
                    }
                }
            }
            else
            {
                var bytes = stringToken.GetBytes();
                outputStream.Write(bytes, 0, bytes.Length);
            }
            outputStream.WriteByte(StringEnd);
            WriteWhitespace(outputStream);
        }