Merge pull request #401 from plaisted/master

adjust string serialization to handle raw byte data properly
2026-01-18 19:51:24 +08:00 · 2021-12-30 12:22:08 +00:00
parent 99c284c719 2146466a9a
commit a57e5f39ad
1 changed files with 64 additions and 33 deletions
--- a/src/UglyToad.PdfPig/Writer/TokenWriter.cs
+++ b/src/UglyToad.PdfPig/Writer/TokenWriter.cs
@@ -306,7 +306,8 @@
                if (pair.Value == null)
                {
                    WriteToken(NullToken.Instance, outputStream);
-                } else
+                }
+                else
                {
                    WriteToken(pair.Value, outputStream);
                }
@@ -407,40 +408,70 @@
            WriteLineBreak(outputStream);
            outputStream.Write(StreamEnd, 0, StreamEnd.Length);
        }
-
+
+        private static int[] EscapeNeeded = new int[]
+        {
+            '\r', '\n', '\t', '\b', '\f', '\\'
+        };
+        private static int[] Escaped = new int[]
+        {
+            'r', 'n', 't', 'b', 'f', '\\'
+        };
        private static void WriteString(StringToken stringToken, Stream outputStream)
-        {
+        {
+            outputStream.WriteByte(StringStart);
+
            if (stringToken.EncodedWith == StringToken.Encoding.Iso88591)
-            {
-                var isUtf16 = false;
-                for (var i = 0; i < stringToken.Data.Length; i++)
-                {
-                    var c = stringToken.Data[i];
-
-                    if (c == (char) StringStart || c == (char)StringEnd || c == (char) Backslash)
-                    {
-                        stringToken = new StringToken(stringToken.Data.Insert(i++, "\\"), stringToken.EncodedWith);
-                    }
-
-                    // Close enough.
-                    if (c > 250)
-                    {
-                        isUtf16 = true;
-                        break;
-                    }
-                }
-
-                if (isUtf16)
-                {
-                    stringToken = new StringToken(stringToken.Data, StringToken.Encoding.Utf16BE);
-                }
-            }
-
-            outputStream.WriteByte(StringStart);
-            var bytes = stringToken.GetBytes();
-            outputStream.Write(bytes, 0, bytes.Length);
-            outputStream.WriteByte(StringEnd);
-
+            {
+                // ISO 8859-1 (or really PdfDocEncoding in non-content-stream circumstances) shouldn't
+                // contain chars above 255, but it seems this isn't obeyed internally (see the
+                // CanCreateDocumentInformationDictionaryWithNonAsciiCharacters test) and it may
+                // happen during parsing as well -> switch to Unicode (UTF-16BE).
+                if (stringToken.Data.Any(x => x > 255))
+                {
+                    var data = new StringToken(stringToken.Data, StringToken.Encoding.Utf16BE).GetBytes();
+                    outputStream.Write(data, 0, data.Length);
+                }
+                else
+                {
+                    int ei;
+                    for (var i = 0; i < stringToken.Data.Length; i++)
+                    {
+                        var c = (int)stringToken.Data[i];
+                        if (c == (int)'(' || c == (int)')') // wastes a little space if escaping not needed but better than forward searching
+                        {
+                            outputStream.WriteByte((byte)'\\');
+                            outputStream.WriteByte((byte)c);
+                        }
+                        else if ((ei = Array.IndexOf(EscapeNeeded, c)) > -1)
+                        {
+                            outputStream.WriteByte((byte)'\\');
+                            outputStream.WriteByte((byte)Escaped[ei]);
+                        }
+                        else if (c < 32 || c > 126) // non printable
+                        {
+                            var b3 = c / 64;
+                            var b2 = (c - b3 * 64) / 8;
+                            var b1 = c % 8;
+                            outputStream.WriteByte((byte)'\\');
+                            outputStream.WriteByte((byte)(b3 + '0'));
+                            outputStream.WriteByte((byte)(b2 + '0'));
+                            outputStream.WriteByte((byte)(b1 + '0'));
+                        }
+                        else
+                        {
+                            outputStream.WriteByte((byte)c);
+                        }
+                    }
+                }
+            }
+            else
+            {
+                var bytes = stringToken.GetBytes();
+                outputStream.Write(bytes, 0, bytes.Length);
+            }
+
+            outputStream.WriteByte(StringEnd);
            WriteWhitespace(outputStream);
        }