From 19047f62aed7e678940362d45c86fb766740fdd4 Mon Sep 17 00:00:00 2001 From: Eliot Jones Date: Sat, 25 Apr 2020 11:23:37 +0100 Subject: [PATCH] fix name output for merged documents --- examples/MergePdfDocuments.cs | 6 +++ src/UglyToad.PdfPig/Writer/PdfMerger.cs | 22 +++++++++ src/UglyToad.PdfPig/Writer/TokenWriter.cs | 56 +++++++++++++++++++---- 3 files changed, 75 insertions(+), 9 deletions(-) diff --git a/examples/MergePdfDocuments.cs b/examples/MergePdfDocuments.cs index 1220bb19..474cd091 100644 --- a/examples/MergePdfDocuments.cs +++ b/examples/MergePdfDocuments.cs @@ -3,6 +3,7 @@ using System; using System.IO; using System.Linq; + using PdfPig; using PdfPig.Writer; internal static class MergePdfDocuments @@ -20,6 +21,11 @@ var output = Path.Combine(location, "outputOfMerge.pdf"); File.WriteAllBytes(output, resultFileBytes); Console.WriteLine($"File output to: {output}"); + + using (var doc = PdfDocument.Open(resultFileBytes)) + { + Console.WriteLine($"Generated document with {doc.NumberOfPages} pages."); + } } catch (Exception ex) { diff --git a/src/UglyToad.PdfPig/Writer/PdfMerger.cs b/src/UglyToad.PdfPig/Writer/PdfMerger.cs index 87a4e9af..1d9a25a9 100644 --- a/src/UglyToad.PdfPig/Writer/PdfMerger.cs +++ b/src/UglyToad.PdfPig/Writer/PdfMerger.cs @@ -48,6 +48,28 @@ }); } + /// + /// Merge multiple PDF documents together with the pages in the order the file paths are provided. + /// + public static byte[] Merge(params string[] filePaths) + { + var bytes = new List(filePaths.Length); + + for (var i = 0; i < filePaths.Length; i++) + { + var filePath = filePaths[i]; + + if (filePath == null) + { + throw new ArgumentNullException(nameof(filePaths), $"Null filepath at index {i}."); + } + + bytes.Add(File.ReadAllBytes(filePath)); + } + + return Merge(bytes); + } + /// /// Merge the set of PDF documents. /// diff --git a/src/UglyToad.PdfPig/Writer/TokenWriter.cs b/src/UglyToad.PdfPig/Writer/TokenWriter.cs index 6c81accd..458905c1 100644 --- a/src/UglyToad.PdfPig/Writer/TokenWriter.cs +++ b/src/UglyToad.PdfPig/Writer/TokenWriter.cs @@ -3,11 +3,13 @@ using System; using System.Collections.Generic; using System.Globalization; + using System.Text; using System.IO; using System.Linq; using Core; using Graphics.Operations; using Tokens; + using Util; /// /// Writes any type of to the corresponding PDF document format output. @@ -21,7 +23,7 @@ private static readonly byte[] DictionaryEnd = OtherEncodings.StringAsLatin1Bytes(">>"); private static readonly byte Comment = GetByte("%"); - + private static readonly byte[] Eof = OtherEncodings.StringAsLatin1Bytes("%%EOF"); private static readonly byte[] FalseBytes = OtherEncodings.StringAsLatin1Bytes("false"); @@ -56,6 +58,20 @@ private static readonly byte[] Xref = OtherEncodings.StringAsLatin1Bytes("xref"); + private static readonly HashSet DelimiterChars = new HashSet + { + '(', + ')', + '<', + '>', + '[', + ']', + '{', + '}', + '/', + '%' + }; + /// /// Writes the given input token to the output stream with the correct PDF format and encoding including whitespace and line breaks as applicable. /// @@ -112,7 +128,7 @@ /// The object representing the catalog dictionary which is referenced from the trailer dictionary. /// The output stream to write to. /// The object reference for the document information dictionary if present. - internal static void WriteCrossReferenceTable(IReadOnlyDictionary objectOffsets, + internal static void WriteCrossReferenceTable(IReadOnlyDictionary objectOffsets, ObjectToken catalogToken, Stream outputStream, IndirectReference? documentInformationReference) @@ -143,7 +159,7 @@ WriteLineBreak(outputStream); WriteFirstXrefEmptyEntry(outputStream); - + foreach (var keyValuePair in objectOffsets.OrderBy(x => x.Key.ObjectNumber)) { /* @@ -161,22 +177,22 @@ var generation = OtherEncodings.StringAsLatin1Bytes(keyValuePair.Key.Generation.ToString("D5")); outputStream.Write(generation, 0, generation.Length); - + WriteWhitespace(outputStream); outputStream.WriteByte(InUseEntry); - + WriteWhitespace(outputStream); WriteLineBreak(outputStream); } - + outputStream.Write(Trailer, 0, Trailer.Length); WriteLineBreak(outputStream); var identifier = new ArrayToken(new IToken[] { - new HexToken(Guid.NewGuid().ToString("N").ToCharArray()), - new HexToken(Guid.NewGuid().ToString("N").ToCharArray()) + new HexToken(Guid.NewGuid().ToString("N").ToCharArray()), + new HexToken(Guid.NewGuid().ToString("N").ToCharArray()) }); var trailerDictionaryData = new Dictionary @@ -276,7 +292,29 @@ private static void WriteName(string name, Stream outputStream) { - var bytes = OtherEncodings.StringAsLatin1Bytes(name); + /* + * Beginning with PDF 1.2, any character except null (character code 0) may be + * included in a name by writing its 2-digit hexadecimal code, preceded by the number sign character (#). + * This is required for delimiter and whitespace characters. + * This is recommended for characters whose codes are outside the range 33 (!) to 126 (~). + */ + + var sb = new StringBuilder(); + + foreach (var c in name) + { + if (c < 33 || c > 126 || DelimiterChars.Contains(c)) + { + var str = Hex.GetString(new[] { (byte)c }); + sb.Append('#').Append(str); + } + else + { + sb.Append(c); + } + } + + var bytes = OtherEncodings.StringAsLatin1Bytes(sb.ToString()); outputStream.WriteByte(NameStart); outputStream.Write(bytes, 0, bytes.Length);