diff --git a/src/UglyToad.PdfPig/Writer/PdfMerger.cs b/src/UglyToad.PdfPig/Writer/PdfMerger.cs index 7df04adf..7c24a69b 100644 --- a/src/UglyToad.PdfPig/Writer/PdfMerger.cs +++ b/src/UglyToad.PdfPig/Writer/PdfMerger.cs @@ -16,8 +16,6 @@ using Tokenization.Scanner; using Tokens; using Exceptions; - using Graphics.Operations; - using Fonts; /// /// Merges PDF documents into each other. @@ -131,45 +129,35 @@ { private const decimal DefaultVersion = 1.2m; - private readonly BuilderContext context = new BuilderContext(); - private readonly List documentPages = new List(); - private readonly IndirectReferenceToken rootPagesIndirectReference; + private readonly PdfStreamWriter context = new PdfStreamWriter(); + private readonly List pagesTokenReferences = new List(); + private readonly IndirectReferenceToken rootPagesReference; private decimal currentVersion = DefaultVersion; - private MemoryStream memory = new MemoryStream(); - private int pageCount = 0; + + private readonly Dictionary referencesFromDocument = + new Dictionary(); public DocumentMerger() { - var reserved = context.ReserveNumber(); - rootPagesIndirectReference = new IndirectReferenceToken(new IndirectReference(reserved, 0)); - - WriteHeaderToStream(); + rootPagesReference = context.ReserveNumberToken(); } public void AppendDocument(Catalog documentCatalog, decimal version, IPdfTokenScanner tokenScanner) { - if (memory == null) - { - throw new ObjectDisposedException("Merger closed already"); - } - currentVersion = Math.Max(version, currentVersion); - var (pagesReference, count) = CopyPagesTree(documentCatalog.PageTree, rootPagesIndirectReference, tokenScanner); + var (pagesReference, count) = CopyPagesTree(documentCatalog.PageTree, rootPagesReference, tokenScanner); pageCount += count; - documentPages.Add(new IndirectReferenceToken(pagesReference.Number)); + pagesTokenReferences.Add(pagesReference); + + referencesFromDocument.Clear(); } public byte[] Build() { - if (memory == null) - { - throw new ObjectDisposedException("Merger closed already"); - } - - if (documentPages.Count < 1) + if (pagesTokenReferences.Count < 1) { throw new PdfDocumentFormatException("Empty document"); } @@ -177,29 +165,23 @@ var pagesDictionary = new DictionaryToken(new Dictionary { { NameToken.Type, NameToken.Pages }, - { NameToken.Kids, new ArrayToken(documentPages) }, + { NameToken.Kids, new ArrayToken(pagesTokenReferences) }, { NameToken.Count, new NumericToken(pageCount) } }); - var pagesRef = context.WriteObject(memory, pagesDictionary, (int)rootPagesIndirectReference.Data.ObjectNumber); + var pagesRef = context.WriteToken( pagesDictionary, (int)rootPagesReference.Data.ObjectNumber); var catalog = new DictionaryToken(new Dictionary { { NameToken.Type, NameToken.Catalog }, - { NameToken.Pages, new IndirectReferenceToken(pagesRef.Number) } + { NameToken.Pages, pagesRef } }); - var catalogRef = context.WriteObject(memory, catalog); - - TokenWriter.WriteCrossReferenceTable(context.ObjectOffsets, catalogRef, memory, null); + var catalogRef = context.WriteToken(catalog); - if (currentVersion != DefaultVersion) - { - memory.Seek(0, SeekOrigin.Begin); - WriteHeaderToStream(); - } - - var bytes = memory.ToArray(); + context.Flush(currentVersion, catalogRef); + + var bytes = context.ToArray(); Close(); @@ -208,22 +190,20 @@ public void Close() { - memory?.Dispose(); - memory = null; + context.Dispose(); } - private (ObjectToken, int) CopyPagesTree(PageTreeNode treeNode, IndirectReferenceToken treeParentReference, IPdfTokenScanner tokenScanner) + private (IndirectReferenceToken, int) CopyPagesTree(PageTreeNode treeNode, IndirectReferenceToken treeParentReference, IPdfTokenScanner tokenScanner) { Debug.Assert(!treeNode.IsPage); - - var currentNodeReserved = context.ReserveNumber(); - var currentNodeReference = new IndirectReferenceToken(new IndirectReference(currentNodeReserved, 0)); + + var currentNodeReference = context.ReserveNumberToken(); var pageReferences = new List(); var nodeCount = 0; foreach (var pageNode in treeNode.Children) { - ObjectToken newEntry; + IndirectReferenceToken newEntry; if (!pageNode.IsPage) { var count = 0; @@ -236,7 +216,7 @@ ++nodeCount; } - pageReferences.Add(new IndirectReferenceToken(newEntry.Number)); + pageReferences.Add(newEntry); } var newPagesNode = new Dictionary @@ -259,10 +239,10 @@ var pagesDictionary = new DictionaryToken(newPagesNode); - return (context.WriteObject(memory, pagesDictionary, currentNodeReserved), nodeCount); + return (context.WriteToken(pagesDictionary, (int)currentNodeReference.Data.ObjectNumber), nodeCount); } - private ObjectToken CopyPageNode(PageTreeNode pageNode, IndirectReferenceToken parentPagesObject, IPdfTokenScanner tokenScanner) + private IndirectReferenceToken CopyPageNode(PageTreeNode pageNode, IndirectReferenceToken parentPagesObject, IPdfTokenScanner tokenScanner) { Debug.Assert(pageNode.IsPage); @@ -285,79 +265,111 @@ pageDictionary.Add(NameToken.Create(name), CopyToken(token, tokenScanner)); } - return context.WriteObject(memory, new DictionaryToken(pageDictionary)); + return context.WriteToken(new DictionaryToken(pageDictionary)); } - + /// /// The purpose of this method is to resolve indirect reference. That mean copy the reference's content to the new document's stream /// and replace the indirect reference with the correct/new one /// /// Token to inspect for reference /// scanner get the content from the original document - /// A copy of the token with all his content copied to the new document's stream - private IToken CopyToken(IToken tokenToCopy, IPdfTokenScanner tokenScanner) + /// Weather we want to create a new token for basic token(Ex. NumericToken) because the original could be modified + /// A reference of the token that was copied. With all the reference updated + private IToken CopyToken(IToken tokenToCopy, IPdfTokenScanner tokenScanner, bool deepCopy = false) { - if (tokenToCopy is DictionaryToken dictionaryToken) + // This token need to be deep copied, because they could contain reference. So we have to update them. + switch (tokenToCopy) { - var newContent = new Dictionary(); - foreach (var setPair in dictionaryToken.Data) + case DictionaryToken dictionaryToken: { - var name = setPair.Key; - var token = setPair.Value; - newContent.Add(NameToken.Create(name), CopyToken(token, tokenScanner)); - } + var newContent = new Dictionary(); + foreach (var setPair in dictionaryToken.Data) + { + var name = setPair.Key; + var token = setPair.Value; + newContent.Add(NameToken.Create(name), CopyToken(token, tokenScanner, deepCopy)); + } - return new DictionaryToken(newContent); - } - else if (tokenToCopy is ArrayToken arrayToken) - { - var newArray = new List(arrayToken.Length); - foreach (var token in arrayToken.Data) + return new DictionaryToken(newContent); + } + case ArrayToken arrayToken: { - newArray.Add(CopyToken(token, tokenScanner)); + var newArray = new List(arrayToken.Length); + foreach (var token in arrayToken.Data) + { + newArray.Add(CopyToken(token, tokenScanner, deepCopy)); + } + + return new ArrayToken(newArray); } + case IndirectReferenceToken referenceToken: + { + if (referencesFromDocument.TryGetValue(referenceToken, out var newReferenceToken)) + { + return newReferenceToken; + } + + var tokenObject = DirectObjectFinder.Get(referenceToken.Data, tokenScanner); - return new ArrayToken(newArray); - } - else if (tokenToCopy is IndirectReferenceToken referenceToken) - { - var tokenObject = DirectObjectFinder.Get(referenceToken.Data, tokenScanner); + Debug.Assert(!(tokenObject is IndirectReferenceToken)); - Debug.Assert(!(tokenObject is IndirectReferenceToken)); + var newToken = CopyToken(tokenObject, tokenScanner, deepCopy); + newReferenceToken = context.WriteToken(newToken); + + referencesFromDocument.Add(referenceToken, newReferenceToken); + + return newReferenceToken; + } + case StreamToken streamToken: + { + var properties = CopyToken(streamToken.StreamDictionary, tokenScanner, deepCopy) as DictionaryToken; + Debug.Assert(properties != null); - var newToken = CopyToken(tokenObject, tokenScanner); - var objToken = context.WriteObject(memory, newToken); - return new IndirectReferenceToken(objToken.Number); + var bytes = deepCopy ? new List(streamToken.Data) : streamToken.Data; + return new StreamToken(properties, bytes); + } + + case ObjectToken _: + { + // This is because, since we don't write token directly to the stream. So can't know the offset. + // The token would be invalid. Although I don't think the copy of an object token would ever happen + throw new NotSupportedException("Copying a Object token is not supported"); + } } - else if (tokenToCopy is StreamToken streamToken) - { - var properties = CopyToken(streamToken.StreamDictionary, tokenScanner) as DictionaryToken; - Debug.Assert(properties != null); - return new StreamToken(properties, new List(streamToken.Data)); - } - else // Non Complex Token - BooleanToken, NumericToken, NameToken, Etc... - { + + if (!deepCopy) return tokenToCopy; + + // This tokens can't be deep copied + if (tokenToCopy is EndOfLineToken || tokenToCopy is NullToken || tokenToCopy is BooleanToken) + return tokenToCopy; + + switch (tokenToCopy) + { + case CommentToken commentToken: + return new CommentToken(commentToken.Data); + + case HexToken hexToken: + return new HexToken(hexToken.Data.ToCharArray()); + + case InlineImageDataToken imageDataToken: + return new InlineImageDataToken(imageDataToken.Data); + + case NameToken nameToken: + return NameToken.Create(nameToken.Data); + + case NumericToken numericToken: + return new NumericToken(numericToken.Data); + + case OperatorToken operatorToken: + return OperatorToken.Create(operatorToken.Data); + + case StringToken stringToken: + return new StringToken(stringToken.Data, stringToken.EncodedWith); } - } - - private void WriteHeaderToStream() - { - WriteString($"%PDF-{currentVersion:0.0}", memory); - - memory.WriteText("%"); - memory.WriteByte(169); - memory.WriteByte(205); - memory.WriteByte(196); - memory.WriteByte(210); - memory.WriteNewLine(); - } - - private static void WriteString(string text, Stream stream) - { - var bytes = OtherEncodings.StringAsLatin1Bytes(text); - stream.Write(bytes, 0, bytes.Length); - stream.WriteNewLine(); + + throw new NotSupportedException($"Tried to deep copy {tokenToCopy.GetType()}"); } private static bool IgnoreKeyForPagesNode(KeyValuePair token)