From a190653683d48a11117d341276d4c71be2d042af Mon Sep 17 00:00:00 2001
From: InusualZ
Date: Fri, 2 Oct 2020 21:33:36 -0400
Subject: [PATCH] Use the new IObjectCopier API in the PdfMerger

Added a new PagesCopier, this class would be the one responsible to give a
parent to the page tree that are copied from other documents

Review amendments:
 * PagesCopier.CopyPagesTree skips a source `/Parent` entry instead of adding
   the key a second time.
 * DocumentMerger.AppendDocument throws when the page tree could not be copied
   instead of storing a null reference.
---
 .../Writer/Copier/Page/PagesCopier.cs         | 118 +++++++++++
 src/UglyToad.PdfPig/Writer/PdfMerger.cs       | 199 +++---------------
 2 files changed, 146 insertions(+), 171 deletions(-)
 create mode 100644 src/UglyToad.PdfPig/Writer/Copier/Page/PagesCopier.cs

diff --git a/src/UglyToad.PdfPig/Writer/Copier/Page/PagesCopier.cs b/src/UglyToad.PdfPig/Writer/Copier/Page/PagesCopier.cs
new file mode 100644
index 00000000..4da1d87a
--- /dev/null
+++ b/src/UglyToad.PdfPig/Writer/Copier/Page/PagesCopier.cs
@@ -0,0 +1,118 @@
+namespace UglyToad.PdfPig.Writer.Copier.Page
+{
+    using System;
+    using System.Collections.Generic;
+    using System.Diagnostics;
+    using Core;
+    using Tokens;
+
+    /// <summary>
+    /// Copies `/Pages` dictionaries (page tree nodes). A tree without an indirect `/Parent` is re-created
+    /// with `/Parent` pointing at the root pages reference given to the constructor, when one was given.
+    /// </summary>
+    internal class PagesCopier : IObjectCopier
+    {
+        private readonly ObjectCopier copier;
+
+        private readonly IndirectReferenceToken rootPagesReferenceToken;
+
+        /// <summary>
+        /// Create a new <see cref="PagesCopier"/>.
+        /// </summary>
+        /// <param name="mainCopier">Copier used to copy nested tokens, reserve object numbers and write tokens.</param>
+        /// <param name="rootPagesToken">New parent for root page trees; when null, trees are copied as they are.</param>
+        public PagesCopier(ObjectCopier mainCopier, IndirectReferenceToken rootPagesToken = null)
+        {
+            copier = mainCopier;
+            rootPagesReferenceToken = rootPagesToken;
+        }
+
+        /// <summary>
+        /// Copy the `/Pages` dictionary referenced by <paramref name="sourceToken"/>.
+        /// </summary>
+        /// <param name="sourceToken">Token to copy; anything but an <see cref="IndirectReferenceToken"/> is rejected.</param>
+        /// <param name="tokenScanner">Resolves a reference of the source document to the token it points to.</param>
+        /// <returns>Reference to the copy, or null if the token does not reference a `/Pages` dictionary.</returns>
+        public IToken CopyObject(IToken sourceToken, Func<IndirectReferenceToken, IToken> tokenScanner)
+        {
+            if (!(sourceToken is IndirectReferenceToken sourceReferenceToken))
+            {
+                return null;
+            }
+
+            // Check if this token haven't been copied before
+            if (copier.TryGetNewReference(sourceReferenceToken, out var newReferenceToken))
+            {
+                return newReferenceToken;
+            }
+
+            // Make sure that we are copying a DictionaryToken
+            var token = tokenScanner(sourceReferenceToken);
+            if (!(token is DictionaryToken dictionaryToken))
+            {
+                return null;
+            }
+
+            // Make sure we are copying a `/Pages` Dictionary
+            if (!dictionaryToken.TryGet(NameToken.Type, out var nameTypeToken) || !nameTypeToken.Equals(NameToken.Pages))
+            {
+                return null;
+            }
+
+            // We have to reserve the reference before hand, because if we don't, we would fall in a loop.
+            // The child `/Page` have a reference to the parent
+            var tokenNumber = copier.ReserveTokenNumber();
+            copier.SetNewReference(sourceReferenceToken, new IndirectReferenceToken(new IndirectReference(tokenNumber, 0)));
+
+            // If `/Pages` is not the root page node, copy the token normally
+            // We are testing for one:
+            // * If @rootPagesReferenceToken is null, just do a normal copy of the tree
+            // * If the tree have a Parent NameToken, it means the tree is not a root tree so we don't have to assign him
+            // a new parent
+            if (rootPagesReferenceToken == null || dictionaryToken.TryGet(NameToken.Parent, out IndirectReferenceToken _))
+            {
+                return copier.WriteToken(copier.CopyObject(dictionaryToken, tokenScanner), tokenNumber);
+            }
+
+            // Since the tree is a root tree, it means that the tree comes from another document, we have to make sure
+            // that the new tree is a child of the new root tree, this we do by adding a Parent NameToken to the tree,
+            // that point to @rootPagesReferenceToken
+            return CopyPagesTree(dictionaryToken, tokenNumber, tokenScanner);
+        }
+
+        // Writes a copy of a root `/Pages` dictionary under the reserved object number, with `/Parent` set to the new root
+        private IndirectReferenceToken CopyPagesTree(DictionaryToken pagesDictionary, int reservedNumber, Func<IndirectReferenceToken, IToken> tokenScanner)
+        {
+            Debug.Assert(rootPagesReferenceToken != null);
+
+            var newContent = new Dictionary<NameToken, IToken>()
+            {
+                {NameToken.Parent, rootPagesReferenceToken}
+            };
+
+            foreach (var dataSet in pagesDictionary.Data)
+            {
+                // `/Parent` is already set above; a source `/Parent` that is not an indirect reference still
+                // reaches this point, and adding it a second time would throw on the duplicate key
+                if (string.Equals(dataSet.Key, NameToken.Parent.Data, StringComparison.Ordinal))
+                {
+                    continue;
+                }
+
+                newContent.Add(NameToken.Create(dataSet.Key), copier.CopyObject(dataSet.Value, tokenScanner));
+            }
+
+            var newPagesTree = new DictionaryToken(newContent);
+
+            return copier.WriteToken(newPagesTree, reservedNumber);
+        }
+
+        /// <summary>
+        /// This copier keeps no per-document state, so there is nothing to clear.
+        /// </summary>
+        public void ClearReference()
+        {
+            // Nothing to do
+        }
+    }
+}
diff --git a/src/UglyToad.PdfPig/Writer/PdfMerger.cs b/src/UglyToad.PdfPig/Writer/PdfMerger.cs
index 1d9a25a9..c6c1c8a7 100644
--- a/src/UglyToad.PdfPig/Writer/PdfMerger.cs
+++ b/src/UglyToad.PdfPig/Writer/PdfMerger.cs
@@ -2,9 +2,10 @@
 {
     using System;
     using System.Collections.Generic;
-    using System.Diagnostics;
     using System.IO;
     using Content;
+    using Copier;
+    using Copier.Page;
     using Core;
     using CrossReference;
     using Encryption;
@@ -149,31 +150,42 @@
         private class DocumentMerger
         {
             private const decimal DefaultVersion = 1.2m;
-            
-            private readonly PdfStreamWriter context = new PdfStreamWriter();
-            private readonly List<IndirectReferenceToken> pagesTokenReferences = new List<IndirectReferenceToken>();
+
+            private readonly PdfStreamWriter context;
+            private readonly List<IndirectReferenceToken> pagesTokenReferences;
             private readonly IndirectReferenceToken rootPagesReference;
+            private readonly MultiCopier copier;
 
             private decimal currentVersion = DefaultVersion;
             private int pageCount = 0;
-            
-            private readonly Dictionary<IndirectReferenceToken, IndirectReferenceToken> referencesFromDocument =
-                new Dictionary<IndirectReferenceToken, IndirectReferenceToken>();
 
             public DocumentMerger()
             {
+                context = new PdfStreamWriter();
+                pagesTokenReferences = new List<IndirectReferenceToken>();
+
                 rootPagesReference = context.ReserveNumberToken();
+
+                copier = new MultiCopier(context);
+                copier.AddCopier(new PagesCopier(copier, rootPagesReference));
             }
-            
+
             public void AppendDocument(Catalog documentCatalog, decimal version, IPdfTokenScanner tokenScanner)
             {
                 currentVersion = Math.Max(version, currentVersion);
 
-                var (pagesReference, count) = CopyPagesTree(documentCatalog.PageTree, rootPagesReference, tokenScanner);
-                pageCount += count;
-                pagesTokenReferences.Add(pagesReference);
+                var sourcePages = new IndirectReferenceToken(documentCatalog.PageTree.Reference);
+                if (!(copier.CopyObject(sourcePages, tokenScanner) is IndirectReferenceToken copiedPages))
+                {
+                    // Stop here rather than storing a null page tree reference when nothing usable was copied
+                    throw new InvalidOperationException("Could not copy the page tree of the appended document.");
+                }
+
+                pagesTokenReferences.Add(copiedPages);
 
-                referencesFromDocument.Clear();
+                pageCount += documentCatalog.PagesDictionary.Get<NumericToken>(NameToken.Count, tokenScanner).Int;
+
+                copier.ClearReference();
             }
 
             public byte[] Build()
@@ -190,7 +202,7 @@
                     { NameToken.Count, new NumericToken(pageCount) }
                 });
 
-                var pagesRef = context.WriteToken( pagesDictionary, (int)rootPagesReference.Data.ObjectNumber);
+                var pagesRef = context.WriteToken(pagesDictionary, (int)rootPagesReference.Data.ObjectNumber);
 
                 var catalog = new DictionaryToken(new Dictionary<NameToken, IToken>
                 {
@@ -199,9 +211,9 @@
                 });
 
                 var catalogRef = context.WriteToken(catalog);
-                
+
                 context.Flush(currentVersion, catalogRef);
-                
+
                 var bytes = context.ToArray();
 
                 Close();
@@ -209,165 +221,10 @@
                 return bytes;
             }
 
-            public void Close()
+            private void Close()
             {
                 context.Dispose();
             }
-
-            private (IndirectReferenceToken, int) CopyPagesTree(PageTreeNode treeNode, IndirectReferenceToken treeParentReference, IPdfTokenScanner tokenScanner)
-            {
-                Debug.Assert(!treeNode.IsPage);
-
-                var currentNodeReference = context.ReserveNumberToken();
-
-                var pageReferences = new List<IndirectReferenceToken>();
-                var nodeCount = 0;
-                foreach (var pageNode in treeNode.Children)
-                {
-                    IndirectReferenceToken newEntry;
-                    if (!pageNode.IsPage)
-                    {
-                        var count = 0;
-                        (newEntry, count) = CopyPagesTree(pageNode, currentNodeReference, tokenScanner);
-                        nodeCount += count;
-                    }
-                    else
-                    {
-                        newEntry = CopyPageNode(pageNode, currentNodeReference, tokenScanner);
-                        ++nodeCount;
-                    }
-
-                    pageReferences.Add(newEntry);
-                }
-
-                var newPagesNode = new Dictionary<NameToken, IToken>
-                {
-                    { NameToken.Type, NameToken.Pages },
-                    { NameToken.Kids, new ArrayToken(pageReferences) },
-                    { NameToken.Count, new NumericToken(nodeCount) },
-                    { NameToken.Parent, treeParentReference }
-                };
-
-                foreach (var pair in treeNode.NodeDictionary.Data)
-                {
-                    if (IgnoreKeyForPagesNode(pair))
-                    {
-                        continue;
-                    }
-
-                    newPagesNode[NameToken.Create(pair.Key)] = CopyToken(pair.Value, tokenScanner);
-                }
-
-                var pagesDictionary = new DictionaryToken(newPagesNode);
-
-                return (context.WriteToken(pagesDictionary, (int)currentNodeReference.Data.ObjectNumber), nodeCount);
-            }
-
-            private IndirectReferenceToken CopyPageNode(PageTreeNode pageNode, IndirectReferenceToken parentPagesObject, IPdfTokenScanner tokenScanner)
-            {
-                Debug.Assert(pageNode.IsPage);
-
-                var pageDictionary = new Dictionary<NameToken, IToken>
-                {
-                    {NameToken.Parent, parentPagesObject},
-                };
-
-                foreach (var setPair in pageNode.NodeDictionary.Data)
-                {
-                    var name = setPair.Key;
-                    var token = setPair.Value;
-
-                    if (name == NameToken.Parent)
-                    {
-                        // Skip Parent token, since we have to reassign it
-                        continue;
-                    }
-
-                    pageDictionary.Add(NameToken.Create(name), CopyToken(token, tokenScanner));
-                }
-
-                return context.WriteToken(new DictionaryToken(pageDictionary));
-            }
-
-            /// <summary>
-            /// The purpose of this method is to resolve indirect reference. That mean copy the reference's content to the new document's stream
-            /// and replace the indirect reference with the correct/new one
-            /// </summary>
-            /// <param name="tokenToCopy">Token to inspect for reference</param>
-            /// <param name="tokenScanner">scanner get the content from the original document</param>
-            /// <returns>A reference of the token that was copied. With all the reference updated</returns>
-            private IToken CopyToken(IToken tokenToCopy, IPdfTokenScanner tokenScanner)
-            {
-                // This token need to be deep copied, because they could contain reference. So we have to update them.
-                switch (tokenToCopy)
-                {
-                    case DictionaryToken dictionaryToken:
-                        {
-                            var newContent = new Dictionary<NameToken, IToken>();
-                            foreach (var setPair in dictionaryToken.Data)
-                            {
-                                var name = setPair.Key;
-                                var token = setPair.Value;
-                                newContent.Add(NameToken.Create(name), CopyToken(token, tokenScanner));
-                            }
-
-                            return new DictionaryToken(newContent);
-                        }
-                    case ArrayToken arrayToken:
-                        {
-                            var newArray = new List<IToken>(arrayToken.Length);
-                            foreach (var token in arrayToken.Data)
-                            {
-                                newArray.Add(CopyToken(token, tokenScanner));
-                            }
-
-                            return new ArrayToken(newArray);
-                        }
-                    case IndirectReferenceToken referenceToken:
-                        {
-                            if (referencesFromDocument.TryGetValue(referenceToken, out var newReferenceToken))
-                            {
-                                return newReferenceToken;
-                            }
-
-                            var tokenObject = DirectObjectFinder.Get<IToken>(referenceToken.Data, tokenScanner);
-
-                            Debug.Assert(!(tokenObject is IndirectReferenceToken));
-
-                            var newToken = CopyToken(tokenObject, tokenScanner);
-                            newReferenceToken = context.WriteToken(newToken);
-
-                            referencesFromDocument.Add(referenceToken, newReferenceToken);
-
-                            return newReferenceToken;
-                        }
-                    case StreamToken streamToken:
-                        {
-                            var properties = CopyToken(streamToken.StreamDictionary, tokenScanner) as DictionaryToken;
-                            Debug.Assert(properties != null);
-
-                            var bytes = streamToken.Data;
-                            return new StreamToken(properties, bytes);
-                        }
-
-                    case ObjectToken _:
-                        {
-                            // Since we don't write token directly to the stream.
-                            // We can't know the offset. Therefore the token would be invalid
-                            throw new NotSupportedException("Copying a Object token is not supported");
-                        }
-                }
-
-                return tokenToCopy;
-            }
-
-            private static bool IgnoreKeyForPagesNode(KeyValuePair<string, IToken> token)
-            {
-                return string.Equals(token.Key, NameToken.Type.Data, StringComparison.OrdinalIgnoreCase)
-                       || string.Equals(token.Key, NameToken.Kids.Data, StringComparison.OrdinalIgnoreCase)
-                       || string.Equals(token.Key, NameToken.Count.Data, StringComparison.OrdinalIgnoreCase)
-                       || string.Equals(token.Key, NameToken.Parent.Data, StringComparison.OrdinalIgnoreCase);
-            }
         }
     }
 }
\ No newline at end of file