Use the new IObjectCopier API in the PdfMerger

Added a new PagesCopier. This class is responsible for assigning a parent to the page trees that are copied from other documents.
This commit is contained in:
InusualZ
2020-10-02 21:33:36 -04:00
parent 8f0326a818
commit a190653683
2 changed files with 118 additions and 171 deletions

View File

@@ -0,0 +1,96 @@
namespace UglyToad.PdfPig.Writer.Copier.Page
{
    using System;
    using System.Collections.Generic;
    using System.Diagnostics;
    using Core;
    using Tokens;

    /// <summary>
    /// Copier for <c>/Pages</c> dictionaries. When a root pages reference is supplied, a copied
    /// <c>/Pages</c> node that has no indirect <c>/Parent</c> is written with its <c>/Parent</c>
    /// pointing at that root reference.
    /// </summary>
    internal class PagesCopier : IObjectCopier
    {
        private readonly ObjectCopier copier;

        private readonly IndirectReferenceToken rootPagesReferenceToken;

        /// <summary>
        /// Create a new <see cref="PagesCopier"/>.
        /// </summary>
        /// <param name="mainCopier">Copier used to copy nested tokens, track references and write the result.</param>
        /// <param name="rootPagesToken">
        /// Reference assigned as <c>/Parent</c> to copied root <c>/Pages</c> nodes.
        /// When <see langword="null"/>, <c>/Pages</c> nodes are copied without re-parenting.
        /// </param>
        /// <exception cref="ArgumentNullException"><paramref name="mainCopier"/> is <see langword="null"/>.</exception>
        public PagesCopier(ObjectCopier mainCopier, IndirectReferenceToken rootPagesToken = null)
        {
            copier = mainCopier ?? throw new ArgumentNullException(nameof(mainCopier));
            rootPagesReferenceToken = rootPagesToken;
        }

        /// <summary>
        /// Copy an indirect reference that resolves to a <c>/Pages</c> dictionary.
        /// </summary>
        /// <param name="sourceToken">Token to copy; only an <see cref="IndirectReferenceToken"/> is handled.</param>
        /// <param name="tokenScanner">Resolves an indirect reference from the source document to its token.</param>
        /// <returns>
        /// The reference of the copied <c>/Pages</c> node, or <see langword="null"/> when
        /// <paramref name="sourceToken"/> is not an indirect reference, does not resolve to a dictionary,
        /// or the dictionary is not of type <c>/Pages</c>.
        /// </returns>
        public IToken CopyObject(IToken sourceToken, Func<IndirectReferenceToken, IToken> tokenScanner)
        {
            if (!(sourceToken is IndirectReferenceToken sourceReferenceToken))
            {
                return null;
            }

            // Reuse the new reference if this token has already been copied.
            if (copier.TryGetNewReference(sourceReferenceToken, out var newReferenceToken))
            {
                return newReferenceToken;
            }

            // Only dictionaries can be `/Pages` nodes.
            var token = tokenScanner(sourceReferenceToken);
            if (!(token is DictionaryToken dictionaryToken))
            {
                return null;
            }

            // Only `/Type /Pages` dictionaries are handled by this copier.
            if (!dictionaryToken.TryGet(NameToken.Type, out var nameTypeToken) || !nameTypeToken.Equals(NameToken.Pages))
            {
                return null;
            }

            // The reference has to be reserved and registered before the content is copied: the child
            // `/Page` entries refer back to their parent, so copying first would loop forever.
            var tokenNumber = copier.ReserveTokenNumber();
            copier.SetNewReference(sourceReferenceToken, new IndirectReferenceToken(new IndirectReference(tokenNumber, 0)));

            // Copy the node as-is when either:
            //  * no root reference was supplied, so there is nothing to re-parent to, or
            //  * the node already has an indirect `/Parent`, meaning it is not a root node.
            if (rootPagesReferenceToken == null || dictionaryToken.TryGet(NameToken.Parent, out IndirectReferenceToken _))
            {
                return copier.WriteToken(copier.CopyObject(dictionaryToken, tokenScanner), tokenNumber);
            }

            // The node is the root of a tree coming from another document: make it a child of the
            // new root by giving it a `/Parent` that points to the supplied root reference.
            return CopyPagesTree(dictionaryToken, tokenNumber, tokenScanner);
        }

        /// <summary>
        /// Write a copy of <paramref name="pagesDictionary"/> whose <c>/Parent</c> is the root pages reference.
        /// </summary>
        /// <param name="pagesDictionary">Source <c>/Pages</c> dictionary.</param>
        /// <param name="reservedNumber">Object number previously reserved for the copy.</param>
        /// <param name="tokenScanner">Resolves an indirect reference from the source document to its token.</param>
        /// <returns>The reference returned by the copier after writing the new dictionary.</returns>
        private IndirectReferenceToken CopyPagesTree(DictionaryToken pagesDictionary, int reservedNumber, Func<IndirectReferenceToken, IToken> tokenScanner)
        {
            Debug.Assert(rootPagesReferenceToken != null);

            var newContent = new Dictionary<NameToken, IToken>
            {
                { NameToken.Parent, rootPagesReferenceToken }
            };

            foreach (var dataSet in pagesDictionary.Data)
            {
                // The caller only rules out a `/Parent` that is an indirect reference. Any other
                // `/Parent` value would collide with the entry added above and make Add throw,
                // so the source value is dropped in favour of the new parent.
                if (string.Equals(dataSet.Key, NameToken.Parent.Data, StringComparison.Ordinal))
                {
                    continue;
                }

                newContent.Add(NameToken.Create(dataSet.Key), copier.CopyObject(dataSet.Value, tokenScanner));
            }

            var newPagesTree = new DictionaryToken(newContent);
            return copier.WriteToken(newPagesTree, reservedNumber);
        }

        /// <inheritdoc/>
        public void ClearReference()
        {
            // Nothing to do: this copier keeps no per-document state of its own.
        }
    }
}

View File

@@ -2,9 +2,10 @@
{
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using Content;
using Copier;
using Copier.Page;
using Core;
using CrossReference;
using Encryption;
@@ -149,31 +150,36 @@
private class DocumentMerger
{
private const decimal DefaultVersion = 1.2m;
private readonly PdfStreamWriter context = new PdfStreamWriter();
private readonly List<IndirectReferenceToken> pagesTokenReferences = new List<IndirectReferenceToken>();
private readonly PdfStreamWriter context;
private readonly List<IndirectReferenceToken> pagesTokenReferences;
private readonly IndirectReferenceToken rootPagesReference;
private readonly MultiCopier copier;
private decimal currentVersion = DefaultVersion;
private int pageCount = 0;
private readonly Dictionary<IndirectReferenceToken, IndirectReferenceToken> referencesFromDocument =
new Dictionary<IndirectReferenceToken, IndirectReferenceToken>();
/// <summary>
/// Creates the output writer, reserves the object number for the merged root pages node and
/// registers a <see cref="PagesCopier"/> that re-parents copied root page trees to it.
/// </summary>
public DocumentMerger()
{
    var writer = new PdfStreamWriter();
    context = writer;

    pagesTokenReferences = new List<IndirectReferenceToken>();

    // Reserved up front so copied page trees can point at the root before it is written.
    var reservedRoot = writer.ReserveNumberToken();
    rootPagesReference = reservedRoot;

    var multiCopier = new MultiCopier(writer);
    copier = multiCopier;
    multiCopier.AddCopier(new PagesCopier(multiCopier, reservedRoot));
}
/// <summary>
/// Appends the page tree of a document to the merged output, keeping track of the highest
/// PDF version seen and of the total page count.
/// </summary>
/// <param name="documentCatalog">Catalog of the document to append; its page tree is the part that is copied.</param>
/// <param name="version">PDF version of the appended document.</param>
/// <param name="tokenScanner">Token scanner of the appended document, handed to the copy routines.</param>
public void AppendDocument(Catalog documentCatalog, decimal version, IPdfTokenScanner tokenScanner)
{
// The merged document uses the highest version among all appended documents.
currentVersion = Math.Max(version, currentVersion);
// NOTE(review): the lines below appear to show both variants of this method side by side
// (the CopyPagesTree/referencesFromDocument path and the copier path) - confirm which one applies.
var (pagesReference, count) = CopyPagesTree(documentCatalog.PageTree, rootPagesReference, tokenScanner);
pageCount += count;
pagesTokenReferences.Add(pagesReference);
var copiedPages = copier.CopyObject(new IndirectReferenceToken(documentCatalog.PageTree.Reference), tokenScanner) as IndirectReferenceToken;
pagesTokenReferences.Add(copiedPages);
// Reference mappings are cleared after each document is appended.
referencesFromDocument.Clear();
pageCount += documentCatalog.PagesDictionary.Get<NumericToken>(NameToken.Count, tokenScanner).Int;
copier.ClearReference();
}
public byte[] Build()
@@ -190,7 +196,7 @@
{ NameToken.Count, new NumericToken(pageCount) }
});
var pagesRef = context.WriteToken( pagesDictionary, (int)rootPagesReference.Data.ObjectNumber);
var pagesRef = context.WriteToken(pagesDictionary, (int)rootPagesReference.Data.ObjectNumber);
var catalog = new DictionaryToken(new Dictionary<NameToken, IToken>
{
@@ -199,9 +205,9 @@
});
var catalogRef = context.WriteToken(catalog);
context.Flush(currentVersion, catalogRef);
var bytes = context.ToArray();
Close();
@@ -209,165 +215,10 @@
return bytes;
}
/// <summary>
/// Disposes the stream writer held in <c>context</c>.
/// </summary>
// NOTE(review): two signature lines are shown here (public and private) - confirm which one applies.
public void Close()
private void Close()
{
context.Dispose();
}
/// <summary>
/// Recursively copies a non-leaf page tree node and everything below it into the output.
/// </summary>
/// <param name="treeNode">Source node; must not be a page.</param>
/// <param name="treeParentReference">Reference written as the <c>/Parent</c> of the copied node.</param>
/// <param name="tokenScanner">Token scanner of the source document.</param>
/// <returns>The reference of the written node and the number of pages found below it.</returns>
private (IndirectReferenceToken, int) CopyPagesTree(PageTreeNode treeNode, IndirectReferenceToken treeParentReference, IPdfTokenScanner tokenScanner)
{
    Debug.Assert(!treeNode.IsPage);

    // Reserved before visiting the children, because each child needs it as its parent.
    var reservedReference = context.ReserveNumberToken();

    var kids = new List<IndirectReferenceToken>();
    var totalPages = 0;

    foreach (var child in treeNode.Children)
    {
        if (child.IsPage)
        {
            var pageReference = CopyPageNode(child, reservedReference, tokenScanner);
            ++totalPages;
            kids.Add(pageReference);
            continue;
        }

        var (subTreeReference, subTreePages) = CopyPagesTree(child, reservedReference, tokenScanner);
        totalPages += subTreePages;
        kids.Add(subTreeReference);
    }

    // Structural entries are rebuilt here rather than copied from the source node.
    var entries = new Dictionary<NameToken, IToken>();
    entries.Add(NameToken.Type, NameToken.Pages);
    entries.Add(NameToken.Kids, new ArrayToken(kids));
    entries.Add(NameToken.Count, new NumericToken(totalPages));
    entries.Add(NameToken.Parent, treeParentReference);

    // Every other entry of the source node is deep-copied.
    foreach (var sourceEntry in treeNode.NodeDictionary.Data)
    {
        if (!IgnoreKeyForPagesNode(sourceEntry))
        {
            entries[NameToken.Create(sourceEntry.Key)] = CopyToken(sourceEntry.Value, tokenScanner);
        }
    }

    var reservedNumber = (int)reservedReference.Data.ObjectNumber;
    var writtenReference = context.WriteToken(new DictionaryToken(entries), reservedNumber);

    return (writtenReference, totalPages);
}
/// <summary>
/// Copies a single page into the output, replacing its <c>/Parent</c> with the given reference.
/// </summary>
/// <param name="pageNode">Source node; must be a page.</param>
/// <param name="parentPagesObject">Reference written as the <c>/Parent</c> of the copied page.</param>
/// <param name="tokenScanner">Token scanner of the source document.</param>
/// <returns>The reference of the written page dictionary.</returns>
private IndirectReferenceToken CopyPageNode(PageTreeNode pageNode, IndirectReferenceToken parentPagesObject, IPdfTokenScanner tokenScanner)
{
    Debug.Assert(pageNode.IsPage);

    var entries = new Dictionary<NameToken, IToken>();
    entries.Add(NameToken.Parent, parentPagesObject);

    foreach (var sourceEntry in pageNode.NodeDictionary.Data)
    {
        // The source parent is dropped: the new one was already assigned above.
        var isParentEntry = sourceEntry.Key == NameToken.Parent;
        if (!isParentEntry)
        {
            entries.Add(NameToken.Create(sourceEntry.Key), CopyToken(sourceEntry.Value, tokenScanner));
        }
    }

    var copiedPage = new DictionaryToken(entries);
    return context.WriteToken(copiedPage);
}
/// <summary>
/// Deep-copies a token so that every indirect reference it contains points at an object in the new
/// document. Referenced objects are copied to the new document's stream and the reference is replaced
/// with the new one.
/// </summary>
/// <param name="tokenToCopy">Token to inspect for references.</param>
/// <param name="tokenScanner">Scanner used to get the content from the original document.</param>
/// <returns>
/// A copy of the token with all references updated. Tokens that are not dictionaries, arrays,
/// indirect references, streams or object tokens are returned as they are.
/// </returns>
/// <exception cref="NotSupportedException"><paramref name="tokenToCopy"/> is or contains an <c>ObjectToken</c>.</exception>
private IToken CopyToken(IToken tokenToCopy, IPdfTokenScanner tokenScanner)
{
    // These tokens need to be deep copied because they can contain references that have to be updated.
    switch (tokenToCopy)
    {
        case DictionaryToken dictionaryToken:
        {
            var newContent = new Dictionary<NameToken, IToken>();
            foreach (var setPair in dictionaryToken.Data)
            {
                newContent.Add(NameToken.Create(setPair.Key), CopyToken(setPair.Value, tokenScanner));
            }

            return new DictionaryToken(newContent);
        }
        case ArrayToken arrayToken:
        {
            var newArray = new List<IToken>(arrayToken.Length);
            foreach (var token in arrayToken.Data)
            {
                newArray.Add(CopyToken(token, tokenScanner));
            }

            return new ArrayToken(newArray);
        }
        case IndirectReferenceToken referenceToken:
        {
            if (referencesFromDocument.TryGetValue(referenceToken, out var newReferenceToken))
            {
                return newReferenceToken;
            }

            var tokenObject = DirectObjectFinder.Get<IToken>(referenceToken.Data, tokenScanner);
            Debug.Assert(!(tokenObject is IndirectReferenceToken));

            // Reserve and register the new reference BEFORE copying the content. If the content
            // refers back to this object (directly or through other objects), the lookup above
            // then ends the recursion instead of copying the same object again without end.
            newReferenceToken = context.ReserveNumberToken();
            referencesFromDocument.Add(referenceToken, newReferenceToken);

            var newToken = CopyToken(tokenObject, tokenScanner);
            return context.WriteToken(newToken, (int)newReferenceToken.Data.ObjectNumber);
        }
        case StreamToken streamToken:
        {
            var properties = CopyToken(streamToken.StreamDictionary, tokenScanner) as DictionaryToken;
            Debug.Assert(properties != null);

            // Only the stream dictionary is copied; the data is reused as it is.
            var bytes = streamToken.Data;
            return new StreamToken(properties, bytes);
        }
        case ObjectToken _:
        {
            // Since we don't write the token directly to the stream, we can't know the offset.
            // Therefore the token would be invalid.
            throw new NotSupportedException("Copying a Object token is not supported");
        }
    }

    return tokenToCopy;
}
/// <summary>
/// Tells whether a dictionary entry is one of the keys <c>Type</c>, <c>Kids</c>, <c>Count</c> or
/// <c>Parent</c>, so that a caller can leave it out of a copy.
/// </summary>
/// <param name="token">Entry (key and value) of the source dictionary.</param>
/// <returns><see langword="true"/> when the key is one of the four names listed above.</returns>
private static bool IgnoreKeyForPagesNode(KeyValuePair<string, IToken> token)
{
    // PDF names are case-sensitive, so a key that differs only by case (e.g. "count") is a
    // different entry and must not be treated as one of these four.
    var key = token.Key;

    return string.Equals(key, NameToken.Type.Data, StringComparison.Ordinal)
           || string.Equals(key, NameToken.Kids.Data, StringComparison.Ordinal)
           || string.Equals(key, NameToken.Count.Data, StringComparison.Ordinal)
           || string.Equals(key, NameToken.Parent.Data, StringComparison.Ordinal);
}
}
}
}