mirror of
https://github.com/UglyToad/PdfPig.git
synced 2026-03-10 00:23:29 +08:00
Use the new IObjectCopier API in the PdfMerger
Added a new PagesCopier, this class would be the one responsible to give a parent to the page tree that are copied from other documents
This commit is contained in:
96
src/UglyToad.PdfPig/Writer/Copier/Page/PagesCopier.cs
Normal file
96
src/UglyToad.PdfPig/Writer/Copier/Page/PagesCopier.cs
Normal file
@@ -0,0 +1,96 @@
|
||||
namespace UglyToad.PdfPig.Writer.Copier.Page
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using Core;
|
||||
using Tokens;
|
||||
|
||||
/// <inheritdoc/>
|
||||
internal class PagesCopier : IObjectCopier
|
||||
{
|
||||
private readonly ObjectCopier copier;
|
||||
|
||||
private readonly IndirectReferenceToken rootPagesReferenceToken;
|
||||
|
||||
/// <inheritdoc/>
|
||||
public PagesCopier(ObjectCopier mainCopier, IndirectReferenceToken rootPagesToken = null)
|
||||
{
|
||||
copier = mainCopier;
|
||||
rootPagesReferenceToken = rootPagesToken;
|
||||
}
|
||||
|
||||
/// <inheritdoc/>
|
||||
public IToken CopyObject(IToken sourceToken, Func<IndirectReferenceToken, IToken> tokenScanner)
|
||||
{
|
||||
if (!(sourceToken is IndirectReferenceToken sourceReferenceToken))
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
// Check if this token haven't been copied before
|
||||
if (copier.TryGetNewReference(sourceReferenceToken, out var newReferenceToken))
|
||||
{
|
||||
return newReferenceToken;
|
||||
}
|
||||
|
||||
// Make sure that we are copying a DictionaryToken
|
||||
var token = tokenScanner(sourceReferenceToken);
|
||||
if (!(token is DictionaryToken dictionaryToken))
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
// Make sure we are copying a `/Pages` Dictionary
|
||||
if (!dictionaryToken.TryGet(NameToken.Type, out var nameTypeToken) || !nameTypeToken.Equals(NameToken.Pages))
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
// We have to reserve the reference before hand, because if we don't, we would fall in a loop.
|
||||
// The child `/Page` have a reference to the parent
|
||||
var tokenNumber = copier.ReserveTokenNumber();
|
||||
copier.SetNewReference(sourceReferenceToken, new IndirectReferenceToken(new IndirectReference(tokenNumber, 0)));
|
||||
|
||||
// If `/Pages` is not the root page node, copy the token normally
|
||||
// We are testing for one:
|
||||
// * If @rootPagesReferenceToken is null, just do a normal copy of the tree
|
||||
// * If the tree have a Parent NameToken, it means the tree is not a root tree so we don't have to assign him
|
||||
// a new parent
|
||||
if (rootPagesReferenceToken == null || dictionaryToken.TryGet(NameToken.Parent, out IndirectReferenceToken _))
|
||||
{
|
||||
return copier.WriteToken(copier.CopyObject(dictionaryToken, tokenScanner), tokenNumber);
|
||||
}
|
||||
|
||||
// Since the tree is a root tree, it means that the tree comes from another document, we have to make sure
|
||||
// that the new tree is a child of the new root tree, this we do by adding a Parent NameToken to the tree,
|
||||
// that point to @rootPagesReferenceToken
|
||||
return CopyPagesTree(dictionaryToken, tokenNumber, tokenScanner);
|
||||
}
|
||||
|
||||
private IndirectReferenceToken CopyPagesTree(DictionaryToken pagesDictionary, int reservedNumber, Func<IndirectReferenceToken, IToken> tokenScanner)
|
||||
{
|
||||
Debug.Assert(rootPagesReferenceToken != null);
|
||||
|
||||
var newContent = new Dictionary<NameToken, IToken>()
|
||||
{
|
||||
{NameToken.Parent, rootPagesReferenceToken}
|
||||
};
|
||||
|
||||
foreach (var dataSet in pagesDictionary.Data)
|
||||
{
|
||||
newContent.Add(NameToken.Create(dataSet.Key), copier.CopyObject(dataSet.Value, tokenScanner));
|
||||
}
|
||||
|
||||
var newPagesTree = new DictionaryToken(newContent);
|
||||
|
||||
return copier.WriteToken(newPagesTree, reservedNumber);
|
||||
}
|
||||
|
||||
/// <inheritdoc/>
|
||||
public void ClearReference()
|
||||
{
|
||||
// Nothing to do
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2,9 +2,10 @@
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.IO;
|
||||
using Content;
|
||||
using Copier;
|
||||
using Copier.Page;
|
||||
using Core;
|
||||
using CrossReference;
|
||||
using Encryption;
|
||||
@@ -149,31 +150,36 @@
|
||||
private class DocumentMerger
|
||||
{
|
||||
private const decimal DefaultVersion = 1.2m;
|
||||
|
||||
private readonly PdfStreamWriter context = new PdfStreamWriter();
|
||||
private readonly List<IndirectReferenceToken> pagesTokenReferences = new List<IndirectReferenceToken>();
|
||||
|
||||
private readonly PdfStreamWriter context;
|
||||
private readonly List<IndirectReferenceToken> pagesTokenReferences;
|
||||
private readonly IndirectReferenceToken rootPagesReference;
|
||||
private readonly MultiCopier copier;
|
||||
|
||||
private decimal currentVersion = DefaultVersion;
|
||||
private int pageCount = 0;
|
||||
|
||||
private readonly Dictionary<IndirectReferenceToken, IndirectReferenceToken> referencesFromDocument =
|
||||
new Dictionary<IndirectReferenceToken, IndirectReferenceToken>();
|
||||
|
||||
public DocumentMerger()
|
||||
{
|
||||
context = new PdfStreamWriter();
|
||||
pagesTokenReferences = new List<IndirectReferenceToken>();
|
||||
|
||||
rootPagesReference = context.ReserveNumberToken();
|
||||
|
||||
copier = new MultiCopier(context);
|
||||
copier.AddCopier(new PagesCopier(copier, rootPagesReference));
|
||||
}
|
||||
|
||||
|
||||
public void AppendDocument(Catalog documentCatalog, decimal version, IPdfTokenScanner tokenScanner)
|
||||
{
|
||||
currentVersion = Math.Max(version, currentVersion);
|
||||
|
||||
var (pagesReference, count) = CopyPagesTree(documentCatalog.PageTree, rootPagesReference, tokenScanner);
|
||||
pageCount += count;
|
||||
pagesTokenReferences.Add(pagesReference);
|
||||
var copiedPages = copier.CopyObject(new IndirectReferenceToken(documentCatalog.PageTree.Reference), tokenScanner) as IndirectReferenceToken;
|
||||
pagesTokenReferences.Add(copiedPages);
|
||||
|
||||
referencesFromDocument.Clear();
|
||||
pageCount += documentCatalog.PagesDictionary.Get<NumericToken>(NameToken.Count, tokenScanner).Int;
|
||||
|
||||
copier.ClearReference();
|
||||
}
|
||||
|
||||
public byte[] Build()
|
||||
@@ -190,7 +196,7 @@
|
||||
{ NameToken.Count, new NumericToken(pageCount) }
|
||||
});
|
||||
|
||||
var pagesRef = context.WriteToken( pagesDictionary, (int)rootPagesReference.Data.ObjectNumber);
|
||||
var pagesRef = context.WriteToken(pagesDictionary, (int)rootPagesReference.Data.ObjectNumber);
|
||||
|
||||
var catalog = new DictionaryToken(new Dictionary<NameToken, IToken>
|
||||
{
|
||||
@@ -199,9 +205,9 @@
|
||||
});
|
||||
|
||||
var catalogRef = context.WriteToken(catalog);
|
||||
|
||||
|
||||
context.Flush(currentVersion, catalogRef);
|
||||
|
||||
|
||||
var bytes = context.ToArray();
|
||||
|
||||
Close();
|
||||
@@ -209,165 +215,10 @@
|
||||
return bytes;
|
||||
}
|
||||
|
||||
public void Close()
|
||||
private void Close()
|
||||
{
|
||||
context.Dispose();
|
||||
}
|
||||
|
||||
private (IndirectReferenceToken, int) CopyPagesTree(PageTreeNode treeNode, IndirectReferenceToken treeParentReference, IPdfTokenScanner tokenScanner)
|
||||
{
|
||||
Debug.Assert(!treeNode.IsPage);
|
||||
|
||||
var currentNodeReference = context.ReserveNumberToken();
|
||||
|
||||
var pageReferences = new List<IndirectReferenceToken>();
|
||||
var nodeCount = 0;
|
||||
foreach (var pageNode in treeNode.Children)
|
||||
{
|
||||
IndirectReferenceToken newEntry;
|
||||
if (!pageNode.IsPage)
|
||||
{
|
||||
var count = 0;
|
||||
(newEntry, count) = CopyPagesTree(pageNode, currentNodeReference, tokenScanner);
|
||||
nodeCount += count;
|
||||
}
|
||||
else
|
||||
{
|
||||
newEntry = CopyPageNode(pageNode, currentNodeReference, tokenScanner);
|
||||
++nodeCount;
|
||||
}
|
||||
|
||||
pageReferences.Add(newEntry);
|
||||
}
|
||||
|
||||
var newPagesNode = new Dictionary<NameToken, IToken>
|
||||
{
|
||||
{ NameToken.Type, NameToken.Pages },
|
||||
{ NameToken.Kids, new ArrayToken(pageReferences) },
|
||||
{ NameToken.Count, new NumericToken(nodeCount) },
|
||||
{ NameToken.Parent, treeParentReference }
|
||||
};
|
||||
|
||||
foreach (var pair in treeNode.NodeDictionary.Data)
|
||||
{
|
||||
if (IgnoreKeyForPagesNode(pair))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
newPagesNode[NameToken.Create(pair.Key)] = CopyToken(pair.Value, tokenScanner);
|
||||
}
|
||||
|
||||
var pagesDictionary = new DictionaryToken(newPagesNode);
|
||||
|
||||
return (context.WriteToken(pagesDictionary, (int)currentNodeReference.Data.ObjectNumber), nodeCount);
|
||||
}
|
||||
|
||||
private IndirectReferenceToken CopyPageNode(PageTreeNode pageNode, IndirectReferenceToken parentPagesObject, IPdfTokenScanner tokenScanner)
|
||||
{
|
||||
Debug.Assert(pageNode.IsPage);
|
||||
|
||||
var pageDictionary = new Dictionary<NameToken, IToken>
|
||||
{
|
||||
{NameToken.Parent, parentPagesObject},
|
||||
};
|
||||
|
||||
foreach (var setPair in pageNode.NodeDictionary.Data)
|
||||
{
|
||||
var name = setPair.Key;
|
||||
var token = setPair.Value;
|
||||
|
||||
if (name == NameToken.Parent)
|
||||
{
|
||||
// Skip Parent token, since we have to reassign it
|
||||
continue;
|
||||
}
|
||||
|
||||
pageDictionary.Add(NameToken.Create(name), CopyToken(token, tokenScanner));
|
||||
}
|
||||
|
||||
return context.WriteToken(new DictionaryToken(pageDictionary));
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// The purpose of this method is to resolve indirect reference. That mean copy the reference's content to the new document's stream
|
||||
/// and replace the indirect reference with the correct/new one
|
||||
/// </summary>
|
||||
/// <param name="tokenToCopy">Token to inspect for reference</param>
|
||||
/// <param name="tokenScanner">scanner get the content from the original document</param>
|
||||
/// <returns>A reference of the token that was copied. With all the reference updated</returns>
|
||||
private IToken CopyToken(IToken tokenToCopy, IPdfTokenScanner tokenScanner)
|
||||
{
|
||||
// This token need to be deep copied, because they could contain reference. So we have to update them.
|
||||
switch (tokenToCopy)
|
||||
{
|
||||
case DictionaryToken dictionaryToken:
|
||||
{
|
||||
var newContent = new Dictionary<NameToken, IToken>();
|
||||
foreach (var setPair in dictionaryToken.Data)
|
||||
{
|
||||
var name = setPair.Key;
|
||||
var token = setPair.Value;
|
||||
newContent.Add(NameToken.Create(name), CopyToken(token, tokenScanner));
|
||||
}
|
||||
|
||||
return new DictionaryToken(newContent);
|
||||
}
|
||||
case ArrayToken arrayToken:
|
||||
{
|
||||
var newArray = new List<IToken>(arrayToken.Length);
|
||||
foreach (var token in arrayToken.Data)
|
||||
{
|
||||
newArray.Add(CopyToken(token, tokenScanner));
|
||||
}
|
||||
|
||||
return new ArrayToken(newArray);
|
||||
}
|
||||
case IndirectReferenceToken referenceToken:
|
||||
{
|
||||
if (referencesFromDocument.TryGetValue(referenceToken, out var newReferenceToken))
|
||||
{
|
||||
return newReferenceToken;
|
||||
}
|
||||
|
||||
var tokenObject = DirectObjectFinder.Get<IToken>(referenceToken.Data, tokenScanner);
|
||||
|
||||
Debug.Assert(!(tokenObject is IndirectReferenceToken));
|
||||
|
||||
var newToken = CopyToken(tokenObject, tokenScanner);
|
||||
newReferenceToken = context.WriteToken(newToken);
|
||||
|
||||
referencesFromDocument.Add(referenceToken, newReferenceToken);
|
||||
|
||||
return newReferenceToken;
|
||||
}
|
||||
case StreamToken streamToken:
|
||||
{
|
||||
var properties = CopyToken(streamToken.StreamDictionary, tokenScanner) as DictionaryToken;
|
||||
Debug.Assert(properties != null);
|
||||
|
||||
var bytes = streamToken.Data;
|
||||
return new StreamToken(properties, bytes);
|
||||
}
|
||||
|
||||
case ObjectToken _:
|
||||
{
|
||||
// Since we don't write token directly to the stream.
|
||||
// We can't know the offset. Therefore the token would be invalid
|
||||
throw new NotSupportedException("Copying a Object token is not supported");
|
||||
}
|
||||
}
|
||||
|
||||
return tokenToCopy;
|
||||
}
|
||||
|
||||
private static bool IgnoreKeyForPagesNode(KeyValuePair<string, IToken> token)
|
||||
{
|
||||
return string.Equals(token.Key, NameToken.Type.Data, StringComparison.OrdinalIgnoreCase)
|
||||
|| string.Equals(token.Key, NameToken.Kids.Data, StringComparison.OrdinalIgnoreCase)
|
||||
|| string.Equals(token.Key, NameToken.Count.Data, StringComparison.OrdinalIgnoreCase)
|
||||
|| string.Equals(token.Key, NameToken.Parent.Data, StringComparison.OrdinalIgnoreCase);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user