Make PdfMerger use the PdfStreamWriter

This commit is contained in:
InusualZ
2020-03-08 15:33:54 -03:00
committed by Eliot Jones
parent 44ad5c8b0c
commit b3f310a249

View File

@@ -16,8 +16,6 @@
using Tokenization.Scanner;
using Tokens;
using Exceptions;
using Graphics.Operations;
using Fonts;
/// <summary>
/// Merges PDF documents into each other.
@@ -131,45 +129,35 @@
{
private const decimal DefaultVersion = 1.2m;
private readonly BuilderContext context = new BuilderContext();
private readonly List<IndirectReferenceToken> documentPages = new List<IndirectReferenceToken>();
private readonly IndirectReferenceToken rootPagesIndirectReference;
private readonly PdfStreamWriter context = new PdfStreamWriter();
private readonly List<IndirectReferenceToken> pagesTokenReferences = new List<IndirectReferenceToken>();
private readonly IndirectReferenceToken rootPagesReference;
private decimal currentVersion = DefaultVersion;
private MemoryStream memory = new MemoryStream();
private int pageCount = 0;
private readonly Dictionary<IndirectReferenceToken, IndirectReferenceToken> referencesFromDocument =
new Dictionary<IndirectReferenceToken, IndirectReferenceToken>();
public DocumentMerger()
{
var reserved = context.ReserveNumber();
rootPagesIndirectReference = new IndirectReferenceToken(new IndirectReference(reserved, 0));
WriteHeaderToStream();
rootPagesReference = context.ReserveNumberToken();
}
public void AppendDocument(Catalog documentCatalog, decimal version, IPdfTokenScanner tokenScanner)
{
if (memory == null)
{
throw new ObjectDisposedException("Merger closed already");
}
currentVersion = Math.Max(version, currentVersion);
var (pagesReference, count) = CopyPagesTree(documentCatalog.PageTree, rootPagesIndirectReference, tokenScanner);
var (pagesReference, count) = CopyPagesTree(documentCatalog.PageTree, rootPagesReference, tokenScanner);
pageCount += count;
documentPages.Add(new IndirectReferenceToken(pagesReference.Number));
pagesTokenReferences.Add(pagesReference);
referencesFromDocument.Clear();
}
public byte[] Build()
{
if (memory == null)
{
throw new ObjectDisposedException("Merger closed already");
}
if (documentPages.Count < 1)
if (pagesTokenReferences.Count < 1)
{
throw new PdfDocumentFormatException("Empty document");
}
@@ -177,29 +165,23 @@
var pagesDictionary = new DictionaryToken(new Dictionary<NameToken, IToken>
{
{ NameToken.Type, NameToken.Pages },
{ NameToken.Kids, new ArrayToken(documentPages) },
{ NameToken.Kids, new ArrayToken(pagesTokenReferences) },
{ NameToken.Count, new NumericToken(pageCount) }
});
var pagesRef = context.WriteObject(memory, pagesDictionary, (int)rootPagesIndirectReference.Data.ObjectNumber);
var pagesRef = context.WriteToken( pagesDictionary, (int)rootPagesReference.Data.ObjectNumber);
var catalog = new DictionaryToken(new Dictionary<NameToken, IToken>
{
{ NameToken.Type, NameToken.Catalog },
{ NameToken.Pages, new IndirectReferenceToken(pagesRef.Number) }
{ NameToken.Pages, pagesRef }
});
var catalogRef = context.WriteObject(memory, catalog);
var catalogRef = context.WriteToken(catalog);
TokenWriter.WriteCrossReferenceTable(context.ObjectOffsets, catalogRef, memory, null);
context.Flush(currentVersion, catalogRef);
if (currentVersion != DefaultVersion)
{
memory.Seek(0, SeekOrigin.Begin);
WriteHeaderToStream();
}
var bytes = memory.ToArray();
var bytes = context.ToArray();
Close();
@@ -208,22 +190,20 @@
public void Close()
{
memory?.Dispose();
memory = null;
context.Dispose();
}
private (ObjectToken, int) CopyPagesTree(PageTreeNode treeNode, IndirectReferenceToken treeParentReference, IPdfTokenScanner tokenScanner)
private (IndirectReferenceToken, int) CopyPagesTree(PageTreeNode treeNode, IndirectReferenceToken treeParentReference, IPdfTokenScanner tokenScanner)
{
Debug.Assert(!treeNode.IsPage);
var currentNodeReserved = context.ReserveNumber();
var currentNodeReference = new IndirectReferenceToken(new IndirectReference(currentNodeReserved, 0));
var currentNodeReference = context.ReserveNumberToken();
var pageReferences = new List<IndirectReferenceToken>();
var nodeCount = 0;
foreach (var pageNode in treeNode.Children)
{
ObjectToken newEntry;
IndirectReferenceToken newEntry;
if (!pageNode.IsPage)
{
var count = 0;
@@ -236,7 +216,7 @@
++nodeCount;
}
pageReferences.Add(new IndirectReferenceToken(newEntry.Number));
pageReferences.Add(newEntry);
}
var newPagesNode = new Dictionary<NameToken, IToken>
@@ -259,10 +239,10 @@
var pagesDictionary = new DictionaryToken(newPagesNode);
return (context.WriteObject(memory, pagesDictionary, currentNodeReserved), nodeCount);
return (context.WriteToken(pagesDictionary, (int)currentNodeReference.Data.ObjectNumber), nodeCount);
}
private ObjectToken CopyPageNode(PageTreeNode pageNode, IndirectReferenceToken parentPagesObject, IPdfTokenScanner tokenScanner)
private IndirectReferenceToken CopyPageNode(PageTreeNode pageNode, IndirectReferenceToken parentPagesObject, IPdfTokenScanner tokenScanner)
{
Debug.Assert(pageNode.IsPage);
@@ -285,7 +265,7 @@
pageDictionary.Add(NameToken.Create(name), CopyToken(token, tokenScanner));
}
return context.WriteObject(memory, new DictionaryToken(pageDictionary));
return context.WriteToken(new DictionaryToken(pageDictionary));
}
/// <summary>
@@ -294,70 +274,102 @@
/// </summary>
/// <param name="tokenToCopy">Token to inspect for reference</param>
/// <param name="tokenScanner">scanner get the content from the original document</param>
/// <returns>A copy of the token with all his content copied to the new document's stream</returns>
private IToken CopyToken(IToken tokenToCopy, IPdfTokenScanner tokenScanner)
/// <param name="deepCopy">Weather we want to create a new token for basic token(Ex. NumericToken) because the original could be modified</param>
/// <returns>A reference of the token that was copied. With all the reference updated</returns>
private IToken CopyToken(IToken tokenToCopy, IPdfTokenScanner tokenScanner, bool deepCopy = false)
{
if (tokenToCopy is DictionaryToken dictionaryToken)
// This token need to be deep copied, because they could contain reference. So we have to update them.
switch (tokenToCopy)
{
case DictionaryToken dictionaryToken:
{
var newContent = new Dictionary<NameToken, IToken>();
foreach (var setPair in dictionaryToken.Data)
{
var name = setPair.Key;
var token = setPair.Value;
newContent.Add(NameToken.Create(name), CopyToken(token, tokenScanner));
newContent.Add(NameToken.Create(name), CopyToken(token, tokenScanner, deepCopy));
}
return new DictionaryToken(newContent);
}
else if (tokenToCopy is ArrayToken arrayToken)
case ArrayToken arrayToken:
{
var newArray = new List<IToken>(arrayToken.Length);
foreach (var token in arrayToken.Data)
{
newArray.Add(CopyToken(token, tokenScanner));
newArray.Add(CopyToken(token, tokenScanner, deepCopy));
}
return new ArrayToken(newArray);
}
else if (tokenToCopy is IndirectReferenceToken referenceToken)
case IndirectReferenceToken referenceToken:
{
if (referencesFromDocument.TryGetValue(referenceToken, out var newReferenceToken))
{
return newReferenceToken;
}
var tokenObject = DirectObjectFinder.Get<IToken>(referenceToken.Data, tokenScanner);
Debug.Assert(!(tokenObject is IndirectReferenceToken));
var newToken = CopyToken(tokenObject, tokenScanner);
var objToken = context.WriteObject(memory, newToken);
return new IndirectReferenceToken(objToken.Number);
var newToken = CopyToken(tokenObject, tokenScanner, deepCopy);
newReferenceToken = context.WriteToken(newToken);
referencesFromDocument.Add(referenceToken, newReferenceToken);
return newReferenceToken;
}
else if (tokenToCopy is StreamToken streamToken)
case StreamToken streamToken:
{
var properties = CopyToken(streamToken.StreamDictionary, tokenScanner) as DictionaryToken;
var properties = CopyToken(streamToken.StreamDictionary, tokenScanner, deepCopy) as DictionaryToken;
Debug.Assert(properties != null);
return new StreamToken(properties, new List<byte>(streamToken.Data));
var bytes = deepCopy ? new List<byte>(streamToken.Data) : streamToken.Data;
return new StreamToken(properties, bytes);
}
else // Non Complex Token - BooleanToken, NumericToken, NameToken, Etc...
case ObjectToken _:
{
// This is because, since we don't write token directly to the stream. So can't know the offset.
// The token would be invalid. Although I don't think the copy of an object token would ever happen
throw new NotSupportedException("Copying a Object token is not supported");
}
}
if (!deepCopy)
return tokenToCopy;
}
}
private void WriteHeaderToStream()
// This tokens can't be deep copied
if (tokenToCopy is EndOfLineToken || tokenToCopy is NullToken || tokenToCopy is BooleanToken)
return tokenToCopy;
switch (tokenToCopy)
{
WriteString($"%PDF-{currentVersion:0.0}", memory);
case CommentToken commentToken:
return new CommentToken(commentToken.Data);
memory.WriteText("%");
memory.WriteByte(169);
memory.WriteByte(205);
memory.WriteByte(196);
memory.WriteByte(210);
memory.WriteNewLine();
case HexToken hexToken:
return new HexToken(hexToken.Data.ToCharArray());
case InlineImageDataToken imageDataToken:
return new InlineImageDataToken(imageDataToken.Data);
case NameToken nameToken:
return NameToken.Create(nameToken.Data);
case NumericToken numericToken:
return new NumericToken(numericToken.Data);
case OperatorToken operatorToken:
return OperatorToken.Create(operatorToken.Data);
case StringToken stringToken:
return new StringToken(stringToken.Data, stringToken.EncodedWith);
}
private static void WriteString(string text, Stream stream)
{
var bytes = OtherEncodings.StringAsLatin1Bytes(text);
stream.Write(bytes, 0, bytes.Length);
stream.WriteNewLine();
throw new NotSupportedException($"Tried to deep copy {tokenToCopy.GetType()}");
}
private static bool IgnoreKeyForPagesNode(KeyValuePair<string, IToken> token)