mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-09-19 19:07:56 +08:00
Make PdfMerger use the PdfStreamWriter
This commit is contained in:
@@ -16,8 +16,6 @@
|
|||||||
using Tokenization.Scanner;
|
using Tokenization.Scanner;
|
||||||
using Tokens;
|
using Tokens;
|
||||||
using Exceptions;
|
using Exceptions;
|
||||||
using Graphics.Operations;
|
|
||||||
using Fonts;
|
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Merges PDF documents into each other.
|
/// Merges PDF documents into each other.
|
||||||
@@ -131,45 +129,35 @@
|
|||||||
{
|
{
|
||||||
private const decimal DefaultVersion = 1.2m;
|
private const decimal DefaultVersion = 1.2m;
|
||||||
|
|
||||||
private readonly BuilderContext context = new BuilderContext();
|
private readonly PdfStreamWriter context = new PdfStreamWriter();
|
||||||
private readonly List<IndirectReferenceToken> documentPages = new List<IndirectReferenceToken>();
|
private readonly List<IndirectReferenceToken> pagesTokenReferences = new List<IndirectReferenceToken>();
|
||||||
private readonly IndirectReferenceToken rootPagesIndirectReference;
|
private readonly IndirectReferenceToken rootPagesReference;
|
||||||
|
|
||||||
private decimal currentVersion = DefaultVersion;
|
private decimal currentVersion = DefaultVersion;
|
||||||
private MemoryStream memory = new MemoryStream();
|
|
||||||
|
|
||||||
private int pageCount = 0;
|
private int pageCount = 0;
|
||||||
|
|
||||||
|
private readonly Dictionary<IndirectReferenceToken, IndirectReferenceToken> referencesFromDocument =
|
||||||
|
new Dictionary<IndirectReferenceToken, IndirectReferenceToken>();
|
||||||
|
|
||||||
public DocumentMerger()
|
public DocumentMerger()
|
||||||
{
|
{
|
||||||
var reserved = context.ReserveNumber();
|
rootPagesReference = context.ReserveNumberToken();
|
||||||
rootPagesIndirectReference = new IndirectReferenceToken(new IndirectReference(reserved, 0));
|
|
||||||
|
|
||||||
WriteHeaderToStream();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void AppendDocument(Catalog documentCatalog, decimal version, IPdfTokenScanner tokenScanner)
|
public void AppendDocument(Catalog documentCatalog, decimal version, IPdfTokenScanner tokenScanner)
|
||||||
{
|
{
|
||||||
if (memory == null)
|
|
||||||
{
|
|
||||||
throw new ObjectDisposedException("Merger closed already");
|
|
||||||
}
|
|
||||||
|
|
||||||
currentVersion = Math.Max(version, currentVersion);
|
currentVersion = Math.Max(version, currentVersion);
|
||||||
|
|
||||||
var (pagesReference, count) = CopyPagesTree(documentCatalog.PageTree, rootPagesIndirectReference, tokenScanner);
|
var (pagesReference, count) = CopyPagesTree(documentCatalog.PageTree, rootPagesReference, tokenScanner);
|
||||||
pageCount += count;
|
pageCount += count;
|
||||||
documentPages.Add(new IndirectReferenceToken(pagesReference.Number));
|
pagesTokenReferences.Add(pagesReference);
|
||||||
|
|
||||||
|
referencesFromDocument.Clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
public byte[] Build()
|
public byte[] Build()
|
||||||
{
|
{
|
||||||
if (memory == null)
|
if (pagesTokenReferences.Count < 1)
|
||||||
{
|
|
||||||
throw new ObjectDisposedException("Merger closed already");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (documentPages.Count < 1)
|
|
||||||
{
|
{
|
||||||
throw new PdfDocumentFormatException("Empty document");
|
throw new PdfDocumentFormatException("Empty document");
|
||||||
}
|
}
|
||||||
@@ -177,29 +165,23 @@
|
|||||||
var pagesDictionary = new DictionaryToken(new Dictionary<NameToken, IToken>
|
var pagesDictionary = new DictionaryToken(new Dictionary<NameToken, IToken>
|
||||||
{
|
{
|
||||||
{ NameToken.Type, NameToken.Pages },
|
{ NameToken.Type, NameToken.Pages },
|
||||||
{ NameToken.Kids, new ArrayToken(documentPages) },
|
{ NameToken.Kids, new ArrayToken(pagesTokenReferences) },
|
||||||
{ NameToken.Count, new NumericToken(pageCount) }
|
{ NameToken.Count, new NumericToken(pageCount) }
|
||||||
});
|
});
|
||||||
|
|
||||||
var pagesRef = context.WriteObject(memory, pagesDictionary, (int)rootPagesIndirectReference.Data.ObjectNumber);
|
var pagesRef = context.WriteToken( pagesDictionary, (int)rootPagesReference.Data.ObjectNumber);
|
||||||
|
|
||||||
var catalog = new DictionaryToken(new Dictionary<NameToken, IToken>
|
var catalog = new DictionaryToken(new Dictionary<NameToken, IToken>
|
||||||
{
|
{
|
||||||
{ NameToken.Type, NameToken.Catalog },
|
{ NameToken.Type, NameToken.Catalog },
|
||||||
{ NameToken.Pages, new IndirectReferenceToken(pagesRef.Number) }
|
{ NameToken.Pages, pagesRef }
|
||||||
});
|
});
|
||||||
|
|
||||||
var catalogRef = context.WriteObject(memory, catalog);
|
var catalogRef = context.WriteToken(catalog);
|
||||||
|
|
||||||
TokenWriter.WriteCrossReferenceTable(context.ObjectOffsets, catalogRef, memory, null);
|
context.Flush(currentVersion, catalogRef);
|
||||||
|
|
||||||
if (currentVersion != DefaultVersion)
|
var bytes = context.ToArray();
|
||||||
{
|
|
||||||
memory.Seek(0, SeekOrigin.Begin);
|
|
||||||
WriteHeaderToStream();
|
|
||||||
}
|
|
||||||
|
|
||||||
var bytes = memory.ToArray();
|
|
||||||
|
|
||||||
Close();
|
Close();
|
||||||
|
|
||||||
@@ -208,22 +190,20 @@
|
|||||||
|
|
||||||
public void Close()
|
public void Close()
|
||||||
{
|
{
|
||||||
memory?.Dispose();
|
context.Dispose();
|
||||||
memory = null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private (ObjectToken, int) CopyPagesTree(PageTreeNode treeNode, IndirectReferenceToken treeParentReference, IPdfTokenScanner tokenScanner)
|
private (IndirectReferenceToken, int) CopyPagesTree(PageTreeNode treeNode, IndirectReferenceToken treeParentReference, IPdfTokenScanner tokenScanner)
|
||||||
{
|
{
|
||||||
Debug.Assert(!treeNode.IsPage);
|
Debug.Assert(!treeNode.IsPage);
|
||||||
|
|
||||||
var currentNodeReserved = context.ReserveNumber();
|
var currentNodeReference = context.ReserveNumberToken();
|
||||||
var currentNodeReference = new IndirectReferenceToken(new IndirectReference(currentNodeReserved, 0));
|
|
||||||
|
|
||||||
var pageReferences = new List<IndirectReferenceToken>();
|
var pageReferences = new List<IndirectReferenceToken>();
|
||||||
var nodeCount = 0;
|
var nodeCount = 0;
|
||||||
foreach (var pageNode in treeNode.Children)
|
foreach (var pageNode in treeNode.Children)
|
||||||
{
|
{
|
||||||
ObjectToken newEntry;
|
IndirectReferenceToken newEntry;
|
||||||
if (!pageNode.IsPage)
|
if (!pageNode.IsPage)
|
||||||
{
|
{
|
||||||
var count = 0;
|
var count = 0;
|
||||||
@@ -236,7 +216,7 @@
|
|||||||
++nodeCount;
|
++nodeCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
pageReferences.Add(new IndirectReferenceToken(newEntry.Number));
|
pageReferences.Add(newEntry);
|
||||||
}
|
}
|
||||||
|
|
||||||
var newPagesNode = new Dictionary<NameToken, IToken>
|
var newPagesNode = new Dictionary<NameToken, IToken>
|
||||||
@@ -259,10 +239,10 @@
|
|||||||
|
|
||||||
var pagesDictionary = new DictionaryToken(newPagesNode);
|
var pagesDictionary = new DictionaryToken(newPagesNode);
|
||||||
|
|
||||||
return (context.WriteObject(memory, pagesDictionary, currentNodeReserved), nodeCount);
|
return (context.WriteToken(pagesDictionary, (int)currentNodeReference.Data.ObjectNumber), nodeCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
private ObjectToken CopyPageNode(PageTreeNode pageNode, IndirectReferenceToken parentPagesObject, IPdfTokenScanner tokenScanner)
|
private IndirectReferenceToken CopyPageNode(PageTreeNode pageNode, IndirectReferenceToken parentPagesObject, IPdfTokenScanner tokenScanner)
|
||||||
{
|
{
|
||||||
Debug.Assert(pageNode.IsPage);
|
Debug.Assert(pageNode.IsPage);
|
||||||
|
|
||||||
@@ -285,7 +265,7 @@
|
|||||||
pageDictionary.Add(NameToken.Create(name), CopyToken(token, tokenScanner));
|
pageDictionary.Add(NameToken.Create(name), CopyToken(token, tokenScanner));
|
||||||
}
|
}
|
||||||
|
|
||||||
return context.WriteObject(memory, new DictionaryToken(pageDictionary));
|
return context.WriteToken(new DictionaryToken(pageDictionary));
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
@@ -294,70 +274,102 @@
|
|||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="tokenToCopy">Token to inspect for reference</param>
|
/// <param name="tokenToCopy">Token to inspect for reference</param>
|
||||||
/// <param name="tokenScanner">scanner get the content from the original document</param>
|
/// <param name="tokenScanner">scanner get the content from the original document</param>
|
||||||
/// <returns>A copy of the token with all his content copied to the new document's stream</returns>
|
/// <param name="deepCopy">Whether to create new instances of basic tokens (e.g. NumericToken), because the originals could be modified</param>
|
||||||
private IToken CopyToken(IToken tokenToCopy, IPdfTokenScanner tokenScanner)
|
/// <returns>A reference to the token that was copied, with all the references updated</returns>
|
||||||
|
private IToken CopyToken(IToken tokenToCopy, IPdfTokenScanner tokenScanner, bool deepCopy = false)
|
||||||
{
|
{
|
||||||
if (tokenToCopy is DictionaryToken dictionaryToken)
|
// These tokens need to be deep copied because they could contain references, which we have to update.
|
||||||
|
switch (tokenToCopy)
|
||||||
|
{
|
||||||
|
case DictionaryToken dictionaryToken:
|
||||||
{
|
{
|
||||||
var newContent = new Dictionary<NameToken, IToken>();
|
var newContent = new Dictionary<NameToken, IToken>();
|
||||||
foreach (var setPair in dictionaryToken.Data)
|
foreach (var setPair in dictionaryToken.Data)
|
||||||
{
|
{
|
||||||
var name = setPair.Key;
|
var name = setPair.Key;
|
||||||
var token = setPair.Value;
|
var token = setPair.Value;
|
||||||
newContent.Add(NameToken.Create(name), CopyToken(token, tokenScanner));
|
newContent.Add(NameToken.Create(name), CopyToken(token, tokenScanner, deepCopy));
|
||||||
}
|
}
|
||||||
|
|
||||||
return new DictionaryToken(newContent);
|
return new DictionaryToken(newContent);
|
||||||
}
|
}
|
||||||
else if (tokenToCopy is ArrayToken arrayToken)
|
case ArrayToken arrayToken:
|
||||||
{
|
{
|
||||||
var newArray = new List<IToken>(arrayToken.Length);
|
var newArray = new List<IToken>(arrayToken.Length);
|
||||||
foreach (var token in arrayToken.Data)
|
foreach (var token in arrayToken.Data)
|
||||||
{
|
{
|
||||||
newArray.Add(CopyToken(token, tokenScanner));
|
newArray.Add(CopyToken(token, tokenScanner, deepCopy));
|
||||||
}
|
}
|
||||||
|
|
||||||
return new ArrayToken(newArray);
|
return new ArrayToken(newArray);
|
||||||
}
|
}
|
||||||
else if (tokenToCopy is IndirectReferenceToken referenceToken)
|
case IndirectReferenceToken referenceToken:
|
||||||
{
|
{
|
||||||
|
if (referencesFromDocument.TryGetValue(referenceToken, out var newReferenceToken))
|
||||||
|
{
|
||||||
|
return newReferenceToken;
|
||||||
|
}
|
||||||
|
|
||||||
var tokenObject = DirectObjectFinder.Get<IToken>(referenceToken.Data, tokenScanner);
|
var tokenObject = DirectObjectFinder.Get<IToken>(referenceToken.Data, tokenScanner);
|
||||||
|
|
||||||
Debug.Assert(!(tokenObject is IndirectReferenceToken));
|
Debug.Assert(!(tokenObject is IndirectReferenceToken));
|
||||||
|
|
||||||
var newToken = CopyToken(tokenObject, tokenScanner);
|
var newToken = CopyToken(tokenObject, tokenScanner, deepCopy);
|
||||||
var objToken = context.WriteObject(memory, newToken);
|
newReferenceToken = context.WriteToken(newToken);
|
||||||
return new IndirectReferenceToken(objToken.Number);
|
|
||||||
|
referencesFromDocument.Add(referenceToken, newReferenceToken);
|
||||||
|
|
||||||
|
return newReferenceToken;
|
||||||
}
|
}
|
||||||
else if (tokenToCopy is StreamToken streamToken)
|
case StreamToken streamToken:
|
||||||
{
|
{
|
||||||
var properties = CopyToken(streamToken.StreamDictionary, tokenScanner) as DictionaryToken;
|
var properties = CopyToken(streamToken.StreamDictionary, tokenScanner, deepCopy) as DictionaryToken;
|
||||||
Debug.Assert(properties != null);
|
Debug.Assert(properties != null);
|
||||||
return new StreamToken(properties, new List<byte>(streamToken.Data));
|
|
||||||
|
var bytes = deepCopy ? new List<byte>(streamToken.Data) : streamToken.Data;
|
||||||
|
return new StreamToken(properties, bytes);
|
||||||
}
|
}
|
||||||
else // Non Complex Token - BooleanToken, NumericToken, NameToken, Etc...
|
|
||||||
|
case ObjectToken _:
|
||||||
{
|
{
|
||||||
|
// We don't write tokens directly to the stream, so we can't know the offset.
|
||||||
|
// The token would be invalid, although I don't think copying an object token would ever happen.
|
||||||
|
throw new NotSupportedException("Copying a Object token is not supported");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!deepCopy)
|
||||||
return tokenToCopy;
|
return tokenToCopy;
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void WriteHeaderToStream()
|
// These tokens can't be deep copied
|
||||||
|
if (tokenToCopy is EndOfLineToken || tokenToCopy is NullToken || tokenToCopy is BooleanToken)
|
||||||
|
return tokenToCopy;
|
||||||
|
|
||||||
|
switch (tokenToCopy)
|
||||||
{
|
{
|
||||||
WriteString($"%PDF-{currentVersion:0.0}", memory);
|
case CommentToken commentToken:
|
||||||
|
return new CommentToken(commentToken.Data);
|
||||||
|
|
||||||
memory.WriteText("%");
|
case HexToken hexToken:
|
||||||
memory.WriteByte(169);
|
return new HexToken(hexToken.Data.ToCharArray());
|
||||||
memory.WriteByte(205);
|
|
||||||
memory.WriteByte(196);
|
case InlineImageDataToken imageDataToken:
|
||||||
memory.WriteByte(210);
|
return new InlineImageDataToken(imageDataToken.Data);
|
||||||
memory.WriteNewLine();
|
|
||||||
|
case NameToken nameToken:
|
||||||
|
return NameToken.Create(nameToken.Data);
|
||||||
|
|
||||||
|
case NumericToken numericToken:
|
||||||
|
return new NumericToken(numericToken.Data);
|
||||||
|
|
||||||
|
case OperatorToken operatorToken:
|
||||||
|
return OperatorToken.Create(operatorToken.Data);
|
||||||
|
|
||||||
|
case StringToken stringToken:
|
||||||
|
return new StringToken(stringToken.Data, stringToken.EncodedWith);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void WriteString(string text, Stream stream)
|
throw new NotSupportedException($"Tried to deep copy {tokenToCopy.GetType()}");
|
||||||
{
|
|
||||||
var bytes = OtherEncodings.StringAsLatin1Bytes(text);
|
|
||||||
stream.Write(bytes, 0, bytes.Length);
|
|
||||||
stream.WriteNewLine();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static bool IgnoreKeyForPagesNode(KeyValuePair<string, IToken> token)
|
private static bool IgnoreKeyForPagesNode(KeyValuePair<string, IToken> token)
|
||||||
|
Reference in New Issue
Block a user