From 5a98b21c2f68f32ed8e1e14c3d814d26fb355e5c Mon Sep 17 00:00:00 2001 From: InusualZ Date: Fri, 2 Oct 2020 20:36:31 -0400 Subject: [PATCH 1/4] Don't brute force search for similar token. since the process is slow Since this class is the writer of token, it should never be the one responsible to check if a token is duplicated. --- src/UglyToad.PdfPig/Writer/PdfStreamWriter.cs | 136 +++++++++++------- 1 file changed, 83 insertions(+), 53 deletions(-) diff --git a/src/UglyToad.PdfPig/Writer/PdfStreamWriter.cs b/src/UglyToad.PdfPig/Writer/PdfStreamWriter.cs index 5ce02090..ef3ceb50 100644 --- a/src/UglyToad.PdfPig/Writer/PdfStreamWriter.cs +++ b/src/UglyToad.PdfPig/Writer/PdfStreamWriter.cs @@ -9,7 +9,8 @@ using Tokens; /// - /// This class would lazily flush all token. Allowing us to make changes to references without need to rewrite the whole stream + /// This class would lazily flush all token. + /// Allowing us to make changes to references without need to rewrite the whole stream /// internal class PdfStreamWriter : IDisposable { @@ -17,20 +18,34 @@ private readonly Dictionary tokenReferences = new Dictionary(); - public int CurrentNumber { get; private set; } = 1; + private int currentNumber = 1; - public Stream Stream { get; private set; } + private Stream stream; + /// + /// Flag to set whether or not we want to dispose the stream + /// public bool DisposeStream { get; set; } + /// + /// Construct a PdfStreamWriter with a memory stream + /// public PdfStreamWriter() : this(new MemoryStream()) { } + /// + /// Construct a PdfStreamWriter + /// public PdfStreamWriter(Stream baseStream, bool disposeStream = true) { - Stream = baseStream ?? throw new ArgumentNullException(nameof(baseStream)); + stream = baseStream ?? 
throw new ArgumentNullException(nameof(baseStream)); DisposeStream = disposeStream; } + /// + /// Flush the document with all the token that we have accumulated + /// + /// Pdf Version that we are targeting + /// Catalog's indirect reference token to which the token are related public void Flush(decimal version, IndirectReferenceToken catalogReference) { if (catalogReference == null) @@ -38,14 +53,14 @@ throw new ArgumentNullException(nameof(catalogReference)); } - WriteString($"%PDF-{version.ToString("0.0", CultureInfo.InvariantCulture)}", Stream); + WriteString($"%PDF-{version.ToString("0.0", CultureInfo.InvariantCulture)}", stream); - Stream.WriteText("%"); - Stream.WriteByte(169); - Stream.WriteByte(205); - Stream.WriteByte(196); - Stream.WriteByte(210); - Stream.WriteNewLine(); + stream.WriteText("%"); + stream.WriteByte(169); + stream.WriteByte(205); + stream.WriteByte(196); + stream.WriteByte(210); + stream.WriteNewLine(); var offsets = new Dictionary(); ObjectToken catalogToken = null; @@ -53,10 +68,10 @@ { var referenceToken = pair.Key; var token = pair.Value; - var offset = Stream.Position; + var offset = stream.Position; var obj = new ObjectToken(offset, referenceToken.Data, token); - TokenWriter.WriteToken(obj, Stream); + TokenWriter.WriteToken(obj, stream); offsets.Add(referenceToken.Data, offset); @@ -72,75 +87,105 @@ } // TODO: Support document information - TokenWriter.WriteCrossReferenceTable(offsets, catalogToken, Stream, null); + TokenWriter.WriteCrossReferenceTable(offsets, catalogToken, stream, null); } + /// + /// Push a new token to be written + /// + /// + /// + /// public IndirectReferenceToken WriteToken(IToken token, int? reservedNumber = null) { - // if you can't consider deduplicating the token. 
- // It must be because it's referenced by his child element, so you must have reserved a number before hand - // Example /Pages Obj - var canBeDuplicated = !reservedNumber.HasValue; - if (!canBeDuplicated) + if (reservedNumber.HasValue) { if (!reservedNumbers.Remove(reservedNumber.Value)) { throw new InvalidOperationException("You can't reuse a reserved number"); } - // When we end up writing this token, all of his child would already have been added and checked for duplicate return AddToken(token, reservedNumber.Value); } - var reference = FindToken(token); - if (reference == null) - { - return AddToken(token, CurrentNumber++); - } - - return reference; + return AddToken(token, currentNumber++); } + /// + /// Get a token based on his indirect reference + /// + /// + /// + public IToken GetToken(IndirectReferenceToken referenceToken) + { + return tokenReferences.TryGetValue(referenceToken, out var token) ? token : null; + } + + /// + /// Replace a token base on his indirect reference + /// + /// + /// + public void ReplaceToken(IndirectReferenceToken referenceToken, IToken newToken) + { + tokenReferences[referenceToken] = newToken; + } + + /// + /// Reserve a number for a token + /// + /// public int ReserveNumber() { - var reserved = CurrentNumber; + var reserved = currentNumber; reservedNumbers.Add(reserved); - CurrentNumber++; + currentNumber++; return reserved; } + /// + /// Reserve a number and create a token with it + /// + /// public IndirectReferenceToken ReserveNumberToken() { return new IndirectReferenceToken(new IndirectReference(ReserveNumber(), 0)); } + /// + /// Return the bytes that have been flushed to the stream + /// + /// public byte[] ToArray() { - var currentPosition = Stream.Position; - Stream.Seek(0, SeekOrigin.Begin); + var currentPosition = stream.Position; + stream.Seek(0, SeekOrigin.Begin); - var bytes = new byte[Stream.Length]; + var bytes = new byte[stream.Length]; - if (Stream.Read(bytes, 0, bytes.Length) != bytes.Length) + if 
(stream.Read(bytes, 0, bytes.Length) != bytes.Length) { - throw new Exception("Unable to read all the bytes from stream"); + throw new IOException("Unable to read all the bytes from stream"); } - Stream.Seek(currentPosition, SeekOrigin.Begin); + stream.Seek(currentPosition, SeekOrigin.Begin); return bytes; } + /// + /// Dispose the stream if the PdfStreamWriter#DisposeStream flag is set + /// public void Dispose() { if (!DisposeStream) { - Stream = null; + stream = null; return; } - Stream?.Dispose(); - Stream = null; + stream?.Dispose(); + stream = null; } private IndirectReferenceToken AddToken(IToken token, int reservedNumber) @@ -151,21 +196,6 @@ return referenceToken; } - private IndirectReferenceToken FindToken(IToken token) - { - foreach (var pair in tokenReferences) - { - var reference = pair.Key; - var storedToken = pair.Value; - if (storedToken.Equals(token)) - { - return reference; - } - } - - return null; - } - private static void WriteString(string text, Stream stream) { var bytes = OtherEncodings.StringAsLatin1Bytes(text); From 8f0326a8180346539f100188712ac85c94dfb1b2 Mon Sep 17 00:00:00 2001 From: InusualZ Date: Fri, 2 Oct 2020 21:00:27 -0400 Subject: [PATCH 2/4] Introduce a new API for intercepting token that are being copied This API would allow us to track any type of token while is/was copied to a stream, so when a similar token come again, we can decide if want to just use the already written token or the new one. This API would allow us to divide the code for each specific thing that we are trying to avoid having duplicate, while not penalizing performance. 
Another plus is that, since each "deduplicator" lives behind its own class, if a class causes a performance regression that the user doesn't want, the user can decide not to add it and the resulting PDF will still be valid --- .../Writer/Copier/IObjectCopier.cs | 24 +++ .../Writer/Copier/MultiCopier.cs | 75 ++++++++ .../Writer/Copier/ObjectCopier.cs | 170 ++++++++++++++++++ .../Writer/Copier/TokenHelper.cs | 34 ++++ 4 files changed, 303 insertions(+) create mode 100644 src/UglyToad.PdfPig/Writer/Copier/IObjectCopier.cs create mode 100644 src/UglyToad.PdfPig/Writer/Copier/MultiCopier.cs create mode 100644 src/UglyToad.PdfPig/Writer/Copier/ObjectCopier.cs create mode 100644 src/UglyToad.PdfPig/Writer/Copier/TokenHelper.cs diff --git a/src/UglyToad.PdfPig/Writer/Copier/IObjectCopier.cs b/src/UglyToad.PdfPig/Writer/Copier/IObjectCopier.cs new file mode 100644 index 00000000..ce24aaad --- /dev/null +++ b/src/UglyToad.PdfPig/Writer/Copier/IObjectCopier.cs @@ -0,0 +1,24 @@ +namespace UglyToad.PdfPig.Writer.Copier +{ + using System; + using Tokens; + + /// + /// An interface for copying token + /// + public interface IObjectCopier + { + /// + /// Copy the token to the destination stream + /// + /// Token to copy + /// Function to resolve indirect reference identified in the token to copy + /// + public IToken CopyObject(IToken sourceToken, Func tokenScanner); + + /// + /// Clear the references of the previously copied object + /// + public void ClearReference(); + } +} \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Writer/Copier/MultiCopier.cs b/src/UglyToad.PdfPig/Writer/Copier/MultiCopier.cs new file mode 100644 index 00000000..3e626253 --- /dev/null +++ b/src/UglyToad.PdfPig/Writer/Copier/MultiCopier.cs @@ -0,0 +1,75 @@ +namespace UglyToad.PdfPig.Writer.Copier +{ + using System; + using System.Collections.Generic; + using Tokens; + using Writer; + + /// + internal class MultiCopier : ObjectCopier + { + private readonly List copiers; + + 
/// + public MultiCopier(PdfStreamWriter destinationStream) : base(destinationStream) + { + copiers = new List(); + } + + /// + /// + /// + /// + public void AddCopier(IObjectCopier copier) + { + copiers.Add(copier); + } + + /// + /// + /// + /// + /// + public bool RemoveCopier(IObjectCopier copier) + { + return copiers.Remove(copier); + } + + /// + /// + /// + /// + public IReadOnlyList GetCopiers() + { + return copiers; + } + + /// + public override IToken CopyObject(IToken sourceToken, Func tokenScanner) + { + // We give the token to the child copiers, to see if they have a better way of copying the token + foreach (var copier in copiers) + { + var newToken = copier.CopyObject(sourceToken, tokenScanner); + if (newToken != null) + { + return newToken; + } + } + + // If the token did not found a suitable copier, let just do a simple copy of the token + return base.CopyObject(sourceToken, tokenScanner); + } + + /// + public override void ClearReference() + { + foreach (var copier in copiers) + { + copier.ClearReference(); + } + + base.ClearReference(); + } + } +} \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Writer/Copier/ObjectCopier.cs b/src/UglyToad.PdfPig/Writer/Copier/ObjectCopier.cs new file mode 100644 index 00000000..bb82e094 --- /dev/null +++ b/src/UglyToad.PdfPig/Writer/Copier/ObjectCopier.cs @@ -0,0 +1,170 @@ +namespace UglyToad.PdfPig.Writer.Copier +{ + using System; + using System.Collections.Generic; + using PdfPig; + using Tokenization.Scanner; + using Tokens; + using Writer; + + /// + internal class ObjectCopier : IObjectCopier + { + private readonly PdfStreamWriter pdfStream; + + private readonly Dictionary newReferenceMap; + + /// + public ObjectCopier(PdfStreamWriter destinationStream) + { + pdfStream = destinationStream ?? 
throw new ArgumentNullException(nameof(destinationStream)); + newReferenceMap = new Dictionary(); + } + + /// + public IToken CopyObject(IToken sourceToken, PdfDocument sourceDocument) + { + IToken tokenScanner(IndirectReferenceToken referenceToken) + { + var objToken = sourceDocument.Structure.GetObject(referenceToken.Data); + return objToken.Data; + } + + return CopyObject(sourceToken, tokenScanner); + } + + /// + public IToken CopyObject(IToken sourceToken, IPdfTokenScanner tokenScanner) + { + IToken tokenGetter(IndirectReferenceToken referenceToken) + { + var objToken = tokenScanner.Get(referenceToken.Data); + return objToken.Data; + } + + return CopyObject(sourceToken, tokenGetter); + } + + /// + public virtual IToken CopyObject(IToken sourceToken, Func tokenScanner) + { + // This token need to be deep copied, because they could contain reference. So we have to update them. + switch (sourceToken) + { + case DictionaryToken dictionaryToken: + { + var newContent = new Dictionary(); + foreach (var setPair in dictionaryToken.Data) + { + var name = setPair.Key; + var token = setPair.Value; + + newContent.Add(NameToken.Create(name), CopyObject(token, tokenScanner)); + } + + return new DictionaryToken(newContent); + } + case ArrayToken arrayToken: + { + var newArray = new List(arrayToken.Length); + foreach (var token in arrayToken.Data) + { + newArray.Add(CopyObject(token, tokenScanner)); + } + + return new ArrayToken(newArray); + } + case IndirectReferenceToken referenceToken: + { + if (TryGetNewReference(referenceToken, out var newReferenceToken)) + { + return newReferenceToken; + } + + var referencedToken = tokenScanner(referenceToken); + var newReferencedToken = CopyObject(referencedToken, tokenScanner); + + var newToken = WriteToken(newReferencedToken); + SetNewReference(referenceToken, newToken); + return newToken; + } + + case StreamToken streamToken: + { + var properties = CopyObject(streamToken.StreamDictionary, tokenScanner); + var bytes = streamToken.Data; 
+ return new StreamToken(properties as DictionaryToken, bytes); + } + + case ObjectToken _: + { + + // This is because, since we don't write token directly to the stream. So we can't know the offset. + // The token would be invalid. Although I don't think the copy of an object token would ever happen + throw new NotSupportedException("Copying a Object token is not supported"); + } + } + + return sourceToken; + } + + /// + /// + /// + /// + /// + /// + public virtual bool TryGetNewReference(IndirectReferenceToken sourceReferenceToken, out IndirectReferenceToken newReferenceToken) + { + newReferenceToken = default; + foreach (var referenceSet in newReferenceMap) + { + if (!referenceSet.Key.Equals(sourceReferenceToken)) + { + continue; + } + + newReferenceToken = referenceSet.Value; + return true; + } + + return false; + } + + /// + public virtual void ClearReference() + { + newReferenceMap.Clear(); + } + + /// + /// + /// + /// + /// + public void SetNewReference(IndirectReferenceToken oldToken, IndirectReferenceToken newToken) + { + newReferenceMap.Add(oldToken, newToken); + } + + /// + /// + /// + /// + public int ReserveTokenNumber() + { + return pdfStream.ReserveNumber(); + } + + /// + /// + /// + /// + /// + /// + public IndirectReferenceToken WriteToken(IToken token, int? reservedNumber = null) + { + return pdfStream.WriteToken(token, reservedNumber); + } + } +} diff --git a/src/UglyToad.PdfPig/Writer/Copier/TokenHelper.cs b/src/UglyToad.PdfPig/Writer/Copier/TokenHelper.cs new file mode 100644 index 00000000..71a7820a --- /dev/null +++ b/src/UglyToad.PdfPig/Writer/Copier/TokenHelper.cs @@ -0,0 +1,34 @@ +namespace UglyToad.PdfPig.Writer.Copier +{ + using System; + using Tokens; + + internal static class TokenHelper + { + // This is to avoid infinite loop in production. 
Although, it should never happen + const int MAX_ITERATIONS = 10; + + public static T GetTokenAs(IToken token, Func lookupFunc) where T : IToken + { + var iterations = 0; + + var original = token; + while (iterations++ < MAX_ITERATIONS) + { + switch (token) + { + case T result: + return result; + case IndirectReferenceToken tokenReference: + token = lookupFunc(tokenReference); + continue; + case ObjectToken tokenObject: + token = tokenObject.Data; + continue; + } + } + + throw new InvalidOperationException($"Unable to extract a {typeof(T)} token from {original}"); + } + } +} \ No newline at end of file From a190653683d48a11117d341276d4c71be2d042af Mon Sep 17 00:00:00 2001 From: InusualZ Date: Fri, 2 Oct 2020 21:33:36 -0400 Subject: [PATCH 3/4] Use the new IObjectCopier API in the PdfMerger Added a new PagesCopier; this class is responsible for giving a parent to the page trees that are copied from other documents --- .../Writer/Copier/Page/PagesCopier.cs | 96 +++++++++ src/UglyToad.PdfPig/Writer/PdfMerger.cs | 193 ++---------------- 2 files changed, 118 insertions(+), 171 deletions(-) create mode 100644 src/UglyToad.PdfPig/Writer/Copier/Page/PagesCopier.cs diff --git a/src/UglyToad.PdfPig/Writer/Copier/Page/PagesCopier.cs b/src/UglyToad.PdfPig/Writer/Copier/Page/PagesCopier.cs new file mode 100644 index 00000000..4da1d87a --- /dev/null +++ b/src/UglyToad.PdfPig/Writer/Copier/Page/PagesCopier.cs @@ -0,0 +1,96 @@ +namespace UglyToad.PdfPig.Writer.Copier.Page +{ + using System; + using System.Collections.Generic; + using System.Diagnostics; + using Core; + using Tokens; + + /// + internal class PagesCopier : IObjectCopier + { + private readonly ObjectCopier copier; + + private readonly IndirectReferenceToken rootPagesReferenceToken; + + /// + public PagesCopier(ObjectCopier mainCopier, IndirectReferenceToken rootPagesToken = null) + { + copier = mainCopier; + rootPagesReferenceToken = rootPagesToken; + } + + /// + public IToken CopyObject(IToken sourceToken, 
Func tokenScanner) + { + if (!(sourceToken is IndirectReferenceToken sourceReferenceToken)) + { + return null; + } + + // Check if this token haven't been copied before + if (copier.TryGetNewReference(sourceReferenceToken, out var newReferenceToken)) + { + return newReferenceToken; + } + + // Make sure that we are copying a DictionaryToken + var token = tokenScanner(sourceReferenceToken); + if (!(token is DictionaryToken dictionaryToken)) + { + return null; + } + + // Make sure we are copying a `/Pages` Dictionary + if (!dictionaryToken.TryGet(NameToken.Type, out var nameTypeToken) || !nameTypeToken.Equals(NameToken.Pages)) + { + return null; + } + + // We have to reserve the reference before hand, because if we don't, we would fall in a loop. + // The child `/Page` have a reference to the parent + var tokenNumber = copier.ReserveTokenNumber(); + copier.SetNewReference(sourceReferenceToken, new IndirectReferenceToken(new IndirectReference(tokenNumber, 0))); + + // If `/Pages` is not the root page node, copy the token normally + // We are testing for one: + // * If @rootPagesReferenceToken is null, just do a normal copy of the tree + // * If the tree have a Parent NameToken, it means the tree is not a root tree so we don't have to assign him + // a new parent + if (rootPagesReferenceToken == null || dictionaryToken.TryGet(NameToken.Parent, out IndirectReferenceToken _)) + { + return copier.WriteToken(copier.CopyObject(dictionaryToken, tokenScanner), tokenNumber); + } + + // Since the tree is a root tree, it means that the tree comes from another document, we have to make sure + // that the new tree is a child of the new root tree, this we do by adding a Parent NameToken to the tree, + // that point to @rootPagesReferenceToken + return CopyPagesTree(dictionaryToken, tokenNumber, tokenScanner); + } + + private IndirectReferenceToken CopyPagesTree(DictionaryToken pagesDictionary, int reservedNumber, Func tokenScanner) + { + Debug.Assert(rootPagesReferenceToken != 
null); + + var newContent = new Dictionary() + { + {NameToken.Parent, rootPagesReferenceToken} + }; + + foreach (var dataSet in pagesDictionary.Data) + { + newContent.Add(NameToken.Create(dataSet.Key), copier.CopyObject(dataSet.Value, tokenScanner)); + } + + var newPagesTree = new DictionaryToken(newContent); + + return copier.WriteToken(newPagesTree, reservedNumber); + } + + /// + public void ClearReference() + { + // Nothing to do + } + } +} diff --git a/src/UglyToad.PdfPig/Writer/PdfMerger.cs b/src/UglyToad.PdfPig/Writer/PdfMerger.cs index 1d9a25a9..c6c1c8a7 100644 --- a/src/UglyToad.PdfPig/Writer/PdfMerger.cs +++ b/src/UglyToad.PdfPig/Writer/PdfMerger.cs @@ -2,9 +2,10 @@ { using System; using System.Collections.Generic; - using System.Diagnostics; using System.IO; using Content; + using Copier; + using Copier.Page; using Core; using CrossReference; using Encryption; @@ -149,31 +150,36 @@ private class DocumentMerger { private const decimal DefaultVersion = 1.2m; - - private readonly PdfStreamWriter context = new PdfStreamWriter(); - private readonly List pagesTokenReferences = new List(); + + private readonly PdfStreamWriter context; + private readonly List pagesTokenReferences; private readonly IndirectReferenceToken rootPagesReference; + private readonly MultiCopier copier; private decimal currentVersion = DefaultVersion; private int pageCount = 0; - - private readonly Dictionary referencesFromDocument = - new Dictionary(); public DocumentMerger() { + context = new PdfStreamWriter(); + pagesTokenReferences = new List(); + rootPagesReference = context.ReserveNumberToken(); + + copier = new MultiCopier(context); + copier.AddCopier(new PagesCopier(copier, rootPagesReference)); } - + public void AppendDocument(Catalog documentCatalog, decimal version, IPdfTokenScanner tokenScanner) { currentVersion = Math.Max(version, currentVersion); - var (pagesReference, count) = CopyPagesTree(documentCatalog.PageTree, rootPagesReference, tokenScanner); - pageCount += count; - 
pagesTokenReferences.Add(pagesReference); + var copiedPages = copier.CopyObject(new IndirectReferenceToken(documentCatalog.PageTree.Reference), tokenScanner) as IndirectReferenceToken; + pagesTokenReferences.Add(copiedPages); - referencesFromDocument.Clear(); + pageCount += documentCatalog.PagesDictionary.Get(NameToken.Count, tokenScanner).Int; + + copier.ClearReference(); } public byte[] Build() @@ -190,7 +196,7 @@ { NameToken.Count, new NumericToken(pageCount) } }); - var pagesRef = context.WriteToken( pagesDictionary, (int)rootPagesReference.Data.ObjectNumber); + var pagesRef = context.WriteToken(pagesDictionary, (int)rootPagesReference.Data.ObjectNumber); var catalog = new DictionaryToken(new Dictionary { @@ -199,9 +205,9 @@ }); var catalogRef = context.WriteToken(catalog); - + context.Flush(currentVersion, catalogRef); - + var bytes = context.ToArray(); Close(); @@ -209,165 +215,10 @@ return bytes; } - public void Close() + private void Close() { context.Dispose(); } - - private (IndirectReferenceToken, int) CopyPagesTree(PageTreeNode treeNode, IndirectReferenceToken treeParentReference, IPdfTokenScanner tokenScanner) - { - Debug.Assert(!treeNode.IsPage); - - var currentNodeReference = context.ReserveNumberToken(); - - var pageReferences = new List(); - var nodeCount = 0; - foreach (var pageNode in treeNode.Children) - { - IndirectReferenceToken newEntry; - if (!pageNode.IsPage) - { - var count = 0; - (newEntry, count) = CopyPagesTree(pageNode, currentNodeReference, tokenScanner); - nodeCount += count; - } - else - { - newEntry = CopyPageNode(pageNode, currentNodeReference, tokenScanner); - ++nodeCount; - } - - pageReferences.Add(newEntry); - } - - var newPagesNode = new Dictionary - { - { NameToken.Type, NameToken.Pages }, - { NameToken.Kids, new ArrayToken(pageReferences) }, - { NameToken.Count, new NumericToken(nodeCount) }, - { NameToken.Parent, treeParentReference } - }; - - foreach (var pair in treeNode.NodeDictionary.Data) - { - if 
(IgnoreKeyForPagesNode(pair)) - { - continue; - } - - newPagesNode[NameToken.Create(pair.Key)] = CopyToken(pair.Value, tokenScanner); - } - - var pagesDictionary = new DictionaryToken(newPagesNode); - - return (context.WriteToken(pagesDictionary, (int)currentNodeReference.Data.ObjectNumber), nodeCount); - } - - private IndirectReferenceToken CopyPageNode(PageTreeNode pageNode, IndirectReferenceToken parentPagesObject, IPdfTokenScanner tokenScanner) - { - Debug.Assert(pageNode.IsPage); - - var pageDictionary = new Dictionary - { - {NameToken.Parent, parentPagesObject}, - }; - - foreach (var setPair in pageNode.NodeDictionary.Data) - { - var name = setPair.Key; - var token = setPair.Value; - - if (name == NameToken.Parent) - { - // Skip Parent token, since we have to reassign it - continue; - } - - pageDictionary.Add(NameToken.Create(name), CopyToken(token, tokenScanner)); - } - - return context.WriteToken(new DictionaryToken(pageDictionary)); - } - - /// - /// The purpose of this method is to resolve indirect reference. That mean copy the reference's content to the new document's stream - /// and replace the indirect reference with the correct/new one - /// - /// Token to inspect for reference - /// scanner get the content from the original document - /// A reference of the token that was copied. With all the reference updated - private IToken CopyToken(IToken tokenToCopy, IPdfTokenScanner tokenScanner) - { - // This token need to be deep copied, because they could contain reference. So we have to update them. 
- switch (tokenToCopy) - { - case DictionaryToken dictionaryToken: - { - var newContent = new Dictionary(); - foreach (var setPair in dictionaryToken.Data) - { - var name = setPair.Key; - var token = setPair.Value; - newContent.Add(NameToken.Create(name), CopyToken(token, tokenScanner)); - } - - return new DictionaryToken(newContent); - } - case ArrayToken arrayToken: - { - var newArray = new List(arrayToken.Length); - foreach (var token in arrayToken.Data) - { - newArray.Add(CopyToken(token, tokenScanner)); - } - - return new ArrayToken(newArray); - } - case IndirectReferenceToken referenceToken: - { - if (referencesFromDocument.TryGetValue(referenceToken, out var newReferenceToken)) - { - return newReferenceToken; - } - - var tokenObject = DirectObjectFinder.Get(referenceToken.Data, tokenScanner); - - Debug.Assert(!(tokenObject is IndirectReferenceToken)); - - var newToken = CopyToken(tokenObject, tokenScanner); - newReferenceToken = context.WriteToken(newToken); - - referencesFromDocument.Add(referenceToken, newReferenceToken); - - return newReferenceToken; - } - case StreamToken streamToken: - { - var properties = CopyToken(streamToken.StreamDictionary, tokenScanner) as DictionaryToken; - Debug.Assert(properties != null); - - var bytes = streamToken.Data; - return new StreamToken(properties, bytes); - } - - case ObjectToken _: - { - // Since we don't write token directly to the stream. - // We can't know the offset. 
Therefore the token would be invalid - throw new NotSupportedException("Copying a Object token is not supported"); - } - } - - return tokenToCopy; - } - - private static bool IgnoreKeyForPagesNode(KeyValuePair token) - { - return string.Equals(token.Key, NameToken.Type.Data, StringComparison.OrdinalIgnoreCase) - || string.Equals(token.Key, NameToken.Kids.Data, StringComparison.OrdinalIgnoreCase) - || string.Equals(token.Key, NameToken.Count.Data, StringComparison.OrdinalIgnoreCase) - || string.Equals(token.Key, NameToken.Parent.Data, StringComparison.OrdinalIgnoreCase); - } } } } \ No newline at end of file From 704c56285c5005e5cd6f3eb51ccafc53bd2180ae Mon Sep 17 00:00:00 2001 From: InusualZ Date: Mon, 5 Oct 2020 20:16:41 -0400 Subject: [PATCH 4/4] Address CI Errors --- src/UglyToad.PdfPig/Writer/Copier/IObjectCopier.cs | 2 +- src/UglyToad.PdfPig/Writer/Copier/ObjectCopier.cs | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/UglyToad.PdfPig/Writer/Copier/IObjectCopier.cs b/src/UglyToad.PdfPig/Writer/Copier/IObjectCopier.cs index ce24aaad..03531b16 100644 --- a/src/UglyToad.PdfPig/Writer/Copier/IObjectCopier.cs +++ b/src/UglyToad.PdfPig/Writer/Copier/IObjectCopier.cs @@ -6,7 +6,7 @@ /// /// An interface for copying token /// - public interface IObjectCopier + internal interface IObjectCopier { /// /// Copy the token to the destination stream diff --git a/src/UglyToad.PdfPig/Writer/Copier/ObjectCopier.cs b/src/UglyToad.PdfPig/Writer/Copier/ObjectCopier.cs index bb82e094..81c911b4 100644 --- a/src/UglyToad.PdfPig/Writer/Copier/ObjectCopier.cs +++ b/src/UglyToad.PdfPig/Writer/Copier/ObjectCopier.cs @@ -98,10 +98,7 @@ case ObjectToken _: { - - // This is because, since we don't write token directly to the stream. So we can't know the offset. - // The token would be invalid. 
Although I don't think the copy of an object token would ever happen - throw new NotSupportedException("Copying a Object token is not supported"); + throw new NotSupportedException("Copying a Object Token is not supported"); } }