From 8b7fb5274fb687b9711d1aff6702d63e712e26ca Mon Sep 17 00:00:00 2001
From: Plaisted
Date: Thu, 14 Jan 2021 13:57:28 -0600
Subject: [PATCH] PdfMerger: Correcting indirect object lookup dict to use dictionary compatible key

---
 .../Writer/PdfMergerTests.cs                  | 16 ++++++++++++++++
 src/UglyToad.PdfPig/Writer/PdfMerger.cs       | 10 +++++-----
 2 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/src/UglyToad.PdfPig.Tests/Writer/PdfMergerTests.cs b/src/UglyToad.PdfPig.Tests/Writer/PdfMergerTests.cs
index ce906c43..4722e215 100644
--- a/src/UglyToad.PdfPig.Tests/Writer/PdfMergerTests.cs
+++ b/src/UglyToad.PdfPig.Tests/Writer/PdfMergerTests.cs
@@ -3,6 +3,7 @@
     using Integration;
     using PdfPig.Writer;
     using System;
+    using System.Collections.Generic;
     using System.IO;
     using Xunit;
 
@@ -95,6 +96,21 @@
             }
         }
 
+        [Fact]
+        public void DedupsObjectsFromSameDoc()
+        {
+            var one = IntegrationHelpers.GetDocumentPath("Multiple Page - from Mortality Statistics.pdf");
+
+            var result = PdfMerger.Merge(new List<byte[]> { File.ReadAllBytes(one) }, new List<IReadOnlyList<int>> { new List<int> { 1, 2} });
+
+            using (var document = PdfDocument.Open(result, ParsingOptions.LenientParsingOff))
+            {
+                Assert.Equal(2, document.NumberOfPages);
+                Assert.True(document.Structure.CrossReferenceTable.ObjectOffsets.Count <= 29,
+                    "Expected object count to be lower than 30"); // 45 objects with duplicates, 29 with correct re-use
+            }
+        }
+
         [Fact]
         public void CanMergeWithObjectStream()
         {
diff --git a/src/UglyToad.PdfPig/Writer/PdfMerger.cs b/src/UglyToad.PdfPig/Writer/PdfMerger.cs
index d884fcd9..df95e719 100644
--- a/src/UglyToad.PdfPig/Writer/PdfMerger.cs
+++ b/src/UglyToad.PdfPig/Writer/PdfMerger.cs
@@ -198,7 +198,7 @@
 
                 currentVersion = Math.Max(version, currentVersion);
 
-                var referencesFromDocument = new Dictionary<IndirectReferenceToken, IndirectReferenceToken>();
+                var referencesFromDocument = new Dictionary<IndirectReference, IndirectReferenceToken>();
 
                 var currentNodeReference = context.ReserveNumberToken();
                 var pagesReferences = new List<IndirectReferenceToken>();
@@ -347,7 +347,7 @@
             }
 
             private IndirectReferenceToken CopyPageNode(PageTreeNode pageNode, IndirectReferenceToken parentPagesObject, IPdfTokenScanner tokenScanner,
-                IDictionary<IndirectReferenceToken, IndirectReferenceToken> referencesFromDocument)
+                IDictionary<IndirectReference, IndirectReferenceToken> referencesFromDocument)
             {
                 Debug.Assert(pageNode.IsPage);
 
@@ -381,7 +381,7 @@
             /// <param name="tokenScanner">scanner get the content from the original document</param>
             /// <param name="referencesFromDocument">Map of previously copied</param>
             /// <returns>A reference of the token that was copied. With all the reference updated</returns>
-            private IToken CopyToken(IToken tokenToCopy, IPdfTokenScanner tokenScanner, IDictionary<IndirectReferenceToken, IndirectReferenceToken> referencesFromDocument)
+            private IToken CopyToken(IToken tokenToCopy, IPdfTokenScanner tokenScanner, IDictionary<IndirectReference, IndirectReferenceToken> referencesFromDocument)
             {
                 // This token need to be deep copied, because they could contain reference. So we have to update them.
                 switch (tokenToCopy)
@@ -410,14 +410,14 @@
                     }
                     case IndirectReferenceToken referenceToken:
                     {
-                        if (referencesFromDocument.TryGetValue(referenceToken, out var newReferenceToken))
+                        if (referencesFromDocument.TryGetValue(referenceToken.Data, out var newReferenceToken))
                         {
                             return newReferenceToken;
                         }
 
                         //we add the token to referencesFromDocument to prevent stackoverflow on references cycles
                         newReferenceToken = context.ReserveNumberToken();
-                        referencesFromDocument.Add(referenceToken, newReferenceToken);
+                        referencesFromDocument.Add(referenceToken.Data, newReferenceToken);
 
                         var tokenObject = DirectObjectFinder.Get<IToken>(referenceToken.Data, tokenScanner);
                         Debug.Assert(!(tokenObject is IndirectReferenceToken));