Merge pull request #259 from plaisted/master

PdfMerger: Fix indirect object lookup dict
This commit is contained in:
Eliot Jones
2021-01-15 09:35:15 -04:00
committed by GitHub
2 changed files with 21 additions and 5 deletions

View File

@@ -3,6 +3,7 @@
using Integration; using Integration;
using PdfPig.Writer; using PdfPig.Writer;
using System; using System;
using System.Collections.Generic;
using System.IO; using System.IO;
using Xunit; using Xunit;
@@ -101,6 +102,21 @@
} }
} }
/// <summary>
/// Merges a selection (1 and 2) taken from a single source file and checks that the
/// output has two pages and that its cross-reference table stays within the
/// object count expected when shared objects are re-used instead of duplicated.
/// </summary>
[Fact]
public void DedupsObjectsFromSameDoc()
{
    var sourcePath = IntegrationHelpers.GetDocumentPath("Multiple Page - from Mortality Statistics.pdf");
    var files = new List<byte[]> { File.ReadAllBytes(sourcePath) };
    var selections = new List<IReadOnlyList<int>> { new List<int> { 1, 2 } };

    var merged = PdfMerger.Merge(files, selections);

    using (var document = PdfDocument.Open(merged, ParsingOptions.LenientParsingOff))
    {
        Assert.Equal(2, document.NumberOfPages);

        // 45 objects with duplicates, 29 with correct re-use
        var objectCount = document.Structure.CrossReferenceTable.ObjectOffsets.Count;
        Assert.True(objectCount <= 29, "Expected object count to be lower than 30");
    }
}
[Fact] [Fact]
public void CanMergeWithObjectStream() public void CanMergeWithObjectStream()
{ {

View File

@@ -242,7 +242,7 @@
currentVersion = Math.Max(version, currentVersion); currentVersion = Math.Max(version, currentVersion);
var referencesFromDocument = new Dictionary<IndirectReferenceToken, IndirectReferenceToken>(); var referencesFromDocument = new Dictionary<IndirectReference, IndirectReferenceToken>();
var currentNodeReference = context.ReserveNumberToken(); var currentNodeReference = context.ReserveNumberToken();
var pagesReferences = new List<IndirectReferenceToken>(); var pagesReferences = new List<IndirectReferenceToken>();
@@ -387,7 +387,7 @@
} }
private IndirectReferenceToken CopyPageNode(PageTreeNode pageNode, IndirectReferenceToken parentPagesObject, IPdfTokenScanner tokenScanner, private IndirectReferenceToken CopyPageNode(PageTreeNode pageNode, IndirectReferenceToken parentPagesObject, IPdfTokenScanner tokenScanner,
IDictionary<IndirectReferenceToken, IndirectReferenceToken> referencesFromDocument) IDictionary<IndirectReference, IndirectReferenceToken> referencesFromDocument)
{ {
Debug.Assert(pageNode.IsPage); Debug.Assert(pageNode.IsPage);
@@ -421,7 +421,7 @@
/// <param name="tokenScanner">scanner get the content from the original document</param> /// <param name="tokenScanner">scanner get the content from the original document</param>
/// <param name="referencesFromDocument">Map of previously copied</param> /// <param name="referencesFromDocument">Map of previously copied</param>
/// <returns>A reference of the token that was copied. With all the reference updated</returns> /// <returns>A reference of the token that was copied. With all the reference updated</returns>
private IToken CopyToken(IToken tokenToCopy, IPdfTokenScanner tokenScanner, IDictionary<IndirectReferenceToken, IndirectReferenceToken> referencesFromDocument) private IToken CopyToken(IToken tokenToCopy, IPdfTokenScanner tokenScanner, IDictionary<IndirectReference, IndirectReferenceToken> referencesFromDocument)
{ {
// This token need to be deep copied, because they could contain reference. So we have to update them. // This token need to be deep copied, because they could contain reference. So we have to update them.
switch (tokenToCopy) switch (tokenToCopy)
@@ -450,14 +450,14 @@
} }
case IndirectReferenceToken referenceToken: case IndirectReferenceToken referenceToken:
{ {
if (referencesFromDocument.TryGetValue(referenceToken, out var newReferenceToken)) if (referencesFromDocument.TryGetValue(referenceToken.Data, out var newReferenceToken))
{ {
return newReferenceToken; return newReferenceToken;
} }
//we add the token to referencesFromDocument to prevent stackoverflow on references cycles //we add the token to referencesFromDocument to prevent stackoverflow on references cycles
newReferenceToken = context.ReserveNumberToken(); newReferenceToken = context.ReserveNumberToken();
referencesFromDocument.Add(referenceToken, newReferenceToken); referencesFromDocument.Add(referenceToken.Data, newReferenceToken);
var tokenObject = DirectObjectFinder.Get<IToken>(referenceToken.Data, tokenScanner); var tokenObject = DirectObjectFinder.Get<IToken>(referenceToken.Data, tokenScanner);
Debug.Assert(!(tokenObject is IndirectReferenceToken)); Debug.Assert(!(tokenObject is IndirectReferenceToken));