Merge pull request #259 from plaisted/master

PdfMerger: Fix indirect object lookup dict
This commit is contained in:
Eliot Jones
2021-01-15 09:35:15 -04:00
committed by GitHub
2 changed files with 21 additions and 5 deletions

View File

@@ -3,6 +3,7 @@
using Integration;
using PdfPig.Writer;
using System;
using System.Collections.Generic;
using System.IO;
using Xunit;
@@ -101,6 +102,21 @@
}
}
[Fact]
public void DedupsObjectsFromSameDoc()
{
    // Merge pages 1 and 2 taken from a single source document.
    var sourcePath = IntegrationHelpers.GetDocumentPath("Multiple Page - from Mortality Statistics.pdf");
    var sourceFiles = new List<byte[]> { File.ReadAllBytes(sourcePath) };
    var pageSelections = new List<IReadOnlyList<int>> { new List<int> { 1, 2 } };

    var mergedBytes = PdfMerger.Merge(sourceFiles, pageSelections);

    using (var mergedDocument = PdfDocument.Open(mergedBytes, ParsingOptions.LenientParsingOff))
    {
        Assert.Equal(2, mergedDocument.NumberOfPages);

        // 45 objects are written when shared objects are duplicated per page,
        // 29 when they are correctly re-used across both pages.
        var objectCount = mergedDocument.Structure.CrossReferenceTable.ObjectOffsets.Count;
        Assert.True(objectCount <= 29, "Expected object count to be lower than 30");
    }
}
[Fact]
public void CanMergeWithObjectStream()
{

View File

@@ -242,7 +242,7 @@
currentVersion = Math.Max(version, currentVersion);
var referencesFromDocument = new Dictionary<IndirectReferenceToken, IndirectReferenceToken>();
var referencesFromDocument = new Dictionary<IndirectReference, IndirectReferenceToken>();
var currentNodeReference = context.ReserveNumberToken();
var pagesReferences = new List<IndirectReferenceToken>();
@@ -387,7 +387,7 @@
}
private IndirectReferenceToken CopyPageNode(PageTreeNode pageNode, IndirectReferenceToken parentPagesObject, IPdfTokenScanner tokenScanner,
IDictionary<IndirectReferenceToken, IndirectReferenceToken> referencesFromDocument)
IDictionary<IndirectReference, IndirectReferenceToken> referencesFromDocument)
{
Debug.Assert(pageNode.IsPage);
@@ -421,7 +421,7 @@
/// <param name="tokenScanner">Scanner used to get the content from the original document</param>
/// <param name="referencesFromDocument">Map of references from the original document that were previously copied, to their new references</param>
/// <returns>A reference to the token that was copied, with all the references updated</returns>
private IToken CopyToken(IToken tokenToCopy, IPdfTokenScanner tokenScanner, IDictionary<IndirectReferenceToken, IndirectReferenceToken> referencesFromDocument)
private IToken CopyToken(IToken tokenToCopy, IPdfTokenScanner tokenScanner, IDictionary<IndirectReference, IndirectReferenceToken> referencesFromDocument)
{
// These tokens need to be deep copied because they could contain references, which we have to update.
switch (tokenToCopy)
@@ -450,14 +450,14 @@
}
case IndirectReferenceToken referenceToken:
{
if (referencesFromDocument.TryGetValue(referenceToken, out var newReferenceToken))
if (referencesFromDocument.TryGetValue(referenceToken.Data, out var newReferenceToken))
{
return newReferenceToken;
}
// We add the token to referencesFromDocument to prevent a stack overflow on reference cycles
newReferenceToken = context.ReserveNumberToken();
referencesFromDocument.Add(referenceToken, newReferenceToken);
referencesFromDocument.Add(referenceToken.Data, newReferenceToken);
var tokenObject = DirectObjectFinder.Get<IToken>(referenceToken.Data, tokenScanner);
Debug.Assert(!(tokenObject is IndirectReferenceToken));