perf improvement for copying lots of pages from large documents

This commit is contained in:
Plaisted
2021-02-06 18:04:13 -06:00
parent 92f9af613f
commit 1db481164c
2 changed files with 411 additions and 391 deletions

View File

@@ -271,8 +271,15 @@ namespace UglyToad.PdfPig.Writer
return WriterUtil.CopyToken(context, token, source, refs);
}
/// <summary>
/// Pairs a page dictionary from a source document with the parent dictionaries
/// that were yielded alongside it by <c>WriterUtil.WalkTree</c>.
/// </summary>
internal class PageInfo
{
/// <summary>
/// The dictionary of the page itself; its entries (including Contents) are read when the page is copied.
/// </summary>
public DictionaryToken Page { get; set; }
/// <summary>
/// The parent dictionaries of the page; their Resources entries are put into the copied page.
/// </summary>
public List<DictionaryToken> Parents { get; set; }
}
// Per source token scanner: the reference map handed to the token copier for that source.
// NOTE(review): presumably maps a reference in the source document to the reference of its
// already-written copy, so shared objects are copied only once - confirm against WriterUtil.CopyToken.
private readonly ConditionalWeakTable<IPdfTokenScanner, Dictionary<IndirectReference, IndirectReferenceToken>> existingCopies =
new ConditionalWeakTable<IPdfTokenScanner, Dictionary<IndirectReference, IndirectReferenceToken>>();
// Per source document: page number -> page dictionary and its parents, so the page tree
// does not have to be walked again for every page copied from the same document.
// A ConditionalWeakTable is used so the cached entry does not keep the source document alive.
private readonly ConditionalWeakTable<PdfDocument, Dictionary<int, PageInfo>> existingTrees =
new ConditionalWeakTable<PdfDocument, Dictionary<int, PageInfo>>();
/// <summary>
/// Add a new page with the specified size, this page will be included in the output when <see cref="Build"/> is called.
/// </summary>
@@ -287,16 +294,34 @@ namespace UglyToad.PdfPig.Writer
existingCopies.Add(document.Structure.TokenScanner, refs);
}
// Build the page-number lookup once per source document and reuse it on every later call.
if (!existingTrees.TryGetValue(document, out var pagesInfos))
{
    pagesInfos = new Dictionary<int, PageInfo>();

    // Pages are numbered from 1 in the order the tree walk yields them.
    int i = 1;
    foreach (var (pageDict, parents) in WriterUtil.WalkTree(document.Structure.Catalog.PageTree))
    {
        // Record EVERY page, not only the one requested now: the dictionary is cached for
        // the document below, so a page skipped here would be reported as missing when a
        // later call asks for it.
        pagesInfos[i] = new PageInfo
        {
            Page = pageDict,
            Parents = parents
        };
        i++;
    }

    existingTrees.Add(document, pagesInfos);
}

// Single lookup: fetch the entry and detect an out-of-range page number in one step.
if (!pagesInfos.TryGetValue(pageNumber, out var pageInfo))
{
    throw new KeyNotFoundException($"Page {pageNumber} was not found in the source document.");
}
// copy content streams
var streams = new List<PdfPageBuilder.CopiedContentStream>();
if (pageDict.ContainsKey(NameToken.Contents))
if (pageInfo.Page.ContainsKey(NameToken.Contents))
{
var token = pageDict.Data[NameToken.Contents];
var token = pageInfo.Page.Data[NameToken.Contents];
if (token is ArrayToken array)
{
foreach (var item in array.Data)
@@ -321,7 +346,7 @@ namespace UglyToad.PdfPig.Writer
Dictionary<NameToken, IToken> resources = new Dictionary<NameToken, IToken>();
// just put all parent resources into new page
foreach (var dict in parents)
foreach (var dict in pageInfo.Parents)
{
if (dict.TryGet(NameToken.Resources, out var token))
{
@@ -330,7 +355,7 @@ namespace UglyToad.PdfPig.Writer
}
foreach (var kvp in pageDict.Data)
foreach (var kvp in pageInfo.Page.Data)
{
if (kvp.Key == NameToken.Contents || kvp.Key == NameToken.Parent || kvp.Key == NameToken.Type)
{
@@ -362,12 +387,7 @@ namespace UglyToad.PdfPig.Writer
pages[builder.PageNumber] = builder;
return builder;
}
i++;
}
throw new KeyNotFoundException($"Page {pageNumber} was not found in the source document.");
void CopyResourceDict(IToken token, Dictionary<NameToken, IToken> destinationDict)
{