diff --git a/src/UglyToad.PdfPig.Tests/Writer/PdfDocumentBuilderTests.cs b/src/UglyToad.PdfPig.Tests/Writer/PdfDocumentBuilderTests.cs index ad9f5d98..91e9420c 100644 --- a/src/UglyToad.PdfPig.Tests/Writer/PdfDocumentBuilderTests.cs +++ b/src/UglyToad.PdfPig.Tests/Writer/PdfDocumentBuilderTests.cs @@ -834,6 +834,26 @@ } } + [Fact] + public void CanCreatePageTree() + { + var count = 25 * 25 * 25 + 1; + using (var builder = new PdfDocumentBuilder()) + { + for (var i = 0; i < count;i++) + { + builder.AddPage(PageSize.A4); + } + var result = builder.Build(); + WriteFile(nameof(CanCreatePageTree), result); + + using (var document = PdfDocument.Open(result, ParsingOptions.LenientParsingOff)) + { + Assert.Equal(count, document.NumberOfPages); + } + } + } + [InlineData("Single Page Simple - from google drive.pdf")] [InlineData("Old Gutnish Internet Explorer.pdf")] [InlineData("68-1990-01_A.pdf")] @@ -853,6 +873,7 @@ builder.AddPage(doc, i); } var result = builder.Build(); + WriteFile(nameof(CopiedPagesResultInSameData) + "_" + name, result); using (var doc2 = PdfDocument.Open(result, ParsingOptions.LenientParsingOff)) { diff --git a/src/UglyToad.PdfPig/Writer/PdfDocumentBuilder.cs b/src/UglyToad.PdfPig/Writer/PdfDocumentBuilder.cs index d1dda73a..8eca2b99 100644 --- a/src/UglyToad.PdfPig/Writer/PdfDocumentBuilder.cs +++ b/src/UglyToad.PdfPig/Writer/PdfDocumentBuilder.cs @@ -278,7 +278,7 @@ namespace UglyToad.PdfPig.Writer } private readonly ConditionalWeakTable> existingCopies = new ConditionalWeakTable>(); - private readonly ConditionalWeakTable> existingTrees = + private readonly ConditionalWeakTable> existingTrees = new ConditionalWeakTable>(); /// /// Add a new page with the specified size, this page will be included in the output when is called. @@ -315,7 +315,7 @@ namespace UglyToad.PdfPig.Writer throw new KeyNotFoundException($"Page {pageNumber} was not found in the source document."); } - var pageInfo = pagesInfos[pageNumber]; + var pageInfo = pagesInfos[pageNumber]; // copy content streams var streams = new List(); @@ -386,7 +386,7 @@ namespace UglyToad.PdfPig.Writer } pages[builder.PageNumber] = builder; - return builder; + return builder; void CopyResourceDict(IToken token, Dictionary destinationDict) @@ -445,15 +445,12 @@ namespace UglyToad.PdfPig.Writer return dict; } } - - private void CompleteDocument() - { - var fontsWritten = new Dictionary(); - + private void CompleteDocument() + { + // write fonts to reserved object numbers foreach (var font in fonts) { - var fontObj = font.Value.FontProgram.WriteFont(context, font.Value.FontKey.Reference); - fontsWritten.Add(font.Key, fontObj); + font.Value.FontProgram.WriteFont(context, font.Value.FontKey.Reference); } var procSet = new List @@ -465,34 +462,30 @@ namespace UglyToad.PdfPig.Writer NameToken.ImageI }; - var resources = new Dictionary + + int desiredLeafSize = 25; + var numLeafs = (int) Math.Ceiling(Decimal.Divide(Pages.Count, desiredLeafSize)); + + var leafRefs = new List(); + var leafChildren = new List>(); + var leafs = new List>(); + for (var i = 0; i < numLeafs; i++) { - { NameToken.ProcSet, new ArrayToken(procSet) } - }; + leafs.Add(new Dictionary() + { + {NameToken.Type, NameToken.Pages}, + }); + leafChildren.Add(new List()); + leafRefs.Add(context.ReserveObjectNumber()); + } - // var fontDictionary = new DictionaryToken(fontsWritten.Select(x => - // (fonts[x.Key].FontKey.Name, (IToken)x.Value)) - // .ToDictionary(x => x.Item1, x => x.Item2)); - // var fontsDictionaryRef = context.WriteToken(fontDictionary); - // if (fontsWritten.Count > 0) - // { - // var fontsDictionary = new DictionaryToken(fontsWritten.Select(x => - // (fonts[x.Key].FontKey.Name, (IToken)x.Value)) - // .ToDictionary(x => x.Item1, x => x.Item2)); - // - // var fontsDictionaryRef = context.WriteToken(fontsDictionary); - // - // resources.Add(NameToken.Font, fontsDictionaryRef); - // } + int leafNum = 0; - var parentIndirect = context.ReserveObjectNumber(); - - var pageReferences = new List(); foreach (var page in pages) { var pageDictionary = page.Value.additionalPageProperties; pageDictionary[NameToken.Type] = NameToken.Page; - pageDictionary[NameToken.Parent] = parentIndirect; + pageDictionary[NameToken.Parent] = leafRefs[leafNum]; pageDictionary[NameToken.ProcSet] = new ArrayToken(procSet); if (!pageDictionary.ContainsKey(NameToken.MediaBox)) { @@ -526,28 +519,38 @@ namespace UglyToad.PdfPig.Writer } - var pageRef = context.WriteToken( new DictionaryToken(pageDictionary)); + leafChildren[leafNum].Add(context.WriteToken(new DictionaryToken(pageDictionary))); - pageReferences.Add(pageRef); + if (leafChildren[leafNum].Count >= desiredLeafSize) + { + leafNum += 1; + } } - var pagesDictionaryData = new Dictionary + var dummyName = NameToken.Create("ObjIdToUse"); + for (var i = 0; i < leafs.Count; i++) { - {NameToken.Type, NameToken.Pages}, - {NameToken.Kids, new ArrayToken(pageReferences)}, - {NameToken.Resources, new DictionaryToken(resources)}, - {NameToken.Count, new NumericToken(pageReferences.Count)} - }; - - var pagesDictionary = new DictionaryToken(pagesDictionaryData); - - var pagesRef = context.WriteToken(pagesDictionary, parentIndirect); + leafs[i][NameToken.Kids] = new ArrayToken(leafChildren[i]); + leafs[i][NameToken.Count] = new NumericToken(leafChildren[i].Count); + leafs[i][dummyName] = leafRefs[i]; + } var catalogDictionary = new Dictionary { {NameToken.Type, NameToken.Catalog}, - {NameToken.Pages, pagesRef} }; + if (leafs.Count == 1) + { + var leaf = leafs[0]; + var id = leaf[dummyName] as IndirectReferenceToken; + leaf.Remove(dummyName); + catalogDictionary[NameToken.Pages] = context.WriteToken(new DictionaryToken(leaf), id); + } + else + { + var rootPageInfo = CreatePageTree(leafs, null); + catalogDictionary[NameToken.Pages] = rootPageInfo.Ref; + } if (ArchiveStandard != PdfAStandard.None) { @@ -584,6 +587,51 @@ namespace UglyToad.PdfPig.Writer } context.CompletePdf(catalogRef, informationReference); + + (int Count, IndirectReferenceToken Ref) CreatePageTree(List> pagesNodes, IndirectReferenceToken parent) + { + // TODO shorten page tree when there is a single or small number of pages left in a branch + var count = 0; + var thisObj = context.ReserveObjectNumber(); + + var children = new List(); + if (pagesNodes.Count > desiredLeafSize) + { + var currentTreeDepth = (int) Math.Ceiling(Math.Log(pagesNodes.Count, desiredLeafSize)); + var perBranch = (int) Math.Ceiling(Math.Pow(desiredLeafSize, currentTreeDepth - 1)); + var branches = (int)Math.Ceiling(decimal.Divide(pagesNodes.Count, (decimal)perBranch)); + for (var i = 0; i < branches; i++) + { + var part = pagesNodes.Skip(i*perBranch).Take(perBranch).ToList(); + var result = CreatePageTree(part, thisObj); + count += result.Count; + children.Add(result.Ref); + } + } + else + { + foreach (var page in pagesNodes) + { + page[NameToken.Parent] = thisObj; + var id = page[dummyName] as IndirectReferenceToken; + page.Remove(dummyName); + count += (page[NameToken.Count] as NumericToken).Int; + children.Add(context.WriteToken(new DictionaryToken(page), id)); + } + } + + var node = new Dictionary + { + {NameToken.Type, NameToken.Pages}, + {NameToken.Kids, new ArrayToken(children)}, + {NameToken.Count, new NumericToken(count)} + }; + if (parent != null) + { + node[NameToken.Parent] = parent; + } + return (count, context.WriteToken(new DictionaryToken(node), thisObj)); + } } ///