namespace UglyToad.Pdf.Content
{
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using ContentStream;
    using ContentStream.TypedAccessors;
    using Cos;
    using IO;
    using Logging;
    using Parser;

    /// <summary>
    /// Locates the pages of a document by walking the page tree which starts at the
    /// <c>Pages</c> entry of the document catalog.
    /// </summary>
    public class Pages
    {
        private readonly ILog log;
        private readonly Catalog catalog;
        private readonly IPdfObjectParser pdfObjectParser;
        private readonly IPageFactory pageFactory;
        private readonly IRandomAccessRead reader;
        private readonly bool isLenientParsing;
        private readonly PdfDictionary rootPageDictionary;

        // Page dictionaries found so far, keyed by their 1-based page number.
        private readonly Dictionary<int, PdfDictionary> locatedPages = new Dictionary<int, PdfDictionary>();

        /// <summary>
        /// The value of the <c>Count</c> entry read from the root pages dictionary.
        /// </summary>
        public int Count { get; }

        /// <summary>
        /// Reads the root pages dictionary referenced by the catalog and stores the collaborators
        /// needed to locate individual pages later.
        /// </summary>
        /// <param name="log">Receives warnings about unexpected entries in the page tree.</param>
        /// <param name="catalog">The document catalog holding the <c>Pages</c> reference.</param>
        /// <param name="pdfObjectParser">Resolves indirect references to objects.</param>
        /// <param name="pageFactory">Creates <see cref="Page"/> instances from page dictionaries.</param>
        /// <param name="reader">The source passed to the parser and the page factory.</param>
        /// <param name="isLenientParsing">Passed through to the parser and the page factory.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="catalog"/> or <paramref name="pdfObjectParser"/> is null.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// The catalog has no <c>Pages</c> object or it does not resolve to a dictionary.
        /// </exception>
        internal Pages(ILog log, Catalog catalog, IPdfObjectParser pdfObjectParser, IPageFactory pageFactory,
            IRandomAccessRead reader, bool isLenientParsing)
        {
            if (catalog == null)
            {
                throw new ArgumentNullException(nameof(catalog));
            }

            // The parser is used immediately below, fail with a descriptive error rather than a null dereference.
            if (pdfObjectParser == null)
            {
                throw new ArgumentNullException(nameof(pdfObjectParser));
            }

            var pages = catalog.Get(CosName.PAGES) as CosObject;

            if (pages == null)
            {
                throw new InvalidOperationException("No pages were present in the catalog for this PDF document");
            }

            var pagesObject = pdfObjectParser.Parse(pages.ToIndirectReference(), reader, isLenientParsing);

            if (!(pagesObject is PdfDictionary catalogPageDictionary))
            {
                throw new InvalidOperationException("Could not find the root pages object: " + pages);
            }

            rootPageDictionary = catalogPageDictionary;
            Count = catalogPageDictionary.GetIntOrDefault(CosName.COUNT);

            this.log = log;
            this.catalog = catalog;
            this.pdfObjectParser = pdfObjectParser;
            this.pageFactory = pageFactory;
            this.reader = reader;
            this.isLenientParsing = isLenientParsing;
        }

        /// <summary>
        /// Gets the page with the given 1-based number, searching the page tree if the page
        /// has not been located by an earlier search.
        /// </summary>
        /// <param name="pageNumber">The 1-based number of the page to get.</param>
        /// <returns>The page created by the page factory for the located page dictionary.</returns>
        /// <exception cref="ArgumentOutOfRangeException">No page with this number exists in the page tree.</exception>
        public Page GetPage(int pageNumber)
        {
            // Page numbers start at 1 so there is no need to walk the tree for anything lower.
            if (pageNumber <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(pageNumber),
                    "Could not find the page with number: " + pageNumber);
            }

            if (!locatedPages.TryGetValue(pageNumber, out PdfDictionary targetPageDictionary))
            {
                // Every search restarts at the root with an empty list so page numbers are always
                // assigned in the same traversal order, whatever was located by earlier searches.
                // NOTE(review): the original todo below may refer to the missing bookkeeping of observed
                // page numbers which is now performed in FindPage - confirm and remove.
                // todo: running a search for a different, unloaded, page number, results in a bug.
                var observed = new List<int>();

                var isFound = FindPage(rootPageDictionary, pageNumber, observed);

                // FindPage stores every page it visits in locatedPages, including the sought one.
                if (!isFound || !locatedPages.TryGetValue(pageNumber, out targetPageDictionary))
                {
                    throw new ArgumentOutOfRangeException(nameof(pageNumber),
                        "Could not find the page with number: " + pageNumber);
                }
            }

            return pageFactory.Create(pageNumber, targetPageDictionary, new PageTreeMembers(), reader, isLenientParsing);
        }

        /// <summary>
        /// Gets the number for the next page: 1 when nothing has been observed yet,
        /// otherwise one more than the last observed number.
        /// </summary>
        private static int GetNextPageNumber(IReadOnlyList<int> pages)
        {
            if (pages.Count == 0)
            {
                return 1;
            }

            return pages[pages.Count - 1] + 1;
        }

        /// <summary>
        /// Searches the page tree depth-first from <paramref name="currentPageDictionary"/> for the page
        /// with the sought number, recording each page dictionary visited on the way.
        /// </summary>
        /// <param name="currentPageDictionary">The <c>Page</c> or <c>Pages</c> dictionary to search from.</param>
        /// <param name="soughtPageNumber">The 1-based number of the page to find.</param>
        /// <param name="pageNumbersObserved">
        /// The page numbers assigned so far in this search, in order. Each visited page is appended.
        /// </param>
        /// <returns><see langword="true"/> if the sought page was reached, otherwise <see langword="false"/>.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="currentPageDictionary"/> or <paramref name="pageNumbersObserved"/> is null.
        /// </exception>
        public bool FindPage(PdfDictionary currentPageDictionary, int soughtPageNumber, List<int> pageNumbersObserved)
        {
            if (currentPageDictionary == null)
            {
                throw new ArgumentNullException(nameof(currentPageDictionary));
            }

            if (pageNumbersObserved == null)
            {
                throw new ArgumentNullException(nameof(pageNumbersObserved));
            }

            var type = currentPageDictionary.GetName(CosName.TYPE);

            if (type != null && type.Equals(CosName.PAGE))
            {
                var pageNumber = GetNextPageNumber(pageNumbersObserved);

                locatedPages[pageNumber] = currentPageDictionary;

                // Without recording the number every page would be numbered 1 and overwrite the previous one.
                pageNumbersObserved.Add(pageNumber);

                return pageNumber == soughtPageNumber;
            }

            if (type == null || !type.Equals(CosName.PAGES))
            {
                log.Warn("Did not find the expected type (Page or Pages) in dictionary: " + currentPageDictionary);

                return false;
            }

            var kids = currentPageDictionary.GetDictionaryObject(CosName.KIDS) as COSArray;

            if (kids == null)
            {
                log.Warn("Did not find the kids array in the pages dictionary: " + currentPageDictionary);

                return false;
            }

            foreach (var kid in kids.OfType<CosObject>())
            {
                var child = pdfObjectParser.Parse(kid.ToIndirectReference(), reader, isLenientParsing) as PdfDictionary;

                if (child == null)
                {
                    log.Warn("A child of the pages dictionary was not a dictionary: " + kid);

                    continue;
                }

                // Stop as soon as the sought page is reached, the pages before it are already recorded
                // and any later search restarts from the root.
                if (FindPage(child, soughtPageNumber, pageNumbersObserved))
                {
                    return true;
                }
            }

            return false;
        }

        /// <summary>
        /// Currently always returns an empty list.
        /// </summary>
        public IReadOnlyList<Page> GetAllPages()
        {
            return new Page[0];
        }

        /// <summary>
        /// Currently does nothing.
        /// </summary>
        public void LoadAll()
        {
        }
    }
}