mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-07-18 03:36:09 +08:00
145 lines
4.8 KiB
C#
145 lines
4.8 KiB
C#
namespace UglyToad.PdfPig.Content
|
|
{
|
|
using System;
|
|
using System.Collections.Generic;
|
|
using Geometry;
|
|
using Logging;
|
|
using Parser.Parts;
|
|
using Tokenization.Scanner;
|
|
using Tokenization.Tokens;
|
|
using Util;
|
|
|
|
/// <summary>
/// Locates individual pages of a document by walking the page tree that starts at the
/// catalog's pages dictionary, and hands the located page dictionaries to the page factory.
/// </summary>
internal class Pages
{
    private readonly ILog log;
    private readonly Catalog catalog;
    private readonly IPageFactory pageFactory;
    private readonly bool isLenientParsing;
    private readonly IPdfTokenScanner pdfScanner;
    private readonly DictionaryToken rootPageDictionary;

    // Page dictionaries discovered by previous tree walks, keyed by 1-based page number.
    private readonly Dictionary<int, DictionaryToken> locatedPages = new Dictionary<int, DictionaryToken>();

    // The MediaBox inherited from ancestor Pages nodes for each located page, keyed by page number.
    // Kept so that a cache hit in GetPage can rebuild the same PageTreeMembers a fresh walk would give.
    private readonly Dictionary<int, MediaBox> locatedPageMediaBoxes = new Dictionary<int, MediaBox>();

    /// <summary>
    /// The value of the Count entry of the root pages dictionary (or the default when absent).
    /// </summary>
    public int Count { get; }

    /// <summary>
    /// Create a new <see cref="Pages"/> rooted at the pages dictionary of <paramref name="catalog"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="catalog"/> is null.</exception>
    internal Pages(ILog log, Catalog catalog, IPageFactory pageFactory, bool isLenientParsing, IPdfTokenScanner pdfScanner)
    {
        if (catalog == null)
        {
            throw new ArgumentNullException(nameof(catalog));
        }

        rootPageDictionary = catalog.PagesDictionary;

        Count = rootPageDictionary.GetIntOrDefault(NameToken.Count);

        this.log = log;
        this.catalog = catalog;
        this.pageFactory = pageFactory;
        this.isLenientParsing = isLenientParsing;
        this.pdfScanner = pdfScanner;
    }

    /// <summary>
    /// Get the page with the given 1-based number, walking the page tree if it has not been located yet.
    /// </summary>
    /// <param name="pageNumber">The 1-based number of the page to create.</param>
    /// <returns>The page built by the page factory from the located page dictionary.</returns>
    /// <exception cref="ArgumentOutOfRangeException">No page with this number exists in the page tree.</exception>
    public Page GetPage(int pageNumber)
    {
        if (locatedPages.TryGetValue(pageNumber, out DictionaryToken targetPageDictionary))
        {
            // The dictionary was found by an earlier walk. Re-apply the MediaBox that was inherited
            // at that time instead of handing the factory an empty set of page tree members.
            var cachedMembers = new PageTreeMembers();

            if (locatedPageMediaBoxes.TryGetValue(pageNumber, out var cachedMediaBox))
            {
                cachedMembers.MediaBox = cachedMediaBox;
            }

            // TODO: cache the page
            return pageFactory.Create(pageNumber, targetPageDictionary, cachedMembers, isLenientParsing);
        }

        var observed = new List<int>();

        var pageTreeMembers = new PageTreeMembers();

        var isFound = FindPage(rootPageDictionary, pageNumber, observed, pageTreeMembers);

        if (!isFound || !locatedPages.TryGetValue(pageNumber, out targetPageDictionary))
        {
            // The single-string constructor treats its argument as the parameter name,
            // so the parameter name, value and message are passed explicitly.
            throw new ArgumentOutOfRangeException(nameof(pageNumber), pageNumber,
                "Could not find the page with number: " + pageNumber);
        }

        // FindPage has already stored the dictionary in locatedPages.
        return pageFactory.Create(pageNumber, targetPageDictionary, pageTreeMembers, isLenientParsing);
    }

    /// <summary>
    /// The number for the next page leaf: 1 when none have been seen, otherwise one more than the last seen.
    /// </summary>
    private static int GetNextPageNumber(IReadOnlyList<int> pages)
    {
        if (pages.Count == 0)
        {
            return 1;
        }

        return pages[pages.Count - 1] + 1;
    }

    /// <summary>
    /// Walk the page tree depth-first from <paramref name="currentPageDictionary"/>, numbering every
    /// Page leaf encountered and recording it, until the sought page is reached.
    /// </summary>
    /// <param name="currentPageDictionary">The Page or Pages dictionary to inspect.</param>
    /// <param name="soughtPageNumber">The 1-based page number being searched for.</param>
    /// <param name="pageNumbersObserved">Page numbers assigned so far in this walk; appended to for each leaf.</param>
    /// <param name="pageTreeMembers">
    /// Receives the MediaBox defined by Pages nodes on the path to the sought page. Must not be null.
    /// When the method returns false the MediaBox is left as it was on entry.
    /// </param>
    /// <returns>True if the sought page was reached in this subtree, otherwise false.</returns>
    public bool FindPage(DictionaryToken currentPageDictionary, int soughtPageNumber, List<int> pageNumbersObserved, PageTreeMembers pageTreeMembers)
    {
        var type = currentPageDictionary.GetNameOrDefault(NameToken.Type);

        if (type?.Equals(NameToken.Page) == true)
        {
            var pageNumber = GetNextPageNumber(pageNumbersObserved);

            locatedPages[pageNumber] = currentPageDictionary;
            // Remember what this leaf inherits so later cache hits see the same value.
            locatedPageMediaBoxes[pageNumber] = pageTreeMembers.MediaBox;
            pageNumbersObserved.Add(pageNumber);

            return pageNumber == soughtPageNumber;
        }

        if (type?.Equals(NameToken.Pages) != true)
        {
            log.Warn("Did not find the expected type (Page or Pages) in dictionary: " + currentPageDictionary);

            return false;
        }

        // Inherited attributes only apply to descendants of the node defining them, so keep the
        // incoming value and put it back if the sought page is not inside this subtree.
        var inheritedMediaBox = pageTreeMembers.MediaBox;

        if (currentPageDictionary.TryGet(NameToken.MediaBox, out var token))
        {
            var mediaBox = DirectObjectFinder.Get<ArrayToken>(token, pdfScanner);

            pageTreeMembers.MediaBox = new MediaBox(new PdfRectangle(mediaBox.GetNumeric(0).Data,
                mediaBox.GetNumeric(1).Data,
                mediaBox.GetNumeric(2).Data,
                mediaBox.GetNumeric(3).Data));
        }

        if (!currentPageDictionary.TryGet(NameToken.Kids, out var kids)
            || !(kids is ArrayToken kidsArray))
        {
            pageTreeMembers.MediaBox = inheritedMediaBox;

            return false;
        }

        // NOTE(review): resources are loaded for every Pages node visited, including subtrees that
        // turn out not to contain the sought page - confirm the factory tolerates this.
        pageFactory.LoadResources(currentPageDictionary, isLenientParsing);

        foreach (var kid in kidsArray.Data)
        {
            var child = DirectObjectFinder.Get<DictionaryToken>(kid, pdfScanner);

            if (FindPage(child, soughtPageNumber, pageNumbersObserved, pageTreeMembers))
            {
                // Stop at the first match so pageTreeMembers reflects only the ancestors of that page.
                return true;
            }
        }

        pageTreeMembers.MediaBox = inheritedMediaBox;

        return false;
    }

    /// <summary>
    /// Placeholder: currently always returns an empty list rather than the document's pages.
    /// </summary>
    public IReadOnlyList<Page> GetAllPages()
    {
        return new Page[0];
    }
}
|
|
}
|