mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-06-28 15:30:17 +08:00
150 lines
4.7 KiB
C#
150 lines
4.7 KiB
C#
namespace UglyToad.Pdf.Content
|
|
{
|
|
using System;
|
|
using System.Collections.Generic;
|
|
using System.Linq;
|
|
using ContentStream;
|
|
using ContentStream.TypedAccessors;
|
|
using Cos;
|
|
using IO;
|
|
using Logging;
|
|
using Parser;
|
|
|
|
/// <summary>
/// Provides access to the pages of a PDF document by walking the page tree
/// referenced from the document catalog.
/// </summary>
public class Pages
{
    private readonly ILog log;
    private readonly Catalog catalog;
    private readonly IPdfObjectParser pdfObjectParser;
    private readonly IPageFactory pageFactory;
    private readonly IRandomAccessRead reader;
    private readonly bool isLenientParsing;
    private readonly PdfDictionary rootPageDictionary;

    // Page dictionaries discovered so far, keyed by 1-based page number.
    private readonly Dictionary<int, PdfDictionary> locatedPages = new Dictionary<int, PdfDictionary>();

    /// <summary>
    /// The value of the Count entry of the root pages dictionary, read with
    /// <c>GetIntOrDefault</c> when this instance was constructed.
    /// </summary>
    public int Count { get; }

    /// <summary>
    /// Resolves the root pages dictionary from the catalog and records its Count.
    /// </summary>
    /// <param name="log">Logger used to report unexpected page tree content.</param>
    /// <param name="catalog">The document catalog holding the Pages reference.</param>
    /// <param name="pdfObjectParser">Parser used to resolve indirect references.</param>
    /// <param name="pageFactory">Factory used to build <see cref="Page"/> instances.</param>
    /// <param name="reader">The source passed through to the parser and the page factory.</param>
    /// <param name="isLenientParsing">Passed through to the parser and the page factory.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="catalog"/> or <paramref name="pdfObjectParser"/> is null.
    /// </exception>
    /// <exception cref="InvalidOperationException">
    /// The catalog has no Pages entry, or the entry does not resolve to a dictionary.
    /// </exception>
    internal Pages(ILog log, Catalog catalog, IPdfObjectParser pdfObjectParser, IPageFactory pageFactory,
        IRandomAccessRead reader, bool isLenientParsing)
    {
        if (catalog == null)
        {
            throw new ArgumentNullException(nameof(catalog));
        }

        // The parser is dereferenced below, so fail with a descriptive exception rather than a NullReferenceException.
        if (pdfObjectParser == null)
        {
            throw new ArgumentNullException(nameof(pdfObjectParser));
        }

        var pages = catalog.Get(CosName.PAGES) as CosObject;

        if (pages == null)
        {
            throw new InvalidOperationException("No pages were present in the catalog for this PDF document");
        }

        var pagesObject = pdfObjectParser.Parse(pages.ToIndirectReference(), reader, isLenientParsing);

        if (!(pagesObject is PdfDictionary catalogPageDictionary))
        {
            throw new InvalidOperationException("Could not find the root pages object: " + pages);
        }

        rootPageDictionary = catalogPageDictionary;
        Count = catalogPageDictionary.GetIntOrDefault(CosName.COUNT);

        this.log = log;
        this.catalog = catalog;
        this.pdfObjectParser = pdfObjectParser;
        this.pageFactory = pageFactory;
        this.reader = reader;
        this.isLenientParsing = isLenientParsing;
    }

    /// <summary>
    /// Gets the page with the given 1-based number, searching the page tree
    /// from the root when the page has not been located before.
    /// </summary>
    /// <param name="pageNumber">The 1-based number of the page to load.</param>
    /// <returns>The page created by the page factory for the located page dictionary.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// No page with the given number exists in the page tree.
    /// </exception>
    public Page GetPage(int pageNumber)
    {
        if (!locatedPages.TryGetValue(pageNumber, out PdfDictionary targetPageDictionary))
        {
            // Each search numbers the leaves from scratch, so it must start with an empty list at the root.
            var observed = new List<int>();

            var isFound = FindPage(rootPageDictionary, pageNumber, observed);

            if (!isFound || !locatedPages.TryGetValue(pageNumber, out targetPageDictionary))
            {
                // The single-string constructor treats its argument as the parameter name,
                // so use the overload which carries the parameter name, value and message separately.
                throw new ArgumentOutOfRangeException(nameof(pageNumber), pageNumber,
                    "Could not find the page with number: " + pageNumber);
            }
        }

        return pageFactory.Create(pageNumber, targetPageDictionary, new PageTreeMembers(), reader, isLenientParsing);
    }

    /// <summary>
    /// Gets the number for the next leaf page: 1 when nothing has been observed yet,
    /// otherwise one more than the last observed number.
    /// </summary>
    private static int GetNextPageNumber(IReadOnlyList<int> pages)
    {
        return pages.Count == 0 ? 1 : pages[pages.Count - 1] + 1;
    }

    /// <summary>
    /// Walks the page tree below <paramref name="currentPageDictionary"/> depth-first in Kids order,
    /// numbering every Page dictionary encountered and caching it by that number.
    /// </summary>
    /// <param name="currentPageDictionary">The Page or Pages dictionary to inspect.</param>
    /// <param name="soughtPageNumber">The 1-based number of the page being searched for.</param>
    /// <param name="pageNumbersObserved">
    /// The numbers assigned to pages so far in this search. Each Page dictionary visited appends
    /// its number, so pass an empty list when starting a search from the root.
    /// </param>
    /// <returns><c>true</c> if a page numbered <paramref name="soughtPageNumber"/> was visited.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="currentPageDictionary"/> or <paramref name="pageNumbersObserved"/> is null.
    /// </exception>
    public bool FindPage(PdfDictionary currentPageDictionary, int soughtPageNumber, List<int> pageNumbersObserved)
    {
        if (currentPageDictionary == null)
        {
            throw new ArgumentNullException(nameof(currentPageDictionary));
        }

        if (pageNumbersObserved == null)
        {
            throw new ArgumentNullException(nameof(pageNumbersObserved));
        }

        var type = currentPageDictionary.GetName(CosName.TYPE);

        if (type != null && type.Equals(CosName.PAGE))
        {
            var pageNumber = GetNextPageNumber(pageNumbersObserved);

            // Record the number so the next leaf receives the following one;
            // without this every page would be numbered 1.
            pageNumbersObserved.Add(pageNumber);

            locatedPages[pageNumber] = currentPageDictionary;

            return pageNumber == soughtPageNumber;
        }

        if (type == null || !type.Equals(CosName.PAGES))
        {
            log.Warn("Did not find the expected type (Page or Pages) in dictionary: " + currentPageDictionary);

            return false;
        }

        var kids = currentPageDictionary.GetDictionaryObject(CosName.KIDS) as COSArray;

        if (kids == null)
        {
            log.Warn("Did not find a Kids array in the Pages dictionary: " + currentPageDictionary);

            return false;
        }

        var childFound = false;

        // The walk does not stop at the first match: every leaf visited is cached in locatedPages.
        // todo: exit early
        foreach (var kid in kids.OfType<CosObject>())
        {
            var child = pdfObjectParser.Parse(kid.ToIndirectReference(), reader, isLenientParsing) as PdfDictionary;

            if (child == null)
            {
                log.Warn("A Kids entry did not resolve to a dictionary and was skipped: " + kid);

                continue;
            }

            if (FindPage(child, soughtPageNumber, pageNumbersObserved))
            {
                childFound = true;
            }
        }

        return childFound;
    }

    /// <summary>
    /// Not implemented yet: currently always returns an empty list.
    /// </summary>
    public IReadOnlyList<Page> GetAllPages()
    {
        return new Page[0];
    }

    /// <summary>
    /// Not implemented yet: currently does nothing.
    /// </summary>
    public void LoadAll()
    {
    }
}
|
|
}
|