Add AddPageFactory() methods and GetPage<T>() to PdfDocument. Make public IPageFactory<TPage>, PageFactoryBase<TPage>, IResourceStore, ILookupFilterProvider, IPageContentParser, IPdfTokenScanner, UserSpaceUnit, ResourceColorSpace

This commit is contained in:
BobLd 2023-07-01 13:18:54 +01:00
parent 94cc9be967
commit ae5d3627e0
21 changed files with 731 additions and 246 deletions

View File

@ -0,0 +1,176 @@
namespace UglyToad.PdfPig.Tests.Integration
{
using System.Collections.Generic;
using UglyToad.PdfPig.Content;
using UglyToad.PdfPig.Filters;
using UglyToad.PdfPig.Geometry;
using UglyToad.PdfPig.Logging;
using UglyToad.PdfPig.Outline;
using UglyToad.PdfPig.Parser;
using UglyToad.PdfPig.Tokenization.Scanner;
using UglyToad.PdfPig.Tokens;
using Xunit;
public class PageFactoryTests
{
/// <summary>
/// Registers <see cref="SimplePageFactory"/> by its type and checks that page 1
/// can be requested twice in a row with the same result.
/// </summary>
[Fact]
public void SimpleFactory1()
{
    var path = IntegrationHelpers.GetDocumentPath("Various Content Types");

    using (var pdf = PdfDocument.Open(path))
    {
        // Only the factory type is supplied here; no factory instance is created by the test.
        pdf.AddPageFactory<SimplePage>(typeof(SimplePageFactory));

        // Ask for the same page twice: the second request must behave like the first.
        for (var attempt = 0; attempt < 2; attempt++)
        {
            var simplePage = pdf.GetPage<SimplePage>(1);
            Assert.Equal(1, simplePage.Number);
        }
    }
}
/// <summary>
/// Registers an already constructed <see cref="SimplePageFactory"/> instance and checks
/// that page 1 can be requested twice in a row with the same result.
/// </summary>
[Fact]
public void SimpleFactory2()
{
    var path = IntegrationHelpers.GetDocumentPath("Various Content Types");

    using (var pdf = PdfDocument.Open(path))
    {
        // The factory is built by the test itself, using its parameterless constructor.
        pdf.AddPageFactory(new SimplePageFactory());

        // Ask for the same page twice: the second request must behave like the first.
        for (var attempt = 0; attempt < 2; attempt++)
        {
            var simplePage = pdf.GetPage<SimplePage>(1);
            Assert.Equal(1, simplePage.Number);
        }
    }
}
/// <summary>
/// Checks that a <see cref="PageInformation"/> built by <see cref="PageInformationFactory"/>
/// carries the same number, rotation, media box and crop box as the regular <see cref="Page"/>.
/// </summary>
[Fact]
public void InformationFactory()
{
    var path = IntegrationHelpers.GetDocumentPath("Various Content Types");

    using (var pdf = PdfDocument.Open(path))
    {
        pdf.AddPageFactory<PageInformation>(typeof(PageInformationFactory));

        // Reference values come from the default page type.
        Page expected = pdf.GetPage(1);

        // Fetch the custom page twice; both results must match the reference page.
        for (var attempt = 0; attempt < 2; attempt++)
        {
            PageInformation actual = pdf.GetPage<PageInformation>(1);

            Assert.Equal(expected.Number, actual.Number);
            Assert.Equal(expected.Rotation, actual.Rotation);
            Assert.Equal(expected.MediaBox.Bounds, actual.MediaBox.Bounds);
            Assert.Equal(expected.CropBox.Bounds, actual.CropBox.Bounds);

            // The user space unit comparison is currently disabled:
            //Assert.Equal(expected.Unit, actual.UserSpaceUnit);
        }
    }
}
#region SimplePage
/// <summary>
/// Minimal immutable page type used by the tests: a page number, a rotation and a media box.
/// </summary>
public class SimplePage
{
    /// <summary>
    /// Create a <see cref="SimplePage"/> from the three values it exposes.
    /// </summary>
    public SimplePage(int number, int rotation, MediaBox mediaBox)
    {
        MediaBox = mediaBox;
        Rotation = rotation;
        Number = number;
    }

    /// <summary>
    /// The page number passed to the constructor.
    /// </summary>
    public int Number { get; }

    /// <summary>
    /// The rotation value passed to the constructor.
    /// </summary>
    public int Rotation { get; }

    /// <summary>
    /// The media box passed to the constructor.
    /// </summary>
    public MediaBox MediaBox { get; }
}
/// <summary>
/// Page factory that builds a <see cref="SimplePage"/> directly from the inherited page tree values,
/// without reading the page dictionary or its content.
/// </summary>
public class SimplePageFactory : IPageFactory<SimplePage>
{
    /// <summary>
    /// Parameterless constructor, used when the test registers an instance directly.
    /// </summary>
    public SimplePageFactory()
    {
        // Nothing to initialise.
    }

    /// <summary>
    /// Constructor taking the five parsing services. None of them is stored or used by this factory.
    /// </summary>
    public SimplePageFactory(
        IPdfTokenScanner pdfScanner,
        IResourceStore resourceStore,
        ILookupFilterProvider filterProvider,
        IPageContentParser pageContentParser,
        ILog log)
    {
        // Intentionally empty: the arguments are ignored.
    }

    /// <summary>
    /// Create the page from its number and the rotation and media box held by <paramref name="pageTreeMembers"/>.
    /// The remaining arguments are not used.
    /// </summary>
    public SimplePage Create(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers, NamedDestinations annotationProvider, IParsingOptions parsingOptions)
        => new SimplePage(number, pageTreeMembers.Rotation, pageTreeMembers.MediaBox);
}
#endregion
#region PageInformation
/// <summary>
/// Mutable bag of basic page values filled in by <see cref="PageInformationFactory"/>.
/// </summary>
public class PageInformation
{
    /// <summary>
    /// The page number.
    /// </summary>
    public int Number { get; set; }

    /// <summary>
    /// The page rotation.
    /// </summary>
    public PageRotationDegrees Rotation { get; set; }

    /// <summary>
    /// The page media box.
    /// </summary>
    public MediaBox MediaBox { get; set; }

    /// <summary>
    /// The page crop box.
    /// </summary>
    public CropBox CropBox { get; set; }

    /// <summary>
    /// The page user space unit.
    /// </summary>
    public UserSpaceUnit UserSpaceUnit { get; set; }
}
/// <summary>
/// Page factory built on <see cref="PageFactoryBase{TPage}"/> that records the values handed to
/// <c>ProcessPage</c> in a <see cref="PageInformation"/> and ignores the page content bytes.
/// </summary>
public class PageInformationFactory : PageFactoryBase<PageInformation>
{
    /// <summary>
    /// Create the factory; every argument is forwarded unchanged to the base class.
    /// </summary>
    public PageInformationFactory(
        IPdfTokenScanner pdfScanner,
        IResourceStore resourceStore,
        ILookupFilterProvider filterProvider,
        IPageContentParser pageContentParser,
        ILog log)
        : base(pdfScanner, resourceStore, filterProvider, pageContentParser, log)
    {
    }

    /// <summary>
    /// Page with content: the content bytes are dropped and the call is forwarded to the overload without content.
    /// </summary>
    protected override PageInformation ProcessPage(
        int pageNumber,
        DictionaryToken dictionary,
        NamedDestinations namedDestinations,
        IReadOnlyList<byte> contentBytes,
        CropBox cropBox,
        UserSpaceUnit userSpaceUnit,
        PageRotationDegrees rotation,
        MediaBox mediaBox,
        IParsingOptions parsingOptions)
        => ProcessPage(pageNumber, dictionary, namedDestinations, cropBox, userSpaceUnit, rotation, mediaBox, parsingOptions);

    /// <summary>
    /// Page without content: copy the number, rotation, boxes and user space unit into a new <see cref="PageInformation"/>.
    /// </summary>
    protected override PageInformation ProcessPage(
        int pageNumber,
        DictionaryToken dictionary,
        NamedDestinations namedDestinations,
        CropBox cropBox,
        UserSpaceUnit userSpaceUnit,
        PageRotationDegrees rotation,
        MediaBox mediaBox,
        IParsingOptions parsingOptions)
    {
        var information = new PageInformation();
        information.Number = pageNumber;
        information.Rotation = rotation;
        information.MediaBox = mediaBox;
        information.CropBox = cropBox;
        information.UserSpaceUnit = userSpaceUnit;
        return information;
    }
}
#endregion
}
}

View File

@ -81,17 +81,21 @@
"UglyToad.PdfPig.Content.Hyperlink", "UglyToad.PdfPig.Content.Hyperlink",
"UglyToad.PdfPig.Content.InlineImage", "UglyToad.PdfPig.Content.InlineImage",
"UglyToad.PdfPig.Content.IPdfImage", "UglyToad.PdfPig.Content.IPdfImage",
"UglyToad.PdfPig.Content.IResourceStore",
"UglyToad.PdfPig.Content.Letter", "UglyToad.PdfPig.Content.Letter",
"UglyToad.PdfPig.Content.MarkedContentElement", "UglyToad.PdfPig.Content.MarkedContentElement",
"UglyToad.PdfPig.Content.MediaBox", "UglyToad.PdfPig.Content.MediaBox",
"UglyToad.PdfPig.Content.OptionalContentGroupElement", "UglyToad.PdfPig.Content.OptionalContentGroupElement",
"UglyToad.PdfPig.Content.Page", "UglyToad.PdfPig.Content.Page",
"UglyToad.PdfPig.Content.PageFactoryBase`1",
"UglyToad.PdfPig.Content.PageRotationDegrees", "UglyToad.PdfPig.Content.PageRotationDegrees",
"UglyToad.PdfPig.Content.PageSize", "UglyToad.PdfPig.Content.PageSize",
"UglyToad.PdfPig.Content.PageTreeNode", "UglyToad.PdfPig.Content.PageTreeNode",
"UglyToad.PdfPig.Content.PageTreeMembers",
"UglyToad.PdfPig.Content.Word", "UglyToad.PdfPig.Content.Word",
"UglyToad.PdfPig.Content.TextOrientation", "UglyToad.PdfPig.Content.TextOrientation",
"UglyToad.PdfPig.Content.XmpMetadata", "UglyToad.PdfPig.Content.XmpMetadata",
"UglyToad.PdfPig.Content.IPageFactory`1",
"UglyToad.PdfPig.CrossReference.CrossReferenceTable", "UglyToad.PdfPig.CrossReference.CrossReferenceTable",
"UglyToad.PdfPig.CrossReference.CrossReferenceType", "UglyToad.PdfPig.CrossReference.CrossReferenceType",
"UglyToad.PdfPig.CrossReference.TrailerDictionary", "UglyToad.PdfPig.CrossReference.TrailerDictionary",
@ -99,6 +103,7 @@
"UglyToad.PdfPig.Filters.DefaultFilterProvider", "UglyToad.PdfPig.Filters.DefaultFilterProvider",
"UglyToad.PdfPig.Filters.IFilter", "UglyToad.PdfPig.Filters.IFilter",
"UglyToad.PdfPig.Filters.IFilterProvider", "UglyToad.PdfPig.Filters.IFilterProvider",
"UglyToad.PdfPig.Filters.ILookupFilterProvider",
"UglyToad.PdfPig.Functions.FunctionTypes", "UglyToad.PdfPig.Functions.FunctionTypes",
"UglyToad.PdfPig.Functions.PdfFunction", "UglyToad.PdfPig.Functions.PdfFunction",
"UglyToad.PdfPig.PdfFonts.CharacterBoundingBox", "UglyToad.PdfPig.PdfFonts.CharacterBoundingBox",
@ -109,9 +114,11 @@
"UglyToad.PdfPig.PdfFonts.FontStretch", "UglyToad.PdfPig.PdfFonts.FontStretch",
"UglyToad.PdfPig.PdfFonts.IFont", "UglyToad.PdfPig.PdfFonts.IFont",
"UglyToad.PdfPig.Geometry.GeometryExtensions", "UglyToad.PdfPig.Geometry.GeometryExtensions",
"UglyToad.PdfPig.Geometry.UserSpaceUnit",
"UglyToad.PdfPig.Graphics.Colors.CMYKColor", "UglyToad.PdfPig.Graphics.Colors.CMYKColor",
"UglyToad.PdfPig.Graphics.Colors.ColorSpace", "UglyToad.PdfPig.Graphics.Colors.ColorSpace",
"UglyToad.PdfPig.Graphics.PdfPath", "UglyToad.PdfPig.Graphics.PdfPath",
"UglyToad.PdfPig.Graphics.Colors.ResourceColorSpace",
"UglyToad.PdfPig.Graphics.Colors.ColorSpaceExtensions", "UglyToad.PdfPig.Graphics.Colors.ColorSpaceExtensions",
"UglyToad.PdfPig.Graphics.Colors.ColorSpaceFamily", "UglyToad.PdfPig.Graphics.Colors.ColorSpaceFamily",
"UglyToad.PdfPig.Graphics.Colors.GrayColor", "UglyToad.PdfPig.Graphics.Colors.GrayColor",
@ -230,6 +237,7 @@
"UglyToad.PdfPig.Graphics.TextMatrices", "UglyToad.PdfPig.Graphics.TextMatrices",
"UglyToad.PdfPig.Graphics.XObjectContentRecord", "UglyToad.PdfPig.Graphics.XObjectContentRecord",
"UglyToad.PdfPig.Images.ColorSpaceDetailsByteConverter", "UglyToad.PdfPig.Images.ColorSpaceDetailsByteConverter",
"UglyToad.PdfPig.IParsingOptions",
"UglyToad.PdfPig.Logging.ILog", "UglyToad.PdfPig.Logging.ILog",
"UglyToad.PdfPig.Outline.Bookmarks", "UglyToad.PdfPig.Outline.Bookmarks",
"UglyToad.PdfPig.Outline.BookmarkNode", "UglyToad.PdfPig.Outline.BookmarkNode",
@ -237,15 +245,18 @@
"UglyToad.PdfPig.Outline.EmbeddedBookmarkNode", "UglyToad.PdfPig.Outline.EmbeddedBookmarkNode",
"UglyToad.PdfPig.Outline.ExternalBookmarkNode", "UglyToad.PdfPig.Outline.ExternalBookmarkNode",
"UglyToad.PdfPig.Outline.UriBookmarkNode", "UglyToad.PdfPig.Outline.UriBookmarkNode",
"UglyToad.PdfPig.Outline.NamedDestinations",
"UglyToad.PdfPig.Outline.Destinations.ExplicitDestination", "UglyToad.PdfPig.Outline.Destinations.ExplicitDestination",
"UglyToad.PdfPig.Outline.Destinations.ExplicitDestinationCoordinates", "UglyToad.PdfPig.Outline.Destinations.ExplicitDestinationCoordinates",
"UglyToad.PdfPig.Outline.Destinations.ExplicitDestinationType", "UglyToad.PdfPig.Outline.Destinations.ExplicitDestinationType",
"UglyToad.PdfPig.ParsingOptions", "UglyToad.PdfPig.ParsingOptions",
"UglyToad.PdfPig.Parser.IPageContentParser",
"UglyToad.PdfPig.PdfDocument", "UglyToad.PdfPig.PdfDocument",
"UglyToad.PdfPig.PdfExtensions", "UglyToad.PdfPig.PdfExtensions",
"UglyToad.PdfPig.Rendering.IPageImageRenderer", "UglyToad.PdfPig.Rendering.IPageImageRenderer",
"UglyToad.PdfPig.Rendering.PdfRendererImageFormat", "UglyToad.PdfPig.Rendering.PdfRendererImageFormat",
"UglyToad.PdfPig.Structure", "UglyToad.PdfPig.Structure",
"UglyToad.PdfPig.Tokenization.Scanner.IPdfTokenScanner",
"UglyToad.PdfPig.Util.Adler32Checksum", "UglyToad.PdfPig.Util.Adler32Checksum",
"UglyToad.PdfPig.Util.IWordExtractor", "UglyToad.PdfPig.Util.IWordExtractor",
"UglyToad.PdfPig.Util.DefaultWordExtractor", "UglyToad.PdfPig.Util.DefaultWordExtractor",

View File

@ -3,12 +3,19 @@
using Outline; using Outline;
using Tokens; using Tokens;
internal interface IPageFactory /// <summary>
/// Page factory interface.
/// </summary>
/// <typeparam name="TPage">The type of page the page factory creates.</typeparam>
public interface IPageFactory<TPage>
{ {
Page Create(int number, /// <summary>
/// Create the page.
/// </summary>
TPage Create(int number,
DictionaryToken dictionary, DictionaryToken dictionary,
PageTreeMembers pageTreeMembers, PageTreeMembers pageTreeMembers,
NamedDestinations annotationProvider, NamedDestinations annotationProvider,
InternalParsingOptions parsingOptions); IParsingOptions parsingOptions);
} }
} }

View File

@ -5,9 +5,15 @@
using System.Collections.Generic; using System.Collections.Generic;
using Tokens; using Tokens;
internal interface IResourceStore /// <summary>
/// Resource store.
/// </summary>
public interface IResourceStore
{ {
void LoadResourceDictionary(DictionaryToken resourceDictionary, InternalParsingOptions parsingOptions); /// <summary>
/// Load the resource dictionary.
/// </summary>
void LoadResourceDictionary(DictionaryToken resourceDictionary, IParsingOptions parsingOptions);
/// <summary> /// <summary>
/// Remove any named resources and associated state for the last resource dictionary loaded. /// Remove any named resources and associated state for the last resource dictionary loaded.
@ -15,22 +21,49 @@
/// </summary> /// </summary>
void UnloadResourceDictionary(); void UnloadResourceDictionary();
/// <summary>
/// Get the font corresponding to the name.
/// </summary>
IFont GetFont(NameToken name); IFont GetFont(NameToken name);
/// <summary>
/// Try get the XObject corresponding to the name.
/// </summary>
bool TryGetXObject(NameToken name, out StreamToken stream); bool TryGetXObject(NameToken name, out StreamToken stream);
/// <summary>
/// Get the extended graphics state dictionary corresponding to the name.
/// </summary>
DictionaryToken GetExtendedGraphicsStateDictionary(NameToken name); DictionaryToken GetExtendedGraphicsStateDictionary(NameToken name);
/// <summary>
/// Get the font from the <see cref="IndirectReferenceToken"/>.
/// </summary>
IFont GetFontDirectly(IndirectReferenceToken fontReferenceToken); IFont GetFontDirectly(IndirectReferenceToken fontReferenceToken);
/// <summary>
/// Get the named color space by its name.
/// </summary>
bool TryGetNamedColorSpace(NameToken name, out ResourceColorSpace namedColorSpace); bool TryGetNamedColorSpace(NameToken name, out ResourceColorSpace namedColorSpace);
/// <summary>
/// Get the color space details corresponding to the name.
/// </summary>
ColorSpaceDetails GetColorSpaceDetails(NameToken name, DictionaryToken dictionary); ColorSpaceDetails GetColorSpaceDetails(NameToken name, DictionaryToken dictionary);
/// <summary>
/// Get the marked content properties dictionary corresponding to the name.
/// </summary>
DictionaryToken GetMarkedContentPropertiesDictionary(NameToken name); DictionaryToken GetMarkedContentPropertiesDictionary(NameToken name);
/// <summary>
/// Get all <see cref="PatternColor"/> as a dictionary. Keys are the <see cref="PatternColor"/> names.
/// </summary>
IReadOnlyDictionary<NameToken, PatternColor> GetPatterns(); IReadOnlyDictionary<NameToken, PatternColor> GetPatterns();
/// <summary>
/// Get the shading corresponding to the name.
/// </summary>
Shading GetShading(NameToken name); Shading GetShading(NameToken name);
} }
} }

View File

@ -0,0 +1,264 @@
namespace UglyToad.PdfPig.Content
{
using Core;
using System;
using System.Collections.Generic;
using UglyToad.PdfPig.Filters;
using UglyToad.PdfPig.Geometry;
using UglyToad.PdfPig.Logging;
using UglyToad.PdfPig.Outline;
using UglyToad.PdfPig.Parser;
using UglyToad.PdfPig.Parser.Parts;
using UglyToad.PdfPig.Tokenization.Scanner;
using UglyToad.PdfPig.Tokens;
using UglyToad.PdfPig.Util;
/// <summary>
/// Page factory abstract class.
/// </summary>
/// <typeparam name="TPage">The type of page the page factory creates.</typeparam>
public abstract class PageFactoryBase<TPage> : IPageFactory<TPage>
{
/// <summary>
/// The token scanner used to resolve indirect objects while building a page.
/// </summary>
public readonly IPdfTokenScanner pdfScanner;

/// <summary>
/// The resource store into which the page's resource dictionaries are loaded.
/// </summary>
public readonly IResourceStore resourceStore;

/// <summary>
/// The filter provider used to decode content streams.
/// </summary>
public readonly ILookupFilterProvider filterProvider;

/// <summary>
/// The page content parser, made available to derived factories.
/// </summary>
public readonly IPageContentParser pageContentParser;

/// <summary>
/// The <see cref="ILog"/> that receives messages raised while building a page.
/// </summary>
public readonly ILog log;

/// <summary>
/// Initialise a <see cref="PageFactoryBase{TPage}"/> by storing the supplied services as given.
/// </summary>
/// <param name="pdfScanner">Stored in <see cref="pdfScanner"/>.</param>
/// <param name="resourceStore">Stored in <see cref="resourceStore"/>.</param>
/// <param name="filterProvider">Stored in <see cref="filterProvider"/>.</param>
/// <param name="pageContentParser">Stored in <see cref="pageContentParser"/>.</param>
/// <param name="log">Stored in <see cref="log"/>.</param>
protected PageFactoryBase(
    IPdfTokenScanner pdfScanner,
    IResourceStore resourceStore,
    ILookupFilterProvider filterProvider,
    IPageContentParser pageContentParser,
    ILog log)
{
    // Assigned in parameter order; no validation is performed.
    this.pdfScanner = pdfScanner;
    this.resourceStore = resourceStore;
    this.filterProvider = filterProvider;
    this.pageContentParser = pageContentParser;
    this.log = log;
}
/// <inheritdoc/>
// Workflow: validate the arguments, resolve the media box, crop box and rotation, load the
// inherited and page-level resource dictionaries into the resource store, decode the page
// content (if any) and hand everything to the matching ProcessPage overload.
// Every resource dictionary loaded here is unloaded again before returning, including when
// decoding or ProcessPage throws, so the resource store (a field that outlives this call)
// does not keep state from a failed page.
public TPage Create(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers,
    NamedDestinations namedDestinations, IParsingOptions parsingOptions)
{
    if (dictionary == null)
    {
        throw new ArgumentNullException(nameof(dictionary));
    }

    if (pageTreeMembers == null)
    {
        // The rotation and parent resources are always read from it below.
        throw new ArgumentNullException(nameof(pageTreeMembers));
    }

    var type = dictionary.GetNameOrDefault(NameToken.Type);

    if (type != null && !type.Equals(NameToken.Page))
    {
        // A wrong /Type is reported but does not stop the page from being built.
        parsingOptions.Logger.Error($"Page {number} had its type specified as {type} rather than 'Page'.");
    }

    MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers);
    CropBox cropBox = GetCropBox(dictionary, pageTreeMembers, mediaBox);

    // Start from the inherited rotation; a Rotate entry on the page itself takes precedence.
    var rotation = new PageRotationDegrees(pageTreeMembers.Rotation);
    // TODO - check if NameToken.Rotate is already looked for in Pages.cs, we don't need to look again
    if (dictionary.TryGet(NameToken.Rotate, pdfScanner, out NumericToken rotateToken))
    {
        rotation = new PageRotationDegrees(rotateToken.Int);
    }

    // Number of resource dictionaries successfully loaded, i.e. how many must be unloaded.
    var stackDepth = 0;

    try
    {
        // Parent resources first (this drains the queue), then the page's own resources.
        while (pageTreeMembers.ParentResources.Count > 0)
        {
            var resource = pageTreeMembers.ParentResources.Dequeue();

            resourceStore.LoadResourceDictionary(resource, parsingOptions);
            stackDepth++;
        }

        if (dictionary.TryGet(NameToken.Resources, pdfScanner, out DictionaryToken resources))
        {
            resourceStore.LoadResourceDictionary(resources, parsingOptions);
            stackDepth++;
        }

        UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary);

        if (!dictionary.TryGet(NameToken.Contents, out var contents))
        {
            // No Contents entry: the page has no content to decode.
            return ProcessPage(number, dictionary, namedDestinations, cropBox, userSpaceUnit, rotation, mediaBox, parsingOptions);
        }

        if (DirectObjectFinder.TryGet<ArrayToken>(contents, pdfScanner, out var array))
        {
            // Contents is an array of streams: decode each one and join them with a newline.
            var joined = new List<byte>();

            for (var i = 0; i < array.Data.Count; i++)
            {
                var item = array.Data[i];

                if (!(item is IndirectReferenceToken obj))
                {
                    throw new PdfDocumentFormatException($"The contents contained something which was not an indirect reference: {item}.");
                }

                var part = DirectObjectFinder.Get<StreamToken>(obj, pdfScanner);

                if (part == null)
                {
                    throw new InvalidOperationException($"Could not find the contents for object {obj}.");
                }

                joined.AddRange(part.Decode(filterProvider, pdfScanner));

                if (i < array.Data.Count - 1)
                {
                    joined.Add((byte)'\n');
                }
            }

            return ProcessPage(number, dictionary, namedDestinations, joined, cropBox, userSpaceUnit, rotation, mediaBox, parsingOptions);
        }

        // Contents is a single stream.
        var contentStream = DirectObjectFinder.Get<StreamToken>(contents, pdfScanner);

        if (contentStream == null)
        {
            throw new InvalidOperationException("Failed to parse the content for the page: " + number);
        }

        var bytes = contentStream.Decode(filterProvider, pdfScanner);

        return ProcessPage(number, dictionary, namedDestinations, bytes, cropBox, userSpaceUnit, rotation, mediaBox, parsingOptions);
    }
    finally
    {
        // Undo exactly the loads that succeeded, on both the normal and the exceptional path.
        for (var i = 0; i < stackDepth; i++)
        {
            resourceStore.UnloadResourceDictionary();
        }
    }
}
/// <summary>
/// Process a page that has content.
/// <para>Called by <see cref="Create"/> when the page dictionary has a Contents entry, after the page's
/// resource dictionaries have been loaded into <see cref="resourceStore"/> and before they are unloaded.</para>
/// </summary>
/// <param name="pageNumber">The page number passed to <see cref="Create"/>.</param>
/// <param name="dictionary">The page dictionary.</param>
/// <param name="namedDestinations">The named destinations passed to <see cref="Create"/>.</param>
/// <param name="contentBytes">The decoded content stream bytes. When Contents is an array of streams,
/// the decoded streams are joined in order with a single newline byte between them.</param>
/// <param name="cropBox">The crop box returned by <see cref="GetCropBox"/>.</param>
/// <param name="userSpaceUnit">The user space unit returned by <see cref="GetUserSpaceUnits"/>.</param>
/// <param name="rotation">The inherited rotation, replaced by the page's own Rotate entry when it has one.</param>
/// <param name="mediaBox">The media box returned by <see cref="GetMediaBox"/>.</param>
/// <param name="parsingOptions">The parsing options passed to <see cref="Create"/>.</param>
/// <returns>The page, which <see cref="Create"/> returns to its caller.</returns>
protected abstract TPage ProcessPage(
int pageNumber,
DictionaryToken dictionary,
NamedDestinations namedDestinations,
IReadOnlyList<byte> contentBytes,
CropBox cropBox,
UserSpaceUnit userSpaceUnit,
PageRotationDegrees rotation,
MediaBox mediaBox,
IParsingOptions parsingOptions);
/// <summary>
/// Process a page with no content.
/// <para>Called by <see cref="Create"/> when the page dictionary has no Contents entry, after the page's
/// resource dictionaries have been loaded into <see cref="resourceStore"/> and before they are unloaded.</para>
/// </summary>
/// <param name="pageNumber">The page number passed to <see cref="Create"/>.</param>
/// <param name="dictionary">The page dictionary.</param>
/// <param name="namedDestinations">The named destinations passed to <see cref="Create"/>.</param>
/// <param name="cropBox">The crop box returned by <see cref="GetCropBox"/>.</param>
/// <param name="userSpaceUnit">The user space unit returned by <see cref="GetUserSpaceUnits"/>.</param>
/// <param name="rotation">The inherited rotation, replaced by the page's own Rotate entry when it has one.</param>
/// <param name="mediaBox">The media box returned by <see cref="GetMediaBox"/>.</param>
/// <param name="parsingOptions">The parsing options passed to <see cref="Create"/>.</param>
/// <returns>The page, which <see cref="Create"/> returns to its caller.</returns>
protected abstract TPage ProcessPage(
int pageNumber,
DictionaryToken dictionary,
NamedDestinations namedDestinations,
CropBox cropBox,
UserSpaceUnit userSpaceUnit,
PageRotationDegrees rotation,
MediaBox mediaBox,
IParsingOptions parsingOptions);
/// <summary>
/// Read the user space unit from the page dictionary.
/// </summary>
/// <param name="dictionary">The page dictionary to inspect.</param>
/// <returns>
/// A <see cref="UserSpaceUnit"/> built from the integer value of the UserUnit entry when that entry is
/// stored directly as a <see cref="NumericToken"/>; otherwise <see cref="UserSpaceUnit.Default"/>.
/// </returns>
public static UserSpaceUnit GetUserSpaceUnits(DictionaryToken dictionary)
{
    // Anything other than a directly stored number falls back to the default unit.
    if (!dictionary.TryGet(NameToken.UserUnit, out var entry) || !(entry is NumericToken number))
    {
        return UserSpaceUnit.Default;
    }

    return new UserSpaceUnit(number.Int);
}
/// <summary>
/// Work out the crop box for a page.
/// </summary>
/// <param name="dictionary">The page dictionary.</param>
/// <param name="pageTreeMembers">The values inherited from the page tree.</param>
/// <param name="mediaBox">The page media box, used as the fallback.</param>
/// <returns>
/// The crop box from the page's own CropBox array when it has four entries; a crop box with the media box
/// bounds when that array has the wrong length; otherwise the inherited crop box, or the media box bounds
/// when none is inherited.
/// </returns>
public CropBox GetCropBox(DictionaryToken dictionary, PageTreeMembers pageTreeMembers, MediaBox mediaBox)
{
    // No usable CropBox array on the page itself: use the inherited value or the media box.
    if (!dictionary.TryGet(NameToken.CropBox, out var entry) ||
        !DirectObjectFinder.TryGet(entry, pdfScanner, out ArrayToken values))
    {
        return pageTreeMembers.GetCropBox() ?? new CropBox(mediaBox.Bounds);
    }

    if (values.Length == 4)
    {
        return new CropBox(values.ToRectangle(pdfScanner));
    }

    // Malformed array: report it and fall back to the media box.
    log.Error($"The CropBox was the wrong length in the dictionary: {dictionary}. Array was: {values}. Using MediaBox.");

    return new CropBox(mediaBox.Bounds);
}
/// <summary>
/// Work out the media box for a page.
/// </summary>
/// <param name="number">The page number, used only in log messages.</param>
/// <param name="dictionary">The page dictionary.</param>
/// <param name="pageTreeMembers">The values inherited from the page tree.</param>
/// <returns>
/// The media box from the page's own MediaBox array when it has four entries; otherwise the inherited
/// media box. <see cref="MediaBox.Letter"/> is returned, and an error logged, when the page's array has
/// the wrong length or when no media box is available at all.
/// </returns>
public MediaBox GetMediaBox(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers)
{
    if (dictionary.TryGet(NameToken.MediaBox, out var mediaBoxObject)
        && DirectObjectFinder.TryGet(mediaBoxObject, pdfScanner, out ArrayToken mediaBoxArray))
    {
        if (mediaBoxArray.Length != 4)
        {
            log.Error($"The MediaBox was the wrong length in the dictionary: {dictionary}. Array was: {mediaBoxArray}. Defaulting to US Letter.");

            return MediaBox.Letter;
        }

        return new MediaBox(mediaBoxArray.ToRectangle(pdfScanner));
    }

    // The page does not define a usable MediaBox array: fall back to the inherited one.
    var inherited = pageTreeMembers.MediaBox;

    if (inherited != null)
    {
        return inherited;
    }

    // Message corrected: it previously read "was the wrong missing".
    log.Error($"The MediaBox was missing for page {number}. Using US Letter.");

    // PDFBox defaults to US Letter.
    return MediaBox.Letter;
}
}
}

View File

@ -6,17 +6,26 @@
/// <summary> /// <summary>
/// Contains the values inherited from the Page Tree for this page. /// Contains the values inherited from the Page Tree for this page.
/// </summary> /// </summary>
internal class PageTreeMembers public class PageTreeMembers
{ {
public CropBox GetCropBox() internal CropBox GetCropBox()
{ {
return null; return null;
} }
/// <summary>
/// The page media box.
/// </summary>
public MediaBox MediaBox { get; set; } public MediaBox MediaBox { get; set; }
/// <summary>
/// The page rotation.
/// </summary>
public int Rotation { get; set; } public int Rotation { get; set; }
/// <summary>
/// The page parent resources.
/// </summary>
public Queue<DictionaryToken> ParentResources { get; } = new Queue<DictionaryToken>(); public Queue<DictionaryToken> ParentResources { get; } = new Queue<DictionaryToken>();
} }
} }

View File

@ -3,14 +3,22 @@
using Core; using Core;
using Outline; using Outline;
using System; using System;
using System.Collections.Concurrent;
using System.Collections.Generic; using System.Collections.Generic;
using System.Data;
using System.Linq;
using System.Runtime.Serialization;
using System.Runtime.Versioning;
using Tokenization.Scanner; using Tokenization.Scanner;
using Tokens; using Tokens;
using UglyToad.PdfPig.Parser;
using Util; using Util;
internal class Pages internal class Pages
{ {
private readonly IPageFactory pageFactory; private readonly ConcurrentDictionary<Type, object> pageFactoryCache = new ConcurrentDictionary<Type, object>();
private readonly IPageFactory<Page> defaultPageFactory;
private readonly IPdfTokenScanner pdfScanner; private readonly IPdfTokenScanner pdfScanner;
private readonly Dictionary<int, PageTreeNode> pagesByNumber; private readonly Dictionary<int, PageTreeNode> pagesByNumber;
public int Count => pagesByNumber.Count; public int Count => pagesByNumber.Count;
@ -20,21 +28,35 @@
/// </summary> /// </summary>
public PageTreeNode PageTree { get; } public PageTreeNode PageTree { get; }
internal Pages(IPageFactory pageFactory, IPdfTokenScanner pdfScanner, PageTreeNode pageTree, Dictionary<int, PageTreeNode> pagesByNumber) internal Pages(IPageFactory<Page> pageFactory, IPdfTokenScanner pdfScanner, PageTreeNode pageTree, Dictionary<int, PageTreeNode> pagesByNumber)
{ {
this.pageFactory = pageFactory ?? throw new ArgumentNullException(nameof(pageFactory)); this.defaultPageFactory = pageFactory ?? throw new ArgumentNullException(nameof(pageFactory));
this.pdfScanner = pdfScanner ?? throw new ArgumentNullException(nameof(pdfScanner)); this.pdfScanner = pdfScanner ?? throw new ArgumentNullException(nameof(pdfScanner));
this.pagesByNumber = pagesByNumber; this.pagesByNumber = pagesByNumber;
PageTree = pageTree; PageTree = pageTree;
AddPageFactory(this.defaultPageFactory);
} }
internal Page GetPage(int pageNumber, NamedDestinations namedDestinations, InternalParsingOptions parsingOptions) internal Page GetPage(int pageNumber, NamedDestinations namedDestinations, InternalParsingOptions parsingOptions) => GetPage(defaultPageFactory, pageNumber, namedDestinations, parsingOptions);
/// <summary>
/// Build the requested page using the factory registered for <typeparamref name="TPage"/>.
/// </summary>
/// <exception cref="InvalidOperationException">
/// No entry is registered for <typeparamref name="TPage"/>, or the registered object is not an
/// <see cref="IPageFactory{TPage}"/>.
/// </exception>
internal TPage GetPage<TPage>(int pageNumber, NamedDestinations namedDestinations, InternalParsingOptions parsingOptions)
{
    // The cache stores factories as plain objects keyed by page type, so the entry must be type-checked.
    if (!pageFactoryCache.TryGetValue(typeof(TPage), out var registered) || !(registered is IPageFactory<TPage> factory))
    {
        throw new InvalidOperationException($"Could not find {typeof(IPageFactory<TPage>)} for page type {typeof(TPage)}.");
    }

    return GetPage(factory, pageNumber, namedDestinations, parsingOptions);
}
private TPage GetPage<TPage>(IPageFactory<TPage> pageFactory, int pageNumber, NamedDestinations namedDestinations, InternalParsingOptions parsingOptions)
{ {
if (pageNumber <= 0 || pageNumber > Count) if (pageNumber <= 0 || pageNumber > Count)
{ {
parsingOptions.Logger.Error($"Page {pageNumber} requested but is out of range."); parsingOptions.Logger.Error($"Page {pageNumber} requested but is out of range.");
throw new ArgumentOutOfRangeException(nameof(pageNumber), throw new ArgumentOutOfRangeException(nameof(pageNumber),
$"Page number {pageNumber} invalid, must be between 1 and {Count}."); $"Page number {pageNumber} invalid, must be between 1 and {Count}.");
} }
@ -49,7 +71,7 @@
} }
var pageTreeMembers = new PageTreeMembers(); var pageTreeMembers = new PageTreeMembers();
while (pageStack.Count > 0) while (pageStack.Count > 0)
{ {
currentNode = pageStack.Pop(); currentNode = pageStack.Pop();
@ -58,7 +80,7 @@
{ {
pageTreeMembers.ParentResources.Enqueue(resourcesDictionary); pageTreeMembers.ParentResources.Enqueue(resourcesDictionary);
} }
if (currentNode.NodeDictionary.TryGet(NameToken.MediaBox, pdfScanner, out ArrayToken mediaBox)) if (currentNode.NodeDictionary.TryGet(NameToken.MediaBox, pdfScanner, out ArrayToken mediaBox))
{ {
pageTreeMembers.MediaBox = new MediaBox(mediaBox.ToRectangle(pdfScanner)); pageTreeMembers.MediaBox = new MediaBox(mediaBox.ToRectangle(pdfScanner));
@ -70,14 +92,37 @@
} }
} }
var page = pageFactory.Create( return pageFactory.Create(
pageNumber, pageNumber,
pageNode.NodeDictionary, pageNode.NodeDictionary,
pageTreeMembers, pageTreeMembers,
namedDestinations, namedDestinations,
parsingOptions); parsingOptions);
}
return page;
/// <summary>
/// Register the factory used to build pages of type <typeparamref name="TPage"/>.
/// </summary>
/// <param name="pageFactory">The factory to register.</param>
/// <exception cref="ArgumentNullException"><paramref name="pageFactory"/> is <see langword="null"/>.</exception>
/// <exception cref="InvalidOperationException">
/// A factory is already registered for <typeparamref name="TPage"/>.
/// </exception>
internal void AddPageFactory<TPage>(IPageFactory<TPage> pageFactory)
{
    if (pageFactory == null)
    {
        // A null entry could be stored but never used, so reject it at registration time.
        throw new ArgumentNullException(nameof(pageFactory));
    }

    // TryAdd leaves an existing entry untouched and returns false; report that instead of
    // silently dropping the new factory.
    if (!pageFactoryCache.TryAdd(typeof(TPage), pageFactory))
    {
        throw new InvalidOperationException($"A page factory is already registered for page type {typeof(TPage)}.");
    }
}
internal void AddPageFactory<TPage>(Type type)
{
// TODO - check for type, should implement IPageFactory<TPage>
if (!typeof(IPageFactory<TPage>).IsAssignableFrom(type))
{
throw new ArgumentException($"The type provided does not implement {typeof(IPageFactory<TPage>)}.");
}
var defaultPageFactory = (PageFactory)pageFactoryCache[typeof(Page)];
// TODO - careful here - resourceStore is not thread safe
var pageFactory = (IPageFactory<TPage>)Activator.CreateInstance(type,
defaultPageFactory.pdfScanner, defaultPageFactory.resourceStore,
defaultPageFactory.filterProvider, defaultPageFactory.pageContentParser,
defaultPageFactory.log);
AddPageFactory(pageFactory);
} }
internal PageTreeNode GetPageNode(int pageNumber) internal PageTreeNode GetPageNode(int pageNumber)

View File

@ -10,7 +10,7 @@
using Tokens; using Tokens;
using Util; using Util;
internal class PagesFactory internal static class PagesFactory
{ {
private class PageCounter private class PageCounter
{ {
@ -21,7 +21,7 @@
} }
} }
public static Pages Create(IndirectReference pagesReference, DictionaryToken pagesDictionary, IPdfTokenScanner scanner, IPageFactory pageFactory, ILog log, bool isLenientParsing) public static Pages Create(IndirectReference pagesReference, DictionaryToken pagesDictionary, IPdfTokenScanner scanner, IPageFactory<Page> pageFactory, ILog log, bool isLenientParsing)
{ {
var pageNumber = new PageCounter(); var pageNumber = new PageCounter();

View File

@ -41,7 +41,7 @@
this.filterProvider = filterProvider; this.filterProvider = filterProvider;
} }
public void LoadResourceDictionary(DictionaryToken resourceDictionary, InternalParsingOptions parsingOptions) public void LoadResourceDictionary(DictionaryToken resourceDictionary, IParsingOptions parsingOptions)
{ {
lastLoadedFont = (null, null); lastLoadedFont = (null, null);
loadedNamedColorSpaceDetails.Clear(); loadedNamedColorSpaceDetails.Clear();
@ -176,7 +176,7 @@
namedColorSpaces.Pop(); namedColorSpaces.Pop();
} }
private void LoadFontDictionary(DictionaryToken fontDictionary, InternalParsingOptions parsingOptions) private void LoadFontDictionary(DictionaryToken fontDictionary, IParsingOptions parsingOptions)
{ {
lastLoadedFont = (null, null); lastLoadedFont = (null, null);

View File

@ -25,8 +25,14 @@
IReadOnlyList<IFilter> GetAllFilters(); IReadOnlyList<IFilter> GetAllFilters();
} }
internal interface ILookupFilterProvider : IFilterProvider /// <summary>
/// Gets filter implementations (<see cref="IFilter"/>) for decoding PDF data with lookup.
/// </summary>
public interface ILookupFilterProvider : IFilterProvider
{ {
/// <summary>
/// Get the filters specified in this dictionary.
/// </summary>
IReadOnlyList<IFilter> GetFilters(DictionaryToken dictionary, IPdfTokenScanner scanner); IReadOnlyList<IFilter> GetFilters(DictionaryToken dictionary, IPdfTokenScanner scanner);
} }
} }

View File

@ -7,8 +7,11 @@
/// By default user space units correspond to 1/72nd of an inch (a typographic point). /// By default user space units correspond to 1/72nd of an inch (a typographic point).
/// The UserUnit entry in a page dictionary can define the space units as a different multiple of 1/72 (1 point). /// The UserUnit entry in a page dictionary can define the space units as a different multiple of 1/72 (1 point).
/// </summary> /// </summary>
internal readonly struct UserSpaceUnit public readonly struct UserSpaceUnit
{ {
/// <summary>
/// Default <see cref="UserSpaceUnit"/> with <see cref="PointMultiples"/> set to 1.
/// </summary>
public static readonly UserSpaceUnit Default = new UserSpaceUnit(1); public static readonly UserSpaceUnit Default = new UserSpaceUnit(1);
/// <summary> /// <summary>
@ -29,6 +32,7 @@
PointMultiples = pointMultiples; PointMultiples = pointMultiples;
} }
/// <inheritdoc/>
public override string ToString() public override string ToString()
{ {
return PointMultiples.ToString(CultureInfo.InvariantCulture); return PointMultiples.ToString(CultureInfo.InvariantCulture);

View File

@ -5,18 +5,24 @@
/// <summary> /// <summary>
/// A color space definition from a resource dictionary. /// A color space definition from a resource dictionary.
/// </summary> /// </summary>
internal struct ResourceColorSpace public readonly struct ResourceColorSpace
{ {
/// <summary>
/// The color space name.
/// </summary>
public NameToken Name { get; } public NameToken Name { get; }
/// <summary>
/// The color space data.
/// </summary>
public IToken Data { get; } public IToken Data { get; }
public ResourceColorSpace(NameToken name, IToken data) internal ResourceColorSpace(NameToken name, IToken data)
{ {
Name = name; Name = name;
Data = data; Data = data;
} }
public ResourceColorSpace(NameToken name) : this(name, null) { } internal ResourceColorSpace(NameToken name) : this(name, null) { }
} }
} }

View File

@ -49,7 +49,7 @@
private readonly IPdfTokenScanner pdfScanner; private readonly IPdfTokenScanner pdfScanner;
private readonly IPageContentParser pageContentParser; private readonly IPageContentParser pageContentParser;
private readonly ILookupFilterProvider filterProvider; private readonly ILookupFilterProvider filterProvider;
private readonly InternalParsingOptions parsingOptions; private readonly IParsingOptions parsingOptions;
private readonly MarkedContentStack markedContentStack = new MarkedContentStack(); private readonly MarkedContentStack markedContentStack = new MarkedContentStack();
private Stack<CurrentGraphicsState> graphicsStack = new Stack<CurrentGraphicsState>(); private Stack<CurrentGraphicsState> graphicsStack = new Stack<CurrentGraphicsState>();
@ -92,7 +92,7 @@
IPdfTokenScanner pdfScanner, IPdfTokenScanner pdfScanner,
IPageContentParser pageContentParser, IPageContentParser pageContentParser,
ILookupFilterProvider filterProvider, ILookupFilterProvider filterProvider,
InternalParsingOptions parsingOptions) IParsingOptions parsingOptions)
{ {
this.pageNumber = pageNumber; this.pageNumber = pageNumber;
this.resourceStore = resourceStore; this.resourceStore = resourceStore;

View File

@ -0,0 +1,40 @@
namespace UglyToad.PdfPig
{
using System.Collections.Generic;
using UglyToad.PdfPig.Logging;
/// <summary>
/// Read-only view of the options that control how a document is parsed.
/// </summary>
public interface IParsingOptions
{
    /// <summary>
    /// Whether the parser applies clipping to paths.
    /// Defaults to <see langword="false"/>.
    /// <para>When set to <see langword="true"/>, Bezier curves are transformed into polylines.</para>
    /// </summary>
    bool ClipPaths { get; }

    /// <summary>
    /// Whether the parser ignores issues where the document does not conform to the PDF specification.
    /// </summary>
    bool UseLenientParsing { get; }

    /// <summary>
    /// Every password to try when opening the document, including any values set for
    /// <see cref="ParsingOptions.Password"/>.
    /// </summary>
    List<string> Passwords { get; }

    /// <summary>
    /// Whether to skip content whose font could not be found. Some letters will be skipped/missed,
    /// but the library will not throw where the source PDF has some corrupted text.
    /// Also skips XObjects like forms and images when missing.
    /// </summary>
    bool SkipMissingFonts { get; }

    /// <summary>
    /// The <see cref="ILog"/> that receives messages raised by the parsing process.
    /// </summary>
    ILog Logger { get; }
}
}

View File

@ -4,11 +4,11 @@
using System.Collections.Generic; using System.Collections.Generic;
/// <summary> /// <summary>
/// <see cref="ParsingOptions"/> but without being a public API/ /// <see cref="ParsingOptions"/> but without being a public API.
/// </summary> /// </summary>
internal class InternalParsingOptions internal class InternalParsingOptions : IParsingOptions
{ {
public IReadOnlyList<string> Passwords { get; } public List<string> Passwords { get; }
public bool UseLenientParsing { get; } public bool UseLenientParsing { get; }
@ -21,7 +21,7 @@
public ILog Logger { get; } public ILog Logger { get; }
public InternalParsingOptions( public InternalParsingOptions(
IReadOnlyList<string> passwords, List<string> passwords,
bool useLenientParsing, bool useLenientParsing,
bool clipPaths, bool clipPaths,
bool skipMissingFonts, bool skipMissingFonts,

View File

@ -9,7 +9,7 @@
/// <summary> /// <summary>
/// Named destinations in a PDF document /// Named destinations in a PDF document
/// </summary> /// </summary>
internal class NamedDestinations public class NamedDestinations
{ {
/// <summary> /// <summary>
/// Dictionary containing explicit destinations, keyed by name /// Dictionary containing explicit destinations, keyed by name

View File

@ -1,13 +1,18 @@
namespace UglyToad.PdfPig.Parser namespace UglyToad.PdfPig.Parser
{ {
using System.Collections.Generic;
using Core; using Core;
using Graphics.Operations; using Graphics.Operations;
using Logging; using Logging;
using System.Collections.Generic;
internal interface IPageContentParser /// <summary>
/// Page content parser interface.
/// </summary>
public interface IPageContentParser
{ {
IReadOnlyList<IGraphicsStateOperation> Parse(int pageNumber, IInputBytes inputBytes, /// <summary>
ILog log); /// Parse the <see cref="IInputBytes"/> into <see cref="IGraphicsStateOperation"/>s.
/// </summary>
IReadOnlyList<IGraphicsStateOperation> Parse(int pageNumber, IInputBytes inputBytes, ILog log);
} }
} }

View File

@ -1,167 +1,39 @@
namespace UglyToad.PdfPig.Parser namespace UglyToad.PdfPig.Parser
{ {
using System;
using System.Collections.Generic;
using Annotations; using Annotations;
using Content; using Content;
using Core;
using Filters; using Filters;
using Geometry; using Geometry;
using Graphics; using Graphics;
using Graphics.Operations; using Graphics.Operations;
using Logging; using Logging;
using Outline; using Outline;
using Parts; using System.Collections.Generic;
using Tokenization.Scanner; using Tokenization.Scanner;
using Tokens; using Tokens;
using Util; using UglyToad.PdfPig.Core;
internal class PageFactory : IPageFactory internal class PageFactory : PageFactoryBase<Page>
{ {
private readonly IPdfTokenScanner pdfScanner;
private readonly IResourceStore resourceStore;
private readonly ILookupFilterProvider filterProvider;
private readonly IPageContentParser pageContentParser;
private readonly ILog log;
public PageFactory( public PageFactory(
IPdfTokenScanner pdfScanner, IPdfTokenScanner pdfScanner,
IResourceStore resourceStore, IResourceStore resourceStore,
ILookupFilterProvider filterProvider, ILookupFilterProvider filterProvider,
IPageContentParser pageContentParser, IPageContentParser pageContentParser,
ILog log) ILog log)
{ : base(pdfScanner, resourceStore, filterProvider, pageContentParser, log)
this.resourceStore = resourceStore; { }
this.filterProvider = filterProvider;
this.pageContentParser = pageContentParser;
this.pdfScanner = pdfScanner;
this.log = log;
}
public Page Create(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers, protected override Page ProcessPage(int pageNumber,
NamedDestinations namedDestinations, InternalParsingOptions parsingOptions) DictionaryToken dictionary,
{ NamedDestinations namedDestinations,
if (dictionary == null)
{
throw new ArgumentNullException(nameof(dictionary));
}
var type = dictionary.GetNameOrDefault(NameToken.Type);
if (type != null && !type.Equals(NameToken.Page))
{
parsingOptions.Logger.Error($"Page {number} had its type specified as {type} rather than 'Page'.");
}
MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers);
CropBox cropBox = GetCropBox(dictionary, pageTreeMembers, mediaBox);
var rotation = new PageRotationDegrees(pageTreeMembers.Rotation);
if (dictionary.TryGet(NameToken.Rotate, pdfScanner, out NumericToken rotateToken))
{
rotation = new PageRotationDegrees(rotateToken.Int);
}
var stackDepth = 0;
while (pageTreeMembers.ParentResources.Count > 0)
{
var resource = pageTreeMembers.ParentResources.Dequeue();
resourceStore.LoadResourceDictionary(resource, parsingOptions);
stackDepth++;
}
if (dictionary.TryGet(NameToken.Resources, pdfScanner, out DictionaryToken resources))
{
resourceStore.LoadResourceDictionary(resources, parsingOptions);
stackDepth++;
}
UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary);
PageContent content;
if (!dictionary.TryGet(NameToken.Contents, out var contents))
{
content = new PageContent(EmptyArray<IGraphicsStateOperation>.Instance,
EmptyArray<Letter>.Instance,
EmptyArray<PdfPath>.Instance,
EmptyArray<Union<XObjectContentRecord, InlineImage>>.Instance,
EmptyArray<MarkedContentElement>.Instance,
pdfScanner,
filterProvider,
resourceStore);
// ignored for now, is it possible? check the spec...
}
else if (DirectObjectFinder.TryGet<ArrayToken>(contents, pdfScanner, out var array))
{
var bytes = new List<byte>();
for (var i = 0; i < array.Data.Count; i++)
{
var item = array.Data[i];
if (!(item is IndirectReferenceToken obj))
{
throw new PdfDocumentFormatException($"The contents contained something which was not an indirect reference: {item}.");
}
var contentStream = DirectObjectFinder.Get<StreamToken>(obj, pdfScanner);
if (contentStream == null)
{
throw new InvalidOperationException($"Could not find the contents for object {obj}.");
}
bytes.AddRange(contentStream.Decode(filterProvider, pdfScanner));
if (i < array.Data.Count - 1)
{
bytes.Add((byte)'\n');
}
}
content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, mediaBox, parsingOptions);
}
else
{
var contentStream = DirectObjectFinder.Get<StreamToken>(contents, pdfScanner);
if (contentStream == null)
{
throw new InvalidOperationException("Failed to parse the content for the page: " + number);
}
var bytes = contentStream.Decode(filterProvider, pdfScanner);
content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, mediaBox, parsingOptions);
}
var initialMatrix = ContentStreamProcessor.GetInitialMatrix(userSpaceUnit, mediaBox, cropBox, rotation, log);
var annotationProvider = new AnnotationProvider(pdfScanner, dictionary, initialMatrix, namedDestinations, log);
var page = new Page(number, dictionary, mediaBox, cropBox, rotation, content, annotationProvider, pdfScanner);
for (var i = 0; i < stackDepth; i++)
{
resourceStore.UnloadResourceDictionary();
}
return page;
}
private PageContent GetContent(
int pageNumber,
IReadOnlyList<byte> contentBytes, IReadOnlyList<byte> contentBytes,
CropBox cropBox, CropBox cropBox,
UserSpaceUnit userSpaceUnit, UserSpaceUnit userSpaceUnit,
PageRotationDegrees rotation, PageRotationDegrees rotation,
MediaBox mediaBox, MediaBox mediaBox,
InternalParsingOptions parsingOptions) IParsingOptions parsingOptions)
{ {
var operations = pageContentParser.Parse(pageNumber, new ByteArrayInputBytes(contentBytes),
parsingOptions.Logger);
var context = new ContentStreamProcessor( var context = new ContentStreamProcessor(
pageNumber, pageNumber,
resourceStore, resourceStore,
@ -174,82 +46,38 @@
filterProvider, filterProvider,
parsingOptions); parsingOptions);
return context.Process(pageNumber, operations); var operations = pageContentParser.Parse(pageNumber, new ByteArrayInputBytes(contentBytes), parsingOptions.Logger);
var content = context.Process(pageNumber, operations);
var initialMatrix = ContentStreamProcessor.GetInitialMatrix(userSpaceUnit, mediaBox, cropBox, rotation, log);
var annotationProvider = new AnnotationProvider(pdfScanner, dictionary, initialMatrix, namedDestinations, log);
return new Page(pageNumber, dictionary, mediaBox, cropBox, rotation, content, annotationProvider, pdfScanner);
} }
private static UserSpaceUnit GetUserSpaceUnits(DictionaryToken dictionary) protected override Page ProcessPage(
{ int pageNumber,
var spaceUnits = UserSpaceUnit.Default;
if (dictionary.TryGet(NameToken.UserUnit, out var userUnitBase) && userUnitBase is NumericToken userUnitNumber)
{
spaceUnits = new UserSpaceUnit(userUnitNumber.Int);
}
return spaceUnits;
}
private CropBox GetCropBox(
DictionaryToken dictionary, DictionaryToken dictionary,
PageTreeMembers pageTreeMembers, NamedDestinations namedDestinations,
MediaBox mediaBox) CropBox cropBox,
UserSpaceUnit userSpaceUnit,
PageRotationDegrees rotation,
MediaBox mediaBox,
IParsingOptions parsingOptions)
{ {
CropBox cropBox; var initialMatrix = ContentStreamProcessor.GetInitialMatrix(userSpaceUnit, mediaBox, cropBox, rotation, log);
if (dictionary.TryGet(NameToken.CropBox, out var cropBoxObject) && var annotationProvider = new AnnotationProvider(pdfScanner, dictionary, initialMatrix, namedDestinations, log);
DirectObjectFinder.TryGet(cropBoxObject, pdfScanner, out ArrayToken cropBoxArray))
{
if (cropBoxArray.Length != 4)
{
log.Error($"The CropBox was the wrong length in the dictionary: {dictionary}. Array was: {cropBoxArray}. Using MediaBox.");
cropBox = new CropBox(mediaBox.Bounds);
return cropBox; var content = new PageContent(EmptyArray<IGraphicsStateOperation>.Instance,
} EmptyArray<Letter>.Instance,
EmptyArray<PdfPath>.Instance,
EmptyArray<Union<XObjectContentRecord, InlineImage>>.Instance,
EmptyArray<MarkedContentElement>.Instance,
pdfScanner,
filterProvider,
resourceStore);
// ignored for now, is it possible? check the spec...
cropBox = new CropBox(cropBoxArray.ToRectangle(pdfScanner)); return new Page(pageNumber, dictionary, mediaBox, cropBox, rotation, content, annotationProvider, pdfScanner);
}
else
{
cropBox = pageTreeMembers.GetCropBox() ?? new CropBox(mediaBox.Bounds);
}
return cropBox;
}
private MediaBox GetMediaBox(
int number,
DictionaryToken dictionary,
PageTreeMembers pageTreeMembers)
{
MediaBox mediaBox;
if (dictionary.TryGet(NameToken.MediaBox, out var mediaBoxObject)
&& DirectObjectFinder.TryGet(mediaBoxObject, pdfScanner, out ArrayToken mediaBoxArray))
{
if (mediaBoxArray.Length != 4)
{
log.Error($"The MediaBox was the wrong length in the dictionary: {dictionary}. Array was: {mediaBoxArray}. Defaulting to US Letter.");
mediaBox = MediaBox.Letter;
return mediaBox;
}
mediaBox = new MediaBox(mediaBoxArray.ToRectangle(pdfScanner));
}
else
{
mediaBox = pageTreeMembers.MediaBox;
if (mediaBox == null)
{
log.Error($"The MediaBox was the wrong missing for page {number}. Using US Letter.");
// PDFBox defaults to US Letter.
mediaBox = MediaBox.Letter;
}
}
return mediaBox;
} }
} }
} }

View File

@ -6,7 +6,7 @@
/// <summary> /// <summary>
/// Configures options used by the parser when reading PDF documents. /// Configures options used by the parser when reading PDF documents.
/// </summary> /// </summary>
public class ParsingOptions public class ParsingOptions : IParsingOptions
{ {
/// <summary> /// <summary>
/// A default <see cref="ParsingOptions"/> with <see cref="UseLenientParsing"/> set to false. /// A default <see cref="ParsingOptions"/> with <see cref="UseLenientParsing"/> set to false.

View File

@ -161,6 +161,57 @@
} }
} }
/// <summary>
/// Get the page with the specified page number (1 indexed), returned as the requested page type.
/// </summary>
/// <typeparam name="TPage">
/// The type of page object to return. The request is forwarded to the document's page collection;
/// a page factory for this type presumably has to be registered through <c>AddPageFactory</c> first - confirm.
/// </typeparam>
/// <param name="pageNumber">The number of the page to return, this starts from 1.</param>
/// <returns>The page.</returns>
/// <exception cref="ObjectDisposedException">The document has already been disposed.</exception>
/// <exception cref="PdfDocumentEncryptedException">
/// Retrieving the page failed and the document is encrypted; the original failure is attached as the inner exception.
/// </exception>
public TPage GetPage<TPage>(int pageNumber)
{
    // TODO - update log with log type
    if (isDisposed)
    {
        // ObjectDisposedException(string) interprets its argument as the *object name*, which would bury
        // the explanation inside "Object name: '...'". Use the (objectName, message) overload instead so
        // the text below is the actual exception message.
        throw new ObjectDisposedException(GetType().FullName, "Cannot access page after the document is disposed.");
    }

    parsingOptions.Logger.Debug($"Accessing page {pageNumber}.");

    try
    {
        return pages.GetPage<TPage>(pageNumber, namedDestinations, parsingOptions);
    }
    catch (Exception ex)
    {
        // Encryption is a likely root cause of a parse failure, so surface it explicitly
        // while keeping the original exception as the inner exception.
        if (IsEncrypted)
        {
            throw new PdfDocumentEncryptedException("Document was encrypted which may have caused error when retrieving page.", encryptionDictionary, ex);
        }

        // Not encrypted: rethrow unchanged, preserving the original stack trace.
        throw;
    }
}
/// <summary>
/// Registers a page factory instance for pages of type <typeparamref name="TPage"/>.
/// The call is forwarded unchanged to the document's page collection.
/// </summary>
/// <typeparam name="TPage">The page type produced by <paramref name="pageFactory"/>.</typeparam>
/// <param name="pageFactory">The factory instance to register.</param>
public void AddPageFactory<TPage>(IPageFactory<TPage> pageFactory) => pages.AddPageFactory(pageFactory);
/// <summary>
/// Registers a page factory, identified by its <see cref="Type"/>, for pages of type <typeparamref name="TPage"/>.
/// The call is forwarded unchanged to the document's page collection.
/// </summary>
/// <typeparam name="TPage">The page type the factory is registered for.</typeparam>
/// <param name="type">
/// The factory type to register. Presumably this must be a page factory for <typeparamref name="TPage"/>
/// that the page collection can instantiate - confirm against the page collection's implementation.
/// </param>
public void AddPageFactory<TPage>(Type type) => pages.AddPageFactory<TPage>(type);
/// <summary> /// <summary>
/// Gets all pages in this document in order. /// Gets all pages in this document in order.
/// </summary> /// </summary>

View File

@ -7,7 +7,7 @@
/// <summary> /// <summary>
/// Tokenizes objects from bytes in a PDF file. /// Tokenizes objects from bytes in a PDF file.
/// </summary> /// </summary>
internal interface IPdfTokenScanner : ISeekableTokenScanner, IDisposable public interface IPdfTokenScanner : ISeekableTokenScanner, IDisposable
{ {
/// <summary> /// <summary>
/// Tokenize the object with a given object number. /// Tokenize the object with a given object number.