mirror of
https://github.com/UglyToad/PdfPig.git
synced 2026-03-10 00:23:29 +08:00
Abstract away PageFactory main logic into BasePageFactory
This commit is contained in:
@@ -74,13 +74,14 @@
|
||||
"UglyToad.PdfPig.Annotations.AppearanceStream",
|
||||
"UglyToad.PdfPig.Annotations.QuadPointsQuadrilateral",
|
||||
"UglyToad.PdfPig.Content.ArtifactMarkedContentElement",
|
||||
"UglyToad.PdfPig.Content.BasePageFactory`1",
|
||||
"UglyToad.PdfPig.Content.Catalog",
|
||||
"UglyToad.PdfPig.Content.CropBox",
|
||||
"UglyToad.PdfPig.Content.DocumentInformation",
|
||||
"UglyToad.PdfPig.Content.EmbeddedFile",
|
||||
"UglyToad.PdfPig.Content.Hyperlink",
|
||||
"UglyToad.PdfPig.Content.InlineImage",
|
||||
"UglyToad.PdfPig.Content.IPageFactory",
|
||||
"UglyToad.PdfPig.Content.IPageFactory`1",
|
||||
"UglyToad.PdfPig.Content.IPdfImage",
|
||||
"UglyToad.PdfPig.Content.IResourceStore",
|
||||
"UglyToad.PdfPig.Content.Letter",
|
||||
|
||||
326
src/UglyToad.PdfPig/Content/BasePageFactory.cs
Normal file
326
src/UglyToad.PdfPig/Content/BasePageFactory.cs
Normal file
@@ -0,0 +1,326 @@
|
||||
namespace UglyToad.PdfPig.Content
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using Core;
|
||||
using Filters;
|
||||
using Geometry;
|
||||
using Graphics;
|
||||
using Graphics.Operations;
|
||||
using Outline.Destinations;
|
||||
using Parser;
|
||||
using Parser.Parts;
|
||||
using Tokenization.Scanner;
|
||||
using Tokens;
|
||||
using Util;
|
||||
|
||||
/// <summary>
|
||||
/// Page factory abstract class.
|
||||
/// </summary>
|
||||
/// <typeparam name="TPage">The type of page the page factory creates.</typeparam>
|
||||
public abstract class BasePageFactory<TPage> : IPageFactory<TPage>
|
||||
{
|
||||
/// <summary>
|
||||
/// The parsing options.
|
||||
/// </summary>
|
||||
public readonly ParsingOptions ParsingOptions;
|
||||
|
||||
/// <summary>
|
||||
/// The Pdf token scanner.
|
||||
/// </summary>
|
||||
public readonly IPdfTokenScanner PdfScanner;
|
||||
|
||||
/// <summary>
|
||||
/// The resource store.
|
||||
/// </summary>
|
||||
public readonly IResourceStore ResourceStore;
|
||||
|
||||
/// <summary>
|
||||
/// The filter provider.
|
||||
/// </summary>
|
||||
public readonly ILookupFilterProvider FilterProvider;
|
||||
|
||||
/// <summary>
|
||||
/// The page content parser.
|
||||
/// </summary>
|
||||
public readonly IPageContentParser PageContentParser;
|
||||
|
||||
/// <summary>
/// Initialise the shared dependencies of a <see cref="BasePageFactory{TPage}"/>.
/// </summary>
/// <param name="pdfScanner">The Pdf token scanner, exposed as <see cref="PdfScanner"/>.</param>
/// <param name="resourceStore">The resource store, exposed as <see cref="ResourceStore"/>.</param>
/// <param name="filterProvider">The filter provider, exposed as <see cref="FilterProvider"/>.</param>
/// <param name="pageContentParser">The page content parser, exposed as <see cref="PageContentParser"/>.</param>
/// <param name="parsingOptions">The parsing options, exposed as <see cref="ParsingOptions"/>.</param>
protected BasePageFactory(
    IPdfTokenScanner pdfScanner,
    IResourceStore resourceStore,
    ILookupFilterProvider filterProvider,
    IPageContentParser pageContentParser,
    ParsingOptions parsingOptions)
{
    // The arguments are stored as given; no validation is performed here.
    PdfScanner = pdfScanner;
    ResourceStore = resourceStore;
    FilterProvider = filterProvider;
    PageContentParser = pageContentParser;
    ParsingOptions = parsingOptions;
}
|
||||
|
||||
/// <inheritdoc/>
/// <remarks>
/// The resource dictionaries queued in <paramref name="pageTreeMembers"/> and the page's own
/// resources dictionary are loaded into the <see cref="ResourceStore"/> while the page is built.
/// Every dictionary that was loaded is unloaded again before this method returns, including
/// when building the page throws, so a failed page does not leave its resources behind in the store.
/// </remarks>
/// <exception cref="ArgumentNullException">
/// <paramref name="dictionary"/> or <paramref name="pageTreeMembers"/> is <c>null</c>.
/// </exception>
public TPage Create(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers,
    NamedDestinations namedDestinations)
{
    if (dictionary == null)
    {
        throw new ArgumentNullException(nameof(dictionary));
    }

    if (pageTreeMembers == null)
    {
        throw new ArgumentNullException(nameof(pageTreeMembers));
    }

    var type = dictionary.GetNameOrDefault(NameToken.Type);

    if (type != null && !type.Equals(NameToken.Page))
    {
        ParsingOptions.Logger.Error($"Page {number} had its type specified as {type} rather than 'Page'.");
    }

    // A rotation on the page dictionary itself takes precedence over the page tree value.
    var rotation = new PageRotationDegrees(pageTreeMembers.Rotation);
    if (dictionary.TryGet(NameToken.Rotate, PdfScanner, out NumericToken rotateToken))
    {
        rotation = new PageRotationDegrees(rotateToken.Int);
    }

    // Counts the dictionaries loaded into the store; each one needs a matching unload.
    var stackDepth = 0;

    try
    {
        while (pageTreeMembers.ParentResources.Count > 0)
        {
            var resource = pageTreeMembers.ParentResources.Dequeue();

            ResourceStore.LoadResourceDictionary(resource);
            stackDepth++;
        }

        if (dictionary.TryGet(NameToken.Resources, PdfScanner, out DictionaryToken resources))
        {
            ResourceStore.LoadResourceDictionary(resources);
            stackDepth++;
        }

        UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary);

        MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers);
        CropBox cropBox = GetCropBox(dictionary, pageTreeMembers, mediaBox);

        var initialMatrix = OperationContextHelper.GetInitialMatrix(userSpaceUnit, mediaBox, cropBox, rotation, ParsingOptions.Logger);

        ApplyTransformNormalise(initialMatrix, ref mediaBox, ref cropBox);

        var contentBytes = GetContentBytes(number, dictionary);

        return ProcessPageInternal(number, dictionary, namedDestinations, mediaBox, cropBox, userSpaceUnit, rotation, initialMatrix, contentBytes);
    }
    finally
    {
        // Runs on success and on failure so the store never keeps this page's resources.
        for (var i = 0; i < stackDepth; i++)
        {
            ResourceStore.UnloadResourceDictionary();
        }
    }
}

/// <summary>
/// Read and decode the bytes of the page's content stream(s).
/// </summary>
/// <param name="number">The page number, used in error messages.</param>
/// <param name="dictionary">The page dictionary holding the Contents entry.</param>
/// <returns>
/// The decoded content bytes. When Contents is an array the decoded streams are joined with a
/// single newline byte between them. Returns <c>null</c> when the page has no Contents entry.
/// </returns>
/// <exception cref="PdfDocumentFormatException">An item of a Contents array is not an indirect reference.</exception>
/// <exception cref="InvalidOperationException">A referenced content stream could not be found.</exception>
private IReadOnlyList<byte> GetContentBytes(int number, DictionaryToken dictionary)
{
    if (!dictionary.TryGet(NameToken.Contents, out var contents))
    {
        // No Contents entry: the caller treats null as a page without content.
        return null;
    }

    if (DirectObjectFinder.TryGet<ArrayToken>(contents, PdfScanner, out var array))
    {
        var bytes = new List<byte>();

        for (var i = 0; i < array.Data.Count; i++)
        {
            var item = array.Data[i];

            if (!(item is IndirectReferenceToken obj))
            {
                throw new PdfDocumentFormatException($"The contents contained something which was not an indirect reference: {item}.");
            }

            var contentStream = DirectObjectFinder.Get<StreamToken>(obj, PdfScanner);

            if (contentStream == null)
            {
                throw new InvalidOperationException($"Could not find the contents for object {obj}.");
            }

            bytes.AddRange(contentStream.Decode(FilterProvider, PdfScanner));

            // Separate consecutive streams so tokens at a stream boundary do not run together.
            if (i < array.Data.Count - 1)
            {
                bytes.Add((byte)'\n');
            }
        }

        return bytes;
    }

    var singleStream = DirectObjectFinder.Get<StreamToken>(contents, PdfScanner);

    if (singleStream == null)
    {
        throw new InvalidOperationException("Failed to parse the content for the page: " + number);
    }

    return singleStream.Decode(FilterProvider, PdfScanner);
}
|
||||
|
||||
/// <summary>
/// Turn the decoded content bytes into graphics state operations and hand them to
/// <see cref="ProcessPage"/>. Missing or empty content is passed on as an empty operation list
/// without invoking the content parser.
/// </summary>
private TPage ProcessPageInternal(
    int pageNumber,
    DictionaryToken dictionary,
    NamedDestinations namedDestinations,
    MediaBox mediaBox,
    CropBox cropBox,
    UserSpaceUnit userSpaceUnit,
    PageRotationDegrees rotation,
    TransformationMatrix initialMatrix,
    IReadOnlyList<byte> contentBytes)
{
    var hasContent = contentBytes != null && contentBytes.Count != 0;

    if (!hasContent)
    {
        // Nothing to parse: build the page from an empty set of operations.
        return ProcessPage(
            pageNumber,
            dictionary,
            namedDestinations,
            mediaBox,
            cropBox,
            userSpaceUnit,
            rotation,
            initialMatrix,
            EmptyArray<IGraphicsStateOperation>.Instance);
    }

    var inputBytes = new ByteArrayInputBytes(contentBytes);
    var parsedOperations = PageContentParser.Parse(pageNumber, inputBytes, ParsingOptions.Logger);

    return ProcessPage(
        pageNumber,
        dictionary,
        namedDestinations,
        mediaBox,
        cropBox,
        userSpaceUnit,
        rotation,
        initialMatrix,
        parsedOperations);
}
|
||||
|
||||
/// <summary>
|
||||
/// Build the page from its parsed content operations. Called once per page by Create, with an empty operation list when the page has no content.
|
||||
/// </summary>
|
||||
/// <param name="pageNumber">The page number, starts at 1.</param>
|
||||
/// <param name="dictionary">The page dictionary that was passed to Create.</param>
|
||||
/// <param name="namedDestinations">The named destinations that were passed to Create.</param>
|
||||
/// <param name="mediaBox">The page media box, after the initial matrix has been applied and the result normalised.</param>
|
||||
/// <param name="cropBox">The page crop box as resolved by GetCropBox, after the initial matrix has been applied and the result normalised.</param>
|
||||
/// <param name="userSpaceUnit">The user space unit resolved by GetUserSpaceUnits from the page dictionary.</param>
|
||||
/// <param name="rotation">The page rotation.</param>
|
||||
/// <param name="initialMatrix">The initial matrix computed from the user space unit, media box, crop box and rotation.</param>
|
||||
/// <param name="operations">The page operations. Can be empty if the page has no content.</param>
/// <returns>The page created by the concrete factory.</returns>
|
||||
protected abstract TPage ProcessPage(
|
||||
int pageNumber,
|
||||
DictionaryToken dictionary,
|
||||
NamedDestinations namedDestinations,
|
||||
MediaBox mediaBox,
|
||||
CropBox cropBox,
|
||||
UserSpaceUnit userSpaceUnit,
|
||||
PageRotationDegrees rotation,
|
||||
TransformationMatrix initialMatrix,
|
||||
IReadOnlyList<IGraphicsStateOperation> operations);
|
||||
|
||||
/// <summary>
/// Get the user space units.
/// </summary>
/// <param name="dictionary">The page dictionary to read the UserUnit entry from.</param>
/// <returns>
/// A <see cref="UserSpaceUnit"/> built from the integer value of the UserUnit entry when that
/// entry is present and is a numeric token, otherwise <see cref="UserSpaceUnit.Default"/>.
/// </returns>
protected static UserSpaceUnit GetUserSpaceUnits(DictionaryToken dictionary)
{
    if (!dictionary.TryGet(NameToken.UserUnit, out var userUnitToken))
    {
        return UserSpaceUnit.Default;
    }

    // Only a direct numeric token is honoured; anything else falls back to the default.
    if (userUnitToken is NumericToken numeric)
    {
        return new UserSpaceUnit(numeric.Int);
    }

    return UserSpaceUnit.Default;
}
|
||||
|
||||
/// <summary>
/// Get the crop box.
/// </summary>
/// <param name="dictionary">The page dictionary to read the CropBox entry from.</param>
/// <param name="pageTreeMembers">The page tree members, consulted when the page dictionary has no usable CropBox array.</param>
/// <param name="mediaBox">The page media box, whose bounds are used as the fallback crop box.</param>
/// <returns>
/// The crop box from the page dictionary when it is a four element array. When the array has
/// the wrong length an error is logged and the media box bounds are used. When there is no
/// CropBox array, the crop box from <paramref name="pageTreeMembers"/> is used if there is one,
/// otherwise the media box bounds.
/// </returns>
protected CropBox GetCropBox(DictionaryToken dictionary, PageTreeMembers pageTreeMembers, MediaBox mediaBox)
{
    var hasOwnCropBox = dictionary.TryGet(NameToken.CropBox, out var cropBoxToken)
        && DirectObjectFinder.TryGet(cropBoxToken, PdfScanner, out ArrayToken boxArray);

    if (!hasOwnCropBox)
    {
        // Nothing usable on the page itself: fall back to the page tree, then to the media box.
        return pageTreeMembers.GetCropBox() ?? new CropBox(mediaBox.Bounds);
    }

    // Re-resolve the array so it is definitely assigned for the compiler on this path.
    DirectObjectFinder.TryGet(cropBoxToken, PdfScanner, out ArrayToken cropBoxArray);

    if (cropBoxArray.Length == 4)
    {
        return new CropBox(cropBoxArray.ToRectangle(PdfScanner));
    }

    ParsingOptions.Logger.Error(
        $"The CropBox was the wrong length in the dictionary: {dictionary}. Array was: {cropBoxArray}. Using MediaBox.");

    return new CropBox(mediaBox.Bounds);
}
|
||||
|
||||
/// <summary>
/// Get the media box.
/// </summary>
/// <param name="number">The page number, used in the log message when no media box is found.</param>
/// <param name="dictionary">The page dictionary to read the MediaBox entry from.</param>
/// <param name="pageTreeMembers">The page tree members, whose media box is used when the page dictionary has no MediaBox array.</param>
/// <returns>
/// The media box from the page dictionary when it is a four element array, otherwise the media
/// box from <paramref name="pageTreeMembers"/>. If the array has the wrong length, or no media
/// box is available at all, an error is logged and <see cref="MediaBox.Letter"/> is returned.
/// </returns>
protected MediaBox GetMediaBox(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers)
{
    if (dictionary.TryGet(NameToken.MediaBox, out var mediaBoxObject)
        && DirectObjectFinder.TryGet(mediaBoxObject, PdfScanner, out ArrayToken mediaBoxArray))
    {
        if (mediaBoxArray.Length != 4)
        {
            ParsingOptions.Logger.Error(
                $"The MediaBox was the wrong length in the dictionary: {dictionary}. Array was: {mediaBoxArray}. Defaulting to US Letter.");

            return MediaBox.Letter;
        }

        return new MediaBox(mediaBoxArray.ToRectangle(PdfScanner));
    }

    // No MediaBox array on the page itself: use the one collected from the page tree, if any.
    MediaBox inherited = pageTreeMembers.MediaBox;

    if (inherited != null)
    {
        return inherited;
    }

    ParsingOptions.Logger.Error(
        $"The MediaBox was missing for page {number}. Using US Letter.");

    // PDFBox defaults to US Letter.
    return MediaBox.Letter;
}
|
||||
|
||||
/// <summary>
/// Transform the media box and the crop box by the given matrix and normalise the results,
/// giving rectangles with rotation=0 and with width and height as viewed on screen.
/// Both boxes are left untouched when the matrix is the identity matrix.
/// </summary>
/// <param name="transformationMatrix">The matrix to apply to both boxes.</param>
/// <param name="mediaBox">The media box; replaced with the transformed, normalised box.</param>
/// <param name="cropBox">The crop box; replaced with the transformed, normalised box.</param>
protected static void ApplyTransformNormalise(TransformationMatrix transformationMatrix, ref MediaBox mediaBox, ref CropBox cropBox)
{
    if (transformationMatrix != TransformationMatrix.Identity)
    {
        // Media box first, then crop box, each fully replaced before moving on.
        var transformedMedia = transformationMatrix.Transform(mediaBox.Bounds);
        mediaBox = new MediaBox(transformedMedia.Normalise());

        var transformedCrop = transformationMatrix.Transform(cropBox.Bounds);
        cropBox = new CropBox(transformedCrop.Normalise());
    }
}
|
||||
}
|
||||
}
|
||||
@@ -6,14 +6,15 @@
|
||||
/// <summary>
|
||||
/// Page factory interface.
|
||||
/// </summary>
|
||||
public interface IPageFactory
|
||||
/// <typeparam name="TPage">The type of page the page factory creates.</typeparam>
|
||||
public interface IPageFactory<out TPage>
|
||||
{
|
||||
/// <summary>
|
||||
/// Create the page.
|
||||
/// </summary>
|
||||
Page Create(int number,
|
||||
TPage Create(int number,
|
||||
DictionaryToken dictionary,
|
||||
PageTreeMembers pageTreeMembers,
|
||||
NamedDestinations annotationProvider);
|
||||
NamedDestinations namedDestinations);
|
||||
}
|
||||
}
|
||||
@@ -1,18 +1,19 @@
|
||||
namespace UglyToad.PdfPig.Content
|
||||
{
|
||||
using Core;
|
||||
using Outline.Destinations;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using Core;
|
||||
using Outline.Destinations;
|
||||
using Tokenization.Scanner;
|
||||
using Tokens;
|
||||
using Util;
|
||||
|
||||
internal class Pages
|
||||
{
|
||||
private readonly IPageFactory pageFactory;
|
||||
private readonly IPageFactory<Page> pageFactory;
|
||||
private readonly IPdfTokenScanner pdfScanner;
|
||||
private readonly Dictionary<int, PageTreeNode> pagesByNumber;
|
||||
|
||||
public int Count => pagesByNumber.Count;
|
||||
|
||||
/// <summary>
|
||||
@@ -20,7 +21,7 @@
|
||||
/// </summary>
|
||||
public PageTreeNode PageTree { get; }
|
||||
|
||||
internal Pages(IPageFactory pageFactory, IPdfTokenScanner pdfScanner, PageTreeNode pageTree, Dictionary<int, PageTreeNode> pagesByNumber)
|
||||
internal Pages(IPageFactory<Page> pageFactory, IPdfTokenScanner pdfScanner, PageTreeNode pageTree, Dictionary<int, PageTreeNode> pagesByNumber)
|
||||
{
|
||||
this.pageFactory = pageFactory ?? throw new ArgumentNullException(nameof(pageFactory));
|
||||
this.pdfScanner = pdfScanner ?? throw new ArgumentNullException(nameof(pdfScanner));
|
||||
@@ -34,7 +35,7 @@
|
||||
{
|
||||
parsingOptions.Logger.Error($"Page {pageNumber} requested but is out of range.");
|
||||
|
||||
throw new ArgumentOutOfRangeException(nameof(pageNumber),
|
||||
throw new ArgumentOutOfRangeException(nameof(pageNumber),
|
||||
$"Page number {pageNumber} invalid, must be between 1 and {Count}.");
|
||||
}
|
||||
|
||||
@@ -49,7 +50,7 @@
|
||||
}
|
||||
|
||||
var pageTreeMembers = new PageTreeMembers();
|
||||
|
||||
|
||||
while (pageStack.Count > 0)
|
||||
{
|
||||
currentNode = pageStack.Pop();
|
||||
@@ -58,7 +59,7 @@
|
||||
{
|
||||
pageTreeMembers.ParentResources.Enqueue(resourcesDictionary);
|
||||
}
|
||||
|
||||
|
||||
if (currentNode.NodeDictionary.TryGet(NameToken.MediaBox, pdfScanner, out ArrayToken mediaBox))
|
||||
{
|
||||
pageTreeMembers.MediaBox = new MediaBox(mediaBox.ToRectangle(pdfScanner));
|
||||
|
||||
@@ -10,9 +10,9 @@
|
||||
using Tokens;
|
||||
using Util;
|
||||
|
||||
internal class PagesFactory
|
||||
internal static class PagesFactory
|
||||
{
|
||||
private class PageCounter
|
||||
private sealed class PageCounter
|
||||
{
|
||||
public int PageCount { get; private set; }
|
||||
public void Increment()
|
||||
@@ -21,7 +21,7 @@
|
||||
}
|
||||
}
|
||||
|
||||
public static Pages Create(IndirectReference pagesReference, DictionaryToken pagesDictionary, IPdfTokenScanner scanner, IPageFactory pageFactory, ILog log, bool isLenientParsing)
|
||||
public static Pages Create(IndirectReference pagesReference, DictionaryToken pagesDictionary, IPdfTokenScanner scanner, IPageFactory<Page> pageFactory, ILog log, bool isLenientParsing)
|
||||
{
|
||||
var pageNumber = new PageCounter();
|
||||
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
namespace UglyToad.PdfPig.Parser
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using Annotations;
|
||||
using Content;
|
||||
@@ -9,267 +8,81 @@
|
||||
using Geometry;
|
||||
using Graphics;
|
||||
using Graphics.Operations;
|
||||
using Logging;
|
||||
using Outline.Destinations;
|
||||
using Parts;
|
||||
using Tokenization.Scanner;
|
||||
using Tokens;
|
||||
using Util;
|
||||
|
||||
internal class PageFactory : IPageFactory
|
||||
internal class PageFactory : BasePageFactory<Page>
|
||||
{
|
||||
private readonly ParsingOptions parsingOptions;
|
||||
private readonly IPdfTokenScanner pdfScanner;
|
||||
private readonly IResourceStore resourceStore;
|
||||
private readonly ILookupFilterProvider filterProvider;
|
||||
private readonly IPageContentParser pageContentParser;
|
||||
|
||||
public PageFactory(
|
||||
IPdfTokenScanner pdfScanner,
|
||||
IResourceStore resourceStore,
|
||||
ILookupFilterProvider filterProvider,
|
||||
IPageContentParser pageContentParser,
|
||||
ParsingOptions parsingOptions)
|
||||
: base(pdfScanner, resourceStore, filterProvider, pageContentParser, parsingOptions)
|
||||
{
|
||||
this.resourceStore = resourceStore;
|
||||
this.filterProvider = filterProvider;
|
||||
this.pageContentParser = pageContentParser;
|
||||
this.pdfScanner = pdfScanner;
|
||||
this.parsingOptions = parsingOptions;
|
||||
}
|
||||
|
||||
public Page Create(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers,
|
||||
NamedDestinations namedDestinations)
|
||||
{
|
||||
if (dictionary == null)
|
||||
{
|
||||
throw new ArgumentNullException(nameof(dictionary));
|
||||
}
|
||||
|
||||
var type = dictionary.GetNameOrDefault(NameToken.Type);
|
||||
|
||||
if (type != null && !type.Equals(NameToken.Page))
|
||||
{
|
||||
parsingOptions.Logger.Error($"Page {number} had its type specified as {type} rather than 'Page'.");
|
||||
}
|
||||
|
||||
var rotation = new PageRotationDegrees(pageTreeMembers.Rotation);
|
||||
if (dictionary.TryGet(NameToken.Rotate, pdfScanner, out NumericToken rotateToken))
|
||||
{
|
||||
rotation = new PageRotationDegrees(rotateToken.Int);
|
||||
}
|
||||
|
||||
var stackDepth = 0;
|
||||
|
||||
while (pageTreeMembers.ParentResources.Count > 0)
|
||||
{
|
||||
var resource = pageTreeMembers.ParentResources.Dequeue();
|
||||
|
||||
resourceStore.LoadResourceDictionary(resource);
|
||||
stackDepth++;
|
||||
}
|
||||
|
||||
if (dictionary.TryGet(NameToken.Resources, pdfScanner, out DictionaryToken resources))
|
||||
{
|
||||
resourceStore.LoadResourceDictionary(resources);
|
||||
stackDepth++;
|
||||
}
|
||||
|
||||
UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary);
|
||||
|
||||
MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers);
|
||||
CropBox cropBox = GetCropBox(dictionary, pageTreeMembers, mediaBox);
|
||||
|
||||
var initialMatrix = OperationContextHelper.GetInitialMatrix(userSpaceUnit, mediaBox, cropBox, rotation, parsingOptions.Logger);
|
||||
|
||||
ApplyTransformNormalise(initialMatrix, ref mediaBox, ref cropBox);
|
||||
|
||||
PageContent content;
|
||||
|
||||
if (!dictionary.TryGet(NameToken.Contents, out var contents))
|
||||
{
|
||||
content = new PageContent(EmptyArray<IGraphicsStateOperation>.Instance,
|
||||
EmptyArray<Letter>.Instance,
|
||||
EmptyArray<PdfPath>.Instance,
|
||||
EmptyArray<Union<XObjectContentRecord, InlineImage>>.Instance,
|
||||
EmptyArray<MarkedContentElement>.Instance,
|
||||
pdfScanner,
|
||||
filterProvider,
|
||||
resourceStore);
|
||||
// ignored for now, is it possible? check the spec...
|
||||
}
|
||||
else if (DirectObjectFinder.TryGet<ArrayToken>(contents, pdfScanner, out var array))
|
||||
{
|
||||
var bytes = new List<byte>();
|
||||
|
||||
for (var i = 0; i < array.Data.Count; i++)
|
||||
{
|
||||
var item = array.Data[i];
|
||||
|
||||
if (!(item is IndirectReferenceToken obj))
|
||||
{
|
||||
throw new PdfDocumentFormatException($"The contents contained something which was not an indirect reference: {item}.");
|
||||
}
|
||||
|
||||
var contentStream = DirectObjectFinder.Get<StreamToken>(obj, pdfScanner);
|
||||
|
||||
if (contentStream == null)
|
||||
{
|
||||
throw new InvalidOperationException($"Could not find the contents for object {obj}.");
|
||||
}
|
||||
|
||||
bytes.AddRange(contentStream.Decode(filterProvider, pdfScanner));
|
||||
|
||||
if (i < array.Data.Count - 1)
|
||||
{
|
||||
bytes.Add((byte)'\n');
|
||||
}
|
||||
}
|
||||
|
||||
content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, initialMatrix, parsingOptions);
|
||||
}
|
||||
else
|
||||
{
|
||||
var contentStream = DirectObjectFinder.Get<StreamToken>(contents, pdfScanner);
|
||||
|
||||
if (contentStream == null)
|
||||
{
|
||||
throw new InvalidOperationException("Failed to parse the content for the page: " + number);
|
||||
}
|
||||
|
||||
var bytes = contentStream.Decode(filterProvider, pdfScanner);
|
||||
|
||||
content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, initialMatrix, parsingOptions);
|
||||
}
|
||||
|
||||
var annotationProvider = new AnnotationProvider(pdfScanner, dictionary, initialMatrix, namedDestinations, parsingOptions.Logger);
|
||||
var page = new Page(number, dictionary, mediaBox, cropBox, rotation, content, annotationProvider, pdfScanner);
|
||||
|
||||
for (var i = 0; i < stackDepth; i++)
|
||||
{
|
||||
resourceStore.UnloadResourceDictionary();
|
||||
}
|
||||
|
||||
return page;
|
||||
}
|
||||
|
||||
private PageContent GetContent(
|
||||
int pageNumber,
|
||||
IReadOnlyList<byte> contentBytes,
|
||||
protected override Page ProcessPage(int pageNumber,
|
||||
DictionaryToken dictionary,
|
||||
NamedDestinations namedDestinations,
|
||||
MediaBox mediaBox,
|
||||
CropBox cropBox,
|
||||
UserSpaceUnit userSpaceUnit,
|
||||
PageRotationDegrees rotation,
|
||||
TransformationMatrix initialMatrix,
|
||||
ParsingOptions parsingOptions)
|
||||
IReadOnlyList<IGraphicsStateOperation> operations)
|
||||
{
|
||||
var operations = pageContentParser.Parse(pageNumber, new ByteArrayInputBytes(contentBytes),
|
||||
parsingOptions.Logger);
|
||||
var annotationProvider = new AnnotationProvider(PdfScanner,
|
||||
dictionary,
|
||||
initialMatrix,
|
||||
namedDestinations,
|
||||
ParsingOptions.Logger);
|
||||
|
||||
if (operations == null || operations.Count == 0)
|
||||
{
|
||||
PageContent emptyContent = new PageContent(EmptyArray<IGraphicsStateOperation>.Instance,
|
||||
EmptyArray<Letter>.Instance,
|
||||
EmptyArray<PdfPath>.Instance,
|
||||
EmptyArray<Union<XObjectContentRecord, InlineImage>>.Instance,
|
||||
EmptyArray<MarkedContentElement>.Instance,
|
||||
PdfScanner,
|
||||
FilterProvider,
|
||||
ResourceStore);
|
||||
|
||||
return new Page(pageNumber,
|
||||
dictionary,
|
||||
mediaBox,
|
||||
cropBox,
|
||||
rotation,
|
||||
emptyContent,
|
||||
annotationProvider,
|
||||
PdfScanner);
|
||||
}
|
||||
|
||||
var context = new ContentStreamProcessor(
|
||||
pageNumber,
|
||||
resourceStore,
|
||||
pdfScanner,
|
||||
pageContentParser,
|
||||
filterProvider,
|
||||
ResourceStore,
|
||||
PdfScanner,
|
||||
PageContentParser,
|
||||
FilterProvider,
|
||||
cropBox,
|
||||
userSpaceUnit,
|
||||
rotation,
|
||||
initialMatrix,
|
||||
parsingOptions);
|
||||
ParsingOptions);
|
||||
|
||||
return context.Process(pageNumber, operations);
|
||||
}
|
||||
PageContent content = context.Process(pageNumber, operations);
|
||||
|
||||
private static UserSpaceUnit GetUserSpaceUnits(DictionaryToken dictionary)
|
||||
{
|
||||
var spaceUnits = UserSpaceUnit.Default;
|
||||
if (dictionary.TryGet(NameToken.UserUnit, out var userUnitBase) && userUnitBase is NumericToken userUnitNumber)
|
||||
{
|
||||
spaceUnits = new UserSpaceUnit(userUnitNumber.Int);
|
||||
}
|
||||
|
||||
return spaceUnits;
|
||||
}
|
||||
|
||||
private CropBox GetCropBox(
|
||||
DictionaryToken dictionary,
|
||||
PageTreeMembers pageTreeMembers,
|
||||
MediaBox mediaBox)
|
||||
{
|
||||
CropBox cropBox;
|
||||
if (dictionary.TryGet(NameToken.CropBox, out var cropBoxObject) &&
|
||||
DirectObjectFinder.TryGet(cropBoxObject, pdfScanner, out ArrayToken cropBoxArray))
|
||||
{
|
||||
if (cropBoxArray.Length != 4)
|
||||
{
|
||||
parsingOptions.Logger.Error($"The CropBox was the wrong length in the dictionary: {dictionary}. Array was: {cropBoxArray}. Using MediaBox.");
|
||||
|
||||
cropBox = new CropBox(mediaBox.Bounds);
|
||||
|
||||
return cropBox;
|
||||
}
|
||||
|
||||
cropBox = new CropBox(cropBoxArray.ToRectangle(pdfScanner));
|
||||
}
|
||||
else
|
||||
{
|
||||
cropBox = pageTreeMembers.GetCropBox() ?? new CropBox(mediaBox.Bounds);
|
||||
}
|
||||
|
||||
return cropBox;
|
||||
}
|
||||
|
||||
private MediaBox GetMediaBox(
|
||||
int number,
|
||||
DictionaryToken dictionary,
|
||||
PageTreeMembers pageTreeMembers)
|
||||
{
|
||||
MediaBox mediaBox;
|
||||
if (dictionary.TryGet(NameToken.MediaBox, out var mediaBoxObject)
|
||||
&& DirectObjectFinder.TryGet(mediaBoxObject, pdfScanner, out ArrayToken mediaBoxArray))
|
||||
{
|
||||
if (mediaBoxArray.Length != 4)
|
||||
{
|
||||
parsingOptions.Logger.Error($"The MediaBox was the wrong length in the dictionary: {dictionary}. Array was: {mediaBoxArray}. Defaulting to US Letter.");
|
||||
|
||||
mediaBox = MediaBox.Letter;
|
||||
|
||||
return mediaBox;
|
||||
}
|
||||
|
||||
mediaBox = new MediaBox(mediaBoxArray.ToRectangle(pdfScanner));
|
||||
}
|
||||
else
|
||||
{
|
||||
mediaBox = pageTreeMembers.MediaBox;
|
||||
|
||||
if (mediaBox == null)
|
||||
{
|
||||
parsingOptions.Logger.Error($"The MediaBox was the wrong missing for page {number}. Using US Letter.");
|
||||
|
||||
// PDFBox defaults to US Letter.
|
||||
mediaBox = MediaBox.Letter;
|
||||
}
|
||||
}
|
||||
|
||||
return mediaBox;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Apply the matrix transform to the media box and crop box.
|
||||
/// Then Normalise() in order to obtain rectangles with rotation=0
|
||||
/// and width and height as viewed on screen.
|
||||
/// </summary>
|
||||
/// <param name="transformationMatrix"></param>
|
||||
/// <param name="mediaBox"></param>
|
||||
/// <param name="cropBox"></param>
|
||||
private static void ApplyTransformNormalise(TransformationMatrix transformationMatrix, ref MediaBox mediaBox, ref CropBox cropBox)
|
||||
{
|
||||
if (transformationMatrix != TransformationMatrix.Identity)
|
||||
{
|
||||
mediaBox = new MediaBox(transformationMatrix.Transform(mediaBox.Bounds).Normalise());
|
||||
cropBox = new CropBox(transformationMatrix.Transform(cropBox.Bounds).Normalise());
|
||||
}
|
||||
return new Page(pageNumber,
|
||||
dictionary,
|
||||
mediaBox,
|
||||
cropBox,
|
||||
rotation,
|
||||
content,
|
||||
annotationProvider,
|
||||
PdfScanner);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user