#482 add skip missing fonts option and pass parsing options to content stream processor

This doesn't fix the reported issue, since the PDF itself is corrupted on page 8; however, it will
allow recovery in some scenarios where text content isn't important.

Also adds a more informative error when a stream is unintentionally passed with a non-zero offset.
This commit is contained in:
Eliot Jones
2022-10-09 13:44:05 -04:00
parent c643facee0
commit e2246a88bb
8 changed files with 192 additions and 83 deletions

View File

@@ -4,6 +4,10 @@
internal interface IPageFactory internal interface IPageFactory
{ {
Page Create(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers, bool clipPaths); Page Create(
int number,
DictionaryToken dictionary,
PageTreeMembers pageTreeMembers,
InternalParsingOptions parsingOptions);
} }
} }

View File

@@ -23,10 +23,12 @@
Count = catalog.PagesDictionary.GetIntOrDefault(NameToken.Count); Count = catalog.PagesDictionary.GetIntOrDefault(NameToken.Count);
} }
public Page GetPage(int pageNumber, bool clipPaths) public Page GetPage(int pageNumber, InternalParsingOptions parsingOptions)
{ {
if (pageNumber <= 0 || pageNumber > Count) if (pageNumber <= 0 || pageNumber > Count)
{ {
parsingOptions.Logger.Error($"Page {pageNumber} requested but is out of range.");
throw new ArgumentOutOfRangeException(nameof(pageNumber), throw new ArgumentOutOfRangeException(nameof(pageNumber),
$"Page number {pageNumber} invalid, must be between 1 and {Count}."); $"Page number {pageNumber} invalid, must be between 1 and {Count}.");
} }
@@ -63,7 +65,11 @@
} }
} }
var page = pageFactory.Create(pageNumber, pageNode.NodeDictionary, pageTreeMembers, clipPaths); var page = pageFactory.Create(
pageNumber,
pageNode.NodeDictionary,
pageTreeMembers,
parsingOptions);
return page; return page;
} }

View File

@@ -5,7 +5,6 @@
using Core; using Core;
using Filters; using Filters;
using Geometry; using Geometry;
using Logging;
using Operations; using Operations;
using Parser; using Parser;
using PdfFonts; using PdfFonts;
@@ -49,9 +48,8 @@
private readonly IPdfTokenScanner pdfScanner; private readonly IPdfTokenScanner pdfScanner;
private readonly IPageContentParser pageContentParser; private readonly IPageContentParser pageContentParser;
private readonly ILookupFilterProvider filterProvider; private readonly ILookupFilterProvider filterProvider;
private readonly ILog log;
private readonly bool clipPaths;
private readonly PdfVector pageSize; private readonly PdfVector pageSize;
private readonly InternalParsingOptions parsingOptions;
private readonly MarkedContentStack markedContentStack = new MarkedContentStack(); private readonly MarkedContentStack markedContentStack = new MarkedContentStack();
private Stack<CurrentGraphicsState> graphicsStack = new Stack<CurrentGraphicsState>(); private Stack<CurrentGraphicsState> graphicsStack = new Stack<CurrentGraphicsState>();
@@ -90,9 +88,8 @@
IPdfTokenScanner pdfScanner, IPdfTokenScanner pdfScanner,
IPageContentParser pageContentParser, IPageContentParser pageContentParser,
ILookupFilterProvider filterProvider, ILookupFilterProvider filterProvider,
ILog log, PdfVector pageSize,
bool clipPaths, InternalParsingOptions parsingOptions)
PdfVector pageSize)
{ {
this.resourceStore = resourceStore; this.resourceStore = resourceStore;
this.userSpaceUnit = userSpaceUnit; this.userSpaceUnit = userSpaceUnit;
@@ -100,9 +97,8 @@
this.pdfScanner = pdfScanner ?? throw new ArgumentNullException(nameof(pdfScanner)); this.pdfScanner = pdfScanner ?? throw new ArgumentNullException(nameof(pdfScanner));
this.pageContentParser = pageContentParser ?? throw new ArgumentNullException(nameof(pageContentParser)); this.pageContentParser = pageContentParser ?? throw new ArgumentNullException(nameof(pageContentParser));
this.filterProvider = filterProvider ?? throw new ArgumentNullException(nameof(filterProvider)); this.filterProvider = filterProvider ?? throw new ArgumentNullException(nameof(filterProvider));
this.log = log;
this.clipPaths = clipPaths;
this.pageSize = pageSize; this.pageSize = pageSize;
this.parsingOptions = parsingOptions;
// initiate CurrentClippingPath to cropBox // initiate CurrentClippingPath to cropBox
var clippingSubpath = new PdfSubpath(); var clippingSubpath = new PdfSubpath();
@@ -230,6 +226,15 @@
if (font == null) if (font == null)
{ {
if (parsingOptions.SkipMissingFonts)
{
parsingOptions.Logger.Warn($"Skipping a missing font with name {currentState.FontState.FontName} " +
$"since it is not present in the document and {nameof(InternalParsingOptions.SkipMissingFonts)} " +
"is set to true. This may result in some text being skipped and not included in the output.");
return;
}
throw new InvalidOperationException($"Could not find the font with name {currentState.FontState.FontName} in the resource store. It has not been loaded yet."); throw new InvalidOperationException($"Could not find the font with name {currentState.FontState.FontName} in the resource store. It has not been loaded yet.");
} }
@@ -253,7 +258,8 @@
if (!foundUnicode || unicode == null) if (!foundUnicode || unicode == null)
{ {
log.Warn($"We could not find the corresponding character with code {code} in font {font.Name}."); parsingOptions.Logger.Warn($"We could not find the corresponding character with code {code} in font {font.Name}.");
// Try casting directly to string as in PDFBox 1.8. // Try casting directly to string as in PDFBox 1.8.
unicode = new string((char)code, 1); unicode = new string((char)code, 1);
} }
@@ -494,7 +500,7 @@
var contentStream = formStream.Decode(filterProvider, pdfScanner); var contentStream = formStream.Decode(filterProvider, pdfScanner);
var operations = pageContentParser.Parse(pageNumber, new ByteArrayInputBytes(contentStream), log); var operations = pageContentParser.Parse(pageNumber, new ByteArrayInputBytes(contentStream), parsingOptions.Logger);
// 3. We don't respect clipping currently. // 3. We don't respect clipping currently.
@@ -677,7 +683,7 @@
if (CurrentPath.IsClipping) if (CurrentPath.IsClipping)
{ {
if (!clipPaths) if (!parsingOptions.ClipPaths)
{ {
// if we don't clip paths, add clipping path to paths // if we don't clip paths, add clipping path to paths
paths.Add(CurrentPath); paths.Add(CurrentPath);
@@ -717,9 +723,9 @@
CurrentPath.FillColor = currentState.CurrentNonStrokingColor; CurrentPath.FillColor = currentState.CurrentNonStrokingColor;
} }
if (clipPaths) if (parsingOptions.ClipPaths)
{ {
var clippedPath = currentState.CurrentClippingPath.Clip(CurrentPath, log); var clippedPath = currentState.CurrentClippingPath.Clip(CurrentPath, parsingOptions.Logger);
if (clippedPath != null) if (clippedPath != null)
{ {
paths.Add(clippedPath); paths.Add(clippedPath);
@@ -745,15 +751,15 @@
AddCurrentSubpath(); AddCurrentSubpath();
CurrentPath.SetClipping(clippingRule); CurrentPath.SetClipping(clippingRule);
if (clipPaths) if (parsingOptions.ClipPaths)
{ {
var currentClipping = GetCurrentState().CurrentClippingPath; var currentClipping = GetCurrentState().CurrentClippingPath;
currentClipping.SetClipping(clippingRule); currentClipping.SetClipping(clippingRule);
var newClippings = CurrentPath.Clip(currentClipping, log); var newClippings = CurrentPath.Clip(currentClipping, parsingOptions.Logger);
if (newClippings == null) if (newClippings == null)
{ {
log.Warn("Empty clipping path found. Clipping path not updated."); parsingOptions.Logger.Warn("Empty clipping path found. Clipping path not updated.");
} }
else else
{ {
@@ -796,7 +802,7 @@
{ {
if (inlineImageBuilder != null) if (inlineImageBuilder != null)
{ {
log?.Error("Begin inline image (BI) command encountered while another inline image was active."); parsingOptions.Logger.Error("Begin inline image (BI) command encountered while another inline image was active.");
} }
inlineImageBuilder = new InlineImageBuilder(); inlineImageBuilder = new InlineImageBuilder();
@@ -806,7 +812,7 @@
{ {
if (inlineImageBuilder == null) if (inlineImageBuilder == null)
{ {
log?.Error("Begin inline image data (ID) command encountered without a corresponding begin inline image (BI) command."); parsingOptions.Logger.Error("Begin inline image data (ID) command encountered without a corresponding begin inline image (BI) command.");
return; return;
} }
@@ -817,7 +823,7 @@
{ {
if (inlineImageBuilder == null) if (inlineImageBuilder == null)
{ {
log?.Error("End inline image (EI) command encountered without a corresponding begin inline image (BI) command."); parsingOptions.Logger.Error("End inline image (EI) command encountered without a corresponding begin inline image (BI) command.");
return; return;
} }

View File

@@ -0,0 +1,34 @@
namespace UglyToad.PdfPig;
using Logging;
using System.Collections.Generic;
/// <summary>
/// Internal counterpart of <see cref="ParsingOptions"/> which is not part of the public API.
/// All values are supplied once through the constructor and exposed as read-only properties.
/// </summary>
internal class InternalParsingOptions
{
    /// <summary>
    /// Create a new <see cref="InternalParsingOptions"/> holding the supplied values unchanged.
    /// </summary>
    /// <param name="passwords">Value for <see cref="Passwords"/>.</param>
    /// <param name="useLenientParsing">Value for <see cref="UseLenientParsing"/>.</param>
    /// <param name="clipPaths">Value for <see cref="ClipPaths"/>.</param>
    /// <param name="skipMissingFonts">Value for <see cref="SkipMissingFonts"/>.</param>
    /// <param name="logger">Value for <see cref="Logger"/>.</param>
    public InternalParsingOptions(
        IReadOnlyList<string> passwords,
        bool useLenientParsing,
        bool clipPaths,
        bool skipMissingFonts,
        ILog logger)
    {
        // No validation or copying is performed; every argument is stored exactly as given.
        (Passwords, UseLenientParsing, ClipPaths, SkipMissingFonts, Logger) =
            (passwords, useLenientParsing, clipPaths, skipMissingFonts, logger);
    }

    /// <summary>
    /// The passwords supplied for opening the document.
    /// </summary>
    public IReadOnlyList<string> Passwords { get; }

    /// <summary>
    /// Whether lenient parsing was requested.
    /// </summary>
    public bool UseLenientParsing { get; }

    /// <summary>
    /// Whether path clipping was requested.
    /// </summary>
    public bool ClipPaths { get; }

    /// <summary>
    /// Whether content using a font which could not be found should be skipped.
    /// </summary>
    public bool SkipMissingFonts { get; }

    /// <summary>
    /// The logger supplied for reporting messages.
    /// </summary>
    public ILog Logger { get; }
}

View File

@@ -21,20 +21,20 @@
private readonly IResourceStore resourceStore; private readonly IResourceStore resourceStore;
private readonly ILookupFilterProvider filterProvider; private readonly ILookupFilterProvider filterProvider;
private readonly IPageContentParser pageContentParser; private readonly IPageContentParser pageContentParser;
private readonly ILog log;
public PageFactory(IPdfTokenScanner pdfScanner, IResourceStore resourceStore, ILookupFilterProvider filterProvider, public PageFactory(
IPageContentParser pageContentParser, IPdfTokenScanner pdfScanner,
ILog log) IResourceStore resourceStore,
ILookupFilterProvider filterProvider,
IPageContentParser pageContentParser)
{ {
this.resourceStore = resourceStore; this.resourceStore = resourceStore;
this.filterProvider = filterProvider; this.filterProvider = filterProvider;
this.pageContentParser = pageContentParser; this.pageContentParser = pageContentParser;
this.log = log;
this.pdfScanner = pdfScanner; this.pdfScanner = pdfScanner;
} }
public Page Create(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers, bool clipPaths) public Page Create(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers, InternalParsingOptions parsingOptions)
{ {
if (dictionary == null) if (dictionary == null)
{ {
@@ -45,11 +45,11 @@
if (type != null && !type.Equals(NameToken.Page)) if (type != null && !type.Equals(NameToken.Page))
{ {
log?.Error($"Page {number} had its type specified as {type} rather than 'Page'."); parsingOptions.Logger.Error($"Page {number} had its type specified as {type} rather than 'Page'.");
} }
MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers); MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers, parsingOptions.Logger);
CropBox cropBox = GetCropBox(dictionary, pageTreeMembers, mediaBox); CropBox cropBox = GetCropBox(dictionary, pageTreeMembers, mediaBox, parsingOptions.Logger);
var rotation = new PageRotationDegrees(pageTreeMembers.Rotation); var rotation = new PageRotationDegrees(pageTreeMembers.Rotation);
if (dictionary.TryGet(NameToken.Rotate, pdfScanner, out NumericToken rotateToken)) if (dictionary.TryGet(NameToken.Rotate, pdfScanner, out NumericToken rotateToken))
@@ -130,7 +130,7 @@
} }
} }
content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, clipPaths, mediaBox); content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, mediaBox, parsingOptions);
} }
else else
{ {
@@ -143,7 +143,7 @@
var bytes = contentStream.Decode(filterProvider, pdfScanner); var bytes = contentStream.Decode(filterProvider, pdfScanner);
content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, clipPaths, mediaBox); content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, mediaBox, parsingOptions);
} }
var page = new Page(number, dictionary, mediaBox, cropBox, rotation, content, var page = new Page(number, dictionary, mediaBox, cropBox, rotation, content,
@@ -158,18 +158,28 @@
return page; return page;
} }
private PageContent GetContent(int pageNumber, IReadOnlyList<byte> contentBytes, CropBox cropBox, UserSpaceUnit userSpaceUnit, private PageContent GetContent(
PageRotationDegrees rotation, bool clipPaths, MediaBox mediaBox) int pageNumber,
IReadOnlyList<byte> contentBytes,
CropBox cropBox,
UserSpaceUnit userSpaceUnit,
PageRotationDegrees rotation,
MediaBox mediaBox,
InternalParsingOptions parsingOptions)
{ {
var operations = pageContentParser.Parse(pageNumber, new ByteArrayInputBytes(contentBytes), var operations = pageContentParser.Parse(pageNumber, new ByteArrayInputBytes(contentBytes),
log); parsingOptions.Logger);
var context = new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit, rotation, pdfScanner, var context = new ContentStreamProcessor(
cropBox.Bounds,
resourceStore,
userSpaceUnit,
rotation,
pdfScanner,
pageContentParser, pageContentParser,
filterProvider, filterProvider,
log, new PdfVector(mediaBox.Bounds.Width, mediaBox.Bounds.Height),
clipPaths, parsingOptions);
new PdfVector(mediaBox.Bounds.Width, mediaBox.Bounds.Height));
return context.Process(pageNumber, operations); return context.Process(pageNumber, operations);
} }
@@ -185,7 +195,11 @@
return spaceUnits; return spaceUnits;
} }
private CropBox GetCropBox(DictionaryToken dictionary, PageTreeMembers pageTreeMembers, MediaBox mediaBox) private CropBox GetCropBox(
DictionaryToken dictionary,
PageTreeMembers pageTreeMembers,
MediaBox mediaBox,
ILog log)
{ {
CropBox cropBox; CropBox cropBox;
if (dictionary.TryGet(NameToken.CropBox, out var cropBoxObject) && if (dictionary.TryGet(NameToken.CropBox, out var cropBoxObject) &&
@@ -210,7 +224,11 @@
return cropBox; return cropBox;
} }
private MediaBox GetMediaBox(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers) private MediaBox GetMediaBox(
int number,
DictionaryToken dictionary,
PageTreeMembers pageTreeMembers,
ILog log)
{ {
MediaBox mediaBox; MediaBox mediaBox;
if (dictionary.TryGet(NameToken.MediaBox, out var mediaboxObject) if (dictionary.TryGet(NameToken.MediaBox, out var mediaboxObject)

View File

@@ -45,9 +45,25 @@
internal static PdfDocument Open(Stream stream, ParsingOptions options) internal static PdfDocument Open(Stream stream, ParsingOptions options)
{ {
var initialPosition = stream.Position;
var streamInput = new StreamInputBytes(stream, false); var streamInput = new StreamInputBytes(stream, false);
return Open(streamInput, options); try
{
return Open(streamInput, options);
}
catch (Exception ex)
{
if (initialPosition != 0)
{
throw new InvalidOperationException(
"Could not parse document due to an error, the input stream was not at position zero when provided to the Open method.",
ex);
}
throw;
}
} }
private static PdfDocument Open(IInputBytes inputBytes, ParsingOptions options = null) private static PdfDocument Open(IInputBytes inputBytes, ParsingOptions options = null)
@@ -75,19 +91,28 @@
passwords.Add(string.Empty); passwords.Add(string.Empty);
} }
var document = OpenDocument(inputBytes, tokenScanner, options?.Logger ?? new NoOpLog(), isLenientParsing, passwords, clipPaths); var finalOptions = new InternalParsingOptions(
passwords,
isLenientParsing,
clipPaths,
options?.SkipMissingFonts ?? false,
options?.Logger ?? new NoOpLog());
var document = OpenDocument(inputBytes, tokenScanner, finalOptions);
return document; return document;
} }
private static PdfDocument OpenDocument(IInputBytes inputBytes, ISeekableTokenScanner scanner, ILog log, bool isLenientParsing, private static PdfDocument OpenDocument(
IReadOnlyList<string> passwords, bool clipPaths) IInputBytes inputBytes,
ISeekableTokenScanner scanner,
InternalParsingOptions parsingOptions)
{ {
var filterProvider = new FilterProviderWithLookup(DefaultFilterProvider.Instance); var filterProvider = new FilterProviderWithLookup(DefaultFilterProvider.Instance);
CrossReferenceTable crossReferenceTable = null; CrossReferenceTable crossReferenceTable = null;
var xrefValidator = new XrefOffsetValidator(log); var xrefValidator = new XrefOffsetValidator(parsingOptions.Logger);
// We're ok with this since our intent is to lazily load the cross reference table. // We're ok with this since our intent is to lazily load the cross reference table.
// ReSharper disable once AccessToModifiedClosure // ReSharper disable once AccessToModifiedClosure
@@ -95,30 +120,39 @@
var pdfScanner = new PdfTokenScanner(inputBytes, locationProvider, filterProvider, NoOpEncryptionHandler.Instance); var pdfScanner = new PdfTokenScanner(inputBytes, locationProvider, filterProvider, NoOpEncryptionHandler.Instance);
var crossReferenceStreamParser = new CrossReferenceStreamParser(filterProvider); var crossReferenceStreamParser = new CrossReferenceStreamParser(filterProvider);
var crossReferenceParser = new CrossReferenceParser(log, xrefValidator, crossReferenceStreamParser); var crossReferenceParser = new CrossReferenceParser(parsingOptions.Logger, xrefValidator, crossReferenceStreamParser);
var version = FileHeaderParser.Parse(scanner, inputBytes, isLenientParsing, log); var version = FileHeaderParser.Parse(scanner, inputBytes, parsingOptions.UseLenientParsing, parsingOptions.Logger);
var crossReferenceOffset = FileTrailerParser.GetFirstCrossReferenceOffset(inputBytes, scanner, var crossReferenceOffset = FileTrailerParser.GetFirstCrossReferenceOffset(
isLenientParsing) + version.OffsetInFile; inputBytes,
scanner,
parsingOptions.UseLenientParsing) + version.OffsetInFile;
// TODO: make this use the scanner. // TODO: make this use the scanner.
var validator = new CrossReferenceOffsetValidator(xrefValidator); var validator = new CrossReferenceOffsetValidator(xrefValidator);
crossReferenceOffset = validator.Validate(crossReferenceOffset, scanner, inputBytes, isLenientParsing); crossReferenceOffset = validator.Validate(crossReferenceOffset, scanner, inputBytes, parsingOptions.UseLenientParsing);
crossReferenceTable = crossReferenceParser.Parse(inputBytes, isLenientParsing, crossReferenceTable = crossReferenceParser.Parse(
inputBytes,
parsingOptions.UseLenientParsing,
crossReferenceOffset, crossReferenceOffset,
version.OffsetInFile, version.OffsetInFile,
pdfScanner, pdfScanner,
scanner); scanner);
var (rootReference, rootDictionary) = ParseTrailer(crossReferenceTable, isLenientParsing, var (rootReference, rootDictionary) = ParseTrailer(
crossReferenceTable,
parsingOptions.UseLenientParsing,
pdfScanner, pdfScanner,
out var encryptionDictionary); out var encryptionDictionary);
var encryptionHandler = encryptionDictionary != null ? var encryptionHandler = encryptionDictionary != null ?
(IEncryptionHandler)new EncryptionHandler(encryptionDictionary, crossReferenceTable.Trailer, passwords) (IEncryptionHandler)new EncryptionHandler(
encryptionDictionary,
crossReferenceTable.Trailer,
parsingOptions.Passwords)
: NoOpEncryptionHandler.Instance; : NoOpEncryptionHandler.Instance;
pdfScanner.UpdateEncryptionHandler(encryptionHandler); pdfScanner.UpdateEncryptionHandler(encryptionHandler);
@@ -128,35 +162,45 @@
var type1Handler = new Type1FontHandler(pdfScanner, filterProvider, encodingReader); var type1Handler = new Type1FontHandler(pdfScanner, filterProvider, encodingReader);
var fontFactory = new FontFactory(log, new Type0FontHandler(cidFontFactory, var fontFactory = new FontFactory(parsingOptions.Logger, new Type0FontHandler(cidFontFactory,
filterProvider, pdfScanner), filterProvider, pdfScanner),
new TrueTypeFontHandler(log, pdfScanner, filterProvider, encodingReader, SystemFontFinder.Instance, new TrueTypeFontHandler(parsingOptions.Logger, pdfScanner, filterProvider, encodingReader, SystemFontFinder.Instance,
type1Handler), type1Handler),
type1Handler, type1Handler,
new Type3FontHandler(pdfScanner, filterProvider, encodingReader)); new Type3FontHandler(pdfScanner, filterProvider, encodingReader));
var resourceContainer = new ResourceStore(pdfScanner, fontFactory); var resourceContainer = new ResourceStore(pdfScanner, fontFactory);
var information = DocumentInformationFactory.Create(pdfScanner, crossReferenceTable.Trailer, isLenientParsing); var information = DocumentInformationFactory.Create(
pdfScanner,
crossReferenceTable.Trailer,
parsingOptions.UseLenientParsing);
var catalog = CatalogFactory.Create(rootReference, rootDictionary, pdfScanner, isLenientParsing); var catalog = CatalogFactory.Create(
rootReference,
rootDictionary,
pdfScanner,
parsingOptions.UseLenientParsing);
var pageFactory = new PageFactory(pdfScanner, resourceContainer, filterProvider, var pageFactory = new PageFactory(pdfScanner, resourceContainer, filterProvider,
new PageContentParser(new ReflectionGraphicsStateOperationFactory()), new PageContentParser(new ReflectionGraphicsStateOperationFactory()));
log);
var caching = new ParsingCachingProviders(resourceContainer);
var acroFormFactory = new AcroFormFactory(pdfScanner, filterProvider, crossReferenceTable); var acroFormFactory = new AcroFormFactory(pdfScanner, filterProvider, crossReferenceTable);
var bookmarksProvider = new BookmarksProvider(log, pdfScanner); var bookmarksProvider = new BookmarksProvider(parsingOptions.Logger, pdfScanner);
return new PdfDocument(log, inputBytes, version, crossReferenceTable, caching, pageFactory, catalog, information, return new PdfDocument(
inputBytes,
version,
crossReferenceTable,
pageFactory,
catalog,
information,
encryptionDictionary, encryptionDictionary,
pdfScanner, pdfScanner,
filterProvider, filterProvider,
acroFormFactory, acroFormFactory,
bookmarksProvider, bookmarksProvider,
clipPaths); parsingOptions);
} }
private static (IndirectReference, DictionaryToken) ParseTrailer(CrossReferenceTable crossReferenceTable, bool isLenientParsing, IPdfTokenScanner pdfTokenScanner, private static (IndirectReference, DictionaryToken) ParseTrailer(CrossReferenceTable crossReferenceTable, bool isLenientParsing, IPdfTokenScanner pdfTokenScanner,

View File

@@ -48,5 +48,11 @@
/// All passwords to try when opening this document, will include any values set for <see cref="Password"/>. /// All passwords to try when opening this document, will include any values set for <see cref="Password"/>.
/// </summary> /// </summary>
public List<string> Passwords { get; set; } = new List<string>(); public List<string> Passwords { get; set; } = new List<string>();
/// <summary>
/// When <see langword="true"/>, content is skipped where the font could not be found rather than
/// the library throwing. This will result in some letters being skipped/missed from the output, but
/// may allow documents where the source PDF has some corrupted text to be read.
/// Defaults to <see langword="false"/>.
/// </summary>
public bool SkipMissingFonts { get; set; } = false;
} }
} }

View File

@@ -10,7 +10,6 @@
using Encryption; using Encryption;
using Exceptions; using Exceptions;
using Filters; using Filters;
using Logging;
using Parser; using Parser;
using Tokenization.Scanner; using Tokenization.Scanner;
using Tokens; using Tokens;
@@ -28,16 +27,9 @@
[NotNull] [NotNull]
private readonly HeaderVersion version; private readonly HeaderVersion version;
private readonly ILog log;
private readonly IInputBytes inputBytes; private readonly IInputBytes inputBytes;
private readonly bool clipPaths;
[NotNull]
private readonly ParsingCachingProviders cachingProviders;
[CanBeNull] [CanBeNull]
private readonly EncryptionDictionary encryptionDictionary; private readonly EncryptionDictionary encryptionDictionary;
@@ -46,6 +38,7 @@
private readonly ILookupFilterProvider filterProvider; private readonly ILookupFilterProvider filterProvider;
private readonly BookmarksProvider bookmarksProvider; private readonly BookmarksProvider bookmarksProvider;
private readonly InternalParsingOptions parsingOptions;
[NotNull] [NotNull]
private readonly Pages pages; private readonly Pages pages;
@@ -82,11 +75,10 @@
/// </summary> /// </summary>
public bool IsEncrypted => encryptionDictionary != null; public bool IsEncrypted => encryptionDictionary != null;
internal PdfDocument(ILog log, internal PdfDocument(
IInputBytes inputBytes, IInputBytes inputBytes,
HeaderVersion version, HeaderVersion version,
CrossReferenceTable crossReferenceTable, CrossReferenceTable crossReferenceTable,
ParsingCachingProviders cachingProviders,
IPageFactory pageFactory, IPageFactory pageFactory,
Catalog catalog, Catalog catalog,
DocumentInformation information, DocumentInformation information,
@@ -95,17 +87,16 @@
ILookupFilterProvider filterProvider, ILookupFilterProvider filterProvider,
AcroFormFactory acroFormFactory, AcroFormFactory acroFormFactory,
BookmarksProvider bookmarksProvider, BookmarksProvider bookmarksProvider,
bool clipPaths) InternalParsingOptions parsingOptions)
{ {
this.log = log;
this.inputBytes = inputBytes; this.inputBytes = inputBytes;
this.version = version ?? throw new ArgumentNullException(nameof(version)); this.version = version ?? throw new ArgumentNullException(nameof(version));
this.cachingProviders = cachingProviders ?? throw new ArgumentNullException(nameof(cachingProviders));
this.encryptionDictionary = encryptionDictionary; this.encryptionDictionary = encryptionDictionary;
this.pdfScanner = pdfScanner ?? throw new ArgumentNullException(nameof(pdfScanner)); this.pdfScanner = pdfScanner ?? throw new ArgumentNullException(nameof(pdfScanner));
this.filterProvider = filterProvider ?? throw new ArgumentNullException(nameof(filterProvider)); this.filterProvider = filterProvider ?? throw new ArgumentNullException(nameof(filterProvider));
this.bookmarksProvider = bookmarksProvider ?? throw new ArgumentNullException(nameof(bookmarksProvider)); this.bookmarksProvider = bookmarksProvider ?? throw new ArgumentNullException(nameof(bookmarksProvider));
this.clipPaths = clipPaths; this.parsingOptions = parsingOptions;
Information = information ?? throw new ArgumentNullException(nameof(information)); Information = information ?? throw new ArgumentNullException(nameof(information));
pages = new Pages(catalog, pageFactory, pdfScanner); pages = new Pages(catalog, pageFactory, pdfScanner);
Structure = new Structure(catalog, crossReferenceTable, pdfScanner); Structure = new Structure(catalog, crossReferenceTable, pdfScanner);
@@ -153,11 +144,11 @@
throw new ObjectDisposedException("Cannot access page after the document is disposed."); throw new ObjectDisposedException("Cannot access page after the document is disposed.");
} }
log.Debug($"Accessing page {pageNumber}."); parsingOptions.Logger.Debug($"Accessing page {pageNumber}.");
try try
{ {
return pages.GetPage(pageNumber, clipPaths); return pages.GetPage(pageNumber, parsingOptions);
} }
catch (Exception ex) catch (Exception ex)
{ {
@@ -258,7 +249,7 @@
} }
catch (Exception ex) catch (Exception ex)
{ {
log.Error("Failed disposing the PdfDocument due to an error.", ex); parsingOptions.Logger.Error("Failed disposing the PdfDocument due to an error.", ex);
} }
finally finally
{ {