#482 add skip missing fonts option and pass parsing options to content stream processor

This doesn't fix the reported issue, because the PDF itself is corrupted on page 8; however, it will
allow recovery in some scenarios where text content isn't important.

Also adds a more informative error when a stream is unintentionally passed with a non-zero offset.
This commit is contained in:
Eliot Jones
2022-10-09 13:44:05 -04:00
parent c643facee0
commit e2246a88bb
8 changed files with 192 additions and 83 deletions

View File

@@ -10,7 +10,6 @@
using Encryption;
using Exceptions;
using Filters;
using Logging;
using Parser;
using Tokenization.Scanner;
using Tokens;
@@ -28,16 +27,9 @@
[NotNull]
private readonly HeaderVersion version;
private readonly ILog log;
private readonly IInputBytes inputBytes;
private readonly bool clipPaths;
[NotNull]
private readonly ParsingCachingProviders cachingProviders;
[CanBeNull]
private readonly EncryptionDictionary encryptionDictionary;
@@ -46,6 +38,7 @@
private readonly ILookupFilterProvider filterProvider;
private readonly BookmarksProvider bookmarksProvider;
private readonly InternalParsingOptions parsingOptions;
[NotNull]
private readonly Pages pages;
@@ -82,11 +75,10 @@
/// </summary>
public bool IsEncrypted => encryptionDictionary != null;
internal PdfDocument(ILog log,
internal PdfDocument(
IInputBytes inputBytes,
HeaderVersion version,
CrossReferenceTable crossReferenceTable,
ParsingCachingProviders cachingProviders,
IPageFactory pageFactory,
Catalog catalog,
DocumentInformation information,
@@ -95,17 +87,16 @@
ILookupFilterProvider filterProvider,
AcroFormFactory acroFormFactory,
BookmarksProvider bookmarksProvider,
bool clipPaths)
InternalParsingOptions parsingOptions)
{
this.log = log;
this.inputBytes = inputBytes;
this.version = version ?? throw new ArgumentNullException(nameof(version));
this.cachingProviders = cachingProviders ?? throw new ArgumentNullException(nameof(cachingProviders));
this.encryptionDictionary = encryptionDictionary;
this.pdfScanner = pdfScanner ?? throw new ArgumentNullException(nameof(pdfScanner));
this.filterProvider = filterProvider ?? throw new ArgumentNullException(nameof(filterProvider));
this.bookmarksProvider = bookmarksProvider ?? throw new ArgumentNullException(nameof(bookmarksProvider));
this.clipPaths = clipPaths;
this.parsingOptions = parsingOptions;
Information = information ?? throw new ArgumentNullException(nameof(information));
pages = new Pages(catalog, pageFactory, pdfScanner);
Structure = new Structure(catalog, crossReferenceTable, pdfScanner);
@@ -153,11 +144,11 @@
throw new ObjectDisposedException("Cannot access page after the document is disposed.");
}
log.Debug($"Accessing page {pageNumber}.");
parsingOptions.Logger.Debug($"Accessing page {pageNumber}.");
try
{
return pages.GetPage(pageNumber, clipPaths);
return pages.GetPage(pageNumber, parsingOptions);
}
catch (Exception ex)
{
@@ -258,7 +249,7 @@
}
catch (Exception ex)
{
log.Error("Failed disposing the PdfDocument due to an error.", ex);
parsingOptions.Logger.Error("Failed disposing the PdfDocument due to an error.", ex);
}
finally
{