#482 add skip missing fonts option and pass parsing options to content stream processor

This doesn't fix the reported issue, since the PDF itself is corrupted on page 8; however, it will allow recovery in some scenarios where text content isn't important. It also adds a more informative error when a stream is unintentionally passed with a non-zero offset.
2025-12-21 11:13:55 +08:00 · 2022-10-09 13:44:05 -04:00
parent c643facee0
commit e2246a88bb
8 changed files with 192 additions and 83 deletions
--- a/src/UglyToad.PdfPig/PdfDocument.cs
+++ b/src/UglyToad.PdfPig/PdfDocument.cs
@@ -10,7 +10,6 @@
    using Encryption;
    using Exceptions;
    using Filters;
-    using Logging;
    using Parser;
    using Tokenization.Scanner;
    using Tokens;
@@ -28,16 +27,9 @@
        
        [NotNull]
        private readonly HeaderVersion version;
-        
-        private readonly ILog log;

        private readonly IInputBytes inputBytes;

-        private readonly bool clipPaths;
-
-        [NotNull]
-        private readonly ParsingCachingProviders cachingProviders;
-
        [CanBeNull]
        private readonly EncryptionDictionary encryptionDictionary;

@@ -46,6 +38,7 @@

        private readonly ILookupFilterProvider filterProvider;
        private readonly BookmarksProvider bookmarksProvider;
+        private readonly InternalParsingOptions parsingOptions;

        [NotNull]
        private readonly Pages pages;
@@ -82,11 +75,10 @@
        /// </summary>
        public bool IsEncrypted => encryptionDictionary != null;

-        internal PdfDocument(ILog log, 
+        internal PdfDocument(
            IInputBytes inputBytes,
            HeaderVersion version, 
            CrossReferenceTable crossReferenceTable,
-            ParsingCachingProviders cachingProviders,
            IPageFactory pageFactory,
            Catalog catalog,
            DocumentInformation information, 
@@ -95,17 +87,16 @@
            ILookupFilterProvider filterProvider,
            AcroFormFactory acroFormFactory,
            BookmarksProvider bookmarksProvider,
-            bool clipPaths)
+            InternalParsingOptions parsingOptions)
        {
-            this.log = log;
            this.inputBytes = inputBytes;
            this.version = version ?? throw new ArgumentNullException(nameof(version));
-            this.cachingProviders = cachingProviders ?? throw new ArgumentNullException(nameof(cachingProviders));
            this.encryptionDictionary = encryptionDictionary;
            this.pdfScanner = pdfScanner ?? throw new ArgumentNullException(nameof(pdfScanner));
            this.filterProvider = filterProvider ?? throw new ArgumentNullException(nameof(filterProvider));
            this.bookmarksProvider = bookmarksProvider ?? throw new ArgumentNullException(nameof(bookmarksProvider));
-            this.clipPaths = clipPaths;
+            this.parsingOptions = parsingOptions;
+
            Information = information ?? throw new ArgumentNullException(nameof(information));
            pages = new Pages(catalog, pageFactory, pdfScanner);
            Structure = new Structure(catalog, crossReferenceTable, pdfScanner);
@@ -153,11 +144,11 @@
                throw new ObjectDisposedException("Cannot access page after the document is disposed.");
            }

-            log.Debug($"Accessing page {pageNumber}.");
+            parsingOptions.Logger.Debug($"Accessing page {pageNumber}.");

            try
            {
-                return pages.GetPage(pageNumber, clipPaths);
+                return pages.GetPage(pageNumber, parsingOptions);
            }
            catch (Exception ex)
            {
@@ -258,7 +249,7 @@
            }
            catch (Exception ex)
            {
-                log.Error("Failed disposing the PdfDocument due to an error.", ex);
+                parsingOptions.Logger.Error("Failed disposing the PdfDocument due to an error.", ex);
            }
            finally
            {