From cbd02a270fb45df67f3d1d44fe6be4a287978e68 Mon Sep 17 00:00:00 2001 From: Eliot Jones Date: Thu, 14 Apr 2022 20:46:36 -0400 Subject: [PATCH] don't throw if no information dictionary if lenient parsing --- .../Parser/DocumentInformationFactory.cs | 20 +++++++++++-------- .../Parser/PdfDocumentFactory.cs | 2 +- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/UglyToad.PdfPig/Parser/DocumentInformationFactory.cs b/src/UglyToad.PdfPig/Parser/DocumentInformationFactory.cs index 14515c63..dbedf5dc 100644 --- a/src/UglyToad.PdfPig/Parser/DocumentInformationFactory.cs +++ b/src/UglyToad.PdfPig/Parser/DocumentInformationFactory.cs @@ -5,7 +5,7 @@ using Parts; using Tokenization.Scanner; using Tokens; - using UglyToad.PdfPig.Core; + using Core; /// /// Parse the dictionary from a PDF file trailer. @@ -15,7 +15,7 @@ /// /// Convert the file trailer dictionary into a instance. /// - public static DocumentInformation Create(IPdfTokenScanner pdfTokenScanner, TrailerDictionary trailer) + public static DocumentInformation Create(IPdfTokenScanner pdfTokenScanner, TrailerDictionary trailer, bool isLenientParsing) { if (!trailer.Info.HasValue) { @@ -37,26 +37,30 @@ return new DocumentInformation(infoParsed, title, author, subject, keywords, creator, producer, creationDate, modifiedDate); } - else if (token is StreamToken streamToken) + + if (token is StreamToken streamToken) { var streamDictionary = streamToken.StreamDictionary; if (!streamDictionary.TryGet(NameToken.Type, out NameToken typeNameToken) || typeNameToken != "Metadata") { - throw new PdfDocumentFormatException($"Unknown document metadata type was found"); + throw new PdfDocumentFormatException("Unknown document metadata type was found"); } if (!streamDictionary.TryGet(NameToken.Subtype, out NameToken subtypeToken) || subtypeToken != "XML") { - throw new PdfDocumentFormatException($"Unknown document metadata subtype was found"); + throw new PdfDocumentFormatException("Unknown document metadata subtype was found"); } - // We are not fully supporting XMP Stream so we left the user fully deserialize the stream + // We are not fully supporting XMP Stream so we let the user fully deserialize the stream return DocumentInformation.Default; } - else + + if (isLenientParsing) { - throw new PdfDocumentFormatException($"Unknown document information token was found {token.GetType().Name}"); + return DocumentInformation.Default; } + + throw new PdfDocumentFormatException($"Unknown document information token was found {token.GetType().Name}"); } private static string GetEntryOrDefault(DictionaryToken infoDictionary, NameToken key) diff --git a/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs b/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs index 316e9410..4d34c735 100644 --- a/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs +++ b/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs @@ -137,7 +137,7 @@ var resourceContainer = new ResourceStore(pdfScanner, fontFactory); - var information = DocumentInformationFactory.Create(pdfScanner, crossReferenceTable.Trailer); + var information = DocumentInformationFactory.Create(pdfScanner, crossReferenceTable.Trailer, isLenientParsing); var catalog = CatalogFactory.Create(rootReference, rootDictionary, pdfScanner, isLenientParsing);