From 268947fb5ee7807513c5e883a71836ca6fccf646 Mon Sep 17 00:00:00 2001 From: Wesley Moret Date: Sat, 1 May 2021 11:44:56 -0400 Subject: [PATCH] Allow to have a metadata stream in the document information entry (#322) --- .../Parser/DocumentInformationFactory.cs | 45 ++++++++++++++----- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/src/UglyToad.PdfPig/Parser/DocumentInformationFactory.cs b/src/UglyToad.PdfPig/Parser/DocumentInformationFactory.cs index 6028d80e..14515c63 100644 --- a/src/UglyToad.PdfPig/Parser/DocumentInformationFactory.cs +++ b/src/UglyToad.PdfPig/Parser/DocumentInformationFactory.cs @@ -5,6 +5,7 @@ using Parts; using Tokenization.Scanner; using Tokens; + using UglyToad.PdfPig.Core; /// /// Parse the dictionary from a PDF file trailer. @@ -21,19 +22,41 @@ return DocumentInformation.Default; } - var infoParsed = DirectObjectFinder.Get(trailer.Info.Value, pdfTokenScanner); + var token = DirectObjectFinder.Get(trailer.Info.Value, pdfTokenScanner); + if (token is DictionaryToken infoParsed) + { + var title = GetEntryOrDefault(infoParsed, NameToken.Title); + var author = GetEntryOrDefault(infoParsed, NameToken.Author); + var subject = GetEntryOrDefault(infoParsed, NameToken.Subject); + var keywords = GetEntryOrDefault(infoParsed, NameToken.Keywords); + var creator = GetEntryOrDefault(infoParsed, NameToken.Creator); + var producer = GetEntryOrDefault(infoParsed, NameToken.Producer); + var creationDate = GetEntryOrDefault(infoParsed, NameToken.CreationDate); + var modifiedDate = GetEntryOrDefault(infoParsed, NameToken.ModDate); - var title = GetEntryOrDefault(infoParsed, NameToken.Title); - var author = GetEntryOrDefault(infoParsed, NameToken.Author); - var subject = GetEntryOrDefault(infoParsed, NameToken.Subject); - var keywords = GetEntryOrDefault(infoParsed, NameToken.Keywords); - var creator = GetEntryOrDefault(infoParsed, NameToken.Creator); - var producer = GetEntryOrDefault(infoParsed, NameToken.Producer); - var creationDate = 
GetEntryOrDefault(infoParsed, NameToken.CreationDate); - var modifiedDate = GetEntryOrDefault(infoParsed, NameToken.ModDate); + return new DocumentInformation(infoParsed, title, author, subject, + keywords, creator, producer, creationDate, modifiedDate); + } + else if (token is StreamToken streamToken) + { + var streamDictionary = streamToken.StreamDictionary; + if (!streamDictionary.TryGet(NameToken.Type, out NameToken typeNameToken) || typeNameToken != "Metadata") + { + throw new PdfDocumentFormatException($"Unknown document metadata type was found"); + } - return new DocumentInformation(infoParsed, title, author, subject, - keywords, creator, producer, creationDate, modifiedDate); + if (!streamDictionary.TryGet(NameToken.Subtype, out NameToken subtypeToken) || subtypeToken != "XML") + { + throw new PdfDocumentFormatException($"Unknown document metadata subtype was found"); + } + + // XMP metadata streams are not fully supported, so the user is left to deserialize the stream themselves + return DocumentInformation.Default; + } + else + { + throw new PdfDocumentFormatException($"Unknown document information token was found {token.GetType().Name}"); + } } private static string GetEntryOrDefault(DictionaryToken infoDictionary, NameToken key)