Handle indirect references in document information factory and fix #706

This commit is contained in:
BobLd
2024-03-10 14:51:06 +00:00
parent acfe8b5fdd
commit 845e3b4a7f
3 changed files with 39 additions and 9 deletions

View File

@@ -82,5 +82,20 @@
var ex = Assert.Throws<PdfDocumentFormatException>(() => PdfDocument.Open(path, ParsingOptions.LenientParsingOff));
Assert.Equal("Expected name as dictionary key, instead got: Collaborative", ex.Message);
}
/// <summary>
/// Regression test for issue 706: opens the sample document and checks that the
/// title, creator and creation date exposed by the document information match
/// the expected values.
/// </summary>
[Fact]
public void CanReadDocumentInformationIndirectRef()
{
    // Issue 706
    const string fileName = "EBOOK-DIETETYKA-SPORTOWA_copy_1.pdf";
    var documentPath = IntegrationHelpers.GetSpecificTestDocumentPath(fileName);

    using (var pdf = PdfDocument.Open(documentPath))
    {
        var info = pdf.Information;

        Assert.Equal("EBOOK", info.Title);
        Assert.Equal("Pages", info.Creator);
        Assert.Equal("D:20190306232856Z00'00'", info.CreationDate);
    }
}
}
}

View File

@@ -25,14 +25,14 @@
var token = DirectObjectFinder.Get<IToken>(trailer.Info.Value, pdfTokenScanner);
if (token is DictionaryToken infoParsed)
{
var title = GetEntryOrDefault(infoParsed, NameToken.Title);
var author = GetEntryOrDefault(infoParsed, NameToken.Author);
var subject = GetEntryOrDefault(infoParsed, NameToken.Subject);
var keywords = GetEntryOrDefault(infoParsed, NameToken.Keywords);
var creator = GetEntryOrDefault(infoParsed, NameToken.Creator);
var producer = GetEntryOrDefault(infoParsed, NameToken.Producer);
var creationDate = GetEntryOrDefault(infoParsed, NameToken.CreationDate);
var modifiedDate = GetEntryOrDefault(infoParsed, NameToken.ModDate);
var title = GetEntryOrDefault(infoParsed, NameToken.Title, pdfTokenScanner);
var author = GetEntryOrDefault(infoParsed, NameToken.Author, pdfTokenScanner);
var subject = GetEntryOrDefault(infoParsed, NameToken.Subject, pdfTokenScanner);
var keywords = GetEntryOrDefault(infoParsed, NameToken.Keywords, pdfTokenScanner);
var creator = GetEntryOrDefault(infoParsed, NameToken.Creator, pdfTokenScanner);
var producer = GetEntryOrDefault(infoParsed, NameToken.Producer, pdfTokenScanner);
var creationDate = GetEntryOrDefault(infoParsed, NameToken.CreationDate, pdfTokenScanner);
var modifiedDate = GetEntryOrDefault(infoParsed, NameToken.ModDate, pdfTokenScanner);
return new DocumentInformation(infoParsed, title, author, subject,
keywords, creator, producer, creationDate, modifiedDate);
@@ -63,7 +63,7 @@
throw new PdfDocumentFormatException($"Unknown document information token was found {token.GetType().Name}");
}
private static string GetEntryOrDefault(DictionaryToken infoDictionary, NameToken key)
private static string GetEntryOrDefault(DictionaryToken infoDictionary, NameToken key, IPdfTokenScanner pdfTokenScanner)
{
if (infoDictionary == null)
{
@@ -75,6 +75,21 @@
return null;
}
if (value is IndirectReferenceToken idr)
{
if (DirectObjectFinder.TryGet(idr, pdfTokenScanner, out StringToken strI))
{
return strI.Data;
}
if (DirectObjectFinder.TryGet(idr, pdfTokenScanner, out HexToken hexI))
{
return hexI.Data;
}
return null;
}
if (value is StringToken str)
{
return str.Data;