mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-09-19 10:47:56 +08:00
recover from invalid acro-form references
We add a try/catch to the direct object finder's TryGet method so that it returns false rather than throwing. If the catalog contains an acro-form reference but the document has no corresponding object, we instead scan all objects in the document to find form fields and reconstruct the acro-form dictionary.
This commit is contained in:
@@ -5,6 +5,7 @@
|
||||
using System.Linq;
|
||||
using Content;
|
||||
using Core;
|
||||
using CrossReference;
|
||||
using Exceptions;
|
||||
using Fields;
|
||||
using Filters;
|
||||
@@ -30,11 +31,13 @@
|
||||
|
||||
private readonly IPdfTokenScanner tokenScanner;
|
||||
private readonly IFilterProvider filterProvider;
|
||||
private readonly CrossReferenceTable crossReferenceTable;
|
||||
|
||||
public AcroFormFactory(IPdfTokenScanner tokenScanner, IFilterProvider filterProvider)
|
||||
public AcroFormFactory(IPdfTokenScanner tokenScanner, IFilterProvider filterProvider, CrossReferenceTable crossReferenceTable)
|
||||
{
|
||||
this.tokenScanner = tokenScanner ?? throw new ArgumentNullException(nameof(tokenScanner));
|
||||
this.filterProvider = filterProvider ?? throw new ArgumentNullException(nameof(filterProvider));
|
||||
this.crossReferenceTable = crossReferenceTable ?? throw new ArgumentNullException(nameof(crossReferenceTable));
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -44,10 +47,40 @@
|
||||
[CanBeNull]
|
||||
public AcroForm GetAcroForm(Catalog catalog)
|
||||
{
|
||||
if (!catalog.CatalogDictionary.TryGet(NameToken.AcroForm, out var acroRawToken) || !DirectObjectFinder.TryGet(acroRawToken, tokenScanner, out DictionaryToken acroDictionary))
|
||||
if (!catalog.CatalogDictionary.TryGet(NameToken.AcroForm, out var acroRawToken) )
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
if (!DirectObjectFinder.TryGet(acroRawToken, tokenScanner, out DictionaryToken acroDictionary))
|
||||
{
|
||||
var fieldsRefs = new List<IndirectReferenceToken>();
|
||||
|
||||
// Invalid reference, try constructing the form from a Brute Force scan.
|
||||
foreach (var reference in crossReferenceTable.ObjectOffsets.Keys)
|
||||
{
|
||||
var referenceToken = new IndirectReferenceToken(reference);
|
||||
if (!DirectObjectFinder.TryGet(referenceToken, tokenScanner, out DictionaryToken dict))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (dict.TryGet(NameToken.Kids, tokenScanner, out ArrayToken _) && dict.TryGet(NameToken.T, tokenScanner, out StringToken _))
|
||||
{
|
||||
fieldsRefs.Add(referenceToken);
|
||||
}
|
||||
}
|
||||
|
||||
if (fieldsRefs.Count == 0)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
acroDictionary = new DictionaryToken(new Dictionary<NameToken, IToken>
|
||||
{
|
||||
{ NameToken.Fields, new ArrayToken(fieldsRefs) }
|
||||
});
|
||||
}
|
||||
|
||||
var signatureFlags = (SignatureFlags)0;
|
||||
if (acroDictionary.TryGetOptionalTokenDirect(NameToken.SigFlags, tokenScanner, out NumericToken signatureToken))
|
||||
|
@@ -20,17 +20,24 @@
|
||||
return false;
|
||||
}
|
||||
|
||||
var temp = scanner.Get(reference.Data);
|
||||
|
||||
if (temp.Data is T tTemp)
|
||||
try
|
||||
{
|
||||
tokenResult = tTemp;
|
||||
return true;
|
||||
var temp = scanner.Get(reference.Data);
|
||||
|
||||
if (temp.Data is T tTemp)
|
||||
{
|
||||
tokenResult = tTemp;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (temp.Data is IndirectReferenceToken nestedReferenceToken)
|
||||
{
|
||||
return TryGet(nestedReferenceToken, scanner, out tokenResult);
|
||||
}
|
||||
}
|
||||
|
||||
if (temp.Data is IndirectReferenceToken nestedReferenceToken)
|
||||
catch
|
||||
{
|
||||
return TryGet(nestedReferenceToken, scanner, out tokenResult);
|
||||
return false;
|
||||
}
|
||||
|
||||
return false;
|
||||
|
@@ -148,7 +148,7 @@
|
||||
|
||||
var caching = new ParsingCachingProviders(bruteForceSearcher, resourceContainer);
|
||||
|
||||
var acroFormFactory = new AcroFormFactory(pdfScanner, filterProvider);
|
||||
var acroFormFactory = new AcroFormFactory(pdfScanner, filterProvider, crossReferenceTable);
|
||||
var bookmarksProvider = new BookmarksProvider(log, pdfScanner, isLenientParsing);
|
||||
|
||||
return new PdfDocument(log, inputBytes, version, crossReferenceTable, isLenientParsing, caching, pageFactory, catalog, information,
|
||||
|
Reference in New Issue
Block a user