recover from invalid acro-form references

We add a try/catch to the direct object finder's TryGet method so that it returns false rather than throwing.

If the catalog contains an acro-form reference but the document has no corresponding object, we instead scan all objects in the document to find form fields and reconstruct the acro-form dictionary.
This commit is contained in:
Eliot Jones
2020-02-27 12:08:40 +00:00
parent f415c3116e
commit 4150881be9
3 changed files with 51 additions and 11 deletions

View File

@@ -5,6 +5,7 @@
using System.Linq; using System.Linq;
using Content; using Content;
using Core; using Core;
using CrossReference;
using Exceptions; using Exceptions;
using Fields; using Fields;
using Filters; using Filters;
@@ -30,11 +31,13 @@
private readonly IPdfTokenScanner tokenScanner; private readonly IPdfTokenScanner tokenScanner;
private readonly IFilterProvider filterProvider; private readonly IFilterProvider filterProvider;
private readonly CrossReferenceTable crossReferenceTable;
public AcroFormFactory(IPdfTokenScanner tokenScanner, IFilterProvider filterProvider) public AcroFormFactory(IPdfTokenScanner tokenScanner, IFilterProvider filterProvider, CrossReferenceTable crossReferenceTable)
{ {
this.tokenScanner = tokenScanner ?? throw new ArgumentNullException(nameof(tokenScanner)); this.tokenScanner = tokenScanner ?? throw new ArgumentNullException(nameof(tokenScanner));
this.filterProvider = filterProvider ?? throw new ArgumentNullException(nameof(filterProvider)); this.filterProvider = filterProvider ?? throw new ArgumentNullException(nameof(filterProvider));
this.crossReferenceTable = crossReferenceTable ?? throw new ArgumentNullException(nameof(crossReferenceTable));
} }
/// <summary> /// <summary>
@@ -44,10 +47,40 @@
[CanBeNull] [CanBeNull]
public AcroForm GetAcroForm(Catalog catalog) public AcroForm GetAcroForm(Catalog catalog)
{ {
if (!catalog.CatalogDictionary.TryGet(NameToken.AcroForm, out var acroRawToken) || !DirectObjectFinder.TryGet(acroRawToken, tokenScanner, out DictionaryToken acroDictionary)) if (!catalog.CatalogDictionary.TryGet(NameToken.AcroForm, out var acroRawToken) )
{ {
return null; return null;
} }
if (!DirectObjectFinder.TryGet(acroRawToken, tokenScanner, out DictionaryToken acroDictionary))
{
var fieldsRefs = new List<IndirectReferenceToken>();
// Invalid reference, try constructing the form from a Brute Force scan.
foreach (var reference in crossReferenceTable.ObjectOffsets.Keys)
{
var referenceToken = new IndirectReferenceToken(reference);
if (!DirectObjectFinder.TryGet(referenceToken, tokenScanner, out DictionaryToken dict))
{
continue;
}
if (dict.TryGet(NameToken.Kids, tokenScanner, out ArrayToken _) && dict.TryGet(NameToken.T, tokenScanner, out StringToken _))
{
fieldsRefs.Add(referenceToken);
}
}
if (fieldsRefs.Count == 0)
{
return null;
}
acroDictionary = new DictionaryToken(new Dictionary<NameToken, IToken>
{
{ NameToken.Fields, new ArrayToken(fieldsRefs) }
});
}
var signatureFlags = (SignatureFlags)0; var signatureFlags = (SignatureFlags)0;
if (acroDictionary.TryGetOptionalTokenDirect(NameToken.SigFlags, tokenScanner, out NumericToken signatureToken)) if (acroDictionary.TryGetOptionalTokenDirect(NameToken.SigFlags, tokenScanner, out NumericToken signatureToken))

View File

@@ -20,17 +20,24 @@
return false; return false;
} }
var temp = scanner.Get(reference.Data); try
if (temp.Data is T tTemp)
{ {
tokenResult = tTemp; var temp = scanner.Get(reference.Data);
return true;
if (temp.Data is T tTemp)
{
tokenResult = tTemp;
return true;
}
if (temp.Data is IndirectReferenceToken nestedReferenceToken)
{
return TryGet(nestedReferenceToken, scanner, out tokenResult);
}
} }
catch
if (temp.Data is IndirectReferenceToken nestedReferenceToken)
{ {
return TryGet(nestedReferenceToken, scanner, out tokenResult); return false;
} }
return false; return false;

View File

@@ -148,7 +148,7 @@
var caching = new ParsingCachingProviders(bruteForceSearcher, resourceContainer); var caching = new ParsingCachingProviders(bruteForceSearcher, resourceContainer);
var acroFormFactory = new AcroFormFactory(pdfScanner, filterProvider); var acroFormFactory = new AcroFormFactory(pdfScanner, filterProvider, crossReferenceTable);
var bookmarksProvider = new BookmarksProvider(log, pdfScanner, isLenientParsing); var bookmarksProvider = new BookmarksProvider(log, pdfScanner, isLenientParsing);
return new PdfDocument(log, inputBytes, version, crossReferenceTable, isLenientParsing, caching, pageFactory, catalog, information, return new PdfDocument(log, inputBytes, version, crossReferenceTable, isLenientParsing, caching, pageFactory, catalog, information,