do not require tounicode to be valid even if present #354 #619

These issues reported that parsing was failing due to a missing
token being referenced in the ToUnicode entry. Since neither
issue included a sample file, it's impossible to determine the
right fix accurately. However, since the ToUnicode entry is
optional in the spec, we can try being more lenient here. This
might just result in more errors once we try to use the font,
but logging the error instead of throwing will at least prevent
parsing of the entire document from failing.
This commit is contained in:
Eliot Jones 2023-05-21 12:34:27 +01:00
parent 6b9c3be9f8
commit 0b8252e930
2 changed files with 29 additions and 7 deletions

View File

@ -160,12 +160,25 @@
var cidFontFactory = new CidFontFactory(pdfScanner, filterProvider);
var encodingReader = new EncodingReader(pdfScanner);
var type0Handler = new Type0FontHandler(
cidFontFactory,
filterProvider,
pdfScanner,
parsingOptions.Logger);
var type1Handler = new Type1FontHandler(pdfScanner, filterProvider, encodingReader);
var fontFactory = new FontFactory(parsingOptions.Logger, new Type0FontHandler(cidFontFactory,
filterProvider, pdfScanner),
new TrueTypeFontHandler(parsingOptions.Logger, pdfScanner, filterProvider, encodingReader, SystemFontFinder.Instance,
type1Handler),
var trueTypeHandler = new TrueTypeFontHandler(parsingOptions.Logger,
pdfScanner,
filterProvider,
encodingReader,
SystemFontFinder.Instance,
type1Handler);
var fontFactory = new FontFactory(
parsingOptions.Logger,
type0Handler,
trueTypeHandler,
type1Handler,
new Type3FontHandler(pdfScanner, filterProvider, encodingReader));

View File

@ -7,6 +7,7 @@
using Core;
using Filters;
using Fonts;
using Logging;
using Parts;
using PdfPig.Parser.Parts;
using Tokenization.Scanner;
@ -18,13 +19,18 @@
private readonly CidFontFactory cidFontFactory;
private readonly ILookupFilterProvider filterProvider;
private readonly IPdfTokenScanner scanner;
private readonly ILog logger;
public Type0FontHandler(CidFontFactory cidFontFactory, ILookupFilterProvider filterProvider,
IPdfTokenScanner scanner)
/// <summary>
/// Creates a handler for Type 0 fonts, storing each supplied dependency in the
/// matching private field for later use.
/// </summary>
/// <param name="cidFontFactory">The CID font factory kept in <c>cidFontFactory</c>.</param>
/// <param name="filterProvider">The filter provider kept in <c>filterProvider</c>.</param>
/// <param name="scanner">The PDF token scanner kept in <c>scanner</c>.</param>
/// <param name="logger">
/// The log kept in <c>logger</c>; it is used to report an invalid toUnicode CMap entry
/// as an error rather than throwing.
/// </param>
public Type0FontHandler(
CidFontFactory cidFontFactory,
ILookupFilterProvider filterProvider,
IPdfTokenScanner scanner,
ILog logger)
{
// No validation is performed here: arguments are stored exactly as given.
this.cidFontFactory = cidFontFactory;
this.filterProvider = filterProvider;
this.scanner = scanner;
this.logger = logger;
}
public IFont Generate(DictionaryToken dictionary)
@ -79,7 +85,10 @@
}
else
{
throw new PdfDocumentFormatException($"Invalid type of toUnicode CMap encountered. Got: {toUnicodeValue}.");
// Rather than throwing here, let's try returning the font anyway since
// this error is tripping people up as seen in issues #354 and #619.
// This will probably just cause errors further along the parsing but it might be more informative.
logger.Error($"Invalid type of toUnicode CMap encountered for font named {baseFont}. Got: {toUnicodeValue}.");
}
}