remove the lenient parsing flag from font-related classes

The lenient parsing flag gives us more code to maintain for no real benefit; parsing should always be as lenient as possible. Remove the flag from some of the font code.
This commit is contained in:
Eliot Jones
2020-02-27 18:10:02 +00:00
parent ec9e425712
commit 746cbfa30c
20 changed files with 68 additions and 140 deletions

View File

@@ -112,8 +112,6 @@
pdfScanner,
scanner);
var fontDescriptorFactory = new FontDescriptorFactory();
var (rootReference, rootDictionary) = ParseTrailer(crossReferenceTable, isLenientParsing,
pdfScanner,
out var encryptionDictionary);
@@ -124,14 +122,14 @@
pdfScanner.UpdateEncryptionHandler(encryptionHandler);
var cidFontFactory = new CidFontFactory(pdfScanner, fontDescriptorFactory, filterProvider);
var cidFontFactory = new CidFontFactory(pdfScanner, filterProvider);
var encodingReader = new EncodingReader(pdfScanner);
var type1Handler = new Type1FontHandler(pdfScanner, filterProvider, fontDescriptorFactory, encodingReader);
var type1Handler = new Type1FontHandler(pdfScanner, filterProvider, encodingReader);
var fontFactory = new FontFactory(log, new Type0FontHandler(cidFontFactory,
filterProvider, pdfScanner),
new TrueTypeFontHandler(log, pdfScanner, filterProvider, fontDescriptorFactory, encodingReader, new SystemFontFinder(),
new TrueTypeFontHandler(log, pdfScanner, filterProvider, encodingReader, new SystemFontFinder(),
type1Handler),
type1Handler,
new Type3FontHandler(pdfScanner, filterProvider, encodingReader));

View File

@@ -29,14 +29,14 @@
}
}
public static CMap Parse(IInputBytes bytes, bool isLenientParsing)
public static CMap Parse(IInputBytes bytes)
{
if (bytes == null)
{
throw new ArgumentNullException(nameof(bytes));
}
var result = CMapParser.Parse(bytes, isLenientParsing);
var result = CMapParser.Parse(bytes);
return result;
}

View File

@@ -18,7 +18,7 @@
private static readonly CodespaceRangeParser CodespaceRangeParser = new CodespaceRangeParser();
private static readonly CidCharacterParser CidCharacterParser = new CidCharacterParser();
public CMap Parse(IInputBytes inputBytes, bool isLenientParsing)
public CMap Parse(IInputBytes inputBytes)
{
var scanner = new CoreTokenScanner(inputBytes);
@@ -51,7 +51,7 @@
{
if (previousToken is NumericToken numeric)
{
CodespaceRangeParser.Parse(numeric, scanner, builder, isLenientParsing);
CodespaceRangeParser.Parse(numeric, scanner, builder);
}
else
{
@@ -63,7 +63,7 @@
{
if (previousToken is NumericToken numeric)
{
BaseFontCharacterParser.Parse(numeric, scanner, builder, isLenientParsing);
BaseFontCharacterParser.Parse(numeric, scanner, builder);
}
else
{
@@ -75,7 +75,7 @@
{
if (previousToken is NumericToken numeric)
{
BaseFontRangeParser.Parse(numeric, scanner, builder, isLenientParsing);
BaseFontRangeParser.Parse(numeric, scanner, builder);
}
else
{
@@ -87,7 +87,7 @@
{
if (previousToken is NumericToken numeric)
{
CidCharacterParser.Parse(numeric, scanner, builder, isLenientParsing);
CidCharacterParser.Parse(numeric, scanner, builder);
}
else
{
@@ -99,7 +99,7 @@
{
if (previousToken is NumericToken numeric)
{
CidRangeParser.Parse(numeric, scanner, builder, isLenientParsing);
CidRangeParser.Parse(numeric, scanner, builder);
}
else
{
@@ -111,7 +111,7 @@
}
else if (token is NameToken name)
{
CidFontNameParser.Parse(name, scanner, builder, isLenientParsing);
CidFontNameParser.Parse(name, scanner, builder);
}
previousToken = token;
@@ -141,7 +141,7 @@
bytes = memoryStream.ToArray();
}
return Parse(new ByteArrayInputBytes(bytes), true);
return Parse(new ByteArrayInputBytes(bytes));
}
}
}

View File

@@ -28,7 +28,7 @@
return number.Int;
}
public static double[] GetWidths(IPdfTokenScanner pdfScanner, DictionaryToken dictionary, bool isLenientParsing)
public static double[] GetWidths(IPdfTokenScanner pdfScanner, DictionaryToken dictionary)
{
if (!dictionary.TryGet(NameToken.Widths, out var token))
{
@@ -53,17 +53,14 @@
return result;
}
public static FontDescriptor GetFontDescriptor(IPdfTokenScanner pdfScanner, FontDescriptorFactory fontDescriptorFactory, DictionaryToken dictionary,
bool isLenientParsing)
public static FontDescriptor GetFontDescriptor(IPdfTokenScanner pdfScanner, DictionaryToken dictionary)
{
if (!dictionary.TryGet(NameToken.FontDescriptor, out var obj))
if (!dictionary.TryGet(NameToken.FontDescriptor, pdfScanner, out DictionaryToken parsed))
{
throw new InvalidFontFormatException($"No font descriptor indirect reference found in the TrueType font: {dictionary}.");
}
var parsed = DirectObjectFinder.Get<DictionaryToken>(obj, pdfScanner);
var descriptor = fontDescriptorFactory.Generate(parsed, pdfScanner, isLenientParsing);
var descriptor = FontDescriptorFactory.Generate(parsed, pdfScanner);
return descriptor;
}

View File

@@ -8,15 +8,12 @@
using Filters;
using Fonts;
using Fonts.AdobeFontMetrics;
using Fonts.CompactFontFormat;
using Fonts.Encodings;
using Fonts.Standard14Fonts;
using Fonts.SystemFonts;
using Fonts.TrueType;
using Fonts.TrueType.Parser;
using Fonts.Type1;
using Logging;
using Parts;
using PdfPig.Parser.Parts;
using Simple;
using Tokenization.Scanner;
@@ -28,20 +25,17 @@
private readonly ILog log;
private readonly IPdfTokenScanner pdfScanner;
private readonly IFilterProvider filterProvider;
private readonly FontDescriptorFactory fontDescriptorFactory;
private readonly IEncodingReader encodingReader;
private readonly ISystemFontFinder systemFontFinder;
private readonly IFontHandler type1FontHandler;
public TrueTypeFontHandler(ILog log, IPdfTokenScanner pdfScanner, IFilterProvider filterProvider,
FontDescriptorFactory fontDescriptorFactory,
IEncodingReader encodingReader,
ISystemFontFinder systemFontFinder,
IFontHandler type1FontHandler)
{
this.log = log;
this.filterProvider = filterProvider;
this.fontDescriptorFactory = fontDescriptorFactory;
this.encodingReader = encodingReader;
this.systemFontFinder = systemFontFinder;
this.type1FontHandler = type1FontHandler;
@@ -96,9 +90,9 @@
var firstCharacter = firstCharacterToken.Int;
var widths = FontDictionaryAccessHelper.GetWidths(pdfScanner, dictionary, isLenientParsing);
var widths = FontDictionaryAccessHelper.GetWidths(pdfScanner, dictionary);
var descriptor = FontDictionaryAccessHelper.GetFontDescriptor(pdfScanner, fontDescriptorFactory, dictionary, isLenientParsing);
var descriptor = FontDictionaryAccessHelper.GetFontDescriptor(pdfScanner, dictionary);
var font = ParseTrueTypeFont(descriptor, out var actualHandler);
@@ -118,7 +112,7 @@
if (decodedUnicodeCMap != null)
{
toUnicodeCMap = CMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing);
toUnicodeCMap = CMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap));
}
}

View File

@@ -50,7 +50,7 @@
descendantFontDictionary = (DictionaryToken) descendantObject;
}
cidFont = ParseDescendant(descendantFontDictionary, isLenientParsing);
cidFont = ParseDescendant(descendantFontDictionary);
}
else
{
@@ -70,7 +70,7 @@
if (decodedUnicodeCMap != null)
{
toUnicodeCMap = CMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing);
toUnicodeCMap = CMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap));
}
}
else if (DirectObjectFinder.TryGet<NameToken>(toUnicodeValue, scanner, out var toUnicodeName))
@@ -124,7 +124,7 @@
return false;
}
private ICidFont ParseDescendant(DictionaryToken dictionary, bool isLenientParsing)
private ICidFont ParseDescendant(DictionaryToken dictionary)
{
var type = dictionary.GetNameOrDefault(NameToken.Type);
if (type?.Equals(NameToken.Font) != true)
@@ -132,7 +132,7 @@
throw new InvalidFontFormatException($"Expected \'Font\' dictionary but found \'{type}\'");
}
var result = cidFontFactory.Generate(dictionary, isLenientParsing);
var result = cidFontFactory.Generate(dictionary);
return result;
}
@@ -156,7 +156,7 @@
{
var decoded = stream.Decode(filterProvider);
var cmap = CMapCache.Parse(new ByteArrayInputBytes(decoded), false);
var cmap = CMapCache.Parse(new ByteArrayInputBytes(decoded));
result = cmap ?? throw new InvalidOperationException("Could not read CMap for " + dictionary);
}

View File

@@ -10,7 +10,6 @@
using Fonts.Standard14Fonts;
using Fonts.Type1;
using Fonts.Type1.Parser;
using Parts;
using PdfPig.Parser.Parts;
using Simple;
using Tokenization.Scanner;
@@ -20,16 +19,13 @@
{
private readonly IPdfTokenScanner pdfScanner;
private readonly IFilterProvider filterProvider;
private readonly FontDescriptorFactory fontDescriptorFactory;
private readonly IEncodingReader encodingReader;
public Type1FontHandler(IPdfTokenScanner pdfScanner, IFilterProvider filterProvider,
FontDescriptorFactory fontDescriptorFactory,
IEncodingReader encodingReader)
{
this.pdfScanner = pdfScanner;
this.filterProvider = filterProvider;
this.fontDescriptorFactory = fontDescriptorFactory;
this.encodingReader = encodingReader;
}
@@ -64,7 +60,7 @@
lastCharacter = FontDictionaryAccessHelper.GetLastCharacter(dictionary);
widths = FontDictionaryAccessHelper.GetWidths(pdfScanner, dictionary, isLenientParsing);
widths = FontDictionaryAccessHelper.GetWidths(pdfScanner, dictionary);
}
else
{
@@ -86,7 +82,7 @@
}
}
var descriptor = FontDictionaryAccessHelper.GetFontDescriptor(pdfScanner, fontDescriptorFactory, dictionary, isLenientParsing);
var descriptor = FontDictionaryAccessHelper.GetFontDescriptor(pdfScanner, dictionary);
var font = ParseFontProgram(descriptor, isLenientParsing);
@@ -101,7 +97,7 @@
if (decodedUnicodeCMap != null)
{
toUnicodeCMap = CMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing);
toUnicodeCMap = CMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap));
}
}

View File

@@ -33,7 +33,7 @@
var firstCharacter = FontDictionaryAccessHelper.GetFirstCharacter(dictionary);
var lastCharacter = FontDictionaryAccessHelper.GetLastCharacter(dictionary);
var widths = FontDictionaryAccessHelper.GetWidths(scanner, dictionary, isLenientParsing);
var widths = FontDictionaryAccessHelper.GetWidths(scanner, dictionary);
Encoding encoding = encodingReader.Read(dictionary, isLenientParsing);
@@ -46,7 +46,7 @@
if (decodedUnicodeCMap != null)
{
toUnicodeCMap = CMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing);
toUnicodeCMap = CMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap));
}
}

View File

@@ -7,7 +7,7 @@
internal class BaseFontCharacterParser : ICidFontPartParser<NumericToken>
{
public void Parse(NumericToken numeric, ITokenScanner tokenScanner, CharacterMapBuilder builder, bool isLenientParsing)
public void Parse(NumericToken numeric, ITokenScanner tokenScanner, CharacterMapBuilder builder)
{
for (var i = 0; i < numeric.Int; i++)
{

View File

@@ -13,7 +13,7 @@
/// </summary>
internal class BaseFontRangeParser : ICidFontPartParser<NumericToken>
{
public void Parse(NumericToken numberOfOperations, ITokenScanner scanner, CharacterMapBuilder builder, bool isLenientParsing)
public void Parse(NumericToken numberOfOperations, ITokenScanner scanner, CharacterMapBuilder builder)
{
for (var i = 0; i < numberOfOperations.Int; i++)
{

View File

@@ -8,7 +8,7 @@
internal class CidCharacterParser : ICidFontPartParser<NumericToken>
{
public void Parse(NumericToken numeric, ITokenScanner scanner, CharacterMapBuilder builder, bool isLenientParsing)
public void Parse(NumericToken numeric, ITokenScanner scanner, CharacterMapBuilder builder)
{
var results = new List<CidCharacterMapping>();

View File

@@ -17,20 +17,17 @@
internal class CidFontFactory
{
private readonly FontDescriptorFactory descriptorFactory;
private readonly IFilterProvider filterProvider;
private readonly IPdfTokenScanner pdfScanner;
public CidFontFactory(IPdfTokenScanner pdfScanner, FontDescriptorFactory descriptorFactory,
IFilterProvider filterProvider)
public CidFontFactory(IPdfTokenScanner pdfScanner, IFilterProvider filterProvider)
{
this.pdfScanner = pdfScanner;
this.descriptorFactory = descriptorFactory;
this.filterProvider = filterProvider;
}
public ICidFont Generate(DictionaryToken dictionary, bool isLenientParsing)
{
public ICidFont Generate(DictionaryToken dictionary)
{
var type = dictionary.GetNameOrDefault(NameToken.Type);
if (!NameToken.Font.Equals(type))
{
@@ -50,7 +47,7 @@
FontDescriptor descriptor = null;
if (TryGetFontDescriptor(dictionary, out var descriptorDictionary))
{
descriptor = descriptorFactory.Generate(descriptorDictionary, pdfScanner, isLenientParsing);
descriptor = FontDescriptorFactory.Generate(descriptorDictionary, pdfScanner);
}
var fontProgram = ReadDescriptorFile(descriptor);
@@ -67,7 +64,7 @@
if (NameToken.CidFontType2.Equals(subType))
{
var cidToGid = GetCharacterIdentifierToGlyphIndexMap(dictionary, isLenientParsing);
var cidToGid = GetCharacterIdentifierToGlyphIndexMap(dictionary);
return new Type2CidFont(type, subType, baseFont, systemInfo, descriptor, fontProgram, verticalWritingMetrics, widths, defaultWidth, cidToGid);
}
@@ -287,20 +284,15 @@
return new CharacterIdentifierSystemInfo(registry, ordering, supplement);
}
private CharacterIdentifierToGlyphIndexMap GetCharacterIdentifierToGlyphIndexMap(DictionaryToken dictionary, bool isLenientParsing)
private CharacterIdentifierToGlyphIndexMap GetCharacterIdentifierToGlyphIndexMap(DictionaryToken dictionary)
{
if (!dictionary.TryGet(NameToken.CidToGidMap, out var entry))
{
return new CharacterIdentifierToGlyphIndexMap();
}
if (entry is NameToken name)
if (entry is NameToken)
{
if (!name.Equals(NameToken.Identity) && !isLenientParsing)
{
throw new InvalidOperationException($"The CIDToGIDMap in a Type 0 font should have the value /Identity, instead got: {name}.");
}
return new CharacterIdentifierToGlyphIndexMap();
}

View File

@@ -1,6 +1,5 @@
namespace UglyToad.PdfPig.PdfFonts.Parser.Parts
{
using System;
using System.Globalization;
using Cmap;
using Tokenization.Scanner;
@@ -8,8 +7,7 @@
internal class CidFontNameParser : ICidFontPartParser<NameToken>
{
public void Parse(NameToken nameToken, ITokenScanner scanner, CharacterMapBuilder builder,
bool isLenientParsing)
public void Parse(NameToken nameToken, ITokenScanner scanner, CharacterMapBuilder builder)
{
switch (nameToken.Data)
{
@@ -83,54 +81,28 @@
{
if (scanner.TryReadToken(out DictionaryToken dictionary))
{
builder.CharacterIdentifierSystemInfo = GetCharacterIdentifier(dictionary, isLenientParsing);
builder.CharacterIdentifierSystemInfo = GetCharacterIdentifier(dictionary);
}
break;
}
}
}
private static CharacterIdentifierSystemInfo GetCharacterIdentifier(DictionaryToken dictionary, bool isLenientParsing)
private static CharacterIdentifierSystemInfo GetCharacterIdentifier(DictionaryToken dictionary)
{
string GetErrorMessage(string missingKey)
{
return $"No {missingKey} found in the CIDSystemInfo dictionary: " + dictionary;
}
if (!dictionary.TryGet(NameToken.Registry, out var registry) || !(registry is StringToken registryString))
{
if (isLenientParsing)
{
registryString = new StringToken("Adobe");
}
else
{
throw new InvalidOperationException(GetErrorMessage("registry"));
}
registryString = new StringToken("Adobe");
}
if (!dictionary.TryGet(NameToken.Ordering, out var ordering) || !(ordering is StringToken orderingString))
{
if (isLenientParsing)
{
orderingString = new StringToken("");
}
else
{
throw new InvalidOperationException(GetErrorMessage("ordering"));
}
orderingString = new StringToken(string.Empty);
}
if (!dictionary.TryGet(NameToken.Supplement, out var supplement) || !(supplement is NumericToken supplementNumeric))
{
if (isLenientParsing)
{
supplementNumeric = new NumericToken(0);
}
else
{
throw new InvalidOperationException(GetErrorMessage("supplement"));
}
supplementNumeric = new NumericToken(0);
}
return new CharacterIdentifierSystemInfo(registryString.Data, orderingString.Data, supplementNumeric.Int);

View File

@@ -7,7 +7,7 @@
internal class CidRangeParser : ICidFontPartParser<NumericToken>
{
public void Parse(NumericToken numeric, ITokenScanner scanner, CharacterMapBuilder builder, bool isLenientParsing)
public void Parse(NumericToken numeric, ITokenScanner scanner, CharacterMapBuilder builder)
{
for (var i = 0; i < numeric.Int; i++)
{

View File

@@ -8,7 +8,7 @@
internal class CodespaceRangeParser : ICidFontPartParser<NumericToken>
{
public void Parse(NumericToken numeric, ITokenScanner tokenScanner, CharacterMapBuilder builder, bool isLenientParsing)
public void Parse(NumericToken numeric, ITokenScanner tokenScanner, CharacterMapBuilder builder)
{
/*
* For example:

View File

@@ -2,25 +2,24 @@
{
using System;
using Core;
using PdfPig.Parser.Parts;
using Tokenization.Scanner;
using Tokens;
using Util;
using Util.JetBrains.Annotations;
internal class FontDescriptorFactory
internal static class FontDescriptorFactory
{
public FontDescriptor Generate(DictionaryToken dictionary, IPdfTokenScanner pdfScanner, bool isLenientParsing)
public static FontDescriptor Generate(DictionaryToken dictionary, IPdfTokenScanner pdfScanner)
{
if (dictionary == null)
{
throw new ArgumentNullException(nameof(dictionary));
}
var name = GetFontName(dictionary, pdfScanner, isLenientParsing);
var name = GetFontName(dictionary, pdfScanner);
var family = GetFontFamily(dictionary);
var stretch = GetFontStretch(dictionary);
var flags = GetFlags(dictionary, isLenientParsing);
var flags = GetFlags(dictionary);
var bounding = GetBoundingBox(dictionary, pdfScanner);
var charSet = GetCharSet(dictionary);
var fontFile = GetFontFile(dictionary);
@@ -57,27 +56,14 @@
return number.Data;
}
private static NameToken GetFontName(DictionaryToken dictionary, IPdfTokenScanner scanner, bool isLenientParsing)
private static NameToken GetFontName(DictionaryToken dictionary, IPdfTokenScanner scanner)
{
if (!dictionary.TryGet(NameToken.FontName, out var name) || !(name is NameToken nameToken))
if (!dictionary.TryGet(NameToken.FontName, scanner, out NameToken name))
{
if (name is IndirectReferenceToken nameReference)
{
var indirectName = DirectObjectFinder.Get<NameToken>(nameReference, scanner);
return indirectName;
}
if (isLenientParsing)
{
nameToken = NameToken.Create(string.Empty);
}
else
{
throw new InvalidOperationException("Could not parse the font descriptor, could not retrieve the font name. " + dictionary);
}
name = NameToken.Create(string.Empty);
}
return nameToken;
return name;
}
private static string GetFontFamily(DictionaryToken dictionary)
@@ -100,20 +86,13 @@
return stretchName.ConvertToFontStretch();
}
private static FontDescriptorFlags GetFlags(DictionaryToken dictionary, bool isLenientParsing)
private static FontDescriptorFlags GetFlags(DictionaryToken dictionary)
{
var flags = dictionary.GetIntOrDefault(NameToken.Flags, -1);
if (flags == -1)
{
if (isLenientParsing)
{
flags = 0;
}
else
{
throw new InvalidOperationException("Font flags were not set correctly for the font descriptor: " + dictionary);
}
flags = 0;
}
return (FontDescriptorFlags) flags;

View File

@@ -12,6 +12,6 @@
/// <summary>
/// Parse the definition for this part of the CID font and write the results to the <see cref="CharacterMapBuilder"/>.
/// </summary>
void Parse(TToken previous, ITokenScanner tokenScanner, CharacterMapBuilder builder, bool isLenientParsing);
void Parse(TToken previous, ITokenScanner tokenScanner, CharacterMapBuilder builder);
}
}