Fixes various font handling issues for Type 1 and TrueType fonts

Handle "unionsq" and other tricky glyph names. Log missing glyphs. Ignore flex points in Type 1 subroutines. Improve system font performance and substitution. Handle TrueType fonts that rely on the Standard 14 font descriptors.
This commit is contained in:
Eliot Jones
2019-01-12 13:54:16 +00:00
parent 17d4b964f7
commit 245efae8ed
9 changed files with 328 additions and 33 deletions

View File

@@ -87,9 +87,14 @@
var uniStr = new StringBuilder();
var foundUnicode = true;
for (int chPos = 3; chPos + 4 <= nameLength; chPos += 4)
{
int codePoint = int.Parse(name.Substring(chPos, 4), NumberStyles.HexNumber);
if (!int.TryParse(name.Substring(chPos, 4), NumberStyles.HexNumber, CultureInfo.InvariantCulture, out var codePoint))
{
foundUnicode = false;
break;
}
if (codePoint > 0xD7FF && codePoint < 0xE000)
{
@@ -100,6 +105,11 @@
uniStr.Append((char)codePoint);
}
if (!foundUnicode)
{
return null;
}
unicode = uniStr.ToString();
}
else if (name.StartsWith("u") && name.Length == 5)

View File

@@ -15,6 +15,7 @@
using Tokens;
using TrueType;
using TrueType.Parser;
using Util;
internal class TrueTypeFontHandler : IFontHandler
{
@@ -46,7 +47,29 @@
public IFont Generate(DictionaryToken dictionary, bool isLenientParsing)
{
var firstCharacter = FontDictionaryAccessHelper.GetFirstCharacter(dictionary);
if (!dictionary.TryGetOptionalTokenDirect(NameToken.FirstChar, pdfScanner, out NumericToken firstCharacterToken))
{
if (!dictionary.TryGetOptionalTokenDirect(NameToken.BaseFont, pdfScanner, out NameToken baseFont))
{
throw new InvalidFontFormatException($"The provided TrueType font dictionary did not contain a /FirstChar or a /BaseFont entry: {dictionary}.");
}
// Can use the AFM descriptor despite not being Type 1!
var standard14Font = Standard14.GetAdobeFontMetrics(baseFont.Data);
if (standard14Font == null)
{
throw new InvalidFontFormatException($"The provided TrueType font dictionary did not have a /FirstChar and did not match a Standard 14 font: {dictionary}.");
}
var fileSystemFont = systemFontFinder.GetTrueTypeFont(baseFont.Data);
var thisEncoding = encodingReader.Read(dictionary, isLenientParsing);
return new TrueTypeStandard14FallbackSimpleFont(baseFont, standard14Font, thisEncoding, fileSystemFont);
}
var firstCharacter = firstCharacterToken.Int;
var widths = FontDictionaryAccessHelper.GetWidths(pdfScanner, dictionary, isLenientParsing);

View File

@@ -11,6 +11,89 @@
using TrueType;
using Util.JetBrains.Annotations;
/// <summary>
/// A simple font used for TrueType font dictionaries which match a Standard 14 font:
/// it combines the Standard 14 descriptor with the TrueType font from disk, when one was found.
/// </summary>
internal class TrueTypeStandard14FallbackSimpleFont : IFont
{
    // Used when no TrueType font (or no header table) is available: scales by 1/1000.
    private static readonly TransformationMatrix DefaultTransformation =
        TransformationMatrix.FromValues(1m / 1000m, 0, 0, 1m / 1000m, 0, 0);

    private readonly FontMetrics fontMetrics;
    private readonly Encoding encoding;

    // May be null; every use below checks for that before touching it.
    private readonly TrueTypeFontProgram font;

    public NameToken Name { get; }

    public bool IsVertical { get; } = false;

    /// <summary>
    /// Creates the font from its name, the Standard 14 metrics, the encoding and an optional font program.
    /// </summary>
    /// <param name="name">The name exposed through <see cref="Name"/>.</param>
    /// <param name="fontMetrics">The metrics used by <see cref="GetBoundingBox"/> when the font program cannot supply a bounding box.</param>
    /// <param name="encoding">Maps character codes to glyph names, must not be null.</param>
    /// <param name="font">The TrueType font program, may be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="encoding"/> is null.</exception>
    public TrueTypeStandard14FallbackSimpleFont(NameToken name, FontMetrics fontMetrics, Encoding encoding, TrueTypeFontProgram font)
    {
        this.fontMetrics = fontMetrics;
        this.encoding = encoding ?? throw new ArgumentNullException(nameof(encoding));
        this.font = font;
        Name = name;
    }

    /// <summary>
    /// Reads a character code from the input; codes for this font are always a single byte.
    /// </summary>
    public int ReadCharacterCode(IInputBytes bytes, out int codeLength)
    {
        codeLength = 1;
        return bytes.CurrentByte;
    }

    /// <summary>
    /// Maps a character code to Unicode via the encoding's glyph name and the Adobe Glyph List.
    /// </summary>
    /// <param name="characterCode">The character code to look up.</param>
    /// <param name="value">The Unicode text when found, otherwise null.</param>
    /// <returns>True only when a non-null <paramref name="value"/> was produced.</returns>
    public bool TryGetUnicode(int characterCode, out string value)
    {
        value = null;

        // If the font is a simple font that uses one of the predefined encodings MacRomanEncoding, MacExpertEncoding, or WinAnsiEncoding...

        // Map the character code to a character name.
        var encodedCharacterName = encoding.GetName(characterCode);

        // Look up the character name in the Adobe Glyph List.
        try
        {
            value = GlyphList.AdobeGlyphList.NameToUnicode(encodedCharacterName);
        }
        catch
        {
            // Deliberately best-effort: any lookup failure is reported as "not found".
            return false;
        }

        // A lookup which completes without producing a value is not a success either;
        // returning true with a null value would break the Try-pattern contract.
        return value != null;
    }

    /// <summary>
    /// Gets the bounding box for a character code, preferring the font program and
    /// falling back to the Standard 14 metrics for the glyph name given by the encoding.
    /// </summary>
    public CharacterBoundingBox GetBoundingBox(int characterCode)
    {
        var fontMatrix = GetFontMatrix();

        if (font != null && font.TryGetBoundingBox(characterCode, out var bounds))
        {
            bounds = fontMatrix.Transform(bounds);

            // The width reported here is the width of the transformed glyph bounds.
            return new CharacterBoundingBox(bounds, bounds.Width);
        }

        var name = encoding.GetName(characterCode);
        var metrics = fontMetrics.CharacterMetrics[name];

        bounds = fontMatrix.Transform(metrics.BoundingBox);
        var width = fontMatrix.TransformX(metrics.WidthX);

        return new CharacterBoundingBox(bounds, width);
    }

    /// <summary>
    /// Gets the matrix which scales glyph values: based on the font program's multiplier when a
    /// header table is present, otherwise the default 1/1000 scale.
    /// </summary>
    public TransformationMatrix GetFontMatrix()
    {
        if (font?.TableRegister.HeaderTable != null)
        {
            var scale = (decimal)font.GetFontMatrixMultiplier();

            return TransformationMatrix.FromValues(1 / scale, 0, 0, 1 / scale, 0, 0);
        }

        return DefaultTransformation;
    }
}
internal class TrueTypeSimpleFont : IFont
{
private static readonly TransformationMatrix DefaultTransformation =

View File

@@ -8,13 +8,59 @@
using IO;
using TrueType;
using TrueType.Parser;
using Util;
internal class SystemFontFinder : ISystemFontFinder
{
/// <summary>
/// Substitute font names keyed by the requested font name. <see cref="GetTrueTypeFont"/>
/// tries the substitutes in the order they are listed.
/// </summary>
private static readonly IReadOnlyDictionary<string, string[]> NameSubstitutes;

/// <summary>
/// Builds <see cref="NameSubstitutes"/>: explicit substitutes first, then one entry for every
/// other name returned by <c>Standard14.GetNames()</c>.
/// </summary>
static SystemFontFinder()
{
    // Shared so the legacy key added at the end can point at the same list.
    var timesBoldItalic = new[]
    {
        "TimesNewRomanPS-BoldItalicMT", "TimesNewRomanPS-BoldItalic", "TimesNewRoman-BoldItalic",
        "LiberationSerif-BoldItalic", "NimbusRomNo9L-MediItal"
    };

    var dict = new Dictionary<string, string[]>
    {
        {"Courier", new[] {"CourierNew", "CourierNewPSMT", "LiberationMono", "NimbusMonL-Regu"}},
        {"Courier-Bold", new[] {"CourierNewPS-BoldMT", "CourierNew-Bold", "LiberationMono-Bold", "NimbusMonL-Bold"}},
        {"Courier-Oblique", new[] {"CourierNewPS-ItalicMT", "CourierNew-Italic", "LiberationMono-Italic", "NimbusMonL-ReguObli"}},
        {"Courier-BoldOblique", new[] {"CourierNewPS-BoldItalicMT", "CourierNew-BoldItalic", "LiberationMono-BoldItalic", "NimbusMonL-BoldObli"}},
        {"Helvetica", new[] {"ArialMT", "Arial", "LiberationSans", "NimbusSanL-Regu"}},
        {"Helvetica-Bold", new[] {"Arial-BoldMT", "Arial-Bold", "LiberationSans-Bold", "NimbusSanL-Bold"}},
        {"Helvetica-BoldOblique", new[] {"Arial-BoldItalicMT", "Helvetica-BoldItalic", "LiberationSans-BoldItalic", "NimbusSanL-BoldItal"}},
        {"Helvetica-Oblique", new[] {"Arial-ItalicMT", "Arial-Italic", "Helvetica-Italic", "LiberationSans-Italic", "NimbusSanL-ReguItal"}},
        {"Times-Roman", new[] {"TimesNewRomanPSMT", "TimesNewRoman", "TimesNewRomanPS", "LiberationSerif", "NimbusRomNo9L-Regu"}},
        {"Times-Bold", new[] {"TimesNewRomanPS-BoldMT", "TimesNewRomanPS-Bold", "TimesNewRoman-Bold", "LiberationSerif-Bold", "NimbusRomNo9L-Medi"}},
        {"Times-Italic", new[] {"TimesNewRomanPS-ItalicMT", "TimesNewRomanPS-Italic", "TimesNewRoman-Italic", "LiberationSerif-Italic", "NimbusRomNo9L-ReguItal"}},
        // Keyed by the Standard 14 name like every other entry. This was previously keyed by its
        // first substitute, which left "Times-BoldItalic" without any real substitutes.
        {"Times-BoldItalic", timesBoldItalic},
        {"Symbol", new[] {"SymbolMT", "StandardSymL"}},
        {"ZapfDingbats", new[] {"ZapfDingbatsITC", "Dingbats", "MS-Gothic"}}
    };

    foreach (var name in Standard14.GetNames())
    {
        if (dict.ContainsKey(name))
        {
            continue;
        }

        // Names without explicit substitutes reuse the substitutes of the font they map to,
        // or fall back to the mapped name itself when that has no entry either.
        var mappedName = Standard14.GetMappedFontName(name);

        dict[name] = dict.TryGetValue(mappedName, out var subs) ? subs : new[] {mappedName};
    }

    // Keep lookups by the previously used key working.
    if (!dict.ContainsKey("TimesNewRomanPS-BoldItalicMT"))
    {
        dict["TimesNewRomanPS-BoldItalicMT"] = timesBoldItalic;
    }

    NameSubstitutes = dict;
}
private readonly TrueTypeFontParser trueTypeFontParser;
private readonly Lazy<IReadOnlyList<SystemFontRecord>> availableFonts;
private readonly Dictionary<string, TrueTypeFontProgram> cache = new Dictionary<string, TrueTypeFontProgram>(StringComparer.OrdinalIgnoreCase);
private readonly Dictionary<string, string> nameToFileNameMap = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
private readonly HashSet<string> readFiles = new HashSet<string>();
public SystemFontFinder(TrueTypeFontParser trueTypeFontParser)
@@ -48,11 +94,76 @@
/// <summary>
/// Finds a TrueType font by name, trying progressively looser variations of the name.
/// </summary>
/// <param name="name">The font name to search for.</param>
/// <returns>The first matching font program, or null when no attempt finds one.</returns>
/// <exception cref="ArgumentNullException"><paramref name="name"/> is null.</exception>
public TrueTypeFontProgram GetTrueTypeFont(string name)
{
    if (name == null)
    {
        throw new ArgumentNullException(nameof(name));
    }

    // 1. The name exactly as requested.
    var result = GetTrueTypeFontNamed(name);

    if (result != null)
    {
        return result;
    }

    // 2. The name with hyphens removed.
    if (name.Contains("-"))
    {
        result = GetTrueTypeFontNamed(name.Replace("-", string.Empty));

        if (result != null)
        {
            return result;
        }
    }

    // 3. The name with commas replaced by hyphens.
    if (name.Contains(","))
    {
        result = GetTrueTypeFontNamed(name.Replace(",", "-"));

        if (result != null)
        {
            return result;
        }
    }

    // 4. Each known substitute, in the order listed.
    foreach (var substituteName in GetSubstituteNames(name))
    {
        result = GetTrueTypeFontNamed(substituteName);

        if (result != null)
        {
            return result;
        }
    }

    // 5. Last attempt: the name with a "-Regular" suffix. May still be null.
    return GetTrueTypeFontNamed(name + "-Regular");
}
/// <summary>
/// Returns the substitute names registered for a font name; spaces in the name are ignored.
/// </summary>
/// <param name="name">The requested font name.</param>
/// <returns>The registered substitutes, or an empty sequence when there are none.</returns>
private IEnumerable<string> GetSubstituteNames(string name)
{
    var lookupKey = name.Replace(" ", string.Empty);

    if (!NameSubstitutes.TryGetValue(lookupKey, out var substitutes))
    {
        return EmptyArray<string>.Instance;
    }

    return substitutes;
}
private TrueTypeFontProgram GetTrueTypeFontNamed(string name)
{
if (cache.TryGetValue(name, out var result))
{
return result;
}
if (nameToFileNameMap.TryGetValue(name, out var fileName))
{
if (TryReadFile(fileName, false, name, out result))
{
return result;
}
return null;
}
var nameCandidates = availableFonts.Value.Where(x => Path.GetFileName(x.Path)?.StartsWith(name[0].ToString(), StringComparison.OrdinalIgnoreCase) == true);
foreach (var systemFontRecord in nameCandidates)
@@ -86,27 +197,50 @@
return false;
}
using (var fileStream = File.OpenRead(record.Path))
{
readFiles.Add(record.Path);
var input = new StreamInputBytes(fileStream);
var trueType = trueTypeFontParser.Parse(new TrueTypeDataBytes(input));
var psName = trueType.TableRegister.NameTable?.GetPostscriptName() ?? trueType.Name;
if (!cache.ContainsKey(psName))
{
cache[psName] = trueType;
}
if (string.Equals(psName, name, StringComparison.OrdinalIgnoreCase))
{
font = trueType;
return true;
}
}
return TryReadFile(record.Path, true, name, out font);
}
return false;
}
/// <summary>
/// Reads a TrueType font from a file, optionally checking the font's name before parsing the whole font.
/// </summary>
/// <param name="fileName">The path of the font file; it is recorded in <c>readFiles</c> before opening.</param>
/// <param name="readNameFirst">
/// When true only the name table is read first, the name to file mapping is recorded,
/// and the full parse is skipped unless the name matches <paramref name="fontName"/> (ignoring case).
/// </param>
/// <param name="fontName">The font name being searched for; only used when <paramref name="readNameFirst"/> is true.</param>
/// <param name="font">The parsed font on success, otherwise null.</param>
/// <returns>True when the font was parsed, false when the name was missing or did not match.</returns>
private bool TryReadFile(string fileName, bool readNameFirst, string fontName, out TrueTypeFontProgram font)
{
    font = null;

    readFiles.Add(fileName);

    using (var fileStream = File.OpenRead(fileName))
    {
        var input = new StreamInputBytes(fileStream);
        var data = new TrueTypeDataBytes(input);

        if (readNameFirst)
        {
            var name = trueTypeFontParser.GetNameTable(data);

            if (name == null)
            {
                return false;
            }

            var fontNameFromFile = name.GetPostscriptName() ?? name.FontName;

            // A font without any usable name can never match, and a null key
            // would make the dictionary indexer below throw.
            if (fontNameFromFile == null)
            {
                return false;
            }

            nameToFileNameMap[fontNameFromFile] = fileName;

            if (!string.Equals(fontNameFromFile, fontName, StringComparison.OrdinalIgnoreCase))
            {
                return false;
            }
        }

        // NOTE(review): when readNameFirst is true GetNameTable has already read from 'data';
        // presumably Parse expects to start at the beginning of the font - confirm the reader is repositioned.
        font = trueTypeFontParser.Parse(data);

        var psName = font.TableRegister.NameTable?.GetPostscriptName() ?? font.Name;

        // Never overwrite an existing cache entry, and never use a null name as a key.
        if (psName != null && !cache.ContainsKey(psName))
        {
            cache[psName] = font;
        }

        return true;
    }
}
}
}

View File

@@ -125,6 +125,41 @@
return new TrueTypeFontProgram(version, tables, builder.Build());
}
/// <summary>
/// Reads only the name table from the font data: table headers are read one at a time
/// until the one tagged as the name table is found, and that table alone is parsed.
/// </summary>
/// <param name="data">The font data to read from, must not be null.</param>
/// <returns>The parsed name table, or null when no header with the name tag was read.</returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
public NameTable GetNameTable(TrueTypeDataBytes data)
{
    if (data == null)
    {
        throw new ArgumentNullException(nameof(data));
    }

    // Only the table count is kept; the other leading values are read purely to advance the reader.
    data.Read32Fixed();
    int tableCount = data.ReadUnsignedShort();
    data.ReadUnsignedShort();
    data.ReadUnsignedShort();
    data.ReadUnsignedShort();

    var tablesRead = 0;
    while (tablesRead < tableCount)
    {
        var header = ReadTable(data);
        tablesRead++;

        if (header.HasValue && header.Value.Tag == TrueTypeHeaderTable.Name)
        {
            // Stop at the first name header; later headers are never read.
            return TableParser.Parse<NameTable>(header.Value, data, new TableRegister.Builder());
        }
    }

    return null;
}
private static void OptionallyParseTables(IReadOnlyDictionary<string, TrueTypeHeaderTable> tables, TrueTypeDataBytes data, TableRegister.Builder tableRegister)
{
// cmap

View File

@@ -44,10 +44,11 @@
case FlexEnd:
{
context.IsFlexing = false;
if (context.FlexPoints.Count < 7)
{
throw new NotSupportedException("There must be at least 7 flex points defined by an other subroutine.");
}
// TODO: I don't really care about flexpoints, but we should probably handle them... one day.
//if (context.FlexPoints.Count < 7)
//{
// throw new NotSupportedException("There must be at least 7 flex points defined by an other subroutine.");
//}
context.ClearFlexPoints();
break;

View File

@@ -7,6 +7,7 @@
using Fonts;
using Geometry;
using IO;
using Logging;
using Operations;
using PdfPig.Core;
using Tokenization.Scanner;
@@ -22,6 +23,7 @@
private readonly bool isLenientParsing;
private readonly IPdfTokenScanner pdfScanner;
private readonly XObjectFactory xObjectFactory;
private readonly ILog log;
private Stack<CurrentGraphicsState> graphicsStack = new Stack<CurrentGraphicsState>();
@@ -43,13 +45,15 @@
/// <summary>
/// Creates a processor with the services it needs and pushes an initial graphics state onto the stack.
/// </summary>
/// <param name="cropBox">The crop box; not used by the assignments in this constructor.</param>
/// <param name="resourceStore">The resource store to use.</param>
/// <param name="userSpaceUnit">The user space unit to use.</param>
/// <param name="isLenientParsing">Whether lenient parsing is enabled.</param>
/// <param name="pdfScanner">The token scanner to use.</param>
/// <param name="xObjectFactory">The XObject factory to use.</param>
/// <param name="log">The log that receives warnings, such as a character code with no Unicode value.</param>
public ContentStreamProcessor(PdfRectangle cropBox, IResourceStore resourceStore, UserSpaceUnit userSpaceUnit, bool isLenientParsing,
    IPdfTokenScanner pdfScanner,
    XObjectFactory xObjectFactory,
    ILog log)
{
    this.resourceStore = resourceStore;
    this.userSpaceUnit = userSpaceUnit;
    this.isLenientParsing = isLenientParsing;
    this.pdfScanner = pdfScanner;
    this.xObjectFactory = xObjectFactory;
    this.log = log;

    // Processing always starts from a fresh graphics state.
    graphicsStack.Push(new CurrentGraphicsState());
}
@@ -125,10 +129,9 @@
var foundUnicode = font.TryGetUnicode(code, out var unicode);
if (!foundUnicode && !isLenientParsing)
if (!foundUnicode || unicode == null)
{
// TODO: record warning
// throw new InvalidOperationException($"We could not find the corresponding character with code {code} in font {font.Name}.");
log.Warn($"We could not find the corresponding character with code {code} in font {font.Name}.");
}
var wordSpacing = 0m;

View File

@@ -9,6 +9,7 @@
using Geometry;
using Graphics;
using IO;
using Logging;
using Parts;
using Tokenization.Scanner;
using Tokens;
@@ -17,20 +18,23 @@
internal class PageFactory : IPageFactory
{
private readonly IPdfTokenScanner pdfScanner;
private readonly IResourceStore resourceStore;
private readonly IFilterProvider filterProvider;
private readonly IPageContentParser pageContentParser;
private readonly XObjectFactory xObjectFactory;
private readonly IPdfTokenScanner pdfScanner;
private readonly ILog log;
/// <summary>
/// Creates a page factory from the services it needs.
/// </summary>
/// <param name="pdfScanner">The token scanner to use.</param>
/// <param name="resourceStore">The resource store to use.</param>
/// <param name="filterProvider">The filter provider to use.</param>
/// <param name="pageContentParser">The parser for page content streams.</param>
/// <param name="xObjectFactory">The XObject factory to use.</param>
/// <param name="log">The log handed on to the content stream processor.</param>
public PageFactory(IPdfTokenScanner pdfScanner, IResourceStore resourceStore, IFilterProvider filterProvider,
    IPageContentParser pageContentParser,
    XObjectFactory xObjectFactory,
    ILog log)
{
    this.resourceStore = resourceStore;
    this.filterProvider = filterProvider;
    this.pageContentParser = pageContentParser;
    this.xObjectFactory = xObjectFactory;
    this.log = log;
    this.pdfScanner = pdfScanner;
}
@@ -108,7 +112,7 @@
{
var operations = pageContentParser.Parse(new ByteArrayInputBytes(contentBytes));
var context = new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit, isLenientParsing, pdfScanner, xObjectFactory);
var context = new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit, isLenientParsing, pdfScanner, xObjectFactory, log);
return context.Process(operations);
}

View File

@@ -117,7 +117,9 @@
var resourceContainer = new ResourceContainer(pdfScanner, fontFactory);
var pageFactory = new PageFactory(pdfScanner, resourceContainer, filterProvider, new PageContentParser(new ReflectionGraphicsStateOperationFactory()), new XObjectFactory());
var pageFactory = new PageFactory(pdfScanner, resourceContainer, filterProvider,
new PageContentParser(new ReflectionGraphicsStateOperationFactory()),
new XObjectFactory(), log);
var informationFactory = new DocumentInformationFactory();