From 3e6fa4b6942a0d5c203469bac259c6fce6fd1bfd Mon Sep 17 00:00:00 2001 From: Eliot Jones Date: Fri, 20 Dec 2019 12:47:24 +0000 Subject: [PATCH] Correctly map character code to glyph ID when retrieving bounding boxes for TrueType fonts. Previously we just treated character codes as glyph IDs when getting the bounding box from the TrueType font program itself. This change uses the code for character-code-to-glyph-ID mapping from PDFBox, with some changes, to retrieve the correct bounding box where possible. Since this relies in some places on using the Unicode value or name, rather than the character code, we add a cache to the individual TrueType fonts to store the character-code-to-Unicode mapping, which should have the benefit of improving performance. --- .../CharacterIdentifierToGlyphIndexMap.cs | 2 +- .../Fonts/CidFonts/ICidFontProgram.cs | 4 +- .../CompactFontFormatFontProgram.cs | 4 +- .../Fonts/Simple/TrueTypeSimpleFont.cs | 121 +++++++++++++++++- .../Fonts/TrueType/TrueTypeFontProgram.cs | 64 +++++++-- 5 files changed, 171 insertions(+), 24 deletions(-) diff --git a/src/UglyToad.PdfPig/Fonts/CidFonts/CharacterIdentifierToGlyphIndexMap.cs b/src/UglyToad.PdfPig/Fonts/CidFonts/CharacterIdentifierToGlyphIndexMap.cs index 4b62b63a..91f104e4 100644 --- a/src/UglyToad.PdfPig/Fonts/CidFonts/CharacterIdentifierToGlyphIndexMap.cs +++ b/src/UglyToad.PdfPig/Fonts/CidFonts/CharacterIdentifierToGlyphIndexMap.cs @@ -33,7 +33,7 @@ } } - public int GetGlyphIndex(int characterIdentifier) + public int? 
GetGlyphIndex(int characterIdentifier) { if (isIdentity) { diff --git a/src/UglyToad.PdfPig/Fonts/CidFonts/ICidFontProgram.cs b/src/UglyToad.PdfPig/Fonts/CidFonts/ICidFontProgram.cs index 2d29c626..96529621 100644 --- a/src/UglyToad.PdfPig/Fonts/CidFonts/ICidFontProgram.cs +++ b/src/UglyToad.PdfPig/Fonts/CidFonts/ICidFontProgram.cs @@ -10,9 +10,9 @@ { bool TryGetBoundingBox(int characterIdentifier, out PdfRectangle boundingBox); - bool TryGetBoundingBox(int characterIdentifier, Func characterIdentifierToGlyphIndex, out PdfRectangle boundingBox); + bool TryGetBoundingBox(int characterIdentifier, Func characterCodeToGlyphId, out PdfRectangle boundingBox); - bool TryGetBoundingAdvancedWidth(int characterIdentifier, Func characterIdentifierToGlyphIndex, out decimal width); + bool TryGetBoundingAdvancedWidth(int characterIdentifier, Func characterCodeToGlyphId, out decimal width); bool TryGetBoundingAdvancedWidth(int characterIdentifier, out decimal width); diff --git a/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatFontProgram.cs b/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatFontProgram.cs index 0ab94718..33085258 100644 --- a/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatFontProgram.cs +++ b/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatFontProgram.cs @@ -71,12 +71,12 @@ return true; } - public bool TryGetBoundingBox(int characterIdentifier, Func characterIdentifierToGlyphIndex, out PdfRectangle boundingBox) + public bool TryGetBoundingBox(int characterIdentifier, Func characterCodeToGlyphId, out PdfRectangle boundingBox) { throw new NotImplementedException(); } - public bool TryGetBoundingAdvancedWidth(int characterIdentifier, Func characterIdentifierToGlyphIndex, out decimal width) + public bool TryGetBoundingAdvancedWidth(int characterIdentifier, Func characterCodeToGlyphId, out decimal width) { throw new NotImplementedException(); } diff --git a/src/UglyToad.PdfPig/Fonts/Simple/TrueTypeSimpleFont.cs 
b/src/UglyToad.PdfPig/Fonts/Simple/TrueTypeSimpleFont.cs index 520765b2..a82cb63a 100644 --- a/src/UglyToad.PdfPig/Fonts/Simple/TrueTypeSimpleFont.cs +++ b/src/UglyToad.PdfPig/Fonts/Simple/TrueTypeSimpleFont.cs @@ -22,11 +22,11 @@ private readonly Dictionary boundingBoxCache = new Dictionary(); - [CanBeNull] - private readonly Encoding encoding; + private readonly Dictionary unicodeValuesCache = new Dictionary(); - [CanBeNull] - private readonly TrueTypeFontProgram fontProgram; + [CanBeNull] private readonly Encoding encoding; + + [CanBeNull] private readonly TrueTypeFontProgram fontProgram; private readonly int firstCharacter; @@ -68,11 +68,18 @@ { value = null; + if (unicodeValuesCache.TryGetValue(characterCode, out value)) + { + return true; + } + // Behaviour specified by the Extraction of Text Content section of the specification. // If the font contains a ToUnicode CMap use that. if (ToUnicode.CanMapToUnicode && ToUnicode.TryGet(characterCode, out value)) { + unicodeValuesCache[characterCode] = value; + return true; } @@ -90,13 +97,18 @@ try { value = GlyphList.AdobeGlyphList.NameToUnicode(encodedCharacterName) - ?? GlyphList.AdditionalGlyphList.NameToUnicode(encodedCharacterName); + ?? GlyphList.AdditionalGlyphList.NameToUnicode(encodedCharacterName); } catch { return false; } + if (value != null) + { + unicodeValuesCache[characterCode] = value; + } + return value != null; } @@ -179,7 +191,7 @@ return descriptor.BoundingBox; } - if (fontProgram.TryGetBoundingBox(characterCode, out var bounds)) + if (fontProgram.TryGetBoundingBox(characterCode, CharacterCodeToGlyphId, out var bounds)) { return bounds; } @@ -194,6 +206,103 @@ return new PdfRectangle(0, 0, GetWidth(characterCode), 0); } + private int? 
CharacterCodeToGlyphId(int characterCode) + { + bool HasFlag(FontDescriptorFlags value, FontDescriptorFlags target) + { + return (value & target) == target; + } + + if (descriptor == null || !unicodeValuesCache.TryGetValue(characterCode, out var unicode) + || fontProgram.TableRegister.CMapTable == null + || encoding == null + || !encoding.CodeToNameMap.TryGetValue(characterCode, out var name) + || name == null) + { + return null; + } + + if (string.Equals(name, ".notdef", StringComparison.OrdinalIgnoreCase)) + { + return 0; + } + + var glyphId = 0; + + if (HasFlag(descriptor.Flags, FontDescriptorFlags.Symbolic) && fontProgram.WindowsSymbolCMap != null) + { + const int startRangeF000 = 0xF000; + const int startRangeF100 = 0xF100; + const int startRangeF200 = 0xF200; + + // (3, 0) - (Windows, Symbol) + glyphId = fontProgram.WindowsSymbolCMap.CharacterCodeToGlyphIndex(characterCode); + + if (glyphId == 0 && characterCode >= 0 && characterCode <= 0xFF) + { + // CMap may use one of the following code ranges, so that we have to add the high byte to get the mapped value. + + // F000 - F0FF + glyphId = fontProgram.WindowsSymbolCMap.CharacterCodeToGlyphIndex(characterCode + startRangeF000); + + if (glyphId == 0) + { + // F100 - F1FF + glyphId = fontProgram.WindowsSymbolCMap.CharacterCodeToGlyphIndex(characterCode + startRangeF100); + } + + if (glyphId == 0) + { + // F200 - F2FF + glyphId = fontProgram.WindowsSymbolCMap.CharacterCodeToGlyphIndex(characterCode + startRangeF200); + } + } + + // Handle fonts incorrectly set to symbolic. 
+ if (glyphId == 0 && fontProgram.WindowsUnicodeCMap != null && !string.IsNullOrEmpty(unicode)) + { + glyphId = fontProgram.WindowsUnicodeCMap.CharacterCodeToGlyphIndex(unicode[0]); + } + } + else + { + // (3, 1) - (Windows, Unicode) + if (fontProgram.WindowsUnicodeCMap != null && !string.IsNullOrEmpty(unicode)) + { + glyphId = fontProgram.WindowsUnicodeCMap.CharacterCodeToGlyphIndex(unicode[0]); + } + + if (glyphId == 0 + && fontProgram.MacRomanCMap != null + && MacOsRomanEncoding.Instance.NameToCodeMap.TryGetValue(name, out var macCode)) + { + // (1, 0) - (Macintosh, Roman) + + glyphId = fontProgram.MacRomanCMap.CharacterCodeToGlyphIndex(macCode); + } + + if (glyphId == 0 && fontProgram.TableRegister.PostScriptTable != null) + { + for (var i = 0; i < fontProgram.TableRegister.PostScriptTable.GlyphNames.Length; i++) + { + var glyphName = fontProgram.TableRegister.PostScriptTable.GlyphNames[i]; + + if (string.Equals(glyphName, name, StringComparison.OrdinalIgnoreCase)) + { + return i; + } + } + } + } + + if (glyphId != 0) + { + return glyphId; + } + + return null; + } + private decimal GetWidth(int characterCode) { var index = characterCode - firstCharacter; diff --git a/src/UglyToad.PdfPig/Fonts/TrueType/TrueTypeFontProgram.cs b/src/UglyToad.PdfPig/Fonts/TrueType/TrueTypeFontProgram.cs index bd6e40f8..5298a8ad 100644 --- a/src/UglyToad.PdfPig/Fonts/TrueType/TrueTypeFontProgram.cs +++ b/src/UglyToad.PdfPig/Fonts/TrueType/TrueTypeFontProgram.cs @@ -5,6 +5,7 @@ using CidFonts; using Geometry; using Parser; + using Tables.CMapSubTables; using Util.JetBrains.Annotations; internal class TrueTypeFontProgram : ICidFontProgram @@ -19,19 +20,55 @@ [CanBeNull] public string Name => TableRegister.NameTable?.FontName; + public ICMapSubTable WindowsUnicodeCMap { get; } + + public ICMapSubTable MacRomanCMap { get; } + + public ICMapSubTable WindowsSymbolCMap { get; } + public TrueTypeFontProgram(decimal version, IReadOnlyDictionary tableHeaders, TableRegister tableRegister) { 
Version = version; TableHeaders = tableHeaders; TableRegister = tableRegister ?? throw new ArgumentNullException(nameof(tableRegister)); + + if (TableRegister.CMapTable != null) + { + const int encodingSymbol = 0; + const int encodingUnicode = 1; + const int encodingMacRoman = 0; + + foreach (var subTable in TableRegister.CMapTable.SubTables) + { + if (WindowsSymbolCMap == null + && subTable.PlatformId == TrueTypeCMapPlatform.Windows + && subTable.EncodingId == encodingSymbol) + { + WindowsSymbolCMap = subTable; + } + else if (WindowsUnicodeCMap == null + && subTable.PlatformId == TrueTypeCMapPlatform.Windows + && subTable.EncodingId == encodingUnicode) + { + WindowsUnicodeCMap = subTable; + } + else if (MacRomanCMap == null + && subTable.PlatformId == TrueTypeCMapPlatform.Macintosh + && subTable.EncodingId == encodingMacRoman) + { + MacRomanCMap = subTable; + } + } + } + } public bool TryGetBoundingBox(int characterIdentifier, out PdfRectangle boundingBox) => TryGetBoundingBox(characterIdentifier, null, out boundingBox); - public bool TryGetBoundingBox(int characterIdentifier, Func characterIdentifierToGlyphIndex, out PdfRectangle boundingBox) + public bool TryGetBoundingBox(int characterIdentifier, Func characterCodeToGlyphId, out PdfRectangle boundingBox) { boundingBox = default(PdfRectangle); - if (!TryGetGlyphIndex(characterIdentifier, characterIdentifierToGlyphIndex, out var index)) + if (!TryGetGlyphIndex(characterIdentifier, characterCodeToGlyphId, out var index)) { return false; } @@ -51,16 +88,16 @@ { boundingBox = glyph.Bounds; } - + return true; } public bool TryGetBoundingAdvancedWidth(int characterIdentifier, out decimal width) => TryGetBoundingAdvancedWidth(characterIdentifier, null, out width); - public bool TryGetBoundingAdvancedWidth(int characterIdentifier, Func characterIdentifierToGlyphIndex, out decimal width) + public bool TryGetBoundingAdvancedWidth(int characterIdentifier, Func characterCodeToGlyphId, out decimal width) { width = 0m; - if 
(!TryGetGlyphIndex(characterIdentifier, characterIdentifierToGlyphIndex, out var index)) + if (!TryGetGlyphIndex(characterIdentifier, characterCodeToGlyphId, out var index)) { return false; } @@ -80,23 +117,24 @@ return true; } - private bool TryGetGlyphIndex(int characterIdentifier, Func characterIdentifierToGlyphIndex, out int glyphIndex) + private bool TryGetGlyphIndex(int characterIdentifier, Func characterCodeToGlyphId, out int glyphId) { - glyphIndex = 0; + glyphId = 0; - if (characterIdentifierToGlyphIndex != null) - { - glyphIndex = characterIdentifierToGlyphIndex(characterIdentifier); + var externalGlyphId = characterCodeToGlyphId?.Invoke(characterIdentifier); - return true; - } + if (externalGlyphId != null) + { + glyphId = externalGlyphId.Value; + return true; + } if (TableRegister.CMapTable == null) { return false; } - return TableRegister.CMapTable.TryGetGlyphIndex(characterIdentifier, out glyphIndex); + return TableRegister.CMapTable.TryGetGlyphIndex(characterIdentifier, out glyphId); } } } \ No newline at end of file