diff --git a/src/UglyToad.PdfPig/Fonts/CidFonts/CharacterIdentifierToGlyphIndexMap.cs b/src/UglyToad.PdfPig/Fonts/CidFonts/CharacterIdentifierToGlyphIndexMap.cs
index 4b62b63a..91f104e4 100644
--- a/src/UglyToad.PdfPig/Fonts/CidFonts/CharacterIdentifierToGlyphIndexMap.cs
+++ b/src/UglyToad.PdfPig/Fonts/CidFonts/CharacterIdentifierToGlyphIndexMap.cs
@@ -33,7 +33,7 @@
             }
         }
 
-        public int GetGlyphIndex(int characterIdentifier)
+        public int? GetGlyphIndex(int characterIdentifier)
         {
             if (isIdentity)
             {
diff --git a/src/UglyToad.PdfPig/Fonts/CidFonts/ICidFontProgram.cs b/src/UglyToad.PdfPig/Fonts/CidFonts/ICidFontProgram.cs
index 2d29c626..96529621 100644
--- a/src/UglyToad.PdfPig/Fonts/CidFonts/ICidFontProgram.cs
+++ b/src/UglyToad.PdfPig/Fonts/CidFonts/ICidFontProgram.cs
@@ -10,9 +10,9 @@
     {
         bool TryGetBoundingBox(int characterIdentifier, out PdfRectangle boundingBox);
 
-        bool TryGetBoundingBox(int characterIdentifier, Func characterIdentifierToGlyphIndex, out PdfRectangle boundingBox);
+        bool TryGetBoundingBox(int characterIdentifier, Func characterCodeToGlyphId, out PdfRectangle boundingBox);
 
-        bool TryGetBoundingAdvancedWidth(int characterIdentifier, Func characterIdentifierToGlyphIndex, out decimal width);
+        bool TryGetBoundingAdvancedWidth(int characterIdentifier, Func characterCodeToGlyphId, out decimal width);
 
         bool TryGetBoundingAdvancedWidth(int characterIdentifier, out decimal width);
 
diff --git a/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatFontProgram.cs b/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatFontProgram.cs
index 0ab94718..33085258 100644
--- a/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatFontProgram.cs
+++ b/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatFontProgram.cs
@@ -71,12 +71,12 @@
             return true;
         }
 
-        public bool TryGetBoundingBox(int characterIdentifier, Func characterIdentifierToGlyphIndex, out PdfRectangle boundingBox)
+        public bool TryGetBoundingBox(int characterIdentifier, Func characterCodeToGlyphId, out PdfRectangle boundingBox)
         {
             throw new NotImplementedException();
         }
 
-        public bool TryGetBoundingAdvancedWidth(int characterIdentifier, Func characterIdentifierToGlyphIndex, out decimal width)
+        public bool TryGetBoundingAdvancedWidth(int characterIdentifier, Func characterCodeToGlyphId, out decimal width)
         {
             throw new NotImplementedException();
         }
diff --git a/src/UglyToad.PdfPig/Fonts/Simple/TrueTypeSimpleFont.cs b/src/UglyToad.PdfPig/Fonts/Simple/TrueTypeSimpleFont.cs
index 520765b2..a82cb63a 100644
--- a/src/UglyToad.PdfPig/Fonts/Simple/TrueTypeSimpleFont.cs
+++ b/src/UglyToad.PdfPig/Fonts/Simple/TrueTypeSimpleFont.cs
@@ -22,11 +22,11 @@
 
         private readonly Dictionary boundingBoxCache = new Dictionary();
 
-        [CanBeNull]
-        private readonly Encoding encoding;
+        private readonly Dictionary unicodeValuesCache = new Dictionary(); // Unicode values already resolved, keyed by character code.
 
-        [CanBeNull]
-        private readonly TrueTypeFontProgram fontProgram;
+        [CanBeNull] private readonly Encoding encoding;
+
+        [CanBeNull] private readonly TrueTypeFontProgram fontProgram;
 
         private readonly int firstCharacter;
 
@@ -68,11 +68,18 @@
         {
             value = null;
 
+            if (unicodeValuesCache.TryGetValue(characterCode, out value)) // Reuse a value resolved by an earlier call.
+            {
+                return true;
+            }
+
             // Behaviour specified by the Extraction of Text Content section of the specification.
 
             // If the font contains a ToUnicode CMap use that.
             if (ToUnicode.CanMapToUnicode && ToUnicode.TryGet(characterCode, out value))
             {
+                unicodeValuesCache[characterCode] = value;
+
                 return true;
             }
 
@@ -90,13 +97,18 @@
             try
             {
                 value = GlyphList.AdobeGlyphList.NameToUnicode(encodedCharacterName)
-                            ?? GlyphList.AdditionalGlyphList.NameToUnicode(encodedCharacterName);
+                        ?? GlyphList.AdditionalGlyphList.NameToUnicode(encodedCharacterName);
             }
             catch
             {
                 return false;
             }
 
+            if (value != null) // Only successful glyph-list lookups are cached.
+            {
+                unicodeValuesCache[characterCode] = value;
+            }
+
             return value != null;
         }
 
@@ -179,7 +191,7 @@
                 return descriptor.BoundingBox;
             }
 
-            if (fontProgram.TryGetBoundingBox(characterCode, out var bounds))
+            if (fontProgram.TryGetBoundingBox(characterCode, CharacterCodeToGlyphId, out var bounds))
             {
                 return bounds;
             }
@@ -194,6 +206,103 @@
             return new PdfRectangle(0, 0, GetWidth(characterCode), 0);
         }
 
+        private int? CharacterCodeToGlyphId(int characterCode) // Resolves the glyph id for a character code; null when no mapping is found.
+        {
+            bool HasFlag(FontDescriptorFlags value, FontDescriptorFlags target) // True when every bit of target is set in value.
+            {
+                return (value & target) == target;
+            }
+
+            if (descriptor == null || !unicodeValuesCache.TryGetValue(characterCode, out var unicode) // NOTE(review): only a previously cached Unicode value is used; a cache miss returns null.
+                || fontProgram.TableRegister.CMapTable == null // NOTE(review): fontProgram is marked [CanBeNull] but is dereferenced unchecked here - confirm callers guarantee it.
+                || encoding == null
+                || !encoding.CodeToNameMap.TryGetValue(characterCode, out var name)
+                || name == null)
+            {
+                return null;
+            }
+
+            if (string.Equals(name, ".notdef", StringComparison.OrdinalIgnoreCase))
+            {
+                return 0; // The .notdef name resolves to glyph id 0.
+            }
+
+            var glyphId = 0; // 0 means "not found yet" for every lookup below.
+
+            if (HasFlag(descriptor.Flags, FontDescriptorFlags.Symbolic) && fontProgram.WindowsSymbolCMap != null)
+            {
+                const int startRangeF000 = 0xF000;
+                const int startRangeF100 = 0xF100;
+                const int startRangeF200 = 0xF200;
+
+                // cmap subtable (3, 0) - (Windows, Symbol): look the character code up directly.
+                glyphId = fontProgram.WindowsSymbolCMap.CharacterCodeToGlyphIndex(characterCode);
+
+                if (glyphId == 0 && characterCode >= 0 && characterCode <= 0xFF)
+                {
+                    // The symbol cmap may use one of the following code ranges, so retry with each high byte added until a glyph is found.
+
+                    // Range 0xF000 - 0xF0FF.
+                    glyphId = fontProgram.WindowsSymbolCMap.CharacterCodeToGlyphIndex(characterCode + startRangeF000);
+
+                    if (glyphId == 0)
+                    {
+                        // Range 0xF100 - 0xF1FF.
+                        glyphId = fontProgram.WindowsSymbolCMap.CharacterCodeToGlyphIndex(characterCode + startRangeF100);
+                    }
+
+                    if (glyphId == 0)
+                    {
+                        // Range 0xF200 - 0xF2FF.
+                        glyphId = fontProgram.WindowsSymbolCMap.CharacterCodeToGlyphIndex(characterCode + startRangeF200);
+                    }
+                }
+
+                // Handle fonts incorrectly flagged as symbolic: fall back to the Windows Unicode cmap with the first char of the cached Unicode value.
+                if (glyphId == 0 && fontProgram.WindowsUnicodeCMap != null && !string.IsNullOrEmpty(unicode))
+                {
+                    glyphId = fontProgram.WindowsUnicodeCMap.CharacterCodeToGlyphIndex(unicode[0]);
+                }
+            }
+            else
+            {
+                // cmap subtable (3, 1) - (Windows, Unicode): look up the first char of the cached Unicode value.
+                if (fontProgram.WindowsUnicodeCMap != null && !string.IsNullOrEmpty(unicode))
+                {
+                    glyphId = fontProgram.WindowsUnicodeCMap.CharacterCodeToGlyphIndex(unicode[0]);
+                }
+
+                if (glyphId == 0
+                    && fontProgram.MacRomanCMap != null
+                    && MacOsRomanEncoding.Instance.NameToCodeMap.TryGetValue(name, out var macCode))
+                {
+                    // cmap subtable (1, 0) - (Macintosh, Roman): look up the Mac OS Roman code for the glyph name.
+
+                    glyphId = fontProgram.MacRomanCMap.CharacterCodeToGlyphIndex(macCode);
+                }
+
+                if (glyphId == 0 && fontProgram.TableRegister.PostScriptTable != null) // Last resort: case-insensitive match against the PostScript table's glyph names.
+                {
+                    for (var i = 0; i < fontProgram.TableRegister.PostScriptTable.GlyphNames.Length; i++)
+                    {
+                        var glyphName = fontProgram.TableRegister.PostScriptTable.GlyphNames[i];
+
+                        if (string.Equals(glyphName, name, StringComparison.OrdinalIgnoreCase))
+                        {
+                            return i; // The index in GlyphNames is returned as the glyph id.
+                        }
+                    }
+                }
+            }
+
+            if (glyphId != 0)
+            {
+                return glyphId;
+            }
+
+            return null; // No lookup produced a non-zero glyph id.
+        }
+
         private decimal GetWidth(int characterCode)
         {
             var index = characterCode - firstCharacter;
diff --git a/src/UglyToad.PdfPig/Fonts/TrueType/TrueTypeFontProgram.cs b/src/UglyToad.PdfPig/Fonts/TrueType/TrueTypeFontProgram.cs
index bd6e40f8..5298a8ad 100644
--- a/src/UglyToad.PdfPig/Fonts/TrueType/TrueTypeFontProgram.cs
+++ b/src/UglyToad.PdfPig/Fonts/TrueType/TrueTypeFontProgram.cs
@@ -5,6 +5,7 @@
     using CidFonts;
     using Geometry;
     using Parser;
+    using Tables.CMapSubTables;
     using Util.JetBrains.Annotations;
 
     internal class TrueTypeFontProgram : ICidFontProgram
@@ -19,19 +20,55 @@
         [CanBeNull]
         public string Name => TableRegister.NameTable?.FontName;
 
+        public ICMapSubTable WindowsUnicodeCMap { get; } // First (Windows, encoding id 1) cmap subtable; left null when none is present.
+
+        public ICMapSubTable MacRomanCMap { get; } // First (Macintosh, encoding id 0) cmap subtable; left null when none is present.
+
+        public ICMapSubTable WindowsSymbolCMap { get; } // First (Windows, encoding id 0) cmap subtable; left null when none is present.
+
         public TrueTypeFontProgram(decimal version, IReadOnlyDictionary tableHeaders, TableRegister tableRegister)
         {
             Version = version;
             TableHeaders = tableHeaders;
             TableRegister = tableRegister ?? throw new ArgumentNullException(nameof(tableRegister));
+
+            if (TableRegister.CMapTable != null) // Pick out the first subtable found for each (platform, encoding) pair exposed above.
+            {
+                const int encodingSymbol = 0; // Compared together with the Windows platform id.
+                const int encodingUnicode = 1; // Compared together with the Windows platform id.
+                const int encodingMacRoman = 0; // Compared together with the Macintosh platform id.
+
+                foreach (var subTable in TableRegister.CMapTable.SubTables)
+                {
+                    if (WindowsSymbolCMap == null
+                        && subTable.PlatformId == TrueTypeCMapPlatform.Windows
+                        && subTable.EncodingId == encodingSymbol)
+                    {
+                        WindowsSymbolCMap = subTable;
+                    }
+                    else if (WindowsUnicodeCMap == null
+                        && subTable.PlatformId == TrueTypeCMapPlatform.Windows
+                        && subTable.EncodingId == encodingUnicode)
+                    {
+                        WindowsUnicodeCMap = subTable;
+                    }
+                    else if (MacRomanCMap == null
+                        && subTable.PlatformId == TrueTypeCMapPlatform.Macintosh
+                        && subTable.EncodingId == encodingMacRoman)
+                    {
+                        MacRomanCMap = subTable;
+                    }
+                }
+            }
+
         }
 
         public bool TryGetBoundingBox(int characterIdentifier, out PdfRectangle boundingBox) => TryGetBoundingBox(characterIdentifier, null, out boundingBox);
 
-        public bool TryGetBoundingBox(int characterIdentifier, Func characterIdentifierToGlyphIndex, out PdfRectangle boundingBox)
+        public bool TryGetBoundingBox(int characterIdentifier, Func characterCodeToGlyphId, out PdfRectangle boundingBox)
         {
             boundingBox = default(PdfRectangle);
-            if (!TryGetGlyphIndex(characterIdentifier, characterIdentifierToGlyphIndex, out var index))
+            if (!TryGetGlyphIndex(characterIdentifier, characterCodeToGlyphId, out var index))
             {
                 return false;
             }
@@ -51,16 +88,16 @@
             {
                 boundingBox = glyph.Bounds;
             }
-            
+
             return true;
         }
 
         public bool TryGetBoundingAdvancedWidth(int characterIdentifier, out decimal width) => TryGetBoundingAdvancedWidth(characterIdentifier, null, out width);
 
-        public bool TryGetBoundingAdvancedWidth(int characterIdentifier, Func characterIdentifierToGlyphIndex, out decimal width)
+        public bool TryGetBoundingAdvancedWidth(int characterIdentifier, Func characterCodeToGlyphId, out decimal width)
         {
             width = 0m;
-            if (!TryGetGlyphIndex(characterIdentifier, characterIdentifierToGlyphIndex, out var index))
+            if (!TryGetGlyphIndex(characterIdentifier, characterCodeToGlyphId, out var index))
             {
                 return false;
             }
@@ -80,23 +117,24 @@
             return true;
         }
 
-        private bool TryGetGlyphIndex(int characterIdentifier, Func characterIdentifierToGlyphIndex, out int glyphIndex)
+        private bool TryGetGlyphIndex(int characterIdentifier, Func characterCodeToGlyphId, out int glyphId)
         {
-            glyphIndex = 0;
+            glyphId = 0;
 
-            if (characterIdentifierToGlyphIndex != null)
-            {
-                glyphIndex = characterIdentifierToGlyphIndex(characterIdentifier);
+            var externalGlyphId = characterCodeToGlyphId?.Invoke(characterIdentifier); // Null when no delegate was supplied or the delegate returned null.
 
-                return true;
-            }
+            if (externalGlyphId != null)
+            {
+                glyphId = externalGlyphId.Value;
+                return true;
+            }
 
             if (TableRegister.CMapTable == null)
             {
                 return false;
             }
 
-            return TableRegister.CMapTable.TryGetGlyphIndex(characterIdentifier, out glyphIndex);
+            return TableRegister.CMapTable.TryGetGlyphIndex(characterIdentifier, out glyphId); // Fall back to the font's own cmap table.
         }
     }
 }
\ No newline at end of file