correctly map character code to glyph id when retrieving bounding boxes for truetype fonts

Previously we treated character codes as glyph ids when getting the bounding box from the TrueType font program itself, which is only correct when the two happen to coincide. This change uses the character code to glyph id mapping logic from PDFBox, with some changes, to retrieve the correct bounding box where possible. Since this mapping relies in some places on the Unicode value or glyph name rather than the character code, we add a cache to each individual TrueType font to store the character code to Unicode mapping, which should also have the benefit of improving performance.
This commit is contained in:
Eliot Jones
2019-12-20 12:47:24 +00:00
parent 7296c3c125
commit 3e6fa4b694
5 changed files with 171 additions and 24 deletions

View File

@@ -33,7 +33,7 @@
}
}
public int GetGlyphIndex(int characterIdentifier)
public int? GetGlyphIndex(int characterIdentifier)
{
if (isIdentity)
{

View File

@@ -10,9 +10,9 @@
{
bool TryGetBoundingBox(int characterIdentifier, out PdfRectangle boundingBox);
bool TryGetBoundingBox(int characterIdentifier, Func<int, int> characterIdentifierToGlyphIndex, out PdfRectangle boundingBox);
bool TryGetBoundingBox(int characterIdentifier, Func<int, int?> characterCodeToGlyphId, out PdfRectangle boundingBox);
bool TryGetBoundingAdvancedWidth(int characterIdentifier, Func<int, int> characterIdentifierToGlyphIndex, out decimal width);
bool TryGetBoundingAdvancedWidth(int characterIdentifier, Func<int, int?> characterCodeToGlyphId, out decimal width);
bool TryGetBoundingAdvancedWidth(int characterIdentifier, out decimal width);

View File

@@ -71,12 +71,12 @@
return true;
}
public bool TryGetBoundingBox(int characterIdentifier, Func<int, int> characterIdentifierToGlyphIndex, out PdfRectangle boundingBox)
public bool TryGetBoundingBox(int characterIdentifier, Func<int, int?> characterCodeToGlyphId, out PdfRectangle boundingBox)
{
throw new NotImplementedException();
}
public bool TryGetBoundingAdvancedWidth(int characterIdentifier, Func<int, int> characterIdentifierToGlyphIndex, out decimal width)
public bool TryGetBoundingAdvancedWidth(int characterIdentifier, Func<int, int?> characterCodeToGlyphId, out decimal width)
{
throw new NotImplementedException();
}

View File

@@ -22,11 +22,11 @@
private readonly Dictionary<int, CharacterBoundingBox> boundingBoxCache
= new Dictionary<int, CharacterBoundingBox>();
[CanBeNull]
private readonly Encoding encoding;
private readonly Dictionary<int, string> unicodeValuesCache = new Dictionary<int, string>();
[CanBeNull]
private readonly TrueTypeFontProgram fontProgram;
[CanBeNull] private readonly Encoding encoding;
[CanBeNull] private readonly TrueTypeFontProgram fontProgram;
private readonly int firstCharacter;
@@ -68,11 +68,18 @@
{
value = null;
if (unicodeValuesCache.TryGetValue(characterCode, out value))
{
return true;
}
// Behaviour specified by the Extraction of Text Content section of the specification.
// If the font contains a ToUnicode CMap use that.
if (ToUnicode.CanMapToUnicode && ToUnicode.TryGet(characterCode, out value))
{
unicodeValuesCache[characterCode] = value;
return true;
}
@@ -97,6 +104,11 @@
return false;
}
if (value != null)
{
unicodeValuesCache[characterCode] = value;
}
return value != null;
}
@@ -179,7 +191,7 @@
return descriptor.BoundingBox;
}
if (fontProgram.TryGetBoundingBox(characterCode, out var bounds))
if (fontProgram.TryGetBoundingBox(characterCode, CharacterCodeToGlyphId, out var bounds))
{
return bounds;
}
@@ -194,6 +206,103 @@
return new PdfRectangle(0, 0, GetWidth(characterCode), 0);
}
/// <summary>
/// Maps a character code to a glyph id in the embedded TrueType font program using,
/// in order of preference, the (3, 0) Windows Symbol cmap, the (3, 1) Windows Unicode cmap,
/// the (1, 0) Macintosh Roman cmap and finally the glyph names in the PostScript table.
/// </summary>
/// <param name="characterCode">The character code to map.</param>
/// <returns>
/// The glyph id where one could be determined, or <see langword="null"/> when the information
/// required for the mapping is unavailable or no mapping was found.
/// </returns>
private int? CharacterCodeToGlyphId(int characterCode)
{
    bool HasFlag(FontDescriptorFlags value, FontDescriptorFlags target)
    {
        return (value & target) == target;
    }

    // The mapping needs the descriptor flags, a previously cached unicode value, the font's
    // cmap table and the glyph name from the encoding. The font program is nullable on this
    // type so it is guarded here rather than relying on the caller.
    if (descriptor == null
        || fontProgram == null
        || !unicodeValuesCache.TryGetValue(characterCode, out var unicode)
        || fontProgram.TableRegister.CMapTable == null
        || encoding == null
        || !encoding.CodeToNameMap.TryGetValue(characterCode, out var name)
        || name == null)
    {
        return null;
    }

    if (string.Equals(name, ".notdef", StringComparison.OrdinalIgnoreCase))
    {
        return 0;
    }

    var glyphId = 0;

    if (HasFlag(descriptor.Flags, FontDescriptorFlags.Symbolic) && fontProgram.WindowsSymbolCMap != null)
    {
        const int startRangeF000 = 0xF000;
        const int startRangeF100 = 0xF100;
        const int startRangeF200 = 0xF200;

        // (3, 0) - (Windows, Symbol)
        glyphId = fontProgram.WindowsSymbolCMap.CharacterCodeToGlyphIndex(characterCode);

        if (glyphId == 0 && characterCode >= 0 && characterCode <= 0xFF)
        {
            // CMap may use one of the following code ranges, so that we have to add the high byte to get the mapped value.
            // F000 - F0FF
            glyphId = fontProgram.WindowsSymbolCMap.CharacterCodeToGlyphIndex(characterCode + startRangeF000);

            if (glyphId == 0)
            {
                // F100 - F1FF
                glyphId = fontProgram.WindowsSymbolCMap.CharacterCodeToGlyphIndex(characterCode + startRangeF100);
            }

            if (glyphId == 0)
            {
                // F200 - F2FF
                glyphId = fontProgram.WindowsSymbolCMap.CharacterCodeToGlyphIndex(characterCode + startRangeF200);
            }
        }

        // Handle fonts incorrectly set to symbolic.
        if (glyphId == 0 && fontProgram.WindowsUnicodeCMap != null && !string.IsNullOrEmpty(unicode))
        {
            glyphId = fontProgram.WindowsUnicodeCMap.CharacterCodeToGlyphIndex(unicode[0]);
        }
    }
    else
    {
        // (3, 1) - (Windows, Unicode)
        if (fontProgram.WindowsUnicodeCMap != null && !string.IsNullOrEmpty(unicode))
        {
            glyphId = fontProgram.WindowsUnicodeCMap.CharacterCodeToGlyphIndex(unicode[0]);
        }

        if (glyphId == 0
            && fontProgram.MacRomanCMap != null
            && MacOsRomanEncoding.Instance.NameToCodeMap.TryGetValue(name, out var macCode))
        {
            // (1, 0) - (Macintosh, Roman)
            glyphId = fontProgram.MacRomanCMap.CharacterCodeToGlyphIndex(macCode);
        }

        if (glyphId == 0 && fontProgram.TableRegister.PostScriptTable != null)
        {
            var glyphNames = fontProgram.TableRegister.PostScriptTable.GlyphNames;

            // Glyph names are case-sensitive ("A" and "a" name different glyphs), so an exact
            // match always wins. The first case-insensitive match is only used as a fallback
            // for fonts whose names differ from the encoding by case alone.
            int? caseInsensitiveMatch = null;

            for (var i = 0; i < glyphNames.Length; i++)
            {
                var glyphName = glyphNames[i];

                if (string.Equals(glyphName, name, StringComparison.Ordinal))
                {
                    return i;
                }

                if (caseInsensitiveMatch == null
                    && string.Equals(glyphName, name, StringComparison.OrdinalIgnoreCase))
                {
                    caseInsensitiveMatch = i;
                }
            }

            if (caseInsensitiveMatch != null)
            {
                return caseInsensitiveMatch;
            }
        }
    }

    if (glyphId != 0)
    {
        return glyphId;
    }

    // Glyph id 0 from a cmap means "not found"; report no mapping instead.
    return null;
}
private decimal GetWidth(int characterCode)
{
var index = characterCode - firstCharacter;

View File

@@ -5,6 +5,7 @@
using CidFonts;
using Geometry;
using Parser;
using Tables.CMapSubTables;
using Util.JetBrains.Annotations;
internal class TrueTypeFontProgram : ICidFontProgram
@@ -19,19 +20,55 @@
[CanBeNull]
public string Name => TableRegister.NameTable?.FontName;
/// <summary>
/// The first cmap sub-table with the Windows platform and encoding id 1 (Unicode),
/// or <see langword="null"/> if the font has no cmap table or no such sub-table.
/// </summary>
public ICMapSubTable WindowsUnicodeCMap { get; }
/// <summary>
/// The first cmap sub-table with the Macintosh platform and encoding id 0 (Roman),
/// or <see langword="null"/> if the font has no cmap table or no such sub-table.
/// </summary>
public ICMapSubTable MacRomanCMap { get; }
/// <summary>
/// The first cmap sub-table with the Windows platform and encoding id 0 (Symbol),
/// or <see langword="null"/> if the font has no cmap table or no such sub-table.
/// </summary>
public ICMapSubTable WindowsSymbolCMap { get; }
/// <summary>
/// Creates the font program and picks out the cmap sub-tables used for glyph lookup.
/// </summary>
/// <param name="version">The version value stored on the font program.</param>
/// <param name="tableHeaders">The table headers of the font, keyed by string.</param>
/// <param name="tableRegister">The parsed tables of the font, must not be null.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="tableRegister"/> is null.</exception>
public TrueTypeFontProgram(decimal version, IReadOnlyDictionary<string, TrueTypeHeaderTable> tableHeaders, TableRegister tableRegister)
{
    if (tableRegister == null)
    {
        throw new ArgumentNullException(nameof(tableRegister));
    }

    Version = version;
    TableHeaders = tableHeaders;
    TableRegister = tableRegister;

    // Without a cmap table there are no sub-tables to choose from; the properties stay null.
    if (TableRegister.CMapTable == null)
    {
        return;
    }

    // Encoding ids are only meaningful together with their platform.
    const int windowsSymbolEncoding = 0;
    const int windowsUnicodeEncoding = 1;
    const int macintoshRomanEncoding = 0;

    foreach (var candidate in TableRegister.CMapTable.SubTables)
    {
        var isWindows = candidate.PlatformId == TrueTypeCMapPlatform.Windows;
        var isMacintosh = candidate.PlatformId == TrueTypeCMapPlatform.Macintosh;

        // Only the first sub-table of each kind is kept, later duplicates are ignored.
        if (isWindows && candidate.EncodingId == windowsSymbolEncoding)
        {
            WindowsSymbolCMap = WindowsSymbolCMap ?? candidate;
        }
        else if (isWindows && candidate.EncodingId == windowsUnicodeEncoding)
        {
            WindowsUnicodeCMap = WindowsUnicodeCMap ?? candidate;
        }
        else if (isMacintosh && candidate.EncodingId == macintoshRomanEncoding)
        {
            MacRomanCMap = MacRomanCMap ?? candidate;
        }
    }
}
public bool TryGetBoundingBox(int characterIdentifier, out PdfRectangle boundingBox) => TryGetBoundingBox(characterIdentifier, null, out boundingBox);
public bool TryGetBoundingBox(int characterIdentifier, Func<int, int> characterIdentifierToGlyphIndex, out PdfRectangle boundingBox)
public bool TryGetBoundingBox(int characterIdentifier, Func<int, int?> characterCodeToGlyphId, out PdfRectangle boundingBox)
{
boundingBox = default(PdfRectangle);
if (!TryGetGlyphIndex(characterIdentifier, characterIdentifierToGlyphIndex, out var index))
if (!TryGetGlyphIndex(characterIdentifier, characterCodeToGlyphId, out var index))
{
return false;
}
@@ -56,11 +93,11 @@
}
public bool TryGetBoundingAdvancedWidth(int characterIdentifier, out decimal width) => TryGetBoundingAdvancedWidth(characterIdentifier, null, out width);
public bool TryGetBoundingAdvancedWidth(int characterIdentifier, Func<int, int> characterIdentifierToGlyphIndex, out decimal width)
public bool TryGetBoundingAdvancedWidth(int characterIdentifier, Func<int, int?> characterCodeToGlyphId, out decimal width)
{
width = 0m;
if (!TryGetGlyphIndex(characterIdentifier, characterIdentifierToGlyphIndex, out var index))
if (!TryGetGlyphIndex(characterIdentifier, characterCodeToGlyphId, out var index))
{
return false;
}
@@ -80,14 +117,15 @@
return true;
}
private bool TryGetGlyphIndex(int characterIdentifier, Func<int, int> characterIdentifierToGlyphIndex, out int glyphIndex)
private bool TryGetGlyphIndex(int characterIdentifier, Func<int, int?> characterCodeToGlyphId, out int glyphId)
{
glyphIndex = 0;
glyphId = 0;
if (characterIdentifierToGlyphIndex != null)
var externalGlyphId = characterCodeToGlyphId?.Invoke(characterIdentifier);
if (externalGlyphId != null)
{
glyphIndex = characterIdentifierToGlyphIndex(characterIdentifier);
glyphId = externalGlyphId.Value;
return true;
}
@@ -96,7 +134,7 @@
return false;
}
return TableRegister.CMapTable.TryGetGlyphIndex(characterIdentifier, out glyphIndex);
return TableRegister.CMapTable.TryGetGlyphIndex(characterIdentifier, out glyphId);
}
}
}