mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-09-19 10:47:56 +08:00
Correctly map character codes to glyph ids when retrieving bounding boxes for TrueType fonts
Previously we just treated character codes as glyph ids when getting the bounding box from the TrueType font program itself. This change uses the character-code-to-glyph-id mapping logic from PDFBox, with some changes, to retrieve the correct bounding box where possible. Since this relies in some places on the Unicode value or name rather than the character code, we add a cache to the individual TrueType fonts to store the character-code-to-Unicode mapping, which should also have the benefit of improving performance.
This commit is contained in:
@@ -33,7 +33,7 @@
|
||||
}
|
||||
}
|
||||
|
||||
public int GetGlyphIndex(int characterIdentifier)
|
||||
public int? GetGlyphIndex(int characterIdentifier)
|
||||
{
|
||||
if (isIdentity)
|
||||
{
|
||||
|
@@ -10,9 +10,9 @@
|
||||
{
|
||||
bool TryGetBoundingBox(int characterIdentifier, out PdfRectangle boundingBox);
|
||||
|
||||
bool TryGetBoundingBox(int characterIdentifier, Func<int, int> characterIdentifierToGlyphIndex, out PdfRectangle boundingBox);
|
||||
bool TryGetBoundingBox(int characterIdentifier, Func<int, int?> characterCodeToGlyphId, out PdfRectangle boundingBox);
|
||||
|
||||
bool TryGetBoundingAdvancedWidth(int characterIdentifier, Func<int, int> characterIdentifierToGlyphIndex, out decimal width);
|
||||
bool TryGetBoundingAdvancedWidth(int characterIdentifier, Func<int, int?> characterCodeToGlyphId, out decimal width);
|
||||
|
||||
bool TryGetBoundingAdvancedWidth(int characterIdentifier, out decimal width);
|
||||
|
||||
|
@@ -71,12 +71,12 @@
|
||||
return true;
|
||||
}
|
||||
|
||||
public bool TryGetBoundingBox(int characterIdentifier, Func<int, int> characterIdentifierToGlyphIndex, out PdfRectangle boundingBox)
|
||||
public bool TryGetBoundingBox(int characterIdentifier, Func<int, int?> characterCodeToGlyphId, out PdfRectangle boundingBox)
|
||||
{
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
|
||||
public bool TryGetBoundingAdvancedWidth(int characterIdentifier, Func<int, int> characterIdentifierToGlyphIndex, out decimal width)
|
||||
public bool TryGetBoundingAdvancedWidth(int characterIdentifier, Func<int, int?> characterCodeToGlyphId, out decimal width)
|
||||
{
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
|
@@ -22,11 +22,11 @@
|
||||
private readonly Dictionary<int, CharacterBoundingBox> boundingBoxCache
|
||||
= new Dictionary<int, CharacterBoundingBox>();
|
||||
|
||||
[CanBeNull]
|
||||
private readonly Encoding encoding;
|
||||
private readonly Dictionary<int, string> unicodeValuesCache = new Dictionary<int, string>();
|
||||
|
||||
[CanBeNull]
|
||||
private readonly TrueTypeFontProgram fontProgram;
|
||||
[CanBeNull] private readonly Encoding encoding;
|
||||
|
||||
[CanBeNull] private readonly TrueTypeFontProgram fontProgram;
|
||||
|
||||
private readonly int firstCharacter;
|
||||
|
||||
@@ -68,11 +68,18 @@
|
||||
{
|
||||
value = null;
|
||||
|
||||
if (unicodeValuesCache.TryGetValue(characterCode, out value))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
// Behaviour specified by the Extraction of Text Content section of the specification.
|
||||
|
||||
// If the font contains a ToUnicode CMap use that.
|
||||
if (ToUnicode.CanMapToUnicode && ToUnicode.TryGet(characterCode, out value))
|
||||
{
|
||||
unicodeValuesCache[characterCode] = value;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -90,13 +97,18 @@
|
||||
try
|
||||
{
|
||||
value = GlyphList.AdobeGlyphList.NameToUnicode(encodedCharacterName)
|
||||
?? GlyphList.AdditionalGlyphList.NameToUnicode(encodedCharacterName);
|
||||
?? GlyphList.AdditionalGlyphList.NameToUnicode(encodedCharacterName);
|
||||
}
|
||||
catch
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (value != null)
|
||||
{
|
||||
unicodeValuesCache[characterCode] = value;
|
||||
}
|
||||
|
||||
return value != null;
|
||||
}
|
||||
|
||||
@@ -179,7 +191,7 @@
|
||||
return descriptor.BoundingBox;
|
||||
}
|
||||
|
||||
if (fontProgram.TryGetBoundingBox(characterCode, out var bounds))
|
||||
if (fontProgram.TryGetBoundingBox(characterCode, CharacterCodeToGlyphId, out var bounds))
|
||||
{
|
||||
return bounds;
|
||||
}
|
||||
@@ -194,6 +206,103 @@
|
||||
return new PdfRectangle(0, 0, GetWidth(characterCode), 0);
|
||||
}
|
||||
|
||||
/// <summary>
/// Maps a character code to a glyph id in the embedded TrueType font program by consulting,
/// in order, the (3, 0) Windows Symbol, (3, 1) Windows Unicode and (1, 0) Macintosh Roman
/// cmap subtables and finally the glyph names of the PostScript table.
/// </summary>
/// <remarks>
/// The Unicode value is read from <c>unicodeValuesCache</c> only; a character code which has
/// not been cached yet yields <see langword="null"/>.
/// </remarks>
/// <param name="characterCode">The character code to map.</param>
/// <returns>
/// The glyph id, 0 for the <c>.notdef</c> glyph, or <see langword="null"/> if no mapping could
/// be determined from the available information.
/// </returns>
private int? CharacterCodeToGlyphId(int characterCode)
{
    // Every lookup below needs the descriptor flags, the font program's cmap table,
    // the cached Unicode value and the encoded character name. Bail out if any is missing.
    if (descriptor == null
        || fontProgram == null
        || !unicodeValuesCache.TryGetValue(characterCode, out var unicode)
        || fontProgram.TableRegister.CMapTable == null
        || encoding == null
        || !encoding.CodeToNameMap.TryGetValue(characterCode, out var name)
        || name == null)
    {
        return null;
    }

    if (string.Equals(name, ".notdef", StringComparison.OrdinalIgnoreCase))
    {
        return 0;
    }

    var glyphId = 0;
    var hasUnicode = !string.IsNullOrEmpty(unicode);
    var isSymbolic = (descriptor.Flags & FontDescriptorFlags.Symbolic) == FontDescriptorFlags.Symbolic;

    if (isSymbolic && fontProgram.WindowsSymbolCMap != null)
    {
        const int firstHighByteRange = 0xF000;
        const int lastHighByteRange = 0xF200;
        const int highByteRangeStep = 0x100;

        // (3, 0) - (Windows, Symbol)
        glyphId = fontProgram.WindowsSymbolCMap.CharacterCodeToGlyphIndex(characterCode);

        if (glyphId == 0 && characterCode >= 0 && characterCode <= 0xFF)
        {
            // The cmap may use one of the ranges F000 - F0FF, F100 - F1FF or F200 - F2FF,
            // so the high byte has to be added to get the mapped value. Stop at the first hit.
            for (var highByte = firstHighByteRange; glyphId == 0 && highByte <= lastHighByteRange; highByte += highByteRangeStep)
            {
                glyphId = fontProgram.WindowsSymbolCMap.CharacterCodeToGlyphIndex(characterCode + highByte);
            }
        }

        // Handle fonts incorrectly set to symbolic.
        if (glyphId == 0 && fontProgram.WindowsUnicodeCMap != null && hasUnicode)
        {
            glyphId = fontProgram.WindowsUnicodeCMap.CharacterCodeToGlyphIndex(unicode[0]);
        }
    }
    else
    {
        // (3, 1) - (Windows, Unicode)
        if (fontProgram.WindowsUnicodeCMap != null && hasUnicode)
        {
            glyphId = fontProgram.WindowsUnicodeCMap.CharacterCodeToGlyphIndex(unicode[0]);
        }

        // (1, 0) - (Macintosh, Roman)
        if (glyphId == 0
            && fontProgram.MacRomanCMap != null
            && MacOsRomanEncoding.Instance.NameToCodeMap.TryGetValue(name, out var macCode))
        {
            glyphId = fontProgram.MacRomanCMap.CharacterCodeToGlyphIndex(macCode);
        }

        var postScriptTable = fontProgram.TableRegister.PostScriptTable;

        if (glyphId == 0 && postScriptTable != null)
        {
            // Glyph names are case-sensitive ("A" and "a" are different glyphs), so an exact
            // match always wins. The first case-insensitive match is only used as a fallback
            // when the table contains no exact match.
            var caseInsensitiveMatch = -1;

            for (var i = 0; i < postScriptTable.GlyphNames.Length; i++)
            {
                var glyphName = postScriptTable.GlyphNames[i];

                if (string.Equals(glyphName, name, StringComparison.Ordinal))
                {
                    return i;
                }

                if (caseInsensitiveMatch < 0
                    && string.Equals(glyphName, name, StringComparison.OrdinalIgnoreCase))
                {
                    caseInsensitiveMatch = i;
                }
            }

            if (caseInsensitiveMatch >= 0)
            {
                return caseInsensitiveMatch;
            }
        }
    }

    // A glyph id of 0 at this point means none of the cmap lookups produced a mapping.
    if (glyphId != 0)
    {
        return glyphId;
    }

    return null;
}
|
||||
|
||||
private decimal GetWidth(int characterCode)
|
||||
{
|
||||
var index = characterCode - firstCharacter;
|
||||
|
@@ -5,6 +5,7 @@
|
||||
using CidFonts;
|
||||
using Geometry;
|
||||
using Parser;
|
||||
using Tables.CMapSubTables;
|
||||
using Util.JetBrains.Annotations;
|
||||
|
||||
internal class TrueTypeFontProgram : ICidFontProgram
|
||||
@@ -19,19 +20,55 @@
|
||||
[CanBeNull]
|
||||
public string Name => TableRegister.NameTable?.FontName;
|
||||
|
||||
public ICMapSubTable WindowsUnicodeCMap { get; }
|
||||
|
||||
public ICMapSubTable MacRomanCMap { get; }
|
||||
|
||||
public ICMapSubTable WindowsSymbolCMap { get; }
|
||||
|
||||
/// <summary>
/// Create a new <see cref="TrueTypeFontProgram"/> and select the cmap subtables exposed by
/// <see cref="WindowsSymbolCMap"/>, <see cref="WindowsUnicodeCMap"/> and <see cref="MacRomanCMap"/>.
/// </summary>
/// <param name="version">The value stored in <c>Version</c>.</param>
/// <param name="tableHeaders">The value stored in <c>TableHeaders</c>.</param>
/// <param name="tableRegister">The table register for this font, must not be null.</param>
/// <exception cref="ArgumentNullException">If <paramref name="tableRegister"/> is null.</exception>
public TrueTypeFontProgram(decimal version, IReadOnlyDictionary<string, TrueTypeHeaderTable> tableHeaders, TableRegister tableRegister)
{
    Version = version;
    TableHeaders = tableHeaders;
    TableRegister = tableRegister ?? throw new ArgumentNullException(nameof(tableRegister));

    var characterMapTable = TableRegister.CMapTable;

    // Without a cmap table there are no subtables to select, all three properties stay null.
    if (characterMapTable == null)
    {
        return;
    }

    const int windowsSymbolEncodingId = 0;
    const int windowsUnicodeEncodingId = 1;
    const int macintoshRomanEncodingId = 0;

    foreach (var candidate in characterMapTable.SubTables)
    {
        // For each kind only the first matching subtable is kept, later ones are ignored.
        if (WindowsSymbolCMap == null
            && candidate.PlatformId == TrueTypeCMapPlatform.Windows
            && candidate.EncodingId == windowsSymbolEncodingId)
        {
            WindowsSymbolCMap = candidate;
            continue;
        }

        if (WindowsUnicodeCMap == null
            && candidate.PlatformId == TrueTypeCMapPlatform.Windows
            && candidate.EncodingId == windowsUnicodeEncodingId)
        {
            WindowsUnicodeCMap = candidate;
            continue;
        }

        if (MacRomanCMap == null
            && candidate.PlatformId == TrueTypeCMapPlatform.Macintosh
            && candidate.EncodingId == macintoshRomanEncodingId)
        {
            MacRomanCMap = candidate;
        }
    }
}
|
||||
|
||||
public bool TryGetBoundingBox(int characterIdentifier, out PdfRectangle boundingBox) => TryGetBoundingBox(characterIdentifier, null, out boundingBox);
|
||||
public bool TryGetBoundingBox(int characterIdentifier, Func<int, int> characterIdentifierToGlyphIndex, out PdfRectangle boundingBox)
|
||||
public bool TryGetBoundingBox(int characterIdentifier, Func<int, int?> characterCodeToGlyphId, out PdfRectangle boundingBox)
|
||||
{
|
||||
boundingBox = default(PdfRectangle);
|
||||
|
||||
if (!TryGetGlyphIndex(characterIdentifier, characterIdentifierToGlyphIndex, out var index))
|
||||
if (!TryGetGlyphIndex(characterIdentifier, characterCodeToGlyphId, out var index))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
@@ -51,16 +88,16 @@
|
||||
{
|
||||
boundingBox = glyph.Bounds;
|
||||
}
|
||||
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
public bool TryGetBoundingAdvancedWidth(int characterIdentifier, out decimal width) => TryGetBoundingAdvancedWidth(characterIdentifier, null, out width);
|
||||
public bool TryGetBoundingAdvancedWidth(int characterIdentifier, Func<int, int> characterIdentifierToGlyphIndex, out decimal width)
|
||||
public bool TryGetBoundingAdvancedWidth(int characterIdentifier, Func<int, int?> characterCodeToGlyphId, out decimal width)
|
||||
{
|
||||
width = 0m;
|
||||
|
||||
if (!TryGetGlyphIndex(characterIdentifier, characterIdentifierToGlyphIndex, out var index))
|
||||
if (!TryGetGlyphIndex(characterIdentifier, characterCodeToGlyphId, out var index))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
@@ -80,23 +117,24 @@
|
||||
return true;
|
||||
}
|
||||
|
||||
private bool TryGetGlyphIndex(int characterIdentifier, Func<int, int> characterIdentifierToGlyphIndex, out int glyphIndex)
|
||||
private bool TryGetGlyphIndex(int characterIdentifier, Func<int, int?> characterCodeToGlyphId, out int glyphId)
|
||||
{
|
||||
glyphIndex = 0;
|
||||
glyphId = 0;
|
||||
|
||||
if (characterIdentifierToGlyphIndex != null)
|
||||
{
|
||||
glyphIndex = characterIdentifierToGlyphIndex(characterIdentifier);
|
||||
var externalGlyphId = characterCodeToGlyphId?.Invoke(characterIdentifier);
|
||||
|
||||
return true;
|
||||
}
|
||||
if (externalGlyphId != null)
|
||||
{
|
||||
glyphId = externalGlyphId.Value;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (TableRegister.CMapTable == null)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return TableRegister.CMapTable.TryGetGlyphIndex(characterIdentifier, out glyphIndex);
|
||||
return TableRegister.CMapTable.TryGetGlyphIndex(characterIdentifier, out glyphId);
|
||||
}
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user