finish implementation of truetype cmap replacer #98

this overwrites the cmap table which is moved to the end of the truetype font file. the new table contains a single windows symbol subtable (3,0) of the format 6 type which maps the character codes in the single range 0 -> glyphcount to the corresponding glyph indices in the font. the new cmap table is then written to the new font file and the header value for length is updated.

this also changes many truetype classes to use the corresponding ushort datatype rather than ints, to save space.

the generated file is displaying correctly in most pdf viewers and passes all tests but in adobe acrobat reader the text is present but invisible. this was not a problem with the previous approach to file generation. there is no log information as to why this might be the case but it seems like the answer must be related to the validity of the overwritten truetype file. we might need to provide an additional macroman cmap subtable in case this is required by acrobat reader.

running the produced font for andada-regular through fontvalidator https://github.com/HinTak/Font-Validator/releases indicates a number of issues that may cause the file to be an invalid font (cannot open with microsoft font viewer). the next step is to compare the errors present in the unmodified andada regular file with the errors in our version. the most likely candidates seem to be:
* os/2: font is a symbol font but panose byte 1, familytype, is not set to latin symbol.
* os/2: a unicode range was indicated in ulunicoderange but the font has no characters in that range.
* os/2: the usfirstcharindex/uslastcharindex is not valid.
* os/2: the font contains a 3,0 cmap but the codepagerange bit 31 is clear.
* os/2: the usbreakchar is not mapped to a glyf.
* head: font checksum is incorrect (this can also be the case for working fonts so seems unlikely to be the cause).
This commit is contained in:
Eliot Jones
2019-12-31 14:00:46 +00:00
parent 59c43cc526
commit 3ad03ff3ee
11 changed files with 184 additions and 114 deletions

View File

@@ -18,7 +18,7 @@
/// <summary>
/// An array of the last points of each contour.
/// </summary>
public int[] EndPointsOfContours { get; }
public ushort[] EndPointsOfContours { get; }
public GlyphPoint[] Points { get; }
@@ -26,7 +26,7 @@
public bool IsEmpty => Points.Length == 0;
public Glyph(bool isSimple, byte[] instructions, int[] endPointsOfContours, GlyphPoint[] points,
public Glyph(bool isSimple, byte[] instructions, ushort[] endPointsOfContours, GlyphPoint[] points,
PdfRectangle bounds)
{
IsSimple = isSimple;
@@ -38,7 +38,7 @@
public static IGlyphDescription Empty(PdfRectangle bounds)
{
return new Glyph(true, new byte[0], new int[0], new GlyphPoint[0], bounds);
return new Glyph(true, new byte[0], new ushort[0], new GlyphPoint[0], bounds);
}
public IGlyphDescription DeepClone()
@@ -46,7 +46,7 @@
var clonedInstructions = new byte[Instructions.Length];
Array.Copy(Instructions, clonedInstructions, Instructions.Length);
var clonedEndPoints = new int[EndPointsOfContours.Length];
var clonedEndPoints = new ushort[EndPointsOfContours.Length];
Array.Copy(EndPointsOfContours, clonedEndPoints, EndPointsOfContours.Length);
var clonedPoints = new GlyphPoint[Points.Length];
@@ -80,11 +80,11 @@
return newPoints;
}
private int[] MergeContourEndPoints(IGlyphDescription glyph)
private ushort[] MergeContourEndPoints(IGlyphDescription glyph)
{
var destinationLastEndPoint = EndPointsOfContours[EndPointsOfContours.Length - 1] + 1;
var endPoints = new int[EndPointsOfContours.Length + glyph.EndPointsOfContours.Length];
var endPoints = new ushort[EndPointsOfContours.Length + glyph.EndPointsOfContours.Length];
for (var i = 0; i < EndPointsOfContours.Length; i++)
{
@@ -93,7 +93,7 @@
for (var i = 0; i < glyph.EndPointsOfContours.Length; i++)
{
endPoints[i + EndPointsOfContours.Length] = glyph.EndPointsOfContours[i] + destinationLastEndPoint;
endPoints[i + EndPointsOfContours.Length] = (ushort)(glyph.EndPointsOfContours[i] + destinationLastEndPoint);
}
return endPoints;

View File

@@ -10,7 +10,7 @@
byte[] Instructions { get; }
int[] EndPointsOfContours { get; }
ushort[] EndPointsOfContours { get; }
GlyphPoint[] Points { get; }

View File

@@ -91,11 +91,11 @@
{
public TrueTypeCMapPlatform PlatformId { get; }
public int EncodingId { get; }
public ushort EncodingId { get; }
public long Offset { get; }
public SubTableHeaderEntry(TrueTypeCMapPlatform platformId, int encodingId, long offset)
public SubTableHeaderEntry(TrueTypeCMapPlatform platformId, ushort encodingId, long offset)
{
PlatformId = platformId;
EncodingId = encodingId;

View File

@@ -2,38 +2,37 @@
{
/// <inheritdoc />
/// <summary>
/// The format 0 sub-total where character codes and glyph indices are restricted to a single bytes.
/// The format 0 sub-table where character codes and glyph indices are restricted to a single byte.
/// </summary>
internal class ByteEncodingCMapTable : ICMapSubTable
{
private const int SizeOfShort = 2;
private const int GlyphMappingLength = 256;
private readonly byte[] glyphMapping;
public TrueTypeCMapPlatform PlatformId { get; }
public int EncodingId { get; }
public ushort EncodingId { get; }
public int FirstCharacterCode { get; }
public ushort LanguageId { get; }
public int LastCharacterCode { get; }
private ByteEncodingCMapTable(TrueTypeCMapPlatform platformId, int encodingId, byte[] glyphMapping)
private ByteEncodingCMapTable(TrueTypeCMapPlatform platformId, ushort encodingId, ushort languageId, byte[] glyphMapping)
{
this.glyphMapping = glyphMapping;
PlatformId = platformId;
EncodingId = encodingId;
LanguageId = languageId;
}
public static ByteEncodingCMapTable Load(TrueTypeDataBytes data, TrueTypeCMapPlatform platformId, int encodingId)
public static ByteEncodingCMapTable Load(TrueTypeDataBytes data, TrueTypeCMapPlatform platformId, ushort encodingId)
{
// ReSharper disable UnusedVariable
var length = data.ReadUnsignedShort();
var version = data.ReadUnsignedShort();
// ReSharper restore UnusedVariable
var language = data.ReadUnsignedShort();
var glyphMapping = data.ReadByteArray(GlyphMappingLength);
var glyphMapping = data.ReadByteArray(length - (SizeOfShort * 3));
return new ByteEncodingCMapTable(platformId, encodingId, glyphMapping);
return new ByteEncodingCMapTable(platformId, encodingId, language, glyphMapping);
}
public int CharacterCodeToGlyphIndex(int characterCode)

View File

@@ -12,34 +12,24 @@ namespace UglyToad.PdfPig.Fonts.TrueType.Tables.CMapSubTables
{
public TrueTypeCMapPlatform PlatformId { get; }
public int EncodingId { get; }
public ushort EncodingId { get; }
public int FirstCharacterCode { get; }
public int LastCharacterCode { get; }
public int Language { get; }
public ushort Language { get; }
public IReadOnlyList<Segment> Segments { get; }
public IReadOnlyList<int> GlyphIds { get; }
public IReadOnlyList<ushort> GlyphIds { get; }
/// <summary>
/// Create a new <see cref="Format4CMapTable"/>.
/// </summary>
public Format4CMapTable(TrueTypeCMapPlatform platformId, int encodingId, int language, IReadOnlyList<Segment> segments, IReadOnlyList<int> glyphIds)
public Format4CMapTable(TrueTypeCMapPlatform platformId, ushort encodingId, ushort language, IReadOnlyList<Segment> segments, IReadOnlyList<ushort> glyphIds)
{
PlatformId = platformId;
EncodingId = encodingId;
Language = language;
Segments = segments ?? throw new ArgumentNullException(nameof(segments));
GlyphIds = glyphIds ?? throw new ArgumentNullException(nameof(glyphIds));
if (Segments.Count > 0)
{
FirstCharacterCode = Segments[0].StartCode;
LastCharacterCode = Segments[Segments.Count - 1].EndCode;
}
}
public int CharacterCodeToGlyphIndex(int characterCode)
@@ -66,7 +56,7 @@ namespace UglyToad.PdfPig.Fonts.TrueType.Tables.CMapSubTables
return 0;
}
public static Format4CMapTable Load(TrueTypeDataBytes data, TrueTypeCMapPlatform platformId, int encodingId)
public static Format4CMapTable Load(TrueTypeDataBytes data, TrueTypeCMapPlatform platformId, ushort encodingId)
{
// Length in bytes.
var length = data.ReadUnsignedShort();

View File

@@ -14,13 +14,9 @@
public TrueTypeCMapPlatform PlatformId { get; }
public int EncodingId { get; }
public ushort EncodingId { get; }
public int FirstCharacterCode { get; }
public int LastCharacterCode { get; }
private HighByteMappingCMapTable(TrueTypeCMapPlatform platformId, int encodingId, IReadOnlyDictionary<int, int> characterCodesToGlyphIndices)
private HighByteMappingCMapTable(TrueTypeCMapPlatform platformId, ushort encodingId, IReadOnlyDictionary<int, int> characterCodesToGlyphIndices)
{
this.characterCodesToGlyphIndices = characterCodesToGlyphIndices ?? throw new ArgumentNullException(nameof(characterCodesToGlyphIndices));
PlatformId = platformId;
@@ -37,7 +33,7 @@
return index;
}
public static HighByteMappingCMapTable Load(TrueTypeDataBytes data, int numberOfGlyphs, TrueTypeCMapPlatform platformId, int encodingId)
public static HighByteMappingCMapTable Load(TrueTypeDataBytes data, int numberOfGlyphs, TrueTypeCMapPlatform platformId, ushort encodingId)
{
// ReSharper disable UnusedVariable
var length = data.ReadUnsignedShort();

View File

@@ -13,13 +13,9 @@
TrueTypeCMapPlatform PlatformId { get; }
/// <summary>
/// Platform specific encoding indentifier.
/// Platform specific encoding identifier. Interpretation depends on the value of the <see cref="PlatformId"/>.
/// </summary>
int EncodingId { get; }
int FirstCharacterCode { get; }
int LastCharacterCode { get; }
ushort EncodingId { get; }
/// <summary>
/// Maps from a character code to the array index of the glyph in the font data.
@@ -32,7 +28,7 @@
/// <summary>
/// The platform identifier for a CMap table.
/// </summary>
internal enum TrueTypeCMapPlatform
internal enum TrueTypeCMapPlatform : ushort
{
/// <summary>
/// Unicode.

View File

@@ -10,10 +10,13 @@ namespace UglyToad.PdfPig.Fonts.TrueType.Tables.CMapSubTables
internal class TrimmedTableMappingCMapTable : ICMapSubTable
{
private readonly int entryCount;
private readonly int[] glyphIndices;
private readonly ushort[] glyphIndices;
/// <inheritdoc />
public TrueTypeCMapPlatform PlatformId { get; }
public int EncodingId { get; }
/// <inheritdoc />
public ushort EncodingId { get; }
public int FirstCharacterCode { get; }
@@ -22,7 +25,7 @@ namespace UglyToad.PdfPig.Fonts.TrueType.Tables.CMapSubTables
/// <summary>
/// Create a new <see cref="TrimmedTableMappingCMapTable"/>.
/// </summary>
public TrimmedTableMappingCMapTable(TrueTypeCMapPlatform platformId, int encodingId, int firstCharacterCode, int entryCount, int[] glyphIndices)
public TrimmedTableMappingCMapTable(TrueTypeCMapPlatform platformId, ushort encodingId, int firstCharacterCode, int entryCount, ushort[] glyphIndices)
{
FirstCharacterCode = firstCharacterCode;
this.entryCount = entryCount;
@@ -51,7 +54,7 @@ namespace UglyToad.PdfPig.Fonts.TrueType.Tables.CMapSubTables
return glyphIndices[offset];
}
public static TrimmedTableMappingCMapTable Load(TrueTypeDataBytes data, TrueTypeCMapPlatform platformId, int encodingId)
public static TrimmedTableMappingCMapTable Load(TrueTypeDataBytes data, TrueTypeCMapPlatform platformId, ushort encodingId)
{
var length = data.ReadUnsignedShort();
var language = data.ReadUnsignedShort();

View File

@@ -124,9 +124,9 @@
return signedByte < 127 ? signedByte : signedByte - 256;
}
public int[] ReadUnsignedShortArray(int length)
public ushort[] ReadUnsignedShortArray(int length)
{
var result = new int[length];
var result = new ushort[length];
for (int i = 0; i < length; i++)
{

View File

@@ -7,8 +7,9 @@
using System.Text;
using IO;
using PdfPig.Fonts.TrueType;
using PdfPig.Fonts.TrueType.Parser;
internal static class TrueTypeEncodingReplacer
internal static class TrueTypeCMapReplacer
{
private const int SizeOfFraction = 4;
private const int SizeOfShort = 2;
@@ -18,8 +19,18 @@
private const string CMapTag = "cmap";
private const string HeadTag = "head";
public static byte[] ReplaceCMapTables(IInputBytes fontBytes, IReadOnlyDictionary<char, byte> newEncoding)
public static byte[] ReplaceCMapTables(TrueTypeFontProgram fontProgram, IInputBytes fontBytes, IReadOnlyDictionary<char, byte> newEncoding)
{
if (fontBytes == null)
{
throw new ArgumentNullException(nameof(fontBytes));
}
if (newEncoding == null)
{
throw new ArgumentNullException(nameof(newEncoding));
}
var buffer = new byte[2048];
var inputTableHeaders = new Dictionary<string, InputHeader>(StringComparer.OrdinalIgnoreCase);
@@ -97,15 +108,17 @@
inputOffset = fontBytes.CurrentOffset;
}
// TODO: write the modified cmap table here.
// Create a new cmap table here.
var table = GenerateWindowsSymbolTable(fontProgram, newEncoding);
var cmapLocation = inputTableHeaders[CMapTag];
fontBytes.Seek(cmapLocation.HeaderTable.Offset);
var newCmapTableLocation = (uint)stream.Position;
CopyThroughBufferDiscardData(stream, buffer, fontBytes, cmapLocation.HeaderTable.Length);
var newCmapTableLength = (uint)table.Length;
CopyThroughBufferDiscardData(stream, buffer, new ByteArrayInputBytes(table), newCmapTableLength);
outputTableHeaders[cmapLocation.Tag] = new TrueTypeHeaderTable(cmapLocation.Tag, 0, newCmapTableLocation, cmapLocation.HeaderTable.Length);
outputTableHeaders[cmapLocation.Tag] = new TrueTypeHeaderTable(cmapLocation.Tag, 0, newCmapTableLocation, newCmapTableLength);
foreach (var inputHeader in inputTableHeaders)
{
@@ -117,13 +130,21 @@
var inputLength = inputHeader.Value.HeaderTable.Length;
if (outputHeader.Length != inputLength)
var isCmap = inputHeader.Value.IsTable(CMapTag);
if (outputHeader.Length != inputLength && !isCmap)
{
throw new InvalidOperationException($"Actual data length {outputHeader.Length} " +
$"did not match header length {inputLength} for table {inputHeader.Key}.");
}
WriteUInt(stream, outputHeader.Offset);
if (isCmap)
{
// Also overwrite length.
WriteUInt(stream, outputHeader.Length);
}
}
stream.Seek(0, SeekOrigin.Begin);
@@ -158,6 +179,8 @@
// Store the result in checksum adjustment.
WriteUInt(result, checksumAdjustmentLocation, checksumAdjustment);
var canParse = new TrueTypeFontParser().Parse(new TrueTypeDataBytes(new ByteArrayInputBytes(result)));
return result;
}
@@ -187,6 +210,17 @@
stream.Write(buffer, 0, 4);
}
/// <summary>
/// Writes a 16-bit unsigned value to the stream as two bytes, most significant byte first.
/// </summary>
/// <param name="stream">The stream to append the two bytes to.</param>
/// <param name="value">The value to write.</param>
private static void WriteUShort(Stream stream, ushort value)
{
    var highByte = (byte)(value >> 8);
    var lowByte = (byte)value;

    stream.WriteByte(highByte);
    stream.WriteByte(lowByte);
}
private static void WriteUInt(byte[] array, uint offset, uint value)
{
array[offset] = (byte)(value >> 24);
@@ -235,6 +269,80 @@
destination.Write(buffer, 0, read);
}
/// <summary>
/// Builds the bytes of a replacement cmap table containing a single (3, 0) sub-table in format 6,
/// which maps character codes starting at 0 to the glyph indices derived from <paramref name="newEncoding"/>.
/// </summary>
/// <param name="font">The parsed font used to look up the glyph index for each character.</param>
/// <param name="newEncoding">The mapping from each character to its new single byte character code.</param>
/// <returns>The complete cmap table: table header, one sub-table record and the format 6 sub-table.</returns>
private static byte[] GenerateWindowsSymbolTable(TrueTypeFontProgram font, IReadOnlyDictionary<char, byte> newEncoding)
{
    // Values for the cmap table header.
    const ushort tableVersion = 0;
    const ushort subtableCount = 1;

    // Values for the single sub-table record.
    const ushort windowsPlatform = 3;
    const ushort symbolEncoding = 0;

    // Values for the format 6 sub-table itself.
    const ushort subtableFormat = 6;
    const ushort language = 0;
    const ushort firstCode = 0;

    var glyphs = MapNewEncodingToGlyphIndexArray(font, newEncoding);

    var entryCount = (ushort)glyphs.Length;

    // Five ushort fields (format, length, language, first code, entry count) then one ushort per glyph index.
    var subtableLength = (ushort)(SizeOfShort * (5 + glyphs.Length));

    using (var output = new MemoryStream())
    {
        // Table header.
        WriteUShort(output, tableVersion);
        WriteUShort(output, subtableCount);

        // Sub-table record. The sub-table follows immediately after the offset field,
        // so its offset is the current position plus the size of the offset field.
        WriteUShort(output, windowsPlatform);
        WriteUShort(output, symbolEncoding);
        var subtableOffset = (uint)(output.Position + SizeOfInt);
        WriteUInt(output, subtableOffset);

        // Format 6 sub-table.
        WriteUShort(output, subtableFormat);
        WriteUShort(output, subtableLength);
        WriteUShort(output, language);
        WriteUShort(output, firstCode);
        WriteUShort(output, entryCount);

        foreach (var glyphIndex in glyphs)
        {
            WriteUShort(output, glyphIndex);
        }

        return output.ToArray();
    }
}
/// <summary>
/// Builds the glyph index array for the replacement cmap sub-table, where the position in the array
/// is the new character code and the value is the glyph index from the font's original cmap.
/// </summary>
/// <param name="font">The parsed font whose Windows Unicode (or, failing that, Windows Symbol) cmap provides the glyph indices.</param>
/// <param name="newEncoding">The mapping from each character to its new single byte character code.</param>
/// <returns>
/// An array with one more entry than <paramref name="newEncoding"/>: index 0 is always glyph index 0
/// and index <c>n</c> is the glyph index of the character assigned code <c>n</c>.
/// </returns>
/// <exception cref="ArgumentNullException">Either argument is <see langword="null"/>.</exception>
/// <exception cref="InvalidOperationException">
/// The font has neither cmap sub-table, the encoding is empty, or the codes do not form the unbroken run 1, 2, 3...
/// </exception>
private static ushort[] MapNewEncodingToGlyphIndexArray(TrueTypeFontProgram font, IReadOnlyDictionary<char, byte> newEncoding)
{
    if (font == null)
    {
        throw new ArgumentNullException(nameof(font));
    }

    if (newEncoding == null)
    {
        throw new ArgumentNullException(nameof(newEncoding));
    }

    var mappingTable = font.WindowsUnicodeCMap ?? font.WindowsSymbolCMap;

    if (mappingTable == null)
    {
        throw new InvalidOperationException("The font contained no Windows Unicode or Windows Symbol cmap sub-table to read glyph indices from.");
    }

    if (newEncoding.Count == 0)
    {
        throw new InvalidOperationException("The new encoding contained no characters.");
    }

    var glyphIndices = new ushort[newEncoding.Count + 1];

    // Character code 0 is reserved and always points at glyph index 0.
    glyphIndices[0] = 0;

    // The array position must equal the character code, so the codes have to be exactly 1, 2, 3...
    // Checking against the expected code also rejects an encoding which does not start at 1,
    // which would otherwise silently produce a table shifted relative to the codes in use.
    var expectedCode = 1;
    foreach (var pair in newEncoding.OrderBy(x => x.Value))
    {
        if (pair.Value != expectedCode)
        {
            throw new InvalidOperationException($"The new encoding contained a gap: expected code {expectedCode} but found {pair.Value}.");
        }

        // This must be the actual glyph index from the original cmap table.
        glyphIndices[expectedCode] = (ushort)mappingTable.CharacterCodeToGlyphIndex(pair.Key);

        expectedCode++;
    }

    return glyphIndices;
}
private class InputHeader
{
public string Tag => HeaderTable.Tag;
@@ -259,25 +367,5 @@
return string.Equals(tag, Tag, StringComparison.OrdinalIgnoreCase);
}
}
/// <summary>
/// Pairs a table's header entry with the offset at which that table was written in the output.
/// </summary>
private class OutputHeader
{
    /// <summary>
    /// The tag of the table, taken from <see cref="HeaderTable"/>.
    /// </summary>
    public string Tag => HeaderTable.Tag;

    /// <summary>
    /// The header entry for the table.
    /// </summary>
    public TrueTypeHeaderTable HeaderTable { get; }

    /// <summary>
    /// The offset of the table in the output.
    /// </summary>
    public uint OffsetInOutput { get; }

    /// <summary>
    /// Create a new <see cref="OutputHeader"/>.
    /// </summary>
    /// <exception cref="ArgumentException">The <paramref name="headerTable"/> has no tag.</exception>
    public OutputHeader(TrueTypeHeaderTable headerTable, uint offsetInOutput)
    {
        if (headerTable.Tag == null)
        {
            // Report the argument itself: the HeaderTable property has not been assigned at this point.
            throw new ArgumentException($"No tag for header table: {headerTable}.", nameof(headerTable));
        }

        HeaderTable = headerTable;
        OffsetInOutput = offsetInOutput;
    }
}
}
}

View File

@@ -20,9 +20,9 @@
private readonly TrueTypeFontProgram font;
private readonly IReadOnlyList<byte> fontFileBytes;
//private readonly object mappingLock = new object();
private readonly object mappingLock = new object();
private readonly Dictionary<char, byte> characterMapping = new Dictionary<char, byte>();
//private int characterMappingCounter = 1;
private int characterMappingCounter = 1;
public bool HasWidths { get; } = true;
@@ -47,14 +47,13 @@
public TransformationMatrix GetFontMatrix()
{
var unitsPerEm = font.GetFontMatrixMultiplier();
return TransformationMatrix.FromValues(1.0/unitsPerEm, 0, 0, 1.0/unitsPerEm, 0, 0);
return TransformationMatrix.FromValues(1.0 / unitsPerEm, 0, 0, 1.0 / unitsPerEm, 0, 0);
}
public ObjectToken WriteFont(NameToken fontKeyName, Stream outputStream, BuilderContext context)
{
var b = TrueTypeEncodingReplacer.ReplaceCMapTables(new ByteArrayInputBytes(fontFileBytes), characterMapping);
var b = TrueTypeCMapReplacer.ReplaceCMapTables(font, new ByteArrayInputBytes(fontFileBytes), characterMapping);
// TODO: unfortunately we need to subset the font in order to support custom encoding.
// A symbolic font (one which contains characters not in the standard latin set) -
// should contain a MacRoman (1, 0) or Windows Symbolic (3,0) cmap subtable which maps character codes to glyph id.
var bytes = CompressBytes(b);
@@ -114,7 +113,7 @@
}
var glyphId = font.WindowsUnicodeCMap.CharacterCodeToGlyphIndex(kvp.Key);
var width = font.TableRegister.HorizontalMetricsTable.GetAdvanceWidth(glyphId) * scaling;
var width = decimal.Round(font.TableRegister.HorizontalMetricsTable.GetAdvanceWidth(glyphId) * scaling, 2);
widths.Add(new NumericToken(width));
}
@@ -151,29 +150,28 @@
public byte GetValueForCharacter(char character)
{
return (byte) character;
//lock (mappingLock)
//{
// if (characterMapping.TryGetValue(character, out var result))
// {
// return result;
// }
lock (mappingLock)
{
if (characterMapping.TryGetValue(character, out var result))
{
return result;
}
// if (characterMappingCounter > byte.MaxValue)
// {
// throw new NotSupportedException("Cannot support more than 255 separate characters in a simple TrueType font, please" +
// " submit an issue since we will need to add support for composite fonts with multi-byte" +
// " character identifiers.");
// }
if (characterMappingCounter > byte.MaxValue)
{
throw new NotSupportedException("Cannot support more than 255 separate characters in a simple TrueType font, please" +
" submit an issue since we will need to add support for composite fonts with multi-byte" +
" character identifiers.");
}
// var value = (byte) characterMappingCounter++;
var value = (byte)characterMappingCounter++;
// characterMapping[character] = value;
characterMapping[character] = value;
// result = value;
result = value;
// return result;
//}
return result;
}
}
private static byte[] CompressBytes(IReadOnlyList<byte> bytes)