diff --git a/src/UglyToad.PdfPig/Fonts/Composite/Type0Font.cs b/src/UglyToad.PdfPig/Fonts/Composite/Type0Font.cs index 40bfbe13..003d4ef1 100644 --- a/src/UglyToad.PdfPig/Fonts/Composite/Type0Font.cs +++ b/src/UglyToad.PdfPig/Fonts/Composite/Type0Font.cs @@ -86,6 +86,11 @@ return fromFont; } + public PdfRectangle GetBoundingBox(int characterCode) + { + throw new NotImplementedException(); + } + public TransformationMatrix GetFontMatrix() { return CidFont.FontMatrix; diff --git a/src/UglyToad.PdfPig/Fonts/IFont.cs b/src/UglyToad.PdfPig/Fonts/IFont.cs index 68b77db8..3484e598 100644 --- a/src/UglyToad.PdfPig/Fonts/IFont.cs +++ b/src/UglyToad.PdfPig/Fonts/IFont.cs @@ -19,6 +19,8 @@ decimal GetWidth(int characterCode); + PdfRectangle GetBoundingBox(int characterCode); + TransformationMatrix GetFontMatrix(); } } diff --git a/src/UglyToad.PdfPig/Fonts/Simple/TrueTypeSimpleFont.cs b/src/UglyToad.PdfPig/Fonts/Simple/TrueTypeSimpleFont.cs index 59ebf1a7..28f09f2a 100644 --- a/src/UglyToad.PdfPig/Fonts/Simple/TrueTypeSimpleFont.cs +++ b/src/UglyToad.PdfPig/Fonts/Simple/TrueTypeSimpleFont.cs @@ -98,6 +98,11 @@ return widths[index]; } + public PdfRectangle GetBoundingBox(int characterCode) + { + throw new System.NotImplementedException(); + } + public TransformationMatrix GetFontMatrix() { // TODO: should this also use units per em? 
diff --git a/src/UglyToad.PdfPig/Fonts/Simple/Type1FontSimple.cs b/src/UglyToad.PdfPig/Fonts/Simple/Type1FontSimple.cs index 0032b37e..f45f8e35 100644 --- a/src/UglyToad.PdfPig/Fonts/Simple/Type1FontSimple.cs +++ b/src/UglyToad.PdfPig/Fonts/Simple/Type1FontSimple.cs @@ -95,6 +95,11 @@ return widths[characterCode - firstChar]; } + public PdfRectangle GetBoundingBox(int characterCode) + { + throw new System.NotImplementedException(); + } + public TransformationMatrix GetFontMatrix() { return fontMatrix; diff --git a/src/UglyToad.PdfPig/Fonts/Simple/Type1Standard14Font.cs b/src/UglyToad.PdfPig/Fonts/Simple/Type1Standard14Font.cs index 2ce52a09..d3f6646b 100644 --- a/src/UglyToad.PdfPig/Fonts/Simple/Type1Standard14Font.cs +++ b/src/UglyToad.PdfPig/Fonts/Simple/Type1Standard14Font.cs @@ -61,6 +61,11 @@ return metrics.WidthX; } + public PdfRectangle GetBoundingBox(int characterCode) + { + throw new NotImplementedException(); + } + public TransformationMatrix GetFontMatrix() { return FontMatrix; diff --git a/src/UglyToad.PdfPig/Fonts/Simple/Type3Font.cs b/src/UglyToad.PdfPig/Fonts/Simple/Type3Font.cs index 1f6a34df..696c6916 100644 --- a/src/UglyToad.PdfPig/Fonts/Simple/Type3Font.cs +++ b/src/UglyToad.PdfPig/Fonts/Simple/Type3Font.cs @@ -78,6 +78,11 @@ return widths[characterCode - firstChar]; } + public PdfRectangle GetBoundingBox(int characterCode) + { + throw new System.NotImplementedException(); + } + public TransformationMatrix GetFontMatrix() { return fontMatrix; diff --git a/src/UglyToad.PdfPig/Fonts/TrueType/Parser/TableRegister.cs b/src/UglyToad.PdfPig/Fonts/TrueType/Parser/TableRegister.cs index f05fdf70..b0da31be 100644 --- a/src/UglyToad.PdfPig/Fonts/TrueType/Parser/TableRegister.cs +++ b/src/UglyToad.PdfPig/Fonts/TrueType/Parser/TableRegister.cs @@ -1,7 +1,5 @@ namespace UglyToad.PdfPig.Fonts.TrueType.Parser { - using System; - using System.Collections.Generic; using Tables; /// @@ -22,5 +20,12 @@ public BasicMaximumProfileTable MaximumProfileTable { get; 
set; } public PostScriptTable PostScriptTable { get; set; } + + /// + /// Defines mapping of character codes to glyph index values in the font. + /// Can contain multiple sub-tables to support multiple encoding schemes. + /// Where a character code isn't found it should map to index 0. + /// + public CMapTable CMapTable { get; set; } } } diff --git a/src/UglyToad.PdfPig/Fonts/TrueType/Parser/TrueTypeFontParser.cs b/src/UglyToad.PdfPig/Fonts/TrueType/Parser/TrueTypeFontParser.cs index 815b3a04..81d1b076 100644 --- a/src/UglyToad.PdfPig/Fonts/TrueType/Parser/TrueTypeFontParser.cs +++ b/src/UglyToad.PdfPig/Fonts/TrueType/Parser/TrueTypeFontParser.cs @@ -114,6 +114,10 @@ private static void OptionallyParseTables(IReadOnlyDictionary tables, TrueTypeDataBytes data, TableRegister tableRegister) { // cmap + if (tables.TryGetValue(TrueTypeHeaderTable.Cmap, out var cmap)) + { + tableRegister.CMapTable = CMapTable.Load(data, cmap, tableRegister); + } // hmtx if (tables.TryGetValue(TrueTypeHeaderTable.Hmtx, out var hmtxHeaderTable)) @@ -133,3 +137,4 @@ } } } + diff --git a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/ByteEncodingCMapTable.cs b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/ByteEncodingCMapTable.cs index 97bb3712..3f3d5054 100644 --- a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/ByteEncodingCMapTable.cs +++ b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/ByteEncodingCMapTable.cs @@ -1,15 +1,34 @@ namespace UglyToad.PdfPig.Fonts.TrueType.Tables.CMapSubTables { + /// + /// + /// The format 0 sub-table where character codes and glyph indices are restricted to a single byte. 
+ /// internal class ByteEncodingCMapTable : ICMapSubTable { - public static ByteEncodingCMapTable Load(TrueTypeDataBytes data) + public int PlatformId { get; } + + public int EncodingId { get; } + + private ByteEncodingCMapTable(int platformId, int encodingId) + { + PlatformId = platformId; + EncodingId = encodingId; + } + + public static ByteEncodingCMapTable Load(TrueTypeDataBytes data, int platformId, int encodingId) { var length = data.ReadUnsignedShort(); var version = data.ReadUnsignedShort(); var glyphMapping = data.ReadByteArray(256); - return new ByteEncodingCMapTable(); + return new ByteEncodingCMapTable(platformId, encodingId); + } + + public int CharacterCodeToGlyphIndex(int characterCode) + { + throw new System.NotImplementedException(); } } } \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/Format4CMapTable.cs b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/Format4CMapTable.cs new file mode 100644 index 00000000..633f3c98 --- /dev/null +++ b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/Format4CMapTable.cs @@ -0,0 +1,138 @@ +// ReSharper disable UnusedVariable +namespace UglyToad.PdfPig.Fonts.TrueType.Tables.CMapSubTables +{ + using System; + using System.Collections.Generic; + + /// + /// + /// A format 4 CMap sub-table which defines gappy ranges of character code to glyph index mappings. + /// + internal class Format4CMapTable : ICMapSubTable + { + public int PlatformId { get; } + + public int EncodingId { get; } + + public int Language { get; } + + public IReadOnlyList Segments { get; } + + public IReadOnlyList GlyphIds { get; } + + /// + /// Create a new Format4CMapTable. + /// + public Format4CMapTable(int platformId, int encodingId, int language, IReadOnlyList segments, IReadOnlyList glyphIds) + { + PlatformId = platformId; + EncodingId = encodingId; + Language = language; + Segments = segments ?? throw new ArgumentNullException(nameof(segments)); + GlyphIds = glyphIds ?? 
throw new ArgumentNullException(nameof(glyphIds)); + } + + public int CharacterCodeToGlyphIndex(int characterCode) + { + throw new NotImplementedException(); + } + + public static Format4CMapTable Load(TrueTypeDataBytes data, int platformId, int encodingId) + { + // Length in bytes. + var length = data.ReadUnsignedShort(); + + // Used for sub-tables with a Macintosh platform ID. + var version = data.ReadUnsignedShort(); + + var doubleSegmentCount = data.ReadUnsignedShort(); + + // Defines the number of contiguous segments. + var segmentCount = doubleSegmentCount / 2; + + // Binary search helper values derived from the segment count; read but not used. + var searchRange = data.ReadUnsignedShort(); + var entrySelector = data.ReadUnsignedShort(); + var rangeShift = data.ReadUnsignedShort(); + + // End character codes for each segment. + var endCounts = data.ReadUnsignedShortArray(segmentCount); + + // Should be zero. + var reservedPad = data.ReadUnsignedShort(); + + // Start character codes for each segment. + var startCounts = data.ReadUnsignedShortArray(segmentCount); + + // Delta for all character codes in the segment. Contrary to the spec this is actually a short[]. + var idDeltas = data.ReadShortArray(segmentCount); + + var idRangeOffsets = data.ReadUnsignedShortArray(segmentCount); + + const int singleIntsRead = 16; + const int intArraysRead = 8; + + var remainingBytes = length - (singleIntsRead + intArraysRead * segmentCount); + + var remainingInts = remainingBytes / 2; + + var glyphIndices = data.ReadUnsignedShortArray(remainingInts); + + var segments = new Segment[endCounts.Length]; + for (int i = 0; i < endCounts.Length; i++) + { + var start = startCounts[i]; + var end = endCounts[i]; + + var delta = idDeltas[i]; + var offsets = idRangeOffsets[i]; + + segments[i] = new Segment(start, end, delta, offsets); + } + + return new Format4CMapTable(platformId, encodingId, version, segments, glyphIndices); + } + + /// + /// A contiguous segment which maps character to glyph codes in a Format 4 CMap sub-table. 
+ /// + public struct Segment + { + /// + /// The start character code in the range. + /// + public int StartCode { get; } + + /// + /// The end character code in the range. + /// + public int EndCode { get; } + + /// + /// The delta for the codes in the segment. + /// + public int IdDelta { get; } + + /// + /// Offset in bytes to glyph index array. + /// + public int IdRangeOffset { get; } + + /// + /// Create a new Segment. + /// + public Segment(int startCode, int endCode, int idDelta, int idRangeOffset) + { + StartCode = startCode; + EndCode = endCode; + IdDelta = idDelta; + IdRangeOffset = idRangeOffset; + } + + public override string ToString() + { + return $"Start: {StartCode}, End: {EndCode}, Delta: {IdDelta}, Offset: {IdRangeOffset}"; + } + } + } +} diff --git a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/HighByteMappingCMapTable.cs b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/HighByteMappingCMapTable.cs index 244a54fb..605393bd 100644 --- a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/HighByteMappingCMapTable.cs +++ b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/HighByteMappingCMapTable.cs @@ -3,9 +3,29 @@ using System; using System.Collections.Generic; + /// + /// + /// A format 2 sub-table for Chinese, Japanese and Korean characters. + /// Contains mixed 8/16 bit encodings. 
+ /// internal class HighByteMappingCMapTable : ICMapSubTable { - public static HighByteMappingCMapTable Load(TrueTypeDataBytes data, int numberOfGlyphs) + public int PlatformId { get; } + + public int EncodingId { get; } + + public HighByteMappingCMapTable(int platformId, int encodingId) + { + PlatformId = platformId; + EncodingId = encodingId; + } + + public int CharacterCodeToGlyphIndex(int characterCode) + { + throw new NotImplementedException(); + } + + public static HighByteMappingCMapTable Load(TrueTypeDataBytes data, int numberOfGlyphs, int platformId, int encodingId) { var length = data.ReadUnsignedShort(); var version = data.ReadUnsignedShort(); @@ -63,7 +83,7 @@ } } - return new HighByteMappingCMapTable(); + return new HighByteMappingCMapTable(platformId, encodingId); } public struct SubHeader diff --git a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/ICMapSubTable.cs b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/ICMapSubTable.cs index 3b1995ec..f1add3d7 100644 --- a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/ICMapSubTable.cs +++ b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/ICMapSubTable.cs @@ -1,7 +1,28 @@ namespace UglyToad.PdfPig.Fonts.TrueType.Tables.CMapSubTables { + /// + /// In a TrueType font the CMap table maps from character codes to glyph indices. + /// A font which can run on multiple platforms will have multiple encoding tables. These are stored as multiple + /// sub-tables. The ICMapSubTable represents a single sub-table. + /// internal interface ICMapSubTable { + /// + /// The platform identifier. + /// + /// + /// 0: Unicode + /// 1: Macintosh + /// 2: Reserved + /// 3: Microsoft + /// + int PlatformId { get; } + /// + /// Platform specific encoding identifier. 
+ /// + int EncodingId { get; } + + int CharacterCodeToGlyphIndex(int characterCode); } } \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/SegmentMappingDeltaValuesCMapTable.cs b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/SegmentMappingDeltaValuesCMapTable.cs deleted file mode 100644 index 95abd494..00000000 --- a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/SegmentMappingDeltaValuesCMapTable.cs +++ /dev/null @@ -1,10 +0,0 @@ -namespace UglyToad.PdfPig.Fonts.TrueType.Tables.CMapSubTables -{ - internal class SegmentMappingDeltaValuesCMapTable : ICMapSubTable - { - public static SegmentMappingDeltaValuesCMapTable Load(TrueTypeDataBytes data) - { - return null; - } - } -} \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapTable.cs b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapTable.cs index 8a5a1bb3..11a5825f 100644 --- a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapTable.cs +++ b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapTable.cs @@ -52,22 +52,50 @@ var format = data.ReadUnsignedShort(); + /* + * There are 9 currently available formats: + * 0: Character code and glyph indices are restricted to a single byte. Rare. + * 2: Suitable for CJK characters. Contains mixed 8/16 bit encoding. + * 4: 2 byte encoding format. Used when character codes fall into (gappy) contiguous ranges. + * 6: 'Trimmed table mapping', used when character codes fall into a single contiguous range. This is dense mapping. + * 8: 16/32 bit coverage. Uses mixed length character codes. + * 10: Similar to format 6, trimmed table/array for 32 bits. + * 12: Segmented coverage, similar to format 4 but for 32 bit/4 byte. + * 13: Many to one mappings. Used by Apple for the LastResort font. + * 14: Unicode variation sequences. + * + * Many of the formats are obsolete or not really used. Modern fonts will tend to use formats 4, 6 and 12. 
+ * For PDF we will support 0, 2 and 4 since these are in the original TrueType spec. + */ switch (format) { case 0: { // Simple 1 to 1 mapping of character codes to glyph codes. - var item = ByteEncodingCMapTable.Load(data); + var item = ByteEncodingCMapTable.Load(data, header.PlatformId, header.EncodingId); tables.Add(item); break; } - case 1: + case 2: { // Useful for CJK characters. Use mixed 8/16 bit encoding. - var item = HighByteMappingCMapTable.Load(data, numberofGlyphs); + var item = HighByteMappingCMapTable.Load(data, numberofGlyphs, header.PlatformId, header.EncodingId); tables.Add(item); break; } + case 4: + { + // Microsoft's standard mapping table. + var item = Format4CMapTable.Load(data, header.PlatformId, header.EncodingId); + tables.Add(item); + + break; + } + case 6: + { + // TODO: support format 6 for modern fonts. + break; + } } } diff --git a/src/UglyToad.PdfPig/Fonts/TrueType/TrueTypeDataBytes.cs b/src/UglyToad.PdfPig/Fonts/TrueType/TrueTypeDataBytes.cs index 8594cfbf..3a408624 100644 --- a/src/UglyToad.PdfPig/Fonts/TrueType/TrueTypeDataBytes.cs +++ b/src/UglyToad.PdfPig/Fonts/TrueType/TrueTypeDataBytes.cs @@ -173,5 +173,17 @@ offsets[i] = ReadUnsignedInt(); } } + + public short[] ReadShortArray(int length) + { + var result = new short[length]; + + for (int i = 0; i < length; i++) + { + result[i] = ReadSignedShort(); + } + + return result; + } } }