From 796a3d615ebff4198a95178baed72cac242eb382 Mon Sep 17 00:00:00 2001 From: Eliot Jones Date: Fri, 30 Mar 2018 23:12:55 +0100 Subject: [PATCH 1/4] add infrastructure to font to retrieve character bounds, this is unused and will throw for now. add format 4 cmap subtable for truetype cmap table --- .../Fonts/Composite/Type0Font.cs | 5 + src/UglyToad.PdfPig/Fonts/IFont.cs | 2 + .../Fonts/Simple/TrueTypeSimpleFont.cs | 5 + .../Fonts/Simple/Type1FontSimple.cs | 5 + .../Fonts/Simple/Type1Standard14Font.cs | 5 + src/UglyToad.PdfPig/Fonts/Simple/Type3Font.cs | 5 + .../Fonts/TrueType/Parser/TableRegister.cs | 9 +- .../TrueType/Parser/TrueTypeFontParser.cs | 5 + .../CMapSubTables/ByteEncodingCMapTable.cs | 23 ++- .../Tables/CMapSubTables/Format4CMapTable.cs | 138 ++++++++++++++++++ .../CMapSubTables/HighByteMappingCMapTable.cs | 24 ++- .../Tables/CMapSubTables/ICMapSubTable.cs | 21 +++ .../SegmentMappingDeltaValuesCMapTable.cs | 10 -- .../Fonts/TrueType/Tables/CMapTable.cs | 34 ++++- .../Fonts/TrueType/TrueTypeDataBytes.cs | 12 ++ 15 files changed, 284 insertions(+), 19 deletions(-) create mode 100644 src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/Format4CMapTable.cs delete mode 100644 src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/SegmentMappingDeltaValuesCMapTable.cs diff --git a/src/UglyToad.PdfPig/Fonts/Composite/Type0Font.cs b/src/UglyToad.PdfPig/Fonts/Composite/Type0Font.cs index 40bfbe13..003d4ef1 100644 --- a/src/UglyToad.PdfPig/Fonts/Composite/Type0Font.cs +++ b/src/UglyToad.PdfPig/Fonts/Composite/Type0Font.cs @@ -86,6 +86,11 @@ return fromFont; } + public PdfRectangle GetBoundingBox(int characterCode) + { + throw new NotImplementedException(); + } + public TransformationMatrix GetFontMatrix() { return CidFont.FontMatrix; diff --git a/src/UglyToad.PdfPig/Fonts/IFont.cs b/src/UglyToad.PdfPig/Fonts/IFont.cs index 68b77db8..3484e598 100644 --- a/src/UglyToad.PdfPig/Fonts/IFont.cs +++ b/src/UglyToad.PdfPig/Fonts/IFont.cs @@ -19,6 +19,8 @@ decimal 
GetWidth(int characterCode); + PdfRectangle GetBoundingBox(int characterCode); + TransformationMatrix GetFontMatrix(); } } diff --git a/src/UglyToad.PdfPig/Fonts/Simple/TrueTypeSimpleFont.cs b/src/UglyToad.PdfPig/Fonts/Simple/TrueTypeSimpleFont.cs index 59ebf1a7..28f09f2a 100644 --- a/src/UglyToad.PdfPig/Fonts/Simple/TrueTypeSimpleFont.cs +++ b/src/UglyToad.PdfPig/Fonts/Simple/TrueTypeSimpleFont.cs @@ -98,6 +98,11 @@ return widths[index]; } + public PdfRectangle GetBoundingBox(int characterCode) + { + throw new System.NotImplementedException(); + } + public TransformationMatrix GetFontMatrix() { // TODO: should this also use units per em? diff --git a/src/UglyToad.PdfPig/Fonts/Simple/Type1FontSimple.cs b/src/UglyToad.PdfPig/Fonts/Simple/Type1FontSimple.cs index 0032b37e..f45f8e35 100644 --- a/src/UglyToad.PdfPig/Fonts/Simple/Type1FontSimple.cs +++ b/src/UglyToad.PdfPig/Fonts/Simple/Type1FontSimple.cs @@ -95,6 +95,11 @@ return widths[characterCode - firstChar]; } + public PdfRectangle GetBoundingBox(int characterCode) + { + throw new System.NotImplementedException(); + } + public TransformationMatrix GetFontMatrix() { return fontMatrix; diff --git a/src/UglyToad.PdfPig/Fonts/Simple/Type1Standard14Font.cs b/src/UglyToad.PdfPig/Fonts/Simple/Type1Standard14Font.cs index 2ce52a09..d3f6646b 100644 --- a/src/UglyToad.PdfPig/Fonts/Simple/Type1Standard14Font.cs +++ b/src/UglyToad.PdfPig/Fonts/Simple/Type1Standard14Font.cs @@ -61,6 +61,11 @@ return metrics.WidthX; } + public PdfRectangle GetBoundingBox(int characterCode) + { + throw new NotImplementedException(); + } + public TransformationMatrix GetFontMatrix() { return FontMatrix; diff --git a/src/UglyToad.PdfPig/Fonts/Simple/Type3Font.cs b/src/UglyToad.PdfPig/Fonts/Simple/Type3Font.cs index 1f6a34df..696c6916 100644 --- a/src/UglyToad.PdfPig/Fonts/Simple/Type3Font.cs +++ b/src/UglyToad.PdfPig/Fonts/Simple/Type3Font.cs @@ -78,6 +78,11 @@ return widths[characterCode - firstChar]; } + public PdfRectangle GetBoundingBox(int 
characterCode) + { + throw new System.NotImplementedException(); + } + public TransformationMatrix GetFontMatrix() { return fontMatrix; diff --git a/src/UglyToad.PdfPig/Fonts/TrueType/Parser/TableRegister.cs b/src/UglyToad.PdfPig/Fonts/TrueType/Parser/TableRegister.cs index f05fdf70..b0da31be 100644 --- a/src/UglyToad.PdfPig/Fonts/TrueType/Parser/TableRegister.cs +++ b/src/UglyToad.PdfPig/Fonts/TrueType/Parser/TableRegister.cs @@ -1,7 +1,5 @@ namespace UglyToad.PdfPig.Fonts.TrueType.Parser { - using System; - using System.Collections.Generic; using Tables; /// @@ -22,5 +20,12 @@ public BasicMaximumProfileTable MaximumProfileTable { get; set; } public PostScriptTable PostScriptTable { get; set; } + + /// + /// Defines mapping of character codes to glyph index values in the font. + /// Can contain multiple sub-tables to support multiple encoding schemes. + /// Where a character code isn't found it should map to index 0. + /// + public CMapTable CMapTable { get; set; } } } diff --git a/src/UglyToad.PdfPig/Fonts/TrueType/Parser/TrueTypeFontParser.cs b/src/UglyToad.PdfPig/Fonts/TrueType/Parser/TrueTypeFontParser.cs index 815b3a04..81d1b076 100644 --- a/src/UglyToad.PdfPig/Fonts/TrueType/Parser/TrueTypeFontParser.cs +++ b/src/UglyToad.PdfPig/Fonts/TrueType/Parser/TrueTypeFontParser.cs @@ -114,6 +114,10 @@ private static void OptionallyParseTables(IReadOnlyDictionary tables, TrueTypeDataBytes data, TableRegister tableRegister) { // cmap + if (tables.TryGetValue(TrueTypeHeaderTable.Cmap, out var cmap)) + { + tableRegister.CMapTable = CMapTable.Load(data, cmap, tableRegister); + } // hmtx if (tables.TryGetValue(TrueTypeHeaderTable.Hmtx, out var hmtxHeaderTable)) @@ -133,3 +137,4 @@ } } } + diff --git a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/ByteEncodingCMapTable.cs b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/ByteEncodingCMapTable.cs index 97bb3712..3f3d5054 100644 ---
a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/ByteEncodingCMapTable.cs +++ b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/ByteEncodingCMapTable.cs @@ -1,15 +1,34 @@ namespace UglyToad.PdfPig.Fonts.TrueType.Tables.CMapSubTables { + /// + /// + /// The format 0 sub-table where character codes and glyph indices are restricted to a single byte. + /// internal class ByteEncodingCMapTable : ICMapSubTable { - public static ByteEncodingCMapTable Load(TrueTypeDataBytes data) + public int PlatformId { get; } + + public int EncodingId { get; } + + private ByteEncodingCMapTable(int platformId, int encodingId) + { + PlatformId = platformId; + EncodingId = encodingId; + } + + public static ByteEncodingCMapTable Load(TrueTypeDataBytes data, int platformId, int encodingId) { var length = data.ReadUnsignedShort(); var version = data.ReadUnsignedShort(); var glyphMapping = data.ReadByteArray(256); - return new ByteEncodingCMapTable(); + return new ByteEncodingCMapTable(platformId, encodingId); + } + + public int CharacterCodeToGlyphIndex(int characterCode) + { + throw new System.NotImplementedException(); } } } \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/Format4CMapTable.cs b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/Format4CMapTable.cs new file mode 100644 index 00000000..633f3c98 --- /dev/null +++ b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/Format4CMapTable.cs @@ -0,0 +1,138 @@ +// ReSharper disable UnusedVariable +namespace UglyToad.PdfPig.Fonts.TrueType.Tables.CMapSubTables +{ + using System; + using System.Collections.Generic; + + /// + /// + /// A format 4 CMap sub-table which defines gappy ranges of character code to glyph index mappings.
+ /// + internal class Format4CMapTable : ICMapSubTable + { + public int PlatformId { get; } + + public int EncodingId { get; } + + public int Language { get; } + + public IReadOnlyList Segments { get; } + + public IReadOnlyList GlyphIds { get; } + + /// + /// Create a new . + /// + public Format4CMapTable(int platformId, int encodingId, int language, IReadOnlyList segments, IReadOnlyList glyphIds) + { + PlatformId = platformId; + EncodingId = encodingId; + Language = language; + Segments = segments ?? throw new ArgumentNullException(nameof(segments)); + GlyphIds = glyphIds ?? throw new ArgumentNullException(nameof(glyphIds)); + } + + public int CharacterCodeToGlyphIndex(int characterCode) + { + throw new NotImplementedException(); + } + + public static Format4CMapTable Load(TrueTypeDataBytes data, int platformId, int encodingId) + { + // Length in bytes. + var length = data.ReadUnsignedShort(); + + // Used for sub-tables with a Macintosh platform ID. + var version = data.ReadUnsignedShort(); + + var doubleSegmentCount = data.ReadUnsignedShort(); + + // Defines the number of contiguous segments. + var segmentCount = doubleSegmentCount / 2; + + // Some crazy sum. + var searchRange = data.ReadUnsignedShort(); + var entrySelector = data.ReadUnsignedShort(); + var rangeShift = data.ReadUnsignedShort(); + + // End character codes for each segment. + var endCounts = data.ReadUnsignedShortArray(segmentCount); + + // Should be zero. + var reservedPad = data.ReadUnsignedShort(); + + // Start character codes for each segment. + var startCounts = data.ReadUnsignedShortArray(segmentCount); + + // Delta for all character codes in the segment. Contrary to the spec this is actually a short[]. 
+ var idDeltas = data.ReadShortArray(segmentCount); + + var idRangeOffsets = data.ReadUnsignedShortArray(segmentCount); + + const int singleIntsRead = 16; + const int intArraysRead = 8; + + var remainingBytes = length - (singleIntsRead + intArraysRead * segmentCount); + + var remainingInts = remainingBytes / 2; + + var glyphIndices = data.ReadUnsignedShortArray(remainingInts); + + var segments = new Segment[endCounts.Length]; + for (int i = 0; i < endCounts.Length; i++) + { + var start = startCounts[i]; + var end = endCounts[i]; + + var delta = idDeltas[i]; + var offsets = idRangeOffsets[i]; + + segments[i] = new Segment(start, end, delta, offsets); + } + + return new Format4CMapTable(platformId, encodingId, version, segments, glyphIndices); + } + + /// + /// A contiguous segment which maps character to glyph codes in a Format 4 CMap sub-table. + /// + public struct Segment + { + /// + /// The start character code in the range. + /// + public int StartCode { get; } + + /// + /// The end character code in the range. + /// + public int EndCode { get; } + + /// + /// The delta for the codes in the segment. + /// + public int IdDelta { get; } + + /// + /// Offset in bytes to glyph index array. + /// + public int IdRangeOffset { get; } + + /// + /// Create a new . 
+ /// + public Segment(int startCode, int endCode, int idDelta, int idRangeOffset) + { + StartCode = startCode; + EndCode = endCode; + IdDelta = idDelta; + IdRangeOffset = idRangeOffset; + } + + public override string ToString() + { + return $"Start: {StartCode}, End: {EndCode}, Delta: {IdDelta}, Offset: {IdRangeOffset}"; + } + } + } +} diff --git a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/HighByteMappingCMapTable.cs b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/HighByteMappingCMapTable.cs index 244a54fb..605393bd 100644 --- a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/HighByteMappingCMapTable.cs +++ b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/HighByteMappingCMapTable.cs @@ -3,9 +3,29 @@ using System; using System.Collections.Generic; + /// + /// + /// A format 2 sub-table for Chinese, Japanese and Korean characters. + /// Contains mixed 8/16 bit encodings. + /// internal class HighByteMappingCMapTable : ICMapSubTable { - public static HighByteMappingCMapTable Load(TrueTypeDataBytes data, int numberOfGlyphs) + public int PlatformId { get; } + + public int EncodingId { get; } + + public HighByteMappingCMapTable(int platformId, int encodingId) + { + PlatformId = platformId; + EncodingId = encodingId; + } + + public int CharacterCodeToGlyphIndex(int characterCode) + { + throw new NotImplementedException(); + } + + public static HighByteMappingCMapTable Load(TrueTypeDataBytes data, int numberOfGlyphs, int platformId, int encodingId) { var length = data.ReadUnsignedShort(); var version = data.ReadUnsignedShort(); @@ -63,7 +83,7 @@ } } - return new HighByteMappingCMapTable(); + return new HighByteMappingCMapTable(platformId, encodingId); } public struct SubHeader diff --git a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/ICMapSubTable.cs b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/ICMapSubTable.cs index 3b1995ec..f1add3d7 100644 --- 
a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/ICMapSubTable.cs +++ b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/ICMapSubTable.cs @@ -1,7 +1,28 @@ namespace UglyToad.PdfPig.Fonts.TrueType.Tables.CMapSubTables { + /// + /// In a TrueType font the CMap table maps from character codes to glyph indices. + /// A font which can run on multiple platforms will have multiple encoding tables. These are stored as multiple + /// sub-tables. The ICMapSubTable represents a single sub-table. + /// internal interface ICMapSubTable { + /// + /// The platform identifier. + /// + /// + /// 0: Unicode + /// 1: Macintosh + /// 2: Reserved + /// 3: Microsoft + /// + int PlatformId { get; } + /// + /// Platform specific encoding identifier. + /// + int EncodingId { get; } + + int CharacterCodeToGlyphIndex(int characterCode); } } \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/SegmentMappingDeltaValuesCMapTable.cs b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/SegmentMappingDeltaValuesCMapTable.cs deleted file mode 100644 index 95abd494..00000000 --- a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/SegmentMappingDeltaValuesCMapTable.cs +++ /dev/null @@ -1,10 +0,0 @@ -namespace UglyToad.PdfPig.Fonts.TrueType.Tables.CMapSubTables -{ - internal class SegmentMappingDeltaValuesCMapTable : ICMapSubTable - { - public static SegmentMappingDeltaValuesCMapTable Load(TrueTypeDataBytes data) - { - return null; - } - } -} \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapTable.cs b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapTable.cs index 8a5a1bb3..11a5825f 100644 --- a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapTable.cs +++ b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapTable.cs @@ -52,22 +52,50 @@ var format = data.ReadUnsignedShort(); + /* + * There are 9 currently available formats: + * 0: Character code and glyph indices are restricted to a single byte. Rare.
+ * 2: Suitable for CJK characters. Contains mixed 8/16 bit encoding. + * 4: 2 byte encoding format. Used when character codes fall into (gappy) contiguous ranges. + * 6: 'Trimmed table mapping', used when character codes fall into a single contiguous range. This is dense mapping. + * 8: 16/32 bit coverage. Uses mixed length character codes. + * 10: Similar to format 6, trimmed table/array for 32 bits. + * 12: Segmented coverage, similar to format 4 but for 32 bit/4 byte. + * 13: Many to one mappings. Used by Apple for the LastResort font. + * 14: Unicode variation sequences. + * + * Many of the formats are obsolete or not really used. Modern fonts will tend to use formats 4, 6 and 12. + * For PDF we will support 0, 2 and 4 since these are in the original TrueType spec. + */ switch (format) { case 0: { // Simple 1 to 1 mapping of character codes to glyph codes. - var item = ByteEncodingCMapTable.Load(data); + var item = ByteEncodingCMapTable.Load(data, header.PlatformId, header.EncodingId); tables.Add(item); break; } - case 1: + case 2: { // Useful for CJK characters. Use mixed 8/16 bit encoding. - var item = HighByteMappingCMapTable.Load(data, numberofGlyphs); + var item = HighByteMappingCMapTable.Load(data, numberofGlyphs, header.PlatformId, header.EncodingId); tables.Add(item); break; } + case 4: + { + // Microsoft's standard mapping table. + var item = Format4CMapTable.Load(data, header.PlatformId, header.EncodingId); + tables.Add(item); + + break; + } + case 6: + { + // TODO: support format 6 for modern fonts.
+ break; + } } } diff --git a/src/UglyToad.PdfPig/Fonts/TrueType/TrueTypeDataBytes.cs b/src/UglyToad.PdfPig/Fonts/TrueType/TrueTypeDataBytes.cs index 8594cfbf..3a408624 100644 --- a/src/UglyToad.PdfPig/Fonts/TrueType/TrueTypeDataBytes.cs +++ b/src/UglyToad.PdfPig/Fonts/TrueType/TrueTypeDataBytes.cs @@ -173,5 +173,17 @@ offsets[i] = ReadUnsignedInt(); } } + + public short[] ReadShortArray(int length) + { + var result = new short[length]; + + for (int i = 0; i < length; i++) + { + result[i] = ReadSignedShort(); + } + + return result; + } } } From 92c0ef14cbd7565cd242b7e6882c161b3e39f7c6 Mon Sep 17 00:00:00 2001 From: Eliot Jones Date: Sat, 31 Mar 2018 12:11:12 +0100 Subject: [PATCH 2/4] support format 6 cmap sub tables for truetype fonts. pass the truetypefont to the ifont implementation so we can use it to access font data --- .../Parser/TrueTypeFontParserTests.cs | 2 +- .../Parser/Handlers/TrueTypeFontHandler.cs | 2 +- .../Fonts/Simple/TrueTypeSimpleFont.cs | 11 +++- .../TrueType/Parser/TrueTypeFontParser.cs | 7 ++- .../Tables/CMapSubTables/Format4CMapTable.cs | 5 +- .../Tables/CMapSubTables/ICMapSubTable.cs | 5 ++ .../TrimmedTableMappingCMapTable.cs | 60 +++++++++++++++++++ .../Fonts/TrueType/Tables/CMapTable.cs | 4 +- .../Fonts/TrueType/TrueTypeFont.cs | 19 ++++-- 9 files changed, 100 insertions(+), 15 deletions(-) create mode 100644 src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/TrimmedTableMappingCMapTable.cs diff --git a/src/UglyToad.PdfPig.Tests/Fonts/TrueType/Parser/TrueTypeFontParserTests.cs b/src/UglyToad.PdfPig.Tests/Fonts/TrueType/Parser/TrueTypeFontParserTests.cs index e1955057..2b223f2e 100644 --- a/src/UglyToad.PdfPig.Tests/Fonts/TrueType/Parser/TrueTypeFontParserTests.cs +++ b/src/UglyToad.PdfPig.Tests/Fonts/TrueType/Parser/TrueTypeFontParserTests.cs @@ -122,7 +122,7 @@ name = "cvt "; } - var match = font.Tables[name]; + var match = font.TableHeaders[name]; var offset = long.Parse(parts[1]); var length = long.Parse(parts[2]); diff --git 
a/src/UglyToad.PdfPig/Fonts/Parser/Handlers/TrueTypeFontHandler.cs b/src/UglyToad.PdfPig/Fonts/Parser/Handlers/TrueTypeFontHandler.cs index bdce7c4c..ec16712a 100644 --- a/src/UglyToad.PdfPig/Fonts/Parser/Handlers/TrueTypeFontHandler.cs +++ b/src/UglyToad.PdfPig/Fonts/Parser/Handlers/TrueTypeFontHandler.cs @@ -70,7 +70,7 @@ Encoding encoding = encodingReader.Read(dictionary, isLenientParsing, descriptor); - return new TrueTypeSimpleFont(name, firstCharacter, lastCharacter, widths, descriptor, toUnicodeCMap, encoding); + return new TrueTypeSimpleFont(name, firstCharacter, lastCharacter, widths, descriptor, toUnicodeCMap, encoding, font); } private TrueTypeFont ParseTrueTypeFont(FontDescriptor descriptor) diff --git a/src/UglyToad.PdfPig/Fonts/Simple/TrueTypeSimpleFont.cs b/src/UglyToad.PdfPig/Fonts/Simple/TrueTypeSimpleFont.cs index 28f09f2a..a049d70c 100644 --- a/src/UglyToad.PdfPig/Fonts/Simple/TrueTypeSimpleFont.cs +++ b/src/UglyToad.PdfPig/Fonts/Simple/TrueTypeSimpleFont.cs @@ -7,6 +7,7 @@ using Geometry; using IO; using Tokenization.Tokens; + using TrueType; using Util.JetBrains.Annotations; internal class TrueTypeSimpleFont : IFont @@ -19,6 +20,8 @@ private readonly FontDescriptor descriptor; [CanBeNull] private readonly Encoding encoding; + [CanBeNull] + private readonly TrueTypeFont font; public NameToken Name { get; } @@ -27,16 +30,18 @@ [NotNull] public ToUnicodeCMap ToUnicode { get; set; } - public TrueTypeSimpleFont(NameToken name, int firstCharacterCode, int lastCharacterCode, decimal[] widths, + public TrueTypeSimpleFont(NameToken name, int firstCharacterCode, int lastCharacterCode, decimal[] widths, FontDescriptor descriptor, - [CanBeNull]CMap toUnicodeCMap, - [CanBeNull]Encoding encoding) + [CanBeNull] CMap toUnicodeCMap, + [CanBeNull] Encoding encoding, + [CanBeNull]TrueTypeFont font) { this.firstCharacterCode = firstCharacterCode; this.lastCharacterCode = lastCharacterCode; this.widths = widths; this.descriptor = descriptor; this.encoding = 
encoding; + this.font = font; Name = name; IsVertical = false; diff --git a/src/UglyToad.PdfPig/Fonts/TrueType/Parser/TrueTypeFontParser.cs b/src/UglyToad.PdfPig/Fonts/TrueType/Parser/TrueTypeFontParser.cs index 81d1b076..ede11ee6 100644 --- a/src/UglyToad.PdfPig/Fonts/TrueType/Parser/TrueTypeFontParser.cs +++ b/src/UglyToad.PdfPig/Fonts/TrueType/Parser/TrueTypeFontParser.cs @@ -11,9 +11,13 @@ { var version = (decimal)data.Read32Fixed(); int numberOfTables = data.ReadUnsignedShort(); + + // Read these data points to move to the correct data location. + // ReSharper disable UnusedVariable int searchRange = data.ReadUnsignedShort(); int entrySelector = data.ReadUnsignedShort(); int rangeShift = data.ReadUnsignedShort(); + // ReSharper restore UnusedVariable var tables = new Dictionary(); @@ -80,7 +84,6 @@ tableRegister.MaximumProfileTable = BasicMaximumProfileTable.Load(data, maxHeaderTable); // post - var postScriptTable = default(PostScriptTable); if (tables.TryGetValue(TrueTypeHeaderTable.Post, out var postscriptHeaderTable)) { tableRegister.PostScriptTable = PostScriptTable.Load(data, table, tableRegister.MaximumProfileTable); @@ -108,7 +111,7 @@ OptionallyParseTables(tables, data, tableRegister); } - return new TrueTypeFont(version, tables, tableRegister.HeaderTable); + return new TrueTypeFont(version, tables, tableRegister); } private static void OptionallyParseTables(IReadOnlyDictionary tables, TrueTypeDataBytes data, TableRegister tableRegister) diff --git a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/Format4CMapTable.cs b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/Format4CMapTable.cs index 633f3c98..6e14e5e3 100644 --- a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/Format4CMapTable.cs +++ b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/Format4CMapTable.cs @@ -69,10 +69,11 @@ namespace UglyToad.PdfPig.Fonts.TrueType.Tables.CMapSubTables var idRangeOffsets = data.ReadUnsignedShortArray(segmentCount); - const int 
singleIntsRead = 16; + const int singleIntsRead = 8; const int intArraysRead = 8; - var remainingBytes = length - (singleIntsRead + intArraysRead * segmentCount); + // ReSharper disable once ArrangeRedundantParentheses + var remainingBytes = length - ((singleIntsRead * 2) + intArraysRead * segmentCount); var remainingInts = remainingBytes / 2; diff --git a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/ICMapSubTable.cs b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/ICMapSubTable.cs index f1add3d7..3b48f289 100644 --- a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/ICMapSubTable.cs +++ b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/ICMapSubTable.cs @@ -23,6 +23,11 @@ /// int EncodingId { get; } + /// + /// Maps from a character code to the array index of the glyph in the font data. + /// + /// The character code. + /// The index of the glyph information for this character. int CharacterCodeToGlyphIndex(int characterCode); } } \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/TrimmedTableMappingCMapTable.cs b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/TrimmedTableMappingCMapTable.cs new file mode 100644 index 00000000..c0b46ad1 --- /dev/null +++ b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/TrimmedTableMappingCMapTable.cs @@ -0,0 +1,60 @@ +// ReSharper disable UnusedVariable +namespace UglyToad.PdfPig.Fonts.TrueType.Tables.CMapSubTables +{ + using System; + + /// + /// + /// A format 6 CMap sub-table which uses 2 bytes to map a contiguous range of character codes to glyph indices. + /// + internal class TrimmedTableMappingCMapTable : ICMapSubTable + { + private readonly int firstCharacterCode; + private readonly int entryCount; + private readonly int[] glyphIndices; + + public int PlatformId { get; } + public int EncodingId { get; } + + /// + /// Create a new . 
+ /// + public TrimmedTableMappingCMapTable(int platformId, int encodingId, int firstCharacterCode, int entryCount, int[] glyphIndices) + { + this.firstCharacterCode = firstCharacterCode; + this.entryCount = entryCount; + this.glyphIndices = glyphIndices ?? throw new ArgumentNullException(nameof(glyphIndices)); + + PlatformId = platformId; + EncodingId = encodingId; + } + + public int CharacterCodeToGlyphIndex(int characterCode) + { + if (characterCode < firstCharacterCode || characterCode >= firstCharacterCode + entryCount) + { + return 0; + } + + var offset = characterCode - firstCharacterCode; + + return glyphIndices[offset]; + } + + public static TrimmedTableMappingCMapTable Load(TrueTypeDataBytes data, int platformId, int encodingId) + { + var length = data.ReadUnsignedShort(); + var language = data.ReadUnsignedShort(); + + // First character code in the range. + var firstCode = data.ReadUnsignedShort(); + + // Number of character codes in the range. + var entryCount = data.ReadUnsignedShort(); + + var glyphIndices = data.ReadUnsignedShortArray(entryCount); + + return new TrimmedTableMappingCMapTable(platformId, encodingId, firstCode, entryCount, glyphIndices); + } + } +} diff --git a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapTable.cs b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapTable.cs index 11a5825f..c5362848 100644 --- a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapTable.cs +++ b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapTable.cs @@ -88,12 +88,12 @@ // Microsoft's standard mapping table. var item = Format4CMapTable.Load(data, header.PlatformId, header.EncodingId); tables.Add(item); - break; } case 6: { - // TODO: support format 6 for modern fonts.
+ var item = TrimmedTableMappingCMapTable.Load(data, header.PlatformId, header.EncodingId); + tables.Add(item); break; } } diff --git a/src/UglyToad.PdfPig/Fonts/TrueType/TrueTypeFont.cs b/src/UglyToad.PdfPig/Fonts/TrueType/TrueTypeFont.cs index d6b93353..69b9efd7 100644 --- a/src/UglyToad.PdfPig/Fonts/TrueType/TrueTypeFont.cs +++ b/src/UglyToad.PdfPig/Fonts/TrueType/TrueTypeFont.cs @@ -1,22 +1,33 @@ namespace UglyToad.PdfPig.Fonts.TrueType { + using System; using System.Collections.Generic; using CidFonts; + using Parser; using Tables; internal class TrueTypeFont : ICidFontProgram { public decimal Version { get; } - public IReadOnlyDictionary Tables { get; } + public IReadOnlyDictionary TableHeaders { get; } public HeaderTable HeaderTable { get; } + public CMapTable CMapTable { get; } + public GlyphDataTable GlyphTable { get; } - public TrueTypeFont(decimal version, IReadOnlyDictionary tables, HeaderTable headerTable) + public TrueTypeFont(decimal version, IReadOnlyDictionary tableHeaders, TableRegister tableRegister) { + if (tableRegister == null) + { + throw new ArgumentNullException(nameof(tableRegister)); + } + Version = version; - Tables = tables; - HeaderTable = headerTable; + TableHeaders = tableHeaders; + HeaderTable = tableRegister.HeaderTable; + CMapTable = tableRegister.CMapTable; + GlyphTable = tableRegister.GlyphDataTable; } } } \ No newline at end of file From 0ae20c51f3eff13c16105188e7fdb439ccabb3c2 Mon Sep 17 00:00:00 2001 From: Eliot Jones Date: Sat, 31 Mar 2018 12:43:06 +0100 Subject: [PATCH 3/4] implement glyph index getter for the 3 implemented truetype cmap sub tables --- .../CMapSubTables/ByteEncodingCMapTable.cs | 19 +++++++++++++---- .../Tables/CMapSubTables/Format4CMapTable.cs | 21 ++++++++++++++++++- .../CMapSubTables/HighByteMappingCMapTable.cs | 16 +++++++++++--- 3 files changed, 48 insertions(+), 8 deletions(-) diff --git a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/ByteEncodingCMapTable.cs 
b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/ByteEncodingCMapTable.cs index 3f3d5054..1f6360e5 100644 --- a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/ByteEncodingCMapTable.cs +++ b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/ByteEncodingCMapTable.cs @@ -6,29 +6,40 @@ /// internal class ByteEncodingCMapTable : ICMapSubTable { + private const int GlyphMappingLength = 256; + private readonly byte[] glyphMapping; + public int PlatformId { get; } public int EncodingId { get; } - private ByteEncodingCMapTable(int platformId, int encodingId) + private ByteEncodingCMapTable(int platformId, int encodingId, byte[] glyphMapping) { + this.glyphMapping = glyphMapping; PlatformId = platformId; EncodingId = encodingId; } public static ByteEncodingCMapTable Load(TrueTypeDataBytes data, int platformId, int encodingId) { + // ReSharper disable UnusedVariable var length = data.ReadUnsignedShort(); var version = data.ReadUnsignedShort(); + // ReSharper restore UnusedVariable - var glyphMapping = data.ReadByteArray(256); + var glyphMapping = data.ReadByteArray(GlyphMappingLength); - return new ByteEncodingCMapTable(platformId, encodingId); + return new ByteEncodingCMapTable(platformId, encodingId, glyphMapping); } public int CharacterCodeToGlyphIndex(int characterCode) { - throw new System.NotImplementedException(); + if (characterCode < GlyphMappingLength || characterCode >= GlyphMappingLength) + { + return 0; + } + + return glyphMapping[characterCode]; } } } \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/Format4CMapTable.cs b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/Format4CMapTable.cs index 6e14e5e3..604a6df8 100644 --- a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/Format4CMapTable.cs +++ b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/Format4CMapTable.cs @@ -34,7 +34,26 @@ namespace UglyToad.PdfPig.Fonts.TrueType.Tables.CMapSubTables public int 
CharacterCodeToGlyphIndex(int characterCode) { - throw new NotImplementedException(); + for (var i = 0; i < Segments.Count; i++) + { + var segment = Segments[i]; + + if (segment.EndCode < characterCode || segment.StartCode > characterCode) + { + continue; + } + + if (segment.IdRangeOffset == 0) + { + return (characterCode + segment.IdDelta) & 0xFFFF; // idDelta arithmetic is modulo 65536. + } + + var offset = segment.IdRangeOffset / 2 + (characterCode - segment.StartCode); + + return GlyphIds[offset - Segments.Count + i]; + } + + return 0; } public static Format4CMapTable Load(TrueTypeDataBytes data, int platformId, int encodingId) diff --git a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/HighByteMappingCMapTable.cs b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/HighByteMappingCMapTable.cs index 605393bd..a0388a2a 100644 --- a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/HighByteMappingCMapTable.cs +++ b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/HighByteMappingCMapTable.cs @@ -10,25 +10,35 @@ /// internal class HighByteMappingCMapTable : ICMapSubTable { + private readonly IReadOnlyDictionary characterCodesToGlyphIndices; + public int PlatformId { get; } public int EncodingId { get; } - public HighByteMappingCMapTable(int platformId, int encodingId) + private HighByteMappingCMapTable(int platformId, int encodingId, IReadOnlyDictionary characterCodesToGlyphIndices) { + this.characterCodesToGlyphIndices = characterCodesToGlyphIndices ??
throw new ArgumentNullException(nameof(characterCodesToGlyphIndices)); PlatformId = platformId; EncodingId = encodingId; } public int CharacterCodeToGlyphIndex(int characterCode) { - throw new NotImplementedException(); + if (!characterCodesToGlyphIndices.TryGetValue(characterCode, out var index)) + { + return 0; + } + + return index; } public static HighByteMappingCMapTable Load(TrueTypeDataBytes data, int numberOfGlyphs, int platformId, int encodingId) { + // ReSharper disable UnusedVariable var length = data.ReadUnsignedShort(); var version = data.ReadUnsignedShort(); + // ReSharper restore UnusedVariable var subHeaderKeys = new int[256]; var maximumSubHeaderIndex = 0; @@ -83,7 +93,7 @@ } } - return new HighByteMappingCMapTable(platformId, encodingId); + return new HighByteMappingCMapTable(platformId, encodingId, characterCodeToGlyphId); } public struct SubHeader From ea55256e785423596a922c1174e0d044b8e12d15 Mon Sep 17 00:00:00 2001 From: Eliot Jones Date: Sat, 31 Mar 2018 13:27:27 +0100 Subject: [PATCH 4/4] wire up retrieving glyph bounding box from a truetype font when retrieving a displacement vector. 
this is not used currently --- .../Fonts/Simple/TrueTypeSimpleFont.cs | 22 +++++++++++++++---- .../CMapSubTables/ByteEncodingCMapTable.cs | 2 +- .../Fonts/TrueType/Tables/CMapTable.cs | 22 +++++++++++++++++++ 3 files changed, 41 insertions(+), 5 deletions(-) diff --git a/src/UglyToad.PdfPig/Fonts/Simple/TrueTypeSimpleFont.cs b/src/UglyToad.PdfPig/Fonts/Simple/TrueTypeSimpleFont.cs index a049d70c..bd6053d4 100644 --- a/src/UglyToad.PdfPig/Fonts/Simple/TrueTypeSimpleFont.cs +++ b/src/UglyToad.PdfPig/Fonts/Simple/TrueTypeSimpleFont.cs @@ -13,7 +13,7 @@ internal class TrueTypeSimpleFont : IFont { private static readonly TransformationMatrix FontMatrix = - TransformationMatrix.FromValues(1/1000m, 0, 0, 1/1000m, 0, 0); + TransformationMatrix.FromValues(1 / 1000m, 0, 0, 1 / 1000m, 0, 0); private readonly int firstCharacterCode; private readonly int lastCharacterCode; private readonly decimal[] widths; @@ -33,7 +33,7 @@ public TrueTypeSimpleFont(NameToken name, int firstCharacterCode, int lastCharacterCode, decimal[] widths, FontDescriptor descriptor, [CanBeNull] CMap toUnicodeCMap, - [CanBeNull] Encoding encoding, + [CanBeNull] Encoding encoding, [CanBeNull]TrueTypeFont font) { this.firstCharacterCode = firstCharacterCode; @@ -88,13 +88,15 @@ { var tx = GetWidth(characterCode); + var box = GetBoundingBox(characterCode); + return new PdfVector(tx / 1000m, 0); } public decimal GetWidth(int characterCode) { var index = characterCode - firstCharacterCode; - + if (index < 0 || index >= widths.Length) { return descriptor.MissingWidth; @@ -105,7 +107,19 @@ public PdfRectangle GetBoundingBox(int characterCode) { - throw new System.NotImplementedException(); + if (font?.CMapTable == null) + { + return descriptor.BoundingBox; + } + + if (!font.CMapTable.TryGetGlyphIndex(characterCode, out var index)) + { + return descriptor.BoundingBox; + } + + var glyph = font.GlyphTable.Glyphs[index]; + + return glyph?.GlyphBounds ?? 
descriptor.BoundingBox; } public TransformationMatrix GetFontMatrix() diff --git a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/ByteEncodingCMapTable.cs b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/ByteEncodingCMapTable.cs index 1f6360e5..50cb2dc0 100644 --- a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/ByteEncodingCMapTable.cs +++ b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapSubTables/ByteEncodingCMapTable.cs @@ -34,7 +34,7 @@ public int CharacterCodeToGlyphIndex(int characterCode) { - if (characterCode < GlyphMappingLength || characterCode >= GlyphMappingLength) + if (characterCode < 0 || characterCode >= GlyphMappingLength) { return 0; } diff --git a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapTable.cs b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapTable.cs index c5362848..9775074d 100644 --- a/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapTable.cs +++ b/src/UglyToad.PdfPig/Fonts/TrueType/Tables/CMapTable.cs @@ -21,6 +21,28 @@ DirectoryTable = directoryTable; } + public bool TryGetGlyphIndex(int characterCode, out int glyphIndex) + { + glyphIndex = 0; + + if (subTables.Count == 0) + { + return false; + } + + foreach (var subTable in subTables) + { + glyphIndex = subTable.CharacterCodeToGlyphIndex(characterCode); + + if (glyphIndex != 0) + { + return true; + } + } + + return false; + } + public static CMapTable Load(TrueTypeDataBytes data, TrueTypeHeaderTable table, TableRegister tableRegister) { data.Seek(table.Offset);