Add infrastructure to fonts to retrieve character bounds; this is unused and will throw for now. Add the format 4 cmap sub-table for the TrueType cmap table.

This commit is contained in:
Eliot Jones
2018-03-30 23:12:55 +01:00
parent 70025edd79
commit 796a3d615e
15 changed files with 284 additions and 19 deletions

View File

@@ -86,6 +86,11 @@
return fromFont;
}
/// <summary>
/// Intended to return the bounding box of the character with the given code.
/// Not implemented yet: every call throws a <see cref="NotImplementedException"/>.
/// </summary>
/// <param name="characterCode">The character code to look up.</param>
public PdfRectangle GetBoundingBox(int characterCode) => throw new NotImplementedException();
public TransformationMatrix GetFontMatrix()
{
return CidFont.FontMatrix;

View File

@@ -19,6 +19,8 @@
decimal GetWidth(int characterCode);
/// <summary>
/// Gets the bounding box for the character with the given code.
/// </summary>
/// <param name="characterCode">The character code to look up.</param>
/// <returns>The bounds of the character as a <see cref="PdfRectangle"/>.</returns>
PdfRectangle GetBoundingBox(int characterCode);
TransformationMatrix GetFontMatrix();
}
}

View File

@@ -98,6 +98,11 @@
return widths[index];
}
/// <summary>
/// Intended to return the bounding box of the character with the given code.
/// Not implemented yet: every call throws a <see cref="System.NotImplementedException"/>.
/// </summary>
/// <param name="characterCode">The character code to look up.</param>
public PdfRectangle GetBoundingBox(int characterCode) => throw new System.NotImplementedException();
public TransformationMatrix GetFontMatrix()
{
// TODO: should this also use units per em?

View File

@@ -95,6 +95,11 @@
return widths[characterCode - firstChar];
}
/// <summary>
/// Intended to return the bounding box of the character with the given code.
/// Not implemented yet: every call throws a <see cref="System.NotImplementedException"/>.
/// </summary>
/// <param name="characterCode">The character code to look up.</param>
public PdfRectangle GetBoundingBox(int characterCode) => throw new System.NotImplementedException();
public TransformationMatrix GetFontMatrix()
{
return fontMatrix;

View File

@@ -61,6 +61,11 @@
return metrics.WidthX;
}
/// <summary>
/// Intended to return the bounding box of the character with the given code.
/// Not implemented yet: every call throws a <see cref="NotImplementedException"/>.
/// </summary>
/// <param name="characterCode">The character code to look up.</param>
public PdfRectangle GetBoundingBox(int characterCode) => throw new NotImplementedException();
public TransformationMatrix GetFontMatrix()
{
return FontMatrix;

View File

@@ -78,6 +78,11 @@
return widths[characterCode - firstChar];
}
/// <summary>
/// Intended to return the bounding box of the character with the given code.
/// Not implemented yet: every call throws a <see cref="System.NotImplementedException"/>.
/// </summary>
/// <param name="characterCode">The character code to look up.</param>
public PdfRectangle GetBoundingBox(int characterCode) => throw new System.NotImplementedException();
public TransformationMatrix GetFontMatrix()
{
return fontMatrix;

View File

@@ -1,7 +1,5 @@
namespace UglyToad.PdfPig.Fonts.TrueType.Parser
{
using System;
using System.Collections.Generic;
using Tables;
/// <summary>
@@ -22,5 +20,12 @@
public BasicMaximumProfileTable MaximumProfileTable { get; set; }
public PostScriptTable PostScriptTable { get; set; }
/// <summary>
/// Defines mapping of character codes to glyph index values in the font.
/// Can contain multiple sub-tables to support multiple encoding schemes.
/// Where a character code isn't found it should map to index 0.
/// </summary>
public CMapTable CMapTable { get; set; }
}
}

View File

@@ -114,6 +114,10 @@
private static void OptionallyParseTables(IReadOnlyDictionary<string, TrueTypeHeaderTable> tables, TrueTypeDataBytes data, TableRegister tableRegister)
{
// cmap
if (tables.TryGetValue(TrueTypeHeaderTable.Cmap, out var cmap))
{
tableRegister.CMapTable = CMapTable.Load(data, cmap, tableRegister);
}
// hmtx
if (tables.TryGetValue(TrueTypeHeaderTable.Hmtx, out var hmtxHeaderTable))
@@ -133,3 +137,4 @@
}
}
}

View File

@@ -1,15 +1,34 @@
namespace UglyToad.PdfPig.Fonts.TrueType.Tables.CMapSubTables
{
/// <inheritdoc />
/// <summary>
/// The format 0 sub-table where character codes and glyph indices are restricted to a single byte.
/// </summary>
internal class ByteEncodingCMapTable : ICMapSubTable
{
public static ByteEncodingCMapTable Load(TrueTypeDataBytes data)
/// <summary>
/// The platform identifier passed to the constructor.
/// </summary>
public int PlatformId { get; }
/// <summary>
/// The platform specific encoding identifier passed to the constructor.
/// </summary>
public int EncodingId { get; }
/// <summary>
/// Create a new <see cref="ByteEncodingCMapTable"/>. Private: instances are created through the static Load method.
/// </summary>
private ByteEncodingCMapTable(int platformId, int encodingId)
{
PlatformId = platformId;
EncodingId = encodingId;
}
/// <summary>
/// Reads a format 0 sub-table from <paramref name="data"/>: two unsigned shorts (length and version)
/// followed by a 256 byte glyph mapping array.
/// </summary>
/// <param name="data">The font data to read from.</param>
/// <param name="platformId">The platform identifier to store on the result.</param>
/// <param name="encodingId">The encoding identifier to store on the result.</param>
/// <returns>A new table holding only the platform and encoding identifiers.</returns>
public static ByteEncodingCMapTable Load(TrueTypeDataBytes data, int platformId, int encodingId)
{
var length = data.ReadUnsignedShort();
var version = data.ReadUnsignedShort();
// NOTE(review): the mapping is read so the bytes are consumed, but it is not stored on the returned instance.
var glyphMapping = data.ReadByteArray(256);
return new ByteEncodingCMapTable();
return new ByteEncodingCMapTable(platformId, encodingId);
}
/// <summary>
/// Not implemented yet: every call throws a <see cref="System.NotImplementedException"/>.
/// </summary>
public int CharacterCodeToGlyphIndex(int characterCode)
{
throw new System.NotImplementedException();
}
}
}

View File

@@ -0,0 +1,138 @@
// ReSharper disable UnusedVariable
namespace UglyToad.PdfPig.Fonts.TrueType.Tables.CMapSubTables
{
using System;
using System.Collections.Generic;
/// <inheritdoc />
/// <summary>
/// A format 4 CMap sub-table which defines gappy ranges of character code to glyph index mappings.
/// </summary>
internal class Format4CMapTable : ICMapSubTable
{
    /// <summary>
    /// The platform identifier this sub-table was declared for.
    /// </summary>
    public int PlatformId { get; }

    /// <summary>
    /// The platform specific encoding identifier this sub-table was declared for.
    /// </summary>
    public int EncodingId { get; }

    /// <summary>
    /// The language field of the sub-table, used for sub-tables with a Macintosh platform ID.
    /// </summary>
    public int Language { get; }

    /// <summary>
    /// The contiguous character code segments, in the order they appear in the sub-table.
    /// </summary>
    public IReadOnlyList<Segment> Segments { get; }

    /// <summary>
    /// The glyph index array which follows the segment arrays in the sub-table.
    /// </summary>
    public IReadOnlyList<int> GlyphIds { get; }

    /// <summary>
    /// Create a new <see cref="Format4CMapTable"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="segments"/> or <paramref name="glyphIds"/> is null.
    /// </exception>
    public Format4CMapTable(int platformId, int encodingId, int language, IReadOnlyList<Segment> segments, IReadOnlyList<int> glyphIds)
    {
        PlatformId = platformId;
        EncodingId = encodingId;
        Language = language;
        Segments = segments ?? throw new ArgumentNullException(nameof(segments));
        GlyphIds = glyphIds ?? throw new ArgumentNullException(nameof(glyphIds));
    }

    /// <summary>
    /// Maps a character code to a glyph index using the segments of this sub-table.
    /// </summary>
    /// <param name="characterCode">The character code to look up.</param>
    /// <returns>
    /// The glyph index for the character code, or 0 (the missing glyph) when no segment contains the code
    /// or the sub-table data does not provide a glyph for it.
    /// </returns>
    public int CharacterCodeToGlyphIndex(int characterCode)
    {
        for (var i = 0; i < Segments.Count; i++)
        {
            var segment = Segments[i];

            if (characterCode < segment.StartCode || characterCode > segment.EndCode)
            {
                continue;
            }

            if (segment.IdRangeOffset == 0)
            {
                // Direct mapping: the delta is applied modulo 65536.
                return (characterCode + segment.IdDelta) & 0xFFFF;
            }

            // IdRangeOffset is a byte offset measured from this segment's own slot in the idRangeOffset array.
            // The glyph index array starts immediately after that array, so subtract the slots remaining
            // (Segments.Count - i) to convert to an index into GlyphIds.
            var glyphIdIndex = segment.IdRangeOffset / 2 + (characterCode - segment.StartCode) - (Segments.Count - i);

            if (glyphIdIndex < 0 || glyphIdIndex >= GlyphIds.Count)
            {
                // Malformed offset, treat as missing glyph rather than throwing.
                return 0;
            }

            var glyphId = GlyphIds[glyphIdIndex];

            // A stored value of 0 means missing glyph; otherwise the delta is applied modulo 65536.
            return glyphId == 0 ? 0 : (glyphId + segment.IdDelta) & 0xFFFF;
        }

        return 0;
    }

    /// <summary>
    /// Reads a format 4 sub-table from <paramref name="data"/>. The reader must be positioned immediately
    /// after the 2 byte format field of the sub-table.
    /// </summary>
    /// <param name="data">The font data to read from.</param>
    /// <param name="platformId">The platform identifier to store on the result.</param>
    /// <param name="encodingId">The encoding identifier to store on the result.</param>
    public static Format4CMapTable Load(TrueTypeDataBytes data, int platformId, int encodingId)
    {
        // Length in bytes of the sub-table.
        var length = data.ReadUnsignedShort();

        // Used for sub-tables with a Macintosh platform ID.
        var language = data.ReadUnsignedShort();

        // The stored value is twice the number of contiguous segments.
        var segmentCount = data.ReadUnsignedShort() / 2;

        // searchRange, entrySelector and rangeShift are binary search helpers which are not needed here,
        // they are read only to advance past them.
        data.ReadUnsignedShort();
        data.ReadUnsignedShort();
        data.ReadUnsignedShort();

        // End character codes for each segment.
        var endCounts = data.ReadUnsignedShortArray(segmentCount);

        // Reserved padding, should be zero.
        data.ReadUnsignedShort();

        // Start character codes for each segment.
        var startCounts = data.ReadUnsignedShortArray(segmentCount);

        // Delta for all character codes in the segment. Contrary to the spec this is actually a short[].
        var idDeltas = data.ReadShortArray(segmentCount);

        var idRangeOffsets = data.ReadUnsignedShortArray(segmentCount);

        // 8 single 16-bit fields (including the format field read by the caller) and 4 arrays of 16-bit values.
        const int singleFieldBytes = 16;
        const int arrayBytesPerSegment = 8;

        var remainingBytes = length - (singleFieldBytes + arrayBytesPerSegment * segmentCount);

        // A length smaller than the data already read would give a negative count, clamp to an empty array.
        var glyphIdCount = Math.Max(0, remainingBytes / 2);

        var glyphIndices = data.ReadUnsignedShortArray(glyphIdCount);

        var segments = new Segment[endCounts.Length];

        for (var i = 0; i < endCounts.Length; i++)
        {
            segments[i] = new Segment(startCounts[i], endCounts[i], idDeltas[i], idRangeOffsets[i]);
        }

        return new Format4CMapTable(platformId, encodingId, language, segments, glyphIndices);
    }

    /// <summary>
    /// A contiguous segment which maps character to glyph codes in a Format 4 CMap sub-table.
    /// </summary>
    public struct Segment
    {
        /// <summary>
        /// The start character code in the range.
        /// </summary>
        public int StartCode { get; }

        /// <summary>
        /// The end character code in the range.
        /// </summary>
        public int EndCode { get; }

        /// <summary>
        /// The delta for the codes in the segment.
        /// </summary>
        public int IdDelta { get; }

        /// <summary>
        /// Offset in bytes to glyph index array.
        /// </summary>
        public int IdRangeOffset { get; }

        /// <summary>
        /// Create a new <see cref="Segment"/>.
        /// </summary>
        public Segment(int startCode, int endCode, int idDelta, int idRangeOffset)
        {
            StartCode = startCode;
            EndCode = endCode;
            IdDelta = idDelta;
            IdRangeOffset = idRangeOffset;
        }

        /// <summary>
        /// Returns the four segment values formatted for debugging.
        /// </summary>
        public override string ToString()
        {
            return $"Start: {StartCode}, End: {EndCode}, Delta: {IdDelta}, Offset: {IdRangeOffset}";
        }
    }
}
}

View File

@@ -3,9 +3,29 @@
using System;
using System.Collections.Generic;
/// <inheritdoc />
/// <summary>
/// A format 2 sub-table for Chinese, Japanese and Korean characters.
/// Contains mixed 8/16 bit encodings.
/// </summary>
internal class HighByteMappingCMapTable : ICMapSubTable
{
public static HighByteMappingCMapTable Load(TrueTypeDataBytes data, int numberOfGlyphs)
public int PlatformId { get; }
public int EncodingId { get; }
/// <summary>
/// Create a new <see cref="HighByteMappingCMapTable"/> for the given platform and encoding identifiers.
/// </summary>
/// <param name="platformId">Stored as <see cref="PlatformId"/>.</param>
/// <param name="encodingId">Stored as <see cref="EncodingId"/>.</param>
public HighByteMappingCMapTable(int platformId, int encodingId)
{
    EncodingId = encodingId;
    PlatformId = platformId;
}
/// <summary>
/// Not implemented yet: every call throws a <see cref="NotImplementedException"/>.
/// </summary>
/// <param name="characterCode">The character code to look up.</param>
public int CharacterCodeToGlyphIndex(int characterCode) => throw new NotImplementedException();
public static HighByteMappingCMapTable Load(TrueTypeDataBytes data, int numberOfGlyphs, int platformId, int encodingId)
{
var length = data.ReadUnsignedShort();
var version = data.ReadUnsignedShort();
@@ -63,7 +83,7 @@
}
}
return new HighByteMappingCMapTable();
return new HighByteMappingCMapTable(platformId, encodingId);
}
public struct SubHeader

View File

@@ -1,7 +1,28 @@
namespace UglyToad.PdfPig.Fonts.TrueType.Tables.CMapSubTables
{
/// <summary>
/// In a TrueType font the CMap table maps from character codes to glyph indices.
/// A font which can run on multiple platforms will have multiple encoding tables. These are stored as multiple
/// sub-tables. The <see cref="ICMapSubTable"/> represents a single sub-table.
/// </summary>
internal interface ICMapSubTable
{
/// <summary>
/// The platform identifier.
/// </summary>
/// <remarks>
/// 0: Unicode
/// 1: Macintosh
/// 2: Reserved
/// 3: Microsoft
/// </remarks>
int PlatformId { get; }
/// <summary>
/// Platform specific encoding identifier.
/// </summary>
int EncodingId { get; }
/// <summary>
/// Maps a character code to a glyph index using this sub-table.
/// </summary>
/// <param name="characterCode">The character code to look up.</param>
/// <returns>The glyph index for the character code.</returns>
int CharacterCodeToGlyphIndex(int characterCode);
}
}

View File

@@ -1,10 +0,0 @@
namespace UglyToad.PdfPig.Fonts.TrueType.Tables.CMapSubTables
{
/// <summary>
/// Placeholder CMap sub-table type with no members beyond a stub loader.
/// </summary>
internal class SegmentMappingDeltaValuesCMapTable : ICMapSubTable
{
    /// <summary>
    /// Stub loader: reads nothing from <paramref name="data"/> and always returns null.
    /// </summary>
    public static SegmentMappingDeltaValuesCMapTable Load(TrueTypeDataBytes data) => null;
}
}

View File

@@ -52,22 +52,50 @@
var format = data.ReadUnsignedShort();
/*
* There are 9 currently available formats:
* 0: Character code and glyph indices are restricted to a single byte. Rare.
* 2: Suitable for CJK characters. Contains mixed 8/16 bit encoding.
* 4: 2 byte encoding format. Used when character codes fall into (gappy) contiguous ranges.
* 6: 'Trimmed table mapping', used when character codes fall into a single contiguous range. This is dense mapping.
* 8: 16/32 bit coverage. Uses mixed length character codes.
* 10: Similar to format 6, trimmed table/array for 32 bits.
* 12: Segmented coverage, similar to format 4 but for 32 bit/4 byte.
* 13: Many to one mappings. Used by Apple for the LastResort font.
* 14: Unicode variation sequences.
*
* Many of the formats are obsolete or not really used. Modern fonts will tend to use formats 4, 6 and 12.
* For PDF we will support 0, 2 and 4 since these are in the original TrueType spec.
*/
switch (format)
{
case 0:
{
// Simple 1 to 1 mapping of character codes to glyph codes.
var item = ByteEncodingCMapTable.Load(data);
var item = ByteEncodingCMapTable.Load(data, header.PlatformId, header.EncodingId);
tables.Add(item);
break;
}
case 1:
case 2:
{
// Useful for CJK characters. Use mixed 8/16 bit encoding.
var item = HighByteMappingCMapTable.Load(data, numberofGlyphs);
var item = HighByteMappingCMapTable.Load(data, numberofGlyphs, header.PlatformId, header.EncodingId);
tables.Add(item);
break;
}
case 4:
{
// Microsoft's standard mapping table.
var item = Format4CMapTable.Load(data, header.PlatformId, header.EncodingId);
tables.Add(item);
break;
}
case 6:
{
// TODO: support format 6 for modern fonts.
break;
}
}
}

View File

@@ -173,5 +173,17 @@
offsets[i] = ReadUnsignedInt();
}
}
/// <summary>
/// Reads <paramref name="length"/> consecutive signed shorts, in order, using <c>ReadSignedShort</c>.
/// </summary>
/// <param name="length">The number of values to read.</param>
/// <returns>A new array containing the values in the order they were read.</returns>
public short[] ReadShortArray(int length)
{
    var values = new short[length];

    var index = 0;
    while (index < values.Length)
    {
        values[index] = ReadSignedShort();
        index++;
    }

    return values;
}
}
}