Start adding TrueType font parsing logic

This commit is contained in:
Eliot Jones
2017-12-03 21:31:59 +00:00
parent 644fd3e19d
commit 33c10a3ff7
13 changed files with 643 additions and 3 deletions

View File

@@ -0,0 +1,50 @@
namespace UglyToad.Pdf.Tests.Fonts.TrueType.Parser
{
    using System;
    using System.IO;
    using System.Linq;
    using IO;
    using Pdf.Fonts.TrueType;
    using Pdf.Fonts.TrueType.Parser;
    using Xunit;

    /// <summary>
    /// Smoke tests which run the <see cref="TrueTypeFontParser"/> over font files
    /// embedded as manifest resources in the test assembly.
    /// The tests make no assertions; they pass when parsing completes without throwing.
    /// </summary>
    public class TrueTypeFontParserTests
    {
        private readonly TrueTypeFontParser parser = new TrueTypeFontParser();

        [Fact]
        public void ParseRegularRoboto()
        {
            parser.Parse(LoadFont("Roboto-Regular"));
        }

        [Fact]
        public void ParseEmbeddedSimpleGoogleDocssGautmi()
        {
            parser.Parse(LoadFont("google-simple-doc"));
        }

        /// <summary>
        /// Wraps the bytes of the named embedded font in the reader type the parser consumes.
        /// </summary>
        private static TrueTypeDataBytes LoadFont(string name)
            => new TrueTypeDataBytes(new ByteArrayInputBytes(GetFileBytes(name)));

        /// <summary>
        /// Returns the full content of the single manifest resource whose name contains
        /// <paramref name="name"/> (case-insensitive). Throws if zero or several resources match.
        /// </summary>
        private static byte[] GetFileBytes(string name)
        {
            var assembly = typeof(TrueTypeFontParserTests).Assembly;

            var resourceName = assembly
                .GetManifestResourceNames()
                .Single(candidate => candidate.IndexOf(name, StringComparison.InvariantCultureIgnoreCase) >= 0);

            using (var buffer = new MemoryStream())
            using (var resource = assembly.GetManifestResourceStream(resourceName))
            {
                resource.CopyTo(buffer);

                return buffer.ToArray();
            }
        }
    }
}

View File

@@ -7,10 +7,18 @@
</PropertyGroup>
<ItemGroup>
<None Remove="Fonts\TrueType\google-simple-doc.ttf" />
<None Remove="Fonts\TrueType\Roboto-Regular.ttf" />
<None Remove="Integration\Documents\Single Page Simple - from google drive.pdf" />
</ItemGroup>
<ItemGroup>
<EmbeddedResource Include="Fonts\TrueType\google-simple-doc.ttf">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</EmbeddedResource>
<EmbeddedResource Include="Fonts\TrueType\Roboto-Regular.ttf">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</EmbeddedResource>
<Content Include="Integration\Documents\Single Page Simple - from google drive.pdf">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>

View File

@@ -7,6 +7,7 @@
using Parser.Handlers;
using Parser.Parts;
using Pdf.Parser;
using TrueType.Parser;
internal class FontFactory
{
@@ -16,7 +17,7 @@
{
Handlers = new Dictionary<CosName, IFontHandler>
{
{CosName.TYPE0, new Type0FontHandler(new CidFontFactory(new FontDescriptorFactory()))}
{CosName.TYPE0, new Type0FontHandler(new CidFontFactory(new FontDescriptorFactory(), new TrueTypeFontParser()))}
};
}

View File

@@ -8,16 +8,21 @@
using Cos;
using Filters;
using Geometry;
using IO;
using Pdf.Parser;
using TrueType;
using TrueType.Parser;
using Util;
internal class CidFontFactory
{
private readonly FontDescriptorFactory descriptorFactory;
private readonly TrueTypeFontParser trueTypeFontParser;
public CidFontFactory(FontDescriptorFactory descriptorFactory)
public CidFontFactory(FontDescriptorFactory descriptorFactory, TrueTypeFontParser trueTypeFontParser)
{
this.descriptorFactory = descriptorFactory;
this.trueTypeFontParser = trueTypeFontParser;
}
public ICidFont Generate(PdfDictionary dictionary, ParsingArguments arguments, bool isLenientParsing)
@@ -75,7 +80,7 @@
return true;
}
private static void ReadDescriptorFile(FontDescriptor descriptor, ParsingArguments arguments)
private void ReadDescriptorFile(FontDescriptor descriptor, ParsingArguments arguments)
{
if (descriptor?.FontFile == null)
{
@@ -90,6 +95,18 @@
}
var fontFile = fontFileStream.Decode(arguments.Get<IFilterProvider>());
// The decoded font program is parsed in memory only. Nothing is written to disk here:
// a hard-coded dump path would fail on any machine where that directory does not exist.
switch (descriptor.FontFile.FileType)
{
    case DescriptorFontFile.FontFileType.TrueType:
        var input = new TrueTypeDataBytes(new ByteArrayInputBytes(fontFile));
        trueTypeFontParser.Parse(input);
        break;
    default:
        throw new NotSupportedException("Currently only TrueType fonts are supported.");
}
}
private static IReadOnlyDictionary<int, decimal> ReadWidths(PdfDictionary dict)

View File

@@ -0,0 +1,35 @@
namespace UglyToad.Pdf.Fonts.TrueType.Parser
{
    using Tables;

    /// <summary>
    /// Reads the fields of the header table and packages them as a <see cref="HeaderTable"/>.
    /// </summary>
    internal class HeaderTableParser : ITrueTypeTableParser
    {
        /// <summary>
        /// The tag of the table handled by this parser (<see cref="TrueTypeFontTable.Head"/>).
        /// </summary>
        public string Tag
        {
            get { return TrueTypeFontTable.Head; }
        }

        /// <summary>
        /// Reads the header fields sequentially from <paramref name="data"/>.
        /// </summary>
        /// <param name="data">The font bytes; fields are consumed from its current position.</param>
        /// <param name="table">The directory entry for the header table, stored on the result.</param>
        /// <returns>A <see cref="HeaderTable"/> populated with the values read.</returns>
        public ITable Parse(TrueTypeDataBytes data, TrueTypeFontTable table)
        {
            // NOTE(review): reading starts wherever the stream currently is; this method does not
            // move to table.Offset itself - confirm the caller positions the data first.

            // The order of these reads defines the layout being parsed; do not reorder.
            decimal tableVersion = (decimal)data.Read32Fixed();
            decimal revision = (decimal)data.Read32Fixed();
            long checkSum = data.ReadUnsignedInt();
            long magic = data.ReadUnsignedInt();
            int headerFlags = data.ReadUnsignedShort();
            int emUnits = data.ReadUnsignedShort();
            var createdDate = data.ReadInternationalDate();
            var modifiedDate = data.ReadInternationalDate();

            // Bounding box values.
            short minX = data.ReadSignedShort();
            short minY = data.ReadSignedShort();
            short maxX = data.ReadSignedShort();
            short maxY = data.ReadSignedShort();

            int style = data.ReadUnsignedShort();
            int smallestPpem = data.ReadUnsignedShort();
            short directionHint = data.ReadSignedShort();
            short locFormat = data.ReadSignedShort();
            short glyphFormat = data.ReadSignedShort();

            return new HeaderTable(
                table,
                tableVersion,
                revision,
                checkSum,
                magic,
                headerFlags,
                emUnits,
                createdDate,
                modifiedDate,
                minX,
                minY,
                maxX,
                maxY,
                style,
                smallestPpem,
                directionHint,
                locFormat,
                glyphFormat);
        }
    }
}

View File

@@ -0,0 +1,11 @@
namespace UglyToad.Pdf.Fonts.TrueType.Parser
{
using Tables;
/// <summary>
/// Parses one kind of TrueType table from the font data.
/// </summary>
internal interface ITrueTypeTableParser
{
/// <summary>
/// The tag of the table this parser handles.
/// </summary>
string Tag { get; }
/// <summary>
/// Reads the table from <paramref name="data"/>.
/// </summary>
/// <param name="data">The font bytes to read from.</param>
/// <param name="table">The table directory entry describing the table being parsed.</param>
/// <returns>The parsed table.</returns>
ITable Parse(TrueTypeDataBytes data, TrueTypeFontTable table);
}
}

View File

@@ -0,0 +1,71 @@
namespace UglyToad.Pdf.Fonts.TrueType.Parser
{
    using System;
    using System.Collections.Generic;
    using Util.JetBrains.Annotations;

    /// <summary>
    /// Reads the table directory of a TrueType font and hands the required tables to their parsers.
    /// </summary>
    internal class TrueTypeFontParser
    {
        /// <summary>
        /// Number of bytes making up a table tag.
        /// </summary>
        private const int TagLength = 4;

        /// <summary>
        /// Table parsers keyed by the tag of the table they handle.
        /// </summary>
        private static readonly IReadOnlyDictionary<string, ITrueTypeTableParser> parsers =
            new Dictionary<string, ITrueTypeTableParser>
            {
                [TrueTypeFontTable.Head] = new HeaderTableParser()
            };

        /// <summary>
        /// Reads the leading directory fields, then every table directory entry, then parses the tables.
        /// </summary>
        /// <param name="data">The font bytes, consumed sequentially.</param>
        /// <exception cref="InvalidOperationException">The font has no header table entry.</exception>
        public void Parse(TrueTypeDataBytes data)
        {
            // These leading fields are read to advance the stream; only the table count is used below.
            var version = data.Read32Fixed();
            int numberOfTables = data.ReadUnsignedShort();
            int searchRange = data.ReadUnsignedShort();
            int entrySelector = data.ReadUnsignedShort();
            int rangeShift = data.ReadUnsignedShort();

            var directory = new Dictionary<string, TrueTypeFontTable>();

            for (var remaining = numberOfTables; remaining > 0; remaining--)
            {
                var entry = ReadTable(data);

                if (entry == null)
                {
                    continue;
                }

                // A later entry with the same tag replaces an earlier one.
                directory[entry.Tag] = entry;
            }

            ParseTables(directory, data);
        }

        /// <summary>
        /// Reads one directory entry: tag, checksum, offset and length.
        /// </summary>
        /// <returns>The entry, or <see langword="null"/> for a zero-length table other than the glyph table.</returns>
        [CanBeNull]
        private static TrueTypeFontTable ReadTable(TrueTypeDataBytes data)
        {
            var tag = data.ReadString(TagLength);
            var checksum = data.ReadUnsignedInt();
            var offset = data.ReadUnsignedInt();
            var length = data.ReadUnsignedInt();

            // Keep every non-empty table, and keep glyf even when it is empty.
            if (length != 0 || string.Equals(tag, TrueTypeFontTable.Glyf))
            {
                return new TrueTypeFontTable(tag, checksum, offset, length);
            }

            return null;
        }

        /// <summary>
        /// Parses the tables listed in the directory; currently only the required header table.
        /// </summary>
        private static void ParseTables(IReadOnlyDictionary<string, TrueTypeFontTable> tables, TrueTypeDataBytes data)
        {
            // Not consumed yet.
            var isPostScript = tables.ContainsKey(TrueTypeFontTable.Cff);

            var hasHeader = tables.TryGetValue(TrueTypeFontTable.Head, out var headEntry);

            if (!hasHeader)
            {
                throw new InvalidOperationException($"The {TrueTypeFontTable.Head} table is required.");
            }

            // NOTE(review): nothing here moves the data to headEntry.Offset before parsing; the
            // header parser reads from wherever the directory loop stopped - confirm this is intended.
            var header = parsers[TrueTypeFontTable.Head].Parse(data, headEntry);
        }
    }
}

View File

@@ -0,0 +1,107 @@
namespace UglyToad.Pdf.Fonts.TrueType.Tables
{
    using System;

    /// <summary>
    /// The font-wide values read from the header table.
    /// </summary>
    internal class HeaderTable : ITable
    {
        /// <summary>
        /// The macStyle flag value for bold.
        /// </summary>
        public const int MacStyleBold = 1;

        /// <summary>
        /// The macStyle flag value for italic.
        /// </summary>
        public const int MacStyleItalic = 2;

        /// <summary>
        /// The values the font direction hint is mapped onto.
        /// </summary>
        public enum FontDirection
        {
            StronglyRightToLeftWithNeutrals = -2,
            StronglyRightToLeft = -1,
            FullyMixedDirectional = 0,
            StronglyLeftToRight = 1,
            StronglyLeftToRightWithNeutrals = 2
        }

        /// <summary>
        /// Always <see cref="TrueTypeFontTable.Head"/>.
        /// </summary>
        public string Tag
        {
            get { return TrueTypeFontTable.Head; }
        }

        /// <summary>
        /// The directory entry supplied when this table was created.
        /// </summary>
        public TrueTypeFontTable DirectoryTable { get; }

        public decimal Version { get; }

        public decimal Revision { get; }

        public long CheckSumAdjustment { get; }

        public long MagicNumber { get; }

        public int Flags { get; }

        public int UnitsPerEm { get; }

        public DateTime Created { get; }

        public DateTime Modified { get; }

        public short XMin { get; }

        public short YMin { get; }

        public short XMax { get; }

        public short YMax { get; }

        /// <summary>
        /// Style flags; see <see cref="MacStyleBold"/> and <see cref="MacStyleItalic"/>.
        /// </summary>
        public int MacStyle { get; }

        /// <summary>
        /// The smallest size, in pixels, at which the font is readable.
        /// </summary>
        public int LowestRecommendedPpem { get; }

        public FontDirection FontDirectionHint { get; }

        /// <summary>
        /// 0 means short offsets, 1 means long offsets.
        /// </summary>
        public short IndexToLocFormat { get; }

        /// <summary>
        /// 0 means the current format.
        /// </summary>
        public short GlyphDataFormat { get; }

        /// <summary>
        /// Creates the table from already-read values.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="directoryTable"/> is null.</exception>
        public HeaderTable(TrueTypeFontTable directoryTable, decimal version, decimal revision, long checkSumAdjustment,
            long magicNumber, int flags, int unitsPerEm,
            DateTime created, DateTime modified,
            short xMin, short yMin,
            short xMax, short yMax,
            int macStyle,
            int lowestRecommendedPpem,
            short fontDirectionHint,
            short indexToLocFormat,
            short glyphDataFormat)
        {
            if (directoryTable == null)
            {
                throw new ArgumentNullException(nameof(directoryTable));
            }

            DirectoryTable = directoryTable;

            // Identification.
            Version = version;
            Revision = revision;
            CheckSumAdjustment = checkSumAdjustment;
            MagicNumber = magicNumber;

            // Global settings and timestamps.
            Flags = flags;
            UnitsPerEm = unitsPerEm;
            Created = created;
            Modified = modified;

            // Bounding box.
            XMin = xMin;
            YMin = yMin;
            XMax = xMax;
            YMax = yMax;

            // Style and format information.
            MacStyle = macStyle;
            LowestRecommendedPpem = lowestRecommendedPpem;
            // The raw value is cast without validation, so values outside the named members are kept as-is.
            FontDirectionHint = (FontDirection)fontDirectionHint;
            IndexToLocFormat = indexToLocFormat;
            GlyphDataFormat = glyphDataFormat;
        }
    }
}

View File

@@ -0,0 +1,9 @@
namespace UglyToad.Pdf.Fonts.TrueType.Tables
{
/// <summary>
/// A parsed table from a TrueType font.
/// </summary>
internal interface ITable
{
/// <summary>
/// The tag identifying this table.
/// </summary>
string Tag { get; }
/// <summary>
/// The table directory entry associated with this table.
/// </summary>
TrueTypeFontTable DirectoryTable { get; }
}
}

View File

@@ -0,0 +1,142 @@
namespace UglyToad.Pdf.Fonts.TrueType
{
    using System;
    using System.Globalization;
    using System.IO;
    using System.Text;
    using IO;
    using Util;

    /// <summary>
    /// Reads the primitive values used by TrueType fonts (most significant byte first)
    /// sequentially from an <see cref="IInputBytes"/>.
    /// </summary>
    internal class TrueTypeDataBytes
    {
        private readonly IInputBytes inputBytes;

        /// <summary>
        /// Creates a reader over the given bytes.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="inputBytes"/> is null.</exception>
        public TrueTypeDataBytes(IInputBytes inputBytes)
        {
            this.inputBytes = inputBytes ?? throw new ArgumentNullException(nameof(inputBytes));
        }

        /// <summary>
        /// Reads a 32-bit fixed point number: a signed 16-bit integer part followed by
        /// an unsigned 16-bit fraction (in units of 1/65536).
        /// </summary>
        public float Read32Fixed()
        {
            float retval = ReadSignedShort();
            retval += (ReadUnsignedShort() / 65536.0f);
            return retval;
        }

        /// <summary>
        /// Reads a signed 16-bit integer.
        /// </summary>
        /// <exception cref="EndOfStreamException">Fewer than 2 bytes remain.</exception>
        public short ReadSignedShort()
        {
            int ch1 = Read();
            int ch2 = Read();

            // Read returns -1 at the end of the data, which makes the OR negative.
            if ((ch1 | ch2) < 0)
            {
                throw new EndOfStreamException();
            }

            return (short)((ch1 << 8) + (ch2 << 0));
        }

        /// <summary>
        /// Reads an unsigned 16-bit integer.
        /// </summary>
        /// <exception cref="EndOfStreamException">Fewer than 2 bytes remain.</exception>
        public int ReadUnsignedShort()
        {
            int ch1 = Read();
            int ch2 = Read();

            if ((ch1 | ch2) < 0)
            {
                throw new EndOfStreamException();
            }

            return (ch1 << 8) + (ch2 << 0);
        }

        /// <summary>
        /// Reads the next byte.
        /// </summary>
        /// <returns>The byte as a value from 0 to 255, or -1 if there are no more bytes.</returns>
        public int Read()
        {
            // We're no longer moving because we're at the end.
            if (!inputBytes.MoveNext())
            {
                return -1;
            }

            int result = inputBytes.CurrentByte;

            return (result + 256) % 256;
        }

        /// <summary>
        /// Reads exactly <paramref name="numberOfBytes"/> bytes.
        /// </summary>
        /// <exception cref="EndOfStreamException">Fewer than <paramref name="numberOfBytes"/> bytes remain.</exception>
        public byte[] Read(int numberOfBytes)
        {
            byte[] data = new byte[numberOfBytes];
            int amountRead = 0;

            while (amountRead < numberOfBytes)
            {
                if (!inputBytes.MoveNext())
                {
                    throw new EndOfStreamException();
                }

                data[amountRead] = inputBytes.CurrentByte;
                amountRead++;
            }

            return data;
        }

        /// <summary>
        /// Reads <paramref name="length"/> bytes and decodes them as ISO 8859-1.
        /// </summary>
        public string ReadString(int length)
        {
            return ReadString(length, OtherEncodings.Iso88591);
        }

        /// <summary>
        /// Reads <paramref name="length"/> bytes and decodes them with <paramref name="encoding"/>.
        /// </summary>
        public string ReadString(int length, Encoding encoding)
        {
            byte[] buffer = Read(length);

            var str = encoding.GetString(buffer);

            return str;
        }

        /// <summary>
        /// Reads an unsigned 32-bit integer, returned as a <see cref="long"/> so the full range fits.
        /// </summary>
        /// <exception cref="EndOfStreamException">Fewer than 4 bytes remain.</exception>
        public long ReadUnsignedInt()
        {
            long byte1 = Read();
            long byte2 = Read();
            long byte3 = Read();
            long byte4 = Read();

            // Check every byte, matching ReadSignedInt, rather than relying on the last one only.
            if ((byte1 | byte2 | byte3 | byte4) < 0)
            {
                throw new EndOfStreamException();
            }

            return (byte1 << 24) + (byte2 << 16) + (byte3 << 8) + (byte4 << 0);
        }

        /// <summary>
        /// Reads a signed 32-bit integer.
        /// </summary>
        /// <exception cref="EndOfStreamException">Fewer than 4 bytes remain.</exception>
        public int ReadSignedInt()
        {
            int ch1 = Read();
            int ch2 = Read();
            int ch3 = Read();
            int ch4 = Read();

            if ((ch1 | ch2 | ch3 | ch4) < 0)
            {
                throw new EndOfStreamException();
            }

            return (ch1 << 24) + (ch2 << 16) + (ch3 << 8) + (ch4 << 0);
        }

        /// <summary>
        /// Reads a signed 64-bit integer as two consecutive 32-bit halves, high half first.
        /// </summary>
        /// <exception cref="EndOfStreamException">Fewer than 8 bytes remain.</exception>
        public long ReadLong()
        {
            // The high half must be widened to long BEFORE shifting: shifting an int by 32
            // uses only the low 5 bits of the count in C#, i.e. it shifts by zero.
            long high = ReadSignedInt();

            // Masking after the implicit widening drops the sign extension of the low half.
            long low = ReadSignedInt() & 0xFFFFFFFFL;

            return (high << 32) | low;
        }

        /// <summary>
        /// Reads a 64-bit count of seconds and adds it to midnight on 1 January 1904.
        /// </summary>
        /// <exception cref="EndOfStreamException">Fewer than 8 bytes remain.</exception>
        public DateTime ReadInternationalDate()
        {
            // NOTE(review): this was previously flagged as returning the wrong value. ReadLong not
            // widening its high half was one cause; re-verify against a font with a known creation date.
            long secondsSince1904 = ReadLong();

            var date = new DateTime(1904, 1, 1, 0, 0, 0, 0, new GregorianCalendar());

            var result = date.AddSeconds(secondsSince1904);

            return result;
        }
    }
}

View File

@@ -0,0 +1,189 @@
namespace UglyToad.Pdf.Fonts.TrueType
{
    /// <summary>
    /// A table directory entry from the TrueType font file.
    /// Also defines the tags of the known tables; tags are compared case-sensitively.
    /// </summary>
    internal class TrueTypeFontTable
    {
        #region RequiredTableTags
        /// <summary>
        /// Character to glyph mapping.
        /// </summary>
        /// <remarks>Required</remarks>
        public const string Cmap = "cmap";

        /// <summary>
        /// Glyph data.
        /// </summary>
        /// <remarks>Required</remarks>
        public const string Glyf = "glyf";

        /// <summary>
        /// Font header.
        /// </summary>
        /// <remarks>Required</remarks>
        public const string Head = "head";

        /// <summary>
        /// Horizontal header.
        /// </summary>
        /// <remarks>Required</remarks>
        public const string Hhea = "hhea";

        /// <summary>
        /// Horizontal metrics.
        /// </summary>
        /// <remarks>Required</remarks>
        public const string Hmtx = "hmtx";

        /// <summary>
        /// Index to location.
        /// </summary>
        /// <remarks>Required</remarks>
        public const string Loca = "loca";

        /// <summary>
        /// Maximum profile.
        /// </summary>
        /// <remarks>Required</remarks>
        public const string Maxp = "maxp";

        /// <summary>
        /// Naming table.
        /// </summary>
        /// <remarks>Required</remarks>
        public const string Name = "name";

        /// <summary>
        /// PostScript information.
        /// </summary>
        /// <remarks>Required</remarks>
        public const string Post = "post";

        /// <summary>
        /// OS/2 and Windows specific metrics.
        /// </summary>
        /// <remarks>Required</remarks>
        public const string Os2 = "OS/2";
        #endregion

        #region OptionalTableTags
        /// <summary>
        /// Control Value Table.
        /// </summary>
        /// <remarks>Optional</remarks>
        public const string Cvt = "cvt ";

        /// <summary>
        /// Embedded bitmap data.
        /// </summary>
        /// <remarks>Optional</remarks>
        public const string Ebdt = "EBDT";

        /// <summary>
        /// Embedded bitmap location data.
        /// </summary>
        /// <remarks>Optional</remarks>
        public const string Eblc = "EBLC";

        /// <summary>
        /// Embedded bitmap scaling data.
        /// </summary>
        /// <remarks>Optional</remarks>
        public const string Ebsc = "EBSC";

        /// <summary>
        /// Font program.
        /// </summary>
        /// <remarks>Optional</remarks>
        public const string Fpgm = "fpgm";

        /// <summary>
        /// Grid-fitting and scan conversion procedure (grayscale).
        /// </summary>
        /// <remarks>Optional</remarks>
        public const string Gasp = "gasp";

        /// <summary>
        /// Horizontal device metrics.
        /// </summary>
        /// <remarks>Optional</remarks>
        public const string Hdmx = "hdmx";

        /// <summary>
        /// Kerning.
        /// </summary>
        /// <remarks>Optional</remarks>
        public const string Kern = "kern";

        /// <summary>
        /// Linear threshold table.
        /// </summary>
        /// <remarks>Optional</remarks>
        public const string Ltsh = "LTSH";

        /// <summary>
        /// CVT program.
        /// </summary>
        /// <remarks>Optional</remarks>
        public const string Prep = "prep";

        /// <summary>
        /// PCL5.
        /// </summary>
        /// <remarks>Optional</remarks>
        public const string Pclt = "PCLT";

        /// <summary>
        /// Vertical device metrics.
        /// </summary>
        /// <remarks>Optional</remarks>
        public const string Vdmx = "VDMX";

        /// <summary>
        /// Vertical metrics header.
        /// </summary>
        /// <remarks>Optional</remarks>
        public const string Vhea = "vhea";

        /// <summary>
        /// Vertical metrics.
        /// </summary>
        /// <remarks>Optional</remarks>
        public const string Vmtx = "vmtx";
        #endregion

        #region PostScriptTableTags
        /// <summary>
        /// Compact Font Format (PostScript outline) data.
        /// </summary>
        /// <remarks>
        /// The registered tag is upper case with a trailing space; a lower case value would never
        /// match the tag read from a font because tags are compared case-sensitively.
        /// </remarks>
        public const string Cff = "CFF ";
        #endregion

        /// <summary>
        /// The 4 byte tag identifying the table.
        /// </summary>
        public string Tag { get; }

        /// <summary>
        /// The checksum for the table.
        /// </summary>
        public long CheckSum { get; }

        /// <summary>
        /// Offset of the table from the beginning of the file.
        /// </summary>
        public long Offset { get; }

        /// <summary>
        /// The length of the table.
        /// </summary>
        public long Length { get; }

        /// <summary>
        /// Creates a directory entry from the values read from the font's table directory.
        /// </summary>
        /// <param name="tag">The 4 byte tag identifying the table.</param>
        /// <param name="checkSum">The checksum for the table.</param>
        /// <param name="offset">Offset of the table from the beginning of the file.</param>
        /// <param name="length">The length of the table.</param>
        public TrueTypeFontTable(string tag, long checkSum, long offset, long length)
        {
            Tag = tag;
            CheckSum = checkSum;
            Offset = offset;
            Length = length;
        }
    }
}