diff --git a/src/UglyToad.Pdf.Tests/Fonts/TrueType/Parser/TrueTypeFontParserTests.cs b/src/UglyToad.Pdf.Tests/Fonts/TrueType/Parser/TrueTypeFontParserTests.cs
new file mode 100644
index 00000000..0a98135f
--- /dev/null
+++ b/src/UglyToad.Pdf.Tests/Fonts/TrueType/Parser/TrueTypeFontParserTests.cs
@@ -0,0 +1,62 @@
+namespace UglyToad.Pdf.Tests.Fonts.TrueType.Parser
+{
+    using System;
+    using System.IO;
+    using System.Linq;
+    using IO;
+    using Pdf.Fonts.TrueType;
+    using Pdf.Fonts.TrueType.Parser;
+    using Xunit;
+
+    /// <summary>
+    /// Smoke tests which run the <see cref="TrueTypeFontParser"/> over font files stored as manifest resources.
+    /// </summary>
+    public class TrueTypeFontParserTests
+    {
+        /// <summary>
+        /// Reads the single manifest resource of the test assembly whose name contains <paramref name="name"/>.
+        /// </summary>
+        /// <param name="name">Part of the resource name to find, matched ignoring case.</param>
+        /// <returns>The complete content of the matching resource.</returns>
+        private static byte[] GetFileBytes(string name)
+        {
+            var manifestFiles = typeof(TrueTypeFontParserTests).Assembly.GetManifestResourceNames();
+
+            // Resource names are identifiers, not linguistic text, so the comparison is ordinal.
+            // Single throws unless exactly one resource name contains the requested name.
+            var match = manifestFiles.Single(x => x.IndexOf(name, StringComparison.OrdinalIgnoreCase) >= 0);
+
+            using (var memoryStream = new MemoryStream())
+            using (var stream = typeof(TrueTypeFontParserTests).Assembly.GetManifestResourceStream(match))
+            {
+                stream.CopyTo(memoryStream);
+
+                return memoryStream.ToArray();
+            }
+        }
+
+        private readonly TrueTypeFontParser parser = new TrueTypeFontParser();
+
+        [Fact]
+        public void ParseRegularRoboto()
+        {
+            var bytes = GetFileBytes("Roboto-Regular");
+
+            var input = new TrueTypeDataBytes(new ByteArrayInputBytes(bytes));
+
+            // There is nothing to assert on yet: the test passes when parsing does not throw.
+            parser.Parse(input);
+        }
+
+        [Fact]
+        public void ParseEmbeddedSimpleGoogleDocssGautmi()
+        {
+            var bytes = GetFileBytes("google-simple-doc");
+
+            var input = new TrueTypeDataBytes(new ByteArrayInputBytes(bytes));
+
+            // There is nothing to assert on yet: the test passes when parsing does not throw.
+            parser.Parse(input);
+        }
+    }
+}
diff --git a/src/UglyToad.Pdf.Tests/Fonts/TrueType/Roboto-Regular.ttf b/src/UglyToad.Pdf.Tests/Fonts/TrueType/Roboto-Regular.ttf
new file mode 100644
index 00000000..7d9a6c4c
Binary files /dev/null and b/src/UglyToad.Pdf.Tests/Fonts/TrueType/Roboto-Regular.ttf differ
diff --git a/src/UglyToad.Pdf.Tests/Fonts/TrueType/google-simple-doc.ttf b/src/UglyToad.Pdf.Tests/Fonts/TrueType/google-simple-doc.ttf
new file mode 100644
index 00000000..0b876103
Binary files 
/dev/null and b/src/UglyToad.Pdf.Tests/Fonts/TrueType/google-simple-doc.ttf differ
diff --git a/src/UglyToad.Pdf.Tests/UglyToad.Pdf.Tests.csproj b/src/UglyToad.Pdf.Tests/UglyToad.Pdf.Tests.csproj
index 2595600f..51b2ce16 100644
--- a/src/UglyToad.Pdf.Tests/UglyToad.Pdf.Tests.csproj
+++ b/src/UglyToad.Pdf.Tests/UglyToad.Pdf.Tests.csproj
@@ -7,10 +7,18 @@ + + + + PreserveNewest + + + PreserveNewest + PreserveNewest
diff --git a/src/UglyToad.Pdf/Fonts/FontFactory.cs b/src/UglyToad.Pdf/Fonts/FontFactory.cs
index 34a0d704..c97cd2a0 100644
--- a/src/UglyToad.Pdf/Fonts/FontFactory.cs
+++ b/src/UglyToad.Pdf/Fonts/FontFactory.cs
@@ -7,6 +7,7 @@
     using Parser.Handlers;
     using Parser.Parts;
     using Pdf.Parser;
+    using TrueType.Parser;
 
     internal class FontFactory
     {
@@ -16,7 +17,7 @@
         {
             Handlers = new Dictionary
             {
-                {CosName.TYPE0, new Type0FontHandler(new CidFontFactory(new FontDescriptorFactory()))}
+                {CosName.TYPE0, new Type0FontHandler(new CidFontFactory(new FontDescriptorFactory(), new TrueTypeFontParser()))}
             };
         }
 
diff --git a/src/UglyToad.Pdf/Fonts/Parser/Parts/CidFontFactory.cs b/src/UglyToad.Pdf/Fonts/Parser/Parts/CidFontFactory.cs
index d9cb5f7f..bfacec5c 100644
--- a/src/UglyToad.Pdf/Fonts/Parser/Parts/CidFontFactory.cs
+++ b/src/UglyToad.Pdf/Fonts/Parser/Parts/CidFontFactory.cs
@@ -8,16 +8,21 @@
     using Cos;
     using Filters;
     using Geometry;
+    using IO;
     using Pdf.Parser;
+    using TrueType;
+    using TrueType.Parser;
     using Util;
 
     internal class CidFontFactory
     {
         private readonly FontDescriptorFactory descriptorFactory;
+        private readonly TrueTypeFontParser trueTypeFontParser;
 
-        public CidFontFactory(FontDescriptorFactory descriptorFactory)
+        public CidFontFactory(FontDescriptorFactory descriptorFactory, TrueTypeFontParser trueTypeFontParser)
         {
             this.descriptorFactory = descriptorFactory;
+            this.trueTypeFontParser = trueTypeFontParser;
         }
 
         public ICidFont Generate(PdfDictionary dictionary, ParsingArguments arguments, bool isLenientParsing)
@@ -75,7 +80,7 @@
             return true;
         }
 
-        private static void ReadDescriptorFile(FontDescriptor descriptor, ParsingArguments arguments)
+        private void ReadDescriptorFile(FontDescriptor descriptor, ParsingArguments arguments)
         {
             if (descriptor?.FontFile == null)
             {
@@ -90,6 +95,16 @@
             }
 
             var fontFile = fontFileStream.Decode(arguments.Get());
+
+            switch (descriptor.FontFile.FileType)
+            {
+                case DescriptorFontFile.FontFileType.TrueType:
+                    var input = new TrueTypeDataBytes(new ByteArrayInputBytes(fontFile));
+                    trueTypeFontParser.Parse(input);
+                    break;
+                default:
+                    throw new NotSupportedException("Currently only TrueType fonts are supported.");
+            }
         }
 
         private static IReadOnlyDictionary ReadWidths(PdfDictionary dict)
diff --git a/src/UglyToad.Pdf/Fonts/TrueType/Parser/HeaderTableParser.cs b/src/UglyToad.Pdf/Fonts/TrueType/Parser/HeaderTableParser.cs
new file mode 100644
index 00000000..48900547
--- /dev/null
+++ b/src/UglyToad.Pdf/Fonts/TrueType/Parser/HeaderTableParser.cs
@@ -0,0 +1,47 @@
+namespace UglyToad.Pdf.Fonts.TrueType.Parser
+{
+    using Tables;
+
+    /// <summary>
+    /// Reads the font header table into a <see cref="HeaderTable"/>.
+    /// </summary>
+    internal class HeaderTableParser : ITrueTypeTableParser
+    {
+        /// <summary>
+        /// The tag of the table this parser handles, <see cref="TrueTypeFontTable.Head"/>.
+        /// </summary>
+        public string Tag => TrueTypeFontTable.Head;
+
+        /// <summary>
+        /// Reads the header fields in order from the current position of <paramref name="data"/>.
+        /// </summary>
+        /// <param name="data">The font bytes to read the fields from.</param>
+        /// <param name="table">The directory entry describing the header table.</param>
+        /// <returns>A <see cref="HeaderTable"/> holding the values which were read.</returns>
+        public ITable Parse(TrueTypeDataBytes data, TrueTypeFontTable table)
+        {
+            var version = data.Read32Fixed();
+            var fontRevision = data.Read32Fixed();
+            var checkSumAdjustment = data.ReadUnsignedInt();
+            var magicNumber = data.ReadUnsignedInt();
+            var flags = data.ReadUnsignedShort();
+            var unitsPerEm = data.ReadUnsignedShort();
+            var created = data.ReadInternationalDate();
+            var modified = data.ReadInternationalDate();
+            var xMin = data.ReadSignedShort();
+            var yMin = data.ReadSignedShort();
+            var xMax = data.ReadSignedShort();
+            var yMax = data.ReadSignedShort();
+            var macStyle = data.ReadUnsignedShort();
+            var lowestRecPpem = data.ReadUnsignedShort();
+            var fontDirectionHint = data.ReadSignedShort();
+            var indexToLocFormat = data.ReadSignedShort();
+            var glyphDataFormat = data.ReadSignedShort();
+
+            return new HeaderTable(table, (decimal)version, (decimal)fontRevision, checkSumAdjustment,
+                magicNumber, flags, unitsPerEm, created, modified,
+                xMin, yMin, xMax, yMax, macStyle, lowestRecPpem,
+                fontDirectionHint, indexToLocFormat, glyphDataFormat);
+        }
+    }
+}
diff --git a/src/UglyToad.Pdf/Fonts/TrueType/Parser/ITrueTypeTableParser.cs b/src/UglyToad.Pdf/Fonts/TrueType/Parser/ITrueTypeTableParser.cs
new file mode 100644
index 00000000..61aa9aa9
--- /dev/null
+++ b/src/UglyToad.Pdf/Fonts/TrueType/Parser/ITrueTypeTableParser.cs
@@ -0,0 +1,23 @@
+namespace UglyToad.Pdf.Fonts.TrueType.Parser
+{
+    using Tables;
+
+    /// <summary>
+    /// Parses one kind of table from a TrueType font.
+    /// </summary>
+    internal interface ITrueTypeTableParser
+    {
+        /// <summary>
+        /// The tag of the table this parser handles.
+        /// </summary>
+        string Tag { get; }
+
+        /// <summary>
+        /// Reads the table from <paramref name="data"/>.
+        /// </summary>
+        /// <param name="data">The font bytes to read from.</param>
+        /// <param name="table">The directory entry describing the table.</param>
+        /// <returns>The parsed table.</returns>
+        ITable Parse(TrueTypeDataBytes data, TrueTypeFontTable table);
+    }
+}
\ No newline at end of file
diff --git a/src/UglyToad.Pdf/Fonts/TrueType/Parser/TrueTypeFontParser.cs b/src/UglyToad.Pdf/Fonts/TrueType/Parser/TrueTypeFontParser.cs
new file mode 100644
index 00000000..5a997625
--- /dev/null
+++ b/src/UglyToad.Pdf/Fonts/TrueType/Parser/TrueTypeFontParser.cs
@@ -0,0 +1,92 @@
+namespace UglyToad.Pdf.Fonts.TrueType.Parser
+{
+    using System;
+    using System.Collections.Generic;
+    using Util.JetBrains.Annotations;
+
+    /// <summary>
+    /// Reads the table directory of a TrueType font and hands the known tables to their parsers.
+    /// </summary>
+    internal class TrueTypeFontParser
+    {
+        /// <summary>
+        /// The number of bytes read for a table tag.
+        /// </summary>
+        private const int TagLength = 4;
+
+        /// <summary>
+        /// The available table parsers keyed by the tag of the table they read.
+        /// </summary>
+        private static readonly IReadOnlyDictionary<string, ITrueTypeTableParser> parsers =
+            new Dictionary<string, ITrueTypeTableParser>
+            {
+                {TrueTypeFontTable.Head, new HeaderTableParser()}
+            };
+
+        /// <summary>
+        /// Reads the leading font values and the table directory from <paramref name="data"/> then parses the tables.
+        /// </summary>
+        /// <param name="data">The font bytes; reading starts at the current position.</param>
+        /// <exception cref="InvalidOperationException">The directory contains no header table.</exception>
+        public void Parse(TrueTypeDataBytes data)
+        {
+            // Only numberOfTables is used so far; the other values must still be read to reach the directory.
+            var version = data.Read32Fixed();
+            int numberOfTables = data.ReadUnsignedShort();
+            int searchRange = data.ReadUnsignedShort();
+            int entrySelector = data.ReadUnsignedShort();
+            int rangeShift = data.ReadUnsignedShort();
+
+            var tables = new Dictionary<string, TrueTypeFontTable>();
+
+            for (var i = 0; i < numberOfTables; i++)
+            {
+                var table = ReadTable(data);
+
+                if (table != null)
+                {
+                    tables[table.Tag] = table;
+                }
+            }
+
+            ParseTables(tables, data);
+        }
+
+        /// <summary>
+        /// Reads one table directory entry: tag, checksum, offset and length.
+        /// </summary>
+        /// <returns>The entry, or <see langword="null"/> for a zero length table other than the glyph table.</returns>
+        [CanBeNull]
+        private static TrueTypeFontTable ReadTable(TrueTypeDataBytes data)
+        {
+            var tag = data.ReadString(TagLength);
+            var checksum = data.ReadUnsignedInt();
+            var offset = data.ReadUnsignedInt();
+            var length = data.ReadUnsignedInt();
+
+            // skip tables with zero length (except glyf)
+            if (length == 0 && !string.Equals(tag, TrueTypeFontTable.Glyf, StringComparison.Ordinal))
+            {
+                return null;
+            }
+
+            return new TrueTypeFontTable(tag, checksum, offset, length);
+        }
+
+        /// <summary>
+        /// Parses the tables found in the directory; currently only the required header table.
+        /// </summary>
+        /// <exception cref="InvalidOperationException">The directory contains no header table.</exception>
+        private static void ParseTables(IReadOnlyDictionary<string, TrueTypeFontTable> tables, TrueTypeDataBytes data)
+        {
+            if (!tables.TryGetValue(TrueTypeFontTable.Head, out var table))
+            {
+                throw new InvalidOperationException($"The {TrueTypeFontTable.Head} table is required.");
+            }
+
+            // NOTE(review): nothing here moves data to table.Offset first, so the header parser reads from
+            // wherever the directory loop stopped - confirm whether a seek to the table offset is missing.
+            parsers[TrueTypeFontTable.Head].Parse(data, table);
+        }
+    }
+}
diff --git a/src/UglyToad.Pdf/Fonts/TrueType/Tables/HeaderTable.cs b/src/UglyToad.Pdf/Fonts/TrueType/Tables/HeaderTable.cs
new file mode 100644
index 00000000..dcd3daa3
--- /dev/null
+++ b/src/UglyToad.Pdf/Fonts/TrueType/Tables/HeaderTable.cs
@@ -0,0 +1,114 @@
+namespace UglyToad.Pdf.Fonts.TrueType.Tables
+{
+    using System;
+
+    /// <summary>
+    /// Gives global information about the font.
+    /// </summary>
+    internal class HeaderTable : ITable
+    {
+        public string Tag => TrueTypeFontTable.Head;
+
+        /// <summary>
+        /// Bold macStyle flag.
+        /// </summary>
+        public const int MacStyleBold = 1;
+
+        /// <summary>
+        /// Italic macStyle flag.
+        /// </summary>
+        public const int MacStyleItalic = 2;
+
+        public TrueTypeFontTable DirectoryTable { get; }
+
+        public decimal Version { get; }
+
+        public decimal Revision { get; }
+
+        public long CheckSumAdjustment { get; }
+
+        public long MagicNumber { get; }
+
+        public int Flags { get; }
+
+        public int UnitsPerEm { get; }
+
+        public DateTime Created { get; }
+
+        public DateTime Modified { get; }
+
+        public short XMin { get; }
+
+        public short YMin { get; }
+
+        public short XMax { get; }
+
+        public short YMax { get; }
+
+        public int MacStyle { get; }
+
+        /// <summary>
+        /// Smallest readable size in pixels.
+        /// </summary>
+        public int LowestRecommendedPpem { get; }
+
+        public FontDirection FontDirectionHint { get; }
+
+        /// <summary>
+        /// 0 for short offsets, 1 for long.
+        /// </summary>
+        public short IndexToLocFormat { get; }
+
+        /// <summary>
+        /// 0 for current format.
+        /// </summary>
+        public short GlyphDataFormat { get; }
+
+        /// <summary>
+        /// Creates a header table from its directory entry and field values.
+        /// </summary>
+        /// <exception cref="ArgumentNullException"><paramref name="directoryTable"/> is null.</exception>
+        public HeaderTable(TrueTypeFontTable directoryTable, decimal version, decimal revision, long checkSumAdjustment,
+            long magicNumber, int flags, int unitsPerEm,
+            DateTime created, DateTime modified,
+            short xMin, short yMin,
+            short xMax, short yMax,
+            int macStyle,
+            int lowestRecommendedPpem,
+            short fontDirectionHint,
+            short indexToLocFormat,
+            short glyphDataFormat)
+        {
+            DirectoryTable = directoryTable ?? throw new ArgumentNullException(nameof(directoryTable));
+            Version = version;
+            Revision = revision;
+            CheckSumAdjustment = checkSumAdjustment;
+            MagicNumber = magicNumber;
+            Flags = flags;
+            UnitsPerEm = unitsPerEm;
+            Created = created;
+            Modified = modified;
+            XMin = xMin;
+            YMin = yMin;
+            XMax = xMax;
+            YMax = yMax;
+            MacStyle = macStyle;
+            LowestRecommendedPpem = lowestRecommendedPpem;
+            FontDirectionHint = (FontDirection)fontDirectionHint;
+            IndexToLocFormat = indexToLocFormat;
+            GlyphDataFormat = glyphDataFormat;
+        }
+
+        /// <summary>
+        /// The values of <see cref="FontDirectionHint"/>.
+        /// </summary>
+        public enum FontDirection
+        {
+            StronglyRightToLeftWithNeutrals = -2,
+            StronglyRightToLeft = -1,
+            FullyMixedDirectional = 0,
+            StronglyLeftToRight = 1,
+            StronglyLeftToRightWithNeutrals = 2
+        }
+    }
+}
diff --git a/src/UglyToad.Pdf/Fonts/TrueType/Tables/ITable.cs b/src/UglyToad.Pdf/Fonts/TrueType/Tables/ITable.cs
new file mode 100644
index 00000000..7a609ef9
--- /dev/null
+++ b/src/UglyToad.Pdf/Fonts/TrueType/Tables/ITable.cs
@@ -0,0 +1,18 @@
+namespace UglyToad.Pdf.Fonts.TrueType.Tables
+{
+    /// <summary>
+    /// A parsed table from a TrueType font.
+    /// </summary>
+    internal interface ITable
+    {
+        /// <summary>
+        /// The tag identifying the kind of table.
+        /// </summary>
+        string Tag { get; }
+
+        /// <summary>
+        /// The table directory entry this table was read for.
+        /// </summary>
+        TrueTypeFontTable DirectoryTable { get; }
+    }
+}
diff --git a/src/UglyToad.Pdf/Fonts/TrueType/TrueTypeDataBytes.cs b/src/UglyToad.Pdf/Fonts/TrueType/TrueTypeDataBytes.cs
new file mode 100644
index 00000000..18db68ea
--- /dev/null
+++ b/src/UglyToad.Pdf/Fonts/TrueType/TrueTypeDataBytes.cs
@@ -0,0 +1,186 @@
+namespace UglyToad.Pdf.Fonts.TrueType
+{
+    using System;
+    using System.Globalization;
+    using System.IO;
+    using System.Text;
+    using IO;
+    using Util;
+
+    /// <summary>
+    /// Reads primitive values, most significant byte first, from an underlying <see cref="IInputBytes"/>.
+    /// </summary>
+    internal class TrueTypeDataBytes
+    {
+        private readonly IInputBytes inputBytes;
+
+        public TrueTypeDataBytes(IInputBytes inputBytes)
+        {
+            this.inputBytes = inputBytes;
+        }
+
+        /// <summary>
+        /// Reads a 32-bit fixed point number: a signed 16-bit integer part followed by a 16-bit fraction.
+        /// </summary>
+        public float Read32Fixed()
+        {
+            float retval = ReadSignedShort();
+            retval += (ReadUnsignedShort() / 65536.0f);
+            return retval;
+        }
+
+        /// <summary>
+        /// Reads a signed 16-bit integer, most significant byte first.
+        /// </summary>
+        /// <exception cref="EndOfStreamException">The data ended before both bytes were read.</exception>
+        public short ReadSignedShort()
+        {
+            int ch1 = Read();
+            int ch2 = Read();
+            if ((ch1 | ch2) < 0)
+            {
+                throw new EndOfStreamException();
+            }
+
+            return (short)((ch1 << 8) + (ch2 << 0));
+        }
+
+        /// <summary>
+        /// Reads an unsigned 16-bit integer, most significant byte first.
+        /// </summary>
+        /// <exception cref="EndOfStreamException">The data ended before both bytes were read.</exception>
+        public int ReadUnsignedShort()
+        {
+            int ch1 = Read();
+            int ch2 = Read();
+            if ((ch1 | ch2) < 0)
+            {
+                throw new EndOfStreamException();
+            }
+
+            return (ch1 << 8) + (ch2 << 0);
+        }
+
+        /// <summary>
+        /// Reads the next byte.
+        /// </summary>
+        /// <returns>The byte as a value from 0 to 255, or -1 when there are no more bytes.</returns>
+        public int Read()
+        {
+            // We're no longer moving because we're at the end.
+            if (!inputBytes.MoveNext())
+            {
+                return -1;
+            }
+
+            // The current byte is unsigned so it already lies between 0 and 255.
+            return inputBytes.CurrentByte;
+        }
+
+        /// <summary>
+        /// Reads exactly <paramref name="numberOfBytes"/> bytes.
+        /// </summary>
+        /// <exception cref="EndOfStreamException">The data ended before all the bytes were read.</exception>
+        public byte[] Read(int numberOfBytes)
+        {
+            byte[] data = new byte[numberOfBytes];
+            int amountRead = 0;
+
+            while (amountRead < numberOfBytes)
+            {
+                if (!inputBytes.MoveNext())
+                {
+                    throw new EndOfStreamException();
+                }
+
+                data[amountRead] = inputBytes.CurrentByte;
+                amountRead++;
+            }
+
+            return data;
+        }
+
+        /// <summary>
+        /// Reads <paramref name="length"/> bytes and decodes them with <see cref="OtherEncodings.Iso88591"/>.
+        /// </summary>
+        public string ReadString(int length)
+        {
+            return ReadString(length, OtherEncodings.Iso88591);
+        }
+
+        /// <summary>
+        /// Reads <paramref name="length"/> bytes and decodes them with <paramref name="encoding"/>.
+        /// </summary>
+        public string ReadString(int length, Encoding encoding)
+        {
+            byte[] buffer = Read(length);
+
+            var str = encoding.GetString(buffer);
+
+            return str;
+        }
+
+        /// <summary>
+        /// Reads an unsigned 32-bit integer, most significant byte first.
+        /// </summary>
+        /// <exception cref="EndOfStreamException">The data ended before all four bytes were read.</exception>
+        public long ReadUnsignedInt()
+        {
+            long byte1 = Read();
+            long byte2 = Read();
+            long byte3 = Read();
+            long byte4 = Read();
+
+            if ((byte1 | byte2 | byte3 | byte4) < 0)
+            {
+                throw new EndOfStreamException();
+            }
+
+            return (byte1 << 24) + (byte2 << 16) + (byte3 << 8) + (byte4 << 0);
+        }
+
+        /// <summary>
+        /// Reads a signed 32-bit integer, most significant byte first.
+        /// </summary>
+        /// <exception cref="EndOfStreamException">The data ended before all four bytes were read.</exception>
+        public int ReadSignedInt()
+        {
+            int ch1 = Read();
+            int ch2 = Read();
+            int ch3 = Read();
+            int ch4 = Read();
+            if ((ch1 | ch2 | ch3 | ch4) < 0)
+            {
+                throw new EndOfStreamException();
+            }
+
+            return (ch1 << 24) + (ch2 << 16) + (ch3 << 8) + (ch4 << 0);
+        }
+
+        /// <summary>
+        /// Reads a signed 64-bit integer, most significant byte first.
+        /// </summary>
+        /// <exception cref="EndOfStreamException">The data ended before all eight bytes were read.</exception>
+        public long ReadLong()
+        {
+            // Widen the high word before shifting: shifting an int uses only the low five bits of the
+            // count, so an int shifted by 32 is left unchanged instead of becoming the upper half.
+            return ((long)ReadSignedInt() << 32) + (ReadSignedInt() & 0xFFFFFFFFL);
+        }
+
+        /// <summary>
+        /// Reads a 64-bit count of seconds and adds it to midnight on the 1st of January 1904.
+        /// </summary>
+        public DateTime ReadInternationalDate()
+        {
+            // TODO: this returns the wrong value, investigate...
+            long secondsSince1904 = ReadLong();
+
+            var date = new DateTime(1904, 1, 1, 0, 0, 0, 0, new GregorianCalendar());
+
+            var result = date.AddSeconds(secondsSince1904);
+
+            return result;
+        }
+    }
+}
diff --git a/src/UglyToad.Pdf/Fonts/TrueType/TrueTypeFontTable.cs b/src/UglyToad.Pdf/Fonts/TrueType/TrueTypeFontTable.cs
new file mode 100644
index 00000000..8a0b57ac
--- /dev/null
+++ b/src/UglyToad.Pdf/Fonts/TrueType/TrueTypeFontTable.cs
@@ -0,0 +1,195 @@
+namespace UglyToad.Pdf.Fonts.TrueType
+{
+    /// <summary>
+    /// A table directory entry from the TrueType font file.
+    /// </summary>
+    internal class TrueTypeFontTable
+    {
+        #region RequiredTableTags
+        /// <summary>
+        /// Character to glyph mapping.
+        /// </summary>
+        /// <remarks>Required</remarks>
+        public const string Cmap = "cmap";
+
+        /// <summary>
+        /// Glyph data.
+        /// </summary>
+        /// <remarks>Required</remarks>
+        public const string Glyf = "glyf";
+
+        /// <summary>
+        /// Font header.
+        /// </summary>
+        /// <remarks>Required</remarks>
+        public const string Head = "head";
+
+        /// <summary>
+        /// Horizontal header.
+        /// </summary>
+        /// <remarks>Required</remarks>
+        public const string Hhea = "hhea";
+
+        /// <summary>
+        /// Horizontal metrics.
+        /// </summary>
+        /// <remarks>Required</remarks>
+        public const string Hmtx = "hmtx";
+
+        /// <summary>
+        /// Index to location.
+        /// </summary>
+        /// <remarks>Required</remarks>
+        public const string Loca = "loca";
+
+        /// <summary>
+        /// Maximum profile.
+        /// </summary>
+        /// <remarks>Required</remarks>
+        public const string Maxp = "maxp";
+
+        /// <summary>
+        /// Naming table.
+        /// </summary>
+        /// <remarks>Required</remarks>
+        public const string Name = "name";
+
+        /// <summary>
+        /// PostScript information.
+        /// </summary>
+        /// <remarks>Required</remarks>
+        public const string Post = "post";
+
+        /// <summary>
+        /// OS/2 and Windows specific metrics.
+        /// </summary>
+        /// <remarks>Required</remarks>
+        public const string Os2 = "OS/2";
+        #endregion
+
+        #region OptionalTableTags
+        /// <summary>
+        /// Control Value Table.
+        /// </summary>
+        /// <remarks>Optional</remarks>
+        public const string Cvt = "cvt ";
+
+        /// <summary>
+        /// Embedded bitmap data.
+        /// </summary>
+        /// <remarks>Optional</remarks>
+        public const string Ebdt = "EBDT";
+
+        /// <summary>
+        /// Embedded bitmap location data.
+        /// </summary>
+        /// <remarks>Optional</remarks>
+        public const string Eblc = "EBLC";
+
+        /// <summary>
+        /// Embedded bitmap scaling data.
+        /// </summary>
+        /// <remarks>Optional</remarks>
+        public const string Ebsc = "EBSC";
+
+        /// <summary>
+        /// Font program.
+        /// </summary>
+        /// <remarks>Optional</remarks>
+        public const string Fpgm = "fpgm";
+
+        /// <summary>
+        /// Grid-fitting and scan conversion procedure (grayscale).
+        /// </summary>
+        /// <remarks>Optional</remarks>
+        public const string Gasp = "gasp";
+
+        /// <summary>
+        /// Horizontal device metrics.
+        /// </summary>
+        /// <remarks>Optional</remarks>
+        public const string Hdmx = "hdmx";
+
+        /// <summary>
+        /// Kerning.
+        /// </summary>
+        /// <remarks>Optional</remarks>
+        public const string Kern = "kern";
+
+        /// <summary>
+        /// Linear threshold data.
+        /// </summary>
+        /// <remarks>Optional</remarks>
+        public const string Ltsh = "LTSH";
+
+        /// <summary>
+        /// CVT program.
+        /// </summary>
+        /// <remarks>Optional</remarks>
+        public const string Prep = "prep";
+
+        /// <summary>
+        /// PCL5.
+        /// </summary>
+        /// <remarks>Optional</remarks>
+        public const string Pclt = "PCLT";
+
+        /// <summary>
+        /// Vertical device metrics.
+        /// </summary>
+        /// <remarks>Optional</remarks>
+        public const string Vdmx = "VDMX";
+
+        /// <summary>
+        /// Vertical metrics header.
+        /// </summary>
+        /// <remarks>Optional</remarks>
+        public const string Vhea = "vhea";
+
+        /// <summary>
+        /// Vertical metrics.
+        /// </summary>
+        /// <remarks>Optional</remarks>
+        public const string Vmtx = "vmtx";
+        #endregion
+
+        #region PostScriptTableTags
+        /// <summary>
+        /// Compact Font Format (PostScript outlines).
+        /// </summary>
+        /// <remarks>Tags are case-sensitive; this one is upper case followed by a space.</remarks>
+        public const string Cff = "CFF ";
+        #endregion
+
+        /// <summary>
+        /// The 4 byte tag identifying the table.
+        /// </summary>
+        public string Tag { get; }
+
+        /// <summary>
+        /// The checksum for the table.
+        /// </summary>
+        public long CheckSum { get; }
+
+        /// <summary>
+        /// Offset of the table from the beginning of the file.
+        /// </summary>
+        public long Offset { get; }
+
+        /// <summary>
+        /// The length of the table.
+        /// </summary>
+        public long Length { get; }
+
+        /// <summary>
+        /// Creates a table directory entry from its four values.
+        /// </summary>
+        public TrueTypeFontTable(string tag, long checkSum, long offset, long length)
+        {
+            Tag = tag;
+            CheckSum = checkSum;
+            Offset = offset;
+            Length = length;
+        }
+    }
+}