From e5aa7522489ea67aae3e090fdcb7ec219f1af1c8 Mon Sep 17 00:00:00 2001 From: Eliot Jones Date: Sun, 17 Dec 2017 09:51:37 +0000 Subject: [PATCH] fix problems with parsing the glyph table and other bugs with truetype parsing --- .../Parser/TrueTypeFontParserTests.cs | 56 ++++++++++ .../Fonts/TrueType/TrueTypeDataBytesTests.cs | 27 +++++ .../Fonts/TrueType/Parser/TrueTypeFont.cs | 6 +- .../TrueType/Parser/TrueTypeFontParser.cs | 9 +- .../Fonts/TrueType/Tables/GlyphDataTable.cs | 103 +++++++++++++++++- .../TrueType/Tables/IndexToLocationTable.cs | 30 ++--- .../Fonts/TrueType/TrueTypeDataBytes.cs | 40 ++++++- .../Fonts/TrueType/TrueTypeHeaderTable.cs | 5 + src/UglyToad.Pdf/IO/ByteArrayInputBytes.cs | 2 +- 9 files changed, 255 insertions(+), 23 deletions(-) create mode 100644 src/UglyToad.Pdf.Tests/Fonts/TrueType/TrueTypeDataBytesTests.cs diff --git a/src/UglyToad.Pdf.Tests/Fonts/TrueType/Parser/TrueTypeFontParserTests.cs b/src/UglyToad.Pdf.Tests/Fonts/TrueType/Parser/TrueTypeFontParserTests.cs index 3792212f..37971191 100644 --- a/src/UglyToad.Pdf.Tests/Fonts/TrueType/Parser/TrueTypeFontParserTests.cs +++ b/src/UglyToad.Pdf.Tests/Fonts/TrueType/Parser/TrueTypeFontParserTests.cs @@ -78,6 +78,62 @@ Assert.Equal(0, font.HeaderTable.GlyphDataFormat); } + [Fact] + public void RobotoHeaderReadCorrectly() + { + var data = new[] + { + // key, offset, length, checksum + "DSIG 158596 8 1", + "GDEF 316 72 408950881", + "GPOS 388 35744 355098641", + "GSUB 36132 662 3357985284", + "OS/2 36796 96 3097700805", + "cmap 36892 1750 298470964", + "cvt 156132 38 119085513", + "fpgm 156172 2341 2494100564", + "gasp 156124 8 16", + "glyf 38644 88820 3302131736", + "head 127464 54 346075833", + "hhea 127520 36 217516755", + "hmtx 127556 4148 1859679943", + "kern 131704 12306 2002873469", + "loca 144012 2076 77421448", + "maxp 146088 32 89459325", + "name 146120 830 44343214", + "post 146952 9171 3638780613", + "prep 158516 77 251381919" + }; + + var bytes = GetFileBytes("Roboto-Regular"); + + var input = new TrueTypeDataBytes(new ByteArrayInputBytes(bytes)); + + var font = parser.Parse(input); + + foreach (var s in data) + { + var parts = s.Split(' ', StringSplitOptions.RemoveEmptyEntries); + + var name = parts[0]; + + if (name == "cvt") + { + name = "cvt "; + } + + var match = font.Tables[name]; + + var offset = long.Parse(parts[1]); + var length = long.Parse(parts[2]); + var checksum = long.Parse(parts[3]); + + Assert.Equal(offset, match.Offset); + Assert.Equal(length, match.Length); + Assert.Equal(checksum, match.CheckSum); + } + } + [Fact] public void ParseEmbeddedSimpleGoogleDocssGautmi() { diff --git a/src/UglyToad.Pdf.Tests/Fonts/TrueType/TrueTypeDataBytesTests.cs b/src/UglyToad.Pdf.Tests/Fonts/TrueType/TrueTypeDataBytesTests.cs new file mode 100644 index 00000000..b8f54519 --- /dev/null +++ b/src/UglyToad.Pdf.Tests/Fonts/TrueType/TrueTypeDataBytesTests.cs @@ -0,0 +1,27 @@ +namespace UglyToad.Pdf.Tests.Fonts.TrueType +{ + using IO; + using Pdf.Fonts.TrueType; + using Xunit; + + public class TrueTypeDataBytesTests + { + [Fact] + public void ReadUnsignedInt() + { + var input = new ByteArrayInputBytes(new byte[] + { + 220, + 43, + 250, + 6 + }); + + var data = new TrueTypeDataBytes(input); + + var result = data.ReadUnsignedInt(); + + Assert.Equal(3693869574L, result); + } + } +} diff --git a/src/UglyToad.Pdf/Fonts/TrueType/Parser/TrueTypeFont.cs b/src/UglyToad.Pdf/Fonts/TrueType/Parser/TrueTypeFont.cs index 24257111..6e567851 100644 --- a/src/UglyToad.Pdf/Fonts/TrueType/Parser/TrueTypeFont.cs +++ b/src/UglyToad.Pdf/Fonts/TrueType/Parser/TrueTypeFont.cs @@ -1,16 +1,20 @@ namespace UglyToad.Pdf.Fonts.TrueType.Parser { + using System.Collections.Generic; using Tables; internal class TrueTypeFont { public decimal Version { get; } + public IReadOnlyDictionary Tables { get; } + public HeaderTable HeaderTable { get; } - public TrueTypeFont(decimal version, HeaderTable headerTable) + public TrueTypeFont(decimal version, IReadOnlyDictionary tables, HeaderTable headerTable) { Version = version; + Tables = tables; HeaderTable = headerTable; } } diff --git a/src/UglyToad.Pdf/Fonts/TrueType/Parser/TrueTypeFontParser.cs b/src/UglyToad.Pdf/Fonts/TrueType/Parser/TrueTypeFontParser.cs index 57e35407..ce71f1ee 100644 --- a/src/UglyToad.Pdf/Fonts/TrueType/Parser/TrueTypeFontParser.cs +++ b/src/UglyToad.Pdf/Fonts/TrueType/Parser/TrueTypeFontParser.cs @@ -89,9 +89,16 @@ var indexToLocationTable = IndexToLocationTable.Load(data, indexToLocationHeaderTable, header, maximumProfile); + + if (!tables.TryGetValue(TrueTypeHeaderTable.Glyf, out var glyphHeaderTable)) + { + throw new InvalidOperationException("The glpyh table is required for non-PostScript fonts."); + } + + var glyphTable = GlyphDataTable.Load(data, glyphHeaderTable, header, indexToLocationTable); } - return new TrueTypeFont(version, header); + return new TrueTypeFont(version, tables, header); } } } diff --git a/src/UglyToad.Pdf/Fonts/TrueType/Tables/GlyphDataTable.cs b/src/UglyToad.Pdf/Fonts/TrueType/Tables/GlyphDataTable.cs index 088efa7f..eaf375f2 100644 --- a/src/UglyToad.Pdf/Fonts/TrueType/Tables/GlyphDataTable.cs +++ b/src/UglyToad.Pdf/Fonts/TrueType/Tables/GlyphDataTable.cs @@ -1,6 +1,8 @@ namespace UglyToad.Pdf.Fonts.TrueType.Tables { using System; + using System.Collections.Generic; + using Util.JetBrains.Annotations; /// /// Describes the glyphs in the font. @@ -11,6 +13,15 @@ public TrueTypeHeaderTable DirectoryTable { get; } + [ItemCanBeNull] + public IReadOnlyList Glyphs { get; } + + public GlyphDataTable(TrueTypeHeaderTable directoryTable, IReadOnlyList glyphs) + { + DirectoryTable = directoryTable; + Glyphs = glyphs ?? throw new ArgumentNullException(nameof(glyphs)); + } + public static GlyphDataTable Load(TrueTypeDataBytes data, TrueTypeHeaderTable table, HeaderTable headerTable, IndexToLocationTable indexToLocationTable) { @@ -22,7 +33,7 @@ var glyphCount = entryCount - 1; - var glyphs = new object[glyphCount]; + var glyphs = new IGlyphDescription[glyphCount]; for (var i = 0; i < glyphCount; i++) { @@ -32,7 +43,7 @@ continue; } - data.Seek(offsets[i] - 1); + data.Seek(offsets[i] - 1 + table.Offset); var contourCount = data.ReadSignedShort(); @@ -50,20 +61,94 @@ } else { - + } } - throw new NotImplementedException(); + return new GlyphDataTable(table, glyphs); } private static SimpleGlyphDescription ReadSimpleGlyph(TrueTypeDataBytes data, short contourCount, TrueTypeGlyphBounds bounds) { - throw new NotImplementedException("Reading simple glyphs not supported yet."); + var endPointsOfContours = data.ReadUnsignedShortArray(contourCount); + + var instructionLength = data.ReadUnsignedShort(); + + data.ReadByteArray(instructionLength); + + var pointCount = 0; + if (contourCount > 0) + { + pointCount = endPointsOfContours[contourCount - 1] + 1; + } + + var flags = ReadFlags(data, pointCount); + + var xCoordinates = ReadCoordinates(data, pointCount, flags, SimpleGlyphFlags.XShortVector, + SimpleGlyphFlags.XSignOrSame); + + var yCoordinates = ReadCoordinates(data, pointCount, flags, SimpleGlyphFlags.YShortVector, + SimpleGlyphFlags.YSignOrSame); + + return new SimpleGlyphDescription(instructionLength, endPointsOfContours, flags, xCoordinates, yCoordinates); + } + + private static SimpleGlyphFlags[] ReadFlags(TrueTypeDataBytes data, int pointCount) + { + var result = new SimpleGlyphFlags[pointCount]; + + for (var i = 0; i < pointCount; i++) + { + result[i] = (SimpleGlyphFlags)data.ReadByte(); + } + + return result; + } + + private static short[] ReadCoordinates(TrueTypeDataBytes data, int pointCount, SimpleGlyphFlags[] flags, SimpleGlyphFlags isByte, SimpleGlyphFlags signOrSame) + { + var xs = new short[pointCount]; + var x = 0; + for (var i = 0; i < pointCount; i++) + { + int dx; + if (flags[i].HasFlag(isByte)) + { + var b = data.ReadByte(); + dx = flags[i].HasFlag(signOrSame) ? b : -b; + } + else + { + if (flags[i].HasFlag(signOrSame)) + { + dx = 0; + } + else + { + dx = data.ReadSignedShort(); + } + } + + x += dx; + + // TODO: overflow? + xs[i] = (short)x; + } + + return xs; } } - internal class SimpleGlyphDescription + internal interface IGlyphDescription + { + bool IsSimple { get; } + + SimpleGlyphDescription SimpleGlyph { get; } + + object CompositeGlyph { get; } + } + + internal class SimpleGlyphDescription : IGlyphDescription { /// /// The total number of bytes for instructions. @@ -100,6 +185,12 @@ XCoordinates = xCoordinates; YCoordinates = yCoordinates; } + + public bool IsSimple { get; } = true; + + public SimpleGlyphDescription SimpleGlyph => this; + + public object CompositeGlyph { get; } = null; } [Flags] diff --git a/src/UglyToad.Pdf/Fonts/TrueType/Tables/IndexToLocationTable.cs b/src/UglyToad.Pdf/Fonts/TrueType/Tables/IndexToLocationTable.cs index 82bf4bd5..062272d4 100644 --- a/src/UglyToad.Pdf/Fonts/TrueType/Tables/IndexToLocationTable.cs +++ b/src/UglyToad.Pdf/Fonts/TrueType/Tables/IndexToLocationTable.cs @@ -33,27 +33,31 @@ var format = headerTable.IndexToLocFormat; - var glyphCount = maximumProfileTable.NumberOfGlyphs; + var glyphCount = maximumProfileTable.NumberOfGlyphs + 1; var offsets = new long[glyphCount]; - for (int i = 0; i < glyphCount; i++) + switch (format) { - switch (format) - { - case shortFormat: - // The local offset divided by 2 is stored. - offsets[i] = data.ReadUnsignedShort() * 2; + case shortFormat: + { // The local offset divided by 2 is stored. + for (int i = 0; i < glyphCount; i++) + { + offsets[i] = data.ReadUnsignedShort() * 2; + } break; - case longFormat: + } + case longFormat: + { // The actual offset is stored. - offsets[i] = data.ReadLong(); + data.ReadUnsignedIntArray(offsets, glyphCount); break; - default: - throw new InvalidOperationException($"The format {format} was invalid for the index to location (loca) table."); - } + } + default: + throw new InvalidOperationException($"The format {format} was invalid for the index to location (loca) table."); } - + + return new IndexToLocationTable(table, offsets); } } diff --git a/src/UglyToad.Pdf/Fonts/TrueType/TrueTypeDataBytes.cs b/src/UglyToad.Pdf/Fonts/TrueType/TrueTypeDataBytes.cs index 686ea03b..a3d58323 100644 --- a/src/UglyToad.Pdf/Fonts/TrueType/TrueTypeDataBytes.cs +++ b/src/UglyToad.Pdf/Fonts/TrueType/TrueTypeDataBytes.cs @@ -59,6 +59,13 @@ } } + public byte ReadByte() + { + ReadBuffered(internalBuffer, 1); + + return internalBuffer[0]; + } + /// /// Reads the 4 character tag from the TrueType file. /// @@ -78,7 +85,7 @@ { ReadBuffered(internalBuffer, 4); - return (internalBuffer[0] << 24) + (internalBuffer[1] << 16) + (internalBuffer[2] << 8) + (internalBuffer[3] << 0); + return ((long)internalBuffer[0] << 24) + ((long)internalBuffer[1] << 16) + (internalBuffer[2] << 8) + (internalBuffer[3] << 0); } public int ReadSignedInt() @@ -130,8 +137,39 @@ public int ReadSignedByte() { ReadBuffered(internalBuffer, 1); + var signedByte = internalBuffer[0]; + return signedByte < 127 ? signedByte : signedByte - 256; } + + public int[] ReadUnsignedShortArray(int length) + { + var result = new int[length]; + + for (int i = 0; i < length; i++) + { + result[i] = ReadUnsignedShort(); + } + + return result; + } + + public byte[] ReadByteArray(int length) + { + var result = new byte[length]; + + ReadBuffered(result, length); + + return result; + } + + public void ReadUnsignedIntArray(long[] offsets, int length) + { + for (int i = 0; i < length; i++) + { + offsets[i] = ReadUnsignedInt(); + } + } } } diff --git a/src/UglyToad.Pdf/Fonts/TrueType/TrueTypeHeaderTable.cs b/src/UglyToad.Pdf/Fonts/TrueType/TrueTypeHeaderTable.cs index 96060e20..16123b83 100644 --- a/src/UglyToad.Pdf/Fonts/TrueType/TrueTypeHeaderTable.cs +++ b/src/UglyToad.Pdf/Fonts/TrueType/TrueTypeHeaderTable.cs @@ -185,5 +185,10 @@ Offset = offset; Length = length; } + + public override string ToString() + { + return $"{Tag} {Offset} {Length} {CheckSum}"; + } } } diff --git a/src/UglyToad.Pdf/IO/ByteArrayInputBytes.cs b/src/UglyToad.Pdf/IO/ByteArrayInputBytes.cs index 1de99d8d..c2819066 100644 --- a/src/UglyToad.Pdf/IO/ByteArrayInputBytes.cs +++ b/src/UglyToad.Pdf/IO/ByteArrayInputBytes.cs @@ -46,7 +46,7 @@ public void Seek(long position) { CurrentOffset = (int)position; - CurrentByte = bytes[CurrentOffset]; + CurrentByte = CurrentOffset < 0 ? (byte)0 : bytes[CurrentOffset]; } } } \ No newline at end of file