From fe315be2ef559650a295c8bb3d3923108a9257f1 Mon Sep 17 00:00:00 2001 From: Eliot Jones Date: Fri, 3 Jan 2020 14:31:27 +0000 Subject: [PATCH] fix truetype subsetting for composite glyphs #98 each glyph included in the subset must count towards the number of glyphs, the horizontal metrics and the maximum profile table for the output truetype font. each glyph must also lie on a 4 byte boundary in the output file. the output file is valid for the windows system font calibri containing accented characters but the roboto subset files are still invalid. moves all subsetting related classes into their own namespace which will be made public. --- .../Writer/PdfDocumentBuilderTests.cs | 46 +++++++++ .../TrueTypeGlyphTableSubsetter.cs | 94 +++++++++++++------ .../Subsetting/TrueTypeSubsetEncoding.cs | 14 +++ .../Subsetting/TrueTypeSubsetGlyphTable.cs | 61 ++++++++++++ .../{ => Subsetting}/TrueTypeSubsetter.cs | 73 ++++++-------- .../Writer/Fonts/TrueTypeWritingFont.cs | 2 +- 6 files changed, 212 insertions(+), 78 deletions(-) rename src/UglyToad.PdfPig/Writer/Fonts/{ => Subsetting}/TrueTypeGlyphTableSubsetter.cs (81%) create mode 100644 src/UglyToad.PdfPig/Writer/Fonts/Subsetting/TrueTypeSubsetEncoding.cs create mode 100644 src/UglyToad.PdfPig/Writer/Fonts/Subsetting/TrueTypeSubsetGlyphTable.cs rename src/UglyToad.PdfPig/Writer/Fonts/{ => Subsetting}/TrueTypeSubsetter.cs (83%) diff --git a/src/UglyToad.PdfPig.Tests/Writer/PdfDocumentBuilderTests.cs b/src/UglyToad.PdfPig.Tests/Writer/PdfDocumentBuilderTests.cs index 0f67eec4..7909a1ae 100644 --- a/src/UglyToad.PdfPig.Tests/Writer/PdfDocumentBuilderTests.cs +++ b/src/UglyToad.PdfPig.Tests/Writer/PdfDocumentBuilderTests.cs @@ -145,6 +145,52 @@ } } + [Fact] + public void WindowsOnlyCanWriteSinglePageAccentedCharactersSystemFont() + { + var builder = new PdfDocumentBuilder(); + + builder.DocumentInformation.Title = "Hello Windows!"; + + var page = builder.AddPage(PageSize.A4); + + var file = @"C:\Windows\Fonts\Calibri.ttf"; + + if (!File.Exists(file)) + { + return; + } + + byte[] bytes; + try + { + bytes = File.ReadAllBytes(file); + } + catch + { + return; + } + + var font = builder.AddTrueTypeFont(bytes); + + page.AddText("eé", 12, new PdfPoint(30, 520), font); + + Assert.NotEmpty(page.Operations); + + var b = builder.Build(); + + WriteFile(nameof(WindowsOnlyCanWriteSinglePageAccentedCharactersSystemFont), b); + + Assert.NotEmpty(b); + + using (var document = PdfDocument.Open(b)) + { + var page1 = document.GetPage(1); + + Assert.Equal("eé", page1.Text); + } + } + [Fact] public void WindowsOnlyCanWriteSinglePageHelloWorldSystemFont() { diff --git a/src/UglyToad.PdfPig/Writer/Fonts/TrueTypeGlyphTableSubsetter.cs b/src/UglyToad.PdfPig/Writer/Fonts/Subsetting/TrueTypeGlyphTableSubsetter.cs similarity index 81% rename from src/UglyToad.PdfPig/Writer/Fonts/TrueTypeGlyphTableSubsetter.cs rename to src/UglyToad.PdfPig/Writer/Fonts/Subsetting/TrueTypeGlyphTableSubsetter.cs index dba7f5c0..c8357bfd 100644 --- a/src/UglyToad.PdfPig/Writer/Fonts/TrueTypeGlyphTableSubsetter.cs +++ b/src/UglyToad.PdfPig/Writer/Fonts/Subsetting/TrueTypeGlyphTableSubsetter.cs @@ -1,18 +1,51 @@ -namespace UglyToad.PdfPig.Writer.Fonts +namespace UglyToad.PdfPig.Writer.Fonts.Subsetting { using System; using System.Collections.Generic; using System.IO; + using PdfPig.Fonts.Exceptions; using PdfPig.Fonts.TrueType; using PdfPig.Fonts.TrueType.Glyphs; + using PdfPig.Fonts.TrueType.Tables; using Util; using IndexMap = TrueTypeSubsetter.OldToNewGlyphIndex; + /// + /// Produces a glyph table which contains a subset of the glyphs in the input font. + /// internal static class TrueTypeGlyphTableSubsetter { - public static NewGlyphTable SubsetGlyphTable(TrueTypeFontProgram font, byte[] fontBytes, IndexMap[] mapping) + /// + /// Creates a new glyph table from the input font which contains only the glyphs required by the input mapping. + /// + /// The font used to create this subset. + /// The raw bytes of the input font. + /// The mapping of old glyph indices to new glyph indices. + /// A new glyph table and associated information for use in creating a valid TrueType font file. + public static TrueTypeSubsetGlyphTable SubsetGlyphTable(TrueTypeFontProgram font, byte[] fontBytes, IndexMap[] mapping) { + if (font == null) + { + throw new ArgumentNullException(nameof(font)); + } + + if (fontBytes == null) + { + throw new ArgumentNullException(nameof(fontBytes)); + } + + if (mapping == null) + { + throw new ArgumentNullException(nameof(mapping)); + } + var data = new TrueTypeDataBytes(fontBytes); + var advanceWidthTable = font.TableRegister.HorizontalMetricsTable; + + if (advanceWidthTable == null) + { + throw new InvalidFontFormatException($"Font: {font} did not contain a horizontal metrics table, cannot subset."); + } var existingGlyphs = GetGlyphRecordsInFont(font, data); @@ -30,6 +63,7 @@ } var glyphLocations = new List(); + var advanceWidths = new List(); var compositeIndicesToReplace = new List<(uint offset, ushort newIndex)>(); @@ -56,13 +90,13 @@ { // Else we need to copy the dependency glyph from the original. var actualDependencyRecord = existingGlyphs[dependency.Index]; - + // We need to add it to the set of glyphs to copy. newDependencyIndex = glyphsToCopy.Count; glyphsToCopy.Add(actualDependencyRecord); glyphsToCopyOriginalIndex.Add((int)dependency.Index); } - + var withinGlyphDataIndexOffset = dependency.OffsetOfIndexWithinData - newRecord.Offset; compositeIndicesToReplace.Add(((uint)withinGlyphDataIndexOffset, (ushort)newDependencyIndex)); @@ -72,6 +106,9 @@ // Record the glyph location. glyphLocations.Add((uint)stream.Position); + var advanceWidth = advanceWidthTable.HorizontalMetrics[glyphsToCopyOriginalIndex[i]]; + advanceWidths.Add(advanceWidth); + if (newRecord.Type == GlyphType.Empty) { // TODO: if this is the last glyph this might be a problem. @@ -83,14 +120,21 @@ var glyphBytes = data.ReadByteArray(newRecord.DataLength); // Update any indices referenced by composite glyphs to match the new index of the dependency. - foreach (var toReplace in compositeIndicesToReplace) + foreach (var (offset, newIndex) in compositeIndicesToReplace) { - glyphBytes[toReplace.offset] = (byte)(toReplace.newIndex >> 8); - glyphBytes[toReplace.offset + 1] = (byte)toReplace.newIndex; + glyphBytes[offset] = (byte)(newIndex >> 8); + glyphBytes[offset + 1] = (byte)newIndex; } + + stream.Write(glyphBytes, 0, glyphBytes.Length); // Each glyph description must start at a 4 byte boundary. - stream.Write(glyphBytes, 0, glyphBytes.Length); + var remainder = glyphBytes.Length % 4; + var bytesToPad = remainder == 0 ? 0 : 4 - remainder; + for (var j = 0; j < bytesToPad; j++) + { + stream.WriteByte(0); + } } var output = stream.ToArray(); @@ -98,7 +142,7 @@ glyphLocations.Add((uint)output.Length); var offsets = glyphLocations.ToArray(); - return new NewGlyphTable(output, offsets); + return new TrueTypeSubsetGlyphTable(output, offsets, advanceWidths.ToArray()); } } @@ -120,7 +164,7 @@ if (indexToLocationTable.GlyphOffsets[i + 1] <= indexToLocationTable.GlyphOffsets[i]) { - glyphRecords[i] = new GlyphRecord(i, glyphOffset); + glyphRecords[i] = new GlyphRecord(glyphOffset); continue; } @@ -143,13 +187,13 @@ if (type == GlyphType.Simple) { ReadSimpleGlyph(data, numberOfContours); - glyphRecords[i] = new GlyphRecord(i, glyphOffset, type, (int)(data.Position - glyphOffset)); + glyphRecords[i] = new GlyphRecord(glyphOffset, type, (int)(data.Position - glyphOffset)); } else { var glyphIndices = ReadCompositeGlyph(data); - glyphRecords[i] = new GlyphRecord(i, glyphOffset, type, (int)(data.Position - glyphOffset), glyphIndices); + glyphRecords[i] = new GlyphRecord(glyphOffset, type, (int)(data.Position - glyphOffset), glyphIndices); } } @@ -221,9 +265,11 @@ } } + // ReSharper disable UnusedVariable var xCoordinates = ReadCoordinates(perPointFlags, data, SimpleGlyphFlags.XSingleByte, SimpleGlyphFlags.ThisXIsTheSame); var yCoordinates = ReadCoordinates(perPointFlags, data, SimpleGlyphFlags.YSingleByte, SimpleGlyphFlags.ThisYIsTheSame); + // ReSharper restore UnusedVariable } private static short[] ReadCoordinates(SimpleGlyphFlags[] flags, TrueTypeDataBytes data, @@ -329,8 +375,6 @@ private class GlyphRecord { - public int Index { get; } - public int Offset { get; } public GlyphType Type { get; } @@ -342,19 +386,17 @@ /// public IReadOnlyList DependencyIndices { get; } - public GlyphRecord(int index, int offset, GlyphType type, int dataLength, + public GlyphRecord(int offset, GlyphType type, int dataLength, IReadOnlyList dependentIndices = null) { - Index = index; Offset = offset; Type = type; DataLength = dataLength; DependencyIndices = dependentIndices ?? EmptyArray.Instance; } - public GlyphRecord(int index, int offset) + public GlyphRecord(int offset) { - Index = index; Offset = offset; Type = GlyphType.Empty; DataLength = 0; @@ -369,6 +411,9 @@ Composite } + /// + /// Marks a glyph index referenced by a composite glyph. + /// private struct CompositeGlyphIndexReference { /// @@ -387,18 +432,5 @@ OffsetOfIndexWithinData = offsetOfIndexWithinData; } } - - public class NewGlyphTable - { - public byte[] Bytes { get; } - - public uint[] GlyphOffsets { get; } - - public NewGlyphTable(byte[] bytes, uint[] glyphOffsets) - { - Bytes = bytes ?? throw new ArgumentNullException(nameof(bytes)); - GlyphOffsets = glyphOffsets ?? throw new ArgumentNullException(nameof(glyphOffsets)); - } - } } } diff --git a/src/UglyToad.PdfPig/Writer/Fonts/Subsetting/TrueTypeSubsetEncoding.cs b/src/UglyToad.PdfPig/Writer/Fonts/Subsetting/TrueTypeSubsetEncoding.cs new file mode 100644 index 00000000..e06136db --- /dev/null +++ b/src/UglyToad.PdfPig/Writer/Fonts/Subsetting/TrueTypeSubsetEncoding.cs @@ -0,0 +1,14 @@ +namespace UglyToad.PdfPig.Writer.Fonts.Subsetting +{ + using System.Collections.Generic; + + internal class TrueTypeSubsetEncoding + { + public IReadOnlyList Characters { get; } + + public TrueTypeSubsetEncoding(IReadOnlyList characters) + { + Characters = characters; + } + } +} \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Writer/Fonts/Subsetting/TrueTypeSubsetGlyphTable.cs b/src/UglyToad.PdfPig/Writer/Fonts/Subsetting/TrueTypeSubsetGlyphTable.cs new file mode 100644 index 00000000..613e61bb --- /dev/null +++ b/src/UglyToad.PdfPig/Writer/Fonts/Subsetting/TrueTypeSubsetGlyphTable.cs @@ -0,0 +1,61 @@ +namespace UglyToad.PdfPig.Writer.Fonts.Subsetting +{ + using System; + using PdfPig.Fonts.TrueType.Tables; + + /// + /// Details of the new glyph 'glyf' table created when subsetting a TrueType font. + /// + internal class TrueTypeSubsetGlyphTable + { + /// + /// The raw bytes of the new table. + /// + public byte[] Bytes { get; } + + /// + /// The offsets of each of the glyphs in the new table. + /// + public uint[] GlyphOffsets { get; } + + /// + /// The corresponding horizontal metrics for each glyph. + /// + public HorizontalMetricsTable.HorizontalMetric[] HorizontalMetrics { get; } + + /// + /// The number of glyphs in the new table. + /// + public ushort GlyphCount => (ushort)(GlyphOffsets.Length - 1); + + /// + /// Create a new . + /// + public TrueTypeSubsetGlyphTable(byte[] bytes, uint[] glyphOffsets, HorizontalMetricsTable.HorizontalMetric[] horizontalMetrics) + { + Bytes = bytes ?? throw new ArgumentNullException(nameof(bytes)); + GlyphOffsets = glyphOffsets ?? throw new ArgumentNullException(nameof(glyphOffsets)); + HorizontalMetrics = horizontalMetrics ?? throw new ArgumentNullException(nameof(horizontalMetrics)); + } + + /// + /// Convert the values to s. + /// + public long[] OffsetsAsLongs() + { + var data = new long[GlyphOffsets.Length]; + for (var i = 0; i < GlyphOffsets.Length; i++) + { + data[i] = GlyphOffsets[i]; + } + + return data; + } + + /// + public override string ToString() + { + return $"{GlyphCount} glyphs. Data is {Bytes.Length} bytes."; + } + } +} \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Writer/Fonts/TrueTypeSubsetter.cs b/src/UglyToad.PdfPig/Writer/Fonts/Subsetting/TrueTypeSubsetter.cs similarity index 83% rename from src/UglyToad.PdfPig/Writer/Fonts/TrueTypeSubsetter.cs rename to src/UglyToad.PdfPig/Writer/Fonts/Subsetting/TrueTypeSubsetter.cs index d38b10d0..45734413 100644 --- a/src/UglyToad.PdfPig/Writer/Fonts/TrueTypeSubsetter.cs +++ b/src/UglyToad.PdfPig/Writer/Fonts/Subsetting/TrueTypeSubsetter.cs @@ -1,10 +1,11 @@ -namespace UglyToad.PdfPig.Writer.Fonts +namespace UglyToad.PdfPig.Writer.Fonts.Subsetting { using System; using System.Collections.Generic; using System.IO; using System.Linq; using IO; + using PdfPig.Fonts.Exceptions; using PdfPig.Fonts.TrueType; using PdfPig.Fonts.TrueType.Parser; using PdfPig.Fonts.TrueType.Tables; @@ -12,7 +13,9 @@ using Util; internal static class TrueTypeSubsetter - { + { + private const ushort IndexToLocLong = 1; + /* * The PDF specification requires the following 10 tables: * glyf @@ -25,6 +28,7 @@ * fpgm * prep * cmap + * But not all fonts include 'cvt ' and 'fpgm'. */ private static readonly IReadOnlyList RequiredTags = new[] { @@ -73,12 +77,20 @@ for (var i = 0; i < RequiredTags.Count; i++) { var tag = RequiredTags[i]; - var entry = new DirectoryEntry(tag, stream.Position, font.TableHeaders[tag]); + + if (!font.TableHeaders.TryGetValue(tag, out var inputHeader)) + { + throw new InvalidFontFormatException($"Font does not contain table required for subsetting: {tag}."); + } + + var entry = new DirectoryEntry(tag, stream.Position, inputHeader); entry.DummyHeader.Write(stream); directoryEntries[i] = entry; } - TrueTypeGlyphTableSubsetter.NewGlyphTable newGlyphTable = null; + // Generate the glyph subset. + TrueTypeSubsetGlyphTable trueTypeSubsetGlyphTable = TrueTypeGlyphTableSubsetter.SubsetGlyphTable(font, fontBytes, indexMapping); + // Write the actual tables. for (var i = 0; i < directoryEntries.Length; i++) { @@ -95,49 +107,40 @@ } else if (entry.Tag == TrueTypeHeaderTable.Glyf) { - newGlyphTable = TrueTypeGlyphTableSubsetter.SubsetGlyphTable(font, fontBytes, indexMapping); - stream.Write(newGlyphTable.Bytes, 0, newGlyphTable.Bytes.Length); + stream.Write(trueTypeSubsetGlyphTable.Bytes, 0, trueTypeSubsetGlyphTable.Bytes.Length); } else if (entry.Tag == TrueTypeHeaderTable.Hmtx) { - var hmtx = GetHorizontalMetricsTable(font, entry, indexMapping); + var hmtx = GetHorizontalMetricsTable(entry, trueTypeSubsetGlyphTable); hmtx.Write(stream); } else if (entry.Tag == TrueTypeHeaderTable.Loca) { - if (newGlyphTable == null) - { - throw new InvalidOperationException(); - } - var table = new IndexToLocationTable(entry.DummyHeader, IndexToLocationTable.EntryFormat.Long, - newGlyphTable.GlyphOffsets.Select(x => (long)x).ToArray()); + trueTypeSubsetGlyphTable.GlyphOffsets.Select(x => (long)x).ToArray()); table.Write(stream); } else if (entry.Tag == TrueTypeHeaderTable.Head) { // Update indexToLoc format. var headBytes = GetRawInputTableBytes(fontBytes, entry); - WriteUShort(headBytes, headBytes.Length - 4, 1); + WriteUShort(headBytes, headBytes.Length - 4, IndexToLocLong); stream.Write(headBytes, 0, headBytes.Length); + + // TODO: zero out checksum adjustment bytes. } else if (entry.Tag == TrueTypeHeaderTable.Hhea) { // Update number of h metrics. var hheaBytes = GetRawInputTableBytes(fontBytes, entry); - WriteUShort(hheaBytes, hheaBytes.Length - 2, (ushort)indexMapping.Length); + WriteUShort(hheaBytes, hheaBytes.Length - 2, (ushort)trueTypeSubsetGlyphTable.HorizontalMetrics.Length); stream.Write(hheaBytes, 0, hheaBytes.Length); } else if (entry.Tag == TrueTypeHeaderTable.Maxp) { - if (newGlyphTable == null) - { - throw new InvalidOperationException(); - } - // Update number of glyphs. var maxpBytes = GetRawInputTableBytes(fontBytes, entry); - WriteUShort(maxpBytes, 4, (ushort)(newGlyphTable.GlyphOffsets.Length - 1)); + WriteUShort(maxpBytes, 4, trueTypeSubsetGlyphTable.GlyphCount); stream.Write(maxpBytes, 0, maxpBytes.Length); } else @@ -182,7 +185,7 @@ result[0] = new OldToNewGlyphIndex(0, 0, '\0'); - var previousCMap = font.MacRomanCMap ?? font.WindowsUnicodeCMap ?? font.WindowsSymbolCMap; + var previousCMap = font.WindowsUnicodeCMap ?? font.WindowsSymbolCMap ?? font.MacRomanCMap; if (previousCMap == null) { @@ -223,21 +226,9 @@ return cmap; } - private static HorizontalMetricsTable GetHorizontalMetricsTable(TrueTypeFontProgram font, DirectoryEntry entry, OldToNewGlyphIndex[] encoding) + private static HorizontalMetricsTable GetHorizontalMetricsTable(DirectoryEntry entry, TrueTypeSubsetGlyphTable glyphTable) { - var current = font.TableRegister.HorizontalMetricsTable; - - var newMetrics = new HorizontalMetricsTable.HorizontalMetric[encoding.Length]; - - for (var i = 0; i < encoding.Length; i++) - { - var mapping = encoding[i]; - // TODO: might be an additional lsb only. - var value = current.HorizontalMetrics[mapping.OldIndex]; - newMetrics[i] = value; - } - - return new HorizontalMetricsTable(entry.DummyHeader, newMetrics, EmptyArray.Instance); + return new HorizontalMetricsTable(entry.DummyHeader, glyphTable.HorizontalMetrics, EmptyArray.Instance); } private static byte[] GetRawInputTableBytes(byte[] font, DirectoryEntry entry) @@ -297,15 +288,5 @@ } } } - - internal class TrueTypeSubsetEncoding - { - public IReadOnlyList Characters { get; } - - public TrueTypeSubsetEncoding(IReadOnlyList characters) - { - Characters = characters; - } - } } diff --git a/src/UglyToad.PdfPig/Writer/Fonts/TrueTypeWritingFont.cs b/src/UglyToad.PdfPig/Writer/Fonts/TrueTypeWritingFont.cs index fddc9865..78782af5 100644 --- a/src/UglyToad.PdfPig/Writer/Fonts/TrueTypeWritingFont.cs +++ b/src/UglyToad.PdfPig/Writer/Fonts/TrueTypeWritingFont.cs @@ -7,13 +7,13 @@ using Core; using Filters; using Geometry; - using IO; using Logging; using Tokens; using PdfPig.Fonts; using PdfPig.Fonts.Exceptions; using PdfPig.Fonts.TrueType; using PdfPig.Fonts.TrueType.Tables; + using Subsetting; internal class TrueTypeWritingFont : IWritingFont {