fix truetype subsetting for composite glyphs #98

Each glyph included in the subset must count towards the number of glyphs, the horizontal metrics and the maximum profile table for the output TrueType font. Each glyph must also lie on a 4-byte boundary in the output file.

The output file is valid for the Windows system font Calibri containing accented characters, but the Roboto subset files are still invalid.

Moves all subsetting-related classes into their own namespace, which will be made public.
This commit is contained in:
Eliot Jones
2020-01-03 14:31:27 +00:00
parent bb5677c11e
commit fe315be2ef
6 changed files with 212 additions and 78 deletions

View File

@@ -145,6 +145,52 @@
}
}
[Fact]
public void WindowsOnlyCanWriteSinglePageAccentedCharactersSystemFont()
{
    // The font is loaded from a Windows system path; when it is missing or unreadable
    // the test returns early without asserting anything.
    const string calibriPath = @"C:\Windows\Fonts\Calibri.ttf";

    // A plain letter followed by its accented form.
    const string text = "eé";

    var builder = new PdfDocumentBuilder();
    builder.DocumentInformation.Title = "Hello Windows!";

    var page = builder.AddPage(PageSize.A4);

    if (!File.Exists(calibriPath))
    {
        return;
    }

    byte[] fontBytes;
    try
    {
        fontBytes = File.ReadAllBytes(calibriPath);
    }
    catch
    {
        // The file exists but could not be read: skip rather than fail.
        return;
    }

    var font = builder.AddTrueTypeFont(fontBytes);

    page.AddText(text, 12, new PdfPoint(30, 520), font);

    Assert.NotEmpty(page.Operations);

    var documentBytes = builder.Build();

    // Keep a copy of the generated document under the test's own name.
    WriteFile(nameof(WindowsOnlyCanWriteSinglePageAccentedCharactersSystemFont), documentBytes);

    Assert.NotEmpty(documentBytes);

    // Round trip: the text read back from the written document must match what was added.
    using (var document = PdfDocument.Open(documentBytes))
    {
        Assert.Equal(text, document.GetPage(1).Text);
    }
}
[Fact]
public void WindowsOnlyCanWriteSinglePageHelloWorldSystemFont()
{

View File

@@ -1,18 +1,51 @@
namespace UglyToad.PdfPig.Writer.Fonts
namespace UglyToad.PdfPig.Writer.Fonts.Subsetting
{
using System;
using System.Collections.Generic;
using System.IO;
using PdfPig.Fonts.Exceptions;
using PdfPig.Fonts.TrueType;
using PdfPig.Fonts.TrueType.Glyphs;
using PdfPig.Fonts.TrueType.Tables;
using Util;
using IndexMap = TrueTypeSubsetter.OldToNewGlyphIndex;
/// <summary>
/// Produces a glyph table which contains a subset of the glyphs in the input font.
/// </summary>
internal static class TrueTypeGlyphTableSubsetter
{
public static NewGlyphTable SubsetGlyphTable(TrueTypeFontProgram font, byte[] fontBytes, IndexMap[] mapping)
/// <summary>
/// Creates a new glyph table from the input font which contains only the glyphs required by the input mapping.
/// </summary>
/// <param name="font">The font used to create this subset.</param>
/// <param name="fontBytes">The raw bytes of the input font.</param>
/// <param name="mapping">The mapping of old glyph indices to new glyph indices.</param>
/// <returns>A new glyph table and associated information for use in creating a valid TrueType font file.</returns>
public static TrueTypeSubsetGlyphTable SubsetGlyphTable(TrueTypeFontProgram font, byte[] fontBytes, IndexMap[] mapping)
{
if (font == null)
{
throw new ArgumentNullException(nameof(font));
}
if (fontBytes == null)
{
throw new ArgumentNullException(nameof(fontBytes));
}
if (mapping == null)
{
throw new ArgumentNullException(nameof(mapping));
}
var data = new TrueTypeDataBytes(fontBytes);
var advanceWidthTable = font.TableRegister.HorizontalMetricsTable;
if (advanceWidthTable == null)
{
throw new InvalidFontFormatException($"Font: {font} did not contain a horizontal metrics table, cannot subset.");
}
var existingGlyphs = GetGlyphRecordsInFont(font, data);
@@ -30,6 +63,7 @@
}
var glyphLocations = new List<uint>();
var advanceWidths = new List<HorizontalMetricsTable.HorizontalMetric>();
var compositeIndicesToReplace = new List<(uint offset, ushort newIndex)>();
@@ -56,13 +90,13 @@
{
// Else we need to copy the dependency glyph from the original.
var actualDependencyRecord = existingGlyphs[dependency.Index];
// We need to add it to the set of glyphs to copy.
newDependencyIndex = glyphsToCopy.Count;
glyphsToCopy.Add(actualDependencyRecord);
glyphsToCopyOriginalIndex.Add((int)dependency.Index);
}
var withinGlyphDataIndexOffset = dependency.OffsetOfIndexWithinData - newRecord.Offset;
compositeIndicesToReplace.Add(((uint)withinGlyphDataIndexOffset, (ushort)newDependencyIndex));
@@ -72,6 +106,9 @@
// Record the glyph location.
glyphLocations.Add((uint)stream.Position);
var advanceWidth = advanceWidthTable.HorizontalMetrics[glyphsToCopyOriginalIndex[i]];
advanceWidths.Add(advanceWidth);
if (newRecord.Type == GlyphType.Empty)
{
// TODO: if this is the last glyph this might be a problem.
@@ -83,14 +120,21 @@
var glyphBytes = data.ReadByteArray(newRecord.DataLength);
// Update any indices referenced by composite glyphs to match the new index of the dependency.
foreach (var toReplace in compositeIndicesToReplace)
foreach (var (offset, newIndex) in compositeIndicesToReplace)
{
glyphBytes[toReplace.offset] = (byte)(toReplace.newIndex >> 8);
glyphBytes[toReplace.offset + 1] = (byte)toReplace.newIndex;
glyphBytes[offset] = (byte)(newIndex >> 8);
glyphBytes[offset + 1] = (byte)newIndex;
}
stream.Write(glyphBytes, 0, glyphBytes.Length);
// Each glyph description must start at a 4 byte boundary.
stream.Write(glyphBytes, 0, glyphBytes.Length);
var remainder = glyphBytes.Length % 4;
var bytesToPad = remainder == 0 ? 0 : 4 - remainder;
for (var j = 0; j < bytesToPad; j++)
{
stream.WriteByte(0);
}
}
var output = stream.ToArray();
@@ -98,7 +142,7 @@
glyphLocations.Add((uint)output.Length);
var offsets = glyphLocations.ToArray();
return new NewGlyphTable(output, offsets);
return new TrueTypeSubsetGlyphTable(output, offsets, advanceWidths.ToArray());
}
}
@@ -120,7 +164,7 @@
if (indexToLocationTable.GlyphOffsets[i + 1] <= indexToLocationTable.GlyphOffsets[i])
{
glyphRecords[i] = new GlyphRecord(i, glyphOffset);
glyphRecords[i] = new GlyphRecord(glyphOffset);
continue;
}
@@ -143,13 +187,13 @@
if (type == GlyphType.Simple)
{
ReadSimpleGlyph(data, numberOfContours);
glyphRecords[i] = new GlyphRecord(i, glyphOffset, type, (int)(data.Position - glyphOffset));
glyphRecords[i] = new GlyphRecord(glyphOffset, type, (int)(data.Position - glyphOffset));
}
else
{
var glyphIndices = ReadCompositeGlyph(data);
glyphRecords[i] = new GlyphRecord(i, glyphOffset, type, (int)(data.Position - glyphOffset), glyphIndices);
glyphRecords[i] = new GlyphRecord(glyphOffset, type, (int)(data.Position - glyphOffset), glyphIndices);
}
}
@@ -221,9 +265,11 @@
}
}
// ReSharper disable UnusedVariable
var xCoordinates = ReadCoordinates(perPointFlags, data, SimpleGlyphFlags.XSingleByte, SimpleGlyphFlags.ThisXIsTheSame);
var yCoordinates = ReadCoordinates(perPointFlags, data, SimpleGlyphFlags.YSingleByte, SimpleGlyphFlags.ThisYIsTheSame);
// ReSharper restore UnusedVariable
}
private static short[] ReadCoordinates(SimpleGlyphFlags[] flags, TrueTypeDataBytes data,
@@ -329,8 +375,6 @@
private class GlyphRecord
{
public int Index { get; }
public int Offset { get; }
public GlyphType Type { get; }
@@ -342,19 +386,17 @@
/// </summary>
public IReadOnlyList<CompositeGlyphIndexReference> DependencyIndices { get; }
public GlyphRecord(int index, int offset, GlyphType type, int dataLength,
public GlyphRecord(int offset, GlyphType type, int dataLength,
IReadOnlyList<CompositeGlyphIndexReference> dependentIndices = null)
{
Index = index;
Offset = offset;
Type = type;
DataLength = dataLength;
DependencyIndices = dependentIndices ?? EmptyArray<CompositeGlyphIndexReference>.Instance;
}
public GlyphRecord(int index, int offset)
public GlyphRecord(int offset)
{
Index = index;
Offset = offset;
Type = GlyphType.Empty;
DataLength = 0;
@@ -369,6 +411,9 @@
Composite
}
/// <summary>
/// Marks a glyph index referenced by a composite glyph.
/// </summary>
private struct CompositeGlyphIndexReference
{
/// <summary>
@@ -387,18 +432,5 @@
OffsetOfIndexWithinData = offsetOfIndexWithinData;
}
}
public class NewGlyphTable
{
public byte[] Bytes { get; }
public uint[] GlyphOffsets { get; }
public NewGlyphTable(byte[] bytes, uint[] glyphOffsets)
{
Bytes = bytes ?? throw new ArgumentNullException(nameof(bytes));
GlyphOffsets = glyphOffsets ?? throw new ArgumentNullException(nameof(glyphOffsets));
}
}
}
}

View File

@@ -0,0 +1,14 @@
namespace UglyToad.PdfPig.Writer.Fonts.Subsetting
{
using System.Collections.Generic;
/// <summary>
/// Holds the list of characters associated with a TrueType font subset.
/// </summary>
internal class TrueTypeSubsetEncoding
{
    /// <summary>
    /// The characters supplied when this encoding was created.
    /// The list is stored as-is, it is neither copied nor validated.
    /// </summary>
    public IReadOnlyList<char> Characters { get; }

    /// <summary>
    /// Create a new <see cref="TrueTypeSubsetEncoding"/>.
    /// </summary>
    /// <param name="characters">The characters to expose through <see cref="Characters"/>.</param>
    public TrueTypeSubsetEncoding(IReadOnlyList<char> characters) => Characters = characters;
}
}

View File

@@ -0,0 +1,61 @@
namespace UglyToad.PdfPig.Writer.Fonts.Subsetting
{
using System;
using PdfPig.Fonts.TrueType.Tables;
/// <summary>
/// Details of the new glyph 'glyf' table created when subsetting a TrueType font.
/// </summary>
internal class TrueTypeSubsetGlyphTable
{
    /// <summary>
    /// The raw bytes of the new table.
    /// </summary>
    public byte[] Bytes { get; }

    /// <summary>
    /// The offsets of each of the glyphs in the new table.
    /// <see cref="GlyphCount"/> treats this as holding one more entry than there are glyphs.
    /// </summary>
    public uint[] GlyphOffsets { get; }

    /// <summary>
    /// The corresponding horizontal metrics for each glyph.
    /// </summary>
    public HorizontalMetricsTable.HorizontalMetric[] HorizontalMetrics { get; }

    /// <summary>
    /// The number of glyphs in the new table, one fewer than the number of <see cref="GlyphOffsets"/>.
    /// An empty offsets array yields zero rather than wrapping around to <see cref="ushort.MaxValue"/>.
    /// </summary>
    public ushort GlyphCount => GlyphOffsets.Length > 0 ? (ushort)(GlyphOffsets.Length - 1) : (ushort)0;

    /// <summary>
    /// Create a new <see cref="TrueTypeSubsetGlyphTable"/>.
    /// </summary>
    /// <param name="bytes">The raw bytes of the new glyph table.</param>
    /// <param name="glyphOffsets">The offsets of the glyphs within <paramref name="bytes"/>.</param>
    /// <param name="horizontalMetrics">The horizontal metrics for the glyphs in the new table.</param>
    /// <exception cref="ArgumentNullException">Any of the arguments is <see langword="null"/>.</exception>
    public TrueTypeSubsetGlyphTable(byte[] bytes, uint[] glyphOffsets, HorizontalMetricsTable.HorizontalMetric[] horizontalMetrics)
    {
        Bytes = bytes ?? throw new ArgumentNullException(nameof(bytes));
        GlyphOffsets = glyphOffsets ?? throw new ArgumentNullException(nameof(glyphOffsets));
        HorizontalMetrics = horizontalMetrics ?? throw new ArgumentNullException(nameof(horizontalMetrics));
    }

    /// <summary>
    /// Convert the <see cref="GlyphOffsets"/> values to <see langword="long"/>s.
    /// </summary>
    /// <returns>A new array of the same length containing each offset widened to a <see langword="long"/>.</returns>
    public long[] OffsetsAsLongs()
    {
        return Array.ConvertAll(GlyphOffsets, offset => (long)offset);
    }

    /// <inheritdoc />
    public override string ToString()
    {
        return $"{GlyphCount} glyphs. Data is {Bytes.Length} bytes.";
    }
}
}

View File

@@ -1,10 +1,11 @@
namespace UglyToad.PdfPig.Writer.Fonts
namespace UglyToad.PdfPig.Writer.Fonts.Subsetting
{
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using IO;
using PdfPig.Fonts.Exceptions;
using PdfPig.Fonts.TrueType;
using PdfPig.Fonts.TrueType.Parser;
using PdfPig.Fonts.TrueType.Tables;
@@ -12,7 +13,9 @@
using Util;
internal static class TrueTypeSubsetter
{
{
private const ushort IndexToLocLong = 1;
/*
* The PDF specification requires the following 10 tables:
* glyf
@@ -25,6 +28,7 @@
* fpgm
* prep
* cmap
* But not all fonts include 'cvt ' and 'fpgm'.
*/
private static readonly IReadOnlyList<string> RequiredTags = new[]
{
@@ -73,12 +77,20 @@
for (var i = 0; i < RequiredTags.Count; i++)
{
var tag = RequiredTags[i];
var entry = new DirectoryEntry(tag, stream.Position, font.TableHeaders[tag]);
if (!font.TableHeaders.TryGetValue(tag, out var inputHeader))
{
throw new InvalidFontFormatException($"Font does not contain table required for subsetting: {tag}.");
}
var entry = new DirectoryEntry(tag, stream.Position, inputHeader);
entry.DummyHeader.Write(stream);
directoryEntries[i] = entry;
}
TrueTypeGlyphTableSubsetter.NewGlyphTable newGlyphTable = null;
// Generate the glyph subset.
TrueTypeSubsetGlyphTable trueTypeSubsetGlyphTable = TrueTypeGlyphTableSubsetter.SubsetGlyphTable(font, fontBytes, indexMapping);
// Write the actual tables.
for (var i = 0; i < directoryEntries.Length; i++)
{
@@ -95,49 +107,40 @@
}
else if (entry.Tag == TrueTypeHeaderTable.Glyf)
{
newGlyphTable = TrueTypeGlyphTableSubsetter.SubsetGlyphTable(font, fontBytes, indexMapping);
stream.Write(newGlyphTable.Bytes, 0, newGlyphTable.Bytes.Length);
stream.Write(trueTypeSubsetGlyphTable.Bytes, 0, trueTypeSubsetGlyphTable.Bytes.Length);
}
else if (entry.Tag == TrueTypeHeaderTable.Hmtx)
{
var hmtx = GetHorizontalMetricsTable(font, entry, indexMapping);
var hmtx = GetHorizontalMetricsTable(entry, trueTypeSubsetGlyphTable);
hmtx.Write(stream);
}
else if (entry.Tag == TrueTypeHeaderTable.Loca)
{
if (newGlyphTable == null)
{
throw new InvalidOperationException();
}
var table = new IndexToLocationTable(entry.DummyHeader, IndexToLocationTable.EntryFormat.Long,
newGlyphTable.GlyphOffsets.Select(x => (long)x).ToArray());
trueTypeSubsetGlyphTable.GlyphOffsets.Select(x => (long)x).ToArray());
table.Write(stream);
}
else if (entry.Tag == TrueTypeHeaderTable.Head)
{
// Update indexToLoc format.
var headBytes = GetRawInputTableBytes(fontBytes, entry);
WriteUShort(headBytes, headBytes.Length - 4, 1);
WriteUShort(headBytes, headBytes.Length - 4, IndexToLocLong);
stream.Write(headBytes, 0, headBytes.Length);
// TODO: zero out checksum adjustment bytes.
}
else if (entry.Tag == TrueTypeHeaderTable.Hhea)
{
// Update number of h metrics.
var hheaBytes = GetRawInputTableBytes(fontBytes, entry);
WriteUShort(hheaBytes, hheaBytes.Length - 2, (ushort)indexMapping.Length);
WriteUShort(hheaBytes, hheaBytes.Length - 2, (ushort)trueTypeSubsetGlyphTable.HorizontalMetrics.Length);
stream.Write(hheaBytes, 0, hheaBytes.Length);
}
else if (entry.Tag == TrueTypeHeaderTable.Maxp)
{
if (newGlyphTable == null)
{
throw new InvalidOperationException();
}
// Update number of glyphs.
var maxpBytes = GetRawInputTableBytes(fontBytes, entry);
WriteUShort(maxpBytes, 4, (ushort)(newGlyphTable.GlyphOffsets.Length - 1));
WriteUShort(maxpBytes, 4, trueTypeSubsetGlyphTable.GlyphCount);
stream.Write(maxpBytes, 0, maxpBytes.Length);
}
else
@@ -182,7 +185,7 @@
result[0] = new OldToNewGlyphIndex(0, 0, '\0');
var previousCMap = font.MacRomanCMap ?? font.WindowsUnicodeCMap ?? font.WindowsSymbolCMap;
var previousCMap = font.WindowsUnicodeCMap ?? font.WindowsSymbolCMap ?? font.MacRomanCMap;
if (previousCMap == null)
{
@@ -223,21 +226,9 @@
return cmap;
}
private static HorizontalMetricsTable GetHorizontalMetricsTable(TrueTypeFontProgram font, DirectoryEntry entry, OldToNewGlyphIndex[] encoding)
private static HorizontalMetricsTable GetHorizontalMetricsTable(DirectoryEntry entry, TrueTypeSubsetGlyphTable glyphTable)
{
var current = font.TableRegister.HorizontalMetricsTable;
var newMetrics = new HorizontalMetricsTable.HorizontalMetric[encoding.Length];
for (var i = 0; i < encoding.Length; i++)
{
var mapping = encoding[i];
// TODO: might be an additional lsb only.
var value = current.HorizontalMetrics[mapping.OldIndex];
newMetrics[i] = value;
}
return new HorizontalMetricsTable(entry.DummyHeader, newMetrics, EmptyArray<short>.Instance);
return new HorizontalMetricsTable(entry.DummyHeader, glyphTable.HorizontalMetrics, EmptyArray<short>.Instance);
}
private static byte[] GetRawInputTableBytes(byte[] font, DirectoryEntry entry)
@@ -297,15 +288,5 @@
}
}
}
internal class TrueTypeSubsetEncoding
{
public IReadOnlyList<char> Characters { get; }
public TrueTypeSubsetEncoding(IReadOnlyList<char> characters)
{
Characters = characters;
}
}
}

View File

@@ -7,13 +7,13 @@
using Core;
using Filters;
using Geometry;
using IO;
using Logging;
using Tokens;
using PdfPig.Fonts;
using PdfPig.Fonts.Exceptions;
using PdfPig.Fonts.TrueType;
using PdfPig.Fonts.TrueType.Tables;
using Subsetting;
internal class TrueTypeWritingFont : IWritingFont
{