TrueType encoding replacer and checksum calculator #98

We need to provide a custom cmap for our overridden fonts when creating a document using TrueType fonts. To do this without writing a complete subsetter (yet), we simply rearrange the font by moving the cmap table to the end of the font file.

To keep the font valid we need to recalculate the offsets and checksums for all table headers. This commit adds a calculator which can compute per-table checksums as well as the whole-font checksum used to derive the checksum adjustment recorded in the head table.

Now that the cmap table has been moved to the end of the font file, we can overwrite it with a custom cmap table of a different length without further invasive changes to the rest of the TrueType file. This isn't implemented in this commit yet, but it will be the next thing to implement.

In TrueTypeWritingFont we've temporarily reverted the change which maps characters to bytes until the custom cmap is written, so that for this change the output font file is still valid and can be interpreted by PDF consumers. Once the custom cmap is written we can uncomment the mapping logic and it should all just work.
This commit is contained in:
Eliot Jones
2019-12-29 16:28:17 +00:00
parent f319e7f4b5
commit 59c43cc526
6 changed files with 604 additions and 30 deletions

View File

@@ -51,7 +51,7 @@
return null;
}
return new TrueTypeHeaderTable(tag, checksum, offset, length);
return new TrueTypeHeaderTable(tag, (uint)checksum, (uint)offset, (uint)length);
}
private static TrueTypeFontProgram ParseTables(float version, IReadOnlyDictionary<string, TrueTypeHeaderTable> tables, TrueTypeDataBytes data)

View File

@@ -0,0 +1,161 @@
namespace UglyToad.PdfPig.Fonts.TrueType
{
using System;
using System.Collections.Generic;
using IO;
internal static class TrueTypeChecksumCalculator
{
private const string HeaderTableTag = "head";
// Preceded by 2 32-fixed fraction values.
private const int ChecksumAdjustmentPosition = 8;
/// <summary>
/// Calculates the checksum of the whole font, reading from the start of <paramref name="bytes"/>
/// and leaving out the 4 bytes of the checksum adjustment stored in the head table.
/// </summary>
/// <param name="bytes">The bytes of the complete font file.</param>
/// <param name="headerTable">The header entry for the head table, used to locate the checksum adjustment.</param>
/// <returns>The sum of the font's 32-bit words, excluding the checksum adjustment.</returns>
/// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="headerTable"/> is not the head table.</exception>
public static uint CalculateWholeFontChecksum(IInputBytes bytes, TrueTypeHeaderTable headerTable)
{
    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes));
    }

    var isHead = IsHeadTable(headerTable);

    if (!isHead)
    {
        throw new ArgumentException($"Can only calculate checksum for the whole font when the head table is provided. Got: {headerTable}.");
    }

    // The whole font checksum always starts from the first byte of the file.
    bytes.Seek(0);

    var withoutAdjustment = ToChecksumSkippedEnumerable(bytes, headerTable);

    return Calculate(withoutAdjustment);
}
/// <summary>
/// Calculates the checksum for a single table by summing its bytes as big-endian 32-bit words,
/// treating any bytes missing from the final word as zero.
/// </summary>
/// <param name="bytes">The bytes of the font containing the table.</param>
/// <param name="table">The header entry giving the tag, offset and length of the table.</param>
/// <returns>The checksum of the table. For the head table the checksum adjustment is treated as zero.</returns>
/// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null.</exception>
/// <exception cref="InvalidOperationException">
/// The head table is too short to hold a checksum adjustment or could not be read in full.
/// </exception>
public static uint Calculate(IInputBytes bytes, TrueTypeHeaderTable table)
{
    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes));
    }

    bytes.Seek(table.Offset);

    if (IsHeadTable(table))
    {
        // To calculate the checkSum for the 'head' table which itself includes the
        // checkSumAdjustment entry for the entire font, do the following:
        // Set the checkSumAdjustment to 0.
        // Calculate the checksum as normal.
        if (table.Length < ChecksumAdjustmentPosition + 4)
        {
            throw new InvalidOperationException($"The head table is too short to contain a checksum adjustment, length was {table.Length} bytes.");
        }

        var fullTableBytes = new byte[table.Length];
        var read = bytes.Read(fullTableBytes);

        if (read != table.Length)
        {
            throw new InvalidOperationException($"Expected to read {table.Length} bytes for the head table at offset {table.Offset} but read {read}.");
        }

        // Zero out the checksum adjustment.
        for (var i = 0; i < 4; i++)
        {
            fullTableBytes[ChecksumAdjustmentPosition + i] = 0;
        }

        return Calculate(fullTableBytes);
    }

    // Widen before adding so a large offset plus length cannot wrap around in 32-bit arithmetic.
    var endAt = (long)table.Offset + table.Length;

    var result = 0u;

    unchecked
    {
        // The checksum is defined to wrap on overflow.
        while (TryReadUInt(bytes, endAt, out var next))
        {
            result += next;
        }
    }

    return result;
}
/// <summary>
/// Calculate the TrueType checksum for the provided bytes by summing them as 32-bit words,
/// allowing the sum to wrap around on overflow.
/// </summary>
/// <param name="bytes">The bytes to calculate the checksum for.</param>
/// <returns>The wrapped sum of all words read from <paramref name="bytes"/>.</returns>
public static uint Calculate(IEnumerable<byte> bytes)
{
    uint sum = 0;

    using (var source = bytes.GetEnumerator())
    {
        uint word;
        while (TryReadUInt(source, out word))
        {
            sum = unchecked(sum + word);
        }
    }

    return sum;
}
/// <summary>
/// Whether the tag of the table matches the head table tag, ignoring case.
/// </summary>
private static bool IsHeadTable(TrueTypeHeaderTable table)
{
    return HeaderTableTag.Equals(table.Tag, StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// Reads up to 4 bytes from the enumerator and combines them into a big-endian 32-bit value.
/// Bytes which are not available after the first are treated as zero.
/// </summary>
/// <param name="enumerator">The source of bytes, advanced by this call.</param>
/// <param name="result">The value read, or zero if no bytes remained.</param>
/// <returns><see langword="false"/> if the enumerator had no more bytes, otherwise <see langword="true"/>.</returns>
private static bool TryReadUInt(IEnumerator<byte> enumerator, out uint result)
{
    if (!enumerator.MoveNext())
    {
        result = 0;
        return false;
    }

    // The first byte is the most significant.
    var value = (uint)enumerator.Current << 24;

    // Always attempt to advance for the remaining 3 bytes, a missing byte contributes zero.
    for (var shift = 16; shift >= 0; shift -= 8)
    {
        if (enumerator.MoveNext())
        {
            value |= (uint)enumerator.Current << shift;
        }
    }

    result = value;

    return true;
}
/// <summary>
/// Reads up to 4 bytes from the input, stopping at <paramref name="endAt"/>, and combines them into
/// a big-endian 32-bit value. Bytes at or beyond the end are treated as zero.
/// </summary>
/// <param name="input">The input to read from, starting at its current offset.</param>
/// <param name="endAt">The offset at which reading must stop.</param>
/// <param name="result">The value read, or zero if the input was already at or past the end.</param>
/// <returns><see langword="false"/> if the current offset was at or past <paramref name="endAt"/>, otherwise <see langword="true"/>.</returns>
private static bool TryReadUInt(IInputBytes input, long endAt, out uint result)
{
    result = 0;

    if (input.CurrentOffset >= endAt)
    {
        return false;
    }

    var word = 0u;

    for (var i = 0; i < 4; i++)
    {
        byte next = 0;

        // Only advance while still inside the range and the input has more data.
        if (input.CurrentOffset != endAt && input.MoveNext())
        {
            next = input.CurrentByte;
        }

        word = (word << 8) | next;
    }

    result = word;

    return true;
}
/// <summary>
/// Lazily yields every remaining byte of the input except the 4 bytes of the checksum adjustment,
/// located <see cref="ChecksumAdjustmentPosition"/> bytes after the offset of the provided table.
/// </summary>
/// <param name="bytes">The input to read from its current position to the end.</param>
/// <param name="table">The table containing the checksum adjustment.</param>
private static IEnumerable<byte> ToChecksumSkippedEnumerable(IInputBytes bytes, TrueTypeHeaderTable table)
{
    // The offset is compared after advancing, so the skipped range is exclusive at the start and inclusive at the end.
    long skipAfter = table.Offset + ChecksumAdjustmentPosition;
    long skipUntil = table.Offset + ChecksumAdjustmentPosition + 4;

    while (bytes.MoveNext())
    {
        var position = bytes.CurrentOffset;

        if (position <= skipAfter || position > skipUntil)
        {
            yield return bytes.CurrentByte;
        }
    }
}
}
}

View File

@@ -170,19 +170,19 @@
/// <summary>
/// The checksum for the table.
/// </summary>
public long CheckSum { get; }
public uint CheckSum { get; }
/// <summary>
/// Offset of the table from the beginning of the file.
/// </summary>
public long Offset { get; }
public uint Offset { get; }
/// <summary>
/// The length of the table.
/// </summary>
public long Length { get; }
public uint Length { get; }
public TrueTypeHeaderTable(string tag, long checkSum, long offset, long length)
public TrueTypeHeaderTable(string tag, uint checkSum, uint offset, uint length)
{
Tag = tag ?? throw new ArgumentNullException(nameof(tag));
CheckSum = checkSum;

View File

@@ -0,0 +1,283 @@
namespace UglyToad.PdfPig.Writer.Fonts
{
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using IO;
using PdfPig.Fonts.TrueType;
internal static class TrueTypeEncodingReplacer
{
private const int SizeOfFraction = 4;
private const int SizeOfShort = 2;
private const int SizeOfTag = 4;
private const int SizeOfInt = 4;
private const string CMapTag = "cmap";
private const string HeadTag = "head";
/// <summary>
/// Produces a copy of a TrueType font in which the cmap table has been moved to the end of the file,
/// with the table record offsets, the per-table checksums and the head table's checksum adjustment
/// recalculated to match the new layout.
/// </summary>
/// <param name="fontBytes">
/// The input font. Reading starts at the current position of the input which is assumed to be the
/// start of the font file, since table record locations are taken from the position in the output.
/// </param>
/// <param name="newEncoding">
/// The replacement encoding. Currently unused, the original cmap table is copied unchanged.
/// </param>
/// <returns>The bytes of the rearranged font.</returns>
/// <exception cref="ArgumentNullException"><paramref name="fontBytes"/> is null.</exception>
/// <exception cref="KeyNotFoundException">The font does not contain a cmap table or a head table.</exception>
/// <exception cref="InvalidOperationException">The input ended early or a table was not copied in full.</exception>
public static byte[] ReplaceCMapTables(IInputBytes fontBytes, IReadOnlyDictionary<char, byte> newEncoding)
{
    if (fontBytes == null)
    {
        throw new ArgumentNullException(nameof(fontBytes));
    }

    var buffer = new byte[2048];

    var inputTableHeaders = new Dictionary<string, InputHeader>(StringComparer.OrdinalIgnoreCase);
    var outputTableHeaders = new Dictionary<string, TrueTypeHeaderTable>(StringComparer.OrdinalIgnoreCase);

    // Within a table record the checksum immediately follows the tag.
    var fileChecksumOffset = SizeOfTag;

    byte[] result;

    using (var stream = new MemoryStream())
    {
        // Write the file header details and read the number of tables out.
        CopyThroughBufferPreserveData(stream, buffer, fontBytes, SizeOfFraction + (SizeOfShort * 4));

        var numberOfTables = ReadUShortFromBuffer(buffer, SizeOfFraction);

        // For each table read the table header values and preserve the order by storing the offset in the input file
        // at which the header was read.
        for (var i = 0; i < numberOfTables; i++)
        {
            var offsetOfHeader = (uint)stream.Position;

            CopyThroughBufferPreserveData(stream, buffer, fontBytes, SizeOfTag + (SizeOfInt * 3));

            var tag = Encoding.UTF8.GetString(buffer, 0, SizeOfTag);

            var checksum = ReadUIntFromBuffer(buffer, fileChecksumOffset);
            var offset = ReadUIntFromBuffer(buffer, SizeOfTag + SizeOfInt);
            var length = ReadUIntFromBuffer(buffer, SizeOfTag + (SizeOfInt * 2));

            var headerTable = new TrueTypeHeaderTable(tag, checksum, offset, length);

            // Store the locations of the tables in this font.
            inputTableHeaders[tag] = new InputHeader(headerTable, offsetOfHeader);
        }

        // Fail early with a descriptive message rather than part way through rewriting the font.
        if (!inputTableHeaders.TryGetValue(CMapTag, out var cmapLocation))
        {
            throw new KeyNotFoundException($"The font did not contain the required '{CMapTag}' table.");
        }

        if (!inputTableHeaders.ContainsKey(HeadTag))
        {
            throw new KeyNotFoundException($"The font did not contain the required '{HeadTag}' table.");
        }

        // Copy raw bytes for each of the tables from the input to the output including any additional bytes not in
        // tables but present in the input.
        var inputOffset = fontBytes.CurrentOffset;
        foreach (var inputHeader in inputTableHeaders.OrderBy(x => x.Value.HeaderTable.Offset))
        {
            var location = inputHeader.Value.HeaderTable;

            var gapFromPrevious = location.Offset - inputOffset;

            if (gapFromPrevious > 0)
            {
                CopyThroughBufferDiscardData(stream, buffer, fontBytes, gapFromPrevious);
            }
            else if (gapFromPrevious < 0)
            {
                // This table starts before the end of the previous one, move back so the correct bytes are copied.
                fontBytes.Seek(location.Offset);
            }

            if (inputHeader.Value.IsTable(CMapTag))
            {
                // Skip the CMap table for now, move it to the end in the output so we can resize it dynamically.
                inputOffset = location.Offset + location.Length;
                fontBytes.Seek(inputOffset);
                continue;
            }

            // Removing the cmap table shifts every following table by its unpadded length,
            // so restore the 4 byte alignment tables are required to start on.
            PadToFourByteBoundary(stream);

            var outputOffset = (uint)stream.Position;
            outputTableHeaders[location.Tag] = new TrueTypeHeaderTable(location.Tag, 0, outputOffset, location.Length);

            CopyThroughBufferDiscardData(stream, buffer, fontBytes, location.Length);

            var writtenLength = stream.Position - outputOffset;
            if (writtenLength != location.Length)
            {
                throw new InvalidOperationException($"Expected to write {location.Length} bytes for table {location.Tag} " +
                                                    $"but wrote {stream.Position - outputOffset}.");
            }

            inputOffset = fontBytes.CurrentOffset;
        }

        // TODO: write the modified cmap table here.
        fontBytes.Seek(cmapLocation.HeaderTable.Offset);

        // The preceding table may not end on a 4 byte boundary.
        PadToFourByteBoundary(stream);

        var newCmapTableLocation = (uint)stream.Position;
        CopyThroughBufferDiscardData(stream, buffer, fontBytes, cmapLocation.HeaderTable.Length);
        outputTableHeaders[cmapLocation.Tag] = new TrueTypeHeaderTable(cmapLocation.Tag, 0, newCmapTableLocation, cmapLocation.HeaderTable.Length);

        // Pad the final table so the file is a whole number of 32-bit words.
        PadToFourByteBoundary(stream);

        foreach (var inputHeader in inputTableHeaders)
        {
            // Go back to the location of the offset
            var headerOffsetLocation = inputHeader.Value.OffsetInInput + SizeOfTag + SizeOfInt;
            stream.Seek(headerOffsetLocation, SeekOrigin.Begin);

            var outputHeader = outputTableHeaders[inputHeader.Key];

            var inputLength = inputHeader.Value.HeaderTable.Length;
            if (outputHeader.Length != inputLength)
            {
                throw new InvalidOperationException($"Actual data length {outputHeader.Length} " +
                                                    $"did not match header length {inputLength} for table {inputHeader.Key}.");
            }

            WriteUInt(stream, outputHeader.Offset);
        }

        // Done writing to stream, just checksums left to repair.
        // ToArray returns the whole contents regardless of the current position.
        result = stream.ToArray();
    }

    var inputBytes = new ByteArrayInputBytes(result);

    // Overwrite checksum values per table.
    foreach (var inputHeader in inputTableHeaders)
    {
        var outputHeader = outputTableHeaders[inputHeader.Key];
        var headerOffset = inputHeader.Value.OffsetInInput;

        var newChecksum = TrueTypeChecksumCalculator.Calculate(inputBytes, outputHeader);

        // Overwrite the checksum value.
        WriteUInt(result, headerOffset + SizeOfTag, newChecksum);
    }

    // Overwrite the checksum adjustment which records the whole font checksum.
    var headTable = outputTableHeaders[HeadTag];

    // Wrap the array again so the whole font checksum is calculated over the table checksums written above,
    // whether or not the input bytes implementation took a copy of the array.
    var wholeFontChecksum = TrueTypeChecksumCalculator.CalculateWholeFontChecksum(new ByteArrayInputBytes(result), headTable);

    // Calculate the checksum for the entire font and subtract the value from the hex value B1B0AFBA.
    var checksumAdjustmentLocation = headTable.Offset + 8;
    var checksumAdjustment = unchecked(0xB1B0AFBA - wholeFontChecksum);

    // Store the result in checksum adjustment.
    WriteUInt(result, checksumAdjustmentLocation, checksumAdjustment);

    return result;
}

/// <summary>
/// Writes zero bytes at the current position of the stream until the position is a multiple of 4.
/// </summary>
private static void PadToFourByteBoundary(Stream stream)
{
    var remainder = (int)(stream.Position % 4);

    if (remainder == 0)
    {
        return;
    }

    for (var i = remainder; i < 4; i++)
    {
        stream.WriteByte(0);
    }
}
/// <summary>
/// Reads a big-endian 16-bit unsigned value from the 2 bytes of the buffer starting at <paramref name="location"/>.
/// </summary>
private static ushort ReadUShortFromBuffer(byte[] buffer, int location)
{
    var high = buffer[location];
    var low = buffer[location + 1];

    return (ushort)((high << 8) | low);
}
/// <summary>
/// Reads a big-endian 32-bit unsigned value from the 4 bytes of the buffer starting at <paramref name="location"/>.
/// </summary>
private static uint ReadUIntFromBuffer(byte[] buffer, int location)
{
    uint value = 0;

    // Most significant byte first.
    for (var i = 0; i < 4; i++)
    {
        value = (value << 8) | buffer[location + i];
    }

    return value;
}
/// <summary>
/// Writes the value to the stream at its current position as 4 bytes in big-endian order.
/// </summary>
private static void WriteUInt(Stream stream, uint value)
{
    var bytes = new byte[4];

    // Most significant byte first.
    for (var i = 0; i < bytes.Length; i++)
    {
        bytes[i] = (byte)(value >> (24 - (8 * i)));
    }

    stream.Write(bytes, 0, bytes.Length);
}
/// <summary>
/// Overwrites the 4 bytes of the array starting at <paramref name="offset"/> with the value in big-endian order.
/// </summary>
private static void WriteUInt(byte[] array, uint offset, uint value)
{
    // Most significant byte first.
    for (uint i = 0; i < 4; i++)
    {
        var shift = 24 - (int)(8 * i);
        array[offset + i] = (byte)(value >> shift);
    }
}
/// <summary>
/// Copies <paramref name="size"/> bytes from the input to the destination stream in chunks no larger
/// than the buffer. The buffer contents after the call are whatever the final chunk left behind.
/// </summary>
/// <param name="destination">The stream to write the copied bytes to.</param>
/// <param name="buffer">Scratch space used for each chunk, must not be empty.</param>
/// <param name="input">The input to read from, starting at its current position.</param>
/// <param name="size">The total number of bytes to copy.</param>
/// <exception cref="ArgumentException"><paramref name="buffer"/> is empty and there is data to copy.</exception>
/// <exception cref="InvalidOperationException">The input could not supply the requested number of bytes.</exception>
private static void CopyThroughBufferDiscardData(Stream destination, byte[] buffer, IInputBytes input, long size)
{
    if (size > 0 && buffer.Length == 0)
    {
        // An empty buffer could never make progress.
        throw new ArgumentException("Cannot copy data through an empty buffer.", nameof(buffer));
    }

    // Tracked as a long since the size may exceed the range of an int.
    long filled = 0;

    while (filled < size)
    {
        // Limit each chunk by the actual buffer size rather than assuming a fixed capacity.
        var expected = (int)Math.Min(size - filled, buffer.Length);

        var read = input.Read(buffer, expected);

        if (read != expected)
        {
            throw new InvalidOperationException($"Failed to read {size} bytes starting at offset {input.CurrentOffset - read}.");
        }

        destination.Write(buffer, 0, read);

        filled += read;
    }
}
/// <summary>
/// Reads <paramref name="size"/> bytes from the input into the buffer in a single read and writes
/// them to the destination stream, so the caller can inspect the copied data in the buffer afterwards.
/// </summary>
/// <exception cref="InvalidOperationException">
/// The requested size does not fit in the buffer or the input could not supply that many bytes.
/// </exception>
private static void CopyThroughBufferPreserveData(Stream destination, byte[] buffer, IInputBytes input, int size)
{
    var capacity = buffer.Length;

    if (capacity < size)
    {
        throw new InvalidOperationException("Cannot use this method to read more bytes than fit in the buffer.");
    }

    var count = input.Read(buffer, size);

    if (count == size)
    {
        destination.Write(buffer, 0, count);
        return;
    }

    throw new InvalidOperationException($"Failed to read {size} bytes starting at offset {input.CurrentOffset - count}.");
}
/// <summary>
/// A table record read from the input font along with the position at which the record itself was read.
/// </summary>
private class InputHeader
{
    /// <summary>
    /// The tag of the table this record describes.
    /// </summary>
    public string Tag => HeaderTable.Tag;

    /// <summary>
    /// The tag, checksum, offset and length as read from the input.
    /// </summary>
    public TrueTypeHeaderTable HeaderTable { get; }

    /// <summary>
    /// The position of the start of this table record.
    /// </summary>
    public uint OffsetInInput { get; }

    /// <summary>
    /// Create a new <see cref="InputHeader"/>.
    /// </summary>
    /// <exception cref="ArgumentException">The header table has no tag.</exception>
    public InputHeader(TrueTypeHeaderTable headerTable, uint offsetInInput)
    {
        if (headerTable.Tag == null)
        {
            // Report the argument, the HeaderTable property has not been assigned at this point.
            throw new ArgumentException($"No tag for header table: {headerTable}.");
        }

        HeaderTable = headerTable;
        OffsetInInput = offsetInInput;
    }

    /// <summary>
    /// Whether this record is for the table with the given tag, ignoring case.
    /// </summary>
    public bool IsTable(string tag)
    {
        return string.Equals(tag, Tag, StringComparison.OrdinalIgnoreCase);
    }
}
/// <summary>
/// A table record for the output font along with an offset in the output.
/// </summary>
private class OutputHeader
{
    /// <summary>
    /// The tag of the table this record describes.
    /// </summary>
    public string Tag => HeaderTable.Tag;

    /// <summary>
    /// The tag, checksum, offset and length for the table.
    /// </summary>
    public TrueTypeHeaderTable HeaderTable { get; }

    /// <summary>
    /// The offset in the output supplied when this record was created.
    /// </summary>
    public uint OffsetInOutput { get; }

    /// <summary>
    /// Create a new <see cref="OutputHeader"/>.
    /// </summary>
    /// <exception cref="ArgumentException">The header table has no tag.</exception>
    public OutputHeader(TrueTypeHeaderTable headerTable, uint offsetInOutput)
    {
        if (headerTable.Tag == null)
        {
            // Report the argument, the HeaderTable property has not been assigned at this point.
            throw new ArgumentException($"No tag for header table: {headerTable}.");
        }

        HeaderTable = headerTable;
        OffsetInOutput = offsetInOutput;
    }
}
}
}

View File

@@ -7,21 +7,22 @@
using Core;
using Filters;
using Geometry;
using IO;
using Logging;
using Tokens;
using UglyToad.PdfPig.Fonts;
using UglyToad.PdfPig.Fonts.Exceptions;
using UglyToad.PdfPig.Fonts.TrueType;
using UglyToad.PdfPig.Fonts.TrueType.Tables;
using PdfPig.Fonts;
using PdfPig.Fonts.Exceptions;
using PdfPig.Fonts.TrueType;
using PdfPig.Fonts.TrueType.Tables;
internal class TrueTypeWritingFont : IWritingFont
{
private readonly TrueTypeFontProgram font;
private readonly IReadOnlyList<byte> fontFileBytes;
private readonly object mappingLock = new object();
//private readonly object mappingLock = new object();
private readonly Dictionary<char, byte> characterMapping = new Dictionary<char, byte>();
private int characterMappingCounter = 1;
//private int characterMappingCounter = 1;
public bool HasWidths { get; } = true;
@@ -51,14 +52,16 @@
public ObjectToken WriteFont(NameToken fontKeyName, Stream outputStream, BuilderContext context)
{
var b = TrueTypeEncodingReplacer.ReplaceCMapTables(new ByteArrayInputBytes(fontFileBytes), characterMapping);
// TODO: unfortunately we need to subset the font in order to support custom encoding.
// A symbolic font (one which contains characters not in the standard latin set) -
// should contain a MacRoman (1, 0) or Windows Symbolic (3,0) cmap subtable which maps character codes to glyph id.
var bytes = CompressBytes(fontFileBytes);
var bytes = CompressBytes(b);
var embeddedFile = new StreamToken(new DictionaryToken(new Dictionary<NameToken, IToken>
{
{ NameToken.Length, new NumericToken(bytes.Length) },
{ NameToken.Length1, new NumericToken(fontFileBytes.Count) },
{ NameToken.Length1, new NumericToken(b.Length) },
{ NameToken.Filter, new ArrayToken(new []{ NameToken.FlateDecode }) }
}), bytes);
@@ -148,28 +151,29 @@
public byte GetValueForCharacter(char character)
{
lock (mappingLock)
{
if (characterMapping.TryGetValue(character, out var result))
{
return result;
}
return (byte) character;
//lock (mappingLock)
//{
// if (characterMapping.TryGetValue(character, out var result))
// {
// return result;
// }
if (characterMappingCounter > byte.MaxValue)
{
throw new NotSupportedException("Cannot support more than 255 separate characters in a simple TrueType font, please" +
" submit an issue since we will need to add support for composite fonts with multi-byte" +
" character identifiers.");
}
// if (characterMappingCounter > byte.MaxValue)
// {
// throw new NotSupportedException("Cannot support more than 255 separate characters in a simple TrueType font, please" +
// " submit an issue since we will need to add support for composite fonts with multi-byte" +
// " character identifiers.");
// }
var value = (byte) characterMappingCounter++;
// var value = (byte) characterMappingCounter++;
characterMapping[character] = value;
// characterMapping[character] = value;
result = value;
// result = value;
return result;
}
// return result;
//}
}
private static byte[] CompressBytes(IReadOnlyList<byte> bytes)