diff --git a/src/UglyToad.PdfPig.Tests/Fonts/TrueType/TrueTypeChecksumCalculatorTests.cs b/src/UglyToad.PdfPig.Tests/Fonts/TrueType/TrueTypeChecksumCalculatorTests.cs
new file mode 100644
index 00000000..9943c928
--- /dev/null
+++ b/src/UglyToad.PdfPig.Tests/Fonts/TrueType/TrueTypeChecksumCalculatorTests.cs
@@ -0,0 +1,126 @@
+namespace UglyToad.PdfPig.Tests.Fonts.TrueType
+{
+    using System;
+    using System.IO;
+    using System.Linq;
+    using PdfPig.Fonts.TrueType;
+    using PdfPig.Fonts.TrueType.Parser;
+    using PdfPig.IO;
+    using Xunit;
+
+    public class TrueTypeChecksumCalculatorTests
+    {
+        private readonly TrueTypeFontParser parser = new TrueTypeFontParser();
+
+        [Fact]
+        public void CalculatedChecksumsMatchRoboto()
+        {
+            // Both checksums are wrong in the file.
+            Run(GetFileBytes("Roboto-Regular.ttf"), false, false);
+        }
+
+        [Fact]
+        public void CalculatedChecksumsMatchAndada()
+        {
+            Run(GetFileBytes("Andada-Regular.ttf"), true, true);
+        }
+
+        [Fact]
+        public void CalculatedChecksumsMatchGoogleDoc()
+        {
+            // Checksum adjustment is wrong.
+            Run(GetFileBytes("google-simple-doc.ttf"), true, false);
+        }
+
+        [Fact]
+        public void CalculatedChecksumsMatchPMing()
+        {
+            // Checksum adjustment is wrong.
+            Run(GetFileBytes("PMingLiU.ttf"), true, false);
+        }
+
+        [Fact]
+        public void CalculatedChecksumsMatchCalibriWindows()
+        {
+            const string path = @"C:\Windows\Fonts\Calibri.ttf";
+            if (!File.Exists(path))
+            {
+                return;
+            }
+
+            Run(File.ReadAllBytes(path), true, true);
+        }
+
+        [Fact]
+        public void CalculatedChecksumsMatchCourierNewWindows()
+        {
+            const string path = @"C:\Windows\Fonts\cour.ttf";
+            if (!File.Exists(path))
+            {
+                return;
+            }
+
+            Run(File.ReadAllBytes(path), true, true);
+        }
+
+        private void Run(byte[] bytes, bool checkHeaderChecksum, bool checkWholeFileChecksum)
+        {
+            var inputBytes = new ByteArrayInputBytes(bytes);
+
+            var font = parser.Parse(new TrueTypeDataBytes(inputBytes));
+
+            inputBytes = new ByteArrayInputBytes(bytes);
+
+            foreach (var header in font.TableHeaders)
+            {
+                // Acts as the whole table checksum
+                if (header.Key == "head")
+                {
+                    if (checkHeaderChecksum)
+                    {
+                        var headerChecksum = TrueTypeChecksumCalculator.Calculate(inputBytes, header.Value);
+
+                        Assert.Equal(header.Value.CheckSum, headerChecksum);
+                    }
+
+                    continue;
+                }
+
+                var input = bytes.Skip((int)header.Value.Offset).Take((int)header.Value.Length);
+
+                var checksum = TrueTypeChecksumCalculator.Calculate(input);
+
+                Assert.Equal(header.Value.CheckSum, checksum);
+
+                var checksumByTable = TrueTypeChecksumCalculator.Calculate(inputBytes, header.Value);
+
+                Assert.Equal(header.Value.CheckSum, checksumByTable);
+            }
+
+            if (checkWholeFileChecksum)
+            {
+                var headerActual = font.TableHeaders["head"];
+                var wholeFontChecksum = TrueTypeChecksumCalculator.CalculateWholeFontChecksum(inputBytes, headerActual);
+                var adjustment = 0xB1B0AFBA - wholeFontChecksum;
+                var adjustmentRecorded = font.TableRegister.HeaderTable.CheckSumAdjustment;
+
+                Assert.Equal(adjustmentRecorded, adjustment);
+
+                var expectedWholeFontChecksum = 0xB1B0AFBA - adjustmentRecorded;
+
+                Assert.Equal(expectedWholeFontChecksum, wholeFontChecksum);
+            }
+        }
+
+        private static byte[] GetFileBytes(string name)
+        {
+            var path = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Fonts", "TrueType");
+
+            name = name.EndsWith(".ttf", StringComparison.Ordinal) || name.EndsWith(".txt", StringComparison.Ordinal) ? name : name + ".ttf";
+
+            var file = Path.Combine(path, name);
+
+            return File.ReadAllBytes(file);
+        }
+    }
+}
diff --git a/src/UglyToad.PdfPig/Fonts/TrueType/Parser/TrueTypeFontParser.cs b/src/UglyToad.PdfPig/Fonts/TrueType/Parser/TrueTypeFontParser.cs
index 7057659f..0159a33f 100644
--- a/src/UglyToad.PdfPig/Fonts/TrueType/Parser/TrueTypeFontParser.cs
+++ b/src/UglyToad.PdfPig/Fonts/TrueType/Parser/TrueTypeFontParser.cs
@@ -51,7 +51,7 @@
                 return null;
             }
 
-            return new TrueTypeHeaderTable(tag, checksum, offset, length);
+            return new TrueTypeHeaderTable(tag, (uint)checksum, (uint)offset, (uint)length);
         }
 
         private static TrueTypeFontProgram ParseTables(float version, IReadOnlyDictionary tables, TrueTypeDataBytes data)
diff --git a/src/UglyToad.PdfPig/Fonts/TrueType/TrueTypeChecksumCalculator.cs b/src/UglyToad.PdfPig/Fonts/TrueType/TrueTypeChecksumCalculator.cs
new file mode 100644
index 00000000..07065976
--- /dev/null
+++ b/src/UglyToad.PdfPig/Fonts/TrueType/TrueTypeChecksumCalculator.cs
@@ -0,0 +1,207 @@
+namespace UglyToad.PdfPig.Fonts.TrueType
+{
+    using System;
+    using System.Collections.Generic;
+    using IO;
+
+    /// <summary>
+    /// Calculates checksums for individual TrueType tables and for a whole font file.
+    /// </summary>
+    internal static class TrueTypeChecksumCalculator
+    {
+        private const string HeaderTableTag = "head";
+
+        // Position of the checksum adjustment in the head table, preceded by 2 32-bit fixed fraction values.
+        private const int ChecksumAdjustmentPosition = 8;
+
+        // The checksum adjustment is a single 32-bit value.
+        private const int ChecksumAdjustmentLength = 4;
+
+        /// <summary>
+        /// Calculates the checksum of every byte in the font, leaving out the 4 checksum adjustment bytes of the head table.
+        /// </summary>
+        /// <param name="bytes">The bytes of the whole font, read from offset 0 until no more bytes are available.</param>
+        /// <param name="headerTable">The header of the head table, used to locate the checksum adjustment.</param>
+        /// <exception cref="ArgumentNullException">The input bytes are null.</exception>
+        /// <exception cref="ArgumentException">The provided header is not for the head table.</exception>
+        public static uint CalculateWholeFontChecksum(IInputBytes bytes, TrueTypeHeaderTable headerTable)
+        {
+            if (bytes == null)
+            {
+                throw new ArgumentNullException(nameof(bytes));
+            }
+
+            if (!IsHeadTable(headerTable))
+            {
+                throw new ArgumentException($"Can only calculate checksum for the whole font when the head table is provided. Got: {headerTable}.");
+            }
+
+            bytes.Seek(0);
+
+            return Calculate(ToChecksumSkippedEnumerable(bytes, headerTable));
+        }
+
+        /// <summary>
+        /// Calculates the checksum of a single table located by its header. For the head table the checksum
+        /// adjustment bytes are treated as zero.
+        /// </summary>
+        /// <param name="bytes">The bytes of the font containing the table.</param>
+        /// <param name="table">The header giving the offset and length of the table.</param>
+        /// <exception cref="ArgumentNullException">The input bytes are null.</exception>
+        /// <exception cref="InvalidOperationException">The head table is too short or could not be read in full.</exception>
+        public static uint Calculate(IInputBytes bytes, TrueTypeHeaderTable table)
+        {
+            if (bytes == null)
+            {
+                throw new ArgumentNullException(nameof(bytes));
+            }
+
+            bytes.Seek(table.Offset);
+
+            if (IsHeadTable(table))
+            {
+                // To calculate the checkSum for the 'head' table which itself includes the
+                // checkSumAdjustment entry for the entire font, do the following:
+                // Set the checkSumAdjustment to 0.
+                // Calculate the checksum as normal.
+                if (table.Length < ChecksumAdjustmentPosition + ChecksumAdjustmentLength)
+                {
+                    throw new InvalidOperationException($"The head table is too short to contain a checksum adjustment: {table}.");
+                }
+
+                var fullTableBytes = new byte[table.Length];
+                var read = bytes.Read(fullTableBytes);
+                if (read != table.Length)
+                {
+                    throw new InvalidOperationException($"Expected to read {table.Length} bytes for the head table but read {read}.");
+                }
+
+                // Zero out the checksum adjustment
+                Array.Clear(fullTableBytes, ChecksumAdjustmentPosition, ChecksumAdjustmentLength);
+
+                return Calculate(fullTableBytes);
+            }
+
+            // Widen before adding so a table ending near the 4GB limit cannot wrap around to a small end offset.
+            var endAt = (long)table.Offset + table.Length;
+            var result = 0u;
+
+            unchecked
+            {
+                while (TryReadUInt(bytes, endAt, out var next))
+                {
+                    result += next;
+                }
+            }
+
+            return result;
+        }
+
+        /// <summary>
+        /// Calculate the TrueType checksum for the provided bytes.
+        /// </summary>
+        /// <remarks>
+        /// The bytes are summed as big-endian 32-bit values with overflow ignored, a trailing partial value
+        /// is padded with zeros.
+        /// </remarks>
+        /// <exception cref="ArgumentNullException">The bytes are null.</exception>
+        public static uint Calculate(IEnumerable<byte> bytes)
+        {
+            if (bytes == null)
+            {
+                throw new ArgumentNullException(nameof(bytes));
+            }
+
+            var result = 0u;
+
+            unchecked
+            {
+                using (var enumerator = bytes.GetEnumerator())
+                {
+                    while (TryReadUInt(enumerator, out var next))
+                    {
+                        result += next;
+                    }
+                }
+            }
+
+            return result;
+        }
+
+        private static bool IsHeadTable(TrueTypeHeaderTable table) => string.Equals(HeaderTableTag, table.Tag, StringComparison.OrdinalIgnoreCase);
+
+        // Reads up to 4 bytes from the enumerator as a big-endian value, missing trailing bytes count as zero.
+        private static bool TryReadUInt(IEnumerator<byte> enumerator, out uint result)
+        {
+            result = 0;
+
+            if (!enumerator.MoveNext())
+            {
+                return false;
+            }
+
+            var top = enumerator.Current;
+            var three = enumerator.MoveNext() ? enumerator.Current : (byte)0;
+            var two = enumerator.MoveNext() ? enumerator.Current : (byte)0;
+            var one = enumerator.MoveNext() ? enumerator.Current : (byte)0;
+
+            result = ToUInt(top, three, two, one);
+
+            return true;
+        }
+
+        // Reads up to 4 bytes from the input as a big-endian value, bytes at or beyond endAt count as zero.
+        private static bool TryReadUInt(IInputBytes input, long endAt, out uint result)
+        {
+            result = 0;
+
+            byte ReadNext()
+            {
+                if (input.CurrentOffset == endAt || !input.MoveNext())
+                {
+                    return 0;
+                }
+
+                return input.CurrentByte;
+            }
+
+            if (input.CurrentOffset >= endAt)
+            {
+                return false;
+            }
+
+            var top = ReadNext();
+            var three = ReadNext();
+            var two = ReadNext();
+            var one = ReadNext();
+
+            result = ToUInt(top, three, two, one);
+
+            return true;
+        }
+
+        // Combines 4 bytes, most significant first, into a single 32-bit value.
+        private static uint ToUInt(byte top, byte three, byte two, byte one)
+        {
+            return ((uint)top << 24) | ((uint)three << 16) | ((uint)two << 8) | one;
+        }
+
+        private static IEnumerable<byte> ToChecksumSkippedEnumerable(IInputBytes bytes, TrueTypeHeaderTable table)
+        {
+            var skipAfter = (long)table.Offset + ChecksumAdjustmentPosition;
+            var skipUntil = skipAfter + ChecksumAdjustmentLength;
+
+            while (bytes.MoveNext())
+            {
+                // Skip checksum adjustment.
+                // NOTE(review): this assumes CurrentOffset has already moved past the byte just read - confirm.
+                if (bytes.CurrentOffset > skipAfter && bytes.CurrentOffset <= skipUntil)
+                {
+                    continue;
+                }
+
+                yield return bytes.CurrentByte;
+            }
+        }
+    }
+}
diff --git a/src/UglyToad.PdfPig/Fonts/TrueType/TrueTypeHeaderTable.cs b/src/UglyToad.PdfPig/Fonts/TrueType/TrueTypeHeaderTable.cs
index 7d79259f..15664535 100644
--- a/src/UglyToad.PdfPig/Fonts/TrueType/TrueTypeHeaderTable.cs
+++ b/src/UglyToad.PdfPig/Fonts/TrueType/TrueTypeHeaderTable.cs
@@ -170,19 +170,19 @@
         /// <summary>
         /// The checksum for the table.
         /// </summary>
-        public long CheckSum { get; }
+        public uint CheckSum { get; }
 
         /// <summary>
         /// Offset of the table from the beginning of the file.
         /// </summary>
-        public long Offset { get; }
+        public uint Offset { get; }
 
         /// <summary>
         /// The length of the table.
         /// </summary>
-        public long Length { get; }
+        public uint Length { get; }
 
-        public TrueTypeHeaderTable(string tag, long checkSum, long offset, long length)
+        public TrueTypeHeaderTable(string tag, uint checkSum, uint offset, uint length)
         {
             Tag = tag ?? throw new ArgumentNullException(nameof(tag));
             CheckSum = checkSum;
diff --git a/src/UglyToad.PdfPig/Writer/Fonts/TrueTypeEncodingReplacer.cs b/src/UglyToad.PdfPig/Writer/Fonts/TrueTypeEncodingReplacer.cs
new file mode 100644
index 00000000..794a74f4
--- /dev/null
+++ b/src/UglyToad.PdfPig/Writer/Fonts/TrueTypeEncodingReplacer.cs
@@ -0,0 +1,306 @@
+namespace UglyToad.PdfPig.Writer.Fonts
+{
+    using System;
+    using System.Collections.Generic;
+    using System.IO;
+    using System.Linq;
+    using System.Text;
+    using IO;
+    using PdfPig.Fonts.TrueType;
+
+    /// <summary>
+    /// Rewrites a TrueType font file so that the cmap table is the last table in the file, repairing the
+    /// table offsets and checksums afterwards.
+    /// </summary>
+    internal static class TrueTypeEncodingReplacer
+    {
+        private const int SizeOfFraction = 4;
+        private const int SizeOfShort = 2;
+        private const int SizeOfTag = 4;
+        private const int SizeOfInt = 4;
+
+        private const string CMapTag = "cmap";
+        private const string HeadTag = "head";
+
+        // Position of the checksum adjustment relative to the start of the head table.
+        private const uint ChecksumAdjustmentPositionInHead = 8;
+
+        // The checksum adjustment is this value minus the checksum of the whole font.
+        private const uint ChecksumAdjustmentBase = 0xB1B0AFBA;
+
+        /// <summary>
+        /// Copies the font to a new byte array with the cmap table moved to the end of the file, then
+        /// rewrites the table offsets, the table checksums and the checksum adjustment to match.
+        /// </summary>
+        /// <param name="fontBytes">The bytes of the input font, presumably positioned at the start of the font (TODO confirm with callers).</param>
+        /// <param name="newEncoding">The replacement encoding. Not used yet, the cmap table is copied unchanged.</param>
+        /// <returns>The bytes of the rewritten font.</returns>
+        /// <exception cref="ArgumentNullException">The font bytes are null.</exception>
+        /// <exception cref="InvalidOperationException">The input ended early or the cmap or head table is missing.</exception>
+        public static byte[] ReplaceCMapTables(IInputBytes fontBytes, IReadOnlyDictionary<char, byte> newEncoding)
+        {
+            if (fontBytes == null)
+            {
+                throw new ArgumentNullException(nameof(fontBytes));
+            }
+
+            var buffer = new byte[2048];
+
+            var inputTableHeaders = new Dictionary<string, InputHeader>(StringComparer.OrdinalIgnoreCase);
+            var outputTableHeaders = new Dictionary<string, TrueTypeHeaderTable>(StringComparer.OrdinalIgnoreCase);
+
+            var fileChecksumOffset = SizeOfTag;
+
+            byte[] result;
+
+            using (var stream = new MemoryStream())
+            {
+                // Write the file header details and read the number of tables out.
+                CopyThroughBufferPreserveData(stream, buffer, fontBytes, SizeOfFraction + (SizeOfShort * 4));
+
+                var numberOfTables = ReadUShortFromBuffer(buffer, SizeOfFraction);
+
+                // For each table read the table header values and preserve the order by storing the offset in the input file
+                // at which the header was read.
+                for (var i = 0; i < numberOfTables; i++)
+                {
+                    var offsetOfHeader = (uint)stream.Position;
+
+                    CopyThroughBufferPreserveData(stream, buffer, fontBytes, SizeOfTag + (SizeOfInt * 3));
+
+                    var tag = Encoding.UTF8.GetString(buffer, 0, SizeOfTag);
+
+                    var checksum = ReadUIntFromBuffer(buffer, fileChecksumOffset);
+                    var offset = ReadUIntFromBuffer(buffer, SizeOfTag + SizeOfInt);
+                    var length = ReadUIntFromBuffer(buffer, SizeOfTag + (SizeOfInt * 2));
+
+                    var headerTable = new TrueTypeHeaderTable(tag, checksum, offset, length);
+
+                    // Store the locations of the tables in this font.
+                    inputTableHeaders[tag] = new InputHeader(headerTable, offsetOfHeader);
+                }
+
+                // Copy raw bytes for each of the tables from the input to the output including any additional bytes not in
+                // tables but present in the input.
+                var inputOffset = fontBytes.CurrentOffset;
+
+                foreach (var inputHeader in inputTableHeaders.OrderBy(x => x.Value.HeaderTable.Offset))
+                {
+                    var location = inputHeader.Value.HeaderTable;
+
+                    var gapFromPrevious = location.Offset - inputOffset;
+
+                    if (gapFromPrevious > 0)
+                    {
+                        CopyThroughBufferDiscardData(stream, buffer, fontBytes, gapFromPrevious);
+                    }
+
+                    if (inputHeader.Value.IsTable(CMapTag))
+                    {
+                        // Skip the CMap table for now, move it to the end in the output so we can resize it dynamically.
+                        // NOTE(review): every later table shifts back by the unpadded cmap length, so output offsets may lose 4-byte alignment - confirm.
+                        inputOffset = (long)location.Offset + location.Length;
+                        fontBytes.Seek(inputOffset);
+
+                        continue;
+                    }
+
+                    var outputOffset = (uint)stream.Position;
+
+                    outputTableHeaders[location.Tag] = new TrueTypeHeaderTable(location.Tag, 0, outputOffset, location.Length);
+
+                    CopyThroughBufferDiscardData(stream, buffer, fontBytes, location.Length);
+
+                    var writtenLength = stream.Position - outputOffset;
+
+                    if (writtenLength != location.Length)
+                    {
+                        throw new InvalidOperationException($"Expected to write {location.Length} bytes for table {location.Tag} " +
+                                                            $"but wrote {writtenLength}.");
+                    }
+
+                    inputOffset = fontBytes.CurrentOffset;
+                }
+
+                // TODO: write the modified cmap table here.
+                if (!inputTableHeaders.TryGetValue(CMapTag, out var cmapLocation))
+                {
+                    throw new InvalidOperationException("The font does not contain a cmap table to replace.");
+                }
+
+                fontBytes.Seek(cmapLocation.HeaderTable.Offset);
+
+                var newCmapTableLocation = (uint)stream.Position;
+                CopyThroughBufferDiscardData(stream, buffer, fontBytes, cmapLocation.HeaderTable.Length);
+
+                outputTableHeaders[cmapLocation.Tag] = new TrueTypeHeaderTable(cmapLocation.Tag, 0, newCmapTableLocation, cmapLocation.HeaderTable.Length);
+
+                foreach (var inputHeader in inputTableHeaders)
+                {
+                    // Go back to the location of the offset
+                    var headerOffsetLocation = inputHeader.Value.OffsetInInput + SizeOfTag + SizeOfInt;
+                    stream.Seek(headerOffsetLocation, SeekOrigin.Begin);
+
+                    var outputHeader = outputTableHeaders[inputHeader.Key];
+
+                    var inputLength = inputHeader.Value.HeaderTable.Length;
+
+                    if (outputHeader.Length != inputLength)
+                    {
+                        throw new InvalidOperationException($"Actual data length {outputHeader.Length} " +
+                                                            $"did not match header length {inputLength} for table {inputHeader.Key}.");
+                    }
+
+                    WriteUInt(stream, outputHeader.Offset);
+                }
+
+                stream.Seek(0, SeekOrigin.Begin);
+
+                // Done writing to stream, just checksums left to repair.
+                result = stream.ToArray();
+            }
+
+            var inputBytes = new ByteArrayInputBytes(result);
+
+            // Overwrite checksum values per table.
+            foreach (var inputHeader in inputTableHeaders)
+            {
+                var outputHeader = outputTableHeaders[inputHeader.Key];
+
+                var headerOffset = inputHeader.Value.OffsetInInput;
+
+                var newChecksum = TrueTypeChecksumCalculator.Calculate(inputBytes, outputHeader);
+
+                // Overwrite the checksum value.
+                WriteUInt(result, headerOffset + SizeOfTag, newChecksum);
+            }
+
+            // Overwrite the checksum adjustment which records the whole font checksum.
+            if (!outputTableHeaders.TryGetValue(HeadTag, out var headTable))
+            {
+                throw new InvalidOperationException("The font does not contain a head table to store the checksum adjustment in.");
+            }
+
+            var wholeFontChecksum = TrueTypeChecksumCalculator.CalculateWholeFontChecksum(inputBytes, headTable);
+
+            // Calculate the checksum for the entire font and subtract the value from the hex value B1B0AFBA.
+            var checksumAdjustmentLocation = headTable.Offset + ChecksumAdjustmentPositionInHead;
+            var checksumAdjustment = unchecked(ChecksumAdjustmentBase - wholeFontChecksum);
+
+            // Store the result in checksum adjustment.
+            WriteUInt(result, checksumAdjustmentLocation, checksumAdjustment);
+
+            return result;
+        }
+
+        // Reads a big-endian 16-bit value from the buffer.
+        private static ushort ReadUShortFromBuffer(byte[] buffer, int location)
+        {
+            return (ushort)((buffer[location] << 8) + (buffer[location + 1] << 0));
+        }
+
+        // Reads a big-endian 32-bit value from the buffer.
+        private static uint ReadUIntFromBuffer(byte[] buffer, int location)
+        {
+            return (uint)(((long)buffer[location] << 24)
+                          + ((long)buffer[location + 1] << 16)
+                          + (buffer[location + 2] << 8)
+                          + (buffer[location + 3] << 0));
+        }
+
+        // Writes the value to the stream at its current position, most significant byte first.
+        private static void WriteUInt(Stream stream, uint value)
+        {
+            var buffer = new[]
+            {
+                (byte) (value >> 24),
+                (byte) (value >> 16),
+                (byte) (value >> 8),
+                (byte) value
+            };
+
+            stream.Write(buffer, 0, 4);
+        }
+
+        // Overwrites 4 bytes of the array starting at offset with the value, most significant byte first.
+        private static void WriteUInt(byte[] array, uint offset, uint value)
+        {
+            array[offset] = (byte)(value >> 24);
+            array[offset + 1] = (byte)(value >> 16);
+            array[offset + 2] = (byte)(value >> 8);
+            array[offset + 3] = (byte)(value >> 0);
+        }
+
+        /// <summary>
+        /// Copies size bytes from the input to the destination stream in buffer sized pieces, the buffer
+        /// only holds the last piece afterwards.
+        /// </summary>
+        private static void CopyThroughBufferDiscardData(Stream destination, byte[] buffer, IInputBytes input, long size)
+        {
+            // Track progress as a long, table lengths are unsigned 32-bit values and can exceed int.MaxValue.
+            var filled = 0L;
+            while (filled < size)
+            {
+                var expected = (int)Math.Min(size - filled, buffer.Length);
+
+                var read = input.Read(buffer, expected);
+
+                if (read != expected)
+                {
+                    throw new InvalidOperationException($"Failed to read {size} bytes starting at offset {input.CurrentOffset - read}.");
+                }
+
+                destination.Write(buffer, 0, read);
+
+                filled += read;
+            }
+        }
+
+        /// <summary>
+        /// Copies data from the input to the destination stream while also populating the buffer with the full
+        /// run of copied data in the buffer from position 0 -> size.
+        /// </summary>
+        private static void CopyThroughBufferPreserveData(Stream destination, byte[] buffer, IInputBytes input, int size)
+        {
+            if (size > buffer.Length)
+            {
+                throw new InvalidOperationException("Cannot use this method to read more bytes than fit in the buffer.");
+            }
+
+            var read = input.Read(buffer, size);
+            if (read != size)
+            {
+                throw new InvalidOperationException($"Failed to read {size} bytes starting at offset {input.CurrentOffset - read}.");
+            }
+
+            destination.Write(buffer, 0, read);
+        }
+
+        // Pairs the header of a table read from the input with the position of that header in the table directory.
+        private class InputHeader
+        {
+            public string Tag => HeaderTable.Tag;
+
+            public TrueTypeHeaderTable HeaderTable { get; }
+
+            public uint OffsetInInput { get; }
+
+            public InputHeader(TrueTypeHeaderTable headerTable, uint offsetInInput)
+            {
+                if (headerTable.Tag == null)
+                {
+                    // Report the argument, the HeaderTable property has not been assigned at this point.
+                    throw new ArgumentException($"No tag for header table: {headerTable}.");
+                }
+
+                HeaderTable = headerTable;
+                OffsetInInput = offsetInInput;
+            }
+
+            public bool IsTable(string tag)
+            {
+                return string.Equals(tag, Tag, StringComparison.OrdinalIgnoreCase);
+            }
+        }
+    }
+}
diff --git a/src/UglyToad.PdfPig/Writer/Fonts/TrueTypeWritingFont.cs b/src/UglyToad.PdfPig/Writer/Fonts/TrueTypeWritingFont.cs
index f9ec5b94..9195f707 100644
--- a/src/UglyToad.PdfPig/Writer/Fonts/TrueTypeWritingFont.cs
+++ b/src/UglyToad.PdfPig/Writer/Fonts/TrueTypeWritingFont.cs
@@ -7,21 +7,22 @@
     using Core;
     using Filters;
     using Geometry;
+    using IO;
     using Logging;
     using Tokens;
-    using UglyToad.PdfPig.Fonts;
-    using UglyToad.PdfPig.Fonts.Exceptions;
-    using UglyToad.PdfPig.Fonts.TrueType;
-    using UglyToad.PdfPig.Fonts.TrueType.Tables;
+    using PdfPig.Fonts;
+    using PdfPig.Fonts.Exceptions;
+    using PdfPig.Fonts.TrueType;
+    using PdfPig.Fonts.TrueType.Tables;
 
     internal class TrueTypeWritingFont : IWritingFont
     {
         private readonly TrueTypeFontProgram font;
         private readonly IReadOnlyList<byte> fontFileBytes;
 
-        private readonly object mappingLock = new object();
+        //private readonly object mappingLock = new object();
         private readonly Dictionary<char, byte> characterMapping = new Dictionary<char, byte>();
-        private int characterMappingCounter = 1;
+        //private int characterMappingCounter = 1;
 
         public bool HasWidths { get; } = true;
 
@@ -51,14 +52,16 @@
 
         public ObjectToken WriteFont(NameToken fontKeyName, Stream outputStream, BuilderContext context)
         {
+            var b = TrueTypeEncodingReplacer.ReplaceCMapTables(new ByteArrayInputBytes(fontFileBytes), characterMapping);
+
             // TODO: unfortunately we need to subset the font in order to support custom encoding.
             // A symbolic font (one which contains characters not in the standard latin set)
             // should contain a MacRoman (1, 0) or Windows Symbolic (3,0) cmap subtable which maps character codes to glyph id.
-            var bytes = CompressBytes(fontFileBytes);
+            var bytes = CompressBytes(b);
             var embeddedFile = new StreamToken(new DictionaryToken(new Dictionary
             {
                 { NameToken.Length, new NumericToken(bytes.Length) },
-                { NameToken.Length1, new NumericToken(fontFileBytes.Count) },
+                { NameToken.Length1, new NumericToken(b.Length) },
                 { NameToken.Filter, new ArrayToken(new []{ NameToken.FlateDecode }) }
             }), bytes);
 
@@ -148,28 +151,29 @@
 
         public byte GetValueForCharacter(char character)
         {
-            lock (mappingLock)
-            {
-                if (characterMapping.TryGetValue(character, out var result))
-                {
-                    return result;
-                }
+            return (byte) character;
+            //lock (mappingLock)
+            //{
+            //    if (characterMapping.TryGetValue(character, out var result))
+            //    {
+            //        return result;
+            //    }
 
-                if (characterMappingCounter > byte.MaxValue)
-                {
-                    throw new NotSupportedException("Cannot support more than 255 separate characters in a simple TrueType font, please" +
-                                                    " submit an issue since we will need to add support for composite fonts with multi-byte" +
-                                                    " character identifiers.");
-                }
+            //    if (characterMappingCounter > byte.MaxValue)
+            //    {
+            //        throw new NotSupportedException("Cannot support more than 255 separate characters in a simple TrueType font, please" +
+            //                                        " submit an issue since we will need to add support for composite fonts with multi-byte" +
+            //                                        " character identifiers.");
+            //    }
 
-                var value = (byte) characterMappingCounter++;
+            //    var value = (byte) characterMappingCounter++;
 
-                characterMapping[character] = value;
+            //    characterMapping[character] = value;
 
-                result = value;
+            //    result = value;
 
-                return result;
-            }
+            //    return result;
+            //}
         }
 
         private static byte[] CompressBytes(IReadOnlyList<byte> bytes)