From 81935d5cdbfca0899ff0b608b7d5e2315cb6f377 Mon Sep 17 00:00:00 2001
From: Eliot Jones
Date: Fri, 16 Nov 2018 21:30:59 +0000
Subject: [PATCH] tidy up cff font classes and start adding type 2 charstring support for #6

---
 src/UglyToad.PdfPig/Fonts/CharStringStack.cs  |  78 +++++++++
 .../CharStrings/LazyType2Command.cs           |  53 ++++++
 .../CharStrings/Type2CharStringParser.cs      | 182 ++++++++++++++++++
 .../CompactFontFormatData.cs                  | 145 +++++++++++++++
 .../CompactFontFormatHeader.cs                |  49 ++++++
 .../CompactFontFormatIndividualFontParser.cs  |  13 ++
 .../CompactFontFormatParser.cs                | 123 --------------
 .../CompactFontFormatTopLevelDictionary.cs    |  19 ++-
 ...mpactFontFormatTopLevelDictionaryReader.cs |   2 +-
 .../CharStrings/Commands/LazyType1Command.cs  |  47 +----
 .../Commands/Type1BuildCharContext.cs         |   4 +-
 11 files changed, 545 insertions(+), 170 deletions(-)
 create mode 100644 src/UglyToad.PdfPig/Fonts/CharStringStack.cs
 create mode 100644 src/UglyToad.PdfPig/Fonts/CompactFontFormat/CharStrings/LazyType2Command.cs
 create mode 100644 src/UglyToad.PdfPig/Fonts/CompactFontFormat/CharStrings/Type2CharStringParser.cs
 create mode 100644 src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatData.cs
 create mode 100644 src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatHeader.cs

diff --git a/src/UglyToad.PdfPig/Fonts/CharStringStack.cs b/src/UglyToad.PdfPig/Fonts/CharStringStack.cs
new file mode 100644
index 00000000..6567e533
--- /dev/null
+++ b/src/UglyToad.PdfPig/Fonts/CharStringStack.cs
@@ -0,0 +1,78 @@
+namespace UglyToad.PdfPig.Fonts
+{
+    using System;
+    using System.Collections.Generic;
+    using System.Linq;
+
+    /// <summary>
+    /// The stack of numeric operands currently active in a CharString.
+    /// </summary>
+    internal class CharStringStack
+    {
+        private readonly List<decimal> stack = new List<decimal>();
+
+        /// <summary>
+        /// The current size of the stack.
+        /// </summary>
+        public int Length => stack.Count;
+
+        /// <summary>
+        /// Whether it's possible to pop a value from either end of the stack.
+        /// </summary>
+        public bool CanPop => stack.Count > 0;
+
+        /// <summary>
+        /// Remove and return the value from the top of the stack.
+        /// </summary>
+        /// <returns>The value from the top of the stack.</returns>
+        public decimal PopTop()
+        {
+            if (stack.Count == 0)
+            {
+                throw new InvalidOperationException("Cannot pop from the top of an empty stack, invalid charstring parsed.");
+            }
+
+            var result = stack[stack.Count - 1];
+            stack.RemoveAt(stack.Count - 1);
+            return result;
+        }
+
+        /// <summary>
+        /// Remove and return the value from the bottom of the stack.
+        /// </summary>
+        /// <returns>The value from the bottom of the stack.</returns>
+        public decimal PopBottom()
+        {
+            if (stack.Count == 0)
+            {
+                throw new InvalidOperationException("Cannot pop from the bottom of an empty stack, invalid charstring parsed.");
+            }
+
+            var result = stack[0];
+            stack.RemoveAt(0);
+            return result;
+        }
+
+        /// <summary>
+        /// Adds the value to the top of the stack.
+        /// </summary>
+        /// <param name="value">The value to add.</param>
+        public void Push(decimal value)
+        {
+            stack.Add(value);
+        }
+
+        /// <summary>
+        /// Removes all values from the stack.
+        /// </summary>
+        public void Clear()
+        {
+            stack.Clear();
+        }
+
+        public override string ToString()
+        {
+            return string.Join(" ", stack.Select(x => x.ToString()));
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CharStrings/LazyType2Command.cs b/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CharStrings/LazyType2Command.cs
new file mode 100644
index 00000000..a2f75446
--- /dev/null
+++ b/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CharStrings/LazyType2Command.cs
@@ -0,0 +1,53 @@
+namespace UglyToad.PdfPig.Fonts.CompactFontFormat.CharStrings
+{
+    using System;
+    using System.Diagnostics;
+
+    /// <summary>
+    /// Represents the deferred execution of a Type 2 CharString command.
+    /// </summary>
+    internal class LazyType2Command
+    {
+        private readonly Action<Type2BuildCharContext> runCommand;
+
+        /// <summary>
+        /// The name of the operator this command represents, this is also the value returned by <see cref="ToString"/>.
+        /// </summary>
+        public string Name { get; }
+
+        /// <summary>
+        /// Creates a new <see cref="LazyType2Command"/>.
+        /// </summary>
+        /// <param name="name">The name of the operator.</param>
+        /// <param name="runCommand">The action to invoke when the command is run, must not be null.</param>
+        public LazyType2Command(string name, Action<Type2BuildCharContext> runCommand)
+        {
+            Name = name;
+            this.runCommand = runCommand ?? throw new ArgumentNullException(nameof(runCommand));
+        }
+
+        /// <summary>
+        /// Invokes the deferred action with the provided context.
+        /// </summary>
+        /// <param name="context">The context handed to the action.</param>
+        [DebuggerStepThrough]
+        public void Run(Type2BuildCharContext context)
+        {
+            runCommand(context);
+        }
+
+        /// <inheritdoc />
+        public override string ToString()
+        {
+            return Name;
+        }
+    }
+
+    /// <summary>
+    /// The context passed to each <see cref="LazyType2Command"/> when it is run, it does not hold any state yet.
+    /// </summary>
+    internal class Type2BuildCharContext
+    {
+
+    }
+}
diff --git a/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CharStrings/Type2CharStringParser.cs b/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CharStrings/Type2CharStringParser.cs
new file mode 100644
index 00000000..5a4c84bd
--- /dev/null
+++ b/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CharStrings/Type2CharStringParser.cs
@@ -0,0 +1,182 @@
+namespace UglyToad.PdfPig.Fonts.CompactFontFormat.CharStrings
+{
+    using System.Collections.Generic;
+    using Util;
+
+    /// <summary>
+    /// Decodes the raw bytes of Type 2 CharStrings into sequences of numeric operands and commands.
+    /// </summary>
+    /// <remarks>
+    /// A Type 2 charstring program is a sequence of unsigned 8-bit bytes that encode numbers and operators.
+    /// The byte value specifies an operator, a number, or subsequent bytes that are to be interpreted in a specific manner.
+    /// </remarks>
+    internal class Type2CharStringParser
+    {
+        /// <summary>
+        /// Decodes every CharString in the provided list in order. The decoded instructions are not yet returned to the caller.
+        /// </summary>
+        /// <param name="charStringBytes">The raw bytes of each CharString.</param>
+        public static void Parse(IReadOnlyList<IReadOnlyList<byte>> charStringBytes)
+        {
+            for (var i = 0; i < charStringBytes.Count; i++)
+            {
+                var charString = charStringBytes[i];
+                ParseSingle(charString);
+            }
+        }
+
+        /// <summary>
+        /// Decodes a single CharString into its numeric operands and commands in the order they are encountered.
+        /// </summary>
+        private static IReadOnlyList<Union<decimal, LazyType2Command>> ParseSingle(IReadOnlyList<byte> bytes)
+        {
+            var instructions = new List<Union<decimal, LazyType2Command>>();
+            for (var i = 0; i < bytes.Count; i++)
+            {
+                var b = bytes[i];
+                // Values up to 31 are operators, except for 28 which introduces a 16-bit number.
+                if (b <= 31 && b != 28)
+                {
+                    var command = GetCommand(b, bytes, ref i);
+                    instructions.Add(Union<decimal, LazyType2Command>.Two(command));
+                }
+                else
+                {
+                    var number = InterpretNumber(b, bytes, ref i);
+                    instructions.Add(Union<decimal, LazyType2Command>.One(number));
+                }
+            }
+
+            return instructions;
+        }
+
+        /// <summary>
+        /// The Type 2 interpretation of a number with an initial byte value of 255 differs from how it is interpreted in the Type 1 format
+        /// and 28 has a special meaning.
+        /// </summary>
+        private static decimal InterpretNumber(byte b, IReadOnlyList<byte> bytes, ref int i)
+        {
+            if (b == 28)
+            {
+                // The next two bytes are a two's complement signed 16-bit integer, casting to short
+                // sign-extends the value so that numbers with the high bit set are negative.
+                return (short)(bytes[++i] << 8 | bytes[++i]);
+            }
+
+            if (b >= 32 && b <= 246)
+            {
+                return b - 139;
+            }
+
+            if (b >= 247 && b <= 250)
+            {
+                var w = bytes[++i];
+                return ((b - 247) * 256) + w + 108;
+            }
+
+            if (b >= 251 && b <= 254)
+            {
+                var w = bytes[++i];
+                return -((b - 251) * 256) - w - 108;
+            }
+
+            /*
+             * If the charstring byte contains the value 255, the next four bytes indicate a two's complement signed number.
+             * The first of these four bytes contains the highest order bits, the second byte contains the next higher order bits
+             * and the fourth byte contains the lowest order bits.
+             * This number is interpreted as a Fixed; that is, a signed number with 16 bits of fraction.
+             */
+            var fixedPoint = bytes[++i] << 24 | bytes[++i] << 16 | bytes[++i] << 8 | bytes[++i];
+
+            // Dividing the signed 32-bit value by 2^16 preserves the sign and scales the low 16 bits into the fraction.
+            return fixedPoint / 65536m;
+        }
+
+        /// <summary>
+        /// The operators encoded as a single byte, keyed by the byte value.
+        /// </summary>
+        private static readonly IReadOnlyDictionary<byte, LazyType2Command> SingleByteCommandStore = new Dictionary<byte, LazyType2Command>
+        {
+            { 1,  new LazyType2Command("hstem", x => { })},
+            { 3,  new LazyType2Command("vstem", x => { })},
+            { 4,  new LazyType2Command("vmoveto", x => { })},
+            { 5,  new LazyType2Command("rlineto", x => { })},
+            { 6,  new LazyType2Command("hlineto", x => { })},
+            { 7,  new LazyType2Command("vlineto", x => { })},
+            { 8,  new LazyType2Command("rrcurveto", x => { })},
+            { 10, new LazyType2Command("callsubr", x => { })},
+            { 11, new LazyType2Command("return", x => { })},
+            { 14, new LazyType2Command("endchar", x => { })},
+            { 18, new LazyType2Command("hstemhm", x => { })},
+            { 19, new LazyType2Command("hintmask", x => { })},
+            { 20, new LazyType2Command("cntrmask", x => { })},
+            { 21, new LazyType2Command("rmoveto", x => { })},
+            { 22, new LazyType2Command("hmoveto", x => { })},
+            { 23, new LazyType2Command("vstemhm", x => { })},
+            { 24, new LazyType2Command("rcurveline", x => { })},
+            { 25, new LazyType2Command("rlinecurve", x => { })},
+            { 26, new LazyType2Command("vvcurveto", x => { })},
+            { 27, new LazyType2Command("hhcurveto", x => { })},
+            { 29, new LazyType2Command("callgsubr", x => { })},
+            { 30, new LazyType2Command("vhcurveto", x => { })},
+            { 31, new LazyType2Command("hvcurveto", x => { })}
+        };
+
+        /// <summary>
+        /// The operators encoded as two bytes, keyed by the byte which follows the escape byte (12).
+        /// </summary>
+        private static readonly IReadOnlyDictionary<byte, LazyType2Command> TwoByteCommandStore = new Dictionary<byte, LazyType2Command>
+        {
+            { 3,  new LazyType2Command("and", x => { })},
+            { 4,  new LazyType2Command("or", x => { })},
+            { 5,  new LazyType2Command("not", x => { })},
+            { 9,  new LazyType2Command("abs", x => { })},
+            { 10, new LazyType2Command("add", x => { })},
+            { 11, new LazyType2Command("sub", x => { })},
+            { 12, new LazyType2Command("div", x => { })},
+            { 14, new LazyType2Command("neg", x => { })},
+            { 15, new LazyType2Command("eq", x => { })},
+            { 18, new LazyType2Command("drop", x => { })},
+            { 20, new LazyType2Command("put", x => { })},
+            { 21, new LazyType2Command("get", x => { })},
+            { 22, new LazyType2Command("ifelse", x => { })},
+            { 23, new LazyType2Command("random", x => { })},
+            { 24, new LazyType2Command("mul", x => { })},
+            { 26, new LazyType2Command("sqrt", x => { })},
+            { 27, new LazyType2Command("dup", x => { })},
+            { 28, new LazyType2Command("exch", x => { })},
+            { 29, new LazyType2Command("index", x => { })},
+            { 30, new LazyType2Command("roll", x => { })},
+            { 34, new LazyType2Command("hflex", x => { })},
+            { 35, new LazyType2Command("flex", x => { })},
+            { 36, new LazyType2Command("hflex1", x => { })},
+            { 37, new LazyType2Command("flex1", x => { })},
+        };
+
+        /// <summary>
+        /// Finds the command for the operator at the current position, consuming the second byte of an escaped operator.
+        /// Unrecognised operators produce a command which does nothing. The mask bytes which follow hintmask and cntrmask
+        /// are not consumed here.
+        /// </summary>
+        private static LazyType2Command GetCommand(byte b, IReadOnlyList<byte> bytes, ref int i)
+        {
+            if (b == 12)
+            {
+                var b2 = bytes[++i];
+                if (TwoByteCommandStore.TryGetValue(b2, out var commandTwoByte))
+                {
+                    return commandTwoByte;
+                }
+
+                return new LazyType2Command($"unknown: {b} {b2}", x => {});
+            }
+
+            if (SingleByteCommandStore.TryGetValue(b, out var command))
+            {
+                return command;
+            }
+
+            return new LazyType2Command($"unknown: {b}", x => {});
+        }
+    }
+}
diff --git a/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatData.cs b/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatData.cs
new file mode 100644
index 00000000..d813f2ab
--- /dev/null
+++ b/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatData.cs
@@ -0,0 +1,145 @@
+namespace UglyToad.PdfPig.Fonts.CompactFontFormat
+{
+    using System;
+    using System.Text;
+
+    /// <summary>
+    /// Provides access to the raw bytes of this Compact Font Format file with utility methods for reading data types from it.
+    /// </summary>
+    internal class CompactFontFormatData
+    {
+        private readonly byte[] dataBytes;
+
+        /// <summary>
+        /// The index of the most recently read byte, this is -1 before any byte has been read.
+        /// </summary>
+        public int Position { get; private set; } = -1;
+
+        /// <summary>
+        /// Creates a new <see cref="CompactFontFormatData"/> positioned before the first byte.
+        /// </summary>
+        /// <param name="dataBytes">The bytes of the font, must not be null.</param>
+        public CompactFontFormatData(byte[] dataBytes)
+        {
+            this.dataBytes = dataBytes ?? throw new ArgumentNullException(nameof(dataBytes));
+        }
+
+        /// <summary>
+        /// Reads the next <paramref name="length"/> bytes and decodes them to a string using the provided encoding.
+        /// </summary>
+        public string ReadString(int length, Encoding encoding)
+        {
+            var bytes = new byte[length];
+
+            for (var i = 0; i < bytes.Length; i++)
+            {
+                bytes[i] = ReadByte();
+            }
+
+            return encoding.GetString(bytes);
+        }
+
+        /// <summary>
+        /// Reads the next byte as an unsigned 8-bit value.
+        /// </summary>
+        public byte ReadCard8()
+        {
+            return ReadByte();
+        }
+
+        /// <summary>
+        /// Reads the next two bytes as an unsigned 16-bit value with the most significant byte first.
+        /// </summary>
+        public ushort ReadCard16()
+        {
+            return (ushort)(ReadByte() << 8 | ReadByte());
+        }
+
+        /// <summary>
+        /// Reads the next byte as an offset size value.
+        /// </summary>
+        public byte ReadOffsize()
+        {
+            return ReadByte();
+        }
+
+        /// <summary>
+        /// Reads an offset of <paramref name="offsetSize"/> bytes with the most significant byte first.
+        /// </summary>
+        public int ReadOffset(int offsetSize)
+        {
+            var value = 0;
+
+            for (var i = 0; i < offsetSize; i++)
+            {
+                value = value << 8 | ReadByte();
+            }
+
+            return value;
+        }
+
+        /// <summary>
+        /// Advances the position by one and returns the byte at the new position.
+        /// </summary>
+        /// <exception cref="IndexOutOfRangeException">There are no more bytes to read.</exception>
+        public byte ReadByte()
+        {
+            Position++;
+
+            if (Position >= dataBytes.Length)
+            {
+                throw new IndexOutOfRangeException($"Cannot read byte at position {Position} of an array which is {dataBytes.Length} bytes long.");
+            }
+
+            return dataBytes[Position];
+        }
+
+        /// <summary>
+        /// Returns the next byte without changing the position.
+        /// </summary>
+        public byte Peek()
+        {
+            return dataBytes[Position + 1];
+        }
+
+        /// <summary>
+        /// Whether there is at least one more byte after the current position.
+        /// </summary>
+        public bool CanRead()
+        {
+            return Position < dataBytes.Length - 1;
+        }
+
+        /// <summary>
+        /// Moves so that the next byte read is the one at <paramref name="offset"/>.
+        /// </summary>
+        public void Seek(int offset)
+        {
+            Position = offset - 1;
+        }
+
+        /// <summary>
+        /// Reads the next four bytes with the most significant byte first. The value is assembled using 32-bit arithmetic
+        /// so a result with the highest bit set is returned as a negative number.
+        /// </summary>
+        public long ReadLong()
+        {
+            return (ReadCard16() << 16) | ReadCard16();
+        }
+
+        /// <summary>
+        /// Reads the next <paramref name="length"/> bytes into a new array.
+        /// </summary>
+        public byte[] ReadBytes(int length)
+        {
+            var result = new byte[length];
+
+            for (int i = 0; i < length; i++)
+            {
+                result[i] = ReadByte();
+            }
+
+            return result;
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatHeader.cs b/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatHeader.cs
new file mode 100644
index 00000000..a3e5f6b3
--- /dev/null
+++ b/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatHeader.cs
@@ -0,0 +1,49 @@
+namespace UglyToad.PdfPig.Fonts.CompactFontFormat
+{
+    /// <summary>
+    /// The header table for the binary data of a Compact Font Format file.
+    /// </summary>
+    internal struct CompactFontFormatHeader
+    {
+        /// <summary>
+        /// The major version of this font format. Starting at 1.
+        /// </summary>
+        public byte MajorVersion { get; }
+
+        /// <summary>
+        /// The minor version of this font format. Starting at 0. Indicates extensions to the format which
+        /// are undetectable by readers which do not support them.
+        /// </summary>
+        public byte MinorVersion { get; }
+
+        /// <summary>
+        /// Indicates the size of this header in bytes so that future changes to the format may include extra data after the <see cref="OffsetSize"/> field.
+        /// </summary>
+        public byte SizeInBytes { get; }
+
+        /// <summary>
+        /// Specifies the size of all offsets relative to the start of the data in the font.
+        /// </summary>
+        public byte OffsetSize { get; }
+
+        /// <summary>
+        /// Creates a new <see cref="CompactFontFormatHeader"/>.
+        /// </summary>
+        /// <param name="majorVersion">The major version of this font format.</param>
+        /// <param name="minorVersion">The minor version of this font format.</param>
+        /// <param name="sizeInBytes">Indicates the size of this header in bytes so that future changes to the format may include extra data after the offsetSize field.</param>
+        /// <param name="offsetSize">Specifies the size of all offsets relative to the start of the data in the font.</param>
+        public CompactFontFormatHeader(byte majorVersion, byte minorVersion, byte sizeInBytes, byte offsetSize)
+        {
+            MajorVersion = majorVersion;
+            MinorVersion = minorVersion;
+            SizeInBytes = sizeInBytes;
+            OffsetSize = offsetSize;
+        }
+
+        public override string ToString()
+        {
+            return $"Major: {MajorVersion}, Minor: {MinorVersion}, Header Size: {SizeInBytes}, Offset: {OffsetSize}";
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatIndividualFontParser.cs b/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatIndividualFontParser.cs
index a21db1df..f07984ce 100644
--- a/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatIndividualFontParser.cs
+++ b/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatIndividualFontParser.cs
@@ -1,5 +1,7 @@
 namespace UglyToad.PdfPig.Fonts.CompactFontFormat
 {
+    using System;
+    using CharStrings;
     using Dictionaries;
 
     internal class CompactFontFormatIndividualFontParser
@@ -42,6 +44,17 @@
             data.Seek(dictionary.CharStringsOffset);
 
             var index = indexReader.ReadDictionaryData(data);
+
+            switch (dictionary.CharStringType)
+            {
+                case CompactFontFormatCharStringType.Type1:
+                    throw new NotImplementedException("Type 1 CharStrings in CFF fonts are not supported yet.");
+                case CompactFontFormatCharStringType.Type2:
+                    Type2CharStringParser.Parse(index);
+                    break;
+                default:
+                    throw new ArgumentOutOfRangeException(nameof(dictionary.CharStringType), dictionary.CharStringType, "Unexpected CharString type in CFF font.");
+            }
         }
     }
 }
diff --git a/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatParser.cs b/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatParser.cs
index f3ef8264..f6640aab 100644
--- a/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatParser.cs
+++ b/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatParser.cs
@@ -1,7 +1,6 @@
 namespace UglyToad.PdfPig.Fonts.CompactFontFormat
 {
     using System;
-    using System.Text;
     using Util;
 
     internal class CompactFontFormatParser
@@ -97,126 +96,4 @@
             return result;
         }
     }
-
-    internal class CompactFontFormatData
-    {
-        private readonly byte[] dataBytes;
-
-        public int Position { get; private set; } = -1;
-
-        public CompactFontFormatData(byte[] dataBytes)
-        {
-            this.dataBytes = dataBytes;
-        }
-
-        public string ReadString(int length, Encoding encoding)
-        {
-            var bytes = new byte[length];
-
-            for (var i = 0; i < bytes.Length; i++)
-            {
-                bytes[i] = ReadByte();
-            }
-
-            return encoding.GetString(bytes);
-        }
-
-        public byte ReadCard8()
-        {
-            return ReadByte();
-        }
-
-        public ushort ReadCard16()
-        {
-            return (ushort)(ReadByte() << 8 | ReadByte());
-        }
-
-        public byte ReadOffsize()
-        {
-            return ReadByte();
-        }
-
-        public int ReadOffset(int offsetSize)
-        {
-            var value = 0;
-
-            for (var i = 0; i < offsetSize; i++)
-            {
-                value = value << 8 | ReadByte();
-            }
-
-            return value;
-        }
-
-        public byte ReadByte()
-        {
-            Position++;
-
-            if (Position >= dataBytes.Length)
-            {
-                throw new IndexOutOfRangeException($"Cannot read byte at position {Position} of an array which is {dataBytes.Length} bytes long.");
-            }
-
-            return dataBytes[Position];
-        }
-
-        public byte Peek()
-        {
-            return dataBytes[Position + 1];
-        }
-
-        public bool CanRead()
-        {
-            return Position < dataBytes.Length - 1;
-        }
-
-        public void Seek(int offset)
-        {
-            Position = offset - 1;
-        }
-
-        public long ReadLong()
-        {
-            return (ReadCard16() << 16) | ReadCard16();
-        }
-
-        public byte[] ReadBytes(int length)
-        {
-            var result = new byte[length];
-
-            for (int i = 0; i < length; i++)
-            {
-                result[i] = ReadByte();
-            }
-
-            return result;
-        }
-    }
-
-    /// <summary>
-    /// The header table for the binary data of a CFF file.
-    /// </summary>
-    internal struct CompactFontFormatHeader
-    {
-        public byte MajorVersion { get; }
-
-        public byte MinorVersion { get; }
-
-        public byte SizeInBytes { get; }
-
-        public byte OffsetSize { get; }
-
-        public CompactFontFormatHeader(byte majorVersion, byte minorVersion, byte sizeInBytes, byte offsetSize)
-        {
-            MajorVersion = majorVersion;
-            MinorVersion = minorVersion;
-            SizeInBytes = sizeInBytes;
-            OffsetSize = offsetSize;
-        }
-
-        public override string ToString()
-        {
-            return $"Major: {MajorVersion}, Minor: {MinorVersion}, Header Size: {SizeInBytes}, Offset: {OffsetSize}";
-        }
-    }
 }
diff --git a/src/UglyToad.PdfPig/Fonts/CompactFontFormat/Dictionaries/CompactFontFormatTopLevelDictionary.cs b/src/UglyToad.PdfPig/Fonts/CompactFontFormat/Dictionaries/CompactFontFormatTopLevelDictionary.cs
index fe711fd3..d05bd3cb 100644
--- a/src/UglyToad.PdfPig/Fonts/CompactFontFormat/Dictionaries/CompactFontFormatTopLevelDictionary.cs
+++ b/src/UglyToad.PdfPig/Fonts/CompactFontFormat/Dictionaries/CompactFontFormatTopLevelDictionary.cs
@@ -30,7 +30,7 @@
 
         public decimal PaintType { get; set; }
 
-        public int CharstringType { get; set; } = 2;
+        public CompactFontFormatCharStringType CharStringType { get; set; } = CompactFontFormatCharStringType.Type2;
 
         public TransformationMatrix FontMatrix { get; set; } = TransformationMatrix.FromValues(0.001m, 0m, 0.001m, 0, 0, 0);
 
@@ -62,5 +62,22 @@
         public string BaseFontName { get; set; }
 
         public decimal[] BaseFontBlend { get; set; }
+
+
+    }
+
+    /// <summary>
+    /// Defines the format of the CharString data contained within a Compact Font Format font.
+    /// </summary>
+    internal enum CompactFontFormatCharStringType
+    {
+        /// <summary>
+        /// The Type 1 CharString format as defined by the Adobe Type 1 Font Format.
+        /// </summary>
+        Type1 = 1,
+        /// <summary>
+        /// The Type 2 CharString format as defined by Adobe Technical Note #5177. This is the default type.
+        /// </summary>
+        Type2 = 2
     }
 }
\ No newline at end of file
diff --git a/src/UglyToad.PdfPig/Fonts/CompactFontFormat/Dictionaries/CompactFontFormatTopLevelDictionaryReader.cs b/src/UglyToad.PdfPig/Fonts/CompactFontFormat/Dictionaries/CompactFontFormatTopLevelDictionaryReader.cs
index 8fa9aec8..0685d6df 100644
--- a/src/UglyToad.PdfPig/Fonts/CompactFontFormat/Dictionaries/CompactFontFormatTopLevelDictionaryReader.cs
+++ b/src/UglyToad.PdfPig/Fonts/CompactFontFormat/Dictionaries/CompactFontFormatTopLevelDictionaryReader.cs
@@ -65,7 +65,7 @@
                     dictionary.PaintType = operands[0].Decimal;
                     break;
                 case 6:
-                    dictionary.CharstringType = GetIntOrDefault(operands);
+                    dictionary.CharStringType = (CompactFontFormatCharStringType)GetIntOrDefault(operands, 2);
                     break;
                 case 7:
                     {
diff --git a/src/UglyToad.PdfPig/Fonts/Type1/CharStrings/Commands/LazyType1Command.cs b/src/UglyToad.PdfPig/Fonts/Type1/CharStrings/Commands/LazyType1Command.cs
index c5843800..fbef7e61 100644
--- a/src/UglyToad.PdfPig/Fonts/Type1/CharStrings/Commands/LazyType1Command.cs
+++ b/src/UglyToad.PdfPig/Fonts/Type1/CharStrings/Commands/LazyType1Command.cs
@@ -1,8 +1,7 @@
-using System;
-
-namespace UglyToad.PdfPig.Fonts.Type1.CharStrings.Commands
+namespace UglyToad.PdfPig.Fonts.Type1.CharStrings.Commands
 {
-    using System.Collections.Generic;
+    using System;
+    using System.Diagnostics;
 
     /// <summary>
     /// Represents the deferred execution of a Type 1 Build Char command.
@@ -19,6 +18,7 @@ namespace UglyToad.PdfPig.Fonts.Type1.CharStrings.Commands
             this.runCommand = runCommand ?? throw new ArgumentNullException(nameof(runCommand));
         }
 
+        [DebuggerStepThrough]
         public void Run(Type1BuildCharContext context)
         {
             runCommand(context);
@@ -29,43 +29,4 @@ namespace UglyToad.PdfPig.Fonts.Type1.CharStrings.Commands
             return Name;
         }
     }
-
-    internal class Type1Stack
-    {
-        private readonly List<decimal> stack = new List<decimal>();
-
-        public decimal PopTop()
-        {
-            if (stack.Count == 0)
-            {
-                throw new InvalidOperationException("Cannot pop from the top of an empty stack, invalid charstring parsed.");
-            }
-
-            var result = stack[stack.Count - 1];
-            stack.RemoveAt(stack.Count - 1);
-            return result;
-        }
-
-        public decimal PopBottom()
-        {
-            if (stack.Count == 0)
-            {
-                throw new InvalidOperationException("Cannot pop from the bottom of an empty stack, invalid charstring parsed.");
-            }
-
-            var result = stack[0];
-            stack.RemoveAt(0);
-            return result;
-        }
-
-        public void Push(decimal value)
-        {
-            stack.Add(value);
-        }
-
-        public void Clear()
-        {
-            stack.Clear();
-        }
-    }
 }
diff --git a/src/UglyToad.PdfPig/Fonts/Type1/CharStrings/Commands/Type1BuildCharContext.cs b/src/UglyToad.PdfPig/Fonts/Type1/CharStrings/Commands/Type1BuildCharContext.cs
index 04a93855..e4dba413 100644
--- a/src/UglyToad.PdfPig/Fonts/Type1/CharStrings/Commands/Type1BuildCharContext.cs
+++ b/src/UglyToad.PdfPig/Fonts/Type1/CharStrings/Commands/Type1BuildCharContext.cs
@@ -19,9 +19,9 @@
 
         public PdfPoint CurrentPosition { get; set; }
 
-        public Type1Stack Stack { get; } = new Type1Stack();
+        public CharStringStack Stack { get; } = new CharStringStack();
 
-        public Type1Stack PostscriptStack { get; } = new Type1Stack();
+        public CharStringStack PostscriptStack { get; } = new CharStringStack();
 
         public IReadOnlyList<PdfPoint> FlexPoints { get; }
 