Start parsing private dictionaries for Type 1 fonts. Subroutines and charstrings are still left to implement.

This commit is contained in:
Eliot Jones
2018-10-27 19:45:45 +01:00
parent 2a8777a93a
commit 7fab13e877
6 changed files with 481 additions and 45 deletions

View File

@@ -12,8 +12,10 @@
{
private const ushort EexecEncryptionKey = 55665;
private const int EexecRandomBytes = 4;
private const int Len4Bytes = 4;
private const int Password = 5839;
public IReadOnlyList<byte> Parse(IReadOnlyList<byte> bytes)
public IReadOnlyList<byte> Parse(IReadOnlyList<byte> bytes, bool isLenientParsing)
{
if (!IsBinary(bytes))
{
@@ -43,18 +45,189 @@
}
var next = tokenizer.GetNext();
if (next?.Type != Type1Token.TokenType.Integer || !(next is Type1TextToken textToken))
if (next?.Type != Type1Token.TokenType.Integer)
{
throw new InvalidOperationException($"No length token was present in the stream following the private dictionary start, instead got {next}.");
}
var length = textToken.AsInt();
var length = next.AsInt();
ReadExpected(tokenizer, Type1Token.TokenType.Name, "dict");
// actually could also be "/Private 10 dict def Private begin"
// instead of the "dup"
ReadExpected(tokenizer, Type1Token.TokenType.Name, "dup");
ReadExpectedAfterOptional(tokenizer, Type1Token.TokenType.Name, "def", Type1Token.TokenType.Name, "dup");
ReadExpected(tokenizer, Type1Token.TokenType.Name, "begin");
/*
* The lenIV entry is an integer specifying the number of random bytes at the beginning of charstrings for charstring encryption.
* The default value of lenIV is 4.
*/
var lenIv = Len4Bytes;
var builder = new Type1PrivateDictionary.Builder();
for (var i = 0; i < length; i++)
{
var token = tokenizer.GetNext();
// premature end
if (token.Type != Type1Token.TokenType.Literal)
{
break;
}
var key = token.Text;
switch (key)
{
case Type1Symbols.RdProcedure:
{
var procedureTokens = ReadProcedure(tokenizer);
ReadTillDef(tokenizer);
break;
}
case Type1Symbols.NoAccessDef:
{
var procedureTokens = ReadProcedure(tokenizer);
ReadTillDef(tokenizer);
break;
}
case Type1Symbols.NoAccessPut:
{
var procedureTokens = ReadProcedure(tokenizer);
ReadTillDef(tokenizer);
break;
}
case Type1Symbols.BlueValues:
{
var blueValues = ReadArrayValues(tokenizer, x => x.AsInt());
builder.BlueValues = blueValues;
break;
}
case Type1Symbols.OtherBlues:
{
var otherBlues = ReadArrayValues(tokenizer, x => x.AsInt());
builder.OtherBlues = otherBlues;
break;
}
case Type1Symbols.StdHorizontalStemWidth:
{
var widths = ReadArrayValues(tokenizer, x => x.AsDecimal());
var width = widths[0];
builder.StandardHorizontalWidth = width;
break;
}
case Type1Symbols.StdVerticalStemWidth:
{
var widths = ReadArrayValues(tokenizer, x => x.AsDecimal());
var width = widths[0];
builder.StandardVerticalWidth = width;
break;
}
case Type1Symbols.StemSnapHorizontalWidths:
{
var widths = ReadArrayValues(tokenizer, x => x.AsDecimal());
builder.StempSnapHorizontalWidths = widths;
break;
}
case Type1Symbols.StemSnapVerticalWidths:
{
var widths = ReadArrayValues(tokenizer, x => x.AsDecimal());
builder.StemSnapVerticalWidths = widths;
break;
}
case Type1Symbols.BlueScale:
{
builder.BlueScale = ReadNumeric(tokenizer);
break;
}
case Type1Symbols.ForceBold:
{
builder.ForceBold = ReadBoolean(tokenizer);
break;
}
case Type1Symbols.MinFeature:
{
var procedureTokens = ReadProcedure(tokenizer);
if (!isLenientParsing)
{
var valid = procedureTokens.Count == 2 && procedureTokens[0].AsInt() == 16
&& procedureTokens[1].AsInt() == 16;
if (!valid)
{
var valueMessage = $"{{ {string.Join(", ", procedureTokens.Select(x => x.ToString()))} }}";
throw new InvalidOperationException($"Type 1 font MinFeature should be {{16,16}} but got: {valueMessage}.");
}
}
break;
}
case Type1Symbols.Password:
{
var password = (int)ReadNumeric(tokenizer);
if (password != Password && !isLenientParsing)
{
throw new InvalidOperationException($"Type 1 font had the wrong password: {password}");
}
builder.Password = password;
break;
}
case Type1Symbols.UniqueId:
{
var id = (int)ReadNumeric(tokenizer);
builder.UniqueId = id;
break;
}
case Type1Symbols.Len4:
{
lenIv = (int)ReadNumeric(tokenizer);
break;
}
case Type1Symbols.BlueShift:
{
builder.BlueShift = (int)ReadNumeric(tokenizer);
break;
}
case Type1Symbols.BlueFuzz:
{
builder.BlueFuzz = (int)ReadNumeric(tokenizer);
break;
}
case Type1Symbols.FamilyBlues:
{
builder.FamilyBlues = ReadArrayValues(tokenizer, x => x.AsInt());
break;
}
case Type1Symbols.FamilyOtherBlues:
{
builder.FamilyOtherBlues = ReadArrayValues(tokenizer, x => x.AsInt());
break;
}
case Type1Symbols.LanguageGroup:
{
builder.LanguageGroup = (int)ReadNumeric(tokenizer);
break;
}
case Type1Symbols.RndStemUp:
{
builder.RoundStemUp = ReadBoolean(tokenizer);
break;
}
case Type1Symbols.Subroutines:
{
//readSubrs(lenIV);
break;
}
case Type1Symbols.OtherSubroutines:
{
ReadOtherSubroutines(tokenizer, isLenientParsing);
break;
}
}
}
while (tokenizer.CurrentToken != null)
{
tokenizer.GetNext();
@@ -170,18 +343,204 @@
return plainBytes;
}
private static void ReadExpected(Type1Tokenizer tokenizer, Type1Token.TokenType type, string text)
private static void ReadExpected(Type1Tokenizer tokenizer, Type1Token.TokenType type, string text = null)
{
var token = tokenizer.GetNext();
if (token == null)
{
throw new InvalidOperationException($"Type 1 Encrypted portion ended when a token with text '{text}' was expected instead.");
throw new InvalidOperationException($"Type 1 Encrypted portion ended when a token with text '{text}' was expected.");
}
if (token.Type != type || !(token is Type1TextToken textToken) || !string.Equals(textToken.Text, text, StringComparison.OrdinalIgnoreCase))
if (token.Type != type || (text != null && !string.Equals(token.Text, text, StringComparison.OrdinalIgnoreCase)))
{
throw new InvalidOperationException($"Found invalid token {token} when type {type} with text {text} was expected.");
}
}
/// <summary>
/// Reads the next token and requires it to be the expected token, allowing exactly one
/// optional token to appear in front of it.
/// </summary>
/// <param name="tokenizer">The tokenizer to read from.</param>
/// <param name="optionalType">The type of the token which may precede the required token.</param>
/// <param name="optionalText">The text of the token which may precede the required token (case-insensitive).</param>
/// <param name="type">The type of the required token.</param>
/// <param name="text">The text of the required token (case-insensitive).</param>
/// <exception cref="InvalidOperationException">
/// The tokenizer returned no token, or the token read was neither the required nor the optional token.
/// </exception>
private static void ReadExpectedAfterOptional(Type1Tokenizer tokenizer, Type1Token.TokenType optionalType, string optionalText,
    Type1Token.TokenType type, string text)
{
    bool Matches(Type1Token candidate, Type1Token.TokenType wantedType, string wantedText)
    {
        return candidate.Type == wantedType
               && string.Equals(candidate.Text, wantedText, StringComparison.OrdinalIgnoreCase);
    }

    var first = tokenizer.GetNext();

    if (first == null)
    {
        throw new InvalidOperationException($"Type 1 Encrypted portion ended when a token with text '{optionalText}' or '{text}' was expected.");
    }

    // The required token is checked first so it wins when both descriptions are the same.
    if (Matches(first, type, text))
    {
        return;
    }

    if (!Matches(first, optionalType, optionalText))
    {
        throw new InvalidOperationException($"Found invalid token {first} when type {type} with text {text} was expected.");
    }

    // The optional token was present, so the required token must follow immediately.
    ReadExpected(tokenizer, type, text);
}
/// <summary>
/// Reads a procedure from the tokenizer and returns the tokens found inside it.
/// Start and end procedure markers are not included in the result, so the tokens of
/// nested procedures end up in the same flat list.
/// </summary>
/// <param name="tokenizer">The tokenizer positioned before the start-procedure token.</param>
/// <returns>The tokens collected from the procedure body.</returns>
private static IReadOnlyList<Type1Token> ReadProcedure(Type1Tokenizer tokenizer)
{
    var collected = new List<Type1Token>();

    // -1 tells the recursive overload that the opening token has not been consumed yet.
    var nesting = -1;

    ReadProcedure(tokenizer, collected, ref nesting);

    return collected;
}
/// <summary>
/// Recursive worker which collects procedure tokens into <paramref name="tokens"/>.
/// </summary>
/// <param name="tokenizer">The tokenizer to read from.</param>
/// <param name="tokens">Receives every token which is not a start or end procedure marker.</param>
/// <param name="depth">
/// The current nesting depth. A value of -1 means the start-procedure token still has to be read,
/// a value of 0 means there is nothing to read.
/// </param>
private static void ReadProcedure(Tokenizer_placeholder_removed tokenizer, List<Type1Token> tokens, ref int depth)
{
    switch (depth)
    {
        case -1:
            // First call: consume the opening marker and enter the outermost procedure.
            ReadExpected(tokenizer, Type1Token.TokenType.StartProc);
            depth = 1;
            break;
        case 0:
            return;
    }

    for (var current = tokenizer.GetNext(); current != null; current = tokenizer.GetNext())
    {
        switch (current.Type)
        {
            case Type1Token.TokenType.StartProc:
                // Descend into the nested procedure, then carry on with this level.
                depth += 1;
                ReadProcedure(tokenizer, tokens, ref depth);
                break;
            case Type1Token.TokenType.EndProc:
                // This level is closed, hand control back to the caller.
                depth--;
                return;
            default:
                tokens.Add(current);
                break;
        }
    }
}
/// <summary>
/// Consumes name tokens until a 'def' name token (case-insensitive) has been read
/// or the tokenizer returns no further tokens.
/// </summary>
/// <param name="tokenizer">The tokenizer to read from.</param>
/// <exception cref="InvalidOperationException">A token which is not a name was read before 'def'.</exception>
private static void ReadTillDef(Type1Tokenizer tokenizer)
{
    for (var current = tokenizer.GetNext(); current != null; current = tokenizer.GetNext())
    {
        if (current.Type != Type1Token.TokenType.Name)
        {
            throw new InvalidOperationException($"Encountered unexpected non-name token while reading till 'def' token: {current}");
        }

        if (string.Equals(current.Text, "def", StringComparison.OrdinalIgnoreCase))
        {
            return;
        }
    }
}
/// <summary>
/// Consumes tokens until one with the text 'put' (case-insensitive), 'NP' or '|' has been read
/// or the tokenizer returns no further tokens.
/// </summary>
/// <param name="tokenizer">The tokenizer to read from.</param>
private static void ReadTillPut(Type1Tokenizer tokenizer)
{
    while (true)
    {
        var current = tokenizer.GetNext();

        if (current == null)
        {
            return;
        }

        var text = current.Text;

        // 'put' is matched ignoring case, the two abbreviated forms are matched exactly.
        var isTerminator = string.Equals(text, "put", StringComparison.OrdinalIgnoreCase)
                           || text == "NP"
                           || text == "|";

        if (isTerminator)
        {
            return;
        }
    }
}
/// <summary>
/// Reads the elements of an array, converting each token with <paramref name="converter"/>.
/// Reading stops at the end-array token or when the tokenizer returns no further tokens.
/// </summary>
/// <typeparam name="T">The type each array element is converted to.</typeparam>
/// <param name="tokenizer">The tokenizer to read from.</param>
/// <param name="converter">Converts a single element token to a value.</param>
/// <param name="hasReadStart">
/// <see langword="true"/> when the caller already consumed the start-array token,
/// otherwise the start-array token is read and validated here.
/// </param>
/// <returns>The converted values in the order they were read.</returns>
/// <exception cref="InvalidOperationException">Converting an element failed; the original exception is attached.</exception>
private static IReadOnlyList<T> ReadArrayValues<T>(Type1Tokenizer tokenizer, Func<Type1Token, T> converter, bool hasReadStart = false)
{
    if (!hasReadStart)
    {
        ReadExpected(tokenizer, Type1Token.TokenType.StartArray);
    }

    var values = new List<T>();

    for (var element = tokenizer.GetNext();
        element != null && element.Type != Type1Token.TokenType.EndArray;
        element = tokenizer.GetNext())
    {
        try
        {
            values.Add(converter(element));
        }
        catch (Exception ex)
        {
            throw new InvalidOperationException($"Conversion of token '{element}' to value of type {typeof(T).Name} failed.", ex);
        }
    }

    return values;
}
/// <summary>
/// Reads the next token and returns its value as a decimal.
/// </summary>
/// <param name="tokenizer">The tokenizer to read from.</param>
/// <returns>The numeric value of the token.</returns>
/// <exception cref="InvalidOperationException">
/// No token was available or the token was neither an integer nor a real.
/// </exception>
private static decimal ReadNumeric(Type1Tokenizer tokenizer)
{
    var candidate = tokenizer.GetNext();

    var isNumber = candidate != null
                   && (candidate.Type == Type1Token.TokenType.Integer
                       || candidate.Type == Type1Token.TokenType.Real);

    if (!isNumber)
    {
        throw new InvalidOperationException($"Expected to read a numeric token, instead got: {candidate}.");
    }

    return candidate.AsDecimal();
}
/// <summary>
/// Reads the next token and returns its value as a boolean.
/// </summary>
/// <param name="tokenizer">The tokenizer to read from.</param>
/// <returns>The boolean value of the token.</returns>
/// <exception cref="InvalidOperationException">
/// No token was available or its text was neither 'true' nor 'false' (case-insensitive).
/// </exception>
private static bool ReadBoolean(Type1Tokenizer tokenizer)
{
    var candidate = tokenizer.GetNext();

    var isBoolean = candidate != null
                    && (string.Equals(candidate.Text, "true", StringComparison.OrdinalIgnoreCase)
                        || string.Equals(candidate.Text, "false", StringComparison.OrdinalIgnoreCase));

    if (!isBoolean)
    {
        throw new InvalidOperationException($"Expected to read a boolean token, instead got: {candidate}.");
    }

    return candidate.AsBool();
}
/// <summary>
/// Reads past the value of the /OtherSubrs entry. The content is consumed but not retained.
/// Two layouts are handled: a literal array ("[ ... ]") and the
/// "n array dup index ... put ... def" form.
/// </summary>
/// <param name="tokenizer">The tokenizer positioned directly after the /OtherSubrs key.</param>
/// <param name="isLenientParsing">
/// When <see langword="true"/> an unrecognised or missing start token is ignored instead of throwing.
/// </param>
/// <exception cref="InvalidOperationException">
/// Parsing is not lenient and the stream ended or the start token was neither an array start nor a number.
/// </exception>
private static void ReadOtherSubroutines(Type1Tokenizer tokenizer, bool isLenientParsing)
{
    var start = tokenizer.GetNext();

    // The other helpers treat a null token as the end of the stream, so guard here
    // rather than failing with a NullReferenceException on start.Type.
    if (start == null)
    {
        if (isLenientParsing)
        {
            return;
        }

        throw new InvalidOperationException("Failed to read start of /OtherSubrs array. The stream ended before a start token was found.");
    }

    if (start.Type == Type1Token.TokenType.StartArray)
    {
        // The start token is already consumed, read and discard the elements.
        ReadArrayValues(tokenizer, x => x, true);
    }
    else if (start.Type == Type1Token.TokenType.Integer || start.Type == Type1Token.TokenType.Real)
    {
        var length = start.AsInt();

        ReadExpected(tokenizer, Type1Token.TokenType.Name, "array");

        // Each entry is expected as "dup <index> ... put" (or one of the abbreviated terminators).
        for (var i = 0; i < length; i++)
        {
            ReadExpected(tokenizer, Type1Token.TokenType.Name, "dup");
            ReadNumeric(tokenizer);
            ReadTillPut(tokenizer);
        }

        ReadTillDef(tokenizer);
    }
    else if (!isLenientParsing)
    {
        throw new InvalidOperationException($"Failed to read start of /OtherSubrs array. Got start token: {start}.");
    }
}
}
}

View File

@@ -146,7 +146,7 @@
var matrix = GetFontMatrix(dictionaries);
var boundingBox = GetBoundingBox(dictionaries);
var binaryPortion = encryptedPortionParser.Parse(eexecPortion);
var binaryPortion = encryptedPortionParser.Parse(eexecPortion, false);
return new Type1Font(name, encoding, matrix, boundingBox ?? new PdfRectangle());
}

View File

@@ -0,0 +1,31 @@
namespace UglyToad.PdfPig.Fonts.Type1.Parser
{
    /// <summary>
    /// The literal key names recognised while reading the private dictionary of a Type 1 font.
    /// Each constant holds the exact text of the key as it is compared against token text.
    /// </summary>
    internal static class Type1Symbols
    {
        // Blue-related entries.
        public const string BlueValues = "BlueValues";
        public const string OtherBlues = "OtherBlues";
        public const string FamilyBlues = "FamilyBlues";
        public const string FamilyOtherBlues = "FamilyOtherBlues";
        public const string BlueScale = "BlueScale";
        public const string BlueShift = "BlueShift";
        public const string BlueFuzz = "BlueFuzz";

        // Stem width entries.
        public const string StdHorizontalStemWidth = "StdHW";
        public const string StdVerticalStemWidth = "StdVW";
        public const string StemSnapHorizontalWidths = "StemSnapH";
        public const string StemSnapVerticalWidths = "StemSnapV";

        // Entries whose values the parser reads as procedures.
        public const string RdProcedure = "RD";
        // NOTE(review): presumably an alternative spelling of the RD entry - confirm against the specification.
        public const string RdProcedureAlt = "-|";
        public const string NoAccessDef = "ND";
        public const string NoAccessPut = "NP";
        public const string MinFeature = "MinFeature";

        // Subroutine arrays.
        public const string Subroutines = "Subrs";
        public const string OtherSubroutines = "OtherSubrs";

        // Remaining entries.
        public const string ExpansionFactor = "ExpansionFactor";
        public const string ForceBold = "ForceBold";
        public const string LanguageGroup = "LanguageGroup";

        /// <summary>
        /// The lenIV entry: the number of random bytes at the beginning of charstrings for charstring encryption.
        /// </summary>
        public const string Len4 = "lenIV";
        public const string Password = "password";
        public const string RndStemUp = "RndStemUp";
        public const string UniqueId = "UniqueID";
    }
}

View File

@@ -9,7 +9,7 @@
public override bool IsPrivateDictionary { get; } = false;
public Type1DataToken(TokenType type, IReadOnlyList<byte> data) : base(type)
public Type1DataToken(TokenType type, IReadOnlyList<byte> data) : base(string.Empty, type)
{
if (type != TokenType.Charstring)
{
@@ -25,26 +25,28 @@
}
}
internal class Type1TextToken : Type1Token
internal class Type1Token
{
public TokenType Type { get; }
public string Text { get; }
public override bool IsPrivateDictionary => Type == TokenType.Literal && string.Equals(Text, "Private", StringComparison.OrdinalIgnoreCase);
public virtual bool IsPrivateDictionary => Type == TokenType.Literal && string.Equals(Text, "Private", StringComparison.OrdinalIgnoreCase);
public Type1TextToken(char c, TokenType type) : this(c.ToString(), type) { }
public Type1TextToken(string text, TokenType type) : base(type)
public Type1Token(char c, TokenType type) : this(c.ToString(), type) { }
public Type1Token(string text, TokenType type)
{
Text = text;
Type = type;
}
public int AsInt()
{
return (int)AsFloat();
return (int)AsDecimal();
}
public float AsFloat()
public decimal AsDecimal()
{
return float.Parse(Text);
return decimal.Parse(Text);
}
public bool AsBool()
@@ -56,18 +58,6 @@
{
return $"Token[type={Type}, text={Text}]";
}
}
internal abstract class Type1Token
{
public TokenType Type { get; }
public abstract bool IsPrivateDictionary { get; }
protected Type1Token(TokenType type)
{
Type = type;
}
public enum TokenType
{

View File

@@ -56,17 +56,17 @@
case ')':
throw new InvalidOperationException("Encountered an end of string ')' outside of string.");
case '[':
return new Type1TextToken(c, Type1Token.TokenType.StartArray);
return new Type1Token(c, Type1Token.TokenType.StartArray);
case ']':
return new Type1TextToken(c, Type1Token.TokenType.EndArray);
return new Type1Token(c, Type1Token.TokenType.EndArray);
case '{':
return new Type1TextToken(c, Type1Token.TokenType.StartProc);
return new Type1Token(c, Type1Token.TokenType.StartProc);
case '}':
return new Type1TextToken(c, Type1Token.TokenType.EndProc);
return new Type1Token(c, Type1Token.TokenType.EndProc);
case '/':
{
var name = ReadLiteral();
return new Type1TextToken(name, Type1Token.TokenType.Literal);
return new Type1Token(name, Type1Token.TokenType.Literal);
}
case '<':
{
@@ -74,10 +74,10 @@
if (following == '<')
{
bytes.MoveNext();
return new Type1TextToken("<<", Type1Token.TokenType.StartDict);
return new Type1Token("<<", Type1Token.TokenType.StartDict);
}
return new Type1TextToken(c, Type1Token.TokenType.Name);
return new Type1Token(c, Type1Token.TokenType.Name);
}
case '>':
{
@@ -85,10 +85,10 @@
if (following == '>')
{
bytes.MoveNext();
return new Type1TextToken(">>", Type1Token.TokenType.EndDict);
return new Type1Token(">>", Type1Token.TokenType.EndDict);
}
return new Type1TextToken(c, Type1Token.TokenType.Name);
return new Type1Token(c, Type1Token.TokenType.Name);
}
default:
{
@@ -119,13 +119,13 @@
{
if (previousToken.Type == Type1Token.TokenType.Integer)
{
return ReadCharString(((Type1TextToken)previousToken).AsInt());
return ReadCharString(previousToken.AsInt());
}
throw new InvalidOperationException($"Expected integer token before {name} at offset {bytes.CurrentOffset}.");
}
return new Type1TextToken(name, Type1Token.TokenType.Name);
return new Type1Token(name, Type1Token.TokenType.Name);
}
}
}
@@ -134,7 +134,7 @@
return null;
}
private Type1TextToken ReadString()
private Type1Token ReadString()
{
char GetNext()
{
@@ -158,7 +158,7 @@
if (openParens == 0)
{
// end of string
return new Type1TextToken(stringBuffer.ToString(), Type1Token.TokenType.String);
return new Type1Token(stringBuffer.ToString(), Type1Token.TokenType.String);
}
stringBuffer.Append(')');
openParens--;
@@ -197,7 +197,7 @@
return null;
}
private bool TryReadNumber(char c, out Type1TextToken numberToken)
private bool TryReadNumber(char c, out Type1Token numberToken)
{
char GetNext()
{
@@ -253,7 +253,7 @@
// integer
bytes.Seek(bytes.CurrentOffset - 1);
numberToken = new Type1TextToken(sb.ToString(), Type1Token.TokenType.Integer);
numberToken = new Type1Token(sb.ToString(), Type1Token.TokenType.Integer);
return true;
}
@@ -313,11 +313,11 @@
if (radix != null)
{
var number = Convert.ToInt32(sb.ToString(), int.Parse(radix.ToString()));
numberToken = new Type1TextToken(number.ToString(), Type1Token.TokenType.Integer);
numberToken = new Type1Token(number.ToString(), Type1Token.TokenType.Integer);
}
else
{
numberToken = new Type1TextToken(sb.ToString(), Type1Token.TokenType.Real);
numberToken = new Type1Token(sb.ToString(), Type1Token.TokenType.Real);
}
return true;

View File

@@ -0,0 +1,56 @@
namespace UglyToad.PdfPig.Fonts.Type1
{
    using System.Collections.Generic;

    /// <summary>
    /// Holds the values read from the private dictionary of a Type 1 font.
    /// Only the mutable <see cref="Builder"/> is defined so far.
    /// </summary>
    internal class Type1PrivateDictionary
    {
        /// <summary>
        /// Mutable collector which the parser fills in as it reads private dictionary entries.
        /// Nullable properties stay <see langword="null"/> when no value was assigned.
        /// </summary>
        public class Builder
        {
            // NOTE(review): the procedure/subroutine properties below are untyped placeholders;
            // the parser code visible alongside this class does not assign them yet.
            public object Rd { get; set; }

            public object NoAccessPut { get; set; }

            public object NoAccessDef { get; set; }

            public object[] Subroutines { get; set; }

            public object[] OtherSubroutines { get; set; }

            /// <summary>Value of the UniqueID entry.</summary>
            public int? UniqueId { get; set; }

            /// <summary>Values of the BlueValues array.</summary>
            public IReadOnlyList<int> BlueValues { get; set; }

            /// <summary>Values of the OtherBlues array.</summary>
            public IReadOnlyList<int> OtherBlues { get; set; }

            /// <summary>Values of the FamilyBlues array.</summary>
            public IReadOnlyList<int> FamilyBlues { get; set; }

            /// <summary>Values of the FamilyOtherBlues array.</summary>
            public IReadOnlyList<int> FamilyOtherBlues { get; set; }

            /// <summary>Value of the BlueScale entry.</summary>
            public decimal? BlueScale { get; set; }

            /// <summary>Value of the BlueShift entry.</summary>
            public int? BlueShift { get; set; }

            /// <summary>Value of the BlueFuzz entry.</summary>
            public int? BlueFuzz { get; set; }

            /// <summary>First value of the StdHW array.</summary>
            public decimal? StandardHorizontalWidth { get; set; }

            /// <summary>First value of the StdVW array.</summary>
            public decimal? StandardVerticalWidth { get; set; }

            /// <summary>Values of the StemSnapH array.</summary>
            public IReadOnlyList<decimal> StemSnapHorizontalWidths { get; set; }

            /// <summary>
            /// Misspelled alias of <see cref="StemSnapHorizontalWidths"/>, kept so existing callers
            /// continue to compile. Reads and writes the same value; prefer the correctly spelled property.
            /// </summary>
            public IReadOnlyList<decimal> StempSnapHorizontalWidths
            {
                get => StemSnapHorizontalWidths;
                set => StemSnapHorizontalWidths = value;
            }

            /// <summary>Values of the StemSnapV array.</summary>
            public IReadOnlyList<decimal> StemSnapVerticalWidths { get; set; }

            /// <summary>Value of the ForceBold entry.</summary>
            public bool ForceBold { get; set; }

            /// <summary>Value of the LanguageGroup entry.</summary>
            public int LanguageGroup { get; set; }

            /// <summary>Value of the password entry.</summary>
            public int? Password { get; set; }

            // NOTE(review): intended for the lenIV entry, but the visible parser keeps lenIV in a local
            // and never assigns this property - confirm whether it should be set.
            public int LenIv { get; set; }

            public object[] MinFeature { get; set; }

            /// <summary>Value of the RndStemUp entry.</summary>
            public bool RoundStemUp { get; set; }
        }
    }
}