fix type 1 font tests and port more parsing logic

This commit is contained in:
Eliot Jones
2018-10-24 19:11:44 +01:00
parent df0b60c2e1
commit 2a8777a93a
5 changed files with 67 additions and 93 deletions

View File

@@ -37,89 +37,20 @@
}
[Fact]
public void CanReadAsciiPart()
public void CanReadEncryptedPortion()
{
var bytes = StringBytesTestConverter.Convert(Cmbx12, false);
var bytes = GetFileBytes("CMCSC10");
parser.Parse(bytes.Bytes, 0, 0);
parser.Parse(new ByteArrayInputBytes(bytes), 0, 0);
}
private const string Cmbx12 = @"%!PS-AdobeFont-1.1: CMBX12 1.0
%%CreationDate: 1991 Aug 20 16:34:54
% Copyright (C) 1997 American Mathematical Society. All Rights Reserved.
11 dict begin
/FontInfo 7 dict dup begin
/version (1.0) readonly def
/Notice (Copyright (C) 1997 American Mathematical Society. All Rights Reserved) readonly def
/FullName (CMBX12) readonly def
/FamilyName (Computer Modern) readonly def
/Weight (Bold) readonly def
/ItalicAngle 0 def
/isFixedPitch false def
end readonly def
/FontName /WDKAAR+CMBX12 def
/PaintType 0 def
/FontType 1 def
/FontMatrix [0.001 0 0 0.001 0 0] readonly def
/Encoding 256 array
0 1 255 {1 index exch /.notdef put} for
dup 12 /fi put
dup 46 /period put
dup 49 /one put
dup 50 /two put
dup 51 /three put
dup 52 /four put
dup 53 /five put
dup 65 /A put
dup 66 /B put
dup 67 /C put
dup 69 /E put
dup 73 /I put
dup 77 /M put
dup 78 /N put
dup 80 /P put
dup 82 /R put
dup 83 /S put
dup 84 /T put
dup 97 /a put
dup 98 /b put
dup 99 /c put
dup 100 /d put
dup 101 /e put
dup 102 /f put
dup 103 /g put
dup 104 /h put
dup 105 /i put
dup 107 /k put
dup 108 /l put
dup 109 /m put
dup 110 /n put
dup 111 /o put
dup 112 /p put
dup 114 /r put
dup 115 /s put
dup 116 /t put
dup 117 /u put
dup 118 /v put
dup 120 /x put
dup 121 /y put
readonly def
/FontBBox{-53 -251 1139 750}readonly def
/UniqueID 5000769 def
currentdict end
currentfile eexec
ÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†©~E£Ð7Ԑ×1¼Iu`“ÂõÎ>ä9Á?î\ºlüýÄ6Ag_Â_²ÂGÄ´/³0¨;2j~þªÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†
©~E£ÐªÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†©~E£Ðª7Ԑ×1¼Iu`“ÂõÎ>ä9Á?î\ºlüýÄ6Ag_Â_²ÂGÄ´/³0¨;2j~þ
ÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†©~E£Ðª7Ԑ×1¼Iu`“ÂõÎ>ä9Á?î\ºlüýÄ6Ag_Â_²ÂGÄ´/³0¨;2j~þ
×1¼Iu`“ÂõÎ>ä9Á?î\ºlüýÄ6Ag_Â_²ÂGÄ´/³0¨;2j~þv7Ԑ×1¼Iu`“ÂõÎ>ä9Á?î\ºlüýÄ6Ag_Â_²ÂGÄ´/³0¨;2j~þ000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000
cleartomark";
/// <summary>
/// Smoke test: parses the CMBX12 font file and passes as long as the parser does not throw.
/// </summary>
[Fact]
public void CanReadAsciiPart()
{
    // Wrap the raw file bytes in the input abstraction the parser consumes.
    var input = new ByteArrayInputBytes(GetFileBytes("CMBX12"));

    parser.Parse(input, 0, 0);
}
private static byte[] GetFileBytes(string name)
{

View File

@@ -1,5 +1,6 @@
namespace UglyToad.PdfPig.Fonts.Type1.Parser
{
using System;
using System.Collections.Generic;
using System.Linq;
using IO;
@@ -25,11 +26,6 @@
var str = OtherEncodings.BytesAsLatin1String(decrypted.ToArray());
var tokenizer = new Type1Tokenizer(new ByteArrayInputBytes(decrypted));
while (tokenizer.CurrentToken != null)
{
tokenizer.GetNext();
}
/*
* After 4 random characters come the /Private dictionary and the /CharStrings dictionary.
* The first defines a number of technical terms involving character construction, and also contains an array of subroutines used in character paths.
@@ -37,6 +33,33 @@
* Both the subroutines and the character descriptions are yet again encrypted in a fashion similar to the entire binary segment, but now with an initial value of R = 4330 instead of 55665.
*/
while (!tokenizer.CurrentToken.IsPrivateDictionary)
{
tokenizer.GetNext();
if (tokenizer.CurrentToken == null)
{
throw new InvalidOperationException("Did not find the private dictionary start token.");
}
}
var next = tokenizer.GetNext();
if (next?.Type != Type1Token.TokenType.Integer || !(next is Type1TextToken textToken))
{
throw new InvalidOperationException($"No length token was present in the stream following the private dictionary start, instead got {next}.");
}
var length = textToken.AsInt();
ReadExpected(tokenizer, Type1Token.TokenType.Name, "dict");
// actually could also be "/Private 10 dict def Private begin"
// instead of the "dup"
ReadExpected(tokenizer, Type1Token.TokenType.Name, "dup");
ReadExpected(tokenizer, Type1Token.TokenType.Name, "begin");
while (tokenizer.CurrentToken != null)
{
tokenizer.GetNext();
}
return decrypted;
}
@@ -146,5 +169,19 @@
return plainBytes;
}
/// <summary>
/// Reads the next token from <paramref name="tokenizer"/> and verifies that it is a text token
/// of the given <paramref name="type"/> whose text equals <paramref name="text"/> (compared ignoring case).
/// </summary>
/// <param name="tokenizer">The tokenizer to advance by one token.</param>
/// <param name="type">The token type the next token must have.</param>
/// <param name="text">The text the next token must carry.</param>
/// <exception cref="InvalidOperationException">
/// No further token is available, or the token read does not match the expected type and text.
/// </exception>
private static void ReadExpected(Type1Tokenizer tokenizer, Type1Token.TokenType type, string text)
{
    var actual = tokenizer.GetNext();

    switch (actual)
    {
        case null:
            // The tokenizer produced nothing where a token was required.
            throw new InvalidOperationException($"Type 1 Encrypted portion ended when a token with text '{text}' was expected instead.");

        case Type1TextToken candidate
            when candidate.Type == type && string.Equals(candidate.Text, text, StringComparison.OrdinalIgnoreCase):
            // Exactly the token we were looking for; nothing more to do.
            return;

        default:
            // Wrong type, not a text token, or mismatching text.
            throw new InvalidOperationException($"Found invalid token {actual} when type {type} with text {text} was expected.");
    }
}
}
}

View File

@@ -5,7 +5,6 @@
using Exceptions;
using Geometry;
using IO;
using PdfPig.Parser.Parts;
using Tokenization;
using Tokenization.Scanner;
using Tokenization.Tokens;
@@ -15,7 +14,6 @@
private const string ClearToMark = "cleartomark";
private const int PfbFileIndicator = 0x80;
private const int EexecKey = 55665;
private readonly Type1EncryptedPortionParser encryptedPortionParser;

View File

@@ -7,6 +7,8 @@
{
public IReadOnlyList<byte> Data { get; }
public override bool IsPrivateDictionary { get; } = false;
public Type1DataToken(TokenType type, IReadOnlyList<byte> data) : base(type)
{
if (type != TokenType.Charstring)
@@ -20,7 +22,6 @@
public override string ToString()
{
return $"Token[type = {Type}, data = {Data.Count} bytes]";
}
}
@@ -28,6 +29,8 @@
{
public string Text { get; }
public override bool IsPrivateDictionary => Type == TokenType.Literal && string.Equals(Text, "Private", StringComparison.OrdinalIgnoreCase);
public Type1TextToken(char c, TokenType type) : this(c.ToString(), type) { }
public Type1TextToken(string text, TokenType type) : base(type)
{
@@ -55,11 +58,13 @@
}
}
internal class Type1Token
internal abstract class Type1Token
{
public TokenType Type { get; }
public Type1Token(TokenType type)
public abstract bool IsPrivateDictionary { get; }
protected Type1Token(TokenType type)
{
Type = type;
}

View File

@@ -1,6 +1,7 @@
namespace UglyToad.PdfPig.Fonts.Type1.Parser
{
using System;
using System.Collections.Generic;
using System.Text;
using IO;
using PdfPig.Parser.Parts;
@@ -12,15 +13,18 @@
private readonly StringBuilder stringBuffer = new StringBuilder();
private readonly IInputBytes bytes;
private readonly List<string> comments;
private int openParens;
private Type1Token previousToken;
public Type1Token CurrentToken { get; private set; }
public IReadOnlyList<string> Comments => comments;
/// <summary>
/// Creates a tokenizer over the supplied input and immediately reads the first token
/// into <see cref="CurrentToken"/>.
/// </summary>
/// <param name="bytes">The input to tokenize.</param>
/// <exception cref="ArgumentNullException"><paramref name="bytes"/> is <c>null</c>.</exception>
public Type1Tokenizer(IInputBytes bytes)
{
    // Fail fast with a descriptive exception instead of deferring the failure to the first read.
    this.bytes = bytes ?? throw new ArgumentNullException(nameof(bytes));
    comments = new List<string>();

    // Read the first token eagerly so CurrentToken is set as soon as construction completes.
    CurrentToken = ReadNextToken();
}
@@ -45,7 +49,7 @@
switch (c)
{
case '%':
var comment = ReadComment();
comments.Add(ReadComment());
break;
case '(':
return ReadString();
@@ -100,7 +104,7 @@
break;
}
if (TryReadNumber(out var number))
if (TryReadNumber(c, out var number))
{
return number;
}
@@ -193,7 +197,7 @@
return null;
}
private bool TryReadNumber(out Type1TextToken numberToken)
private bool TryReadNumber(char c, out Type1TextToken numberToken)
{
char GetNext()
{
@@ -208,7 +212,6 @@
var sb = new StringBuilder();
StringBuilder radix = null;
char c = GetNext();
var hasDigit = false;
// optional + or -