mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-10-15 19:54:52 +08:00
fix type 1 font tests and port more parsing logic
This commit is contained in:
@@ -37,89 +37,20 @@
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void CanReadAsciiPart()
|
||||
public void CanReadEncryptedPortion()
|
||||
{
|
||||
var bytes = StringBytesTestConverter.Convert(Cmbx12, false);
|
||||
var bytes = GetFileBytes("CMCSC10");
|
||||
|
||||
parser.Parse(bytes.Bytes, 0, 0);
|
||||
parser.Parse(new ByteArrayInputBytes(bytes), 0, 0);
|
||||
}
|
||||
|
||||
private const string Cmbx12 = @"%!PS-AdobeFont-1.1: CMBX12 1.0
|
||||
%%CreationDate: 1991 Aug 20 16:34:54
|
||||
% Copyright (C) 1997 American Mathematical Society. All Rights Reserved.
|
||||
11 dict begin
|
||||
/FontInfo 7 dict dup begin
|
||||
/version (1.0) readonly def
|
||||
/Notice (Copyright (C) 1997 American Mathematical Society. All Rights Reserved) readonly def
|
||||
/FullName (CMBX12) readonly def
|
||||
/FamilyName (Computer Modern) readonly def
|
||||
/Weight (Bold) readonly def
|
||||
/ItalicAngle 0 def
|
||||
/isFixedPitch false def
|
||||
end readonly def
|
||||
/FontName /WDKAAR+CMBX12 def
|
||||
/PaintType 0 def
|
||||
/FontType 1 def
|
||||
/FontMatrix [0.001 0 0 0.001 0 0] readonly def
|
||||
/Encoding 256 array
|
||||
0 1 255 {1 index exch /.notdef put} for
|
||||
dup 12 /fi put
|
||||
dup 46 /period put
|
||||
dup 49 /one put
|
||||
dup 50 /two put
|
||||
dup 51 /three put
|
||||
dup 52 /four put
|
||||
dup 53 /five put
|
||||
dup 65 /A put
|
||||
dup 66 /B put
|
||||
dup 67 /C put
|
||||
dup 69 /E put
|
||||
dup 73 /I put
|
||||
dup 77 /M put
|
||||
dup 78 /N put
|
||||
dup 80 /P put
|
||||
dup 82 /R put
|
||||
dup 83 /S put
|
||||
dup 84 /T put
|
||||
dup 97 /a put
|
||||
dup 98 /b put
|
||||
dup 99 /c put
|
||||
dup 100 /d put
|
||||
dup 101 /e put
|
||||
dup 102 /f put
|
||||
dup 103 /g put
|
||||
dup 104 /h put
|
||||
dup 105 /i put
|
||||
dup 107 /k put
|
||||
dup 108 /l put
|
||||
dup 109 /m put
|
||||
dup 110 /n put
|
||||
dup 111 /o put
|
||||
dup 112 /p put
|
||||
dup 114 /r put
|
||||
dup 115 /s put
|
||||
dup 116 /t put
|
||||
dup 117 /u put
|
||||
dup 118 /v put
|
||||
dup 120 /x put
|
||||
dup 121 /y put
|
||||
readonly def
|
||||
/FontBBox{-53 -251 1139 750}readonly def
|
||||
/UniqueID 5000769 def
|
||||
currentdict end
|
||||
currentfile eexec
|
||||
ÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†©~E£Ð7Ô×1¼Iu`“ÂõÎ>ä‘9Á?î\ºlüýÄ6Ag_Â_–²ÂGÄ´/³0¨;2j~þªÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†
|
||||
©~E£ÐªÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†©~E£Ðª7Ô×1¼Iu`“ÂõÎ>ä‘9Á?î\ºlüýÄ6Ag_Â_–²ÂGÄ´/³0¨;2j~þ
|
||||
ÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†©~E£Ðª7Ô×1¼Iu`“ÂõÎ>ä‘9Á?î\ºlüýÄ6Ag_Â_–²ÂGÄ´/³0¨;2j~þ
|
||||
7Ô×1¼Iu`“ÂõÎ>ä‘9Á?î\ºlüýÄ6Ag_Â_–²ÂGÄ´/³0¨;2j~þv7Ô×1¼Iu`“ÂõÎ>ä‘9Á?î\ºlüýÄ6Ag_Â_–²ÂGÄ´/³0¨;2j~þ000000000000000000000000000000000000000000000000000000000000000
|
||||
0000000000000000000000000000000000000000000000000000000000000000
|
||||
0000000000000000000000000000000000000000000000000000000000000000
|
||||
0000000000000000000000000000000000000000000000000000000000000000
|
||||
0000000000000000000000000000000000000000000000000000000000000000
|
||||
0000000000000000000000000000000000000000000000000000000000000000
|
||||
0000000000000000000000000000000000000000000000000000000000000000
|
||||
0000000000000000000000000000000000000000000000000000000000000000
|
||||
cleartomark";
|
||||
/// <summary>
/// Smoke test: feeds the bytes returned by <c>GetFileBytes("CMBX12")</c> to the parser.
/// There are no assertions; the test passes as long as <c>Parse</c> does not throw.
/// </summary>
[Fact]
public void CanReadAsciiPart()
{
    var fontBytes = GetFileBytes("CMBX12");
    var input = new ByteArrayInputBytes(fontBytes);

    parser.Parse(input, 0, 0);
}
|
||||
|
||||
private static byte[] GetFileBytes(string name)
|
||||
{
|
||||
|
@@ -1,5 +1,6 @@
|
||||
namespace UglyToad.PdfPig.Fonts.Type1.Parser
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using IO;
|
||||
@@ -25,11 +26,6 @@
|
||||
var str = OtherEncodings.BytesAsLatin1String(decrypted.ToArray());
|
||||
|
||||
var tokenizer = new Type1Tokenizer(new ByteArrayInputBytes(decrypted));
|
||||
while (tokenizer.CurrentToken != null)
|
||||
{
|
||||
tokenizer.GetNext();
|
||||
}
|
||||
|
||||
/*
|
||||
* After the 4 random characters come the /Private dictionary and the /CharStrings dictionary.
|
||||
* The first defines a number of technical terms involving character construction, and contains also an array of subroutines used in character paths.
|
||||
@@ -37,6 +33,33 @@
|
||||
* Both the subroutines and the character descriptions are yet again encrypted in a fashion similar to the entire binary segment, but now with an initial value of R = 4330 instead of 55665.
|
||||
*/
|
||||
|
||||
while (!tokenizer.CurrentToken.IsPrivateDictionary)
|
||||
{
|
||||
tokenizer.GetNext();
|
||||
if (tokenizer.CurrentToken == null)
|
||||
{
|
||||
throw new InvalidOperationException("Did not find the private dictionary start token.");
|
||||
}
|
||||
}
|
||||
|
||||
var next = tokenizer.GetNext();
|
||||
if (next?.Type != Type1Token.TokenType.Integer || !(next is Type1TextToken textToken))
|
||||
{
|
||||
throw new InvalidOperationException($"No length token was present in the stream following the private dictionary start, instead got {next}.");
|
||||
}
|
||||
|
||||
var length = textToken.AsInt();
|
||||
ReadExpected(tokenizer, Type1Token.TokenType.Name, "dict");
|
||||
// actually could also be "/Private 10 dict def Private begin"
|
||||
// instead of the "dup"
|
||||
ReadExpected(tokenizer, Type1Token.TokenType.Name, "dup");
|
||||
ReadExpected(tokenizer, Type1Token.TokenType.Name, "begin");
|
||||
|
||||
while (tokenizer.CurrentToken != null)
|
||||
{
|
||||
tokenizer.GetNext();
|
||||
}
|
||||
|
||||
return decrypted;
|
||||
}
|
||||
|
||||
@@ -146,5 +169,19 @@
|
||||
|
||||
return plainBytes;
|
||||
}
|
||||
|
||||
/// <summary>
/// Advances <paramref name="tokenizer"/> by one token and verifies that the token
/// has the expected type and text.
/// </summary>
/// <param name="tokenizer">The tokenizer to read the next token from.</param>
/// <param name="type">The token type the next token must have.</param>
/// <param name="text">The text the next token must carry; compared ignoring case.</param>
/// <exception cref="InvalidOperationException">
/// The tokenizer returned no token, or the token was not a <see cref="Type1TextToken"/>
/// with the given type and text.
/// </exception>
private static void ReadExpected(Type1Tokenizer tokenizer, Type1Token.TokenType type, string text)
{
    var actual = tokenizer.GetNext();

    // A null token means the tokenizer had nothing more to return.
    if (actual == null)
    {
        throw new InvalidOperationException($"Type 1 Encrypted portion ended when a token with text '{text}' was expected instead.");
    }

    // The token is acceptable only if the type, the concrete token class and the text all match.
    var isExpected = actual.Type == type
        && actual is Type1TextToken textual
        && string.Equals(textual.Text, text, StringComparison.OrdinalIgnoreCase);

    if (!isExpected)
    {
        throw new InvalidOperationException($"Found invalid token {actual} when type {type} with text {text} was expected.");
    }
}
|
||||
}
|
||||
}
|
||||
|
@@ -5,7 +5,6 @@
|
||||
using Exceptions;
|
||||
using Geometry;
|
||||
using IO;
|
||||
using PdfPig.Parser.Parts;
|
||||
using Tokenization;
|
||||
using Tokenization.Scanner;
|
||||
using Tokenization.Tokens;
|
||||
@@ -15,7 +14,6 @@
|
||||
private const string ClearToMark = "cleartomark";
|
||||
|
||||
private const int PfbFileIndicator = 0x80;
|
||||
private const int EexecKey = 55665;
|
||||
|
||||
private readonly Type1EncryptedPortionParser encryptedPortionParser;
|
||||
|
||||
|
@@ -7,6 +7,8 @@
|
||||
{
|
||||
public IReadOnlyList<byte> Data { get; }
|
||||
|
||||
public override bool IsPrivateDictionary { get; } = false;
|
||||
|
||||
public Type1DataToken(TokenType type, IReadOnlyList<byte> data) : base(type)
|
||||
{
|
||||
if (type != TokenType.Charstring)
|
||||
@@ -20,7 +22,6 @@
|
||||
public override string ToString()
|
||||
{
|
||||
return $"Token[type = {Type}, data = {Data.Count} bytes]";
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -28,6 +29,8 @@
|
||||
{
|
||||
public string Text { get; }
|
||||
|
||||
public override bool IsPrivateDictionary => Type == TokenType.Literal && string.Equals(Text, "Private", StringComparison.OrdinalIgnoreCase);
|
||||
|
||||
public Type1TextToken(char c, TokenType type) : this(c.ToString(), type) { }
|
||||
public Type1TextToken(string text, TokenType type) : base(type)
|
||||
{
|
||||
@@ -55,11 +58,13 @@
|
||||
}
|
||||
}
|
||||
|
||||
internal class Type1Token
|
||||
internal abstract class Type1Token
|
||||
{
|
||||
public TokenType Type { get; }
|
||||
|
||||
public Type1Token(TokenType type)
|
||||
public abstract bool IsPrivateDictionary { get; }
|
||||
|
||||
protected Type1Token(TokenType type)
|
||||
{
|
||||
Type = type;
|
||||
}
|
||||
|
@@ -1,6 +1,7 @@
|
||||
namespace UglyToad.PdfPig.Fonts.Type1.Parser
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text;
|
||||
using IO;
|
||||
using PdfPig.Parser.Parts;
|
||||
@@ -12,15 +13,18 @@
|
||||
private readonly StringBuilder stringBuffer = new StringBuilder();
|
||||
|
||||
private readonly IInputBytes bytes;
|
||||
private readonly List<string> comments;
|
||||
|
||||
private int openParens;
|
||||
private Type1Token previousToken;
|
||||
|
||||
public Type1Token CurrentToken { get; private set; }
|
||||
public IReadOnlyList<string> Comments => comments;
|
||||
|
||||
/// <summary>
/// Creates a tokenizer over <paramref name="bytes"/> and immediately reads the first token
/// into <see cref="CurrentToken"/>.
/// </summary>
/// <param name="bytes">The input to tokenize.</param>
/// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null.</exception>
public Type1Tokenizer(IInputBytes bytes)
{
    // Fail at the boundary instead of storing a null input for later reads to trip over.
    this.bytes = bytes ?? throw new ArgumentNullException(nameof(bytes));

    // The comment list is created before the first read so it already exists while tokens are being read.
    comments = new List<string>();
    CurrentToken = ReadNextToken();
}
|
||||
|
||||
@@ -45,7 +49,7 @@
|
||||
switch (c)
|
||||
{
|
||||
case '%':
|
||||
var comment = ReadComment();
|
||||
comments.Add(ReadComment());
|
||||
break;
|
||||
case '(':
|
||||
return ReadString();
|
||||
@@ -100,7 +104,7 @@
|
||||
break;
|
||||
}
|
||||
|
||||
if (TryReadNumber(out var number))
|
||||
if (TryReadNumber(c, out var number))
|
||||
{
|
||||
return number;
|
||||
}
|
||||
@@ -193,7 +197,7 @@
|
||||
return null;
|
||||
}
|
||||
|
||||
private bool TryReadNumber(out Type1TextToken numberToken)
|
||||
private bool TryReadNumber(char c, out Type1TextToken numberToken)
|
||||
{
|
||||
char GetNext()
|
||||
{
|
||||
@@ -208,7 +212,6 @@
|
||||
var sb = new StringBuilder();
|
||||
StringBuilder radix = null;
|
||||
|
||||
char c = GetNext();
|
||||
var hasDigit = false;
|
||||
|
||||
// optional + or -
|
||||
|
Reference in New Issue
Block a user