add very hacky parsing for type 1 font files in order to read the encoding

2025-10-15 19:54:52 +08:00 · 2018-01-14 18:59:03 +00:00
parent 615ee88a46
commit 4443cde229
13 changed files with 627 additions and 17 deletions
--- a/src/UglyToad.PdfPig.Tests/Fonts/Type1/Type1FontParserTests.cs
+++ b/src/UglyToad.PdfPig.Tests/Fonts/Type1/Type1FontParserTests.cs
@@ -0,0 +1,95 @@
+namespace UglyToad.PdfPig.Tests.Fonts.Type1
+{
+    using PdfPig.Fonts.Type1.Parser;
+    using Xunit;
+
+    public class Type1FontParserTests
+    {
+        private readonly Type1FontParser parser = new Type1FontParser();
+
+        /// <summary>
+        /// Smoke test: parsing the CMBX12 sample program must complete without throwing.
+        /// </summary>
+        [Fact]
+        public void CanRead()
+        {
+            var input = StringBytesTestConverter.Convert(Cmbx12, false).Bytes;
+
+            parser.Parse(input);
+        }
+
+        private const string Cmbx12 = @"%!PS-AdobeFont-1.1: CMBX12 1.0
+%%CreationDate: 1991 Aug 20 16:34:54
+% Copyright (C) 1997 American Mathematical Society. All Rights Reserved.
+11 dict begin
+/FontInfo 7 dict dup begin
+/version (1.0) readonly def
+/Notice (Copyright (C) 1997 American Mathematical Society. All Rights Reserved) readonly def
+/FullName (CMBX12) readonly def
+/FamilyName (Computer Modern) readonly def
+/Weight (Bold) readonly def
+/ItalicAngle 0 def
+/isFixedPitch false def
+end readonly def
+/FontName /WDKAAR+CMBX12 def
+/PaintType 0 def
+/FontType 1 def
+/FontMatrix [0.001 0 0 0.001 0 0] readonly def
+/Encoding 256 array
+0 1 255 {1 index exch /.notdef put} for
+dup 12 /fi put
+dup 46 /period put
+dup 49 /one put
+dup 50 /two put
+dup 51 /three put
+dup 52 /four put
+dup 53 /five put
+dup 65 /A put
+dup 66 /B put
+dup 67 /C put
+dup 69 /E put
+dup 73 /I put
+dup 77 /M put
+dup 78 /N put
+dup 80 /P put
+dup 82 /R put
+dup 83 /S put
+dup 84 /T put
+dup 97 /a put
+dup 98 /b put
+dup 99 /c put
+dup 100 /d put
+dup 101 /e put
+dup 102 /f put
+dup 103 /g put
+dup 104 /h put
+dup 105 /i put
+dup 107 /k put
+dup 108 /l put
+dup 109 /m put
+dup 110 /n put
+dup 111 /o put
+dup 112 /p put
+dup 114 /r put
+dup 115 /s put
+dup 116 /t put
+dup 117 /u put
+dup 118 /v put
+dup 120 /x put
+dup 121 /y put
+readonly def
+/FontBBox{-53 -251 1139 750}readonly def
+/UniqueID 5000769 def
+currentdict end
+currentfile eexec
+ÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†©~E£Ð7Ô×1¼Iu`“ÂõÎ>ä‘9Á?î\ºlüýÄ6Ag_Â_–²ÂGÄ´/³0¨;2j~þªÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†
+©~E£ÐªÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†©~E£Ðª7Ô×1¼Iu`“ÂõÎ>ä‘9Á?î\ºlüýÄ6Ag_Â_–²ÂGÄ´/³0¨;2j~þ
+ÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†©~E£Ðª7Ô×1¼Iu`“ÂõÎ>ä‘9Á?î\ºlüýÄ6Ag_Â_–²ÂGÄ´/³0¨;2j~þ
+7Ô×1¼Iu`“ÂõÎ>ä‘9Á?î\ºlüýÄ6Ag_Â_–²ÂGÄ´/³0¨;2j~þv7Ô×1¼Iu`“ÂõÎ>ä‘9Á?î\ºlüýÄ6Ag_Â_–²ÂGÄ´/³0¨;2j~þ000000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+cleartomark";
+    }
+}
--- a/src/UglyToad.PdfPig.Tests/Integration/Documents/ICML03-081.pdf
+++ b/src/UglyToad.PdfPig.Tests/Integration/Documents/ICML03-081.pdf
--- a/src/UglyToad.PdfPig.Tests/Integration/LaTexTests.cs
+++ b/src/UglyToad.PdfPig.Tests/Integration/LaTexTests.cs
@@ -0,0 +1,40 @@
+namespace UglyToad.PdfPig.Tests.Integration
+{
+    using System;
+    using System.IO;
+    using Xunit;
+
+    /// <summary>
+    /// Integration tests which open the ICML03-081.pdf sample document and check its text and page count.
+    /// </summary>
+    public class LaTexTests
+    {
+        // Builds the full path to the sample PDF, three levels above the application base directory.
+        private static string GetFilename()
+        {
+            var baseDirectory = AppDomain.CurrentDomain.BaseDirectory;
+            var documents = Path.Combine(baseDirectory, "..", "..", "..", "Integration", "Documents");
+
+            return Path.Combine(Path.GetFullPath(documents), "ICML03-081.pdf");
+        }
+
+        /// <summary>
+        /// The text of pages 1 and 2 must contain the expected fragments.
+        /// </summary>
+        [Fact]
+        public void CanReadContent()
+        {
+            using (var document = PdfDocument.Open(GetFilename()))
+            {
+                var firstPageText = document.GetPage(1).Text;
+
+                Assert.Contains("TacklingthePoorAssumptionsofNaiveBayesTextClassiﬁers", firstPageText);
+
+                var secondPageText = document.GetPage(2).Text;
+
+                Assert.Contains("isθc={θc1,θc2,...,θcn},", secondPageText);
+            }
+        }
+
+        /// <summary>
+        /// The document must report 8 pages.
+        /// </summary>
+        [Fact]
+        public void HasCorrectNumberOfPages()
+        {
+            using (var document = PdfDocument.Open(GetFilename()))
+            {
+                var pageCount = document.NumberOfPages;
+
+                Assert.Equal(8, pageCount);
+            }
+        }
+    }
+}
--- a/src/UglyToad.PdfPig.Tests/Integration/LocalTests.cs
+++ b/src/UglyToad.PdfPig.Tests/Integration/LocalTests.cs
@@ -10,10 +10,7 @@
        [Fact]
        public void Tests()
        {
-            using (var document = PdfDocument.Open(@"C:\Users\eliot\Downloads\ICML03-081.pdf"))
-            {
-                var page = document.GetPage(1);
-            }
+            
        }
    }
 }
--- a/src/UglyToad.PdfPig.Tests/UglyToad.PdfPig.Tests.csproj
+++ b/src/UglyToad.PdfPig.Tests/UglyToad.PdfPig.Tests.csproj
@@ -13,6 +13,7 @@
    <None Remove="Integration\Documents\FarmerMac.pdf" />
    <None Remove="Integration\Documents\Font Size Test - from google chrome print pdf.pdf" />
    <None Remove="Integration\Documents\Font Size Test - from libre office.pdf" />
+    <None Remove="Integration\Documents\ICML03-081.pdf" />
    <None Remove="Integration\Documents\Judgement Document.pdf" />
    <None Remove="Integration\Documents\Multiple Page - from Mortality Statistics.pdf" />
    <None Remove="Integration\Documents\Single Page Form Content - from itext 1_1.pdf" />
@@ -39,6 +40,9 @@
    <Content Include="Integration\Documents\Font Size Test - from google chrome print pdf.pdf">
      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
    </Content>
+    <Content Include="Integration\Documents\ICML03-081.pdf">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </Content>
    <Content Include="Integration\Documents\Judgement Document.pdf">
      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
    </Content>
--- a/src/UglyToad.PdfPig/Fonts/Parser/Handlers/Type1FontHandler.cs
+++ b/src/UglyToad.PdfPig/Fonts/Parser/Handlers/Type1FontHandler.cs
@@ -1,6 +1,5 @@
 namespace UglyToad.PdfPig.Fonts.Parser.Handlers
 {
-    using System;
    using Cmap;
    using ContentStream;
    using Cos;
@@ -13,6 +12,8 @@
    using Simple;
    using Tokenization.Scanner;
    using Tokenization.Tokens;
+    using Type1;
+    using Type1.Parser;

    internal class Type1FontHandler : IFontHandler
    {
@@ -22,10 +23,12 @@
        private readonly FontDescriptorFactory fontDescriptorFactory;
        private readonly IEncodingReader encodingReader;
        private readonly IPdfObjectScanner scanner;
+        private readonly Type1FontParser type1FontParser;

        public Type1FontHandler(IPdfObjectParser pdfObjectParser, CMapCache cMapCache, IFilterProvider filterProvider, 
            FontDescriptorFactory fontDescriptorFactory, IEncodingReader encodingReader,
-            IPdfObjectScanner scanner)
+            IPdfObjectScanner scanner,
+            Type1FontParser type1FontParser)
        {
            this.pdfObjectParser = pdfObjectParser;
            this.cMapCache = cMapCache;
@@ -33,6 +36,7 @@
            this.fontDescriptorFactory = fontDescriptorFactory;
            this.encodingReader = encodingReader;
            this.scanner = scanner;
+            this.type1FontParser = type1FontParser;
        }

        public IFont Generate(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
@@ -60,7 +64,7 @@

            var descriptor = FontDictionaryAccessHelper.GetFontDescriptor(pdfObjectParser, fontDescriptorFactory, dictionary, reader, isLenientParsing);

-            ParseType1Font(descriptor, isLenientParsing);
+            var font = ParseType1Font(descriptor, isLenientParsing);

            var name = FontDictionaryAccessHelper.GetName(pdfObjectParser, dictionary, descriptor, reader, isLenientParsing);
            
@@ -79,19 +83,24 @@

            Encoding encoding = encodingReader.Read(dictionary, reader, isLenientParsing, descriptor);

-            return new Type1Font(name, firstCharacter, lastCharacter, widths, descriptor, encoding, toUnicodeCMap);
+            if (encoding == null && font?.Encoding.Count > 0)
+            {
+                encoding = new BuiltInEncoding(font.Encoding);
+            }
+
+            return new Type1FontSimple(name, firstCharacter, lastCharacter, widths, descriptor, encoding, toUnicodeCMap);
        }

-        private void ParseType1Font(FontDescriptor descriptor, bool isLenientParsing)
+        private Type1Font ParseType1Font(FontDescriptor descriptor, bool isLenientParsing)
        {
            if (descriptor?.FontFile == null)
            {
-                return;
+                return null;
            }

            if (descriptor.FontFile.ObjectKey.ObjectNumber == 0)
            {
-                return;
+                return null;
            }
            
            try
@@ -100,14 +109,16 @@

                if (stream == null)
                {
-                    return;
+                    return null;
                }

                var raw = new PdfRawStream(stream);

                var bytes = raw.Decode(filterProvider);

-                // TODO: parse
+                var font = type1FontParser.Parse(new ByteArrayInputBytes(bytes));
+
+                return font;
            }
            catch
            {
@@ -116,6 +127,8 @@
                    throw;
                }
            }
+
+            return null;
        }
    }
 }
--- a/src/UglyToad.PdfPig/Fonts/Simple/Type1FontSimple.cs
+++ b/src/UglyToad.PdfPig/Fonts/Simple/Type1FontSimple.cs
@@ -11,7 +11,7 @@
    /// <summary>
    /// TODO: implement this properly if you find a Type 1 font in the wild.
    /// </summary>
-    internal class Type1Font : IFont
+    internal class Type1FontSimple : IFont
    {
        private readonly int firstChar;
        private readonly int lastChar;
@@ -25,7 +25,7 @@

        public bool IsVertical { get; } = false;

-        public Type1Font(CosName name, int firstChar, int lastChar, decimal[] widths, FontDescriptor fontDescriptor, Encoding encoding, CMap toUnicodeCMap)
+        public Type1FontSimple(CosName name, int firstChar, int lastChar, decimal[] widths, FontDescriptor fontDescriptor, Encoding encoding, CMap toUnicodeCMap)
        {
            this.firstChar = firstChar;
            this.lastChar = lastChar;
--- a/src/UglyToad.PdfPig/Fonts/Type1/Parser/Type1FontParser.cs
+++ b/src/UglyToad.PdfPig/Fonts/Type1/Parser/Type1FontParser.cs
@@ -1,10 +1,299 @@
 namespace UglyToad.PdfPig.Fonts.Type1.Parser
 {
+    using System;
+    using System.Collections.Generic;
+    using Cos;
+    using Exceptions;
+    using Geometry;
+    using IO;
+    using Tokenization;
+    using Tokenization.Scanner;
+    using Tokenization.Tokens;
+
    internal class Type1FontParser
    {
-        public void Parse()
+        /// <summary>
+        /// Reads the clear-text portion of a Type 1 font program and collects its name, encoding,
+        /// font matrix and bounding box. Content following 'currentfile eexec' is skipped, not decoded.
+        /// </summary>
+        /// <param name="inputBytes">The bytes of the font program.</param>
+        /// <returns>The information gathered from the dictionaries which were read.</returns>
+        /// <exception cref="InvalidFontFormatException">The first token is not a comment starting with '!'.</exception>
+        public Type1Font Parse(IInputBytes inputBytes)
         {
+            var scanner = new CoreTokenScanner(inputBytes);

+            // The header marker is fixed ASCII so it is compared ordinally rather than with the current culture.
+            if (!scanner.TryReadToken(out CommentToken comment) || !comment.Data.StartsWith("!", StringComparison.Ordinal))
+            {
+                throw new InvalidFontFormatException("The Type1 program did not start with '%!'.");
+            }
+
+            // The name is only taken from a header made of exactly three space separated parts, the name being the second.
+            var parts = comment.Data.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries);
+            var name = parts.Length == 3 ? parts[1] : "Unknown";
+
+            // Step over any further leading comments; their content is not used.
+            while (scanner.MoveNext() && scanner.CurrentToken is CommentToken)
+            {
+            }
+
+            var dictionaries = new List<DictionaryToken>();
+
+            // Override arrays and names since type 1 handles these differently.
+            var arrayTokenizer = new Type1ArrayTokenizer();
+            var nameTokenizer = new Type1NameTokenizer();
+            scanner.RegisterCustomTokenizer((byte)'{', arrayTokenizer);
+            scanner.RegisterCustomTokenizer((byte)'/', nameTokenizer);
+
+            try
+            {
+                // Keep a short history so that an operator can look at the operand which preceded it.
+                var tokenSet = new PreviousTokenSet();
+                tokenSet.Add(scanner.CurrentToken);
+                while (scanner.MoveNext())
+                {
+                    if (scanner.CurrentToken is OperatorToken operatorToken)
+                    {
+                        HandleOperator(operatorToken, inputBytes, scanner, tokenSet, dictionaries);
+                    }
+
+                    tokenSet.Add(scanner.CurrentToken);
+                }
+            }
+            finally
+            {
+                scanner.DeregisterCustomTokenizer(arrayTokenizer);
+                scanner.DeregisterCustomTokenizer(nameTokenizer);
+            }
+
+            var encoding = GetEncoding(dictionaries);
+            var matrix = GetFontMatrix(dictionaries);
+            var boundingBox = GetBoundingBox(dictionaries);
+
+            return new Type1Font(name, encoding, matrix, boundingBox);
+        }
+
+        /// <summary>
+        /// Reacts to the operators the parser cares about: 'dict' starts reading a dictionary and
+        /// 'currentfile' followed by 'eexec' skips the rest of the input. All other operators are ignored.
+        /// </summary>
+        /// <param name="token">The operator which was just read.</param>
+        /// <param name="bytes">The underlying input, used to skip the encrypted content.</param>
+        /// <param name="scanner">The scanner positioned on <paramref name="token"/>.</param>
+        /// <param name="set">The most recent tokens; index 0 is the token read before <paramref name="token"/>.</param>
+        /// <param name="dictionaries">Receives any dictionary which is read.</param>
+        private void HandleOperator(OperatorToken token, IInputBytes bytes, ISeekableTokenScanner scanner, PreviousTokenSet set, List<DictionaryToken> dictionaries)
+        {
+            switch (token.Data)
+            {
+                case "dict":
+                    // The size operand must be a literal number directly before 'dict'. When it is not
+                    // (or there is no previous token) skip this 'dict' instead of failing on a cast.
+                    if (!(set[0] is NumericToken size))
+                    {
+                        return;
+                    }
+
+                    dictionaries.Add(ReadDictionary(size.Int, scanner));
+                    break;
+                case "currentfile":
+                    if (!scanner.MoveNext() || scanner.CurrentToken != OperatorToken.Eexec)
+                    {
+                        return;
+                    }
+
+                    // For now we will not read this stuff.
+                    SkipEncryptedContent(bytes);
+                    break;
+                default:
+                    return;
+            }
+        }
+
+        /// <summary>
+        /// Seeks to the last position of the input and consumes whatever bytes remain.
+        /// </summary>
+        private void SkipEncryptedContent(IInputBytes bytes)
+        {
+            var lastPosition = bytes.Length - 1;
+            bytes.Seek(lastPosition);
+
+            // Drain anything left after the seek.
+            while (bytes.MoveNext())
+            {
+            }
+        }
+
+        /// <summary>
+        /// Reads up to <paramref name="keys"/> name/value definitions which follow the next 'begin' operator.
+        /// The value stored for a name is the last non-skipped token seen before its 'def'.
+        /// Reading stops early, returning what was gathered so far, when a name cannot be read
+        /// or a nested 'dict' is not preceded by a number.
+        /// </summary>
+        private static DictionaryToken ReadDictionary(int keys, ISeekableTokenScanner scanner)
+        {
+            var entries = new Dictionary<IToken, IToken>();
+            IToken lastValue = null;
+
+            // Advance until 'begin' is the current token or the input runs out.
+            while (scanner.MoveNext())
+            {
+                if (scanner.CurrentToken is OperatorToken candidate && candidate.Data == "begin")
+                {
+                    break;
+                }
+            }
+
+            for (var remaining = keys; remaining > 0; remaining--)
+            {
+                if (!scanner.TryReadToken(out NameToken key))
+                {
+                    return new DictionaryToken(entries);
+                }
+
+                // The encoding has its own dedicated reader.
+                if (key.Data.Equals(CosName.ENCODING))
+                {
+                    entries[key] = ReadEncoding(scanner);
+                    continue;
+                }
+
+                while (scanner.MoveNext())
+                {
+                    var current = scanner.CurrentToken;
+
+                    if (current == OperatorToken.Def)
+                    {
+                        entries[key] = lastValue;
+                        break;
+                    }
+
+                    if (current == OperatorToken.Dict)
+                    {
+                        if (!(lastValue is NumericToken size))
+                        {
+                            return new DictionaryToken(entries);
+                        }
+
+                        // The nested dictionary becomes the value for the current key.
+                        lastValue = ReadDictionary(size.Int, scanner);
+                        continue;
+                    }
+
+                    if (current == OperatorToken.Readonly)
+                    {
+                        continue;
+                    }
+
+                    if (current is OperatorToken op && op.Data == "end")
+                    {
+                        continue;
+                    }
+
+                    lastValue = current;
+                }
+            }
+
+            return new DictionaryToken(entries);
+        }
+
+        /// <summary>
+        /// Reads an encoding written as '&lt;size&gt; array ... for' followed by 'dup &lt;code&gt; /name put' entries.
+        /// </summary>
+        /// <param name="scanner">The scanner positioned just after the Encoding name.</param>
+        /// <returns>
+        /// An array of alternating code and name tokens. The array is empty when the expected
+        /// '&lt;size&gt; array ... for' preamble is not found.
+        /// </returns>
+        /// <exception cref="InvalidFontFormatException">An entry is not of the form 'dup &lt;code&gt; /name put'.</exception>
+        private static ArrayToken ReadEncoding(ISeekableTokenScanner scanner)
+        {
+            var result = new List<IToken>();
+
+            // Treat encoding differently, it's what we came here for!
+            if (!scanner.TryReadToken(out NumericToken _))
+            {
+                return new ArrayToken(result);
+            }
+
+            if (!scanner.TryReadToken(out OperatorToken arrayOperatorToken) || arrayOperatorToken.Data != "array")
+            {
+                return new ArrayToken(result);
+            }
+
+            while (scanner.MoveNext() && (!(scanner.CurrentToken is OperatorToken forOperator) || forOperator.Data != "for"))
+            {
+                // skip these operators for now, they're probably important...
+            }
+
+            if (scanner.CurrentToken != OperatorToken.For)
+            {
+                return new ArrayToken(result);
+            }
+
+            while (scanner.MoveNext() && scanner.CurrentToken != OperatorToken.Def && scanner.CurrentToken != OperatorToken.Readonly)
+            {
+                if (scanner.CurrentToken != OperatorToken.Dup)
+                {
+                    throw new InvalidFontFormatException("Expected the array for encoding to begin with 'dup'.");
+                }
+
+                // Check both the read and the token type so malformed input gives a format error
+                // rather than an invalid cast or null reference.
+                if (!scanner.MoveNext() || !(scanner.CurrentToken is NumericToken number))
+                {
+                    throw new InvalidFontFormatException("Expected a numeric character code after 'dup' in the encoding array.");
+                }
+
+                if (!scanner.MoveNext() || !(scanner.CurrentToken is NameToken name))
+                {
+                    throw new InvalidFontFormatException("Expected a glyph name after the character code in the encoding array.");
+                }
+
+                if (!scanner.TryReadToken(out OperatorToken put) || put != OperatorToken.Put)
+                {
+                    throw new InvalidFontFormatException("Expected the array entry to end with 'put'.");
+                }
+
+                result.Add(number);
+                result.Add(name);
+            }
+
+            // The entries may have ended on 'readonly'; move on to the closing 'def'.
+            while (scanner.CurrentToken != OperatorToken.Def && scanner.MoveNext())
+            {
+                // skip
+            }
+
+            return new ArrayToken(result);
+        }
+
+        /// <summary>
+        /// Builds the code to glyph name map from the first dictionary holding an Encoding array.
+        /// The array is read as alternating code and name tokens. The map is empty when no dictionary has one.
+        /// </summary>
+        private static Dictionary<int, string> GetEncoding(IReadOnlyList<DictionaryToken> dictionaries)
+        {
+            var codeToName = new Dictionary<int, string>();
+
+            foreach (var candidate in dictionaries)
+            {
+                if (!candidate.TryGetByName(CosName.ENCODING, out var token) || !(token is ArrayToken encodingArray))
+                {
+                    continue;
+                }
+
+                var entries = encodingArray.Data;
+                var index = 0;
+
+                // Consume the tokens pairwise: a code followed by its name.
+                while (index < entries.Count)
+                {
+                    var code = (NumericToken) entries[index++];
+                    var name = (NameToken) entries[index++];
+
+                    codeToName[code.Int] = name.Data.Name;
+                }
+
+                // Only the first encoding found is used.
+                break;
+            }
+
+            return codeToName;
+        }
+
+        /// <summary>
+        /// Returns the first FontMatrix entry which is an array, or null when no dictionary has one.
+        /// </summary>
+        private static ArrayToken GetFontMatrix(IReadOnlyList<DictionaryToken> dictionaries)
+        {
+            for (var i = 0; i < dictionaries.Count; i++)
+            {
+                if (!dictionaries[i].TryGetByName(CosName.FONT_MATRIX, out var token))
+                {
+                    continue;
+                }
+
+                if (token is ArrayToken matrix)
+                {
+                    return matrix;
+                }
+            }
+
+            return null;
+        }
+
+        /// <summary>
+        /// Returns a rectangle built from the first FontBBox entry which is an array of exactly four items,
+        /// or null when no dictionary has one.
+        /// </summary>
+        private static PdfRectangle GetBoundingBox(IReadOnlyList<DictionaryToken> dictionaries)
+        {
+            foreach (var candidate in dictionaries)
+            {
+                if (!candidate.TryGetByName(CosName.FONT_BBOX, out var token))
+                {
+                    continue;
+                }
+
+                if (!(token is ArrayToken box) || box.Data.Count != 4)
+                {
+                    continue;
+                }
+
+                // The four items are passed to the rectangle in the order they appear in the array.
+                var first = ((NumericToken) box.Data[0]).Data;
+                var second = ((NumericToken) box.Data[1]).Data;
+                var third = ((NumericToken) box.Data[2]).Data;
+                var fourth = ((NumericToken) box.Data[3]).Data;
+
+                return new PdfRectangle(first, second, third, fourth);
+            }
+
+            return null;
+        }
+
+        /// <summary>
+        /// Remembers the three most recently added tokens.
+        /// Index 0 is the latest token and index 2 the oldest; slots not yet filled are null.
+        /// </summary>
+        private class PreviousTokenSet
+        {
+            private readonly IToken[] tokens = new IToken[3];
+
+            public IToken this[int index]
+            {
+                get { return tokens[2 - index]; }
+            }
+
+            public void Add(IToken token)
+            {
+                // Shift everything one slot towards the start, dropping the oldest token.
+                for (var i = 1; i < tokens.Length; i++)
+                {
+                    tokens[i - 1] = tokens[i];
+                }
+
+                tokens[tokens.Length - 1] = token;
+            }
         }
    }
 }
+
+
+
--- a/src/UglyToad.PdfPig/Fonts/Type1/Type1Font.cs
+++ b/src/UglyToad.PdfPig/Fonts/Type1/Type1Font.cs
@@ -0,0 +1,32 @@
+namespace UglyToad.PdfPig.Fonts.Type1
+{
+    using System.Collections.Generic;
+    using Cos;
+    using Geometry;
+    using Tokenization.Tokens;
+    using Util.JetBrains.Annotations;
+
+    /// <summary>
+    /// Holds the values read from a Type 1 font file.
+    /// </summary>
+    internal class Type1Font
+    {
+        /// <summary>
+        /// The name given to the font.
+        /// </summary>
+        public string Name { get; }
+
+        /// <summary>
+        /// Maps a character code to a glyph name.
+        /// </summary>
+        public IReadOnlyDictionary<int, string> Encoding { get; }
+
+        /// <summary>
+        /// The font matrix array, if any.
+        /// </summary>
+        [CanBeNull]
+        public ArrayToken FontMatrix { get; }
+
+        /// <summary>
+        /// The bounding box of the font, if any.
+        /// </summary>
+        [CanBeNull]
+        public PdfRectangle BoundingBox { get; }
+
+        /// <summary>
+        /// Creates a new <see cref="Type1Font"/> from the supplied values, which are stored as given.
+        /// </summary>
+        public Type1Font(string name, IReadOnlyDictionary<int, string> encoding, ArrayToken fontMatrix, PdfRectangle boundingBox)
+        {
+            BoundingBox = boundingBox;
+            FontMatrix = fontMatrix;
+            Encoding = encoding;
+            Name = name;
+        }
+    }
+}
--- a/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs
+++ b/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs
@@ -13,6 +13,7 @@
    using Fonts.Parser.Handlers;
    using Fonts.Parser.Parts;
    using Fonts.TrueType.Parser;
+    using Fonts.Type1.Parser;
    using Graphics;
    using IO;
    using Logging;
@@ -90,7 +91,7 @@
                filterProvider,
                pdfObjectParser),
                new TrueTypeFontHandler(pdfObjectParser, filterProvider, cMapCache, fontDescriptorFactory, trueTypeFontParser, encodingReader),
-                new Type1FontHandler(pdfObjectParser, cMapCache, filterProvider, fontDescriptorFactory, encodingReader, pdfScanner),
+                new Type1FontHandler(pdfObjectParser, cMapCache, filterProvider, fontDescriptorFactory, encodingReader, pdfScanner, new Type1FontParser()),
                new Type3FontHandler(pdfObjectParser, cMapCache, filterProvider, encodingReader));

            var dynamicParser = container.Get<DynamicParser>();
--- a/src/UglyToad.PdfPig/Tokenization/Tokens/OperatorToken.cs
+++ b/src/UglyToad.PdfPig/Tokenization/Tokens/OperatorToken.cs
@@ -11,6 +11,13 @@
        public static readonly OperatorToken EndObject = new OperatorToken("endobj");
        public static readonly OperatorToken StartStream = new OperatorToken("stream");
        public static readonly OperatorToken EndStream = new OperatorToken("endstream");
+        public static readonly OperatorToken Eexec = new OperatorToken("eexec");
+        public static readonly OperatorToken Def = new OperatorToken("def");
+        public static readonly OperatorToken Dict = new OperatorToken("dict");
+        public static readonly OperatorToken Readonly = new OperatorToken("readonly");
+        public static readonly OperatorToken Dup = new OperatorToken("dup");
+        public static readonly OperatorToken For = new OperatorToken("for");
+        public static readonly OperatorToken Put = new OperatorToken("put");

        public string Data { get; }

@@ -39,6 +46,20 @@
                    return StartStream;
                case "endstream":
                    return EndStream;
+                case "eexec":
+                    return Eexec;
+                case "def":
+                    return Def;
+                case "dict":
+                    return Dict;
+                case "readonly":
+                    return Readonly;
+                case "dup":
+                    return Dup;
+                case "for":
+                    return For;
+                case "put":
+                    return Put;
                default:
                    return new OperatorToken(data);
            }
--- a/src/UglyToad.PdfPig/Tokenization/Type1ArrayTokenizer.cs
+++ b/src/UglyToad.PdfPig/Tokenization/Type1ArrayTokenizer.cs
@@ -0,0 +1,76 @@
+namespace UglyToad.PdfPig.Tokenization
+{
+    using System;
+    using System.Collections.Generic;
+    using System.Globalization;
+    using System.Text;
+    using IO;
+    using Tokens;
+
+    /// <summary>
+    /// Reads the content between '{' and the next '}' as a flat array of tokens.
+    /// Nested braces are not tracked: reading stops at the first '}'.
+    /// </summary>
+    internal class Type1ArrayTokenizer : ITokenizer
+    {
+        // Items may be separated by any of these characters, not only by a space.
+        private static readonly char[] Separators = { ' ', '\t', '\r', '\n', '\f', '\0' };
+
+        public bool ReadsNextByte { get; } = false;
+
+        /// <summary>
+        /// Produces an <see cref="ArrayToken"/> when <paramref name="currentByte"/> is '{'.
+        /// </summary>
+        /// <param name="currentByte">The byte the scanner is positioned on.</param>
+        /// <param name="inputBytes">The input, which is advanced up to and including the closing '}'.</param>
+        /// <param name="token">The array which was read, or null when the method returns false.</param>
+        /// <returns>True when an array was read; false when the current byte is not '{'.</returns>
+        public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
+        {
+            token = null;
+
+            if (currentByte != '{')
+            {
+                return false;
+            }
+
+            var builder = new StringBuilder();
+
+            while (inputBytes.MoveNext())
+            {
+                if (inputBytes.CurrentByte == '}')
+                {
+                    break;
+                }
+
+                builder.Append((char) inputBytes.CurrentByte);
+            }
+
+            var parts = builder.ToString().Split(Separators, StringSplitOptions.RemoveEmptyEntries);
+
+            var tokens = new List<IToken>(parts.Length);
+
+            foreach (var part in parts)
+            {
+                tokens.Add(CreateToken(part));
+            }
+
+            token = new ArrayToken(tokens);
+
+            return true;
+        }
+
+        // Converts a single non-empty item to a number, name, string or (failing all of those) an operator.
+        private static IToken CreateToken(string part)
+        {
+            // Font programs always use '.' for decimals so parse with the invariant culture,
+            // and allow the decimal point which AllowLeadingSign on its own rejects.
+            if (decimal.TryParse(part, NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint,
+                CultureInfo.InvariantCulture, out var value))
+            {
+                return new NumericToken(value);
+            }
+
+            if (part[0] == '/')
+            {
+                return new NameToken(part.Substring(1));
+            }
+
+            if (part[0] == '(' && part[part.Length - 1] == ')')
+            {
+                return new StringToken(part);
+            }
+
+            return OperatorToken.Create(part);
+        }
+    }
+}
--- a/src/UglyToad.PdfPig/Tokenization/Type1NameTokenizer.cs
+++ b/src/UglyToad.PdfPig/Tokenization/Type1NameTokenizer.cs
@@ -0,0 +1,42 @@
+namespace UglyToad.PdfPig.Tokenization
+{
+    using System.Text;
+    using IO;
+    using Parser.Parts;
+    using Tokens;
+
+    /// <summary>
+    /// Reads a name which starts with '/' and runs until whitespace or a delimiter character.
+    /// </summary>
+    internal class Type1NameTokenizer : ITokenizer
+    {
+        public bool ReadsNextByte { get; } = true;
+
+        /// <summary>
+        /// Produces a <see cref="NameToken"/> when <paramref name="currentByte"/> is '/'.
+        /// </summary>
+        /// <param name="currentByte">The byte the scanner is positioned on.</param>
+        /// <param name="inputBytes">The input, which is advanced onto the byte that ended the name.</param>
+        /// <param name="token">The name which was read, or null when the method returns false.</param>
+        /// <returns>True when a name was read; false when the current byte is not '/'.</returns>
+        public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
+        {
+            token = null;
+
+            if (currentByte != '/')
+            {
+                return false;
+            }
+
+            var builder = new StringBuilder();
+            while (inputBytes.MoveNext())
+            {
+                var current = inputBytes.CurrentByte;
+
+                if (ReadHelper.IsWhitespace(current) || IsDelimiter(current))
+                {
+                    break;
+                }
+
+                builder.Append((char)current);
+            }
+
+            token = new NameToken(builder.ToString());
+
+            return true;
+        }
+
+        // A name ends at any delimiter, closing ones included, so that "[/a /b]" gives "b" rather than "b]".
+        private static bool IsDelimiter(byte value)
+        {
+            switch ((char)value)
+            {
+                case '(':
+                case ')':
+                case '<':
+                case '>':
+                case '[':
+                case ']':
+                case '{':
+                case '}':
+                case '/':
+                case '%':
+                    return true;
+                default:
+                    return false;
+            }
+        }
+    }
+}