Start adding code and tests for reading metrics of Type 1 fonts

2025-11-28 17:47:12 +08:00 · 2018-04-11 22:51:31 +01:00
parent ea55256e78
commit 7af2b1bcb9
11 changed files with 1373 additions and 36 deletions
--- a/src/UglyToad.PdfPig.Tests/Fonts/Type1/AdobeUtopia.pfa
+++ b/src/UglyToad.PdfPig.Tests/Fonts/Type1/AdobeUtopia.pfa
--- a/src/UglyToad.PdfPig.Tests/Fonts/Type1/Type1FontParserTests.cs
+++ b/src/UglyToad.PdfPig.Tests/Fonts/Type1/Type1FontParserTests.cs
@@ -1,14 +1,37 @@
 namespace UglyToad.PdfPig.Tests.Fonts.Type1
 {
+    using System;
+    using System.IO;
+    using System.Linq;
+    using System.Text;
    using PdfPig.Fonts.Type1.Parser;
+    using PdfPig.IO;
+    using PdfPig.Util;
    using Xunit;

    public class Type1FontParserTests
    {
-        private readonly Type1FontParser parser = new Type1FontParser();
+        private readonly Type1FontParser parser = new Type1FontParser(new Type1EncryptedPortionParser());

        [Fact]
-        public void CanRead()
+        public void CanReadHexEncryptedPortion()
+        {
+            // Smoke test: succeeds when parsing the hex encoded (.pfa) font does not throw.
+            var input = new ByteArrayInputBytes(GetFileBytes("AdobeUtopia.pfa"));
+            parser.Parse(input);
+        }
+
+        [Fact(Skip = "Binary (.pfb) Type 1 fonts are not supported yet.")]
+        public void CanReadBinaryEncryptedPortion()
+        {
+            // Skipped rather than left empty so the runner does not report a vacuous pass.
+            // TODO: support reading in these pfb files
+            //var bytes = GetFileBytes("cmbx8.pfb");
+            //parser.Parse(new ByteArrayInputBytes(bytes));
+        }
+
+        [Fact]
+        public void CanReadAsciiPart()
        {
            var bytes = StringBytesTestConverter.Convert(Cmbx12, false);

@@ -91,5 +114,20 @@ currentfile eexec
 0000000000000000000000000000000000000000000000000000000000000000
 0000000000000000000000000000000000000000000000000000000000000000
 cleartomark";
+
+        /// <summary>Returns the bytes of the single embedded resource whose name contains <paramref name="name"/>.</summary>
+        private static byte[] GetFileBytes(string name)
+        {
+            var assembly = typeof(Type1FontParserTests).Assembly;
+            // Resource names are identifiers, so compare ordinally instead of by culture rules.
+            var match = assembly.GetManifestResourceNames()
+                .Single(x => x.IndexOf(name, StringComparison.OrdinalIgnoreCase) >= 0);
+            using (var stream = assembly.GetManifestResourceStream(match))
+            using (var memoryStream = new MemoryStream())
+            {
+                stream.CopyTo(memoryStream);
+                return memoryStream.ToArray();
+            }
+        }
    }
 }
--- a/src/UglyToad.PdfPig.Tests/UglyToad.PdfPig.Tests.csproj
+++ b/src/UglyToad.PdfPig.Tests/UglyToad.PdfPig.Tests.csproj
@@ -10,6 +10,7 @@
  <ItemGroup>
    <None Remove="Fonts\TrueType\google-simple-doc.ttf" />
    <None Remove="Fonts\TrueType\Roboto-Regular.ttf" />
+    <None Remove="Fonts\Type1\AdobeUtopia.pfa" />
    <None Remove="Integration\Documents\FarmerMac.pdf" />
    <None Remove="Integration\Documents\Font Size Test - from google chrome print pdf.pdf" />
    <None Remove="Integration\Documents\Font Size Test - from libre office.pdf" />
@@ -31,6 +32,9 @@
    <EmbeddedResource Include="Fonts\TrueType\Roboto-Regular.ttf">
      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
    </EmbeddedResource>
+    <EmbeddedResource Include="Fonts\Type1\AdobeUtopia.pfa">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </EmbeddedResource>
    <Content Include="Integration\Documents\FarmerMac.pdf">
      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
    </Content>
--- a/src/UglyToad.PdfPig/Fonts/Parser/Handlers/Type1FontHandler.cs
+++ b/src/UglyToad.PdfPig/Fonts/Parser/Handlers/Type1FontHandler.cs
@@ -12,6 +12,7 @@
    using Tokenization.Tokens;
    using Type1;
    using Type1.Parser;
+    using Util;

    internal class Type1FontHandler : IFontHandler
    {
@@ -111,6 +112,8 @@
                
                var bytes = stream.Decode(filterProvider);

+                var text = OtherEncodings.BytesAsLatin1String(bytes);
+
                var font = type1FontParser.Parse(new ByteArrayInputBytes(bytes));

                return font;
--- a/src/UglyToad.PdfPig/Fonts/Type1/Parser/Type1EncryptedPortionParser.cs
+++ b/src/UglyToad.PdfPig/Fonts/Type1/Parser/Type1EncryptedPortionParser.cs
@@ -0,0 +1,123 @@
+namespace UglyToad.PdfPig.Fonts.Type1.Parser
+{
+    using System.Collections.Generic;
+    using System.Linq;
+    using PdfPig.Parser.Parts;
+    using Tokenization.Tokens;
+    using Util;
+
+    internal class Type1EncryptedPortionParser
+    {
+        private const ushort EexecEncryptionKey = 55665;
+        private const int EexecRandomBytes = 4;
+
+        /// <summary>
+        /// Decrypts the eexec portion of a Type 1 font, converting it from ASCII hex first when needed.
+        /// The decrypted text is not interpreted yet, so nothing is returned.
+        /// </summary>
+        public void Parse(IReadOnlyList<byte> bytes)
+        {
+            var cipherText = IsBinary(bytes) ? bytes : ConvertHexToBinary(bytes);
+            var plainBytes = Decrypt(cipherText, EexecEncryptionKey, EexecRandomBytes);
+
+            var str = OtherEncodings.BytesAsLatin1String(plainBytes.ToArray());
+        }
+
+        /// <summary>
+        /// Decides whether the eexec ciphertext is raw binary rather than ASCII hex. The data is
+        /// only treated as hex when both of these hold for the first 4 bytes of the ciphertext:
+        /// The first byte is not whitespace.
+        /// Every one of the first four bytes is an ASCII hex character.
+        /// </summary>
+        /// <param name="bytes">The eexec portion of the font program.</param>
+        /// <returns>True to treat the data as binary (also when it has fewer than 4 bytes), false for hex.</returns>
+        private static bool IsBinary(IReadOnlyList<byte> bytes)
+        {
+            if (bytes.Count < 4)
+            {
+                return true;
+            }
+
+            if (ReadHelper.IsWhitespace(bytes[0]))
+            {
+                return true;
+            }
+
+            // Start at index 0: the first byte has to be a hex character too, not just non-whitespace.
+            for (var i = 0; i < 4; i++)
+            {
+                var b = bytes[i];
+                if (!ReadHelper.IsHex(b))
+                {
+                    return true;
+                }
+            }
+
+            return false;
+        }
+
+        /// <summary>
+        /// Decodes ASCII hex text to bytes: non-hex characters are skipped, an unpaired final digit is dropped.
+        /// </summary>
+        private static IReadOnlyList<byte> ConvertHexToBinary(IReadOnlyList<byte> bytes)
+        {
+            var decoded = new List<byte>(bytes.Count / 2);
+            // The high digit waiting for its partner, or null when the next hex digit starts a pair.
+            char? pendingHigh = null;
+
+            foreach (var value in bytes)
+            {
+                var current = (char)value;
+                if (!ReadHelper.IsHex(current))
+                {
+                    // TODO: do I need to assert this must be whitespace?
+                    continue;
+                }
+                if (pendingHigh.HasValue)
+                {
+                    decoded.Add(HexToken.Convert(pendingHigh.Value, current));
+                    pendingHigh = null;
+                }
+                else
+                {
+                    pendingHigh = current;
+                }
+            }
+
+            return decoded;
+        }
+
+        /// <summary>
+        /// Decrypts Type 1 ciphertext with the rolling 16 bit <paramref name="key"/> and drops the first
+        /// <paramref name="randomBytes"/> decrypted bytes; a value of -1 returns the input untouched.
+        /// </summary>
+        private static IReadOnlyList<byte> Decrypt(IReadOnlyList<byte> bytes, int key, int randomBytes)
+        {
+            if (randomBytes == -1)
+            {
+                return bytes;
+            }
+
+            if (randomBytes > bytes.Count || bytes.Count == 0)
+            {
+                return new byte[0];
+            }
+
+            const int c1 = 52845;
+            const int c2 = 22719;
+            var plainBytes = new byte[bytes.Count - randomBytes];
+            for (var i = 0; i < bytes.Count; i++)
+            {
+                int cipher = bytes[i];
+                if (i >= randomBytes)
+                {
+                    plainBytes[i - randomBytes] = (byte)(cipher ^ (key >> 8));
+                }
+                // The product can exceed int.MaxValue; only its low 16 bits are kept, so wrap explicitly.
+                key = unchecked((cipher + key) * c1 + c2) & 0xffff;
+            }
+
+            return plainBytes;
+        }
+    }
+}
--- a/src/UglyToad.PdfPig/Fonts/Type1/Parser/Type1FontParser.cs
+++ b/src/UglyToad.PdfPig/Fonts/Type1/Parser/Type1FontParser.cs
@@ -11,6 +11,14 @@

    internal class Type1FontParser
    {
+        private const string ClearToMark = "cleartomark";
+        private readonly Type1EncryptedPortionParser encryptedPortionParser;
+
+        /// <summary>Creates a font parser.</summary>
+        /// <param name="encryptedPortionParser">Receives the bytes read after each font's eexec operator.</param>
+        public Type1FontParser(Type1EncryptedPortionParser encryptedPortionParser)
+            => this.encryptedPortionParser = encryptedPortionParser;
+
        public Type1Font Parse(IInputBytes inputBytes)
        {
            var scanner = new CoreTokenScanner(inputBytes);
@@ -46,6 +54,8 @@
            scanner.RegisterCustomTokenizer((byte)'{', arrayTokenizer);
            scanner.RegisterCustomTokenizer((byte)'/', nameTokenizer);

+            var eexecPortion = new List<byte>();
+
            try
            {
                var tokenSet = new PreviousTokenSet();
@@ -54,7 +64,46 @@
                {
                    if (scanner.CurrentToken is OperatorToken operatorToken)
                    {
-                        HandleOperator(operatorToken, inputBytes, scanner, tokenSet, dictionaries);
+                        if (Equals(scanner.CurrentToken, OperatorToken.Eexec))
+                        {
+                            // Collect every byte between "eexec" and the "cleartomark" terminator.
+                            // 'matched' is how many leading terminator characters were just read;
+                            // they are held back until the candidate match completes or fails.
+                            var matched = 0;
+
+                            while (matched < ClearToMark.Length && inputBytes.MoveNext())
+                            {
+                                var current = inputBytes.CurrentByte;
+
+                                if (current == (byte)ClearToMark[matched])
+                                {
+                                    matched++;
+                                    continue;
+                                }
+
+                                // The candidate failed, so the held back characters were ordinary data.
+                                for (var i = 0; i < matched; i++)
+                                {
+                                    eexecPortion.Add((byte)ClearToMark[i]);
+                                }
+
+                                // The failing byte can itself start the terminator (as in "ccleartomark"),
+                                // so it is re-tested here instead of always being written out as data.
+                                if (current == (byte)ClearToMark[0])
+                                {
+                                    matched = 1;
+                                }
+                                else
+                                {
+                                    matched = 0;
+                                    eexecPortion.Add(current);
+                                }
+                            }
+                        }
+                        else
+                        {
+                            HandleOperator(operatorToken, scanner, tokenSet, dictionaries);
+                        }
                    }

                    tokenSet.Add(scanner.CurrentToken);
@@ -70,10 +119,12 @@
            var matrix = GetFontMatrix(dictionaries);
            var boundingBox = GetBoundingBox(dictionaries);

+            encryptedPortionParser.Parse(eexecPortion);
+
            return new Type1Font(name, encoding, matrix, boundingBox);
        }

-        private void HandleOperator(OperatorToken token, IInputBytes bytes, ISeekableTokenScanner scanner, PreviousTokenSet set, List<DictionaryToken> dictionaries)
+        private static void HandleOperator(OperatorToken token, ISeekableTokenScanner scanner, PreviousTokenSet set, List<DictionaryToken> dictionaries)
        {
            switch (token.Data)
            {
@@ -83,30 +134,11 @@

                    dictionaries.Add(dictionary);
                    break;
-                case "currentfile":
-                    if (!scanner.MoveNext() || scanner.CurrentToken != OperatorToken.Eexec)
-                    {
-                        return;
-                    }
-
-                    // For now we will not read this stuff.
-                    SkipEncryptedContent(bytes);
-                    break;
                default:
                    return;
            }
        }

-        private void SkipEncryptedContent(IInputBytes bytes)
-        {
-            bytes.Seek(bytes.Length - 1);
-
-            while (bytes.MoveNext())
-            {
-                // skip to end.
-            }
-        }
-
        private static DictionaryToken ReadDictionary(int keys, ISeekableTokenScanner scanner)
        {
            IToken previousToken = null;
@@ -277,7 +309,7 @@

            return null;
        }
-
+        
        private class PreviousTokenSet
        {
            private readonly IToken[] tokens = new IToken[3];
--- a/src/UglyToad.PdfPig/IO/ByteArrayInputBytes.cs
+++ b/src/UglyToad.PdfPig/IO/ByteArrayInputBytes.cs
@@ -1,11 +1,13 @@
 namespace UglyToad.PdfPig.IO
 {
    using System.Collections.Generic;
+    using System.Diagnostics;

    internal class ByteArrayInputBytes : IInputBytes
    {
        private readonly IReadOnlyList<byte> bytes;

+        [DebuggerStepThrough]
        public ByteArrayInputBytes(IReadOnlyList<byte> bytes)
        {
            this.bytes = bytes;
--- a/src/UglyToad.PdfPig/Parser/Parts/ReadHelper.cs
+++ b/src/UglyToad.PdfPig/Parser/Parts/ReadHelper.cs
@@ -250,8 +250,9 @@
        {
            return ' ' == c;
        }
-        
-        public static bool IsHexDigit(char ch)
+
+        public static bool IsHex(byte b) => IsHex((char) b);
+        public static bool IsHex(char ch)
        {
-            return char.IsDigit(ch) || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');
+            return (ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F'); // ASCII only, unlike char.IsDigit
        }
--- a/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs
+++ b/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs
@@ -104,7 +104,7 @@
                cMapCache, 
                filterProvider, pdfScanner),
                new TrueTypeFontHandler(log, pdfScanner, filterProvider, cMapCache, fontDescriptorFactory, trueTypeFontParser, encodingReader),
-                new Type1FontHandler(pdfScanner, cMapCache, filterProvider, fontDescriptorFactory, encodingReader, new Type1FontParser()),
+                new Type1FontHandler(pdfScanner, cMapCache, filterProvider, fontDescriptorFactory, encodingReader, new Type1FontParser(new Type1EncryptedPortionParser())),
                new Type3FontHandler(pdfScanner, cMapCache, filterProvider, encodingReader));
            
            var resourceContainer = new ResourceContainer(pdfScanner, fontFactory);
--- a/src/UglyToad.PdfPig/Tokenization/NameTokenizer.cs
+++ b/src/UglyToad.PdfPig/Tokenization/NameTokenizer.cs
@@ -36,7 +36,7 @@
                }
                else if (escapeActive)
                {
-                    if (ReadHelper.IsHexDigit((char)b))
+                    if (ReadHelper.IsHex((char)b))
                    {
                        escapedChars[postEscapeRead] = (char)b;
                        postEscapeRead++;
--- a/src/UglyToad.PdfPig/Tokenization/Tokens/HexToken.cs
+++ b/src/UglyToad.PdfPig/Tokenization/Tokens/HexToken.cs
@@ -32,14 +32,6 @@ namespace UglyToad.PdfPig.Tokenization.Tokens
            {'f', 0x0F }
        };

-        private static byte Convert(char high, char low)
-        {
-            var highByte = HexMap[high];
-            var lowByte = HexMap[low];
-
-            return (byte)(highByte << 4 | lowByte);
-        }
-
        public string Data { get; }

        public IReadOnlyList<byte> Bytes { get; }
@@ -75,6 +67,14 @@ namespace UglyToad.PdfPig.Tokenization.Tokens
            Data = builder.ToString();
        }

+        /// <summary>Combines two hex digits into a byte: high gives the upper four bits, low the lower four.</summary>
+        public static byte Convert(char high, char low)
+        {
+            var upperBits = HexMap[high] << 4;
+            var lowerBits = HexMap[low];
+            return (byte)(upperBits | lowerBits);
+        }
+
        public static int ConvertHexBytesToInt(HexToken token)
        {
            var bytes = token.Bytes;