From d6e1dccc01de0d448aa2ca07cf82b5afac6c045e Mon Sep 17 00:00:00 2001 From: Eliot Jones Date: Thu, 5 Dec 2019 16:32:10 +0000 Subject: [PATCH] add support for standardencoding in type 1 fonts #78 --- .../Fonts/Parser/Handlers/Type1FontHandler.cs | 7 ++-- .../Fonts/Simple/Type1FontSimple.cs | 9 ++-- .../Fonts/Type1/Parser/Type1FontParser.cs | 42 +++++++++++++------ src/UglyToad.PdfPig/UglyToad.PdfPig.csproj | 2 +- 4 files changed, 38 insertions(+), 22 deletions(-) diff --git a/src/UglyToad.PdfPig/Fonts/Parser/Handlers/Type1FontHandler.cs b/src/UglyToad.PdfPig/Fonts/Parser/Handlers/Type1FontHandler.cs index 622ecbad..638634d9 100644 --- a/src/UglyToad.PdfPig/Fonts/Parser/Handlers/Type1FontHandler.cs +++ b/src/UglyToad.PdfPig/Fonts/Parser/Handlers/Type1FontHandler.cs @@ -97,13 +97,14 @@ } } - Encoding fromFont = null; - font?.Match(x => fromFont = x.Encoding != null ? new BuiltInEncoding(x.Encoding) : default(Encoding), x => + Encoding fromFont = font?.Match(x => x.Encoding != null ? new BuiltInEncoding(x.Encoding) : default(Encoding), x => { if (x.Fonts != null && x.Fonts.Count > 0) { - fromFont = x.Fonts.First().Value.Encoding; + return x.Fonts.First().Value.Encoding; } + + return default(Encoding); }); Encoding encoding = encodingReader.Read(dictionary, isLenientParsing, descriptor, fromFont); diff --git a/src/UglyToad.PdfPig/Fonts/Simple/Type1FontSimple.cs b/src/UglyToad.PdfPig/Fonts/Simple/Type1FontSimple.cs index 0603d360..8f905c6a 100644 --- a/src/UglyToad.PdfPig/Fonts/Simple/Type1FontSimple.cs +++ b/src/UglyToad.PdfPig/Fonts/Simple/Type1FontSimple.cs @@ -1,6 +1,5 @@ namespace UglyToad.PdfPig.Fonts.Simple { - using System; using System.Collections.Generic; using Cmap; using CompactFontFormat; @@ -165,11 +164,10 @@ return new PdfRectangle(0, 0, widths[characterCode - firstChar], 0); } - var rect = default(PdfRectangle?); - fontProgram.Match(x => + var rect = fontProgram.Match(x => { var name = encoding.GetName(characterCode); - rect = x.GetCharacterBoundingBox(name); + return x.GetCharacterBoundingBox(name); }, x => { @@ -182,7 +180,8 @@ { characterName = x.GetCharacterName(characterCode); } - rect = x.GetCharacterBoundingBox(characterName); + + return x.GetCharacterBoundingBox(characterName); }); if (!rect.HasValue) diff --git a/src/UglyToad.PdfPig/Fonts/Type1/Parser/Type1FontParser.cs b/src/UglyToad.PdfPig/Fonts/Type1/Parser/Type1FontParser.cs index 254f185b..0d16fa43 100644 --- a/src/UglyToad.PdfPig/Fonts/Type1/Parser/Type1FontParser.cs +++ b/src/UglyToad.PdfPig/Fonts/Type1/Parser/Type1FontParser.cs @@ -2,6 +2,7 @@ { using System; using System.Collections.Generic; + using Encodings; using Exceptions; using Geometry; using IO; @@ -254,7 +255,8 @@ if (key.Data.Equals(NameToken.Encoding)) { - dictionary[key] = ReadEncoding(scanner); + var encoding = ReadEncoding(scanner); + dictionary[key] = (IToken)encoding.encoding ?? encoding.name; continue; } @@ -296,19 +298,25 @@ return new DictionaryToken(dictionary); } - private static ArrayToken ReadEncoding(ISeekableTokenScanner scanner) + private static (ArrayToken encoding, NameToken name) ReadEncoding(ISeekableTokenScanner scanner) { var result = new List(); // Treat encoding differently, it's what we came here for! if (!scanner.TryReadToken(out NumericToken _)) { - return new ArrayToken(result); + // The tokens following /Encoding may be StandardEncoding def. + if (scanner.CurrentToken is OperatorToken encodingName + && encodingName.Data.Equals(NameToken.StandardEncoding)) + { + return (null, NameToken.StandardEncoding); + } + return (new ArrayToken(result), null); } if (!scanner.TryReadToken(out OperatorToken arrayOperatorToken) || arrayOperatorToken.Data != "array") { - return new ArrayToken(result); + return (new ArrayToken(result), null); } while (scanner.MoveNext() && (!(scanner.CurrentToken is OperatorToken forOperator) || forOperator.Data != "for")) @@ -318,7 +326,7 @@ if (scanner.CurrentToken != OperatorToken.For) { - return new ArrayToken(result); + return (new ArrayToken(result), null); } while (scanner.MoveNext() && scanner.CurrentToken != OperatorToken.Def && scanner.CurrentToken != OperatorToken.Readonly) @@ -347,26 +355,34 @@ // skip } - return new ArrayToken(result); + return (new ArrayToken(result), null); } - private static Dictionary GetEncoding(IReadOnlyList dictionaries) + private static IReadOnlyDictionary GetEncoding(IReadOnlyList dictionaries) { var result = new Dictionary(); foreach (var dictionary in dictionaries) { - if (dictionary.TryGet(NameToken.Encoding, out var token) && token is ArrayToken encodingArray) + if (dictionary.TryGet(NameToken.Encoding, out var token)) { - for (var i = 0; i < encodingArray.Data.Count; i += 2) + if (token is ArrayToken encodingArray) { - var code = (NumericToken)encodingArray.Data[i]; - var name = (NameToken)encodingArray.Data[i + 1]; + for (var i = 0; i < encodingArray.Data.Count; i += 2) + { + var code = (NumericToken) encodingArray.Data[i]; + var name = (NameToken) encodingArray.Data[i + 1]; - result[code.Int] = name.Data; + result[code.Int] = name.Data; + } + + return result; } - return result; + if (token is NameToken encodingName && encodingName.Equals(NameToken.StandardEncoding)) + { + return StandardEncoding.Instance.CodeToNameMap; + } } } diff --git a/src/UglyToad.PdfPig/UglyToad.PdfPig.csproj b/src/UglyToad.PdfPig/UglyToad.PdfPig.csproj index 77bc61af..80a4b2f3 100644 --- a/src/UglyToad.PdfPig/UglyToad.PdfPig.csproj +++ b/src/UglyToad.PdfPig/UglyToad.PdfPig.csproj @@ -12,7 +12,7 @@ PDF;Reader;Document;Adobe;PDFBox;PdfPig;pdf-extract https://github.com/UglyToad/PdfPig true - 0.0.9 + 0.0.9.91 0.0.9.0 0.0.9.0 https://raw.githubusercontent.com/UglyToad/PdfPig/master/documentation/pdfpig.png