From 206eb91ff1e170e7b717090eef98cff0d0cc7aef Mon Sep 17 00:00:00 2001 From: Eliot Jones Date: Thu, 21 Dec 2017 22:45:18 +0000 Subject: [PATCH] finish reading the encoding cmap for the type 0 font --- .../Fonts/Parser/Handlers/Type0FontHandler.cs | 62 +++++++++++++++---- src/UglyToad.Pdf/Util/Bootstrapper.cs | 2 +- 2 files changed, 52 insertions(+), 12 deletions(-) diff --git a/src/UglyToad.Pdf/Fonts/Parser/Handlers/Type0FontHandler.cs b/src/UglyToad.Pdf/Fonts/Parser/Handlers/Type0FontHandler.cs index fa145775..e91442c2 100644 --- a/src/UglyToad.Pdf/Fonts/Parser/Handlers/Type0FontHandler.cs +++ b/src/UglyToad.Pdf/Fonts/Parser/Handlers/Type0FontHandler.cs @@ -13,11 +13,13 @@ { private readonly CidFontFactory cidFontFactory; private readonly CMapCache cMapCache; + private readonly IFilterProvider filterProvider; - public Type0FontHandler(CidFontFactory cidFontFactory, CMapCache cMapCache) + public Type0FontHandler(CidFontFactory cidFontFactory, CMapCache cMapCache, IFilterProvider filterProvider) { this.cidFontFactory = cidFontFactory; this.cMapCache = cMapCache; + this.filterProvider = filterProvider; } public IFont Generate(PdfDictionary dictionary, ParsingArguments arguments) @@ -26,7 +28,7 @@ var baseFont = dictionary.GetName(CosName.BASE_FONT); - ReadEncoding(dictionary); + var cMap = ReadEncoding(dictionary, out var isCMapPredefined); if (TryGetFirstDescendant(dictionary, out var descendantObject)) { @@ -38,6 +40,8 @@ } } + var ucs2CMap = GetUcs2CMap(dictionary, isCMapPredefined, false); + CMap toUnicodeCMap = null; if (dictionary.ContainsKey(CosName.TO_UNICODE)) { @@ -45,12 +49,11 @@ var toUnicode = dynamicParser.Parse(arguments, toUnicodeValue as CosObject, false) as RawCosStream; - var decodedUnicodeCMap = toUnicode?.Decode(arguments.Container.Get()); + var decodedUnicodeCMap = toUnicode?.Decode(filterProvider); if (decodedUnicodeCMap != null) { - toUnicodeCMap = arguments.Container.Get() - .Parse(new ByteArrayInputBytes(decodedUnicodeCMap), arguments.IsLenientParsing); + toUnicodeCMap = cMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), arguments.IsLenientParsing); } } @@ -99,28 +102,65 @@ cidFontFactory.Generate(dictionary, arguments, arguments.IsLenientParsing); } - private void ReadEncoding(PdfDictionary dictionary) + private CMap ReadEncoding(PdfDictionary dictionary, out bool isCMapPredefined) { + isCMapPredefined = false; + CMap result = default(CMap); + if (dictionary.TryGetValue(CosName.ENCODING, out var value)) { if (value is CosName encodingName) { var cmap = cMapCache.Get(encodingName.Name); - if (cmap == null) - { - throw new InvalidOperationException("Missing CMap for " + encodingName.Name); - } + result = cmap ?? throw new InvalidOperationException("Missing CMap for " + encodingName.Name); + + isCMapPredefined = true; } else if (value is RawCosStream stream) { - + var decoded = stream.Decode(filterProvider); + + var cmap = cMapCache.Parse(new ByteArrayInputBytes(decoded), false); + + result = cmap ?? throw new InvalidOperationException("Could not read CMap for " + dictionary); } else { throw new InvalidOperationException("Could not read the encoding, expected a name or a stream but got a: " + value.GetType().Name); } } + + return result; + } + + private static CMap GetUcs2CMap(PdfDictionary dictionary, bool isCMapPredefined, bool usesDescendantAdobeFont) + { + if (!isCMapPredefined) + { + return null; + } + + /* + * If the font is a composite font that uses one of the predefined CMaps except Identity–H and Identity–V or whose descendant + * CIDFont uses the Adobe-GB1, Adobe-CNS1, Adobe-Japan1, or Adobe-Korea1 character collection use a UCS2 CMap. + */ + + var encodingName = dictionary.GetName(CosName.ENCODING); + + if (encodingName == null) + { + return null; + } + + var isPredefinedIdentityMap = encodingName.Equals(CosName.IDENTITY_H) || encodingName.Equals(CosName.IDENTITY_V); + + if (isPredefinedIdentityMap && !usesDescendantAdobeFont) + { + return null; + } + + throw new NotSupportedException("Support for UCS2 CMaps are not implemented yet. Please raise an issue."); } } } diff --git a/src/UglyToad.Pdf/Util/Bootstrapper.cs b/src/UglyToad.Pdf/Util/Bootstrapper.cs index 63914480..c26c94a5 100644 --- a/src/UglyToad.Pdf/Util/Bootstrapper.cs +++ b/src/UglyToad.Pdf/Util/Bootstrapper.cs @@ -59,7 +59,7 @@ var cmapParser = new CMapParser(); var afmParser = new AdobeFontMetricsParser(); - var type0FontFactory = new Type0FontHandler(new CidFontFactory(new FontDescriptorFactory(), new TrueTypeFontParser()), new CMapCache(cmapParser)); + var type0FontFactory = new Type0FontHandler(new CidFontFactory(new FontDescriptorFactory(), new TrueTypeFontParser()), new CMapCache(cmapParser), filterProvider); var fontFactory = new FontFactory(type0FontFactory); var container = new Container();