diff --git a/src/UglyToad.PdfPig.Tests/Fonts/CompactFontFormat/CompactFontFormatParserTests.cs b/src/UglyToad.PdfPig.Tests/Fonts/CompactFontFormat/CompactFontFormatParserTests.cs index 07bbd0c5..3413a9b3 100644 --- a/src/UglyToad.PdfPig.Tests/Fonts/CompactFontFormat/CompactFontFormatParserTests.cs +++ b/src/UglyToad.PdfPig.Tests/Fonts/CompactFontFormat/CompactFontFormatParserTests.cs @@ -4,15 +4,13 @@ using System.IO; using System.Linq; using PdfPig.Fonts.CompactFontFormat; - using PdfPig.Fonts.CompactFontFormat.CharStrings; using PdfPig.Fonts.CompactFontFormat.Dictionaries; using Xunit; public class CompactFontFormatParserTests { - private readonly CompactFontFormatParser parser = new CompactFontFormatParser(new CompactFontFormatIndividualFontParser( - new CompactFontFormatIndexReader(), new CompactFontFormatTopLevelDictionaryReader(), new CompactFontFormatPrivateDictionaryReader()), - new CompactFontFormatIndexReader()); + private readonly CompactFontFormatParser parser = new CompactFontFormatParser( + new CompactFontFormatIndividualFontParser(new CompactFontFormatTopLevelDictionaryReader(), new CompactFontFormatPrivateDictionaryReader())); [Fact] public void CanReadMinionPro() diff --git a/src/UglyToad.PdfPig/Fonts/CMapCache.cs b/src/UglyToad.PdfPig/Fonts/CMapCache.cs deleted file mode 100644 index 21c6d4c4..00000000 --- a/src/UglyToad.PdfPig/Fonts/CMapCache.cs +++ /dev/null @@ -1,45 +0,0 @@ -namespace UglyToad.PdfPig.Fonts -{ - using System; - using System.Collections.Generic; - using Cmap; - using IO; - using Parser; - - internal class CMapCache - { - private readonly Dictionary cache = new Dictionary(StringComparer.InvariantCultureIgnoreCase); - private readonly CMapParser cMapParser; - - public CMapCache(CMapParser cMapParser) - { - this.cMapParser = cMapParser; - } - - public CMap Get(string name) - { - if (cache.TryGetValue(name, out var result)) - { - return result; - } - - result = cMapParser.ParseExternal(name); - - cache[name] = result; - - return result; - } - - public CMap Parse(IInputBytes bytes, bool isLenientParsing) - { - if (bytes == null) - { - throw new ArgumentNullException(nameof(bytes)); - } - - var result = cMapParser.Parse(bytes, isLenientParsing); - - return result; - } - } -} diff --git a/src/UglyToad.PdfPig/Fonts/CharacterIdentifierSystemInfo.cs b/src/UglyToad.PdfPig/Fonts/CharacterIdentifierSystemInfo.cs index 9b7a7f1b..4ca549e3 100644 --- a/src/UglyToad.PdfPig/Fonts/CharacterIdentifierSystemInfo.cs +++ b/src/UglyToad.PdfPig/Fonts/CharacterIdentifierSystemInfo.cs @@ -31,7 +31,7 @@ public override string ToString() { - return $"{Registry} | {Ordering} | {Supplement}"; + return $"{Registry}-{Ordering}-{Supplement}"; } } } \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Fonts/Cmap/CMapCache.cs b/src/UglyToad.PdfPig/Fonts/Cmap/CMapCache.cs new file mode 100644 index 00000000..4d7aa8d8 --- /dev/null +++ b/src/UglyToad.PdfPig/Fonts/Cmap/CMapCache.cs @@ -0,0 +1,44 @@ +namespace UglyToad.PdfPig.Fonts.Cmap +{ + using System; + using System.Collections.Generic; + using IO; + using Parser; + + internal static class CMapCache + { + private static readonly Dictionary Cache = new Dictionary(StringComparer.OrdinalIgnoreCase); + private static readonly object Lock = new object(); + + private static readonly CMapParser CMapParser = new CMapParser(); + + public static CMap Get(string name) + { + lock (Lock) + { + if (Cache.TryGetValue(name, out var result)) + { + return result; + } + + result = CMapParser.ParseExternal(name); + + Cache[name] = result; + + return result; + } + } + + public static CMap Parse(IInputBytes bytes, bool isLenientParsing) + { + if (bytes == null) + { + throw new ArgumentNullException(nameof(bytes)); + } + + var result = CMapParser.Parse(bytes, isLenientParsing); + + return result; + } + } +} diff --git a/src/UglyToad.PdfPig/Fonts/Cmap/CmapUtils.cs b/src/UglyToad.PdfPig/Fonts/Cmap/CmapUtils.cs index 0cd2a6eb..d40c3e1f 100644 --- a/src/UglyToad.PdfPig/Fonts/Cmap/CmapUtils.cs +++ b/src/UglyToad.PdfPig/Fonts/Cmap/CmapUtils.cs @@ -2,7 +2,7 @@ { using System.Collections.Generic; - internal static class CmapUtils + internal static class CMapUtils { public static int ToInt(this IReadOnlyList data, int length) { diff --git a/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CharStrings/Type2CharStringParser.cs b/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CharStrings/Type2CharStringParser.cs index aa736fe5..e5bf398d 100644 --- a/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CharStrings/Type2CharStringParser.cs +++ b/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CharStrings/Type2CharStringParser.cs @@ -683,22 +683,16 @@ }; public static Type2CharStrings Parse([NotNull] IReadOnlyList> charStringBytes, - [NotNull] CompactFontFormatIndex localSubroutines, - [NotNull] CompactFontFormatIndex globalSubroutines, ICompactFontFormatCharset charset) + CompactFontFormatSubroutinesSelector subroutinesSelector, ICompactFontFormatCharset charset) { if (charStringBytes == null) { throw new ArgumentNullException(nameof(charStringBytes)); } - if (localSubroutines == null) + if (subroutinesSelector == null) { - throw new ArgumentNullException(nameof(localSubroutines)); - } - - if (globalSubroutines == null) - { - throw new ArgumentNullException(nameof(globalSubroutines)); + throw new ArgumentNullException(nameof(subroutinesSelector)); } var charStrings = new Dictionary(); @@ -706,11 +700,12 @@ { var charString = charStringBytes[i]; var name = charset.GetNameByGlyphId(i); + var (globalSubroutines, localSubroutines) = subroutinesSelector.GetSubroutines(i); var sequence = ParseSingle(charString.ToList(), localSubroutines, globalSubroutines); charStrings[name] = new Type2CharStrings.CommandSequence(sequence); } - return new Type2CharStrings(charStrings, localSubroutines, globalSubroutines); + return new Type2CharStrings(charStrings); } private static IReadOnlyList> ParseSingle(List bytes, diff --git a/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CharStrings/Type2CharStrings.cs b/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CharStrings/Type2CharStrings.cs index 7db38d10..9fafe002 100644 --- a/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CharStrings/Type2CharStrings.cs +++ b/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CharStrings/Type2CharStrings.cs @@ -3,7 +3,6 @@ using System; using System.Collections.Generic; using System.Linq; - using Dictionaries; using Geometry; using Util; using Util.JetBrains.Annotations; @@ -23,24 +22,10 @@ /// public IReadOnlyDictionary CharStrings { get; } - /// - /// The indexed bytes for the local subroutines in this font. - /// - [NotNull] - public CompactFontFormatIndex LocalSubroutines { get; } - /// - /// The indexed bytes for the global subroutines in this font set. - /// - [NotNull] - public CompactFontFormatIndex GlobalSubroutines { get; } - - public Type2CharStrings(IReadOnlyDictionary charStrings, CompactFontFormatIndex localSubroutines, - CompactFontFormatIndex globalSubroutines) + public Type2CharStrings(IReadOnlyDictionary charStrings) { CharStrings = charStrings ?? throw new ArgumentNullException(nameof(charStrings)); - LocalSubroutines = localSubroutines ?? throw new ArgumentNullException(nameof(localSubroutines)); - GlobalSubroutines = globalSubroutines ?? throw new ArgumentNullException(nameof(globalSubroutines)); } /// diff --git a/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatFont.cs b/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatFont.cs index 94b27d19..172efa4b 100644 --- a/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatFont.cs +++ b/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatFont.cs @@ -66,7 +66,6 @@ { public IReadOnlyList FontDictionaries { get; } public IReadOnlyList PrivateDictionaries { get; } - public IReadOnlyList LocalSubroutines { get; } public ICompactFontFormatFdSelect FdSelect { get; } public CompactFontFormatCidFont(CompactFontFormatTopLevelDictionary topDictionary, CompactFontFormatPrivateDictionary privateDictionary, @@ -74,12 +73,10 @@ Union charStrings, IReadOnlyList fontDictionaries, IReadOnlyList privateDictionaries, - IReadOnlyList localSubroutines, ICompactFontFormatFdSelect fdSelect) : base(topDictionary, privateDictionary, charset, charStrings, null) { FontDictionaries = fontDictionaries; PrivateDictionaries = privateDictionaries; - LocalSubroutines = localSubroutines; FdSelect = fdSelect; } diff --git a/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatIndexReader.cs b/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatIndexReader.cs index 986f0a32..725b4239 100644 --- a/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatIndexReader.cs +++ b/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatIndexReader.cs @@ -1,12 +1,11 @@ -using System; - -namespace UglyToad.PdfPig.Fonts.CompactFontFormat +namespace UglyToad.PdfPig.Fonts.CompactFontFormat { + using System; using Util; - internal class CompactFontFormatIndexReader + internal static class CompactFontFormatIndexReader { - public CompactFontFormatIndex ReadDictionaryData(CompactFontFormatData data) + public static CompactFontFormatIndex ReadDictionaryData(CompactFontFormatData data) { var index = ReadIndex(data); @@ -39,7 +38,7 @@ namespace UglyToad.PdfPig.Fonts.CompactFontFormat return new CompactFontFormatIndex(results); } - public int[] ReadIndex(CompactFontFormatData data) + public static int[] ReadIndex(CompactFontFormatData data) { var count = data.ReadCard16(); diff --git a/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatIndividualFontParser.cs b/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatIndividualFontParser.cs index ea4540cf..169e37e9 100644 --- a/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatIndividualFontParser.cs +++ b/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatIndividualFontParser.cs @@ -13,15 +13,12 @@ internal class CompactFontFormatIndividualFontParser { - private readonly CompactFontFormatIndexReader indexReader; private readonly CompactFontFormatTopLevelDictionaryReader topLevelDictionaryReader; private readonly CompactFontFormatPrivateDictionaryReader privateDictionaryReader; - public CompactFontFormatIndividualFontParser(CompactFontFormatIndexReader indexReader, - CompactFontFormatTopLevelDictionaryReader topLevelDictionaryReader, + public CompactFontFormatIndividualFontParser(CompactFontFormatTopLevelDictionaryReader topLevelDictionaryReader, CompactFontFormatPrivateDictionaryReader privateDictionaryReader) { - this.indexReader = indexReader; this.topLevelDictionaryReader = topLevelDictionaryReader; this.privateDictionaryReader = privateDictionaryReader; } @@ -53,12 +50,12 @@ { data.Seek(privateDictionary.LocalSubroutineOffset.Value + topDictionary.PrivateDictionaryLocation.Value.Offset); - localSubroutines = indexReader.ReadDictionaryData(data); + localSubroutines = CompactFontFormatIndexReader.ReadDictionaryData(data); } data.Seek(topDictionary.CharStringsOffset); - var charStringIndex = indexReader.ReadDictionaryData(data); + var charStringIndex = CompactFontFormatIndexReader.ReadDictionaryData(data); ICompactFontFormatCharset charset; if (topDictionary.CharSetOffset >= 0) @@ -93,24 +90,14 @@ charset = CompactFontFormatIsoAdobeCharset.Value; } } - - data.Seek(topDictionary.CharStringsOffset); - - Type2CharStrings charStrings; - switch (topDictionary.CharStringType) - { - case CompactFontFormatCharStringType.Type1: - throw new NotImplementedException("Type 1 CharStrings are not currently supported in CFF font."); - case CompactFontFormatCharStringType.Type2: - charStrings = Type2CharStringParser.Parse(charStringIndex, localSubroutines, globalSubroutineIndex, charset); - break; - default: - throw new ArgumentOutOfRangeException($"Unexpected CharString type in CFF font: {topDictionary.CharStringType}."); - } - + if (topDictionary.IsCidFont) { - return ReadCidFont(data, topDictionary, charStringIndex.Count, stringIndex, privateDictionary, charset, Union.Two(charStrings)); + return ReadCidFont(data, topDictionary, charStringIndex.Count, stringIndex, privateDictionary, + charset, + globalSubroutineIndex, + localSubroutines, + charStringIndex); } var encoding = topDictionary.EncodingOffset; @@ -133,6 +120,10 @@ } } + var subroutineSelector = new CompactFontFormatSubroutinesSelector(globalSubroutineIndex, localSubroutines); + + var charStrings = ReadCharStrings(data, topDictionary, charStringIndex, subroutineSelector, charset); + return new CompactFontFormatFont(topDictionary, privateDictionary, charset, Union.Two(charStrings), fontEncoding); } @@ -204,22 +195,43 @@ return "SID" + index; } + private static Type2CharStrings ReadCharStrings(CompactFontFormatData data, CompactFontFormatTopLevelDictionary topDictionary, + CompactFontFormatIndex charStringIndex, + CompactFontFormatSubroutinesSelector subroutinesSelector, + ICompactFontFormatCharset charset) + { + data.Seek(topDictionary.CharStringsOffset); + + switch (topDictionary.CharStringType) + { + case CompactFontFormatCharStringType.Type1: + throw new NotImplementedException("Type 1 CharStrings are not currently supported in CFF font."); + case CompactFontFormatCharStringType.Type2: + return Type2CharStringParser.Parse(charStringIndex, subroutinesSelector, charset); + default: + throw new ArgumentOutOfRangeException($"Unexpected CharString type in CFF font: {topDictionary.CharStringType}."); + } + } + private CompactFontFormatCidFont ReadCidFont(CompactFontFormatData data, CompactFontFormatTopLevelDictionary topLevelDictionary, int numberOfGlyphs, IReadOnlyList stringIndex, CompactFontFormatPrivateDictionary privateDictionary, ICompactFontFormatCharset charset, - Union charstrings) + CompactFontFormatIndex globalSubroutines, + CompactFontFormatIndex localSubroutinesTop, + CompactFontFormatIndex charStringIndex) { var offset = topLevelDictionary.CidFontOperators.FontDictionaryArray; data.Seek(offset); - var fontDict = indexReader.ReadDictionaryData(data); + var fontDict = CompactFontFormatIndexReader.ReadDictionaryData(data); var privateDictionaries = new List(); var fontDictionaries = new List(); var fontLocalSubroutines = new List(); + foreach (var index in fontDict) { var topLevelDictionaryCid = topLevelDictionaryReader.Read(new CompactFontFormatData(index), stringIndex); @@ -238,9 +250,13 @@ if (privateDictionaryCid.LocalSubroutineOffset.HasValue && privateDictionaryCid.LocalSubroutineOffset.Value > 0) { data.Seek(topLevelDictionaryCid.PrivateDictionaryLocation.Value.Offset + privateDictionaryCid.LocalSubroutineOffset.Value); - var localSubroutines = indexReader.ReadDictionaryData(data); + var localSubroutines = CompactFontFormatIndexReader.ReadDictionaryData(data); fontLocalSubroutines.Add(localSubroutines); } + else + { + fontLocalSubroutines.Add(null); + } fontDictionaries.Add(topLevelDictionaryCid); privateDictionaries.Add(privateDictionaryCid); @@ -267,8 +283,15 @@ throw new InvalidFontFormatException($"Invalid Font Dictionary Select format: {format}."); } - return new CompactFontFormatCidFont(topLevelDictionary, privateDictionary, charset, charstrings, - fontDictionaries, privateDictionaries, fontLocalSubroutines, fdSelect); + var subroutineSelector = new CompactFontFormatSubroutinesSelector(globalSubroutines, localSubroutinesTop, + fdSelect, fontLocalSubroutines); + + var charStrings = ReadCharStrings(data, topLevelDictionary, charStringIndex, subroutineSelector, charset); + + var union = Union.Two(charStrings); + + return new CompactFontFormatCidFont(topLevelDictionary, privateDictionary, charset, union, + fontDictionaries, privateDictionaries, fdSelect); } private static CompactFontFormat0FdSelect ReadFormat0FdSelect(CompactFontFormatData data, int numberOfGlyphs, diff --git a/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatParser.cs b/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatParser.cs index 2cf04bd7..129bc54a 100644 --- a/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatParser.cs +++ b/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatParser.cs @@ -11,12 +11,10 @@ private const string TagTtfonly = "\u0000\u0001\u0000\u0000"; private readonly CompactFontFormatIndividualFontParser individualFontParser; - private readonly CompactFontFormatIndexReader indexReader; - public CompactFontFormatParser(CompactFontFormatIndividualFontParser individualFontParser, CompactFontFormatIndexReader indexReader) + public CompactFontFormatParser(CompactFontFormatIndividualFontParser individualFontParser) { this.individualFontParser = individualFontParser; - this.indexReader = indexReader; } public CompactFontFormatFontProgram Parse(CompactFontFormatData data) @@ -41,11 +39,11 @@ var fontNames = ReadStringIndex(data); - var topLevelDictionaryIndex = indexReader.ReadDictionaryData(data); + var topLevelDictionaryIndex = CompactFontFormatIndexReader.ReadDictionaryData(data); var stringIndex = ReadStringIndex(data); - var globalSubroutineIndex = indexReader.ReadDictionaryData(data); + var globalSubroutineIndex = CompactFontFormatIndexReader.ReadDictionaryData(data); var fonts = new Dictionary(); @@ -79,9 +77,9 @@ /// /// Reads indexed string data. /// - private string[] ReadStringIndex(CompactFontFormatData data) + private static string[] ReadStringIndex(CompactFontFormatData data) { - var index = indexReader.ReadIndex(data); + var index = CompactFontFormatIndexReader.ReadIndex(data); if (index.Length == 0) { diff --git a/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatSubroutinesSelector.cs b/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatSubroutinesSelector.cs new file mode 100644 index 00000000..3fc82fbf --- /dev/null +++ b/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CompactFontFormatSubroutinesSelector.cs @@ -0,0 +1,53 @@ +namespace UglyToad.PdfPig.Fonts.CompactFontFormat +{ + using System.Collections.Generic; + + internal class CompactFontFormatSubroutinesSelector + { + private readonly CompactFontFormatIndex global; + private readonly CompactFontFormatIndex local; + + // CID fonts have per sub-font subroutines. + private readonly bool isCid; + private readonly ICompactFontFormatFdSelect fdSelect; + private readonly IReadOnlyList perFontLocalSubroutines; + + public CompactFontFormatSubroutinesSelector(CompactFontFormatIndex global, CompactFontFormatIndex local) + { + this.global = global; + this.local = local; + } + + public CompactFontFormatSubroutinesSelector(CompactFontFormatIndex global, CompactFontFormatIndex local, + ICompactFontFormatFdSelect fdSelect, + IReadOnlyList perFontLocalSubroutines) + { + this.global = global; + this.local = local; + + this.fdSelect = fdSelect; + this.perFontLocalSubroutines = perFontLocalSubroutines; + + isCid = true; + } + + public (CompactFontFormatIndex global, CompactFontFormatIndex local) GetSubroutines(int glyphId) + { + if (!isCid) + { + return (global, local); + } + + var fdIndex = fdSelect.GetFontDictionaryIndex(glyphId); + + if (fdIndex < 0 || fdIndex >= perFontLocalSubroutines.Count) + { + return (global, local); + } + + var localPerFont = perFontLocalSubroutines[fdIndex]; + + return (global, localPerFont ?? local); + } + } +} diff --git a/src/UglyToad.PdfPig/Fonts/Composite/Type0Font.cs b/src/UglyToad.PdfPig/Fonts/Composite/Type0Font.cs index 1eedb8fc..aa12395d 100644 --- a/src/UglyToad.PdfPig/Fonts/Composite/Type0Font.cs +++ b/src/UglyToad.PdfPig/Fonts/Composite/Type0Font.cs @@ -14,6 +14,10 @@ /// internal class Type0Font : IFont, IVerticalWritingSupported { + private readonly CMap ucs2CMap; + // ReSharper disable once NotAccessedField.Local + private readonly bool isChineseJapaneseOrKorean; + public NameToken Name => BaseFont; [NotNull] @@ -30,8 +34,13 @@ public bool IsVertical => CMap.WritingMode == WritingMode.Vertical; - public Type0Font(NameToken baseFont, ICidFont cidFont, CMap cmap, CMap toUnicodeCMap) + public Type0Font(NameToken baseFont, ICidFont cidFont, CMap cmap, CMap toUnicodeCMap, + CMap ucs2CMap, + bool isChineseJapaneseOrKorean) { + this.ucs2CMap = ucs2CMap; + this.isChineseJapaneseOrKorean = isChineseJapaneseOrKorean; + BaseFont = baseFont ?? throw new ArgumentNullException(nameof(baseFont)); CidFont = cidFont ?? throw new ArgumentNullException(nameof(cidFont)); CMap = cmap ?? throw new ArgumentNullException(nameof(cmap)); @@ -55,6 +64,11 @@ if (!ToUnicode.CanMapToUnicode) { + if (ucs2CMap != null && ucs2CMap.TryConvertToUnicode(characterCode, out value)) + { + return value != null; + } + return false; } diff --git a/src/UglyToad.PdfPig/Fonts/Encodings/GlyphList.cs b/src/UglyToad.PdfPig/Fonts/Encodings/GlyphList.cs index 059504c1..5d7c2cd7 100644 --- a/src/UglyToad.PdfPig/Fonts/Encodings/GlyphList.cs +++ b/src/UglyToad.PdfPig/Fonts/Encodings/GlyphList.cs @@ -18,6 +18,9 @@ private static readonly Lazy LazyAdobeGlyphList = new Lazy(() => GlyphListFactory.Get("glyphlist")); public static GlyphList AdobeGlyphList => LazyAdobeGlyphList.Value; + private static readonly Lazy LazyAdditionalGlyphList = new Lazy(() => GlyphListFactory.Get("additional")); + public static GlyphList AdditionalGlyphList => LazyAdditionalGlyphList.Value; + private static readonly Lazy LazyZapfDingbatsGlyphList = new Lazy(() => GlyphListFactory.Get("zapfdingbats")); public static GlyphList ZapfDingbats => LazyZapfDingbatsGlyphList.Value; diff --git a/src/UglyToad.PdfPig/Fonts/Parser/Handlers/TrueTypeFontHandler.cs b/src/UglyToad.PdfPig/Fonts/Parser/Handlers/TrueTypeFontHandler.cs index 49b1085d..185bb0f2 100644 --- a/src/UglyToad.PdfPig/Fonts/Parser/Handlers/TrueTypeFontHandler.cs +++ b/src/UglyToad.PdfPig/Fonts/Parser/Handlers/TrueTypeFontHandler.cs @@ -24,14 +24,12 @@ private readonly ILog log; private readonly IPdfTokenScanner pdfScanner; private readonly IFilterProvider filterProvider; - private readonly CMapCache cMapCache; private readonly FontDescriptorFactory fontDescriptorFactory; private readonly TrueTypeFontParser trueTypeFontParser; private readonly IEncodingReader encodingReader; private readonly ISystemFontFinder systemFontFinder; public TrueTypeFontHandler(ILog log, IPdfTokenScanner pdfScanner, IFilterProvider filterProvider, - CMapCache cMapCache, FontDescriptorFactory fontDescriptorFactory, TrueTypeFontParser trueTypeFontParser, IEncodingReader encodingReader, @@ -39,7 +37,6 @@ { this.log = log; this.filterProvider = filterProvider; - this.cMapCache = cMapCache; this.fontDescriptorFactory = fontDescriptorFactory; this.trueTypeFontParser = trueTypeFontParser; this.encodingReader = encodingReader; @@ -113,7 +110,7 @@ if (decodedUnicodeCMap != null) { - toUnicodeCMap = cMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing); + toUnicodeCMap = CMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing); } } diff --git a/src/UglyToad.PdfPig/Fonts/Parser/Handlers/Type0FontHandler.cs b/src/UglyToad.PdfPig/Fonts/Parser/Handlers/Type0FontHandler.cs index 89c27fec..ae8c6ebb 100644 --- a/src/UglyToad.PdfPig/Fonts/Parser/Handlers/Type0FontHandler.cs +++ b/src/UglyToad.PdfPig/Fonts/Parser/Handlers/Type0FontHandler.cs @@ -16,15 +16,13 @@ internal class Type0FontHandler : IFontHandler { private readonly CidFontFactory cidFontFactory; - private readonly CMapCache cMapCache; private readonly IFilterProvider filterProvider; private readonly IPdfTokenScanner scanner; - public Type0FontHandler(CidFontFactory cidFontFactory, CMapCache cMapCache, IFilterProvider filterProvider, + public Type0FontHandler(CidFontFactory cidFontFactory, IFilterProvider filterProvider, IPdfTokenScanner scanner) { this.cidFontFactory = cidFontFactory; - this.cMapCache = cMapCache; this.filterProvider = filterProvider; this.scanner = scanner; } @@ -59,7 +57,7 @@ throw new InvalidFontFormatException("No descendant font dictionary was declared for this Type 0 font. This dictionary should contain the CIDFont for the Type 0 font. " + dictionary); } - var ucs2CMap = GetUcs2CMap(dictionary, isCMapPredefined, false); + var (ucs2CMap, isChineseJapaneseOrKorean) = GetUcs2CMap(dictionary, isCMapPredefined, cidFont); CMap toUnicodeCMap = null; if (dictionary.ContainsKey(NameToken.ToUnicode)) @@ -72,11 +70,11 @@ if (decodedUnicodeCMap != null) { - toUnicodeCMap = cMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing); + toUnicodeCMap = CMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing); } } - var font = new Type0Font(baseFont, cidFont, cMap, toUnicodeCMap); + var font = new Type0Font(baseFont, cidFont, cMap, toUnicodeCMap, ucs2CMap, isChineseJapaneseOrKorean); return font; } @@ -139,7 +137,7 @@ { if (value is NameToken encodingName) { - var cmap = cMapCache.Get(encodingName.Data); + var cmap = CMapCache.Get(encodingName.Data); result = cmap ?? throw new InvalidOperationException("Missing CMap for " + encodingName.Data); @@ -149,7 +147,7 @@ { var decoded = stream.Decode(filterProvider); - var cmap = cMapCache.Parse(new ByteArrayInputBytes(decoded), false); + var cmap = CMapCache.Parse(new ByteArrayInputBytes(decoded), false); result = cmap ?? throw new InvalidOperationException("Could not read CMap for " + dictionary); } @@ -162,11 +160,11 @@ return result; } - private static CMap GetUcs2CMap(DictionaryToken dictionary, bool isCMapPredefined, bool usesDescendantAdobeFont) + private static (CMap, bool isChineseJapaneseOrKorean) GetUcs2CMap(DictionaryToken dictionary, bool isCMapPredefined, ICidFont cidFont) { if (!isCMapPredefined) { - return null; + return (null, false); } /* @@ -178,17 +176,43 @@ if (encodingName == null) { - return null; + return (null, false); } + var isChineseJapaneseOrKorean = false; + + if (cidFont != null && string.Equals(cidFont.SystemInfo.Registry, "Adobe", StringComparison.OrdinalIgnoreCase)) + { + isChineseJapaneseOrKorean = string.Equals(cidFont.SystemInfo.Ordering, "GB1", StringComparison.OrdinalIgnoreCase) + || string.Equals(cidFont.SystemInfo.Ordering, "CNS1", StringComparison.OrdinalIgnoreCase) + || string.Equals(cidFont.SystemInfo.Ordering, "Japan1", StringComparison.OrdinalIgnoreCase) + || string.Equals(cidFont.SystemInfo.Ordering, "Korea1", StringComparison.OrdinalIgnoreCase); + } + + var isPredefinedIdentityMap = encodingName.Equals(NameToken.IdentityH) || encodingName.Equals(NameToken.IdentityV); - if (isPredefinedIdentityMap && !usesDescendantAdobeFont) + if (isPredefinedIdentityMap && !isChineseJapaneseOrKorean) { - return null; + return (null, false); } - throw new NotSupportedException("Support for UCS2 CMaps are not implemented yet. Please raise an issue."); + if (!isChineseJapaneseOrKorean) + { + return (null, false); + } + + var fullCmapName = cidFont.SystemInfo.ToString(); + var nonUnicodeCMap = CMapCache.Get(fullCmapName); + + if (nonUnicodeCMap == null) + { + return (null, true); + } + + var unicodeCMapName = $"{nonUnicodeCMap.Info.Registry}-{nonUnicodeCMap.Info.Ordering}-UCS2"; + + return (CMapCache.Get(unicodeCMapName), true); } } } diff --git a/src/UglyToad.PdfPig/Fonts/Parser/Handlers/Type1FontHandler.cs b/src/UglyToad.PdfPig/Fonts/Parser/Handlers/Type1FontHandler.cs index a7da84a7..0f476db2 100644 --- a/src/UglyToad.PdfPig/Fonts/Parser/Handlers/Type1FontHandler.cs +++ b/src/UglyToad.PdfPig/Fonts/Parser/Handlers/Type1FontHandler.cs @@ -19,21 +19,19 @@ internal class Type1FontHandler : IFontHandler { private readonly IPdfTokenScanner pdfScanner; - private readonly CMapCache cMapCache; private readonly IFilterProvider filterProvider; private readonly FontDescriptorFactory fontDescriptorFactory; private readonly IEncodingReader encodingReader; private readonly Type1FontParser type1FontParser; private readonly CompactFontFormatParser compactFontFormatParser; - public Type1FontHandler(IPdfTokenScanner pdfScanner, CMapCache cMapCache, IFilterProvider filterProvider, + public Type1FontHandler(IPdfTokenScanner pdfScanner, IFilterProvider filterProvider, FontDescriptorFactory fontDescriptorFactory, IEncodingReader encodingReader, Type1FontParser type1FontParser, CompactFontFormatParser compactFontFormatParser) { this.pdfScanner = pdfScanner; - this.cMapCache = cMapCache; this.filterProvider = filterProvider; this.fontDescriptorFactory = fontDescriptorFactory; this.encodingReader = encodingReader; @@ -95,7 +93,7 @@ if (decodedUnicodeCMap != null) { - toUnicodeCMap = cMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing); + toUnicodeCMap = CMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing); } } diff --git a/src/UglyToad.PdfPig/Fonts/Parser/Handlers/Type3FontHandler.cs b/src/UglyToad.PdfPig/Fonts/Parser/Handlers/Type3FontHandler.cs index a0cffbd3..a0ef5da9 100644 --- a/src/UglyToad.PdfPig/Fonts/Parser/Handlers/Type3FontHandler.cs +++ b/src/UglyToad.PdfPig/Fonts/Parser/Handlers/Type3FontHandler.cs @@ -15,15 +15,13 @@ internal class Type3FontHandler : IFontHandler { - private readonly CMapCache cMapCache; private readonly IFilterProvider filterProvider; private readonly IEncodingReader encodingReader; private readonly IPdfTokenScanner scanner; - public Type3FontHandler(IPdfTokenScanner scanner, CMapCache cMapCache, IFilterProvider filterProvider, + public Type3FontHandler(IPdfTokenScanner scanner, IFilterProvider filterProvider, IEncodingReader encodingReader) { - this.cMapCache = cMapCache; this.filterProvider = filterProvider; this.encodingReader = encodingReader; this.scanner = scanner; @@ -50,7 +48,7 @@ if (decodedUnicodeCMap != null) { - toUnicodeCMap = cMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing); + toUnicodeCMap = CMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing); } } diff --git a/src/UglyToad.PdfPig/Fonts/Simple/TrueTypeSimpleFont.cs b/src/UglyToad.PdfPig/Fonts/Simple/TrueTypeSimpleFont.cs index 57c27402..520765b2 100644 --- a/src/UglyToad.PdfPig/Fonts/Simple/TrueTypeSimpleFont.cs +++ b/src/UglyToad.PdfPig/Fonts/Simple/TrueTypeSimpleFont.cs @@ -71,9 +71,9 @@ // Behaviour specified by the Extraction of Text Content section of the specification. // If the font contains a ToUnicode CMap use that. - if (ToUnicode.CanMapToUnicode) + if (ToUnicode.CanMapToUnicode && ToUnicode.TryGet(characterCode, out value)) { - return ToUnicode.TryGet(characterCode, out value); + return true; } if (encoding == null) @@ -86,17 +86,18 @@ // Map the character code to a character name. var encodedCharacterName = encoding.GetName(characterCode); - // Look up the character name in the Adobe Glyph List. + // Look up the character name in the Adobe Glyph List or additional Glyph List. try { - value = GlyphList.AdobeGlyphList.NameToUnicode(encodedCharacterName); + value = GlyphList.AdobeGlyphList.NameToUnicode(encodedCharacterName) + ?? GlyphList.AdditionalGlyphList.NameToUnicode(encodedCharacterName); } catch { return false; } - return true; + return value != null; } public CharacterBoundingBox GetBoundingBox(int characterCode) diff --git a/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs b/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs index 3d350721..50aad759 100644 --- a/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs +++ b/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs @@ -71,7 +71,6 @@ { var log = container.Get(); var filterProvider = container.Get(); - var cMapCache = new CMapCache(new CMapParser()); CrossReferenceTable crossReferenceTable = null; @@ -100,9 +99,8 @@ var trueTypeFontParser = new TrueTypeFontParser(); var fontDescriptorFactory = new FontDescriptorFactory(); - var compactFontFormatIndexReader = new CompactFontFormatIndexReader(); - var compactFontFormatParser = new CompactFontFormatParser(new CompactFontFormatIndividualFontParser(compactFontFormatIndexReader, new CompactFontFormatTopLevelDictionaryReader(), - new CompactFontFormatPrivateDictionaryReader()), compactFontFormatIndexReader); + var compactFontFormatParser = new CompactFontFormatParser(new CompactFontFormatIndividualFontParser(new CompactFontFormatTopLevelDictionaryReader(), + new CompactFontFormatPrivateDictionaryReader())); var (rootReference, rootDictionary) = ParseTrailer(crossReferenceTable, isLenientParsing, pdfScanner, @@ -117,12 +115,11 @@ var encodingReader = new EncodingReader(pdfScanner); var fontFactory = new FontFactory(log, new Type0FontHandler(cidFontFactory, - cMapCache, filterProvider, pdfScanner), - new TrueTypeFontHandler(log, pdfScanner, filterProvider, cMapCache, fontDescriptorFactory, trueTypeFontParser, encodingReader, new SystemFontFinder(new TrueTypeFontParser())), - new Type1FontHandler(pdfScanner, cMapCache, filterProvider, fontDescriptorFactory, encodingReader, + new TrueTypeFontHandler(log, pdfScanner, filterProvider, fontDescriptorFactory, trueTypeFontParser, encodingReader, new SystemFontFinder(new TrueTypeFontParser())), + new Type1FontHandler(pdfScanner, filterProvider, fontDescriptorFactory, encodingReader, new Type1FontParser(new Type1EncryptedPortionParser()), compactFontFormatParser), - new Type3FontHandler(pdfScanner, cMapCache, filterProvider, encodingReader)); + new Type3FontHandler(pdfScanner, filterProvider, encodingReader)); var resourceContainer = new ResourceStore(pdfScanner, fontFactory); diff --git a/src/UglyToad.PdfPig/Resources/GlyphList/additional b/src/UglyToad.PdfPig/Resources/GlyphList/additional index 72ea294d..3dcb84d5 100644 --- a/src/UglyToad.PdfPig/Resources/GlyphList/additional +++ b/src/UglyToad.PdfPig/Resources/GlyphList/additional @@ -108,6 +108,7 @@ logicalanddisplay;2227 logicalandtext;2227 logicalordisplay;2228 logicalortext;2228 +nonmarkingreturn;000D parenleftBig;0028 parenleftbig;0028 parenleftBigg;0028