Use CID font subroutines where applicable. Add UCS-2 CMap support for Type 0 fonts

* CID CFF fonts have multiple sub-fonts and multiple private dictionaries, in addition to a top-level font and private dictionary. This fix uses the private dictionary of the specific sub-font when getting local subroutines on a per-glyph basis.
* Chinese, Japanese or Korean fonts can use a UCS-2 encoding CMap for retrieving Unicode values.
* Add support for the additional glyph list when looking up Unicode values in TrueType fonts. Adds a nonmarkingreturn mapping to carriage return (U+000D).
* Makes font parsing classes static where there is no reason for them to be per-instance.
This commit is contained in:
Eliot Jones
2019-12-19 13:33:44 +00:00
parent a167d4c1dd
commit c30cd1b96d
21 changed files with 240 additions and 160 deletions

View File

@@ -4,15 +4,13 @@
using System.IO;
using System.Linq;
using PdfPig.Fonts.CompactFontFormat;
using PdfPig.Fonts.CompactFontFormat.CharStrings;
using PdfPig.Fonts.CompactFontFormat.Dictionaries;
using Xunit;
public class CompactFontFormatParserTests
{
private readonly CompactFontFormatParser parser = new CompactFontFormatParser(new CompactFontFormatIndividualFontParser(
new CompactFontFormatIndexReader(), new CompactFontFormatTopLevelDictionaryReader(), new CompactFontFormatPrivateDictionaryReader()),
new CompactFontFormatIndexReader());
private readonly CompactFontFormatParser parser = new CompactFontFormatParser(
new CompactFontFormatIndividualFontParser(new CompactFontFormatTopLevelDictionaryReader(), new CompactFontFormatPrivateDictionaryReader()));
[Fact]
public void CanReadMinionPro()

View File

@@ -1,45 +0,0 @@
namespace UglyToad.PdfPig.Fonts
{
    using System;
    using System.Collections.Generic;
    using Cmap;
    using IO;
    using Parser;

    /// <summary>
    /// Per-instance cache of CMaps looked up by name, plus a pass-through for parsing
    /// CMaps supplied as bytes.
    /// </summary>
    internal class CMapCache
    {
        // CMap names are identifiers rather than linguistic text, so they are compared
        // ordinally (ignoring case) instead of with culture-sensitive rules.
        private readonly Dictionary<string, CMap> cache = new Dictionary<string, CMap>(StringComparer.OrdinalIgnoreCase);

        private readonly CMapParser cMapParser;

        /// <summary>
        /// Create a new <see cref="CMapCache"/> which uses the given parser.
        /// </summary>
        /// <param name="cMapParser">The parser used for both named and byte-supplied CMaps.</param>
        /// <exception cref="ArgumentNullException">If <paramref name="cMapParser"/> is null.</exception>
        public CMapCache(CMapParser cMapParser)
        {
            this.cMapParser = cMapParser ?? throw new ArgumentNullException(nameof(cMapParser));
        }

        /// <summary>
        /// Get the CMap with the given name, parsing it with <c>ParseExternal</c> on first
        /// request and returning the stored result on later requests.
        /// </summary>
        /// <param name="name">The name of the CMap, matched case-insensitively.</param>
        /// <exception cref="ArgumentNullException">If <paramref name="name"/> is null.</exception>
        public CMap Get(string name)
        {
            if (name == null)
            {
                throw new ArgumentNullException(nameof(name));
            }

            if (cache.TryGetValue(name, out var result))
            {
                return result;
            }

            result = cMapParser.ParseExternal(name);

            // Whatever the parser returned is stored as-is, so it is not parsed again.
            cache[name] = result;

            return result;
        }

        /// <summary>
        /// Parse a CMap from the provided bytes. The result is not added to the cache.
        /// </summary>
        /// <param name="bytes">The bytes of the CMap.</param>
        /// <param name="isLenientParsing">Forwarded unchanged to the parser.</param>
        /// <exception cref="ArgumentNullException">If <paramref name="bytes"/> is null.</exception>
        public CMap Parse(IInputBytes bytes, bool isLenientParsing)
        {
            if (bytes == null)
            {
                throw new ArgumentNullException(nameof(bytes));
            }

            return cMapParser.Parse(bytes, isLenientParsing);
        }
    }
}

View File

@@ -31,7 +31,7 @@
public override string ToString()
{
return $"{Registry} | {Ordering} | {Supplement}";
return $"{Registry}-{Ordering}-{Supplement}";
}
}
}

View File

@@ -0,0 +1,44 @@
namespace UglyToad.PdfPig.Fonts.Cmap
{
    using System;
    using System.Collections.Generic;
    using IO;
    using Parser;

    /// <summary>
    /// Static cache of CMaps looked up by name, plus a pass-through for parsing
    /// CMaps supplied as bytes.
    /// </summary>
    internal static class CMapCache
    {
        // Names are matched ordinally, ignoring case.
        private static readonly Dictionary<string, CMap> Cache = new Dictionary<string, CMap>(StringComparer.OrdinalIgnoreCase);

        // Guards every read and write of Cache in Get.
        private static readonly object Lock = new object();

        private static readonly CMapParser CMapParser = new CMapParser();

        /// <summary>
        /// Get the CMap with the given name, parsing it with <c>ParseExternal</c> on first
        /// request and returning the stored result on later requests.
        /// </summary>
        /// <param name="name">The name of the CMap, matched case-insensitively.</param>
        /// <exception cref="ArgumentNullException">If <paramref name="name"/> is null.</exception>
        public static CMap Get(string name)
        {
            // Fail with the caller-facing parameter name instead of letting the
            // dictionary lookup throw for its own "key" parameter.
            if (name == null)
            {
                throw new ArgumentNullException(nameof(name));
            }

            lock (Lock)
            {
                if (Cache.TryGetValue(name, out var result))
                {
                    return result;
                }

                // Parsing happens while the lock is held, so concurrent callers asking
                // for the same name do not parse it twice.
                result = CMapParser.ParseExternal(name);

                Cache[name] = result;

                return result;
            }
        }

        /// <summary>
        /// Parse a CMap from the provided bytes. The result is not added to the cache
        /// and this call does not take the cache lock.
        /// </summary>
        /// <param name="bytes">The bytes of the CMap.</param>
        /// <param name="isLenientParsing">Forwarded unchanged to the parser.</param>
        /// <exception cref="ArgumentNullException">If <paramref name="bytes"/> is null.</exception>
        public static CMap Parse(IInputBytes bytes, bool isLenientParsing)
        {
            if (bytes == null)
            {
                throw new ArgumentNullException(nameof(bytes));
            }

            return CMapParser.Parse(bytes, isLenientParsing);
        }
    }
}

View File

@@ -2,7 +2,7 @@
{
using System.Collections.Generic;
internal static class CmapUtils
internal static class CMapUtils
{
public static int ToInt(this IReadOnlyList<byte> data, int length)
{

View File

@@ -683,22 +683,16 @@
};
public static Type2CharStrings Parse([NotNull] IReadOnlyList<IReadOnlyList<byte>> charStringBytes,
[NotNull] CompactFontFormatIndex localSubroutines,
[NotNull] CompactFontFormatIndex globalSubroutines, ICompactFontFormatCharset charset)
CompactFontFormatSubroutinesSelector subroutinesSelector, ICompactFontFormatCharset charset)
{
if (charStringBytes == null)
{
throw new ArgumentNullException(nameof(charStringBytes));
}
if (localSubroutines == null)
if (subroutinesSelector == null)
{
throw new ArgumentNullException(nameof(localSubroutines));
}
if (globalSubroutines == null)
{
throw new ArgumentNullException(nameof(globalSubroutines));
throw new ArgumentNullException(nameof(subroutinesSelector));
}
var charStrings = new Dictionary<string, Type2CharStrings.CommandSequence>();
@@ -706,11 +700,12 @@
{
var charString = charStringBytes[i];
var name = charset.GetNameByGlyphId(i);
var (globalSubroutines, localSubroutines) = subroutinesSelector.GetSubroutines(i);
var sequence = ParseSingle(charString.ToList(), localSubroutines, globalSubroutines);
charStrings[name] = new Type2CharStrings.CommandSequence(sequence);
}
return new Type2CharStrings(charStrings, localSubroutines, globalSubroutines);
return new Type2CharStrings(charStrings);
}
private static IReadOnlyList<Union<decimal, LazyType2Command>> ParseSingle(List<byte> bytes,

View File

@@ -3,7 +3,6 @@
using System;
using System.Collections.Generic;
using System.Linq;
using Dictionaries;
using Geometry;
using Util;
using Util.JetBrains.Annotations;
@@ -23,24 +22,10 @@
/// </summary>
public IReadOnlyDictionary<string, CommandSequence> CharStrings { get; }
/// <summary>
/// The indexed bytes for the local subroutines in this font.
/// </summary>
[NotNull]
public CompactFontFormatIndex LocalSubroutines { get; }
/// <summary>
/// The indexed bytes for the global subroutines in this font set.
/// </summary>
[NotNull]
public CompactFontFormatIndex GlobalSubroutines { get; }
public Type2CharStrings(IReadOnlyDictionary<string, CommandSequence> charStrings, CompactFontFormatIndex localSubroutines,
CompactFontFormatIndex globalSubroutines)
public Type2CharStrings(IReadOnlyDictionary<string, CommandSequence> charStrings)
{
CharStrings = charStrings ?? throw new ArgumentNullException(nameof(charStrings));
LocalSubroutines = localSubroutines ?? throw new ArgumentNullException(nameof(localSubroutines));
GlobalSubroutines = globalSubroutines ?? throw new ArgumentNullException(nameof(globalSubroutines));
}
/// <summary>

View File

@@ -66,7 +66,6 @@
{
public IReadOnlyList<CompactFontFormatTopLevelDictionary> FontDictionaries { get; }
public IReadOnlyList<CompactFontFormatPrivateDictionary> PrivateDictionaries { get; }
public IReadOnlyList<CompactFontFormatIndex> LocalSubroutines { get; }
public ICompactFontFormatFdSelect FdSelect { get; }
public CompactFontFormatCidFont(CompactFontFormatTopLevelDictionary topDictionary, CompactFontFormatPrivateDictionary privateDictionary,
@@ -74,12 +73,10 @@
Union<Type1CharStrings, Type2CharStrings> charStrings,
IReadOnlyList<CompactFontFormatTopLevelDictionary> fontDictionaries,
IReadOnlyList<CompactFontFormatPrivateDictionary> privateDictionaries,
IReadOnlyList<CompactFontFormatIndex> localSubroutines,
ICompactFontFormatFdSelect fdSelect) : base(topDictionary, privateDictionary, charset, charStrings, null)
{
FontDictionaries = fontDictionaries;
PrivateDictionaries = privateDictionaries;
LocalSubroutines = localSubroutines;
FdSelect = fdSelect;
}

View File

@@ -1,12 +1,11 @@
using System;
namespace UglyToad.PdfPig.Fonts.CompactFontFormat
namespace UglyToad.PdfPig.Fonts.CompactFontFormat
{
using System;
using Util;
internal class CompactFontFormatIndexReader
internal static class CompactFontFormatIndexReader
{
public CompactFontFormatIndex ReadDictionaryData(CompactFontFormatData data)
public static CompactFontFormatIndex ReadDictionaryData(CompactFontFormatData data)
{
var index = ReadIndex(data);
@@ -39,7 +38,7 @@ namespace UglyToad.PdfPig.Fonts.CompactFontFormat
return new CompactFontFormatIndex(results);
}
public int[] ReadIndex(CompactFontFormatData data)
public static int[] ReadIndex(CompactFontFormatData data)
{
var count = data.ReadCard16();

View File

@@ -13,15 +13,12 @@
internal class CompactFontFormatIndividualFontParser
{
private readonly CompactFontFormatIndexReader indexReader;
private readonly CompactFontFormatTopLevelDictionaryReader topLevelDictionaryReader;
private readonly CompactFontFormatPrivateDictionaryReader privateDictionaryReader;
public CompactFontFormatIndividualFontParser(CompactFontFormatIndexReader indexReader,
CompactFontFormatTopLevelDictionaryReader topLevelDictionaryReader,
public CompactFontFormatIndividualFontParser(CompactFontFormatTopLevelDictionaryReader topLevelDictionaryReader,
CompactFontFormatPrivateDictionaryReader privateDictionaryReader)
{
this.indexReader = indexReader;
this.topLevelDictionaryReader = topLevelDictionaryReader;
this.privateDictionaryReader = privateDictionaryReader;
}
@@ -53,12 +50,12 @@
{
data.Seek(privateDictionary.LocalSubroutineOffset.Value + topDictionary.PrivateDictionaryLocation.Value.Offset);
localSubroutines = indexReader.ReadDictionaryData(data);
localSubroutines = CompactFontFormatIndexReader.ReadDictionaryData(data);
}
data.Seek(topDictionary.CharStringsOffset);
var charStringIndex = indexReader.ReadDictionaryData(data);
var charStringIndex = CompactFontFormatIndexReader.ReadDictionaryData(data);
ICompactFontFormatCharset charset;
if (topDictionary.CharSetOffset >= 0)
@@ -94,23 +91,13 @@
}
}
data.Seek(topDictionary.CharStringsOffset);
Type2CharStrings charStrings;
switch (topDictionary.CharStringType)
{
case CompactFontFormatCharStringType.Type1:
throw new NotImplementedException("Type 1 CharStrings are not currently supported in CFF font.");
case CompactFontFormatCharStringType.Type2:
charStrings = Type2CharStringParser.Parse(charStringIndex, localSubroutines, globalSubroutineIndex, charset);
break;
default:
throw new ArgumentOutOfRangeException($"Unexpected CharString type in CFF font: {topDictionary.CharStringType}.");
}
if (topDictionary.IsCidFont)
{
return ReadCidFont(data, topDictionary, charStringIndex.Count, stringIndex, privateDictionary, charset, Union<Type1CharStrings, Type2CharStrings>.Two(charStrings));
return ReadCidFont(data, topDictionary, charStringIndex.Count, stringIndex, privateDictionary,
charset,
globalSubroutineIndex,
localSubroutines,
charStringIndex);
}
var encoding = topDictionary.EncodingOffset;
@@ -133,6 +120,10 @@
}
}
var subroutineSelector = new CompactFontFormatSubroutinesSelector(globalSubroutineIndex, localSubroutines);
var charStrings = ReadCharStrings(data, topDictionary, charStringIndex, subroutineSelector, charset);
return new CompactFontFormatFont(topDictionary, privateDictionary, charset, Union<Type1CharStrings, Type2CharStrings>.Two(charStrings), fontEncoding);
}
@@ -204,22 +195,43 @@
return "SID" + index;
}
/// <summary>
/// Seeks <paramref name="data"/> to the CharStrings offset of the top level dictionary and
/// parses the Type 2 CharStrings in <paramref name="charStringIndex"/>.
/// </summary>
/// <param name="data">The font data, repositioned to <c>topDictionary.CharStringsOffset</c>.</param>
/// <param name="topDictionary">Supplies the CharStrings offset and the CharString type.</param>
/// <param name="charStringIndex">The already-read index of CharString bytes.</param>
/// <param name="subroutinesSelector">Provides the global and local subroutines for each glyph.</param>
/// <param name="charset">The charset passed through to the CharString parser.</param>
/// <exception cref="NotImplementedException">If the font declares Type 1 CharStrings.</exception>
/// <exception cref="ArgumentOutOfRangeException">If the CharString type is not a known value.</exception>
private static Type2CharStrings ReadCharStrings(CompactFontFormatData data, CompactFontFormatTopLevelDictionary topDictionary,
    CompactFontFormatIndex charStringIndex,
    CompactFontFormatSubroutinesSelector subroutinesSelector,
    ICompactFontFormatCharset charset)
{
    data.Seek(topDictionary.CharStringsOffset);

    switch (topDictionary.CharStringType)
    {
        case CompactFontFormatCharStringType.Type1:
            throw new NotImplementedException("Type 1 CharStrings are not currently supported in CFF font.");
        case CompactFontFormatCharStringType.Type2:
            return Type2CharStringParser.Parse(charStringIndex, subroutinesSelector, charset);
        default:
            // The single-string constructor treats its argument as the parameter name,
            // so use the overload which carries the message separately.
            throw new ArgumentOutOfRangeException(nameof(topDictionary), topDictionary.CharStringType,
                $"Unexpected CharString type in CFF font: {topDictionary.CharStringType}.");
    }
}
private CompactFontFormatCidFont ReadCidFont(CompactFontFormatData data, CompactFontFormatTopLevelDictionary topLevelDictionary,
int numberOfGlyphs,
IReadOnlyList<string> stringIndex,
CompactFontFormatPrivateDictionary privateDictionary,
ICompactFontFormatCharset charset,
Union<Type1CharStrings, Type2CharStrings> charstrings)
CompactFontFormatIndex globalSubroutines,
CompactFontFormatIndex localSubroutinesTop,
CompactFontFormatIndex charStringIndex)
{
var offset = topLevelDictionary.CidFontOperators.FontDictionaryArray;
data.Seek(offset);
var fontDict = indexReader.ReadDictionaryData(data);
var fontDict = CompactFontFormatIndexReader.ReadDictionaryData(data);
var privateDictionaries = new List<CompactFontFormatPrivateDictionary>();
var fontDictionaries = new List<CompactFontFormatTopLevelDictionary>();
var fontLocalSubroutines = new List<CompactFontFormatIndex>();
foreach (var index in fontDict)
{
var topLevelDictionaryCid = topLevelDictionaryReader.Read(new CompactFontFormatData(index), stringIndex);
@@ -238,9 +250,13 @@
if (privateDictionaryCid.LocalSubroutineOffset.HasValue && privateDictionaryCid.LocalSubroutineOffset.Value > 0)
{
data.Seek(topLevelDictionaryCid.PrivateDictionaryLocation.Value.Offset + privateDictionaryCid.LocalSubroutineOffset.Value);
var localSubroutines = indexReader.ReadDictionaryData(data);
var localSubroutines = CompactFontFormatIndexReader.ReadDictionaryData(data);
fontLocalSubroutines.Add(localSubroutines);
}
else
{
fontLocalSubroutines.Add(null);
}
fontDictionaries.Add(topLevelDictionaryCid);
privateDictionaries.Add(privateDictionaryCid);
@@ -267,8 +283,15 @@
throw new InvalidFontFormatException($"Invalid Font Dictionary Select format: {format}.");
}
return new CompactFontFormatCidFont(topLevelDictionary, privateDictionary, charset, charstrings,
fontDictionaries, privateDictionaries, fontLocalSubroutines, fdSelect);
var subroutineSelector = new CompactFontFormatSubroutinesSelector(globalSubroutines, localSubroutinesTop,
fdSelect, fontLocalSubroutines);
var charStrings = ReadCharStrings(data, topLevelDictionary, charStringIndex, subroutineSelector, charset);
var union = Union<Type1CharStrings, Type2CharStrings>.Two(charStrings);
return new CompactFontFormatCidFont(topLevelDictionary, privateDictionary, charset, union,
fontDictionaries, privateDictionaries, fdSelect);
}
private static CompactFontFormat0FdSelect ReadFormat0FdSelect(CompactFontFormatData data, int numberOfGlyphs,

View File

@@ -11,12 +11,10 @@
private const string TagTtfonly = "\u0000\u0001\u0000\u0000";
private readonly CompactFontFormatIndividualFontParser individualFontParser;
private readonly CompactFontFormatIndexReader indexReader;
public CompactFontFormatParser(CompactFontFormatIndividualFontParser individualFontParser, CompactFontFormatIndexReader indexReader)
public CompactFontFormatParser(CompactFontFormatIndividualFontParser individualFontParser)
{
this.individualFontParser = individualFontParser;
this.indexReader = indexReader;
}
public CompactFontFormatFontProgram Parse(CompactFontFormatData data)
@@ -41,11 +39,11 @@
var fontNames = ReadStringIndex(data);
var topLevelDictionaryIndex = indexReader.ReadDictionaryData(data);
var topLevelDictionaryIndex = CompactFontFormatIndexReader.ReadDictionaryData(data);
var stringIndex = ReadStringIndex(data);
var globalSubroutineIndex = indexReader.ReadDictionaryData(data);
var globalSubroutineIndex = CompactFontFormatIndexReader.ReadDictionaryData(data);
var fonts = new Dictionary<string, CompactFontFormatFont>();
@@ -79,9 +77,9 @@
/// <summary>
/// Reads indexed string data.
/// </summary>
private string[] ReadStringIndex(CompactFontFormatData data)
private static string[] ReadStringIndex(CompactFontFormatData data)
{
var index = indexReader.ReadIndex(data);
var index = CompactFontFormatIndexReader.ReadIndex(data);
if (index.Length == 0)
{

View File

@@ -0,0 +1,53 @@
namespace UglyToad.PdfPig.Fonts.CompactFontFormat
{
    using System;
    using System.Collections.Generic;

    /// <summary>
    /// Chooses the global and local subroutine indexes to use for a glyph.
    /// When created without font dictionary selection data the same pair is returned for every glyph;
    /// when created with it, the local subroutines are looked up per glyph.
    /// </summary>
    internal class CompactFontFormatSubroutinesSelector
    {
        private readonly CompactFontFormatIndex global;
        private readonly CompactFontFormatIndex local;

        // CID fonts have per sub-font subroutines.
        private readonly bool isCid;
        private readonly ICompactFontFormatFdSelect fdSelect;
        private readonly IReadOnlyList<CompactFontFormatIndex> perFontLocalSubroutines;

        /// <summary>
        /// Create a selector which returns the same subroutines for every glyph.
        /// </summary>
        /// <param name="global">The global subroutines.</param>
        /// <param name="local">The local subroutines.</param>
        public CompactFontFormatSubroutinesSelector(CompactFontFormatIndex global, CompactFontFormatIndex local)
        {
            this.global = global;
            this.local = local;
        }

        /// <summary>
        /// Create a selector which picks local subroutines per glyph using the font dictionary index
        /// reported by <paramref name="fdSelect"/>.
        /// </summary>
        /// <param name="global">The global subroutines.</param>
        /// <param name="local">The local subroutines used as the fallback.</param>
        /// <param name="fdSelect">Maps a glyph id to a font dictionary index.</param>
        /// <param name="perFontLocalSubroutines">
        /// Local subroutines by font dictionary index. Individual entries may be null, in which case
        /// <paramref name="local"/> is used.
        /// </param>
        /// <exception cref="ArgumentNullException">
        /// If <paramref name="fdSelect"/> or <paramref name="perFontLocalSubroutines"/> is null.
        /// </exception>
        public CompactFontFormatSubroutinesSelector(CompactFontFormatIndex global, CompactFontFormatIndex local,
            ICompactFontFormatFdSelect fdSelect,
            IReadOnlyList<CompactFontFormatIndex> perFontLocalSubroutines)
        {
            this.global = global;
            this.local = local;

            // Both are dereferenced for every glyph in GetSubroutines, so reject null here
            // rather than failing later with a NullReferenceException.
            this.fdSelect = fdSelect ?? throw new ArgumentNullException(nameof(fdSelect));
            this.perFontLocalSubroutines = perFontLocalSubroutines ?? throw new ArgumentNullException(nameof(perFontLocalSubroutines));

            isCid = true;
        }

        /// <summary>
        /// Get the global and local subroutines to use for the glyph with the given id.
        /// </summary>
        /// <param name="glyphId">The glyph id passed to the font dictionary selector.</param>
        /// <returns>
        /// The global subroutines together with either the per-font local subroutines for the glyph
        /// or, when none apply, the fallback local subroutines.
        /// </returns>
        public (CompactFontFormatIndex global, CompactFontFormatIndex local) GetSubroutines(int glyphId)
        {
            if (!isCid)
            {
                return (global, local);
            }

            var fdIndex = fdSelect.GetFontDictionaryIndex(glyphId);

            // An index outside the known font dictionaries falls back to the shared local subroutines.
            var isKnownFontDictionary = fdIndex >= 0 && fdIndex < perFontLocalSubroutines.Count;
            if (!isKnownFontDictionary)
            {
                return (global, local);
            }

            // A null entry is treated the same way as an out-of-range index.
            return (global, perFontLocalSubroutines[fdIndex] ?? local);
        }
    }
}

View File

@@ -14,6 +14,10 @@
/// </summary>
internal class Type0Font : IFont, IVerticalWritingSupported
{
private readonly CMap ucs2CMap;
// ReSharper disable once NotAccessedField.Local
private readonly bool isChineseJapaneseOrKorean;
public NameToken Name => BaseFont;
[NotNull]
@@ -30,8 +34,13 @@
public bool IsVertical => CMap.WritingMode == WritingMode.Vertical;
public Type0Font(NameToken baseFont, ICidFont cidFont, CMap cmap, CMap toUnicodeCMap)
public Type0Font(NameToken baseFont, ICidFont cidFont, CMap cmap, CMap toUnicodeCMap,
CMap ucs2CMap,
bool isChineseJapaneseOrKorean)
{
this.ucs2CMap = ucs2CMap;
this.isChineseJapaneseOrKorean = isChineseJapaneseOrKorean;
BaseFont = baseFont ?? throw new ArgumentNullException(nameof(baseFont));
CidFont = cidFont ?? throw new ArgumentNullException(nameof(cidFont));
CMap = cmap ?? throw new ArgumentNullException(nameof(cmap));
@@ -55,6 +64,11 @@
if (!ToUnicode.CanMapToUnicode)
{
if (ucs2CMap != null && ucs2CMap.TryConvertToUnicode(characterCode, out value))
{
return value != null;
}
return false;
}

View File

@@ -18,6 +18,9 @@
private static readonly Lazy<GlyphList> LazyAdobeGlyphList = new Lazy<GlyphList>(() => GlyphListFactory.Get("glyphlist"));
public static GlyphList AdobeGlyphList => LazyAdobeGlyphList.Value;
private static readonly Lazy<GlyphList> LazyAdditionalGlyphList = new Lazy<GlyphList>(() => GlyphListFactory.Get("additional"));
public static GlyphList AdditionalGlyphList => LazyAdditionalGlyphList.Value;
private static readonly Lazy<GlyphList> LazyZapfDingbatsGlyphList = new Lazy<GlyphList>(() => GlyphListFactory.Get("zapfdingbats"));
public static GlyphList ZapfDingbats => LazyZapfDingbatsGlyphList.Value;

View File

@@ -24,14 +24,12 @@
private readonly ILog log;
private readonly IPdfTokenScanner pdfScanner;
private readonly IFilterProvider filterProvider;
private readonly CMapCache cMapCache;
private readonly FontDescriptorFactory fontDescriptorFactory;
private readonly TrueTypeFontParser trueTypeFontParser;
private readonly IEncodingReader encodingReader;
private readonly ISystemFontFinder systemFontFinder;
public TrueTypeFontHandler(ILog log, IPdfTokenScanner pdfScanner, IFilterProvider filterProvider,
CMapCache cMapCache,
FontDescriptorFactory fontDescriptorFactory,
TrueTypeFontParser trueTypeFontParser,
IEncodingReader encodingReader,
@@ -39,7 +37,6 @@
{
this.log = log;
this.filterProvider = filterProvider;
this.cMapCache = cMapCache;
this.fontDescriptorFactory = fontDescriptorFactory;
this.trueTypeFontParser = trueTypeFontParser;
this.encodingReader = encodingReader;
@@ -113,7 +110,7 @@
if (decodedUnicodeCMap != null)
{
toUnicodeCMap = cMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing);
toUnicodeCMap = CMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing);
}
}

View File

@@ -16,15 +16,13 @@
internal class Type0FontHandler : IFontHandler
{
private readonly CidFontFactory cidFontFactory;
private readonly CMapCache cMapCache;
private readonly IFilterProvider filterProvider;
private readonly IPdfTokenScanner scanner;
public Type0FontHandler(CidFontFactory cidFontFactory, CMapCache cMapCache, IFilterProvider filterProvider,
public Type0FontHandler(CidFontFactory cidFontFactory, IFilterProvider filterProvider,
IPdfTokenScanner scanner)
{
this.cidFontFactory = cidFontFactory;
this.cMapCache = cMapCache;
this.filterProvider = filterProvider;
this.scanner = scanner;
}
@@ -59,7 +57,7 @@
throw new InvalidFontFormatException("No descendant font dictionary was declared for this Type 0 font. This dictionary should contain the CIDFont for the Type 0 font. " + dictionary);
}
var ucs2CMap = GetUcs2CMap(dictionary, isCMapPredefined, false);
var (ucs2CMap, isChineseJapaneseOrKorean) = GetUcs2CMap(dictionary, isCMapPredefined, cidFont);
CMap toUnicodeCMap = null;
if (dictionary.ContainsKey(NameToken.ToUnicode))
@@ -72,11 +70,11 @@
if (decodedUnicodeCMap != null)
{
toUnicodeCMap = cMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing);
toUnicodeCMap = CMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing);
}
}
var font = new Type0Font(baseFont, cidFont, cMap, toUnicodeCMap);
var font = new Type0Font(baseFont, cidFont, cMap, toUnicodeCMap, ucs2CMap, isChineseJapaneseOrKorean);
return font;
}
@@ -139,7 +137,7 @@
{
if (value is NameToken encodingName)
{
var cmap = cMapCache.Get(encodingName.Data);
var cmap = CMapCache.Get(encodingName.Data);
result = cmap ?? throw new InvalidOperationException("Missing CMap for " + encodingName.Data);
@@ -149,7 +147,7 @@
{
var decoded = stream.Decode(filterProvider);
var cmap = cMapCache.Parse(new ByteArrayInputBytes(decoded), false);
var cmap = CMapCache.Parse(new ByteArrayInputBytes(decoded), false);
result = cmap ?? throw new InvalidOperationException("Could not read CMap for " + dictionary);
}
@@ -162,11 +160,11 @@
return result;
}
private static CMap GetUcs2CMap(DictionaryToken dictionary, bool isCMapPredefined, bool usesDescendantAdobeFont)
private static (CMap, bool isChineseJapaneseOrKorean) GetUcs2CMap(DictionaryToken dictionary, bool isCMapPredefined, ICidFont cidFont)
{
if (!isCMapPredefined)
{
return null;
return (null, false);
}
/*
@@ -178,17 +176,43 @@
if (encodingName == null)
{
return null;
return (null, false);
}
var isChineseJapaneseOrKorean = false;
if (cidFont != null && string.Equals(cidFont.SystemInfo.Registry, "Adobe", StringComparison.OrdinalIgnoreCase))
{
isChineseJapaneseOrKorean = string.Equals(cidFont.SystemInfo.Ordering, "GB1", StringComparison.OrdinalIgnoreCase)
|| string.Equals(cidFont.SystemInfo.Ordering, "CNS1", StringComparison.OrdinalIgnoreCase)
|| string.Equals(cidFont.SystemInfo.Ordering, "Japan1", StringComparison.OrdinalIgnoreCase)
|| string.Equals(cidFont.SystemInfo.Ordering, "Korea1", StringComparison.OrdinalIgnoreCase);
}
var isPredefinedIdentityMap = encodingName.Equals(NameToken.IdentityH) || encodingName.Equals(NameToken.IdentityV);
if (isPredefinedIdentityMap && !usesDescendantAdobeFont)
if (isPredefinedIdentityMap && !isChineseJapaneseOrKorean)
{
return null;
return (null, false);
}
throw new NotSupportedException("Support for UCS2 CMaps are not implemented yet. Please raise an issue.");
if (!isChineseJapaneseOrKorean)
{
return (null, false);
}
var fullCmapName = cidFont.SystemInfo.ToString();
var nonUnicodeCMap = CMapCache.Get(fullCmapName);
if (nonUnicodeCMap == null)
{
return (null, true);
}
var unicodeCMapName = $"{nonUnicodeCMap.Info.Registry}-{nonUnicodeCMap.Info.Ordering}-UCS2";
return (CMapCache.Get(unicodeCMapName), true);
}
}
}

View File

@@ -19,21 +19,19 @@
internal class Type1FontHandler : IFontHandler
{
private readonly IPdfTokenScanner pdfScanner;
private readonly CMapCache cMapCache;
private readonly IFilterProvider filterProvider;
private readonly FontDescriptorFactory fontDescriptorFactory;
private readonly IEncodingReader encodingReader;
private readonly Type1FontParser type1FontParser;
private readonly CompactFontFormatParser compactFontFormatParser;
public Type1FontHandler(IPdfTokenScanner pdfScanner, CMapCache cMapCache, IFilterProvider filterProvider,
public Type1FontHandler(IPdfTokenScanner pdfScanner, IFilterProvider filterProvider,
FontDescriptorFactory fontDescriptorFactory,
IEncodingReader encodingReader,
Type1FontParser type1FontParser,
CompactFontFormatParser compactFontFormatParser)
{
this.pdfScanner = pdfScanner;
this.cMapCache = cMapCache;
this.filterProvider = filterProvider;
this.fontDescriptorFactory = fontDescriptorFactory;
this.encodingReader = encodingReader;
@@ -95,7 +93,7 @@
if (decodedUnicodeCMap != null)
{
toUnicodeCMap = cMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing);
toUnicodeCMap = CMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing);
}
}

View File

@@ -15,15 +15,13 @@
internal class Type3FontHandler : IFontHandler
{
private readonly CMapCache cMapCache;
private readonly IFilterProvider filterProvider;
private readonly IEncodingReader encodingReader;
private readonly IPdfTokenScanner scanner;
public Type3FontHandler(IPdfTokenScanner scanner, CMapCache cMapCache, IFilterProvider filterProvider,
public Type3FontHandler(IPdfTokenScanner scanner, IFilterProvider filterProvider,
IEncodingReader encodingReader)
{
this.cMapCache = cMapCache;
this.filterProvider = filterProvider;
this.encodingReader = encodingReader;
this.scanner = scanner;
@@ -50,7 +48,7 @@
if (decodedUnicodeCMap != null)
{
toUnicodeCMap = cMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing);
toUnicodeCMap = CMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing);
}
}

View File

@@ -71,9 +71,9 @@
// Behaviour specified by the Extraction of Text Content section of the specification.
// If the font contains a ToUnicode CMap use that.
if (ToUnicode.CanMapToUnicode)
if (ToUnicode.CanMapToUnicode && ToUnicode.TryGet(characterCode, out value))
{
return ToUnicode.TryGet(characterCode, out value);
return true;
}
if (encoding == null)
@@ -86,17 +86,18 @@
// Map the character code to a character name.
var encodedCharacterName = encoding.GetName(characterCode);
// Look up the character name in the Adobe Glyph List.
// Look up the character name in the Adobe Glyph List or additional Glyph List.
try
{
value = GlyphList.AdobeGlyphList.NameToUnicode(encodedCharacterName);
value = GlyphList.AdobeGlyphList.NameToUnicode(encodedCharacterName)
?? GlyphList.AdditionalGlyphList.NameToUnicode(encodedCharacterName);
}
catch
{
return false;
}
return true;
return value != null;
}
public CharacterBoundingBox GetBoundingBox(int characterCode)

View File

@@ -71,7 +71,6 @@
{
var log = container.Get<ILog>();
var filterProvider = container.Get<IFilterProvider>();
var cMapCache = new CMapCache(new CMapParser());
CrossReferenceTable crossReferenceTable = null;
@@ -100,9 +99,8 @@
var trueTypeFontParser = new TrueTypeFontParser();
var fontDescriptorFactory = new FontDescriptorFactory();
var compactFontFormatIndexReader = new CompactFontFormatIndexReader();
var compactFontFormatParser = new CompactFontFormatParser(new CompactFontFormatIndividualFontParser(compactFontFormatIndexReader, new CompactFontFormatTopLevelDictionaryReader(),
new CompactFontFormatPrivateDictionaryReader()), compactFontFormatIndexReader);
var compactFontFormatParser = new CompactFontFormatParser(new CompactFontFormatIndividualFontParser(new CompactFontFormatTopLevelDictionaryReader(),
new CompactFontFormatPrivateDictionaryReader()));
var (rootReference, rootDictionary) = ParseTrailer(crossReferenceTable, isLenientParsing,
pdfScanner,
@@ -117,12 +115,11 @@
var encodingReader = new EncodingReader(pdfScanner);
var fontFactory = new FontFactory(log, new Type0FontHandler(cidFontFactory,
cMapCache,
filterProvider, pdfScanner),
new TrueTypeFontHandler(log, pdfScanner, filterProvider, cMapCache, fontDescriptorFactory, trueTypeFontParser, encodingReader, new SystemFontFinder(new TrueTypeFontParser())),
new Type1FontHandler(pdfScanner, cMapCache, filterProvider, fontDescriptorFactory, encodingReader,
new TrueTypeFontHandler(log, pdfScanner, filterProvider, fontDescriptorFactory, trueTypeFontParser, encodingReader, new SystemFontFinder(new TrueTypeFontParser())),
new Type1FontHandler(pdfScanner, filterProvider, fontDescriptorFactory, encodingReader,
new Type1FontParser(new Type1EncryptedPortionParser()), compactFontFormatParser),
new Type3FontHandler(pdfScanner, cMapCache, filterProvider, encodingReader));
new Type3FontHandler(pdfScanner, filterProvider, encodingReader));
var resourceContainer = new ResourceStore(pdfScanner, fontFactory);

View File

@@ -108,6 +108,7 @@ logicalanddisplay;2227
logicalandtext;2227
logicalordisplay;2228
logicalortext;2228
nonmarkingreturn;000D
parenleftBig;0028
parenleftbig;0028
parenleftBigg;0028