Use CID font subroutines where applicable. Add UCS-2 CMap support for Type 1 fonts

* CID CFF fonts have multiple sub-fonts and multiple private dictionaries, in addition to a top-level font and private dictionary. This fix uses the specific sub-dictionary when getting local subroutines on a per-glyph basis.
* Chinese, Japanese or Korean fonts can use a UCS-2 encoding CMap for retrieving Unicode values.
* Adds support for the additional glyph list for Unicode values in TrueType fonts. Adds a nonmarkingreturn mapping to carriage return.
* Makes font parsing classes static where there's no reason for them to be per-instance.
This commit is contained in:
Eliot Jones
2019-12-19 13:33:44 +00:00
parent a167d4c1dd
commit c30cd1b96d
21 changed files with 240 additions and 160 deletions

View File

@@ -4,15 +4,13 @@
using System.IO; using System.IO;
using System.Linq; using System.Linq;
using PdfPig.Fonts.CompactFontFormat; using PdfPig.Fonts.CompactFontFormat;
using PdfPig.Fonts.CompactFontFormat.CharStrings;
using PdfPig.Fonts.CompactFontFormat.Dictionaries; using PdfPig.Fonts.CompactFontFormat.Dictionaries;
using Xunit; using Xunit;
public class CompactFontFormatParserTests public class CompactFontFormatParserTests
{ {
private readonly CompactFontFormatParser parser = new CompactFontFormatParser(new CompactFontFormatIndividualFontParser( private readonly CompactFontFormatParser parser = new CompactFontFormatParser(
new CompactFontFormatIndexReader(), new CompactFontFormatTopLevelDictionaryReader(), new CompactFontFormatPrivateDictionaryReader()), new CompactFontFormatIndividualFontParser(new CompactFontFormatTopLevelDictionaryReader(), new CompactFontFormatPrivateDictionaryReader()));
new CompactFontFormatIndexReader());
[Fact] [Fact]
public void CanReadMinionPro() public void CanReadMinionPro()

View File

@@ -1,45 +0,0 @@
namespace UglyToad.PdfPig.Fonts
{
using System;
using System.Collections.Generic;
using Cmap;
using IO;
using Parser;
/// <summary>
/// Per-instance cache of named CMaps, backed by a <see cref="CMapParser"/>.
/// Names are compared case-insensitively using the invariant culture.
/// </summary>
internal class CMapCache
{
    private readonly Dictionary<string, CMap> cache = new Dictionary<string, CMap>(StringComparer.InvariantCultureIgnoreCase);

    private readonly CMapParser cMapParser;

    /// <summary>
    /// Creates a cache which delegates all parsing to the supplied parser.
    /// </summary>
    /// <param name="cMapParser">The parser used both for named lookups and for direct parsing.</param>
    public CMapCache(CMapParser cMapParser)
    {
        this.cMapParser = cMapParser;
    }

    /// <summary>
    /// Returns the CMap stored under <paramref name="name"/>. On a miss the parser's
    /// <c>ParseExternal</c> result is stored under that name and returned.
    /// </summary>
    /// <param name="name">The CMap name used as the cache key.</param>
    /// <returns>The cached or newly parsed CMap.</returns>
    public CMap Get(string name)
    {
        if (!cache.TryGetValue(name, out var cmap))
        {
            // Only reached on a miss: whatever the parser returns is remembered for next time.
            cmap = cMapParser.ParseExternal(name);
            cache[name] = cmap;
        }

        return cmap;
    }

    /// <summary>
    /// Parses a CMap from the given bytes. The result is not added to the cache.
    /// </summary>
    /// <param name="bytes">The input bytes to parse; must not be null.</param>
    /// <param name="isLenientParsing">Passed through unchanged to the parser.</param>
    /// <returns>The CMap produced by the parser.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null.</exception>
    public CMap Parse(IInputBytes bytes, bool isLenientParsing)
    {
        if (bytes == null)
        {
            throw new ArgumentNullException(nameof(bytes));
        }

        return cMapParser.Parse(bytes, isLenientParsing);
    }
}
}

View File

@@ -31,7 +31,7 @@
public override string ToString() public override string ToString()
{ {
return $"{Registry} | {Ordering} | {Supplement}"; return $"{Registry}-{Ordering}-{Supplement}";
} }
} }
} }

View File

@@ -0,0 +1,44 @@
namespace UglyToad.PdfPig.Fonts.Cmap
{
using System;
using System.Collections.Generic;
using IO;
using Parser;
/// <summary>
/// Static cache of named CMaps shared by all callers.
/// Names are compared with ordinal, case-insensitive semantics and
/// access to the underlying dictionary is serialized with a lock.
/// </summary>
internal static class CMapCache
{
    private static readonly Dictionary<string, CMap> Cache = new Dictionary<string, CMap>(StringComparer.OrdinalIgnoreCase);

    private static readonly object Lock = new object();

    private static readonly CMapParser CMapParser = new CMapParser();

    /// <summary>
    /// Returns the CMap stored under <paramref name="name"/>. On a miss the parser's
    /// <c>ParseExternal</c> result is stored under that name and returned.
    /// </summary>
    /// <param name="name">The CMap name used as the cache key.</param>
    /// <returns>The cached or newly parsed CMap.</returns>
    public static CMap Get(string name)
    {
        // The lookup, the parse and the insert all happen under the same lock.
        lock (Lock)
        {
            if (!Cache.TryGetValue(name, out var cmap))
            {
                cmap = CMapParser.ParseExternal(name);
                Cache[name] = cmap;
            }

            return cmap;
        }
    }

    /// <summary>
    /// Parses a CMap from the given bytes. The result is not added to the cache
    /// and the call does not take the cache lock.
    /// </summary>
    /// <param name="bytes">The input bytes to parse; must not be null.</param>
    /// <param name="isLenientParsing">Passed through unchanged to the parser.</param>
    /// <returns>The CMap produced by the parser.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null.</exception>
    public static CMap Parse(IInputBytes bytes, bool isLenientParsing)
    {
        if (bytes == null)
        {
            throw new ArgumentNullException(nameof(bytes));
        }

        return CMapParser.Parse(bytes, isLenientParsing);
    }
}
}

View File

@@ -2,7 +2,7 @@
{ {
using System.Collections.Generic; using System.Collections.Generic;
internal static class CmapUtils internal static class CMapUtils
{ {
public static int ToInt(this IReadOnlyList<byte> data, int length) public static int ToInt(this IReadOnlyList<byte> data, int length)
{ {

View File

@@ -683,22 +683,16 @@
}; };
public static Type2CharStrings Parse([NotNull] IReadOnlyList<IReadOnlyList<byte>> charStringBytes, public static Type2CharStrings Parse([NotNull] IReadOnlyList<IReadOnlyList<byte>> charStringBytes,
[NotNull] CompactFontFormatIndex localSubroutines, CompactFontFormatSubroutinesSelector subroutinesSelector, ICompactFontFormatCharset charset)
[NotNull] CompactFontFormatIndex globalSubroutines, ICompactFontFormatCharset charset)
{ {
if (charStringBytes == null) if (charStringBytes == null)
{ {
throw new ArgumentNullException(nameof(charStringBytes)); throw new ArgumentNullException(nameof(charStringBytes));
} }
if (localSubroutines == null) if (subroutinesSelector == null)
{ {
throw new ArgumentNullException(nameof(localSubroutines)); throw new ArgumentNullException(nameof(subroutinesSelector));
}
if (globalSubroutines == null)
{
throw new ArgumentNullException(nameof(globalSubroutines));
} }
var charStrings = new Dictionary<string, Type2CharStrings.CommandSequence>(); var charStrings = new Dictionary<string, Type2CharStrings.CommandSequence>();
@@ -706,11 +700,12 @@
{ {
var charString = charStringBytes[i]; var charString = charStringBytes[i];
var name = charset.GetNameByGlyphId(i); var name = charset.GetNameByGlyphId(i);
var (globalSubroutines, localSubroutines) = subroutinesSelector.GetSubroutines(i);
var sequence = ParseSingle(charString.ToList(), localSubroutines, globalSubroutines); var sequence = ParseSingle(charString.ToList(), localSubroutines, globalSubroutines);
charStrings[name] = new Type2CharStrings.CommandSequence(sequence); charStrings[name] = new Type2CharStrings.CommandSequence(sequence);
} }
return new Type2CharStrings(charStrings, localSubroutines, globalSubroutines); return new Type2CharStrings(charStrings);
} }
private static IReadOnlyList<Union<decimal, LazyType2Command>> ParseSingle(List<byte> bytes, private static IReadOnlyList<Union<decimal, LazyType2Command>> ParseSingle(List<byte> bytes,

View File

@@ -3,7 +3,6 @@
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Linq; using System.Linq;
using Dictionaries;
using Geometry; using Geometry;
using Util; using Util;
using Util.JetBrains.Annotations; using Util.JetBrains.Annotations;
@@ -23,24 +22,10 @@
/// </summary> /// </summary>
public IReadOnlyDictionary<string, CommandSequence> CharStrings { get; } public IReadOnlyDictionary<string, CommandSequence> CharStrings { get; }
/// <summary>
/// The indexed bytes for the local subroutines in this font.
/// </summary>
[NotNull]
public CompactFontFormatIndex LocalSubroutines { get; }
/// <summary> public Type2CharStrings(IReadOnlyDictionary<string, CommandSequence> charStrings)
/// The indexed bytes for the global subroutines in this font set.
/// </summary>
[NotNull]
public CompactFontFormatIndex GlobalSubroutines { get; }
public Type2CharStrings(IReadOnlyDictionary<string, CommandSequence> charStrings, CompactFontFormatIndex localSubroutines,
CompactFontFormatIndex globalSubroutines)
{ {
CharStrings = charStrings ?? throw new ArgumentNullException(nameof(charStrings)); CharStrings = charStrings ?? throw new ArgumentNullException(nameof(charStrings));
LocalSubroutines = localSubroutines ?? throw new ArgumentNullException(nameof(localSubroutines));
GlobalSubroutines = globalSubroutines ?? throw new ArgumentNullException(nameof(globalSubroutines));
} }
/// <summary> /// <summary>

View File

@@ -66,7 +66,6 @@
{ {
public IReadOnlyList<CompactFontFormatTopLevelDictionary> FontDictionaries { get; } public IReadOnlyList<CompactFontFormatTopLevelDictionary> FontDictionaries { get; }
public IReadOnlyList<CompactFontFormatPrivateDictionary> PrivateDictionaries { get; } public IReadOnlyList<CompactFontFormatPrivateDictionary> PrivateDictionaries { get; }
public IReadOnlyList<CompactFontFormatIndex> LocalSubroutines { get; }
public ICompactFontFormatFdSelect FdSelect { get; } public ICompactFontFormatFdSelect FdSelect { get; }
public CompactFontFormatCidFont(CompactFontFormatTopLevelDictionary topDictionary, CompactFontFormatPrivateDictionary privateDictionary, public CompactFontFormatCidFont(CompactFontFormatTopLevelDictionary topDictionary, CompactFontFormatPrivateDictionary privateDictionary,
@@ -74,12 +73,10 @@
Union<Type1CharStrings, Type2CharStrings> charStrings, Union<Type1CharStrings, Type2CharStrings> charStrings,
IReadOnlyList<CompactFontFormatTopLevelDictionary> fontDictionaries, IReadOnlyList<CompactFontFormatTopLevelDictionary> fontDictionaries,
IReadOnlyList<CompactFontFormatPrivateDictionary> privateDictionaries, IReadOnlyList<CompactFontFormatPrivateDictionary> privateDictionaries,
IReadOnlyList<CompactFontFormatIndex> localSubroutines,
ICompactFontFormatFdSelect fdSelect) : base(topDictionary, privateDictionary, charset, charStrings, null) ICompactFontFormatFdSelect fdSelect) : base(topDictionary, privateDictionary, charset, charStrings, null)
{ {
FontDictionaries = fontDictionaries; FontDictionaries = fontDictionaries;
PrivateDictionaries = privateDictionaries; PrivateDictionaries = privateDictionaries;
LocalSubroutines = localSubroutines;
FdSelect = fdSelect; FdSelect = fdSelect;
} }

View File

@@ -1,12 +1,11 @@
using System; namespace UglyToad.PdfPig.Fonts.CompactFontFormat
namespace UglyToad.PdfPig.Fonts.CompactFontFormat
{ {
using System;
using Util; using Util;
internal class CompactFontFormatIndexReader internal static class CompactFontFormatIndexReader
{ {
public CompactFontFormatIndex ReadDictionaryData(CompactFontFormatData data) public static CompactFontFormatIndex ReadDictionaryData(CompactFontFormatData data)
{ {
var index = ReadIndex(data); var index = ReadIndex(data);
@@ -39,7 +38,7 @@ namespace UglyToad.PdfPig.Fonts.CompactFontFormat
return new CompactFontFormatIndex(results); return new CompactFontFormatIndex(results);
} }
public int[] ReadIndex(CompactFontFormatData data) public static int[] ReadIndex(CompactFontFormatData data)
{ {
var count = data.ReadCard16(); var count = data.ReadCard16();

View File

@@ -13,15 +13,12 @@
internal class CompactFontFormatIndividualFontParser internal class CompactFontFormatIndividualFontParser
{ {
private readonly CompactFontFormatIndexReader indexReader;
private readonly CompactFontFormatTopLevelDictionaryReader topLevelDictionaryReader; private readonly CompactFontFormatTopLevelDictionaryReader topLevelDictionaryReader;
private readonly CompactFontFormatPrivateDictionaryReader privateDictionaryReader; private readonly CompactFontFormatPrivateDictionaryReader privateDictionaryReader;
public CompactFontFormatIndividualFontParser(CompactFontFormatIndexReader indexReader, public CompactFontFormatIndividualFontParser(CompactFontFormatTopLevelDictionaryReader topLevelDictionaryReader,
CompactFontFormatTopLevelDictionaryReader topLevelDictionaryReader,
CompactFontFormatPrivateDictionaryReader privateDictionaryReader) CompactFontFormatPrivateDictionaryReader privateDictionaryReader)
{ {
this.indexReader = indexReader;
this.topLevelDictionaryReader = topLevelDictionaryReader; this.topLevelDictionaryReader = topLevelDictionaryReader;
this.privateDictionaryReader = privateDictionaryReader; this.privateDictionaryReader = privateDictionaryReader;
} }
@@ -53,12 +50,12 @@
{ {
data.Seek(privateDictionary.LocalSubroutineOffset.Value + topDictionary.PrivateDictionaryLocation.Value.Offset); data.Seek(privateDictionary.LocalSubroutineOffset.Value + topDictionary.PrivateDictionaryLocation.Value.Offset);
localSubroutines = indexReader.ReadDictionaryData(data); localSubroutines = CompactFontFormatIndexReader.ReadDictionaryData(data);
} }
data.Seek(topDictionary.CharStringsOffset); data.Seek(topDictionary.CharStringsOffset);
var charStringIndex = indexReader.ReadDictionaryData(data); var charStringIndex = CompactFontFormatIndexReader.ReadDictionaryData(data);
ICompactFontFormatCharset charset; ICompactFontFormatCharset charset;
if (topDictionary.CharSetOffset >= 0) if (topDictionary.CharSetOffset >= 0)
@@ -94,23 +91,13 @@
} }
} }
data.Seek(topDictionary.CharStringsOffset);
Type2CharStrings charStrings;
switch (topDictionary.CharStringType)
{
case CompactFontFormatCharStringType.Type1:
throw new NotImplementedException("Type 1 CharStrings are not currently supported in CFF font.");
case CompactFontFormatCharStringType.Type2:
charStrings = Type2CharStringParser.Parse(charStringIndex, localSubroutines, globalSubroutineIndex, charset);
break;
default:
throw new ArgumentOutOfRangeException($"Unexpected CharString type in CFF font: {topDictionary.CharStringType}.");
}
if (topDictionary.IsCidFont) if (topDictionary.IsCidFont)
{ {
return ReadCidFont(data, topDictionary, charStringIndex.Count, stringIndex, privateDictionary, charset, Union<Type1CharStrings, Type2CharStrings>.Two(charStrings)); return ReadCidFont(data, topDictionary, charStringIndex.Count, stringIndex, privateDictionary,
charset,
globalSubroutineIndex,
localSubroutines,
charStringIndex);
} }
var encoding = topDictionary.EncodingOffset; var encoding = topDictionary.EncodingOffset;
@@ -133,6 +120,10 @@
} }
} }
var subroutineSelector = new CompactFontFormatSubroutinesSelector(globalSubroutineIndex, localSubroutines);
var charStrings = ReadCharStrings(data, topDictionary, charStringIndex, subroutineSelector, charset);
return new CompactFontFormatFont(topDictionary, privateDictionary, charset, Union<Type1CharStrings, Type2CharStrings>.Two(charStrings), fontEncoding); return new CompactFontFormatFont(topDictionary, privateDictionary, charset, Union<Type1CharStrings, Type2CharStrings>.Two(charStrings), fontEncoding);
} }
@@ -204,22 +195,43 @@
return "SID" + index; return "SID" + index;
} }
private static Type2CharStrings ReadCharStrings(CompactFontFormatData data, CompactFontFormatTopLevelDictionary topDictionary,
CompactFontFormatIndex charStringIndex,
CompactFontFormatSubroutinesSelector subroutinesSelector,
ICompactFontFormatCharset charset)
{
data.Seek(topDictionary.CharStringsOffset);
switch (topDictionary.CharStringType)
{
case CompactFontFormatCharStringType.Type1:
throw new NotImplementedException("Type 1 CharStrings are not currently supported in CFF font.");
case CompactFontFormatCharStringType.Type2:
return Type2CharStringParser.Parse(charStringIndex, subroutinesSelector, charset);
default:
throw new ArgumentOutOfRangeException($"Unexpected CharString type in CFF font: {topDictionary.CharStringType}.");
}
}
private CompactFontFormatCidFont ReadCidFont(CompactFontFormatData data, CompactFontFormatTopLevelDictionary topLevelDictionary, private CompactFontFormatCidFont ReadCidFont(CompactFontFormatData data, CompactFontFormatTopLevelDictionary topLevelDictionary,
int numberOfGlyphs, int numberOfGlyphs,
IReadOnlyList<string> stringIndex, IReadOnlyList<string> stringIndex,
CompactFontFormatPrivateDictionary privateDictionary, CompactFontFormatPrivateDictionary privateDictionary,
ICompactFontFormatCharset charset, ICompactFontFormatCharset charset,
Union<Type1CharStrings, Type2CharStrings> charstrings) CompactFontFormatIndex globalSubroutines,
CompactFontFormatIndex localSubroutinesTop,
CompactFontFormatIndex charStringIndex)
{ {
var offset = topLevelDictionary.CidFontOperators.FontDictionaryArray; var offset = topLevelDictionary.CidFontOperators.FontDictionaryArray;
data.Seek(offset); data.Seek(offset);
var fontDict = indexReader.ReadDictionaryData(data); var fontDict = CompactFontFormatIndexReader.ReadDictionaryData(data);
var privateDictionaries = new List<CompactFontFormatPrivateDictionary>(); var privateDictionaries = new List<CompactFontFormatPrivateDictionary>();
var fontDictionaries = new List<CompactFontFormatTopLevelDictionary>(); var fontDictionaries = new List<CompactFontFormatTopLevelDictionary>();
var fontLocalSubroutines = new List<CompactFontFormatIndex>(); var fontLocalSubroutines = new List<CompactFontFormatIndex>();
foreach (var index in fontDict) foreach (var index in fontDict)
{ {
var topLevelDictionaryCid = topLevelDictionaryReader.Read(new CompactFontFormatData(index), stringIndex); var topLevelDictionaryCid = topLevelDictionaryReader.Read(new CompactFontFormatData(index), stringIndex);
@@ -238,9 +250,13 @@
if (privateDictionaryCid.LocalSubroutineOffset.HasValue && privateDictionaryCid.LocalSubroutineOffset.Value > 0) if (privateDictionaryCid.LocalSubroutineOffset.HasValue && privateDictionaryCid.LocalSubroutineOffset.Value > 0)
{ {
data.Seek(topLevelDictionaryCid.PrivateDictionaryLocation.Value.Offset + privateDictionaryCid.LocalSubroutineOffset.Value); data.Seek(topLevelDictionaryCid.PrivateDictionaryLocation.Value.Offset + privateDictionaryCid.LocalSubroutineOffset.Value);
var localSubroutines = indexReader.ReadDictionaryData(data); var localSubroutines = CompactFontFormatIndexReader.ReadDictionaryData(data);
fontLocalSubroutines.Add(localSubroutines); fontLocalSubroutines.Add(localSubroutines);
} }
else
{
fontLocalSubroutines.Add(null);
}
fontDictionaries.Add(topLevelDictionaryCid); fontDictionaries.Add(topLevelDictionaryCid);
privateDictionaries.Add(privateDictionaryCid); privateDictionaries.Add(privateDictionaryCid);
@@ -267,8 +283,15 @@
throw new InvalidFontFormatException($"Invalid Font Dictionary Select format: {format}."); throw new InvalidFontFormatException($"Invalid Font Dictionary Select format: {format}.");
} }
return new CompactFontFormatCidFont(topLevelDictionary, privateDictionary, charset, charstrings, var subroutineSelector = new CompactFontFormatSubroutinesSelector(globalSubroutines, localSubroutinesTop,
fontDictionaries, privateDictionaries, fontLocalSubroutines, fdSelect); fdSelect, fontLocalSubroutines);
var charStrings = ReadCharStrings(data, topLevelDictionary, charStringIndex, subroutineSelector, charset);
var union = Union<Type1CharStrings, Type2CharStrings>.Two(charStrings);
return new CompactFontFormatCidFont(topLevelDictionary, privateDictionary, charset, union,
fontDictionaries, privateDictionaries, fdSelect);
} }
private static CompactFontFormat0FdSelect ReadFormat0FdSelect(CompactFontFormatData data, int numberOfGlyphs, private static CompactFontFormat0FdSelect ReadFormat0FdSelect(CompactFontFormatData data, int numberOfGlyphs,

View File

@@ -11,12 +11,10 @@
private const string TagTtfonly = "\u0000\u0001\u0000\u0000"; private const string TagTtfonly = "\u0000\u0001\u0000\u0000";
private readonly CompactFontFormatIndividualFontParser individualFontParser; private readonly CompactFontFormatIndividualFontParser individualFontParser;
private readonly CompactFontFormatIndexReader indexReader;
public CompactFontFormatParser(CompactFontFormatIndividualFontParser individualFontParser, CompactFontFormatIndexReader indexReader) public CompactFontFormatParser(CompactFontFormatIndividualFontParser individualFontParser)
{ {
this.individualFontParser = individualFontParser; this.individualFontParser = individualFontParser;
this.indexReader = indexReader;
} }
public CompactFontFormatFontProgram Parse(CompactFontFormatData data) public CompactFontFormatFontProgram Parse(CompactFontFormatData data)
@@ -41,11 +39,11 @@
var fontNames = ReadStringIndex(data); var fontNames = ReadStringIndex(data);
var topLevelDictionaryIndex = indexReader.ReadDictionaryData(data); var topLevelDictionaryIndex = CompactFontFormatIndexReader.ReadDictionaryData(data);
var stringIndex = ReadStringIndex(data); var stringIndex = ReadStringIndex(data);
var globalSubroutineIndex = indexReader.ReadDictionaryData(data); var globalSubroutineIndex = CompactFontFormatIndexReader.ReadDictionaryData(data);
var fonts = new Dictionary<string, CompactFontFormatFont>(); var fonts = new Dictionary<string, CompactFontFormatFont>();
@@ -79,9 +77,9 @@
/// <summary> /// <summary>
/// Reads indexed string data. /// Reads indexed string data.
/// </summary> /// </summary>
private string[] ReadStringIndex(CompactFontFormatData data) private static string[] ReadStringIndex(CompactFontFormatData data)
{ {
var index = indexReader.ReadIndex(data); var index = CompactFontFormatIndexReader.ReadIndex(data);
if (index.Length == 0) if (index.Length == 0)
{ {

View File

@@ -0,0 +1,53 @@
namespace UglyToad.PdfPig.Fonts.CompactFontFormat
{
using System.Collections.Generic;
/// <summary>
/// Chooses the global and local subroutine indices to use for a given glyph.
/// When built with the two-argument constructor the same pair is returned for every glyph;
/// when built with font dictionary select data, the local index is chosen per glyph.
/// </summary>
internal class CompactFontFormatSubroutinesSelector
{
    private readonly CompactFontFormatIndex global;
    private readonly CompactFontFormatIndex local;

    // CID fonts have per sub-font subroutines.
    private readonly bool isCid;
    private readonly ICompactFontFormatFdSelect fdSelect;
    private readonly IReadOnlyList<CompactFontFormatIndex> perFontLocalSubroutines;

    /// <summary>
    /// Creates a selector which always returns the supplied global and local indices.
    /// </summary>
    public CompactFontFormatSubroutinesSelector(CompactFontFormatIndex global, CompactFontFormatIndex local)
    {
        this.global = global;
        this.local = local;
    }

    /// <summary>
    /// Creates a selector which picks the local index per glyph using <paramref name="fdSelect"/>,
    /// falling back to <paramref name="local"/> when no per-font entry is available.
    /// </summary>
    public CompactFontFormatSubroutinesSelector(CompactFontFormatIndex global, CompactFontFormatIndex local,
        ICompactFontFormatFdSelect fdSelect,
        IReadOnlyList<CompactFontFormatIndex> perFontLocalSubroutines)
        : this(global, local)
    {
        this.fdSelect = fdSelect;
        this.perFontLocalSubroutines = perFontLocalSubroutines;
        isCid = true;
    }

    /// <summary>
    /// Gets the global and local subroutine indices applicable to the glyph.
    /// </summary>
    /// <param name="glyphId">The glyph id passed to the font dictionary select lookup.</param>
    /// <returns>
    /// The global index paired with either the per-font local index for the glyph or,
    /// if the font dictionary index is out of range or the per-font entry is null, the fallback local index.
    /// </returns>
    public (CompactFontFormatIndex global, CompactFontFormatIndex local) GetSubroutines(int glyphId)
    {
        var selectedLocal = local;

        if (isCid)
        {
            var fontDictionaryIndex = fdSelect.GetFontDictionaryIndex(glyphId);

            var isInRange = fontDictionaryIndex >= 0 && fontDictionaryIndex < perFontLocalSubroutines.Count;

            if (isInRange)
            {
                // A null entry means that sub-font supplied no local subroutines of its own.
                selectedLocal = perFontLocalSubroutines[fontDictionaryIndex] ?? local;
            }
        }

        return (global, selectedLocal);
    }
}
}

View File

@@ -14,6 +14,10 @@
/// </summary> /// </summary>
internal class Type0Font : IFont, IVerticalWritingSupported internal class Type0Font : IFont, IVerticalWritingSupported
{ {
private readonly CMap ucs2CMap;
// ReSharper disable once NotAccessedField.Local
private readonly bool isChineseJapaneseOrKorean;
public NameToken Name => BaseFont; public NameToken Name => BaseFont;
[NotNull] [NotNull]
@@ -30,8 +34,13 @@
public bool IsVertical => CMap.WritingMode == WritingMode.Vertical; public bool IsVertical => CMap.WritingMode == WritingMode.Vertical;
public Type0Font(NameToken baseFont, ICidFont cidFont, CMap cmap, CMap toUnicodeCMap) public Type0Font(NameToken baseFont, ICidFont cidFont, CMap cmap, CMap toUnicodeCMap,
CMap ucs2CMap,
bool isChineseJapaneseOrKorean)
{ {
this.ucs2CMap = ucs2CMap;
this.isChineseJapaneseOrKorean = isChineseJapaneseOrKorean;
BaseFont = baseFont ?? throw new ArgumentNullException(nameof(baseFont)); BaseFont = baseFont ?? throw new ArgumentNullException(nameof(baseFont));
CidFont = cidFont ?? throw new ArgumentNullException(nameof(cidFont)); CidFont = cidFont ?? throw new ArgumentNullException(nameof(cidFont));
CMap = cmap ?? throw new ArgumentNullException(nameof(cmap)); CMap = cmap ?? throw new ArgumentNullException(nameof(cmap));
@@ -55,6 +64,11 @@
if (!ToUnicode.CanMapToUnicode) if (!ToUnicode.CanMapToUnicode)
{ {
if (ucs2CMap != null && ucs2CMap.TryConvertToUnicode(characterCode, out value))
{
return value != null;
}
return false; return false;
} }

View File

@@ -18,6 +18,9 @@
private static readonly Lazy<GlyphList> LazyAdobeGlyphList = new Lazy<GlyphList>(() => GlyphListFactory.Get("glyphlist")); private static readonly Lazy<GlyphList> LazyAdobeGlyphList = new Lazy<GlyphList>(() => GlyphListFactory.Get("glyphlist"));
public static GlyphList AdobeGlyphList => LazyAdobeGlyphList.Value; public static GlyphList AdobeGlyphList => LazyAdobeGlyphList.Value;
private static readonly Lazy<GlyphList> LazyAdditionalGlyphList = new Lazy<GlyphList>(() => GlyphListFactory.Get("additional"));
public static GlyphList AdditionalGlyphList => LazyAdditionalGlyphList.Value;
private static readonly Lazy<GlyphList> LazyZapfDingbatsGlyphList = new Lazy<GlyphList>(() => GlyphListFactory.Get("zapfdingbats")); private static readonly Lazy<GlyphList> LazyZapfDingbatsGlyphList = new Lazy<GlyphList>(() => GlyphListFactory.Get("zapfdingbats"));
public static GlyphList ZapfDingbats => LazyZapfDingbatsGlyphList.Value; public static GlyphList ZapfDingbats => LazyZapfDingbatsGlyphList.Value;

View File

@@ -24,14 +24,12 @@
private readonly ILog log; private readonly ILog log;
private readonly IPdfTokenScanner pdfScanner; private readonly IPdfTokenScanner pdfScanner;
private readonly IFilterProvider filterProvider; private readonly IFilterProvider filterProvider;
private readonly CMapCache cMapCache;
private readonly FontDescriptorFactory fontDescriptorFactory; private readonly FontDescriptorFactory fontDescriptorFactory;
private readonly TrueTypeFontParser trueTypeFontParser; private readonly TrueTypeFontParser trueTypeFontParser;
private readonly IEncodingReader encodingReader; private readonly IEncodingReader encodingReader;
private readonly ISystemFontFinder systemFontFinder; private readonly ISystemFontFinder systemFontFinder;
public TrueTypeFontHandler(ILog log, IPdfTokenScanner pdfScanner, IFilterProvider filterProvider, public TrueTypeFontHandler(ILog log, IPdfTokenScanner pdfScanner, IFilterProvider filterProvider,
CMapCache cMapCache,
FontDescriptorFactory fontDescriptorFactory, FontDescriptorFactory fontDescriptorFactory,
TrueTypeFontParser trueTypeFontParser, TrueTypeFontParser trueTypeFontParser,
IEncodingReader encodingReader, IEncodingReader encodingReader,
@@ -39,7 +37,6 @@
{ {
this.log = log; this.log = log;
this.filterProvider = filterProvider; this.filterProvider = filterProvider;
this.cMapCache = cMapCache;
this.fontDescriptorFactory = fontDescriptorFactory; this.fontDescriptorFactory = fontDescriptorFactory;
this.trueTypeFontParser = trueTypeFontParser; this.trueTypeFontParser = trueTypeFontParser;
this.encodingReader = encodingReader; this.encodingReader = encodingReader;
@@ -113,7 +110,7 @@
if (decodedUnicodeCMap != null) if (decodedUnicodeCMap != null)
{ {
toUnicodeCMap = cMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing); toUnicodeCMap = CMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing);
} }
} }

View File

@@ -16,15 +16,13 @@
internal class Type0FontHandler : IFontHandler internal class Type0FontHandler : IFontHandler
{ {
private readonly CidFontFactory cidFontFactory; private readonly CidFontFactory cidFontFactory;
private readonly CMapCache cMapCache;
private readonly IFilterProvider filterProvider; private readonly IFilterProvider filterProvider;
private readonly IPdfTokenScanner scanner; private readonly IPdfTokenScanner scanner;
public Type0FontHandler(CidFontFactory cidFontFactory, CMapCache cMapCache, IFilterProvider filterProvider, public Type0FontHandler(CidFontFactory cidFontFactory, IFilterProvider filterProvider,
IPdfTokenScanner scanner) IPdfTokenScanner scanner)
{ {
this.cidFontFactory = cidFontFactory; this.cidFontFactory = cidFontFactory;
this.cMapCache = cMapCache;
this.filterProvider = filterProvider; this.filterProvider = filterProvider;
this.scanner = scanner; this.scanner = scanner;
} }
@@ -59,7 +57,7 @@
throw new InvalidFontFormatException("No descendant font dictionary was declared for this Type 0 font. This dictionary should contain the CIDFont for the Type 0 font. " + dictionary); throw new InvalidFontFormatException("No descendant font dictionary was declared for this Type 0 font. This dictionary should contain the CIDFont for the Type 0 font. " + dictionary);
} }
var ucs2CMap = GetUcs2CMap(dictionary, isCMapPredefined, false); var (ucs2CMap, isChineseJapaneseOrKorean) = GetUcs2CMap(dictionary, isCMapPredefined, cidFont);
CMap toUnicodeCMap = null; CMap toUnicodeCMap = null;
if (dictionary.ContainsKey(NameToken.ToUnicode)) if (dictionary.ContainsKey(NameToken.ToUnicode))
@@ -72,11 +70,11 @@
if (decodedUnicodeCMap != null) if (decodedUnicodeCMap != null)
{ {
toUnicodeCMap = cMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing); toUnicodeCMap = CMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing);
} }
} }
var font = new Type0Font(baseFont, cidFont, cMap, toUnicodeCMap); var font = new Type0Font(baseFont, cidFont, cMap, toUnicodeCMap, ucs2CMap, isChineseJapaneseOrKorean);
return font; return font;
} }
@@ -139,7 +137,7 @@
{ {
if (value is NameToken encodingName) if (value is NameToken encodingName)
{ {
var cmap = cMapCache.Get(encodingName.Data); var cmap = CMapCache.Get(encodingName.Data);
result = cmap ?? throw new InvalidOperationException("Missing CMap for " + encodingName.Data); result = cmap ?? throw new InvalidOperationException("Missing CMap for " + encodingName.Data);
@@ -149,7 +147,7 @@
{ {
var decoded = stream.Decode(filterProvider); var decoded = stream.Decode(filterProvider);
var cmap = cMapCache.Parse(new ByteArrayInputBytes(decoded), false); var cmap = CMapCache.Parse(new ByteArrayInputBytes(decoded), false);
result = cmap ?? throw new InvalidOperationException("Could not read CMap for " + dictionary); result = cmap ?? throw new InvalidOperationException("Could not read CMap for " + dictionary);
} }
@@ -162,11 +160,11 @@
return result; return result;
} }
private static CMap GetUcs2CMap(DictionaryToken dictionary, bool isCMapPredefined, bool usesDescendantAdobeFont) private static (CMap, bool isChineseJapaneseOrKorean) GetUcs2CMap(DictionaryToken dictionary, bool isCMapPredefined, ICidFont cidFont)
{ {
if (!isCMapPredefined) if (!isCMapPredefined)
{ {
return null; return (null, false);
} }
/* /*
@@ -178,17 +176,43 @@
if (encodingName == null) if (encodingName == null)
{ {
return null; return (null, false);
} }
var isChineseJapaneseOrKorean = false;
if (cidFont != null && string.Equals(cidFont.SystemInfo.Registry, "Adobe", StringComparison.OrdinalIgnoreCase))
{
isChineseJapaneseOrKorean = string.Equals(cidFont.SystemInfo.Ordering, "GB1", StringComparison.OrdinalIgnoreCase)
|| string.Equals(cidFont.SystemInfo.Ordering, "CNS1", StringComparison.OrdinalIgnoreCase)
|| string.Equals(cidFont.SystemInfo.Ordering, "Japan1", StringComparison.OrdinalIgnoreCase)
|| string.Equals(cidFont.SystemInfo.Ordering, "Korea1", StringComparison.OrdinalIgnoreCase);
}
var isPredefinedIdentityMap = encodingName.Equals(NameToken.IdentityH) || encodingName.Equals(NameToken.IdentityV); var isPredefinedIdentityMap = encodingName.Equals(NameToken.IdentityH) || encodingName.Equals(NameToken.IdentityV);
if (isPredefinedIdentityMap && !usesDescendantAdobeFont) if (isPredefinedIdentityMap && !isChineseJapaneseOrKorean)
{ {
return null; return (null, false);
} }
throw new NotSupportedException("Support for UCS2 CMaps are not implemented yet. Please raise an issue."); if (!isChineseJapaneseOrKorean)
{
return (null, false);
}
var fullCmapName = cidFont.SystemInfo.ToString();
var nonUnicodeCMap = CMapCache.Get(fullCmapName);
if (nonUnicodeCMap == null)
{
return (null, true);
}
var unicodeCMapName = $"{nonUnicodeCMap.Info.Registry}-{nonUnicodeCMap.Info.Ordering}-UCS2";
return (CMapCache.Get(unicodeCMapName), true);
} }
} }
} }

View File

@@ -19,21 +19,19 @@
internal class Type1FontHandler : IFontHandler internal class Type1FontHandler : IFontHandler
{ {
private readonly IPdfTokenScanner pdfScanner; private readonly IPdfTokenScanner pdfScanner;
private readonly CMapCache cMapCache;
private readonly IFilterProvider filterProvider; private readonly IFilterProvider filterProvider;
private readonly FontDescriptorFactory fontDescriptorFactory; private readonly FontDescriptorFactory fontDescriptorFactory;
private readonly IEncodingReader encodingReader; private readonly IEncodingReader encodingReader;
private readonly Type1FontParser type1FontParser; private readonly Type1FontParser type1FontParser;
private readonly CompactFontFormatParser compactFontFormatParser; private readonly CompactFontFormatParser compactFontFormatParser;
public Type1FontHandler(IPdfTokenScanner pdfScanner, CMapCache cMapCache, IFilterProvider filterProvider, public Type1FontHandler(IPdfTokenScanner pdfScanner, IFilterProvider filterProvider,
FontDescriptorFactory fontDescriptorFactory, FontDescriptorFactory fontDescriptorFactory,
IEncodingReader encodingReader, IEncodingReader encodingReader,
Type1FontParser type1FontParser, Type1FontParser type1FontParser,
CompactFontFormatParser compactFontFormatParser) CompactFontFormatParser compactFontFormatParser)
{ {
this.pdfScanner = pdfScanner; this.pdfScanner = pdfScanner;
this.cMapCache = cMapCache;
this.filterProvider = filterProvider; this.filterProvider = filterProvider;
this.fontDescriptorFactory = fontDescriptorFactory; this.fontDescriptorFactory = fontDescriptorFactory;
this.encodingReader = encodingReader; this.encodingReader = encodingReader;
@@ -95,7 +93,7 @@
if (decodedUnicodeCMap != null) if (decodedUnicodeCMap != null)
{ {
toUnicodeCMap = cMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing); toUnicodeCMap = CMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing);
} }
} }

View File

@@ -15,15 +15,13 @@
internal class Type3FontHandler : IFontHandler internal class Type3FontHandler : IFontHandler
{ {
private readonly CMapCache cMapCache;
private readonly IFilterProvider filterProvider; private readonly IFilterProvider filterProvider;
private readonly IEncodingReader encodingReader; private readonly IEncodingReader encodingReader;
private readonly IPdfTokenScanner scanner; private readonly IPdfTokenScanner scanner;
public Type3FontHandler(IPdfTokenScanner scanner, CMapCache cMapCache, IFilterProvider filterProvider, public Type3FontHandler(IPdfTokenScanner scanner, IFilterProvider filterProvider,
IEncodingReader encodingReader) IEncodingReader encodingReader)
{ {
this.cMapCache = cMapCache;
this.filterProvider = filterProvider; this.filterProvider = filterProvider;
this.encodingReader = encodingReader; this.encodingReader = encodingReader;
this.scanner = scanner; this.scanner = scanner;
@@ -50,7 +48,7 @@
if (decodedUnicodeCMap != null) if (decodedUnicodeCMap != null)
{ {
toUnicodeCMap = cMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing); toUnicodeCMap = CMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing);
} }
} }

View File

@@ -71,9 +71,9 @@
// Behaviour specified by the Extraction of Text Content section of the specification. // Behaviour specified by the Extraction of Text Content section of the specification.
// If the font contains a ToUnicode CMap use that. // If the font contains a ToUnicode CMap use that.
if (ToUnicode.CanMapToUnicode) if (ToUnicode.CanMapToUnicode && ToUnicode.TryGet(characterCode, out value))
{ {
return ToUnicode.TryGet(characterCode, out value); return true;
} }
if (encoding == null) if (encoding == null)
@@ -86,17 +86,18 @@
// Map the character code to a character name. // Map the character code to a character name.
var encodedCharacterName = encoding.GetName(characterCode); var encodedCharacterName = encoding.GetName(characterCode);
// Look up the character name in the Adobe Glyph List. // Look up the character name in the Adobe Glyph List or additional Glyph List.
try try
{ {
value = GlyphList.AdobeGlyphList.NameToUnicode(encodedCharacterName); value = GlyphList.AdobeGlyphList.NameToUnicode(encodedCharacterName)
?? GlyphList.AdditionalGlyphList.NameToUnicode(encodedCharacterName);
} }
catch catch
{ {
return false; return false;
} }
return true; return value != null;
} }
public CharacterBoundingBox GetBoundingBox(int characterCode) public CharacterBoundingBox GetBoundingBox(int characterCode)

View File

@@ -71,7 +71,6 @@
{ {
var log = container.Get<ILog>(); var log = container.Get<ILog>();
var filterProvider = container.Get<IFilterProvider>(); var filterProvider = container.Get<IFilterProvider>();
var cMapCache = new CMapCache(new CMapParser());
CrossReferenceTable crossReferenceTable = null; CrossReferenceTable crossReferenceTable = null;
@@ -100,9 +99,8 @@
var trueTypeFontParser = new TrueTypeFontParser(); var trueTypeFontParser = new TrueTypeFontParser();
var fontDescriptorFactory = new FontDescriptorFactory(); var fontDescriptorFactory = new FontDescriptorFactory();
var compactFontFormatIndexReader = new CompactFontFormatIndexReader(); var compactFontFormatParser = new CompactFontFormatParser(new CompactFontFormatIndividualFontParser(new CompactFontFormatTopLevelDictionaryReader(),
var compactFontFormatParser = new CompactFontFormatParser(new CompactFontFormatIndividualFontParser(compactFontFormatIndexReader, new CompactFontFormatTopLevelDictionaryReader(), new CompactFontFormatPrivateDictionaryReader()));
new CompactFontFormatPrivateDictionaryReader()), compactFontFormatIndexReader);
var (rootReference, rootDictionary) = ParseTrailer(crossReferenceTable, isLenientParsing, var (rootReference, rootDictionary) = ParseTrailer(crossReferenceTable, isLenientParsing,
pdfScanner, pdfScanner,
@@ -117,12 +115,11 @@
var encodingReader = new EncodingReader(pdfScanner); var encodingReader = new EncodingReader(pdfScanner);
var fontFactory = new FontFactory(log, new Type0FontHandler(cidFontFactory, var fontFactory = new FontFactory(log, new Type0FontHandler(cidFontFactory,
cMapCache,
filterProvider, pdfScanner), filterProvider, pdfScanner),
new TrueTypeFontHandler(log, pdfScanner, filterProvider, cMapCache, fontDescriptorFactory, trueTypeFontParser, encodingReader, new SystemFontFinder(new TrueTypeFontParser())), new TrueTypeFontHandler(log, pdfScanner, filterProvider, fontDescriptorFactory, trueTypeFontParser, encodingReader, new SystemFontFinder(new TrueTypeFontParser())),
new Type1FontHandler(pdfScanner, cMapCache, filterProvider, fontDescriptorFactory, encodingReader, new Type1FontHandler(pdfScanner, filterProvider, fontDescriptorFactory, encodingReader,
new Type1FontParser(new Type1EncryptedPortionParser()), compactFontFormatParser), new Type1FontParser(new Type1EncryptedPortionParser()), compactFontFormatParser),
new Type3FontHandler(pdfScanner, cMapCache, filterProvider, encodingReader)); new Type3FontHandler(pdfScanner, filterProvider, encodingReader));
var resourceContainer = new ResourceStore(pdfScanner, fontFactory); var resourceContainer = new ResourceStore(pdfScanner, fontFactory);

View File

@@ -108,6 +108,7 @@ logicalanddisplay;2227
logicalandtext;2227 logicalandtext;2227
logicalordisplay;2228 logicalordisplay;2228
logicalortext;2228 logicalortext;2228
nonmarkingreturn;000D
parenleftBig;0028 parenleftBig;0028
parenleftbig;0028 parenleftbig;0028
parenleftBigg;0028 parenleftBigg;0028