mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-09-19 19:07:56 +08:00
Use CID font subroutines where applicable. Add UCS-2 CMap support for Type 0 fonts
* CID CFF fonts have multiple sub-fonts and multiple private dictionaries, in addition to a top-level font and private dictionary. This fix uses the specific sub-dictionary when getting local subroutines on a per-glyph basis.
* Chinese, Japanese or Korean fonts can use a UCS-2 encoding CMap for retrieving Unicode values.
* Add support for the additional glyph list for Unicode values in TrueType fonts. Adds a nonmarkingreturn mapping to carriage return.
* Make font parsing classes static where there's no reason for them to be per-instance.
This commit is contained in:
@@ -4,15 +4,13 @@
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using PdfPig.Fonts.CompactFontFormat;
|
||||
using PdfPig.Fonts.CompactFontFormat.CharStrings;
|
||||
using PdfPig.Fonts.CompactFontFormat.Dictionaries;
|
||||
using Xunit;
|
||||
|
||||
public class CompactFontFormatParserTests
|
||||
{
|
||||
private readonly CompactFontFormatParser parser = new CompactFontFormatParser(new CompactFontFormatIndividualFontParser(
|
||||
new CompactFontFormatIndexReader(), new CompactFontFormatTopLevelDictionaryReader(), new CompactFontFormatPrivateDictionaryReader()),
|
||||
new CompactFontFormatIndexReader());
|
||||
private readonly CompactFontFormatParser parser = new CompactFontFormatParser(
|
||||
new CompactFontFormatIndividualFontParser(new CompactFontFormatTopLevelDictionaryReader(), new CompactFontFormatPrivateDictionaryReader()));
|
||||
|
||||
[Fact]
|
||||
public void CanReadMinionPro()
|
||||
|
@@ -1,45 +0,0 @@
|
||||
namespace UglyToad.PdfPig.Fonts
{
    using System;
    using System.Collections.Generic;
    using Cmap;
    using IO;
    using Parser;

    /// <summary>
    /// Stores CMaps loaded by name so that repeated requests for the same name
    /// reuse the previously parsed result instead of parsing again.
    /// </summary>
    internal class CMapCache
    {
        // CMap names are identifiers rather than linguistic text, so a culture-independent
        // (ordinal) case-insensitive comparison is used for the lookup keys.
        private readonly Dictionary<string, CMap> cache = new Dictionary<string, CMap>(StringComparer.OrdinalIgnoreCase);
        private readonly CMapParser cMapParser;

        /// <summary>
        /// Create a new <see cref="CMapCache"/> which parses using the provided parser.
        /// </summary>
        /// <param name="cMapParser">The parser used both for named (external) CMaps and for raw CMap bytes.</param>
        public CMapCache(CMapParser cMapParser)
        {
            this.cMapParser = cMapParser;
        }

        /// <summary>
        /// Get the CMap with the given name, parsing it on first request and returning the stored value afterwards.
        /// </summary>
        /// <param name="name">The name of the CMap; matched case-insensitively.</param>
        /// <returns>Whatever the parser returned for this name the first time it was requested.</returns>
        public CMap Get(string name)
        {
            if (cache.TryGetValue(name, out var result))
            {
                return result;
            }

            result = cMapParser.ParseExternal(name);

            cache[name] = result;

            return result;
        }

        /// <summary>
        /// Parse a CMap from the provided bytes. The result is not stored in the cache.
        /// </summary>
        /// <param name="bytes">The bytes of the CMap to parse.</param>
        /// <param name="isLenientParsing">Passed through to the parser unchanged.</param>
        /// <returns>The parser's result for the supplied bytes.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="bytes"/> is <see langword="null"/>.</exception>
        public CMap Parse(IInputBytes bytes, bool isLenientParsing)
        {
            if (bytes == null)
            {
                throw new ArgumentNullException(nameof(bytes));
            }

            var result = cMapParser.Parse(bytes, isLenientParsing);

            return result;
        }
    }
}
|
@@ -31,7 +31,7 @@
|
||||
|
||||
public override string ToString()
|
||||
{
|
||||
return $"{Registry} | {Ordering} | {Supplement}";
|
||||
return $"{Registry}-{Ordering}-{Supplement}";
|
||||
}
|
||||
}
|
||||
}
|
44
src/UglyToad.PdfPig/Fonts/Cmap/CMapCache.cs
Normal file
44
src/UglyToad.PdfPig/Fonts/Cmap/CMapCache.cs
Normal file
@@ -0,0 +1,44 @@
|
||||
namespace UglyToad.PdfPig.Fonts.Cmap
{
    using System;
    using System.Collections.Generic;
    using IO;
    using Parser;

    /// <summary>
    /// Process-wide store of CMaps loaded by name, plus a pass-through for parsing CMaps from raw bytes.
    /// </summary>
    internal static class CMapCache
    {
        // Keys are CMap names, compared ordinally and without regard to case.
        private static readonly Dictionary<string, CMap> Cache = new Dictionary<string, CMap>(StringComparer.OrdinalIgnoreCase);

        // Guards every read and write of Cache performed by Get.
        private static readonly object Lock = new object();

        private static readonly CMapParser CMapParser = new CMapParser();

        /// <summary>
        /// Get the CMap with the given name. The first request for a name parses it and stores the
        /// result; later requests return the stored value. The whole operation runs under a lock.
        /// </summary>
        /// <param name="name">The name of the CMap; matched case-insensitively.</param>
        /// <returns>Whatever the parser returned for this name the first time it was requested.</returns>
        public static CMap Get(string name)
        {
            lock (Lock)
            {
                if (!Cache.TryGetValue(name, out var cMap))
                {
                    cMap = CMapParser.ParseExternal(name);
                    Cache[name] = cMap;
                }

                return cMap;
            }
        }

        /// <summary>
        /// Parse a CMap from the provided bytes. The result is not stored in the cache.
        /// </summary>
        /// <param name="bytes">The bytes of the CMap to parse.</param>
        /// <param name="isLenientParsing">Passed through to the parser unchanged.</param>
        /// <returns>The parser's result for the supplied bytes.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="bytes"/> is <see langword="null"/>.</exception>
        public static CMap Parse(IInputBytes bytes, bool isLenientParsing)
        {
            if (bytes == null)
            {
                throw new ArgumentNullException(nameof(bytes));
            }

            return CMapParser.Parse(bytes, isLenientParsing);
        }
    }
}
|
@@ -2,7 +2,7 @@
|
||||
{
|
||||
using System.Collections.Generic;
|
||||
|
||||
internal static class CmapUtils
|
||||
internal static class CMapUtils
|
||||
{
|
||||
public static int ToInt(this IReadOnlyList<byte> data, int length)
|
||||
{
|
||||
|
@@ -683,22 +683,16 @@
|
||||
};
|
||||
|
||||
public static Type2CharStrings Parse([NotNull] IReadOnlyList<IReadOnlyList<byte>> charStringBytes,
|
||||
[NotNull] CompactFontFormatIndex localSubroutines,
|
||||
[NotNull] CompactFontFormatIndex globalSubroutines, ICompactFontFormatCharset charset)
|
||||
CompactFontFormatSubroutinesSelector subroutinesSelector, ICompactFontFormatCharset charset)
|
||||
{
|
||||
if (charStringBytes == null)
|
||||
{
|
||||
throw new ArgumentNullException(nameof(charStringBytes));
|
||||
}
|
||||
|
||||
if (localSubroutines == null)
|
||||
if (subroutinesSelector == null)
|
||||
{
|
||||
throw new ArgumentNullException(nameof(localSubroutines));
|
||||
}
|
||||
|
||||
if (globalSubroutines == null)
|
||||
{
|
||||
throw new ArgumentNullException(nameof(globalSubroutines));
|
||||
throw new ArgumentNullException(nameof(subroutinesSelector));
|
||||
}
|
||||
|
||||
var charStrings = new Dictionary<string, Type2CharStrings.CommandSequence>();
|
||||
@@ -706,11 +700,12 @@
|
||||
{
|
||||
var charString = charStringBytes[i];
|
||||
var name = charset.GetNameByGlyphId(i);
|
||||
var (globalSubroutines, localSubroutines) = subroutinesSelector.GetSubroutines(i);
|
||||
var sequence = ParseSingle(charString.ToList(), localSubroutines, globalSubroutines);
|
||||
charStrings[name] = new Type2CharStrings.CommandSequence(sequence);
|
||||
}
|
||||
|
||||
return new Type2CharStrings(charStrings, localSubroutines, globalSubroutines);
|
||||
return new Type2CharStrings(charStrings);
|
||||
}
|
||||
|
||||
private static IReadOnlyList<Union<decimal, LazyType2Command>> ParseSingle(List<byte> bytes,
|
||||
|
@@ -3,7 +3,6 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using Dictionaries;
|
||||
using Geometry;
|
||||
using Util;
|
||||
using Util.JetBrains.Annotations;
|
||||
@@ -23,24 +22,10 @@
|
||||
/// </summary>
|
||||
public IReadOnlyDictionary<string, CommandSequence> CharStrings { get; }
|
||||
|
||||
/// <summary>
|
||||
/// The indexed bytes for the local subroutines in this font.
|
||||
/// </summary>
|
||||
[NotNull]
|
||||
public CompactFontFormatIndex LocalSubroutines { get; }
|
||||
|
||||
/// <summary>
|
||||
/// The indexed bytes for the global subroutines in this font set.
|
||||
/// </summary>
|
||||
[NotNull]
|
||||
public CompactFontFormatIndex GlobalSubroutines { get; }
|
||||
|
||||
public Type2CharStrings(IReadOnlyDictionary<string, CommandSequence> charStrings, CompactFontFormatIndex localSubroutines,
|
||||
CompactFontFormatIndex globalSubroutines)
|
||||
public Type2CharStrings(IReadOnlyDictionary<string, CommandSequence> charStrings)
|
||||
{
|
||||
CharStrings = charStrings ?? throw new ArgumentNullException(nameof(charStrings));
|
||||
LocalSubroutines = localSubroutines ?? throw new ArgumentNullException(nameof(localSubroutines));
|
||||
GlobalSubroutines = globalSubroutines ?? throw new ArgumentNullException(nameof(globalSubroutines));
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
@@ -66,7 +66,6 @@
|
||||
{
|
||||
public IReadOnlyList<CompactFontFormatTopLevelDictionary> FontDictionaries { get; }
|
||||
public IReadOnlyList<CompactFontFormatPrivateDictionary> PrivateDictionaries { get; }
|
||||
public IReadOnlyList<CompactFontFormatIndex> LocalSubroutines { get; }
|
||||
public ICompactFontFormatFdSelect FdSelect { get; }
|
||||
|
||||
public CompactFontFormatCidFont(CompactFontFormatTopLevelDictionary topDictionary, CompactFontFormatPrivateDictionary privateDictionary,
|
||||
@@ -74,12 +73,10 @@
|
||||
Union<Type1CharStrings, Type2CharStrings> charStrings,
|
||||
IReadOnlyList<CompactFontFormatTopLevelDictionary> fontDictionaries,
|
||||
IReadOnlyList<CompactFontFormatPrivateDictionary> privateDictionaries,
|
||||
IReadOnlyList<CompactFontFormatIndex> localSubroutines,
|
||||
ICompactFontFormatFdSelect fdSelect) : base(topDictionary, privateDictionary, charset, charStrings, null)
|
||||
{
|
||||
FontDictionaries = fontDictionaries;
|
||||
PrivateDictionaries = privateDictionaries;
|
||||
LocalSubroutines = localSubroutines;
|
||||
FdSelect = fdSelect;
|
||||
}
|
||||
|
||||
|
@@ -1,12 +1,11 @@
|
||||
using System;
|
||||
|
||||
namespace UglyToad.PdfPig.Fonts.CompactFontFormat
|
||||
namespace UglyToad.PdfPig.Fonts.CompactFontFormat
|
||||
{
|
||||
using System;
|
||||
using Util;
|
||||
|
||||
internal class CompactFontFormatIndexReader
|
||||
internal static class CompactFontFormatIndexReader
|
||||
{
|
||||
public CompactFontFormatIndex ReadDictionaryData(CompactFontFormatData data)
|
||||
public static CompactFontFormatIndex ReadDictionaryData(CompactFontFormatData data)
|
||||
{
|
||||
var index = ReadIndex(data);
|
||||
|
||||
@@ -39,7 +38,7 @@ namespace UglyToad.PdfPig.Fonts.CompactFontFormat
|
||||
return new CompactFontFormatIndex(results);
|
||||
}
|
||||
|
||||
public int[] ReadIndex(CompactFontFormatData data)
|
||||
public static int[] ReadIndex(CompactFontFormatData data)
|
||||
{
|
||||
var count = data.ReadCard16();
|
||||
|
||||
|
@@ -13,15 +13,12 @@
|
||||
|
||||
internal class CompactFontFormatIndividualFontParser
|
||||
{
|
||||
private readonly CompactFontFormatIndexReader indexReader;
|
||||
private readonly CompactFontFormatTopLevelDictionaryReader topLevelDictionaryReader;
|
||||
private readonly CompactFontFormatPrivateDictionaryReader privateDictionaryReader;
|
||||
|
||||
public CompactFontFormatIndividualFontParser(CompactFontFormatIndexReader indexReader,
|
||||
CompactFontFormatTopLevelDictionaryReader topLevelDictionaryReader,
|
||||
public CompactFontFormatIndividualFontParser(CompactFontFormatTopLevelDictionaryReader topLevelDictionaryReader,
|
||||
CompactFontFormatPrivateDictionaryReader privateDictionaryReader)
|
||||
{
|
||||
this.indexReader = indexReader;
|
||||
this.topLevelDictionaryReader = topLevelDictionaryReader;
|
||||
this.privateDictionaryReader = privateDictionaryReader;
|
||||
}
|
||||
@@ -53,12 +50,12 @@
|
||||
{
|
||||
data.Seek(privateDictionary.LocalSubroutineOffset.Value + topDictionary.PrivateDictionaryLocation.Value.Offset);
|
||||
|
||||
localSubroutines = indexReader.ReadDictionaryData(data);
|
||||
localSubroutines = CompactFontFormatIndexReader.ReadDictionaryData(data);
|
||||
}
|
||||
|
||||
data.Seek(topDictionary.CharStringsOffset);
|
||||
|
||||
var charStringIndex = indexReader.ReadDictionaryData(data);
|
||||
var charStringIndex = CompactFontFormatIndexReader.ReadDictionaryData(data);
|
||||
|
||||
ICompactFontFormatCharset charset;
|
||||
if (topDictionary.CharSetOffset >= 0)
|
||||
@@ -93,24 +90,14 @@
|
||||
charset = CompactFontFormatIsoAdobeCharset.Value;
|
||||
}
|
||||
}
|
||||
|
||||
data.Seek(topDictionary.CharStringsOffset);
|
||||
|
||||
Type2CharStrings charStrings;
|
||||
switch (topDictionary.CharStringType)
|
||||
{
|
||||
case CompactFontFormatCharStringType.Type1:
|
||||
throw new NotImplementedException("Type 1 CharStrings are not currently supported in CFF font.");
|
||||
case CompactFontFormatCharStringType.Type2:
|
||||
charStrings = Type2CharStringParser.Parse(charStringIndex, localSubroutines, globalSubroutineIndex, charset);
|
||||
break;
|
||||
default:
|
||||
throw new ArgumentOutOfRangeException($"Unexpected CharString type in CFF font: {topDictionary.CharStringType}.");
|
||||
}
|
||||
|
||||
|
||||
if (topDictionary.IsCidFont)
|
||||
{
|
||||
return ReadCidFont(data, topDictionary, charStringIndex.Count, stringIndex, privateDictionary, charset, Union<Type1CharStrings, Type2CharStrings>.Two(charStrings));
|
||||
return ReadCidFont(data, topDictionary, charStringIndex.Count, stringIndex, privateDictionary,
|
||||
charset,
|
||||
globalSubroutineIndex,
|
||||
localSubroutines,
|
||||
charStringIndex);
|
||||
}
|
||||
|
||||
var encoding = topDictionary.EncodingOffset;
|
||||
@@ -133,6 +120,10 @@
|
||||
}
|
||||
}
|
||||
|
||||
var subroutineSelector = new CompactFontFormatSubroutinesSelector(globalSubroutineIndex, localSubroutines);
|
||||
|
||||
var charStrings = ReadCharStrings(data, topDictionary, charStringIndex, subroutineSelector, charset);
|
||||
|
||||
return new CompactFontFormatFont(topDictionary, privateDictionary, charset, Union<Type1CharStrings, Type2CharStrings>.Two(charStrings), fontEncoding);
|
||||
}
|
||||
|
||||
@@ -204,22 +195,43 @@
|
||||
return "SID" + index;
|
||||
}
|
||||
|
||||
/// <summary>
/// Moves <paramref name="data"/> to the CharStrings offset declared by the top dictionary and
/// parses the CharStrings according to the CharString type declared by that dictionary.
/// </summary>
/// <param name="data">The font data; its position is changed by this call.</param>
/// <param name="topDictionary">Supplies the CharStrings offset and the CharString type.</param>
/// <param name="charStringIndex">The indexed CharString bytes handed to the Type 2 parser.</param>
/// <param name="subroutinesSelector">Provides the subroutines to use for each glyph.</param>
/// <param name="charset">The charset handed to the Type 2 parser.</param>
/// <returns>The parsed Type 2 CharStrings.</returns>
/// <exception cref="NotImplementedException">The font declares Type 1 CharStrings.</exception>
/// <exception cref="ArgumentOutOfRangeException">The font declares a CharString type other than Type 1 or Type 2.</exception>
private static Type2CharStrings ReadCharStrings(CompactFontFormatData data, CompactFontFormatTopLevelDictionary topDictionary,
    CompactFontFormatIndex charStringIndex,
    CompactFontFormatSubroutinesSelector subroutinesSelector,
    ICompactFontFormatCharset charset)
{
    data.Seek(topDictionary.CharStringsOffset);

    switch (topDictionary.CharStringType)
    {
        case CompactFontFormatCharStringType.Type1:
            throw new NotImplementedException("Type 1 CharStrings are not currently supported in CFF font.");
        case CompactFontFormatCharStringType.Type2:
            return Type2CharStringParser.Parse(charStringIndex, subroutinesSelector, charset);
        default:
            // The single-string constructor interprets its argument as the parameter name,
            // so the parameter name and the message are passed separately.
            throw new ArgumentOutOfRangeException(nameof(topDictionary),
                $"Unexpected CharString type in CFF font: {topDictionary.CharStringType}.");
    }
}
|
||||
|
||||
private CompactFontFormatCidFont ReadCidFont(CompactFontFormatData data, CompactFontFormatTopLevelDictionary topLevelDictionary,
|
||||
int numberOfGlyphs,
|
||||
IReadOnlyList<string> stringIndex,
|
||||
CompactFontFormatPrivateDictionary privateDictionary,
|
||||
ICompactFontFormatCharset charset,
|
||||
Union<Type1CharStrings, Type2CharStrings> charstrings)
|
||||
CompactFontFormatIndex globalSubroutines,
|
||||
CompactFontFormatIndex localSubroutinesTop,
|
||||
CompactFontFormatIndex charStringIndex)
|
||||
{
|
||||
var offset = topLevelDictionary.CidFontOperators.FontDictionaryArray;
|
||||
|
||||
data.Seek(offset);
|
||||
|
||||
var fontDict = indexReader.ReadDictionaryData(data);
|
||||
var fontDict = CompactFontFormatIndexReader.ReadDictionaryData(data);
|
||||
|
||||
var privateDictionaries = new List<CompactFontFormatPrivateDictionary>();
|
||||
var fontDictionaries = new List<CompactFontFormatTopLevelDictionary>();
|
||||
var fontLocalSubroutines = new List<CompactFontFormatIndex>();
|
||||
|
||||
foreach (var index in fontDict)
|
||||
{
|
||||
var topLevelDictionaryCid = topLevelDictionaryReader.Read(new CompactFontFormatData(index), stringIndex);
|
||||
@@ -238,9 +250,13 @@
|
||||
if (privateDictionaryCid.LocalSubroutineOffset.HasValue && privateDictionaryCid.LocalSubroutineOffset.Value > 0)
|
||||
{
|
||||
data.Seek(topLevelDictionaryCid.PrivateDictionaryLocation.Value.Offset + privateDictionaryCid.LocalSubroutineOffset.Value);
|
||||
var localSubroutines = indexReader.ReadDictionaryData(data);
|
||||
var localSubroutines = CompactFontFormatIndexReader.ReadDictionaryData(data);
|
||||
fontLocalSubroutines.Add(localSubroutines);
|
||||
}
|
||||
else
|
||||
{
|
||||
fontLocalSubroutines.Add(null);
|
||||
}
|
||||
|
||||
fontDictionaries.Add(topLevelDictionaryCid);
|
||||
privateDictionaries.Add(privateDictionaryCid);
|
||||
@@ -267,8 +283,15 @@
|
||||
throw new InvalidFontFormatException($"Invalid Font Dictionary Select format: {format}.");
|
||||
}
|
||||
|
||||
return new CompactFontFormatCidFont(topLevelDictionary, privateDictionary, charset, charstrings,
|
||||
fontDictionaries, privateDictionaries, fontLocalSubroutines, fdSelect);
|
||||
var subroutineSelector = new CompactFontFormatSubroutinesSelector(globalSubroutines, localSubroutinesTop,
|
||||
fdSelect, fontLocalSubroutines);
|
||||
|
||||
var charStrings = ReadCharStrings(data, topLevelDictionary, charStringIndex, subroutineSelector, charset);
|
||||
|
||||
var union = Union<Type1CharStrings, Type2CharStrings>.Two(charStrings);
|
||||
|
||||
return new CompactFontFormatCidFont(topLevelDictionary, privateDictionary, charset, union,
|
||||
fontDictionaries, privateDictionaries, fdSelect);
|
||||
}
|
||||
|
||||
private static CompactFontFormat0FdSelect ReadFormat0FdSelect(CompactFontFormatData data, int numberOfGlyphs,
|
||||
|
@@ -11,12 +11,10 @@
|
||||
private const string TagTtfonly = "\u0000\u0001\u0000\u0000";
|
||||
|
||||
private readonly CompactFontFormatIndividualFontParser individualFontParser;
|
||||
private readonly CompactFontFormatIndexReader indexReader;
|
||||
|
||||
public CompactFontFormatParser(CompactFontFormatIndividualFontParser individualFontParser, CompactFontFormatIndexReader indexReader)
|
||||
public CompactFontFormatParser(CompactFontFormatIndividualFontParser individualFontParser)
|
||||
{
|
||||
this.individualFontParser = individualFontParser;
|
||||
this.indexReader = indexReader;
|
||||
}
|
||||
|
||||
public CompactFontFormatFontProgram Parse(CompactFontFormatData data)
|
||||
@@ -41,11 +39,11 @@
|
||||
|
||||
var fontNames = ReadStringIndex(data);
|
||||
|
||||
var topLevelDictionaryIndex = indexReader.ReadDictionaryData(data);
|
||||
var topLevelDictionaryIndex = CompactFontFormatIndexReader.ReadDictionaryData(data);
|
||||
|
||||
var stringIndex = ReadStringIndex(data);
|
||||
|
||||
var globalSubroutineIndex = indexReader.ReadDictionaryData(data);
|
||||
var globalSubroutineIndex = CompactFontFormatIndexReader.ReadDictionaryData(data);
|
||||
|
||||
var fonts = new Dictionary<string, CompactFontFormatFont>();
|
||||
|
||||
@@ -79,9 +77,9 @@
|
||||
/// <summary>
|
||||
/// Reads indexed string data.
|
||||
/// </summary>
|
||||
private string[] ReadStringIndex(CompactFontFormatData data)
|
||||
private static string[] ReadStringIndex(CompactFontFormatData data)
|
||||
{
|
||||
var index = indexReader.ReadIndex(data);
|
||||
var index = CompactFontFormatIndexReader.ReadIndex(data);
|
||||
|
||||
if (index.Length == 0)
|
||||
{
|
||||
|
@@ -0,0 +1,53 @@
|
||||
namespace UglyToad.PdfPig.Fonts.CompactFontFormat
{
    using System.Collections.Generic;

    /// <summary>
    /// Chooses the global and local subroutine indexes to use for a given glyph.
    /// For non-CID fonts the same pair is returned for every glyph; for CID fonts the
    /// local subroutines are looked up per glyph via the font dictionary select.
    /// </summary>
    internal class CompactFontFormatSubroutinesSelector
    {
        private readonly CompactFontFormatIndex global;
        private readonly CompactFontFormatIndex local;

        // Only populated by the CID constructor: CID fonts have per sub-font subroutines.
        private readonly bool isCid;
        private readonly ICompactFontFormatFdSelect fdSelect;
        private readonly IReadOnlyList<CompactFontFormatIndex> perFontLocalSubroutines;

        /// <summary>
        /// Create a selector which returns the same subroutines for every glyph.
        /// </summary>
        /// <param name="global">The global subroutines.</param>
        /// <param name="local">The local subroutines.</param>
        public CompactFontFormatSubroutinesSelector(CompactFontFormatIndex global, CompactFontFormatIndex local)
        {
            this.global = global;
            this.local = local;
        }

        /// <summary>
        /// Create a selector for a CID font which picks local subroutines per glyph.
        /// </summary>
        /// <param name="global">The global subroutines.</param>
        /// <param name="local">The local subroutines used when no per-font entry applies.</param>
        /// <param name="fdSelect">Maps a glyph id to a font dictionary index.</param>
        /// <param name="perFontLocalSubroutines">
        /// Local subroutines indexed by font dictionary index; a <see langword="null"/> entry falls back to <paramref name="local"/>.
        /// </param>
        public CompactFontFormatSubroutinesSelector(CompactFontFormatIndex global, CompactFontFormatIndex local,
            ICompactFontFormatFdSelect fdSelect,
            IReadOnlyList<CompactFontFormatIndex> perFontLocalSubroutines)
            : this(global, local)
        {
            this.fdSelect = fdSelect;
            this.perFontLocalSubroutines = perFontLocalSubroutines;
            isCid = true;
        }

        /// <summary>
        /// Get the subroutines to use when interpreting the glyph with the given id.
        /// </summary>
        /// <param name="glyphId">The glyph id, passed to the font dictionary select for CID fonts.</param>
        /// <returns>
        /// The global subroutines paired with the local subroutines for the glyph. The shared local
        /// subroutines are returned for non-CID fonts, when the selected font dictionary index is outside
        /// the per-font list, or when the per-font entry is <see langword="null"/>.
        /// </returns>
        public (CompactFontFormatIndex global, CompactFontFormatIndex local) GetSubroutines(int glyphId)
        {
            if (isCid)
            {
                var fontDictionaryIndex = fdSelect.GetFontDictionaryIndex(glyphId);

                var isInRange = fontDictionaryIndex >= 0 && fontDictionaryIndex < perFontLocalSubroutines.Count;

                if (isInRange)
                {
                    return (global, perFontLocalSubroutines[fontDictionaryIndex] ?? local);
                }
            }

            return (global, local);
        }
    }
}
|
@@ -14,6 +14,10 @@
|
||||
/// </summary>
|
||||
internal class Type0Font : IFont, IVerticalWritingSupported
|
||||
{
|
||||
private readonly CMap ucs2CMap;
|
||||
// ReSharper disable once NotAccessedField.Local
|
||||
private readonly bool isChineseJapaneseOrKorean;
|
||||
|
||||
public NameToken Name => BaseFont;
|
||||
|
||||
[NotNull]
|
||||
@@ -30,8 +34,13 @@
|
||||
|
||||
public bool IsVertical => CMap.WritingMode == WritingMode.Vertical;
|
||||
|
||||
public Type0Font(NameToken baseFont, ICidFont cidFont, CMap cmap, CMap toUnicodeCMap)
|
||||
public Type0Font(NameToken baseFont, ICidFont cidFont, CMap cmap, CMap toUnicodeCMap,
|
||||
CMap ucs2CMap,
|
||||
bool isChineseJapaneseOrKorean)
|
||||
{
|
||||
this.ucs2CMap = ucs2CMap;
|
||||
this.isChineseJapaneseOrKorean = isChineseJapaneseOrKorean;
|
||||
|
||||
BaseFont = baseFont ?? throw new ArgumentNullException(nameof(baseFont));
|
||||
CidFont = cidFont ?? throw new ArgumentNullException(nameof(cidFont));
|
||||
CMap = cmap ?? throw new ArgumentNullException(nameof(cmap));
|
||||
@@ -55,6 +64,11 @@
|
||||
|
||||
if (!ToUnicode.CanMapToUnicode)
|
||||
{
|
||||
if (ucs2CMap != null && ucs2CMap.TryConvertToUnicode(characterCode, out value))
|
||||
{
|
||||
return value != null;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@@ -18,6 +18,9 @@
|
||||
private static readonly Lazy<GlyphList> LazyAdobeGlyphList = new Lazy<GlyphList>(() => GlyphListFactory.Get("glyphlist"));
|
||||
public static GlyphList AdobeGlyphList => LazyAdobeGlyphList.Value;
|
||||
|
||||
private static readonly Lazy<GlyphList> LazyAdditionalGlyphList = new Lazy<GlyphList>(() => GlyphListFactory.Get("additional"));
|
||||
public static GlyphList AdditionalGlyphList => LazyAdditionalGlyphList.Value;
|
||||
|
||||
private static readonly Lazy<GlyphList> LazyZapfDingbatsGlyphList = new Lazy<GlyphList>(() => GlyphListFactory.Get("zapfdingbats"));
|
||||
public static GlyphList ZapfDingbats => LazyZapfDingbatsGlyphList.Value;
|
||||
|
||||
|
@@ -24,14 +24,12 @@
|
||||
private readonly ILog log;
|
||||
private readonly IPdfTokenScanner pdfScanner;
|
||||
private readonly IFilterProvider filterProvider;
|
||||
private readonly CMapCache cMapCache;
|
||||
private readonly FontDescriptorFactory fontDescriptorFactory;
|
||||
private readonly TrueTypeFontParser trueTypeFontParser;
|
||||
private readonly IEncodingReader encodingReader;
|
||||
private readonly ISystemFontFinder systemFontFinder;
|
||||
|
||||
public TrueTypeFontHandler(ILog log, IPdfTokenScanner pdfScanner, IFilterProvider filterProvider,
|
||||
CMapCache cMapCache,
|
||||
FontDescriptorFactory fontDescriptorFactory,
|
||||
TrueTypeFontParser trueTypeFontParser,
|
||||
IEncodingReader encodingReader,
|
||||
@@ -39,7 +37,6 @@
|
||||
{
|
||||
this.log = log;
|
||||
this.filterProvider = filterProvider;
|
||||
this.cMapCache = cMapCache;
|
||||
this.fontDescriptorFactory = fontDescriptorFactory;
|
||||
this.trueTypeFontParser = trueTypeFontParser;
|
||||
this.encodingReader = encodingReader;
|
||||
@@ -113,7 +110,7 @@
|
||||
|
||||
if (decodedUnicodeCMap != null)
|
||||
{
|
||||
toUnicodeCMap = cMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing);
|
||||
toUnicodeCMap = CMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -16,15 +16,13 @@
|
||||
internal class Type0FontHandler : IFontHandler
|
||||
{
|
||||
private readonly CidFontFactory cidFontFactory;
|
||||
private readonly CMapCache cMapCache;
|
||||
private readonly IFilterProvider filterProvider;
|
||||
private readonly IPdfTokenScanner scanner;
|
||||
|
||||
public Type0FontHandler(CidFontFactory cidFontFactory, CMapCache cMapCache, IFilterProvider filterProvider,
|
||||
public Type0FontHandler(CidFontFactory cidFontFactory, IFilterProvider filterProvider,
|
||||
IPdfTokenScanner scanner)
|
||||
{
|
||||
this.cidFontFactory = cidFontFactory;
|
||||
this.cMapCache = cMapCache;
|
||||
this.filterProvider = filterProvider;
|
||||
this.scanner = scanner;
|
||||
}
|
||||
@@ -59,7 +57,7 @@
|
||||
throw new InvalidFontFormatException("No descendant font dictionary was declared for this Type 0 font. This dictionary should contain the CIDFont for the Type 0 font. " + dictionary);
|
||||
}
|
||||
|
||||
var ucs2CMap = GetUcs2CMap(dictionary, isCMapPredefined, false);
|
||||
var (ucs2CMap, isChineseJapaneseOrKorean) = GetUcs2CMap(dictionary, isCMapPredefined, cidFont);
|
||||
|
||||
CMap toUnicodeCMap = null;
|
||||
if (dictionary.ContainsKey(NameToken.ToUnicode))
|
||||
@@ -72,11 +70,11 @@
|
||||
|
||||
if (decodedUnicodeCMap != null)
|
||||
{
|
||||
toUnicodeCMap = cMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing);
|
||||
toUnicodeCMap = CMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing);
|
||||
}
|
||||
}
|
||||
|
||||
var font = new Type0Font(baseFont, cidFont, cMap, toUnicodeCMap);
|
||||
var font = new Type0Font(baseFont, cidFont, cMap, toUnicodeCMap, ucs2CMap, isChineseJapaneseOrKorean);
|
||||
|
||||
return font;
|
||||
}
|
||||
@@ -139,7 +137,7 @@
|
||||
{
|
||||
if (value is NameToken encodingName)
|
||||
{
|
||||
var cmap = cMapCache.Get(encodingName.Data);
|
||||
var cmap = CMapCache.Get(encodingName.Data);
|
||||
|
||||
result = cmap ?? throw new InvalidOperationException("Missing CMap for " + encodingName.Data);
|
||||
|
||||
@@ -149,7 +147,7 @@
|
||||
{
|
||||
var decoded = stream.Decode(filterProvider);
|
||||
|
||||
var cmap = cMapCache.Parse(new ByteArrayInputBytes(decoded), false);
|
||||
var cmap = CMapCache.Parse(new ByteArrayInputBytes(decoded), false);
|
||||
|
||||
result = cmap ?? throw new InvalidOperationException("Could not read CMap for " + dictionary);
|
||||
}
|
||||
@@ -162,11 +160,11 @@
|
||||
return result;
|
||||
}
|
||||
|
||||
private static CMap GetUcs2CMap(DictionaryToken dictionary, bool isCMapPredefined, bool usesDescendantAdobeFont)
|
||||
private static (CMap, bool isChineseJapaneseOrKorean) GetUcs2CMap(DictionaryToken dictionary, bool isCMapPredefined, ICidFont cidFont)
|
||||
{
|
||||
if (!isCMapPredefined)
|
||||
{
|
||||
return null;
|
||||
return (null, false);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -178,17 +176,43 @@
|
||||
|
||||
if (encodingName == null)
|
||||
{
|
||||
return null;
|
||||
return (null, false);
|
||||
}
|
||||
|
||||
var isChineseJapaneseOrKorean = false;
|
||||
|
||||
if (cidFont != null && string.Equals(cidFont.SystemInfo.Registry, "Adobe", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
isChineseJapaneseOrKorean = string.Equals(cidFont.SystemInfo.Ordering, "GB1", StringComparison.OrdinalIgnoreCase)
|
||||
|| string.Equals(cidFont.SystemInfo.Ordering, "CNS1", StringComparison.OrdinalIgnoreCase)
|
||||
|| string.Equals(cidFont.SystemInfo.Ordering, "Japan1", StringComparison.OrdinalIgnoreCase)
|
||||
|| string.Equals(cidFont.SystemInfo.Ordering, "Korea1", StringComparison.OrdinalIgnoreCase);
|
||||
}
|
||||
|
||||
|
||||
var isPredefinedIdentityMap = encodingName.Equals(NameToken.IdentityH) || encodingName.Equals(NameToken.IdentityV);
|
||||
|
||||
if (isPredefinedIdentityMap && !usesDescendantAdobeFont)
|
||||
if (isPredefinedIdentityMap && !isChineseJapaneseOrKorean)
|
||||
{
|
||||
return null;
|
||||
return (null, false);
|
||||
}
|
||||
|
||||
throw new NotSupportedException("Support for UCS2 CMaps are not implemented yet. Please raise an issue.");
|
||||
if (!isChineseJapaneseOrKorean)
|
||||
{
|
||||
return (null, false);
|
||||
}
|
||||
|
||||
var fullCmapName = cidFont.SystemInfo.ToString();
|
||||
var nonUnicodeCMap = CMapCache.Get(fullCmapName);
|
||||
|
||||
if (nonUnicodeCMap == null)
|
||||
{
|
||||
return (null, true);
|
||||
}
|
||||
|
||||
var unicodeCMapName = $"{nonUnicodeCMap.Info.Registry}-{nonUnicodeCMap.Info.Ordering}-UCS2";
|
||||
|
||||
return (CMapCache.Get(unicodeCMapName), true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -19,21 +19,19 @@
|
||||
internal class Type1FontHandler : IFontHandler
|
||||
{
|
||||
private readonly IPdfTokenScanner pdfScanner;
|
||||
private readonly CMapCache cMapCache;
|
||||
private readonly IFilterProvider filterProvider;
|
||||
private readonly FontDescriptorFactory fontDescriptorFactory;
|
||||
private readonly IEncodingReader encodingReader;
|
||||
private readonly Type1FontParser type1FontParser;
|
||||
private readonly CompactFontFormatParser compactFontFormatParser;
|
||||
|
||||
public Type1FontHandler(IPdfTokenScanner pdfScanner, CMapCache cMapCache, IFilterProvider filterProvider,
|
||||
public Type1FontHandler(IPdfTokenScanner pdfScanner, IFilterProvider filterProvider,
|
||||
FontDescriptorFactory fontDescriptorFactory,
|
||||
IEncodingReader encodingReader,
|
||||
Type1FontParser type1FontParser,
|
||||
CompactFontFormatParser compactFontFormatParser)
|
||||
{
|
||||
this.pdfScanner = pdfScanner;
|
||||
this.cMapCache = cMapCache;
|
||||
this.filterProvider = filterProvider;
|
||||
this.fontDescriptorFactory = fontDescriptorFactory;
|
||||
this.encodingReader = encodingReader;
|
||||
@@ -95,7 +93,7 @@
|
||||
|
||||
if (decodedUnicodeCMap != null)
|
||||
{
|
||||
toUnicodeCMap = cMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing);
|
||||
toUnicodeCMap = CMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -15,15 +15,13 @@
|
||||
|
||||
internal class Type3FontHandler : IFontHandler
|
||||
{
|
||||
private readonly CMapCache cMapCache;
|
||||
private readonly IFilterProvider filterProvider;
|
||||
private readonly IEncodingReader encodingReader;
|
||||
private readonly IPdfTokenScanner scanner;
|
||||
|
||||
public Type3FontHandler(IPdfTokenScanner scanner, CMapCache cMapCache, IFilterProvider filterProvider,
|
||||
public Type3FontHandler(IPdfTokenScanner scanner, IFilterProvider filterProvider,
|
||||
IEncodingReader encodingReader)
|
||||
{
|
||||
this.cMapCache = cMapCache;
|
||||
this.filterProvider = filterProvider;
|
||||
this.encodingReader = encodingReader;
|
||||
this.scanner = scanner;
|
||||
@@ -50,7 +48,7 @@
|
||||
|
||||
if (decodedUnicodeCMap != null)
|
||||
{
|
||||
toUnicodeCMap = cMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing);
|
||||
toUnicodeCMap = CMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -71,9 +71,9 @@
|
||||
// Behaviour specified by the Extraction of Text Content section of the specification.
|
||||
|
||||
// If the font contains a ToUnicode CMap use that.
|
||||
if (ToUnicode.CanMapToUnicode)
|
||||
if (ToUnicode.CanMapToUnicode && ToUnicode.TryGet(characterCode, out value))
|
||||
{
|
||||
return ToUnicode.TryGet(characterCode, out value);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (encoding == null)
|
||||
@@ -86,17 +86,18 @@
|
||||
// Map the character code to a character name.
|
||||
var encodedCharacterName = encoding.GetName(characterCode);
|
||||
|
||||
// Look up the character name in the Adobe Glyph List.
|
||||
// Look up the character name in the Adobe Glyph List or additional Glyph List.
|
||||
try
|
||||
{
|
||||
value = GlyphList.AdobeGlyphList.NameToUnicode(encodedCharacterName);
|
||||
value = GlyphList.AdobeGlyphList.NameToUnicode(encodedCharacterName)
|
||||
?? GlyphList.AdditionalGlyphList.NameToUnicode(encodedCharacterName);
|
||||
}
|
||||
catch
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
return value != null;
|
||||
}
|
||||
|
||||
public CharacterBoundingBox GetBoundingBox(int characterCode)
|
||||
|
@@ -71,7 +71,6 @@
|
||||
{
|
||||
var log = container.Get<ILog>();
|
||||
var filterProvider = container.Get<IFilterProvider>();
|
||||
var cMapCache = new CMapCache(new CMapParser());
|
||||
|
||||
CrossReferenceTable crossReferenceTable = null;
|
||||
|
||||
@@ -100,9 +99,8 @@
|
||||
|
||||
var trueTypeFontParser = new TrueTypeFontParser();
|
||||
var fontDescriptorFactory = new FontDescriptorFactory();
|
||||
var compactFontFormatIndexReader = new CompactFontFormatIndexReader();
|
||||
var compactFontFormatParser = new CompactFontFormatParser(new CompactFontFormatIndividualFontParser(compactFontFormatIndexReader, new CompactFontFormatTopLevelDictionaryReader(),
|
||||
new CompactFontFormatPrivateDictionaryReader()), compactFontFormatIndexReader);
|
||||
var compactFontFormatParser = new CompactFontFormatParser(new CompactFontFormatIndividualFontParser(new CompactFontFormatTopLevelDictionaryReader(),
|
||||
new CompactFontFormatPrivateDictionaryReader()));
|
||||
|
||||
var (rootReference, rootDictionary) = ParseTrailer(crossReferenceTable, isLenientParsing,
|
||||
pdfScanner,
|
||||
@@ -117,12 +115,11 @@
|
||||
var encodingReader = new EncodingReader(pdfScanner);
|
||||
|
||||
var fontFactory = new FontFactory(log, new Type0FontHandler(cidFontFactory,
|
||||
cMapCache,
|
||||
filterProvider, pdfScanner),
|
||||
new TrueTypeFontHandler(log, pdfScanner, filterProvider, cMapCache, fontDescriptorFactory, trueTypeFontParser, encodingReader, new SystemFontFinder(new TrueTypeFontParser())),
|
||||
new Type1FontHandler(pdfScanner, cMapCache, filterProvider, fontDescriptorFactory, encodingReader,
|
||||
new TrueTypeFontHandler(log, pdfScanner, filterProvider, fontDescriptorFactory, trueTypeFontParser, encodingReader, new SystemFontFinder(new TrueTypeFontParser())),
|
||||
new Type1FontHandler(pdfScanner, filterProvider, fontDescriptorFactory, encodingReader,
|
||||
new Type1FontParser(new Type1EncryptedPortionParser()), compactFontFormatParser),
|
||||
new Type3FontHandler(pdfScanner, cMapCache, filterProvider, encodingReader));
|
||||
new Type3FontHandler(pdfScanner, filterProvider, encodingReader));
|
||||
|
||||
var resourceContainer = new ResourceStore(pdfScanner, fontFactory);
|
||||
|
||||
|
@@ -108,6 +108,7 @@ logicalanddisplay;2227
|
||||
logicalandtext;2227
|
||||
logicalordisplay;2228
|
||||
logicalortext;2228
|
||||
nonmarkingreturn;000D
|
||||
parenleftBig;0028
|
||||
parenleftbig;0028
|
||||
parenleftBigg;0028
|
||||
|
Reference in New Issue
Block a user