mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-09-21 04:17:57 +08:00
#9 add some more code for reading glyph ids from cid fonts in compact font format
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
namespace UglyToad.PdfPig.Tests.Integration
|
||||
{
|
||||
// using System.IO;
|
||||
using Xunit;
|
||||
|
||||
/// <summary>
|
||||
@@ -10,7 +11,7 @@
|
||||
[Fact]
|
||||
public void Tests()
|
||||
{
|
||||
//using (var document = PdfDocument.Open(File.ReadAllBytes(@"C:\Users\eliot\Downloads\Motor Insurance claim form.pdf"), new ParsingOptions{UseLenientParsing = false}))
|
||||
//using (var document = PdfDocument.Open(File.ReadAllBytes(@"C:\Users\eliot\Downloads\Motor Insurance claim form.pdf"), new ParsingOptions { UseLenientParsing = false }))
|
||||
//{
|
||||
// var page1 = document.GetPage(1);
|
||||
//}
|
||||
|
@@ -7,6 +7,8 @@
|
||||
{
|
||||
protected readonly IReadOnlyDictionary<int, (int stringId, string name)> GlyphIdToStringIdAndName;
|
||||
|
||||
public bool IsCidCharset { get; } = false;
|
||||
|
||||
protected CompactFontFormatCharset(IReadOnlyList<(int glyphId, int stringId, string name)> data)
|
||||
{
|
||||
if (data == null)
|
||||
|
@@ -0,0 +1,34 @@
|
||||
namespace UglyToad.PdfPig.Fonts.CompactFontFormat.Charsets
{
    using System;

    /// <summary>
    /// A charset carrying no glyph names, used for CID fonts which map from Character Id to Glyph Id
    /// without going through strings. Every name or string id lookup is rejected.
    /// </summary>
    internal class CompactFontFormatEmptyCharset : ICompactFontFormatCharset
    {
        // Single message shared by every unsupported lookup.
        private const string NamedGlyphsUnsupported = "Cid Charsets do not support named glyphs.";

        // Captured at construction; not read by any member of this class yet.
        private readonly int numberOfCharstrings;

        /// <summary>
        /// Always <see langword="true"/> for this charset.
        /// </summary>
        public bool IsCidCharset => true;

        /// <summary>
        /// Creates the empty charset for a font with the given number of charstrings.
        /// </summary>
        public CompactFontFormatEmptyCharset(int numberOfCharstrings)
        {
            this.numberOfCharstrings = numberOfCharstrings;
        }

        /// <summary>
        /// Not supported by this charset.
        /// </summary>
        /// <exception cref="NotSupportedException">Always thrown.</exception>
        public string GetNameByGlyphId(int glyphId) => throw new NotSupportedException(NamedGlyphsUnsupported);

        /// <summary>
        /// Not supported by this charset.
        /// </summary>
        /// <exception cref="NotSupportedException">Always thrown.</exception>
        public string GetNameByStringId(int stringId) => throw new NotSupportedException(NamedGlyphsUnsupported);

        /// <summary>
        /// Not supported by this charset.
        /// </summary>
        /// <exception cref="NotSupportedException">Always thrown.</exception>
        public string GetStringIdByGlyphId(int glyphId) => throw new NotSupportedException(NamedGlyphsUnsupported);
    }
}
|
@@ -181,6 +181,8 @@
|
||||
|
||||
private readonly IReadOnlyDictionary<int, KeyValuePair<int, string>> characterIdToStringIdAndName;
|
||||
|
||||
public bool IsCidCharset { get; } = false;
|
||||
|
||||
private CompactFontFormatExpertCharset()
|
||||
{
|
||||
var furtherMap = new Dictionary<int, KeyValuePair<int, string>>();
|
||||
|
@@ -102,6 +102,8 @@
|
||||
|
||||
private readonly IReadOnlyDictionary<int, KeyValuePair<int, string>> characterIdToStringIdAndName;
|
||||
|
||||
public bool IsCidCharset { get; } = false;
|
||||
|
||||
private CompactFontFormatExpertSubsetCharset()
|
||||
{
|
||||
var furtherMap = new Dictionary<int, KeyValuePair<int, string>>();
|
||||
|
@@ -244,6 +244,8 @@
|
||||
|
||||
private readonly IReadOnlyDictionary<int, KeyValuePair<int, string>> characterIdToStringIdAndName;
|
||||
|
||||
public bool IsCidCharset { get; } = false;
|
||||
|
||||
private CompactFontFormatIsoAdobeCharset()
|
||||
{
|
||||
var furtherMap = new Dictionary<int, KeyValuePair<int, string>>();
|
||||
|
@@ -2,6 +2,8 @@
|
||||
{
|
||||
internal interface ICompactFontFormatCharset
|
||||
{
|
||||
bool IsCidCharset { get; }
|
||||
|
||||
string GetNameByGlyphId(int glyphId);
|
||||
|
||||
string GetNameByStringId(int stringId);
|
||||
|
@@ -6,6 +6,7 @@
|
||||
using Charsets;
|
||||
using CharStrings;
|
||||
using Dictionaries;
|
||||
using Exceptions;
|
||||
using Type1.CharStrings;
|
||||
using Util;
|
||||
|
||||
@@ -31,6 +32,7 @@
|
||||
|
||||
var topDictionary = topLevelDictionaryReader.Read(individualData, stringIndex);
|
||||
|
||||
|
||||
var privateDictionary = CompactFontFormatPrivateDictionary.GetDefault();
|
||||
|
||||
if (topDictionary.PrivateDictionaryLocation.HasValue && topDictionary.PrivateDictionaryLocation.Value.Size > 0)
|
||||
@@ -58,24 +60,64 @@
|
||||
|
||||
var charStringIndex = indexReader.ReadDictionaryData(data);
|
||||
|
||||
ICompactFontFormatCharset charset = null;
|
||||
|
||||
if (topDictionary.IsCidFont && topDictionary.CharSetOffset >= 0 && topDictionary.CharSetOffset <= 2)
|
||||
ICompactFontFormatCharset charset;
|
||||
if (topDictionary.CharSetOffset >= 0)
|
||||
{
|
||||
switch (topDictionary.CharSetOffset)
|
||||
var charsetId = topDictionary.CharSetOffset;
|
||||
if (!topDictionary.IsCidFont && charsetId == 0)
|
||||
{
|
||||
case 0:
|
||||
charset = CompactFontFormatIsoAdobeCharset.Value;
|
||||
break;
|
||||
case 1:
|
||||
}
|
||||
else if (!topDictionary.IsCidFont && charsetId == 1)
|
||||
{
|
||||
charset = CompactFontFormatExpertCharset.Value;
|
||||
break;
|
||||
case 2:
|
||||
}
|
||||
else if (!topDictionary.IsCidFont && charsetId == 2)
|
||||
{
|
||||
charset = CompactFontFormatExpertSubsetCharset.Value;
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
charset = ReadCharset(data, topDictionary, charStringIndex, stringIndex);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (topDictionary.IsCidFont)
|
||||
{
|
||||
// a CID font with no charset does not default to any predefined charset
|
||||
charset = new CompactFontFormatEmptyCharset(charStringIndex.Count);
|
||||
}
|
||||
else
|
||||
{
|
||||
charset = CompactFontFormatIsoAdobeCharset.Value;
|
||||
}
|
||||
}
|
||||
|
||||
data.Seek(topDictionary.CharStringsOffset);
|
||||
|
||||
Type2CharStrings charStrings;
|
||||
switch (topDictionary.CharStringType)
|
||||
{
|
||||
case CompactFontFormatCharStringType.Type1:
|
||||
throw new NotImplementedException("Type 1 CharStrings are not currently supported in CFF font.");
|
||||
case CompactFontFormatCharStringType.Type2:
|
||||
charStrings = Type2CharStringParser.Parse(charStringIndex, localSubroutines, globalSubroutineIndex, charset);
|
||||
break;
|
||||
default:
|
||||
throw new ArgumentOutOfRangeException($"Unexpected CharString type in CFF font: {topDictionary.CharStringType}.");
|
||||
}
|
||||
|
||||
if (topDictionary.IsCidFont)
|
||||
{
|
||||
ReadCidFont(data, topDictionary, charStringIndex.Count, stringIndex);
|
||||
}
|
||||
|
||||
return new CompactFontFormatFont(topDictionary, privateDictionary, charset, Union<Type1CharStrings, Type2CharStrings>.Two(charStrings));
|
||||
}
|
||||
|
||||
private static ICompactFontFormatCharset ReadCharset(CompactFontFormatData data, CompactFontFormatTopLevelDictionary topDictionary,
|
||||
CompactFontFormatIndex charStringIndex, IReadOnlyList<string> stringIndex)
|
||||
{
|
||||
data.Seek(topDictionary.CharSetOffset);
|
||||
|
||||
@@ -93,9 +135,7 @@
|
||||
glyphToNamesAndStringId.Add((glyphId, stringId, ReadString(stringId, stringIndex)));
|
||||
}
|
||||
|
||||
charset = new CompactFontFormatFormat0Charset(glyphToNamesAndStringId);
|
||||
|
||||
break;
|
||||
return new CompactFontFormatFormat0Charset(glyphToNamesAndStringId);
|
||||
}
|
||||
case 1:
|
||||
case 2:
|
||||
@@ -119,37 +159,16 @@
|
||||
if (format == 1)
|
||||
{
|
||||
|
||||
charset = new CompactFontFormatFormat1Charset(glyphToNamesAndStringId);
|
||||
}
|
||||
else
|
||||
{
|
||||
charset = new CompactFontFormatFormat2Charset(glyphToNamesAndStringId);
|
||||
return new CompactFontFormatFormat1Charset(glyphToNamesAndStringId);
|
||||
}
|
||||
|
||||
break;
|
||||
return new CompactFontFormatFormat2Charset(glyphToNamesAndStringId);
|
||||
}
|
||||
default:
|
||||
throw new InvalidOperationException($"Unrecognized format for the Charset table in a CFF font. Got: {format}.");
|
||||
}
|
||||
}
|
||||
|
||||
data.Seek(topDictionary.CharStringsOffset);
|
||||
|
||||
Type2CharStrings charStrings;
|
||||
switch (topDictionary.CharStringType)
|
||||
{
|
||||
case CompactFontFormatCharStringType.Type1:
|
||||
throw new NotImplementedException();
|
||||
case CompactFontFormatCharStringType.Type2:
|
||||
charStrings = Type2CharStringParser.Parse(charStringIndex, localSubroutines, globalSubroutineIndex, charset);
|
||||
break;
|
||||
default:
|
||||
throw new ArgumentOutOfRangeException($"Unexpected CharString type in CFF font: {topDictionary.CharStringType}.");
|
||||
}
|
||||
|
||||
return new CompactFontFormatFont(topDictionary, privateDictionary, charset, Union<Type1CharStrings, Type2CharStrings>.Two(charStrings));
|
||||
}
|
||||
|
||||
private static string ReadString(int index, IReadOnlyList<string> stringIndex)
|
||||
{
|
||||
if (index >= 0 && index <= 390)
|
||||
@@ -164,5 +183,155 @@
|
||||
// technically this maps to .notdef, but PDFBox uses this
|
||||
return "SID" + index;
|
||||
}
|
||||
|
||||
/// <summary>
/// Reads the CID specific parts of a CID keyed Compact Font Format font: each entry of the Font Dictionary
/// Array (with its private dictionary and, when present, its local subroutines) followed by the
/// Font Dictionary Select table.
/// </summary>
/// <remarks>
/// The values parsed here are validated but not yet returned or stored; the method currently only
/// throws when the data is malformed.
/// </remarks>
/// <param name="data">The font data to seek within and read from.</param>
/// <param name="topLevelDictionary">The top level dictionary providing the CID font operator offsets.</param>
/// <param name="numberOfGlyphs">The number of glyphs, used to size a format 0 Font Dictionary Select table.</param>
/// <param name="stringIndex">The string index passed through to the dictionary readers.</param>
/// <exception cref="InvalidFontFormatException">
/// A font dictionary has no private dictionary location, or the Font Dictionary Select format is neither 0 nor 3.
/// </exception>
private void ReadCidFont(CompactFontFormatData data, CompactFontFormatTopLevelDictionary topLevelDictionary,
    int numberOfGlyphs,
    IReadOnlyList<string> stringIndex)
{
    var offset = topLevelDictionary.CidFontOperators.FontDictionaryArray;

    data.Seek(offset);

    var fontDict = indexReader.ReadDictionaryData(data);

    foreach (var index in fontDict)
    {
        var topLevelDictionaryCid = topLevelDictionaryReader.Read(new CompactFontFormatData(index), stringIndex);

        if (!topLevelDictionaryCid.PrivateDictionaryLocation.HasValue)
        {
            throw new InvalidFontFormatException("The CID keyed Compact Font Format font did not contain a private dictionary for the font dictionary.");
        }

        var privateDictionaryBytes = data.SnapshotPortion(topLevelDictionaryCid.PrivateDictionaryLocation.Value.Offset,
            topLevelDictionaryCid.PrivateDictionaryLocation.Value.Size);

        // Parse the private dictionary from its own snapshot. Reading from 'data' here would parse
        // whatever bytes follow the current position instead of the private dictionary.
        var privateDictionaryCid = privateDictionaryReader.Read(privateDictionaryBytes, stringIndex);

        // CFFParser.java line 625 - read the local subroutines.
        if (privateDictionaryCid.LocalSubroutineOffset.HasValue && privateDictionaryCid.LocalSubroutineOffset.Value > 0)
        {
            // The local subroutine offset is applied relative to the start of the private dictionary.
            data.Seek(topLevelDictionaryCid.PrivateDictionaryLocation.Value.Offset + privateDictionaryCid.LocalSubroutineOffset.Value);
            var localSubroutines = indexReader.ReadDictionaryData(data);
        }
    }

    data.Seek(topLevelDictionary.CidFontOperators.FontDictionarySelect);

    var format = data.ReadCard8();

    // Not consumed yet: the result is only parsed so that an unknown format is rejected.
    ICompactFontFormatFdSelect fdSelect;
    switch (format)
    {
        case 0:
            {
                fdSelect = ReadFormat0FdSelect(data, numberOfGlyphs, topLevelDictionary.CidFontOperators.Ros);
                break;
            }
        case 3:
            {
                fdSelect = ReadFormat3FdSelect(data, topLevelDictionary.CidFontOperators.Ros);
                break;
            }
        default:
            throw new InvalidFontFormatException($"Invalid Font Dictionary Select format: {format}.");
    }
}
|
||||
|
||||
/// <summary>
/// Reads a format 0 Font Dictionary Select table: one Card8 value per glyph, taken in order
/// from the current position of <paramref name="data"/>.
/// </summary>
/// <param name="data">The font data to read from.</param>
/// <param name="numberOfGlyphs">The number of Card8 entries to read.</param>
/// <param name="registryOrderingSupplement">Passed through to the resulting table.</param>
/// <returns>The table holding the values read, in the order they were read.</returns>
private static CompactFontFormat0FdSelect ReadFormat0FdSelect(CompactFontFormatData data, int numberOfGlyphs,
    RegistryOrderingSupplement registryOrderingSupplement)
{
    var fontDictionaryPerGlyph = new int[numberOfGlyphs];

    var glyph = 0;
    while (glyph < fontDictionaryPerGlyph.Length)
    {
        fontDictionaryPerGlyph[glyph] = data.ReadCard8();
        glyph++;
    }

    return new CompactFontFormat0FdSelect(registryOrderingSupplement, fontDictionaryPerGlyph);
}
|
||||
|
||||
/// <summary>
/// Reads a format 3 Font Dictionary Select table from the current position of <paramref name="data"/>:
/// a Card16 range count, then for each range a Card16 first value and a Card8 font dictionary value,
/// and finally a Card16 sentinel.
/// </summary>
/// <param name="data">The font data to read from.</param>
/// <param name="registryOrderingSupplement">Passed through to the resulting table.</param>
/// <returns>The table holding the ranges in the order they were read, plus the sentinel.</returns>
private static CompactFontFormat3FdSelect ReadFormat3FdSelect(CompactFontFormatData data, RegistryOrderingSupplement registryOrderingSupplement)
{
    var rangeCount = data.ReadCard16();
    var parsedRanges = new CompactFontFormat3FdSelect.Range3[rangeCount];

    var position = 0;
    while (position < rangeCount)
    {
        // Arguments are evaluated left to right, so the Card16 is read before the Card8.
        parsedRanges[position] = new CompactFontFormat3FdSelect.Range3(data.ReadCard16(), data.ReadCard8());
        position++;
    }

    var terminator = data.ReadCard16();

    return new CompactFontFormat3FdSelect(registryOrderingSupplement, parsedRanges, terminator);
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Contract for a Font Dictionary Select table, which answers which font dictionary a glyph uses.
/// </summary>
internal interface ICompactFontFormatFdSelect
|
||||
{
|
||||
/// <summary>
/// Intended to return the index of the font dictionary for the glyph with the given id.
/// </summary>
/// <param name="glyphId">The id of the glyph to look up.</param>
int GetFontDictionaryIndex(int glyphId);
|
||||
}
|
||||
|
||||
/// <summary>
/// A format 0 Font Dictionary Select table: holds one font dictionary index per glyph,
/// positioned by glyph id.
/// </summary>
internal class CompactFontFormat0FdSelect : ICompactFontFormatFdSelect
{
    /// <summary>
    /// The registry, ordering and supplement this table was created with.
    /// </summary>
    public RegistryOrderingSupplement RegistryOrderingSupplement { get; }

    /// <summary>
    /// The font dictionary index for each glyph; the entry at position n belongs to glyph id n.
    /// </summary>
    public IReadOnlyList<int> FontDictionaries { get; }

    /// <summary>
    /// Creates a format 0 table.
    /// </summary>
    /// <exception cref="ArgumentNullException">Either argument is <see langword="null"/>.</exception>
    public CompactFontFormat0FdSelect(RegistryOrderingSupplement registryOrderingSupplement, IReadOnlyList<int> fontDictionaries)
    {
        RegistryOrderingSupplement = registryOrderingSupplement ?? throw new ArgumentNullException(nameof(registryOrderingSupplement));
        FontDictionaries = fontDictionaries ?? throw new ArgumentNullException(nameof(fontDictionaries));
    }

    /// <summary>
    /// Gets the font dictionary index stored for the glyph with the given id.
    /// </summary>
    /// <param name="glyphId">The glyph id, from 0 up to but excluding the number of entries.</param>
    /// <returns>The entry of <see cref="FontDictionaries"/> at position <paramref name="glyphId"/>.</returns>
    /// <exception cref="ArgumentOutOfRangeException">The glyph id has no entry in the table.</exception>
    public int GetFontDictionaryIndex(int glyphId)
    {
        if (glyphId < 0 || glyphId >= FontDictionaries.Count)
        {
            throw new ArgumentOutOfRangeException(nameof(glyphId), glyphId,
                $"The glyph id must be between 0 and {FontDictionaries.Count - 1} for this Font Dictionary Select table.");
        }

        return FontDictionaries[glyphId];
    }
}
|
||||
|
||||
/// <summary>
/// A format 3 Font Dictionary Select table: glyph ids are grouped into ranges which share a font
/// dictionary. Each range runs from its own first glyph id up to (but excluding) the first glyph id
/// of the following range; the final range is closed by the sentinel.
/// </summary>
internal class CompactFontFormat3FdSelect : ICompactFontFormatFdSelect
{
    /// <summary>
    /// The registry, ordering and supplement this table was created with.
    /// </summary>
    public RegistryOrderingSupplement RegistryOrderingSupplement { get; }

    /// <summary>
    /// The ranges of this table, in the order they were supplied.
    /// </summary>
    public IReadOnlyList<Range3> Ranges { get; }

    /// <summary>
    /// The glyph id which ends the final range (exclusive).
    /// </summary>
    public int Sentinel { get; }

    /// <summary>
    /// Creates a format 3 table.
    /// </summary>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="registryOrderingSupplement"/> or <paramref name="ranges"/> is <see langword="null"/>.
    /// </exception>
    public CompactFontFormat3FdSelect(RegistryOrderingSupplement registryOrderingSupplement, IReadOnlyList<Range3> ranges, int sentinel)
    {
        RegistryOrderingSupplement = registryOrderingSupplement ?? throw new ArgumentNullException(nameof(registryOrderingSupplement));
        Ranges = ranges ?? throw new ArgumentNullException(nameof(ranges));
        Sentinel = sentinel;
    }

    /// <summary>
    /// Gets the font dictionary index of the range containing the glyph with the given id.
    /// </summary>
    /// <param name="glyphId">The glyph id to look up.</param>
    /// <returns>The <see cref="Range3.FontDictionary"/> of the first range containing the glyph id.</returns>
    /// <exception cref="ArgumentOutOfRangeException">No range contains the glyph id.</exception>
    public int GetFontDictionaryIndex(int glyphId)
    {
        for (var i = 0; i < Ranges.Count; i++)
        {
            var range = Ranges[i];

            // A range ends where the next one starts; the last range ends at the sentinel.
            var endExclusive = i + 1 < Ranges.Count ? Ranges[i + 1].First : Sentinel;

            if (glyphId >= range.First && glyphId < endExclusive)
            {
                return range.FontDictionary;
            }
        }

        throw new ArgumentOutOfRangeException(nameof(glyphId), glyphId,
            "The glyph id is not covered by any range of this Font Dictionary Select table.");
    }

    /// <summary>
    /// A single range of a format 3 table: the first glyph id of the range and the font dictionary it uses.
    /// </summary>
    internal struct Range3
    {
        /// <summary>
        /// The first glyph id in the range.
        /// </summary>
        public int First { get; }

        /// <summary>
        /// The font dictionary index for every glyph in the range.
        /// </summary>
        public int FontDictionary { get; }

        /// <summary>
        /// Creates a range starting at <paramref name="first"/> which uses <paramref name="fontDictionary"/>.
        /// </summary>
        public Range3(int first, int fontDictionary)
        {
            First = first;
            FontDictionary = fontDictionary;
        }

        /// <inheritdoc />
        public override string ToString()
        {
            return $"First {First}, Dictionary {FontDictionary}.";
        }
    }
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
@@ -94,9 +94,9 @@
|
||||
|
||||
public decimal UidBase { get; set; }
|
||||
|
||||
public decimal FontDictionaryArray { get; set; }
|
||||
public int FontDictionaryArray { get; set; }
|
||||
|
||||
public decimal FontDictionarySelect { get; set; }
|
||||
public int FontDictionarySelect { get; set; }
|
||||
|
||||
public string FontName { get; set; }
|
||||
}
|
||||
|
@@ -134,7 +134,7 @@
|
||||
dictionary.IsCidFont = true;
|
||||
break;
|
||||
case 37:
|
||||
dictionary.CidFontOperators.FontDictionarySelect = operands[0].Decimal;
|
||||
dictionary.CidFontOperators.FontDictionarySelect = GetIntOrDefault(operands);
|
||||
dictionary.IsCidFont = true;
|
||||
break;
|
||||
case 38:
|
||||
|
Reference in New Issue
Block a user