#9 add some more code for reading glyph ids from cid fonts in compact font format

This commit is contained in:
Eliot Jones
2018-12-29 11:41:46 +00:00
parent 47e49c4044
commit 79c555e967
10 changed files with 284 additions and 70 deletions

View File

@@ -1,5 +1,6 @@
namespace UglyToad.PdfPig.Tests.Integration
{
// using System.IO;
using Xunit;
/// <summary>
@@ -10,7 +11,7 @@
[Fact]
public void Tests()
{
//using (var document = PdfDocument.Open(File.ReadAllBytes(@"C:\Users\eliot\Downloads\Motor Insurance claim form.pdf"), new ParsingOptions{UseLenientParsing = false}))
//using (var document = PdfDocument.Open(File.ReadAllBytes(@"C:\Users\eliot\Downloads\Motor Insurance claim form.pdf"), new ParsingOptions { UseLenientParsing = false }))
//{
// var page1 = document.GetPage(1);
//}

View File

@@ -7,6 +7,8 @@
{
protected readonly IReadOnlyDictionary<int, (int stringId, string name)> GlyphIdToStringIdAndName;
public bool IsCidCharset { get; } = false;
protected CompactFontFormatCharset(IReadOnlyList<(int glyphId, int stringId, string name)> data)
{
if (data == null)

View File

@@ -0,0 +1,34 @@
namespace UglyToad.PdfPig.Fonts.CompactFontFormat.Charsets
{
using System;
/// <summary>
/// A Charset for CID fonts which carries no glyph names: CID fonts map from Character Id to Glyph Id
/// without using strings, so every name lookup on this type is rejected.
/// </summary>
internal class CompactFontFormatEmptyCharset : ICompactFontFormatCharset
{
    // Shared message for every unsupported name lookup.
    private const string NamedGlyphsNotSupported = "Cid Charsets do not support named glyphs.";

    // Stored from the constructor argument; not read by any member of this class.
    private readonly int charstringCount;

    /// <summary>
    /// Always <c>true</c> for this charset.
    /// </summary>
    public bool IsCidCharset => true;

    /// <summary>
    /// Create a new <see cref="CompactFontFormatEmptyCharset"/>.
    /// </summary>
    /// <param name="numberOfCharstrings">The number of charstrings in the font this charset belongs to.</param>
    public CompactFontFormatEmptyCharset(int numberOfCharstrings)
    {
        charstringCount = numberOfCharstrings;
    }

    /// <summary>
    /// Not supported, always throws <see cref="NotSupportedException"/>.
    /// </summary>
    public string GetNameByGlyphId(int glyphId) => throw new NotSupportedException(NamedGlyphsNotSupported);

    /// <summary>
    /// Not supported, always throws <see cref="NotSupportedException"/>.
    /// </summary>
    public string GetNameByStringId(int stringId) => throw new NotSupportedException(NamedGlyphsNotSupported);

    /// <summary>
    /// Not supported, always throws <see cref="NotSupportedException"/>.
    /// </summary>
    public string GetStringIdByGlyphId(int glyphId) => throw new NotSupportedException(NamedGlyphsNotSupported);
}
}

View File

@@ -181,6 +181,8 @@
private readonly IReadOnlyDictionary<int, KeyValuePair<int, string>> characterIdToStringIdAndName;
public bool IsCidCharset { get; } = false;
private CompactFontFormatExpertCharset()
{
var furtherMap = new Dictionary<int, KeyValuePair<int, string>>();

View File

@@ -102,6 +102,8 @@
private readonly IReadOnlyDictionary<int, KeyValuePair<int, string>> characterIdToStringIdAndName;
public bool IsCidCharset { get; } = false;
private CompactFontFormatExpertSubsetCharset()
{
var furtherMap = new Dictionary<int, KeyValuePair<int, string>>();

View File

@@ -244,6 +244,8 @@
private readonly IReadOnlyDictionary<int, KeyValuePair<int, string>> characterIdToStringIdAndName;
public bool IsCidCharset { get; } = false;
private CompactFontFormatIsoAdobeCharset()
{
var furtherMap = new Dictionary<int, KeyValuePair<int, string>>();

View File

@@ -2,6 +2,8 @@
{
internal interface ICompactFontFormatCharset
{
bool IsCidCharset { get; }
string GetNameByGlyphId(int glyphId);
string GetNameByStringId(int stringId);

View File

@@ -6,6 +6,7 @@
using Charsets;
using CharStrings;
using Dictionaries;
using Exceptions;
using Type1.CharStrings;
using Util;
@@ -31,6 +32,7 @@
var topDictionary = topLevelDictionaryReader.Read(individualData, stringIndex);
var privateDictionary = CompactFontFormatPrivateDictionary.GetDefault();
if (topDictionary.PrivateDictionaryLocation.HasValue && topDictionary.PrivateDictionaryLocation.Value.Size > 0)
@@ -58,24 +60,64 @@
var charStringIndex = indexReader.ReadDictionaryData(data);
ICompactFontFormatCharset charset = null;
if (topDictionary.IsCidFont && topDictionary.CharSetOffset >= 0 && topDictionary.CharSetOffset <= 2)
ICompactFontFormatCharset charset;
if (topDictionary.CharSetOffset >= 0)
{
switch (topDictionary.CharSetOffset)
var charsetId = topDictionary.CharSetOffset;
if (!topDictionary.IsCidFont && charsetId == 0)
{
case 0:
charset = CompactFontFormatIsoAdobeCharset.Value;
break;
case 1:
}
else if (!topDictionary.IsCidFont && charsetId == 1)
{
charset = CompactFontFormatExpertCharset.Value;
break;
case 2:
}
else if (!topDictionary.IsCidFont && charsetId == 2)
{
charset = CompactFontFormatExpertSubsetCharset.Value;
break;
}
else
{
charset = ReadCharset(data, topDictionary, charStringIndex, stringIndex);
}
}
else
{
if (topDictionary.IsCidFont)
{
// a CID font with no charset does not default to any predefined charset
charset = new CompactFontFormatEmptyCharset(charStringIndex.Count);
}
else
{
charset = CompactFontFormatIsoAdobeCharset.Value;
}
}
data.Seek(topDictionary.CharStringsOffset);
Type2CharStrings charStrings;
switch (topDictionary.CharStringType)
{
case CompactFontFormatCharStringType.Type1:
throw new NotImplementedException("Type 1 CharStrings are not currently supported in CFF font.");
case CompactFontFormatCharStringType.Type2:
charStrings = Type2CharStringParser.Parse(charStringIndex, localSubroutines, globalSubroutineIndex, charset);
break;
default:
throw new ArgumentOutOfRangeException($"Unexpected CharString type in CFF font: {topDictionary.CharStringType}.");
}
if (topDictionary.IsCidFont)
{
ReadCidFont(data, topDictionary, charStringIndex.Count, stringIndex);
}
return new CompactFontFormatFont(topDictionary, privateDictionary, charset, Union<Type1CharStrings, Type2CharStrings>.Two(charStrings));
}
private static ICompactFontFormatCharset ReadCharset(CompactFontFormatData data, CompactFontFormatTopLevelDictionary topDictionary,
CompactFontFormatIndex charStringIndex, IReadOnlyList<string> stringIndex)
{
data.Seek(topDictionary.CharSetOffset);
@@ -93,9 +135,7 @@
glyphToNamesAndStringId.Add((glyphId, stringId, ReadString(stringId, stringIndex)));
}
charset = new CompactFontFormatFormat0Charset(glyphToNamesAndStringId);
break;
return new CompactFontFormatFormat0Charset(glyphToNamesAndStringId);
}
case 1:
case 2:
@@ -119,37 +159,16 @@
if (format == 1)
{
charset = new CompactFontFormatFormat1Charset(glyphToNamesAndStringId);
}
else
{
charset = new CompactFontFormatFormat2Charset(glyphToNamesAndStringId);
return new CompactFontFormatFormat1Charset(glyphToNamesAndStringId);
}
break;
return new CompactFontFormatFormat2Charset(glyphToNamesAndStringId);
}
default:
throw new InvalidOperationException($"Unrecognized format for the Charset table in a CFF font. Got: {format}.");
}
}
data.Seek(topDictionary.CharStringsOffset);
Type2CharStrings charStrings;
switch (topDictionary.CharStringType)
{
case CompactFontFormatCharStringType.Type1:
throw new NotImplementedException();
case CompactFontFormatCharStringType.Type2:
charStrings = Type2CharStringParser.Parse(charStringIndex, localSubroutines, globalSubroutineIndex, charset);
break;
default:
throw new ArgumentOutOfRangeException($"Unexpected CharString type in CFF font: {topDictionary.CharStringType}.");
}
return new CompactFontFormatFont(topDictionary, privateDictionary, charset, Union<Type1CharStrings, Type2CharStrings>.Two(charStrings));
}
private static string ReadString(int index, IReadOnlyList<string> stringIndex)
{
if (index >= 0 && index <= 390)
@@ -164,5 +183,155 @@
// technically this maps to .notdef, but PDFBox uses this
return "SID" + index;
}
/// <summary>
/// Walks the CID specific parts of a CID keyed Compact Font Format font: each entry of the font dictionary
/// index (with its private dictionary and, when present, local subroutines) followed by the
/// Font Dictionary Select table.
/// </summary>
/// <remarks>
/// The local subroutines and the FDSelect read here are not stored or returned yet; the method currently
/// only parses and validates the data.
/// </remarks>
/// <param name="data">The data of the font being read.</param>
/// <param name="topLevelDictionary">The top level dictionary providing the font dictionary array and FDSelect offsets.</param>
/// <param name="numberOfGlyphs">The number of glyphs, used as the entry count of a format 0 FDSelect.</param>
/// <param name="stringIndex">The string index passed through to the dictionary readers.</param>
/// <exception cref="InvalidFontFormatException">
/// A font dictionary has no private dictionary location, or the FDSelect format is neither 0 nor 3.
/// </exception>
private void ReadCidFont(CompactFontFormatData data, CompactFontFormatTopLevelDictionary topLevelDictionary,
    int numberOfGlyphs,
    IReadOnlyList<string> stringIndex)
{
    var offset = topLevelDictionary.CidFontOperators.FontDictionaryArray;

    data.Seek(offset);

    var fontDict = indexReader.ReadDictionaryData(data);

    foreach (var index in fontDict)
    {
        var topLevelDictionaryCid = topLevelDictionaryReader.Read(new CompactFontFormatData(index), stringIndex);

        if (!topLevelDictionaryCid.PrivateDictionaryLocation.HasValue)
        {
            throw new InvalidFontFormatException("The CID keyed Compact Font Format font did not contain a private dictionary for the font dictionary.");
        }

        var privateDictionaryBytes = data.SnapshotPortion(topLevelDictionaryCid.PrivateDictionaryLocation.Value.Offset,
            topLevelDictionaryCid.PrivateDictionaryLocation.Value.Size);

        // Parse the private dictionary from its own snapshot. Reading from 'data' here would interpret
        // whatever follows the current position of the whole font stream as the private dictionary.
        var privateDictionaryCid = privateDictionaryReader.Read(privateDictionaryBytes, stringIndex);

        // CFFParser.java line 625 - read the local subroutines.
        if (privateDictionaryCid.LocalSubroutineOffset.HasValue && privateDictionaryCid.LocalSubroutineOffset.Value > 0)
        {
            // The local subroutine offset is applied relative to the start of the private dictionary.
            data.Seek(topLevelDictionaryCid.PrivateDictionaryLocation.Value.Offset + privateDictionaryCid.LocalSubroutineOffset.Value);

            // Not consumed yet, see remarks.
            var localSubroutines = indexReader.ReadDictionaryData(data);
        }
    }

    data.Seek(topLevelDictionary.CidFontOperators.FontDictionarySelect);

    // The first byte of the FDSelect table selects its format.
    var format = data.ReadCard8();

    // Not consumed yet, see remarks.
    ICompactFontFormatFdSelect fdSelect;
    switch (format)
    {
        case 0:
            {
                fdSelect = ReadFormat0FdSelect(data, numberOfGlyphs, topLevelDictionary.CidFontOperators.Ros);
                break;
            }
        case 3:
            {
                fdSelect = ReadFormat3FdSelect(data, topLevelDictionary.CidFontOperators.Ros);
                break;
            }
        default:
            throw new InvalidFontFormatException($"Invalid Font Dictionary Select format: {format}.");
    }
}
/// <summary>
/// Reads a format 0 Font Dictionary Select: one Card8 value per glyph, read in glyph order
/// through <c>data.ReadCard8()</c>.
/// </summary>
/// <param name="data">The data to read the values from.</param>
/// <param name="numberOfGlyphs">The number of Card8 values to read.</param>
/// <param name="registryOrderingSupplement">Passed through unchanged to the created <see cref="CompactFontFormat0FdSelect"/>.</param>
/// <returns>A <see cref="CompactFontFormat0FdSelect"/> holding the values read.</returns>
private static CompactFontFormat0FdSelect ReadFormat0FdSelect(CompactFontFormatData data, int numberOfGlyphs,
    RegistryOrderingSupplement registryOrderingSupplement)
{
    var fontDictionaryIndices = new int[numberOfGlyphs];

    for (var glyphId = 0; glyphId < fontDictionaryIndices.Length; glyphId++)
    {
        fontDictionaryIndices[glyphId] = data.ReadCard8();
    }

    return new CompactFontFormat0FdSelect(registryOrderingSupplement, fontDictionaryIndices);
}
/// <summary>
/// Reads a format 3 Font Dictionary Select: a Card16 range count, then for each range a Card16 first
/// value and a Card8 font dictionary value, and finally a Card16 sentinel.
/// </summary>
/// <param name="data">The data to read the table from.</param>
/// <param name="registryOrderingSupplement">Passed through unchanged to the created <see cref="CompactFontFormat3FdSelect"/>.</param>
/// <returns>A <see cref="CompactFontFormat3FdSelect"/> holding the ranges and sentinel read.</returns>
private static CompactFontFormat3FdSelect ReadFormat3FdSelect(CompactFontFormatData data, RegistryOrderingSupplement registryOrderingSupplement)
{
    var rangeCount = data.ReadCard16();

    var ranges = new CompactFontFormat3FdSelect.Range3[rangeCount];

    for (var rangeIndex = 0; rangeIndex < ranges.Length; rangeIndex++)
    {
        // Arguments are evaluated left to right, so the Card16 is read before the Card8.
        ranges[rangeIndex] = new CompactFontFormat3FdSelect.Range3(data.ReadCard16(), data.ReadCard8());
    }

    // The sentinel directly follows the last range.
    var sentinel = data.ReadCard16();

    return new CompactFontFormat3FdSelect(registryOrderingSupplement, ranges, sentinel);
}
}
/// <summary>
/// A Font Dictionary Select of a CID keyed Compact Font Format font, queried by glyph id
/// for a font dictionary index.
/// </summary>
internal interface ICompactFontFormatFdSelect
{
/// <summary>
/// Gets the font dictionary index for the glyph with the given id.
/// </summary>
/// <param name="glyphId">The id of the glyph to look up.</param>
int GetFontDictionaryIndex(int glyphId);
}
/// <summary>
/// A format 0 Font Dictionary Select: holds one font dictionary index per glyph, indexed by glyph id.
/// </summary>
internal class CompactFontFormat0FdSelect : ICompactFontFormatFdSelect
{
    /// <summary>
    /// The registry, ordering and supplement supplied on construction.
    /// </summary>
    public RegistryOrderingSupplement RegistryOrderingSupplement { get; }

    /// <summary>
    /// The font dictionary index for each glyph, the position in the list is the glyph id.
    /// </summary>
    public IReadOnlyList<int> FontDictionaries { get; }

    /// <summary>
    /// Create a new <see cref="CompactFontFormat0FdSelect"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
    public CompactFontFormat0FdSelect(RegistryOrderingSupplement registryOrderingSupplement, IReadOnlyList<int> fontDictionaries)
    {
        RegistryOrderingSupplement = registryOrderingSupplement ?? throw new ArgumentNullException(nameof(registryOrderingSupplement));
        FontDictionaries = fontDictionaries ?? throw new ArgumentNullException(nameof(fontDictionaries));
    }

    /// <summary>
    /// Gets the font dictionary index stored for the glyph.
    /// </summary>
    /// <param name="glyphId">The id of the glyph to look up.</param>
    /// <returns>
    /// The entry of <see cref="FontDictionaries"/> at position <paramref name="glyphId"/>,
    /// or -1 if <paramref name="glyphId"/> is negative or beyond the last entry.
    /// </returns>
    public int GetFontDictionaryIndex(int glyphId)
    {
        if (glyphId < 0 || glyphId >= FontDictionaries.Count)
        {
            // No entry exists for this glyph.
            return -1;
        }

        return FontDictionaries[glyphId];
    }
}
/// <summary>
/// A format 3 Font Dictionary Select: glyph ids are grouped into ranges which each share a font dictionary.
/// </summary>
internal class CompactFontFormat3FdSelect : ICompactFontFormatFdSelect
{
    /// <summary>
    /// The registry, ordering and supplement supplied on construction.
    /// </summary>
    public RegistryOrderingSupplement RegistryOrderingSupplement { get; }

    /// <summary>
    /// The ranges of this table. The lookup expects them in increasing order of <see cref="Range3.First"/>.
    /// </summary>
    public IReadOnlyList<Range3> Ranges { get; }

    /// <summary>
    /// The value following the last range, used as the exclusive upper bound of the last range.
    /// </summary>
    public int Sentinel { get; }

    /// <summary>
    /// Create a new <see cref="CompactFontFormat3FdSelect"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="registryOrderingSupplement"/> or <paramref name="ranges"/> is <c>null</c>.
    /// </exception>
    public CompactFontFormat3FdSelect(RegistryOrderingSupplement registryOrderingSupplement, IReadOnlyList<Range3> ranges, int sentinel)
    {
        RegistryOrderingSupplement = registryOrderingSupplement ?? throw new ArgumentNullException(nameof(registryOrderingSupplement));
        Ranges = ranges ?? throw new ArgumentNullException(nameof(ranges));
        Sentinel = sentinel;
    }

    /// <summary>
    /// Gets the font dictionary index of the range containing the glyph.
    /// </summary>
    /// <param name="glyphId">The id of the glyph to look up.</param>
    /// <returns>
    /// The <see cref="Range3.FontDictionary"/> of the first range whose span contains
    /// <paramref name="glyphId"/>, or -1 if no range contains it.
    /// </returns>
    public int GetFontDictionaryIndex(int glyphId)
    {
        for (var i = 0; i < Ranges.Count; i++)
        {
            var range = Ranges[i];

            // A range ends where the next one starts, the last range ends at the sentinel.
            var endExclusive = i + 1 < Ranges.Count ? Ranges[i + 1].First : Sentinel;

            if (glyphId >= range.First && glyphId < endExclusive)
            {
                return range.FontDictionary;
            }
        }

        return -1;
    }

    /// <summary>
    /// A single range of a format 3 Font Dictionary Select.
    /// </summary>
    internal struct Range3
    {
        /// <summary>
        /// The first glyph id in the range.
        /// </summary>
        public int First { get; }

        /// <summary>
        /// The font dictionary index shared by the glyphs in the range.
        /// </summary>
        public int FontDictionary { get; }

        /// <summary>
        /// Create a new <see cref="Range3"/>.
        /// </summary>
        public Range3(int first, int fontDictionary)
        {
            First = first;
            FontDictionary = fontDictionary;
        }

        /// <inheritdoc />
        public override string ToString()
        {
            return $"First {First}, Dictionary {FontDictionary}.";
        }
    }
}
}

View File

@@ -94,9 +94,9 @@
public decimal UidBase { get; set; }
public decimal FontDictionaryArray { get; set; }
public int FontDictionaryArray { get; set; }
public decimal FontDictionarySelect { get; set; }
public int FontDictionarySelect { get; set; }
public string FontName { get; set; }
}

View File

@@ -134,7 +134,7 @@
dictionary.IsCidFont = true;
break;
case 37:
dictionary.CidFontOperators.FontDictionarySelect = operands[0].Decimal;
dictionary.CidFontOperators.FontDictionarySelect = GetIntOrDefault(operands);
dictionary.IsCidFont = true;
break;
case 38: