Correctly determine the Compact Font Format encoding where supplements are used, and pass the font encoding for Type 1 fonts to the encoding reader used when reading the PDF.

This commit is contained in:
Eliot Jones
2019-05-11 11:56:01 +01:00
parent 9afceed1c5
commit 90e9c46373
7 changed files with 47 additions and 17 deletions

View File

@@ -17,18 +17,22 @@
var format = data.ReadCard8();
switch (format)
// A few fonts have multiply encoded glyphs which are indicated by setting the high order bit of the format byte.
// To get the real format, bitwise AND the format byte with 0111 1111 (0x7f).
var baseFormat = format & 0x7f;
switch (baseFormat)
{
case 0:
return ReadFormat0Encoding(data, charset, stringIndex);
return ReadFormat0Encoding(data, charset, stringIndex, format);
case 1:
return ReadFormat1Encoding(data, charset, stringIndex);
return ReadFormat1Encoding(data, charset, stringIndex, format);
default:
throw new InvalidFontFormatException($"The provided format {format} for this Compact Font Format encoding was invalid.");
}
}
private static CompactFontFormatFormat0Encoding ReadFormat0Encoding(CompactFontFormatData data, ICompactFontFormatCharset charset, IReadOnlyList<string> stringIndex)
private static CompactFontFormatFormat0Encoding ReadFormat0Encoding(CompactFontFormatData data, ICompactFontFormatCharset charset, IReadOnlyList<string> stringIndex, byte format)
{
var numberOfCodes = data.ReadCard8();
@@ -41,10 +45,16 @@
values.Add((code, sid, str));
}
return new CompactFontFormatFormat0Encoding(values);
IReadOnlyList<CompactFontFormatBuiltInEncoding.Supplement> supplements = new List<CompactFontFormatBuiltInEncoding.Supplement>();
if (HasSupplement(format))
{
supplements = ReadSupplement(data, stringIndex);
}
private static CompactFontFormatFormat1Encoding ReadFormat1Encoding(CompactFontFormatData data, ICompactFontFormatCharset charset, IReadOnlyList<string> stringIndex)
return new CompactFontFormatFormat0Encoding(values, supplements);
}
private static CompactFontFormatFormat1Encoding ReadFormat1Encoding(CompactFontFormatData data, ICompactFontFormatCharset charset, IReadOnlyList<string> stringIndex, byte format)
{
var numberOfRanges = data.ReadCard8();
@@ -65,7 +75,11 @@
}
}
var supplements = ReadSupplement(data, stringIndex);
IReadOnlyList<CompactFontFormatBuiltInEncoding.Supplement> supplements = new List<CompactFontFormatBuiltInEncoding.Supplement>();
if (HasSupplement(format))
{
supplements = ReadSupplement(data, stringIndex);
}
return new CompactFontFormatFormat1Encoding(numberOfRanges, fromRanges, supplements);
}
@@ -102,5 +116,10 @@
return "SID" + index;
}
/// <summary>
/// Checks whether the encoding format byte has its high-order bit set.
/// </summary>
/// <param name="format">The raw format byte read from the encoding data.</param>
/// <returns><c>true</c> if the high-order bit (0x80) is set; otherwise <c>false</c>.</returns>
private static bool HasSupplement(byte format)
{
// The high-order bit of the format byte marks the few fonts which have multiply encoded glyphs.
const int supplementFlag = 0x80;
return (format & supplementFlag) == supplementFlag;
}
}
}

View File

@@ -33,7 +33,6 @@
var topDictionary = topLevelDictionaryReader.Read(individualData, stringIndex);
var privateDictionary = CompactFontFormatPrivateDictionary.GetDefault();
if (topDictionary.PrivateDictionaryLocation.HasValue && topDictionary.PrivateDictionaryLocation.Value.Size > 0)

View File

@@ -1,11 +1,11 @@
namespace UglyToad.PdfPig.Fonts.Encodings
{
using System.Collections.Generic;
using Util;
internal class CompactFontFormatFormat0Encoding : CompactFontFormatBuiltInEncoding
{
public CompactFontFormatFormat0Encoding(IReadOnlyList<(int code, int sid, string str)> values) : base(EmptyArray<Supplement>.Instance)
public CompactFontFormatFormat0Encoding(IReadOnlyList<(int code, int sid, string str)> values,
IReadOnlyList<Supplement> supplements) : base(supplements)
{
Add(0, 0, ".notdef");

View File

@@ -17,7 +17,8 @@
this.pdfScanner = pdfScanner;
}
public Encoding Read(DictionaryToken fontDictionary, bool isLenientParsing, FontDescriptor descriptor = null)
public Encoding Read(DictionaryToken fontDictionary, bool isLenientParsing, FontDescriptor descriptor = null,
Encoding fontEncoding = null)
{
if (!fontDictionary.TryGet(NameToken.Encoding, out var baseEncodingObject))
{
@@ -31,12 +32,12 @@
DictionaryToken encodingDictionary = DirectObjectFinder.Get<DictionaryToken>(baseEncodingObject, pdfScanner);
var encoding = ReadEncodingDictionary(encodingDictionary);
var encoding = ReadEncodingDictionary(encodingDictionary, fontEncoding);
return encoding;
}
private Encoding ReadEncodingDictionary(DictionaryToken encodingDictionary)
private Encoding ReadEncodingDictionary(DictionaryToken encodingDictionary, Encoding fontEncoding)
{
Encoding baseEncoding;
if (encodingDictionary.TryGet(NameToken.BaseEncoding, out var baseEncodingToken) && baseEncodingToken is NameToken baseEncodingName)
@@ -49,7 +50,7 @@
else
{
// TODO: This isn't true for non-symbolic fonts or latin fonts (based on OS?) see section 5.5.5
baseEncoding = StandardEncoding.Instance;
baseEncoding = fontEncoding ?? StandardEncoding.Instance;
}
if (!encodingDictionary.TryGet(NameToken.Differences, out var differencesBase))

View File

@@ -1,5 +1,6 @@
namespace UglyToad.PdfPig.Fonts.Parser.Handlers
{
using System.Linq;
using Cmap;
using CompactFontFormat;
using Encodings;
@@ -96,7 +97,16 @@
}
}
Encoding encoding = encodingReader.Read(dictionary, isLenientParsing, descriptor);
Encoding fromFont = null;
font.Match(x => fromFont = x.Encoding != null ? new BuiltInEncoding(x.Encoding) : default(Encoding), x =>
{
if (x.Fonts != null && x.Fonts.Count > 0)
{
fromFont = x.Fonts.First().Value.Encoding;
}
});
Encoding encoding = encodingReader.Read(dictionary, isLenientParsing, descriptor, fromFont);
if (encoding == null)
{

View File

@@ -5,6 +5,7 @@
internal interface IEncodingReader
{
Encoding Read(DictionaryToken fontDictionary, bool isLenientParsing, FontDescriptor descriptor = null);
Encoding Read(DictionaryToken fontDictionary, bool isLenientParsing, FontDescriptor descriptor = null,
Encoding fontEncoding = null);
}
}