Correctly determine the Compact Font Format encoding where supplements are used, and pass the font's own encoding for Type 1 fonts to the encoding reader used to read the PDF.

This commit is contained in:
Eliot Jones
2019-05-11 11:56:01 +01:00
parent 9afceed1c5
commit 90e9c46373
7 changed files with 47 additions and 17 deletions

View File

@@ -17,18 +17,22 @@
var format = data.ReadCard8(); var format = data.ReadCard8();
switch (format) // A few fonts have multiply encoded glyphs which are indicated by setting the high order bit of the format byte.
// To get the real format out & with 0111 1111 (0x7f).
var baseFormat = format & 0x7f;
switch (baseFormat)
{ {
case 0: case 0:
return ReadFormat0Encoding(data, charset, stringIndex); return ReadFormat0Encoding(data, charset, stringIndex, format);
case 1: case 1:
return ReadFormat1Encoding(data, charset, stringIndex); return ReadFormat1Encoding(data, charset, stringIndex, format);
default: default:
throw new InvalidFontFormatException($"The provided format {format} for this Compact Font Format encoding was invalid."); throw new InvalidFontFormatException($"The provided format {format} for this Compact Font Format encoding was invalid.");
} }
} }
private static CompactFontFormatFormat0Encoding ReadFormat0Encoding(CompactFontFormatData data, ICompactFontFormatCharset charset, IReadOnlyList<string> stringIndex) private static CompactFontFormatFormat0Encoding ReadFormat0Encoding(CompactFontFormatData data, ICompactFontFormatCharset charset, IReadOnlyList<string> stringIndex, byte format)
{ {
var numberOfCodes = data.ReadCard8(); var numberOfCodes = data.ReadCard8();
@@ -41,10 +45,16 @@
values.Add((code, sid, str)); values.Add((code, sid, str));
} }
return new CompactFontFormatFormat0Encoding(values); IReadOnlyList<CompactFontFormatBuiltInEncoding.Supplement> supplements = new List<CompactFontFormatBuiltInEncoding.Supplement>();
if (HasSupplement(format))
{
supplements = ReadSupplement(data, stringIndex);
}
return new CompactFontFormatFormat0Encoding(values, supplements);
} }
private static CompactFontFormatFormat1Encoding ReadFormat1Encoding(CompactFontFormatData data, ICompactFontFormatCharset charset, IReadOnlyList<string> stringIndex) private static CompactFontFormatFormat1Encoding ReadFormat1Encoding(CompactFontFormatData data, ICompactFontFormatCharset charset, IReadOnlyList<string> stringIndex, byte format)
{ {
var numberOfRanges = data.ReadCard8(); var numberOfRanges = data.ReadCard8();
@@ -65,7 +75,11 @@
} }
} }
var supplements = ReadSupplement(data, stringIndex); IReadOnlyList<CompactFontFormatBuiltInEncoding.Supplement> supplements = new List<CompactFontFormatBuiltInEncoding.Supplement>();
if (HasSupplement(format))
{
supplements = ReadSupplement(data, stringIndex);
}
return new CompactFontFormatFormat1Encoding(numberOfRanges, fromRanges, supplements); return new CompactFontFormatFormat1Encoding(numberOfRanges, fromRanges, supplements);
} }
@@ -102,5 +116,10 @@
return "SID" + index; return "SID" + index;
} }
/// <summary>
/// Checks whether the raw encoding format byte has its high order bit set.
/// </summary>
/// <param name="format">The format byte as read from the encoding data, before any masking.</param>
/// <returns><c>true</c> if the high order bit (0x80) is set; otherwise <c>false</c>.</returns>
private static bool HasSupplement(byte format)
{
    // Fonts containing multiply encoded glyphs flag this through the top bit of the format byte.
    const int supplementFlag = 0x80;

    var flagBits = format & supplementFlag;

    return flagBits == supplementFlag;
}
} }
} }

View File

@@ -32,8 +32,7 @@
var individualData = new CompactFontFormatData(topDictionaryIndex.ToArray()); var individualData = new CompactFontFormatData(topDictionaryIndex.ToArray());
var topDictionary = topLevelDictionaryReader.Read(individualData, stringIndex); var topDictionary = topLevelDictionaryReader.Read(individualData, stringIndex);
var privateDictionary = CompactFontFormatPrivateDictionary.GetDefault(); var privateDictionary = CompactFontFormatPrivateDictionary.GetDefault();
if (topDictionary.PrivateDictionaryLocation.HasValue && topDictionary.PrivateDictionaryLocation.Value.Size > 0) if (topDictionary.PrivateDictionaryLocation.HasValue && topDictionary.PrivateDictionaryLocation.Value.Size > 0)

View File

@@ -1,11 +1,11 @@
namespace UglyToad.PdfPig.Fonts.Encodings namespace UglyToad.PdfPig.Fonts.Encodings
{ {
using System.Collections.Generic; using System.Collections.Generic;
using Util;
internal class CompactFontFormatFormat0Encoding : CompactFontFormatBuiltInEncoding internal class CompactFontFormatFormat0Encoding : CompactFontFormatBuiltInEncoding
{ {
public CompactFontFormatFormat0Encoding(IReadOnlyList<(int code, int sid, string str)> values) : base(EmptyArray<Supplement>.Instance) public CompactFontFormatFormat0Encoding(IReadOnlyList<(int code, int sid, string str)> values,
IReadOnlyList<Supplement> supplements) : base(supplements)
{ {
Add(0, 0, ".notdef"); Add(0, 0, ".notdef");

View File

@@ -17,7 +17,8 @@
this.pdfScanner = pdfScanner; this.pdfScanner = pdfScanner;
} }
public Encoding Read(DictionaryToken fontDictionary, bool isLenientParsing, FontDescriptor descriptor = null) public Encoding Read(DictionaryToken fontDictionary, bool isLenientParsing, FontDescriptor descriptor = null,
Encoding fontEncoding = null)
{ {
if (!fontDictionary.TryGet(NameToken.Encoding, out var baseEncodingObject)) if (!fontDictionary.TryGet(NameToken.Encoding, out var baseEncodingObject))
{ {
@@ -31,12 +32,12 @@
DictionaryToken encodingDictionary = DirectObjectFinder.Get<DictionaryToken>(baseEncodingObject, pdfScanner); DictionaryToken encodingDictionary = DirectObjectFinder.Get<DictionaryToken>(baseEncodingObject, pdfScanner);
var encoding = ReadEncodingDictionary(encodingDictionary); var encoding = ReadEncodingDictionary(encodingDictionary, fontEncoding);
return encoding; return encoding;
} }
private Encoding ReadEncodingDictionary(DictionaryToken encodingDictionary) private Encoding ReadEncodingDictionary(DictionaryToken encodingDictionary, Encoding fontEncoding)
{ {
Encoding baseEncoding; Encoding baseEncoding;
if (encodingDictionary.TryGet(NameToken.BaseEncoding, out var baseEncodingToken) && baseEncodingToken is NameToken baseEncodingName) if (encodingDictionary.TryGet(NameToken.BaseEncoding, out var baseEncodingToken) && baseEncodingToken is NameToken baseEncodingName)
@@ -49,7 +50,7 @@
else else
{ {
// TODO: This isn't true for non-symbolic fonts or latin fonts (based on OS?) see section 5.5.5 // TODO: This isn't true for non-symbolic fonts or latin fonts (based on OS?) see section 5.5.5
baseEncoding = StandardEncoding.Instance; baseEncoding = fontEncoding ?? StandardEncoding.Instance;
} }
if (!encodingDictionary.TryGet(NameToken.Differences, out var differencesBase)) if (!encodingDictionary.TryGet(NameToken.Differences, out var differencesBase))

View File

@@ -1,5 +1,6 @@
namespace UglyToad.PdfPig.Fonts.Parser.Handlers namespace UglyToad.PdfPig.Fonts.Parser.Handlers
{ {
using System.Linq;
using Cmap; using Cmap;
using CompactFontFormat; using CompactFontFormat;
using Encodings; using Encodings;
@@ -96,7 +97,16 @@
} }
} }
Encoding encoding = encodingReader.Read(dictionary, isLenientParsing, descriptor); Encoding fromFont = null;
font.Match(x => fromFont = x.Encoding != null ? new BuiltInEncoding(x.Encoding) : default(Encoding), x =>
{
if (x.Fonts != null && x.Fonts.Count > 0)
{
fromFont = x.Fonts.First().Value.Encoding;
}
});
Encoding encoding = encodingReader.Read(dictionary, isLenientParsing, descriptor, fromFont);
if (encoding == null) if (encoding == null)
{ {

View File

@@ -5,6 +5,7 @@
internal interface IEncodingReader internal interface IEncodingReader
{ {
Encoding Read(DictionaryToken fontDictionary, bool isLenientParsing, FontDescriptor descriptor = null); Encoding Read(DictionaryToken fontDictionary, bool isLenientParsing, FontDescriptor descriptor = null,
Encoding fontEncoding = null);
} }
} }