Use encoding specified in base font if present

If the font uses a named encoding which is not recognised, use the encoding corresponding to the base font name (ZapfDingbats or Symbol), or otherwise fall back to Windows ANSI encoding.
This commit is contained in:
Eliot Jones
2020-01-07 16:01:45 +00:00
parent e588b2bc50
commit d267d7501a
4 changed files with 49 additions and 10 deletions

View File

@@ -1,6 +1,9 @@
namespace UglyToad.PdfPig.Fonts.Encodings namespace UglyToad.PdfPig.Fonts.Encodings
{ {
internal class SymbolEncoding : Encoding /// <summary>
/// Symbol encoding.
/// </summary>
public class SymbolEncoding : Encoding
{ {
private static readonly (int, string)[] EncodingTable = { private static readonly (int, string)[] EncodingTable = {
(0101, "Alpha"), (0101, "Alpha"),
@@ -194,8 +197,12 @@
(0172, "zeta") (0172, "zeta")
}; };
/// <summary>
/// Single instance of this encoding.
/// </summary>
public static SymbolEncoding Instance { get; } = new SymbolEncoding(); public static SymbolEncoding Instance { get; } = new SymbolEncoding();
/// <inheritdoc />
public override string EncodingName => "SymbolEncoding"; public override string EncodingName => "SymbolEncoding";
private SymbolEncoding() private SymbolEncoding()

View File

@@ -2,7 +2,10 @@
{ {
using Core; using Core;
internal class WinAnsiEncoding : Encoding /// <summary>
/// Windows ANSI encoding.
/// </summary>
public class WinAnsiEncoding : Encoding
{ {
/// <summary> /// <summary>
/// The encoding table is taken from the Appendix of the specification. /// The encoding table is taken from the Appendix of the specification.
@@ -231,8 +234,12 @@
(0255, "hyphen") (0255, "hyphen")
}; };
/// <summary>
/// Single instance of this encoding.
/// </summary>
public static WinAnsiEncoding Instance { get; } = new WinAnsiEncoding(); public static WinAnsiEncoding Instance { get; } = new WinAnsiEncoding();
/// <inheritdoc />
public override string EncodingName => "WinAnsiEncoding"; public override string EncodingName => "WinAnsiEncoding";
private WinAnsiEncoding() private WinAnsiEncoding()

View File

@@ -1,6 +1,9 @@
namespace UglyToad.PdfPig.Fonts.Encodings namespace UglyToad.PdfPig.Fonts.Encodings
{ {
internal class ZapfDingbatsEncoding : Encoding /// <summary>
/// Zapf Dingbats encoding.
/// </summary>
public class ZapfDingbatsEncoding : Encoding
{ {
private static readonly (int, string)[] EncodingTable = { private static readonly (int, string)[] EncodingTable = {
(040, "space"), (040, "space"),
@@ -193,8 +196,12 @@
(0376, "a191") (0376, "a191")
}; };
/// <summary>
/// Single instance of this encoding.
/// </summary>
public static ZapfDingbatsEncoding Instance { get; } = new ZapfDingbatsEncoding(); public static ZapfDingbatsEncoding Instance { get; } = new ZapfDingbatsEncoding();
/// <inheritdoc />
public override string EncodingName => "ZapfDingbatsEncoding"; public override string EncodingName => "ZapfDingbatsEncoding";
private ZapfDingbatsEncoding() private ZapfDingbatsEncoding()

View File

@@ -1,5 +1,6 @@
namespace UglyToad.PdfPig.PdfFonts.Parser namespace UglyToad.PdfPig.PdfFonts.Parser
{ {
using System;
using System.Collections.Generic; using System.Collections.Generic;
using Fonts; using Fonts;
using Fonts.Encodings; using Fonts.Encodings;
@@ -27,11 +28,29 @@
if (baseEncodingObject is NameToken name) if (baseEncodingObject is NameToken name)
{ {
return GetNamedEncoding(descriptor, name); if (TryGetNamedEncoding(descriptor, name, out var namedEncoding))
{
return namedEncoding;
}
if (fontDictionary.TryGet(NameToken.BaseFont, pdfScanner, out NameToken baseFontName))
{
if (string.Equals(baseFontName.Data, "ZapfDingbats", StringComparison.OrdinalIgnoreCase))
{
return ZapfDingbatsEncoding.Instance;
}
if (string.Equals(baseFontName.Data, "Symbol", StringComparison.OrdinalIgnoreCase))
{
return SymbolEncoding.Instance;
}
return WinAnsiEncoding.Instance;
}
} }
DictionaryToken encodingDictionary = DirectObjectFinder.Get<DictionaryToken>(baseEncodingObject, pdfScanner); DictionaryToken encodingDictionary = DirectObjectFinder.Get<DictionaryToken>(baseEncodingObject, pdfScanner);
var encoding = ReadEncodingDictionary(encodingDictionary, fontEncoding); var encoding = ReadEncodingDictionary(encodingDictionary, fontEncoding);
return encoding; return encoding;
@@ -100,9 +119,9 @@
return differences; return differences;
} }
private static Encoding GetNamedEncoding(FontDescriptor descriptor, NameToken encodingName) private static bool TryGetNamedEncoding(FontDescriptor descriptor, NameToken encodingName, out Encoding encoding)
{ {
Encoding encoding; encoding = null;
// Symbolic fonts default to standard encoding. // Symbolic fonts default to standard encoding.
if (descriptor?.Flags.HasFlag(FontDescriptorFlags.Symbolic) == true) if (descriptor?.Flags.HasFlag(FontDescriptorFlags.Symbolic) == true)
{ {
@@ -111,11 +130,10 @@
if (!Encoding.TryGetNamedEncoding(encodingName, out encoding)) if (!Encoding.TryGetNamedEncoding(encodingName, out encoding))
{ {
// TODO: PDFBox would not throw here. return false;
throw new InvalidFontFormatException($"Unrecognised encoding name: {encodingName}");
} }
return encoding; return true;
} }
} }
} }