mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-10-15 19:54:52 +08:00
Fixes various font handling issues for Type 1 and TrueType fonts
Handle "unionsq" and other tricky glyph names. Log missing glyphs. Ignore flex points in Type 1 subroutines. Improve system font performance and substitution. Handle TrueType fonts using Standard 14 fonts.
This commit is contained in:
@@ -87,9 +87,14 @@
|
||||
|
||||
var uniStr = new StringBuilder();
|
||||
|
||||
var foundUnicode = true;
|
||||
for (int chPos = 3; chPos + 4 <= nameLength; chPos += 4)
|
||||
{
|
||||
int codePoint = int.Parse(name.Substring(chPos, 4), NumberStyles.HexNumber);
|
||||
if (!int.TryParse(name.Substring(chPos, 4), NumberStyles.HexNumber, CultureInfo.InvariantCulture, out var codePoint))
|
||||
{
|
||||
foundUnicode = false;
|
||||
break;
|
||||
}
|
||||
|
||||
if (codePoint > 0xD7FF && codePoint < 0xE000)
|
||||
{
|
||||
@@ -100,6 +105,11 @@
|
||||
uniStr.Append((char)codePoint);
|
||||
}
|
||||
|
||||
if (!foundUnicode)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
unicode = uniStr.ToString();
|
||||
}
|
||||
else if (name.StartsWith("u") && name.Length == 5)
|
||||
|
@@ -15,6 +15,7 @@
|
||||
using Tokens;
|
||||
using TrueType;
|
||||
using TrueType.Parser;
|
||||
using Util;
|
||||
|
||||
internal class TrueTypeFontHandler : IFontHandler
|
||||
{
|
||||
@@ -46,7 +47,29 @@
|
||||
|
||||
public IFont Generate(DictionaryToken dictionary, bool isLenientParsing)
|
||||
{
|
||||
var firstCharacter = FontDictionaryAccessHelper.GetFirstCharacter(dictionary);
|
||||
if (!dictionary.TryGetOptionalTokenDirect(NameToken.FirstChar, pdfScanner, out NumericToken firstCharacterToken))
|
||||
{
|
||||
if (!dictionary.TryGetOptionalTokenDirect(NameToken.BaseFont, pdfScanner, out NameToken baseFont))
|
||||
{
|
||||
throw new InvalidFontFormatException($"The provided TrueType font dictionary did not contain a /FirstChar or a /BaseFont entry: {dictionary}.");
|
||||
}
|
||||
|
||||
// Can use the AFM descriptor despite not being Type 1!
|
||||
var standard14Font = Standard14.GetAdobeFontMetrics(baseFont.Data);
|
||||
|
||||
if (standard14Font == null)
|
||||
{
|
||||
throw new InvalidFontFormatException($"The provided TrueType font dictionary did not have a /FirstChar and did not match a Standard 14 font: {dictionary}.");
|
||||
}
|
||||
|
||||
var fileSystemFont = systemFontFinder.GetTrueTypeFont(baseFont.Data);
|
||||
|
||||
var thisEncoding = encodingReader.Read(dictionary, isLenientParsing);
|
||||
|
||||
return new TrueTypeStandard14FallbackSimpleFont(baseFont, standard14Font, thisEncoding, fileSystemFont);
|
||||
}
|
||||
|
||||
var firstCharacter = firstCharacterToken.Int;
|
||||
|
||||
var widths = FontDictionaryAccessHelper.GetWidths(pdfScanner, dictionary, isLenientParsing);
|
||||
|
||||
|
@@ -11,6 +11,89 @@
|
||||
using TrueType;
|
||||
using Util.JetBrains.Annotations;
|
||||
|
||||
/// <summary>
/// Some TrueType fonts use both the Standard 14 descriptor and the TrueType font from disk.
/// This font prefers glyph bounds from the TrueType font program when one was supplied and
/// falls back to the Standard 14 font metrics otherwise.
/// </summary>
internal class TrueTypeStandard14FallbackSimpleFont : IFont
{
    /// <summary>
    /// Font matrix used when no TrueType font program (or no header table) is available: 1/1000 scaling on both axes.
    /// </summary>
    private static readonly TransformationMatrix DefaultTransformation =
        TransformationMatrix.FromValues(1m / 1000m, 0, 0, 1m / 1000m, 0, 0);

    private readonly FontMetrics fontMetrics;
    private readonly Encoding encoding;

    // May be null; every use below is null-checked.
    private readonly TrueTypeFontProgram font;

    /// <summary>
    /// The name this font was created with.
    /// </summary>
    public NameToken Name { get; }

    /// <summary>
    /// Always <c>false</c> for this font.
    /// </summary>
    public bool IsVertical { get; } = false;

    /// <summary>
    /// Create a new <see cref="TrueTypeStandard14FallbackSimpleFont"/>.
    /// </summary>
    /// <param name="name">The font name.</param>
    /// <param name="fontMetrics">The metrics used when the TrueType font cannot supply a bounding box.</param>
    /// <param name="encoding">Maps character codes to glyph names. Must not be null.</param>
    /// <param name="font">The TrueType font program, may be null.</param>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="encoding"/> is null.</exception>
    public TrueTypeStandard14FallbackSimpleFont(NameToken name, FontMetrics fontMetrics, Encoding encoding, TrueTypeFontProgram font)
    {
        this.fontMetrics = fontMetrics;
        this.encoding = encoding ?? throw new ArgumentNullException(nameof(encoding));
        this.font = font;
        Name = name;
    }

    /// <summary>
    /// Reads a single-byte character code from the current position of the input.
    /// </summary>
    /// <param name="bytes">The input positioned on the byte to read.</param>
    /// <param name="codeLength">Always set to 1.</param>
    /// <returns>The value of the current byte.</returns>
    public int ReadCharacterCode(IInputBytes bytes, out int codeLength)
    {
        codeLength = 1;
        return bytes.CurrentByte;
    }

    /// <summary>
    /// Maps the character code to a glyph name using the encoding, then looks that name up in the Adobe Glyph List.
    /// </summary>
    /// <param name="characterCode">The character code to map.</param>
    /// <param name="value">The Unicode string when found, otherwise null.</param>
    /// <returns><c>true</c> only when a non-null Unicode value was found.</returns>
    public bool TryGetUnicode(int characterCode, out string value)
    {
        value = null;

        // If the font is a simple font that uses one of the predefined encodings MacRomanEncoding, MacExpertEncoding, or WinAnsiEncoding...

        // Map the character code to a character name.
        var encodedCharacterName = encoding.GetName(characterCode);

        // Look up the character name in the Adobe Glyph List.
        try
        {
            value = GlyphList.AdobeGlyphList.NameToUnicode(encodedCharacterName);
        }
        catch
        {
            // Deliberately best-effort: any lookup failure is reported as "not found".
            return false;
        }

        // The glyph list lookup can yield null for unknown names; that is not a successful lookup.
        return value != null;
    }

    /// <summary>
    /// Gets the bounding box and width for the glyph of the given character code, in font-matrix-transformed units.
    /// </summary>
    /// <param name="characterCode">The character code to look up.</param>
    /// <returns>
    /// Bounds from the TrueType font program when it can supply them (the width is then the width of the bounds),
    /// otherwise the bounds and horizontal width from the font metrics entry for the encoded glyph name.
    /// </returns>
    public CharacterBoundingBox GetBoundingBox(int characterCode)
    {
        var fontMatrix = GetFontMatrix();

        if (font != null && font.TryGetBoundingBox(characterCode, out var bounds))
        {
            bounds = fontMatrix.Transform(bounds);
            return new CharacterBoundingBox(bounds, bounds.Width);
        }

        // Fall back to the metrics entry keyed by the glyph name for this character code.
        var name = encoding.GetName(characterCode);
        var metrics = fontMetrics.CharacterMetrics[name];

        bounds = fontMatrix.Transform(metrics.BoundingBox);
        var width = fontMatrix.TransformX(metrics.WidthX);

        return new CharacterBoundingBox(bounds, width);
    }

    /// <summary>
    /// Gets the matrix which scales glyph space to text space.
    /// </summary>
    /// <returns>
    /// A 1/scale matrix based on the TrueType font's multiplier when the font has a header table,
    /// otherwise the default 1/1000 matrix.
    /// </returns>
    public TransformationMatrix GetFontMatrix()
    {
        if (font?.TableRegister.HeaderTable != null)
        {
            var scale = (decimal)font.GetFontMatrixMultiplier();

            return TransformationMatrix.FromValues(1 / scale, 0, 0, 1 / scale, 0, 0);
        }

        return DefaultTransformation;
    }
}
|
||||
|
||||
internal class TrueTypeSimpleFont : IFont
|
||||
{
|
||||
private static readonly TransformationMatrix DefaultTransformation =
|
||||
|
@@ -8,13 +8,59 @@
|
||||
using IO;
|
||||
using TrueType;
|
||||
using TrueType.Parser;
|
||||
using Util;
|
||||
|
||||
internal class SystemFontFinder : ISystemFontFinder
|
||||
{
|
||||
private static readonly IReadOnlyDictionary<string, string[]> NameSubstitutes;
|
||||
|
||||
/// <summary>
/// Builds <see cref="NameSubstitutes"/>: for each known font name, the ordered list of alternative font names
/// to try when the named font itself cannot be located.
/// </summary>
static SystemFontFinder()
{
    var dict = new Dictionary<string, string[]>
    {
        {"Courier", new[] {"CourierNew", "CourierNewPSMT", "LiberationMono", "NimbusMonL-Regu"}},
        {"Courier-Bold", new[] {"CourierNewPS-BoldMT", "CourierNew-Bold", "LiberationMono-Bold", "NimbusMonL-Bold"}},
        {"Courier-Oblique", new[] {"CourierNewPS-ItalicMT", "CourierNew-Italic", "LiberationMono-Italic", "NimbusMonL-ReguObli"}},
        {"Courier-BoldOblique", new[] {"CourierNewPS-BoldItalicMT", "CourierNew-BoldItalic", "LiberationMono-BoldItalic", "NimbusMonL-BoldObli"}},
        {"Helvetica", new[] {"ArialMT", "Arial", "LiberationSans", "NimbusSanL-Regu"}},
        {"Helvetica-Bold", new[] {"Arial-BoldMT", "Arial-Bold", "LiberationSans-Bold", "NimbusSanL-Bold"}},
        {"Helvetica-BoldOblique", new[] {"Arial-BoldItalicMT", "Helvetica-BoldItalic", "LiberationSans-BoldItalic", "NimbusSanL-BoldItal"}},
        {"Helvetica-Oblique", new[] {"Arial-ItalicMT", "Arial-Italic", "Helvetica-Italic", "LiberationSans-Italic", "NimbusSanL-ReguItal"}},
        {"Times-Roman", new[] {"TimesNewRomanPSMT", "TimesNewRoman", "TimesNewRomanPS", "LiberationSerif", "NimbusRomNo9L-Regu"}},
        {"Times-Bold", new[] {"TimesNewRomanPS-BoldMT", "TimesNewRomanPS-Bold", "TimesNewRoman-Bold", "LiberationSerif-Bold", "NimbusRomNo9L-Medi"}},
        {"Times-Italic", new[] {"TimesNewRomanPS-ItalicMT", "TimesNewRomanPS-Italic", "TimesNewRoman-Italic", "LiberationSerif-Italic", "NimbusRomNo9L-ReguItal"}},
        // Every other row is keyed by the Standard 14 name; this row was previously keyed only by its first
        // substitute, which left "Times-BoldItalic" without any real substitutes.
        {"Times-BoldItalic", new[] {"TimesNewRomanPS-BoldItalicMT", "TimesNewRomanPS-BoldItalic", "TimesNewRoman-BoldItalic", "LiberationSerif-BoldItalic", "NimbusRomNo9L-MediItal"}},
        // Legacy key retained unchanged so lookups that relied on it keep returning the same substitutes.
        {"TimesNewRomanPS-BoldItalicMT", new[] {"TimesNewRomanPS-BoldItalic", "TimesNewRoman-BoldItalic", "LiberationSerif-BoldItalic", "NimbusRomNo9L-MediItal"}},
        {"Symbol", new[] {"SymbolMT", "StandardSymL"}},
        {"ZapfDingbats", new[] {"ZapfDingbatsITC", "Dingbats", "MS-Gothic"}}
    };

    var names = Standard14.GetNames();

    foreach (var name in names)
    {
        if (dict.ContainsKey(name))
        {
            continue;
        }

        // NOTE(review): GetMappedFontName presumably resolves an alias to its base Standard 14 name - confirm.
        var value = Standard14.GetMappedFontName(name);

        // Share the mapped name's substitutes when it has an entry; otherwise the mapped name is the only substitute.
        dict[name] = dict.TryGetValue(value, out var subs) ? subs : new[] {value};
    }

    NameSubstitutes = dict;
}
|
||||
|
||||
private readonly TrueTypeFontParser trueTypeFontParser;
|
||||
private readonly Lazy<IReadOnlyList<SystemFontRecord>> availableFonts;
|
||||
|
||||
private readonly Dictionary<string, TrueTypeFontProgram> cache = new Dictionary<string, TrueTypeFontProgram>(StringComparer.OrdinalIgnoreCase);
|
||||
private readonly Dictionary<string, string> nameToFileNameMap = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
|
||||
private readonly HashSet<string> readFiles = new HashSet<string>();
|
||||
|
||||
public SystemFontFinder(TrueTypeFontParser trueTypeFontParser)
|
||||
@@ -48,11 +94,76 @@
|
||||
|
||||
public TrueTypeFontProgram GetTrueTypeFont(string name)
|
||||
{
|
||||
if (cache.TryGetValue(name, out var result))
|
||||
var result = GetTrueTypeFontNamed(name);
|
||||
|
||||
if (result != null)
|
||||
{
|
||||
return result;
|
||||
}
|
||||
|
||||
if (name.Contains("-"))
|
||||
{
|
||||
result = GetTrueTypeFontNamed(name.Replace("-", string.Empty));
|
||||
|
||||
if (result != null)
|
||||
{
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
if (name.Contains(","))
|
||||
{
|
||||
result = GetTrueTypeFontNamed(name.Replace(",", "-"));
|
||||
|
||||
if (result != null)
|
||||
{
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
foreach (var substituteName in GetSubstituteNames(name))
|
||||
{
|
||||
result = GetTrueTypeFontNamed(substituteName);
|
||||
|
||||
if (result != null)
|
||||
{
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
result = GetTrueTypeFontNamed(name + "-Regular");
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// <summary>
/// Looks up the substitute font names registered for <paramref name="name"/>, ignoring any spaces in the name.
/// </summary>
/// <param name="name">The font name to find substitutes for.</param>
/// <returns>The registered substitutes, or an empty sequence when none are registered.</returns>
private IEnumerable<string> GetSubstituteNames(string name)
{
    var key = name.Replace(" ", string.Empty);

    if (!NameSubstitutes.TryGetValue(key, out var substitutes))
    {
        return EmptyArray<string>.Instance;
    }

    return substitutes;
}
|
||||
|
||||
private TrueTypeFontProgram GetTrueTypeFontNamed(string name)
|
||||
{
|
||||
if (cache.TryGetValue(name, out var result))
|
||||
{
|
||||
return result;
|
||||
}
|
||||
|
||||
if (nameToFileNameMap.TryGetValue(name, out var fileName))
|
||||
{
|
||||
if (TryReadFile(fileName, false, name, out result))
|
||||
{
|
||||
return result;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
var nameCandidates = availableFonts.Value.Where(x => Path.GetFileName(x.Path)?.StartsWith(name[0].ToString(), StringComparison.OrdinalIgnoreCase) == true);
|
||||
|
||||
foreach (var systemFontRecord in nameCandidates)
|
||||
@@ -86,27 +197,50 @@
|
||||
return false;
|
||||
}
|
||||
|
||||
using (var fileStream = File.OpenRead(record.Path))
|
||||
{
|
||||
readFiles.Add(record.Path);
|
||||
|
||||
var input = new StreamInputBytes(fileStream);
|
||||
var trueType = trueTypeFontParser.Parse(new TrueTypeDataBytes(input));
|
||||
var psName = trueType.TableRegister.NameTable?.GetPostscriptName() ?? trueType.Name;
|
||||
if (!cache.ContainsKey(psName))
|
||||
{
|
||||
cache[psName] = trueType;
|
||||
}
|
||||
|
||||
if (string.Equals(psName, name, StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
font = trueType;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return TryReadFile(record.Path, true, name, out font);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/// <summary>
/// Opens the font file and parses it as a TrueType font, optionally checking the name table first so that
/// files for other fonts are rejected without a full parse.
/// </summary>
/// <param name="fileName">Path of the font file to read. It is recorded in the set of read files before opening.</param>
/// <param name="readNameFirst">
/// When <c>true</c>, only the name table is read initially; the file's font name is recorded against the file name
/// and the full parse happens only if it matches <paramref name="fontName"/> (ignoring case).
/// </param>
/// <param name="fontName">The font name being searched for.</param>
/// <param name="font">The parsed font on success, otherwise null.</param>
/// <returns><c>true</c> when the file was fully parsed; <c>false</c> when the name check rejected it.</returns>
private bool TryReadFile(string fileName, bool readNameFirst, string fontName, out TrueTypeFontProgram font)
{
    font = null;
    readFiles.Add(fileName);

    using (var fileStream = File.OpenRead(fileName))
    {
        var input = new StreamInputBytes(fileStream);
        var data = new TrueTypeDataBytes(input);

        if (readNameFirst)
        {
            var name = trueTypeFontParser.GetNameTable(data);

            if (name == null)
            {
                return false;
            }

            var fontNameFromFile = name.GetPostscriptName() ?? name.FontName;

            if (fontNameFromFile == null)
            {
                // No usable name: it cannot be a dictionary key and cannot match the requested font.
                return false;
            }

            nameToFileNameMap[fontNameFromFile] = fileName;

            if (!string.Equals(fontNameFromFile, fontName, StringComparison.OrdinalIgnoreCase))
            {
                return false;
            }

            // Reading the name table has moved the read position forward; rewind before the full parse.
            // NOTE(review): assumes Parse reads from the current position rather than seeking itself - confirm.
            data.Seek(0);
        }

        font = trueTypeFontParser.Parse(data);
        var psName = font.TableRegister.NameTable?.GetPostscriptName() ?? font.Name;

        // Keep the first font seen for a given name; a null name cannot be used as a cache key.
        if (psName != null && !cache.ContainsKey(psName))
        {
            cache[psName] = font;
        }

        return true;
    }
}
|
||||
}
|
||||
}
|
@@ -125,6 +125,41 @@
|
||||
return new TrueTypeFontProgram(version, tables, builder.Build());
|
||||
}
|
||||
|
||||
/// <summary>
/// Reads only the name table from TrueType font data, without parsing the rest of the font.
/// </summary>
/// <param name="data">The font data, positioned at the start of the font.</param>
/// <returns>The parsed name table, or null when the table directory contains no name table entry.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="data"/> is null.</exception>
public NameTable GetNameTable(TrueTypeDataBytes data)
{
    if (data == null)
    {
        throw new ArgumentNullException(nameof(data));
    }

    // Consume the leading header values so the reader ends up at the first table directory entry.
    // Only the table count is needed; the version and the three trailing unsigned shorts are discarded.
    data.Read32Fixed();
    int tableCount = data.ReadUnsignedShort();
    data.ReadUnsignedShort();
    data.ReadUnsignedShort();
    data.ReadUnsignedShort();

    for (var remaining = tableCount; remaining > 0; remaining--)
    {
        var entry = ReadTable(data);

        if (!entry.HasValue || entry.Value.Tag != TrueTypeHeaderTable.Name)
        {
            continue;
        }

        // Stop at the first name table entry and parse it straight away.
        return TableParser.Parse<NameTable>(entry.Value, data, new TableRegister.Builder());
    }

    return null;
}
|
||||
|
||||
private static void OptionallyParseTables(IReadOnlyDictionary<string, TrueTypeHeaderTable> tables, TrueTypeDataBytes data, TableRegister.Builder tableRegister)
|
||||
{
|
||||
// cmap
|
||||
|
@@ -44,10 +44,11 @@
|
||||
case FlexEnd:
|
||||
{
|
||||
context.IsFlexing = false;
|
||||
if (context.FlexPoints.Count < 7)
|
||||
{
|
||||
throw new NotSupportedException("There must be at least 7 flex points defined by an other subroutine.");
|
||||
}
|
||||
// TODO: I don't really care about flexpoints, but we should probably handle them... one day.
|
||||
//if (context.FlexPoints.Count < 7)
|
||||
//{
|
||||
// throw new NotSupportedException("There must be at least 7 flex points defined by an other subroutine.");
|
||||
//}
|
||||
|
||||
context.ClearFlexPoints();
|
||||
break;
|
||||
|
@@ -7,6 +7,7 @@
|
||||
using Fonts;
|
||||
using Geometry;
|
||||
using IO;
|
||||
using Logging;
|
||||
using Operations;
|
||||
using PdfPig.Core;
|
||||
using Tokenization.Scanner;
|
||||
@@ -22,6 +23,7 @@
|
||||
private readonly bool isLenientParsing;
|
||||
private readonly IPdfTokenScanner pdfScanner;
|
||||
private readonly XObjectFactory xObjectFactory;
|
||||
private readonly ILog log;
|
||||
|
||||
private Stack<CurrentGraphicsState> graphicsStack = new Stack<CurrentGraphicsState>();
|
||||
|
||||
@@ -43,13 +45,15 @@
|
||||
|
||||
public ContentStreamProcessor(PdfRectangle cropBox, IResourceStore resourceStore, UserSpaceUnit userSpaceUnit, bool isLenientParsing,
|
||||
IPdfTokenScanner pdfScanner,
|
||||
XObjectFactory xObjectFactory)
|
||||
XObjectFactory xObjectFactory,
|
||||
ILog log)
|
||||
{
|
||||
this.resourceStore = resourceStore;
|
||||
this.userSpaceUnit = userSpaceUnit;
|
||||
this.isLenientParsing = isLenientParsing;
|
||||
this.pdfScanner = pdfScanner;
|
||||
this.xObjectFactory = xObjectFactory;
|
||||
this.log = log;
|
||||
graphicsStack.Push(new CurrentGraphicsState());
|
||||
}
|
||||
|
||||
@@ -125,10 +129,9 @@
|
||||
|
||||
var foundUnicode = font.TryGetUnicode(code, out var unicode);
|
||||
|
||||
if (!foundUnicode && !isLenientParsing)
|
||||
if (!foundUnicode || unicode == null)
|
||||
{
|
||||
// TODO: record warning
|
||||
// throw new InvalidOperationException($"We could not find the corresponding character with code {code} in font {font.Name}.");
|
||||
log.Warn($"We could not find the corresponding character with code {code} in font {font.Name}.");
|
||||
}
|
||||
|
||||
var wordSpacing = 0m;
|
||||
|
@@ -9,6 +9,7 @@
|
||||
using Geometry;
|
||||
using Graphics;
|
||||
using IO;
|
||||
using Logging;
|
||||
using Parts;
|
||||
using Tokenization.Scanner;
|
||||
using Tokens;
|
||||
@@ -17,20 +18,23 @@
|
||||
|
||||
internal class PageFactory : IPageFactory
|
||||
{
|
||||
private readonly IPdfTokenScanner pdfScanner;
|
||||
private readonly IResourceStore resourceStore;
|
||||
private readonly IFilterProvider filterProvider;
|
||||
private readonly IPageContentParser pageContentParser;
|
||||
private readonly XObjectFactory xObjectFactory;
|
||||
private readonly IPdfTokenScanner pdfScanner;
|
||||
private readonly ILog log;
|
||||
|
||||
public PageFactory(IPdfTokenScanner pdfScanner, IResourceStore resourceStore, IFilterProvider filterProvider,
|
||||
IPageContentParser pageContentParser,
|
||||
XObjectFactory xObjectFactory)
|
||||
XObjectFactory xObjectFactory,
|
||||
ILog log)
|
||||
{
|
||||
this.resourceStore = resourceStore;
|
||||
this.filterProvider = filterProvider;
|
||||
this.pageContentParser = pageContentParser;
|
||||
this.xObjectFactory = xObjectFactory;
|
||||
this.log = log;
|
||||
this.pdfScanner = pdfScanner;
|
||||
}
|
||||
|
||||
@@ -108,7 +112,7 @@
|
||||
{
|
||||
var operations = pageContentParser.Parse(new ByteArrayInputBytes(contentBytes));
|
||||
|
||||
var context = new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit, isLenientParsing, pdfScanner, xObjectFactory);
|
||||
var context = new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit, isLenientParsing, pdfScanner, xObjectFactory, log);
|
||||
|
||||
return context.Process(operations);
|
||||
}
|
||||
|
@@ -117,7 +117,9 @@
|
||||
|
||||
var resourceContainer = new ResourceContainer(pdfScanner, fontFactory);
|
||||
|
||||
var pageFactory = new PageFactory(pdfScanner, resourceContainer, filterProvider, new PageContentParser(new ReflectionGraphicsStateOperationFactory()), new XObjectFactory());
|
||||
var pageFactory = new PageFactory(pdfScanner, resourceContainer, filterProvider,
|
||||
new PageContentParser(new ReflectionGraphicsStateOperationFactory()),
|
||||
new XObjectFactory(), log);
|
||||
var informationFactory = new DocumentInformationFactory();
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user