#9 fix bug with truetype fonts and start adding support for cid fonts using compact font format

This commit is contained in:
Eliot Jones
2018-12-28 22:34:47 +00:00
parent d9052e1388
commit 47e49c4044
17 changed files with 1273 additions and 1105 deletions

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@@ -10,7 +10,10 @@
// Scratch test for manually opening a document from a local path while debugging.
// The whole body is commented out, so this test currently does no work and asserts nothing.
[Fact]
public void Tests()
{
//using (var document = PdfDocument.Open(File.ReadAllBytes(@"C:\Users\eliot\Downloads\Motor Insurance claim form.pdf"), new ParsingOptions{UseLenientParsing = false}))
//{
// var page1 = document.GetPage(1);
//}
}
}
}

View File

@@ -33,6 +33,7 @@
<ItemGroup>
<EmbeddedResource Remove="Fonts\TrueType\Andada-Regular.ttf" />
<EmbeddedResource Remove="Fonts\TrueType\google-simple-doc.ttf" />
<EmbeddedResource Remove="Fonts\TrueType\PMingLiU.ttf" />
<EmbeddedResource Remove="Fonts\TrueType\Roboto-Regular.ttf" />
<EmbeddedResource Remove="Fonts\Type1\AdobeUtopia.pfa" />
<EmbeddedResource Remove="Fonts\Type1\CMBX10.pfa" />
@@ -43,6 +44,7 @@
<ItemGroup>
<None Remove="Fonts\CompactFontFormat\MinionPro.bin" />
<None Remove="Fonts\TrueType\Roboto-Regular.GlyphData.txt" />
</ItemGroup>
<ItemGroup>
@@ -55,6 +57,12 @@
<Content Include="Fonts\TrueType\google-simple-doc.ttf">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
<Content Include="Fonts\TrueType\PMingLiU.ttf">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
<Content Include="Fonts\TrueType\Roboto-Regular.GlyphData.txt">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
<Content Include="Fonts\TrueType\Roboto-Regular.ttf">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>

View File

@@ -11,6 +11,7 @@
/// </summary>
internal class Type0CidFont : ICidFont
{
private readonly ICidFontProgram fontProgram;
public NameToken Type { get; }
public NameToken SubType { get; }
public NameToken BaseFont { get; }
@@ -19,9 +20,18 @@
public CidFontType CidFontType => CidFontType.Type0;
public FontDescriptor Descriptor { get; }
public Type0CidFont()
public Type0CidFont(ICidFontProgram fontProgram, NameToken type, NameToken subType, NameToken baseFont,
CharacterIdentifierSystemInfo systemInfo,
FontDescriptor descriptor)
{
throw new System.NotImplementedException();
this.fontProgram = fontProgram;
Type = type;
SubType = subType;
BaseFont = baseFont;
SystemInfo = systemInfo;
var scale = 1 / (decimal)(fontProgram?.GetFontMatrixMultiplier() ?? 1000);
FontMatrix = TransformationMatrix.FromValues(scale, 0, 0, scale, 0, 0);
Descriptor = descriptor;
}
public decimal GetWidthFromFont(int characterCode)

View File

@@ -3,6 +3,7 @@
using System;
using System.Collections.Generic;
using System.Linq;
using CidFonts;
using Core;
using Geometry;
using Util.JetBrains.Annotations;
@@ -11,7 +12,7 @@
/// A Compact Font Format (CFF) font program as described in The Compact Font Format specification (Adobe Technical Note #5176).
/// A CFF font may contain multiple fonts and achieves compression by sharing details between fonts in the set.
/// </summary>
internal class CompactFontFormatFontProgram
internal class CompactFontFormatFontProgram : ICidFontProgram
{
/// <summary>
/// The decoded header table for this font.
@@ -58,5 +59,30 @@
#endif
return Fonts.First().Value;
}
/// <summary>
/// Placeholder: looking up a bounding box by character identifier is not implemented yet.
/// </summary>
/// <param name="characterIdentifier">The character identifier to look up.</param>
/// <param name="boundingBox">Never assigned because this method always throws.</param>
/// <returns>Never returns normally.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public bool TryGetBoundingBox(int characterIdentifier, out PdfRectangle boundingBox)
    => throw new NotImplementedException();
/// <summary>
/// Placeholder: looking up a bounding box via a character-identifier-to-glyph-index mapping is not implemented yet.
/// </summary>
/// <param name="characterIdentifier">The character identifier to look up.</param>
/// <param name="characterIdentifierToGlyphIndex">Mapping function from character identifier to glyph index; currently unused.</param>
/// <param name="boundingBox">Never assigned because this method always throws.</param>
/// <returns>Never returns normally.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public bool TryGetBoundingBox(int characterIdentifier, Func<int, int> characterIdentifierToGlyphIndex, out PdfRectangle boundingBox)
    => throw new NotImplementedException();
/// <summary>
/// Placeholder: looking up a width via a character-identifier-to-glyph-index mapping is not implemented yet.
/// </summary>
/// <param name="characterIdentifier">The character identifier to look up.</param>
/// <param name="characterIdentifierToGlyphIndex">Mapping function from character identifier to glyph index; currently unused.</param>
/// <param name="width">Never assigned because this method always throws.</param>
/// <returns>Never returns normally.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public bool TryGetBoundingAdvancedWidth(int characterIdentifier, Func<int, int> characterIdentifierToGlyphIndex, out decimal width)
    => throw new NotImplementedException();
/// <summary>
/// Placeholder: looking up a width by character identifier is not implemented yet.
/// </summary>
/// <param name="characterIdentifier">The character identifier to look up.</param>
/// <param name="width">Never assigned because this method always throws.</param>
/// <returns>Never returns normally.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public bool TryGetBoundingAdvancedWidth(int characterIdentifier, out decimal width)
    => throw new NotImplementedException();
/// <summary>
/// Gets the multiplier for this font program's font matrix.
/// </summary>
/// <returns>Always the fixed value 1000.</returns>
public int GetFontMatrixMultiplier() => 1000;
}
}

View File

@@ -59,6 +59,8 @@
public bool IsCidFont { get; set; }
public CidFontOperators CidFontOperators { get; set; } = new CidFontOperators();
public struct SizeAndOffset
{
public int Size { get; }
@@ -78,6 +80,36 @@
}
}
/// <summary>
/// Mutable container for the CID font specific values of a Compact Font Format top level dictionary.
/// </summary>
internal class CidFontOperators
{
    /// <summary>
    /// The registry, ordering and supplement triple. Not set by default.
    /// </summary>
    public RegistryOrderingSupplement Ros { get; set; }

    /// <summary>
    /// The CID font version. Defaults to 0.
    /// </summary>
    public int Version { get; set; }

    /// <summary>
    /// The CID font revision. Defaults to 0.
    /// </summary>
    public int Revision { get; set; }

    /// <summary>
    /// The CID font type. Defaults to 0.
    /// </summary>
    public int Type { get; set; }

    /// <summary>
    /// The CID count. Defaults to 8720.
    /// </summary>
    public int Count { get; set; } = 8720;

    /// <summary>
    /// The UID base value.
    /// </summary>
    public decimal UidBase { get; set; }

    /// <summary>
    /// The font dictionary array value.
    /// </summary>
    public decimal FontDictionaryArray { get; set; }

    /// <summary>
    /// The font dictionary select value.
    /// </summary>
    public decimal FontDictionarySelect { get; set; }

    /// <summary>
    /// The font name. Not set by default.
    /// </summary>
    public string FontName { get; set; }
}
/// <summary>
/// Groups the registry, ordering and supplement values of a CID font.
/// </summary>
internal class RegistryOrderingSupplement
{
/// <summary>
/// The registry string.
/// </summary>
public string Registry { get; set; }
/// <summary>
/// The ordering string.
/// </summary>
public string Ordering { get; set; }
/// <summary>
/// The supplement number.
/// </summary>
public decimal Supplement { get; set; }
}
/// <summary>
/// Defines the format of the CharString data contained within a Compact Font Format font.
/// </summary>

View File

@@ -96,22 +96,50 @@
break;
// TODO: CID Font Stuff
case 30:
var registry = GetString(operands, stringIndex);
operands.RemoveAt(0);
var ordering = GetString(operands, stringIndex);
operands.RemoveAt(0);
var supplement = GetIntOrDefault(operands);
dictionary.CidFontOperators.Ros = new RegistryOrderingSupplement
{
Registry = registry,
Ordering = ordering,
Supplement = supplement
};
dictionary.IsCidFont = true;
break;
case 31:
dictionary.CidFontOperators.Version = GetIntOrDefault(operands);
dictionary.IsCidFont = true;
break;
case 32:
dictionary.CidFontOperators.Revision = GetIntOrDefault(operands);
dictionary.IsCidFont = true;
break;
case 33:
dictionary.CidFontOperators.Type = GetIntOrDefault(operands);
dictionary.IsCidFont = true;
break;
case 34:
dictionary.CidFontOperators.Count = GetIntOrDefault(operands);
dictionary.IsCidFont = true;
break;
case 35:
dictionary.CidFontOperators.UidBase = GetIntOrDefault(operands);
dictionary.IsCidFont = true;
break;
case 36:
dictionary.CidFontOperators.FontDictionaryArray = GetIntOrDefault(operands);
dictionary.IsCidFont = true;
break;
case 37:
dictionary.CidFontOperators.FontDictionarySelect = operands[0].Decimal;
dictionary.IsCidFont = true;
break;
case 38:
dictionary.CidFontOperators.FontName = GetString(operands, stringIndex);
dictionary.IsCidFont = true;
break;
}
}

View File

@@ -2,11 +2,16 @@
{
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using CidFonts;
using CompactFontFormat;
using Core;
using Exceptions;
using Filters;
using Geometry;
using IO;
using PdfPig.Exceptions;
using PdfPig.Parser.Parts;
using Tokenization.Scanner;
using Tokens;
@@ -18,16 +23,19 @@
{
private readonly FontDescriptorFactory descriptorFactory;
private readonly TrueTypeFontParser trueTypeFontParser;
private readonly CompactFontFormatParser compactFontFormatParser;
private readonly IFilterProvider filterProvider;
private readonly IPdfTokenScanner pdfScanner;
/// <summary>
/// Create a new <see cref="CidFontFactory"/> from the collaborators it uses to read CID fonts.
/// </summary>
/// <param name="pdfScanner">The token scanner stored for resolving objects in the document.</param>
/// <param name="descriptorFactory">The factory stored for building font descriptors.</param>
/// <param name="trueTypeFontParser">The parser stored for TrueType font programs.</param>
/// <param name="compactFontFormatParser">The parser stored for Compact Font Format font programs.</param>
/// <param name="filterProvider">The filter provider stored for decoding font file streams.</param>
public CidFontFactory(IPdfTokenScanner pdfScanner, FontDescriptorFactory descriptorFactory, TrueTypeFontParser trueTypeFontParser,
    CompactFontFormatParser compactFontFormatParser,
    IFilterProvider filterProvider)
{
    // Each dependency is assigned exactly once; the scanner was previously assigned twice.
    this.pdfScanner = pdfScanner;
    this.descriptorFactory = descriptorFactory;
    this.trueTypeFontParser = trueTypeFontParser;
    this.compactFontFormatParser = compactFontFormatParser;
    this.filterProvider = filterProvider;
}
public ICidFont Generate(DictionaryToken dictionary, bool isLenientParsing)
@@ -56,7 +64,7 @@
var subType = dictionary.GetNameOrDefault(NameToken.Subtype);
if (NameToken.CidFontType0.Equals(subType))
{
//return new PDCIDFontType0(dictionary, parent);
return new Type0CidFont(fontProgram, type, subType, baseFont, systemInfo, descriptor);
}
if (NameToken.CidFontType2.Equals(subType))
@@ -106,6 +114,40 @@
case DescriptorFontFile.FontFileType.TrueType:
var input = new TrueTypeDataBytes(new ByteArrayInputBytes(fontFile));
return trueTypeFontParser.Parse(input);
case DescriptorFontFile.FontFileType.FromSubtype:
{
if (!DirectObjectFinder.TryGet(descriptor.FontFile.ObjectKey, pdfScanner, out StreamToken str))
{
throw new NotSupportedException("Cannot read CID font from subtype.");
}
if (!str.StreamDictionary.TryGet(NameToken.Subtype, out NameToken subtypeName))
{
throw new PdfDocumentFormatException($"The font file stream did not contain a subtype entry: {str.StreamDictionary}.");
}
if (subtypeName == NameToken.CidFontType0C)
{
var bytes = str.Decode(filterProvider);
var font = compactFontFormatParser.Parse(new CompactFontFormatData(bytes));
return font;
}
if (subtypeName == NameToken.Type1C)
{
}
else if (subtypeName == NameToken.OpenType)
{
}
else
{
throw new PdfDocumentFormatException($"Unexpected subtype for CID font: {subtypeName}.");
}
throw new NotSupportedException("Cannot read CID font from subtype.");
}
default:
throw new NotSupportedException("Currently only TrueType fonts are supported.");
}

View File

@@ -1,11 +1,63 @@
namespace UglyToad.PdfPig.Fonts.TrueType.Parser
{
using System;
using System.Text;
using Names;
using Tables;
using Util;
using Util.JetBrains.Annotations;
/// <summary>
/// Reads a <see cref="HorizontalHeaderTable"/> from TrueType font data.
/// </summary>
internal class HorizontalHeaderTableParser : ITrueTypeTableParser<HorizontalHeaderTable>
{
    /// <summary>
    /// Seeks to the table offset and reads the horizontal header fields in their stored order.
    /// </summary>
    /// <param name="header">The header entry giving the offset of the table; also passed to the resulting table.</param>
    /// <param name="data">The font data to read from. Its position is moved by this call.</param>
    /// <param name="register">The table register builder; not used by this parser.</param>
    /// <returns>The parsed horizontal header table.</returns>
    /// <exception cref="NotSupportedException">The metric data format read from the table is not 0.</exception>
    public HorizontalHeaderTable Parse(TrueTypeHeaderTable header, TrueTypeDataBytes data, TableRegister.Builder register)
    {
        const int reservedValueCount = 4;

        data.Seek(header.Offset);

        // The reads below must stay in this exact order: each one consumes the next field of the table.
        var versionMajor = data.ReadUnsignedShort();
        var versionMinor = data.ReadUnsignedShort();

        var ascent = data.ReadSignedShort();
        var descent = data.ReadSignedShort();
        var gap = data.ReadSignedShort();

        var maxAdvanceWidth = data.ReadUnsignedShort();

        var minLeftBearing = data.ReadSignedShort();
        var minRightBearing = data.ReadSignedShort();
        var maxExtentX = data.ReadSignedShort();

        var slopeRise = data.ReadSignedShort();
        var slopeRun = data.ReadSignedShort();
        var offsetOfCaret = data.ReadSignedShort();

        // Skip the reserved section; the values are read only to advance the position.
        for (var skipped = 0; skipped < reservedValueCount; skipped++)
        {
            data.ReadSignedShort();
        }

        var dataFormat = data.ReadSignedShort();
        if (dataFormat != 0)
        {
            throw new NotSupportedException("The metric data format for a horizontal header table should be 0.");
        }

        var headerMetricCount = data.ReadUnsignedShort();

        return new HorizontalHeaderTable(
            header,
            versionMajor,
            versionMinor,
            ascent,
            descent,
            gap,
            maxAdvanceWidth,
            minLeftBearing,
            minRightBearing,
            maxExtentX,
            slopeRise,
            slopeRun,
            offsetOfCaret,
            dataFormat,
            headerMetricCount);
    }
}
internal class NameTableParser : ITrueTypeTableParser<NameTable>
{
public NameTable Parse(TrueTypeHeaderTable header, TrueTypeDataBytes data, TableRegister.Builder register)

View File

@@ -7,6 +7,7 @@
{
private static readonly CMapTableParser CMapTableParser = new CMapTableParser();
private static readonly HorizontalMetricsTableParser HorizontalMetricsTableParser = new HorizontalMetricsTableParser();
private static readonly HorizontalHeaderTableParser HorizontalHeaderTableParser = new HorizontalHeaderTableParser();
private static readonly NameTableParser NameTableParser = new NameTableParser();
private static readonly Os2TableParser Os2TableParser = new Os2TableParser();
@@ -32,6 +33,11 @@
return (T)(object)Os2TableParser.Parse(table, data, register);
}
if (typeof(T) == typeof(HorizontalHeaderTable))
{
return (T) (object) HorizontalHeaderTableParser.Parse(table, data, register);
}
throw new NotImplementedException();
}
}

View File

@@ -74,7 +74,7 @@
}
// hhea
builder.HorizontalHeaderTable = HorizontalHeaderTable.Load(data, hHead);
builder.HorizontalHeaderTable = TableParser.Parse<HorizontalHeaderTable>(hHead, data, builder);
if (!tables.TryGetValue(TrueTypeHeaderTable.Maxp, out var maxHeaderTable))
{

View File

@@ -100,52 +100,5 @@
MetricDataFormat = metricDataFormat;
NumberOfHeaderMetrics = numberOfHeaderMetrics;
}
/// <summary>
/// Reads a <see cref="HorizontalHeaderTable"/> from the font data at the offset given by the table header.
/// </summary>
/// <param name="data">The font data to read from. Its position is moved by this call.</param>
/// <param name="table">The header entry giving the offset of the table; also passed to the resulting table.</param>
/// <returns>The loaded horizontal header table.</returns>
/// <exception cref="NotSupportedException">The metric data format read from the table is not 0.</exception>
public static HorizontalHeaderTable Load(TrueTypeDataBytes data, TrueTypeHeaderTable table)
{
    data.Seek(table.Offset);

    // The reads below must stay in this exact order: each one consumes the next field of the table.
    var majorVersion = data.ReadUnsignedShort();
    var minorVersion = data.ReadUnsignedShort();
    var ascender = data.ReadSignedShort();
    var descender = data.ReadSignedShort();
    var lineGap = data.ReadSignedShort();
    var advancedWidthMax = data.ReadUnsignedShort();
    var minLeftSideBearing = data.ReadSignedShort();
    var minRightSideBearing = data.ReadSignedShort();
    var xMaxExtent = data.ReadSignedShort();
    var caretSlopeRise = data.ReadSignedShort();
    var caretSlopeRun = data.ReadSignedShort();
    var caretOffset = data.ReadSignedShort();

    // Reserved section
    data.ReadSignedShort();
    data.ReadSignedShort();
    data.ReadSignedShort();
    data.ReadSignedShort();

    var metricDataFormat = data.ReadSignedShort();
    if (metricDataFormat != 0)
    {
        throw new NotSupportedException("The metric data format for a horizontal header table should be 0.");
    }

    // The metric count is an unsigned 16-bit value; a signed read turns counts above 32767 negative.
    var numberOfHeaderMetrics = data.ReadUnsignedShort();

    return new HorizontalHeaderTable(table, majorVersion, minorVersion, ascender,
        descender, lineGap, advancedWidthMax,
        minLeftSideBearing,
        minRightSideBearing,
        xMaxExtent,
        caretSlopeRise,
        caretSlopeRun,
        caretOffset,
        metricDataFormat,
        numberOfHeaderMetrics);
}
}
}

View File

@@ -99,20 +99,19 @@
var trueTypeFontParser = new TrueTypeFontParser();
var fontDescriptorFactory = new FontDescriptorFactory();
var cidFontFactory = new CidFontFactory(pdfScanner, fontDescriptorFactory, trueTypeFontParser, filterProvider);
var encodingReader = new EncodingReader(pdfScanner);
var compactFontFormatIndexReader = new CompactFontFormatIndexReader();
var compactFontFormatParser = new CompactFontFormatParser(new CompactFontFormatIndividualFontParser(compactFontFormatIndexReader, new CompactFontFormatTopLevelDictionaryReader(),
new CompactFontFormatPrivateDictionaryReader()), compactFontFormatIndexReader);
var cidFontFactory = new CidFontFactory(pdfScanner, fontDescriptorFactory, trueTypeFontParser, compactFontFormatParser, filterProvider);
var encodingReader = new EncodingReader(pdfScanner);
var fontFactory = new FontFactory(log, new Type0FontHandler(cidFontFactory,
cMapCache,
filterProvider, pdfScanner),
new TrueTypeFontHandler(log, pdfScanner, filterProvider, cMapCache, fontDescriptorFactory, trueTypeFontParser, encodingReader, new SystemFontFinder(new TrueTypeFontParser())),
new Type1FontHandler(pdfScanner, cMapCache, filterProvider, fontDescriptorFactory, encodingReader,
new Type1FontParser(new Type1EncryptedPortionParser()),
new CompactFontFormatParser(new CompactFontFormatIndividualFontParser(compactFontFormatIndexReader, new CompactFontFormatTopLevelDictionaryReader(),
new CompactFontFormatPrivateDictionaryReader()), compactFontFormatIndexReader)),
new Type1FontParser(new Type1EncryptedPortionParser()), compactFontFormatParser),
new Type3FontHandler(pdfScanner, cMapCache, filterProvider, encodingReader));
var resourceContainer = new ResourceContainer(pdfScanner, fontFactory);

View File

@@ -96,6 +96,7 @@
public static readonly NameToken CharSet = new NameToken("CharSet");
public static readonly NameToken CiciSignit = new NameToken("CICI.SignIt");
public static readonly NameToken CidFontType0 = new NameToken("CIDFontType0");
public static readonly NameToken CidFontType0C = new NameToken("CIDFontType0C");
public static readonly NameToken CidFontType2 = new NameToken("CIDFontType2");
public static readonly NameToken CidToGidMap = new NameToken("CIDToGIDMap");
public static readonly NameToken CidSet = new NameToken("CIDSet");