begin implementing support for compact font format fonts in type 1 font handling

This commit is contained in:
Eliot Jones
2018-04-28 13:00:43 +01:00
parent 1fe54c5f49
commit 1deefdc987
6 changed files with 279 additions and 6 deletions

View File

@@ -27,7 +27,10 @@
[Fact]
public void CanReadPage9()
{
using (var document = PdfDocument.Open(GetFilename()))
using (var document = PdfDocument.Open(GetFilename(), new ParsingOptions
{
UseLenientParsing = false
}))
{
var page = document.GetPage(9);

View File

@@ -0,0 +1,243 @@
namespace UglyToad.PdfPig.Fonts.CompactFontFormat
{
using System;
using System.Text;
using Util;
/// <summary>
/// Reads the leading structures of Compact Font Format (CFF) binary data.
/// </summary>
/// <remarks>
/// Work in progress: the header and the three INDEX structures which follow it are read
/// but nothing is interpreted or returned to the caller yet.
/// </remarks>
internal class CompactFontFormatParser
{
    private const string TagOtto = "OTTO";
    private const string TagTtcf = "ttcf";
    private const string TagTtfonly = "\u0000\u0001\u0000\u0000";

    /// <summary>
    /// The number of bytes <see cref="ReadHeader"/> consumes (major, minor, header size, offset size).
    /// </summary>
    private const int FixedHeaderLength = 4;

    /// <summary>
    /// Reads the header followed by a string INDEX, a dictionary INDEX and a second string INDEX.
    /// </summary>
    /// <param name="data">The CFF bytes, read from the start.</param>
    /// <exception cref="NotImplementedException">The data starts with the "OTTO" tag.</exception>
    /// <exception cref="NotSupportedException">The data starts with the "ttcf" or TrueType version tag.</exception>
    /// <exception cref="InvalidOperationException">An INDEX contains an object with a negative length.</exception>
    public void Parse(CompactFontFormatData data)
    {
        var tag = ReadTag(data);

        switch (tag)
        {
            case TagOtto:
                throw new NotImplementedException("Currently tagged CFF data is not supported.");
            case TagTtcf:
                throw new NotSupportedException("True Type Collection fonts are not supported.");
            case TagTtfonly:
                throw new NotSupportedException("OpenType fonts containing a true type font are not supported.");
            default:
                // No wrapper tag was found so the 4 bytes just consumed belong to the header, rewind.
                data.Seek(0);
                break;
        }

        var header = ReadHeader(data);

        // The header declares its own size. When it is longer than the fields read above the
        // extra bytes must be skipped, otherwise they would be misread as the start of the first INDEX.
        // Only ever seek forwards so a bogus (too small) size cannot make us re-read header bytes.
        if (header.SizeInBytes > FixedHeaderLength)
        {
            data.Seek(header.SizeInBytes);
        }

        // The results are not used yet, reading them advances the data to the structures which follow.
        var names = ReadStringIndex(data);

        var topLevelDict = ReadDictionaryData(data);

        var stringIndex = ReadStringIndex(data);
    }

    /// <summary>
    /// Reads the first 4 bytes as an ISO 8859-1 string so they can be compared to the known wrapper tags.
    /// </summary>
    private static string ReadTag(CompactFontFormatData data)
    {
        var tag = data.ReadString(4, OtherEncodings.Iso88591);

        return tag;
    }

    /// <summary>
    /// Reads the 4 single-byte header fields: major version, minor version, header size and offset size.
    /// </summary>
    private static CompactFontFormatHeader ReadHeader(CompactFontFormatData data)
    {
        var major = data.ReadCard8();
        var minor = data.ReadCard8();
        var headerSize = data.ReadCard8();
        var offsetSize = data.ReadOffsize();

        return new CompactFontFormatHeader(major, minor, headerSize, offsetSize);
    }

    /// <summary>
    /// Reads indexed string data.
    /// </summary>
    /// <returns>One ISO 8859-1 decoded string per object in the INDEX, empty for an empty INDEX.</returns>
    private static string[] ReadStringIndex(CompactFontFormatData data)
    {
        var index = ReadIndex(data);

        if (index.Length == 0)
        {
            return new string[0];
        }

        var count = index.Length - 1;

        var result = new string[count];

        for (var i = 0; i < count; i++)
        {
            var length = GetObjectLength(index, i, data);

            result[i] = data.ReadString(length, OtherEncodings.Iso88591);
        }

        return result;
    }

    /// <summary>
    /// Reads indexed binary data, leaving each object as raw bytes.
    /// </summary>
    /// <returns>One byte array per object in the INDEX, empty for an empty INDEX.</returns>
    private static byte[][] ReadDictionaryData(CompactFontFormatData data)
    {
        var index = ReadIndex(data);

        if (index.Length == 0)
        {
            return new byte[0][];
        }

        var count = index.Length - 1;

        var results = new byte[count][];

        for (var i = 0; i < count; i++)
        {
            var length = GetObjectLength(index, i, data);

            results[i] = data.ReadBytes(length);
        }

        return results;
    }

    /// <summary>
    /// Calculates the length of the object at position <paramref name="i"/> from a pair of consecutive offsets.
    /// </summary>
    /// <exception cref="InvalidOperationException">The offsets are not in ascending order.</exception>
    private static int GetObjectLength(int[] offsets, int i, CompactFontFormatData data)
    {
        var length = offsets[i + 1] - offsets[i];

        if (length < 0)
        {
            throw new InvalidOperationException($"Negative object length {length} at {i}. Current position: {data.Position}.");
        }

        return length;
    }

    /// <summary>
    /// Reads the count, offset size and offset array at the start of an INDEX.
    /// </summary>
    /// <returns>
    /// The count + 1 offsets delimiting the objects, or an empty array when the INDEX contains no objects.
    /// </returns>
    private static int[] ReadIndex(CompactFontFormatData data)
    {
        var count = data.ReadCard16();

        if (count == 0)
        {
            // An empty INDEX consists of the 2 byte count only. Reading an offset size and offset
            // here would consume 2 bytes belonging to whatever structure follows.
            return new int[0];
        }

        var offsetSize = data.ReadOffsize();

        var offsets = new int[count + 1];

        for (var i = 0; i < offsets.Length; i++)
        {
            offsets[i] = data.ReadOffset(offsetSize);
        }

        return offsets;
    }
}
/// <summary>
/// A cursor over the raw bytes of Compact Font Format data providing reads of the basic CFF data types.
/// Multi-byte values are read most significant byte first.
/// </summary>
internal class CompactFontFormatData
{
    private readonly byte[] dataBytes;

    /// <summary>
    /// The index of the most recently read byte, or -1 if nothing has been read.
    /// </summary>
    public int Position { get; private set; } = -1;

    /// <summary>
    /// Creates a cursor positioned before the first byte of <paramref name="dataBytes"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="dataBytes"/> is null.</exception>
    public CompactFontFormatData(byte[] dataBytes)
    {
        if (dataBytes == null)
        {
            throw new ArgumentNullException(nameof(dataBytes));
        }

        this.dataBytes = dataBytes;
    }

    /// <summary>
    /// Reads the next <paramref name="length"/> bytes and decodes them using <paramref name="encoding"/>.
    /// </summary>
    public string ReadString(int length, Encoding encoding)
    {
        var bytes = ReadBytes(length);

        return encoding.GetString(bytes);
    }

    /// <summary>
    /// Reads a single byte value.
    /// </summary>
    public byte ReadCard8()
    {
        return ReadByte();
    }

    /// <summary>
    /// Reads an unsigned 2 byte value.
    /// </summary>
    public ushort ReadCard16()
    {
        return (ushort)(ReadByte() << 8 | ReadByte());
    }

    /// <summary>
    /// Reads the single byte giving the size in bytes of the offsets which follow.
    /// </summary>
    public byte ReadOffsize()
    {
        return ReadByte();
    }

    /// <summary>
    /// Reads an offset stored in <paramref name="offsetSize"/> bytes.
    /// </summary>
    public int ReadOffset(int offsetSize)
    {
        var value = 0;

        for (var i = 0; i < offsetSize; i++)
        {
            value = value << 8 | ReadByte();
        }

        return value;
    }

    /// <summary>
    /// Advances by one byte and returns it.
    /// </summary>
    /// <exception cref="IndexOutOfRangeException">There are no more bytes to read.</exception>
    public byte ReadByte()
    {
        Position++;

        if (Position >= dataBytes.Length)
        {
            throw new IndexOutOfRangeException($"Cannot read byte at position {Position} of an array which is {dataBytes.Length} bytes long.");
        }

        return dataBytes[Position];
    }

    /// <summary>
    /// Returns the next byte without advancing. Throws if there is no next byte, check <see cref="CanRead"/> first.
    /// </summary>
    public byte Peek()
    {
        return dataBytes[Position + 1];
    }

    /// <summary>
    /// Whether at least one more byte is available to read.
    /// </summary>
    public bool CanRead()
    {
        return Position < dataBytes.Length - 1;
    }

    /// <summary>
    /// Moves the cursor so that the next byte read is the one at index <paramref name="offset"/>.
    /// </summary>
    public void Seek(int offset)
    {
        // Position tracks the last byte read, so sit one before the target.
        Position = offset - 1;
    }

    /// <summary>
    /// Reads an unsigned 4 byte value.
    /// </summary>
    public long ReadLong()
    {
        // Widen before shifting: combining the halves as 32 bit integers would set the sign bit for
        // values of 0x80000000 and above, which then sign-extends to a negative long.
        long high = ReadCard16();
        long low = ReadCard16();

        return (high << 16) | low;
    }

    /// <summary>
    /// Reads the next <paramref name="length"/> bytes into a new array.
    /// </summary>
    public byte[] ReadBytes(int length)
    {
        var result = new byte[length];

        for (var i = 0; i < length; i++)
        {
            result[i] = ReadByte();
        }

        return result;
    }
}
/// <summary>
/// Immutable holder for the four single-byte fields which make up the header of CFF binary data.
/// </summary>
internal struct CompactFontFormatHeader
{
    /// <summary>
    /// Creates a header from the four values read from the data.
    /// </summary>
    public CompactFontFormatHeader(byte majorVersion, byte minorVersion, byte sizeInBytes, byte offsetSize)
    {
        OffsetSize = offsetSize;
        SizeInBytes = sizeInBytes;
        MinorVersion = minorVersion;
        MajorVersion = majorVersion;
    }

    /// <summary>
    /// The first header byte.
    /// </summary>
    public byte MajorVersion { get; }

    /// <summary>
    /// The second header byte.
    /// </summary>
    public byte MinorVersion { get; }

    /// <summary>
    /// The third header byte, the size of the header itself.
    /// </summary>
    public byte SizeInBytes { get; }

    /// <summary>
    /// The fourth header byte, an offset size.
    /// </summary>
    public byte OffsetSize { get; }

    /// <summary>
    /// Describes all four fields on a single line, for debugging.
    /// </summary>
    public override string ToString()
        => $"Major: {MajorVersion}, Minor: {MinorVersion}, Header Size: {SizeInBytes}, Offset: {OffsetSize}";
}
}

View File

@@ -1,6 +1,7 @@
namespace UglyToad.PdfPig.Fonts.Parser.Handlers
{
using Cmap;
using CompactFontFormat;
using Encodings;
using Exceptions;
using Filters;
@@ -22,11 +23,13 @@
private readonly FontDescriptorFactory fontDescriptorFactory;
private readonly IEncodingReader encodingReader;
private readonly Type1FontParser type1FontParser;
private readonly CompactFontFormatParser compactFontFormatParser;
public Type1FontHandler(IPdfTokenScanner pdfScanner, CMapCache cMapCache, IFilterProvider filterProvider,
FontDescriptorFactory fontDescriptorFactory,
IEncodingReader encodingReader,
Type1FontParser type1FontParser)
Type1FontParser type1FontParser,
CompactFontFormatParser compactFontFormatParser)
{
this.pdfScanner = pdfScanner;
this.cMapCache = cMapCache;
@@ -34,6 +37,7 @@
this.fontDescriptorFactory = fontDescriptorFactory;
this.encodingReader = encodingReader;
this.type1FontParser = type1FontParser;
this.compactFontFormatParser = compactFontFormatParser;
}
public IFont Generate(DictionaryToken dictionary, bool isLenientParsing)
@@ -108,11 +112,19 @@
return null;
}
var bytes = stream.Decode(filterProvider);
// We have a Compact Font Format font rather than an Adobe Type 1 Font.
if (stream.StreamDictionary.TryGet(NameToken.Subtype, out NameToken subTypeName)
&& NameToken.Type1C.Equals(subTypeName))
{
compactFontFormatParser.Parse(new CompactFontFormatData(bytes));
return null;
}
var length1 = stream.StreamDictionary.Get<NumericToken>(NameToken.Length1, pdfScanner);
var length2 = stream.StreamDictionary.Get<NumericToken>(NameToken.Length2, pdfScanner);
var bytes = stream.Decode(filterProvider);
var font = type1FontParser.Parse(new ByteArrayInputBytes(bytes), length1.Int, length2.Int);
return font;

View File

@@ -8,6 +8,7 @@
using FileStructure;
using Filters;
using Fonts;
using Fonts.CompactFontFormat;
using Fonts.Parser;
using Fonts.Parser.Handlers;
using Fonts.Parser.Parts;
@@ -105,7 +106,9 @@
cMapCache,
filterProvider, pdfScanner),
new TrueTypeFontHandler(log, pdfScanner, filterProvider, cMapCache, fontDescriptorFactory, trueTypeFontParser, encodingReader),
new Type1FontHandler(pdfScanner, cMapCache, filterProvider, fontDescriptorFactory, encodingReader, new Type1FontParser(new Type1EncryptedPortionParser())),
new Type1FontHandler(pdfScanner, cMapCache, filterProvider, fontDescriptorFactory, encodingReader,
new Type1FontParser(new Type1EncryptedPortionParser()),
new CompactFontFormatParser()),
new Type3FontHandler(pdfScanner, cMapCache, filterProvider, encodingReader));
var resourceContainer = new ResourceContainer(pdfScanner, fontFactory);

View File

@@ -67,6 +67,18 @@
return Data.TryGetValue(name.Data, out token);
}
/// <summary>
/// Looks up the entry called <paramref name="name"/> and returns it only if it is of type <typeparamref name="T"/>.
/// </summary>
/// <param name="name">The name of the entry to find.</param>
/// <param name="token">The entry when found with the expected type, otherwise the default value of <typeparamref name="T"/>.</param>
/// <returns><see langword="true"/> if an entry exists for the name and is a <typeparamref name="T"/>.</returns>
public bool TryGet<T>(NameToken name, out T token) where T : IToken
{
    if (TryGet(name, out var found) && found is T match)
    {
        token = match;
        return true;
    }

    // Either nothing is stored under the name or the stored token has a different type.
    token = default(T);
    return false;
}
public bool ContainsKey(NameToken name)
{
return Data.ContainsKey(name.Data);

View File

@@ -491,6 +491,7 @@
public static readonly NameToken Type = new NameToken("Type");
public static readonly NameToken Type0 = new NameToken("Type0");
public static readonly NameToken Type1 = new NameToken("Type1");
public static readonly NameToken Type1C = new NameToken("Type1C");
public static readonly NameToken Type3 = new NameToken("Type3");
// U
public static readonly NameToken U = new NameToken("U");
@@ -529,6 +530,5 @@
// Y
public static readonly NameToken YStep = new NameToken("YStep");
public static readonly NameToken Yes = new NameToken("Yes");
}
}