Fix a bug where a MediaBox defined on the Pages node would throw. A lot more work on parsing the Compact Font Format.

This commit is contained in:
Eliot Jones 2018-04-29 14:42:54 +01:00
parent 85d1f84965
commit b51ebfd70c
14 changed files with 969 additions and 415 deletions

View File

@ -1,21 +1,15 @@
namespace UglyToad.PdfPig.Content
{
using System;
/// <summary>
/// Contains the values inherited from the Page Tree for this page.
/// </summary>
internal class PageTreeMembers
{
public MediaBox GetMediaBox()
{
// TODO: tree inheritance
throw new NotImplementedException("Track inherited members");
}
public CropBox GetCropBox()
{
return null;
}
public MediaBox MediaBox { get; set; }
}
}

View File

@ -2,6 +2,7 @@
{
using System;
using System.Collections.Generic;
using Geometry;
using Logging;
using Parser.Parts;
using Tokenization.Scanner;
@ -49,15 +50,17 @@
var observed = new List<int>();
var pageTreeMembers = new PageTreeMembers();
// todo: running a search for a different, unloaded, page number, results in a bug.
var isFound = FindPage(rootPageDictionary, pageNumber, observed);
var isFound = FindPage(rootPageDictionary, pageNumber, observed, pageTreeMembers);
if (!isFound || !locatedPages.TryGetValue(pageNumber, out targetPageDictionary))
{
throw new ArgumentOutOfRangeException("Could not find the page with number: " + pageNumber);
}
var page = pageFactory.Create(pageNumber, targetPageDictionary, new PageTreeMembers(), isLenientParsing);
var page = pageFactory.Create(pageNumber, targetPageDictionary, pageTreeMembers, isLenientParsing);
locatedPages[pageNumber] = targetPageDictionary;
@ -74,7 +77,7 @@
return pages[pages.Count - 1] + 1;
}
public bool FindPage(DictionaryToken currentPageDictionary, int soughtPageNumber, List<int> pageNumbersObserved)
public bool FindPage(DictionaryToken currentPageDictionary, int soughtPageNumber, List<int> pageNumbersObserved, PageTreeMembers pageTreeMembers)
{
var type = currentPageDictionary.GetNameOrDefault(NameToken.Type);
@ -97,6 +100,16 @@
return false;
}
if (currentPageDictionary.TryGet(NameToken.MediaBox, out var token))
{
var mediaBox = DirectObjectFinder.Get<ArrayToken>(token, pdfScanner);
pageTreeMembers.MediaBox = new MediaBox(new PdfRectangle(mediaBox.GetNumeric(0).Data,
mediaBox.GetNumeric(1).Data,
mediaBox.GetNumeric(2).Data,
mediaBox.GetNumeric(3).Data));
}
if (!currentPageDictionary.TryGet(NameToken.Kids, out var kids)
|| !(kids is ArrayToken kidsArray))
{
@ -111,7 +124,7 @@
// todo: exit early
var child = DirectObjectFinder.Get<DictionaryToken>(kid, pdfScanner);
var thisPageMatches = FindPage(child, soughtPageNumber, pageNumbersObserved);
var thisPageMatches = FindPage(child, soughtPageNumber, pageNumbersObserved, pageTreeMembers);
if (thisPageMatches)
{

View File

@ -132,6 +132,16 @@
});
}
if (values.Length == 4)
{
return new TransformationMatrix(new []
{
values[0], values[1], 0,
values[2], values[3], 0,
0, 0, 1
});
}
throw new ArgumentException("The array must either define all 9 elements of the matrix or all 6 key elements. Instead array was: " + values);
}

View File

@ -0,0 +1,214 @@
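// NOTE: everything in this file is commented out. The body below is still written in Java syntax
// (boolean, instanceof, ArrayList, IllegalArgumentException) and has not yet been ported to C#.
//namespace UglyToad.PdfPig.Fonts.CharStrings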
//{
// using System;
// using System.Collections.Generic;
// internal class Type2CharStringParser
// {
// private int hstemCount = 0;
// private int vstemCount = 0;
// private List<Object> sequence = null;
// private List<Object> Parse(byte[] bytes, byte[][] globalSubrIndex, byte[][] localSubrIndex)
// {
// DataInput input = new DataInput(bytes);
// boolean localSubroutineIndexProvided = localSubrIndex != null && localSubrIndex.length > 0;
// boolean globalSubroutineIndexProvided = globalSubrIndex != null && globalSubrIndex.length > 0;
// while (input.hasRemaining())
// {
// int b0 = input.readUnsignedByte();
// if (b0 == 10 && localSubroutineIndexProvided)
// {
// // process subr command
// Integer operand = (Integer) sequence.remove(sequence.size() - 1);
// //get subrbias
// int bias = 0;
// int nSubrs = localSubrIndex.length;
// if (nSubrs < 1240)
// {
// bias = 107;
// }
// else if (nSubrs < 33900)
// {
// bias = 1131;
// }
// else
// {
// bias = 32768;
// }
// int subrNumber = bias + operand;
// if (subrNumber < localSubrIndex.length)
// {
// byte[] subrBytes = localSubrIndex[subrNumber];
// parse(subrBytes, globalSubrIndex, localSubrIndex, false);
// Object lastItem = sequence.get(sequence.size() - 1);
// if (lastItem is CharStringCommand && ((CharStringCommand) lastItem).getKey().getValue()[0] == 11)
// {
// sequence.remove(sequence.size() - 1); // remove "return" command
// }
// }
// }
// else if (b0 == 29 && globalSubroutineIndexProvided)
// {
// // process globalsubr command
// Integer operand = (Integer) sequence.remove(sequence.size() - 1);
////get subrbias
// int bias;
// int nSubrs = globalSubrIndex.length;
// if (nSubrs < 1240)
// {
// bias = 107;
// }
// else if (nSubrs < 33900)
// {
// bias = 1131;
// }
// else
// {
// bias = 32768;
// }
// int subrNumber = bias + operand;
// if (subrNumber < globalSubrIndex.length)
// {
// byte[] subrBytes = globalSubrIndex[subrNumber];
// parse(subrBytes, globalSubrIndex, localSubrIndex, false);
// Object lastItem = sequence.get(sequence.size() - 1);
// if (lastItem is CharStringCommand && ((CharStringCommand) lastItem).getKey().getValue()[0] == 11)
// {
// sequence.remove(sequence.size() - 1); // remove "return" command
// }
// }
// }
// else if (b0 >= 0 && b0 <= 27)
// {
// sequence.add(readCommand(b0, input));
// }
// else if (b0 == 28)
// {
// sequence.add(readNumber(b0, input));
// }
// else if (b0 >= 29 && b0 <= 31)
// {
// sequence.add(readCommand(b0, input));
// }
// else if (b0 >= 32 && b0 <= 255)
// {
// sequence.add(readNumber(b0, input));
// }
// else
// {
// throw new IllegalArgumentException();
// }
// }
// return sequence;
// }
// private CharStringCommand readCommand(int b0, DataInput input)
// {
// if (b0 == 1 || b0 == 18)
// {
// hstemCount += peekNumbers().size() / 2;
// }
// else if (b0 == 3 || b0 == 19 || b0 == 20 || b0 == 23)
// {
// vstemCount += peekNumbers().size() / 2;
// } // End if
// if (b0 == 12)
// {
// int b1 = input.readUnsignedByte();
// return new CharStringCommand(b0, b1);
// }
// else if (b0 == 19 || b0 == 20)
// {
// int[] value = new int[1 + getMaskLength()];
// value[0] = b0;
// for (int i = 1; i < value.length; i++)
// {
// value[i] = input.readUnsignedByte();
// }
// return new CharStringCommand(value);
// }
// return new CharStringCommand(b0);
// }
// private Number readNumber(int b0, DataInput input)
// {
// if (b0 == 28)
// {
// return (int) input.readShort();
// }
// else if (b0 >= 32 && b0 <= 246)
// {
// return b0 - 139;
// }
// else if (b0 >= 247 && b0 <= 250)
// {
// int b1 = input.readUnsignedByte();
// return (b0 - 247) * 256 + b1 + 108;
// }
// else if (b0 >= 251 && b0 <= 254)
// {
// int b1 = input.readUnsignedByte();
// return -(b0 - 251) * 256 - b1 - 108;
// }
// else if (b0 == 255)
// {
// short value = input.readShort();
// // The lower bytes are representing the digits after the decimal point
// double fraction = input.readUnsignedShort() / 65535d;
// return value + fraction;
// }
// else
// {
// throw new IllegalArgumentException();
// }
// }
// private int getMaskLength()
// {
// int hintCount = hstemCount + vstemCount;
// int length = hintCount / 8;
// if (hintCount % 8 > 0)
// {
// length++;
// }
// return length;
// }
// private List<Number> peekNumbers()
// {
// List<Number> numbers = new ArrayList<>();
// for (int i = sequence.size() - 1; i > -1; i--)
// {
// Object object = sequence.get(i);
// if (!(object instanceof Number))
// {
// return numbers;
// }
// numbers.add(0, (Number) object);
// }
// return numbers;
// }
// }
//}

View File

@ -0,0 +1,46 @@
using System;
namespace UglyToad.PdfPig.Fonts.CompactFontFormat
{
/// <summary>
/// Reads Compact Font Format (CFF) INDEX structures: a count, an offset size, an array of
/// (count + 1) offsets and then the object data the offsets delimit.
/// </summary>
internal class CompactFontFormatIndexReader
{
    /// <summary>
    /// Reads an INDEX from the current position and returns the raw bytes of every object it contains.
    /// </summary>
    /// <param name="data">The data positioned at the start of the INDEX.</param>
    /// <returns>One byte array per object, empty when the INDEX contains no objects.</returns>
    /// <exception cref="InvalidOperationException">Two consecutive offsets describe a negative length.</exception>
    public byte[][] ReadDictionaryData(CompactFontFormatData data)
    {
        var index = ReadIndex(data);

        // There is always one more offset than there are objects.
        var count = index.Length - 1;

        var results = new byte[count][];

        for (var i = 0; i < count; i++)
        {
            var length = index[i + 1] - index[i];

            if (length < 0)
            {
                throw new InvalidOperationException($"Negative object length {length} at {i}. Current position: {data.Position}.");
            }

            results[i] = data.ReadBytes(length);
        }

        return results;
    }

    /// <summary>
    /// Reads the header and offset array of an INDEX, leaving the data positioned at the first object.
    /// </summary>
    /// <param name="data">The data positioned at the start of the INDEX.</param>
    /// <returns>
    /// The (count + 1) offsets. For an empty INDEX a single offset is returned so that callers
    /// computing the object count as <c>Length - 1</c> obtain zero.
    /// </returns>
    public int[] ReadIndex(CompactFontFormatData data)
    {
        var count = data.ReadCard16();

        if (count == 0)
        {
            // An empty INDEX consists of the 2 byte count field only: there is no offset size and
            // no offset array, reading them would consume bytes belonging to the next structure.
            return new[] { 1 };
        }

        var offsetSize = data.ReadOffsize();

        var offsets = new int[count + 1];

        for (var i = 0; i < offsets.Length; i++)
        {
            offsets[i] = data.ReadOffset(offsetSize);
        }

        return offsets;
    }
}
}

View File

@ -1,381 +1,48 @@
namespace UglyToad.PdfPig.Fonts.CompactFontFormat
{
using System;
using System.Collections.Generic;
using System.Text;
using Core;
using Geometry;
using Dictionaries;
internal class CompactFontFormatIndividualFontParser
{
private readonly CompactFontFormatIndexReader indexReader;
private readonly CompactFontFormatTopLevelDictionaryReader topLevelDictionaryReader;
private readonly CompactFontFormatPrivateDictionaryReader privateDictionaryReader;
public CompactFontFormatIndividualFontParser(CompactFontFormatIndexReader indexReader,
CompactFontFormatTopLevelDictionaryReader topLevelDictionaryReader,
CompactFontFormatPrivateDictionaryReader privateDictionaryReader)
{
this.indexReader = indexReader;
this.topLevelDictionaryReader = topLevelDictionaryReader;
this.privateDictionaryReader = privateDictionaryReader;
}
public void Parse(CompactFontFormatData data, string name, byte[] topDictionaryIndex, string[] stringIndex)
{
var individualData = new CompactFontFormatData(topDictionaryIndex);
var dictionary = ReadTopLevelDictionary(individualData, stringIndex);
}
var dictionary = topLevelDictionaryReader.Read(individualData, stringIndex);
private static CompactFontFormatFontDictionary ReadTopLevelDictionary(CompactFontFormatData data, string[] stringIndex)
{
var dictionary = new CompactFontFormatFontDictionary();
while (data.CanRead())
var privateDictionary = new CompactFontFormatPrivateDictionary();
if (dictionary.PrivateDictionarySizeAndOffset.Item2 >= 0)
{
var numbers = new List<Operand>();
data.Seek(dictionary.PrivateDictionarySizeAndOffset.Item2);
var infiniteLoopProtection = 0;
while (true)
{
infiniteLoopProtection++;
// Avoid the library getting caught in an infinite loop, probably not possible.
// "An operator may be preceded by up to a maximum of 48 operands."
if (infiniteLoopProtection > 256)
{
throw new InvalidOperationException("Got caught in an infinite loop trying to read a CFF dictionary.");
}
var byte0 = data.ReadByte();
// Operands and operators are distinguished by the first byte, 0 - 21 specify operators
if (byte0 <= 21)
{
ApplyOperator(byte0, numbers, data, stringIndex, dictionary);
break;
}
/*
* b0 value value range calculation
* 32 - 246 -107 - +107 b0 - 139
* 247 - 250 +108 - +1131 (b0 - 247)*256 + b1 + 108
* 251 - 254 -1131 - -108 -(b0 - 251)*256 - b1 - 108
* 28 -32768 - +32767 b1 << 8 | b2
* 29 -(2^31)-+(2^31-1) b1 << 24 | b2 << 16 | b3 << 8 | b4
*
* A byte value of 30 defines a real number operand
*/
if (byte0 == 28)
{
var value = data.ReadByte() << 8 | data.ReadByte();
numbers.Add(new Operand(value));
}
else if (byte0 == 29)
{
var value = data.ReadByte() << 24 | data.ReadByte() << 16 |
data.ReadByte() << 8 | data.ReadByte();
numbers.Add(new Operand(value));
}
else if (byte0 == 30)
{
var realNumber = ReadRealNumber(data);
numbers.Add(new Operand(realNumber));
}
else if (byte0 >= 32 && byte0 <= 246)
{
var value = byte0 - 139;
numbers.Add(new Operand(value));
}
else if (byte0 >= 247 && byte0 <= 250)
{
var value = (byte0 - 247) * 256 + data.ReadByte() + 108;
numbers.Add(new Operand(value));
}
else if (byte0 >= 251 && byte0 <= 254)
{
var value = -(byte0 - 251) * 256 - data.ReadByte() - 108;
numbers.Add(new Operand(value));
}
else
{
throw new InvalidOperationException($"The first dictionary byte was not in the range 29 - 254. Got {byte0}.");
}
}
privateDictionary = privateDictionaryReader.Read(data, stringIndex);
}
return dictionary;
}
private static decimal ReadRealNumber(CompactFontFormatData data)
{
var sb = new StringBuilder();
var done = false;
var exponentMissing = false;
while (!done)
if (dictionary.CharSetOffset >= 0)
{
var b = data.ReadByte();
var nibble1 = b / 16;
var nibble2 = b % 16;
for (var i = 0; i < 2; i++)
{
var nibble = i == 0 ? nibble1 : nibble2;
switch (nibble)
{
case 0x0:
case 0x1:
case 0x2:
case 0x3:
case 0x4:
case 0x5:
case 0x6:
case 0x7:
case 0x8:
case 0x9:
sb.Append(nibble);
exponentMissing = false;
break;
case 0xa:
sb.Append(".");
break;
case 0xb:
sb.Append("E");
exponentMissing = true;
break;
case 0xc:
sb.Append("E-");
exponentMissing = true;
break;
case 0xd:
break;
case 0xe:
sb.Append("-");
break;
case 0xf:
done = true;
break;
default:
throw new InvalidOperationException($"Did not expect nibble value: {nibble}.");
}
}
}
if (exponentMissing)
if (dictionary.CharStringsOffset >= 0)
{
// the exponent is missing, just append "0" to avoid an exception
// not sure if 0 is the correct value, but it seems to fit
// see PDFBOX-1522
sb.Append("0");
}
data.Seek(dictionary.CharStringsOffset);
if (sb.Length == 0)
{
return 0m;
}
return decimal.Parse(sb.ToString());
}
private static void ApplyOperator(byte byte0, List<Operand> operands, CompactFontFormatData data,
string[] stringIndex,
CompactFontFormatFontDictionary dictionary)
{
OperandKey key;
if (byte0 == 12)
{
var b1 = data.ReadByte();
key = new OperandKey(byte0, b1);
}
else
{
key = new OperandKey(byte0);
}
switch (key.Byte0)
{
case 0:
dictionary.Version = GetString(operands, stringIndex);
break;
case 1:
dictionary.Notice = GetString(operands, stringIndex);
break;
case 2:
dictionary.FullName = GetString(operands, stringIndex);
break;
case 3:
dictionary.FamilyName = GetString(operands, stringIndex);
break;
case 4:
dictionary.Weight = GetString(operands, stringIndex);
break;
case 5:
dictionary.FontBoundingBox = GetBoundingBox(operands);
break;
case 12:
{
if (!key.Byte1.HasValue)
{
throw new InvalidOperationException("A single byte sequence beginning with 12 was found.");
}
switch (key.Byte1.Value)
{
case 1:
dictionary.IsFixedPitch = operands[0].Decimal == 1;
break;
case 2:
dictionary.ItalicAngle = operands[0].Decimal;
break;
case 3:
dictionary.UnderlinePosition = operands[0].Decimal;
break;
case 4:
dictionary.UnderlineThickness = operands[0].Decimal;
break;
case 5:
dictionary.PaintType = operands[0].Decimal;
break;
case 6:
dictionary.CharstringType = operands[0].Int.Value;
break;
case 7:
break;
case 8:
break;
}
}
break;
case 13:
dictionary.UniqueId = operands.Count > 0 ? operands[0].Decimal : 0;
break;
case 14:
dictionary.Xuid = ToArray(operands);
break;
case 15:
break;
case 16:
break;
case 17:
break;
case 18:
break;
var index = indexReader.ReadDictionaryData(data);
}
}
private static string GetString(List<Operand> operands, string[] stringIndex)
{
if (operands.Count == 0)
{
throw new InvalidOperationException("Cannot read a string from an empty operands array.");
}
if (!operands[0].Int.HasValue)
{
throw new InvalidOperationException($"The first operand for reading a string was not an integer. Got: {operands[0].Decimal}");
}
var index = operands[0].Int.Value;
if (index >= 0 && index <= 390)
{
return CompactFontFormatStandardStrings.GetName(index);
}
var stringIndexIndex = index - 391;
if (stringIndexIndex >= 0 && stringIndexIndex < stringIndex.Length)
{
return stringIndex[stringIndexIndex];
}
return $"SID{index}";
}
private static PdfRectangle GetBoundingBox(List<Operand> operands)
{
if (operands.Count != 4)
{
return new PdfRectangle();
}
return new PdfRectangle(operands[0].Decimal, operands[1].Decimal,
operands[2].Decimal, operands[3].Decimal);
}
private static decimal[] ToArray(List<Operand> operands)
{
var result = new decimal[operands.Count];
for (int i = 0; i < result.Length; i++)
{
result[i] = operands[i].Decimal;
}
return result;
}
private struct Operand
{
public int? Int { get; }
public decimal Decimal { get; }
public Operand(int integer)
{
Int = integer;
Decimal = integer;
}
public Operand(decimal d)
{
Int = null;
Decimal = d;
}
}
private struct OperandKey
{
public byte Byte0 { get; }
public byte? Byte1 { get; }
public OperandKey(Byte byte0)
{
Byte0 = byte0;
Byte1 = null;
}
public OperandKey(byte byte0, byte byte1)
{
Byte0 = byte0;
Byte1 = byte1;
}
}
}
internal class CompactFontFormatFontDictionary
{
public string Version { get; set; }
public string Notice { get; set; }
public string Copyright { get; set; }
public string FullName { get; set; }
public string FamilyName { get; set; }
public string Weight { get; set; }
public bool IsFixedPitch { get; set; }
public decimal ItalicAngle { get; set; }
public decimal UnderlinePosition { get; set; } = -100;
public decimal UnderlineThickness { get; set; } = 50;
public decimal PaintType { get; set; }
public int CharstringType { get; set; }
public TransformationMatrix FontMatrix { get; set; } = TransformationMatrix.FromValues(0.001m, 0m, 0.001m, 0, 0, 0);
public decimal UniqueId { get; set; }
public PdfRectangle FontBoundingBox { get; set; } = new PdfRectangle(0, 0, 0, 0);
public decimal[] Xuid { get; set; }
}
}

View File

@ -11,10 +11,12 @@
private const string TagTtfonly = "\u0000\u0001\u0000\u0000";
private readonly CompactFontFormatIndividualFontParser individualFontParser;
private readonly CompactFontFormatIndexReader indexReader;
public CompactFontFormatParser(CompactFontFormatIndividualFontParser individualFontParser)
public CompactFontFormatParser(CompactFontFormatIndividualFontParser individualFontParser, CompactFontFormatIndexReader indexReader)
{
this.individualFontParser = individualFontParser;
this.indexReader = indexReader;
}
public void Parse(CompactFontFormatData data)
@ -38,11 +40,11 @@
var fontNames = ReadStringIndex(data);
var topLevelDict = ReadDictionaryData(data);
var topLevelDict = indexReader.ReadDictionaryData(data);
var stringIndex = ReadStringIndex(data);
var globalSubroutineIndex = ReadDictionaryData(data);
var globalSubroutineIndex = indexReader.ReadDictionaryData(data);
for (var i = 0; i < fontNames.Length; i++)
{
@ -72,9 +74,9 @@
/// <summary>
/// Reads indexed string data.
/// </summary>
private static string[] ReadStringIndex(CompactFontFormatData data)
private string[] ReadStringIndex(CompactFontFormatData data)
{
var index = ReadIndex(data);
var index = indexReader.ReadIndex(data);
var count = index.Length - 1;
@ -94,45 +96,6 @@
return result;
}
private static byte[][] ReadDictionaryData(CompactFontFormatData data)
{
var index = ReadIndex(data);
var count = index.Length - 1;
var results = new byte[count][];
for (var i = 0; i < count; i++)
{
var length = index[i + 1] - index[i];
if (length < 0)
{
throw new InvalidOperationException($"Negative object length {length} at {i}. Current position: {data.Position}.");
}
results[i] = data.ReadBytes(length);
}
return results;
}
private static int[] ReadIndex(CompactFontFormatData data)
{
var count = data.ReadCard16();
var offsetSize = data.ReadOffsize();
var offsets = new int[count + 1];
for (var i = 0; i < offsets.Length; i++)
{
offsets[i] = data.ReadOffset(offsetSize);
}
return offsets;
}
}
internal class CompactFontFormatData

View File

@ -0,0 +1,295 @@
namespace UglyToad.PdfPig.Fonts.CompactFontFormat.Dictionaries
{
using System;
using System.Collections.Generic;
using System.Text;
using Geometry;
/// <summary>
/// Base class for reading Compact Font Format (CFF) DICT data. A DICT is a sequence of operands
/// followed by the operator they apply to; this class decodes the operands and hands each
/// operator to <see cref="ApplyOperation"/> so the derived reader can populate its dictionary.
/// </summary>
/// <typeparam name="T">The type of the dictionary object populated by the reader.</typeparam>
internal abstract class CompactFontFormatDictionaryReader<T>
{
    /// <summary>
    /// Reads a complete dictionary from the data.
    /// </summary>
    public abstract T Read(CompactFontFormatData data, string[] stringIndex);

    /// <summary>
    /// Reads operand/operator sequences until the data is exhausted, applying each operator to the dictionary.
    /// </summary>
    /// <param name="dictionary">The dictionary to populate.</param>
    /// <param name="data">The data containing only the bytes of the DICT.</param>
    /// <param name="stringIndex">The strings of the String INDEX, used to resolve string identifiers.</param>
    /// <returns>The same <paramref name="dictionary"/> instance.</returns>
    /// <exception cref="InvalidOperationException">A reserved byte was found or no operator followed the operands.</exception>
    protected T ReadDictionary(T dictionary, CompactFontFormatData data, string[] stringIndex)
    {
        // Kept local (not a field) so a reader instance holds no mutable state between or during reads.
        var operands = new List<Operand>();

        while (data.CanRead())
        {
            operands.Clear();
            var infiniteLoopProtection = 0;

            while (true)
            {
                infiniteLoopProtection++;
                // Avoid the library getting caught in an infinite loop, probably not possible.
                // "An operator may be preceded by up to a maximum of 48 operands."
                if (infiniteLoopProtection > 256)
                {
                    throw new InvalidOperationException("Got caught in an infinite loop trying to read a CFF dictionary.");
                }

                var byte0 = data.ReadByte();

                // Operands and operators are distinguished by the first byte, 0 - 21 specify operators
                if (byte0 <= 21)
                {
                    var key = byte0 == 12 ? new OperandKey(byte0, data.ReadByte()) : new OperandKey(byte0);

                    ApplyOperation(dictionary, operands, key, stringIndex);
                    break;
                }

                /*
                 * b0 value     value range         calculation
                 * 32 - 246     -107 - +107         b0 - 139
                 * 247 - 250    +108 - +1131        (b0 - 247)*256 + b1 + 108
                 * 251 - 254    -1131 - -108        -(b0 - 251)*256 - b1 - 108
                 * 28           -32768 - +32767     b1 << 8 | b2
                 * 29           -(2^31)-+(2^31-1)   b1 << 24 | b2 << 16 | b3 << 8 | b4
                 *
                 * A byte value of 30 defines a real number operand
                 */
                if (byte0 == 28)
                {
                    // The two bytes form a signed 16 bit value, the cast restores the sign
                    // which is lost when the bytes are combined as 32 bit integers.
                    int value = (short)(data.ReadByte() << 8 | data.ReadByte());
                    operands.Add(new Operand(value));
                }
                else if (byte0 == 29)
                {
                    var value = data.ReadByte() << 24 | data.ReadByte() << 16 |
                                data.ReadByte() << 8 | data.ReadByte();
                    operands.Add(new Operand(value));
                }
                else if (byte0 == 30)
                {
                    var realNumber = ReadRealNumber(data);
                    operands.Add(new Operand(realNumber));
                }
                else if (byte0 >= 32 && byte0 <= 246)
                {
                    var value = byte0 - 139;
                    operands.Add(new Operand(value));
                }
                else if (byte0 >= 247 && byte0 <= 250)
                {
                    var value = (byte0 - 247) * 256 + data.ReadByte() + 108;
                    operands.Add(new Operand(value));
                }
                else if (byte0 >= 251 && byte0 <= 254)
                {
                    var value = -(byte0 - 251) * 256 - data.ReadByte() - 108;
                    operands.Add(new Operand(value));
                }
                else
                {
                    throw new InvalidOperationException($"The first dictionary byte was not in the range 29 - 254. Got {byte0}.");
                }
            }
        }

        return dictionary;
    }

    /// <summary>
    /// Reads a real number operand. Each byte holds two nibbles: 0 - 9 are digits, 0xa is the
    /// decimal point, 0xb is "E", 0xc is "E-", 0xd is ignored, 0xe is a minus sign and 0xf ends the number.
    /// </summary>
    private static decimal ReadRealNumber(CompactFontFormatData data)
    {
        var sb = new StringBuilder();
        var done = false;
        var exponentMissing = false;

        while (!done)
        {
            var b = data.ReadByte();
            var nibble1 = b / 16;
            var nibble2 = b % 16;

            for (var i = 0; i < 2; i++)
            {
                var nibble = i == 0 ? nibble1 : nibble2;

                switch (nibble)
                {
                    case 0x0:
                    case 0x1:
                    case 0x2:
                    case 0x3:
                    case 0x4:
                    case 0x5:
                    case 0x6:
                    case 0x7:
                    case 0x8:
                    case 0x9:
                        sb.Append(nibble);
                        exponentMissing = false;
                        break;
                    case 0xa:
                        sb.Append(".");
                        break;
                    case 0xb:
                        sb.Append("E");
                        exponentMissing = true;
                        break;
                    case 0xc:
                        sb.Append("E-");
                        exponentMissing = true;
                        break;
                    case 0xd:
                        break;
                    case 0xe:
                        sb.Append("-");
                        break;
                    case 0xf:
                        done = true;
                        break;
                    default:
                        throw new InvalidOperationException($"Did not expect nibble value: {nibble}.");
                }
            }
        }

        if (exponentMissing)
        {
            // the exponent is missing, just append "0" to avoid an exception
            // not sure if 0 is the correct value, but it seems to fit
            // see PDFBOX-1522
            sb.Append("0");
        }

        if (sb.Length == 0)
        {
            return 0m;
        }

        // The text always uses "." and may contain an exponent, so parse with the invariant culture
        // and allow exponents: the default decimal.Parse rejects "E" and honours the current culture's separators.
        return decimal.Parse(sb.ToString(),
            System.Globalization.NumberStyles.Float,
            System.Globalization.CultureInfo.InvariantCulture);
    }

    /// <summary>
    /// Applies a single operator and the operands which preceded it to the dictionary.
    /// </summary>
    protected abstract void ApplyOperation(T dictionary, List<Operand> operands, OperandKey operandKey, string[] stringIndex);

    /// <summary>
    /// Resolves the first operand as a string identifier: 0 - 390 are standard strings, higher values
    /// index into <paramref name="stringIndex"/> (offset by 391). Unresolvable identifiers yield "SID{n}".
    /// </summary>
    /// <exception cref="InvalidOperationException">There are no operands or the first operand is not an integer.</exception>
    protected static string GetString(List<Operand> operands, string[] stringIndex)
    {
        if (operands.Count == 0)
        {
            throw new InvalidOperationException("Cannot read a string from an empty operands array.");
        }

        if (!operands[0].Int.HasValue)
        {
            throw new InvalidOperationException($"The first operand for reading a string was not an integer. Got: {operands[0].Decimal}");
        }

        var index = operands[0].Int.Value;

        if (index >= 0 && index <= 390)
        {
            return CompactFontFormatStandardStrings.GetName(index);
        }

        var stringIndexIndex = index - 391;
        if (stringIndexIndex >= 0 && stringIndexIndex < stringIndex.Length)
        {
            return stringIndex[stringIndexIndex];
        }

        return $"SID{index}";
    }

    /// <summary>
    /// Builds a rectangle from exactly four operands, any other operand count yields a default rectangle.
    /// </summary>
    protected static PdfRectangle GetBoundingBox(List<Operand> operands)
    {
        if (operands.Count != 4)
        {
            return new PdfRectangle();
        }

        return new PdfRectangle(operands[0].Decimal, operands[1].Decimal,
            operands[2].Decimal, operands[3].Decimal);
    }

    /// <summary>
    /// Copies the decimal value of every operand into a new array.
    /// </summary>
    protected static decimal[] ToArray(List<Operand> operands)
    {
        var result = new decimal[operands.Count];

        for (int i = 0; i < result.Length; i++)
        {
            result[i] = operands[i].Decimal;
        }

        return result;
    }

    /// <summary>
    /// Gets the first operand as an integer, or <paramref name="defaultValue"/> if there are no
    /// operands or the first operand is not an integer.
    /// </summary>
    protected static int GetIntOrDefault(List<Operand> operands, int defaultValue = 0)
    {
        if (operands.Count == 0)
        {
            return defaultValue;
        }

        var first = operands[0];

        if (first.Int.HasValue)
        {
            return first.Int.Value;
        }

        return defaultValue;
    }

    /// <summary>
    /// Decodes a delta encoded array: the first value is absolute and every subsequent operand
    /// is the difference from the previous decoded value.
    /// </summary>
    protected static decimal[] ReadDeltaToArray(List<Operand> operands)
    {
        var results = new decimal[operands.Count];

        if (operands.Count == 0)
        {
            return results;
        }

        results[0] = operands[0].Decimal;

        for (var i = 1; i < operands.Count; i++)
        {
            var previous = results[i - 1];
            var current = operands[i].Decimal;

            results[i] = previous + current;
        }

        return results;
    }

    /// <summary>
    /// A numeric operand. <see cref="Int"/> only has a value when the operand was encoded as an integer.
    /// </summary>
    protected struct Operand
    {
        public int? Int { get; }

        public decimal Decimal { get; }

        public Operand(int integer)
        {
            Int = integer;
            Decimal = integer;
        }

        public Operand(decimal d)
        {
            Int = null;
            Decimal = d;
        }
    }

    /// <summary>
    /// Identifies an operator. <see cref="Byte1"/> is only set for two byte operators, which start with 12.
    /// </summary>
    protected struct OperandKey
    {
        public byte Byte0 { get; }

        public byte? Byte1 { get; }

        public OperandKey(Byte byte0)
        {
            Byte0 = byte0;
            Byte1 = null;
        }

        public OperandKey(byte byte0, byte byte1)
        {
            Byte0 = byte0;
            Byte1 = byte1;
        }
    }
}
}

View File

@ -0,0 +1,47 @@
namespace UglyToad.PdfPig.Fonts.CompactFontFormat.Dictionaries
{
/// <summary>
/// The values of a Compact Font Format (CFF) Private DICT. Properties the font does not
/// specify keep the defaults assigned here.
/// </summary>
internal class CompactFontFormatPrivateDictionary
{
    /// <summary>
    /// Decoded (absolute) values of the BlueValues delta array.
    /// </summary>
    public decimal[] BlueValues { get; set; }

    /// <summary>
    /// Decoded (absolute) values of the OtherBlues delta array.
    /// </summary>
    public decimal[] OtherBlues { get; set; }

    /// <summary>
    /// Decoded (absolute) values of the FamilyBlues delta array.
    /// </summary>
    public decimal[] FamilyBlues { get; set; }

    /// <summary>
    /// Decoded (absolute) values of the FamilyOtherBlues delta array.
    /// </summary>
    public decimal[] FamilyOtherBlues { get; set; }

    public decimal BlueScale { get; set; } = 0.039625m;

    public decimal BlueShift { get; set; } = 7;

    public decimal BlueFuzz { get; set; } = 1;

    public decimal StandardHorizontalWidth { get; set; }

    public decimal StandardVerticalWidth { get; set; }

    /// <summary>
    /// Decoded (absolute) values of the StemSnapH delta array.
    /// </summary>
    public decimal[] StemSnapHorizontal { get; set; }

    /// <summary>
    /// Decoded (absolute) values of the StemSnapV delta array.
    /// NOTE(review): the name contains a typo ("Stap"), it is kept because other code assigns this property by name.
    /// </summary>
    public decimal[] StemStapVertical { get; set; }

    public bool ForceBold { get; set; }

    public decimal LanguageGroup { get; set; }

    /// <summary>
    /// The CFF specification lists 0.06 as the value to use when the font does not provide one.
    /// </summary>
    public decimal ExpansionFactor { get; set; } = 0.06m;

    public decimal InitialRandomSeed { get; set; }

    /// <summary>
    /// Value of the local subroutines (Subrs) operator.
    /// NOTE(review): presumably relative to the start of the private dictionary data - confirm against the specification.
    /// </summary>
    public int LocalSubroutineLocalOffset { get; set; }

    /// <summary>
    /// If a glyph's width equals the default width X it can be omitted from the charstring.
    /// </summary>
    public decimal DefaultWidthX { get; set; }

    /// <summary>
    /// If not equal to <see cref="DefaultWidthX"/>, Glyph width is computed by adding the charstring width to the nominal width X value.
    /// </summary>
    public decimal NominalWidthX { get; set; }
}
}

View File

@ -0,0 +1,90 @@
namespace UglyToad.PdfPig.Fonts.CompactFontFormat.Dictionaries
{
using System;
using System.Collections.Generic;
/// <summary>
/// Reads a Compact Font Format (CFF) Private DICT into a <see cref="CompactFontFormatPrivateDictionary"/>.
/// Operators which are not recognised are ignored. An operator which arrives without the operand
/// it needs leaves the corresponding property at its current (default) value rather than throwing.
/// </summary>
internal class CompactFontFormatPrivateDictionaryReader : CompactFontFormatDictionaryReader<CompactFontFormatPrivateDictionary>
{
    /// <summary>
    /// Reads a new private dictionary from the data.
    /// </summary>
    /// <param name="data">The data to read the dictionary from.</param>
    /// <param name="stringIndex">The strings of the String INDEX.</param>
    public override CompactFontFormatPrivateDictionary Read(CompactFontFormatData data, string[] stringIndex)
    {
        var dictionary = new CompactFontFormatPrivateDictionary();

        ReadDictionary(dictionary, data, stringIndex);

        return dictionary;
    }

    /// <summary>
    /// Stores the operands of a single operator on the matching dictionary property.
    /// </summary>
    /// <exception cref="InvalidOperationException">The operator is 12 but no second byte was provided.</exception>
    protected override void ApplyOperation(CompactFontFormatPrivateDictionary dictionary, List<Operand> operands, OperandKey operandKey, string[] stringIndex)
    {
        switch (operandKey.Byte0)
        {
            case 6:
                dictionary.BlueValues = ReadDeltaToArray(operands);
                break;
            case 7:
                dictionary.OtherBlues = ReadDeltaToArray(operands);
                break;
            case 8:
                dictionary.FamilyBlues = ReadDeltaToArray(operands);
                break;
            case 9:
                dictionary.FamilyOtherBlues = ReadDeltaToArray(operands);
                break;
            case 10:
                dictionary.StandardHorizontalWidth = GetDecimalOrDefault(operands, dictionary.StandardHorizontalWidth);
                break;
            case 11:
                dictionary.StandardVerticalWidth = GetDecimalOrDefault(operands, dictionary.StandardVerticalWidth);
                break;
            case 12:
                {
                    if (!operandKey.Byte1.HasValue)
                    {
                        throw new InvalidOperationException("In the CFF private dictionary, got the operation key 12 without a second byte.");
                    }

                    switch (operandKey.Byte1.Value)
                    {
                        case 9:
                            dictionary.BlueScale = GetDecimalOrDefault(operands, dictionary.BlueScale);
                            break;
                        case 10:
                            dictionary.BlueShift = GetDecimalOrDefault(operands, dictionary.BlueShift);
                            break;
                        case 11:
                            dictionary.BlueFuzz = GetDecimalOrDefault(operands, dictionary.BlueFuzz);
                            break;
                        case 12:
                            dictionary.StemSnapHorizontal = ReadDeltaToArray(operands);
                            break;
                        case 13:
                            dictionary.StemStapVertical = ReadDeltaToArray(operands);
                            break;
                        case 14:
                            if (operands.Count > 0)
                            {
                                dictionary.ForceBold = operands[0].Decimal == 1;
                            }
                            break;
                        case 17:
                            dictionary.LanguageGroup = GetDecimalOrDefault(operands, dictionary.LanguageGroup);
                            break;
                        case 18:
                            dictionary.ExpansionFactor = GetDecimalOrDefault(operands, dictionary.ExpansionFactor);
                            break;
                        case 19:
                            dictionary.InitialRandomSeed = GetDecimalOrDefault(operands, dictionary.InitialRandomSeed);
                            break;
                    }
                }
                break;
            case 19:
                dictionary.LocalSubroutineLocalOffset = GetIntOrDefault(operands, -1);
                break;
            case 20:
                dictionary.DefaultWidthX = GetDecimalOrDefault(operands, dictionary.DefaultWidthX);
                break;
            case 21:
                dictionary.NominalWidthX = GetDecimalOrDefault(operands, dictionary.NominalWidthX);
                break;
        }
    }

    /// <summary>
    /// Gets the value of the first operand, or <paramref name="defaultValue"/> when the operator had no operands.
    /// </summary>
    private static decimal GetDecimalOrDefault(List<Operand> operands, decimal defaultValue)
    {
        return operands.Count > 0 ? operands[0].Decimal : defaultValue;
    }
}
}

View File

@ -0,0 +1,66 @@
namespace UglyToad.PdfPig.Fonts.CompactFontFormat.Dictionaries
{
using System;
using Core;
using Geometry;
/// <summary>
/// The values of a Compact Font Format (CFF) Top DICT. Properties the font does not specify
/// keep the defaults assigned here.
/// </summary>
internal class CompactFontFormatTopLevelDictionary
{
    /// <summary>
    /// The value used for offsets which have not been set.
    /// </summary>
    public const int UnsetOffset = -1;

    public string Version { get; set; }

    public string Notice { get; set; }

    public string Copyright { get; set; }

    public string FullName { get; set; }

    public string FamilyName { get; set; }

    public string Weight { get; set; }

    public bool IsFixedPitch { get; set; }

    public decimal ItalicAngle { get; set; }

    public decimal UnderlinePosition { get; set; } = -100;

    public decimal UnderlineThickness { get; set; } = 50;

    public decimal PaintType { get; set; }

    public int CharstringType { get; set; } = 2;

    /// <summary>
    /// Defaults to the matrix [0.001 0 0 0.001 0 0], scaling both axes by one thousandth.
    /// </summary>
    public TransformationMatrix FontMatrix { get; set; } = TransformationMatrix.FromValues(0.001m, 0m, 0m, 0.001m, 0m, 0m);

    public decimal StrokeWidth { get; set; }

    public decimal UniqueId { get; set; }

    public PdfRectangle FontBoundingBox { get; set; } = new PdfRectangle(0, 0, 0, 0);

    public decimal[] Xuid { get; set; }

    public int CharSetOffset { get; set; } = UnsetOffset;

    public int EncodingOffset { get; set; } = UnsetOffset;

    private Tuple<int, int> privateDictionarySizeAndOffset = Tuple.Create(0, UnsetOffset);

    /// <summary>
    /// The size (Item1) and offset (Item2) of the private dictionary. Never returns
    /// <see langword="null"/>: assigning null results in a size of zero and an unset offset.
    /// </summary>
    public Tuple<int, int> PrivateDictionarySizeAndOffset
    {
        get => privateDictionarySizeAndOffset ?? Tuple.Create(0, UnsetOffset);
        set => privateDictionarySizeAndOffset = value;
    }

    public int CharStringsOffset { get; set; } = UnsetOffset;

    public int SyntheticBaseFontIndex { get; set; }

    public string PostScript { get; set; }

    public string BaseFontName { get; set; }

    public decimal[] BaseFontBlend { get; set; }
}
}

View File

@ -0,0 +1,145 @@
namespace UglyToad.PdfPig.Fonts.CompactFontFormat.Dictionaries
{
using System;
using System.Collections.Generic;
using Core;
/// <summary>
/// Reads a Compact Font Format (CFF) Top DICT into a <see cref="CompactFontFormatTopLevelDictionary"/>.
/// Operators which are not recognised are ignored. A numeric operator which arrives without the
/// operand it needs leaves the corresponding property at its current (default) value.
/// </summary>
internal class CompactFontFormatTopLevelDictionaryReader : CompactFontFormatDictionaryReader<CompactFontFormatTopLevelDictionary>
{
    /// <summary>
    /// Reads a new top level dictionary from the data.
    /// </summary>
    /// <param name="data">The data containing the bytes of the dictionary.</param>
    /// <param name="stringIndex">The strings of the String INDEX, used to resolve string identifiers.</param>
    public override CompactFontFormatTopLevelDictionary Read(CompactFontFormatData data, string[] stringIndex)
    {
        var dictionary = new CompactFontFormatTopLevelDictionary();

        ReadDictionary(dictionary, data, stringIndex);

        return dictionary;
    }

    /// <summary>
    /// Stores the operands of a single operator on the matching dictionary property.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The operator is 12 without a second byte, a string operator has no valid operand or
    /// the font matrix does not have a supported number of values.
    /// </exception>
    protected override void ApplyOperation(CompactFontFormatTopLevelDictionary dictionary, List<Operand> operands, OperandKey key, string[] stringIndex)
    {
        switch (key.Byte0)
        {
            case 0:
                dictionary.Version = GetString(operands, stringIndex);
                break;
            case 1:
                dictionary.Notice = GetString(operands, stringIndex);
                break;
            case 2:
                dictionary.FullName = GetString(operands, stringIndex);
                break;
            case 3:
                dictionary.FamilyName = GetString(operands, stringIndex);
                break;
            case 4:
                dictionary.Weight = GetString(operands, stringIndex);
                break;
            case 5:
                dictionary.FontBoundingBox = GetBoundingBox(operands);
                break;
            case 12:
                {
                    if (!key.Byte1.HasValue)
                    {
                        throw new InvalidOperationException("A single byte sequence beginning with 12 was found.");
                    }

                    ApplyTwoByteOperation(dictionary, operands, key.Byte1.Value, stringIndex);
                }
                break;
            case 13:
                dictionary.UniqueId = operands.Count > 0 ? operands[0].Decimal : 0;
                break;
            case 14:
                dictionary.Xuid = ToArray(operands);
                break;
            case 15:
                dictionary.CharSetOffset = GetIntOrDefault(operands);
                break;
            case 16:
                dictionary.EncodingOffset = GetIntOrDefault(operands);
                break;
            case 17:
                dictionary.CharStringsOffset = GetIntOrDefault(operands);
                break;
            case 18:
                {
                    // The operator takes two integers: the size then the offset. Only store them when both
                    // are present, a missing offset must not be mistaken for a dictionary located at offset 0.
                    if (operands.Count >= 2 && operands[0].Int.HasValue && operands[1].Int.HasValue)
                    {
                        dictionary.PrivateDictionarySizeAndOffset = Tuple.Create(operands[0].Int.Value, operands[1].Int.Value);
                    }
                }
                break;
        }
    }

    /// <summary>
    /// Handles the two byte operators, those whose first byte is 12.
    /// </summary>
    private static void ApplyTwoByteOperation(CompactFontFormatTopLevelDictionary dictionary, List<Operand> operands, byte byte1, string[] stringIndex)
    {
        switch (byte1)
        {
            case 0:
                dictionary.Copyright = GetString(operands, stringIndex);
                break;
            case 1:
                if (operands.Count > 0)
                {
                    dictionary.IsFixedPitch = operands[0].Decimal == 1;
                }
                break;
            case 2:
                dictionary.ItalicAngle = GetDecimalOrDefault(operands, dictionary.ItalicAngle);
                break;
            case 3:
                dictionary.UnderlinePosition = GetDecimalOrDefault(operands, dictionary.UnderlinePosition);
                break;
            case 4:
                dictionary.UnderlineThickness = GetDecimalOrDefault(operands, dictionary.UnderlineThickness);
                break;
            case 5:
                dictionary.PaintType = GetDecimalOrDefault(operands, dictionary.PaintType);
                break;
            case 6:
                // Keep the existing (default) charstring type when no integer operand was supplied.
                dictionary.CharstringType = GetIntOrDefault(operands, dictionary.CharstringType);
                break;
            case 7:
                {
                    var array = ToArray(operands);

                    // A font matrix is six numbers [a b c d e f]; four numbers [a b c d] are tolerated.
                    if (array.Length != 6 && array.Length != 4)
                    {
                        throw new InvalidOperationException($"Expected six (or four) values for the font matrix, instead got: [{string.Join(", ", array)}].");
                    }

                    dictionary.FontMatrix = TransformationMatrix.FromArray(array);
                }
                break;
            case 8:
                dictionary.StrokeWidth = GetDecimalOrDefault(operands, dictionary.StrokeWidth);
                break;
            case 20:
                dictionary.SyntheticBaseFontIndex = GetIntOrDefault(operands);
                break;
            case 21:
                dictionary.PostScript = GetString(operands, stringIndex);
                break;
            case 22:
                dictionary.BaseFontName = GetString(operands, stringIndex);
                break;
            case 23:
                dictionary.BaseFontBlend = ReadDeltaToArray(operands);
                break;
            // TODO: CID Font Stuff
            case 30:
            case 31:
            case 32:
            case 33:
            case 34:
            case 35:
            case 36:
            case 37:
            case 38:
                break;
        }
    }

    /// <summary>
    /// Gets the value of the first operand, or <paramref name="defaultValue"/> when the operator had no operands.
    /// </summary>
    private static decimal GetDecimalOrDefault(List<Operand> operands, decimal defaultValue)
    {
        return operands.Count > 0 ? operands[0].Decimal : defaultValue;
    }
}
}

View File

@ -165,7 +165,7 @@
}
else
{
mediaBox = pageTreeMembers.GetMediaBox();
mediaBox = pageTreeMembers.MediaBox;
if (mediaBox == null)
{

View File

@ -9,6 +9,7 @@
using Filters;
using Fonts;
using Fonts.CompactFontFormat;
using Fonts.CompactFontFormat.Dictionaries;
using Fonts.Parser;
using Fonts.Parser.Handlers;
using Fonts.Parser.Parts;
@ -102,13 +103,16 @@
var cidFontFactory = new CidFontFactory(pdfScanner, fontDescriptorFactory, trueTypeFontParser, filterProvider);
var encodingReader = new EncodingReader(pdfScanner);
var compactFontFormatIndexReader = new CompactFontFormatIndexReader();
var fontFactory = new FontFactory(log, new Type0FontHandler(cidFontFactory,
cMapCache,
filterProvider, pdfScanner),
new TrueTypeFontHandler(log, pdfScanner, filterProvider, cMapCache, fontDescriptorFactory, trueTypeFontParser, encodingReader),
new Type1FontHandler(pdfScanner, cMapCache, filterProvider, fontDescriptorFactory, encodingReader,
new Type1FontParser(new Type1EncryptedPortionParser()),
new CompactFontFormatParser(new CompactFontFormatIndividualFontParser())),
new CompactFontFormatParser(new CompactFontFormatIndividualFontParser(compactFontFormatIndexReader, new CompactFontFormatTopLevelDictionaryReader(),
new CompactFontFormatPrivateDictionaryReader()), compactFontFormatIndexReader)),
new Type3FontHandler(pdfScanner, cMapCache, filterProvider, encodingReader));
var resourceContainer = new ResourceContainer(pdfScanner, fontFactory);