Fix a bug where a MediaBox defined on the Pages node would throw. A lot more work on parsing the Compact Font Format.

This commit is contained in:
Eliot Jones 2018-04-29 14:42:54 +01:00
parent 85d1f84965
commit b51ebfd70c
14 changed files with 969 additions and 415 deletions

View File

@ -1,21 +1,15 @@
namespace UglyToad.PdfPig.Content namespace UglyToad.PdfPig.Content
{ {
using System;
/// <summary> /// <summary>
/// Contains the values inherited from the Page Tree for this page. /// Contains the values inherited from the Page Tree for this page.
/// </summary> /// </summary>
internal class PageTreeMembers internal class PageTreeMembers
{ {
public MediaBox GetMediaBox()
{
// TODO: tree inheritance
throw new NotImplementedException("Track inherited members");
}
public CropBox GetCropBox() public CropBox GetCropBox()
{ {
return null; return null;
} }
public MediaBox MediaBox { get; set; }
} }
} }

View File

@ -2,6 +2,7 @@
{ {
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using Geometry;
using Logging; using Logging;
using Parser.Parts; using Parser.Parts;
using Tokenization.Scanner; using Tokenization.Scanner;
@ -49,15 +50,17 @@
var observed = new List<int>(); var observed = new List<int>();
var pageTreeMembers = new PageTreeMembers();
// todo: running a search for a different, unloaded, page number, results in a bug. // todo: running a search for a different, unloaded, page number, results in a bug.
var isFound = FindPage(rootPageDictionary, pageNumber, observed); var isFound = FindPage(rootPageDictionary, pageNumber, observed, pageTreeMembers);
if (!isFound || !locatedPages.TryGetValue(pageNumber, out targetPageDictionary)) if (!isFound || !locatedPages.TryGetValue(pageNumber, out targetPageDictionary))
{ {
throw new ArgumentOutOfRangeException("Could not find the page with number: " + pageNumber); throw new ArgumentOutOfRangeException("Could not find the page with number: " + pageNumber);
} }
var page = pageFactory.Create(pageNumber, targetPageDictionary, new PageTreeMembers(), isLenientParsing); var page = pageFactory.Create(pageNumber, targetPageDictionary, pageTreeMembers, isLenientParsing);
locatedPages[pageNumber] = targetPageDictionary; locatedPages[pageNumber] = targetPageDictionary;
@ -74,7 +77,7 @@
return pages[pages.Count - 1] + 1; return pages[pages.Count - 1] + 1;
} }
public bool FindPage(DictionaryToken currentPageDictionary, int soughtPageNumber, List<int> pageNumbersObserved) public bool FindPage(DictionaryToken currentPageDictionary, int soughtPageNumber, List<int> pageNumbersObserved, PageTreeMembers pageTreeMembers)
{ {
var type = currentPageDictionary.GetNameOrDefault(NameToken.Type); var type = currentPageDictionary.GetNameOrDefault(NameToken.Type);
@ -97,6 +100,16 @@
return false; return false;
} }
if (currentPageDictionary.TryGet(NameToken.MediaBox, out var token))
{
var mediaBox = DirectObjectFinder.Get<ArrayToken>(token, pdfScanner);
pageTreeMembers.MediaBox = new MediaBox(new PdfRectangle(mediaBox.GetNumeric(0).Data,
mediaBox.GetNumeric(1).Data,
mediaBox.GetNumeric(2).Data,
mediaBox.GetNumeric(3).Data));
}
if (!currentPageDictionary.TryGet(NameToken.Kids, out var kids) if (!currentPageDictionary.TryGet(NameToken.Kids, out var kids)
|| !(kids is ArrayToken kidsArray)) || !(kids is ArrayToken kidsArray))
{ {
@ -111,7 +124,7 @@
// todo: exit early // todo: exit early
var child = DirectObjectFinder.Get<DictionaryToken>(kid, pdfScanner); var child = DirectObjectFinder.Get<DictionaryToken>(kid, pdfScanner);
var thisPageMatches = FindPage(child, soughtPageNumber, pageNumbersObserved); var thisPageMatches = FindPage(child, soughtPageNumber, pageNumbersObserved, pageTreeMembers);
if (thisPageMatches) if (thisPageMatches)
{ {

View File

@ -132,6 +132,16 @@
}); });
} }
if (values.Length == 4)
{
return new TransformationMatrix(new []
{
values[0], values[1], 0,
values[2], values[3], 0,
0, 0, 1
});
}
throw new ArgumentException("The array must either define all 9 elements of the matrix or all 6 key elements. Instead array was: " + values); throw new ArgumentException("The array must either define all 9 elements of the matrix or all 6 key elements. Instead array was: " + values);
} }

View File

@ -0,0 +1,214 @@
//namespace UglyToad.PdfPig.Fonts.CharStrings
//{
// using System;
// using System.Collections.Generic;
// internal class Type2CharStringParser
// {
// private int hstemCount = 0;
// private int vstemCount = 0;
// private List<Object> sequence = null;
// private List<Object> Parse(byte[] bytes, byte[][] globalSubrIndex, byte[][] localSubrIndex)
// {
// DataInput input = new DataInput(bytes);
// boolean localSubroutineIndexProvided = localSubrIndex != null && localSubrIndex.length > 0;
// boolean globalSubroutineIndexProvided = globalSubrIndex != null && globalSubrIndex.length > 0;
// while (input.hasRemaining())
// {
// int b0 = input.readUnsignedByte();
// if (b0 == 10 && localSubroutineIndexProvided)
// {
// // process subr command
// Integer operand = (Integer) sequence.remove(sequence.size() - 1);
// //get subrbias
// int bias = 0;
// int nSubrs = localSubrIndex.length;
// if (nSubrs < 1240)
// {
// bias = 107;
// }
// else if (nSubrs < 33900)
// {
// bias = 1131;
// }
// else
// {
// bias = 32768;
// }
// int subrNumber = bias + operand;
// if (subrNumber < localSubrIndex.length)
// {
// byte[] subrBytes = localSubrIndex[subrNumber];
// parse(subrBytes, globalSubrIndex, localSubrIndex, false);
// Object lastItem = sequence.get(sequence.size() - 1);
// if (lastItem is CharStringCommand && ((CharStringCommand) lastItem).getKey().getValue()[0] == 11)
// {
// sequence.remove(sequence.size() - 1); // remove "return" command
// }
// }
// }
// else if (b0 == 29 && globalSubroutineIndexProvided)
// {
// // process globalsubr command
// Integer operand = (Integer) sequence.remove(sequence.size() - 1);
////get subrbias
// int bias;
// int nSubrs = globalSubrIndex.length;
// if (nSubrs < 1240)
// {
// bias = 107;
// }
// else if (nSubrs < 33900)
// {
// bias = 1131;
// }
// else
// {
// bias = 32768;
// }
// int subrNumber = bias + operand;
// if (subrNumber < globalSubrIndex.length)
// {
// byte[] subrBytes = globalSubrIndex[subrNumber];
// parse(subrBytes, globalSubrIndex, localSubrIndex, false);
// Object lastItem = sequence.get(sequence.size() - 1);
// if (lastItem is CharStringCommand && ((CharStringCommand) lastItem).getKey().getValue()[0] == 11)
// {
// sequence.remove(sequence.size() - 1); // remove "return" command
// }
// }
// }
// else if (b0 >= 0 && b0 <= 27)
// {
// sequence.add(readCommand(b0, input));
// }
// else if (b0 == 28)
// {
// sequence.add(readNumber(b0, input));
// }
// else if (b0 >= 29 && b0 <= 31)
// {
// sequence.add(readCommand(b0, input));
// }
// else if (b0 >= 32 && b0 <= 255)
// {
// sequence.add(readNumber(b0, input));
// }
// else
// {
// throw new IllegalArgumentException();
// }
// }
// return sequence;
// }
// private CharStringCommand readCommand(int b0, DataInput input)
// {
// if (b0 == 1 || b0 == 18)
// {
// hstemCount += peekNumbers().size() / 2;
// }
// else if (b0 == 3 || b0 == 19 || b0 == 20 || b0 == 23)
// {
// vstemCount += peekNumbers().size() / 2;
// } // End if
// if (b0 == 12)
// {
// int b1 = input.readUnsignedByte();
// return new CharStringCommand(b0, b1);
// }
// else if (b0 == 19 || b0 == 20)
// {
// int[] value = new int[1 + getMaskLength()];
// value[0] = b0;
// for (int i = 1; i < value.length; i++)
// {
// value[i] = input.readUnsignedByte();
// }
// return new CharStringCommand(value);
// }
// return new CharStringCommand(b0);
// }
// private Number readNumber(int b0, DataInput input)
// {
// if (b0 == 28)
// {
// return (int) input.readShort();
// }
// else if (b0 >= 32 && b0 <= 246)
// {
// return b0 - 139;
// }
// else if (b0 >= 247 && b0 <= 250)
// {
// int b1 = input.readUnsignedByte();
// return (b0 - 247) * 256 + b1 + 108;
// }
// else if (b0 >= 251 && b0 <= 254)
// {
// int b1 = input.readUnsignedByte();
// return -(b0 - 251) * 256 - b1 - 108;
// }
// else if (b0 == 255)
// {
// short value = input.readShort();
// // The lower bytes are representing the digits after the decimal point
// double fraction = input.readUnsignedShort() / 65535d;
// return value + fraction;
// }
// else
// {
// throw new IllegalArgumentException();
// }
// }
// private int getMaskLength()
// {
// int hintCount = hstemCount + vstemCount;
// int length = hintCount / 8;
// if (hintCount % 8 > 0)
// {
// length++;
// }
// return length;
// }
// private List<Number> peekNumbers()
// {
// List<Number> numbers = new ArrayList<>();
// for (int i = sequence.size() - 1; i > -1; i--)
// {
// Object object = sequence.get(i);
// if (!(object instanceof Number))
// {
// return numbers;
// }
// numbers.add(0, (Number) object);
// }
// return numbers;
// }
// }
//}

View File

@ -0,0 +1,46 @@
using System;
namespace UglyToad.PdfPig.Fonts.CompactFontFormat
{
/// <summary>
/// Reads INDEX structures (a count, an offset size, an array of offsets and then the object data)
/// from Compact Font Format (CFF) data, starting at the data's current position.
/// </summary>
internal class CompactFontFormatIndexReader
{
    /// <summary>
    /// Reads an INDEX followed by the objects it describes.
    /// </summary>
    /// <param name="data">The CFF data positioned at the start of the INDEX.</param>
    /// <returns>One byte array per object in the INDEX; an empty array for an empty INDEX.</returns>
    /// <exception cref="InvalidOperationException">Two consecutive offsets describe a negative object length.</exception>
    public byte[][] ReadDictionaryData(CompactFontFormatData data)
    {
        var index = ReadIndex(data);

        // There is always one more offset than there are objects.
        var count = index.Length - 1;

        var results = new byte[count][];

        for (var i = 0; i < count; i++)
        {
            // The size of each object is the distance between its offset and the next one.
            var length = index[i + 1] - index[i];

            if (length < 0)
            {
                throw new InvalidOperationException($"Negative object length {length} at {i}. Current position: {data.Position}.");
            }

            results[i] = data.ReadBytes(length);
        }

        return results;
    }

    /// <summary>
    /// Reads the header and offset array of an INDEX.
    /// </summary>
    /// <param name="data">The CFF data positioned at the start of the INDEX.</param>
    /// <returns>
    /// The offsets of the INDEX. The array always contains (object count + 1) entries, so callers
    /// can derive the object count as <c>Length - 1</c>. For an empty INDEX a single zero entry is returned.
    /// </returns>
    public int[] ReadIndex(CompactFontFormatData data)
    {
        var count = data.ReadCard16();

        if (count == 0)
        {
            // The CFF specification represents an empty INDEX by the 2 byte count field alone:
            // there is no offset size and no offset array to read. Reading them anyway would
            // consume bytes belonging to whatever follows the INDEX.
            return new int[1];
        }

        var offsetSize = data.ReadOffsize();

        var offsets = new int[count + 1];

        for (var i = 0; i < offsets.Length; i++)
        {
            offsets[i] = data.ReadOffset(offsetSize);
        }

        return offsets;
    }
}
}

View File

@ -1,381 +1,48 @@
namespace UglyToad.PdfPig.Fonts.CompactFontFormat namespace UglyToad.PdfPig.Fonts.CompactFontFormat
{ {
using System; using Dictionaries;
using System.Collections.Generic;
using System.Text;
using Core;
using Geometry;
internal class CompactFontFormatIndividualFontParser internal class CompactFontFormatIndividualFontParser
{ {
private readonly CompactFontFormatIndexReader indexReader;
private readonly CompactFontFormatTopLevelDictionaryReader topLevelDictionaryReader;
private readonly CompactFontFormatPrivateDictionaryReader privateDictionaryReader;
public CompactFontFormatIndividualFontParser(CompactFontFormatIndexReader indexReader,
CompactFontFormatTopLevelDictionaryReader topLevelDictionaryReader,
CompactFontFormatPrivateDictionaryReader privateDictionaryReader)
{
this.indexReader = indexReader;
this.topLevelDictionaryReader = topLevelDictionaryReader;
this.privateDictionaryReader = privateDictionaryReader;
}
public void Parse(CompactFontFormatData data, string name, byte[] topDictionaryIndex, string[] stringIndex) public void Parse(CompactFontFormatData data, string name, byte[] topDictionaryIndex, string[] stringIndex)
{ {
var individualData = new CompactFontFormatData(topDictionaryIndex); var individualData = new CompactFontFormatData(topDictionaryIndex);
var dictionary = ReadTopLevelDictionary(individualData, stringIndex); var dictionary = topLevelDictionaryReader.Read(individualData, stringIndex);
}
private static CompactFontFormatFontDictionary ReadTopLevelDictionary(CompactFontFormatData data, string[] stringIndex) var privateDictionary = new CompactFontFormatPrivateDictionary();
{
var dictionary = new CompactFontFormatFontDictionary(); if (dictionary.PrivateDictionarySizeAndOffset.Item2 >= 0)
while (data.CanRead())
{ {
var numbers = new List<Operand>(); data.Seek(dictionary.PrivateDictionarySizeAndOffset.Item2);
var infiniteLoopProtection = 0; privateDictionary = privateDictionaryReader.Read(data, stringIndex);
while (true)
{
infiniteLoopProtection++;
// Avoid the library getting caught in an infinite loop, probably not possible.
// "An operator may be preceded by up to a maximum of 48 operands."
if (infiniteLoopProtection > 256)
{
throw new InvalidOperationException("Got caught in an infinite loop trying to read a CFF dictionary.");
}
var byte0 = data.ReadByte();
// Operands and operators are distinguished by the first byte, 0 - 21 specify operators
if (byte0 <= 21)
{
ApplyOperator(byte0, numbers, data, stringIndex, dictionary);
break;
}
/*
* b0 value value range calculation
* 32 - 246 -107 - +107 b0 - 139
* 247 - 250 +108 - +1131 (b0 - 247)*256 + b1 + 108
* 251 - 254 -1131 - -108 -(b0 - 251)*256 - b1 - 108
* 28 -32768 - +32767 b1 << 8 | b2
* 29 -(2^31)-+(2^31-1) b1 << 24 | b2 << 16 | b3 << 8 | b4
*
* A byte value of 30 defines a real number operand
*/
if (byte0 == 28)
{
var value = data.ReadByte() << 8 | data.ReadByte();
numbers.Add(new Operand(value));
}
else if (byte0 == 29)
{
var value = data.ReadByte() << 24 | data.ReadByte() << 16 |
data.ReadByte() << 8 | data.ReadByte();
numbers.Add(new Operand(value));
}
else if (byte0 == 30)
{
var realNumber = ReadRealNumber(data);
numbers.Add(new Operand(realNumber));
}
else if (byte0 >= 32 && byte0 <= 246)
{
var value = byte0 - 139;
numbers.Add(new Operand(value));
}
else if (byte0 >= 247 && byte0 <= 250)
{
var value = (byte0 - 247) * 256 + data.ReadByte() + 108;
numbers.Add(new Operand(value));
}
else if (byte0 >= 251 && byte0 <= 254)
{
var value = -(byte0 - 251) * 256 - data.ReadByte() - 108;
numbers.Add(new Operand(value));
}
else
{
throw new InvalidOperationException($"The first dictionary byte was not in the range 29 - 254. Got {byte0}.");
}
}
} }
return dictionary; if (dictionary.CharSetOffset >= 0)
}
private static decimal ReadRealNumber(CompactFontFormatData data)
{
var sb = new StringBuilder();
var done = false;
var exponentMissing = false;
while (!done)
{ {
var b = data.ReadByte();
var nibble1 = b / 16;
var nibble2 = b % 16;
for (var i = 0; i < 2; i++)
{
var nibble = i == 0 ? nibble1 : nibble2;
switch (nibble)
{
case 0x0:
case 0x1:
case 0x2:
case 0x3:
case 0x4:
case 0x5:
case 0x6:
case 0x7:
case 0x8:
case 0x9:
sb.Append(nibble);
exponentMissing = false;
break;
case 0xa:
sb.Append(".");
break;
case 0xb:
sb.Append("E");
exponentMissing = true;
break;
case 0xc:
sb.Append("E-");
exponentMissing = true;
break;
case 0xd:
break;
case 0xe:
sb.Append("-");
break;
case 0xf:
done = true;
break;
default:
throw new InvalidOperationException($"Did not expect nibble value: {nibble}.");
}
}
} }
if (exponentMissing) if (dictionary.CharStringsOffset >= 0)
{ {
// the exponent is missing, just append "0" to avoid an exception data.Seek(dictionary.CharStringsOffset);
// not sure if 0 is the correct value, but it seems to fit
// see PDFBOX-1522
sb.Append("0");
}
if (sb.Length == 0) var index = indexReader.ReadDictionaryData(data);
{
return 0m;
}
return decimal.Parse(sb.ToString());
}
private static void ApplyOperator(byte byte0, List<Operand> operands, CompactFontFormatData data,
string[] stringIndex,
CompactFontFormatFontDictionary dictionary)
{
OperandKey key;
if (byte0 == 12)
{
var b1 = data.ReadByte();
key = new OperandKey(byte0, b1);
}
else
{
key = new OperandKey(byte0);
}
switch (key.Byte0)
{
case 0:
dictionary.Version = GetString(operands, stringIndex);
break;
case 1:
dictionary.Notice = GetString(operands, stringIndex);
break;
case 2:
dictionary.FullName = GetString(operands, stringIndex);
break;
case 3:
dictionary.FamilyName = GetString(operands, stringIndex);
break;
case 4:
dictionary.Weight = GetString(operands, stringIndex);
break;
case 5:
dictionary.FontBoundingBox = GetBoundingBox(operands);
break;
case 12:
{
if (!key.Byte1.HasValue)
{
throw new InvalidOperationException("A single byte sequence beginning with 12 was found.");
}
switch (key.Byte1.Value)
{
case 1:
dictionary.IsFixedPitch = operands[0].Decimal == 1;
break;
case 2:
dictionary.ItalicAngle = operands[0].Decimal;
break;
case 3:
dictionary.UnderlinePosition = operands[0].Decimal;
break;
case 4:
dictionary.UnderlineThickness = operands[0].Decimal;
break;
case 5:
dictionary.PaintType = operands[0].Decimal;
break;
case 6:
dictionary.CharstringType = operands[0].Int.Value;
break;
case 7:
break;
case 8:
break;
}
}
break;
case 13:
dictionary.UniqueId = operands.Count > 0 ? operands[0].Decimal : 0;
break;
case 14:
dictionary.Xuid = ToArray(operands);
break;
case 15:
break;
case 16:
break;
case 17:
break;
case 18:
break;
} }
} }
private static string GetString(List<Operand> operands, string[] stringIndex)
{
if (operands.Count == 0)
{
throw new InvalidOperationException("Cannot read a string from an empty operands array.");
}
if (!operands[0].Int.HasValue)
{
throw new InvalidOperationException($"The first operand for reading a string was not an integer. Got: {operands[0].Decimal}");
}
var index = operands[0].Int.Value;
if (index >= 0 && index <= 390)
{
return CompactFontFormatStandardStrings.GetName(index);
}
var stringIndexIndex = index - 391;
if (stringIndexIndex >= 0 && stringIndexIndex < stringIndex.Length)
{
return stringIndex[stringIndexIndex];
}
return $"SID{index}";
}
private static PdfRectangle GetBoundingBox(List<Operand> operands)
{
if (operands.Count != 4)
{
return new PdfRectangle();
}
return new PdfRectangle(operands[0].Decimal, operands[1].Decimal,
operands[2].Decimal, operands[3].Decimal);
}
private static decimal[] ToArray(List<Operand> operands)
{
var result = new decimal[operands.Count];
for (int i = 0; i < result.Length; i++)
{
result[i] = operands[i].Decimal;
}
return result;
}
private struct Operand
{
public int? Int { get; }
public decimal Decimal { get; }
public Operand(int integer)
{
Int = integer;
Decimal = integer;
}
public Operand(decimal d)
{
Int = null;
Decimal = d;
}
}
private struct OperandKey
{
public byte Byte0 { get; }
public byte? Byte1 { get; }
public OperandKey(Byte byte0)
{
Byte0 = byte0;
Byte1 = null;
}
public OperandKey(byte byte0, byte byte1)
{
Byte0 = byte0;
Byte1 = byte1;
}
}
}
internal class CompactFontFormatFontDictionary
{
public string Version { get; set; }
public string Notice { get; set; }
public string Copyright { get; set; }
public string FullName { get; set; }
public string FamilyName { get; set; }
public string Weight { get; set; }
public bool IsFixedPitch { get; set; }
public decimal ItalicAngle { get; set; }
public decimal UnderlinePosition { get; set; } = -100;
public decimal UnderlineThickness { get; set; } = 50;
public decimal PaintType { get; set; }
public int CharstringType { get; set; }
public TransformationMatrix FontMatrix { get; set; } = TransformationMatrix.FromValues(0.001m, 0m, 0.001m, 0, 0, 0);
public decimal UniqueId { get; set; }
public PdfRectangle FontBoundingBox { get; set; } = new PdfRectangle(0, 0, 0, 0);
public decimal[] Xuid { get; set; }
} }
} }

View File

@ -11,10 +11,12 @@
private const string TagTtfonly = "\u0000\u0001\u0000\u0000"; private const string TagTtfonly = "\u0000\u0001\u0000\u0000";
private readonly CompactFontFormatIndividualFontParser individualFontParser; private readonly CompactFontFormatIndividualFontParser individualFontParser;
private readonly CompactFontFormatIndexReader indexReader;
public CompactFontFormatParser(CompactFontFormatIndividualFontParser individualFontParser) public CompactFontFormatParser(CompactFontFormatIndividualFontParser individualFontParser, CompactFontFormatIndexReader indexReader)
{ {
this.individualFontParser = individualFontParser; this.individualFontParser = individualFontParser;
this.indexReader = indexReader;
} }
public void Parse(CompactFontFormatData data) public void Parse(CompactFontFormatData data)
@ -38,11 +40,11 @@
var fontNames = ReadStringIndex(data); var fontNames = ReadStringIndex(data);
var topLevelDict = ReadDictionaryData(data); var topLevelDict = indexReader.ReadDictionaryData(data);
var stringIndex = ReadStringIndex(data); var stringIndex = ReadStringIndex(data);
var globalSubroutineIndex = ReadDictionaryData(data); var globalSubroutineIndex = indexReader.ReadDictionaryData(data);
for (var i = 0; i < fontNames.Length; i++) for (var i = 0; i < fontNames.Length; i++)
{ {
@ -72,9 +74,9 @@
/// <summary> /// <summary>
/// Reads indexed string data. /// Reads indexed string data.
/// </summary> /// </summary>
private static string[] ReadStringIndex(CompactFontFormatData data) private string[] ReadStringIndex(CompactFontFormatData data)
{ {
var index = ReadIndex(data); var index = indexReader.ReadIndex(data);
var count = index.Length - 1; var count = index.Length - 1;
@ -94,45 +96,6 @@
return result; return result;
} }
private static byte[][] ReadDictionaryData(CompactFontFormatData data)
{
var index = ReadIndex(data);
var count = index.Length - 1;
var results = new byte[count][];
for (var i = 0; i < count; i++)
{
var length = index[i + 1] - index[i];
if (length < 0)
{
throw new InvalidOperationException($"Negative object length {length} at {i}. Current position: {data.Position}.");
}
results[i] = data.ReadBytes(length);
}
return results;
}
private static int[] ReadIndex(CompactFontFormatData data)
{
var count = data.ReadCard16();
var offsetSize = data.ReadOffsize();
var offsets = new int[count + 1];
for (var i = 0; i < offsets.Length; i++)
{
offsets[i] = data.ReadOffset(offsetSize);
}
return offsets;
}
} }
internal class CompactFontFormatData internal class CompactFontFormatData

View File

@ -0,0 +1,295 @@
namespace UglyToad.PdfPig.Fonts.CompactFontFormat.Dictionaries
{
using System;
using System.Collections.Generic;
using System.Text;
using Geometry;
/// <summary>
/// Base class for readers of Compact Font Format (CFF) DICT data. A DICT is a sequence of
/// operands followed by the operator they apply to; this class decodes the operands and hands
/// each operator to <see cref="ApplyOperation"/> so the derived reader can populate its dictionary.
/// </summary>
/// <typeparam name="T">The dictionary type populated by the derived reader.</typeparam>
internal abstract class CompactFontFormatDictionaryReader<T>
{
    // Shared between calls and cleared before each operator's operands are read.
    // Because this is instance state a single reader should not be used concurrently.
    private readonly List<Operand> operands = new List<Operand>();

    /// <summary>
    /// Reads a dictionary of type <typeparamref name="T"/> from the data.
    /// </summary>
    public abstract T Read(CompactFontFormatData data, string[] stringIndex);

    /// <summary>
    /// Reads operand/operator groups until the data is exhausted, applying each operator to the dictionary.
    /// </summary>
    /// <param name="dictionary">The dictionary instance to populate.</param>
    /// <param name="data">The DICT data to read from its current position.</param>
    /// <param name="stringIndex">The font's string INDEX, passed through to <see cref="ApplyOperation"/>.</param>
    /// <returns>The same <paramref name="dictionary"/> instance which was passed in.</returns>
    /// <exception cref="InvalidOperationException">
    /// A reserved leading byte was encountered or more than 256 items were read without finding an operator.
    /// </exception>
    protected T ReadDictionary(T dictionary, CompactFontFormatData data, string[] stringIndex)
    {
        while (data.CanRead())
        {
            operands.Clear();
            var infiniteLoopProtection = 0;

            while (true)
            {
                infiniteLoopProtection++;

                // Avoid the library getting caught in an infinite loop, probably not possible.
                // "An operator may be preceded by up to a maximum of 48 operands."
                if (infiniteLoopProtection > 256)
                {
                    throw new InvalidOperationException("Got caught in an infinite loop trying to read a CFF dictionary.");
                }

                var byte0 = data.ReadByte();

                // Operands and operators are distinguished by the first byte, 0 - 21 specify operators
                if (byte0 <= 21)
                {
                    // Operator 12 is an escape: the operator is identified by the byte which follows it.
                    var key = byte0 == 12 ? new OperandKey(byte0, data.ReadByte()) : new OperandKey(byte0);

                    ApplyOperation(dictionary, operands, key, stringIndex);
                    break;
                }

                /*
                 * b0 value     value range         calculation
                 * 32 - 246     -107 - +107         b0 - 139
                 * 247 - 250    +108 - +1131        (b0 - 247)*256 + b1 + 108
                 * 251 - 254    -1131 - -108        -(b0 - 251)*256 - b1 - 108
                 * 28           -32768 - +32767     b1 << 8 | b2
                 * 29           -(2^31)-+(2^31-1)   b1 << 24 | b2 << 16 | b3 << 8 | b4
                 *
                 * A byte value of 30 defines a real number operand
                 */
                if (byte0 == 28)
                {
                    // The two bytes form a signed 16 bit integer, reinterpret them as a short
                    // so values with the top bit set become negative rather than 32768 - 65535.
                    int value = unchecked((short)(data.ReadByte() << 8 | data.ReadByte()));
                    operands.Add(new Operand(value));
                }
                else if (byte0 == 29)
                {
                    var value = data.ReadByte() << 24 | data.ReadByte() << 16 |
                                data.ReadByte() << 8 | data.ReadByte();
                    operands.Add(new Operand(value));
                }
                else if (byte0 == 30)
                {
                    var realNumber = ReadRealNumber(data);
                    operands.Add(new Operand(realNumber));
                }
                else if (byte0 >= 32 && byte0 <= 246)
                {
                    var value = byte0 - 139;
                    operands.Add(new Operand(value));
                }
                else if (byte0 >= 247 && byte0 <= 250)
                {
                    var value = (byte0 - 247) * 256 + data.ReadByte() + 108;
                    operands.Add(new Operand(value));
                }
                else if (byte0 >= 251 && byte0 <= 254)
                {
                    var value = -(byte0 - 251) * 256 - data.ReadByte() - 108;
                    operands.Add(new Operand(value));
                }
                else
                {
                    throw new InvalidOperationException($"The first dictionary byte was not in the range 29 - 254. Got {byte0}.");
                }
            }
        }

        return dictionary;
    }

    /// <summary>
    /// Reads a real number operand. The number is stored as a run of 4 bit nibbles, two per byte,
    /// which spell out its text form; the nibble 0xf ends the number.
    /// </summary>
    private static decimal ReadRealNumber(CompactFontFormatData data)
    {
        var sb = new StringBuilder();
        var done = false;
        var exponentMissing = false;

        while (!done)
        {
            var b = data.ReadByte();
            var nibble1 = b / 16;
            var nibble2 = b % 16;

            for (var i = 0; i < 2; i++)
            {
                var nibble = i == 0 ? nibble1 : nibble2;

                switch (nibble)
                {
                    case 0x0:
                    case 0x1:
                    case 0x2:
                    case 0x3:
                    case 0x4:
                    case 0x5:
                    case 0x6:
                    case 0x7:
                    case 0x8:
                    case 0x9:
                        sb.Append(nibble);
                        exponentMissing = false;
                        break;
                    case 0xa:
                        sb.Append(".");
                        break;
                    case 0xb:
                        sb.Append("E");
                        exponentMissing = true;
                        break;
                    case 0xc:
                        sb.Append("E-");
                        exponentMissing = true;
                        break;
                    case 0xd:
                        // Reserved value, ignored.
                        break;
                    case 0xe:
                        sb.Append("-");
                        break;
                    case 0xf:
                        done = true;
                        break;
                    default:
                        throw new InvalidOperationException($"Did not expect nibble value: {nibble}.");
                }
            }
        }

        if (exponentMissing)
        {
            // the exponent is missing, just append "0" to avoid an exception
            // not sure if 0 is the correct value, but it seems to fit
            // see PDFBOX-1522
            sb.Append("0");
        }

        if (sb.Length == 0)
        {
            return 0m;
        }

        // The text may contain an exponent ("E" / "E-") which decimal.Parse rejects with its default
        // number style, and it always uses '.' as the decimal separator regardless of the current culture.
        return decimal.Parse(sb.ToString(),
            System.Globalization.NumberStyles.Float,
            System.Globalization.CultureInfo.InvariantCulture);
    }

    /// <summary>
    /// Applies a single operator and the operands which preceded it to the dictionary.
    /// </summary>
    protected abstract void ApplyOperation(T dictionary, List<Operand> operands, OperandKey operandKey, string[] stringIndex);

    /// <summary>
    /// Resolves the first operand as a string identifier: 0 - 390 are looked up in the standard strings,
    /// larger values index into <paramref name="stringIndex"/> after subtracting 391.
    /// Identifiers outside both ranges produce the placeholder "SID" followed by the identifier.
    /// </summary>
    /// <exception cref="InvalidOperationException">There are no operands or the first operand is not an integer.</exception>
    protected static string GetString(List<Operand> operands, string[] stringIndex)
    {
        if (operands.Count == 0)
        {
            throw new InvalidOperationException("Cannot read a string from an empty operands array.");
        }

        if (!operands[0].Int.HasValue)
        {
            throw new InvalidOperationException($"The first operand for reading a string was not an integer. Got: {operands[0].Decimal}");
        }

        var index = operands[0].Int.Value;

        if (index >= 0 && index <= 390)
        {
            return CompactFontFormatStandardStrings.GetName(index);
        }

        var stringIndexIndex = index - 391;
        if (stringIndexIndex >= 0 && stringIndexIndex < stringIndex.Length)
        {
            return stringIndex[stringIndexIndex];
        }

        return $"SID{index}";
    }

    /// <summary>
    /// Builds a rectangle from exactly four operands, otherwise returns a default rectangle.
    /// </summary>
    protected static PdfRectangle GetBoundingBox(List<Operand> operands)
    {
        if (operands.Count != 4)
        {
            return new PdfRectangle();
        }

        return new PdfRectangle(operands[0].Decimal, operands[1].Decimal,
            operands[2].Decimal, operands[3].Decimal);
    }

    /// <summary>
    /// Copies the decimal value of every operand into a new array.
    /// </summary>
    protected static decimal[] ToArray(List<Operand> operands)
    {
        var result = new decimal[operands.Count];

        for (int i = 0; i < result.Length; i++)
        {
            result[i] = operands[i].Decimal;
        }

        return result;
    }

    /// <summary>
    /// Returns the first operand if it is an integer, otherwise <paramref name="defaultValue"/>.
    /// </summary>
    protected static int GetIntOrDefault(List<Operand> operands, int defaultValue = 0)
    {
        if (operands.Count == 0)
        {
            return defaultValue;
        }

        var first = operands[0];

        if (first.Int.HasValue)
        {
            return first.Int.Value;
        }

        return defaultValue;
    }

    /// <summary>
    /// Converts delta encoded operands to absolute values: the first value is kept as-is and
    /// every following value is the sum of the previous result and the operand.
    /// </summary>
    protected static decimal[] ReadDeltaToArray(List<Operand> operands)
    {
        var results = new decimal[operands.Count];

        if (operands.Count == 0)
        {
            return results;
        }

        results[0] = operands[0].Decimal;

        for (var i = 1; i < operands.Count; i++)
        {
            var previous = results[i - 1];
            var current = operands[i].Decimal;

            results[i] = previous + current;
        }

        return results;
    }

    /// <summary>
    /// A single operand. <see cref="Int"/> is only set when the operand was encoded as an integer,
    /// <see cref="Decimal"/> is always set.
    /// </summary>
    protected struct Operand
    {
        public int? Int { get; }

        public decimal Decimal { get; }

        public Operand(int integer)
        {
            Int = integer;
            Decimal = integer;
        }

        public Operand(decimal d)
        {
            Int = null;
            Decimal = d;
        }
    }

    /// <summary>
    /// Identifies an operator by its first byte and, for two byte operators, its second byte.
    /// </summary>
    protected struct OperandKey
    {
        public byte Byte0 { get; }

        public byte? Byte1 { get; }

        public OperandKey(byte byte0)
        {
            Byte0 = byte0;
            Byte1 = null;
        }

        public OperandKey(byte byte0, byte byte1)
        {
            Byte0 = byte0;
            Byte1 = byte1;
        }
    }
}
}

View File

@ -0,0 +1,47 @@
namespace UglyToad.PdfPig.Fonts.CompactFontFormat.Dictionaries
{
/// <summary>
/// Holds the values read from the Private DICT of a Compact Font Format (CFF) font.
/// Properties with an initializer start at that value, all others start at their type's default.
/// </summary>
internal class CompactFontFormatPrivateDictionary
{
    public decimal[] BlueValues { get; set; }

    public decimal[] OtherBlues { get; set; }

    public decimal[] FamilyBlues { get; set; }

    public decimal[] FamilyOtherBlues { get; set; }

    public decimal BlueScale { get; set; } = 0.039625m;

    public decimal BlueShift { get; set; } = 7;

    public decimal BlueFuzz { get; set; } = 1;

    public decimal StandardHorizontalWidth { get; set; }

    public decimal StandardVerticalWidth { get; set; }

    public decimal[] StemSnapHorizontal { get; set; }

    /// <summary>
    /// The vertical counterpart of <see cref="StemSnapHorizontal"/>. The name is a misspelling of
    /// "StemSnapVertical" which is kept so that existing references continue to compile.
    /// </summary>
    public decimal[] StemStapVertical { get; set; }

    public bool ForceBold { get; set; }

    public decimal LanguageGroup { get; set; }

    /// <summary>
    /// Defaults to 0.06, the value the CFF specification assigns when the operator is absent.
    /// </summary>
    public decimal ExpansionFactor { get; set; } = 0.06m;

    public decimal InitialRandomSeed { get; set; }

    public int LocalSubroutineLocalOffset { get; set; }

    /// <summary>
    /// If a glyph's width equals the default width X it can be omitted from the charstring.
    /// </summary>
    public decimal DefaultWidthX { get; set; }

    /// <summary>
    /// If not equal to <see cref="DefaultWidthX"/>, Glyph width is computed by adding the charstring width to the nominal width X value.
    /// </summary>
    public decimal NominalWidthX { get; set; }
}
}

View File

@ -0,0 +1,90 @@
namespace UglyToad.PdfPig.Fonts.CompactFontFormat.Dictionaries
{
using System;
using System.Collections.Generic;
/// <summary>
/// Populates a <see cref="CompactFontFormatPrivateDictionary"/> from CFF Private DICT data.
/// Operators which are not recognised are ignored.
/// </summary>
internal class CompactFontFormatPrivateDictionaryReader : CompactFontFormatDictionaryReader<CompactFontFormatPrivateDictionary>
{
    /// <summary>
    /// Reads a new private dictionary from the data's current position.
    /// </summary>
    public override CompactFontFormatPrivateDictionary Read(CompactFontFormatData data, string[] stringIndex)
    {
        // ReadDictionary hands back the instance it was given once it has been populated.
        return ReadDictionary(new CompactFontFormatPrivateDictionary(), data, stringIndex);
    }

    /// <summary>
    /// Stores the operands of a single operator on the matching dictionary property.
    /// </summary>
    /// <exception cref="InvalidOperationException">The operator is the escape byte 12 with no second byte.</exception>
    protected override void ApplyOperation(CompactFontFormatPrivateDictionary dictionary, List<Operand> operands, OperandKey operandKey, string[] stringIndex)
    {
        var firstByte = operandKey.Byte0;

        if (firstByte == 12)
        {
            ApplyTwoByteOperation(dictionary, operands, operandKey.Byte1);
            return;
        }

        switch (firstByte)
        {
            case 6:
                dictionary.BlueValues = ReadDeltaToArray(operands);
                break;
            case 7:
                dictionary.OtherBlues = ReadDeltaToArray(operands);
                break;
            case 8:
                dictionary.FamilyBlues = ReadDeltaToArray(operands);
                break;
            case 9:
                dictionary.FamilyOtherBlues = ReadDeltaToArray(operands);
                break;
            case 10:
                dictionary.StandardHorizontalWidth = operands[0].Decimal;
                break;
            case 11:
                dictionary.StandardVerticalWidth = operands[0].Decimal;
                break;
            case 19:
                dictionary.LocalSubroutineLocalOffset = GetIntOrDefault(operands, -1);
                break;
            case 20:
                dictionary.DefaultWidthX = operands[0].Decimal;
                break;
            case 21:
                dictionary.NominalWidthX = operands[0].Decimal;
                break;
        }
    }

    /// <summary>
    /// Handles the operators which are introduced by the escape byte 12 and identified by their second byte.
    /// </summary>
    private static void ApplyTwoByteOperation(CompactFontFormatPrivateDictionary dictionary, List<Operand> operands, byte? secondByte)
    {
        if (!secondByte.HasValue)
        {
            throw new InvalidOperationException("In the CFF private dictionary, got the operation key 12 without a second byte.");
        }

        switch (secondByte.Value)
        {
            case 9:
                dictionary.BlueScale = operands[0].Decimal;
                break;
            case 10:
                dictionary.BlueShift = operands[0].Decimal;
                break;
            case 11:
                dictionary.BlueFuzz = operands[0].Decimal;
                break;
            case 12:
                dictionary.StemSnapHorizontal = ReadDeltaToArray(operands);
                break;
            case 13:
                dictionary.StemStapVertical = ReadDeltaToArray(operands);
                break;
            case 14:
                dictionary.ForceBold = operands[0].Decimal == 1;
                break;
            case 17:
                dictionary.LanguageGroup = operands[0].Decimal;
                break;
            case 18:
                dictionary.ExpansionFactor = operands[0].Decimal;
                break;
            case 19:
                dictionary.InitialRandomSeed = operands[0].Decimal;
                break;
        }
    }
}
}

View File

@ -0,0 +1,66 @@
namespace UglyToad.PdfPig.Fonts.CompactFontFormat.Dictionaries
{
using System;
using Core;
using Geometry;
/// <summary>
/// Holds the values read from the Top DICT of a Compact Font Format (CFF) font.
/// Properties with an initializer start at that value, all others start at their type's default.
/// </summary>
internal class CompactFontFormatTopLevelDictionary
{
    /// <summary>
    /// The value used for offsets which have not been set by the font.
    /// </summary>
    public const int UnsetOffset = -1;

    public string Version { get; set; }

    public string Notice { get; set; }

    public string Copyright { get; set; }

    public string FullName { get; set; }

    public string FamilyName { get; set; }

    public string Weight { get; set; }

    public bool IsFixedPitch { get; set; }

    public decimal ItalicAngle { get; set; }

    public decimal UnderlinePosition { get; set; } = -100;

    public decimal UnderlineThickness { get; set; } = 50;

    public decimal PaintType { get; set; }

    public int CharstringType { get; set; } = 2;

    /// <summary>
    /// Defaults to the matrix [0.001 0 0 0.001 0 0] given by the CFF specification,
    /// which scales both axes by one thousandth.
    /// </summary>
    public TransformationMatrix FontMatrix { get; set; } = TransformationMatrix.FromValues(0.001m, 0m, 0m, 0.001m, 0, 0);

    public decimal StrokeWidth { get; set; }

    public decimal UniqueId { get; set; }

    public PdfRectangle FontBoundingBox { get; set; } = new PdfRectangle(0, 0, 0, 0);

    public decimal[] Xuid { get; set; }

    public int CharSetOffset { get; set; } = UnsetOffset;

    public int EncodingOffset { get; set; } = UnsetOffset;

    private Tuple<int, int> privateDictionarySizeAndOffset = Tuple.Create(0, UnsetOffset);

    /// <summary>
    /// The size (first item) and offset (second item) of the private dictionary.
    /// Never returns <see langword="null"/>: assigning null results in a size of 0 and an unset offset.
    /// </summary>
    public Tuple<int, int> PrivateDictionarySizeAndOffset
    {
        get => privateDictionarySizeAndOffset ?? Tuple.Create(0, UnsetOffset);
        set => privateDictionarySizeAndOffset = value;
    }

    public int CharStringsOffset { get; set; } = UnsetOffset;

    public int SyntheticBaseFontIndex { get; set; }

    public string PostScript { get; set; }

    public string BaseFontName { get; set; }

    public decimal[] BaseFontBlend { get; set; }
}
}

View File

@ -0,0 +1,145 @@
namespace UglyToad.PdfPig.Fonts.CompactFontFormat.Dictionaries
{
using System;
using System.Collections.Generic;
using Core;
/// <summary>
/// Reads the top level dictionary of a Compact Font Format font into a
/// <see cref="CompactFontFormatTopLevelDictionary"/>.
/// </summary>
internal class CompactFontFormatTopLevelDictionaryReader : CompactFontFormatDictionaryReader<CompactFontFormatTopLevelDictionary>
{
    /// <summary>
    /// Creates a new <see cref="CompactFontFormatTopLevelDictionary"/> and populates it from the data.
    /// </summary>
    /// <param name="data">The data to read the dictionary from.</param>
    /// <param name="stringIndex">The strings used to resolve string-valued entries.</param>
    /// <returns>The populated dictionary.</returns>
    public override CompactFontFormatTopLevelDictionary Read(CompactFontFormatData data, string[] stringIndex)
    {
        var dictionary = new CompactFontFormatTopLevelDictionary();

        ReadDictionary(dictionary, data, stringIndex);

        return dictionary;
    }

    /// <summary>
    /// Applies a single operator and its operands to the dictionary.
    /// Operators which are not recognised are ignored.
    /// </summary>
    /// <param name="dictionary">The dictionary to update.</param>
    /// <param name="operands">The operands preceding the operator.</param>
    /// <param name="key">The operator; a first byte of 12 indicates a two-byte operator.</param>
    /// <param name="stringIndex">The strings used to resolve string-valued entries.</param>
    /// <exception cref="InvalidOperationException">
    /// The operator 12 has no second byte, the font matrix does not have four or six values
    /// or the private dictionary entry does not have both a size and an offset.
    /// </exception>
    protected override void ApplyOperation(CompactFontFormatTopLevelDictionary dictionary, List<Operand> operands, OperandKey key, string[] stringIndex)
    {
        switch (key.Byte0)
        {
            case 0:
                dictionary.Version = GetString(operands, stringIndex);
                break;
            case 1:
                dictionary.Notice = GetString(operands, stringIndex);
                break;
            case 2:
                dictionary.FullName = GetString(operands, stringIndex);
                break;
            case 3:
                dictionary.FamilyName = GetString(operands, stringIndex);
                break;
            case 4:
                dictionary.Weight = GetString(operands, stringIndex);
                break;
            case 5:
                dictionary.FontBoundingBox = GetBoundingBox(operands);
                break;
            case 12:
                {
                    if (!key.Byte1.HasValue)
                    {
                        throw new InvalidOperationException("A single byte sequence beginning with 12 was found.");
                    }

                    ApplyTwoByteOperation(dictionary, operands, key.Byte1.Value, stringIndex);
                }
                break;
            case 13:
                dictionary.UniqueId = operands.Count > 0 ? operands[0].Decimal : 0;
                break;
            case 14:
                dictionary.Xuid = ToArray(operands);
                break;
            case 15:
                dictionary.CharSetOffset = GetIntOrDefault(operands);
                break;
            case 16:
                dictionary.EncodingOffset = GetIntOrDefault(operands);
                break;
            case 17:
                dictionary.CharStringsOffset = GetIntOrDefault(operands);
                break;
            case 18:
                {
                    // The entry is a (size, offset) pair. Without this check RemoveAt would fail
                    // with an unhelpful ArgumentOutOfRangeException when operands are missing.
                    if (operands.Count < 2)
                    {
                        throw new InvalidOperationException($"Expected a size and an offset for the private dictionary, instead got {operands.Count} operand(s).");
                    }

                    var size = GetIntOrDefault(operands);

                    // Drop the size so the next read returns the offset.
                    operands.RemoveAt(0);

                    var offset = GetIntOrDefault(operands);

                    dictionary.PrivateDictionarySizeAndOffset = Tuple.Create(size, offset);
                }
                break;
        }
    }

    /// <summary>
    /// Applies an operator whose first byte is 12, selected by its second byte.
    /// </summary>
    private void ApplyTwoByteOperation(CompactFontFormatTopLevelDictionary dictionary, List<Operand> operands, int secondByte, string[] stringIndex)
    {
        switch (secondByte)
        {
            case 0:
                dictionary.Copyright = GetString(operands, stringIndex);
                break;
            case 1:
                dictionary.IsFixedPitch = operands[0].Decimal == 1;
                break;
            case 2:
                dictionary.ItalicAngle = operands[0].Decimal;
                break;
            case 3:
                dictionary.UnderlinePosition = operands[0].Decimal;
                break;
            case 4:
                dictionary.UnderlineThickness = operands[0].Decimal;
                break;
            case 5:
                dictionary.PaintType = operands[0].Decimal;
                break;
            case 6:
                dictionary.CharstringType = GetIntOrDefault(operands);
                break;
            case 7:
                {
                    var array = ToArray(operands);

                    if (array.Length == 6)
                    {
                        // The full form of the font matrix: a, b, c, d, e, f.
                        dictionary.FontMatrix = TransformationMatrix.FromValues(array[0], array[1], array[2], array[3], array[4], array[5]);
                    }
                    else if (array.Length == 4)
                    {
                        // Four value form, previously the only accepted one; kept for backwards compatibility.
                        dictionary.FontMatrix = TransformationMatrix.FromArray(array);
                    }
                    else
                    {
                        throw new InvalidOperationException($"Expected four or six values for the font matrix, instead got {array.Length}: [{string.Join(", ", array)}].");
                    }
                }
                break;
            case 8:
                dictionary.StrokeWidth = operands[0].Decimal;
                break;
            case 20:
                dictionary.SyntheticBaseFontIndex = GetIntOrDefault(operands);
                break;
            case 21:
                dictionary.PostScript = GetString(operands, stringIndex);
                break;
            case 22:
                dictionary.BaseFontName = GetString(operands, stringIndex);
                break;
            case 23:
                dictionary.BaseFontBlend = ReadDeltaToArray(operands);
                break;
            // TODO: CID Font Stuff
            case 30:
            case 31:
            case 32:
            case 33:
            case 34:
            case 35:
            case 36:
            case 37:
            case 38:
                break;
        }
    }
}
}

View File

@ -165,7 +165,7 @@
} }
else else
{ {
mediaBox = pageTreeMembers.GetMediaBox(); mediaBox = pageTreeMembers.MediaBox;
if (mediaBox == null) if (mediaBox == null)
{ {

View File

@ -9,6 +9,7 @@
using Filters; using Filters;
using Fonts; using Fonts;
using Fonts.CompactFontFormat; using Fonts.CompactFontFormat;
using Fonts.CompactFontFormat.Dictionaries;
using Fonts.Parser; using Fonts.Parser;
using Fonts.Parser.Handlers; using Fonts.Parser.Handlers;
using Fonts.Parser.Parts; using Fonts.Parser.Parts;
@ -102,13 +103,16 @@
var cidFontFactory = new CidFontFactory(pdfScanner, fontDescriptorFactory, trueTypeFontParser, filterProvider); var cidFontFactory = new CidFontFactory(pdfScanner, fontDescriptorFactory, trueTypeFontParser, filterProvider);
var encodingReader = new EncodingReader(pdfScanner); var encodingReader = new EncodingReader(pdfScanner);
var compactFontFormatIndexReader = new CompactFontFormatIndexReader();
var fontFactory = new FontFactory(log, new Type0FontHandler(cidFontFactory, var fontFactory = new FontFactory(log, new Type0FontHandler(cidFontFactory,
cMapCache, cMapCache,
filterProvider, pdfScanner), filterProvider, pdfScanner),
new TrueTypeFontHandler(log, pdfScanner, filterProvider, cMapCache, fontDescriptorFactory, trueTypeFontParser, encodingReader), new TrueTypeFontHandler(log, pdfScanner, filterProvider, cMapCache, fontDescriptorFactory, trueTypeFontParser, encodingReader),
new Type1FontHandler(pdfScanner, cMapCache, filterProvider, fontDescriptorFactory, encodingReader, new Type1FontHandler(pdfScanner, cMapCache, filterProvider, fontDescriptorFactory, encodingReader,
new Type1FontParser(new Type1EncryptedPortionParser()), new Type1FontParser(new Type1EncryptedPortionParser()),
new CompactFontFormatParser(new CompactFontFormatIndividualFontParser())), new CompactFontFormatParser(new CompactFontFormatIndividualFontParser(compactFontFormatIndexReader, new CompactFontFormatTopLevelDictionaryReader(),
new CompactFontFormatPrivateDictionaryReader()), compactFontFormatIndexReader)),
new Type3FontHandler(pdfScanner, cMapCache, filterProvider, encodingReader)); new Type3FontHandler(pdfScanner, cMapCache, filterProvider, encodingReader));
var resourceContainer = new ResourceContainer(pdfScanner, fontFactory); var resourceContainer = new ResourceContainer(pdfScanner, fontFactory);