add initial port of base font range parsing

This commit is contained in:
Eliot Jones
2017-11-19 15:29:05 +00:00
parent 2e5aa37c85
commit 0fd433240b
6 changed files with 202 additions and 3 deletions

View File

@@ -52,6 +52,8 @@
public IReadOnlyList<CodespaceRange> CodespaceRanges { get; set; }
public IReadOnlyList<CidCharacterMapping> CidCharacterMappings { get; set; }
public Dictionary<int, string> BaseFontCharacterMap { get; } = new Dictionary<int, string>();
public void AddBaseFontCharacter(IReadOnlyList<byte> bytes, IReadOnlyList<byte> value)

View File

@@ -0,0 +1,14 @@
namespace UglyToad.Pdf.Fonts.Cmap
{
    /// <summary>
    /// An immutable pair of integer codes: a source code and the destination
    /// code it is mapped to.
    /// </summary>
    public class CidCharacterMapping
    {
        /// <summary>
        /// Creates a mapping from <paramref name="source"/> to <paramref name="destination"/>.
        /// </summary>
        /// <param name="source">The code being mapped from.</param>
        /// <param name="destination">The code being mapped to.</param>
        public CidCharacterMapping(int source, int destination)
        {
            this.Source = source;
            this.Destination = destination;
        }

        /// <summary>
        /// The code being mapped from, exactly as passed to the constructor.
        /// </summary>
        public int Source { get; }

        /// <summary>
        /// The code being mapped to, exactly as passed to the constructor.
        /// </summary>
        public int Destination { get; }
    }
}

View File

@@ -0,0 +1,116 @@
namespace UglyToad.Pdf.Fonts.Parser
{
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using Cmap;
    using Tokenization.Scanner;
    using Tokenization.Tokens;

    /// <summary>
    /// Reads the entries which follow a "beginbfrange" operator and registers
    /// every code in each range with a <see cref="CharacterMapBuilder"/>.
    /// </summary>
    internal class BaseFontRangeParser
    {
        /// <summary>
        /// Reads <c>numeric.Int</c> range entries from the scanner. Each entry is a low source
        /// code (hex), a high source code (hex) and a destination which is either a single hex
        /// value (incremented once per source code) or an array of hex values (one per source code).
        /// </summary>
        /// <param name="numeric">The number of range entries to read.</param>
        /// <param name="scanner">The scanner positioned immediately before the first entry.</param>
        /// <param name="builder">Receives one call to <c>AddBaseFontCharacter</c> per code in each range.</param>
        /// <exception cref="InvalidOperationException">
        /// The tokens do not form a valid range entry, or a destination array is empty or contains a non-hex value.
        /// </exception>
        /// <exception cref="NotImplementedException">The destination is a numeric token.</exception>
        public void Parse(NumericToken numeric, ITokenScanner scanner, CharacterMapBuilder builder)
        {
            for (var i = 0; i < numeric.Int; i++)
            {
                if (!scanner.TryReadToken(out HexToken lowSourceCode))
                {
                    throw new InvalidOperationException("The low source code of a base font range should be a hex, instead it was: " + scanner.CurrentToken);
                }

                if (!scanner.TryReadToken(out HexToken highSourceCode))
                {
                    throw new InvalidOperationException("The high source code of a base font range should be a hex, instead it was: " + scanner.CurrentToken);
                }

                if (!scanner.MoveNext())
                {
                    throw new InvalidOperationException("The input ended before the destination of a base font range could be read.");
                }

                List<byte> destinationBytes;
                ArrayToken destinationArray = null;

                switch (scanner.CurrentToken)
                {
                    case ArrayToken arrayToken:
                        destinationArray = arrayToken;
                        // The first source code maps to the first array element.
                        destinationBytes = ReadArrayEntry(arrayToken, 0);
                        break;
                    case HexToken hexToken:
                        destinationBytes = hexToken.Bytes.ToList();
                        break;
                    case NumericToken _:
                        throw new NotImplementedException("From the spec it seems this possible but the meaning is unclear...");
                    default:
                        throw new InvalidOperationException("The destination of a base font range should be a hex or an array, instead it was: " + scanner.CurrentToken);
                }

                // Copy the low code because it is incremented in place while walking the range.
                var startCode = new List<byte>(lowSourceCode.Bytes);
                var endCode = highSourceCode.Bytes;
                var arrayIndex = 0;

                while (true)
                {
                    // Evaluate before adding so the high code itself is still included in the range.
                    var isLast = Compare(startCode, endCode) >= 0;

                    builder.AddBaseFontCharacter(startCode, destinationBytes);

                    if (isLast)
                    {
                        break;
                    }

                    Increment(startCode, startCode.Count - 1);

                    if (destinationArray == null)
                    {
                        Increment(destinationBytes, destinationBytes.Count - 1);
                    }
                    else
                    {
                        arrayIndex++;

                        // If the array is shorter than the range the last element keeps being used.
                        if (arrayIndex < destinationArray.Data.Count)
                        {
                            destinationBytes = ReadArrayEntry(destinationArray, arrayIndex);
                        }
                    }
                }
            }
        }

        /// <summary>
        /// Returns a mutable copy of the bytes of the hex token at <paramref name="index"/> in the array.
        /// </summary>
        private static List<byte> ReadArrayEntry(ArrayToken array, int index)
        {
            if (index >= array.Data.Count)
            {
                throw new InvalidOperationException("The destination array of a base font range did not contain an element at index: " + index);
            }

            if (array.Data[index] is HexToken hex)
            {
                return hex.Bytes.ToList();
            }

            throw new InvalidOperationException("The destination array of a base font range should only contain hex values, instead it contained: " + array.Data[index]);
        }

        /// <summary>
        /// Adds one to the big-endian number stored in <paramref name="data"/>, starting at
        /// <paramref name="position"/> and carrying into lower indices. The byte at index 0
        /// wraps from 255 to 0 without further carry. Does nothing when <paramref name="position"/>
        /// is negative (i.e. the list is empty).
        /// </summary>
        private static void Increment(IList<byte> data, int position)
        {
            while (position > 0 && data[position] == byte.MaxValue)
            {
                data[position] = 0;
                position--;
            }

            if (position >= 0)
            {
                data[position] = unchecked((byte)(data[position] + 1));
            }
        }

        /// <summary>
        /// Compares two byte sequences position by position over their common length.
        /// </summary>
        /// <returns>
        /// -1 if the first differing byte is smaller in <paramref name="first"/>, 1 if it is larger,
        /// 0 if no byte differs within the common length.
        /// </returns>
        private static int Compare(IReadOnlyList<byte> first, IReadOnlyList<byte> second)
        {
            // Bound by the shorter sequence so mismatched code lengths cannot index out of range.
            var length = Math.Min(first.Count, second.Count);

            for (var i = 0; i < length; i++)
            {
                if (first[i] == second[i])
                {
                    continue;
                }

                return first[i] < second[i] ? -1 : 1;
            }

            return 0;
        }
    }
}

View File

@@ -56,10 +56,33 @@
}
break;
case "beginbfrange":
{
if (previousToken is NumericToken numeric)
{
var parser = new BaseFontRangeParser();
parser.Parse(numeric, scanner, builder);
}
else
{
throw new InvalidOperationException("Unexpected token preceding start of base font character ranges: " + previousToken);
}
}
break;
case "begincidchar":
break;
case "begingcidrange":
{
if (previousToken is NumericToken numeric)
{
var characters = ParseCidCharacters(numeric, scanner);
builder.CidCharacterMappings = characters;
}
else
{
throw new InvalidOperationException("Unexpected token preceding start of Cid character mapping: " + previousToken);
}
break;
}
case "begincidrange":
break;
}
}
@@ -134,7 +157,32 @@
}
}
private static void ParseName(NameToken nameToken, CoreTokenScanner scanner, CharacterMapBuilder builder, bool isLenientParsing)
/// <summary>
/// Reads <c>numeric.Int</c> entries from the scanner, each made of a hex source code
/// followed by a numeric destination code, and returns them as mappings in read order.
/// </summary>
/// <param name="numeric">The number of entries to read.</param>
/// <param name="scanner">The scanner to read the entries from.</param>
/// <returns>One <see cref="CidCharacterMapping"/> per entry read.</returns>
/// <exception cref="InvalidOperationException">An entry did not consist of a hex token followed by a numeric token.</exception>
private static IReadOnlyList<CidCharacterMapping> ParseCidCharacters(NumericToken numeric, ITokenScanner scanner)
{
    var mappings = new List<CidCharacterMapping>();

    var entry = 0;
    while (entry < numeric.Int)
    {
        var hasSource = scanner.TryReadToken(out HexToken source);
        if (!hasSource)
        {
            throw new InvalidOperationException("The first token in a line for Cid Characters should be a hex, instead it was: " + scanner.CurrentToken);
        }

        var hasDestination = scanner.TryReadToken(out NumericToken destination);
        if (!hasDestination)
        {
            throw new InvalidOperationException("The destination token in a line for Cid Character should be an integer, instead it was: " + scanner.CurrentToken);
        }

        // The source bytes are converted to an integer using every byte of the hex token.
        var byteCount = source.Bytes.Count;
        mappings.Add(new CidCharacterMapping(source.Bytes.ToInt(byteCount), destination.Int));

        entry++;
    }

    return mappings;
}
private static void ParseName(NameToken nameToken, ITokenScanner scanner, CharacterMapBuilder builder, bool isLenientParsing)
{
switch (nameToken.Data.Name)
{

View File

@@ -31,6 +31,23 @@
private readonly List<byte> currentBuffer = new List<byte>();
public IToken CurrentToken { get; private set; }
/// <summary>
/// Advances to the next token and, if it is a <typeparamref name="T"/>, returns it.
/// </summary>
/// <typeparam name="T">The token type the caller expects next.</typeparam>
/// <param name="token">The token which was read when it is of the expected type, otherwise null.</param>
/// <returns>
/// True if a token was read and it is a <typeparamref name="T"/>; false if there was no
/// further token or the token read is of a different type.
/// </returns>
public bool TryReadToken<T>(out T token) where T : class, IToken
{
    // The scanner has already moved on when this returns false for a token of the wrong type.
    token = MoveNext() ? CurrentToken as T : null;

    return token != null;
}
private bool hasBytePreRead;

View File

@@ -7,5 +7,7 @@
bool MoveNext();
IToken CurrentToken { get; }
/// <summary>
/// Tries to read a token of type <typeparamref name="T"/>.
/// NOTE(review): presumably this advances the scanner in the same way as <see cref="MoveNext"/>
/// before inspecting <see cref="CurrentToken"/> - confirm against the implementations.
/// </summary>
/// <typeparam name="T">The token type the caller expects; must be a reference type implementing <see cref="IToken"/>.</typeparam>
/// <param name="token">Receives the token on success.</param>
/// <returns>True if a token of type <typeparamref name="T"/> was obtained, otherwise false.</returns>
bool TryReadToken<T>(out T token) where T : class, IToken;
}
}