diff --git a/src/UglyToad.Pdf/Fonts/Cmap/CharacterMapBuilder.cs b/src/UglyToad.Pdf/Fonts/Cmap/CharacterMapBuilder.cs
index 0c4f6fec..4949cf42 100644
--- a/src/UglyToad.Pdf/Fonts/Cmap/CharacterMapBuilder.cs
+++ b/src/UglyToad.Pdf/Fonts/Cmap/CharacterMapBuilder.cs
@@ -1,5 +1,10 @@
namespace UglyToad.Pdf.Fonts.Cmap
{
+ using System.Collections.Generic;
+ using System.Linq;
+ using System.Text;
+ using Util;
+
///
/// A mutable class used when parsing and generating a .
///
@@ -44,5 +49,39 @@
/// Defined as required.
///
public int Type { get; set; } = -1;
+
+ /// <summary>
+ /// The codespace ranges for the character map being built; assigned by the parser after it reads a codespace range section.
+ /// </summary>
+ public IReadOnlyList CodespaceRanges { get; set; }
+
+ /// <summary>
+ /// Maps an integer character code (the input code bytes combined most significant byte first) to its string value.
+ /// </summary>
+ public Dictionary BaseFontCharacterMap { get; } = new Dictionary();
+
+ /// <summary>
+ /// Registers a base font character whose mapped value is supplied as raw bytes.
+ /// </summary>
+ /// <param name="bytes">The bytes of the input character code.</param>
+ /// <param name="value">The bytes of the mapped value; they are decoded to text before being stored.</param>
+ public void AddBaseFontCharacter(IReadOnlyList<byte> bytes, IReadOnlyList<byte> value)
+ {
+     var text = CreateStringFromBytes(value.ToArray());
+
+     AddBaseFontCharacter(bytes, text);
+ }
+
+ /// <summary>
+ /// Registers a base font character, replacing any value already stored for the same code.
+ /// </summary>
+ /// <param name="bytes">The bytes of the input character code.</param>
+ /// <param name="value">The text the code maps to.</param>
+ public void AddBaseFontCharacter(IReadOnlyList<byte> bytes, string value)
+ {
+     BaseFontCharacterMap[GetCodeFromArray(bytes, bytes.Count)] = value;
+ }
+
+ /// <summary>
+ /// Combines the first <paramref name="length"/> bytes of <paramref name="data"/> into a single integer,
+ /// most significant byte first.
+ /// </summary>
+ /// <param name="data">The bytes of the character code.</param>
+ /// <param name="length">How many bytes from the start of <paramref name="data"/> to use.</param>
+ /// <returns>The combined code; zero when <paramref name="length"/> is zero.</returns>
+ private int GetCodeFromArray(IReadOnlyList<byte> data, int length)
+ {
+     var result = 0;
+     var index = 0;
+
+     while (index < length)
+     {
+         // A byte is unsigned, so its value is already in the range 0 to 255 and needs no wrapping.
+         result = (result << 8) | data[index];
+         index++;
+     }
+
+     return result;
+ }
+
+ /// <summary>
+ /// Decodes the bytes of a mapped value to text.
+ /// </summary>
+ /// <param name="bytes">The bytes to decode.</param>
+ /// <returns>
+ /// For exactly one byte, the result of <c>OtherEncodings.BytesAsLatin1String</c>;
+ /// for any other length, the bytes decoded as big-endian UTF-16.
+ /// </returns>
+ private string CreateStringFromBytes(byte[] bytes)
+ {
+     if (bytes.Length == 1)
+     {
+         return OtherEncodings.BytesAsLatin1String(bytes);
+     }
+
+     return Encoding.BigEndianUnicode.GetString(bytes);
+ }
}
}
diff --git a/src/UglyToad.Pdf/Fonts/Cmap/CmapUtils.cs b/src/UglyToad.Pdf/Fonts/Cmap/CmapUtils.cs
index 0c1297d1..2eb64713 100644
--- a/src/UglyToad.Pdf/Fonts/Cmap/CmapUtils.cs
+++ b/src/UglyToad.Pdf/Fonts/Cmap/CmapUtils.cs
@@ -4,7 +4,7 @@
internal static class CmapUtils
{
- public static int ToInt(this byte[] data, int length)
+ public static int ToInt(this IReadOnlyList data, int length)
{
int code = 0;
for (int i = 0; i < length; ++i)
diff --git a/src/UglyToad.Pdf/Fonts/Cmap/CodespaceRange.cs b/src/UglyToad.Pdf/Fonts/Cmap/CodespaceRange.cs
index e335b09a..f5aa086e 100644
--- a/src/UglyToad.Pdf/Fonts/Cmap/CodespaceRange.cs
+++ b/src/UglyToad.Pdf/Fonts/Cmap/CodespaceRange.cs
@@ -1,68 +1,34 @@
-using System;
-using System.Collections.Generic;
-using System.Text;
-
-namespace UglyToad.Pdf.Fonts.Cmap
+namespace UglyToad.Pdf.Fonts.Cmap
{
+ using System.Collections.Generic;
+
///
/// A codespace range is specified by a pair of codes of some particular length giving the lower and upper bounds of that range.
///
public class CodespaceRange
{
- private byte[] start;
- private byte[] end;
- private int startInt;
- private int endInt;
+ /// <summary>
+ /// The bytes of the lower bound of the range, as passed to the constructor.
+ /// </summary>
+ public IReadOnlyList Start { get; }
- public int CodeLength { get; private set; }
+ /// <summary>
+ /// The bytes of the upper bound of the range, as passed to the constructor.
+ /// </summary>
+ public IReadOnlyList End { get; }
- /**
- * Creates a new instance of CodespaceRange.
- */
- public CodespaceRange()
+ /// <summary>
+ /// The lower bound converted to an integer.
+ /// </summary>
+ public int StartInt { get; }
+
+ /// <summary>
+ /// The upper bound converted to an integer.
+ /// </summary>
+ public int EndInt { get; }
+
+ /// <summary>
+ /// The number of bytes in the lower bound.
+ /// </summary>
+ public int CodeLength { get; }
+
+ /// <summary>
+ /// Creates a new instance of <see cref="CodespaceRange"/>.
+ /// </summary>
+ /// <param name="start">The bytes of the lower bound; their count becomes <see cref="CodeLength"/>.</param>
+ /// <param name="end">The bytes of the upper bound.</param>
+ public CodespaceRange(IReadOnlyList start, IReadOnlyList end)
{
+ Start = start;
+ End = end;
+ StartInt = start.ToInt(start.Count);
+ EndInt = end.ToInt(end.Count);
+ CodeLength = start.Count;
}
-
- /** Getter for property end.
- * @return Value of property end.
- *
- */
- public byte[] getEnd()
- {
- return end;
- }
-
- /** Setter for property end.
- * @param endBytes New value of property end.
- *
- */
- void setEnd(byte[] endBytes)
- {
- end = endBytes;
- endInt = endBytes.ToInt(endBytes.Length);
- }
-
- /** Getter for property start.
- * @return Value of property start.
- *
- */
- public byte[] getStart()
- {
- return start;
- }
-
- /** Setter for property start.
- * @param startBytes New value of property start.
- *
- */
- void setStart(byte[] startBytes)
- {
- start = startBytes;
- CodeLength = start.Length;
- startInt = startBytes.ToInt(startBytes.Length);
- }
-
/**
* Returns true if the given code bytes match this codespace range.
*/
@@ -80,7 +46,7 @@ namespace UglyToad.Pdf.Fonts.Cmap
if (codeLen == CodeLength)
{
int value = code.ToInt(codeLen);
- if (value >= startInt && value <= endInt)
+ if (value >= StartInt && value <= EndInt)
{
return true;
}
diff --git a/src/UglyToad.Pdf/Fonts/Parser/CMapParser.cs b/src/UglyToad.Pdf/Fonts/Parser/CMapParser.cs
index 608733d5..323434e2 100644
--- a/src/UglyToad.Pdf/Fonts/Parser/CMapParser.cs
+++ b/src/UglyToad.Pdf/Fonts/Parser/CMapParser.cs
@@ -1,6 +1,7 @@
namespace UglyToad.Pdf.Fonts.Parser
{
using System;
+ using System.Collections.Generic;
using System.Globalization;
using Cmap;
using Cos;
@@ -27,7 +28,38 @@
{
switch (operatorToken.Data)
{
- default:
+ // External CMap references are not supported yet.
+ case "usecmap":
+     throw new NotImplementedException("External CMap files not yet supported, please submit a pull request!");
+ case "begincodespacerange":
+     {
+         // The token before the operator must be the number of entries in the section.
+         if (!(previousToken is NumericToken numeric))
+         {
+             throw new InvalidOperationException("Unexpected token preceding start of codespace range: " + previousToken);
+         }
+
+         ParseCodespaceRange(numeric, scanner, builder);
+     }
+     break;
+ case "beginbfchar":
+     {
+         // The token before the operator must be the number of entries in the section.
+         if (!(previousToken is NumericToken numeric))
+         {
+             throw new InvalidOperationException("Unexpected token preceding start of base font characters: " + previousToken);
+         }
+
+         ParseBaseFontCharacters(numeric, scanner, builder);
+     }
+     break;
+ // The remaining sections are recognised but their contents are not parsed yet.
+ case "beginbfrange":
+     break;
+ case "begincidchar":
+     break;
+ // The CMap operator is spelled "begincidrange"; the earlier label "begingcidrange" could never match.
+ case "begincidrange":
break;
}
}
@@ -42,6 +74,66 @@
return null;
}
+ /// <summary>
+ /// Reads the pairs of hex strings that follow a <c>begincodespacerange</c> operator and stores them on the builder.
+ /// </summary>
+ /// <param name="count">The number of range pairs declared before the operator.</param>
+ /// <param name="tokenScanner">The scanner positioned on the operator; it is advanced past each pair read.</param>
+ /// <param name="builder">The builder that receives the ranges.</param>
+ /// <exception cref="InvalidOperationException">A pair is missing or is not made of two hex tokens.</exception>
+ private static void ParseCodespaceRange(NumericToken count, ITokenScanner tokenScanner, CharacterMapBuilder builder)
+ {
+     /*
+      * For example:
+        3 begincodespacerange
+          <00>   <80>
+          <8140> <9ffc>
+          <a0>   <df>
+        endcodespacerange
+      */
+
+     var ranges = new List<CodespaceRange>(count.Int);
+
+     // A CMap can contain more than one codespace range section, so ranges
+     // read from an earlier section are kept rather than overwritten.
+     if (builder.CodespaceRanges != null)
+     {
+         ranges.AddRange(builder.CodespaceRanges);
+     }
+
+     for (var i = 0; i < count.Int; i++)
+     {
+         if (!tokenScanner.MoveNext() || !(tokenScanner.CurrentToken is HexToken start))
+         {
+             throw new InvalidOperationException("Codespace range contains an unexpected token: " + tokenScanner.CurrentToken);
+         }
+
+         if (!tokenScanner.MoveNext() || !(tokenScanner.CurrentToken is HexToken end))
+         {
+             throw new InvalidOperationException("Codespace range contains an unexpected token: " + tokenScanner.CurrentToken);
+         }
+
+         ranges.Add(new CodespaceRange(start.Bytes, end.Bytes));
+     }
+
+     builder.CodespaceRanges = ranges;
+ }
+
+ /// <summary>
+ /// Reads the entries that follow a <c>beginbfchar</c> operator and adds each one to the builder.
+ /// Every entry is a hex input code followed by either a name or a hex value.
+ /// </summary>
+ /// <param name="numeric">The number of entries declared before the operator.</param>
+ /// <param name="tokenScanner">The scanner positioned on the operator; it is advanced past each entry read.</param>
+ /// <param name="builder">The builder that receives the mappings.</param>
+ /// <exception cref="InvalidOperationException">An entry is missing or contains a token of an unexpected type.</exception>
+ private static void ParseBaseFontCharacters(NumericToken numeric, ITokenScanner tokenScanner, CharacterMapBuilder builder)
+ {
+     for (var i = 0; i < numeric.Int; i++)
+     {
+         // The first token of an entry must be the hex input code.
+         var inputCode = tokenScanner.MoveNext() ? tokenScanner.CurrentToken as HexToken : null;
+         if (inputCode == null)
+         {
+             throw new InvalidOperationException($"Base font characters definition contains invalid item at index {i}: {tokenScanner.CurrentToken}");
+         }
+
+         if (!tokenScanner.MoveNext())
+         {
+             throw new InvalidOperationException($"Base font characters definition contains invalid item at index {i}: {tokenScanner.CurrentToken}");
+         }
+
+         // The second token is the mapped value: a name, or hex bytes to decode.
+         switch (tokenScanner.CurrentToken)
+         {
+             case NameToken characterName:
+                 builder.AddBaseFontCharacter(inputCode.Bytes, characterName.Data.Name);
+                 break;
+             case HexToken characterCode:
+                 builder.AddBaseFontCharacter(inputCode.Bytes, characterCode.Bytes);
+                 break;
+             default:
+                 throw new InvalidOperationException($"Base font characters definition contains invalid item at index {i}: {tokenScanner.CurrentToken}");
+         }
+     }
+ }
+
private static void ParseName(NameToken nameToken, CoreTokenScanner scanner, CharacterMapBuilder builder, bool isLenientParsing)
{
switch (nameToken.Data.Name)
diff --git a/src/UglyToad.Pdf/Tokenization/HexTokenizer.cs b/src/UglyToad.Pdf/Tokenization/HexTokenizer.cs
index 74e12b8a..3976cf60 100644
--- a/src/UglyToad.Pdf/Tokenization/HexTokenizer.cs
+++ b/src/UglyToad.Pdf/Tokenization/HexTokenizer.cs
@@ -1,6 +1,6 @@
namespace UglyToad.Pdf.Tokenization
{
- using System.Text;
+ using System.Collections.Generic;
using IO;
using Parser.Parts;
using Tokens;
@@ -17,8 +17,8 @@
{
return false;
}
-
- var characters = new StringBuilder();
+
+ var characters = new List();
while (inputBytes.MoveNext())
{
@@ -39,10 +39,10 @@
return false;
}
- characters.Append((char)current);
+ characters.Add((char)current);
}
- token = new HexToken(characters.ToString());
+ token = new HexToken(characters);
return true;
}
diff --git a/src/UglyToad.Pdf/Tokenization/Tokens/HexToken.cs b/src/UglyToad.Pdf/Tokenization/Tokens/HexToken.cs
index 3d0c653f..723c056d 100644
--- a/src/UglyToad.Pdf/Tokenization/Tokens/HexToken.cs
+++ b/src/UglyToad.Pdf/Tokenization/Tokens/HexToken.cs
@@ -1,30 +1,73 @@
namespace UglyToad.Pdf.Tokenization.Tokens
{
- using System;
using System.Collections.Generic;
using System.Text;
public class HexToken : IDataToken
{
+ // Lookup from a hexadecimal digit character (0-9, and A-F in either case) to its numeric value.
+ private static readonly Dictionary HexMap = new Dictionary
+ {
+ {'0', 0x00 },
+ {'1', 0x01 },
+ {'2', 0x02 },
+ {'3', 0x03 },
+ {'4', 0x04 },
+ {'5', 0x05 },
+ {'6', 0x06 },
+ {'7', 0x07 },
+ {'8', 0x08 },
+ {'9', 0x09 },
+
+ {'A', 0x0A },
+ {'a', 0x0A },
+ {'B', 0x0B },
+ {'b', 0x0B },
+ {'C', 0x0C },
+ {'c', 0x0C },
+ {'D', 0x0D },
+ {'d', 0x0D },
+ {'E', 0x0E },
+ {'e', 0x0E },
+ {'F', 0x0F },
+ {'f', 0x0F }
+ };
+
+ /// <summary>
+ /// Combines two hexadecimal digit characters into the byte they encode.
+ /// </summary>
+ /// <param name="high">The digit that supplies the upper four bits.</param>
+ /// <param name="low">The digit that supplies the lower four bits.</param>
+ /// <returns>The combined byte.</returns>
+ private static byte Convert(char high, char low)
+ {
+     var upperNibble = HexMap[high] << 4;
+
+     return (byte)(upperNibble | HexMap[low]);
+ }
+
public string Data { get; }
public IReadOnlyList Bytes { get; }
- public HexToken(string characters)
+ /// <summary>
+ /// Creates a hex token from the hexadecimal digit characters read between the delimiters.
+ /// </summary>
+ /// <param name="characters">The hexadecimal digits, two per byte; a trailing unpaired digit is treated as the high digit of a final byte.</param>
+ public HexToken(IReadOnlyList characters)
{
- if (characters.Length % 2 != 0)
- {
- characters += "0";
- }
-
+ var bytes = new List();
var builder = new StringBuilder();
- byte[] raw = new byte[characters.Length / 2];
- for (int i = 0; i < raw.Length; i++)
+
+ // Consume the digits in pairs: the first is the high digit of the byte, the second the low digit.
+ for (int i = 0; i < characters.Count; i += 2)
{
- builder.Append((char)Convert.ToByte(characters.Substring(i * 2, 2), 16));
+ char high = characters[i];
+ char low;
+ // An odd number of digits leaves the last one unpaired; the missing low digit is taken as '0'.
+ if (i == characters.Count - 1)
+ {
+ low = '0';
+ }
+ else
+ {
+ low = characters[i + 1];
+ }
+
+ var b = Convert(high, low);
+ bytes.Add(b);
+ // The string form holds one character per byte, with the byte value used as the character code.
+ builder.Append((char)b);
}
- Bytes = raw;
+ Bytes = bytes;
Data = builder.ToString();
}
}