diff --git a/src/UglyToad.PdfPig.Core/OctalHelpers.cs b/src/UglyToad.PdfPig.Core/OctalHelpers.cs index 5577537e..5d4dde57 100644 --- a/src/UglyToad.PdfPig.Core/OctalHelpers.cs +++ b/src/UglyToad.PdfPig.Core/OctalHelpers.cs @@ -6,72 +6,75 @@ /// Interprets numbers in octal format. /// public static class OctalHelpers - { + { + /// /// Read a short. /// - public static short CharacterToShort(this char c) - { - switch (c) - { - case '0': - return 0; - case '1': - return 1; - case '2': - return 2; - case '3': - return 3; - case '4': - return 4; - case '5': - return 5; - case '6': - return 6; - case '7': - return 7; - case '8': - return 8; - case '9': - return 9; - default: - throw new InvalidOperationException($"Could not convert the character {c} to a short."); - } - } - + public static short CharacterToShort(this char c) + { + switch (c) + { + case '0': + return 0; + case '1': + return 1; + case '2': + return 2; + case '3': + return 3; + case '4': + return 4; + case '5': + return 5; + case '6': + return 6; + case '7': + return 7; + case '8': + return 8; + case '9': + return 9; + default: + throw new InvalidOperationException($"Could not convert the character {c} to a short."); + } + } + /// /// Read an integer from octal digits. /// - public static int FromOctalDigits(short[] octal) - { - int sum = 0; - for (int i = octal.Length - 1; i >= 0; i--) - { - var power = i; - sum += octal[i] * QuickPower(8, power); - } - - return sum; - } - + public static int FromOctalDigits(short[] octal) + { + int sum = 0; + for (int i = octal.Length - 1; i >= 0; i--) + { + var power = i; + sum += octal[i] * QuickPower(8, power); + } + + return sum; + } + /// /// Interpret an int as octal. 
/// public static int FromOctalInt(int input) - { - var str = input.ToString(); + { + return System.Convert.ToInt32($"{input}", 8); + + //var str = input.ToString(); - int sum = 0; - for (var i = 0; i < str.Length; i++) - { - var part = str[str.Length - 1 - i].CharacterToShort(); + //int sum = 0; + //for (var i = 0; i < str.Length; i++) + //{ + // var part = str[str.Length - 1 - i].CharacterToShort(); - sum += part * QuickPower(8, i); - } - - return sum; - } + // sum += part * QuickPower(8, i); + //} + //return sum; + } + private static int QuickPower(int x, int pow) { int ret = 1; @@ -84,6 +87,7 @@ } return ret; - } + } + } } diff --git a/src/UglyToad.PdfPig.Fonts/CompactFontFormat/CharStrings/LazyType2Command.cs b/src/UglyToad.PdfPig.Fonts/CompactFontFormat/CharStrings/LazyType2Command.cs index 748c0122..b6f1cb10 100644 --- a/src/UglyToad.PdfPig.Fonts/CompactFontFormat/CharStrings/LazyType2Command.cs +++ b/src/UglyToad.PdfPig.Fonts/CompactFontFormat/CharStrings/LazyType2Command.cs @@ -8,6 +8,7 @@ /// internal class LazyType2Command { + private readonly int minimumStackParameters; private readonly Action runCommand; /// @@ -19,10 +20,12 @@ /// Create a new . /// /// The name of the command. + /// Minimum number of argument which must be on the stack or -1 if no checking /// The action to execute when evaluating the command. This modifies the . - public LazyType2Command(string name, Action runCommand) + public LazyType2Command(string name, int minimumStackParameters, Action runCommand) { Name = name ?? throw new ArgumentNullException(nameof(name)); + this.minimumStackParameters = minimumStackParameters; this.runCommand = runCommand ?? throw new ArgumentNullException(nameof(runCommand)); } @@ -38,6 +41,13 @@ throw new ArgumentNullException(nameof(context)); } + if (context.Stack.Length < minimumStackParameters) + { + Debug.WriteLine($"Warning: CFF CharString command '{Name}' expected {minimumStackParameters} arguments. Got: {context.Stack.Length}. 
Command ignored and stack cleared."); + context.Stack.Clear(); + return; + } + runCommand(context); } diff --git a/src/UglyToad.PdfPig.Fonts/CompactFontFormat/CharStrings/Type2CharStringParser.cs b/src/UglyToad.PdfPig.Fonts/CompactFontFormat/CharStrings/Type2CharStringParser.cs index 45f98451..b6785ce9 100644 --- a/src/UglyToad.PdfPig.Fonts/CompactFontFormat/CharStrings/Type2CharStringParser.cs +++ b/src/UglyToad.PdfPig.Fonts/CompactFontFormat/CharStrings/Type2CharStringParser.cs @@ -3,7 +3,9 @@ namespace UglyToad.PdfPig.Fonts.CompactFontFormat.CharStrings { using System; using System.Collections.Generic; + using System.Diagnostics; using System.Linq; + using System.Runtime.CompilerServices; using Charsets; using Core; @@ -35,7 +37,7 @@ namespace UglyToad.PdfPig.Fonts.CompactFontFormat.CharStrings private static readonly IReadOnlyDictionary SingleByteCommandStore = new Dictionary { - { HstemByte, new LazyType2Command("hstem", ctx => + { HstemByte, new LazyType2Command("hstem", 2, ctx => { var numberOfEdgeHints = ctx.Stack.Length / 2; var hints = new (double, double)[numberOfEdgeHints]; @@ -62,7 +64,7 @@ namespace UglyToad.PdfPig.Fonts.CompactFontFormat.CharStrings }) }, { - VstemByte, new LazyType2Command("vstem", ctx => + VstemByte, new LazyType2Command("vstem", 2, ctx => { var numberOfEdgeHints = ctx.Stack.Length / 2; var hints = new (double, double)[numberOfEdgeHints]; @@ -89,7 +91,7 @@ namespace UglyToad.PdfPig.Fonts.CompactFontFormat.CharStrings }) }, { 4, - new LazyType2Command("vmoveto", ctx => + new LazyType2Command("vmoveto", 1, ctx => { var dy = ctx.Stack.PopBottom(); @@ -100,7 +102,7 @@ namespace UglyToad.PdfPig.Fonts.CompactFontFormat.CharStrings }) }, { 5, - new LazyType2Command("rlineto", ctx => + new LazyType2Command("rlineto", 2, ctx => { var numberOfLines = ctx.Stack.Length / 2; @@ -116,7 +118,7 @@ namespace UglyToad.PdfPig.Fonts.CompactFontFormat.CharStrings }) }, { 6, - new LazyType2Command("hlineto", ctx => + new LazyType2Command("hlineto", 1, 
ctx => { /* * Appends a horizontal line of length dx1 to the current point. @@ -152,7 +154,7 @@ namespace UglyToad.PdfPig.Fonts.CompactFontFormat.CharStrings }) }, { 7, - new LazyType2Command("vlineto", ctx => + new LazyType2Command("vlineto", 1, ctx => { var isOdd = ctx.Stack.Length % 2 != 0; @@ -182,7 +184,7 @@ namespace UglyToad.PdfPig.Fonts.CompactFontFormat.CharStrings }) }, { 8, - new LazyType2Command("rrcurveto", ctx => + new LazyType2Command("rrcurveto", 6, ctx => { var curveCount = ctx.Stack.Length / 6; for (var i = 0; i < curveCount; i++) @@ -196,14 +198,14 @@ namespace UglyToad.PdfPig.Fonts.CompactFontFormat.CharStrings ctx.Stack.Clear(); }) }, - { 10, new LazyType2Command("callsubr", ctx => {})}, - { 11, new LazyType2Command("return", ctx => {})}, - { 14, new LazyType2Command("endchar", ctx => + { 10, new LazyType2Command("callsubr", 1, ctx => {})}, + { 11, new LazyType2Command("return", 0, ctx => {})}, + { 14, new LazyType2Command("endchar", 0, ctx => { ctx.Stack.Clear(); }) }, - { HstemhmByte, new LazyType2Command("hstemhm", ctx => + { HstemhmByte, new LazyType2Command("hstemhm", 2, ctx => { // Same as vstem except the charstring contains hintmask var numberOfEdgeHints = ctx.Stack.Length / 2; @@ -231,22 +233,22 @@ namespace UglyToad.PdfPig.Fonts.CompactFontFormat.CharStrings }) }, { - HintmaskByte, new LazyType2Command("hintmask", ctx => + HintmaskByte, new LazyType2Command("hintmask", 0, ctx => { // TODO: record this mask somewhere ctx.Stack.Clear(); }) }, { - CntrmaskByte, new LazyType2Command("cntrmask", ctx => + CntrmaskByte, new LazyType2Command("cntrmask", 0,ctx => { // TODO: record this mask somewhere ctx.Stack.Clear(); }) }, { 21, - new LazyType2Command("rmoveto", ctx => - { + new LazyType2Command("rmoveto", 2, ctx => + { var dx = ctx.Stack.PopBottom(); var dy = ctx.Stack.PopBottom(); @@ -260,7 +262,7 @@ namespace UglyToad.PdfPig.Fonts.CompactFontFormat.CharStrings }) }, { 22, - new LazyType2Command("hmoveto", ctx => + new 
LazyType2Command("hmoveto", 1, ctx => { var dx = ctx.Stack.PopBottom(); @@ -270,7 +272,7 @@ namespace UglyToad.PdfPig.Fonts.CompactFontFormat.CharStrings ctx.Stack.Clear(); }) }, - { VstemhmByte, new LazyType2Command("vstemhm", ctx => + { VstemhmByte, new LazyType2Command("vstemhm", 2, ctx => { // Same as vstem except the charstring contains hintmask var numberOfEdgeHints = ctx.Stack.Length / 2; @@ -299,7 +301,7 @@ namespace UglyToad.PdfPig.Fonts.CompactFontFormat.CharStrings }, { 24, - new LazyType2Command("rcurveline", ctx => + new LazyType2Command("rcurveline", 8, ctx => { var numberOfCurves = (ctx.Stack.Length - 2) / 6; for (var i = 0; i < numberOfCurves; i++) @@ -315,7 +317,7 @@ namespace UglyToad.PdfPig.Fonts.CompactFontFormat.CharStrings }) }, { 25, - new LazyType2Command("rlinecurve", ctx => + new LazyType2Command("rlinecurve", 8, ctx => { var numberOfLines = (ctx.Stack.Length - 6) / 2; for (var i = 0; i < numberOfLines; i++) @@ -332,7 +334,7 @@ namespace UglyToad.PdfPig.Fonts.CompactFontFormat.CharStrings }) }, { 26, - new LazyType2Command("vvcurveto", ctx => + new LazyType2Command("vvcurveto", 4, ctx => { // dx1? {dya dxb dyb dyc}+ var hasDeltaXFirstCurve = ctx.Stack.Length % 4 != 0; @@ -357,7 +359,7 @@ namespace UglyToad.PdfPig.Fonts.CompactFontFormat.CharStrings ctx.Stack.Clear(); }) }, - { 27, new LazyType2Command("hhcurveto", ctx => + { 27, new LazyType2Command("hhcurveto", 4, ctx => { // dy1? 
{dxa dxb dyb dxc}+ var hasDeltaYFirstCurve = ctx.Stack.Length % 4 != 0; @@ -387,10 +389,10 @@ namespace UglyToad.PdfPig.Fonts.CompactFontFormat.CharStrings ctx.Stack.Clear(); }) }, - { 29, new LazyType2Command("callgsubr", ctx => {}) + { 29, new LazyType2Command("callgsubr", 1, ctx => {}) }, { 30, - new LazyType2Command("vhcurveto", ctx => + new LazyType2Command("vhcurveto", 4, ctx => { var remainder = ctx.Stack.Length % 8; @@ -477,7 +479,7 @@ namespace UglyToad.PdfPig.Fonts.CompactFontFormat.CharStrings }) }, { 31, - new LazyType2Command("hvcurveto", ctx => + new LazyType2Command("hvcurveto", 4, ctx => { var remainder = ctx.Stack.Length % 8; @@ -563,70 +565,72 @@ namespace UglyToad.PdfPig.Fonts.CompactFontFormat.CharStrings ctx.Stack.Clear(); }) }, - { 255, new LazyType2Command("unknown", x => {}) } - }; - + { 255, new LazyType2Command("unknown", -1, x => {}) } + }; + + + private static readonly IReadOnlyDictionary TwoByteCommandStore = new Dictionary { - { 3, new LazyType2Command("and", ctx => ctx.Stack.Push(ctx.Stack.PopTop() != 0 && ctx.Stack.PopTop() != 0 ? 1 : 0))}, - { 4, new LazyType2Command("or", ctx => + { 3, new LazyType2Command("and", 2, ctx => ctx.Stack.Push(ctx.Stack.PopTop() != 0 && ctx.Stack.PopTop() != 0 ? 1 : 0))}, + { 4, new LazyType2Command("or", 2,ctx => { var arg1 = ctx.Stack.PopTop(); var arg2 = ctx.Stack.PopTop(); ctx.Stack.Push(arg1 != 0 || arg2 != 0 ? 1 : 0); })}, - { 5, new LazyType2Command("not", ctx => ctx.Stack.Push(ctx.Stack.PopTop() == 0 ? 1 : 0))}, - { 9, new LazyType2Command("abs", ctx => ctx.Stack.Push(Math.Abs(ctx.Stack.PopTop())))}, - { 10, new LazyType2Command("add", ctx => ctx.Stack.Push(ctx.Stack.PopTop() + ctx.Stack.PopTop()))}, + { 5, new LazyType2Command("not", 1,ctx => ctx.Stack.Push(ctx.Stack.PopTop() == 0 ? 
1 : 0))}, + { 9, new LazyType2Command("abs", 1, ctx => ctx.Stack.Push(Math.Abs(ctx.Stack.PopTop())))}, + { 10, new LazyType2Command("add", 2, ctx => ctx.Stack.Push(ctx.Stack.PopTop() + ctx.Stack.PopTop()))}, { - 11, new LazyType2Command("sub", ctx => + 11, new LazyType2Command("sub", 2, ctx => { var num1 = ctx.Stack.PopTop(); var num2 = ctx.Stack.PopTop(); ctx.Stack.Push(num2 - num1); }) }, - { 12, new LazyType2Command("div", ctx => ctx.Stack.Push(ctx.Stack.PopTop()/ctx.Stack.PopTop()))}, - { 14, new LazyType2Command("neg", ctx => ctx.Stack.Push(-1 * Math.Abs(ctx.Stack.PopTop())))}, + { 12, new LazyType2Command("div", 2, ctx => ctx.Stack.Push(ctx.Stack.PopTop()/ctx.Stack.PopTop()))}, + { 14, new LazyType2Command("neg", 1, ctx => ctx.Stack.Push(-1 * Math.Abs(ctx.Stack.PopTop())))}, // ReSharper disable once EqualExpressionComparison - { 15, new LazyType2Command("eq", ctx => ctx.Stack.Push(ctx.Stack.PopTop() == ctx.Stack.PopTop() ? 1 : 0))}, - { 18, new LazyType2Command("drop", ctx => ctx.Stack.PopTop())}, - { 20, new LazyType2Command("put", ctx => ctx.AddToTransientArray(ctx.Stack.PopTop(), (int)ctx.Stack.PopTop()))}, - { 21, new LazyType2Command("get", ctx => ctx.Stack.Push(ctx.GetFromTransientArray((int)ctx.Stack.PopTop())))}, - { 22, new LazyType2Command("ifelse", x => { })}, + { 15, new LazyType2Command("eq", 2, ctx => ctx.Stack.Push(ctx.Stack.PopTop() == ctx.Stack.PopTop() ? 1 : 0))}, + { 18, new LazyType2Command("drop", 1, ctx => ctx.Stack.PopTop())}, + { 20, new LazyType2Command("put", 2, ctx => ctx.AddToTransientArray(ctx.Stack.PopTop(), (int)ctx.Stack.PopTop()))}, + { 21, new LazyType2Command("get", 1, ctx => ctx.Stack.Push(ctx.GetFromTransientArray((int)ctx.Stack.PopTop())))}, + { 22, new LazyType2Command("ifelse", 4, x => { })}, // TODO: Random, do we want to support this? 
- { 23, new LazyType2Command("random", ctx => ctx.Stack.Push(0.5))}, - { 24, new LazyType2Command("mul", ctx => ctx.Stack.Push(ctx.Stack.PopTop() * ctx.Stack.PopTop()))}, - { 26, new LazyType2Command("sqrt", ctx => ctx.Stack.Push(Math.Sqrt(ctx.Stack.PopTop())))}, + { 23, new LazyType2Command("random", 0, ctx => ctx.Stack.Push(0.5))}, + { 24, new LazyType2Command("mul", 2, ctx => ctx.Stack.Push(ctx.Stack.PopTop() * ctx.Stack.PopTop()))}, + { 26, new LazyType2Command("sqrt", 1, ctx => ctx.Stack.Push(Math.Sqrt(ctx.Stack.PopTop())))}, { - 27, new LazyType2Command("dup", ctx => + 27, new LazyType2Command("dup", 1, ctx => { var val = ctx.Stack.PopTop(); ctx.Stack.Push(val); ctx.Stack.Push(val); }) }, - { 28, new LazyType2Command("exch", ctx => + { 28, new LazyType2Command("exch", 2, ctx => { var num1 = ctx.Stack.PopTop(); var num2 = ctx.Stack.PopTop(); ctx.Stack.Push(num1); ctx.Stack.Push(num2); })}, - { 29, new LazyType2Command("index", ctx => + { 29, new LazyType2Command("index", 2, ctx => { var index = ctx.Stack.PopTop(); var val = ctx.Stack.CopyElementAt((int) index); ctx.Stack.Push(val); })}, { - 30, new LazyType2Command("roll", ctx => + 30, new LazyType2Command("roll", 3, ctx => { // TODO: roll }) }, { - 34, new LazyType2Command("hflex", ctx => + 34, new LazyType2Command("hflex", 7, ctx => { // dx1 dx2 dy2 dx3 dx4 dx5 dx6 // Two Bezier curves with an fd of 50 @@ -636,7 +640,7 @@ namespace UglyToad.PdfPig.Fonts.CompactFontFormat.CharStrings }) }, { - 35, new LazyType2Command("flex", ctx => + 35, new LazyType2Command("flex", 13, ctx => { // dx1 dy1 dx2 dy2 dx3 dy3 dx4 dy4 dx5 dy5 dx6 dy6 fd // Two Bezier curves will be represented as a straight line when depth less than fd character space units @@ -656,12 +660,12 @@ namespace UglyToad.PdfPig.Fonts.CompactFontFormat.CharStrings ctx.Stack.Clear(); }) }, - { 36, new LazyType2Command("hflex1", ctx => + { 36, new LazyType2Command("hflex1", 9, ctx => { // TODO: implement ctx.Stack.Clear(); })}, - { 37, new 
LazyType2Command("flex1", ctx => + { 37, new LazyType2Command("flex1", 11, ctx => { // dx1 dy1 dx2 dy2 dx3 dy3 dx4 dy4 dx5 dy5 d6 // d6 is either dx or dy @@ -769,7 +773,7 @@ namespace UglyToad.PdfPig.Fonts.CompactFontFormat.CharStrings { var num = bytes[++i] << 8 | bytes[++i]; // Next 2 bytes are a 16-bit two's complement number. - return (short) (num); + return (short)(num); } if (b >= 32 && b <= 246) diff --git a/src/UglyToad.PdfPig.Fonts/Encodings/Encoding.cs b/src/UglyToad.PdfPig.Fonts/Encodings/Encoding.cs index 7f67cd99..1f2c4d37 100644 --- a/src/UglyToad.PdfPig.Fonts/Encodings/Encoding.cs +++ b/src/UglyToad.PdfPig.Fonts/Encodings/Encoding.cs @@ -60,6 +60,22 @@ } return name; + } + + + /// + /// Get the character code from name + /// + /// Character name (eg. euro, ampersand, A, space) + /// -1 if not found otherwise the character code + public virtual int GetCode(string name) + { + if (!NameToCode.TryGetValue(name, out var code)) + { + return -1; + } + + return code; } /// diff --git a/src/UglyToad.PdfPig.Fonts/Encodings/MacExpertEncoding.cs b/src/UglyToad.PdfPig.Fonts/Encodings/MacExpertEncoding.cs index 22157b54..c8ea6125 100644 --- a/src/UglyToad.PdfPig.Fonts/Encodings/MacExpertEncoding.cs +++ b/src/UglyToad.PdfPig.Fonts/Encodings/MacExpertEncoding.cs @@ -182,9 +182,16 @@ private MacExpertEncoding() { - foreach (var valueTuple in EncodingTable) + foreach ((var codeToBeConverted, var name) in EncodingTable) { - Add(OctalHelpers.FromOctalInt(valueTuple.Item1), valueTuple.Item2); + // In source code an int literal with a leading zero ('0') + // in other languages ('C' and 'Java') would be interpreted + // as octal (base 8) and converted but C# does not support and + // so arrives here as a different value parsed as base10. + // Convert 'codeToBeConverted' to intended value as if it was an octal literal before using. + // For example 040 converts to string "40" then convert string to int again but using base 8 (octal) so result is 32 (base 10). 
+ var code = System.Convert.ToInt32($"{codeToBeConverted}", 8); // alternative is OctalHelpers.FromOctalInt() + Add(code, name); } } } diff --git a/src/UglyToad.PdfPig.Fonts/Encodings/MacOsRomanEncoding.cs b/src/UglyToad.PdfPig.Fonts/Encodings/MacOsRomanEncoding.cs index 732348b1..3bbd06ec 100644 --- a/src/UglyToad.PdfPig.Fonts/Encodings/MacOsRomanEncoding.cs +++ b/src/UglyToad.PdfPig.Fonts/Encodings/MacOsRomanEncoding.cs @@ -10,22 +10,22 @@ { private static readonly (int, string)[] EncodingTable = { - (255, "notequal"), - (260, "infinity"), - (262, "lessequal"), - (263, "greaterequal"), - (266, "partialdiff"), - (267, "summation"), - (270, "product"), - (271, "pi"), - (272, "integral"), - (275, "Omega"), - (303, "radical"), - (305, "approxequal"), - (306, "Delta"), - (327, "lozenge"), - (333, "Euro"), - (360, "apple") + (0255, "notequal"), + (0260, "infinity"), + (0262, "lessequal"), + (0263, "greaterequal"), + (0266, "partialdiff"), + (0267, "summation"), + (0270, "product"), + (0271, "pi"), + (0272, "integral"), + (0275, "Omega"), + (0303, "radical"), + (0305, "approxequal"), + (0306, "Delta"), + (0327, "lozenge"), + (0333, "Euro"), + (0360, "apple") }; /// @@ -35,9 +35,16 @@ private MacOsRomanEncoding() { - foreach (var valueTuple in EncodingTable) + foreach ((var codeToBeConverted, var name) in EncodingTable) { - Add(OctalHelpers.FromOctalInt(valueTuple.Item1), valueTuple.Item2); + // In source code an int literal with a leading zero ('0') + // in other languages ('C' and 'Java') would be interpreted + // as octal (base 8) and converted but C# does not support and + // so arrives here as a different value parsed as base10. + // Convert 'codeToBeConverted' to intended value as if it was an octal literal before using. + // For example 040 converts to string "40" then convert string to int again but using base 8 (octal) so result is 32 (base 10). 
+ var code = System.Convert.ToInt32($"{codeToBeConverted}", 8); // alternative is OctalHelpers.FromOctalInt() + Add(code, name); } } } diff --git a/src/UglyToad.PdfPig.Fonts/Encodings/MacRomanEncoding.cs b/src/UglyToad.PdfPig.Fonts/Encodings/MacRomanEncoding.cs index f70b52ad..68db2ef1 100644 --- a/src/UglyToad.PdfPig.Fonts/Encodings/MacRomanEncoding.cs +++ b/src/UglyToad.PdfPig.Fonts/Encodings/MacRomanEncoding.cs @@ -1,7 +1,8 @@ namespace UglyToad.PdfPig.Fonts.Encodings { - using Core; - + using Core; + using System.Diagnostics; + /// /// The Mac Roman encoding. /// @@ -236,9 +237,16 @@ /// protected MacRomanEncoding() { - foreach (var valueTuple in EncodingTable) + foreach ((var codeToBeConverted, var name) in EncodingTable) { - Add(OctalHelpers.FromOctalInt(valueTuple.Item1), valueTuple.Item2); + // In source code an int literal with a leading zero ('0') + // in other languages ('C' and 'Java') would be interpreted + // as octal (base 8) and converted but C# does not support and + // so arrives here as a different value parsed as base10. + // Convert 'codeToBeConverted' to intended value as if it was an octal literal before using. + // For example 040 converts to string "40" then convert string to int again but using base 8 (octal) so result is 32 (base 10). + var code = System.Convert.ToInt32($"{codeToBeConverted}", 8); // alternative is OctalHelpers.FromOctalInt() + Add(code, name); } } } diff --git a/src/UglyToad.PdfPig.Fonts/Encodings/StandardEncoding.cs b/src/UglyToad.PdfPig.Fonts/Encodings/StandardEncoding.cs index b4c6e04e..fda4ed96 100644 --- a/src/UglyToad.PdfPig.Fonts/Encodings/StandardEncoding.cs +++ b/src/UglyToad.PdfPig.Fonts/Encodings/StandardEncoding.cs @@ -1,7 +1,5 @@ namespace UglyToad.PdfPig.Fonts.Encodings -{ - using Core; - +{ /// /// The standard PDF encoding. 
/// @@ -170,9 +168,16 @@ private StandardEncoding() { - foreach (var valueTuple in EncodingTable) + foreach ((var codeToBeConverted, var name) in EncodingTable) { - Add(OctalHelpers.FromOctalInt(valueTuple.Item1), valueTuple.Item2); + // In source code an int literal with a leading zero ('0') + // in other languages ('C' and 'Java') would be interpreted + // as octal (base 8) and converted but C# does not support and + // so arrives here as a different value parsed as base10. + // Convert 'codeToBeConverted' to intended value as if it was an octal literal before using. + // For example 040 converts to string "40" then convert string to int again but using base 8 (octal) so result is 32 (base 10). + var code = System.Convert.ToInt32($"{codeToBeConverted}", 8); // alternative is OctalHelpers.FromOctalInt() + Add(code, name); } } } diff --git a/src/UglyToad.PdfPig.Fonts/Encodings/SymbolEncoding.cs b/src/UglyToad.PdfPig.Fonts/Encodings/SymbolEncoding.cs index a7391641..c2d3fe92 100644 --- a/src/UglyToad.PdfPig.Fonts/Encodings/SymbolEncoding.cs +++ b/src/UglyToad.PdfPig.Fonts/Encodings/SymbolEncoding.cs @@ -1,216 +1,228 @@ -namespace UglyToad.PdfPig.Fonts.Encodings -{ - /// - /// Symbol encoding. 
- /// - public class SymbolEncoding : Encoding - { - private static readonly (int, string)[] EncodingTable = { - (0101, "Alpha"), - (0102, "Beta"), - (0103, "Chi"), - (0104, "Delta"), - (0105, "Epsilon"), - (0110, "Eta"), - (0240, "Euro"), - (0107, "Gamma"), - (0301, "Ifraktur"), - (0111, "Iota"), - (0113, "Kappa"), - (0114, "Lambda"), - (0115, "Mu"), - (0116, "Nu"), - (0127, "Omega"), - (0117, "Omicron"), - (0106, "Phi"), - (0120, "Pi"), - (0131, "Psi"), - (0302, "Rfraktur"), - (0122, "Rho"), - (0123, "Sigma"), - (0124, "Tau"), - (0121, "Theta"), - (0125, "Upsilon"), - (0241, "Upsilon1"), - (0130, "Xi"), - (0132, "Zeta"), - (0300, "aleph"), - (0141, "alpha"), - (0046, "ampersand"), - (0320, "angle"), - (0341, "angleleft"), - (0361, "angleright"), - (0273, "approxequal"), - (0253, "arrowboth"), - (0333, "arrowdblboth"), - (0337, "arrowdbldown"), - (0334, "arrowdblleft"), - (0336, "arrowdblright"), - (0335, "arrowdblup"), - (0257, "arrowdown"), - (0276, "arrowhorizex"), - (0254, "arrowleft"), - (0256, "arrowright"), - (0255, "arrowup"), - (0275, "arrowvertex"), - (0052, "asteriskmath"), - (0174, "bar"), - (0142, "beta"), - (0173, "braceleft"), - (0175, "braceright"), - (0354, "bracelefttp"), - (0355, "braceleftmid"), - (0356, "braceleftbt"), - (0374, "bracerighttp"), - (0375, "bracerightmid"), - (0376, "bracerightbt"), - (0357, "braceex"), - (0133, "bracketleft"), - (0135, "bracketright"), - (0351, "bracketlefttp"), - (0352, "bracketleftex"), - (0353, "bracketleftbt"), - (0371, "bracketrighttp"), - (0372, "bracketrightex"), - (0373, "bracketrightbt"), - (0267, "bullet"), - (0277, "carriagereturn"), - (0143, "chi"), - (0304, "circlemultiply"), - (0305, "circleplus"), - (0247, "club"), - (0072, "colon"), - (0054, "comma"), - (0100, "congruent"), - (0343, "copyrightsans"), - (0323, "copyrightserif"), - (0260, "degree"), - (0144, "delta"), - (0250, "diamond"), - (0270, "divide"), - (0327, "dotmath"), - (0070, "eight"), - (0316, "element"), - (0274, "ellipsis"), - (0306, 
"emptyset"), - (0145, "epsilon"), - (0075, "equal"), - (0272, "equivalence"), - (0150, "eta"), - (0041, "exclam"), - (0044, "existential"), - (0065, "five"), - (0246, "florin"), - (0064, "four"), - (0244, "fraction"), - (0147, "gamma"), - (0321, "gradient"), - (0076, "greater"), - (0263, "greaterequal"), - (0251, "heart"), - (0245, "infinity"), - (0362, "integral"), - (0363, "integraltp"), - (0364, "integralex"), - (0365, "integralbt"), - (0307, "intersection"), - (0151, "iota"), - (0153, "kappa"), - (0154, "lambda"), - (0074, "less"), - (0243, "lessequal"), - (0331, "logicaland"), - (0330, "logicalnot"), - (0332, "logicalor"), - (0340, "lozenge"), - (0055, "minus"), - (0242, "minute"), - (0155, "mu"), - (0264, "multiply"), - (0071, "nine"), - (0317, "notelement"), - (0271, "notequal"), - (0313, "notsubset"), - (0156, "nu"), - (0043, "numbersign"), - (0167, "omega"), - (0166, "omega1"), - (0157, "omicron"), - (0061, "one"), - (0050, "parenleft"), - (0051, "parenright"), - (0346, "parenlefttp"), - (0347, "parenleftex"), - (0350, "parenleftbt"), - (0366, "parenrighttp"), - (0367, "parenrightex"), - (0370, "parenrightbt"), - (0266, "partialdiff"), - (0045, "percent"), - (0056, "period"), - (0136, "perpendicular"), - (0146, "phi"), - (0152, "phi1"), - (0160, "pi"), - (0053, "plus"), - (0261, "plusminus"), - (0325, "product"), - (0314, "propersubset"), - (0311, "propersuperset"), - (0265, "proportional"), - (0171, "psi"), - (0077, "question"), - (0326, "radical"), - (0140, "radicalex"), - (0315, "reflexsubset"), - (0312, "reflexsuperset"), - (0342, "registersans"), - (0322, "registerserif"), - (0162, "rho"), - (0262, "second"), - (0073, "semicolon"), - (0067, "seven"), - (0163, "sigma"), - (0126, "sigma1"), - (0176, "similar"), - (0066, "six"), - (0057, "slash"), - (0040, "space"), - (0252, "spade"), - (0047, "suchthat"), - (0345, "summation"), - (0164, "tau"), - (0134, "therefore"), - (0161, "theta"), - (0112, "theta1"), - (0063, "three"), - (0344, "trademarksans"), - 
(0324, "trademarkserif"), - (0062, "two"), - (0137, "underscore"), - (0310, "union"), - (0042, "universal"), - (0165, "upsilon"), - (0303, "weierstrass"), - (0170, "xi"), - (0060, "zero"), - (0172, "zeta") - }; - - /// - /// Single instance of this encoding. - /// - public static SymbolEncoding Instance { get; } = new SymbolEncoding(); - - /// - public override string EncodingName => "SymbolEncoding"; - - private SymbolEncoding() - { - foreach (var valueTuple in EncodingTable) - { - Add(valueTuple.Item1, valueTuple.Item2); - } - } - } -} \ No newline at end of file +namespace UglyToad.PdfPig.Fonts.Encodings +{ + /// + /// Symbol encoding. + /// + public class SymbolEncoding : Encoding + { + /// + /// EncodingTable for Symbol + /// PDF Spec 1.7 Page 1013 https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/pdfreference1.7old.pdf#page1013 + /// Note spec has code values as octal (base 8) with leading zero (supported in 'C' and 'Java') but not by C# + /// Code values are already converted to base 10 prior to compile. + /// + private static readonly (int, string)[] EncodingTable = { + ( 65, "Alpha"), // for char 'A' using 65 as base10 equivilant (for C# source). Spec has 0101 octal. ( 65,"Alpha") <=> (0101, "Alpha") , + ( 66, "Beta"), // for char 'B' using 66 as base10 equivilant (for C# source). Spec has 0102 octal. ( 66,"Beta") <=> (0102, "Beta") , + ( 67, "Chi"), // for char 'C' using 67 as base10 equivilant (for C# source). Spec has 0103 octal. ( 67,"Chi") <=> (0103, "Chi") , + ( 68, "Delta"), // for char 'D' using 68 as base10 equivilant (for C# source). Spec has 0104 octal. ( 68,"Delta") <=> (0104, "Delta") , + ( 69, "Epsilon"), // for char 'E' using 69 as base10 equivilant (for C# source). Spec has 0105 octal. ( 69,"Epsilon") <=> (0105, "Epsilon") , + ( 72, "Eta"), // for char 'H' using 72 as base10 equivilant (for C# source). Spec has 0110 octal. 
( 72,"Eta") <=> (0110, "Eta") , + (160, "Euro"), // for char ' ' using 160 as base10 equivilant (for C# source). Spec has 0240 octal. (160,"Euro") <=> (0240, "Euro") , + ( 71, "Gamma"), // for char 'G' using 71 as base10 equivilant (for C# source). Spec has 0107 octal. ( 71,"Gamma") <=> (0107, "Gamma") , + (193, "Ifraktur"), // for char 'Á' using 193 as base10 equivilant (for C# source). Spec has 0301 octal. (193,"Ifraktur") <=> (0301, "Ifraktur") , + ( 73, "Iota"), // for char 'I' using 73 as base10 equivilant (for C# source). Spec has 0111 octal. ( 73,"Iota") <=> (0111, "Iota") , + ( 75, "Kappa"), // for char 'K' using 75 as base10 equivilant (for C# source). Spec has 0113 octal. ( 75,"Kappa") <=> (0113, "Kappa") , + ( 76, "Lambda"), // for char 'L' using 76 as base10 equivilant (for C# source). Spec has 0114 octal. ( 76,"Lambda") <=> (0114, "Lambda") , + ( 77, "Mu"), // for char 'M' using 77 as base10 equivilant (for C# source). Spec has 0115 octal. ( 77,"Mu") <=> (0115, "Mu") , + ( 78, "Nu"), // for char 'N' using 78 as base10 equivilant (for C# source). Spec has 0116 octal. ( 78,"Nu") <=> (0116, "Nu") , + ( 87, "Omega"), // for char 'W' using 87 as base10 equivilant (for C# source). Spec has 0127 octal. ( 87,"Omega") <=> (0127, "Omega") , + ( 79, "Omicron"), // for char 'O' using 79 as base10 equivilant (for C# source). Spec has 0117 octal. ( 79,"Omicron") <=> (0117, "Omicron") , + ( 70, "Phi"), // for char 'F' using 70 as base10 equivilant (for C# source). Spec has 0106 octal. ( 70,"Phi") <=> (0106, "Phi") , + ( 80, "Pi"), // for char 'P' using 80 as base10 equivilant (for C# source). Spec has 0120 octal. ( 80,"Pi") <=> (0120, "Pi") , + ( 89, "Psi"), // for char 'Y' using 89 as base10 equivilant (for C# source). Spec has 0131 octal. ( 89,"Psi") <=> (0131, "Psi") , + (194, "Rfraktur"), // for char 'Â' using 194 as base10 equivilant (for C# source). Spec has 0302 octal. 
(194,"Rfraktur") <=> (0302, "Rfraktur") , + ( 82, "Rho"), // for char 'R' using 82 as base10 equivilant (for C# source). Spec has 0122 octal. ( 82,"Rho") <=> (0122, "Rho") , + ( 83, "Sigma"), // for char 'S' using 83 as base10 equivilant (for C# source). Spec has 0123 octal. ( 83,"Sigma") <=> (0123, "Sigma") , + ( 84, "Tau"), // for char 'T' using 84 as base10 equivilant (for C# source). Spec has 0124 octal. ( 84,"Tau") <=> (0124, "Tau") , + ( 81, "Theta"), // for char 'Q' using 81 as base10 equivilant (for C# source). Spec has 0121 octal. ( 81,"Theta") <=> (0121, "Theta") , + ( 85, "Upsilon"), // for char 'U' using 85 as base10 equivilant (for C# source). Spec has 0125 octal. ( 85,"Upsilon") <=> (0125, "Upsilon") , + (161, "Upsilon1"), // for char '¡' using 161 as base10 equivilant (for C# source). Spec has 0241 octal. (161,"Upsilon1") <=> (0241, "Upsilon1") , + ( 88, "Xi"), // for char 'X' using 88 as base10 equivilant (for C# source). Spec has 0130 octal. ( 88,"Xi") <=> (0130, "Xi") , + ( 90, "Zeta"), // for char 'Z' using 90 as base10 equivilant (for C# source). Spec has 0132 octal. ( 90,"Zeta") <=> (0132, "Zeta") , + (192, "aleph"), // for char 'À' using 192 as base10 equivilant (for C# source). Spec has 0300 octal. (192,"aleph") <=> (0300, "aleph") , + ( 97, "alpha"), // for char 'a' using 97 as base10 equivilant (for C# source). Spec has 0141 octal. ( 97,"alpha") <=> (0141, "alpha") , + ( 38, "ampersand"), // for char '&' using 38 as base10 equivilant (for C# source). Spec has 0046 octal. ( 38,"ampersand") <=> (0046, "ampersand") , + (208, "angle"), // for char 'Ð' using 208 as base10 equivilant (for C# source). Spec has 0320 octal. (208,"angle") <=> (0320, "angle") , + (225, "angleleft"), // for char 'á' using 225 as base10 equivilant (for C# source). Spec has 0341 octal. (225,"angleleft") <=> (0341, "angleleft") , + (241, "angleright"), // for char 'ñ' using 241 as base10 equivilant (for C# source). Spec has 0361 octal. 
(241,"angleright") <=> (0361, "angleright") , + (187, "approxequal"), // for char '»' using 187 as base10 equivilant (for C# source). Spec has 0273 octal. (187,"approxequal") <=> (0273, "approxequal") , + (171, "arrowboth"), // for char '«' using 171 as base10 equivilant (for C# source). Spec has 0253 octal. (171,"arrowboth") <=> (0253, "arrowboth") , + (219, "arrowdblboth"), // for char 'Û' using 219 as base10 equivilant (for C# source). Spec has 0333 octal. (219,"arrowdblboth") <=> (0333, "arrowdblboth") , + (223, "arrowdbldown"), // for char 'ß' using 223 as base10 equivilant (for C# source). Spec has 0337 octal. (223,"arrowdbldown") <=> (0337, "arrowdbldown") , + (220, "arrowdblleft"), // for char 'Ü' using 220 as base10 equivilant (for C# source). Spec has 0334 octal. (220,"arrowdblleft") <=> (0334, "arrowdblleft") , + (222, "arrowdblright"), // for char 'Þ' using 222 as base10 equivilant (for C# source). Spec has 0336 octal. (222,"arrowdblright") <=> (0336, "arrowdblright") , + (221, "arrowdblup"), // for char 'Ý' using 221 as base10 equivilant (for C# source). Spec has 0335 octal. (221,"arrowdblup") <=> (0335, "arrowdblup") , + (175, "arrowdown"), // for char '¯' using 175 as base10 equivilant (for C# source). Spec has 0257 octal. (175,"arrowdown") <=> (0257, "arrowdown") , + (190, "arrowhorizex"), // for char '¾' using 190 as base10 equivilant (for C# source). Spec has 0276 octal. (190,"arrowhorizex") <=> (0276, "arrowhorizex") , + (172, "arrowleft"), // for char '¬' using 172 as base10 equivilant (for C# source). Spec has 0254 octal. (172,"arrowleft") <=> (0254, "arrowleft") , + (174, "arrowright"), // for char '®' using 174 as base10 equivilant (for C# source). Spec has 0256 octal. (174,"arrowright") <=> (0256, "arrowright") , + (173, "arrowup"), // using 173 as base10 equivilant (for C# source). Spec has 0255 octal. (173,"arrowup") <=> (0255, "arrowup") , + (189, "arrowvertex"), // for char '½' using 189 as base10 equivilant (for C# source). 
Spec has 0275 octal. (189,"arrowvertex") <=> (0275, "arrowvertex") , + ( 42, "asteriskmath"), // for char '*' using 42 as base10 equivilant (for C# source). Spec has 0052 octal. ( 42,"asteriskmath") <=> (0052, "asteriskmath") , + (124, "bar"), // for char '|' using 124 as base10 equivilant (for C# source). Spec has 0174 octal. (124,"bar") <=> (0174, "bar") , + ( 98, "beta"), // for char 'b' using 98 as base10 equivilant (for C# source). Spec has 0142 octal. ( 98,"beta") <=> (0142, "beta") , + (123, "braceleft"), // for char '{' using 123 as base10 equivilant (for C# source). Spec has 0173 octal. (123,"braceleft") <=> (0173, "braceleft") , + (125, "braceright"), // for char '}' using 125 as base10 equivilant (for C# source). Spec has 0175 octal. (125,"braceright") <=> (0175, "braceright") , + (236, "bracelefttp"), // for char 'ì' using 236 as base10 equivilant (for C# source). Spec has 0354 octal. (236,"bracelefttp") <=> (0354, "bracelefttp") , + (237, "braceleftmid"), // for char 'í' using 237 as base10 equivilant (for C# source). Spec has 0355 octal. (237,"braceleftmid") <=> (0355, "braceleftmid") , + (238, "braceleftbt"), // for char 'î' using 238 as base10 equivilant (for C# source). Spec has 0356 octal. (238,"braceleftbt") <=> (0356, "braceleftbt") , + (252, "bracerighttp"), // for char 'ü' using 252 as base10 equivilant (for C# source). Spec has 0374 octal. (252,"bracerighttp") <=> (0374, "bracerighttp") , + (253, "bracerightmid"), // for char 'ý' using 253 as base10 equivilant (for C# source). Spec has 0375 octal. (253,"bracerightmid") <=> (0375, "bracerightmid") , + (254, "bracerightbt"), // for char 'þ' using 254 as base10 equivilant (for C# source). Spec has 0376 octal. (254,"bracerightbt") <=> (0376, "bracerightbt") , + (239, "braceex"), // for char 'ï' using 239 as base10 equivilant (for C# source). Spec has 0357 octal. (239,"braceex") <=> (0357, "braceex") , + ( 91, "bracketleft"), // for char '[' using 91 as base10 equivilant (for C# source). 
Spec has 0133 octal. ( 91,"bracketleft") <=> (0133, "bracketleft") , + ( 93, "bracketright"), // for char ']' using 93 as base10 equivilant (for C# source). Spec has 0135 octal. ( 93,"bracketright") <=> (0135, "bracketright") , + (233, "bracketlefttp"), // for char 'é' using 233 as base10 equivilant (for C# source). Spec has 0351 octal. (233,"bracketlefttp") <=> (0351, "bracketlefttp") , + (234, "bracketleftex"), // for char 'ê' using 234 as base10 equivilant (for C# source). Spec has 0352 octal. (234,"bracketleftex") <=> (0352, "bracketleftex") , + (235, "bracketleftbt"), // for char 'ë' using 235 as base10 equivilant (for C# source). Spec has 0353 octal. (235,"bracketleftbt") <=> (0353, "bracketleftbt") , + (249, "bracketrighttp"), // for char 'ù' using 249 as base10 equivilant (for C# source). Spec has 0371 octal. (249,"bracketrighttp") <=> (0371, "bracketrighttp") , + (250, "bracketrightex"), // for char 'ú' using 250 as base10 equivilant (for C# source). Spec has 0372 octal. (250,"bracketrightex") <=> (0372, "bracketrightex") , + (251, "bracketrightbt"), // for char 'û' using 251 as base10 equivilant (for C# source). Spec has 0373 octal. (251,"bracketrightbt") <=> (0373, "bracketrightbt") , + (183, "bullet"), // for char '·' using 183 as base10 equivilant (for C# source). Spec has 0267 octal. (183,"bullet") <=> (0267, "bullet") , + (191, "carriagereturn"), // for char '¿' using 191 as base10 equivilant (for C# source). Spec has 0277 octal. (191,"carriagereturn") <=> (0277, "carriagereturn") , + ( 99, "chi"), // for char 'c' using 99 as base10 equivilant (for C# source). Spec has 0143 octal. ( 99,"chi") <=> (0143, "chi") , + (196, "circlemultiply"), // for char 'Ä' using 196 as base10 equivilant (for C# source). Spec has 0304 octal. (196,"circlemultiply") <=> (0304, "circlemultiply") , + (197, "circleplus"), // for char 'Å' using 197 as base10 equivilant (for C# source). Spec has 0305 octal. 
(197,"circleplus") <=> (0305, "circleplus") , + (167, "club"), // for char '§' using 167 as base10 equivilant (for C# source). Spec has 0247 octal. (167,"club") <=> (0247, "club") , + ( 58, "colon"), // for char ':' using 58 as base10 equivilant (for C# source). Spec has 0072 octal. ( 58,"colon") <=> (0072, "colon") , + ( 44, "comma"), // for char ',' using 44 as base10 equivilant (for C# source). Spec has 0054 octal. ( 44,"comma") <=> (0054, "comma") , + ( 64, "congruent"), // for char '@' using 64 as base10 equivilant (for C# source). Spec has 0100 octal. ( 64,"congruent") <=> (0100, "congruent") , + (227, "copyrightsans"), // for char 'ã' using 227 as base10 equivilant (for C# source). Spec has 0343 octal. (227,"copyrightsans") <=> (0343, "copyrightsans") , + (211, "copyrightserif"), // for char 'Ó' using 211 as base10 equivilant (for C# source). Spec has 0323 octal. (211,"copyrightserif") <=> (0323, "copyrightserif") , + (176, "degree"), // for char '°' using 176 as base10 equivilant (for C# source). Spec has 0260 octal. (176,"degree") <=> (0260, "degree") , + (100, "delta"), // for char 'd' using 100 as base10 equivilant (for C# source). Spec has 0144 octal. (100,"delta") <=> (0144, "delta") , + (168, "diamond"), // for char '¨' using 168 as base10 equivilant (for C# source). Spec has 0250 octal. (168,"diamond") <=> (0250, "diamond") , + (184, "divide"), // for char '¸' using 184 as base10 equivilant (for C# source). Spec has 0270 octal. (184,"divide") <=> (0270, "divide") , + (215, "dotmath"), // for char '×' using 215 as base10 equivilant (for C# source). Spec has 0327 octal. (215,"dotmath") <=> (0327, "dotmath") , + ( 56, "eight"), // for char '8' using 56 as base10 equivilant (for C# source). Spec has 0070 octal. ( 56,"eight") <=> (0070, "eight") , + (206, "element"), // for char 'Î' using 206 as base10 equivilant (for C# source). Spec has 0316 octal. 
(206,"element") <=> (0316, "element") , + (188, "ellipsis"), // for char '¼' using 188 as base10 equivilant (for C# source). Spec has 0274 octal. (188,"ellipsis") <=> (0274, "ellipsis") , + (198, "emptyset"), // for char 'Æ' using 198 as base10 equivilant (for C# source). Spec has 0306 octal. (198,"emptyset") <=> (0306, "emptyset") , + (101, "epsilon"), // for char 'e' using 101 as base10 equivilant (for C# source). Spec has 0145 octal. (101,"epsilon") <=> (0145, "epsilon") , + ( 61, "equal"), // for char '=' using 61 as base10 equivilant (for C# source). Spec has 0075 octal. ( 61,"equal") <=> (0075, "equal") , + (186, "equivalence"), // for char 'º' using 186 as base10 equivilant (for C# source). Spec has 0272 octal. (186,"equivalence") <=> (0272, "equivalence") , + (104, "eta"), // for char 'h' using 104 as base10 equivilant (for C# source). Spec has 0150 octal. (104,"eta") <=> (0150, "eta") , + ( 33, "exclam"), // for char '!' using 33 as base10 equivilant (for C# source). Spec has 0041 octal. ( 33,"exclam") <=> (0041, "exclam") , + ( 36, "existential"), // for char '$' using 36 as base10 equivilant (for C# source). Spec has 0044 octal. ( 36,"existential") <=> (0044, "existential") , + ( 53, "five"), // for char '5' using 53 as base10 equivilant (for C# source). Spec has 0065 octal. ( 53,"five") <=> (0065, "five") , + (166, "florin"), // for char '¦' using 166 as base10 equivilant (for C# source). Spec has 0246 octal. (166,"florin") <=> (0246, "florin") , + ( 52, "four"), // for char '4' using 52 as base10 equivilant (for C# source). Spec has 0064 octal. ( 52,"four") <=> (0064, "four") , + (164, "fraction"), // for char '¤' using 164 as base10 equivilant (for C# source). Spec has 0244 octal. (164,"fraction") <=> (0244, "fraction") , + (103, "gamma"), // for char 'g' using 103 as base10 equivilant (for C# source). Spec has 0147 octal. (103,"gamma") <=> (0147, "gamma") , + (209, "gradient"), // for char 'Ñ' using 209 as base10 equivilant (for C# source). 
Spec has 0321 octal. (209,"gradient") <=> (0321, "gradient") , + ( 62, "greater"), // for char '>' using 62 as base10 equivilant (for C# source). Spec has 0076 octal. ( 62,"greater") <=> (0076, "greater") , + (179, "greaterequal"), // for char '³' using 179 as base10 equivilant (for C# source). Spec has 0263 octal. (179,"greaterequal") <=> (0263, "greaterequal") , + (169, "heart"), // for char '©' using 169 as base10 equivilant (for C# source). Spec has 0251 octal. (169,"heart") <=> (0251, "heart") , + (165, "infinity"), // for char '¥' using 165 as base10 equivilant (for C# source). Spec has 0245 octal. (165,"infinity") <=> (0245, "infinity") , + (242, "integral"), // for char 'ò' using 242 as base10 equivilant (for C# source). Spec has 0362 octal. (242,"integral") <=> (0362, "integral") , + (243, "integraltp"), // for char 'ó' using 243 as base10 equivilant (for C# source). Spec has 0363 octal. (243,"integraltp") <=> (0363, "integraltp") , + (244, "integralex"), // for char 'ô' using 244 as base10 equivilant (for C# source). Spec has 0364 octal. (244,"integralex") <=> (0364, "integralex") , + (245, "integralbt"), // for char 'õ' using 245 as base10 equivilant (for C# source). Spec has 0365 octal. (245,"integralbt") <=> (0365, "integralbt") , + (199, "intersection"), // for char 'Ç' using 199 as base10 equivilant (for C# source). Spec has 0307 octal. (199,"intersection") <=> (0307, "intersection") , + (105, "iota"), // for char 'i' using 105 as base10 equivilant (for C# source). Spec has 0151 octal. (105,"iota") <=> (0151, "iota") , + (107, "kappa"), // for char 'k' using 107 as base10 equivilant (for C# source). Spec has 0153 octal. (107,"kappa") <=> (0153, "kappa") , + (108, "lambda"), // for char 'l' using 108 as base10 equivilant (for C# source). Spec has 0154 octal. (108,"lambda") <=> (0154, "lambda") , + ( 60, "less"), // for char '<' using 60 as base10 equivilant (for C# source). Spec has 0074 octal. 
( 60,"less") <=> (0074, "less") , + (163, "lessequal"), // for char '£' using 163 as base10 equivilant (for C# source). Spec has 0243 octal. (163,"lessequal") <=> (0243, "lessequal") , + (217, "logicaland"), // for char 'Ù' using 217 as base10 equivilant (for C# source). Spec has 0331 octal. (217,"logicaland") <=> (0331, "logicaland") , + (216, "logicalnot"), // for char 'Ø' using 216 as base10 equivilant (for C# source). Spec has 0330 octal. (216,"logicalnot") <=> (0330, "logicalnot") , + (218, "logicalor"), // for char 'Ú' using 218 as base10 equivilant (for C# source). Spec has 0332 octal. (218,"logicalor") <=> (0332, "logicalor") , + (224, "lozenge"), // for char 'à' using 224 as base10 equivilant (for C# source). Spec has 0340 octal. (224,"lozenge") <=> (0340, "lozenge") , + ( 45, "minus"), // for char '-' using 45 as base10 equivilant (for C# source). Spec has 0055 octal. ( 45,"minus") <=> (0055, "minus") , + (162, "minute"), // for char '¢' using 162 as base10 equivilant (for C# source). Spec has 0242 octal. (162,"minute") <=> (0242, "minute") , + (109, "mu"), // for char 'm' using 109 as base10 equivilant (for C# source). Spec has 0155 octal. (109,"mu") <=> (0155, "mu") , + (180, "multiply"), // for char '´' using 180 as base10 equivilant (for C# source). Spec has 0264 octal. (180,"multiply") <=> (0264, "multiply") , + ( 57, "nine"), // for char '9' using 57 as base10 equivilant (for C# source). Spec has 0071 octal. ( 57,"nine") <=> (0071, "nine") , + (207, "notelement"), // for char 'Ï' using 207 as base10 equivilant (for C# source). Spec has 0317 octal. (207,"notelement") <=> (0317, "notelement") , + (185, "notequal"), // for char '¹' using 185 as base10 equivilant (for C# source). Spec has 0271 octal. (185,"notequal") <=> (0271, "notequal") , + (203, "notsubset"), // for char 'Ë' using 203 as base10 equivilant (for C# source). Spec has 0313 octal. 
(203,"notsubset") <=> (0313, "notsubset") , + (110, "nu"), // for char 'n' using 110 as base10 equivilant (for C# source). Spec has 0156 octal. (110,"nu") <=> (0156, "nu") , + ( 35, "numbersign"), // for char '#' using 35 as base10 equivilant (for C# source). Spec has 0043 octal. ( 35,"numbersign") <=> (0043, "numbersign") , + (119, "omega"), // for char 'w' using 119 as base10 equivilant (for C# source). Spec has 0167 octal. (119,"omega") <=> (0167, "omega") , + (118, "omega1"), // for char 'v' using 118 as base10 equivilant (for C# source). Spec has 0166 octal. (118,"omega1") <=> (0166, "omega1") , + (111, "omicron"), // for char 'o' using 111 as base10 equivilant (for C# source). Spec has 0157 octal. (111,"omicron") <=> (0157, "omicron") , + ( 49, "one"), // for char '1' using 49 as base10 equivilant (for C# source). Spec has 0061 octal. ( 49,"one") <=> (0061, "one") , + ( 40, "parenleft"), // for char '(' using 40 as base10 equivilant (for C# source). Spec has 0050 octal. ( 40,"parenleft") <=> (0050, "parenleft") , + ( 41, "parenright"), // for char ')' using 41 as base10 equivilant (for C# source). Spec has 0051 octal. ( 41,"parenright") <=> (0051, "parenright") , + (230, "parenlefttp"), // for char 'æ' using 230 as base10 equivilant (for C# source). Spec has 0346 octal. (230,"parenlefttp") <=> (0346, "parenlefttp") , + (231, "parenleftex"), // for char 'ç' using 231 as base10 equivilant (for C# source). Spec has 0347 octal. (231,"parenleftex") <=> (0347, "parenleftex") , + (232, "parenleftbt"), // for char 'è' using 232 as base10 equivilant (for C# source). Spec has 0350 octal. (232,"parenleftbt") <=> (0350, "parenleftbt") , + (246, "parenrighttp"), // for char 'ö' using 246 as base10 equivilant (for C# source). Spec has 0366 octal. (246,"parenrighttp") <=> (0366, "parenrighttp") , + (247, "parenrightex"), // for char '÷' using 247 as base10 equivilant (for C# source). Spec has 0367 octal. 
(247,"parenrightex") <=> (0367, "parenrightex") , + (248, "parenrightbt"), // for char 'ø' using 248 as base10 equivilant (for C# source). Spec has 0370 octal. (248,"parenrightbt") <=> (0370, "parenrightbt") , + (182, "partialdiff"), // for char '¶' using 182 as base10 equivilant (for C# source). Spec has 0266 octal. (182,"partialdiff") <=> (0266, "partialdiff") , + ( 37, "percent"), // for char '%' using 37 as base10 equivilant (for C# source). Spec has 0045 octal. ( 37,"percent") <=> (0045, "percent") , + ( 46, "period"), // for char '.' using 46 as base10 equivilant (for C# source). Spec has 0056 octal. ( 46,"period") <=> (0056, "period") , + ( 94, "perpendicular"), // for char '^' using 94 as base10 equivilant (for C# source). Spec has 0136 octal. ( 94,"perpendicular") <=> (0136, "perpendicular") , + (102, "phi"), // for char 'f' using 102 as base10 equivilant (for C# source). Spec has 0146 octal. (102,"phi") <=> (0146, "phi") , + (106, "phi1"), // for char 'j' using 106 as base10 equivilant (for C# source). Spec has 0152 octal. (106,"phi1") <=> (0152, "phi1") , + (112, "pi"), // for char 'p' using 112 as base10 equivilant (for C# source). Spec has 0160 octal. (112,"pi") <=> (0160, "pi") , + ( 43, "plus"), // for char '+' using 43 as base10 equivilant (for C# source). Spec has 0053 octal. ( 43,"plus") <=> (0053, "plus") , + (177, "plusminus"), // for char '±' using 177 as base10 equivilant (for C# source). Spec has 0261 octal. (177,"plusminus") <=> (0261, "plusminus") , + (213, "product"), // for char 'Õ' using 213 as base10 equivilant (for C# source). Spec has 0325 octal. (213,"product") <=> (0325, "product") , + (204, "propersubset"), // for char 'Ì' using 204 as base10 equivilant (for C# source). Spec has 0314 octal. (204,"propersubset") <=> (0314, "propersubset") , + (201, "propersuperset"), // for char 'É' using 201 as base10 equivilant (for C# source). Spec has 0311 octal. 
(201,"propersuperset") <=> (0311, "propersuperset") , + (181, "proportional"), // for char 'µ' using 181 as base10 equivilant (for C# source). Spec has 0265 octal. (181,"proportional") <=> (0265, "proportional") , + (121, "psi"), // for char 'y' using 121 as base10 equivilant (for C# source). Spec has 0171 octal. (121,"psi") <=> (0171, "psi") , + ( 63, "question"), // for char '?' using 63 as base10 equivilant (for C# source). Spec has 0077 octal. ( 63,"question") <=> (0077, "question") , + (214, "radical"), // for char 'Ö' using 214 as base10 equivilant (for C# source). Spec has 0326 octal. (214,"radical") <=> (0326, "radical") , + ( 96, "radicalex"), // for char '`' using 96 as base10 equivilant (for C# source). Spec has 0140 octal. ( 96,"radicalex") <=> (0140, "radicalex") , + (205, "reflexsubset"), // for char 'Í' using 205 as base10 equivilant (for C# source). Spec has 0315 octal. (205,"reflexsubset") <=> (0315, "reflexsubset") , + (202, "reflexsuperset"), // for char 'Ê' using 202 as base10 equivilant (for C# source). Spec has 0312 octal. (202,"reflexsuperset") <=> (0312, "reflexsuperset") , + (226, "registersans"), // for char 'â' using 226 as base10 equivilant (for C# source). Spec has 0342 octal. (226,"registersans") <=> (0342, "registersans") , + (210, "registerserif"), // for char 'Ò' using 210 as base10 equivilant (for C# source). Spec has 0322 octal. (210,"registerserif") <=> (0322, "registerserif") , + (114, "rho"), // for char 'r' using 114 as base10 equivilant (for C# source). Spec has 0162 octal. (114,"rho") <=> (0162, "rho") , + (178, "second"), // for char '²' using 178 as base10 equivilant (for C# source). Spec has 0262 octal. (178,"second") <=> (0262, "second") , + ( 59, "semicolon"), // for char ';' using 59 as base10 equivilant (for C# source). Spec has 0073 octal. ( 59,"semicolon") <=> (0073, "semicolon") , + ( 55, "seven"), // for char '7' using 55 as base10 equivilant (for C# source). Spec has 0067 octal. 
( 55,"seven") <=> (0067, "seven") , + (115, "sigma"), // for char 's' using 115 as base10 equivilant (for C# source). Spec has 0163 octal. (115,"sigma") <=> (0163, "sigma") , + ( 86, "sigma1"), // for char 'V' using 86 as base10 equivilant (for C# source). Spec has 0126 octal. ( 86,"sigma1") <=> (0126, "sigma1") , + (126, "similar"), // for char '~' using 126 as base10 equivilant (for C# source). Spec has 0176 octal. (126,"similar") <=> (0176, "similar") , + ( 54, "six"), // for char '6' using 54 as base10 equivilant (for C# source). Spec has 0066 octal. ( 54,"six") <=> (0066, "six") , + ( 47, "slash"), // for char '/' using 47 as base10 equivilant (for C# source). Spec has 0057 octal. ( 47,"slash") <=> (0057, "slash") , + ( 32, "space"), // for char ' ' using 32 as base10 equivilant (for C# source). Spec has 0040 octal. ( 32,"space") <=> (0040, "space") , + (170, "spade"), // for char 'ª' using 170 as base10 equivilant (for C# source). Spec has 0252 octal. (170,"spade") <=> (0252, "spade") , + ( 39, "suchthat"), // for char ''' using 39 as base10 equivilant (for C# source). Spec has 0047 octal. ( 39,"suchthat") <=> (0047, "suchthat") , + (229, "summation"), // for char 'å' using 229 as base10 equivilant (for C# source). Spec has 0345 octal. (229,"summation") <=> (0345, "summation") , + (116, "tau"), // for char 't' using 116 as base10 equivilant (for C# source). Spec has 0164 octal. (116,"tau") <=> (0164, "tau") , + ( 92, "therefore"), // for char '\' using 92 as base10 equivilant (for C# source). Spec has 0134 octal. ( 92,"therefore") <=> (0134, "therefore") , + (113, "theta"), // for char 'q' using 113 as base10 equivilant (for C# source). Spec has 0161 octal. (113,"theta") <=> (0161, "theta") , + ( 74, "theta1"), // for char 'J' using 74 as base10 equivilant (for C# source). Spec has 0112 octal. ( 74,"theta1") <=> (0112, "theta1") , + ( 51, "three"), // for char '3' using 51 as base10 equivilant (for C# source). Spec has 0063 octal. 
( 51,"three") <=> (0063, "three") , + (228, "trademarksans"), // for char 'ä' using 228 as base10 equivilant (for C# source). Spec has 0344 octal. (228,"trademarksans") <=> (0344, "trademarksans") , + (212, "trademarkserif"), // for char 'Ô' using 212 as base10 equivilant (for C# source). Spec has 0324 octal. (212,"trademarkserif") <=> (0324, "trademarkserif") , + ( 50, "two"), // for char '2' using 50 as base10 equivilant (for C# source). Spec has 0062 octal. ( 50,"two") <=> (0062, "two") , + ( 95, "underscore"), // for char '_' using 95 as base10 equivilant (for C# source). Spec has 0137 octal. ( 95,"underscore") <=> (0137, "underscore") , + (200, "union"), // for char 'È' using 200 as base10 equivilant (for C# source). Spec has 0310 octal. (200,"union") <=> (0310, "union") , + ( 34, "universal"), // for char '"' using 34 as base10 equivilant (for C# source). Spec has 0042 octal. ( 34,"universal") <=> (0042, "universal") , + (117, "upsilon"), // for char 'u' using 117 as base10 equivilant (for C# source). Spec has 0165 octal. (117,"upsilon") <=> (0165, "upsilon") , + (195, "weierstrass"), // for char 'Ã' using 195 as base10 equivilant (for C# source). Spec has 0303 octal. (195,"weierstrass") <=> (0303, "weierstrass") , + (120, "xi"), // for char 'x' using 120 as base10 equivilant (for C# source). Spec has 0170 octal. (120,"xi") <=> (0170, "xi") , + ( 48, "zero"), // for char '0' using 48 as base10 equivilant (for C# source). Spec has 0060 octal. ( 48,"zero") <=> (0060, "zero") , + (122, "zeta") // for char 'z' using 122 as base10 equivilant (for C# source). Spec has 0172 octal. (122,"zeta") <=> (0172, "zeta") + }; + + + private static readonly (int, int)[] UnicodeEquivilants = { + (0x391, 65), // Greek Capital Letter Alpha + }; + + /// + /// Single instance of this encoding. 
+ /// + public static SymbolEncoding Instance { get; } = new SymbolEncoding(); + + /// + public override string EncodingName => "SymbolEncoding"; + + private SymbolEncoding() + { + foreach ((var code, var name) in EncodingTable) + { + // Note: codes in EncodingTable are already base 10, so no OctalHelpers.FromOctalInt conversion is needed. + Add(code, name); + } + } + } +} diff --git a/src/UglyToad.PdfPig.Fonts/Encodings/WinAnsiEncoding.cs b/src/UglyToad.PdfPig.Fonts/Encodings/WinAnsiEncoding.cs index c1a35509..bff63d70 100644 --- a/src/UglyToad.PdfPig.Fonts/Encodings/WinAnsiEncoding.cs +++ b/src/UglyToad.PdfPig.Fonts/Encodings/WinAnsiEncoding.cs @@ -1,10 +1,12 @@ namespace UglyToad.PdfPig.Fonts.Encodings { - using Core; + + + /// /// Windows ANSI encoding. - /// + /// public class WinAnsiEncoding : Encoding { /// @@ -244,12 +246,16 @@ private WinAnsiEncoding() { - foreach (var valueTuple in EncodingTable) + foreach ((var codeToBeConverted, var name) in EncodingTable) { - // Convert out of octal before creating - var code = OctalHelpers.FromOctalInt(valueTuple.Item1); - - Add(code, valueTuple.Item2); + // An int literal written with a leading zero ('0') would be read as + // octal (base 8) in other languages ('C' and 'Java'), but C# has no + // octal literals, so the same digits are parsed as base 10 and the + // value that arrives here differs from the intended code. + // Convert 'codeToBeConverted' to the intended value, as if it had been written as an octal literal, before using it. + // For example 040 is formatted as the string "40", which is then parsed again using base 8 (octal), so the result is 32 (base 10). + var code = System.Convert.ToInt32($"{codeToBeConverted}", 8); // alternative is OctalHelpers.FromOctalInt() + Add(code, name); } // In WinAnsiEncoding, all unused codes greater than 40 map to the bullet character.
diff --git a/src/UglyToad.PdfPig.Fonts/Encodings/ZapfDingbatsEncoding.cs b/src/UglyToad.PdfPig.Fonts/Encodings/ZapfDingbatsEncoding.cs index 45cfd9b4..cfe8897a 100644 --- a/src/UglyToad.PdfPig.Fonts/Encodings/ZapfDingbatsEncoding.cs +++ b/src/UglyToad.PdfPig.Fonts/Encodings/ZapfDingbatsEncoding.cs @@ -1,215 +1,222 @@ -namespace UglyToad.PdfPig.Fonts.Encodings -{ - /// - /// Zapf Dingbats encoding. - /// - public class ZapfDingbatsEncoding : Encoding - { - private static readonly (int, string)[] EncodingTable = { - (040, "space"), - (041, "a1"), - (042, "a2"), - (043, "a202"), - (044, "a3"), - (045, "a4"), - (046, "a5"), - (047, "a119"), - (050, "a118"), - (051, "a117"), - (052, "a11"), - (053, "a12"), - (054, "a13"), - (055, "a14"), - (056, "a15"), - (057, "a16"), - (060, "a105"), - (061, "a17"), - (062, "a18"), - (063, "a19"), - (064, "a20"), - (065, "a21"), - (066, "a22"), - (067, "a23"), - (070, "a24"), - (071, "a25"), - (072, "a26"), - (073, "a27"), - (074, "a28"), - (075, "a6"), - (076, "a7"), - (077, "a8"), - (0100, "a9"), - (0101, "a10"), - (0102, "a29"), - (0103, "a30"), - (0104, "a31"), - (0105, "a32"), - (0106, "a33"), - (0107, "a34"), - (0110, "a35"), - (0111, "a36"), - (0112, "a37"), - (0113, "a38"), - (0114, "a39"), - (0115, "a40"), - (0116, "a41"), - (0117, "a42"), - (0120, "a43"), - (0121, "a44"), - (0122, "a45"), - (0123, "a46"), - (0124, "a47"), - (0125, "a48"), - (0126, "a49"), - (0127, "a50"), - (0130, "a51"), - (0131, "a52"), - (0132, "a53"), - (0133, "a54"), - (0134, "a55"), - (0135, "a56"), - (0136, "a57"), - (0137, "a58"), - (0140, "a59"), - (0141, "a60"), - (0142, "a61"), - (0143, "a62"), - (0144, "a63"), - (0145, "a64"), - (0146, "a65"), - (0147, "a66"), - (0150, "a67"), - (0151, "a68"), - (0152, "a69"), - (0153, "a70"), - (0154, "a71"), - (0155, "a72"), - (0156, "a73"), - (0157, "a74"), - (0160, "a203"), - (0161, "a75"), - (0162, "a204"), - (0163, "a76"), - (0164, "a77"), - (0165, "a78"), - (0166, "a79"), - (0167, "a81"), - (0170, "a82"), 
- (0171, "a83"), - (0172, "a84"), - (0173, "a97"), - (0174, "a98"), - (0175, "a99"), - (0176, "a100"), - (0241, "a101"), - (0242, "a102"), - (0243, "a103"), - (0244, "a104"), - (0245, "a106"), - (0246, "a107"), - (0247, "a108"), - (0250, "a112"), - (0251, "a111"), - (0252, "a110"), - (0253, "a109"), - (0254, "a120"), - (0255, "a121"), - (0256, "a122"), - (0257, "a123"), - (0260, "a124"), - (0261, "a125"), - (0262, "a126"), - (0263, "a127"), - (0264, "a128"), - (0265, "a129"), - (0266, "a130"), - (0267, "a131"), - (0270, "a132"), - (0271, "a133"), - (0272, "a134"), - (0273, "a135"), - (0274, "a136"), - (0275, "a137"), - (0276, "a138"), - (0277, "a139"), - (0300, "a140"), - (0301, "a141"), - (0302, "a142"), - (0303, "a143"), - (0304, "a144"), - (0305, "a145"), - (0306, "a146"), - (0307, "a147"), - (0310, "a148"), - (0311, "a149"), - (0312, "a150"), - (0313, "a151"), - (0314, "a152"), - (0315, "a153"), - (0316, "a154"), - (0317, "a155"), - (0320, "a156"), - (0321, "a157"), - (0322, "a158"), - (0323, "a159"), - (0324, "a160"), - (0325, "a161"), - (0326, "a163"), - (0327, "a164"), - (0330, "a196"), - (0331, "a165"), - (0332, "a192"), - (0333, "a166"), - (0334, "a167"), - (0335, "a168"), - (0336, "a169"), - (0337, "a170"), - (0340, "a171"), - (0341, "a172"), - (0342, "a173"), - (0343, "a162"), - (0344, "a174"), - (0345, "a175"), - (0346, "a176"), - (0347, "a177"), - (0350, "a178"), - (0351, "a179"), - (0352, "a193"), - (0353, "a180"), - (0354, "a199"), - (0355, "a181"), - (0356, "a200"), - (0357, "a182"), - (0361, "a201"), - (0362, "a183"), - (0363, "a184"), - (0364, "a197"), - (0365, "a185"), - (0366, "a194"), - (0367, "a198"), - (0370, "a186"), - (0371, "a195"), - (0372, "a187"), - (0373, "a188"), - (0374, "a189"), - (0375, "a190"), - (0376, "a191") - }; - - /// - /// Single instance of this encoding. 
- /// - public static ZapfDingbatsEncoding Instance { get; } = new ZapfDingbatsEncoding(); - - /// - public override string EncodingName => "ZapfDingbatsEncoding"; - - private ZapfDingbatsEncoding() - { - foreach (var valueTuple in EncodingTable) - { - Add(valueTuple.Item1, valueTuple.Item2); - } - } - } -} \ No newline at end of file +namespace UglyToad.PdfPig.Fonts.Encodings +{ + /// + /// Zapf Dingbats encoding. + /// + public class ZapfDingbatsEncoding : Encoding + { + /// + /// EncodingTable for ZapfDingbats + /// PDF Spec 1.7 Page 1016 https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/pdfreference1.7old.pdf#page1016 + /// Note spec has code values are octal (base 8) with leading zero (supported in 'C' and 'Java') but not by C# + /// Code values are already converted to base 10 prior to compile. Original octal values in comments on each line. + /// + private static readonly (int, string)[] EncodingTable = { + ( 32, "space"), // for char ' ' using 32 as base10 equivilant (for C# source). Spec has 040 octal. ( 32,"space") <=> (040, "space"), + ( 33, "a1"), // for char '!' using 33 as base10 equivilant (for C# source). Spec has 041 octal. ( 33,"a1") <=> (041, "a1") , + ( 34, "a2"), // for char '"' using 34 as base10 equivilant (for C# source). Spec has 042 octal. ( 34,"a2") <=> (042, "a2") , + ( 35, "a202"), // for char '#' using 35 as base10 equivilant (for C# source). Spec has 043 octal. ( 35,"a202") <=> (043, "a202") , + ( 36, "a3"), // for char '$' using 36 as base10 equivilant (for C# source). Spec has 044 octal. ( 36,"a3") <=> (044, "a3") , + ( 37, "a4"), // for char '%' using 37 as base10 equivilant (for C# source). Spec has 045 octal. ( 37,"a4") <=> (045, "a4") , + ( 38, "a5"), // for char '&' using 38 as base10 equivilant (for C# source). Spec has 046 octal. ( 38,"a5") <=> (046, "a5") , + ( 39, "a119"), // for char ''' using 39 as base10 equivilant (for C# source). Spec has 047 octal. 
( 39,"a119") <=> (047, "a119") , + ( 40, "a118"), // for char '(' using 40 as base10 equivilant (for C# source). Spec has 050 octal. ( 40,"a118") <=> (050, "a118") , + ( 41, "a117"), // for char ')' using 41 as base10 equivilant (for C# source). Spec has 051 octal. ( 41,"a117") <=> (051, "a117") , + ( 42, "a11"), // for char '*' using 42 as base10 equivilant (for C# source). Spec has 052 octal. ( 42,"a11") <=> (052, "a11") , + ( 43, "a12"), // for char '+' using 43 as base10 equivilant (for C# source). Spec has 053 octal. ( 43,"a12") <=> (053, "a12") , + ( 44, "a13"), // for char ',' using 44 as base10 equivilant (for C# source). Spec has 054 octal. ( 44,"a13") <=> (054, "a13") , + ( 45, "a14"), // for char '-' using 45 as base10 equivilant (for C# source). Spec has 055 octal. ( 45,"a14") <=> (055, "a14") , + ( 46, "a15"), // for char '.' using 46 as base10 equivilant (for C# source). Spec has 056 octal. ( 46,"a15") <=> (056, "a15") , + ( 47, "a16"), // for char '/' using 47 as base10 equivilant (for C# source). Spec has 057 octal. ( 47,"a16") <=> (057, "a16") , + ( 48, "a105"), // for char '0' using 48 as base10 equivilant (for C# source). Spec has 060 octal. ( 48,"a105") <=> (060, "a105") , + ( 49, "a17"), // for char '1' using 49 as base10 equivilant (for C# source). Spec has 061 octal. ( 49,"a17") <=> (061, "a17") , + ( 50, "a18"), // for char '2' using 50 as base10 equivilant (for C# source). Spec has 062 octal. ( 50,"a18") <=> (062, "a18") , + ( 51, "a19"), // for char '3' using 51 as base10 equivilant (for C# source). Spec has 063 octal. ( 51,"a19") <=> (063, "a19") , + ( 52, "a20"), // for char '4' using 52 as base10 equivilant (for C# source). Spec has 064 octal. ( 52,"a20") <=> (064, "a20") , + ( 53, "a21"), // for char '5' using 53 as base10 equivilant (for C# source). Spec has 065 octal. ( 53,"a21") <=> (065, "a21") , + ( 54, "a22"), // for char '6' using 54 as base10 equivilant (for C# source). Spec has 066 octal. 
( 54,"a22") <=> (066, "a22") , + ( 55, "a23"), // for char '7' using 55 as base10 equivilant (for C# source). Spec has 067 octal. ( 55,"a23") <=> (067, "a23") , + ( 56, "a24"), // for char '8' using 56 as base10 equivilant (for C# source). Spec has 070 octal. ( 56,"a24") <=> (070, "a24") , + ( 57, "a25"), // for char '9' using 57 as base10 equivilant (for C# source). Spec has 071 octal. ( 57,"a25") <=> (071, "a25") , + ( 58, "a26"), // for char ':' using 58 as base10 equivilant (for C# source). Spec has 072 octal. ( 58,"a26") <=> (072, "a26") , + ( 59, "a27"), // for char ';' using 59 as base10 equivilant (for C# source). Spec has 073 octal. ( 59,"a27") <=> (073, "a27") , + ( 60, "a28"), // for char '<' using 60 as base10 equivilant (for C# source). Spec has 074 octal. ( 60,"a28") <=> (074, "a28") , + ( 61, "a6"), // for char '=' using 61 as base10 equivilant (for C# source). Spec has 075 octal. ( 61,"a6") <=> (075, "a6") , + ( 62, "a7"), // for char '>' using 62 as base10 equivilant (for C# source). Spec has 076 octal. ( 62,"a7") <=> (076, "a7") , + ( 63, "a8"), // for char '?' using 63 as base10 equivilant (for C# source). Spec has 077 octal. ( 63,"a8") <=> (077, "a8") , + ( 64, "a9"), // for char '@' using 64 as base10 equivilant (for C# source). Spec has 0100 octal. ( 64,"a9") <=> (0100, "a9") , + ( 65, "a10"), // for char 'A' using 65 as base10 equivilant (for C# source). Spec has 0101 octal. ( 65,"a10") <=> (0101, "a10") , + ( 66, "a29"), // for char 'B' using 66 as base10 equivilant (for C# source). Spec has 0102 octal. ( 66,"a29") <=> (0102, "a29") , + ( 67, "a30"), // for char 'C' using 67 as base10 equivilant (for C# source). Spec has 0103 octal. ( 67,"a30") <=> (0103, "a30") , + ( 68, "a31"), // for char 'D' using 68 as base10 equivilant (for C# source). Spec has 0104 octal. ( 68,"a31") <=> (0104, "a31") , + ( 69, "a32"), // for char 'E' using 69 as base10 equivilant (for C# source). Spec has 0105 octal. 
( 69,"a32") <=> (0105, "a32") , + ( 70, "a33"), // for char 'F' using 70 as base10 equivilant (for C# source). Spec has 0106 octal. ( 70,"a33") <=> (0106, "a33") , + ( 71, "a34"), // for char 'G' using 71 as base10 equivilant (for C# source). Spec has 0107 octal. ( 71,"a34") <=> (0107, "a34") , + ( 72, "a35"), // for char 'H' using 72 as base10 equivilant (for C# source). Spec has 0110 octal. ( 72,"a35") <=> (0110, "a35") , + ( 73, "a36"), // for char 'I' using 73 as base10 equivilant (for C# source). Spec has 0111 octal. ( 73,"a36") <=> (0111, "a36") , + ( 74, "a37"), // for char 'J' using 74 as base10 equivilant (for C# source). Spec has 0112 octal. ( 74,"a37") <=> (0112, "a37") , + ( 75, "a38"), // for char 'K' using 75 as base10 equivilant (for C# source). Spec has 0113 octal. ( 75,"a38") <=> (0113, "a38") , + ( 76, "a39"), // for char 'L' using 76 as base10 equivilant (for C# source). Spec has 0114 octal. ( 76,"a39") <=> (0114, "a39") , + ( 77, "a40"), // for char 'M' using 77 as base10 equivilant (for C# source). Spec has 0115 octal. ( 77,"a40") <=> (0115, "a40") , + ( 78, "a41"), // for char 'N' using 78 as base10 equivilant (for C# source). Spec has 0116 octal. ( 78,"a41") <=> (0116, "a41") , + ( 79, "a42"), // for char 'O' using 79 as base10 equivilant (for C# source). Spec has 0117 octal. ( 79,"a42") <=> (0117, "a42") , + ( 80, "a43"), // for char 'P' using 80 as base10 equivilant (for C# source). Spec has 0120 octal. ( 80,"a43") <=> (0120, "a43") , + ( 81, "a44"), // for char 'Q' using 81 as base10 equivilant (for C# source). Spec has 0121 octal. ( 81,"a44") <=> (0121, "a44") , + ( 82, "a45"), // for char 'R' using 82 as base10 equivilant (for C# source). Spec has 0122 octal. ( 82,"a45") <=> (0122, "a45") , + ( 83, "a46"), // for char 'S' using 83 as base10 equivilant (for C# source). Spec has 0123 octal. ( 83,"a46") <=> (0123, "a46") , + ( 84, "a47"), // for char 'T' using 84 as base10 equivilant (for C# source). Spec has 0124 octal. 
( 84,"a47") <=> (0124, "a47") , + ( 85, "a48"), // for char 'U' using 85 as base10 equivilant (for C# source). Spec has 0125 octal. ( 85,"a48") <=> (0125, "a48") , + ( 86, "a49"), // for char 'V' using 86 as base10 equivilant (for C# source). Spec has 0126 octal. ( 86,"a49") <=> (0126, "a49") , + ( 87, "a50"), // for char 'W' using 87 as base10 equivilant (for C# source). Spec has 0127 octal. ( 87,"a50") <=> (0127, "a50") , + ( 88, "a51"), // for char 'X' using 88 as base10 equivilant (for C# source). Spec has 0130 octal. ( 88,"a51") <=> (0130, "a51") , + ( 89, "a52"), // for char 'Y' using 89 as base10 equivilant (for C# source). Spec has 0131 octal. ( 89,"a52") <=> (0131, "a52") , + ( 90, "a53"), // for char 'Z' using 90 as base10 equivilant (for C# source). Spec has 0132 octal. ( 90,"a53") <=> (0132, "a53") , + ( 91, "a54"), // for char '[' using 91 as base10 equivilant (for C# source). Spec has 0133 octal. ( 91,"a54") <=> (0133, "a54") , + ( 92, "a55"), // for char '\' using 92 as base10 equivilant (for C# source). Spec has 0134 octal. ( 92,"a55") <=> (0134, "a55") , + ( 93, "a56"), // for char ']' using 93 as base10 equivilant (for C# source). Spec has 0135 octal. ( 93,"a56") <=> (0135, "a56") , + ( 94, "a57"), // for char '^' using 94 as base10 equivilant (for C# source). Spec has 0136 octal. ( 94,"a57") <=> (0136, "a57") , + ( 95, "a58"), // for char '_' using 95 as base10 equivilant (for C# source). Spec has 0137 octal. ( 95,"a58") <=> (0137, "a58") , + ( 96, "a59"), // for char '`' using 96 as base10 equivilant (for C# source). Spec has 0140 octal. ( 96,"a59") <=> (0140, "a59") , + ( 97, "a60"), // for char 'a' using 97 as base10 equivilant (for C# source). Spec has 0141 octal. ( 97,"a60") <=> (0141, "a60") , + ( 98, "a61"), // for char 'b' using 98 as base10 equivilant (for C# source). Spec has 0142 octal. ( 98,"a61") <=> (0142, "a61") , + ( 99, "a62"), // for char 'c' using 99 as base10 equivilant (for C# source). Spec has 0143 octal. 
( 99,"a62") <=> (0143, "a62") , + (100, "a63"), // for char 'd' using 100 as base10 equivilant (for C# source). Spec has 0144 octal. (100,"a63") <=> (0144, "a63") , + (101, "a64"), // for char 'e' using 101 as base10 equivilant (for C# source). Spec has 0145 octal. (101,"a64") <=> (0145, "a64") , + (102, "a65"), // for char 'f' using 102 as base10 equivilant (for C# source). Spec has 0146 octal. (102,"a65") <=> (0146, "a65") , + (103, "a66"), // for char 'g' using 103 as base10 equivilant (for C# source). Spec has 0147 octal. (103,"a66") <=> (0147, "a66") , + (104, "a67"), // for char 'h' using 104 as base10 equivilant (for C# source). Spec has 0150 octal. (104,"a67") <=> (0150, "a67") , + (105, "a68"), // for char 'i' using 105 as base10 equivilant (for C# source). Spec has 0151 octal. (105,"a68") <=> (0151, "a68") , + (106, "a69"), // for char 'j' using 106 as base10 equivilant (for C# source). Spec has 0152 octal. (106,"a69") <=> (0152, "a69") , + (107, "a70"), // for char 'k' using 107 as base10 equivilant (for C# source). Spec has 0153 octal. (107,"a70") <=> (0153, "a70") , + (108, "a71"), // for char 'l' using 108 as base10 equivilant (for C# source). Spec has 0154 octal. (108,"a71") <=> (0154, "a71") , + (109, "a72"), // for char 'm' using 109 as base10 equivilant (for C# source). Spec has 0155 octal. (109,"a72") <=> (0155, "a72") , + (110, "a73"), // for char 'n' using 110 as base10 equivilant (for C# source). Spec has 0156 octal. (110,"a73") <=> (0156, "a73") , + (111, "a74"), // for char 'o' using 111 as base10 equivilant (for C# source). Spec has 0157 octal. (111,"a74") <=> (0157, "a74") , + (112, "a203"), // for char 'p' using 112 as base10 equivilant (for C# source). Spec has 0160 octal. (112,"a203") <=> (0160, "a203"), + (113, "a75"), // for char 'q' using 113 as base10 equivilant (for C# source). Spec has 0161 octal. (113,"a75") <=> (0161, "a75") , + (114, "a204"), // for char 'r' using 114 as base10 equivilant (for C# source). Spec has 0162 octal. 
(114,"a204") <=> (0162, "a204"), + (115, "a76"), // for char 's' using 115 as base10 equivilant (for C# source). Spec has 0163 octal. (115,"a76") <=> (0163, "a76") , + (116, "a77"), // for char 't' using 116 as base10 equivilant (for C# source). Spec has 0164 octal. (116,"a77") <=> (0164, "a77") , + (117, "a78"), // for char 'u' using 117 as base10 equivilant (for C# source). Spec has 0165 octal. (117,"a78") <=> (0165, "a78") , + (118, "a79"), // for char 'v' using 118 as base10 equivilant (for C# source). Spec has 0166 octal. (118,"a79") <=> (0166, "a79") , + (119, "a81"), // for char 'w' using 119 as base10 equivilant (for C# source). Spec has 0167 octal. (119,"a81") <=> (0167, "a81") , + (120, "a82"), // for char 'x' using 120 as base10 equivilant (for C# source). Spec has 0170 octal. (120,"a82") <=> (0170, "a82") , + (121, "a83"), // for char 'y' using 121 as base10 equivilant (for C# source). Spec has 0171 octal. (121,"a83") <=> (0171, "a83") , + (122, "a84"), // for char 'z' using 122 as base10 equivilant (for C# source). Spec has 0172 octal. (122,"a84") <=> (0172, "a84") , + (123, "a97"), // for char '{' using 123 as base10 equivilant (for C# source). Spec has 0173 octal. (123,"a97") <=> (0173, "a97") , + (124, "a98"), // for char '|' using 124 as base10 equivilant (for C# source). Spec has 0174 octal. (124,"a98") <=> (0174, "a98") , + (125, "a99"), // for char '}' using 125 as base10 equivilant (for C# source). Spec has 0175 octal. (125,"a99") <=> (0175, "a99") , + (126, "a100"), // for char '~' using 126 as base10 equivilant (for C# source). Spec has 0176 octal. (126,"a100") <=> (0176, "a100"), + (161, "a101"), // for char '¡' using 161 as base10 equivilant (for C# source). Spec has 0241 octal. (161,"a101") <=> (0241, "a101"), + (162, "a102"), // for char '¢' using 162 as base10 equivilant (for C# source). Spec has 0242 octal. (162,"a102") <=> (0242, "a102"), + (163, "a103"), // for char '£' using 163 as base10 equivilant (for C# source). 
Spec has 0243 octal. (163,"a103") <=> (0243, "a103"), + (164, "a104"), // for char '¤' using 164 as base10 equivilant (for C# source). Spec has 0244 octal. (164,"a104") <=> (0244, "a104"), + (165, "a106"), // for char '¥' using 165 as base10 equivilant (for C# source). Spec has 0245 octal. (165,"a106") <=> (0245, "a106"), + (166, "a107"), // for char '¦' using 166 as base10 equivilant (for C# source). Spec has 0246 octal. (166,"a107") <=> (0246, "a107"), + (167, "a108"), // for char '§' using 167 as base10 equivilant (for C# source). Spec has 0247 octal. (167,"a108") <=> (0247, "a108"), + (168, "a112"), // for char '¨' using 168 as base10 equivilant (for C# source). Spec has 0250 octal. (168,"a112") <=> (0250, "a112"), + (169, "a111"), // for char '©' using 169 as base10 equivilant (for C# source). Spec has 0251 octal. (169,"a111") <=> (0251, "a111"), + (170, "a110"), // for char 'ª' using 170 as base10 equivilant (for C# source). Spec has 0252 octal. (170,"a110") <=> (0252, "a110"), + (171, "a109"), // for char '«' using 171 as base10 equivilant (for C# source). Spec has 0253 octal. (171,"a109") <=> (0253, "a109"), + (172, "a120"), // for char '¬' using 172 as base10 equivilant (for C# source). Spec has 0254 octal. (172,"a120") <=> (0254, "a120"), + (173, "a121"), // using 173 as base10 equivilant (for C# source). Spec has 0255 octal. (173,"a121") <=> (0255, "a121"), + (174, "a122"), // for char '®' using 174 as base10 equivilant (for C# source). Spec has 0256 octal. (174,"a122") <=> (0256, "a122"), + (175, "a123"), // for char '¯' using 175 as base10 equivilant (for C# source). Spec has 0257 octal. (175,"a123") <=> (0257, "a123"), + (176, "a124"), // for char '°' using 176 as base10 equivilant (for C# source). Spec has 0260 octal. (176,"a124") <=> (0260, "a124"), + (177, "a125"), // for char '±' using 177 as base10 equivilant (for C# source). Spec has 0261 octal. 
(177,"a125") <=> (0261, "a125"), + (178, "a126"), // for char '²' using 178 as base10 equivilant (for C# source). Spec has 0262 octal. (178,"a126") <=> (0262, "a126"), + (179, "a127"), // for char '³' using 179 as base10 equivilant (for C# source). Spec has 0263 octal. (179,"a127") <=> (0263, "a127"), + (180, "a128"), // for char '´' using 180 as base10 equivilant (for C# source). Spec has 0264 octal. (180,"a128") <=> (0264, "a128"), + (181, "a129"), // for char 'µ' using 181 as base10 equivilant (for C# source). Spec has 0265 octal. (181,"a129") <=> (0265, "a129"), + (182, "a130"), // for char '¶' using 182 as base10 equivilant (for C# source). Spec has 0266 octal. (182,"a130") <=> (0266, "a130"), + (183, "a131"), // for char '·' using 183 as base10 equivilant (for C# source). Spec has 0267 octal. (183,"a131") <=> (0267, "a131"), + (184, "a132"), // for char '¸' using 184 as base10 equivilant (for C# source). Spec has 0270 octal. (184,"a132") <=> (0270, "a132"), + (185, "a133"), // for char '¹' using 185 as base10 equivilant (for C# source). Spec has 0271 octal. (185,"a133") <=> (0271, "a133"), + (186, "a134"), // for char 'º' using 186 as base10 equivilant (for C# source). Spec has 0272 octal. (186,"a134") <=> (0272, "a134"), + (187, "a135"), // for char '»' using 187 as base10 equivilant (for C# source). Spec has 0273 octal. (187,"a135") <=> (0273, "a135"), + (188, "a136"), // for char '¼' using 188 as base10 equivilant (for C# source). Spec has 0274 octal. (188,"a136") <=> (0274, "a136"), + (189, "a137"), // for char '½' using 189 as base10 equivilant (for C# source). Spec has 0275 octal. (189,"a137") <=> (0275, "a137"), + (190, "a138"), // for char '¾' using 190 as base10 equivilant (for C# source). Spec has 0276 octal. (190,"a138") <=> (0276, "a138"), + (191, "a139"), // for char '¿' using 191 as base10 equivilant (for C# source). Spec has 0277 octal. 
(191,"a139") <=> (0277, "a139"), + (192, "a140"), // for char 'À' using 192 as base10 equivilant (for C# source). Spec has 0300 octal. (192,"a140") <=> (0300, "a140"), + (193, "a141"), // for char 'Á' using 193 as base10 equivilant (for C# source). Spec has 0301 octal. (193,"a141") <=> (0301, "a141"), + (194, "a142"), // for char 'Â' using 194 as base10 equivilant (for C# source). Spec has 0302 octal. (194,"a142") <=> (0302, "a142"), + (195, "a143"), // for char 'Ã' using 195 as base10 equivilant (for C# source). Spec has 0303 octal. (195,"a143") <=> (0303, "a143"), + (196, "a144"), // for char 'Ä' using 196 as base10 equivilant (for C# source). Spec has 0304 octal. (196,"a144") <=> (0304, "a144"), + (197, "a145"), // for char 'Å' using 197 as base10 equivilant (for C# source). Spec has 0305 octal. (197,"a145") <=> (0305, "a145"), + (198, "a146"), // for char 'Æ' using 198 as base10 equivilant (for C# source). Spec has 0306 octal. (198,"a146") <=> (0306, "a146"), + (199, "a147"), // for char 'Ç' using 199 as base10 equivilant (for C# source). Spec has 0307 octal. (199,"a147") <=> (0307, "a147"), + (200, "a148"), // for char 'È' using 200 as base10 equivilant (for C# source). Spec has 0310 octal. (200,"a148") <=> (0310, "a148"), + (201, "a149"), // for char 'É' using 201 as base10 equivilant (for C# source). Spec has 0311 octal. (201,"a149") <=> (0311, "a149"), + (202, "a150"), // for char 'Ê' using 202 as base10 equivilant (for C# source). Spec has 0312 octal. (202,"a150") <=> (0312, "a150"), + (203, "a151"), // for char 'Ë' using 203 as base10 equivilant (for C# source). Spec has 0313 octal. (203,"a151") <=> (0313, "a151"), + (204, "a152"), // for char 'Ì' using 204 as base10 equivilant (for C# source). Spec has 0314 octal. (204,"a152") <=> (0314, "a152"), + (205, "a153"), // for char 'Í' using 205 as base10 equivilant (for C# source). Spec has 0315 octal. 
(205,"a153") <=> (0315, "a153"), + (206, "a154"), // for char 'Î' using 206 as base10 equivilant (for C# source). Spec has 0316 octal. (206,"a154") <=> (0316, "a154"), + (207, "a155"), // for char 'Ï' using 207 as base10 equivilant (for C# source). Spec has 0317 octal. (207,"a155") <=> (0317, "a155"), + (208, "a156"), // for char 'Ð' using 208 as base10 equivilant (for C# source). Spec has 0320 octal. (208,"a156") <=> (0320, "a156"), + (209, "a157"), // for char 'Ñ' using 209 as base10 equivilant (for C# source). Spec has 0321 octal. (209,"a157") <=> (0321, "a157"), + (210, "a158"), // for char 'Ò' using 210 as base10 equivilant (for C# source). Spec has 0322 octal. (210,"a158") <=> (0322, "a158"), + (211, "a159"), // for char 'Ó' using 211 as base10 equivilant (for C# source). Spec has 0323 octal. (211,"a159") <=> (0323, "a159"), + (212, "a160"), // for char 'Ô' using 212 as base10 equivilant (for C# source). Spec has 0324 octal. (212,"a160") <=> (0324, "a160"), + (213, "a161"), // for char 'Õ' using 213 as base10 equivilant (for C# source). Spec has 0325 octal. (213,"a161") <=> (0325, "a161"), + (214, "a163"), // for char 'Ö' using 214 as base10 equivilant (for C# source). Spec has 0326 octal. (214,"a163") <=> (0326, "a163"), + (215, "a164"), // for char '×' using 215 as base10 equivilant (for C# source). Spec has 0327 octal. (215,"a164") <=> (0327, "a164"), + (216, "a196"), // for char 'Ø' using 216 as base10 equivilant (for C# source). Spec has 0330 octal. (216,"a196") <=> (0330, "a196"), + (217, "a165"), // for char 'Ù' using 217 as base10 equivilant (for C# source). Spec has 0331 octal. (217,"a165") <=> (0331, "a165"), + (218, "a192"), // for char 'Ú' using 218 as base10 equivilant (for C# source). Spec has 0332 octal. (218,"a192") <=> (0332, "a192"), + (219, "a166"), // for char 'Û' using 219 as base10 equivilant (for C# source). Spec has 0333 octal. 
(219,"a166") <=> (0333, "a166"), + (220, "a167"), // for char 'Ü' using 220 as base10 equivilant (for C# source). Spec has 0334 octal. (220,"a167") <=> (0334, "a167"), + (221, "a168"), // for char 'Ý' using 221 as base10 equivilant (for C# source). Spec has 0335 octal. (221,"a168") <=> (0335, "a168"), + (222, "a169"), // for char 'Þ' using 222 as base10 equivilant (for C# source). Spec has 0336 octal. (222,"a169") <=> (0336, "a169"), + (223, "a170"), // for char 'ß' using 223 as base10 equivilant (for C# source). Spec has 0337 octal. (223,"a170") <=> (0337, "a170"), + (224, "a171"), // for char 'à' using 224 as base10 equivilant (for C# source). Spec has 0340 octal. (224,"a171") <=> (0340, "a171"), + (225, "a172"), // for char 'á' using 225 as base10 equivilant (for C# source). Spec has 0341 octal. (225,"a172") <=> (0341, "a172"), + (226, "a173"), // for char 'â' using 226 as base10 equivilant (for C# source). Spec has 0342 octal. (226,"a173") <=> (0342, "a173"), + (227, "a162"), // for char 'ã' using 227 as base10 equivilant (for C# source). Spec has 0343 octal. (227,"a162") <=> (0343, "a162"), + (228, "a174"), // for char 'ä' using 228 as base10 equivilant (for C# source). Spec has 0344 octal. (228,"a174") <=> (0344, "a174"), + (229, "a175"), // for char 'å' using 229 as base10 equivilant (for C# source). Spec has 0345 octal. (229,"a175") <=> (0345, "a175"), + (230, "a176"), // for char 'æ' using 230 as base10 equivilant (for C# source). Spec has 0346 octal. (230,"a176") <=> (0346, "a176"), + (231, "a177"), // for char 'ç' using 231 as base10 equivilant (for C# source). Spec has 0347 octal. (231,"a177") <=> (0347, "a177"), + (232, "a178"), // for char 'è' using 232 as base10 equivilant (for C# source). Spec has 0350 octal. (232,"a178") <=> (0350, "a178"), + (233, "a179"), // for char 'é' using 233 as base10 equivilant (for C# source). Spec has 0351 octal. 
(233,"a179") <=> (0351, "a179"), + (234, "a193"), // for char 'ê' using 234 as base10 equivilant (for C# source). Spec has 0352 octal. (234,"a193") <=> (0352, "a193"), + (235, "a180"), // for char 'ë' using 235 as base10 equivilant (for C# source). Spec has 0353 octal. (235,"a180") <=> (0353, "a180"), + (236, "a199"), // for char 'ì' using 236 as base10 equivilant (for C# source). Spec has 0354 octal. (236,"a199") <=> (0354, "a199"), + (237, "a181"), // for char 'í' using 237 as base10 equivilant (for C# source). Spec has 0355 octal. (237,"a181") <=> (0355, "a181"), + (238, "a200"), // for char 'î' using 238 as base10 equivilant (for C# source). Spec has 0356 octal. (238,"a200") <=> (0356, "a200"), + (239, "a182"), // for char 'ï' using 239 as base10 equivilant (for C# source). Spec has 0357 octal. (239,"a182") <=> (0357, "a182"), + (241, "a201"), // for char 'ñ' using 241 as base10 equivilant (for C# source). Spec has 0361 octal. (241,"a201") <=> (0361, "a201"), + (242, "a183"), // for char 'ò' using 242 as base10 equivilant (for C# source). Spec has 0362 octal. (242,"a183") <=> (0362, "a183"), + (243, "a184"), // for char 'ó' using 243 as base10 equivilant (for C# source). Spec has 0363 octal. (243,"a184") <=> (0363, "a184"), + (244, "a197"), // for char 'ô' using 244 as base10 equivilant (for C# source). Spec has 0364 octal. (244,"a197") <=> (0364, "a197"), + (245, "a185"), // for char 'õ' using 245 as base10 equivilant (for C# source). Spec has 0365 octal. (245,"a185") <=> (0365, "a185"), + (246, "a194"), // for char 'ö' using 246 as base10 equivilant (for C# source). Spec has 0366 octal. (246,"a194") <=> (0366, "a194"), + (247, "a198"), // for char '÷' using 247 as base10 equivilant (for C# source). Spec has 0367 octal. (247,"a198") <=> (0367, "a198"), + (248, "a186"), // for char 'ø' using 248 as base10 equivilant (for C# source). Spec has 0370 octal. 
(248,"a186") <=> (0370, "a186"), + (249, "a195"), // for char 'ù' using 249 as base10 equivilant (for C# source). Spec has 0371 octal. (249,"a195") <=> (0371, "a195"), + (250, "a187"), // for char 'ú' using 250 as base10 equivilant (for C# source). Spec has 0372 octal. (250,"a187") <=> (0372, "a187"), + (251, "a188"), // for char 'û' using 251 as base10 equivilant (for C# source). Spec has 0373 octal. (251,"a188") <=> (0373, "a188"), + (252, "a189"), // for char 'ü' using 252 as base10 equivilant (for C# source). Spec has 0374 octal. (252,"a189") <=> (0374, "a189"), + (253, "a190"), // for char 'ý' using 253 as base10 equivilant (for C# source). Spec has 0375 octal. (253,"a190") <=> (0375, "a190"), + (254, "a191") // for char 'þ' using 254 as base10 equivilant (for C# source). Spec has 0376 octal. (254,"a191") <=> (0376, "a191") + }; + + /// + /// Single instance of this encoding. + /// + public static ZapfDingbatsEncoding Instance { get; } = new ZapfDingbatsEncoding(); + + /// + public override string EncodingName => "ZapfDingbatsEncoding"; + + private ZapfDingbatsEncoding() + { + foreach ((var code, var name) in EncodingTable) + { + // Note: code from source is already base 10 no need to use OctalHelpers.FromOctalInt + Add(code, name); + } + } + } +} diff --git a/src/UglyToad.PdfPig.Tests/Geometry/ClippingTests.cs b/src/UglyToad.PdfPig.Tests/Geometry/ClippingTests.cs index 17fd56eb..cd5cf667 100644 --- a/src/UglyToad.PdfPig.Tests/Geometry/ClippingTests.cs +++ b/src/UglyToad.PdfPig.Tests/Geometry/ClippingTests.cs @@ -6,7 +6,7 @@ namespace UglyToad.PdfPig.Tests.Geometry public class ClippingTests { [Fact] - public void ContainsRectangleEvenOdd() + public void ContainsRectangleEvenOdd() { using (var document = PdfDocument.Open(IntegrationHelpers.GetDocumentPath("SPARC - v9 Architecture Manual"), new ParsingOptions() { ClipPaths = true })) diff --git a/src/UglyToad.PdfPig.Tests/Writer/Fonts/Standard14WritingFontTests.cs 
b/src/UglyToad.PdfPig.Tests/Writer/Fonts/Standard14WritingFontTests.cs new file mode 100644 index 00000000..18f9401b --- /dev/null +++ b/src/UglyToad.PdfPig.Tests/Writer/Fonts/Standard14WritingFontTests.cs @@ -0,0 +1,949 @@ +namespace UglyToad.PdfPig.Tests.Writer.Fonts +{ + using System; + + using System.Linq; + using PdfPig.Fonts; + using PdfPig.Content; + using UglyToad.PdfPig.Core; + using UglyToad.PdfPig.Fonts.Standard14Fonts; + using UglyToad.PdfPig.Writer; + + using Xunit; + using System.Reflection; + using System.Collections.Generic; + using UglyToad.PdfPig.Fonts.AdobeFontMetrics; + using System.IO; + using System.Diagnostics; + + + public class Standard14WritingFontTests + { + [Fact] + public void ZapfDingbatsFontAddText() + { + PdfDocumentBuilder pdfBuilder = new PdfDocumentBuilder(); + PdfDocumentBuilder.AddedFont F1 = pdfBuilder.AddStandard14Font(Standard14Font.ZapfDingbats); + var EncodingTable = GetEncodingTable(typeof(UglyToad.PdfPig.Fonts.Encodings.ZapfDingbatsEncoding)); + var unicodesCharacters = GetUnicodeCharacters(EncodingTable, GlyphList.ZapfDingbats); + { + PdfDocumentBuilder.AddedFont F2 = pdfBuilder.AddStandard14Font(Standard14Font.TimesRoman); + PdfPageBuilder page = pdfBuilder.AddPage(PageSize.A4); + + double topPageY = page.PageSize.Top - 50; + double inch = (page.PageSize.Width / 8.5); + double cm = inch / 2.54; + double leftX = 1 * cm; + + var point = new PdfPoint(leftX, topPageY); + DateTimeStampPage(pdfBuilder, page, point, cm); + var letters = page.AddText("Adobe Standard Font ZapfDingbats", 21, point, F2); + var newY = topPageY - letters.Select(v => v.GlyphRectangle.Height).Max() * 1.2; + point = new PdfPoint(leftX, newY); + letters = page.AddText("Font Specific encoding in Black (octal) and Unicode in Blue (hex)", 10, point, F2); + newY = newY - letters.Select(v => v.GlyphRectangle.Height).Max() * 3; + point = new PdfPoint(leftX, newY); + var eachRowY = new List(); + eachRowY.Add(newY); // First row + + (var maxCharacterHeight, var 
maxCharacterWidth) = GetCharacterDetails(page,F1, 12d,unicodesCharacters); + var context = GetContext(F1,page, nameof(F1), F2,maxCharacterHeight,maxCharacterWidth); + + // Font specific character codes (in black) + page.SetTextAndFillColor(0,0,0); //Black + foreach ((var code, var name) in EncodingTable) + { + var ch = (char)code; // Note code is already base 10 no need to use OctalHelpers.FromOctalInt or System.Convert.ToInt32($"{code}", 8); + point = AddLetterWithContext(point, $"{ch}", context, true); + + if (eachRowY.Last() != point.Y) { eachRowY.Add(point.Y); } + } + + // Second set of rows for (unicode) characters : Test mapping from (C#) unicode chars to PDF encoding + newY = newY - maxCharacterHeight * 1.2; + point = new PdfPoint(leftX, newY); + + // Unicode character codes (in blue) + page.SetTextAndFillColor(0, 0, 200); //Blue + foreach (var unicodeCh in unicodesCharacters) + { + point = AddLetterWithContext(point, $"{unicodeCh}", context, isHexLabel: true); + } + } + + // Save one page PDF to file system for manual review. 
+ var pdfBytes = pdfBuilder.Build(); + WritePdfFile(nameof(ZapfDingbatsFontAddText), pdfBytes); + + + // Check extracted letters + using (var document = PdfDocument.Open(pdfBytes)) + { + var page1 = document.GetPage(1); + var letters = page1.Letters; + + { + var lettersFontSpecificCodes = letters.Where(l => l.FontName == "ZapfDingbats" + && l.Color.ToRGBValues().b == 0) + .ToList(); + + + Assert.Equal(188,lettersFontSpecificCodes.Count); + for (int i = 0; i < lettersFontSpecificCodes.Count; i++) + { + var letter = lettersFontSpecificCodes[i]; + + (var code, var name) = EncodingTable[i]; + var unicodeString = GlyphList.ZapfDingbats.NameToUnicode(name); + + var letterCharacter = letter.Value[0]; + var unicodeCharacter = unicodeString[0]; + Assert.Equal(letterCharacter, unicodeCharacter); + //Debug.WriteLine($"{letterCharacter} , {unicodeCharacter}"); + } + } + + { + var lettersUnicode = letters.Where(l => l.FontName == "ZapfDingbats" + && l.Color.ToRGBValues().b > 0.78m) + .ToList(); + Assert.Equal(188,lettersUnicode.Count); + for (int i = 0; i < lettersUnicode.Count; i++) + { + var letter = lettersUnicode[i]; + + var letterCharacter = letter.Value[0]; + var unicodeCharacter = unicodesCharacters[i]; + Assert.Equal(letterCharacter, unicodeCharacter); + //Debug.WriteLine($"{letterCharacter} , {unicodeCharacter}"); + } + } + } + } + + [Fact] + public void ZapfDingbatsFontErrorResponseAddingInvalidText() + { + PdfDocumentBuilder pdfBuilder = new PdfDocumentBuilder(); + PdfDocumentBuilder.AddedFont F1 = pdfBuilder.AddStandard14Font(Standard14Font.ZapfDingbats); + var EncodingTable = GetEncodingTable(typeof(UglyToad.PdfPig.Fonts.Encodings.ZapfDingbatsEncoding)); + + { + PdfPageBuilder page = pdfBuilder.AddPage(PageSize.A4); + var cm = (page.PageSize.Width / 8.5 / 2.54); + var point = new PdfPoint(cm, page.PageSize.Top - cm); + + { + // Get the codes that have no character associated in the font specific coding. 
+ var codesUnder255 = Enumerable.Range(0, 255).Select(v => (char)v).ToArray(); + var codesFromEncodingTable = EncodingTable.Select(v => (char)v.code).ToArray(); + var invalidCharactersUnder255 = codesUnder255.Except(codesFromEncodingTable); + //Debug.WriteLine($"Number of invalid under 255 characters: {invalidCharactersUnder255.Count()}"); + Assert.Equal(67, invalidCharactersUnder255.Count()); + foreach (var ch in invalidCharactersUnder255) + { + try + { + var letter = page.AddText($"{ch}", 12, point, F1); + Assert.True(true, $"Unexpected. Character: '{ch}' (0x{(int)ch:X}) should throw. Not supported."); + } + catch (InvalidOperationException ex) + { + // Expected + // "The font does not contain a character: '?' (0xnn)." where ? is a character and nn is hex number. + Assert.Contains("The font does not contain a character", ex.Message); + } + try + { + var letter = page.MeasureText($"{ch}", 12, point, F1); + Assert.True(true, $"Unexpected. Character: '{ch}' (0x{(int)ch:X}) should throw. Not supported."); + + } + catch (InvalidOperationException ex) + { + // Expected + // "The font does not contain a character: '?' (0xnn)." where ? is a character and nn is hex number. + Assert.Contains("The font does not contain a character", ex.Message); + } + } + } + + { + // UnicodeRanges.Dingbats - 0x2700 - 0x27BF + var codesFromUnicodeDingbatBlock = Enumerable.Range(0x2700, 0xBF).Select(v => (char)v).ToArray(); + var unicodesCharacters = GetUnicodeCharacters(EncodingTable, GlyphList.ZapfDingbats); + var invalidCharactersInUnicodeDingbaBlock = codesFromUnicodeDingbatBlock.Except(unicodesCharacters); + //Debug.WriteLine($"Number of invalid unicode characters: {invalidCharactersInUnicodeDingbaBlock.Count()}"); + Assert.Equal(31, invalidCharactersInUnicodeDingbaBlock.Count()); + foreach (var ch in invalidCharactersInUnicodeDingbaBlock) + { + try + { + var letter = page.AddText($"{ch}", 12, point, F1); + Assert.True(true, $"Unexpected. Character: '{ch}' (0x{(int)ch:X}) should throw. 
Not supported."); + } + catch (InvalidOperationException ex) + { + // Expected + // "The font does not contain a character: '?' (0xnn)." where ? is a character and nn is hex number. + Assert.Contains("The font does not contain a character", ex.Message); + } + try + { + var letter = page.MeasureText($"{ch}", 12, point, F1); + Assert.True(true, $"Unexpected. Character: '{ch}' (0x{(int)ch:X}) should throw. Not supported."); + } + catch (InvalidOperationException ex) + { + // Expected + // "The font does not contain a character: '?' (0xnn)." where ? is a character and nn is hex number. + Assert.Contains("The font does not contain a character", ex.Message); + } + } + } + } + } + + [Fact] + public void SymbolFontAddText() + { + PdfDocumentBuilder pdfBuilder = new PdfDocumentBuilder(); + PdfDocumentBuilder.AddedFont F1 = pdfBuilder.AddStandard14Font(Standard14Font.Symbol); + var EncodingTable = GetEncodingTable(typeof(UglyToad.PdfPig.Fonts.Encodings.SymbolEncoding)); + var unicodesCharacters = GetUnicodeCharacters(EncodingTable, GlyphList.AdobeGlyphList); + { + PdfDocumentBuilder.AddedFont F2 = pdfBuilder.AddStandard14Font(Standard14Font.TimesRoman); + PdfPageBuilder page = pdfBuilder.AddPage(PageSize.A4); + + double topPageY = page.PageSize.Top - 50; + double inch = (page.PageSize.Width / 8.5); + double cm = inch / 2.54; + double leftX = 1 * cm; + + var point = new PdfPoint(leftX, topPageY); + DateTimeStampPage(pdfBuilder, page, point, cm); + var letters = page.AddText("Adobe Standard Font Symbol ", 21, point, F2); + var newY = topPageY - letters.Select(v => v.GlyphRectangle.Height).Max() * 1.2; + point = new PdfPoint(leftX, newY); + letters = page.AddText("Font Specific encoding in Black (octal), Unicode in Blue (hex), Red only available using Unicode", 10, point, F2); + newY = newY - letters.Select(v => v.GlyphRectangle.Height).Max() * 3; + + + + (var maxCharacterHeight, var maxCharacterWidth) = GetCharacterDetails(page, F1, 12d, unicodesCharacters); + var context = 
GetContext(F1, page, nameof(F1), F2, maxCharacterHeight, maxCharacterWidth); + + // First set of rows for direct PDF font specific character codes + newY = newY - maxCharacterHeight; + point = new PdfPoint(leftX, newY); + var eachRowY = new List(new[] { newY }); + page.SetTextAndFillColor(0, 0, 0); //Black + bool isTextColorBlack = true; + foreach ((var codeFontSpecific, var name) in EncodingTable) + { + var code = codeFontSpecific; // Code is already converted [neither OctalHelpers.FromOctalInt or System.Convert.ToInt32($"{code}", 8); is required] + // For a clash library uses unicode interpretation. + // Substitue if code is any of the 4 codes that clash (in Unicode and font specific encodes for Symbol) + if (code == 0xac) code = '\u2190'; // 0xac in unicode is logicalnot ('¬') use Unicode alternative for arrowleft ('←') 0x2190 + if (code == 0xf7) code = '\uf8f7'; // 0xf7 in unicode is divide ('÷') (different form '/') use Unicode alternative for parenrightex Unicode 0xF8F7 + if (code == 0xb5) code = '\u221D'; // 0xb5 in unicode is lowercase mu ('µ') use Unicode alternative for proportiona('∝') 0x221D + if (code == 0xd7) code = '\u22c5'; // 0xd7 in unicode is muliply ('×') (different from '*') use Unicode alternative for dotmath ('⋅') 0x22C5 + if (code != codeFontSpecific && isTextColorBlack) { page.SetTextAndFillColor(200, 0, 0); isTextColorBlack = false; } + if (code == codeFontSpecific && isTextColorBlack == false) { page.SetTextAndFillColor(0, 0, 0); isTextColorBlack = true; } + + char ch = (char)code; + point = AddLetterWithContext(point, $"{ch}", context, isTextColorBlack); + if (eachRowY.Last() != point.Y) { eachRowY.Add(point.Y); } + } + + // Second set of rows for (unicode) characters : Test mapping from (C#) unicode chars to font specific encoding + newY = newY - maxCharacterHeight * 1.2; + point = new PdfPoint(leftX, newY); + + page.SetTextAndFillColor(0, 0, 200); //Blue + foreach (var unicodeCh in unicodesCharacters) + { + point = 
AddLetterWithContext(point, $"{unicodeCh}", context, isHexLabel: true);
                }
            }

            // Save two page PDF to file system for manual review.
            var pdfBytes = pdfBuilder.Build();
            WritePdfFile(nameof(SymbolFontAddText), pdfBytes);


            // Check extracted letters
            using (var document = PdfDocument.Open(pdfBytes))
            {
                var page1 = document.GetPage(1);
                var letters = page1.Letters;

                {
                    var lettersFontSpecificCodes = letters.Where(l => l.FontName == "Symbol"
                                                                    && l.Color.ToRGBValues().b == 0
                                                                    && (l.Color.ToRGBValues().b == 0
                                                                        || l.Color.ToRGBValues().r == 200)
                                                                )
                                                            .ToList();


                    Assert.Equal(189, lettersFontSpecificCodes.Count);
                    Assert.Equal(EncodingTable.Length, lettersFontSpecificCodes.Count);
                    for (int i = 0; i < lettersFontSpecificCodes.Count; i++)
                    {
                        var letter = lettersFontSpecificCodes[i];

                        (var code, var name) = EncodingTable[i];
                        var unicodeString = GlyphList.AdobeGlyphList.NameToUnicode(name);

                        var letterCharacter = letter.Value[0];
                        var unicodeCharacter = unicodeString[0];
                        //Debug.WriteLine($"{letterCharacter} , {unicodeCharacter}");
                        Assert.Equal(letterCharacter, unicodeCharacter);
                    }
                }

                {
                    var lettersUnicode = letters.Where(l => l.FontName == "Symbol"
                                                        && l.Color.ToRGBValues().b > 0.78m)
                                                .ToList();
                    Assert.Equal(189, lettersUnicode.Count);
                    for (int i = 0; i < lettersUnicode.Count; i++)
                    {
                        var letter = lettersUnicode[i];

                        var letterCharacter = letter.Value[0];
                        var unicodeCharacter = unicodesCharacters[i];
                        //Debug.WriteLine($"{letterCharacter} , {unicodeCharacter}");
                        Assert.Equal(letterCharacter, unicodeCharacter);
                    }
                }
            }
        }

        /// <summary>
        /// Adds and measures text in the Symbol font using characters that are not expected to be
        /// supported, and checks the message of any <see cref="InvalidOperationException"/> raised.
        /// </summary>
        /// <remarks>
        /// Two groups of characters are exercised: codes 0..254 that do not appear in the Symbol
        /// encoding table, and ten randomly chosen non-surrogate characters that are not among
        /// the Unicode values of the table's glyph names.
        /// </remarks>
        [Fact]
        public void SymbolFontErrorResponseAddingInvalidText()
        {
            PdfDocumentBuilder pdfBuilder = new PdfDocumentBuilder();
            PdfDocumentBuilder.AddedFont F1 = pdfBuilder.AddStandard14Font(Standard14Font.Symbol);
            var EncodingTable = GetEncodingTable(typeof(UglyToad.PdfPig.Fonts.Encodings.SymbolEncoding));

            PdfPageBuilder page = pdfBuilder.AddPage(PageSize.A4);
            var cm = (page.PageSize.Width / 8.5 / 2.54);
            var point = new PdfPoint(cm, page.PageSize.Top - cm);

            // Runs one add/measure attempt. When it throws, the exception text is verified.
            // NOTE(review): Assert.True(true, ...) can never fail, so an attempt that does NOT throw
            // is silently accepted. That is the existing behaviour and is kept as-is here; confirm
            // whether a hard failure is wanted before tightening it.
            void CheckRejected(Action attempt, string notThrownMessage)
            {
                try
                {
                    attempt();
                    Assert.True(true, notThrownMessage);
                }
                catch (InvalidOperationException ex)
                {
                    // Expected
                    // "The font does not contain a character: '?' (0xnn)." where ? is a character and nn is hex number.
                    Assert.Contains("The font does not contain a character", ex.Message);
                }
            }

            // Adding and measuring must behave the same way, so both are attempted for each character.
            void CheckAddAndMeasure(char ch)
            {
                var notThrownMessage = $"Unexpected. Character: '{ch}' (0x{(int)ch:X}) should throw. Not supported.";
                CheckRejected(() => page.AddText($"{ch}", 12, point, F1), notThrownMessage);
                CheckRejected(() => page.MeasureText($"{ch}", 12, point, F1), notThrownMessage);
            }

            {
                // Get the codes that have no character associated in the font specific coding.
                // NOTE(review): the table codes are used here exactly as stored; StandardFontsAddText
                // converts them with Convert.ToInt32(..., 8) first - confirm which form is intended.
                var codesUnder255 = Enumerable.Range(0, 255).Select(v => (char)v).ToArray();
                var codesFromEncodingTable = EncodingTable.Select(v => (char)v.code).ToArray();
                var invalidCharactersUnder255 = codesUnder255.Except(codesFromEncodingTable).ToArray();
                Debug.WriteLine($"Number of invalid under 255 characters: {invalidCharactersUnder255.Length}");

                foreach (var ch in invalidCharactersUnder255)
                {
                    CheckAddAndMeasure(ch);
                }
            }

            {
                var unicodesCharacters = GetUnicodeCharacters(EncodingTable, GlyphList.AdobeGlyphList);

                var randomCharacters = new char[10];
                {
                    var knownUnicodeCharacters = new HashSet<int>(unicodesCharacters.Select(v => (int)v));
                    var rnd = new Random();
                    int nextIndex = 0;
                    while (nextIndex < randomCharacters.Length)
                    {
                        var value = rnd.Next(0x10ffff);

                        // The cast keeps only the low 16 bits, so the checks below are made on the
                        // character that is actually stored rather than on the raw random number.
                        char ch = (char)value;
                        int i = (int)ch;
                        if (knownUnicodeCharacters.Contains(i)) { continue; }
                        if (i >= 0xd800 && i <= 0xdfff) { continue; }
                        randomCharacters[nextIndex++] = ch;
                        Debug.WriteLine($"{i:X}");
                    }
                }

                foreach (var ch in randomCharacters)
                {
                    int i = (int)ch;
                    if (i > 0x10ffff)
                    {
                        Debug.WriteLine($"Unexpected unicode point. Too large to be unicode. Expected: <0x10ffff. Got: 0x{i:X}");
                        continue;
                    }
                    if (i >= 0xd800 && i <= 0xdfff)
                    {
                        Debug.WriteLine($"Unexpected unicode point that is not a surrogate Expected: <0xd800 && >0xdfff. Got: 0x{i:X}");
                        continue;
                    }

                    CheckAddAndMeasure(ch);
                }
            }
        }
        [Fact]
        public void StandardFontsAddText()
        {
            PdfDocumentBuilder pdfBuilder = new PdfDocumentBuilder();
            PdfDocumentBuilder.AddedFont F1 = pdfBuilder.AddStandard14Font(Standard14Font.TimesRoman);
            PdfDocumentBuilder.AddedFont F2 = pdfBuilder.AddStandard14Font(Standard14Font.TimesBold);
            PdfDocumentBuilder.AddedFont F3 = pdfBuilder.AddStandard14Font(Standard14Font.TimesItalic);
            PdfDocumentBuilder.AddedFont F4 = pdfBuilder.AddStandard14Font(Standard14Font.TimesBoldItalic);
            PdfDocumentBuilder.AddedFont F5 = pdfBuilder.AddStandard14Font(Standard14Font.Helvetica);
            PdfDocumentBuilder.AddedFont F6 = pdfBuilder.AddStandard14Font(Standard14Font.HelveticaBold);
            PdfDocumentBuilder.AddedFont F7 = pdfBuilder.AddStandard14Font(Standard14Font.HelveticaOblique);
            PdfDocumentBuilder.AddedFont F8 = pdfBuilder.AddStandard14Font(Standard14Font.HelveticaBoldOblique);
            PdfDocumentBuilder.AddedFont F9 = pdfBuilder.AddStandard14Font(Standard14Font.Courier);
            PdfDocumentBuilder.AddedFont F10 = pdfBuilder.AddStandard14Font(Standard14Font.CourierBold);
            PdfDocumentBuilder.AddedFont F11 = pdfBuilder.AddStandard14Font(Standard14Font.CourierOblique);
            PdfDocumentBuilder.AddedFont F12 = pdfBuilder.AddStandard14Font(Standard14Font.CourierBoldOblique);

            var standardFontsWithStandardEncoding = new PdfDocumentBuilder.AddedFont[]
            {
                F1,
                F2,
                F3,
                F4,
                F5,
                F6,
                F7,
                F8,
                F9,
                F10,
                F11,
                F12
            };

            //AddLetterWithFont(page, point, "v", F1, nameof(F1));
            //AddLetterWithFont(page, point, "v", F2, nameof(F2));
            //AddLetterWithFont(page, point, "v", F3, nameof(F3));
            //AddLetterWithFont(page, point, "v", F4, nameof(F4));
            //AddLetterWithFont(page, point, "v", F5, nameof(F5));
            //AddLetterWithFont(page, point, "v", F6, nameof(F6));
            //AddLetterWithFont(page, point, "v", F7, nameof(F7));
            //AddLetterWithFont(page, point, "v", F8, nameof(F8));
//AddLetterWithFont(page, point, "v", F9, nameof(F9)); + //AddLetterWithFont(page, point, "v", F10, nameof(F10)); + //AddLetterWithFont(page, point, "v", F11, nameof(F11)); + //AddLetterWithFont(page, point, "v", F12, nameof(F12)); + + + // Get all characters codes in font using existing metrics in (private) Standard14Cache class (using reflection). + var Standard14Cache = GetStandard14Cache(); + + // All 12 fonts should conform to 'StanardEncoding' + var EncodingTable = ((int code, string name)[])GetEncodingTable(typeof(UglyToad.PdfPig.Fonts.Encodings.StandardEncoding)); + var unicodesCharacters = GetUnicodeCharacters(EncodingTable, GlyphList.AdobeGlyphList); + + int fontNumber = 0; + foreach (var font in standardFontsWithStandardEncoding) + { + fontNumber++; + var storedFont = pdfBuilder.Fonts[font.Id]; + var fontProgram = storedFont.FontProgram; + var fontName = fontProgram.Name; + + { + PdfPageBuilder page = pdfBuilder.AddPage(PageSize.A4); + + double topPageY = page.PageSize.Top - 50; + double inch = (page.PageSize.Width / 8.5); + double cm = inch / 2.54; + double leftX = 1 * cm; + + var point = new PdfPoint(leftX, topPageY); + DateTimeStampPage(pdfBuilder, page, point, cm); + var letters = page.AddText("Adobe Standard Font "+ fontName, 21, point, F2); + var newY = topPageY - letters.Select(v => v.GlyphRectangle.Height).Max() * 1.2; + point = new PdfPoint(leftX, newY); + letters = page.AddText("Font Specific encoding in Black, Unicode in Blue, Red only available using Unicode", 10, point, F2); + newY = newY - letters.Select(v => v.GlyphRectangle.Height).Max() * 3; + point = new PdfPoint(leftX, newY); + + + var eachRowY = new List(new[] { newY }); + + var metrics = Standard14Cache[fontName]; + + var codesFromMetrics = new HashSet(); + page.SetTextAndFillColor(0, 0, 0); //Black + + (var maxCharacterHeight, var maxCharacterWidth) = GetCharacterDetails(page, F1, 12d, unicodesCharacters); + var context = GetContext(font, page, $"F{fontNumber}", F2, 
maxCharacterHeight, maxCharacterWidth); + + // Detect if all codes in Standard encoding table are in metrics for font. + bool isMissing = false; + bool isTextColorBlack = true; + foreach ((var codeNotBase8Converted, var name) in EncodingTable) + { + var codeFontSpecific = System.Convert.ToInt32($"{codeNotBase8Converted}", 8); + var isToggleColor = false; + var code = codeFontSpecific; + if (codeFontSpecific == 0xc6) { code = 0x02D8; } + else if (codeFontSpecific == 0xb4) { code = 0x00b7; } + else if (codeFontSpecific == 0xb7) { code = 0x2022; } + else if (codeFontSpecific == 0xb8) { code = 0x201A; } + else if (codeFontSpecific == 0xa4) { code = 0x2044; } + else if (codeFontSpecific == 0xa8) { code = 0x00a4; } + else if (codeFontSpecific == 0x60) { code = 0x2018; } + else if (codeFontSpecific == 0xaf) { code = 0xFB02; } + else if (codeFontSpecific == 0xaa) { code = 0x201C; } + else if (codeFontSpecific == 0xba) { code = 0x201D; } + else if (codeFontSpecific == 0xf8) { code = 0x0142; } + else if (codeFontSpecific == 0x27) { code = 0x2019; } + if (code != codeFontSpecific && isTextColorBlack) { page.SetTextAndFillColor(200, 0, 0); isTextColorBlack = false; } + if (code == codeFontSpecific && isTextColorBlack == false) { page.SetTextAndFillColor(0, 0, 0); isTextColorBlack = true; } + + char ch = (char)code; + point = AddLetterWithContext(point, $"{ch}", context, isTextColorBlack); + + if (eachRowY.Last() != point.Y) { eachRowY.Add(point.Y); } + } + + foreach (var metric in metrics.CharacterMetrics) + { + var code = metric.Value.CharacterCode; + if (code == -1) continue; + codesFromMetrics.Add(code); + } + + foreach ((var codeNotBase8Converted, var name) in EncodingTable) + { + var codeBase10 = System.Convert.ToInt32($"{codeNotBase8Converted}", 8); + if (codesFromMetrics.Contains(codeBase10) == false) + { + var ch = (char)codeBase10; + isMissing = true; + Debug.WriteLine($"In Adobe Standard Font '{fontName}' code {codeBase10} is in Standard encoding table but not in 
font metrics."); + } + } + + Assert.False(isMissing, $"Adobe Standard Font '{fontName}' contains code(s) in Standard encoding table but not in font metrics. See Debug output for details."); + + // Second set of rows for (unicode) characters : Test mapping from (C#) unicode chars to PDF encoding + newY = newY - maxCharacterHeight * 1.2; + point = new PdfPoint(leftX, newY); + page.SetTextAndFillColor(0, 0, 200); //Blue + foreach (var unicodeCh in unicodesCharacters) + { + point = AddLetterWithContext(point, $"{unicodeCh}", context, isHexLabel:true); + } + } + } + + // Save one page per standard font to file system for manual review. + var pdfBytes = pdfBuilder.Build(); + WritePdfFile($"{nameof(StandardFontsAddText)}", pdfBytes); + + // Check extracted letters + using (var document = PdfDocument.Open(pdfBytes)) + { + foreach (var page in document.GetPages()) + { + var letters = page.Letters; + var expectedFontName = letters.FirstOrDefault(l=>l.FontSize == 12d).FontName; + + + { + var lettersFontSpecificCodes = letters.Where(l => l.FontName == expectedFontName + && l.FontSize == 12d + && ( l.Color.ToRGBValues().b == 0 + || l.Color.ToRGBValues().r == 200) + ) + .ToList(); + + + Assert.Equal(149, lettersFontSpecificCodes.Count); + Assert.Equal(lettersFontSpecificCodes.Count, EncodingTable.Length); + for (int i = 0; i < lettersFontSpecificCodes.Count; i++) + { + var letter = lettersFontSpecificCodes[i]; + + (var code, var name) = EncodingTable[i]; + var unicodeString = GlyphList.AdobeGlyphList.NameToUnicode(name); + + var letterCharacter = letter.Value[0]; + var unicodeCharacter = unicodeString[0]; + if (letterCharacter != unicodeCharacter) Debug.WriteLine($"{letterCharacter} , {unicodeCharacter}"); + Assert.Equal(unicodeCharacter, letterCharacter); + } + } + + { + var lettersUnicode = letters.Where(l => l.FontName == expectedFontName + && l.FontSize == 12d + && l.Color.ToRGBValues().b > 0.78m) + .ToList(); + Assert.Equal(149,lettersUnicode.Count); + for (int i = 0; i < 
lettersUnicode.Count; i++)
                        {
                            var letter = lettersUnicode[i];

                            var letterCharacter = letter.Value[0];
                            var unicodeCharacter = unicodesCharacters[i];
                            //Debug.WriteLine($"{letterCharacter} , {unicodeCharacter}");
                            Assert.Equal(unicodeCharacter, letterCharacter);
                        }
                    }

                }
            }
        }


        /// <summary>
        /// Adds and measures text in each of the twelve Times/Helvetica/Courier standard fonts using
        /// characters that are not expected to be supported, and checks the message of any
        /// <see cref="InvalidOperationException"/> raised.
        /// </summary>
        /// <remarks>
        /// Two groups of characters are exercised per font: codes 0..254 that do not appear in the
        /// Standard encoding table, and ten randomly chosen non-surrogate characters that are not
        /// among the Unicode values of the table's glyph names.
        /// </remarks>
        [Fact]
        public void StandardFontErrorResponseAddingInvalidText()
        {
            PdfDocumentBuilder pdfBuilder = new PdfDocumentBuilder();
            PdfPageBuilder page = pdfBuilder.AddPage(PageSize.A4);
            var cm = (page.PageSize.Width / 8.5 / 2.54);
            var point = new PdfPoint(cm, page.PageSize.Top - cm);

            // Fonts are registered in the same order as before (array elements are evaluated left to right).
            var standardFontsWithStandardEncoding = new PdfDocumentBuilder.AddedFont[]
            {
                pdfBuilder.AddStandard14Font(Standard14Font.TimesRoman),
                pdfBuilder.AddStandard14Font(Standard14Font.TimesBold),
                pdfBuilder.AddStandard14Font(Standard14Font.TimesItalic),
                pdfBuilder.AddStandard14Font(Standard14Font.TimesBoldItalic),
                pdfBuilder.AddStandard14Font(Standard14Font.Helvetica),
                pdfBuilder.AddStandard14Font(Standard14Font.HelveticaBold),
                pdfBuilder.AddStandard14Font(Standard14Font.HelveticaOblique),
                pdfBuilder.AddStandard14Font(Standard14Font.HelveticaBoldOblique),
                pdfBuilder.AddStandard14Font(Standard14Font.Courier),
                pdfBuilder.AddStandard14Font(Standard14Font.CourierBold),
                pdfBuilder.AddStandard14Font(Standard14Font.CourierOblique),
                pdfBuilder.AddStandard14Font(Standard14Font.CourierBoldOblique)
            };

            var EncodingTable = GetEncodingTable(typeof(UglyToad.PdfPig.Fonts.Encodings.StandardEncoding));

            // Get the codes that have no character associated in the font specific coding.
            // NOTE(review): the table codes are used here exactly as stored; StandardFontsAddText
            // converts them with Convert.ToInt32(..., 8) first - confirm which form is intended.
            var codesFromEncodingTable = EncodingTable.Select(v => (char)v.code).ToArray();
            char[] invalidCharactersUnder255 = Enumerable.Range(0, 255)
                                                         .Select(v => (char)v)
                                                         .Except(codesFromEncodingTable)
                                                         .ToArray();
            Debug.WriteLine($"Number of invalid under 255 characters: {invalidCharactersUnder255.Length}");

            // Get random unicodes not valid for any font with Standard encoding.
            var randomUnicodeCharacters = new char[10];
            {
                var unicodesCharacters = GetUnicodeCharacters(EncodingTable, GlyphList.AdobeGlyphList);
                var knownUnicodeCharacters = new HashSet<int>(unicodesCharacters.Select(v => (int)v));
                var rnd = new Random();
                int nextIndex = 0;
                while (nextIndex < randomUnicodeCharacters.Length)
                {
                    var value = rnd.Next(0x10ffff);

                    // The cast keeps only the low 16 bits, so the checks below are made on the
                    // character that is actually stored rather than on the raw random number.
                    char ch = (char)value;
                    int i = (int)ch;
                    if (knownUnicodeCharacters.Contains(i)) { continue; }
                    if (i >= 0xd800 && i <= 0xdfff) { continue; }
                    randomUnicodeCharacters[nextIndex++] = ch;
                    Debug.WriteLine($"{i:X}");
                }
            }

            // Runs one add/measure attempt. When it throws, the exception text is verified.
            // NOTE(review): Assert.True(true, ...) can never fail, so an attempt that does NOT throw
            // is silently accepted. That is the existing behaviour and is kept as-is here; confirm
            // whether a hard failure is wanted before tightening it.
            void CheckRejected(Action attempt, string notThrownMessage)
            {
                try
                {
                    attempt();
                    Assert.True(true, notThrownMessage);
                }
                catch (InvalidOperationException ex)
                {
                    // Expected
                    // "The font does not contain a character: '?' (0xnn)." where ? is a character and nn is hex number.
                    Assert.Contains("The font does not contain a character", ex.Message);
                }
            }

            foreach (var font in standardFontsWithStandardEncoding)
            {
                var storedFont = pdfBuilder.Fonts[font.Id];
                var fontProgram = storedFont.FontProgram;
                var fontName = fontProgram.Name;

                foreach (var ch in invalidCharactersUnder255)
                {
                    var notThrownMessage = $"Unexpected. Character: '{ch}' (0x{(int)ch:X}) should throw. Not supported. Font: '{fontName}'";
                    CheckRejected(() => page.AddText($"{ch}", 12, point, font), notThrownMessage);
                    CheckRejected(() => page.MeasureText($"{ch}", 12, point, font), notThrownMessage);
                }


                foreach (var ch in randomUnicodeCharacters)
                {
                    int i = (int)ch;
                    if (i > 0x10ffff)
                    {
                        Debug.WriteLine($"Unexpected unicode point. Too large to be unicode. Expected: <0x10ffff. Got: 0x{i:X}");
                        continue;
                    }
                    if (i >= 0xd800 && i <= 0xdfff)
                    {
                        Debug.WriteLine($"Unexpected unicode point that is not a surrogate Expected: <0xd800 && >0xdfff. Got: 0x{i:X}");
                        continue;
                    }

                    var notThrownMessage = $"Unexpected. Character: '{ch}' (0x{(int)ch:X}) should throw. Not supported.";
                    CheckRejected(() => page.AddText($"{ch}", 12, point, font), notThrownMessage);
                    CheckRejected(() => page.MeasureText($"{ch}", 12, point, font), notThrownMessage);
                }

            }
        }

        /// <summary>
        /// Unpacks the drawing context tuple and forwards to <see cref="AddLetter"/>.
        /// </summary>
        /// <returns>The point returned by <see cref="AddLetter"/>.</returns>
        internal PdfPoint AddLetterWithContext( PdfPoint point, string stringToAdd, ( PdfDocumentBuilder.AddedFont font, PdfPageBuilder page, string fontName, PdfDocumentBuilder.AddedFont fontLabel, double maxCharacterHeight, double maxCharacterWidth)context, bool isOctalLabel = false, bool isHexLabel = false)
        {
            var font = context.font;
            var page = context.page;
            var fontName = context.fontName;
            var fontLabel = context.fontLabel;
            var maxCharacterHeight = context.maxCharacterHeight;
            var maxCharacterWidth = context.maxCharacterWidth;

            return AddLetter(page, point, stringToAdd, font, fontName, fontLabel, maxCharacterHeight, maxCharacterWidth,isOctalLabel, isHexLabel);
        }
        internal PdfPoint AddLetter(PdfPageBuilder page, PdfPoint point, string stringToAdd, PdfDocumentBuilder.AddedFont font, string fontName, PdfDocumentBuilder.AddedFont fontLabel, double maxCharacterHeight, double maxCharacterWidth, bool isOctalLabel = false, bool isHexLabel = false)
        {
            if (stringToAdd is null) { throw new ArgumentException("Text to add must be a single letter.", nameof(stringToAdd)); }
            if (stringToAdd.Length > 1) { throw new ArgumentException("Text to add must be a single letter.", nameof(stringToAdd)); }
            if (fontName.ToUpper() != fontName) { throw new ArgumentException(@"FontName must be in uppercase eg. 
""F1"".", nameof(fontName)); } + + var letter = page.AddText(stringToAdd, 12, point, font); + if (isOctalLabel) + { + var labelPointSize = 5; + var octalString = System.Convert.ToString((int)stringToAdd[0],8).PadLeft(3, '0'); + var label = octalString; + var codeMidPoint = point.X + letter[0].GlyphRectangle.Width / 2; + var ml = page.MeasureText(label, labelPointSize, point, fontLabel); + var labelY = point.Y + ml.Max(v => v.GlyphRectangle.Height) * 0.1 + maxCharacterHeight; + var xLabel =codeMidPoint - (ml.Sum(v => v.GlyphRectangle.Width) /2); + var labelPoint = new PdfPoint(xLabel, labelY); + page.AddText(label, labelPointSize, labelPoint, fontLabel); + } + + if (isHexLabel) + { + var labelPointSize = 3; + var hexString = $"{(int)stringToAdd[0]:X}".PadLeft(4, '0'); + var label = "0x" + hexString; + var codeMidPoint = point.X + letter[0].GlyphRectangle.Width / 2; + var ml = page.MeasureText(label, labelPointSize, point, fontLabel); + var labelY = point.Y - ml.Max(v => v.GlyphRectangle.Height) * 2.5; + var xLabel = codeMidPoint - (ml.Sum(v => v.GlyphRectangle.Width) / 2); + var labelPoint = new PdfPoint(xLabel, labelY); + page.AddText(label, labelPointSize, labelPoint, fontLabel); + } + + + Assert.NotNull(letter); // We should get back something. + Assert.Equal(1, letter.Count); // There should be only one letter returned after the add operation. + Assert.Equal(stringToAdd, letter[0].Value); // Check we got back the name letter (eg. 
"v") + //Debug.WriteLine($"{letter[0]}"); + + double inch = (page.PageSize.Width / 8.5); + double cm = inch / 2.54; + + var letterWidth = letter[0].GlyphRectangle.Width * 2; + var letterHeight = letter[0].GlyphRectangle.Height * 2; + + var newX = point.X + maxCharacterWidth * 1.1; + var newY = point.Y; + + if (newX > page.PageSize.Width - cm) + { + return newLine(cm, point.Y, maxCharacterHeight); + } + return new PdfPoint(newX, newY); + } + + PdfPoint newLine(double cm, double y, double maxCharacterHeight) + { + var newX = 1 * cm; + var newY = y - maxCharacterHeight * 5; + return new PdfPoint(newX, newY); + } + + private static void WritePdfFile(string name, byte[] bytes, string extension = "pdf") + { + const string subFolder = nameof(Standard14WritingFontTests); + var folderPath = subFolder; + + if (!Directory.Exists(folderPath)) + { + Directory.CreateDirectory(folderPath); + } + + var filePath = Path.Combine(folderPath, $"{name}.{extension}"); + File.WriteAllBytes(filePath, bytes); + Debug.WriteLine($@"{Path.Combine(Directory.GetCurrentDirectory(), filePath)}"); + } + + private static (int code, string name)[] GetEncodingTable(Type t) + { + // Get existing (but private) EncodingTable from encoding class using reflection so we can obtain all codes + var EncodingTableFieldInfo = t.GetFields(BindingFlags.NonPublic | BindingFlags.Static) + .FirstOrDefault(v => v.Name == "EncodingTable"); + (int, string)[] EncodingTable = ((int, string)[])EncodingTableFieldInfo.GetValue(Activator.CreateInstance(t, true)); + return EncodingTable; + } + + + private (PdfDocumentBuilder.AddedFont font, PdfPageBuilder page, string fontName, PdfDocumentBuilder.AddedFont fontLabel, double maxCharacterHeight, double maxCharacterWidth) GetContext(PdfDocumentBuilder.AddedFont font, PdfPageBuilder page, string fontName, PdfDocumentBuilder.AddedFont fontLabel, double maxCharacterHeight, double maxCharacterWidth) + { + return (font, page, fontName, fontLabel, maxCharacterHeight, 
maxCharacterWidth);

        }

        /// <summary>
        /// Maps every glyph name in the encoding table to the first character of the string the
        /// glyph list returns for that name.
        /// </summary>
        /// <param name="EncodingTable">The (code, glyph name) pairs of an encoding.</param>
        /// <param name="glyphList">The glyph list used to translate glyph names.</param>
        /// <returns>One character per table entry, in table order.</returns>
        private static char[] GetUnicodeCharacters((int code, string name)[] EncodingTable, GlyphList glyphList)
        {
            var gylphNamesFromEncodingTable = EncodingTable.Select(v => v.name).ToArray();
            char[] unicodesCharacters = gylphNamesFromEncodingTable.Select(v => (char)glyphList.NameToUnicode(v)[0]).ToArray();
            return unicodesCharacters;
        }

        /// <summary>
        /// Measures each character on its own and reports the largest glyph height and the largest
        /// glyph width found.
        /// </summary>
        /// <param name="page">The page builder used to measure the text.</param>
        /// <param name="font">The font to measure with.</param>
        /// <param name="fontSize">The font size passed to the measurement.</param>
        /// <param name="unicodesCharacters">The characters to measure; must not be empty.</param>
        /// <returns>The maximum glyph rectangle height and width over all characters.</returns>
        (double maxCharacterHeight, double maxCharacterWidth) GetCharacterDetails(PdfPageBuilder page, PdfDocumentBuilder.AddedFont font, double fontSize, char[] unicodesCharacters)
        {
            var point = new PdfPoint(10, 10);

            // Measure once and keep the results: a deferred query would re-measure every
            // character for each of the two Max calls below.
            // The requested fontSize is honoured (the only visible caller passes 12).
            var characterRectangles = unicodesCharacters
                .Select(v => page.MeasureText($"{v}", (decimal)fontSize, point, font)[0].GlyphRectangle)
                .ToList();

            var maxCharacterHeight = characterRectangles.Max(v => v.Height);
            // Width, not Height: the width was previously computed from the height by mistake.
            var maxCharacterWidth = characterRectangles.Max(v => v.Width);

            return (maxCharacterHeight, maxCharacterWidth);
        }


        // Reads the non-public static "Standard14Cache" field of the Standard14 type via reflection.
        // NOTE(review): the generic type arguments of Dictionary appear to be missing in this text
        // (extraction loss?) - left untouched here; confirm against the real file.
        private static Dictionary GetStandard14Cache()
        {
            var Standard14Type = typeof(UglyToad.PdfPig.Fonts.Standard14Fonts.Standard14);
            var Standard14CacheFieldInfos = Standard14Type.GetFields(BindingFlags.NonPublic | BindingFlags.Static);
            var Standard14Cache = (Dictionary)Standard14CacheFieldInfos.FirstOrDefault(v => v.Name == "Standard14Cache").GetValue(null);
            return Standard14Cache;
        }

        private static void DateTimeStampPage(PdfDocumentBuilder pdfBuilder, PdfPageBuilder page, PdfPoint point, double cm)
        {
            var courierFont = pdfBuilder.AddStandard14Font(Standard14Font.Courier);

            var stampTextUTC = " UTC: " + DateTime.UtcNow.ToString("yyyy-MMM-dd HH:mm");
            var stampTextLocal = "Local: " + DateTimeOffset.Now.ToString("yyyy-MMM-dd HH:mm zzz");

            const decimal fontSize = 7m;

            var indentFromLeft = page.PageSize.Width - cm;
            {
                var mtUTC = page.MeasureText(stampTextUTC, fontSize, point, courierFont);
                var mtlocal = page.MeasureText(stampTextLocal, fontSize, point, courierFont);
                var widthUTC = mtUTC.Sum(v => 
v.GlyphRectangle.Width); + var widthLocal = mtlocal.Sum(v => v.GlyphRectangle.Width); + + indentFromLeft -= Math.Max(widthUTC, widthLocal); + } + + { + point = new PdfPoint(indentFromLeft, point.Y); + var letters = page.AddText(stampTextUTC, 7m, point, courierFont); + var maxHeight = letters.Max(v=>v.GlyphRectangle.Height); + point = new PdfPoint(indentFromLeft, point.Y - maxHeight * 1.2); + } + + { + var letters = page.AddText(stampTextLocal, 7m, point, courierFont); + } + } + } +} diff --git a/src/UglyToad.PdfPig/Content/Letter.cs b/src/UglyToad.PdfPig/Content/Letter.cs index 5c3fca4b..b15ea4f3 100644 --- a/src/UglyToad.PdfPig/Content/Letter.cs +++ b/src/UglyToad.PdfPig/Content/Letter.cs @@ -2,8 +2,9 @@ { using Core; using Graphics.Colors; - using PdfFonts; - + using PdfFonts; + using System.Diagnostics; + /// /// A glyph or combination of glyphs (characters) drawn by a PDF content stream. /// diff --git a/src/UglyToad.PdfPig/Parser/CatalogFactory.cs b/src/UglyToad.PdfPig/Parser/CatalogFactory.cs index 26095d8a..88e69586 100644 --- a/src/UglyToad.PdfPig/Parser/CatalogFactory.cs +++ b/src/UglyToad.PdfPig/Parser/CatalogFactory.cs @@ -87,7 +87,10 @@ //If we got here, we have to iterate till we manage to exit - HashSet visitedTokens = new HashSet(); // As we visit each token add to this list (the hashcode of the indirect reference) + // Attempt to detect (and break) any infitine loop (IL) by recording the ids of the last 1000 (by default) tokens processed. 
+ const int InfiniteLoopWorkingWindow = 1000; + var visitedTokens = new Dictionary>(); // Quick lookup containing ids (object number, generation) of tokens already processed (trimmed as we go to last 1000 (by default)) + var visitedTokensWorkingWindow = new Queue<(long ObjectNumber, int Generation)>(InfiniteLoopWorkingWindow); var toProcess = new Queue<(PageTreeNode thisPage, IndirectReference reference, DictionaryToken nodeDictionary, IndirectReference parentReference, @@ -105,15 +108,52 @@ do { var current = toProcess.Dequeue(); - var currentReferenceHash = current.reference.GetHashCode(); - if (visitedTokens.Contains(currentReferenceHash)) + + #region Break any potential infinite loop + // Remember the last 1000 (by default) tokens and if we attempt to process again break out of loop + var currentReferenceObjectNumber = current.reference.ObjectNumber; + var currentReferenceGeneration = current.reference.Generation; + if (visitedTokens.ContainsKey(currentReferenceObjectNumber)) { - continue; // don't revisit token already processed. break infinite loop. Issue #512 + var generations = visitedTokens[currentReferenceObjectNumber]; + + if (generations.Contains(currentReferenceGeneration)) + { + var listOfLastVisitedToken = visitedTokensWorkingWindow.ToList(); + var indexOfCurrentTokenInListOfLastVisitedToken = listOfLastVisitedToken.IndexOf((currentReferenceObjectNumber, currentReferenceGeneration)); + var howManyTokensBack = Math.Abs(indexOfCurrentTokenInListOfLastVisitedToken - listOfLastVisitedToken.Count); //eg initate loop is taking us back to last token or five token back + System.Diagnostics.Debug.WriteLine($"Break infinite loop while processing page {pageNumber.PageCount+1} tokens. Token with object number {currentReferenceObjectNumber} and generation {currentReferenceGeneration} processed {howManyTokensBack} token(s) back. "); + continue; // don't reprocess token already processed. break infinite loop. 
Issue #519 + } + else + { + generations.Add(currentReferenceGeneration); + visitedTokens[currentReferenceObjectNumber] = generations; + } } else { - visitedTokens.Add(currentReferenceHash); - } + visitedTokens.Add(currentReferenceObjectNumber, new HashSet() { currentReferenceGeneration }); + + visitedTokensWorkingWindow.Enqueue((currentReferenceObjectNumber, currentReferenceGeneration)); + if (visitedTokensWorkingWindow.Count >= InfiniteLoopWorkingWindow) + { + var toBeRemovedFromWorkingHashset = visitedTokensWorkingWindow.Dequeue(); + var toBeRemovedObjectNumber = toBeRemovedFromWorkingHashset.ObjectNumber; + var toBeRemovedGeneration = toBeRemovedFromWorkingHashset.Generation; + var generations = visitedTokens[toBeRemovedObjectNumber]; + generations.Remove(toBeRemovedGeneration); + if (generations.Count == 0) + { + visitedTokens.Remove(toBeRemovedObjectNumber); + } + else + { + visitedTokens[toBeRemovedObjectNumber] = generations; + } + } + } + #endregion if (!current.nodeDictionary.TryGet(NameToken.Kids, pdfTokenScanner, out ArrayToken kids)) { if (!isLenientParsing) diff --git a/src/UglyToad.PdfPig/PdfFonts/Simple/Type1Standard14Font.cs b/src/UglyToad.PdfPig/PdfFonts/Simple/Type1Standard14Font.cs index a67686b3..34359905 100644 --- a/src/UglyToad.PdfPig/PdfFonts/Simple/Type1Standard14Font.cs +++ b/src/UglyToad.PdfPig/PdfFonts/Simple/Type1Standard14Font.cs @@ -1,7 +1,8 @@ // ReSharper disable CompareOfFloatsByEqualityOperator namespace UglyToad.PdfPig.PdfFonts.Simple { - using System; + using System; + using System.Diagnostics; using Core; using Fonts; using Fonts.AdobeFontMetrics; @@ -46,13 +47,36 @@ namespace UglyToad.PdfPig.PdfFonts.Simple public bool TryGetUnicode(int characterCode, out string value) { - var name = encoding.GetName(characterCode); - - var listed = GlyphList.AdobeGlyphList.NameToUnicode(name); - - value = listed; - - return true; + var name = encoding.GetName(characterCode); + if (name is ".notdef") + { + value = null; + return false; + } + 
if (encoding is ZapfDingbatsEncoding) + { + var listed = GlyphList.ZapfDingbats.NameToUnicode(name); + + value = listed; + + return true; + } + else if (encoding is StandardEncoding or SymbolEncoding) + { + var listed = GlyphList.AdobeGlyphList.NameToUnicode(name); + + value = listed; + + return true; + } else + { + Debug.WriteLine($"Warning: Type1Standard14Font with unexpected encoding: '{encoding.EncodingName}' Expected: 'ZapfDingbatsEncoding','SymbolEncoding' or 'StandardEncoding' . Font: '{standardFontMetrics.FontName}'"); + var listed = GlyphList.AdobeGlyphList.NameToUnicode(name); + + value = listed; + + return true; + } } public CharacterBoundingBox GetBoundingBox(int characterCode) diff --git a/src/UglyToad.PdfPig/Writer/Fonts/Standard14WritingFont.cs b/src/UglyToad.PdfPig/Writer/Fonts/Standard14WritingFont.cs index a55ee5a8..8953d5c6 100644 --- a/src/UglyToad.PdfPig/Writer/Fonts/Standard14WritingFont.cs +++ b/src/UglyToad.PdfPig/Writer/Fonts/Standard14WritingFont.cs @@ -1,14 +1,14 @@ namespace UglyToad.PdfPig.Writer.Fonts { using System; - using System.Collections.Generic; - using System.IO; + using System.Collections.Generic; + using System.Diagnostics; + using System.Linq; using Core; using PdfPig.Fonts; using PdfPig.Fonts.AdobeFontMetrics; using PdfPig.Fonts.Encodings; - using Tokens; - using Util.JetBrains.Annotations; + using Tokens; internal class Standard14WritingFont : IWritingFont { @@ -24,12 +24,25 @@ } public bool TryGetBoundingBox(char character, out PdfRectangle boundingBox) - { - var encoding = StandardEncoding.Instance; - boundingBox = default(PdfRectangle); - if (!metrics.CharacterMetrics.TryGetValue(encoding.GetName(character), out var characterMetric)) - { - return false; + { + + boundingBox = default(PdfRectangle); + + int code = CodeMapIfUnicode(character); + if (code == -1) + { + Debug.WriteLine($"Font '{metrics.FontName}' does NOT have character '{character}' (0x{(int)character:X})."); + return false; + } + + var characterMetric = 
metrics.CharacterMetrics + .Where(v => v.Value.CharacterCode == code) + .Select(v => v.Value) + .FirstOrDefault(); + if (characterMetric is null) + { + Debug.WriteLine($"Font '{metrics.FontName}' does NOT have character '{character}' (0x{(int)character:X})."); + return false; } boundingBox = new PdfRectangle(characterMetric.BoundingBox.Left, characterMetric.BoundingBox.Bottom, @@ -40,7 +53,8 @@ public bool TryGetAdvanceWidth(char character, out double width) { - width = 0; + width = 0; + if (!TryGetBoundingBox(character, out var bbox)) { return false; @@ -63,9 +77,9 @@ { NameToken.Type, NameToken.Font }, { NameToken.Subtype, NameToken.Type1 }, { NameToken.BaseFont, NameToken.Create(metrics.FontName) }, - { NameToken.Encoding, NameToken.MacRomanEncoding } + { NameToken.Encoding, (metrics.FontName is "Symbol" or "ZapfDingbats") ? NameToken.Create("FontSpecific") : NameToken.StandardEncoding } // 2022-12-12 @fnatzke was NameToken.MacRomanEncoding; not sure based on spec why MacRomanEncoding encoding? 
}; - + var token = new DictionaryToken(dictionary); if (reservedIndirect != null) @@ -79,23 +93,194 @@ } public byte GetValueForCharacter(char character) - { - var name = GlyphList.AdobeGlyphList.UnicodeCodePointToName(character); + { + var characterCode = CodeMapIfUnicode(character); + var characterMetric = metrics.CharacterMetrics + .Where(v => v.Value.CharacterCode == characterCode) + .Select(v => v.Value) + .FirstOrDefault(); + if (characterMetric is null) + { + throw new NotSupportedException($"Font '{metrics.FontName}' does NOT have character '{character}' (0x{(int)character:X})."); + } + var code = characterMetric.CharacterCode; + var result = (byte)code; + return result; + } + private int UnicodeToSymbolCode(char character) + { + var name = GlyphList.AdobeGlyphList.UnicodeCodePointToName(character); + if (name is ".notdef") + { + return -1; + } + var symbolEncoding = SymbolEncoding.Instance; + var code = symbolEncoding.GetCode(name); + if (code == -1) + { + Debug.WriteLine($"Found Unicode point '{character}' (0x{(int)character:X}) but glphy name '{name}' not found in font '{metrics.FontName}' [Symbol] (StandardEncoding)."); + } + return code; + } + + private int UnicodeToZapfDingbats(char character) + { + + int code; + var name = GlyphList.ZapfDingbats.UnicodeCodePointToName(character); + if (name is ".notdef") + { + Debug.WriteLine($"Failed to find Unicode character '{character}' (0x{(int)character:X})."); + return -1; + } + var encoding = ZapfDingbatsEncoding.Instance; + code = encoding.GetCode(name); + if (code == -1) + { + Debug.WriteLine($"Found Unicode point '{character}' (0x{(int)character:X}) but glphy name '{name}' not found in font '{metrics.FontName}' (font specific encoding: ZapfDingbats)."); + } + return code; + + } + + private int UnicodeToStandardEncoding(char character) + { + int code; + var name = GlyphList.AdobeGlyphList.UnicodeCodePointToName(character); + if (name is ".notdef") + { + Debug.WriteLine($"Failed to find Unicode character 
'{character}' (0x{(int)character:X})."); + return -1; + } + var standardEncoding = StandardEncoding.Instance; + code = standardEncoding.GetCode(name); + if (code == -1) + { + // Check if name from glyph list is the same except first letter's case; capital letter (or if capital a lowercase) + var nameCapitalisedChange = Char.IsUpper(name[0]) ? Char.ToLower(name[0]) + name.Substring(1) :Char.ToUpper(name[0]) + name.Substring(1); + code = standardEncoding.GetCode(nameCapitalisedChange); + if (code == -1) + { + Debug.WriteLine($"Found Unicode point '{character}' (0x{(int)character:X}) but glphy name '{name}' not found in font '{metrics.FontName}' (StandardEncoding)."); + } + } + return code; + } + + private int CodeMapIfUnicode(char character) + { + int code; // encoding code either from StanardEncoding, ZapfDingbatsEncoding or SymbolEncoding depending on font + int i = (int)character; + if (metrics.FontName is "ZapfDingbats") + { + // Either use character code as is if font specific code or map from Unicode Dingbats range. 0x2700- 0x27bf + code = i < 255 ? i : UnicodeToZapfDingbats(character); + } + else if (metrics.FontName is "Symbol") + { + if (i == 0x00AC) { + Debug.WriteLine("Warning: 0x00AC used as Unicode ('¬') (logicalnot). For (arrowleft)('←') from Adobe Symbol Font Specific (0330) use Unicode 0x2190 ('←')."); + return 0x00d8; + } + if (i == 0x00F7) { + Debug.WriteLine("Warning: 0x00F7 used as Unicode ('÷')(divide). For (parenrightex) from Adobe Symbol Font Specific (0367) use Unicode 0xF8F7."); + return 0x00B8; + } + if (i == 0x00B5) { + Debug.WriteLine("Warning: 0x00B5 used as Unicode divide ('µ')(mu). For (proportional)('∝') from Adobe Symbol Font Specific (0265) use Unicode 0x221D('∝')."); + return 0x006d; + } + if (i == 0x00D7) { + Debug.WriteLine("Warning: 0x00D7 used as Unicode multiply ('×')(multiply). 
For (dotmath)('⋅') from Adobe Symbol Font Specific (0327) use Unicode 0x22C5('⋅')."); + return 0x00B4; + } + + // Either use character code as is if font specific code or map from Unicode + code = i < 255 ? i : UnicodeToSymbolCode(character); + } + else + { + // Resolve clashes as unicode. Use any standard code (with warnings). Map remainder as unicode to stanard character code. + + if (i == 0x00c6) + { + Debug.WriteLine("Warning: 0x00C6 used as Unicode ('Æ') (AE). For (breve)('˘') from Adobe Standard Font Specific (0306) use Unicode 0x02D8 ('˘')."); + return 0x00e1; // (341) + } + + if (i == 0x00b4) + { + Debug.WriteLine("Warning: 0x00B4 used as Unicode ('´') (acute). For (periodcentered)('·') from Adobe Standard Font Specific (0264) use Unicode 0x00B7 ('·')."); + return 0x00c2; // (0302) + } + + + + if (i == 0x00b7) + { + Debug.WriteLine("Warning: 0x00B7 used as Unicode ('·') (periodcentered). For (bullet)('•') from Adobe Standard Font Specific (0267) use Unicode 0x2022 ('•')."); + return 0x00b4; // (0264) + } + + if (i == 0x00b8) + { + Debug.WriteLine("Warning: 0x00B8 used as Unicode ('¸') (cedilla). For (quotesinglbase)('‚') from Adobe Standard Font Specific (0267) use Unicode 0x201A ('‚')."); + return 0x00cb; // (0313) + } + + if (i == 0x00a4) + { + Debug.WriteLine("Warning: 0x00A4 used as Unicode (currency). For (fraction) ('⁄') from Adobe Standard Font Specific (0244) use Unicode 0x2044 ('⁄')."); + return 0x00a8; // (0250) + } + + if (i == 0x00a8) + { + Debug.WriteLine("Warning: 0x00A8 used as Unicode (dieresis)('¨'). For (currency) from Adobe Standard Font Specific (0250) use Unicode 0x00A4."); + return 0x00c8; // (0310) + } + + if (i == 0x0060) + { + Debug.WriteLine("Warning: 0x0060 used as Unicode (grave)('`'). For (quoteleft)('‘') from Adobe Standard Font Specific (0140) use Unicode 0x2018."); + return 0x00c1; // (0301) + } + + if (i == 0x00af) + { + Debug.WriteLine("Warning: 0x00AF used as Unicode (macron)('¯'). 
For (fl)('fl') from Adobe Standard Font Specific (0257) use Unicode 0xFB02."); + return 0x00c5; // (0305) + } + + if (i == 0x00aa) + { + Debug.WriteLine("Warning: 0x00AA used as Unicode (ordfeminine)('ª'). For (quotedblleft) ('“') from Adobe Standard Font Specific (0252) use Unicode 0x201C."); + return 0x00e3; // (0343) + } + + if (i == 0x00ba) + { + Debug.WriteLine("Warning: 0x00BA used as Unicode (ordmasculine)('º'). For (quotedblright) ('”') from Adobe Standard Font Specific (0272) use Unicode 0x201D."); + return 0x00eb; // (0353) + } + + if (i == 0x00f8) + { + Debug.WriteLine("Warning: 0x00F8 used as Unicode (oslash)('ø'). For (lslash) ('ł') from Adobe Standard Font Specific (0370) use Unicode 0x0142."); + return 0x00f9; // (0371) + } + + if (i == 0x0027) + { + Debug.WriteLine("Warning: 0x0027 used as Unicode (quotesingle)('\''). For (quoteright) ('’') from Adobe Standard Font Specific (0047) use Unicode 0x2019."); + return 0x00a9; // (0251) + } + + var isCharacterCodeInStandardEncoding = metrics.CharacterMetrics.Any(v => v.Value.CharacterCode == i); + code = isCharacterCodeInStandardEncoding ? i : UnicodeToStandardEncoding(character); + } + return code; + } - if (name == null || !MacRomanEncoding.Instance.NameToCodeMap.TryGetValue(name, out var code)) - { - var nameError = name ?? "NULL"; - throw new NotSupportedException($"No mapping for character '{character}' exists in the Standard14 font. Glyph name: '{nameError}'."); - } - - if (code > byte.MaxValue) - { - throw new NotSupportedException($"Value of code for character '{character}' exceeded the range of a byte. 
Glyph name: '{name}'."); - } - - var result = (byte) code; - return result; - } - } - + } } \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Writer/PdfPageBuilder.cs b/src/UglyToad.PdfPig/Writer/PdfPageBuilder.cs index 7c6f4cd3..c814f75b 100644 --- a/src/UglyToad.PdfPig/Writer/PdfPageBuilder.cs +++ b/src/UglyToad.PdfPig/Writer/PdfPageBuilder.cs @@ -882,7 +882,7 @@ if (!font.TryGetBoundingBox(c, out var rect)) { - throw new InvalidOperationException($"The font does not contain a character: {c}."); + throw new InvalidOperationException($"The font does not contain a character: '{c}' (0x{(int)c:X})."); } if (!font.TryGetAdvanceWidth(c, out var charWidth))