remove unsupported syntax

This commit is contained in:
Eliot Jones
2023-01-08 14:18:09 -05:00
parent d2944e14e5
commit d5b196ff44
3 changed files with 297 additions and 287 deletions

View File

@@ -1,184 +1,184 @@
namespace UglyToad.PdfPig.Fonts.Encodings
namespace UglyToad.PdfPig.Fonts.Encodings
{
/// <summary>
/// The standard PDF encoding.
/// </summary>
public class StandardEncoding : Encoding
{
private static readonly (int, string)[] EncodingTable =
{
(0101, "A"),
(0341, "AE"),
(0102, "B"),
(0103, "C"),
(0104, "D"),
(0105, "E"),
(0106, "F"),
(0107, "G"),
(0110, "H"),
(0111, "I"),
(0112, "J"),
(0113, "K"),
(0114, "L"),
(0350, "Lslash"),
(0115, "M"),
(0116, "N"),
(0117, "O"),
(0352, "OE"),
(0351, "Oslash"),
(0120, "P"),
(0121, "Q"),
(0122, "R"),
(0123, "S"),
(0124, "T"),
(0125, "U"),
(0126, "V"),
(0127, "W"),
(0130, "X"),
(0131, "Y"),
(0132, "Z"),
(0141, "a"),
(0302, "acute"),
(0361, "ae"),
(0046, "ampersand"),
(0136, "asciicircum"),
(0176, "asciitilde"),
(0052, "asterisk"),
(0100, "at"),
(0142, "b"),
(0134, "backslash"),
(0174, "bar"),
(0173, "braceleft"),
(0175, "braceright"),
(0133, "bracketleft"),
(0135, "bracketright"),
(0306, "breve"),
(0267, "bullet"),
(0143, "c"),
(0317, "caron"),
(0313, "cedilla"),
(0242, "cent"),
(0303, "circumflex"),
(0072, "colon"),
(0054, "comma"),
(0250, "currency"),
(0144, "d"),
(0262, "dagger"),
(0263, "daggerdbl"),
(0310, "dieresis"),
(0044, "dollar"),
(0307, "dotaccent"),
(0365, "dotlessi"),
(0145, "e"),
(0070, "eight"),
(0274, "ellipsis"),
(0320, "emdash"),
(0261, "endash"),
(0075, "equal"),
(0041, "exclam"),
(0241, "exclamdown"),
(0146, "f"),
(0256, "fi"),
(0065, "five"),
(0257, "fl"),
(0246, "florin"),
(0064, "four"),
(0244, "fraction"),
(0147, "g"),
(0373, "germandbls"),
(0301, "grave"),
(0076, "greater"),
(0253, "guillemotleft"),
(0273, "guillemotright"),
(0254, "guilsinglleft"),
(0255, "guilsinglright"),
(0150, "h"),
(0315, "hungarumlaut"),
(0055, "hyphen"),
(0151, "i"),
(0152, "j"),
(0153, "k"),
(0154, "l"),
(0074, "less"),
(0370, "lslash"),
(0155, "m"),
(0305, "macron"),
(0156, "n"),
(0071, "nine"),
(0043, "numbersign"),
(0157, "o"),
(0372, "oe"),
(0316, "ogonek"),
(0061, "one"),
(0343, "ordfeminine"),
(0353, "ordmasculine"),
(0371, "oslash"),
(0160, "p"),
(0266, "paragraph"),
(0050, "parenleft"),
(0051, "parenright"),
(0045, "percent"),
(0056, "period"),
(0264, "periodcentered"),
(0275, "perthousand"),
(0053, "plus"),
(0161, "q"),
(0077, "question"),
(0277, "questiondown"),
(0042, "quotedbl"),
(0271, "quotedblbase"),
(0252, "quotedblleft"),
(0272, "quotedblright"),
(0140, "quoteleft"),
(0047, "quoteright"),
(0270, "quotesinglbase"),
(0251, "quotesingle"),
(0162, "r"),
(0312, "ring"),
(0163, "s"),
(0247, "section"),
(0073, "semicolon"),
(0067, "seven"),
(0066, "six"),
(0057, "slash"),
(0040, "space"),
(0243, "sterling"),
(0164, "t"),
(0063, "three"),
(0304, "tilde"),
(0062, "two"),
(0165, "u"),
(0137, "underscore"),
(0166, "v"),
(0167, "w"),
(0170, "x"),
(0171, "y"),
(0245, "yen"),
(0172, "z"),
(0060, "zero")
};
/// <summary>
/// The single instance of the standard encoding.
/// </summary>
public static StandardEncoding Instance { get; } = new StandardEncoding();
/// <inheritdoc />
public override string EncodingName => "StandardEncoding";
private StandardEncoding()
{
foreach ((var codeToBeConverted, var name) in EncodingTable)
{
// In source code an int literal with a leading zero ('0')
// in other languages ('C' and 'Java') would be interpreted
/// <summary>
/// The standard PDF encoding.
/// </summary>
public class StandardEncoding : Encoding
{
private static readonly (int, string)[] EncodingTable =
{
(0101, "A"),
(0341, "AE"),
(0102, "B"),
(0103, "C"),
(0104, "D"),
(0105, "E"),
(0106, "F"),
(0107, "G"),
(0110, "H"),
(0111, "I"),
(0112, "J"),
(0113, "K"),
(0114, "L"),
(0350, "Lslash"),
(0115, "M"),
(0116, "N"),
(0117, "O"),
(0352, "OE"),
(0351, "Oslash"),
(0120, "P"),
(0121, "Q"),
(0122, "R"),
(0123, "S"),
(0124, "T"),
(0125, "U"),
(0126, "V"),
(0127, "W"),
(0130, "X"),
(0131, "Y"),
(0132, "Z"),
(0141, "a"),
(0302, "acute"),
(0361, "ae"),
(0046, "ampersand"),
(0136, "asciicircum"),
(0176, "asciitilde"),
(0052, "asterisk"),
(0100, "at"),
(0142, "b"),
(0134, "backslash"),
(0174, "bar"),
(0173, "braceleft"),
(0175, "braceright"),
(0133, "bracketleft"),
(0135, "bracketright"),
(0306, "breve"),
(0267, "bullet"),
(0143, "c"),
(0317, "caron"),
(0313, "cedilla"),
(0242, "cent"),
(0303, "circumflex"),
(0072, "colon"),
(0054, "comma"),
(0250, "currency"),
(0144, "d"),
(0262, "dagger"),
(0263, "daggerdbl"),
(0310, "dieresis"),
(0044, "dollar"),
(0307, "dotaccent"),
(0365, "dotlessi"),
(0145, "e"),
(0070, "eight"),
(0274, "ellipsis"),
(0320, "emdash"),
(0261, "endash"),
(0075, "equal"),
(0041, "exclam"),
(0241, "exclamdown"),
(0146, "f"),
(0256, "fi"),
(0065, "five"),
(0257, "fl"),
(0246, "florin"),
(0064, "four"),
(0244, "fraction"),
(0147, "g"),
(0373, "germandbls"),
(0301, "grave"),
(0076, "greater"),
(0253, "guillemotleft"),
(0273, "guillemotright"),
(0254, "guilsinglleft"),
(0255, "guilsinglright"),
(0150, "h"),
(0315, "hungarumlaut"),
(0055, "hyphen"),
(0151, "i"),
(0152, "j"),
(0153, "k"),
(0154, "l"),
(0074, "less"),
(0370, "lslash"),
(0155, "m"),
(0305, "macron"),
(0156, "n"),
(0071, "nine"),
(0043, "numbersign"),
(0157, "o"),
(0372, "oe"),
(0316, "ogonek"),
(0061, "one"),
(0343, "ordfeminine"),
(0353, "ordmasculine"),
(0371, "oslash"),
(0160, "p"),
(0266, "paragraph"),
(0050, "parenleft"),
(0051, "parenright"),
(0045, "percent"),
(0056, "period"),
(0264, "periodcentered"),
(0275, "perthousand"),
(0053, "plus"),
(0161, "q"),
(0077, "question"),
(0277, "questiondown"),
(0042, "quotedbl"),
(0271, "quotedblbase"),
(0252, "quotedblleft"),
(0272, "quotedblright"),
(0140, "quoteleft"),
(0047, "quoteright"),
(0270, "quotesinglbase"),
(0251, "quotesingle"),
(0162, "r"),
(0312, "ring"),
(0163, "s"),
(0247, "section"),
(0073, "semicolon"),
(0067, "seven"),
(0066, "six"),
(0057, "slash"),
(0040, "space"),
(0243, "sterling"),
(0164, "t"),
(0063, "three"),
(0304, "tilde"),
(0062, "two"),
(0165, "u"),
(0137, "underscore"),
(0166, "v"),
(0167, "w"),
(0170, "x"),
(0171, "y"),
(0245, "yen"),
(0172, "z"),
(0060, "zero")
};
/// <summary>
/// The single instance of the standard encoding.
/// </summary>
public static StandardEncoding Instance { get; } = new StandardEncoding();
/// <inheritdoc />
public override string EncodingName => "StandardEncoding";
/// <summary>
/// Fills the encoding with every (code, glyph name) pair listed in <see cref="EncodingTable"/>.
/// </summary>
private StandardEncoding()
{
    // The table writes each code with the digits of its octal form (for example 040 for "space").
    // C# has no octal literals, so the compiler reads those digits as a base-10 int (40).
    // Re-reading the decimal digits as base 8 recovers the intended code: 040 -> "40" -> 32.
    // (The original comment names OctalHelpers.FromOctalInt() as an alternative.)
    foreach (var entry in EncodingTable)
    {
        var octalDigits = entry.Item1.ToString();
        var code = System.Convert.ToInt32(octalDigits, 8);
        Add(code, entry.Item2);
    }
}
}
// so arrives here as a different value parsed as base10.
// Convert 'codeToBeConverted' to intended value as if it was an octal literal before using.
// For example 040 converts to string "40" then convert string to int again but using base 8 (octal) so result is 32 (base 10).
var code = System.Convert.ToInt32($"{codeToBeConverted}", 8); // alternative is OctalHelpers.FromOctalInt()
Add(code, name);
}
}
}
}

View File

@@ -48,7 +48,7 @@ namespace UglyToad.PdfPig.PdfFonts.Simple
public bool TryGetUnicode(int characterCode, out string value)
{
var name = encoding.GetName(characterCode);
if (name is ".notdef")
if (string.Equals(name, ".notdef", StringComparison.OrdinalIgnoreCase))
{
value = null;
return false;
@@ -63,7 +63,7 @@ namespace UglyToad.PdfPig.PdfFonts.Simple
return true;
}
if (encoding is StandardEncoding or SymbolEncoding)
if (encoding is StandardEncoding || encoding is SymbolEncoding)
{
var listed = GlyphList.AdobeGlyphList.NameToUnicode(name);

View File

@@ -1,105 +1,113 @@
namespace UglyToad.PdfPig.Writer.Fonts
{
using System;
namespace UglyToad.PdfPig.Writer.Fonts
{
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using Core;
using PdfPig.Fonts;
using PdfPig.Fonts.AdobeFontMetrics;
using PdfPig.Fonts.Encodings;
using Core;
using PdfPig.Fonts;
using PdfPig.Fonts.AdobeFontMetrics;
using PdfPig.Fonts.Encodings;
using Tokens;
internal class Standard14WritingFont : IWritingFont
{
private readonly AdobeFontMetrics metrics;
public bool HasWidths { get; } = false;
public string Name => metrics.FontName;
/// <summary>
/// Creates a writing font that reads its name and character metrics from <paramref name="metrics"/>.
/// </summary>
/// <param name="metrics">The Adobe font metrics stored on this instance.</param>
public Standard14WritingFont(AdobeFontMetrics metrics) => this.metrics = metrics;
public bool TryGetBoundingBox(char character, out PdfRectangle boundingBox)
{
internal class Standard14WritingFont : IWritingFont
{
private readonly AdobeFontMetrics metrics;
public bool HasWidths { get; } = false;
public string Name => metrics.FontName;
/// <summary>
/// Creates a writing font that reads its name and character metrics from <paramref name="metrics"/>.
/// </summary>
/// <param name="metrics">The Adobe font metrics stored on this instance.</param>
public Standard14WritingFont(AdobeFontMetrics metrics) => this.metrics = metrics;
/// <summary>
/// Tries to get the bounding box of the glyph this font uses for <paramref name="character"/>.
/// </summary>
/// <param name="character">The character to look up; it is first translated to a font character code.</param>
/// <param name="boundingBox">
/// On success, a rectangle built from the matching character metric; otherwise the default <see cref="PdfRectangle"/>.
/// </param>
/// <returns>
/// <c>true</c> when a character metric with the mapped code exists; <c>false</c> when the character
/// cannot be mapped (code -1) or no metric has that code.
/// </returns>
public bool TryGetBoundingBox(char character, out PdfRectangle boundingBox)
{
    boundingBox = default(PdfRectangle);

    int code = CodeMapIfUnicode(character);
    if (code == -1)
    {
        Debug.WriteLine($"Font '{metrics.FontName}' does NOT have character '{character}' (0x{(int)character:X}).");
        return false;
    }

    // Linear search over the metrics for the first entry whose character code matches.
    var characterMetric = metrics.CharacterMetrics
        .Where(v => v.Value.CharacterCode == code)
        .Select(v => v.Value)
        .FirstOrDefault();

    // Single null check; the previous text tested the same variable twice in nested ifs.
    if (characterMetric == null)
    {
        Debug.WriteLine($"Font '{metrics.FontName}' does NOT have character '{character}' (0x{(int)character:X}).");
        return false;
    }

    // The third argument is Left + Width.X (the metric's horizontal width), not the glyph box's own right edge.
    // NOTE(review): assumes the PdfRectangle arguments are (left, bottom, right, top) - confirm against its constructor.
    boundingBox = new PdfRectangle(characterMetric.BoundingBox.Left, characterMetric.BoundingBox.Bottom,
        characterMetric.BoundingBox.Left + characterMetric.Width.X, characterMetric.BoundingBox.Top);
    return true;
}
public bool TryGetAdvanceWidth(char character, out double width)
{
}
boundingBox = new PdfRectangle(characterMetric.BoundingBox.Left, characterMetric.BoundingBox.Bottom,
characterMetric.BoundingBox.Left + characterMetric.Width.X, characterMetric.BoundingBox.Top);
return true;
}
/// <summary>
/// Tries to get the width for <paramref name="character"/>, taken from the rectangle
/// produced by <see cref="TryGetBoundingBox"/>.
/// </summary>
/// <param name="character">The character to measure.</param>
/// <param name="width">On success, the width of that rectangle; otherwise 0.</param>
/// <returns><c>true</c> when the bounding box lookup succeeded; otherwise <c>false</c>.</returns>
public bool TryGetAdvanceWidth(char character, out double width)
{
    if (TryGetBoundingBox(character, out var bbox))
    {
        width = bbox.Width;
        return true;
    }

    width = 0;
    return false;
}
/// <summary>
/// Gets the font matrix: a transformation that scales both axes by 1/1000 with no translation.
/// </summary>
/// <returns>The matrix built from the values (1/1000, 0, 0, 1/1000, 0, 0).</returns>
public TransformationMatrix GetFontMatrix()
{
    // NOTE(review): presumably maps 1000-unit glyph space to text space - confirm against the spec.
    const double scale = 1 / 1000.0;
    return TransformationMatrix.FromValues(scale, 0, 0, scale, 0, 0);
}
/// <summary>
/// Writes the Type 1 font dictionary for this font and returns the reference the writer produced.
/// </summary>
/// <param name="writer">The stream writer that receives the font dictionary token.</param>
/// <param name="reservedIndirect">
/// Optional reference to write the dictionary to; when <c>null</c> the writer's single-argument
/// <c>WriteToken</c> overload is used instead.
/// </param>
/// <returns>The reference returned by the writer for the written dictionary.</returns>
public IndirectReferenceToken WriteFont(IPdfStreamWriter writer, IndirectReferenceToken reservedIndirect=null)
{
    // "Symbol" and "ZapfDingbats" are written with a font specific encoding; every other font uses StandardEncoding.
    // 2022-12-12 @fnatzke: this was previously NameToken.MacRomanEncoding; it is unclear from the spec why that was used.
    var usesFontSpecificEncoding = metrics.FontName == "Symbol" || metrics.FontName == "ZapfDingbats";
    var encodingName = usesFontSpecificEncoding
        ? NameToken.Create("FontSpecific")
        : NameToken.StandardEncoding;

    var fontDictionary = new DictionaryToken(new Dictionary<NameToken, IToken>
    {
        { NameToken.Type, NameToken.Font },
        { NameToken.Subtype, NameToken.Type1 },
        { NameToken.BaseFont, NameToken.Create(metrics.FontName) },
        { NameToken.Encoding, encodingName }
    });

    return reservedIndirect == null
        ? writer.WriteToken(fontDictionary)
        : writer.WriteToken(fontDictionary, reservedIndirect);
}
public byte GetValueForCharacter(char character)
{
if (!TryGetBoundingBox(character, out var bbox))
{
return false;
}
width = bbox.Width;
return true;
}
/// <summary>
/// Gets the font matrix: a transformation that scales both axes by 1/1000 with no translation.
/// </summary>
/// <returns>The matrix built from the values (1/1000, 0, 0, 1/1000, 0, 0).</returns>
public TransformationMatrix GetFontMatrix()
{
    // NOTE(review): presumably maps 1000-unit glyph space to text space - confirm against the spec.
    const double scale = 1 / 1000.0;
    return TransformationMatrix.FromValues(scale, 0, 0, scale, 0, 0);
}
/// <summary>
/// Writes the Type 1 font dictionary for this font and returns the reference the writer produced.
/// </summary>
/// <param name="writer">The stream writer that receives the font dictionary token.</param>
/// <param name="reservedIndirect">
/// Optional reference to write the dictionary to; when <c>null</c> the writer's single-argument
/// <c>WriteToken</c> overload is used instead.
/// </param>
/// <returns>The reference returned by the writer for the written dictionary.</returns>
public IndirectReferenceToken WriteFont(IPdfStreamWriter writer, IndirectReferenceToken reservedIndirect = null)
{
    // "Symbol" and "ZapfDingbats" are written with a font specific encoding; every other font uses StandardEncoding.
    // NOTE(review): the font name is matched ignoring case here - confirm that is intended, since PDF names are case-sensitive.
    var usesFontSpecificEncoding =
        string.Equals(metrics.FontName, "Symbol", StringComparison.OrdinalIgnoreCase)
        || string.Equals(metrics.FontName, "ZapfDingbats", StringComparison.OrdinalIgnoreCase);

    // 2022-12-12 @fnatzke: this was previously NameToken.MacRomanEncoding; it is unclear from the spec why that was used.
    var encoding = usesFontSpecificEncoding
        ? NameToken.Create("FontSpecific")
        : NameToken.StandardEncoding;

    var token = new DictionaryToken(new Dictionary<NameToken, IToken>
    {
        { NameToken.Type, NameToken.Font },
        { NameToken.Subtype, NameToken.Type1 },
        { NameToken.BaseFont, NameToken.Create(metrics.FontName) },
        { NameToken.Encoding, encoding }
    });

    return reservedIndirect == null
        ? writer.WriteToken(token)
        : writer.WriteToken(token, reservedIndirect);
}
public byte GetValueForCharacter(char character)
{
var characterCode = CodeMapIfUnicode(character);
var characterMetric = metrics.CharacterMetrics
.Where(v => v.Value.CharacterCode == characterCode)
.Select(v => v.Value)
.FirstOrDefault();
if (characterMetric is null)
if (characterMetric == null)
{
throw new NotSupportedException($"Font '{metrics.FontName}' does NOT have character '{character}' (0x{(int)character:X}).");
}
@@ -108,9 +116,9 @@
return result;
}
private int UnicodeToSymbolCode(char character)
{
{
var name = GlyphList.AdobeGlyphList.UnicodeCodePointToName(character);
if (name is ".notdef")
if (string.Equals(name, ".notdef", StringComparison.OrdinalIgnoreCase))
{
return -1;
}
@@ -125,16 +133,15 @@
private int UnicodeToZapfDingbats(char character)
{
int code;
var name = GlyphList.ZapfDingbats.UnicodeCodePointToName(character);
if (name is ".notdef")
if (string.Equals(name, ".notdef", StringComparison.OrdinalIgnoreCase))
{
Debug.WriteLine($"Failed to find Unicode character '{character}' (0x{(int)character:X}).");
return -1;
}
var encoding = ZapfDingbatsEncoding.Instance;
code = encoding.GetCode(name);
var code = encoding.GetCode(name);
if (code == -1)
{
Debug.WriteLine($"Found Unicode point '{character}' (0x{(int)character:X}) but glphy name '{name}' not found in font '{metrics.FontName}' (font specific encoding: ZapfDingbats).");
@@ -142,57 +149,60 @@
return code;
}
private int UnicodeToStandardEncoding(char character)
{
int code;
var name = GlyphList.AdobeGlyphList.UnicodeCodePointToName(character);
if (name is ".notdef")
if (string.Equals(name, ".notdef", StringComparison.OrdinalIgnoreCase))
{
Debug.WriteLine($"Failed to find Unicode character '{character}' (0x{(int)character:X}).");
return -1;
}
var standardEncoding = StandardEncoding.Instance;
code = standardEncoding.GetCode(name);
var code = standardEncoding.GetCode(name);
if (code == -1)
{
// Check if name from glyph list is the same except first letter's case; capital letter (or if capital a lowercase)
var nameCapitalisedChange = Char.IsUpper(name[0]) ? Char.ToLower(name[0]) + name.Substring(1) :Char.ToUpper(name[0]) + name.Substring(1);
var nameCapitalisedChange = Char.IsUpper(name[0]) ? Char.ToLower(name[0]) + name.Substring(1) : Char.ToUpper(name[0]) + name.Substring(1);
code = standardEncoding.GetCode(nameCapitalisedChange);
if (code == -1)
{
Debug.WriteLine($"Found Unicode point '{character}' (0x{(int)character:X}) but glphy name '{name}' not found in font '{metrics.FontName}' (StandardEncoding).");
}
}
}
return code;
}
private int CodeMapIfUnicode(char character)
{
int code; // encoding code either from StanardEncoding, ZapfDingbatsEncoding or SymbolEncoding depending on font
int i = (int)character;
if (metrics.FontName is "ZapfDingbats")
int code; // encoding code either from StandardEncoding, ZapfDingbatsEncoding or SymbolEncoding depending on font.
int i = character;
if (string.Equals(metrics.FontName, "ZapfDingbats", StringComparison.OrdinalIgnoreCase))
{
// Either use character code as is if font specific code or map from Unicode Dingbats range. 0x2700- 0x27bf
// Either use character code as is if font specific code or map from Unicode Dingbats range. 0x2700 - 0x27bf.
code = i < 255 ? i : UnicodeToZapfDingbats(character);
}
else if (metrics.FontName is "Symbol")
else if (string.Equals(metrics.FontName, "Symbol", StringComparison.OrdinalIgnoreCase))
{
if (i == 0x00AC) {
if (i == 0x00AC)
{
Debug.WriteLine("Warning: 0x00AC used as Unicode ('¬') (logicalnot). For (arrowleft)('←') from Adobe Symbol Font Specific (0330) use Unicode 0x2190 ('←').");
return 0x00d8;
}
if (i == 0x00F7) {
Debug.WriteLine("Warning: 0x00F7 used as Unicode ('÷')(divide). For (parenrightex) from Adobe Symbol Font Specific (0367) use Unicode 0xF8F7.");
if (i == 0x00F7)
{
Debug.WriteLine("Warning: 0x00F7 used as Unicode ('÷')(divide). For (parenrightex) from Adobe Symbol Font Specific (0367) use Unicode 0xF8F7.");
return 0x00B8;
}
if (i == 0x00B5) {
if (i == 0x00B5)
{
Debug.WriteLine("Warning: 0x00B5 used as Unicode divide ('µ')(mu). For (proportional)('∝') from Adobe Symbol Font Specific (0265) use Unicode 0x221D('∝').");
return 0x006d;
}
if (i == 0x00D7) {
if (i == 0x00D7)
{
Debug.WriteLine("Warning: 0x00D7 used as Unicode multiply ('×')(multiply). For (dotmath)('⋅') from Adobe Symbol Font Specific (0327) use Unicode 0x22C5('⋅').");
return 0x00B4;
return 0x00B4;
}
// Either use character code as is if font specific code or map from Unicode
@@ -214,7 +224,7 @@
return 0x00c2; // (0302)
}
if (i == 0x00b7)
{
@@ -281,6 +291,6 @@
}
return code;
}
}
}
}