diff --git a/src/UglyToad.PdfPig.Fonts/Encodings/Encoding.cs b/src/UglyToad.PdfPig.Fonts/Encodings/Encoding.cs index 7f67cd99..1f2c4d37 100644 --- a/src/UglyToad.PdfPig.Fonts/Encodings/Encoding.cs +++ b/src/UglyToad.PdfPig.Fonts/Encodings/Encoding.cs @@ -60,6 +60,22 @@ } return name; + } + + + /// + /// Get the character code from name + /// + /// Character name (eg. euro, ampersand, A, space) + /// -1 if not found otherwise the character code + public virtual int GetCode(string name) + { + if (!NameToCode.TryGetValue(name, out var code)) + { + return -1; + } + + return code; } /// diff --git a/src/UglyToad.PdfPig.Fonts/Encodings/SymbolEncoding.cs b/src/UglyToad.PdfPig.Fonts/Encodings/SymbolEncoding.cs index d37d178e..c2d3fe92 100644 --- a/src/UglyToad.PdfPig.Fonts/Encodings/SymbolEncoding.cs +++ b/src/UglyToad.PdfPig.Fonts/Encodings/SymbolEncoding.cs @@ -38,7 +38,7 @@ ( 81, "Theta"), // for char 'Q' using 81 as base10 equivilant (for C# source). Spec has 0121 octal. ( 81,"Theta") <=> (0121, "Theta") , ( 85, "Upsilon"), // for char 'U' using 85 as base10 equivilant (for C# source). Spec has 0125 octal. ( 85,"Upsilon") <=> (0125, "Upsilon") , (161, "Upsilon1"), // for char '¡' using 161 as base10 equivilant (for C# source). Spec has 0241 octal. (161,"Upsilon1") <=> (0241, "Upsilon1") , - ( 88, "Xi"), // for char 'X' using 88 as base10 equivilant (for C# source). Spec has 0130 octal. ( 88,"Xi") <=> (0130, "Xi") , + ( 88, "Xi"), // for char 'X' using 88 as base10 equivilant (for C# source). Spec has 0130 octal. ( 88,"Xi") <=> (0130, "Xi") , ( 90, "Zeta"), // for char 'Z' using 90 as base10 equivilant (for C# source). Spec has 0132 octal. ( 90,"Zeta") <=> (0132, "Zeta") , (192, "aleph"), // for char 'À' using 192 as base10 equivilant (for C# source). Spec has 0300 octal. (192,"aleph") <=> (0300, "aleph") , ( 97, "alpha"), // for char 'a' using 97 as base10 equivilant (for C# source). Spec has 0141 octal. 
( 97,"alpha") <=> (0141, "alpha") , @@ -203,6 +203,11 @@ (122, "zeta") // for char 'z' using 122 as base10 equivilant (for C# source). Spec has 0172 octal. (122,"zeta") <=> (0172, "zeta") }; + + private static readonly (int, int)[] UnicodeEquivilants = { + (0x391, 65), // Greek Capital Letter Alpha + }; + /// /// Single instance of this encoding. /// diff --git a/src/UglyToad.PdfPig.Fonts/Encodings/WinAnsiEncoding.cs b/src/UglyToad.PdfPig.Fonts/Encodings/WinAnsiEncoding.cs index 32453bc7..bff63d70 100644 --- a/src/UglyToad.PdfPig.Fonts/Encodings/WinAnsiEncoding.cs +++ b/src/UglyToad.PdfPig.Fonts/Encodings/WinAnsiEncoding.cs @@ -1,10 +1,12 @@ namespace UglyToad.PdfPig.Fonts.Encodings { - using Core; + + + /// /// Windows ANSI encoding. - /// + /// public class WinAnsiEncoding : Encoding { /// diff --git a/src/UglyToad.PdfPig.Fonts/Encodings/ZapfDingbatsEncoding.cs b/src/UglyToad.PdfPig.Fonts/Encodings/ZapfDingbatsEncoding.cs index 46b3c378..cfe8897a 100644 --- a/src/UglyToad.PdfPig.Fonts/Encodings/ZapfDingbatsEncoding.cs +++ b/src/UglyToad.PdfPig.Fonts/Encodings/ZapfDingbatsEncoding.cs @@ -8,8 +8,8 @@ /// /// EncodingTable for ZapfDingbats /// PDF Spec 1.7 Page 1016 https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/pdfreference1.7old.pdf#page1016 - /// Note spec has code values as octal (base 8) with leading zero (supported in 'C' and 'Java') but not by C# - /// Code values are already converted to base 10 prior to compile. + /// Note spec has code values are octal (base 8) with leading zero (supported in 'C' and 'Java') but not by C# + /// Code values are already converted to base 10 prior to compile. Original octal values in comments on each line. /// private static readonly (int, string)[] EncodingTable = { ( 32, "space"), // for char ' ' using 32 as base10 equivilant (for C# source). Spec has 040 octal. 
( 32,"space") <=> (040, "space"), diff --git a/src/UglyToad.PdfPig.Tests/Geometry/ClippingTests.cs b/src/UglyToad.PdfPig.Tests/Geometry/ClippingTests.cs index 17fd56eb..cd5cf667 100644 --- a/src/UglyToad.PdfPig.Tests/Geometry/ClippingTests.cs +++ b/src/UglyToad.PdfPig.Tests/Geometry/ClippingTests.cs @@ -6,7 +6,7 @@ namespace UglyToad.PdfPig.Tests.Geometry public class ClippingTests { [Fact] - public void ContainsRectangleEvenOdd() + public void ContainsRectangleEvenOdd() { using (var document = PdfDocument.Open(IntegrationHelpers.GetDocumentPath("SPARC - v9 Architecture Manual"), new ParsingOptions() { ClipPaths = true })) diff --git a/src/UglyToad.PdfPig.Tests/Writer/Fonts/Standard14WritingFontTests.cs b/src/UglyToad.PdfPig.Tests/Writer/Fonts/Standard14WritingFontTests.cs index f3ecf4c7..940e1704 100644 --- a/src/UglyToad.PdfPig.Tests/Writer/Fonts/Standard14WritingFontTests.cs +++ b/src/UglyToad.PdfPig.Tests/Writer/Fonts/Standard14WritingFontTests.cs @@ -1,14 +1,14 @@ namespace UglyToad.PdfPig.Tests.Writer.Fonts { using System; - + using System.Linq; using PdfPig.Fonts; using PdfPig.Content; - using UglyToad.PdfPig.Core; + using UglyToad.PdfPig.Core; using UglyToad.PdfPig.Fonts.Standard14Fonts; using UglyToad.PdfPig.Writer; - + using Xunit; using System.Reflection; using System.Collections.Generic; @@ -16,71 +16,227 @@ using System.IO; using System.Drawing; using System.Diagnostics; + using System.Reflection.Emit; + using System.Text; public class Standard14WritingFontTests { [Fact] public void ZapfDingbatsFontAddText() { - PdfDocumentBuilder builder = new PdfDocumentBuilder(); - PdfDocumentBuilder.AddedFont F1 = builder.AddStandard14Font(Standard14Font.ZapfDingbats); - PdfPageBuilder page = builder.AddPage(PageSize.A4); + PdfDocumentBuilder pdfBuilder = new PdfDocumentBuilder(); + PdfDocumentBuilder.AddedFont F1 = pdfBuilder.AddStandard14Font(Standard14Font.ZapfDingbats); + var EncodingTable = 
GetEncodingTable(typeof(UglyToad.PdfPig.Fonts.Encodings.ZapfDingbatsEncoding)); + var unicodesCharacters = GetUnicodeCharacters(EncodingTable, GlyphList.ZapfDingbats); + { + PdfDocumentBuilder.AddedFont F2 = pdfBuilder.AddStandard14Font(Standard14Font.TimesRoman); + PdfPageBuilder page = pdfBuilder.AddPage(PageSize.A4); - double topPageY = page.PageSize.Top - 50; - double inch = (page.PageSize.Width / 8.5); - double cm = inch / 2.54; - double leftX = 1 * cm; - var point = new PdfPoint(leftX, topPageY); + double topPageY = page.PageSize.Top - 50; + double inch = (page.PageSize.Width / 8.5); + double cm = inch / 2.54; + double leftX = 1 * cm; - // Get existing (but private) EncodingTable from encoding class using reflection so we can obtain all codes - var ZapfDingbatsEncodingType = typeof(UglyToad.PdfPig.Fonts.Encodings.ZapfDingbatsEncoding); - var EncodingTableFieldInfo = ZapfDingbatsEncodingType.GetFields(BindingFlags.NonPublic | BindingFlags.Static) - .FirstOrDefault(v=>v.Name=="EncodingTable"); - (int, string)[] EncodingTable = ((int, string)[])EncodingTableFieldInfo.GetValue(Activator.CreateInstance(ZapfDingbatsEncodingType, true)); + var point = new PdfPoint(leftX, topPageY); + var letters = page.AddText("Adobe Standard Font ZapfDingbats", 21, point, F2); + var newY = topPageY - letters.Select(v => v.GlyphRectangle.Height).Max() * 1.2; + point = new PdfPoint(leftX, newY); + letters = page.AddText("Font Specific encoding in Black (octal) and Unicode in Blue (hex)", 10, point, F2); + newY = newY - letters.Select(v => v.GlyphRectangle.Height).Max() * 3; + point = new PdfPoint(leftX, newY); + var eachRowY = new List(); + eachRowY.Add(newY); // First row + + (var maxCharacterHeight, var maxCharacterWidth) = GetCharacterDetails(page,F1, 12d,unicodesCharacters); + var context = GetContext(F1,page, nameof(F1), F2,maxCharacterHeight,maxCharacterWidth); + // Font specific character codes (in black) + page.SetTextAndFillColor(0,0,0); //Black + foreach ((var code, var 
name) in EncodingTable) + { + var ch = (char)code; // Note code is already base 10 no need to use OctalHelpers.FromOctalInt or System.Convert.ToInt32($"{code}", 8); + point = AddLetterWithContext(point, $"{ch}", context, true); - foreach ((var code, var name) in EncodingTable) - { - var ch = (char)code; // Note code is already base 10 no need to use OctalHelpers.FromOctalInt or System.Convert.ToInt32($"{code}", 8); - point = AddLetterWithFont(page, point, $"{ch}", F1, nameof(F1)); + if (eachRowY.Last() != point.Y) { eachRowY.Add(point.Y); } + } + + // Second set of rows for (unicode) characters : Test mapping from (C#) unicode chars to PDF encoding + newY = newY - maxCharacterHeight * 1.2; + point = new PdfPoint(leftX, newY); + + // Unicode character codes (in blue) + page.SetTextAndFillColor(0, 0, 200); //Blue + foreach (var unicodeCh in unicodesCharacters) + { + point = AddLetterWithContext(point, $"{unicodeCh}", context, isHexLabel: true); + } } - + // Save one page PDF to file system for manual review. 
- var pdfBytes = builder.Build(); + var pdfBytes = pdfBuilder.Build(); WritePdfFile(nameof(ZapfDingbatsFontAddText), pdfBytes); + + + // Check extracted letters + using (var document = PdfDocument.Open(pdfBytes)) + { + var page1 = document.GetPage(1); + var letters = page1.Letters; + + { + var lettersFontSpecificCodes = letters.Where(l => l.FontName == "ZapfDingbats" + && l.Color.ToRGBValues().b == 0) + .ToList(); + + + Assert.Equal(188,lettersFontSpecificCodes.Count); + for (int i = 0; i < lettersFontSpecificCodes.Count; i++) + { + var letter = lettersFontSpecificCodes[i]; + + (var code, var name) = EncodingTable[i]; + var unicodeString = GlyphList.ZapfDingbats.NameToUnicode(name); + + var letterCharacter = letter.Value[0]; + var unicodeCharacter = unicodeString[0]; + Assert.Equal(letterCharacter, unicodeCharacter); + //Debug.WriteLine($"{letterCharacter} , {unicodeCharacter}"); + } + } + + { + var lettersUnicode = letters.Where(l => l.FontName == "ZapfDingbats" + && l.Color.ToRGBValues().b > 0.78m) + .ToList(); + Assert.Equal(188,lettersUnicode.Count); + for (int i = 0; i < lettersUnicode.Count; i++) + { + var letter = lettersUnicode[i]; + + var letterCharacter = letter.Value[0]; + var unicodeCharacter = unicodesCharacters[i]; + Assert.Equal(letterCharacter, unicodeCharacter); + //Debug.WriteLine($"{letterCharacter} , {unicodeCharacter}"); + } + } + } } [Fact] public void SymbolFontAddText() { - PdfDocumentBuilder builder = new PdfDocumentBuilder(); - PdfDocumentBuilder.AddedFont F1 = builder.AddStandard14Font(Standard14Font.Symbol); - PdfPageBuilder page = builder.AddPage(PageSize.A4); - - double topPageY = page.PageSize.Top - 50; - double inch = (page.PageSize.Width / 8.5); - double cm = inch / 2.54; - double leftX = 1 * cm; - var point = new PdfPoint(leftX, topPageY); - - // Get existing (but private) EncodingTable from encoding class using reflection so we can obtain all codes - var SymbolEncodingType = typeof(UglyToad.PdfPig.Fonts.Encodings.SymbolEncoding); 
- var EncodingTableFieldInfo = SymbolEncodingType.GetFields(BindingFlags.NonPublic | BindingFlags.Static) - .FirstOrDefault(v => v.Name == "EncodingTable"); - (int, string)[] EncodingTable = ((int, string)[])EncodingTableFieldInfo.GetValue(Activator.CreateInstance(SymbolEncodingType, true)); - - - foreach ((var code, var name) in EncodingTable) + PdfDocumentBuilder pdfBuilder = new PdfDocumentBuilder(); + PdfDocumentBuilder.AddedFont F1 = pdfBuilder.AddStandard14Font(Standard14Font.Symbol); + var EncodingTable = GetEncodingTable(typeof(UglyToad.PdfPig.Fonts.Encodings.SymbolEncoding)); + var unicodesCharacters = GetUnicodeCharacters(EncodingTable, GlyphList.AdobeGlyphList); { - var ch = (char)code; // Note code is already base 10 no need to use OctalHelpers.FromOctalInt or System.Convert.ToInt32($"{code}", 8); - point = AddLetterWithFont(page, point, $"{ch}", F1, nameof(F1)); + PdfDocumentBuilder.AddedFont F2 = pdfBuilder.AddStandard14Font(Standard14Font.TimesRoman); + PdfPageBuilder page = pdfBuilder.AddPage(PageSize.A4); + + double topPageY = page.PageSize.Top - 50; + double inch = (page.PageSize.Width / 8.5); + double cm = inch / 2.54; + double leftX = 1 * cm; + + var point = new PdfPoint(leftX, topPageY); + var letters = page.AddText("Adobe Standard Font Symbol ", 21, point, F2); + var newY = topPageY - letters.Select(v => v.GlyphRectangle.Height).Max() * 1.2; + point = new PdfPoint(leftX, newY); + letters = page.AddText("Font Specific encoding in Black (octal), Unicode in Blue (hex), Red only available using Unicode", 10, point, F2); + newY = newY - letters.Select(v => v.GlyphRectangle.Height).Max() * 3; + + + + (var maxCharacterHeight, var maxCharacterWidth) = GetCharacterDetails(page, F1, 12d, unicodesCharacters); + var context = GetContext(F1, page, nameof(F1), F2, maxCharacterHeight, maxCharacterWidth); + + // First set of rows for direct PDF font specific character codes + newY = newY - maxCharacterHeight; + point = new PdfPoint(leftX, newY); + var 
eachRowY = new List(new[] { newY }); + page.SetTextAndFillColor(0, 0, 0); //Black + bool isTextColorBlack = true; + foreach ((var codeFontSpecific, var name) in EncodingTable) + { + var code = codeFontSpecific; // Code is already converted [neither OctalHelpers.FromOctalInt or System.Convert.ToInt32($"{code}", 8); is required] + // For a clash library uses unicode interpretation. + // Substitue if code is any of the 4 codes that clash (in Unicode and font specific encodes for Symbol) + if (code == 0xac) code = '\u2190'; // 0xac in unicode is logicalnot ('¬') use Unicode alternative for arrowleft ('←') 0x2190 + if (code == 0xf7) code = '\uf8f7'; // 0xf7 in unicode is divide ('÷') (different form '/') use Unicode alternative for parenrightex Unicode 0xF8F7 + if (code == 0xb5) code = '\u221D'; // 0xb5 in unicode is lowercase mu ('µ') use Unicode alternative for proportiona('∝') 0x221D + if (code == 0xd7) code = '\u22c5'; // 0xd7 in unicode is muliply ('×') (different from '*') use Unicode alternative for dotmath ('⋅') 0x22C5 + if (code != codeFontSpecific && isTextColorBlack) { page.SetTextAndFillColor(200, 0, 0); isTextColorBlack = false; } + if (code == codeFontSpecific && isTextColorBlack == false) { page.SetTextAndFillColor(0, 0, 0); isTextColorBlack = true; } + + char ch = (char)code; + point = AddLetterWithContext(point, $"{ch}", context, isTextColorBlack); + if (eachRowY.Last() != point.Y) { eachRowY.Add(point.Y); } + } + + // Second set of rows for (unicode) characters : Test mapping from (C#) unicode chars to font specific encoding + newY = newY - maxCharacterHeight * 1.2; + point = new PdfPoint(leftX, newY); + + page.SetTextAndFillColor(0, 0, 200); //Blue + foreach (var unicodeCh in unicodesCharacters) + { + point = AddLetterWithContext(point, $"{unicodeCh}", context, isHexLabel: true); + } } - // Save one page PDF to file system for manual review. - var pdfBytes = builder.Build(); + // Save two page PDF to file system for manual review. 
+ var pdfBytes = pdfBuilder.Build(); WritePdfFile(nameof(SymbolFontAddText), pdfBytes); + + + // Check extracted letters + using (var document = PdfDocument.Open(pdfBytes)) + { + var page1 = document.GetPage(1); + var letters = page1.Letters; + + { + var lettersFontSpecificCodes = letters.Where(l => l.FontName == "Symbol" + && l.Color.ToRGBValues().b == 0 + && (l.Color.ToRGBValues().b == 0 + || l.Color.ToRGBValues().r == 200) + ) + .ToList(); + + + Assert.Equal(189, lettersFontSpecificCodes.Count); + Assert.Equal(EncodingTable.Length, lettersFontSpecificCodes.Count); + for (int i = 0; i < lettersFontSpecificCodes.Count; i++) + { + var letter = lettersFontSpecificCodes[i]; + + (var code, var name) = EncodingTable[i]; + var unicodeString = GlyphList.AdobeGlyphList.NameToUnicode(name); + + var letterCharacter = letter.Value[0]; + var unicodeCharacter = unicodeString[0]; + //Debug.WriteLine($"{letterCharacter} , {unicodeCharacter}"); + Assert.Equal(letterCharacter, unicodeCharacter); + } + } + + { + var lettersUnicode = letters.Where(l => l.FontName == "Symbol" + && l.Color.ToRGBValues().b > 0.78m) + .ToList(); + Assert.Equal(189, lettersUnicode.Count); + for (int i = 0; i < lettersUnicode.Count; i++) + { + var letter = lettersUnicode[i]; + + var letterCharacter = letter.Value[0]; + var unicodeCharacter = unicodesCharacters[i]; + //Debug.WriteLine($"{letterCharacter} , {unicodeCharacter}"); + Assert.Equal(letterCharacter, unicodeCharacter); + } + } + } } - [Fact] public void StandardFontsAddText() @@ -99,8 +255,7 @@ PdfDocumentBuilder.AddedFont F11 = pdfBuilder.AddStandard14Font(Standard14Font.CourierOblique); PdfDocumentBuilder.AddedFont F12 = pdfBuilder.AddStandard14Font(Standard14Font.CourierBoldOblique); - - var standardFontWithStandardEncoding = new PdfDocumentBuilder.AddedFont[] + var standardFontsWithStandardEncoding = new PdfDocumentBuilder.AddedFont[] { F1, F2, @@ -115,7 +270,7 @@ F11, F12 }; - + //AddLetterWithFont(page, point, "v", F1, nameof(F1)); 
//AddLetterWithFont(page, point, "v", F2, nameof(F2)); //AddLetterWithFont(page, point, "v", F3, nameof(F3)); @@ -130,106 +285,243 @@ //AddLetterWithFont(page, point, "v", F12, nameof(F12)); - // Get all characters/codes in font using existing (but private) class using reflection - var Standard14Type = typeof(UglyToad.PdfPig.Fonts.Standard14Fonts.Standard14); - var Standard14CacheFieldInfos = Standard14Type.GetFields(BindingFlags.NonPublic | BindingFlags.Static); - var Standard14Cache = (Dictionary)Standard14CacheFieldInfos.FirstOrDefault(v => v.Name == "Standard14Cache").GetValue(null); - - - // Alternatively all 12 fonts should conform to 'StanardEncoding' - var SymbolEncodingType = typeof(UglyToad.PdfPig.Fonts.Encodings.StandardEncoding); - var EncodingTableFieldInfo = SymbolEncodingType.GetFields(BindingFlags.NonPublic | BindingFlags.Static) - .FirstOrDefault(v => v.Name == "EncodingTable"); - (int, string)[] EncodingTable = ((int, string)[])EncodingTableFieldInfo.GetValue(Activator.CreateInstance(SymbolEncodingType, true)); - + // Get all characters codes in font using existing metrics in (private) Standard14Cache class (using reflection). 
+ var Standard14Cache = GetStandard14Cache(); + // All 12 fonts should conform to 'StanardEncoding' + var EncodingTable = ((int code, string name)[])GetEncodingTable(typeof(UglyToad.PdfPig.Fonts.Encodings.StandardEncoding)); + var unicodesCharacters = GetUnicodeCharacters(EncodingTable, GlyphList.AdobeGlyphList); + int fontNumber = 0; - foreach (var font in standardFontWithStandardEncoding) + foreach (var font in standardFontsWithStandardEncoding) { - PdfPageBuilder page = pdfBuilder.AddPage(PageSize.A4); - - double topPageY = page.PageSize.Top - 50; - double inch = (page.PageSize.Width / 8.5); - double cm = inch / 2.54; - double leftX = 1 * cm; - fontNumber++; var storedFont = pdfBuilder.Fonts[font.Id]; var fontProgram = storedFont.FontProgram; var fontName = fontProgram.Name; - - var pointHeading = new PdfPoint(leftX, topPageY); - var letters = page.AddText("Font: " + fontName, 21, pointHeading, font); - var newY = topPageY - letters.Select(v => v.GlyphRectangle.Height).Max() * 2; - var point = new PdfPoint(leftX, newY); - - var metrics = Standard14Cache[fontName]; - - var codesFromMetrics = new HashSet(); - foreach (var metric in metrics.CharacterMetrics) + { - var code = metric.Value.CharacterCode; - if (code == -1) continue; - codesFromMetrics.Add(code); - char ch = (char)code; + PdfPageBuilder page = pdfBuilder.AddPage(PageSize.A4); + + double topPageY = page.PageSize.Top - 50; + double inch = (page.PageSize.Width / 8.5); + double cm = inch / 2.54; + double leftX = 1 * cm; - point = AddLetterWithFont(page, point, $"{ch}", font, $"F{fontNumber}"); - } + var point = new PdfPoint(leftX, topPageY); + var letters = page.AddText("Adobe Standard Font "+ fontName, 21, point, F2); + var newY = topPageY - letters.Select(v => v.GlyphRectangle.Height).Max() * 1.2; + point = new PdfPoint(leftX, newY); + letters = page.AddText("Font Specific encoding in Black, Unicode in Blue, Red only available using Unicode", 10, point, F2); + newY = newY - letters.Select(v => 
v.GlyphRectangle.Height).Max() * 3; + point = new PdfPoint(leftX, newY); + - // Detect if all codes in Standard encoding table are in metrics for font. - bool isMissing = false; - foreach ((var codeNotBase8Converted, var name) in EncodingTable) - { - var codeBase10 = System.Convert.ToInt32($"{codeNotBase8Converted}", 8); - if (codesFromMetrics.Contains(codeBase10) == false) - { - var ch = (char)codeBase10; - isMissing = true; - Debug.WriteLine($"In Adobe Standard Font '{fontName}' code {codeBase10} is in Standard encoding table but not in font metrics."); + var eachRowY = new List(new[] { newY }); + + var metrics = Standard14Cache[fontName]; + + var codesFromMetrics = new HashSet(); + page.SetTextAndFillColor(0, 0, 0); //Black + + (var maxCharacterHeight, var maxCharacterWidth) = GetCharacterDetails(page, F1, 12d, unicodesCharacters); + var context = GetContext(font, page, $"F{fontNumber}", F2, maxCharacterHeight, maxCharacterWidth); + + // Detect if all codes in Standard encoding table are in metrics for font. 
+ bool isMissing = false; + bool isTextColorBlack = true; + foreach ((var codeNotBase8Converted, var name) in EncodingTable) + { + var codeFontSpecific = System.Convert.ToInt32($"{codeNotBase8Converted}", 8); + var isToggleColor = false; + var code = codeFontSpecific; + if (codeFontSpecific == 0xc6) { code = 0x02D8; } + else if (codeFontSpecific == 0xb4) { code = 0x00b7; } + else if (codeFontSpecific == 0xb7) { code = 0x2022; } + else if (codeFontSpecific == 0xb8) { code = 0x201A; } + else if (codeFontSpecific == 0xa4) { code = 0x2044; } + else if (codeFontSpecific == 0xa8) { code = 0x00a4; } + else if (codeFontSpecific == 0x60) { code = 0x2018; } + else if (codeFontSpecific == 0xaf) { code = 0xFB02; } + else if (codeFontSpecific == 0xaa) { code = 0x201C; } + else if (codeFontSpecific == 0xba) { code = 0x201D; } + else if (codeFontSpecific == 0xf8) { code = 0x0142; } + else if (codeFontSpecific == 0x27) { code = 0x2019; } + if (code != codeFontSpecific && isTextColorBlack) { page.SetTextAndFillColor(200, 0, 0); isTextColorBlack = false; } + if (code == codeFontSpecific && isTextColorBlack == false) { page.SetTextAndFillColor(0, 0, 0); isTextColorBlack = true; } + + char ch = (char)code; + point = AddLetterWithContext(point, $"{ch}", context, isTextColorBlack); + + if (eachRowY.Last() != point.Y) { eachRowY.Add(point.Y); } + } + + foreach (var metric in metrics.CharacterMetrics) + { + var code = metric.Value.CharacterCode; + if (code == -1) continue; + codesFromMetrics.Add(code); + } + + foreach ((var codeNotBase8Converted, var name) in EncodingTable) + { + var codeBase10 = System.Convert.ToInt32($"{codeNotBase8Converted}", 8); + if (codesFromMetrics.Contains(codeBase10) == false) + { + var ch = (char)codeBase10; + isMissing = true; + Debug.WriteLine($"In Adobe Standard Font '{fontName}' code {codeBase10} is in Standard encoding table but not in font metrics."); + } + } + + Assert.False(isMissing, $"Adobe Standard Font '{fontName}' contains code(s) in Standard 
encoding table but not in font metrics. See Debug output for details."); + + // Second set of rows for (unicode) characters : Test mapping from (C#) unicode chars to PDF encoding + newY = newY - maxCharacterHeight * 1.2; + point = new PdfPoint(leftX, newY); + page.SetTextAndFillColor(0, 0, 200); //Blue + foreach (var unicodeCh in unicodesCharacters) + { + point = AddLetterWithContext(point, $"{unicodeCh}", context, isHexLabel:true); } } - - Assert.False(isMissing, $"Adobe Standard Font '{fontName}' contains code(s) in Standard encoding table but not in font metrics. See Debug output for details."); } // Save one page per standard font to file system for manual review. var pdfBytes = pdfBuilder.Build(); WritePdfFile($"{nameof(StandardFontsAddText)}", pdfBytes); + + // Check extracted letters + using (var document = PdfDocument.Open(pdfBytes)) + { + foreach (var page in document.GetPages()) + { + var letters = page.Letters; + var expectedFontName = letters.FirstOrDefault(l=>l.FontSize == 12d).FontName; + + + { + var lettersFontSpecificCodes = letters.Where(l => l.FontName == expectedFontName + && l.FontSize == 12d + && ( l.Color.ToRGBValues().b == 0 + || l.Color.ToRGBValues().r == 200) + ) + .ToList(); + + + Assert.Equal(149, lettersFontSpecificCodes.Count); + Assert.Equal(lettersFontSpecificCodes.Count, EncodingTable.Length); + for (int i = 0; i < lettersFontSpecificCodes.Count; i++) + { + var letter = lettersFontSpecificCodes[i]; + + (var code, var name) = EncodingTable[i]; + var unicodeString = GlyphList.AdobeGlyphList.NameToUnicode(name); + + var letterCharacter = letter.Value[0]; + var unicodeCharacter = unicodeString[0]; + if (letterCharacter != unicodeCharacter) Debug.WriteLine($"{letterCharacter} , {unicodeCharacter}"); + Assert.Equal(unicodeCharacter, letterCharacter); + } + } + + { + var lettersUnicode = letters.Where(l => l.FontName == expectedFontName + && l.FontSize == 12d + && l.Color.ToRGBValues().b > 0.78m) + .ToList(); + 
Assert.Equal(149,lettersUnicode.Count); + for (int i = 0; i < lettersUnicode.Count; i++) + { + var letter = lettersUnicode[i]; + + var letterCharacter = letter.Value[0]; + var unicodeCharacter = unicodesCharacters[i]; + //Debug.WriteLine($"{letterCharacter} , {unicodeCharacter}"); + Assert.Equal(unicodeCharacter, letterCharacter); + } + } + + } + } } - static double maxY = 0; - internal PdfPoint AddLetterWithFont(PdfPageBuilder page, PdfPoint point, string stringToAdd, PdfDocumentBuilder.AddedFont font, string fontName) + + internal PdfPoint AddLetterWithContext( PdfPoint point, string stringToAdd, ( PdfDocumentBuilder.AddedFont font, PdfPageBuilder page, string fontName, PdfDocumentBuilder.AddedFont fontLabel, double maxCharacterHeight, double maxCharacterWidth)context, bool isOctalLabel = false, bool isHexLabel = false) + { + var font = context.font; + var page = context.page; + var fontName = context.fontName; + var fontLabel = context.fontLabel; + var maxCharacterHeight = context.maxCharacterHeight; + var maxCharacterWidth = context.maxCharacterWidth; + + return AddLetter(page, point, stringToAdd, font, fontName, fontLabel, maxCharacterHeight, maxCharacterWidth,isOctalLabel, isHexLabel); + } + internal PdfPoint AddLetter(PdfPageBuilder page, PdfPoint point, string stringToAdd, PdfDocumentBuilder.AddedFont font, string fontName, PdfDocumentBuilder.AddedFont fontLabel, double maxCharacterHeight, double maxCharacterWidth, bool isOctalLabel = false, bool isHexLabel = false) { if (stringToAdd is null) { throw new ArgumentException("Text to add must be a single letter.", nameof(stringToAdd)); } - if (stringToAdd.Length>1) { throw new ArgumentException("Text to add must be a single letter.", nameof(stringToAdd)); } + if (stringToAdd.Length > 1) { throw new ArgumentException("Text to add must be a single letter.", nameof(stringToAdd)); } if (fontName.ToUpper() != fontName) { throw new ArgumentException(@"FontName must be in uppercase eg. 
""F1"".", nameof(fontName)); } + var letter = page.AddText(stringToAdd, 12, point, font); - Assert.NotNull(letter); // We should get back something. - Assert.Equal(1, letter.Count); // There should be only one letter returned after the add operation. - Assert.Equal(stringToAdd, letter[0].Value); // Check we got back the name letter (eg. "v") - //Assert.Equal(fontName, letter[0].FontName); // eg. "F1" for first font added, "F2" for second etc. + if (isOctalLabel) + { + var labelPointSize = 5; + var octalString = System.Convert.ToString((int)stringToAdd[0],8).PadLeft(3, '0'); + var label = octalString; + var codeMidPoint = point.X + letter[0].GlyphRectangle.Width / 2; + var ml = page.MeasureText(label, labelPointSize, point, fontLabel); + var labelY = point.Y + ml.Max(v => v.GlyphRectangle.Height) * 0.1 + maxCharacterHeight; + var xLabel =codeMidPoint - (ml.Sum(v => v.GlyphRectangle.Width) /2); + var labelPoint = new PdfPoint(xLabel, labelY); + page.AddText(label, labelPointSize, labelPoint, fontLabel); + } + if (isHexLabel) + { + var labelPointSize = 3; + var hexString = $"{(int)stringToAdd[0]:X}".PadLeft(4, '0'); + var label = "0x" + hexString; + var codeMidPoint = point.X + letter[0].GlyphRectangle.Width / 2; + var ml = page.MeasureText(label, labelPointSize, point, fontLabel); + var labelY = point.Y - ml.Max(v => v.GlyphRectangle.Height) * 2.5; + var xLabel = codeMidPoint - (ml.Sum(v => v.GlyphRectangle.Width) / 2); + var labelPoint = new PdfPoint(xLabel, labelY); + page.AddText(label, labelPointSize, labelPoint, fontLabel); + } + + + Assert.NotNull(letter); // We should get back something. + Assert.Equal(1, letter.Count); // There should be only one letter returned after the add operation. + Assert.Equal(stringToAdd, letter[0].Value); // Check we got back the name letter (eg. 
"v") + //Debug.WriteLine($"{letter[0]}"); + double inch = (page.PageSize.Width / 8.5); double cm = inch / 2.54; - - - var letterWidth = letter[0].GlyphRectangle.Width*2; - var letterHeight = letter[0].GlyphRectangle.Height * 2; - var newX = point.X + letterWidth; + var letterWidth = letter[0].GlyphRectangle.Width * 2; + var letterHeight = letter[0].GlyphRectangle.Height * 2; + + var newX = point.X + maxCharacterWidth * 1.1; var newY = point.Y; - if (letterHeight > maxY) maxY = letterHeight; - if (newX > page.PageSize.Width - 2 * letterWidth) + + if (newX > page.PageSize.Width - cm) { - newX = 1 * cm; - newY -= maxY *2; - maxY=0; + return newLine(cm, point.Y, maxCharacterHeight); } return new PdfPoint(newX, newY); } + PdfPoint newLine(double cm, double y, double maxCharacterHeight) + { + var newX = 1 * cm; + var newY = y - maxCharacterHeight * 5; + return new PdfPoint(newX, newY); + } + private static void WritePdfFile(string name, byte[] bytes, string extension = "pdf") - { + { const string subFolder = nameof(Standard14WritingFontTests); var folderPath = subFolder; @@ -243,5 +535,48 @@ Debug.WriteLine($@"{Path.Combine(Directory.GetCurrentDirectory(), filePath)}"); } + private static (int code, string name)[] GetEncodingTable(Type t) + { + // Get existing (but private) EncodingTable from encoding class using reflection so we can obtain all codes + var EncodingTableFieldInfo = t.GetFields(BindingFlags.NonPublic | BindingFlags.Static) + .FirstOrDefault(v => v.Name == "EncodingTable"); + (int, string)[] EncodingTable = ((int, string)[])EncodingTableFieldInfo.GetValue(Activator.CreateInstance(t, true)); + return EncodingTable; + } + + + private (PdfDocumentBuilder.AddedFont font, PdfPageBuilder page, string fontName, PdfDocumentBuilder.AddedFont fontLabel, double maxCharacterHeight, double maxCharacterWidth) GetContext(PdfDocumentBuilder.AddedFont font, PdfPageBuilder page, string fontName, PdfDocumentBuilder.AddedFont fontLabel, double maxCharacterHeight, double 
maxCharacterWidth) + { + return (font, page, fontName, fontLabel, maxCharacterHeight, maxCharacterWidth); + + } + + private static char[]GetUnicodeCharacters((int code, string name)[] EncodingTable, GlyphList glyphList) + { + var gylphNamesFromEncodingTable = EncodingTable.Select(v => v.name).ToArray(); + char[] unicodesCharacters = gylphNamesFromEncodingTable.Select(v => (char)glyphList.NameToUnicode(v)[0]).ToArray(); + return unicodesCharacters; + } + ( double maxCharacterHeight,double maxCharacterWidth)GetCharacterDetails(PdfPageBuilder page,PdfDocumentBuilder.AddedFont font, double fontSize, char[] unicodesCharacters) + { + double maxCharacterHeight; + double maxCharacterWidth; + { + var point = new PdfPoint(10, 10); + var characterRectangles = unicodesCharacters.Select(v => page.MeasureText($"{v}", 12m,point, font)[0].GlyphRectangle); + maxCharacterHeight = characterRectangles.Max(v => v.Height); + maxCharacterWidth = characterRectangles.Max(v => v.Height); + } + return (maxCharacterHeight, maxCharacterWidth); + } + + + private static Dictionary GetStandard14Cache() + { + var Standard14Type = typeof(UglyToad.PdfPig.Fonts.Standard14Fonts.Standard14); + var Standard14CacheFieldInfos = Standard14Type.GetFields(BindingFlags.NonPublic | BindingFlags.Static); + var Standard14Cache = (Dictionary)Standard14CacheFieldInfos.FirstOrDefault(v => v.Name == "Standard14Cache").GetValue(null); + return Standard14Cache; + } } } diff --git a/src/UglyToad.PdfPig/Content/Letter.cs b/src/UglyToad.PdfPig/Content/Letter.cs index 5c3fca4b..b15ea4f3 100644 --- a/src/UglyToad.PdfPig/Content/Letter.cs +++ b/src/UglyToad.PdfPig/Content/Letter.cs @@ -2,8 +2,9 @@ { using Core; using Graphics.Colors; - using PdfFonts; - + using PdfFonts; + using System.Diagnostics; + /// /// A glyph or combination of glyphs (characters) drawn by a PDF content stream. 
/// diff --git a/src/UglyToad.PdfPig/PdfFonts/Simple/Type1Standard14Font.cs b/src/UglyToad.PdfPig/PdfFonts/Simple/Type1Standard14Font.cs index a67686b3..34359905 100644 --- a/src/UglyToad.PdfPig/PdfFonts/Simple/Type1Standard14Font.cs +++ b/src/UglyToad.PdfPig/PdfFonts/Simple/Type1Standard14Font.cs @@ -1,7 +1,8 @@ // ReSharper disable CompareOfFloatsByEqualityOperator namespace UglyToad.PdfPig.PdfFonts.Simple { - using System; + using System; + using System.Diagnostics; using Core; using Fonts; using Fonts.AdobeFontMetrics; @@ -46,13 +47,36 @@ namespace UglyToad.PdfPig.PdfFonts.Simple public bool TryGetUnicode(int characterCode, out string value) { - var name = encoding.GetName(characterCode); - - var listed = GlyphList.AdobeGlyphList.NameToUnicode(name); - - value = listed; - - return true; + var name = encoding.GetName(characterCode); + if (name is ".notdef") + { + value = null; + return false; + } + if (encoding is ZapfDingbatsEncoding) + { + var listed = GlyphList.ZapfDingbats.NameToUnicode(name); + + value = listed; + + return value is not null; // only report success when a Unicode string was actually produced + } + else if (encoding is StandardEncoding or SymbolEncoding) + { + var listed = GlyphList.AdobeGlyphList.NameToUnicode(name); + + value = listed; + + return value is not null; // only report success when a Unicode string was actually produced + } else + { + Debug.WriteLine($"Warning: Type1Standard14Font with unexpected encoding: '{encoding.EncodingName}' Expected: 'ZapfDingbatsEncoding','SymbolEncoding' or 'StandardEncoding' .
Font: '{standardFontMetrics.FontName}'"); + var listed = GlyphList.AdobeGlyphList.NameToUnicode(name); + + value = listed; + + return value is not null; // only report success when a Unicode string was actually produced + } } public CharacterBoundingBox GetBoundingBox(int characterCode) diff --git a/src/UglyToad.PdfPig/Writer/Fonts/Standard14WritingFont.cs b/src/UglyToad.PdfPig/Writer/Fonts/Standard14WritingFont.cs index c5987b5d..b450b5b8 100644 --- a/src/UglyToad.PdfPig/Writer/Fonts/Standard14WritingFont.cs +++ b/src/UglyToad.PdfPig/Writer/Fonts/Standard14WritingFont.cs @@ -28,10 +28,19 @@ } public bool TryGetBoundingBox(char character, out PdfRectangle boundingBox) - { - boundingBox = default(PdfRectangle); + { + + boundingBox = default(PdfRectangle); + + int code = CodeMapIfUnicode(character); + if (code == -1) + { + Debug.WriteLine($"Font '{metrics.FontName}' does NOT have character '{character}' (0x{(int)character:X})."); + return false; + } + var characterMetric = metrics.CharacterMetrics - .Where(v => v.Value.CharacterCode == character) + .Where(v => v.Value.CharacterCode == code) .Select(v => v.Value) .FirstOrDefault(); if (characterMetric is null) @@ -48,7 +57,8 @@ public bool TryGetAdvanceWidth(char character, out double width) { - width = 0; + width = 0; + if (!TryGetBoundingBox(character, out var bbox)) { return false; @@ -87,10 +97,10 @@ } public byte GetValueForCharacter(char character) - { - + { + var characterCode = CodeMapIfUnicode(character); var characterMetric = metrics.CharacterMetrics - .Where(v => v.Value.CharacterCode == character) + .Where(v => v.Value.CharacterCode == characterCode) .Select(v => v.Value) .FirstOrDefault(); if (characterMetric is null) @@ -100,6 +110,181 @@ var code = characterMetric.CharacterCode; var result = (byte)code; return result; - } + } + private int UnicodeToSymbolCode(char character) + { // Maps a Unicode character to its SymbolEncoding code through the Adobe glyph list; returns -1 when there is no mapping. + var name = GlyphList.AdobeGlyphList.UnicodeCodePointToName(character); + if (name is ".notdef") + { + return -1; + } + var symbolEncoding = SymbolEncoding.Instance; + var code =
symbolEncoding.GetCode(name); + if (code == -1) + { + Debug.WriteLine($"Found Unicode point '{character}' (0x{(int)character:X}) but glphy name '{name}' not found in font '{metrics.FontName}' [Symbol] (SymbolEncoding)."); + } + return code; + } + + private int UnicodeToZapfDingbats(char character) + { // Maps a Unicode character to its ZapfDingbatsEncoding code through the ZapfDingbats glyph list; returns -1 when there is no mapping. + + int code; + var name = GlyphList.ZapfDingbats.UnicodeCodePointToName(character); + if (name is ".notdef") + { + Debug.WriteLine($"Failed to find Unicode character '{character}' (0x{(int)character:X})."); + return -1; + } + var encoding = ZapfDingbatsEncoding.Instance; + code = encoding.GetCode(name); + if (code == -1) + { + Debug.WriteLine($"Found Unicode point '{character}' (0x{(int)character:X}) but glphy name '{name}' not found in font '{metrics.FontName}' (font specific encoding : ZapfDingbats)."); + } + return code; + + } + + private int UnicodeToStandardEncoding(char character) + { // Maps a Unicode character to its StandardEncoding code through the Adobe glyph list, retrying with the first letter's case flipped; returns -1 when there is no mapping. + int code; + var name = GlyphList.AdobeGlyphList.UnicodeCodePointToName(character); + if (name is ".notdef") + { + Debug.WriteLine($"Failed to find Unicode character '{character}' (0x{(int)character:X})."); + return -1; + } + var standardEncoding = StandardEncoding.Instance; + code = standardEncoding.GetCode(name); + if (code == -1) + { + // Check if name from glyph list is the same except first letter's case; capital letter (or if capital a lowercase) + var nameCapitalisedChange = Char.IsUpper(name[0]) ?
Char.ToLower(name[0]) + name.Substring(1) :Char.ToUpper(name[0]) + name.Substring(1); + code = standardEncoding.GetCode(nameCapitalisedChange); + if (code == -1) + { + Debug.WriteLine($"Found Unicode point '{character}' (0x{(int)character:X}) but glphy name '{name}' not found in font '{metrics.FontName}' (StandardEncoding)."); + } + } + return code; + } + + private int CodeMapIfUnicode(char character) + { // Resolves a char to the font's own character code: the branch is chosen by metrics.FontName, known Unicode clashes are remapped explicitly, and -1 means no code was found. + int code; // encoding code either from StandardEncoding, ZapfDingbatsEncoding or SymbolEncoding depending on font + int i = (int)character; + if (metrics.FontName is "ZapfDingbats") + { + // Either use character code as is if font specific code or map from Unicode Dingbats range. 0x2700- 0x27bf + code = i < 255 ? i : UnicodeToZapfDingbats(character); + } + else if (metrics.FontName is "Symbol") + { + if (i == 0x00AC) { + Debug.WriteLine("Warning: 0x00AC used as Unicode ('¬') (logicalnot). For (arrowleft)('←') from Adobe Symbol Font Specific (0254) use Unicode 0x2190 ('←')."); + return 0x00d8; + } + if (i == 0x00F7) { + Debug.WriteLine("Warning: 0x00F7 used as Unicode ('÷')(divide). For (parenrightex) from Adobe Symbol Font Specific (0367) use Unicode 0xF8F7."); + return 0x00B8; + } + if (i == 0x00B5) { + Debug.WriteLine("Warning: 0x00B5 used as Unicode ('µ')(mu). For (proportional)('∝') from Adobe Symbol Font Specific (0265) use Unicode 0x221D('∝')."); + return 0x006d; + } + if (i == 0x00D7) { + Debug.WriteLine("Warning: 0x00D7 used as Unicode multiply ('×')(multiply). For (dotmath)('⋅') from Adobe Symbol Font Specific (0327) use Unicode 0x22C5('⋅')."); + return 0x00B4; + } + + // Either use character code as is if font specific code or map from Unicode + code = i < 255 ? i : UnicodeToSymbolCode(character); + } + else + { + // Resolve clashes as unicode. Use any standard code (with warnings). Map remainder as unicode to standard character code. + + if (i == 0x00c6) + { + Debug.WriteLine("Warning: 0x00C6 used as Unicode ('Æ') (AE).
For (breve)('˘') from Adobe Standard Font Specific (0306) use Unicode 0x02D8 ('˘')."); + return 0x00e1; // (0341) + } + + if (i == 0x00b4) + { + Debug.WriteLine("Warning: 0x00B4 used as Unicode ('´') (acute). For (periodcentered)('·') from Adobe Standard Font Specific (0264) use Unicode 0x00B7 ('·')."); + return 0x00c2; // (0302) + } + + + + if (i == 0x00b7) + { + Debug.WriteLine("Warning: 0x00B7 used as Unicode ('·') (periodcentered). For (bullet)('•') from Adobe Standard Font Specific (0267) use Unicode 0x2022 ('•')."); + return 0x00b4; // (0264) + } + + if (i == 0x00b8) + { + Debug.WriteLine("Warning: 0x00B8 used as Unicode ('¸') (cedilla). For (quotesinglbase)('‚') from Adobe Standard Font Specific (0270) use Unicode 0x201A ('‚')."); + return 0x00cb; // (0313) + } + + if (i == 0x00a4) + { + Debug.WriteLine("Warning: 0x00A4 used as Unicode (currency). For (fraction) ('⁄') from Adobe Standard Font Specific (0244) use Unicode 0x2044 ('⁄')."); + return 0x00a8; // (0250) + } + + if (i == 0x00a8) + { + Debug.WriteLine("Warning: 0x00A8 used as Unicode (dieresis)('¨'). For (currency) from Adobe Standard Font Specific (0250) use Unicode 0x00A4."); + return 0x00c8; // (0310) + } + + if (i == 0x0060) + { + Debug.WriteLine("Warning: 0x0060 used as Unicode (grave)('`'). For (quoteleft)('‘') from Adobe Standard Font Specific (0140) use Unicode 0x2018."); + return 0x00c1; // (0301) + } + + if (i == 0x00af) + { + Debug.WriteLine("Warning: 0x00AF used as Unicode (macron)('¯'). For (fl)('fl') from Adobe Standard Font Specific (0257) use Unicode 0xFB02."); + return 0x00c5; // (0305) + } + + if (i == 0x00aa) + { + Debug.WriteLine("Warning: 0x00AA used as Unicode (ordfeminine)('ª'). For (quotedblleft) ('“') from Adobe Standard Font Specific (0252) use Unicode 0x201C."); + return 0x00e3; // (0343) + } + + if (i == 0x00ba) + { + Debug.WriteLine("Warning: 0x00BA used as Unicode (ordmasculine)('º').
For (quotedblright) ('”') from Adobe Standard Font Specific (0272) use Unicode 0x201D."); + return 0x00eb; // (0353) + } + + if (i == 0x00f8) + { + Debug.WriteLine("Warning: 0x00F8 used as Unicode (oslash)('ø'). For (lslash) ('ł') from Adobe Standard Font Specific (0370) use Unicode 0x0142."); + return 0x00f9; // (0371) + } + + if (i == 0x0027) + { + Debug.WriteLine("Warning: 0x0027 used as Unicode (quotesingle)('\''). For (quoteright) ('’') from Adobe Standard Font Specific (0047) use Unicode 0x2019."); + return 0x00a9; // (0251) + } + + var isCharacterCodeInStandardEncoding = metrics.CharacterMetrics.Any(v => v.Value.CharacterCode == i); + code = isCharacterCodeInStandardEncoding ? i : UnicodeToStandardEncoding(character); + } + return code; + } + } } \ No newline at end of file