From eb66611e5523bc7a762d7fea79535d85cb407fc6 Mon Sep 17 00:00:00 2001 From: Eliot Jones Date: Sat, 6 Jan 2018 14:11:14 +0000 Subject: [PATCH] checkpoint check in for adobe font metrics parsing --- .../Parser/AdobeFontMetricsParserTests.cs | 31 ++++ .../Documents/Single Page Type 1 Font.pdf | Bin 0 -> 4631 bytes .../SinglePageSimpleOpenOfficeTests - Copy.cs | 40 +++++ .../UglyToad.Pdf.Tests.csproj | 4 + src/UglyToad.Pdf/Fonts/FontFactory.cs | 5 +- src/UglyToad.Pdf/Fonts/FontMetrics.cs | 166 ++++++++++++++++++ .../Fonts/Parser/AdobeFontMetricsParser.cs | 102 +++++++++-- .../Parser/FontDictionaryAccessHelper.cs | 80 +++++++++ .../Parser/Handlers/TrueTypeFontHandler.cs | 76 +------- .../Fonts/Parser/Handlers/Type1FontHandler.cs | 85 +++++++++ src/UglyToad.Pdf/Parser/Parts/ReadHelper.cs | 5 + src/UglyToad.Pdf/Parser/PdfDocumentFactory.cs | 3 +- 12 files changed, 512 insertions(+), 85 deletions(-) create mode 100644 src/UglyToad.Pdf.Tests/Integration/Documents/Single Page Type 1 Font.pdf create mode 100644 src/UglyToad.Pdf.Tests/Integration/SinglePageSimpleOpenOfficeTests - Copy.cs create mode 100644 src/UglyToad.Pdf/Fonts/Parser/FontDictionaryAccessHelper.cs create mode 100644 src/UglyToad.Pdf/Fonts/Parser/Handlers/Type1FontHandler.cs diff --git a/src/UglyToad.Pdf.Tests/Fonts/Parser/AdobeFontMetricsParserTests.cs b/src/UglyToad.Pdf.Tests/Fonts/Parser/AdobeFontMetricsParserTests.cs index edaeb7ae..52b59147 100644 --- a/src/UglyToad.Pdf.Tests/Fonts/Parser/AdobeFontMetricsParserTests.cs +++ b/src/UglyToad.Pdf.Tests/Fonts/Parser/AdobeFontMetricsParserTests.cs @@ -1,5 +1,8 @@ namespace UglyToad.Pdf.Tests.Fonts.Parser { + using System; + using System.IO; + using IO; using Pdf.Fonts.Parser; using Xunit; @@ -79,5 +82,33 @@ C 37 ; WX 600 ; N percent ; B 81 -15 518 622 ;"; Assert.NotNull(metrics); } + + [Fact] + public void CanParseHelveticaAfmFile() + { + var helvetica = GetResourceBytes("UglyToad.Pdf.Resources.AdobeFontMetrics.Helvetica.afm"); + + var input = new ByteArrayInputBytes(helvetica); + + var metrics = parser.Parse(input, false); + + Assert.NotNull(metrics); + } + + private static byte[] GetResourceBytes(string name) + { + using (var memoryStream = new MemoryStream()) + using (var resource = typeof(AdobeFontMetricsParser).Assembly.GetManifestResourceStream(name)) + { + if (resource == null) + { + throw new InvalidOperationException($"No assembly resource with name: {name}."); + } + + resource.CopyTo(memoryStream); + + return memoryStream.ToArray(); + } + } } } diff --git a/src/UglyToad.Pdf.Tests/Integration/Documents/Single Page Type 1 Font.pdf b/src/UglyToad.Pdf.Tests/Integration/Documents/Single Page Type 1 Font.pdf new file mode 100644 index 0000000000000000000000000000000000000000..8f9bc24278696dbe3372bde9d0a0b69fde92483f GIT binary patch literal 4631 zcmeHLU1%It6mDoMWpOP^#ny*%`x92d*?aH&?ar{f!R}^BF`I-uNDRI*;4u-4MGu-S}F>@NbG}%4@GEw@CQ71_SbG`4HiMjykz#w zx%Zs&ec!#g_l(XIiULh4@zEb{UB4NRDTpAidJ<(ac+zp1;XA9$LKIITS(PZBMrk~b zC>0ev1=cwv%V`BqpampLDx#vcn9IePYrzz8wDA*`uA2));j5-r`gg}&j`m>-Wp?Xyrel!Iq{b_!}tDY)!;A%a;^ z86S0R56SJ8h9Q9&V~GVwjIfw)hBc5(fBxcQ^O zrpAdV%J@t%zf?r{N^j)o*f%`WZU5Lrt zf%qz0a@=g<_LWZ)$g#4Cmy{AIHS+9)GqLKk+105zb9LF&ti)I@mKif(Z(~BsXb5(+ z8*OB4thtedd&^=RyGlh(czt&aRNmy;c_-HUz$Oo=hc=S z5O`8cQi00PA=?L9Ry=o9pjaimE?+)^!z1EHBG}r z#G)vGhY*~0L!%|Q!6673W#Y{DhPgp9N-Al3b?nSznk2XvsQ?xEP9tBwI!TL)q*rU|Wf<245I{98cy01+jKn87^2K)`Ob z1EznK0}!=w(eR;NHG((21oF2}7hrG6CPYF~0<1(T&55EergcgLqUeM`UErFBtrYgy zf8yFz;Den16BtUR`vXz0b7dNhQb{j(X0r~P?$JVrb;(M}YPBi}N=jmaoH8XrlWbcs ziL9z5O)RmRO5lghon`hiZZ!Q`By!8dtj2h90nkSwWy{oU&#xPxP@~bP!4AcRi$P=U zj3R}>;^Eoh-r|H`!~a?zzGW-OrZC-tYYVPTA+RauEp=_dwJ8KP<-Db?{})_vmhavG z;98qatibtxgL@Od=eI8ya7N?@xn~MGKOqv4N}?#Hqr+hW4u{Xe?GsLhaQ3*bSO4nL zx#}aO*Ye=a4pmVk1wnr#>T?j@YU#Hqx;AyVy&(>_P-H1RFm|ZDp+G8wO$SQVUfba> D!(aV1 literal 0 HcmV?d00001 diff --git a/src/UglyToad.Pdf.Tests/Integration/SinglePageSimpleOpenOfficeTests - Copy.cs b/src/UglyToad.Pdf.Tests/Integration/SinglePageSimpleOpenOfficeTests - Copy.cs new file mode 100644 index 00000000..f4cd7be7 --- /dev/null +++ b/src/UglyToad.Pdf.Tests/Integration/SinglePageSimpleOpenOfficeTests - Copy.cs @@ -0,0 +1,40 @@ +namespace UglyToad.Pdf.Tests.Integration +{ + using System; + using System.IO; + using System.Linq; + using Content; + using Xunit; + + public class SinglePageType1FontTests + { + private static string GetFilename() + { + var documentFolder = Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "Integration", "Documents")); + + return Path.Combine(documentFolder, "Single Page Type 1 Font.pdf"); + } + + [Fact] + public void HasCorrectNumberOfPages() + { + var file = GetFilename(); + + using (var document = PdfDocument.Open(File.ReadAllBytes(file))) + { + Assert.Equal(1, document.NumberOfPages); + } + } + + [Fact] + public void HasCorrectPageSize() + { + using (var document = PdfDocument.Open(GetFilename())) + { + //var page = document.GetPage(1); + + //Assert.Equal(PageSize.Letter, page.Size); + } + } + } +} diff --git a/src/UglyToad.Pdf.Tests/UglyToad.Pdf.Tests.csproj b/src/UglyToad.Pdf.Tests/UglyToad.Pdf.Tests.csproj index 47c2014f..9020c00c 100644 --- a/src/UglyToad.Pdf.Tests/UglyToad.Pdf.Tests.csproj +++ b/src/UglyToad.Pdf.Tests/UglyToad.Pdf.Tests.csproj @@ -17,6 +17,7 @@ + @@ -48,6 +49,9 @@ PreserveNewest + + Always + PreserveNewest diff --git a/src/UglyToad.Pdf/Fonts/FontFactory.cs b/src/UglyToad.Pdf/Fonts/FontFactory.cs index bc28f31d..266cbb7d 100644 --- a/src/UglyToad.Pdf/Fonts/FontFactory.cs +++ b/src/UglyToad.Pdf/Fonts/FontFactory.cs @@ -14,13 +14,14 @@ private readonly ILog log; private readonly IReadOnlyDictionary handlers; - public FontFactory(ILog log, Type0FontHandler type0FontHandler, TrueTypeFontHandler trueTypeFontHandler) + public FontFactory(ILog log, Type0FontHandler type0FontHandler, TrueTypeFontHandler trueTypeFontHandler, Type1FontHandler type1FontHandler) { this.log = log; handlers = new Dictionary { {CosName.TYPE0, type0FontHandler}, - {CosName.TRUE_TYPE, trueTypeFontHandler} + {CosName.TRUE_TYPE, trueTypeFontHandler}, + {CosName.TYPE1, type1FontHandler} }; } diff --git a/src/UglyToad.Pdf/Fonts/FontMetrics.cs b/src/UglyToad.Pdf/Fonts/FontMetrics.cs index 7aaf7344..510b10cd 100644 --- a/src/UglyToad.Pdf/Fonts/FontMetrics.cs +++ b/src/UglyToad.Pdf/Fonts/FontMetrics.cs @@ -1,6 +1,172 @@ namespace UglyToad.Pdf.Fonts { + using System.Collections.Generic; + using Geometry; + class FontMetrics { } + + internal class FontMetricsBuilder + { + public decimal AfmVersion { get; } + + public List Comments { get; } + + public List CharacterMetrics { get; } = new List(); + + /// + /// Name of the font as seen by PostScript. + /// + public string FontName { get; set; } + + /// + /// The full text name of the font. + /// + public string FullName { get; set; } + + /// + /// The name of the typeface family for the font. + /// + public string FamilyName { get; set; } + + /// + /// The weight of the font. + /// + public string Weight { get; set; } + + /// + /// Angle in degrees counter-clockwise from vertical of vertical strokes of the font. + /// + public decimal ItalicAngle { get; set; } + + /// + /// Whether the font is monospaced or not. + /// + public bool IsFixedPitch { get; set; } + + /// + /// The dimensions of the font bounding box. + /// + public PdfRectangle PdfBoundingBox { get; private set; } + + /// + /// Distance from the baseline for underlining. + /// + public decimal UnderlinePosition { get; set; } + + /// + /// The stroke width for underlining. + /// + public decimal UnderlineThickness { get; set; } + + /// + /// Version identifier for the font program. + /// + public string Version { get; set; } + + /// + /// Font name trademark or copyright notice. + /// + public string Notice { get; set; } + + public string EncodingScheme { get; set; } + + /// + /// Code describing mapping scheme for a non base font. + /// + public int MappingScheme { get; set; } + + /// + /// The character set of this font. + /// + public string CharacterSet { get; set; } + + public bool IsBaseFont { get; set; } = true; + + /// + /// The y-value of the top of a capital H. + /// + public decimal CapHeight { get; set; } + + /// + /// The y-value of the top of lowercase x. + /// + public decimal XHeight { get; set; } + + /// + /// Generally the y-value of the top of lowercase d. + /// + public decimal Ascender { get; set; } + + /// + /// The y-value of the bottom of lowercase p. + /// + public decimal Descender { get; set; } + + /// + /// Width of horizontal stems. + /// + public decimal StdHw { get; set; } + + /// + /// Width of vertical stems. + /// + public decimal StdVw { get; set; } + + public CharacterWidth CharacterWidth { get; private set; } + + public FontMetricsBuilder(decimal afmVersion) + { + AfmVersion = afmVersion; + Comments = new List(); + } + + public void SetBoundingBox(decimal x1, decimal y1, decimal x2, decimal y2) + { + PdfBoundingBox = new PdfRectangle(x1, y1, x2, y2); + } + + public void SetCharacterWidth(decimal x, decimal y) + { + CharacterWidth = new CharacterWidth(x, y); + } + } + + /// + /// The x and y components of the width vector of the font's characters. + /// Presence implies that IsFixedPitch is true. + /// + internal class CharacterWidth + { + public decimal X { get; } + + public decimal Y { get; } + + public CharacterWidth(decimal x, decimal y) + { + X = x; + Y = y; + } + } + + internal class IndividualCharacterMetric + { + public int CharacterCode { get; set; } + + public decimal WidthX { get; set; } + public decimal WidthY { get; set; } + + public decimal WidthXDirection0 { get; set; } + public decimal WidthYDirection0 { get; set; } + + public decimal WidthXDirection1 { get; set; } + public decimal WidthYDirection1 { get; set; } + + public string Name { get; set; } + + public PdfVector VVector { get; set; } + + public PdfRectangle BoundingBox { get; set; } + } } diff --git a/src/UglyToad.Pdf/Fonts/Parser/AdobeFontMetricsParser.cs b/src/UglyToad.Pdf/Fonts/Parser/AdobeFontMetricsParser.cs index c38a4563..3bd0de0c 100644 --- a/src/UglyToad.Pdf/Fonts/Parser/AdobeFontMetricsParser.cs +++ b/src/UglyToad.Pdf/Fonts/Parser/AdobeFontMetricsParser.cs @@ -1,9 +1,10 @@ namespace UglyToad.Pdf.Fonts.Parser { using System; + using System.Text; + using Exceptions; using IO; - using Tokenization.Scanner; - using Tokenization.Tokens; + using Pdf.Parser.Parts; internal class AdobeFontMetricsParser : IAdobeFontMetricsParser { @@ -316,24 +317,103 @@ public FontMetrics Parse(IInputBytes bytes, bool useReducedDataSet) { - var tokenizer = new CoreTokenScanner(bytes); + var token = ReadString(bytes); - tokenizer.MoveNext(); - - var current = tokenizer.CurrentToken; - - if (!(current is OperatorToken operatorToken) || operatorToken.Data != StartFontMetrics) + if (!string.Equals(StartFontMetrics, token, StringComparison.OrdinalIgnoreCase)) { - throw new InvalidOperationException($"The font metrics file started with {current} rather than {StartFontMetrics}."); + throw new InvalidFontFormatException($"The AFM file was not valid, it did not start with {StartFontMetrics}."); } - while (tokenizer.MoveNext()) - { + var version = ReadDecimal(bytes); + var builder = new FontMetricsBuilder(version); + + while ((token = ReadString(bytes)) != EndFontMetrics) + { + switch (token) + { + case Comment: + builder.Comments.Add(ReadLine(bytes)); + break; + case FontName: + builder.FontName = ReadLine(bytes); + break; + case FullName: + builder.FullName = ReadLine(bytes); + break; + case FamilyName: + builder.FamilyName = ReadLine(bytes); + break; + } } return new FontMetrics(); } + + private static decimal ReadDecimal(IInputBytes input) + { + var str = ReadString(input); + + return decimal.Parse(str); + } + + private static bool ReadBool(IInputBytes input) + { + var boolean = ReadString(input); + + switch (boolean) + { + case "true": + return true; + case "false": + return false; + default: + throw new InvalidFontFormatException($"The AFM should have contained a boolean but instead contained: {boolean}."); + } + } + + private static readonly StringBuilder Builder = new StringBuilder(); + + private static string ReadString(IInputBytes input) + { + Builder.Clear(); + + if (input.IsAtEnd()) + { + return EndFontMetrics; + } + + while (ReadHelper.IsWhitespace(input.CurrentByte) && input.MoveNext()) + { + } + + Builder.Append((char)input.CurrentByte); + + while (input.MoveNext() && !ReadHelper.IsWhitespace(input.CurrentByte)) + { + Builder.Append((char)input.CurrentByte); + } + + return Builder.ToString(); + } + + private static string ReadLine(IInputBytes input) + { + Builder.Clear(); + + while (ReadHelper.IsWhitespace(input.CurrentByte) && input.MoveNext()) + { + } + + Builder.Append((char)input.CurrentByte); + + while (input.MoveNext() && !ReadHelper.IsEndOfLine(input.CurrentByte)) + { + Builder.Append((char)input.CurrentByte); + } + + return Builder.ToString(); + } } internal interface IAdobeFontMetricsParser diff --git a/src/UglyToad.Pdf/Fonts/Parser/FontDictionaryAccessHelper.cs b/src/UglyToad.Pdf/Fonts/Parser/FontDictionaryAccessHelper.cs new file mode 100644 index 00000000..afe8cac3 --- /dev/null +++ b/src/UglyToad.Pdf/Fonts/Parser/FontDictionaryAccessHelper.cs @@ -0,0 +1,80 @@ +namespace UglyToad.Pdf.Fonts.Parser +{ + using System.Linq; + using ContentStream; + using Cos; + using Exceptions; + using IO; + using Parts; + using Pdf.Parser; + + internal static class FontDictionaryAccessHelper + { + public static int GetFirstCharacter(PdfDictionary dictionary) + { + if (!dictionary.TryGetItemOfType(CosName.FIRST_CHAR, out CosInt firstChar)) + { + throw new InvalidFontFormatException( + $"No first character entry was found in the font dictionary for this TrueType font: {dictionary}."); + } + + return firstChar.AsInt(); + } + + public static int GetLastCharacter(PdfDictionary dictionary) + { + if (!dictionary.TryGetItemOfType(CosName.LAST_CHAR, out CosInt lastChar)) + { + throw new InvalidFontFormatException( + $"No last character entry was found in the font dictionary for this TrueType font: {dictionary}."); + } + + return lastChar.AsInt(); + } + + public static decimal[] GetWidths(PdfDictionary dictionary) + { + if (!dictionary.TryGetItemOfType(CosName.WIDTHS, out COSArray widthArray)) + { + throw new InvalidFontFormatException($"No widths array was found in the font dictionary for this TrueType font: {dictionary}."); + } + + return widthArray.Select(x => ((ICosNumber)x).AsDecimal()).ToArray(); + } + + public static FontDescriptor GetFontDescriptor(IPdfObjectParser pdfObjectParser, FontDescriptorFactory fontDescriptorFactory, PdfDictionary dictionary, + IRandomAccessRead reader, bool isLenientParsing) + { + if (!dictionary.TryGetItemOfType(CosName.FONT_DESC, out CosObject obj)) + { + throw new InvalidFontFormatException($"No font descriptor indirect reference found in the TrueType font: {dictionary}."); + } + + var parsed = pdfObjectParser.Parse(obj.ToIndirectReference(), reader, isLenientParsing); + + if (!(parsed is PdfDictionary descriptorDictionary)) + { + throw new InvalidFontFormatException($"Expected a font descriptor dictionary but instead found {parsed}."); + } + + var descriptor = fontDescriptorFactory.Generate(descriptorDictionary, isLenientParsing); + + return descriptor; + } + + public static CosName GetName(PdfDictionary dictionary, FontDescriptor descriptor) + { + if (dictionary.TryGetName(CosName.BASE_FONT, out CosName name)) + { + return name; + } + + if (descriptor.FontName != null) + { + return descriptor.FontName; + } + + throw new InvalidFontFormatException($"Could not find a name for this TrueType font {dictionary}."); + } + } +} diff --git a/src/UglyToad.Pdf/Fonts/Parser/Handlers/TrueTypeFontHandler.cs b/src/UglyToad.Pdf/Fonts/Parser/Handlers/TrueTypeFontHandler.cs index 3f4e12b6..2a45d19e 100644 --- a/src/UglyToad.Pdf/Fonts/Parser/Handlers/TrueTypeFontHandler.cs +++ b/src/UglyToad.Pdf/Fonts/Parser/Handlers/TrueTypeFontHandler.cs @@ -37,17 +37,17 @@ public IFont Generate(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing) { - var firstCharacter = GetFirstCharacter(dictionary); + var firstCharacter = FontDictionaryAccessHelper.GetFirstCharacter(dictionary); - var lastCharacter = GetLastCharacter(dictionary); + var lastCharacter = FontDictionaryAccessHelper.GetLastCharacter(dictionary); - var widths = GetWidths(dictionary); + var widths = FontDictionaryAccessHelper.GetWidths(dictionary); - var descriptor = GetFontDescriptor(dictionary, reader, isLenientParsing); + var descriptor = FontDictionaryAccessHelper.GetFontDescriptor(pdfObjectParser, fontDescriptorFactory, dictionary, reader, isLenientParsing); var font = ParseTrueTypeFont(descriptor, reader, isLenientParsing); - var name = GetName(dictionary, descriptor); + var name = FontDictionaryAccessHelper.GetName(dictionary, descriptor); CMap toUnicodeCMap = null; if (dictionary.TryGetItemOfType(CosName.TO_UNICODE, out CosObject toUnicodeObj)) @@ -92,57 +92,6 @@ return new TrueTypeSimpleFont(name, firstCharacter, lastCharacter, widths, descriptor, toUnicodeCMap, encoding); } - private static int GetFirstCharacter(PdfDictionary dictionary) - { - if (!dictionary.TryGetItemOfType(CosName.FIRST_CHAR, out CosInt firstChar)) - { - throw new InvalidFontFormatException( - $"No first character entry was found in the font dictionary for this TrueType font: {dictionary}."); - } - - return firstChar.AsInt(); - } - - private static int GetLastCharacter(PdfDictionary dictionary) - { - if (!dictionary.TryGetItemOfType(CosName.LAST_CHAR, out CosInt lastChar)) - { - throw new InvalidFontFormatException( - $"No last character entry was found in the font dictionary for this TrueType font: {dictionary}."); - } - - return lastChar.AsInt(); - } - - private static decimal[] GetWidths(PdfDictionary dictionary) - { - if (!dictionary.TryGetItemOfType(CosName.WIDTHS, out COSArray widthArray)) - { - throw new InvalidFontFormatException($"No widths array was found in the font dictionary for this TrueType font: {dictionary}."); - } - - return widthArray.Select(x => ((ICosNumber)x).AsDecimal()).ToArray(); - } - - private FontDescriptor GetFontDescriptor(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing) - { - if (!dictionary.TryGetItemOfType(CosName.FONT_DESC, out CosObject obj)) - { - throw new InvalidFontFormatException($"No font descriptor indirect reference found in the TrueType font: {dictionary}."); - } - - var parsed = pdfObjectParser.Parse(obj.ToIndirectReference(), reader, isLenientParsing); - - if (!(parsed is PdfDictionary descriptorDictionary)) - { - throw new InvalidFontFormatException($"Expected a font descriptor dictionary but instead found {parsed}."); - } - - var descriptor = fontDescriptorFactory.Generate(descriptorDictionary, isLenientParsing); - - return descriptor; - } - private TrueTypeFont ParseTrueTypeFont(FontDescriptor descriptor, IRandomAccessRead reader, bool isLenientParsing) { @@ -170,20 +119,5 @@ return font; } - - private static CosName GetName(PdfDictionary dictionary, FontDescriptor descriptor) - { - if (dictionary.TryGetName(CosName.BASE_FONT, out CosName name)) - { - return name; - } - - if (descriptor.FontName != null) - { - return descriptor.FontName; - } - - throw new InvalidFontFormatException($"Could not find a name for this TrueType font {dictionary}."); - } } } diff --git a/src/UglyToad.Pdf/Fonts/Parser/Handlers/Type1FontHandler.cs b/src/UglyToad.Pdf/Fonts/Parser/Handlers/Type1FontHandler.cs new file mode 100644 index 00000000..61ed9537 --- /dev/null +++ b/src/UglyToad.Pdf/Fonts/Parser/Handlers/Type1FontHandler.cs @@ -0,0 +1,85 @@ +namespace UglyToad.Pdf.Fonts.Parser.Handlers +{ + using System; + using Cmap; + using ContentStream; + using Cos; + using Encodings; + using Exceptions; + using Filters; + using IO; + using Parts; + using Pdf.Parser; + using Simple; + + internal class Type1FontHandler : IFontHandler + { + private readonly IPdfObjectParser pdfObjectParser; + private readonly CMapCache cMapCache; + private readonly IFilterProvider filterProvider; + private readonly FontDescriptorFactory fontDescriptorFactory; + + public Type1FontHandler(IPdfObjectParser pdfObjectParser, CMapCache cMapCache, IFilterProvider filterProvider, FontDescriptorFactory fontDescriptorFactory) + { + this.pdfObjectParser = pdfObjectParser; + this.cMapCache = cMapCache; + this.filterProvider = filterProvider; + this.fontDescriptorFactory = fontDescriptorFactory; + } + + public IFont Generate(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing) + { + var firstCharacter = FontDictionaryAccessHelper.GetFirstCharacter(dictionary); + + var lastCharacter = FontDictionaryAccessHelper.GetLastCharacter(dictionary); + + var widths = FontDictionaryAccessHelper.GetWidths(dictionary); + + var descriptor = FontDictionaryAccessHelper.GetFontDescriptor(pdfObjectParser, fontDescriptorFactory, dictionary, reader, isLenientParsing); + + var name = FontDictionaryAccessHelper.GetName(dictionary, descriptor); + + CMap toUnicodeCMap = null; + if (dictionary.TryGetItemOfType(CosName.TO_UNICODE, out CosObject toUnicodeObj)) + { + var toUnicode = pdfObjectParser.Parse(toUnicodeObj.ToIndirectReference(), reader, isLenientParsing) as PdfRawStream; + + var decodedUnicodeCMap = toUnicode?.Decode(filterProvider); + + if (decodedUnicodeCMap != null) + { + toUnicodeCMap = cMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing); + } + } + + Encoding encoding = null; + if (dictionary.TryGetValue(CosName.ENCODING, out var encodingBase)) + { + // Symbolic fonts default to standard encoding. + if (descriptor.Flags.HasFlag(FontFlags.Symbolic)) + { + encoding = StandardEncoding.Instance; + } + + if (encodingBase is CosName encodingName) + { + if (!Encoding.TryGetNamedEncoding(encodingName, out encoding)) + { + // TODO: PDFBox would not throw here. + throw new InvalidFontFormatException($"Unrecognised encoding name: {encodingName}"); + } + } + else if (encodingBase is CosDictionary encodingDictionary) + { + throw new NotImplementedException("No support for reading encoding from dictionary yet."); + } + else + { + throw new NotImplementedException("No support for reading encoding from font yet."); + } + } + + return new TrueTypeSimpleFont(name, firstCharacter, lastCharacter, widths, descriptor, toUnicodeCMap, encoding); + } + } +} diff --git a/src/UglyToad.Pdf/Parser/Parts/ReadHelper.cs b/src/UglyToad.Pdf/Parser/Parts/ReadHelper.cs index db709429..7b7acdec 100644 --- a/src/UglyToad.Pdf/Parser/Parts/ReadHelper.cs +++ b/src/UglyToad.Pdf/Parser/Parts/ReadHelper.cs @@ -141,6 +141,11 @@ namespace UglyToad.Pdf.Parser.Parts return IsLineFeed(c) || IsCarriageReturn(c); } + public static bool IsEndOfLine(byte b) + { + return IsLineFeed(b) || IsCarriageReturn(b); + } + public static bool IsLineFeed(int c) { return AsciiLineFeed == c; diff --git a/src/UglyToad.Pdf/Parser/PdfDocumentFactory.cs b/src/UglyToad.Pdf/Parser/PdfDocumentFactory.cs index 37178f18..f1c070f0 100644 --- a/src/UglyToad.Pdf/Parser/PdfDocumentFactory.cs +++ b/src/UglyToad.Pdf/Parser/PdfDocumentFactory.cs @@ -86,7 +86,8 @@ cMapCache, filterProvider, pdfObjectParser), - new TrueTypeFontHandler(pdfObjectParser, filterProvider, cMapCache, fontDescriptorFactory, trueTypeFontParser)); + new TrueTypeFontHandler(pdfObjectParser, filterProvider, cMapCache, fontDescriptorFactory, trueTypeFontParser), + new Type1FontHandler(pdfObjectParser, cMapCache, filterProvider, fontDescriptorFactory)); var dynamicParser = container.Get(); var resourceContainer = new ResourceContainer(pdfObjectParser, fontFactory);