diff --git a/src/UglyToad.Pdf.Tests/Integration/FontSizeTestFromLibreOfficeTests.cs b/src/UglyToad.Pdf.Tests/Integration/FontSizeTestFromLibreOfficeTests.cs index 2e67b32c..9882c047 100644 --- a/src/UglyToad.Pdf.Tests/Integration/FontSizeTestFromLibreOfficeTests.cs +++ b/src/UglyToad.Pdf.Tests/Integration/FontSizeTestFromLibreOfficeTests.cs @@ -30,9 +30,9 @@ { using (var document = PdfDocument.Open(GetFilename())) { - //var page = document.GetPage(1); + var page = document.GetPage(1); - //Assert.Equal(PageSize.A4, page.Size); + Assert.Equal(PageSize.A4, page.Size); } } } diff --git a/src/UglyToad.Pdf/Content/PageSize.cs b/src/UglyToad.Pdf/Content/PageSize.cs index b91eff73..61eb02a2 100644 --- a/src/UglyToad.Pdf/Content/PageSize.cs +++ b/src/UglyToad.Pdf/Content/PageSize.cs @@ -36,7 +36,9 @@ // Seems there is some disagreement 1190/1191 {new WidthHeight(842, 1190), PageSize.A3}, {new WidthHeight(842, 1191), PageSize.A3}, + // Possibly some kind of rounding mix-up here {new WidthHeight(595, 842), PageSize.A4}, + {new WidthHeight(595, 841), PageSize.A4}, {new WidthHeight(420, 595), PageSize.A5}, {new WidthHeight(298, 420), PageSize.A6}, {new WidthHeight(210, 298), PageSize.A7}, diff --git a/src/UglyToad.Pdf/Fonts/FontFactory.cs b/src/UglyToad.Pdf/Fonts/FontFactory.cs index 80489d54..bc28f31d 100644 --- a/src/UglyToad.Pdf/Fonts/FontFactory.cs +++ b/src/UglyToad.Pdf/Fonts/FontFactory.cs @@ -14,12 +14,13 @@ private readonly ILog log; private readonly IReadOnlyDictionary handlers; - public FontFactory(ILog log, Type0FontHandler type0FontHandler) + public FontFactory(ILog log, Type0FontHandler type0FontHandler, TrueTypeFontHandler trueTypeFontHandler) { this.log = log; handlers = new Dictionary { - {CosName.TYPE0, type0FontHandler} + {CosName.TYPE0, type0FontHandler}, + {CosName.TRUE_TYPE, trueTypeFontHandler} }; } diff --git a/src/UglyToad.Pdf/Fonts/Parser/Handlers/TrueTypeFontHandler.cs b/src/UglyToad.Pdf/Fonts/Parser/Handlers/TrueTypeFontHandler.cs index ed7d2101..36e8928d 100644 --- a/src/UglyToad.Pdf/Fonts/Parser/Handlers/TrueTypeFontHandler.cs +++ b/src/UglyToad.Pdf/Fonts/Parser/Handlers/TrueTypeFontHandler.cs @@ -1,6 +1,160 @@ namespace UglyToad.Pdf.Fonts.Parser.Handlers { - internal class TrueTypeFontHandler + using System.Linq; + using Cmap; + using ContentStream; + using Cos; + using Exceptions; + using Filters; + using IO; + using Parts; + using Pdf.Parser; + using Simple; + using TrueType; + using TrueType.Parser; + + internal class TrueTypeFontHandler : IFontHandler { + private readonly IPdfObjectParser pdfObjectParser; + private readonly IFilterProvider filterProvider; + private readonly CMapCache cMapCache; + private readonly FontDescriptorFactory fontDescriptorFactory; + private readonly TrueTypeFontParser trueTypeFontParser; + + public TrueTypeFontHandler(IPdfObjectParser pdfObjectParser, IFilterProvider filterProvider, + CMapCache cMapCache, + FontDescriptorFactory fontDescriptorFactory, + TrueTypeFontParser trueTypeFontParser) + { + this.pdfObjectParser = pdfObjectParser; + this.filterProvider = filterProvider; + this.cMapCache = cMapCache; + this.fontDescriptorFactory = fontDescriptorFactory; + this.trueTypeFontParser = trueTypeFontParser; + } + + public IFont Generate(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing) + { + var firstCharacter = GetFirstCharacter(dictionary); + + var lastCharacter = GetLastCharacter(dictionary); + + var widths = GetWidths(dictionary); + + var descriptor = GetFontDescriptor(dictionary, reader, isLenientParsing); + + var font = ParseTrueTypeFont(descriptor, reader, isLenientParsing); + + var name = GetName(dictionary, descriptor); + + CMap toUnicodeCMap = null; + if (dictionary.TryGetItemOfType(CosName.TO_UNICODE, out CosObject toUnicodeObj)) + { + var toUnicode = pdfObjectParser.Parse(toUnicodeObj.ToIndirectReference(), reader, isLenientParsing) as PdfRawStream; + + var decodedUnicodeCMap = toUnicode?.Decode(filterProvider); + + if (decodedUnicodeCMap != null) + { + toUnicodeCMap = cMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing); + } + } + + return new TrueTypeSimpleFont(name, firstCharacter, lastCharacter, widths, descriptor, toUnicodeCMap); + } + + private static int GetFirstCharacter(PdfDictionary dictionary) + { + if (!dictionary.TryGetItemOfType(CosName.FIRST_CHAR, out CosInt firstChar)) + { + throw new InvalidFontFormatException( + $"No first character entry was found in the font dictionary for this TrueType font: {dictionary}."); + } + + return firstChar.AsInt(); + } + + private static int GetLastCharacter(PdfDictionary dictionary) + { + if (!dictionary.TryGetItemOfType(CosName.LAST_CHAR, out CosInt lastChar)) + { + throw new InvalidFontFormatException( + $"No last character entry was found in the font dictionary for this TrueType font: {dictionary}."); + } + + return lastChar.AsInt(); + } + + private static decimal[] GetWidths(PdfDictionary dictionary) + { + if (!dictionary.TryGetItemOfType(CosName.WIDTHS, out COSArray widthArray)) + { + throw new InvalidFontFormatException($"No widths array was found in the font dictionary for this TrueType font: {dictionary}."); + } + + return widthArray.Select(x => ((ICosNumber)x).AsDecimal()).ToArray(); + } + + private FontDescriptor GetFontDescriptor(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing) + { + if (!dictionary.TryGetItemOfType(CosName.FONT_DESC, out CosObject obj)) + { + throw new InvalidFontFormatException($"No font descriptor indirect reference found in the TrueType font: {dictionary}."); + } + + var parsed = pdfObjectParser.Parse(obj.ToIndirectReference(), reader, isLenientParsing); + + if (!(parsed is PdfDictionary descriptorDictionary)) + { + throw new InvalidFontFormatException($"Expected a font descriptor dictionary but instead found {parsed}."); + } + + var descriptor = fontDescriptorFactory.Generate(descriptorDictionary, isLenientParsing); + + return descriptor; + } + + private TrueTypeFont ParseTrueTypeFont(FontDescriptor descriptor, IRandomAccessRead reader, + bool isLenientParsing) + { + if (descriptor?.FontFile == null) + { + return null; + } + + if (descriptor.FontFile.FileType != DescriptorFontFile.FontFileType.TrueType) + { + throw new InvalidFontFormatException( + $"Expected a TrueType font in the TrueType font descriptor, instead it was {descriptor.FontFile.FileType}."); + } + + var fontFileStream = pdfObjectParser.Parse(descriptor.FontFile.ObjectKey, reader, isLenientParsing) as PdfRawStream; + + if (fontFileStream == null) + { + return null; + } + + var fontFile = fontFileStream.Decode(filterProvider); + + var font = trueTypeFontParser.Parse(new TrueTypeDataBytes(new ByteArrayInputBytes(fontFile))); + + return font; + } + + private static CosName GetName(PdfDictionary dictionary, FontDescriptor descriptor) + { + if (dictionary.TryGetName(CosName.BASE_FONT, out CosName name)) + { + return name; + } + + if (descriptor.FontName != null) + { + return descriptor.FontName; + } + + throw new InvalidFontFormatException($"Could not find a name for this TrueType font {dictionary}."); + } } } diff --git a/src/UglyToad.Pdf/Fonts/Simple/TrueTypeSimpleFont.cs b/src/UglyToad.Pdf/Fonts/Simple/TrueTypeSimpleFont.cs new file mode 100644 index 00000000..37b07a7a --- /dev/null +++ b/src/UglyToad.Pdf/Fonts/Simple/TrueTypeSimpleFont.cs @@ -0,0 +1,75 @@ +namespace UglyToad.Pdf.Fonts.Simple +{ + using Cmap; + using Composite; + using Cos; + using Geometry; + using IO; + using Util.JetBrains.Annotations; + + internal class TrueTypeSimpleFont : IFont + { + private readonly int firstCharacterCode; + private readonly int lastCharacterCode; + private readonly decimal[] widths; + private readonly FontDescriptor descriptor; + + public CosName Name { get; } + + public bool IsVertical { get; } + + [NotNull] + public ToUnicodeCMap ToUnicode { get; set; } + + public TrueTypeSimpleFont(CosName name, int firstCharacterCode, int lastCharacterCode, decimal[] widths, + FontDescriptor descriptor, + [CanBeNull]CMap toUnicodeCMap) + { + this.firstCharacterCode = firstCharacterCode; + this.lastCharacterCode = lastCharacterCode; + this.widths = widths; + this.descriptor = descriptor; + + Name = name; + IsVertical = false; + ToUnicode = new ToUnicodeCMap(toUnicodeCMap); + } + + public int ReadCharacterCode(IInputBytes bytes, out int codeLength) + { + codeLength = 1; + return bytes.CurrentByte; + } + + public bool TryGetUnicode(int characterCode, out string value) + { + value = null; + + if (!ToUnicode.CanMapToUnicode) + { + return false; + } + + return ToUnicode.TryGet(characterCode, out value); + } + + public PdfVector GetDisplacement(int characterCode) + { + var tx = GetWidth(characterCode); + + return new PdfVector(tx / 1000m, 0); + } + + public decimal GetWidth(int characterCode) + { + var index = characterCode - firstCharacterCode; + + if (index < 0 || index >= widths.Length) + { + return descriptor.MissingWidth; + } + + return widths[index]; + } + } +} diff --git a/src/UglyToad.Pdf/Parser/PdfDocumentFactory.cs b/src/UglyToad.Pdf/Parser/PdfDocumentFactory.cs index 2c5dff1c..6268fd25 100644 --- a/src/UglyToad.Pdf/Parser/PdfDocumentFactory.cs +++ b/src/UglyToad.Pdf/Parser/PdfDocumentFactory.cs @@ -68,10 +68,11 @@ var cMapCache = new CMapCache(new CMapParser()); - var fontFactory = new FontFactory(container.Get(), new Type0FontHandler(cidFontFactory, + var fontFactory = new FontFactory(log, new Type0FontHandler(cidFontFactory, cMapCache, filterProvider, - pdfObjectParser)); + pdfObjectParser), + new TrueTypeFontHandler(pdfObjectParser, filterProvider, cMapCache, fontDescriptorFactory, trueTypeFontParser)); var dynamicParser = container.Get(); var resourceContainer = new ResourceContainer(pdfObjectParser, fontFactory); diff --git a/src/UglyToad.Pdf/UglyToad.Pdf.csproj b/src/UglyToad.Pdf/UglyToad.Pdf.csproj index 3b03c0f7..0cbc5832 100644 --- a/src/UglyToad.Pdf/UglyToad.Pdf.csproj +++ b/src/UglyToad.Pdf/UglyToad.Pdf.csproj @@ -227,9 +227,5 @@ - - - -