diff --git a/src/UglyToad.Pdf.Tests/Integration/Documents/Old Gutnish Internet Explorer.pdf b/src/UglyToad.Pdf.Tests/Integration/Documents/Old Gutnish Internet Explorer.pdf new file mode 100644 index 00000000..10350655 Binary files /dev/null and b/src/UglyToad.Pdf.Tests/Integration/Documents/Old Gutnish Internet Explorer.pdf differ diff --git a/src/UglyToad.Pdf.Tests/Integration/LocalTests.cs b/src/UglyToad.Pdf.Tests/Integration/LocalTests.cs index 561da8b1..8a414cf0 100644 --- a/src/UglyToad.Pdf.Tests/Integration/LocalTests.cs +++ b/src/UglyToad.Pdf.Tests/Integration/LocalTests.cs @@ -10,6 +10,7 @@ [Fact] public void Tests() { + } } } diff --git a/src/UglyToad.Pdf.Tests/Integration/OldGutnishTests.cs b/src/UglyToad.Pdf.Tests/Integration/OldGutnishTests.cs new file mode 100644 index 00000000..e853e200 --- /dev/null +++ b/src/UglyToad.Pdf.Tests/Integration/OldGutnishTests.cs @@ -0,0 +1,41 @@ +namespace UglyToad.Pdf.Tests.Integration +{ + using System; + using System.IO; + using Xunit; + + public class OldGutnishTests + { + private static string GetFilename() + { + var documentFolder = Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "Integration", "Documents")); + + return Path.Combine(documentFolder, "Old Gutnish Internet Explorer.pdf"); + } + + [Fact] + public void HasCorrectNumberOfPages() + { + using (var document = PdfDocument.Open(GetFilename())) + { + Assert.Equal(3, document.NumberOfPages); + } + } + + [Fact] + public void HasCorrectContentAfterReadingPreviousPage() + { + using (var document = PdfDocument.Open(GetFilename())) + { + var page = document.GetPage(1); + + Assert.Contains("Þissi þieluar hafþi ann sun sum hit hafþi. En hafþa cuna hit huita stierna", page.Text); + + page = document.GetPage(2); + + Assert.Contains("Greipur sem annar hét; og Gunnfjón sá þriðji", page.Text); + } + } + + } +} \ No newline at end of file diff --git a/src/UglyToad.Pdf/Fonts/Parser/Handlers/Type0FontHandler.cs b/src/UglyToad.Pdf/Fonts/Parser/Handlers/Type0FontHandler.cs index 6503f7c5..1bcc22bf 100644 --- a/src/UglyToad.Pdf/Fonts/Parser/Handlers/Type0FontHandler.cs +++ b/src/UglyToad.Pdf/Fonts/Parser/Handlers/Type0FontHandler.cs @@ -38,16 +38,20 @@ if (TryGetFirstDescendant(dictionary, out var descendantObject)) { - var parsed = DirectObjectFinder.Find(descendantObject, pdfObjectParser, reader, isLenientParsing); + PdfDictionary descendantFontDictionary; - if (parsed is PdfDictionary descendantFontDictionary) + if (descendantObject is CosObject obj) { - cidFont = ParseDescendant(descendantFontDictionary, reader, isLenientParsing); + var parsed = DirectObjectFinder.Find(obj, pdfObjectParser, reader, isLenientParsing); + + descendantFontDictionary = parsed; } else { - throw new InvalidFontFormatException("Expected to find a Descendant Font dictionary, instead it was: " + parsed); + descendantFontDictionary = (PdfDictionary) descendantObject; } + + cidFont = ParseDescendant(descendantFontDictionary, reader, isLenientParsing); } else { @@ -76,7 +80,7 @@ return font; } - private static bool TryGetFirstDescendant(PdfDictionary dictionary, out CosObject descendant) + private static bool TryGetFirstDescendant(PdfDictionary dictionary, out CosBase descendant) { descendant = null; @@ -91,9 +95,21 @@ return true; } - if (value is COSArray array && array.Count > 0 && array.get(0) is CosObject objArr) + if (value is COSArray array && array.Count > 0) { + if (array.get(0) is CosObject objArr) + { descendant = objArr; + } + else if (array.get(0) is PdfDictionary dict) + { + descendant = dict; + } + else + { + return false; + } + return true; } diff --git a/src/UglyToad.Pdf/Fonts/Parser/Parts/CidFontFactory.cs b/src/UglyToad.Pdf/Fonts/Parser/Parts/CidFontFactory.cs index e3bee9c8..2d6b8c9b 100644 --- a/src/UglyToad.Pdf/Fonts/Parser/Parts/CidFontFactory.cs +++ b/src/UglyToad.Pdf/Fonts/Parser/Parts/CidFontFactory.cs @@ -75,6 +75,12 @@ if (!dictionary.TryGetValue(CosName.FONT_DESC, out var baseValue) || !(baseValue is CosObject obj)) { + if (baseValue is PdfDictionary baseDictionary) + { + descriptorDictionary = baseDictionary; + return true; + } + return false; } @@ -239,11 +245,33 @@ throw new InvalidFontFormatException($"No CID System Info was found in the CID Font dictionary: {dictionary}"); } - var registry = (CosString) cidDictionary.GetItemOrDefault(CosName.REGISTRY); - var ordering = (CosString)cidDictionary.GetItemOrDefault(CosName.ORDERING); + var registry = SafeKeyAccess(cidDictionary, CosName.REGISTRY, reader, isLenientParsing); + var ordering = SafeKeyAccess(cidDictionary, CosName.ORDERING, reader, isLenientParsing); var supplement = cidDictionary.GetIntOrDefault(CosName.SUPPLEMENT, 0); return new CharacterIdentifierSystemInfo(registry.GetAscii(), ordering.GetAscii(), supplement); } + + private CosString SafeKeyAccess(PdfDictionary dictionary, CosName keyName, IRandomAccessRead reader, bool isLenientParsing) + { + var item = dictionary.GetItemOrDefault(keyName); + + if (item == null) + { + return new CosString(string.Empty); + } + + if (item is CosString str) + { + return str; + } + + if (item is CosObject obj) + { + return DirectObjectFinder.Find(obj, pdfObjectParser, reader, isLenientParsing); + } + + return new CosString(string.Empty); + } } }