Fix bugs with reading documents from Microsoft Print to PDF

This commit is contained in:
Eliot Jones
2018-01-10 19:23:10 +00:00
parent 674945206c
commit ab5a357665
5 changed files with 94 additions and 8 deletions

View File

@@ -10,6 +10,7 @@
[Fact]
public void Tests()
{
}
}
}

View File

@@ -0,0 +1,41 @@
namespace UglyToad.Pdf.Tests.Integration
{
    using System;
    using System.IO;
    using Xunit;

    /// <summary>
    /// Integration tests that open the "Old Gutnish Internet Explorer.pdf" sample
    /// document and check its page count and extracted page text.
    /// </summary>
    public class OldGutnishTests
    {
        /// <summary>
        /// Builds the absolute path to the sample PDF, resolved relative to the
        /// application base directory (three levels up, then Integration/Documents).
        /// </summary>
        /// <returns>The full path of the sample document.</returns>
        private static string GetFilename()
        {
            var baseDirectory = AppDomain.CurrentDomain.BaseDirectory;
            var relativeDocuments = Path.Combine(baseDirectory, "..", "..", "..", "Integration", "Documents");
            var absoluteDocuments = Path.GetFullPath(relativeDocuments);

            return Path.Combine(absoluteDocuments, "Old Gutnish Internet Explorer.pdf");
        }

        /// <summary>
        /// The sample document is expected to report exactly three pages.
        /// </summary>
        [Fact]
        public void HasCorrectNumberOfPages()
        {
            var path = GetFilename();

            using (var pdf = PdfDocument.Open(path))
            {
                var pageCount = pdf.NumberOfPages;

                Assert.Equal(3, pageCount);
            }
        }

        /// <summary>
        /// Reads page 1 and then page 2 from the same open document, checking that
        /// each page's text contains the expected excerpt.
        /// </summary>
        [Fact]
        public void HasCorrectContentAfterReadingPreviousPage()
        {
            var path = GetFilename();

            using (var pdf = PdfDocument.Open(path))
            {
                // Page 1 is read and verified before page 2 is requested.
                var firstPage = pdf.GetPage(1);
                Assert.Contains("Þissi þieluar hafþi ann sun sum hit hafþi. En hafþa cuna hit huita stierna", firstPage.Text);

                var secondPage = pdf.GetPage(2);
                Assert.Contains("Greipur sem annar hét; og Gunnfjón sá þriðji", secondPage.Text);
            }
        }
    }
}

View File

@@ -38,16 +38,20 @@
if (TryGetFirstDescendant(dictionary, out var descendantObject))
{
var parsed = DirectObjectFinder.Find<PdfDictionary>(descendantObject, pdfObjectParser, reader, isLenientParsing);
PdfDictionary descendantFontDictionary;
if (parsed is PdfDictionary descendantFontDictionary)
if (descendantObject is CosObject obj)
{
cidFont = ParseDescendant(descendantFontDictionary, reader, isLenientParsing);
var parsed = DirectObjectFinder.Find<PdfDictionary>(obj, pdfObjectParser, reader, isLenientParsing);
descendantFontDictionary = parsed;
}
else
{
throw new InvalidFontFormatException("Expected to find a Descendant Font dictionary, instead it was: " + parsed);
descendantFontDictionary = (PdfDictionary) descendantObject;
}
cidFont = ParseDescendant(descendantFontDictionary, reader, isLenientParsing);
}
else
{
@@ -76,7 +80,7 @@
return font;
}
private static bool TryGetFirstDescendant(PdfDictionary dictionary, out CosObject descendant)
private static bool TryGetFirstDescendant(PdfDictionary dictionary, out CosBase descendant)
{
descendant = null;
@@ -91,9 +95,21 @@
return true;
}
if (value is COSArray array && array.Count > 0 && array.get(0) is CosObject objArr)
if (value is COSArray array && array.Count > 0)
{
if (array.get(0) is CosObject objArr)
{
descendant = objArr;
}
else if (array.get(0) is PdfDictionary dict)
{
descendant = dict;
}
else
{
return false;
}
return true;
}

View File

@@ -75,6 +75,12 @@
if (!dictionary.TryGetValue(CosName.FONT_DESC, out var baseValue) || !(baseValue is CosObject obj))
{
if (baseValue is PdfDictionary baseDictionary)
{
descriptorDictionary = baseDictionary;
return true;
}
return false;
}
@@ -239,11 +245,33 @@
throw new InvalidFontFormatException($"No CID System Info was found in the CID Font dictionary: {dictionary}");
}
var registry = (CosString) cidDictionary.GetItemOrDefault(CosName.REGISTRY);
var ordering = (CosString)cidDictionary.GetItemOrDefault(CosName.ORDERING);
var registry = SafeKeyAccess(cidDictionary, CosName.REGISTRY, reader, isLenientParsing);
var ordering = SafeKeyAccess(cidDictionary, CosName.ORDERING, reader, isLenientParsing);
var supplement = cidDictionary.GetIntOrDefault(CosName.SUPPLEMENT, 0);
return new CharacterIdentifierSystemInfo(registry.GetAscii(), ordering.GetAscii(), supplement);
}
/// <summary>
/// Looks up <paramref name="keyName"/> in <paramref name="dictionary"/> and returns the value as a <see cref="CosString"/>.
/// </summary>
/// <param name="dictionary">The dictionary to read the entry from.</param>
/// <param name="keyName">The key whose value is wanted.</param>
/// <param name="reader">Passed through to <c>DirectObjectFinder.Find</c> when the value is a <see cref="CosObject"/>.</param>
/// <param name="isLenientParsing">Passed through to <c>DirectObjectFinder.Find</c> when the value is a <see cref="CosObject"/>.</param>
/// <returns>
/// The stored <see cref="CosString"/> when the value is one directly; the result of
/// <c>DirectObjectFinder.Find</c> when the value is a <see cref="CosObject"/>; otherwise
/// (null lookup result or any other type) a new empty <see cref="CosString"/>.
/// </returns>
private CosString SafeKeyAccess(PdfDictionary dictionary, CosName keyName, IRandomAccessRead reader, bool isLenientParsing)
{
    switch (dictionary.GetItemOrDefault(keyName))
    {
        case CosString direct:
            return direct;

        case CosObject reference:
            return DirectObjectFinder.Find<CosString>(reference, pdfObjectParser, reader, isLenientParsing);

        default:
            // Type patterns never match null, so a null lookup result lands here
            // together with every unsupported value type.
            return new CosString(string.Empty);
    }
}
}
}