fix a bug with font reading order in multi-page documents

This commit is contained in:
Eliot Jones
2018-01-07 20:19:17 +00:00
parent 59c36a7ddd
commit 133ab43d45
5 changed files with 59 additions and 20 deletions

View File

@@ -0,0 +1,39 @@
namespace UglyToad.Pdf.Tests.Integration
{
    using System;
    using System.IO;
    using Xunit;

    /// <summary>
    /// Integration tests that open the FarmerMac.pdf sample document and check
    /// its page count and the text of its second page.
    /// </summary>
    public class FarmerMacTests
    {
        /// <summary>
        /// Builds the absolute path to FarmerMac.pdf inside the Integration/Documents
        /// folder, located three directory levels above the application base directory.
        /// </summary>
        /// <returns>The full path to the sample document.</returns>
        private static string GetFilename()
        {
            var baseDirectory = AppDomain.CurrentDomain.BaseDirectory;
            var relativeFolder = Path.Combine(baseDirectory, "..", "..", "..", "Integration", "Documents");

            // Normalise away the ".." segments before appending the file name.
            return Path.Combine(Path.GetFullPath(relativeFolder), "FarmerMac.pdf");
        }

        /// <summary>
        /// The sample document is expected to report five pages.
        /// </summary>
        [Fact]
        public void HasCorrectNumberOfPages()
        {
            var path = GetFilename();

            using (var pdf = PdfDocument.Open(path))
            {
                Assert.Equal(5, pdf.NumberOfPages);
            }
        }

        /// <summary>
        /// Reads page 1 and then page 2 from the same document, and checks that
        /// page 2 still contains its expected text.
        /// </summary>
        [Fact]
        public void HasCorrectContentAfterReadingPreviousPage()
        {
            var path = GetFilename();

            using (var pdf = PdfDocument.Open(path))
            {
                // Page 1 is read purely for its effect on the document; its result is not used.
                pdf.GetPage(1);

                var secondPage = pdf.GetPage(2);

                Assert.Contains("financial results for the fiscal quarter ended June 30, 2017 and (2) a conference call to discuss those results and Farmer Mac", secondPage.Text);
            }
        }
    }
}

View File

@@ -17,9 +17,7 @@
[Fact]
public void HasCorrectNumberOfPages()
{
var file = GetFilename();
using (var document = PdfDocument.Open(File.ReadAllBytes(file)))
using (var document = PdfDocument.Open(GetFilename()))
{
Assert.Equal(4, document.NumberOfPages);
}
@@ -61,14 +59,5 @@
Assert.Contains("Söderberg", page.Text);
}
}
//[Fact]
//public void localFileTest()
//{
// using (var document = PdfDocument.Open(@"C:\Users\eliot\Downloads\Document (1).pdf"))
// {
// var page = document.GetPage(1);
// }
//}
}
}

View File

@@ -10,6 +10,7 @@
<ItemGroup>
<None Remove="Fonts\TrueType\google-simple-doc.ttf" />
<None Remove="Fonts\TrueType\Roboto-Regular.ttf" />
<None Remove="Integration\Documents\FarmerMac.pdf" />
<None Remove="Integration\Documents\Font Size Test - from google chrome print pdf.pdf" />
<None Remove="Integration\Documents\Font Size Test - from libre office.pdf" />
<None Remove="Integration\Documents\Judgement Document.pdf" />
@@ -29,6 +30,9 @@
<EmbeddedResource Include="Fonts\TrueType\Roboto-Regular.ttf">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</EmbeddedResource>
<Content Include="Integration\Documents\FarmerMac.pdf">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
<Content Include="Integration\Documents\Font Size Test - from libre office.pdf">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>

View File

@@ -14,7 +14,8 @@
private readonly IPdfObjectParser pdfObjectParser;
private readonly IFontFactory fontFactory;
private readonly Dictionary<CosName, IFont> loadedFonts = new Dictionary<CosName, IFont>();
private readonly Dictionary<IndirectReference, IFont> loadedFonts = new Dictionary<IndirectReference, IFont>();
private readonly Dictionary<CosName, IndirectReference> currentResourceState = new Dictionary<CosName, IndirectReference>();
public ResourceContainer(IPdfObjectParser pdfObjectParser, IFontFactory fontFactory)
{
@@ -57,11 +58,6 @@
{
foreach (var pair in fontDictionary)
{
if (loadedFonts.ContainsKey(pair.Key))
{
continue;
}
if (!(pair.Value is CosObject objectKey))
{
if (isLenientParsing)
@@ -71,6 +67,15 @@
throw new InvalidOperationException($"The font with name {pair.Key} did not link to an object key. Value was: {pair.Value}.");
}
var reference = objectKey.ToIndirectReference();
currentResourceState[pair.Key] = reference;
if (loadedFonts.ContainsKey(reference))
{
continue;
}
var fontObject = DirectObjectFinder.Find<PdfDictionary>(objectKey, pdfObjectParser, reader, false);
@@ -79,13 +84,15 @@
throw new InvalidOperationException($"Could not retrieve the font with name: {pair.Key} which should have been object {objectKey.GetObjectNumber()}");
}
loadedFonts[pair.Key] = fontFactory.Get(fontObject, reader, isLenientParsing);
loadedFonts[reference] = fontFactory.Get(fontObject, reader, isLenientParsing);
}
}
/// <summary>
/// Returns the font for the given resource name by first resolving the name to an
/// indirect reference via <c>currentResourceState</c> and then looking that reference
/// up in <c>loadedFonts</c>.
/// </summary>
/// <param name="name">The resource name of the font to look up.</param>
/// <returns>
/// The font stored in <c>loadedFonts</c> for the resolved reference, or <c>null</c>
/// when <c>loadedFonts</c> has no entry for it.
/// </returns>
public IFont GetFont(CosName name)
{
// NOTE(review): this line and the reference-keyed lookup below both declare `font`;
// this one looks like the pre-change version of the lookup (keyed by name) shown in a
// diff view - confirm which of the two is current.
loadedFonts.TryGetValue(name, out var font);
// NOTE(review): the dictionary indexer throws KeyNotFoundException when `name` has no
// entry in currentResourceState, whereas the TryGetValue lookups simply leave `font`
// as null - confirm callers only pass names that have already been registered.
var reference = currentResourceState[name];
// Fonts are stored per indirect reference, so the lookup uses the resolved reference.
loadedFonts.TryGetValue(reference, out var font);
return font;
}