fix a bug with font reading order in multi-page documents

This commit is contained in:
Eliot Jones
2018-01-07 20:19:17 +00:00
parent 59c36a7ddd
commit 133ab43d45
5 changed files with 59 additions and 20 deletions

View File

@@ -0,0 +1,39 @@
namespace UglyToad.Pdf.Tests.Integration
{
    using System;
    using System.IO;
    using Xunit;

    /// <summary>
    /// Integration tests that open the FarmerMac.pdf sample document and check
    /// its page count and the text of its second page.
    /// </summary>
    public class FarmerMacTests
    {
        /// <summary>
        /// Builds the absolute path of the FarmerMac.pdf sample document.
        /// </summary>
        /// <returns>
        /// The path of FarmerMac.pdf inside the Integration/Documents folder,
        /// which is located three directory levels above the application
        /// domain's base directory.
        /// </returns>
        private static string GetFilename()
        {
            // Walk up three levels from the base directory before descending
            // into the folder that holds the sample documents.
            var relativeFolder = Path.Combine(
                AppDomain.CurrentDomain.BaseDirectory,
                "..",
                "..",
                "..",
                "Integration",
                "Documents");

            var absoluteFolder = Path.GetFullPath(relativeFolder);

            return Path.Combine(absoluteFolder, "FarmerMac.pdf");
        }

        /// <summary>
        /// The sample document must report exactly five pages.
        /// </summary>
        [Fact]
        public void HasCorrectNumberOfPages()
        {
            var path = GetFilename();

            using (var pdf = PdfDocument.Open(path))
            {
                var pageCount = pdf.NumberOfPages;

                Assert.Equal(5, pageCount);
            }
        }

        /// <summary>
        /// Page 2 must still contain the expected text when page 1 has been
        /// read from the same document immediately beforehand.
        /// </summary>
        [Fact]
        public void HasCorrectContentAfterReadingPreviousPage()
        {
            const string expectedText = "financial results for the fiscal quarter ended June 30, 2017 and (2) a conference call to discuss those results and Farmer Mac";

            var path = GetFilename();

            using (var pdf = PdfDocument.Open(path))
            {
                // Page 1 is read only for its effect on the document's state;
                // the returned page itself is deliberately not used.
                pdf.GetPage(1);

                var secondPage = pdf.GetPage(2);

                Assert.Contains(expectedText, secondPage.Text);
            }
        }
    }
}

View File

@@ -17,9 +17,7 @@
[Fact] [Fact]
public void HasCorrectNumberOfPages() public void HasCorrectNumberOfPages()
{ {
var file = GetFilename(); using (var document = PdfDocument.Open(GetFilename()))
using (var document = PdfDocument.Open(File.ReadAllBytes(file)))
{ {
Assert.Equal(4, document.NumberOfPages); Assert.Equal(4, document.NumberOfPages);
} }
@@ -61,14 +59,5 @@
Assert.Contains("Söderberg", page.Text); Assert.Contains("Söderberg", page.Text);
} }
} }
//[Fact]
//public void localFileTest()
//{
// using (var document = PdfDocument.Open(@"C:\Users\eliot\Downloads\Document (1).pdf"))
// {
// var page = document.GetPage(1);
// }
//}
} }
} }

View File

@@ -10,6 +10,7 @@
<ItemGroup> <ItemGroup>
<None Remove="Fonts\TrueType\google-simple-doc.ttf" /> <None Remove="Fonts\TrueType\google-simple-doc.ttf" />
<None Remove="Fonts\TrueType\Roboto-Regular.ttf" /> <None Remove="Fonts\TrueType\Roboto-Regular.ttf" />
<None Remove="Integration\Documents\FarmerMac.pdf" />
<None Remove="Integration\Documents\Font Size Test - from google chrome print pdf.pdf" /> <None Remove="Integration\Documents\Font Size Test - from google chrome print pdf.pdf" />
<None Remove="Integration\Documents\Font Size Test - from libre office.pdf" /> <None Remove="Integration\Documents\Font Size Test - from libre office.pdf" />
<None Remove="Integration\Documents\Judgement Document.pdf" /> <None Remove="Integration\Documents\Judgement Document.pdf" />
@@ -29,6 +30,9 @@
<EmbeddedResource Include="Fonts\TrueType\Roboto-Regular.ttf"> <EmbeddedResource Include="Fonts\TrueType\Roboto-Regular.ttf">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory> <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</EmbeddedResource> </EmbeddedResource>
<Content Include="Integration\Documents\FarmerMac.pdf">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
<Content Include="Integration\Documents\Font Size Test - from libre office.pdf"> <Content Include="Integration\Documents\Font Size Test - from libre office.pdf">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory> <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content> </Content>

View File

@@ -14,7 +14,8 @@
private readonly IPdfObjectParser pdfObjectParser; private readonly IPdfObjectParser pdfObjectParser;
private readonly IFontFactory fontFactory; private readonly IFontFactory fontFactory;
private readonly Dictionary<CosName, IFont> loadedFonts = new Dictionary<CosName, IFont>(); private readonly Dictionary<IndirectReference, IFont> loadedFonts = new Dictionary<IndirectReference, IFont>();
private readonly Dictionary<CosName, IndirectReference> currentResourceState = new Dictionary<CosName, IndirectReference>();
public ResourceContainer(IPdfObjectParser pdfObjectParser, IFontFactory fontFactory) public ResourceContainer(IPdfObjectParser pdfObjectParser, IFontFactory fontFactory)
{ {
@@ -57,11 +58,6 @@
{ {
foreach (var pair in fontDictionary) foreach (var pair in fontDictionary)
{ {
if (loadedFonts.ContainsKey(pair.Key))
{
continue;
}
if (!(pair.Value is CosObject objectKey)) if (!(pair.Value is CosObject objectKey))
{ {
if (isLenientParsing) if (isLenientParsing)
@@ -72,6 +68,15 @@
throw new InvalidOperationException($"The font with name {pair.Key} did not link to an object key. Value was: {pair.Value}."); throw new InvalidOperationException($"The font with name {pair.Key} did not link to an object key. Value was: {pair.Value}.");
} }
var reference = objectKey.ToIndirectReference();
currentResourceState[pair.Key] = reference;
if (loadedFonts.ContainsKey(reference))
{
continue;
}
var fontObject = DirectObjectFinder.Find<PdfDictionary>(objectKey, pdfObjectParser, reader, false); var fontObject = DirectObjectFinder.Find<PdfDictionary>(objectKey, pdfObjectParser, reader, false);
if (fontObject == null) if (fontObject == null)
@@ -79,13 +84,15 @@
throw new InvalidOperationException($"Could not retrieve the font with name: {pair.Key} which should have been object {objectKey.GetObjectNumber()}"); throw new InvalidOperationException($"Could not retrieve the font with name: {pair.Key} which should have been object {objectKey.GetObjectNumber()}");
} }
loadedFonts[pair.Key] = fontFactory.Get(fontObject, reader, isLenientParsing); loadedFonts[reference] = fontFactory.Get(fontObject, reader, isLenientParsing);
} }
} }
public IFont GetFont(CosName name) public IFont GetFont(CosName name)
{ {
loadedFonts.TryGetValue(name, out var font); var reference = currentResourceState[name];
loadedFonts.TryGetValue(reference, out var font);
return font; return font;
} }