diff --git a/src/UglyToad.Pdf.Tests/Integration/Documents/Single Page Simple - from open office.pdf b/src/UglyToad.Pdf.Tests/Integration/Documents/Single Page Simple - from open office.pdf
new file mode 100644
index 00000000..bb12f5f1
Binary files /dev/null and b/src/UglyToad.Pdf.Tests/Integration/Documents/Single Page Simple - from open office.pdf differ
diff --git a/src/UglyToad.Pdf.Tests/Integration/FontSizeTestFromLibreOfficeTests.cs b/src/UglyToad.Pdf.Tests/Integration/FontSizeTestFromLibreOfficeTests.cs
index 9882c047..8334c1f3 100644
--- a/src/UglyToad.Pdf.Tests/Integration/FontSizeTestFromLibreOfficeTests.cs
+++ b/src/UglyToad.Pdf.Tests/Integration/FontSizeTestFromLibreOfficeTests.cs
@@ -2,6 +2,7 @@
{
using System;
using System.IO;
+ using System.Linq;
using Content;
using Xunit;
@@ -35,5 +36,18 @@
Assert.Equal(PageSize.A4, page.Size);
}
}
+
+        [Fact]
+        public void GetsCorrectPageTextIgnoringHiddenCharacters()
+        {
+            using (var document = PdfDocument.Open(GetFilename()))
+            {
+                // Concatenate the value of every letter on page 1, in enumeration order.
+                var letters = document.GetPage(1).Letters;
+                var text = string.Concat(letters.Select(letter => letter.Value));
+
+                Assert.Equal("36pt font14 pt font6pt font", text);
+            }
+        }
}
}
diff --git a/src/UglyToad.Pdf.Tests/Integration/SinglePageSimpleOpenOfficeTests.cs b/src/UglyToad.Pdf.Tests/Integration/SinglePageSimpleOpenOfficeTests.cs
new file mode 100644
index 00000000..f5aa0827
--- /dev/null
+++ b/src/UglyToad.Pdf.Tests/Integration/SinglePageSimpleOpenOfficeTests.cs
@@ -0,0 +1,39 @@
+namespace UglyToad.Pdf.Tests.Integration
+{
+ using System;
+ using System.IO;
+ using Content;
+ using Xunit;
+
+    public class SinglePageSimpleOpenOfficeTests
+    {
+        // Resolves the test PDF under Integration/Documents, three levels above the base directory.
+        private static string GetFilename()
+        {
+            var relativeFolder = Path.Combine("..", "..", "..", "Integration", "Documents");
+            var folder = Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, relativeFolder));
+            return Path.Combine(folder, "Single Page Simple - from open office.pdf");
+        }
+
+        [Fact]
+        public void HasCorrectNumberOfPages()
+        {
+            // This test opens the document from an in-memory byte array rather than from the path.
+            var bytes = File.ReadAllBytes(GetFilename());
+            using (var document = PdfDocument.Open(bytes))
+            {
+                Assert.Equal(1, document.NumberOfPages);
+            }
+        }
+
+        [Fact]
+        public void HasCorrectPageSize()
+        {
+            // This test opens the document directly from the file path.
+            using (var document = PdfDocument.Open(GetFilename()))
+            {
+                Assert.Equal(PageSize.Letter, document.GetPage(1).Size);
+            }
+        }
+    }
+}
diff --git a/src/UglyToad.Pdf.Tests/UglyToad.Pdf.Tests.csproj b/src/UglyToad.Pdf.Tests/UglyToad.Pdf.Tests.csproj
index 49106bee..3f547ef0 100644
--- a/src/UglyToad.Pdf.Tests/UglyToad.Pdf.Tests.csproj
+++ b/src/UglyToad.Pdf.Tests/UglyToad.Pdf.Tests.csproj
@@ -13,6 +13,7 @@
+
@@ -31,6 +32,9 @@
PreserveNewest
+
+ PreserveNewest
+
diff --git a/src/UglyToad.Pdf/Content/IPageFactory.cs b/src/UglyToad.Pdf/Content/IPageFactory.cs
index 5663d145..f07fdb5a 100644
--- a/src/UglyToad.Pdf/Content/IPageFactory.cs
+++ b/src/UglyToad.Pdf/Content/IPageFactory.cs
@@ -7,5 +7,7 @@
{
Page Create(int number, PdfDictionary dictionary, PageTreeMembers pageTreeMembers, IRandomAccessRead reader,
bool isLenientParsing);
+
+ void LoadResources(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing);
}
}
\ No newline at end of file
diff --git a/src/UglyToad.Pdf/Content/PageFactory.cs b/src/UglyToad.Pdf/Content/PageFactory.cs
index f9d0dc0c..6873b196 100644
--- a/src/UglyToad.Pdf/Content/PageFactory.cs
+++ b/src/UglyToad.Pdf/Content/PageFactory.cs
@@ -45,7 +45,7 @@
UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary);
- LoadResources(number, dictionary, reader, isLenientParsing);
+ LoadResources(dictionary, reader, isLenientParsing);
PageContent content = default(PageContent);
@@ -136,7 +136,7 @@
return mediaBox;
}
- private void LoadResources(int pageNumber, PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
+ public void LoadResources(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
{
var resources = dictionary.GetItemOrDefault(CosName.RESOURCES);
@@ -155,13 +155,8 @@
if (resourceDictionary is PdfDictionary resolvedDictionary)
{
resourceStore.LoadResourceDictionary(resolvedDictionary, reader, isLenientParsing);
-
- return;
}
}
-
- throw new InvalidOperationException(
- $"No resource dictionary was found for this page ({pageNumber}), the page dictionary was {dictionary}.");
}
}
}
diff --git a/src/UglyToad.Pdf/Content/Pages.cs b/src/UglyToad.Pdf/Content/Pages.cs
index aa24a9ad..95d2131d 100644
--- a/src/UglyToad.Pdf/Content/Pages.cs
+++ b/src/UglyToad.Pdf/Content/Pages.cs
@@ -64,6 +64,7 @@
{
if (locatedPages.TryGetValue(pageNumber, out PdfDictionary targetPageDictionary))
{
+ // TODO: cache the page
return pageFactory.Create(pageNumber, targetPageDictionary, new PageTreeMembers(), reader,
isLenientParsing);
}
@@ -119,6 +120,8 @@
var kids = currentPageDictionary.GetDictionaryObject(CosName.KIDS) as COSArray;
+ pageFactory.LoadResources(currentPageDictionary, reader, isLenientParsing);
+
bool childFound = false;
foreach (var kid in kids.OfType())
{
diff --git a/src/UglyToad.Pdf/Graphics/ContentStreamProcessor.cs b/src/UglyToad.Pdf/Graphics/ContentStreamProcessor.cs
index b5cb0950..7f480319 100644
--- a/src/UglyToad.Pdf/Graphics/ContentStreamProcessor.cs
+++ b/src/UglyToad.Pdf/Graphics/ContentStreamProcessor.cs
@@ -79,10 +79,15 @@
public void ShowText(IInputBytes bytes)
{
- var font = resourceStore.GetFont(GetCurrentState().FontState.FontName);
-
var currentState = GetCurrentState();
+ var font = resourceStore.GetFont(currentState.FontState.FontName);
+
+ if (font == null)
+ {
+ throw new InvalidOperationException($"Could not find the font with name {currentState.FontState.FontName} in the resource store. It has not been loaded yet.");
+ }
+
var fontSize = currentState.FontState.FontSize;
var horizontalScaling = currentState.FontState.HorizontalScaling;
var characterSpacing = currentState.FontState.CharacterSpacing;