diff --git a/src/UglyToad.Pdf.Tests/Integration/Documents/Two Page Text Only - from libre office.pdf b/src/UglyToad.Pdf.Tests/Integration/Documents/Two Page Text Only - from libre office.pdf
new file mode 100644
index 00000000..526105bf
Binary files /dev/null and b/src/UglyToad.Pdf.Tests/Integration/Documents/Two Page Text Only - from libre office.pdf differ
diff --git a/src/UglyToad.Pdf.Tests/Integration/TwoPageTextOnlyLibreOfficeTests.cs b/src/UglyToad.Pdf.Tests/Integration/TwoPageTextOnlyLibreOfficeTests.cs
new file mode 100644
index 00000000..3ba15361
--- /dev/null
+++ b/src/UglyToad.Pdf.Tests/Integration/TwoPageTextOnlyLibreOfficeTests.cs
@@ -0,0 +1,58 @@
+namespace UglyToad.Pdf.Tests.Integration
+{
+ using System;
+ using System.IO;
+ using Content;
+ using Xunit;
+
+ public class TwoPageTextOnlyLibreOfficeTests
+ {
+     // File name of the fixture document every test in this class opens.
+     private const string DocumentName = "Two Page Text Only - from libre office.pdf";
+
+     // Builds the absolute path to the fixture: three levels up from the
+     // AppDomain base directory, then into Integration/Documents.
+     private static string GetFilename()
+     {
+         var baseDirectory = AppDomain.CurrentDomain.BaseDirectory;
+         var relativeFolder = Path.Combine(baseDirectory, "..", "..", "..", "Integration", "Documents");
+
+         return Path.Combine(Path.GetFullPath(relativeFolder), DocumentName);
+     }
+
+     // The document is opened from an in-memory byte array here (the other
+     // tests open it by path) and must report exactly two pages.
+     [Fact]
+     public void HasCorrectNumberOfPages()
+     {
+         var bytes = File.ReadAllBytes(GetFilename());
+
+         using (var document = PdfDocument.Open(bytes))
+         {
+             Assert.Equal(2, document.NumberOfPages);
+         }
+     }
+
+     // Pages 1 and 2 must both report the A4 page size.
+     [Fact]
+     public void HasCorrectPageSize()
+     {
+         using (var document = PdfDocument.Open(GetFilename()))
+         {
+             for (var pageNumber = 1; pageNumber <= 2; pageNumber++)
+             {
+                 Assert.Equal(PageSize.A4, document.GetPage(pageNumber).Size);
+             }
+         }
+     }
+
+     // The extracted text of each page must begin with the expected heading.
+     [Fact]
+     public void PagesStartWithCorrectText()
+     {
+         using (var document = PdfDocument.Open(GetFilename()))
+         {
+             var firstPage = document.GetPage(1);
+             Assert.StartsWith("Apache License", firstPage.Text);
+
+             var secondPage = document.GetPage(2);
+             Assert.StartsWith("2. Grant of Copyright", secondPage.Text);
+         }
+     }
+ }
+}
diff --git a/src/UglyToad.Pdf.Tests/UglyToad.Pdf.Tests.csproj b/src/UglyToad.Pdf.Tests/UglyToad.Pdf.Tests.csproj
index fad17196..0a38ce05 100644
--- a/src/UglyToad.Pdf.Tests/UglyToad.Pdf.Tests.csproj
+++ b/src/UglyToad.Pdf.Tests/UglyToad.Pdf.Tests.csproj
@@ -16,6 +16,7 @@
+
@@ -43,6 +44,9 @@
PreserveNewest
+
+ PreserveNewest
+
diff --git a/src/UglyToad.Pdf/Content/Page.cs b/src/UglyToad.Pdf/Content/Page.cs
index b2621ac0..e694e54d 100644
--- a/src/UglyToad.Pdf/Content/Page.cs
+++ b/src/UglyToad.Pdf/Content/Page.cs
@@ -2,6 +2,7 @@
{
using System;
using System.Collections.Generic;
+ using System.Linq;
public class Page
{
@@ -18,6 +19,8 @@
public IReadOnlyList Letters => Content?.Letters ?? new Letter[0];
+ public string Text { get; }
+
///
/// Gets the width of the page in points.
///
@@ -44,11 +47,22 @@
MediaBox = mediaBox;
CropBox = cropBox;
Content = content;
+ Text = GetText(content);
Width = mediaBox.Bounds.Width;
Height = mediaBox.Bounds.Height;
Size = mediaBox.Bounds.GetPageSize();
}
+
+ // Builds the page text by concatenating the Value of every letter in the
+ // supplied content, in stored order and with no separator between letters.
+ // Returns an empty string (never null) when there is no content or no letters.
+ private static string GetText(PageContent content)
+ {
+     var letters = content?.Letters;
+
+     if (letters == null)
+     {
+         return string.Empty;
+     }
+
+     var values = letters.Select(letter => letter.Value);
+
+     return string.Concat(values);
+ }
}
}
\ No newline at end of file
diff --git a/src/UglyToad.Pdf/Content/Pages.cs b/src/UglyToad.Pdf/Content/Pages.cs
index fbd9c7aa..33c3abe3 100644
--- a/src/UglyToad.Pdf/Content/Pages.cs
+++ b/src/UglyToad.Pdf/Content/Pages.cs
@@ -91,6 +91,7 @@
bool found = pageNumber == soughtPageNumber;
locatedPages[pageNumber] = currentPageDictionary;
+ pageNumbersObserved.Add(pageNumber);
return found;
}
@@ -117,6 +118,7 @@
if (thisPageMatches)
{
childFound = true;
+ break;
}
}