Add test for a multiple-page PDF from LibreOffice

This commit is contained in:
Eliot Jones
2018-01-03 22:46:26 +00:00
parent 21be34a938
commit 1aacb14285
5 changed files with 78 additions and 0 deletions

View File

@@ -0,0 +1,58 @@
namespace UglyToad.Pdf.Tests.Integration
{
using System;
using System.IO;
using Content;
using Xunit;
/// <summary>
/// Integration tests which open the two page, text only PDF produced by LibreOffice
/// and check its page count, the size of each page and the text each page starts with.
/// </summary>
public class TwoPageTextOnlyLibreOfficeTests
{
    /// <summary>
    /// Builds the full path of the test document inside the "Integration/Documents" folder,
    /// resolved three directory levels above the application base directory.
    /// </summary>
    /// <returns>The full path to "Two Page Text Only - from libre office.pdf".</returns>
    private static string GetFilename()
    {
        var documentFolder = Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "Integration", "Documents"));

        return Path.Combine(documentFolder, "Two Page Text Only - from libre office.pdf");
    }

    /// <summary>
    /// The document reports two pages. This test opens the document from its bytes,
    /// whereas the other tests in this class open it from the file path.
    /// </summary>
    [Fact]
    public void HasCorrectNumberOfPages()
    {
        var bytes = File.ReadAllBytes(GetFilename());

        using (var document = PdfDocument.Open(bytes))
        {
            Assert.Equal(2, document.NumberOfPages);
        }
    }

    /// <summary>
    /// Both pages (requested as page 1 and page 2) are A4.
    /// </summary>
    [Fact]
    public void HasCorrectPageSize()
    {
        using (var document = PdfDocument.Open(GetFilename()))
        {
            foreach (var pageNumber in new[] { 1, 2 })
            {
                var page = document.GetPage(pageNumber);

                Assert.Equal(PageSize.A4, page.Size);
            }
        }
    }

    /// <summary>
    /// The text of each page begins with the expected heading.
    /// </summary>
    [Fact]
    public void PagesStartWithCorrectText()
    {
        using (var document = PdfDocument.Open(GetFilename()))
        {
            // Ordinal comparison keeps the check exact and independent of the
            // culture settings of the machine running the tests.
            var firstPage = document.GetPage(1);
            Assert.StartsWith("Apache License", firstPage.Text, StringComparison.Ordinal);

            var secondPage = document.GetPage(2);
            Assert.StartsWith("2. Grant of Copyright", secondPage.Text, StringComparison.Ordinal);
        }
    }
}
}

View File

@@ -16,6 +16,7 @@
<None Remove="Integration\Documents\Single Page Non Latin - from acrobat distiller.pdf" /> <None Remove="Integration\Documents\Single Page Non Latin - from acrobat distiller.pdf" />
<None Remove="Integration\Documents\Single Page Simple - from google drive.pdf" /> <None Remove="Integration\Documents\Single Page Simple - from google drive.pdf" />
<None Remove="Integration\Documents\Single Page Simple - from open office.pdf" /> <None Remove="Integration\Documents\Single Page Simple - from open office.pdf" />
<None Remove="Integration\Documents\Two Page Text Only - from libre office.pdf" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
@@ -43,6 +44,9 @@
<Content Include="Integration\Documents\Single Page Simple - from open office.pdf"> <Content Include="Integration\Documents\Single Page Simple - from open office.pdf">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory> <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content> </Content>
<Content Include="Integration\Documents\Two Page Text Only - from libre office.pdf">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>

View File

@@ -2,6 +2,7 @@
{ {
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Linq;
public class Page public class Page
{ {
@@ -18,6 +19,8 @@
public IReadOnlyList<Letter> Letters => Content?.Letters ?? new Letter[0]; public IReadOnlyList<Letter> Letters => Content?.Letters ?? new Letter[0];
public string Text { get; }
/// <summary> /// <summary>
/// Gets the width of the page in points. /// Gets the width of the page in points.
/// </summary> /// </summary>
@@ -44,11 +47,22 @@
MediaBox = mediaBox; MediaBox = mediaBox;
CropBox = cropBox; CropBox = cropBox;
Content = content; Content = content;
Text = GetText(content);
Width = mediaBox.Bounds.Width; Width = mediaBox.Bounds.Width;
Height = mediaBox.Bounds.Height; Height = mediaBox.Bounds.Height;
Size = mediaBox.Bounds.GetPageSize(); Size = mediaBox.Bounds.GetPageSize();
} }
/// <summary>
/// Builds the text of a page by concatenating the <c>Value</c> of each letter
/// in the order the letters appear in the content.
/// </summary>
/// <param name="content">The page content; may be null.</param>
/// <returns>
/// The concatenated letter values, or an empty string when the content
/// or its letters are null.
/// </returns>
private static string GetText(PageContent content)
{
    var letters = content?.Letters;

    return letters == null
        ? string.Empty
        : string.Concat(letters.Select(letter => letter.Value));
}
} }
} }

View File

@@ -91,6 +91,7 @@
bool found = pageNumber == soughtPageNumber; bool found = pageNumber == soughtPageNumber;
locatedPages[pageNumber] = currentPageDictionary; locatedPages[pageNumber] = currentPageDictionary;
pageNumbersObserved.Add(pageNumber);
return found; return found;
} }
@@ -117,6 +118,7 @@
if (thisPageMatches) if (thisPageMatches)
{ {
childFound = true; childFound = true;
break;
} }
} }