mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-09-19 19:07:56 +08:00
add test for multiple page pdf from libre office
This commit is contained in:
Binary file not shown.
@@ -0,0 +1,58 @@
|
||||
namespace UglyToad.Pdf.Tests.Integration
|
||||
{
|
||||
using System;
|
||||
using System.IO;
|
||||
using Content;
|
||||
using Xunit;
|
||||
|
||||
/// <summary>
/// Integration tests which open the "Two Page Text Only - from libre office.pdf" document
/// and check its page count, the size of each page and the text each page starts with.
/// </summary>
public class TwoPageTextOnlyLibreOfficeTests
{
    /// <summary>
    /// Builds the full path to the test document, looking in the Integration/Documents
    /// folder three levels above the application base directory.
    /// </summary>
    /// <returns>The full path of the PDF file used by these tests.</returns>
    private static string GetFilename()
    {
        var baseDirectory = AppDomain.CurrentDomain.BaseDirectory;
        var relativeFolder = Path.Combine(baseDirectory, "..", "..", "..", "Integration", "Documents");

        return Path.Combine(Path.GetFullPath(relativeFolder), "Two Page Text Only - from libre office.pdf");
    }

    [Fact]
    public void HasCorrectNumberOfPages()
    {
        // This test passes the raw file bytes to Open; the other tests pass the file name.
        var bytes = File.ReadAllBytes(GetFilename());

        using (var document = PdfDocument.Open(bytes))
        {
            Assert.Equal(2, document.NumberOfPages);
        }
    }

    [Fact]
    public void HasCorrectPageSize()
    {
        using (var document = PdfDocument.Open(GetFilename()))
        {
            var firstPage = document.GetPage(1);
            Assert.Equal(PageSize.A4, firstPage.Size);

            var secondPage = document.GetPage(2);
            Assert.Equal(PageSize.A4, secondPage.Size);
        }
    }

    [Fact]
    public void PagesStartWithCorrectText()
    {
        using (var document = PdfDocument.Open(GetFilename()))
        {
            var firstPage = document.GetPage(1);
            Assert.StartsWith("Apache License", firstPage.Text);

            var secondPage = document.GetPage(2);
            Assert.StartsWith("2. Grant of Copyright", secondPage.Text);
        }
    }
}
|
||||
}
|
@@ -16,6 +16,7 @@
|
||||
<None Remove="Integration\Documents\Single Page Non Latin - from acrobat distiller.pdf" />
|
||||
<None Remove="Integration\Documents\Single Page Simple - from google drive.pdf" />
|
||||
<None Remove="Integration\Documents\Single Page Simple - from open office.pdf" />
|
||||
<None Remove="Integration\Documents\Two Page Text Only - from libre office.pdf" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
@@ -43,6 +44,9 @@
|
||||
<Content Include="Integration\Documents\Single Page Simple - from open office.pdf">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</Content>
|
||||
<Content Include="Integration\Documents\Two Page Text Only - from libre office.pdf">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</Content>
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
|
@@ -2,6 +2,7 @@
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
|
||||
public class Page
|
||||
{
|
||||
@@ -18,6 +19,8 @@
|
||||
|
||||
public IReadOnlyList<Letter> Letters => Content?.Letters ?? new Letter[0];
|
||||
|
||||
/// <summary>
/// Gets the text of the page, produced by joining the value of every letter in the page content
/// with no separator. Empty when the content or its letters are null.
/// </summary>
public string Text { get; }
|
||||
|
||||
/// <summary>
|
||||
/// Gets the width of the page in points.
|
||||
/// </summary>
|
||||
@@ -44,11 +47,22 @@
|
||||
MediaBox = mediaBox;
|
||||
CropBox = cropBox;
|
||||
Content = content;
|
||||
Text = GetText(content);
|
||||
|
||||
Width = mediaBox.Bounds.Width;
|
||||
Height = mediaBox.Bounds.Height;
|
||||
|
||||
Size = mediaBox.Bounds.GetPageSize();
|
||||
}
|
||||
|
||||
/// <summary>
/// Joins the value of each letter in the supplied content into a single string, with no separator.
/// </summary>
/// <param name="content">The page content to read letters from; may be null.</param>
/// <returns>
/// The joined letter values, or an empty string when <paramref name="content"/> or its letters are null.
/// </returns>
private static string GetText(PageContent content)
{
    var letters = content?.Letters;

    return letters == null
        ? string.Empty
        : string.Join(string.Empty, letters.Select(letter => letter.Value));
}
|
||||
}
|
||||
}
|
@@ -91,6 +91,7 @@
|
||||
bool found = pageNumber == soughtPageNumber;
|
||||
|
||||
locatedPages[pageNumber] = currentPageDictionary;
|
||||
pageNumbersObserved.Add(pageNumber);
|
||||
|
||||
return found;
|
||||
}
|
||||
@@ -117,6 +118,7 @@
|
||||
if (thisPageMatches)
|
||||
{
|
||||
childFound = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user