mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-09-20 03:17:57 +08:00
Add a test for a multi-page PDF produced by LibreOffice
This commit is contained in:
Binary file not shown.
@@ -0,0 +1,58 @@
|
|||||||
|
namespace UglyToad.Pdf.Tests.Integration
|
||||||
|
{
|
||||||
|
using System;
|
||||||
|
using System.IO;
|
||||||
|
using Content;
|
||||||
|
using Xunit;
|
||||||
|
|
||||||
|
/// <summary>
/// Integration tests which open the "Two Page Text Only - from libre office.pdf" document
/// and check its page count, page sizes and the text each page starts with.
/// </summary>
public class TwoPageTextOnlyLibreOfficeTests
{
    /// <summary>
    /// Builds the full path to the PDF document used by every test in this class.
    /// </summary>
    /// <returns>The absolute path of the test document.</returns>
    private static string GetFilename()
    {
        // Walk three directories up from the app domain base directory and then into Integration/Documents.
        var documentFolder = Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "Integration", "Documents"));

        return Path.Combine(documentFolder, "Two Page Text Only - from libre office.pdf");
    }

    /// <summary>
    /// The document reports two pages when opened from its raw bytes.
    /// </summary>
    [Fact]
    public void HasCorrectNumberOfPages()
    {
        var file = GetFilename();

        // This test passes the file contents as a byte array; the other tests pass the path instead.
        using (var document = PdfDocument.Open(File.ReadAllBytes(file)))
        {
            Assert.Equal(2, document.NumberOfPages);
        }
    }

    /// <summary>
    /// Both pages of the document are reported as A4.
    /// </summary>
    [Fact]
    public void HasCorrectPageSize()
    {
        using (var document = PdfDocument.Open(GetFilename()))
        {
            var page = document.GetPage(1);

            Assert.Equal(PageSize.A4, page.Size);

            page = document.GetPage(2);

            Assert.Equal(PageSize.A4, page.Size);
        }
    }

    /// <summary>
    /// The text of each page begins with the expected heading.
    /// </summary>
    [Fact]
    public void PagesStartWithCorrectText()
    {
        using (var document = PdfDocument.Open(GetFilename()))
        {
            var page = document.GetPage(1);

            // Compare ordinally so the result does not depend on the current culture of the test machine.
            Assert.StartsWith("Apache License", page.Text, StringComparison.Ordinal);

            page = document.GetPage(2);

            Assert.StartsWith("2. Grant of Copyright", page.Text, StringComparison.Ordinal);
        }
    }
}
|
||||||
|
}
|
@@ -16,6 +16,7 @@
|
|||||||
<None Remove="Integration\Documents\Single Page Non Latin - from acrobat distiller.pdf" />
|
<None Remove="Integration\Documents\Single Page Non Latin - from acrobat distiller.pdf" />
|
||||||
<None Remove="Integration\Documents\Single Page Simple - from google drive.pdf" />
|
<None Remove="Integration\Documents\Single Page Simple - from google drive.pdf" />
|
||||||
<None Remove="Integration\Documents\Single Page Simple - from open office.pdf" />
|
<None Remove="Integration\Documents\Single Page Simple - from open office.pdf" />
|
||||||
|
<None Remove="Integration\Documents\Two Page Text Only - from libre office.pdf" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
@@ -43,6 +44,9 @@
|
|||||||
<Content Include="Integration\Documents\Single Page Simple - from open office.pdf">
|
<Content Include="Integration\Documents\Single Page Simple - from open office.pdf">
|
||||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||||
</Content>
|
</Content>
|
||||||
|
<Content Include="Integration\Documents\Two Page Text Only - from libre office.pdf">
|
||||||
|
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||||
|
</Content>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
|
@@ -2,6 +2,7 @@
|
|||||||
{
|
{
|
||||||
using System;
|
using System;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
|
||||||
public class Page
|
public class Page
|
||||||
{
|
{
|
||||||
@@ -18,6 +19,8 @@
|
|||||||
|
|
||||||
public IReadOnlyList<Letter> Letters => Content?.Letters ?? new Letter[0];
|
public IReadOnlyList<Letter> Letters => Content?.Letters ?? new Letter[0];
|
||||||
|
|
||||||
|
/// <summary>
/// Gets the text of the page, built once in the constructor by joining the value of every letter in the page content.
/// This is an empty string when the page has no content or the content has no letters collection.
/// </summary>
public string Text { get; }
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Gets the width of the page in points.
|
/// Gets the width of the page in points.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
@@ -44,11 +47,22 @@
|
|||||||
MediaBox = mediaBox;
|
MediaBox = mediaBox;
|
||||||
CropBox = cropBox;
|
CropBox = cropBox;
|
||||||
Content = content;
|
Content = content;
|
||||||
|
Text = GetText(content);
|
||||||
|
|
||||||
Width = mediaBox.Bounds.Width;
|
Width = mediaBox.Bounds.Width;
|
||||||
Height = mediaBox.Bounds.Height;
|
Height = mediaBox.Bounds.Height;
|
||||||
|
|
||||||
Size = mediaBox.Bounds.GetPageSize();
|
Size = mediaBox.Bounds.GetPageSize();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Builds the text of a page by joining the value of each letter with no separator.
/// </summary>
/// <param name="content">The page content to read letters from, may be null.</param>
/// <returns>The joined letter values, or an empty string when there is no content or no letters collection.</returns>
private static string GetText(PageContent content)
{
    var letters = content?.Letters;

    return letters == null
        ? string.Empty
        : string.Join(string.Empty, letters.Select(letter => letter.Value));
}
|
||||||
}
|
}
|
||||||
}
|
}
|
@@ -91,6 +91,7 @@
|
|||||||
bool found = pageNumber == soughtPageNumber;
|
bool found = pageNumber == soughtPageNumber;
|
||||||
|
|
||||||
locatedPages[pageNumber] = currentPageDictionary;
|
locatedPages[pageNumber] = currentPageDictionary;
|
||||||
|
pageNumbersObserved.Add(pageNumber);
|
||||||
|
|
||||||
return found;
|
return found;
|
||||||
}
|
}
|
||||||
@@ -117,6 +118,7 @@
|
|||||||
if (thisPageMatches)
|
if (thisPageMatches)
|
||||||
{
|
{
|
||||||
childFound = true;
|
childFound = true;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user