Merge pull request #3 from GowenGit/master

Letter boundaries. TODO: review the changes for non-Latin characters. It seems we need to store both the bounding box and the origin for each letter, since the origin is on the baseline while the bounding box can extend below it.
This commit is contained in:
Eliot Jones
2018-04-15 11:41:30 +01:00
committed by GitHub
27 changed files with 438 additions and 462 deletions

View File

@@ -70,7 +70,7 @@
break;
}
var myX = pageLetter.Location.X;
var myX = pageLetter.Rectangle.BottomLeft.X;
var theirX = pdfBoxData[index].X;
var myLetter = pageLetter.Value;
@@ -81,10 +81,13 @@
continue;
}
Assert.Equal(theirLetter, myLetter);
Assert.Equal(theirX, myX, 2);
var comparer = new DecimalComparer(3m);
Assert.Equal(pdfBoxData[index].Width, pageLetter.Width, 1);
Assert.Equal(theirLetter, myLetter);
Assert.Equal(theirX, myX, comparer);
Assert.Equal(pdfBoxData[index].Width, pageLetter.Rectangle.Width, comparer);
index++;
}
@@ -108,7 +111,7 @@
break;
}
var myX = pageLetter.Location.X;
var myX = pageLetter.Rectangle.Left;
var theirX = positions[index].X;
var myLetter = pageLetter.Value;
@@ -122,7 +125,7 @@
Assert.Equal(theirLetter, myLetter);
Assert.Equal(theirX, myX, 2);
Assert.Equal(positions[index].Width, pageLetter.Width, 1);
Assert.Equal(positions[index].Width, pageLetter.Rectangle.Width, 1);
index++;
}

View File

@@ -134,12 +134,12 @@ namespace UglyToad.PdfPig.Tests.Integration
}
Assert.Equal(datum.Text, letter.Value);
Assert.Equal(datum.X, letter.Location.X, 2);
Assert.Equal(datum.X, letter.Rectangle.BottomLeft.X, 2);
var transformed = page.Height - letter.Location.Y;
var transformed = page.Height - letter.Rectangle.BottomLeft.Y;
Assert.Equal(datum.Y, transformed, 2);
Assert.Equal(datum.Width, letter.Width, 2);
Assert.Equal(datum.Width, letter.Rectangle.Width, 2);
Assert.Equal(datum.FontName, letter.FontName);
@@ -179,13 +179,13 @@ namespace UglyToad.PdfPig.Tests.Integration
}
Assert.Equal(datum.Text, letter.Value);
Assert.Equal(datum.X, letter.Location.X, 2);
Assert.Equal(datum.X, letter.Rectangle.BottomLeft.X, 2);
var transformed = page.Height - letter.Location.Y;
var transformed = page.Height - letter.Rectangle.BottomLeft.Y;
Assert.Equal(datum.Y, transformed, 2);
// Until we get width from glyphs we're a bit out.
Assert.True(Math.Abs(datum.Width - letter.Width) < 0.03m);
Assert.True(Math.Abs(datum.Width - letter.Rectangle.Width) < 0.03m);
index++;
}

View File

@@ -37,6 +37,41 @@
}
}
// Checks the bottom-left and top-right corners of the rectangles reported for
// the letters at indices 0, 5 and 16 on page 1 against fixed expected values.
[Fact]
public void HasCorrectLetterBoundingBoxes()
{
    using (var pdf = PdfDocument.Open(GetFilename()))
    {
        var letters = pdf.GetPage(1).Letters;

        var decimalComparer = new DecimalComparer(3m);

        // Each letter is fetched immediately before its assertions so the
        // checks run in the same order as they are written.
        var capitalI = letters[0];
        Assert.Equal("I", capitalI.Value);
        Assert.Equal(90.1m, capitalI.Rectangle.BottomLeft.X, decimalComparer);
        Assert.Equal(709.2m, capitalI.Rectangle.BottomLeft.Y, decimalComparer);
        Assert.Equal(94.0m, capitalI.Rectangle.TopRight.X, decimalComparer);
        Assert.Equal(719.89m, capitalI.Rectangle.TopRight.Y, decimalComparer);

        var lowerA = letters[5];
        Assert.Equal("a", lowerA.Value);
        Assert.Equal(114.5m, lowerA.Rectangle.BottomLeft.X, decimalComparer);
        Assert.Equal(709.2m, lowerA.Rectangle.BottomLeft.Y, decimalComparer);
        Assert.Equal(119.82m, lowerA.Rectangle.TopRight.X, decimalComparer);
        Assert.Equal(714.89m, lowerA.Rectangle.TopRight.Y, decimalComparer);

        var lowerF = letters[16];
        Assert.Equal("f", lowerF.Value);
        Assert.Equal(169.9m, lowerF.Rectangle.BottomLeft.X, decimalComparer);
        Assert.Equal(709.2m, lowerF.Rectangle.BottomLeft.Y, decimalComparer);
        Assert.Equal(176.89m, lowerF.Rectangle.TopRight.X, decimalComparer);
        Assert.Equal(719.89m, lowerF.Rectangle.TopRight.Y, decimalComparer);
    }
}
[Fact]
public void GetsCorrectPageTextIgnoringHiddenCharacters()
{

View File

@@ -0,0 +1,65 @@
namespace UglyToad.PdfPig.Tests.Integration
{
    using System;
    using System.IO;
    using System.Linq;
    using Content;
    using Xunit;

    /// <summary>
    /// Integration tests which open the "Type0 Font.pdf" sample document and check
    /// its page count, page size, extracted text and letter rectangles.
    /// </summary>
    public class Type0FontTests
    {
        /// <summary>
        /// Builds the full path to "Type0 Font.pdf" in the "Integration/Documents" folder,
        /// located three directories above the application base directory.
        /// </summary>
        /// <returns>The full path of the sample document.</returns>
        private static string GetFilename()
        {
            var documentFolder = Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "Integration", "Documents"));

            return Path.Combine(documentFolder, "Type0 Font.pdf");
        }

        [Fact]
        public void HasCorrectNumberOfPages()
        {
            var file = GetFilename();

            // This test opens the document from a byte array; the other tests open it by path.
            using (var document = PdfDocument.Open(File.ReadAllBytes(file)))
            {
                Assert.Equal(1, document.NumberOfPages);
            }
        }

        [Fact]
        public void HasCorrectPageSize()
        {
            using (var document = PdfDocument.Open(GetFilename()))
            {
                var page = document.GetPage(1);

                Assert.Equal(PageSize.Letter, page.Size);
            }
        }

        [Fact]
        public void GetsCorrectPageTextIgnoringHiddenCharacters()
        {
            using (var document = PdfDocument.Open(GetFilename()))
            {
                var page = document.GetPage(1);

                var text = string.Join(string.Empty, page.Letters.Select(x => x.Value));

                // string.Join never returns null, so no null-conditional is needed.
                // Ordinal comparison matches the semantics of string.Contains(string),
                // and Assert.Contains reports the actual text when the check fails.
                Assert.Contains("Powder River Examiner", text, StringComparison.Ordinal);
            }
        }

        [Fact]
        public void HasLetterWidthsAndHeights()
        {
            using (var document = PdfDocument.Open(GetFilename()))
            {
                var page = document.GetPage(1);

                // At least one letter must report a non-zero width, and at least one
                // (not necessarily the same letter) a non-zero height.
                Assert.Contains(page.Letters, x => x.Rectangle.Width != 0);
                Assert.Contains(page.Letters, x => x.Rectangle.Height != 0);
            }
        }
    }
}