mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-09-21 04:17:57 +08:00
181 lines
6.0 KiB
C#
181 lines
6.0 KiB
C#
namespace UglyToad.PdfPig.Tests.Integration
|
|
{
|
|
using System;
|
|
using System.Collections.Generic;
|
|
using System.IO;
|
|
using System.Linq;
|
|
using Content;
|
|
using Xunit;
|
|
|
|
public class SinglePageNonLatinAcrobatDistillerTests
|
|
{
|
|
/// <summary>
/// Resolves the path of the PDF document exercised by the tests in this class.
/// </summary>
/// <returns>Whatever <c>IntegrationHelpers.GetDocumentPath</c> returns for the test document's file name.</returns>
private static string GetFilename()
{
    const string documentName = "Single Page Non Latin - from acrobat distiller.pdf";

    return IntegrationHelpers.GetDocumentPath(documentName);
}
|
|
|
|
/// <summary>
/// Opens the document from an in-memory byte array and checks that it reports exactly one page.
/// </summary>
[Fact]
public void HasCorrectNumberOfPages()
{
    // Read the file up front so the document is opened from bytes rather than from a path.
    var bytes = File.ReadAllBytes(GetFilename());

    using (var document = PdfDocument.Open(bytes))
    {
        var pageCount = document.NumberOfPages;

        Assert.Equal(1, pageCount);
    }
}
|
|
|
|
/// <summary>
/// Opens the document by path and checks that the first page is reported as <c>PageSize.Letter</c>.
/// </summary>
[Fact]
public void HasCorrectPageSize()
{
    var path = GetFilename();

    using (var document = PdfDocument.Open(path))
    {
        var firstPageSize = document.GetPage(1).Size;

        Assert.Equal(PageSize.Letter, firstPageSize);
    }
}
|
|
|
|
/// <summary>
/// Concatenates the value of every letter on page 1, in the order the page exposes them,
/// and compares the result with the expected text.
/// </summary>
[Fact]
public void GetsCorrectPageTextIgnoringHiddenCharacters()
{
    var path = GetFilename();

    using (var document = PdfDocument.Open(path))
    {
        var letters = document.GetPage(1).Letters;

        var extracted = string.Concat(letters.Select(letter => letter.Value));

        // The non-Latin characters in the literal below can appear reversed when viewed in source,
        // although the original author notes they are extracted correctly.
        // NOTE(review): presumably a right-to-left display effect of the editor - not verified.
        // TODO: Need someone who can read these to check them
        Assert.Equal("Hello ﺪﻤﺤﻣ World. ", extracted);
    }
}
|
|
|
|
/// <summary>
/// Walks the letters of page 1 alongside the reference records from
/// <c>GetPdfBoxPositionData</c> and checks text, X position and width of each pair.
/// </summary>
/// <remarks>
/// Extracted space letters that have no counterpart in the reference data are skipped.
/// Letters remaining after the last reference record are not inspected.
/// </remarks>
[Fact]
public void LetterPositionsAreCorrectPdfBox()
{
    // The comparer does not depend on the letter being checked, so build it once.
    var comparer = new DecimalComparer(3m);

    using (var document = PdfDocument.Open(GetFilename()))
    {
        var page = document.GetPage(1);

        var pdfBoxData = GetPdfBoxPositionData();

        var index = 0;
        foreach (var pageLetter in page.Letters)
        {
            if (index >= pdfBoxData.Count)
            {
                break;
            }

            var expected = pdfBoxData[index];
            var myLetter = pageLetter.Value;

            // An extracted space with no matching reference record: skip it without
            // advancing through the reference data.
            if (myLetter == " " && expected.Text != " ")
            {
                continue;
            }

            Assert.Equal(expected.Text, myLetter);

            Assert.Equal(expected.X, pageLetter.Location.X, comparer);

            Assert.Equal(expected.Width, pageLetter.Width, comparer);

            index++;
        }

        // Every reference record must have been matched; otherwise a page that yields
        // too few (or no) letters would pass this test without checking anything.
        Assert.Equal(pdfBoxData.Count, index);
    }
}
|
|
|
|
/// <summary>
/// Walks the letters of page 1 alongside the reference records from
/// <c>GetXfiniumPositionData</c> and checks text, X position and width of each pair.
/// </summary>
/// <remarks>
/// Extracted space letters that have no counterpart in the reference data are skipped.
/// The reference data is shorter than the page text, so letters after the last
/// reference record are not inspected.
/// </remarks>
[Fact]
public void LetterPositionsAreCorrectXfinium()
{
    var comparer = new DecimalComparer(1);

    using (var document = PdfDocument.Open(GetFilename()))
    {
        var page = document.GetPage(1);

        var positions = GetXfiniumPositionData();

        var index = 0;
        foreach (var pageLetter in page.Letters)
        {
            if (index >= positions.Count)
            {
                break;
            }

            var expected = positions[index];
            var myLetter = pageLetter.Value;

            // An extracted space with no matching reference record: skip it without
            // advancing through the reference data.
            if (myLetter == " " && expected.Text != " ")
            {
                continue;
            }

            Assert.Equal(expected.Text, myLetter);
            Assert.Equal(expected.X, pageLetter.Location.X, comparer);

            // Width deliberately keeps the numeric-precision overload (argument 1) used by the
            // original test instead of the comparer.
            // NOTE(review): confirm whether the two are meant to be equivalent.
            Assert.Equal(expected.Width, pageLetter.Width, 1);

            index++;
        }

        // Every reference record must have been matched; otherwise a page that yields
        // too few (or no) letters would pass this test without checking anything.
        Assert.Equal(positions.Count, index);
    }
}
|
|
|
|
/// <summary>
/// Builds the reference letter records consumed by <c>LetterPositionsAreCorrectPdfBox</c>.
/// </summary>
/// <returns>
/// One <c>AssertablePositionData</c> per non-empty line of the embedded data, in the order
/// the lines appear.
/// </returns>
private static IReadOnlyList<AssertablePositionData> GetPdfBoxPositionData()
{
    // One record per line; each line is handed unchanged to AssertablePositionData.Parse.
    // NOTE(review): the column meanings and the expected field delimiter are defined by
    // AssertablePositionData.Parse - confirm the whitespace below matches what it splits on.
    const string data = @"90 90.65997 14.42556 H 19 FFJICI+TimesNewRomanPSMT
104.4395 90.65997 8.871117 e 19 FFJICI+TimesNewRomanPSMT
113.3247 90.65997 5.554443 l 19 FFJICI+TimesNewRomanPSMT
118.8931 90.65997 5.554443 l 19 FFJICI+TimesNewRomanPSMT
124.4615 90.65997 9.989998 o 19 FFJICI+TimesNewRomanPSMT
139.4505 90.65997 6.733261 ﺪ 19 FFJIAH+TimesNewRomanPSMT
146.1778 90.65997 7.872116 ﻤ 19 FFJIAH+TimesNewRomanPSMT
154.0439 90.65997 10.5894 ﺤ 19 FFJIAH+TimesNewRomanPSMT
164.6273 90.65997 7.872116 ﻣ 19 FFJIAH+TimesNewRomanPSMT
177.4964 90.65997 18.86111 W 19 FFJICI+TimesNewRomanPSMT
196.3575 90.65997 9.990005 o 19 FFJICI+TimesNewRomanPSMT
206.4275 90.65997 6.653336 r 19 FFJICI+TimesNewRomanPSMT
213.0808 90.65997 5.554443 l 19 FFJICI+TimesNewRomanPSMT
218.6352 90.65997 9.990005 d 19 FFJICI+TimesNewRomanPSMT
228.6252 90.65997 4.994995 . 19 FFJICI+TimesNewRomanPSMT";

    // Splitting on either newline character and discarding empty entries copes with
    // \n, \r and \r\n line endings alike, so the literal is independent of how the
    // source file was checked out.
    var lines = data.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

    return lines.Select(AssertablePositionData.Parse).ToList();
}
|
|
|
|
/// <summary>
/// Builds the reference letter records consumed by <c>LetterPositionsAreCorrectXfinium</c>.
/// </summary>
/// <returns>
/// One <c>AssertablePositionData</c> per non-empty line of the embedded data, in the order
/// the lines appear.
/// </returns>
private static IReadOnlyList<AssertablePositionData> GetXfiniumPositionData()
{
    // One record per line; each line is handed unchanged to AssertablePositionData.Parse.
    // These lines carry one more trailing field than the PdfBox reference data.
    // NOTE(review): the column meanings and the expected field delimiter are defined by
    // AssertablePositionData.Parse - confirm the whitespace below matches what it splits on.
    const string data = @"90 90.66 14.439546 H 19 FFJICI+TimesNewRomanPSMT 17.802
104.4395 90.66 8.885106 e 19 FFJICI+TimesNewRomanPSMT 17.80218
113.3247 90.66 5.568426 l 19 FFJICI+TimesNewRomanPSMT 17.80218
118.8931 90.66 5.568426 l 19 FFJICI+TimesNewRomanPSMT 17.80218
124.4615 90.66 10.003986 o 19 FFJICI+TimesNewRomanPSMT 17.80218
139.4505 90.66 6.727266 ﺪ 19 FFJIAH+TimesNewRomanPSMT 17.80218
146.1778 90.66 7.866126 ﻤ 19 FFJIAH+TimesNewRomanPSMT 17.80218
154.0439 90.66 10.583406 ﺤ 19 FFJIAH+TimesNewRomanPSMT 17.80218
164.6273 90.66 7.866126 ﻣ 19 FFJIAH+TimesNewRomanPSMT 17.80218
177.4964 90.66 18.86112 W 19 FFJICI+TimesNewRomanPSMT 17.80218";

    // Splitting on either newline character and discarding empty entries copes with
    // \n, \r and \r\n line endings alike, so the literal is independent of how the
    // source file was checked out.
    var lines = data.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

    return lines.Select(AssertablePositionData.Parse).ToList();
}
|
|
}
|
|
}
|