Add Xfinium data validations for non-Latin characters

This commit is contained in:
Eliot Jones
2018-04-14 19:18:19 +01:00
parent f1c01b5576
commit 731a1a3956
2 changed files with 65 additions and 1 deletions

View File

@@ -16,15 +16,19 @@
public string FontName { get; set; }
public decimal Height { get; set; }
public static AssertablePositionData Parse(string line)
{
var parts = line.Split('\t', StringSplitOptions.None);
if (parts.Length != 6)
if (parts.Length < 6)
{
throw new ArgumentException($"Expected 6 parts to the line, instead got {parts.Length}");
}
var height = parts.Length < 7 ? 0 : decimal.Parse(parts[6]);
return new AssertablePositionData
{
X = decimal.Parse(parts[0]),

View File

@@ -84,6 +84,46 @@
Assert.Equal(theirLetter, myLetter);
Assert.Equal(theirX, myX, 2);
Assert.Equal(pdfBoxData[index].Width, pageLetter.Width, 1);
index++;
}
}
}
/// <summary>
/// Compares the letters read from page 1 of the test document against the expected
/// entries returned by <see cref="GetXfiniumPositionData"/>, checking text, X position and width.
/// </summary>
[Fact]
public void LetterPositionsAreCorrectXfinium()
{
using (var document = PdfDocument.Open(GetFilename()))
{
var page = document.GetPage(1);
var positions = GetXfiniumPositionData();
// Index of the next expected entry; it only advances after a page letter has been asserted.
var index = 0;
foreach (var pageLetter in page.Letters)
{
// The expected data may be shorter than the page: once every expected entry
// has been compared, the remaining page letters are left unchecked.
if (index >= positions.Count)
{
break;
}
var myX = pageLetter.Location.X;
var theirX = positions[index].X;
var myLetter = pageLetter.Value;
var theirLetter = positions[index].Text;
// A space on the page that has no matching space in the expected data is skipped
// without consuming an expected entry.
if (myLetter == " " && theirLetter != " ")
{
continue;
}
Assert.Equal(theirLetter, myLetter);
// The third argument is the comparison precision: X is checked more tightly (2) than width (1).
Assert.Equal(theirX, myX, 2);
Assert.Equal(positions[index].Width, pageLetter.Width, 1);
index++;
}
}
@@ -113,5 +153,25 @@
return result;
}
/// <summary>
/// Returns the expected letter position data for the Xfinium comparison test,
/// one <see cref="AssertablePositionData"/> per expected letter, in reading order.
/// </summary>
/// <returns>The parsed expected entries, in the order they are listed below.</returns>
private static IReadOnlyList<AssertablePositionData> GetXfiniumPositionData()
{
    // Each row holds seven tab-separated fields in the layout AssertablePositionData.Parse
    // expects (it splits on '\t'; the first field is X and the optional seventh is the height).
    // The tabs are written as explicit \t escapes and every row is its own string so that
    // editor whitespace conversion or line-ending normalisation of this source file cannot
    // silently change the number of fields or rows that get parsed.
    var rows = new[]
    {
        "90\t90.66\t14.439546\tH\t19\tFFJICI+TimesNewRomanPSMT\t17.802",
        "104.4395\t90.66\t8.885106\te\t19\tFFJICI+TimesNewRomanPSMT\t17.80218",
        "113.3247\t90.66\t5.568426\tl\t19\tFFJICI+TimesNewRomanPSMT\t17.80218",
        "118.8931\t90.66\t5.568426\tl\t19\tFFJICI+TimesNewRomanPSMT\t17.80218",
        "124.4615\t90.66\t10.003986\to\t19\tFFJICI+TimesNewRomanPSMT\t17.80218",
        // The next four rows are the non-Latin letters (note the different font subset name).
        "139.4505\t90.66\t6.727266\tﺪ\t19\tFFJIAH+TimesNewRomanPSMT\t17.80218",
        "146.1778\t90.66\t7.866126\tﻤ\t19\tFFJIAH+TimesNewRomanPSMT\t17.80218",
        "154.0439\t90.66\t10.583406\tﺤ\t19\tFFJIAH+TimesNewRomanPSMT\t17.80218",
        "164.6273\t90.66\t7.866126\tﻣ\t19\tFFJIAH+TimesNewRomanPSMT\t17.80218",
        "177.4964\t90.66\t18.86112\tW\t19\tFFJICI+TimesNewRomanPSMT\t17.80218"
    };

    return rows
        .Select(AssertablePositionData.Parse)
        .ToList();
}
}
}