From 731a1a3956d7db6226c97df829f2ad71116bf961 Mon Sep 17 00:00:00 2001 From: Eliot Jones Date: Sat, 14 Apr 2018 19:18:19 +0100 Subject: [PATCH] add xfinium data validations for non latin characters --- .../Integration/AssertablePositionData.cs | 6 +- ...SinglePageNonLatinAcrobatDistillerTests.cs | 60 +++++++++++++++++++ 2 files changed, 65 insertions(+), 1 deletion(-) diff --git a/src/UglyToad.PdfPig.Tests/Integration/AssertablePositionData.cs b/src/UglyToad.PdfPig.Tests/Integration/AssertablePositionData.cs index 0a520e79..b38a3170 100644 --- a/src/UglyToad.PdfPig.Tests/Integration/AssertablePositionData.cs +++ b/src/UglyToad.PdfPig.Tests/Integration/AssertablePositionData.cs @@ -16,15 +16,19 @@ public string FontName { get; set; } + public decimal Height { get; set; } + public static AssertablePositionData Parse(string line) { var parts = line.Split('\t', StringSplitOptions.None); - if (parts.Length != 6) + if (parts.Length < 6) { throw new ArgumentException($"Expected 6 parts to the line, instead got {parts.Length}"); } + var height = parts.Length < 7 ? 0 : decimal.Parse(parts[6]); + return new AssertablePositionData { X = decimal.Parse(parts[0]), diff --git a/src/UglyToad.PdfPig.Tests/Integration/SinglePageNonLatinAcrobatDistillerTests.cs b/src/UglyToad.PdfPig.Tests/Integration/SinglePageNonLatinAcrobatDistillerTests.cs index 84aea6d3..81e8c3a2 100644 --- a/src/UglyToad.PdfPig.Tests/Integration/SinglePageNonLatinAcrobatDistillerTests.cs +++ b/src/UglyToad.PdfPig.Tests/Integration/SinglePageNonLatinAcrobatDistillerTests.cs @@ -84,6 +84,46 @@ Assert.Equal(theirLetter, myLetter); Assert.Equal(theirX, myX, 2); + Assert.Equal(pdfBoxData[index].Width, pageLetter.Width, 1); + + index++; + } + } + } + + [Fact] + public void LetterPositionsAreCorrectXfinium() + { + using (var document = PdfDocument.Open(GetFilename())) + { + var page = document.GetPage(1); + + var positions = GetXfiniumPositionData(); + + var index = 0; + foreach (var pageLetter in page.Letters) + { + if (index >= positions.Count) + { + break; + } + + var myX = pageLetter.Location.X; + var theirX = positions[index].X; + + var myLetter = pageLetter.Value; + var theirLetter = positions[index].Text; + + if (myLetter == " " && theirLetter != " ") + { + continue; + } + + Assert.Equal(theirLetter, myLetter); + Assert.Equal(theirX, myX, 2); + + Assert.Equal(positions[index].Width, pageLetter.Width, 1); + index++; } } @@ -113,5 +153,25 @@ return result; } + + private static IReadOnlyList GetXfiniumPositionData() + { + const string data = @"90 90.66 14.439546 H 19 FFJICI+TimesNewRomanPSMT 17.802 +104.4395 90.66 8.885106 e 19 FFJICI+TimesNewRomanPSMT 17.80218 +113.3247 90.66 5.568426 l 19 FFJICI+TimesNewRomanPSMT 17.80218 +118.8931 90.66 5.568426 l 19 FFJICI+TimesNewRomanPSMT 17.80218 +124.4615 90.66 10.003986 o 19 FFJICI+TimesNewRomanPSMT 17.80218 +139.4505 90.66 6.727266 ﺪ 19 FFJIAH+TimesNewRomanPSMT 17.80218 +146.1778 90.66 7.866126 ﻤ 19 FFJIAH+TimesNewRomanPSMT 17.80218 +154.0439 90.66 10.583406 ﺤ 19 FFJIAH+TimesNewRomanPSMT 17.80218 +164.6273 90.66 7.866126 ﻣ 19 FFJIAH+TimesNewRomanPSMT 17.80218 +177.4964 90.66 18.86112 W 19 FFJICI+TimesNewRomanPSMT 17.80218"; + + var result = data.Split(new[] { "\r", "\n", "\r\n" }, StringSplitOptions.RemoveEmptyEntries) + .Select(AssertablePositionData.Parse) + .ToList(); + + return result; + } } }