mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-10-07 07:34:36 +08:00
Fix #1148
This commit is contained in:
@@ -7,6 +7,28 @@
|
||||
|
||||
public class GithubIssuesTests
|
||||
{
|
||||
/// <summary>
/// Regression test for issue #1148: opens "P2P-33713919.pdf" with lenient parsing,
/// extracts the words of page 2 and checks the bounding box of one known word.
/// </summary>
[Fact]
public void Issue1148()
{
    var path = IntegrationHelpers.GetSpecificTestDocumentPath("P2P-33713919.pdf");

    using (var document = PdfDocument.Open(path, new ParsingOptions() { UseLenientParsing = true }))
    {
        var page = document.GetPage(2);

        var letters = page.Letters;

        var words = NearestNeighbourWordExtractor.Instance.GetWords(letters).ToArray();

        // The word at index 42 is the one whose bounding box is checked below.
        var firstTableLine = words[42];

        // Sanity check that we are looking at the intended word. The extracted text is
        // not actually correct, so this suffix may change if text extraction improves.
        Assert.EndsWith("C<--,:", firstTableLine.Text);

        // xUnit's Assert.Equal takes (expected, actual); keeping that order makes
        // failure messages label the two values correctly.
        Assert.Equal(new PdfPoint(x: 31.890118, y: 693.035685), firstTableLine.BoundingBox.BottomLeft);
        Assert.Equal(new PdfPoint(x: 563.3851179999991, y: 693.035685), firstTableLine.BoundingBox.BottomRight);
    }
}
|
||||
|
||||
[Fact]
|
||||
public void Issue1122()
|
||||
{
|
||||
|
Binary file not shown.
@@ -61,9 +61,12 @@
|
||||
this.defaultWidth = defaultWidth;
|
||||
this.cidToGid = cidToGid;
|
||||
|
||||
// TODO: This should maybe take units per em into account?
|
||||
var scale = 1 / (double)(fontProgram?.GetFontMatrixMultiplier() ?? 1000);
|
||||
FontMatrix = TransformationMatrix.FromValues(scale, 0, 0, scale, 0, 0);
|
||||
|
||||
// NB: For the font matrix, PdfBox always returns 1/1000 with the comment '1000 upem, this is not strictly true'
|
||||
// see https://github.com/apache/pdfbox/blob/a5379f5588ee4c98222ee61366ad3d82e0f2264e/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java#L191
|
||||
// Always using 1/1000 breaks the 'ReadWordsFromOldGutnishPage1' test
|
||||
}
|
||||
|
||||
public double GetWidthFromFont(int characterIdentifier)
|
||||
|
@@ -126,13 +126,23 @@
|
||||
// Get the bounding box in glyph space
|
||||
var boundingBox = CidFont.GetBoundingBox(characterIdentifier);
|
||||
|
||||
boundingBox = CidFont.GetFontMatrix(characterIdentifier).Transform(boundingBox);
|
||||
var fontMatrix = CidFont.GetFontMatrix(characterIdentifier);
|
||||
boundingBox = fontMatrix.Transform(boundingBox);
|
||||
|
||||
var width = CidFont.GetWidthFromFont(characterIdentifier);
|
||||
|
||||
var advanceWidth = GetFontMatrix().TransformX(width);
|
||||
// BobLD: Not sure why we don't need CidFont.GetFontMatrix(characterCode)
|
||||
// Might be related to https://github.com/veraPDF/veraPDF-library/issues/1010
|
||||
double scale = fontMatrix.A;
|
||||
if (Math.Abs(scale - 0.001) < 0.0001)
|
||||
{
|
||||
// BobLD: The value of scale is close enough to 0.001 to be able to use 0.001.
|
||||
// Still not sure what the correct logic is, but this hack fixes issue #1148 (while not breaking "Old Gutnish Internet Explorer.pdf")
|
||||
//
|
||||
// Based on https://martin.hoppenheit.info/blog/2018/pdfa-validation-and-inconsistent-glyph-width-information/
|
||||
// which quotes: section 6.2.11.5 of ISO 19005-2:2011 (PDF/A-2) clarifies this issue: “For ISO 19005, consistent is defined to be a difference of no more than 1/1000 unit.”
|
||||
scale = 0.001;
|
||||
}
|
||||
|
||||
var advanceWidth = scale * width;
|
||||
|
||||
var result = new CharacterBoundingBox(boundingBox, advanceWidth);
|
||||
|
||||
|
Reference in New Issue
Block a user