mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-10-15 03:34:52 +08:00
fix bug with computing text positions
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
namespace UglyToad.Pdf.Tests.Integration
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using Content;
|
||||
@@ -51,5 +52,66 @@
|
||||
Assert.Equal("Hello ﺪﻤﺤﻣ World. ", text);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Compares the letters of page 1 against the reference entries returned by
/// <c>GetPdfBoxPositionData</c>: the text must match exactly and the X position must
/// match to 2 decimal places.
/// </summary>
[Fact]
public void LetterPositionsAreCorrectPdfBox()
{
    using (var document = PdfDocument.Open(GetFilename()))
    {
        var page = document.GetPage(1);

        var pdfBoxData = GetPdfBoxPositionData();

        var index = 0;

        foreach (var pageLetter in page.Letters)
        {
            // Stop once every reference entry has been checked.
            if (index >= pdfBoxData.Count)
            {
                break;
            }

            var expected = pdfBoxData[index];
            var myLetter = pageLetter.Value;

            // A space on our side with no matching space in the reference data is skipped
            // without advancing the reference index.
            if (myLetter == " " && expected.Text != " ")
            {
                continue;
            }

            Assert.Equal(expected.Text, myLetter);
            Assert.Equal(expected.X, pageLetter.Location.X, 2);

            index++;
        }

        // Without this check the test would pass vacuously if the page produced fewer
        // letters than the reference data (including no letters at all).
        Assert.Equal(pdfBoxData.Count, index);
    }
}
|
||||
|
||||
/// <summary>
/// Builds the reference letter data for the position test by splitting the embedded text
/// into non-empty lines and passing each line to <c>AssertablePositionData.Parse</c>.
/// </summary>
/// <returns>One parsed entry per non-empty line, in the order the lines appear.</returns>
private static IReadOnlyList<AssertablePositionData> GetPdfBoxPositionData()
{
    const string data = @"90 90.65997 14.42556 H 19 FFJICI+TimesNewRomanPSMT
104.4395 90.65997 8.871117 e 19 FFJICI+TimesNewRomanPSMT
113.3247 90.65997 5.554443 l 19 FFJICI+TimesNewRomanPSMT
118.8931 90.65997 5.554443 l 19 FFJICI+TimesNewRomanPSMT
124.4615 90.65997 9.989998 o 19 FFJICI+TimesNewRomanPSMT
139.4505 90.65997 6.733261 ﺪ 19 FFJIAH+TimesNewRomanPSMT
146.1778 90.65997 7.872116 ﻤ 19 FFJIAH+TimesNewRomanPSMT
154.0439 90.65997 10.5894 ﺤ 19 FFJIAH+TimesNewRomanPSMT
164.6273 90.65997 7.872116 ﻣ 19 FFJIAH+TimesNewRomanPSMT
177.4964 90.65997 18.86111 W 19 FFJICI+TimesNewRomanPSMT
196.3575 90.65997 9.990005 o 19 FFJICI+TimesNewRomanPSMT
206.4275 90.65997 6.653336 r 19 FFJICI+TimesNewRomanPSMT
213.0808 90.65997 5.554443 l 19 FFJICI+TimesNewRomanPSMT
218.6352 90.65997 9.990005 d 19 FFJICI+TimesNewRomanPSMT
228.6252 90.65997 4.994995 . 19 FFJICI+TimesNewRomanPSMT";

    // Accept any line-ending convention; empty fragments between separators are dropped.
    var lineBreaks = new[] {"\r", "\n", "\r\n"};
    var lines = data.Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries);

    return lines.Select(AssertablePositionData.Parse).ToList();
}
|
||||
}
|
||||
}
|
||||
|
@@ -1,15 +0,0 @@
|
||||
|
||||
128 64 32 16 8 4 2 1
|
||||
0 0 0 0 | 0 0 0 0
|
||||
03
|
||||
0 0 0 0 | 0 0 1 1
|
||||
20
|
||||
0 0 1 0 | 0 0 0 0
|
||||
|
||||
37
|
||||
0 0 1 1 | 0 1 1 1
|
||||
54
|
||||
0 1 0 1 | 0 1 0 0
|
||||
|
||||
41
|
||||
0 1 0 0 | 0 0 0 1
|
@@ -8,6 +8,7 @@
|
||||
using Graphics;
|
||||
using IO;
|
||||
using Parser;
|
||||
using Util;
|
||||
|
||||
internal class PageFactory : IPageFactory
|
||||
{
|
||||
@@ -61,6 +62,8 @@
|
||||
|
||||
var contents = contentStream.Decode(filterProvider);
|
||||
|
||||
var texty = OtherEncodings.BytesAsLatin1String(contents);
|
||||
|
||||
var operations = pageContentParser.Parse(new ByteArrayInputBytes(contents));
|
||||
|
||||
var context = new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit);
|
||||
|
@@ -134,6 +134,35 @@
|
||||
return new TransformationMatrix(result);
|
||||
}
|
||||
|
||||
/// <summary>
/// Gets the horizontal scaling factor of this matrix.
/// </summary>
/// <remarks>
/// When both <c>B</c> and <c>C</c> are zero the value of <c>A</c> is returned unchanged.
/// Otherwise the matrix is treated as a rotation multiplied by a scale:
/// <code>
/// ( x 0 0)   ( cos sin 0)   ( x*cos x*sin 0)
/// ( 0 y 0) * (-sin cos 0) = (-y*sin y*cos 0)
/// ( 0 0 1)   (   0   0 1)   (     0     0 1)
/// </code>
/// so A = x*cos and B = x*sin, and because cos^2 + sin^2 = 1:
/// sqrt(A^2 + B^2) = sqrt(x^2 * (cos^2 + sin^2)) = sqrt(x^2) = abs(x).
/// </remarks>
/// <returns>The scale applied along the x axis.</returns>
public decimal GetScalingFactorX()
{
    // No off-diagonal components: A already is the x scale.
    if (B == 0m && C == 0m)
    {
        return A;
    }

    // The square root is computed in double precision and converted back to decimal.
    var sumOfSquares = A*A + B*B;

    return (decimal)Math.Sqrt((double)sumOfSquares);
}
|
||||
|
||||
public override bool Equals(object obj)
|
||||
{
|
||||
if (!(obj is TransformationMatrix m))
|
||||
|
@@ -1,5 +1,6 @@
|
||||
namespace UglyToad.Pdf.Fonts.CidFonts
|
||||
{
|
||||
using Core;
|
||||
using Cos;
|
||||
|
||||
/// <summary>
|
||||
@@ -32,6 +33,8 @@
|
||||
/// </summary>
|
||||
CharacterIdentifierSystemInfo SystemInfo { get; }
|
||||
|
||||
TransformationMatrix FontMatrix { get; }
|
||||
|
||||
CidFontType CidFontType { get; }
|
||||
|
||||
FontDescriptor Descriptor { get; }
|
||||
|
@@ -1,5 +1,6 @@
|
||||
namespace UglyToad.Pdf.Fonts.CidFonts
|
||||
{
|
||||
using Core;
|
||||
using Cos;
|
||||
|
||||
/// <inheritdoc/>
|
||||
@@ -13,9 +14,15 @@
|
||||
public CosName SubType { get; }
|
||||
public CosName BaseFont { get; }
|
||||
public CharacterIdentifierSystemInfo SystemInfo { get; }
|
||||
public TransformationMatrix FontMatrix { get; }
|
||||
public CidFontType CidFontType => CidFontType.Type0;
|
||||
public FontDescriptor Descriptor { get; }
|
||||
|
||||
/// <summary>
/// Placeholder constructor: always throws <see cref="System.NotImplementedException"/>.
/// </summary>
public Type0CidFont() => throw new System.NotImplementedException();
|
||||
|
||||
public decimal GetWidthFromFont(int characterCode)
|
||||
{
|
||||
throw new System.NotImplementedException();
|
||||
|
@@ -1,6 +1,7 @@
|
||||
namespace UglyToad.Pdf.Fonts.CidFonts
|
||||
{
|
||||
using System.Collections.Generic;
|
||||
using Core;
|
||||
using Cos;
|
||||
|
||||
/// <inheritdoc />
|
||||
@@ -18,6 +19,7 @@
|
||||
public CosName SubType { get; }
|
||||
public CosName BaseFont { get; }
|
||||
public CharacterIdentifierSystemInfo SystemInfo { get; }
|
||||
public TransformationMatrix FontMatrix { get; }
|
||||
public CidFontType CidFontType => CidFontType.Type2;
|
||||
public FontDescriptor Descriptor { get; }
|
||||
|
||||
@@ -34,10 +36,15 @@
|
||||
this.fontProgram = fontProgram;
|
||||
this.verticalWritingMetrics = verticalWritingMetrics;
|
||||
this.widths = widths;
|
||||
|
||||
// TODO: This should maybe take units per em into account?
|
||||
var scale = 1 / 1000m;
|
||||
FontMatrix = TransformationMatrix.FromValues(scale, 0, 0, scale, 0, 0);
|
||||
}
|
||||
|
||||
public decimal GetWidthFromFont(int characterCode)
|
||||
{
|
||||
// TODO: Read the font width from the font program.
|
||||
throw new System.NotImplementedException();
|
||||
}
|
||||
|
||||
|
@@ -3,6 +3,7 @@
|
||||
using System;
|
||||
using CidFonts;
|
||||
using Cmap;
|
||||
using Core;
|
||||
using Cos;
|
||||
using Geometry;
|
||||
using IO;
|
||||
@@ -84,5 +85,10 @@
|
||||
|
||||
return fromFont;
|
||||
}
|
||||
|
||||
/// <summary>
/// Gets the font matrix exposed by the underlying <c>CidFont</c>.
/// </summary>
public TransformationMatrix GetFontMatrix() => CidFont.FontMatrix;
|
||||
}
|
||||
}
|
||||
|
@@ -1,5 +1,6 @@
|
||||
namespace UglyToad.Pdf.Fonts
|
||||
{
|
||||
using Core;
|
||||
using Cos;
|
||||
using Geometry;
|
||||
using IO;
|
||||
@@ -17,5 +18,7 @@
|
||||
PdfVector GetDisplacement(int characterCode);
|
||||
|
||||
decimal GetWidth(int characterCode);
|
||||
|
||||
TransformationMatrix GetFontMatrix();
|
||||
}
|
||||
}
|
||||
|
@@ -3,6 +3,7 @@
|
||||
using System;
|
||||
using Cmap;
|
||||
using Composite;
|
||||
using Core;
|
||||
using Cos;
|
||||
using Encodings;
|
||||
using Geometry;
|
||||
@@ -11,6 +12,8 @@
|
||||
|
||||
internal class TrueTypeSimpleFont : IFont
|
||||
{
|
||||
private static readonly TransformationMatrix FontMatrix =
|
||||
TransformationMatrix.FromValues(1/1000m, 0, 0, 1/1000m, 0, 0);
|
||||
private readonly int firstCharacterCode;
|
||||
private readonly int lastCharacterCode;
|
||||
private readonly decimal[] widths;
|
||||
@@ -108,5 +111,11 @@
|
||||
|
||||
return widths[index];
|
||||
}
|
||||
|
||||
/// <summary>
/// Gets the static <c>FontMatrix</c> field of this class.
/// </summary>
// TODO: should this also use units per em?
public TransformationMatrix GetFontMatrix() => FontMatrix;
|
||||
}
|
||||
}
|
||||
|
@@ -89,11 +89,13 @@
|
||||
}
|
||||
|
||||
var fontSize = currentState.FontState.FontSize;
|
||||
var horizontalScaling = currentState.FontState.HorizontalScaling;
|
||||
var horizontalScaling = currentState.FontState.HorizontalScaling / 100m;
|
||||
var characterSpacing = currentState.FontState.CharacterSpacing;
|
||||
|
||||
var transformationMatrix = currentState.CurrentTransformationMatrix;
|
||||
|
||||
var fontMatrix = font.GetFontMatrix();
|
||||
|
||||
// TODO: this does not seem correct, produces the correct result for now but we need to revisit.
|
||||
// see: https://stackoverflow.com/questions/48010235/pdf-specification-get-font-size-in-points
|
||||
var pointSize = decimal.Round(fontSize * transformationMatrix.A, 2);
|
||||
@@ -114,12 +116,12 @@
|
||||
|
||||
// Fonts reporting IsVertical are rejected up front with an explanatory exception.
if (font.IsVertical)
{
    throw new NotImplementedException("Vertical fonts are currently unsupported, please submit a pull request or issue with an example file.");
}
|
||||
|
||||
var displacement = font.GetDisplacement(code);
|
||||
|
||||
var width = (displacement.X * fontSize) * transformationMatrix.A;
|
||||
|
||||
var width = displacement.X * fontSize * TextMatrices.TextMatrix.GetScalingFactorX() * transformationMatrix.A;
|
||||
|
||||
ShowGlyph(renderingMatrix, font, code, unicode, width, fontSize, pointSize);
|
||||
|
||||
@@ -148,7 +150,7 @@
|
||||
var textState = currentState.FontState;
|
||||
|
||||
var fontSize = textState.FontSize;
|
||||
var horizontalScaling = textState.HorizontalScaling;
|
||||
var horizontalScaling = textState.HorizontalScaling/100m;
|
||||
var font = resourceStore.GetFont(textState.FontName);
|
||||
|
||||
var isVertical = font.IsVertical;
|
||||
|
Reference in New Issue
Block a user