fix bug with computing text positions

This commit is contained in:
Eliot Jones
2018-01-02 22:23:08 +00:00
parent d03c04cca1
commit 5ab8d69ea5
11 changed files with 136 additions and 20 deletions

View File

@@ -1,6 +1,7 @@
namespace UglyToad.Pdf.Tests.Integration namespace UglyToad.Pdf.Tests.Integration
{ {
using System; using System;
using System.Collections.Generic;
using System.IO; using System.IO;
using System.Linq; using System.Linq;
using Content; using Content;
@@ -51,5 +52,66 @@
Assert.Equal("Hello ﺪﻤﺤﻣ World. ", text); Assert.Equal("Hello ﺪﻤﺤﻣ World. ", text);
} }
} }
/// <summary>
/// Walks the letters of page 1 and compares each letter's text and X location
/// with the reference entries returned by <c>GetPdfBoxPositionData</c>.
/// </summary>
/// <remarks>
/// A page letter that is a single space while the current reference entry is not
/// is skipped without advancing through the reference data. Comparison stops as
/// soon as every reference entry has been matched.
/// </remarks>
[Fact]
public void LetterPositionsAreCorrectPdfBox()
{
    using (var document = PdfDocument.Open(GetFilename()))
    {
        var page = document.GetPage(1);
        var expectedLetters = GetPdfBoxPositionData();

        // Position of the next reference entry still waiting to be matched.
        var cursor = 0;

        foreach (var letter in page.Letters)
        {
            if (cursor >= expectedLetters.Count)
            {
                // All reference entries consumed; remaining page letters are ignored.
                break;
            }

            var actualX = letter.Location.X;
            var reference = expectedLetters[cursor];
            var expectedX = reference.X;
            var actualText = letter.Value;
            var expectedText = reference.Text;

            // Extra space on our side only: move to the next page letter, keep the cursor.
            var isUnmatchedSpace = actualText == " " && expectedText != " ";
            if (isUnmatchedSpace)
            {
                continue;
            }

            Assert.Equal(expectedText, actualText);

            // X locations must agree to 2 decimal places.
            Assert.Equal(expectedX, actualX, 2);

            cursor++;
        }
    }
}
/// <summary>
/// Builds the reference letter data used by <c>LetterPositionsAreCorrectPdfBox</c>
/// from a table embedded in this method, one entry per line.
/// </summary>
/// <returns>
/// One <c>AssertablePositionData</c> per non-empty line of the embedded table,
/// in the order the lines appear.
/// </returns>
// NOTE(review): the method name suggests the values were exported from PdfBox, and the
// columns look like "x, y, width, letter, font size, font name" - confirm both against
// AssertablePositionData.Parse, which is not visible here.
private static IReadOnlyList<AssertablePositionData> GetPdfBoxPositionData()
{
// Verbatim string: the line breaks inside the literal are what separate the entries.
const string data = @"90 90.65997 14.42556 H 19 FFJICI+TimesNewRomanPSMT
104.4395 90.65997 8.871117 e 19 FFJICI+TimesNewRomanPSMT
113.3247 90.65997 5.554443 l 19 FFJICI+TimesNewRomanPSMT
118.8931 90.65997 5.554443 l 19 FFJICI+TimesNewRomanPSMT
124.4615 90.65997 9.989998 o 19 FFJICI+TimesNewRomanPSMT
139.4505 90.65997 6.733261 ﺪ 19 FFJIAH+TimesNewRomanPSMT
146.1778 90.65997 7.872116 ﻤ 19 FFJIAH+TimesNewRomanPSMT
154.0439 90.65997 10.5894 ﺤ 19 FFJIAH+TimesNewRomanPSMT
164.6273 90.65997 7.872116 ﻣ 19 FFJIAH+TimesNewRomanPSMT
177.4964 90.65997 18.86111 W 19 FFJICI+TimesNewRomanPSMT
196.3575 90.65997 9.990005 o 19 FFJICI+TimesNewRomanPSMT
206.4275 90.65997 6.653336 r 19 FFJICI+TimesNewRomanPSMT
213.0808 90.65997 5.554443 l 19 FFJICI+TimesNewRomanPSMT
218.6352 90.65997 9.990005 d 19 FFJICI+TimesNewRomanPSMT
228.6252 90.65997 4.994995 . 19 FFJICI+TimesNewRomanPSMT";
// Split on any line-ending style; RemoveEmptyEntries drops the empty piece that
// appears between "\r" and "\n" when a "\r\n" pair is split as two separators.
var result = data.Split(new[] {"\r", "\n", "\r\n"}, StringSplitOptions.RemoveEmptyEntries)
.Select(AssertablePositionData.Parse)
.ToList();
return result;
}
} }
} }

View File

@@ -1,15 +0,0 @@
128 64 32 16 8 4 2 1
0 0 0 0 | 0 0 0 0
03
0 0 0 0 | 0 0 1 1
20
0 0 1 0 | 0 0 0 0
37
0 0 1 1 | 0 1 1 1
54
0 1 0 1 | 0 1 0 0
41
0 1 0 0 | 0 0 0 1

View File

@@ -8,6 +8,7 @@
using Graphics; using Graphics;
using IO; using IO;
using Parser; using Parser;
using Util;
internal class PageFactory : IPageFactory internal class PageFactory : IPageFactory
{ {
@@ -61,6 +62,8 @@
var contents = contentStream.Decode(filterProvider); var contents = contentStream.Decode(filterProvider);
var texty = OtherEncodings.BytesAsLatin1String(contents);
var operations = pageContentParser.Parse(new ByteArrayInputBytes(contents)); var operations = pageContentParser.Parse(new ByteArrayInputBytes(contents));
var context = new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit); var context = new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit);

View File

@@ -134,6 +134,35 @@
return new TransformationMatrix(result); return new TransformationMatrix(result);
} }
/// <summary>
/// Gets the scaling factor along the x axis described by this matrix.
/// </summary>
/// <returns>
/// <c>A</c> unchanged when both <c>B</c> and <c>C</c> are zero; otherwise the
/// square root of <c>A*A + B*B</c>.
/// </returns>
public decimal GetScalingFactorX()
{
    // Both off-diagonal terms are zero, so A is returned directly (sign included).
    if (B == 0m && C == 0m)
    {
        return A;
    }

    // If the matrix is rotated the calculation is a little more complicated.
    //
    // A rotation matrix multiplied with a scaling matrix is:
    //   ( x 0 0)   ( cos sin 0)   ( x*cos x*sin 0)
    //   ( 0 y 0) * (-sin cos 0) = (-y*sin y*cos 0)
    //   ( 0 0 1)   (  0   0  1)   (   0     0   1)
    //
    // To recover x, take M(0,0) = x*cos and M(0,1) = x*sin and apply Pythagoras:
    //   sqrt(M(0,0)^2 + M(0,1)^2)
    //     = sqrt(x^2*cos^2 + x^2*sin^2)
    //     = sqrt(x^2 * (cos^2 + sin^2))     (cos^2 + sin^2 is one)
    //     = sqrt(x^2)
    //     = abs(x)
    var sumOfSquares = A*A + B*B;

    // Math.Sqrt only works on double, hence the round trip through double.
    return (decimal)Math.Sqrt((double)sumOfSquares);
}
public override bool Equals(object obj) public override bool Equals(object obj)
{ {
if (!(obj is TransformationMatrix m)) if (!(obj is TransformationMatrix m))

View File

@@ -1,5 +1,6 @@
namespace UglyToad.Pdf.Fonts.CidFonts namespace UglyToad.Pdf.Fonts.CidFonts
{ {
using Core;
using Cos; using Cos;
/// <summary> /// <summary>
@@ -32,6 +33,8 @@
/// </summary> /// </summary>
CharacterIdentifierSystemInfo SystemInfo { get; } CharacterIdentifierSystemInfo SystemInfo { get; }
TransformationMatrix FontMatrix { get; }
CidFontType CidFontType { get; } CidFontType CidFontType { get; }
FontDescriptor Descriptor { get; } FontDescriptor Descriptor { get; }

View File

@@ -1,5 +1,6 @@
namespace UglyToad.Pdf.Fonts.CidFonts namespace UglyToad.Pdf.Fonts.CidFonts
{ {
using Core;
using Cos; using Cos;
/// <inheritdoc/> /// <inheritdoc/>
@@ -13,9 +14,15 @@
public CosName SubType { get; } public CosName SubType { get; }
public CosName BaseFont { get; } public CosName BaseFont { get; }
public CharacterIdentifierSystemInfo SystemInfo { get; } public CharacterIdentifierSystemInfo SystemInfo { get; }
public TransformationMatrix FontMatrix { get; }
public CidFontType CidFontType => CidFontType.Type0; public CidFontType CidFontType => CidFontType.Type0;
public FontDescriptor Descriptor { get; } public FontDescriptor Descriptor { get; }
/// <summary>
/// Placeholder constructor: creating a <see cref="Type0CidFont"/> is not implemented
/// and always throws.
/// </summary>
/// <exception cref="System.NotImplementedException">Always thrown.</exception>
public Type0CidFont() => throw new System.NotImplementedException();
public decimal GetWidthFromFont(int characterCode) public decimal GetWidthFromFont(int characterCode)
{ {
throw new System.NotImplementedException(); throw new System.NotImplementedException();

View File

@@ -1,6 +1,7 @@
namespace UglyToad.Pdf.Fonts.CidFonts namespace UglyToad.Pdf.Fonts.CidFonts
{ {
using System.Collections.Generic; using System.Collections.Generic;
using Core;
using Cos; using Cos;
/// <inheritdoc /> /// <inheritdoc />
@@ -18,6 +19,7 @@
public CosName SubType { get; } public CosName SubType { get; }
public CosName BaseFont { get; } public CosName BaseFont { get; }
public CharacterIdentifierSystemInfo SystemInfo { get; } public CharacterIdentifierSystemInfo SystemInfo { get; }
public TransformationMatrix FontMatrix { get; }
public CidFontType CidFontType => CidFontType.Type2; public CidFontType CidFontType => CidFontType.Type2;
public FontDescriptor Descriptor { get; } public FontDescriptor Descriptor { get; }
@@ -34,10 +36,15 @@
this.fontProgram = fontProgram; this.fontProgram = fontProgram;
this.verticalWritingMetrics = verticalWritingMetrics; this.verticalWritingMetrics = verticalWritingMetrics;
this.widths = widths; this.widths = widths;
// TODO: This should maybe take units per em into account?
var scale = 1 / 1000m;
FontMatrix = TransformationMatrix.FromValues(scale, 0, 0, scale, 0, 0);
} }
public decimal GetWidthFromFont(int characterCode) public decimal GetWidthFromFont(int characterCode)
{ {
// TODO: Read the font width from the font program.
throw new System.NotImplementedException(); throw new System.NotImplementedException();
} }

View File

@@ -3,6 +3,7 @@
using System; using System;
using CidFonts; using CidFonts;
using Cmap; using Cmap;
using Core;
using Cos; using Cos;
using Geometry; using Geometry;
using IO; using IO;
@@ -84,5 +85,10 @@
return fromFont; return fromFont;
} }
/// <summary>
/// Gets the font matrix of this font, taken from its <c>CidFont</c>.
/// </summary>
/// <returns>The value of <c>CidFont.FontMatrix</c>.</returns>
public TransformationMatrix GetFontMatrix() => CidFont.FontMatrix;
} }
} }

View File

@@ -1,5 +1,6 @@
namespace UglyToad.Pdf.Fonts namespace UglyToad.Pdf.Fonts
{ {
using Core;
using Cos; using Cos;
using Geometry; using Geometry;
using IO; using IO;
@@ -17,5 +18,7 @@
PdfVector GetDisplacement(int characterCode); PdfVector GetDisplacement(int characterCode);
decimal GetWidth(int characterCode); decimal GetWidth(int characterCode);
TransformationMatrix GetFontMatrix();
} }
} }

View File

@@ -3,6 +3,7 @@
using System; using System;
using Cmap; using Cmap;
using Composite; using Composite;
using Core;
using Cos; using Cos;
using Encodings; using Encodings;
using Geometry; using Geometry;
@@ -11,6 +12,8 @@
internal class TrueTypeSimpleFont : IFont internal class TrueTypeSimpleFont : IFont
{ {
// Single shared matrix built from the values (1/1000, 0, 0, 1/1000, 0, 0); this is
// what GetFontMatrix returns for every instance of this font type.
// NOTE(review): presumably a uniform 1/1000 scale on both axes - confirm against the
// parameter order of TransformationMatrix.FromValues.
private static readonly TransformationMatrix FontMatrix =
TransformationMatrix.FromValues(1/1000m, 0, 0, 1/1000m, 0, 0);
private readonly int firstCharacterCode; private readonly int firstCharacterCode;
private readonly int lastCharacterCode; private readonly int lastCharacterCode;
private readonly decimal[] widths; private readonly decimal[] widths;
@@ -108,5 +111,11 @@
return widths[index]; return widths[index];
} }
/// <summary>
/// Gets the font matrix for this font: the fixed, class-wide <c>FontMatrix</c> value.
/// </summary>
/// <returns>The shared static <c>FontMatrix</c>.</returns>
// TODO: should this also use units per em?
public TransformationMatrix GetFontMatrix() => FontMatrix;
} }
} }

View File

@@ -89,11 +89,13 @@
} }
var fontSize = currentState.FontState.FontSize; var fontSize = currentState.FontState.FontSize;
var horizontalScaling = currentState.FontState.HorizontalScaling; var horizontalScaling = currentState.FontState.HorizontalScaling / 100m;
var characterSpacing = currentState.FontState.CharacterSpacing; var characterSpacing = currentState.FontState.CharacterSpacing;
var transformationMatrix = currentState.CurrentTransformationMatrix; var transformationMatrix = currentState.CurrentTransformationMatrix;
var fontMatrix = font.GetFontMatrix();
// TODO: this does not seem correct, produces the correct result for now but we need to revisit. // TODO: this does not seem correct, produces the correct result for now but we need to revisit.
// see: https://stackoverflow.com/questions/48010235/pdf-specification-get-font-size-in-points // see: https://stackoverflow.com/questions/48010235/pdf-specification-get-font-size-in-points
var pointSize = decimal.Round(fontSize * transformationMatrix.A, 2); var pointSize = decimal.Round(fontSize * transformationMatrix.A, 2);
@@ -114,12 +116,12 @@
if (font.IsVertical) if (font.IsVertical)
{ {
throw new NotImplementedException("Vertical fonts are currently unsupported, please submit a pull request or issue with an example file."); throw new NotImplementedException("Vertical fonts are currently unsupported, please submit a pull request or issue with an example file.");
} }
var displacement = font.GetDisplacement(code); var displacement = font.GetDisplacement(code);
var width = (displacement.X * fontSize) * transformationMatrix.A; var width = displacement.X * fontSize * TextMatrices.TextMatrix.GetScalingFactorX() * transformationMatrix.A;
ShowGlyph(renderingMatrix, font, code, unicode, width, fontSize, pointSize); ShowGlyph(renderingMatrix, font, code, unicode, width, fontSize, pointSize);
@@ -148,7 +150,7 @@
var textState = currentState.FontState; var textState = currentState.FontState;
var fontSize = textState.FontSize; var fontSize = textState.FontSize;
var horizontalScaling = textState.HorizontalScaling; var horizontalScaling = textState.HorizontalScaling/100m;
var font = resourceStore.GetFont(textState.FontName); var font = resourceStore.GetFont(textState.FontName);
var isVertical = font.IsVertical; var isVertical = font.IsVertical;