mirror of
https://github.com/UglyToad/PdfPig.git
synced 2026-03-10 00:23:29 +08:00
fix bug with computing text positions
This commit is contained in:
@@ -1,6 +1,7 @@
|
|||||||
namespace UglyToad.Pdf.Tests.Integration
|
namespace UglyToad.Pdf.Tests.Integration
|
||||||
{
|
{
|
||||||
using System;
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
using System.IO;
|
using System.IO;
|
||||||
using System.Linq;
|
using System.Linq;
|
||||||
using Content;
|
using Content;
|
||||||
@@ -51,5 +52,66 @@
|
|||||||
Assert.Equal("Hello ﺪﻤﺤﻣ World. ", text);
|
Assert.Equal("Hello ﺪﻤﺤﻣ World. ", text);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Compares the letters of page 1 with the reference entries returned by
/// <see cref="GetPdfBoxPositionData"/>: the text of each compared letter must match exactly
/// and its X coordinate must agree with the reference to 2 decimal places.
/// </summary>
[Fact]
public void LetterPositionsAreCorrectPdfBox()
{
    using (var document = PdfDocument.Open(GetFilename()))
    {
        var page = document.GetPage(1);

        var pdfBoxData = GetPdfBoxPositionData();

        var index = 0;
        foreach (var pageLetter in page.Letters)
        {
            // Letters beyond the end of the reference data are not compared.
            if (index >= pdfBoxData.Count)
            {
                break;
            }

            var expected = pdfBoxData[index];
            var myLetter = pageLetter.Value;

            // The page can yield a space where the reference lists a non-space glyph;
            // skip that letter without consuming a reference entry.
            if (myLetter == " " && expected.Text != " ")
            {
                continue;
            }

            Assert.Equal(expected.Text, myLetter);
            Assert.Equal(expected.X, pageLetter.Location.X, 2);

            index++;
        }

        // Guard against a vacuous pass: if the page produced no letters (or fewer than
        // the reference lists) the loop above would finish without comparing everything.
        Assert.Equal(pdfBoxData.Count, index);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Builds the reference position entries used by the letter position test.
/// </summary>
/// <remarks>
/// Each non-empty line of the embedded text is handed to <c>AssertablePositionData.Parse</c>.
/// NOTE(review): the columns look like x, y, width, text, font size and font name - confirm
/// against the parser.
/// </remarks>
/// <returns>One parsed entry per non-empty line, in the order the lines appear.</returns>
private static IReadOnlyList<AssertablePositionData> GetPdfBoxPositionData()
{
    const string referenceRows = @"90 90.65997 14.42556 H 19 FFJICI+TimesNewRomanPSMT
104.4395 90.65997 8.871117 e 19 FFJICI+TimesNewRomanPSMT
113.3247 90.65997 5.554443 l 19 FFJICI+TimesNewRomanPSMT
118.8931 90.65997 5.554443 l 19 FFJICI+TimesNewRomanPSMT
124.4615 90.65997 9.989998 o 19 FFJICI+TimesNewRomanPSMT
139.4505 90.65997 6.733261 ﺪ 19 FFJIAH+TimesNewRomanPSMT
146.1778 90.65997 7.872116 ﻤ 19 FFJIAH+TimesNewRomanPSMT
154.0439 90.65997 10.5894 ﺤ 19 FFJIAH+TimesNewRomanPSMT
164.6273 90.65997 7.872116 ﻣ 19 FFJIAH+TimesNewRomanPSMT
177.4964 90.65997 18.86111 W 19 FFJICI+TimesNewRomanPSMT
196.3575 90.65997 9.990005 o 19 FFJICI+TimesNewRomanPSMT
206.4275 90.65997 6.653336 r 19 FFJICI+TimesNewRomanPSMT
213.0808 90.65997 5.554443 l 19 FFJICI+TimesNewRomanPSMT
218.6352 90.65997 9.990005 d 19 FFJICI+TimesNewRomanPSMT
228.6252 90.65997 4.994995 . 19 FFJICI+TimesNewRomanPSMT";

    // Accept any line-ending convention; empty fragments produced by "\r\n" pairs are dropped.
    var lineBreaks = new[] {"\r", "\n", "\r\n"};
    var rows = referenceRows.Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries);

    var parsed = new List<AssertablePositionData>();
    foreach (var row in rows)
    {
        parsed.Add(AssertablePositionData.Parse(row));
    }

    return parsed;
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,15 +0,0 @@
|
|||||||
|
|
||||||
128 64 32 16 8 4 2 1
|
|
||||||
0 0 0 0 | 0 0 0 0
|
|
||||||
03
|
|
||||||
0 0 0 0 | 0 0 1 1
|
|
||||||
20
|
|
||||||
0 0 1 0 | 0 0 0 0
|
|
||||||
|
|
||||||
37
|
|
||||||
0 0 1 1 | 0 1 1 1
|
|
||||||
54
|
|
||||||
0 1 0 1 | 0 1 0 0
|
|
||||||
|
|
||||||
41
|
|
||||||
0 1 0 0 | 0 0 0 1
|
|
||||||
@@ -8,6 +8,7 @@
|
|||||||
using Graphics;
|
using Graphics;
|
||||||
using IO;
|
using IO;
|
||||||
using Parser;
|
using Parser;
|
||||||
|
using Util;
|
||||||
|
|
||||||
internal class PageFactory : IPageFactory
|
internal class PageFactory : IPageFactory
|
||||||
{
|
{
|
||||||
@@ -61,6 +62,8 @@
|
|||||||
|
|
||||||
var contents = contentStream.Decode(filterProvider);
|
var contents = contentStream.Decode(filterProvider);
|
||||||
|
|
||||||
|
var texty = OtherEncodings.BytesAsLatin1String(contents);
|
||||||
|
|
||||||
var operations = pageContentParser.Parse(new ByteArrayInputBytes(contents));
|
var operations = pageContentParser.Parse(new ByteArrayInputBytes(contents));
|
||||||
|
|
||||||
var context = new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit);
|
var context = new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit);
|
||||||
|
|||||||
@@ -134,6 +134,35 @@
|
|||||||
return new TransformationMatrix(result);
|
return new TransformationMatrix(result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Gets the scaling factor this matrix applies along the X axis.
/// </summary>
/// <returns>
/// <c>A</c> unchanged (sign included) when both <c>B</c> and <c>C</c> are zero;
/// otherwise the square root of <c>A*A + B*B</c>.
/// </returns>
public decimal GetScalingFactorX()
{
    // With no off-diagonal terms the X scale can be read straight from A.
    if (B == 0m && C == 0m)
    {
        return A;
    }

    // For a rotated matrix the first row is (x*cos, x*sin), the product of a scaling
    // matrix diag(x, y, 1) and a rotation matrix. By Pythagoras:
    //   sqrt((x*cos)^2 + (x*sin)^2) = sqrt(x^2 * (cos^2 + sin^2)) = sqrt(x^2) = abs(x)
    // so the magnitude of the X scale is recovered from A and B.
    var sumOfSquares = A * A + B * B;

    return (decimal)Math.Sqrt((double)sumOfSquares);
}
|
||||||
|
|
||||||
public override bool Equals(object obj)
|
public override bool Equals(object obj)
|
||||||
{
|
{
|
||||||
if (!(obj is TransformationMatrix m))
|
if (!(obj is TransformationMatrix m))
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
namespace UglyToad.Pdf.Fonts.CidFonts
|
namespace UglyToad.Pdf.Fonts.CidFonts
|
||||||
{
|
{
|
||||||
|
using Core;
|
||||||
using Cos;
|
using Cos;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
@@ -32,6 +33,8 @@
|
|||||||
/// </summary>
|
/// </summary>
|
||||||
CharacterIdentifierSystemInfo SystemInfo { get; }
|
CharacterIdentifierSystemInfo SystemInfo { get; }
|
||||||
|
|
||||||
|
TransformationMatrix FontMatrix { get; }
|
||||||
|
|
||||||
CidFontType CidFontType { get; }
|
CidFontType CidFontType { get; }
|
||||||
|
|
||||||
FontDescriptor Descriptor { get; }
|
FontDescriptor Descriptor { get; }
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
namespace UglyToad.Pdf.Fonts.CidFonts
|
namespace UglyToad.Pdf.Fonts.CidFonts
|
||||||
{
|
{
|
||||||
|
using Core;
|
||||||
using Cos;
|
using Cos;
|
||||||
|
|
||||||
/// <inheritdoc/>
|
/// <inheritdoc/>
|
||||||
@@ -13,9 +14,15 @@
|
|||||||
public CosName SubType { get; }
|
public CosName SubType { get; }
|
||||||
public CosName BaseFont { get; }
|
public CosName BaseFont { get; }
|
||||||
public CharacterIdentifierSystemInfo SystemInfo { get; }
|
public CharacterIdentifierSystemInfo SystemInfo { get; }
|
||||||
|
public TransformationMatrix FontMatrix { get; }
|
||||||
public CidFontType CidFontType => CidFontType.Type0;
|
public CidFontType CidFontType => CidFontType.Type0;
|
||||||
public FontDescriptor Descriptor { get; }
|
public FontDescriptor Descriptor { get; }
|
||||||
|
|
||||||
|
/// <summary>
/// Placeholder constructor; it always throws.
/// </summary>
/// <exception cref="System.NotImplementedException">Thrown on every call.</exception>
public Type0CidFont() => throw new System.NotImplementedException();
|
||||||
|
|
||||||
public decimal GetWidthFromFont(int characterCode)
|
public decimal GetWidthFromFont(int characterCode)
|
||||||
{
|
{
|
||||||
throw new System.NotImplementedException();
|
throw new System.NotImplementedException();
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
namespace UglyToad.Pdf.Fonts.CidFonts
|
namespace UglyToad.Pdf.Fonts.CidFonts
|
||||||
{
|
{
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
|
using Core;
|
||||||
using Cos;
|
using Cos;
|
||||||
|
|
||||||
/// <inheritdoc />
|
/// <inheritdoc />
|
||||||
@@ -18,6 +19,7 @@
|
|||||||
public CosName SubType { get; }
|
public CosName SubType { get; }
|
||||||
public CosName BaseFont { get; }
|
public CosName BaseFont { get; }
|
||||||
public CharacterIdentifierSystemInfo SystemInfo { get; }
|
public CharacterIdentifierSystemInfo SystemInfo { get; }
|
||||||
|
public TransformationMatrix FontMatrix { get; }
|
||||||
public CidFontType CidFontType => CidFontType.Type2;
|
public CidFontType CidFontType => CidFontType.Type2;
|
||||||
public FontDescriptor Descriptor { get; }
|
public FontDescriptor Descriptor { get; }
|
||||||
|
|
||||||
@@ -34,10 +36,15 @@
|
|||||||
this.fontProgram = fontProgram;
|
this.fontProgram = fontProgram;
|
||||||
this.verticalWritingMetrics = verticalWritingMetrics;
|
this.verticalWritingMetrics = verticalWritingMetrics;
|
||||||
this.widths = widths;
|
this.widths = widths;
|
||||||
|
|
||||||
|
// TODO: This should maybe take units per em into account?
|
||||||
|
var scale = 1 / 1000m;
|
||||||
|
FontMatrix = TransformationMatrix.FromValues(scale, 0, 0, scale, 0, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
public decimal GetWidthFromFont(int characterCode)
|
public decimal GetWidthFromFont(int characterCode)
|
||||||
{
|
{
|
||||||
|
// TODO: Read the font width from the font program.
|
||||||
throw new System.NotImplementedException();
|
throw new System.NotImplementedException();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
using System;
|
using System;
|
||||||
using CidFonts;
|
using CidFonts;
|
||||||
using Cmap;
|
using Cmap;
|
||||||
|
using Core;
|
||||||
using Cos;
|
using Cos;
|
||||||
using Geometry;
|
using Geometry;
|
||||||
using IO;
|
using IO;
|
||||||
@@ -84,5 +85,10 @@
|
|||||||
|
|
||||||
return fromFont;
|
return fromFont;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Gets the font matrix exposed by the underlying <c>CidFont</c>.
/// </summary>
/// <returns>The value of <c>CidFont.FontMatrix</c>.</returns>
public TransformationMatrix GetFontMatrix() => CidFont.FontMatrix;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
namespace UglyToad.Pdf.Fonts
|
namespace UglyToad.Pdf.Fonts
|
||||||
{
|
{
|
||||||
|
using Core;
|
||||||
using Cos;
|
using Cos;
|
||||||
using Geometry;
|
using Geometry;
|
||||||
using IO;
|
using IO;
|
||||||
@@ -17,5 +18,7 @@
|
|||||||
PdfVector GetDisplacement(int characterCode);
|
PdfVector GetDisplacement(int characterCode);
|
||||||
|
|
||||||
decimal GetWidth(int characterCode);
|
decimal GetWidth(int characterCode);
|
||||||
|
|
||||||
|
TransformationMatrix GetFontMatrix();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
using System;
|
using System;
|
||||||
using Cmap;
|
using Cmap;
|
||||||
using Composite;
|
using Composite;
|
||||||
|
using Core;
|
||||||
using Cos;
|
using Cos;
|
||||||
using Encodings;
|
using Encodings;
|
||||||
using Geometry;
|
using Geometry;
|
||||||
@@ -11,6 +12,8 @@
|
|||||||
|
|
||||||
internal class TrueTypeSimpleFont : IFont
|
internal class TrueTypeSimpleFont : IFont
|
||||||
{
|
{
|
||||||
|
private static readonly TransformationMatrix FontMatrix =
|
||||||
|
TransformationMatrix.FromValues(1/1000m, 0, 0, 1/1000m, 0, 0);
|
||||||
private readonly int firstCharacterCode;
|
private readonly int firstCharacterCode;
|
||||||
private readonly int lastCharacterCode;
|
private readonly int lastCharacterCode;
|
||||||
private readonly decimal[] widths;
|
private readonly decimal[] widths;
|
||||||
@@ -108,5 +111,11 @@
|
|||||||
|
|
||||||
return widths[index];
|
return widths[index];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Gets the font matrix for this font.
/// </summary>
/// <returns>The shared static <c>FontMatrix</c> instance declared on this class.</returns>
// TODO: should this also use units per em?
public TransformationMatrix GetFontMatrix() => FontMatrix;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -89,11 +89,13 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
var fontSize = currentState.FontState.FontSize;
|
var fontSize = currentState.FontState.FontSize;
|
||||||
var horizontalScaling = currentState.FontState.HorizontalScaling;
|
var horizontalScaling = currentState.FontState.HorizontalScaling / 100m;
|
||||||
var characterSpacing = currentState.FontState.CharacterSpacing;
|
var characterSpacing = currentState.FontState.CharacterSpacing;
|
||||||
|
|
||||||
var transformationMatrix = currentState.CurrentTransformationMatrix;
|
var transformationMatrix = currentState.CurrentTransformationMatrix;
|
||||||
|
|
||||||
|
var fontMatrix = font.GetFontMatrix();
|
||||||
|
|
||||||
// TODO: this does not seem correct, produces the correct result for now but we need to revisit.
|
// TODO: this does not seem correct, produces the correct result for now but we need to revisit.
|
||||||
// see: https://stackoverflow.com/questions/48010235/pdf-specification-get-font-size-in-points
|
// see: https://stackoverflow.com/questions/48010235/pdf-specification-get-font-size-in-points
|
||||||
var pointSize = decimal.Round(fontSize * transformationMatrix.A, 2);
|
var pointSize = decimal.Round(fontSize * transformationMatrix.A, 2);
|
||||||
@@ -114,12 +116,12 @@
|
|||||||
|
|
||||||
if (font.IsVertical)
|
if (font.IsVertical)
|
||||||
{
|
{
|
||||||
throw new NotImplementedException("Vertical fonts are currently unsupported, please submit a pull request or issue with an example file.");
|
throw new NotImplementedException("Vertical fonts are# currently unsupported, please submit a pull request or issue with an example file.");
|
||||||
}
|
}
|
||||||
|
|
||||||
var displacement = font.GetDisplacement(code);
|
var displacement = font.GetDisplacement(code);
|
||||||
|
|
||||||
var width = (displacement.X * fontSize) * transformationMatrix.A;
|
var width = displacement.X * fontSize * TextMatrices.TextMatrix.GetScalingFactorX() * transformationMatrix.A;
|
||||||
|
|
||||||
ShowGlyph(renderingMatrix, font, code, unicode, width, fontSize, pointSize);
|
ShowGlyph(renderingMatrix, font, code, unicode, width, fontSize, pointSize);
|
||||||
|
|
||||||
@@ -148,7 +150,7 @@
|
|||||||
var textState = currentState.FontState;
|
var textState = currentState.FontState;
|
||||||
|
|
||||||
var fontSize = textState.FontSize;
|
var fontSize = textState.FontSize;
|
||||||
var horizontalScaling = textState.HorizontalScaling;
|
var horizontalScaling = textState.HorizontalScaling/100m;
|
||||||
var font = resourceStore.GetFont(textState.FontName);
|
var font = resourceStore.GetFont(textState.FontName);
|
||||||
|
|
||||||
var isVertical = font.IsVertical;
|
var isVertical = font.IsVertical;
|
||||||
|
|||||||
Reference in New Issue
Block a user