diff --git a/src/UglyToad.Pdf.Tests/Integration/SinglePageNonLatinAcrobatDistillerTests.cs b/src/UglyToad.Pdf.Tests/Integration/SinglePageNonLatinAcrobatDistillerTests.cs index 15651035..b0a8e257 100644 --- a/src/UglyToad.Pdf.Tests/Integration/SinglePageNonLatinAcrobatDistillerTests.cs +++ b/src/UglyToad.Pdf.Tests/Integration/SinglePageNonLatinAcrobatDistillerTests.cs @@ -1,6 +1,7 @@ namespace UglyToad.Pdf.Tests.Integration { using System; + using System.Collections.Generic; using System.IO; using System.Linq; using Content; @@ -51,5 +52,69 @@ Assert.Equal("Hello ﺪﻤﺤﻣ World. ", text); } } + + [Fact] + public void LetterPositionsAreCorrectPdfBox() + { + using (var document = PdfDocument.Open(GetFilename())) + { + var page = document.GetPage(1); + + var pdfBoxData = GetPdfBoxPositionData(); + + var index = 0; + foreach (var pageLetter in page.Letters) + { + if (index >= pdfBoxData.Count) + { + break; + } + + var myX = pageLetter.Location.X; + var theirX = pdfBoxData[index].X; + + var myLetter = pageLetter.Value; + var theirLetter = pdfBoxData[index].Text; + + if (myLetter == " " && theirLetter != " ") + { + continue; + } + + Assert.Equal(theirLetter, myLetter); + Assert.Equal(theirX, myX, 2); + + index++; + } + + // Every expected PdfBox entry must have been consumed, otherwise a page with too few letters would pass vacuously. + Assert.Equal(pdfBoxData.Count, index); + } + } + + private static IReadOnlyList<AssertablePositionData> GetPdfBoxPositionData() + { + const string data = @"90 90.65997 14.42556 H 19 FFJICI+TimesNewRomanPSMT +104.4395 90.65997 8.871117 e 19 FFJICI+TimesNewRomanPSMT +113.3247 90.65997 5.554443 l 19 FFJICI+TimesNewRomanPSMT +118.8931 90.65997 5.554443 l 19 FFJICI+TimesNewRomanPSMT +124.4615 90.65997 9.989998 o 19 FFJICI+TimesNewRomanPSMT +139.4505 90.65997 6.733261 ﺪ 19 FFJIAH+TimesNewRomanPSMT +146.1778 90.65997 7.872116 ﻤ 19 FFJIAH+TimesNewRomanPSMT +154.0439 90.65997 10.5894 ﺤ 19 FFJIAH+TimesNewRomanPSMT +164.6273 90.65997 7.872116 ﻣ 19 FFJIAH+TimesNewRomanPSMT +177.4964 90.65997 18.86111 W 19 FFJICI+TimesNewRomanPSMT +196.3575 90.65997 9.990005 o 19 FFJICI+TimesNewRomanPSMT +206.4275 90.65997 6.653336 r 19 
FFJICI+TimesNewRomanPSMT +213.0808 90.65997 5.554443 l 19 FFJICI+TimesNewRomanPSMT +218.6352 90.65997 9.990005 d 19 FFJICI+TimesNewRomanPSMT +228.6252 90.65997 4.994995 . 19 FFJICI+TimesNewRomanPSMT"; + + var result = data.Split(new[] {"\r", "\n", "\r\n"}, StringSplitOptions.RemoveEmptyEntries) + .Select(AssertablePositionData.Parse) + .ToList(); + + return result; + } } } diff --git a/src/UglyToad.Pdf.Tests/Integration/hex mappings tounicode.txt b/src/UglyToad.Pdf.Tests/Integration/hex mappings tounicode.txt deleted file mode 100644 index a1d90da2..00000000 --- a/src/UglyToad.Pdf.Tests/Integration/hex mappings tounicode.txt +++ /dev/null @@ -1,15 +0,0 @@ - -128 64 32 16 8 4 2 1 -0 0 0 0 | 0 0 0 0 -03 -0 0 0 0 | 0 0 1 1 -20 -0 0 1 0 | 0 0 0 0 - -37 -0 0 1 1 | 0 1 1 1 -54 -0 1 0 1 | 0 1 0 0 - -41 -0 1 0 0 | 0 0 0 1 \ No newline at end of file diff --git a/src/UglyToad.Pdf/Content/PageFactory.cs b/src/UglyToad.Pdf/Content/PageFactory.cs index 6873b196..6377df3f 100644 --- a/src/UglyToad.Pdf/Content/PageFactory.cs +++ b/src/UglyToad.Pdf/Content/PageFactory.cs @@ -8,6 +8,7 @@ using Graphics; using IO; using Parser; + using Util; internal class PageFactory : IPageFactory { @@ -61,6 +62,8 @@ var contents = contentStream.Decode(filterProvider); + var texty = OtherEncodings.BytesAsLatin1String(contents); /* NOTE(review): texty is not read by any line visible in this change - looks like leftover debugging; confirm and remove */ + var operations = pageContentParser.Parse(new ByteArrayInputBytes(contents)); var context = new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit); diff --git a/src/UglyToad.Pdf/Core/TransformationMatrix.cs b/src/UglyToad.Pdf/Core/TransformationMatrix.cs index 8ff4d3ef..5b6aa317 100644 --- a/src/UglyToad.Pdf/Core/TransformationMatrix.cs +++ b/src/UglyToad.Pdf/Core/TransformationMatrix.cs @@ -134,6 +134,35 @@ return new TransformationMatrix(result); } + public decimal GetScalingFactorX() + { + var xScale = A; + + /** + * The x scale is A when B and C are both zero; if the matrix is rotated the calculation is a little more complicated. + * + * The rotation matrix multiplied with the scaling matrix 
is: + * ( x 0 0) ( cos sin 0) ( x*cos x*sin 0) + * ( 0 y 0) * (-sin cos 0) = (-y*sin y*cos 0) + * ( 0 0 1) ( 0 0 1) ( 0 0 1) + * + * So, to deduce x from the matrix, take + * M(0,0) = x*cos and M(0,1) = x*sin (A and B in the code below) and apply Pythagoras' theorem: + * + * sqrt(M(0,0)^2 + M(0,1)^2) = + * sqrt(x^2*cos^2 + x^2*sin^2) = + * sqrt(x^2*(cos^2 + sin^2)) = <- cos^2 + sin^2 is always one + * sqrt(x^2) = + * abs(x) + */ + if (!(B == 0m && C == 0m)) + { + xScale = (decimal)Math.Sqrt((double)(A*A + B*B)); + } + + return xScale; + } + public override bool Equals(object obj) { if (!(obj is TransformationMatrix m)) diff --git a/src/UglyToad.Pdf/Fonts/CidFonts/ICidFont.cs b/src/UglyToad.Pdf/Fonts/CidFonts/ICidFont.cs index 19939dc0..5798cd97 100644 --- a/src/UglyToad.Pdf/Fonts/CidFonts/ICidFont.cs +++ b/src/UglyToad.Pdf/Fonts/CidFonts/ICidFont.cs @@ -1,5 +1,6 @@ namespace UglyToad.Pdf.Fonts.CidFonts { + using Core; using Cos; /// @@ -32,6 +33,8 @@ /// CharacterIdentifierSystemInfo SystemInfo { get; } + TransformationMatrix FontMatrix { get; } + CidFontType CidFontType { get; } FontDescriptor Descriptor { get; } diff --git a/src/UglyToad.Pdf/Fonts/CidFonts/Type0CidFont.cs b/src/UglyToad.Pdf/Fonts/CidFonts/Type0CidFont.cs index 87f1e784..ba312efc 100644 --- a/src/UglyToad.Pdf/Fonts/CidFonts/Type0CidFont.cs +++ b/src/UglyToad.Pdf/Fonts/CidFonts/Type0CidFont.cs @@ -1,5 +1,6 @@ namespace UglyToad.Pdf.Fonts.CidFonts { + using Core; using Cos; /// @@ -13,9 +14,15 @@ public CosName SubType { get; } public CosName BaseFont { get; } public CharacterIdentifierSystemInfo SystemInfo { get; } + public TransformationMatrix FontMatrix { get; } public CidFontType CidFontType => CidFontType.Type0; public FontDescriptor Descriptor { get; } + public Type0CidFont() + { + throw new System.NotImplementedException(); /* NOTE(review): this constructor always throws and no visible line assigns FontMatrix - confirm this type is an intentional placeholder */ + } + public decimal GetWidthFromFont(int characterCode) { throw new System.NotImplementedException(); diff --git a/src/UglyToad.Pdf/Fonts/CidFonts/Type2CidFont.cs 
b/src/UglyToad.Pdf/Fonts/CidFonts/Type2CidFont.cs index bd2c501f..4ee8a283 100644 --- a/src/UglyToad.Pdf/Fonts/CidFonts/Type2CidFont.cs +++ b/src/UglyToad.Pdf/Fonts/CidFonts/Type2CidFont.cs @@ -1,6 +1,7 @@ namespace UglyToad.Pdf.Fonts.CidFonts { using System.Collections.Generic; + using Core; using Cos; /// @@ -18,6 +19,7 @@ public CosName SubType { get; } public CosName BaseFont { get; } public CharacterIdentifierSystemInfo SystemInfo { get; } + public TransformationMatrix FontMatrix { get; } public CidFontType CidFontType => CidFontType.Type2; public FontDescriptor Descriptor { get; } @@ -34,10 +36,15 @@ this.fontProgram = fontProgram; this.verticalWritingMetrics = verticalWritingMetrics; this.widths = widths; + + // TODO: the 1/1000 scale is hard-coded; confirm whether the font program's units per em should be used instead. + var scale = 1 / 1000m; + FontMatrix = TransformationMatrix.FromValues(scale, 0, 0, scale, 0, 0); } public decimal GetWidthFromFont(int characterCode) { + // TODO: Read the font width from the font program; until then this method always throws. throw new System.NotImplementedException(); } diff --git a/src/UglyToad.Pdf/Fonts/Composite/Type0Font.cs b/src/UglyToad.Pdf/Fonts/Composite/Type0Font.cs index bebefcaf..ac72da0e 100644 --- a/src/UglyToad.Pdf/Fonts/Composite/Type0Font.cs +++ b/src/UglyToad.Pdf/Fonts/Composite/Type0Font.cs @@ -3,6 +3,7 @@ using System; using CidFonts; using Cmap; + using Core; using Cos; using Geometry; using IO; @@ -84,5 +85,10 @@ return fromFont; } + + public TransformationMatrix GetFontMatrix() + { + return CidFont.FontMatrix; /* forwards the matrix exposed by the wrapped CidFont */ + } } } diff --git a/src/UglyToad.Pdf/Fonts/IFont.cs b/src/UglyToad.Pdf/Fonts/IFont.cs index 0eea2891..3b5e2381 100644 --- a/src/UglyToad.Pdf/Fonts/IFont.cs +++ b/src/UglyToad.Pdf/Fonts/IFont.cs @@ -1,5 +1,6 @@ namespace UglyToad.Pdf.Fonts { + using Core; using Cos; using Geometry; using IO; @@ -17,5 +18,7 @@ PdfVector GetDisplacement(int characterCode); decimal GetWidth(int characterCode); + + TransformationMatrix GetFontMatrix(); /* returns the font matrix for this font */ } } diff --git 
a/src/UglyToad.Pdf/Fonts/Simple/TrueTypeSimpleFont.cs b/src/UglyToad.Pdf/Fonts/Simple/TrueTypeSimpleFont.cs index 5948b070..28dd1f63 100644 --- a/src/UglyToad.Pdf/Fonts/Simple/TrueTypeSimpleFont.cs +++ b/src/UglyToad.Pdf/Fonts/Simple/TrueTypeSimpleFont.cs @@ -3,6 +3,7 @@ using System; using Cmap; using Composite; + using Core; using Cos; using Encodings; using Geometry; @@ -11,6 +12,8 @@ internal class TrueTypeSimpleFont : IFont { + private static readonly TransformationMatrix FontMatrix = + TransformationMatrix.FromValues(1/1000m, 0, 0, 1/1000m, 0, 0); private readonly int firstCharacterCode; private readonly int lastCharacterCode; private readonly decimal[] widths; @@ -108,5 +111,11 @@ return widths[index]; } + + public TransformationMatrix GetFontMatrix() + { + // TODO: this returns the fixed 1/1000 FontMatrix declared above; confirm whether units per em should be used instead. + return FontMatrix; + } } } diff --git a/src/UglyToad.Pdf/Graphics/ContentStreamProcessor.cs b/src/UglyToad.Pdf/Graphics/ContentStreamProcessor.cs index 7f480319..35abbc6c 100644 --- a/src/UglyToad.Pdf/Graphics/ContentStreamProcessor.cs +++ b/src/UglyToad.Pdf/Graphics/ContentStreamProcessor.cs @@ -89,11 +89,13 @@ } var fontSize = currentState.FontState.FontSize; - var horizontalScaling = currentState.FontState.HorizontalScaling; + var horizontalScaling = currentState.FontState.HorizontalScaling / 100m; /* NOTE(review): presumably HorizontalScaling is stored as a percentage - confirm */ var characterSpacing = currentState.FontState.CharacterSpacing; var transformationMatrix = currentState.CurrentTransformationMatrix; + var fontMatrix = font.GetFontMatrix(); /* NOTE(review): not read by any other line visible in this change - confirm it is used */ + // TODO: this does not seem correct, produces the correct result for now but we need to revisit. 
// see: https://stackoverflow.com/questions/48010235/pdf-specification-get-font-size-in-points var pointSize = decimal.Round(fontSize * transformationMatrix.A, 2); @@ -114,12 +116,12 @@ if (font.IsVertical) { throw new NotImplementedException("Vertical fonts are currently unsupported, please submit a pull request or issue with an example file."); } var displacement = font.GetDisplacement(code); - - var width = (displacement.X * fontSize) * transformationMatrix.A; + + var width = displacement.X * fontSize * TextMatrices.TextMatrix.GetScalingFactorX() * transformationMatrix.A; /* glyph displacement scaled by font size, the text matrix x scale and the A component of the current transformation matrix */ ShowGlyph(renderingMatrix, font, code, unicode, width, fontSize, pointSize); @@ -148,7 +150,7 @@ var textState = currentState.FontState; var fontSize = textState.FontSize; - var horizontalScaling = textState.HorizontalScaling; + var horizontalScaling = textState.HorizontalScaling / 100m; var font = resourceStore.GetFont(textState.FontName); var isVertical = font.IsVertical;