From 564e32e072e70f46b4c690d63a4ae8cfb6daeccc Mon Sep 17 00:00:00 2001 From: modest-as Date: Fri, 30 Mar 2018 23:16:54 +0100 Subject: [PATCH] Return bounding boxes for letters --- ...SinglePageNonLatinAcrobatDistillerTests.cs | 2 +- .../SinglePageSimpleGoogleChromeTests.cs | 12 ++-- .../PublicApiScannerTests.cs | 1 + src/UglyToad.PdfPig/Content/Letter.cs | 16 ++--- .../Core/TransformationMatrix.cs | 31 ++++++++++ .../Fonts/Composite/Type0Font.cs | 13 ++-- src/UglyToad.PdfPig/Fonts/IFont.cs | 4 +- .../Fonts/Simple/TrueTypeSimpleFont.cs | 24 +++---- .../Fonts/Simple/Type1FontSimple.cs | 11 ++-- .../Fonts/Simple/Type1Standard14Font.cs | 16 ++--- src/UglyToad.PdfPig/Fonts/Simple/Type3Font.cs | 8 +-- src/UglyToad.PdfPig/Geometry/PdfPoint.cs | 5 ++ src/UglyToad.PdfPig/Geometry/PdfRectangle.cs | 62 ++++++++++++++++--- src/UglyToad.PdfPig/Geometry/PdfVector.cs | 22 ++++++- .../Graphics/ContentStreamProcessor.cs | 26 ++++---- 15 files changed, 175 insertions(+), 78 deletions(-) diff --git a/src/UglyToad.PdfPig.Tests/Integration/SinglePageNonLatinAcrobatDistillerTests.cs b/src/UglyToad.PdfPig.Tests/Integration/SinglePageNonLatinAcrobatDistillerTests.cs index 84aea6d3..d4f1bc67 100644 --- a/src/UglyToad.PdfPig.Tests/Integration/SinglePageNonLatinAcrobatDistillerTests.cs +++ b/src/UglyToad.PdfPig.Tests/Integration/SinglePageNonLatinAcrobatDistillerTests.cs @@ -70,7 +70,7 @@ break; } - var myX = pageLetter.Location.X; + var myX = pageLetter.Rectangle.BottomLeft.X; var theirX = pdfBoxData[index].X; var myLetter = pageLetter.Value; diff --git a/src/UglyToad.PdfPig.Tests/Integration/SinglePageSimpleGoogleChromeTests.cs b/src/UglyToad.PdfPig.Tests/Integration/SinglePageSimpleGoogleChromeTests.cs index 32a1ca24..b90fd210 100644 --- a/src/UglyToad.PdfPig.Tests/Integration/SinglePageSimpleGoogleChromeTests.cs +++ b/src/UglyToad.PdfPig.Tests/Integration/SinglePageSimpleGoogleChromeTests.cs @@ -134,12 +134,12 @@ namespace UglyToad.PdfPig.Tests.Integration } Assert.Equal(datum.Text, 
letter.Value); - Assert.Equal(datum.X, letter.Location.X, 2); + Assert.Equal(datum.X, letter.Rectangle.BottomLeft.X, 2); - var transformed = page.Height - letter.Location.Y; + var transformed = page.Height - letter.Rectangle.BottomLeft.Y; Assert.Equal(datum.Y, transformed, 2); - Assert.Equal(datum.Width, letter.Width, 2); + Assert.Equal(datum.Width, letter.Rectangle.Width, 2); Assert.Equal(datum.FontName, letter.FontName); @@ -179,13 +179,13 @@ namespace UglyToad.PdfPig.Tests.Integration } Assert.Equal(datum.Text, letter.Value); - Assert.Equal(datum.X, letter.Location.X, 2); + Assert.Equal(datum.X, letter.Rectangle.BottomLeft.X, 2); - var transformed = page.Height - letter.Location.Y; + var transformed = page.Height - letter.Rectangle.BottomLeft.Y; Assert.Equal(datum.Y, transformed, 2); // Until we get width from glyphs we're a bit out. - Assert.True(Math.Abs(datum.Width - letter.Width) < 0.03m); + Assert.True(Math.Abs(datum.Width - letter.Rectangle.Width) < 0.03m); index++; } diff --git a/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs b/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs index 71c764ee..8130d41d 100644 --- a/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs +++ b/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs @@ -29,6 +29,7 @@ "UglyToad.PdfPig.ParsingOptions", "UglyToad.PdfPig.Logging.ILog", "UglyToad.PdfPig.Geometry.PdfPoint", + "UglyToad.PdfPig.Geometry.PdfRectangle", "UglyToad.PdfPig.Fonts.Exceptions.InvalidFontFormatException", "UglyToad.PdfPig.Exceptions.PdfDocumentFormatException", "UglyToad.PdfPig.Content.Letter", diff --git a/src/UglyToad.PdfPig/Content/Letter.cs b/src/UglyToad.PdfPig/Content/Letter.cs index c2b10f34..e83f940a 100644 --- a/src/UglyToad.PdfPig/Content/Letter.cs +++ b/src/UglyToad.PdfPig/Content/Letter.cs @@ -13,14 +13,9 @@ public string Value { get; } /// - /// The lower-left position of the letter. Letters with descenders will extend below this point. + /// Position of the bounding box. 
/// - public PdfPoint Location { get; } - - /// - /// The width of the letter. - /// - public decimal Width { get; } + public PdfRectangle Rectangle { get; } /// /// Size as defined in the PDF file. This is not equivalent to font size in points but is relative to other font sizes on the page. @@ -40,11 +35,10 @@ /// /// Create a new letter to represent some text drawn by the Tj operator. /// - internal Letter(string value, PdfPoint location, decimal width, decimal fontSize, string fontName, decimal pointSize) + internal Letter(string value, PdfRectangle rectangle, decimal fontSize, string fontName, decimal pointSize) { Value = value; - Location = location; - Width = width; + Rectangle = rectangle; FontSize = fontSize; FontName = fontName; PointSize = pointSize; @@ -55,7 +49,7 @@ /// public override string ToString() { - return $"{Location} {Width} {Value} {FontName} {PointSize}"; + return $"{Rectangle} {Value} {FontName} {PointSize}"; } } } diff --git a/src/UglyToad.PdfPig/Core/TransformationMatrix.cs b/src/UglyToad.PdfPig/Core/TransformationMatrix.cs index ebe4a1ae..8deb6f21 100644 --- a/src/UglyToad.PdfPig/Core/TransformationMatrix.cs +++ b/src/UglyToad.PdfPig/Core/TransformationMatrix.cs @@ -102,6 +102,17 @@ return new PdfVector(x, y); } + [Pure] + public PdfRectangle Transform(PdfRectangle original) + { + return new PdfRectangle( + Transform(original.TopLeft.ToVector()), + Transform(original.TopRight.ToVector()), + Transform(original.BottomLeft.ToVector()), + Transform(original.BottomRight.ToVector()) + ); + } + public static TransformationMatrix FromValues(decimal a, decimal b, decimal c, decimal d, decimal e, decimal f) => FromArray(new[] {a, b, c, d, e, f}); public static TransformationMatrix FromArray(decimal[] values) @@ -144,6 +155,26 @@ return new TransformationMatrix(result); } + public TransformationMatrix Multiply(decimal scalar) + { + var result = new decimal[9]; + + for (int i = 0; i < Rows; i++) + { + for (int j = 0; j < Columns; j++) + { + var 
index = (i * Rows) + j; + + // A scalar multiple is element-wise: each cell is scaled on its + // own. Summing across the row here would instead fill every + // cell of the row with scalar times the row sum. + result[index] = this[i, j] * scalar; + } + } + + return new TransformationMatrix(result); + } + public decimal GetScalingFactorX() { var xScale = A; diff --git a/src/UglyToad.PdfPig/Fonts/Composite/Type0Font.cs b/src/UglyToad.PdfPig/Fonts/Composite/Type0Font.cs index 40bfbe13..42db63da 100644 --- a/src/UglyToad.PdfPig/Fonts/Composite/Type0Font.cs +++ b/src/UglyToad.PdfPig/Fonts/Composite/Type0Font.cs @@ -30,6 +30,8 @@ public bool IsVertical => CMap.WritingMode == WritingMode.Vertical; + private readonly TransformationMatrix fontMatrix = TransformationMatrix.FromValues(0.001m, 0, 0, 0.001m, 0, 0); + public Type0Font(NameToken baseFont, ICidFont cidFont, CMap cmap, CMap toUnicodeCMap) { BaseFont = baseFont ?? throw new ArgumentNullException(nameof(baseFont)); @@ -69,21 +71,18 @@ return ToUnicode.TryGet(characterCode, out value); } - public PdfVector GetDisplacement(int characterCode) + public PdfRectangle GetDisplacement(int characterCode) { - // This width is in units scaled up by 1000 - var width = GetWidth(characterCode); - - return new PdfVector(width / 1000, 0); + return fontMatrix.Transform(GetRectangle(characterCode)); } - public decimal GetWidth(int characterCode) + public PdfRectangle GetRectangle(int characterCode) { var cid = CMap.ConvertToCid(characterCode); var fromFont = CidFont.GetWidthFromDictionary(cid); - return fromFont; + return new PdfRectangle(0, 0, fromFont, 0); } public TransformationMatrix GetFontMatrix() diff --git a/src/UglyToad.PdfPig/Fonts/IFont.cs b/src/UglyToad.PdfPig/Fonts/IFont.cs index 68b77db8..7a05095f 100644 --- a/src/UglyToad.PdfPig/Fonts/IFont.cs +++ b/src/UglyToad.PdfPig/Fonts/IFont.cs @@ -15,9 +15,7 @@ bool TryGetUnicode(int characterCode, out string value); - PdfVector GetDisplacement(int characterCode); - - decimal GetWidth(int characterCode); + PdfRectangle GetDisplacement(int characterCode); TransformationMatrix GetFontMatrix(); } diff --git 
a/src/UglyToad.PdfPig/Fonts/Simple/TrueTypeSimpleFont.cs b/src/UglyToad.PdfPig/Fonts/Simple/TrueTypeSimpleFont.cs index 59ebf1a7..f2eb6cd1 100644 --- a/src/UglyToad.PdfPig/Fonts/Simple/TrueTypeSimpleFont.cs +++ b/src/UglyToad.PdfPig/Fonts/Simple/TrueTypeSimpleFont.cs @@ -11,12 +11,14 @@ internal class TrueTypeSimpleFont : IFont { - private static readonly TransformationMatrix FontMatrix = - TransformationMatrix.FromValues(1/1000m, 0, 0, 1/1000m, 0, 0); private readonly int firstCharacterCode; + private readonly int lastCharacterCode; + private readonly decimal[] widths; + private readonly FontDescriptor descriptor; + [CanBeNull] private readonly Encoding encoding; @@ -27,6 +29,8 @@ [NotNull] public ToUnicodeCMap ToUnicode { get; set; } + private readonly TransformationMatrix fontMatrix = TransformationMatrix.FromValues(0.001m, 0, 0, 0.001m, 0, 0); + public TrueTypeSimpleFont(NameToken name, int firstCharacterCode, int lastCharacterCode, decimal[] widths, FontDescriptor descriptor, [CanBeNull]CMap toUnicodeCMap, @@ -79,29 +83,27 @@ return true; } - public PdfVector GetDisplacement(int characterCode) + public PdfRectangle GetDisplacement(int characterCode) { - var tx = GetWidth(characterCode); - - return new PdfVector(tx / 1000m, 0); + return fontMatrix.Transform(GetRectangle(characterCode)); } - public decimal GetWidth(int characterCode) + public PdfRectangle GetRectangle(int characterCode) { var index = characterCode - firstCharacterCode; - + if (index < 0 || index >= widths.Length) { - return descriptor.MissingWidth; + return new PdfRectangle(0, 0, descriptor.MissingWidth, 0); } - return widths[index]; + return new PdfRectangle(0, 0, widths[index], 0); } public TransformationMatrix GetFontMatrix() { // TODO: should this also use units per em? 
- return FontMatrix; + return fontMatrix; } } } diff --git a/src/UglyToad.PdfPig/Fonts/Simple/Type1FontSimple.cs b/src/UglyToad.PdfPig/Fonts/Simple/Type1FontSimple.cs index 0032b37e..e0e9e9ff 100644 --- a/src/UglyToad.PdfPig/Fonts/Simple/Type1FontSimple.cs +++ b/src/UglyToad.PdfPig/Fonts/Simple/Type1FontSimple.cs @@ -19,6 +19,7 @@ private readonly FontDescriptor fontDescriptor; private readonly Encoding encoding; private readonly ToUnicodeCMap toUnicodeCMap; + private readonly TransformationMatrix fontMatrix = TransformationMatrix.FromValues(0.001m, 0, 0, 0.001m, 0, 0); public NameToken Name { get; } @@ -80,19 +81,19 @@ return true; } - public PdfVector GetDisplacement(int characterCode) + public PdfRectangle GetDisplacement(int characterCode) { - return fontMatrix.Transform(new PdfVector(GetWidth(characterCode), 0)); + return fontMatrix.Transform(GetRectangle(characterCode)); } - public decimal GetWidth(int characterCode) + public PdfRectangle GetRectangle(int characterCode) { if (characterCode < firstChar || characterCode > lastChar) { - return 250; + return new PdfRectangle(0, 0, 250, 0); } - return widths[characterCode - firstChar]; + return new PdfRectangle(0, 0, widths[characterCode - firstChar], 0); } public TransformationMatrix GetFontMatrix() diff --git a/src/UglyToad.PdfPig/Fonts/Simple/Type1Standard14Font.cs b/src/UglyToad.PdfPig/Fonts/Simple/Type1Standard14Font.cs index 2ce52a09..fcc3c097 100644 --- a/src/UglyToad.PdfPig/Fonts/Simple/Type1Standard14Font.cs +++ b/src/UglyToad.PdfPig/Fonts/Simple/Type1Standard14Font.cs @@ -9,14 +9,14 @@ internal class Type1Standard14Font: IFont { - private static readonly TransformationMatrix FontMatrix = TransformationMatrix.FromValues(0.001m, 0, 0, 0.001m, 0, 0); - private readonly FontMetrics standardFontMetrics; private readonly Encoding encoding; public NameToken Name { get; } public bool IsVertical { get; } + private readonly TransformationMatrix fontMatrix = TransformationMatrix.FromValues(0.001m, 0, 0, 0.001m, 0, 
0); + public Type1Standard14Font(FontMetrics standardFontMetrics) { this.standardFontMetrics = standardFontMetrics ?? throw new ArgumentNullException(nameof(standardFontMetrics)); @@ -44,26 +44,26 @@ return true; } - public PdfVector GetDisplacement(int characterCode) + public PdfRectangle GetDisplacement(int characterCode) { - return FontMatrix.Transform(new PdfVector(GetWidth(characterCode), 0)); + return fontMatrix.Transform(GetRectangle(characterCode)); } - public decimal GetWidth(int characterCode) + public PdfRectangle GetRectangle(int characterCode) { var name = encoding.GetName(characterCode); if (!standardFontMetrics.CharacterMetrics.TryGetValue(name, out var metrics)) { - return 250; + return new PdfRectangle(0, 0, 250, 0); } - return metrics.WidthX; + return new PdfRectangle(0, 0, metrics.WidthX, 0); } public TransformationMatrix GetFontMatrix() { - return FontMatrix; + return fontMatrix; } } } diff --git a/src/UglyToad.PdfPig/Fonts/Simple/Type3Font.cs b/src/UglyToad.PdfPig/Fonts/Simple/Type3Font.cs index 1f6a34df..28a2b5e8 100644 --- a/src/UglyToad.PdfPig/Fonts/Simple/Type3Font.cs +++ b/src/UglyToad.PdfPig/Fonts/Simple/Type3Font.cs @@ -63,19 +63,19 @@ return true; } - public PdfVector GetDisplacement(int characterCode) + public PdfRectangle GetDisplacement(int characterCode) { - return fontMatrix.Transform(new PdfVector(GetWidth(characterCode), 0)); + return fontMatrix.Transform(GetRectangle(characterCode)); } - public decimal GetWidth(int characterCode) + public PdfRectangle GetRectangle(int characterCode) { if (characterCode < firstChar || characterCode > lastChar) { throw new InvalidFontFormatException($"The character code was not contained in the widths array: {characterCode}."); } - return widths[characterCode - firstChar]; + return new PdfRectangle(0, 0, widths[characterCode - firstChar], 0); } public TransformationMatrix GetFontMatrix() diff --git a/src/UglyToad.PdfPig/Geometry/PdfPoint.cs b/src/UglyToad.PdfPig/Geometry/PdfPoint.cs index 
026beb38..1aaf2a1e 100644 --- a/src/UglyToad.PdfPig/Geometry/PdfPoint.cs +++ b/src/UglyToad.PdfPig/Geometry/PdfPoint.cs @@ -52,6 +52,11 @@ Y = (decimal)y; } + internal PdfVector ToVector() + { + return new PdfVector(X, Y); + } + /// /// Get a string representation of this point. /// diff --git a/src/UglyToad.PdfPig/Geometry/PdfRectangle.cs b/src/UglyToad.PdfPig/Geometry/PdfRectangle.cs index 48ee98e3..0b9f6943 100644 --- a/src/UglyToad.PdfPig/Geometry/PdfRectangle.cs +++ b/src/UglyToad.PdfPig/Geometry/PdfRectangle.cs @@ -2,25 +2,68 @@ { using System; - internal class PdfRectangle + /// + /// A rectangle in a PDF file. + /// + /// + /// PDF coordinates are defined with the origin at the lower left (0, 0). + /// The Y-axis extends vertically upwards and the X-axis horizontally to the right. + /// Unless otherwise specified on a per-page basis, units in PDF space are equivalent to a typographic point (1/72 inch). + /// + public class PdfRectangle { + /// + /// Top left point of the rectangle. + /// public PdfPoint TopLeft { get; } - public PdfPoint BottomRight { get; } - + /// + /// Top right point of the rectangle. + /// public PdfPoint TopRight { get; } + /// + /// Bottom right point of the rectangle. + /// + public PdfPoint BottomRight { get; } + + /// + /// Bottom left point of the rectangle. + /// public PdfPoint BottomLeft { get; } + /// + /// Width of the rectangle. + /// public decimal Width { get; } + /// + /// Height of the rectangle. + /// public decimal Height { get; } + /// + /// Area of the rectangle. 
+ /// public decimal Area { get; } - public PdfRectangle(PdfPoint point1, PdfPoint point2) : this(point1.X, point1.Y, point2.X, point2.Y) { } - public PdfRectangle(short x1, short y1, short x2, short y2) : this((decimal) x1, y1, x2, y2) { } - public PdfRectangle(decimal x1, decimal y1, decimal x2, decimal y2) + internal PdfRectangle(PdfVector topLeft, PdfVector topRight, PdfVector bottomLeft, PdfVector bottomRight) + { + TopLeft = topLeft.ToPoint(); + TopRight = topRight.ToPoint(); + + BottomLeft = bottomLeft.ToPoint(); + BottomRight = bottomRight.ToPoint(); + + Width = bottomRight.Subtract(bottomLeft).GetMagnitude(); + Height = topLeft.Subtract(bottomLeft).GetMagnitude(); + + Area = Width * Height; + } + + internal PdfRectangle(short x1, short y1, short x2, short y2) : this((decimal) x1, y1, x2, y2) { } + + internal PdfRectangle(decimal x1, decimal y1, decimal x2, decimal y2) { var bottom = Math.Min(y1, y2); var top = Math.Max(y1, y2); @@ -36,12 +79,17 @@ Width = right - left; Height = top - bottom; + Area = Width * Height; } + /// + /// Get a string representation of this rectangle. 
+ /// + /// public override string ToString() { - return $"[{TopLeft}, {BottomRight}]"; + return $"[{TopLeft}, {Width}, {Height}]"; } } } diff --git a/src/UglyToad.PdfPig/Geometry/PdfVector.cs b/src/UglyToad.PdfPig/Geometry/PdfVector.cs index e6f64a66..9c937a2c 100644 --- a/src/UglyToad.PdfPig/Geometry/PdfVector.cs +++ b/src/UglyToad.PdfPig/Geometry/PdfVector.cs @@ -1,4 +1,6 @@ -namespace UglyToad.PdfPig.Geometry +using System; + +namespace UglyToad.PdfPig.Geometry { internal struct PdfVector { @@ -17,6 +19,24 @@ return new PdfVector(X * scale, Y * scale); } + public decimal GetMagnitude() + { + var doubleX = (double)X; + var doubleY = (double)Y; + + return (decimal)Math.Sqrt(doubleX * doubleX + doubleY * doubleY); + } + + public PdfVector Subtract(PdfVector vector) + { + return new PdfVector(X - vector.X, Y - vector.Y); + } + + public PdfPoint ToPoint() + { + return new PdfPoint(X, Y); + } + public override string ToString() { return $"({X}, {Y})"; diff --git a/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs b/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs index 3a945f95..e21a379b 100644 --- a/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs +++ b/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs @@ -96,8 +96,6 @@ var transformationMatrix = currentState.CurrentTransformationMatrix; - var fontMatrix = font.GetFontMatrix(); - // TODO: this does not seem correct, produces the correct result for now but we need to revisit. 
// see: https://stackoverflow.com/questions/48010235/pdf-specification-get-font-size-in-points var pointSize = decimal.Round(fontSize * transformationMatrix.A, 2); @@ -119,28 +117,31 @@ wordSpacing += GetCurrentState().FontState.WordSpacing; } - var renderingMatrix = TextMatrices.GetRenderingMatrix(GetCurrentState()); - if (font.IsVertical) { throw new NotImplementedException("Vertical fonts are# currently unsupported, please submit a pull request or issue with an example file."); } var displacement = font.GetDisplacement(code); - - var width = displacement.X * fontSize * TextMatrices.TextMatrix.GetScalingFactorX() * transformationMatrix.A; - ShowGlyph(renderingMatrix, font, unicode, width, fontSize, pointSize); + var fontScaling = TransformationMatrix.Identity.Multiply(fontSize); + + var transformedDisplacement = transformationMatrix + .Transform(TextMatrices.TextMatrix + .Transform(fontScaling + .Transform(displacement))); + + ShowGlyph(font, transformedDisplacement, unicode, fontSize, pointSize); decimal tx, ty; if (font.IsVertical) { tx = 0; - ty = displacement.Y * fontSize + characterSpacing + wordSpacing; + ty = displacement.Height * fontSize + characterSpacing + wordSpacing; } else { - tx = (displacement.X * fontSize + characterSpacing + wordSpacing) * horizontalScaling; + tx = (displacement.Width * fontSize + characterSpacing + wordSpacing) * horizontalScaling; ty = 0; } @@ -208,12 +209,9 @@ TextMatrices.TextMatrix = newMatrix; } - private void ShowGlyph(TransformationMatrix renderingMatrix, IFont font, string unicode, decimal width, decimal fontSize, - decimal pointSize) + private void ShowGlyph(IFont font, PdfRectangle rectangle, string unicode, decimal fontSize, decimal pointSize) { - var location = new PdfPoint(renderingMatrix.E, renderingMatrix.F); - - var letter = new Letter(unicode, location, width, fontSize, font.Name.Data, pointSize); + var letter = new Letter(unicode, rectangle, fontSize, font.Name.Data, pointSize); Letters.Add(letter); }