diff --git a/src/UglyToad.PdfPig.Core/PdfRectangle.cs b/src/UglyToad.PdfPig.Core/PdfRectangle.cs
index 169acf58..5d3f1cd6 100644
--- a/src/UglyToad.PdfPig.Core/PdfRectangle.cs
+++ b/src/UglyToad.PdfPig.Core/PdfRectangle.cs
@@ -1,7 +1,7 @@
-using System;
-
-namespace UglyToad.PdfPig.Core
+namespace UglyToad.PdfPig.Core
{
+ using System;
+
///
/// A rectangle in a PDF file.
///
diff --git a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/Distances.cs b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/Distances.cs
index 2ee54379..7f06f7e5 100644
--- a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/Distances.cs
+++ b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/Distances.cs
@@ -52,10 +52,9 @@
///
/// The first point.
/// The second point.
- ///
public static double Angle(PdfPoint point1, PdfPoint point2)
{
- return Math.Atan2(point2.Y - point1.Y, point2.X - point1.X) * 180.0 / Math.PI;
+ return Math.Atan2(point2.Y - point1.Y, point2.X - point1.X) * 180.0 / Math.PI; // radians to degrees; derived from Math.PI because a truncated literal (57.29577951) loses precision
}
///
@@ -63,7 +62,6 @@
///
/// The first point.
/// The second point.
- ///
public static double Vertical(PdfPoint point1, PdfPoint point2)
{
return Math.Abs(point2.Y - point1.Y);
@@ -74,7 +72,6 @@
///
/// The first point.
/// The second point.
- ///
public static double Horizontal(PdfPoint point1, PdfPoint point2)
{
return Math.Abs(point2.X - point1.X);
@@ -85,7 +82,6 @@
///
/// The first string.
/// The second string.
- ///
public static int MinimumEditDistance(string string1, string string2)
{
ushort[,] d = new ushort[string1.Length + 1, string2.Length + 1];
@@ -134,7 +130,6 @@
///
/// The distance measure to use.
/// The distance between reference point, and its nearest neighbour.
- ///
internal static int FindIndexNearest(this T element, IReadOnlyList candidates,
Func candidatesPoint, Func pivotPoint,
Func distanceMeasure, out double distance)
diff --git a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextLine.cs b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextLine.cs
index 96e5c88e..198ef724 100644
--- a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextLine.cs
+++ b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextLine.cs
@@ -71,10 +71,10 @@
private PdfRectangle NormaliseRectangle(PdfRectangle rectangle)
{
- return new PdfRectangle(Math.Min(rectangle.Left, rectangle.Right),
- Math.Min(rectangle.Bottom, rectangle.Top),
- Math.Max(rectangle.Left, rectangle.Right),
- Math.Max(rectangle.Bottom, rectangle.Top));
+            // Take the extremes over all four corners instead of Left/Right/Top/Bottom
+            // (presumably so that rotated rectangles normalise correctly - confirm against PdfRectangle).
+            var topLeft = rectangle.TopLeft;
+            var topRight = rectangle.TopRight;
+            var bottomLeft = rectangle.BottomLeft;
+            var bottomRight = rectangle.BottomRight;
+
+            var minX = Math.Min(Math.Min(Math.Min(topLeft.X, topRight.X), bottomLeft.X), bottomRight.X);
+            var minY = Math.Min(Math.Min(Math.Min(topLeft.Y, topRight.Y), bottomLeft.Y), bottomRight.Y);
+            var maxX = Math.Max(Math.Max(Math.Max(topLeft.X, topRight.X), bottomLeft.X), bottomRight.X);
+            var maxY = Math.Max(Math.Max(Math.Max(topLeft.Y, topRight.Y), bottomLeft.Y), bottomRight.Y);
+
+            return new PdfRectangle(minX, minY, maxX, maxY);
}
///
diff --git a/src/UglyToad.PdfPig/Content/Word.cs b/src/UglyToad.PdfPig/Content/Word.cs
index a8832b62..e6d26255 100644
--- a/src/UglyToad.PdfPig/Content/Word.cs
+++ b/src/UglyToad.PdfPig/Content/Word.cs
@@ -3,7 +3,9 @@
using Core;
using System;
using System.Collections.Generic;
+ using System.Linq;
using System.Text;
+ using UglyToad.PdfPig.Geometry;
///
/// A word.
@@ -266,99 +268,19 @@
{
var builder = new StringBuilder();
- var minX = double.MaxValue;
- var maxX = double.MinValue;
- var minY = double.MaxValue;
- var maxY = double.MinValue;
+ var points = letters.SelectMany(r => new[]
+ {
+ r.StartBaseLine,
+ r.EndBaseLine,
+ r.GlyphRectangle.TopLeft,
+ r.GlyphRectangle.TopRight
+ }).Distinct();
+ var convexHull = GeometryExtensions.GrahamScan(points).ToList();
+ var minimalBoundingRectangle = GeometryExtensions.ParametricPerpendicularProjection(convexHull);
for (var i = 0; i < letters.Count; i++)
{
- var letter = letters[i];
- builder.Append(letter.Value);
-
- // maxX
- if (letter.GlyphRectangle.BottomLeft.X > maxX)
- {
- maxX = letter.GlyphRectangle.BottomLeft.X;
- }
-
- if (letter.GlyphRectangle.BottomRight.X > maxX)
- {
- maxX = letter.GlyphRectangle.BottomRight.X;
- }
-
- if (letter.GlyphRectangle.TopLeft.X > maxX)
- {
- maxX = letter.GlyphRectangle.TopLeft.X;
- }
-
- if (letter.GlyphRectangle.TopRight.X > maxX)
- {
- maxX = letter.GlyphRectangle.TopRight.X;
- }
-
- // minX
- if (letter.GlyphRectangle.BottomLeft.X < minX)
- {
- minX = letter.GlyphRectangle.BottomLeft.X;
- }
-
- if (letter.GlyphRectangle.BottomRight.X < minX)
- {
- minX = letter.GlyphRectangle.BottomRight.X;
- }
-
- if (letter.GlyphRectangle.TopLeft.X < minX)
- {
- minX = letter.GlyphRectangle.TopLeft.X;
- }
-
- if (letter.GlyphRectangle.TopRight.X < minX)
- {
- minX = letter.GlyphRectangle.TopRight.X;
- }
-
- // maxY
- if (letter.GlyphRectangle.BottomLeft.Y > maxY)
- {
- maxY = letter.GlyphRectangle.BottomLeft.Y;
- }
-
- if (letter.GlyphRectangle.BottomRight.Y > maxY)
- {
- maxY = letter.GlyphRectangle.BottomRight.Y;
- }
-
- if (letter.GlyphRectangle.TopLeft.Y > maxY)
- {
- maxY = letter.GlyphRectangle.TopLeft.Y;
- }
-
- if (letter.GlyphRectangle.TopRight.Y > maxY)
- {
- maxY = letter.GlyphRectangle.TopRight.Y;
- }
-
- // minY
- if (letter.GlyphRectangle.BottomLeft.Y < minY)
- {
- minY = letter.GlyphRectangle.BottomLeft.Y;
- }
-
- if (letter.GlyphRectangle.BottomRight.Y < minY)
- {
- minY = letter.GlyphRectangle.BottomRight.Y;
- }
-
- if (letter.GlyphRectangle.TopLeft.Y < minY)
- {
- minY = letter.GlyphRectangle.TopLeft.Y;
- }
-
- if (letter.GlyphRectangle.TopRight.Y < minY)
- {
- minY = letter.GlyphRectangle.TopRight.Y;
- }
+ builder.Append(letters[i].Value);
}
var firstLetter = letters[0];
@@ -370,26 +292,30 @@
if (rotation >= -0.785398 && rotation < 0.785398)
{
// top border on top
- return new Tuple(builder.ToString(), new PdfRectangle(minX, minY, maxX, maxY));
+ //return new Tuple(builder.ToString(), new PdfRectangle(minX, minY, maxX, maxY));
+ return new Tuple(builder.ToString(), minimalBoundingRectangle);
}
else if (rotation >= 0.785398 && rotation < 2.356194)
{
// top border on the left
- return new Tuple(builder.ToString(), new PdfRectangle(
- new PdfPoint(minX, minY), new PdfPoint(minX, maxY),
- new PdfPoint(maxX, minY), new PdfPoint(maxX, maxY)));
+ //return new Tuple(builder.ToString(), new PdfRectangle(
+ // new PdfPoint(minX, minY), new PdfPoint(minX, maxY),
+ // new PdfPoint(maxX, minY), new PdfPoint(maxX, maxY)));
+ return new Tuple(builder.ToString(), minimalBoundingRectangle);
}
else if (rotation >= 2.356194 && rotation < 3.926991)
{
// top border on the bottom
- return new Tuple(builder.ToString(), new PdfRectangle(minX, maxY, maxX, minY));
+ //return new Tuple(builder.ToString(), new PdfRectangle(minX, maxY, maxX, minY));
+ return new Tuple(builder.ToString(), minimalBoundingRectangle);
}
else
{
// top border on the right
- return new Tuple(builder.ToString(), new PdfRectangle(
- new PdfPoint(maxX, maxY), new PdfPoint(maxX, minY),
- new PdfPoint(minX, maxY), new PdfPoint(minX, minY)));
+ //return new Tuple(builder.ToString(), new PdfRectangle(
+ // new PdfPoint(maxX, maxY), new PdfPoint(maxX, minY),
+ // new PdfPoint(minX, maxY), new PdfPoint(minX, minY)));
+ return new Tuple(builder.ToString(), minimalBoundingRectangle);
}
}
#endregion
diff --git a/src/UglyToad.PdfPig/Geometry/GeometryExtensions.cs b/src/UglyToad.PdfPig/Geometry/GeometryExtensions.cs
index f1b5966d..c5e9bb7d 100644
--- a/src/UglyToad.PdfPig/Geometry/GeometryExtensions.cs
+++ b/src/UglyToad.PdfPig/Geometry/GeometryExtensions.cs
@@ -1,16 +1,215 @@
namespace UglyToad.PdfPig.Geometry
{
+ using Core;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
- using Core;
///
/// Extension class to Geometry.
///
public static class GeometryExtensions
{
+ #region PdfPoint
+        /// <summary>
+        /// Computes the dot (scalar) product of two points treated as 2D vectors.
+        /// </summary>
+        /// <param name="point1">The first point.</param>
+        /// <param name="point2">The second point.</param>
+        /// <returns>The product of the X coordinates plus the product of the Y coordinates.</returns>
+        public static double DotProduct(this PdfPoint point1, PdfPoint point2)
+        {
+            var xTerm = point1.X * point2.X;
+            var yTerm = point1.Y * point2.Y;
+            return xTerm + yTerm;
+        }
+
+        /// <summary>
+        /// Builds a new point whose coordinates are the component-wise sum of both points.
+        /// </summary>
+        /// <param name="point1">The first point.</param>
+        /// <param name="point2">The second point.</param>
+        /// <returns>The point (point1.X + point2.X, point1.Y + point2.Y).</returns>
+        public static PdfPoint Add(this PdfPoint point1, PdfPoint point2)
+        {
+            var summedX = point1.X + point2.X;
+            var summedY = point1.Y + point2.Y;
+            return new PdfPoint(summedX, summedY);
+        }
+
+        /// <summary>
+        /// Builds a new point whose coordinates are those of the first point minus those of the second point.
+        /// </summary>
+        /// <param name="point1">The point to subtract from.</param>
+        /// <param name="point2">The point being subtracted.</param>
+        /// <returns>The point (point1.X - point2.X, point1.Y - point2.Y).</returns>
+        public static PdfPoint Subtract(this PdfPoint point1, PdfPoint point2)
+        {
+            var deltaX = point1.X - point2.X;
+            var deltaY = point1.Y - point2.Y;
+            return new PdfPoint(deltaX, deltaY);
+        }
+
+        /// <summary>
+        /// Algorithm to find a minimal bounding rectangle (MBR) such that the MBR corresponds to a rectangle
+        /// with smallest possible area completely enclosing the polygon.
+        /// <para>From 'A Fast Algorithm for Generating a Minimal Bounding Rectangle' by Lennert D. Den Boer.</para>
+        /// </summary>
+        /// <param name="polygon">
+        /// The vertices of a convex polygon. Duplicates are removed and the vertices are re-ordered
+        /// around the left-most (then lowest) vertex before the search starts.
+        /// </param>
+        /// <returns>
+        /// The enclosing rectangle of smallest area among those having one side along a polygon edge.
+        /// When every point lies on a single line (or there is only one point) a degenerate rectangle
+        /// spanning the two extreme points is returned.
+        /// </returns>
+        /// <exception cref="ArgumentNullException">The polygon is null.</exception>
+        /// <exception cref="ArgumentException">The polygon contains no point.</exception>
+        internal static PdfRectangle ParametricPerpendicularProjection(IReadOnlyList<PdfPoint> polygon)
+        {
+            if (polygon == null) throw new ArgumentNullException(nameof(polygon));
+            if (polygon.Count == 0) throw new ArgumentException("The polygon must contain at least one point.", nameof(polygon));
+
+            // The vertices of P are assumed to be in strict cyclic sequential order,
+            // either clockwise or counter-clockwise relative to the origin P0. Polygon P is assumed to be
+            // both simple and convex, and to contain no duplicate (coincident) vertices.
+            polygon = polygon.Distinct().OrderBy(p => p.X).ThenBy(p => p.Y).ToList();
+            var P0 = polygon[0];
+            polygon = polygon.OrderBy(p => p, new PdfPointComparer(P0)).ToList();
+
+            int nv = polygon.Count;
+            PdfPoint[] MBR = null;
+            double Amin = double.MaxValue;
+
+            // Try every edge Pk -> Pk+1 as the direction of one side of the rectangle.
+            // With a single vertex there is no edge, so the loop is skipped.
+            for (int k = 0; nv > 1 && k < nv; k++)
+            {
+                PdfPoint Pk = polygon[k];
+
+                // The edge must end on the NEXT vertex (wrapping around); using Pk itself would give
+                // a zero vector and turn every projection below into NaN.
+                PdfPoint v = polygon[(k + 1) % nv].Subtract(Pk);
+                double r = 1.0 / v.DotProduct(v);
+
+                // Extremes are tracked per edge, so they are reset for each candidate direction.
+                // On the edge, Pk is at t = 0 and Pk+1 at t = 1.
+                double tmin = 1;
+                double tmax = 0;
+                double smax = 0;
+                int l = -1;
+                PdfPoint Q = Pk;
+                PdfPoint R0 = Pk;
+                PdfPoint R1 = Pk;
+
+                for (int j = 0; j < nv; j++)
+                {
+                    PdfPoint Pj = polygon[j];
+
+                    // t: parametric position of the projection of Pj on the edge line.
+                    PdfPoint u = Pj.Subtract(Pk);
+                    double t = u.DotProduct(v) * r;
+                    PdfPoint Pt = new PdfPoint(t * v.X + Pk.X, t * v.Y + Pk.Y);
+
+                    // s: squared distance between Pj and its projection.
+                    u = Pt.Subtract(Pj);
+                    double s = u.DotProduct(u);
+
+                    if (t < tmin)
+                    {
+                        tmin = t;
+                        R0 = Pt;
+                    }
+
+                    if (t > tmax)
+                    {
+                        tmax = t;
+                        R1 = Pt;
+                    }
+
+                    if (s > smax)
+                    {
+                        smax = s;
+                        Q = Pt;
+                        l = j;
+                    }
+                }
+
+                // No vertex lies off this edge's line: the edge cannot define a rectangle.
+                if (l == -1) continue;
+
+                // Shift the base side R0-R1 by the offset of the farthest vertex to get the opposite side.
+                PdfPoint PlMinusQ = polygon[l].Subtract(Q);
+                PdfPoint R2 = R1.Add(PlMinusQ);
+                PdfPoint R3 = R0.Add(PlMinusQ);
+
+                // Squared base length times squared height: ordering is the same as for the area.
+                PdfPoint baseSide = R1.Subtract(R0);
+                double A = baseSide.DotProduct(baseSide) * smax;
+
+                if (A < Amin)
+                {
+                    Amin = A;
+                    MBR = new[] { R0, R1, R2, R3 };
+                }
+            }
+
+            if (MBR == null)
+            {
+                // Degenerate input (single point or all points on one line): there is no area to
+                // minimise, so span the two extreme points instead of failing on an empty result.
+                var ordered = polygon.OrderBy(p => p.X).ThenBy(p => p.Y).ToList();
+                PdfPoint first = ordered[0];
+                PdfPoint last = ordered[ordered.Count - 1];
+                return new PdfRectangle(first, last, first, last);
+            }
+
+            return new PdfRectangle(MBR[2], MBR[3], MBR[1], MBR[0]);
+        }
+
+        /// <summary>
+        /// Compares two points by the sign of the cross product of their offsets from a reference point,
+        /// rounded to 6 decimal places so that near-collinear points compare as equal.
+        /// </summary>
+        private class PdfPointComparer : IComparer<PdfPoint>
+        {
+            // The reference point is fixed for the lifetime of the comparer.
+            private readonly PdfPoint P0;
+
+            /// <summary>
+            /// Create a comparer ordering points around <paramref name="referencePoint"/>.
+            /// </summary>
+            /// <param name="referencePoint">The point the offsets are measured from.</param>
+            public PdfPointComparer(PdfPoint referencePoint)
+            {
+                P0 = referencePoint;
+            }
+
+            /// <summary>
+            /// Returns 1 when the cross product (a - P0) x (b - P0) is positive, -1 when it is negative
+            /// and 0 when it rounds to zero.
+            /// </summary>
+            public int Compare(PdfPoint a, PdfPoint b)
+            {
+                var det = Math.Round((a.X - P0.X) * (b.Y - P0.Y) - (b.X - P0.X) * (a.Y - P0.Y), 6);
+
+                // Math.Sign already yields 0 for a zero determinant.
+                return Math.Sign(det);
+            }
+        }
+
+        /// <summary>
+        /// Algorithm to find the convex hull of the set of points with time complexity O(n log n).
+        /// </summary>
+        /// <param name="points">The points to enclose.</param>
+        /// <returns>
+        /// The points of the convex hull. Fewer than three input points are returned unchanged, and
+        /// when all points lie on one ray from the lowest point only the two extreme points are returned.
+        /// </returns>
+        internal static IEnumerable<PdfPoint> GrahamScan(IEnumerable<PdfPoint> points)
+        {
+            // Materialise once: the sequence is counted and then sorted, and callers may pass a lazy query.
+            var candidates = points.ToList();
+            if (candidates.Count < 3) return candidates;
+
+            // Cross product of (p2 - p1) and (p3 - p1): negative for a clockwise turn.
+            // Rounded so that numerical noise on collinear points is treated as zero.
+            Func<PdfPoint, PdfPoint, PdfPoint, double> ccw = (p1, p2, p3) =>
+            {
+                return Math.Round((p2.X - p1.X) * (p3.Y - p1.Y) - (p2.Y - p1.Y) * (p3.X - p1.X), 6);
+            };
+
+            Func<PdfPoint, PdfPoint, double> polarAngle = (point1, point2) =>
+            {
+                return Math.Atan2(point2.Y - point1.Y, point2.X - point1.X) % Math.PI;
+            };
+
+            // P0 is the lowest point (left-most on ties); the others are swept by polar angle around it.
+            var sortedPoints = candidates.OrderBy(p => p.Y).ThenBy(p => p.X).ToList();
+            var P0 = sortedPoints[0];
+            var groups = sortedPoints.Skip(1).GroupBy(p => polarAngle(P0, p)).OrderBy(g => g.Key);
+
+            sortedPoints = new List<PdfPoint>();
+            foreach (var group in groups)
+            {
+                // if more than one point has the same angle,
+                // remove all but the one that is farthest from P0
+                sortedPoints.Add(group.OrderByDescending(p =>
+                {
+                    double dx = p.X - P0.X;
+                    double dy = p.Y - P0.Y;
+                    return dx * dx + dy * dy;
+                }).First());
+            }
+
+            // Every remaining point shared one polar angle: the hull degenerates to a segment.
+            // Without this check the second push below would read past the end of the list.
+            if (sortedPoints.Count < 2)
+            {
+                return new[] { P0, sortedPoints[0] };
+            }
+
+            Stack<PdfPoint> stack = new Stack<PdfPoint>();
+            stack.Push(P0);
+            stack.Push(sortedPoints[0]);
+            stack.Push(sortedPoints[1]);
+
+            for (int i = 2; i < sortedPoints.Count; i++)
+            {
+                var point = sortedPoints[i];
+
+                // ElementAt(1) is the point just below the top of the stack; it only exists while
+                // at least two points remain, hence the count guard.
+                while (stack.Count > 1 && ccw(stack.ElementAt(1), stack.Peek(), point) < 0)
+                {
+                    stack.Pop();
+                }
+                stack.Push(point);
+            }
+
+            return stack;
+        }
+ #endregion
+
#region PdfRectangle
///
/// Whether the rectangle contains the point.