namespace UglyToad.PdfPig.DocumentLayoutAnalysis
{
    using Core;
    using System;
    using System.Collections.Generic;
    using System.Linq;

    /// <summary>
    /// Contains helpful tools for distance measures.
    /// </summary>
    public static class Distances
    {
        /// <summary>
        /// The Euclidean distance is the "ordinary" straight-line distance between two points.
        /// </summary>
        /// <param name="point1">The first point.</param>
        /// <param name="point2">The second point.</param>
        /// <returns>The square root of the sum of the squared coordinate differences.</returns>
        public static double Euclidean(PdfPoint point1, PdfPoint point2)
        {
            double dx = point1.X - point2.X;
            double dy = point1.Y - point2.Y;
            return Math.Sqrt(dx * dx + dy * dy);
        }

        /// <summary>
        /// The weighted Euclidean distance.
        /// </summary>
        /// <param name="point1">The first point.</param>
        /// <param name="point2">The second point.</param>
        /// <param name="wX">The weight of the X coordinates. Default is 1.</param>
        /// <param name="wY">The weight of the Y coordinates. Default is 1.</param>
        /// <returns>The square root of <c>wX * dx² + wY * dy²</c>.</returns>
        public static double WeightedEuclidean(PdfPoint point1, PdfPoint point2, double wX = 1.0, double wY = 1.0)
        {
            double dx = point1.X - point2.X;
            double dy = point1.Y - point2.Y;
            return Math.Sqrt(wX * dx * dx + wY * dy * dy);
        }

        /// <summary>
        /// The Manhattan distance between two points is the sum of the absolute differences of their Cartesian coordinates.
        /// <para>Also known as rectilinear distance, L1 distance, L1 norm, snake distance, city block distance, taxicab metric.</para>
        /// </summary>
        /// <param name="point1">The first point.</param>
        /// <param name="point2">The second point.</param>
        /// <returns>The sum of the absolute X and Y differences.</returns>
        public static double Manhattan(PdfPoint point1, PdfPoint point2)
        {
            return Math.Abs(point1.X - point2.X) + Math.Abs(point1.Y - point2.Y);
        }

        /// <summary>
        /// The angle in degrees between the horizontal axis and the line between two points.
        /// <para>-180 ≤ θ ≤ 180</para>
        /// </summary>
        /// <param name="startPoint">The first point, from which the angle is measured.</param>
        /// <param name="endPoint">The second point, towards which the line is drawn.</param>
        /// <returns>The angle in degrees, computed with <see cref="Math.Atan2(double, double)"/>.</returns>
        public static double Angle(PdfPoint startPoint, PdfPoint endPoint)
        {
            return Math.Atan2(endPoint.Y - startPoint.Y, endPoint.X - startPoint.X) * 180 / Math.PI;
        }

        /// <summary>
        /// The absolute distance between the Y coordinates of two points.
        /// </summary>
        /// <param name="point1">The first point.</param>
        /// <param name="point2">The second point.</param>
        /// <returns>The absolute difference of the Y coordinates.</returns>
        public static double Vertical(PdfPoint point1, PdfPoint point2)
        {
            return Math.Abs(point2.Y - point1.Y);
        }

        /// <summary>
        /// The absolute distance between the X coordinates of two points.
        /// </summary>
        /// <param name="point1">The first point.</param>
        /// <param name="point2">The second point.</param>
        /// <returns>The absolute difference of the X coordinates.</returns>
        public static double Horizontal(PdfPoint point1, PdfPoint point2)
        {
            return Math.Abs(point2.X - point1.X);
        }

        /// <summary>
        /// Bound angle so that -180 ≤ θ &lt; 180 (up to floating-point rounding).
        /// <para>The upper bound wraps around: an input of exactly 180 is mapped to -180.</para>
        /// </summary>
        /// <param name="angle">The angle to bound, in degrees.</param>
        /// <returns>The equivalent angle in degrees.</returns>
        public static double BoundAngle180(double angle)
        {
            // Shift by 180 so the wrap-around can be done on a 0..360 range, then shift back.
            angle = (angle + 180) % 360;

            // The C# remainder keeps the sign of the dividend, so negative results need lifting.
            if (angle < 0)
            {
                angle += 360;
            }

            return angle - 180;
        }

        /// <summary>
        /// Bound angle so that 0 ≤ θ &lt; 360 (up to floating-point rounding).
        /// <para>The upper bound wraps around: an input of exactly 360 is mapped to 0.</para>
        /// </summary>
        /// <param name="angle">The angle to bound, in degrees.</param>
        /// <returns>The equivalent angle in degrees.</returns>
        public static double BoundAngle0to360(double angle)
        {
            angle %= 360;

            // The C# remainder keeps the sign of the dividend, so negative results need lifting.
            if (angle < 0)
            {
                angle += 360;
            }

            return angle;
        }

        /// <summary>
        /// Get the minimum edit distance between two strings, where insertion, deletion and
        /// substitution of a single character each have a cost of 1.
        /// </summary>
        /// <param name="string1">The first string.</param>
        /// <param name="string2">The second string.</param>
        /// <returns>The minimum number of single-character edits needed to turn one string into the other.</returns>
        /// <exception cref="ArgumentNullException">If either string is null.</exception>
        public static int MinimumEditDistance(string string1, string string2)
        {
            if (string1 == null)
            {
                throw new ArgumentNullException(nameof(string1));
            }

            if (string2 == null)
            {
                throw new ArgumentNullException(nameof(string2));
            }

            int length1 = string1.Length;
            int length2 = string2.Length;

            // Only the previous and the current column of the cost matrix are ever read, so two
            // rolling buffers are enough. Costs are held as int so that strings longer than
            // ushort.MaxValue characters do not silently wrap around.
            int[] previous = new int[length1 + 1];
            int[] current = new int[length1 + 1];

            for (int i = 0; i <= length1; i++)
            {
                previous[i] = i;
            }

            for (int j = 1; j <= length2; j++)
            {
                current[0] = j;

                for (int i = 1; i <= length1; i++)
                {
                    int substitutionCost = string1[i - 1] == string2[j - 1] ? 0 : 1;

                    current[i] = Math.Min(
                        Math.Min(current[i - 1] + 1, previous[i] + 1),
                        previous[i - 1] + substitutionCost);
                }

                int[] swap = previous;
                previous = current;
                current = swap;
            }

            // After the final swap the last computed column lives in 'previous'.
            return previous[length1];
        }

        /// <summary>
        /// Get the minimum edit distance between two strings, divided by the length of the longest string.
        /// <para>Returned values are between 0 and 1 included. A value of 0 means that the two strings are identical.</para>
        /// </summary>
        /// <param name="string1">The first string.</param>
        /// <param name="string2">The second string.</param>
        /// <returns>The normalised distance; 0 when both strings are empty.</returns>
        /// <exception cref="ArgumentNullException">If either string is null.</exception>
        public static double MinimumEditDistanceNormalised(string string1, string string2)
        {
            // Called first so that null arguments are rejected before the lengths are read.
            int editDistance = MinimumEditDistance(string1, string2);

            int longest = Math.Max(string1.Length, string2.Length);
            if (longest == 0)
            {
                // Two empty strings are identical; avoid 0 / 0 which would yield NaN.
                return 0;
            }

            return editDistance / (double)longest;
        }

        /// <summary>
        /// Find the index of the nearest point, excluding itself.
        /// </summary>
        /// <typeparam name="T">The type of the reference element and of the candidates.</typeparam>
        /// <param name="element">The reference point, for which to find the nearest neighbour.</param>
        /// <param name="candidates">The list of neighbours candidates.</param>
        /// <param name="pivotPoint">The function applied to <paramref name="element"/> to get its point.</param>
        /// <param name="candidatePoint">The function applied to each candidate to get its point.</param>
        /// <param name="distanceMeasure">The distance measure to use.</param>
        /// <param name="distance">The distance between the reference element and its nearest neighbour.
        /// Left at <see cref="double.MaxValue"/> when no neighbour is found.</param>
        /// <returns>The index in <paramref name="candidates"/> of the nearest neighbour, or -1 if none is found.</returns>
        /// <exception cref="ArgumentException">If <paramref name="candidates"/> is null or empty.</exception>
        /// <exception cref="ArgumentNullException">If any of the functions is null.</exception>
        public static int FindIndexNearest<T>(T element, IReadOnlyList<T> candidates,
            Func<T, PdfPoint> pivotPoint, Func<T, PdfPoint> candidatePoint,
            Func<PdfPoint, PdfPoint, double> distanceMeasure, out double distance)
        {
            if (pivotPoint == null)
            {
                throw new ArgumentNullException(nameof(pivotPoint));
            }

            if (candidatePoint == null)
            {
                throw new ArgumentNullException(nameof(candidatePoint));
            }

            return FindIndexNearestCore(element, candidates, pivotPoint, candidatePoint, distanceMeasure, out distance);
        }

        /// <summary>
        /// Find the index of the nearest line, excluding itself.
        /// </summary>
        /// <typeparam name="T">The type of the reference element and of the candidates.</typeparam>
        /// <param name="element">The reference line, for which to find the nearest neighbour.</param>
        /// <param name="candidates">The list of neighbours candidates.</param>
        /// <param name="pivotLine">The function applied to <paramref name="element"/> to get its line.</param>
        /// <param name="candidateLine">The function applied to each candidate to get its line.</param>
        /// <param name="distanceMeasure">The distance measure between two lines to use.</param>
        /// <param name="distance">The distance between the reference element and its nearest neighbour.
        /// Left at <see cref="double.MaxValue"/> when no neighbour is found.</param>
        /// <returns>The index in <paramref name="candidates"/> of the nearest neighbour, or -1 if none is found.</returns>
        /// <exception cref="ArgumentException">If <paramref name="candidates"/> is null or empty.</exception>
        /// <exception cref="ArgumentNullException">If any of the functions is null.</exception>
        public static int FindIndexNearest<T>(T element, IReadOnlyList<T> candidates,
            Func<T, PdfLine> pivotLine, Func<T, PdfLine> candidateLine,
            Func<PdfLine, PdfLine, double> distanceMeasure, out double distance)
        {
            if (pivotLine == null)
            {
                throw new ArgumentNullException(nameof(pivotLine));
            }

            if (candidateLine == null)
            {
                throw new ArgumentNullException(nameof(candidateLine));
            }

            return FindIndexNearestCore(element, candidates, pivotLine, candidateLine, distanceMeasure, out distance);
        }

        /// <summary>
        /// Shared nearest-neighbour search used by both <c>FindIndexNearest</c> overloads.
        /// Scans the candidates once and keeps the first one with the strictly smallest distance
        /// that is not equal to <paramref name="element"/>.
        /// </summary>
        private static int FindIndexNearestCore<T, TGeometry>(T element, IReadOnlyList<T> candidates,
            Func<T, TGeometry> pivotSelector, Func<T, TGeometry> candidateSelector,
            Func<TGeometry, TGeometry, double> distanceMeasure, out double distance)
        {
            if (candidates == null || candidates.Count == 0)
            {
                throw new ArgumentException("Distances.FindIndexNearest(): The list of neighbours candidates is either null or empty.", nameof(candidates));
            }

            if (distanceMeasure == null)
            {
                // ArgumentNullException derives from ArgumentException, so existing catch blocks still match.
                throw new ArgumentNullException(nameof(distanceMeasure), "Distances.FindIndexNearest(): The distance measure must not be null.");
            }

            distance = double.MaxValue;
            int closestIndex = -1;
            TGeometry pivot = pivotSelector(element);

            for (int i = 0; i < candidates.Count; i++)
            {
                T candidate = candidates[i];
                double currentDistance = distanceMeasure(pivot, candidateSelector(candidate));

                // The equality test is what excludes the reference element itself from the result.
                if (currentDistance < distance && !candidate.Equals(element))
                {
                    distance = currentDistance;
                    closestIndex = i;
                }
            }

            return closestIndex;
        }
    }
}