namespace UglyToad.PdfPig.DocumentLayoutAnalysis
{
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using UglyToad.PdfPig.Core;

    // for kd-tree with line segments, see https://stackoverflow.com/questions/14376679/how-to-represent-line-segments-in-kd-tree

    /// <summary>
    /// K-D tree data structure of <see cref="PdfPoint"/>.
    /// </summary>
    public class KdTree : KdTree<PdfPoint>
    {
        /// <summary>
        /// K-D tree data structure of <see cref="PdfPoint"/>.
        /// </summary>
        /// <param name="points">The points used to build the tree.</param>
        public KdTree(IReadOnlyList<PdfPoint> points) : base(points, p => p)
        { }

        /// <summary>
        /// Get the nearest neighbour to the pivot point.
        /// Only returns 1 neighbour, even if equidistant points are found.
        /// </summary>
        /// <param name="pivot">The point for which to find the nearest neighbour.</param>
        /// <param name="distanceMeasure">The distance measure used, e.g. the Euclidean distance.</param>
        /// <param name="index">The nearest neighbour's index (returns -1 if not found).</param>
        /// <param name="distance">The distance between the pivot and the nearest neighbour (returns <see cref="double.NaN"/> if not found).</param>
        /// <returns>The nearest neighbour's point.</returns>
        public PdfPoint FindNearestNeighbour(PdfPoint pivot, Func<PdfPoint, PdfPoint, double> distanceMeasure, out int index, out double distance)
        {
            return FindNearestNeighbour(pivot, p => p, distanceMeasure, out index, out distance);
        }

        /// <summary>
        /// Get the k nearest neighbours to the pivot point.
        /// Might return more than k neighbours if points are equidistant.
        /// <para>Use <c>FindNearestNeighbour</c> if only looking for the (single) closest point.</para>
        /// </summary>
        /// <param name="pivot">The point for which to find the nearest neighbour.</param>
        /// <param name="k">The number of neighbours to return. Might return more than k neighbours if points are equidistant.</param>
        /// <param name="distanceMeasure">The distance measure used, e.g. the Euclidean distance.</param>
        /// <returns>Returns a list of tuples of the k nearest neighbours. Tuples are (element, index, distance).</returns>
        public IReadOnlyList<(PdfPoint, int, double)> FindNearestNeighbours(PdfPoint pivot, int k, Func<PdfPoint, PdfPoint, double> distanceMeasure)
        {
            return FindNearestNeighbours(pivot, k, p => p, distanceMeasure);
        }
    }

    /// <summary>
    /// K-D tree data structure.
    /// </summary>
    /// <typeparam name="T">The type of the elements stored in the tree.</typeparam>
    public class KdTree<T>
    {
        /// <summary>
        /// The root of the tree.
        /// </summary>
        public readonly KdTreeNode<T> Root;

        /// <summary>
        /// Number of elements in the tree.
        /// </summary>
        public readonly int Count;

        /// <summary>
        /// K-D tree data structure.
        /// </summary>
        /// <param name="elements">The elements used to build the tree.</param>
        /// <param name="elementsPointFunc">The function that converts the candidate elements into a <see cref="PdfPoint"/>.</param>
        /// <exception cref="ArgumentException">Thrown when <paramref name="elements"/> is null or empty.</exception>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="elementsPointFunc"/> is null.</exception>
        public KdTree(IReadOnlyList<T> elements, Func<T, PdfPoint> elementsPointFunc)
        {
            if (elements == null || elements.Count == 0)
            {
                throw new ArgumentException("KdTree(): candidates cannot be null or empty.", nameof(elements));
            }

            if (elementsPointFunc == null)
            {
                throw new ArgumentNullException(nameof(elementsPointFunc));
            }

            Count = elements.Count;

            // Pair every element with its original index and its point so that both survive the sorting done while building.
            var indexed = new (int, PdfPoint, T)[elements.Count];
            for (var i = 0; i < indexed.Length; i++)
            {
                var element = elements[i];
                indexed[i] = (i, elementsPointFunc(element), element);
            }

            Root = BuildTree(indexed, 0);
        }

        /// <summary>
        /// Recursively builds the (sub-)tree for the given (index, point, element) tuples.
        /// The array is sorted in place along X at even depths and along Y at odd depths; the median becomes the split node.
        /// </summary>
        private static KdTreeNode<T> BuildTree((int, PdfPoint, T)[] points, int depth)
        {
            if (points.Length == 0)
            {
                return null;
            }

            if (points.Length == 1)
            {
                return new KdTreeLeaf<T>(points[0], depth);
            }

            if (depth % 2 == 0)
            {
                Array.Sort(points, (p0, p1) => p0.Item2.X.CompareTo(p1.Item2.X));
            }
            else
            {
                Array.Sort(points, (p0, p1) => p0.Item2.Y.CompareTo(p1.Item2.Y));
            }

            if (points.Length == 2)
            {
                // Two points: the larger one is the split node, the smaller one its (only) left leaf.
                return new KdTreeNode<T>(new KdTreeLeaf<T>(points[0], depth + 1), null, points[1], depth);
            }

            int median = points.Length / 2;

            KdTreeNode<T> vLeft = BuildTree(points.Take(median).ToArray(), depth + 1);
            KdTreeNode<T> vRight = BuildTree(points.Skip(median + 1).ToArray(), depth + 1);

            return new KdTreeNode<T>(vLeft, vRight, points[median], depth);
        }

        /// <summary>
        /// Null-safe check of whether the node holds the pivot element itself.
        /// </summary>
        private static bool IsPivot(KdTreeNode<T> node, T pivot)
        {
            return EqualityComparer<T>.Default.Equals(node.Element, pivot);
        }

        #region NN
        /// <summary>
        /// Get the nearest neighbour to the pivot element.
        /// Only returns 1 neighbour, even if equidistant points are found.
        /// Elements equal to the pivot are never returned.
        /// </summary>
        /// <param name="pivot">The element for which to find the nearest neighbour.</param>
        /// <param name="pivotPointFunc">The function that converts the pivot element into a <see cref="PdfPoint"/>.</param>
        /// <param name="distanceMeasure">The distance measure used, e.g. the Euclidean distance.</param>
        /// <param name="index">The nearest neighbour's index (returns -1 if not found).</param>
        /// <param name="distance">The distance between the pivot and the nearest neighbour (returns <see cref="double.NaN"/> if not found).</param>
        /// <returns>The nearest neighbour's element (the default value of <typeparamref name="T"/> if not found).</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="pivotPointFunc"/> or <paramref name="distanceMeasure"/> is null.</exception>
        public T FindNearestNeighbour(T pivot, Func<T, PdfPoint> pivotPointFunc, Func<PdfPoint, PdfPoint, double> distanceMeasure, out int index, out double distance)
        {
            if (pivotPointFunc == null)
            {
                throw new ArgumentNullException(nameof(pivotPointFunc));
            }

            if (distanceMeasure == null)
            {
                throw new ArgumentNullException(nameof(distanceMeasure));
            }

            var (nearest, nearestDistance) = FindNearestNeighbour(Root, pivot, pivotPointFunc, distanceMeasure);

            if (nearest == null)
            {
                index = -1;
                distance = double.NaN;
                return default;
            }

            index = nearest.Index;
            distance = nearestDistance ?? double.NaN;
            return nearest.Element;
        }

        /// <summary>
        /// Recursive nearest neighbour search. Returns (null, null) when the sub-tree holds no element other than the pivot.
        /// </summary>
        private static (KdTreeNode<T>, double?) FindNearestNeighbour(KdTreeNode<T> node, T pivot, Func<T, PdfPoint> pivotPointFunc, Func<PdfPoint, PdfPoint, double> distance)
        {
            if (node == null)
            {
                return (null, null);
            }

            if (node.IsLeaf)
            {
                if (IsPivot(node, pivot))
                {
                    return (null, null);
                }

                return (node, distance(node.Value, pivotPointFunc(pivot)));
            }

            var point = pivotPointFunc(pivot);

            // The split node is the initial best candidate, unless it holds the pivot itself:
            // in that case start with "nothing found yet" so that the pivot cannot win at distance 0.
            KdTreeNode<T> currentNearestNode = null;
            var currentDistance = double.PositiveInfinity;
            if (!IsPivot(node, pivot))
            {
                currentNearestNode = node;
                currentDistance = distance(node.Value, point);
            }

            var pointValue = node.IsAxisCutX ? point.X : point.Y;

            // Search the side of the split that contains the pivot first.
            var startLeft = pointValue < node.L;
            var nearChild = startLeft ? node.LeftChild : node.RightChild;
            var farChild = startLeft ? node.RightChild : node.LeftChild;

            var (candidate, candidateDistance) = FindNearestNeighbour(nearChild, pivot, pivotPointFunc, distance);
            if (candidateDistance.HasValue && candidateDistance.Value <= currentDistance && !IsPivot(candidate, pivot))
            {
                currentDistance = candidateDistance.Value;
                currentNearestNode = candidate;
            }

            // Only cross the split if the current best distance reaches over it.
            var farSideReachable = startLeft
                ? pointValue + currentDistance >= node.L
                : pointValue - currentDistance <= node.L;

            if (farChild != null && farSideReachable)
            {
                (candidate, candidateDistance) = FindNearestNeighbour(farChild, pivot, pivotPointFunc, distance);
                if (candidateDistance.HasValue && candidateDistance.Value <= currentDistance && !IsPivot(candidate, pivot))
                {
                    currentDistance = candidateDistance.Value;
                    currentNearestNode = candidate;
                }
            }

            if (currentNearestNode == null)
            {
                return (null, null);
            }

            return (currentNearestNode, currentDistance);
        }
        #endregion

        #region k-NN
        /// <summary>
        /// Get the k nearest neighbours to the pivot element.
        /// Might return more than k neighbours if points are equidistant.
        /// <para>Use <c>FindNearestNeighbour</c> if only looking for the (single) closest point.</para>
        /// </summary>
        /// <param name="pivot">The element for which to find the k nearest neighbours.</param>
        /// <param name="k">The number of neighbours to return (at least 1). Might return more than k neighbours if points are equidistant.</param>
        /// <param name="pivotPointFunc">The function that converts the pivot element into a <see cref="PdfPoint"/>.</param>
        /// <param name="distanceMeasure">The distance measure used, e.g. the Euclidean distance.</param>
        /// <returns>Returns a list of tuples of the k nearest neighbours. Tuples are (element, index, distance).</returns>
        /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="k"/> is lower than 1.</exception>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="pivotPointFunc"/> or <paramref name="distanceMeasure"/> is null.</exception>
        public IReadOnlyList<(T, int, double)> FindNearestNeighbours(T pivot, int k, Func<T, PdfPoint> pivotPointFunc, Func<PdfPoint, PdfPoint, double> distanceMeasure)
        {
            if (k < 1)
            {
                // A queue of size 0 would evict every key right after inserting it.
                throw new ArgumentOutOfRangeException(nameof(k), k, "The number of neighbours must be at least 1.");
            }

            if (pivotPointFunc == null)
            {
                throw new ArgumentNullException(nameof(pivotPointFunc));
            }

            if (distanceMeasure == null)
            {
                throw new ArgumentNullException(nameof(distanceMeasure));
            }

            var kdTreeNodes = new KNearestNeighboursQueue(k);
            FindNearestNeighbours(Root, pivot, k, pivotPointFunc, distanceMeasure, kdTreeNodes);
            return kdTreeNodes.SelectMany(n => n.Value.Select(e => (e.Element, e.Index, n.Key))).ToList();
        }

        /// <summary>
        /// Recursive k nearest neighbours search. Candidates are collected in <paramref name="queue"/>;
        /// the returned tuple is the current farthest retained candidate (or the visited node when it was not retained),
        /// and (null, <see cref="double.NaN"/>) when nothing was visited.
        /// </summary>
        private static (KdTreeNode<T>, double) FindNearestNeighbours(KdTreeNode<T> node, T pivot, int k,
            Func<T, PdfPoint> pivotPointFunc, Func<PdfPoint, PdfPoint, double> distance, KNearestNeighboursQueue queue)
        {
            if (node == null)
            {
                return (null, double.NaN);
            }

            if (node.IsLeaf)
            {
                if (IsPivot(node, pivot))
                {
                    return (null, double.NaN);
                }

                var leafDistance = distance(node.Value, pivotPointFunc(pivot));
                if (!queue.IsFull || leafDistance <= queue.LastDistance)
                {
                    queue.Add(leafDistance, node);
                    return (queue.LastElement, queue.LastDistance);
                }

                return (node, leafDistance);
            }

            var point = pivotPointFunc(pivot);
            var currentNearestNode = node;
            var currentDistance = distance(node.Value, point);

            if ((!queue.IsFull || currentDistance <= queue.LastDistance) && !IsPivot(node, pivot))
            {
                queue.Add(currentDistance, currentNearestNode);
                currentDistance = queue.LastDistance;
                currentNearestNode = queue.LastElement;
            }

            var pointValue = node.IsAxisCutX ? point.X : point.Y;

            // Search the side of the split that contains the pivot first.
            var startLeft = pointValue < node.L;
            var nearChild = startLeft ? node.LeftChild : node.RightChild;
            var farChild = startLeft ? node.RightChild : node.LeftChild;

            var (candidate, candidateDistance) = FindNearestNeighbours(nearChild, pivot, k, pivotPointFunc, distance, queue);
            if (!double.IsNaN(candidateDistance) && candidateDistance <= currentDistance && !IsPivot(candidate, pivot))
            {
                queue.Add(candidateDistance, candidate);
                currentDistance = queue.LastDistance;
                currentNearestNode = queue.LastElement;
            }

            // Only cross the split if the current search radius reaches over it.
            var farSideReachable = startLeft
                ? pointValue + currentDistance >= node.L
                : pointValue - currentDistance <= node.L;

            if (farChild != null && farSideReachable)
            {
                (candidate, candidateDistance) = FindNearestNeighbours(farChild, pivot, k, pivotPointFunc, distance, queue);
                if (!double.IsNaN(candidateDistance) && candidateDistance <= currentDistance && !IsPivot(candidate, pivot))
                {
                    queue.Add(candidateDistance, candidate);
                    currentDistance = queue.LastDistance;
                    currentNearestNode = queue.LastElement;
                }
            }

            return (currentNearestNode, currentDistance);
        }

        /// <summary>
        /// Keeps the nodes of the (at most) K smallest distinct distances, grouped by distance.
        /// </summary>
        private class KNearestNeighboursQueue : SortedList<double, HashSet<KdTreeNode<T>>>
        {
            /// <summary>
            /// Maximum number of distinct distances retained.
            /// </summary>
            public readonly int K;

            /// <summary>
            /// A node stored under the largest retained distance (null until something is added).
            /// </summary>
            public KdTreeNode<T> LastElement { get; private set; }

            /// <summary>
            /// The largest retained distance (<see cref="double.PositiveInfinity"/> until something is added).
            /// </summary>
            public double LastDistance { get; private set; }

            /// <summary>
            /// True once K distinct distances are retained.
            /// </summary>
            public bool IsFull => Count >= K;

            public KNearestNeighboursQueue(int k) : base(k)
            {
                K = k;
                LastDistance = double.PositiveInfinity;
            }

            /// <summary>
            /// Adds the node under the given distance, evicting the largest distance when more than K are held.
            /// Ignored when the queue is full and the distance is larger than every retained distance.
            /// </summary>
            public void Add(double key, KdTreeNode<T> value)
            {
                if (key > LastDistance && IsFull)
                {
                    return;
                }

                if (!TryGetValue(key, out var bucket))
                {
                    bucket = new HashSet<KdTreeNode<T>>();
                    base.Add(key, bucket);
                    if (Count > K)
                    {
                        // The new key is smaller than the largest one (see the guard above), so the evicted entry is never the new bucket.
                        RemoveAt(Count - 1);
                    }
                }

                if (bucket.Add(value))
                {
                    var lastIndex = Count - 1;
                    LastElement = Values[lastIndex].Last();
                    LastDistance = Keys[lastIndex];
                }
            }
        }
        #endregion

        /// <summary>
        /// K-D tree leaf.
        /// </summary>
        /// <typeparam name="Q">The type of the element stored in the leaf.</typeparam>
        public class KdTreeLeaf<Q> : KdTreeNode<Q>
        {
            /// <summary>
            /// Return true if leaf.
            /// </summary>
            public override bool IsLeaf => true;

            internal KdTreeLeaf((int, PdfPoint, Q) point, int depth)
                : base(null, null, point, depth)
            { }

            /// <inheritdoc />
            public override string ToString()
            {
                return "Leaf->" + Value.ToString();
            }
        }

        /// <summary>
        /// K-D tree node.
        /// </summary>
        /// <typeparam name="Q">The type of the element stored in the node.</typeparam>
        public class KdTreeNode<Q>
        {
            /// <summary>
            /// Split value (X or Y axis).
            /// </summary>
            public double L => IsAxisCutX ? Value.X : Value.Y;

            /// <summary>
            /// Split point.
            /// </summary>
            public PdfPoint Value { get; }

            /// <summary>
            /// Left child.
            /// </summary>
            public KdTreeNode<Q> LeftChild { get; internal set; }

            /// <summary>
            /// Right child.
            /// </summary>
            public KdTreeNode<Q> RightChild { get; internal set; }

            /// <summary>
            /// The node's element.
            /// </summary>
            public Q Element { get; }

            /// <summary>
            /// True if this cuts with X axis, false if cuts with Y axis.
            /// </summary>
            public bool IsAxisCutX { get; }

            /// <summary>
            /// The element's depth in the tree.
            /// </summary>
            public int Depth { get; }

            /// <summary>
            /// Return true if leaf.
            /// </summary>
            public virtual bool IsLeaf => false;

            /// <summary>
            /// The index of the element in the original array.
            /// </summary>
            public int Index { get; }

            internal KdTreeNode(KdTreeNode<Q> leftChild, KdTreeNode<Q> rightChild, (int, PdfPoint, Q) point, int depth)
            {
                LeftChild = leftChild;
                RightChild = rightChild;
                Value = point.Item2;
                Element = point.Item3;
                Depth = depth;
                IsAxisCutX = depth % 2 == 0; // even depths split on X, odd depths on Y
                Index = point.Item1;
            }

            /// <summary>
            /// Get the leaves below this node. The node itself is not included, so calling this on a leaf yields no result.
            /// </summary>
            public IEnumerable<KdTreeLeaf<Q>> GetLeaves()
            {
                var leaves = new List<KdTreeLeaf<Q>>();
                RecursiveGetLeaves(LeftChild, leaves);
                RecursiveGetLeaves(RightChild, leaves);
                return leaves;
            }

            /// <summary>
            /// Depth-first (left then right) collection of the leaves of the given sub-tree.
            /// </summary>
            private static void RecursiveGetLeaves(KdTreeNode<Q> leaf, List<KdTreeLeaf<Q>> leaves)
            {
                if (leaf == null)
                {
                    return;
                }

                if (leaf is KdTreeLeaf<Q> lLeaf)
                {
                    leaves.Add(lLeaf);
                }
                else
                {
                    RecursiveGetLeaves(leaf.LeftChild, leaves);
                    RecursiveGetLeaves(leaf.RightChild, leaves);
                }
            }

            /// <inheritdoc />
            public override string ToString()
            {
                return "Node->" + Value.ToString();
            }
        }
    }
}