2020-02-12 16:02:47 +00:00
|
|
|
|
namespace UglyToad.PdfPig.DocumentLayoutAnalysis
|
|
|
|
|
|
{
|
|
|
|
|
|
using System;
|
|
|
|
|
|
using System.Collections.Generic;
|
|
|
|
|
|
using System.Linq;
|
|
|
|
|
|
using UglyToad.PdfPig.Core;
|
|
|
|
|
|
|
|
|
|
|
|
// for kd-tree with line segments, see https://stackoverflow.com/questions/14376679/how-to-represent-line-segments-in-kd-tree
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Kd-tree specialised for <see cref="PdfPoint"/>s, where every point is its own element
/// (the identity function is used to map an element to its point).
/// </summary>
internal class KdTree : KdTree<PdfPoint>
{
    /// <summary>
    /// Builds the tree from the given points.
    /// </summary>
    /// <param name="candidates">The points to store in the tree.</param>
    public KdTree(PdfPoint[] candidates)
        : base(candidates, point => point)
    {
    }

    /// <summary>
    /// Searches the tree for the point nearest to <paramref name="pivot"/> by forwarding to the
    /// generic search with the identity function as pivot-to-point mapping.
    /// </summary>
    /// <param name="pivot">The point to search around.</param>
    /// <param name="distanceMeasure">The distance function handed to the generic search.</param>
    /// <param name="index">The index reported by the generic search.</param>
    /// <param name="distance">The distance reported by the generic search.</param>
    /// <returns>The point returned by the generic search.</returns>
    public PdfPoint FindNearestNeighbours(PdfPoint pivot, Func<PdfPoint, PdfPoint, double> distanceMeasure, out int index, out double distance)
        => FindNearestNeighbours(pivot, point => point, distanceMeasure, out index, out distance);
}
|
|
|
|
|
|
|
|
|
|
|
|
internal class KdTree<T>
|
|
|
|
|
|
{
|
|
|
|
|
|
private KdTreeNode<T> Root;
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Builds a kd-tree over the given elements.
/// </summary>
/// <param name="candidates">The elements to store in the tree. Cannot be null or empty.</param>
/// <param name="candidatesPointFunc">Maps an element to the point used to place it in the tree.</param>
/// <exception cref="ArgumentException">Thrown when <paramref name="candidates"/> is null or empty.</exception>
public KdTree(IReadOnlyList<T> candidates, Func<T, PdfPoint> candidatesPointFunc)
{
    bool hasCandidates = candidates != null && candidates.Count != 0;
    if (!hasCandidates)
    {
        throw new ArgumentException("KdTree(): candidates cannot be null or empty.", nameof(candidates));
    }

    // Each entry keeps the element's original position, its point and the element itself.
    var entries = new (int, PdfPoint, T)[candidates.Count];
    for (int i = 0; i < entries.Length; i++)
    {
        T element = candidates[i];
        entries[i] = (i, candidatesPointFunc(element), element);
    }

    Root = BuildTree(entries, 0);
}
|
|
|
|
|
|
|
2020-03-04 10:53:25 +00:00
|
|
|
|
/// <summary>
/// Recursively builds the (sub)tree holding the given entries.
/// <para>The array is sorted in place: by X at even depths, by Y at odd depths.</para>
/// </summary>
/// <param name="P">The (index, point, element) entries to store in this (sub)tree.</param>
/// <param name="depth">The depth of the node being built; the root is built with depth 0.</param>
/// <returns>The root of the (sub)tree, or null if <paramref name="P"/> is empty.</returns>
private KdTreeNode<T> BuildTree((int, PdfPoint, T)[] P, int depth)
{
    switch (P.Length)
    {
        case 0:
            return null;

        case 1:
            return new KdTreeLeaf<T>(P[0], depth);
    }

    // Pick the coordinate this level splits on.
    Comparison<(int, PdfPoint, T)> byAxis;
    if (depth % 2 == 0)
    {
        byAxis = (p0, p1) => p0.Item2.X.CompareTo(p1.Item2.X);
    }
    else
    {
        byAxis = (p0, p1) => p0.Item2.Y.CompareTo(p1.Item2.Y);
    }

    Array.Sort(P, byAxis);

    int childDepth = depth + 1;

    if (P.Length == 2)
    {
        // Two entries: the larger one becomes the node, the smaller one its only (left) child.
        var onlyChild = new KdTreeLeaf<T>(P[0], childDepth);
        return new KdTreeNode<T>(onlyChild, null, P[1], depth);
    }

    int median = P.Length / 2;

    // Entries before the median go left, entries after it go right; the median itself is this node.
    var lower = new (int, PdfPoint, T)[median];
    Array.Copy(P, 0, lower, 0, lower.Length);

    var upper = new (int, PdfPoint, T)[P.Length - median - 1];
    Array.Copy(P, median + 1, upper, 0, upper.Length);

    KdTreeNode<T> leftSubtree = BuildTree(lower, childDepth);
    KdTreeNode<T> rightSubtree = BuildTree(upper, childDepth);

    return new KdTreeNode<T>(leftSubtree, rightSubtree, P[median], depth);
}
|
|
|
|
|
|
|
|
|
|
|
|
#region NN
|
|
|
|
|
|
/// <summary>
/// Finds the element of the tree that is nearest to <paramref name="pivot"/>.
/// Elements that are equal to the pivot (as per <see cref="object.Equals(object)"/>) are never returned.
/// </summary>
/// <param name="pivot">The element to search around.</param>
/// <param name="pivotPointFunc">Maps the pivot to the point distances are measured to.</param>
/// <param name="distanceMeasure">The distance function; it is called with a tree element's point
/// as first argument and the pivot's point as second argument.</param>
/// <param name="index">The position of the nearest element in the candidates list the tree was built
/// from, or -1 if no neighbour was found.</param>
/// <param name="distance">The distance to the nearest element, or <see cref="double.NaN"/> if no neighbour was found.</param>
/// <returns>The nearest element, or <c>default(T)</c> if the tree holds no element other than the pivot.</returns>
public T FindNearestNeighbours(T pivot, Func<T, PdfPoint> pivotPointFunc, Func<PdfPoint, PdfPoint, double> distanceMeasure, out int index, out double distance)
{
    var (nearest, nearestDistance) = FindNearestNeighbours(Root, pivot, pivotPointFunc, distanceMeasure);

    if (nearest == null)
    {
        // Nothing but the pivot itself is stored in the tree: report 'not found' instead of dereferencing null.
        index = -1;
        distance = double.NaN;
        return default(T);
    }

    index = nearest.Index;
    distance = nearestDistance.Value;
    return nearest.Element;
}

/// <summary>
/// Recursive nearest neighbour search in the subtree rooted at <paramref name="node"/>.
/// </summary>
/// <param name="node">The root of the subtree to search; may be null.</param>
/// <param name="pivot">The element to search around; elements equal to it are skipped.</param>
/// <param name="pivotPointFunc">Maps the pivot to the point distances are measured to.</param>
/// <param name="distance">The distance function (tree element's point first, pivot's point second).</param>
/// <returns>The nearest node of the subtree that is not the pivot together with its distance,
/// or <c>(null, null)</c> if the subtree holds no such node.</returns>
private static (KdTreeNode<T>, double?) FindNearestNeighbours(KdTreeNode<T> node, T pivot, Func<T, PdfPoint> pivotPointFunc, Func<PdfPoint, PdfPoint, double> distance)
{
    if (node == null)
    {
        return (null, null);
    }

    bool nodeIsPivot = node.Element.Equals(pivot);

    if (node.IsLeaf)
    {
        if (nodeIsPivot)
        {
            return (null, null);
        }

        return (node, distance(node.Value, pivotPointFunc(pivot)));
    }

    var point = pivotPointFunc(pivot);

    // The node itself is the first candidate, unless it is the pivot. In that case we start
    // without any candidate (infinite distance) so that the pivot can neither be returned nor
    // hide the valid neighbours stored in its own subtrees.
    KdTreeNode<T> currentNearestNode = null;
    double currentDistance = double.PositiveInfinity;
    if (!nodeIsPivot)
    {
        currentNearestNode = node;
        currentDistance = distance(node.Value, point);
    }

    var pointValue = node.IsAxisCutX ? point.X : point.Y;

    // Search first the side of the split the pivot's point falls in.
    bool startLeft = pointValue < node.L;
    KdTreeNode<T> nearChild = startLeft ? node.LeftChild : node.RightChild;
    KdTreeNode<T> farChild = startLeft ? node.RightChild : node.LeftChild;

    // Recursive calls never return the pivot, so their results only need to be compared by distance.
    var (nearNode, nearDist) = FindNearestNeighbours(nearChild, pivot, pivotPointFunc, distance);
    if (nearDist.HasValue && nearDist.Value <= currentDistance)
    {
        currentDistance = nearDist.Value;
        currentNearestNode = nearNode;
    }

    // The other side is only visited if it can still contain something at least as close as the
    // current best, judged by the gap to the split value along the split axis.
    // NOTE(review): this pruning assumes the distance measure is never smaller than the gap
    // along a single axis (true for Euclidean/Manhattan) - confirm for other measures.
    bool searchFarSide = farChild != null
        && (startLeft
            ? pointValue + currentDistance >= node.L
            : pointValue - currentDistance <= node.L);

    if (searchFarSide)
    {
        var (farNode, farDist) = FindNearestNeighbours(farChild, pivot, pivotPointFunc, distance);
        if (farDist.HasValue && farDist.Value <= currentDistance)
        {
            currentDistance = farDist.Value;
            currentNearestNode = farNode;
        }
    }

    if (currentNearestNode == null)
    {
        // This node is the pivot and neither subtree offered a neighbour.
        return (null, null);
    }

    return (currentNearestNode, currentDistance);
}
|
|
|
|
|
|
#endregion
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// A node of the tree without any children.
/// </summary>
private class KdTreeLeaf<Q> : KdTreeNode<Q>
{
    /// <summary>
    /// Creates a leaf for the given (index, point, element) entry at the given depth.
    /// </summary>
    public KdTreeLeaf((int, PdfPoint, Q) point, int depth) : base(null, null, point, depth)
    {
    }

    /// <summary>
    /// Always true for a leaf.
    /// </summary>
    public override bool IsLeaf
    {
        get { return true; }
    }

    public override string ToString() => "Leaf->" + Value.ToString();
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// A node of the tree: holds one element, its point and up to two children.
/// </summary>
private class KdTreeNode<Q>
{
    /// <summary>
    /// Creates a node from an (index, point, element) entry.
    /// </summary>
    /// <param name="leftChild">The left child, if any.</param>
    /// <param name="rightChild">The right child, if any.</param>
    /// <param name="point">The (index, point, element) entry stored in this node.</param>
    /// <param name="depth">The depth of the node; even depths cut on X, odd depths on Y.</param>
    public KdTreeNode(KdTreeNode<Q> leftChild, KdTreeNode<Q> rightChild, (int, PdfPoint, Q) point, int depth)
    {
        var (index, location, element) = point;

        Index = index;
        Value = location;
        Element = element;

        Depth = depth;
        IsAxisCutX = (depth % 2) == 0;

        LeftChild = leftChild;
        RightChild = rightChild;
    }

    /// <summary>
    /// Split value: the X coordinate of <see cref="Value"/> when the node cuts on X, its Y coordinate otherwise.
    /// </summary>
    public double L
    {
        get
        {
            if (IsAxisCutX)
            {
                return Value.X;
            }

            return Value.Y;
        }
    }

    /// <summary>
    /// The point of the element stored in this node.
    /// </summary>
    public PdfPoint Value { get; }

    public KdTreeNode<Q> LeftChild { get; internal set; }

    public KdTreeNode<Q> RightChild { get; internal set; }

    /// <summary>
    /// The element stored in this node.
    /// </summary>
    public Q Element { get; }

    /// <summary>
    /// True if this node cuts on the X axis (even depth), false if it cuts on the Y axis (odd depth).
    /// </summary>
    public bool IsAxisCutX { get; }

    public int Depth { get; }

    public virtual bool IsLeaf => false;

    /// <summary>
    /// The first item of the entry this node was created from.
    /// </summary>
    public int Index { get; }

    /// <summary>
    /// Gets the leaves found below this node, left subtree first.
    /// </summary>
    public IEnumerable<KdTreeLeaf<Q>> GetLeaves()
    {
        var collected = new List<KdTreeLeaf<Q>>();
        RecursiveGetLeaves(LeftChild, collected);
        RecursiveGetLeaves(RightChild, collected);
        return collected;
    }

    /// <summary>
    /// Depth-first, left-to-right walk that appends every leaf reachable from <paramref name="leaf"/> to <paramref name="leaves"/>.
    /// </summary>
    private static void RecursiveGetLeaves(KdTreeNode<Q> leaf, List<KdTreeLeaf<Q>> leaves)
    {
        switch (leaf)
        {
            case null:
                return;

            case KdTreeLeaf<Q> terminal:
                leaves.Add(terminal);
                return;

            default:
                RecursiveGetLeaves(leaf.LeftChild, leaves);
                RecursiveGetLeaves(leaf.RightChild, leaves);
                return;
        }
    }

    public override string ToString() => "Node->" + Value.ToString();
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|