mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-09-20 03:17:57 +08:00
Make clustering algos public and use shorter names
This commit is contained in:
@@ -9,7 +9,7 @@
|
|||||||
/// <summary>
|
/// <summary>
|
||||||
/// Clustering Algorithms.
|
/// Clustering Algorithms.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
internal class ClusteringAlgorithms
|
public static class Clustering
|
||||||
{
|
{
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Algorithm to group elements using nearest neighbours.
|
/// Algorithm to group elements using nearest neighbours.
|
||||||
@@ -26,7 +26,7 @@
|
|||||||
/// <param name="maxDegreeOfParallelism">Sets the maximum number of concurrent tasks enabled.
|
/// <param name="maxDegreeOfParallelism">Sets the maximum number of concurrent tasks enabled.
|
||||||
/// <para>A positive property value limits the number of concurrent operations to the set value.
|
/// <para>A positive property value limits the number of concurrent operations to the set value.
|
||||||
/// If it is -1, there is no limit on the number of concurrently running operations.</para></param>
|
/// If it is -1, there is no limit on the number of concurrently running operations.</para></param>
|
||||||
internal static IEnumerable<HashSet<int>> ClusterNearestNeighbours<T>(IReadOnlyList<T> elements,
|
internal static IEnumerable<HashSet<int>> NearestNeighbours<T>(IReadOnlyList<T> elements,
|
||||||
Func<PdfPoint, PdfPoint, double> distMeasure,
|
Func<PdfPoint, PdfPoint, double> distMeasure,
|
||||||
Func<T, T, double> maxDistanceFunction,
|
Func<T, T, double> maxDistanceFunction,
|
||||||
Func<T, PdfPoint> pivotPoint, Func<T, PdfPoint> candidatesPoint,
|
Func<T, PdfPoint> pivotPoint, Func<T, PdfPoint> candidatesPoint,
|
||||||
@@ -94,7 +94,7 @@
|
|||||||
/// <param name="maxDegreeOfParallelism">Sets the maximum number of concurrent tasks enabled.
|
/// <param name="maxDegreeOfParallelism">Sets the maximum number of concurrent tasks enabled.
|
||||||
/// <para>A positive property value limits the number of concurrent operations to the set value.
|
/// <para>A positive property value limits the number of concurrent operations to the set value.
|
||||||
/// If it is -1, there is no limit on the number of concurrently running operations.</para></param>
|
/// If it is -1, there is no limit on the number of concurrently running operations.</para></param>
|
||||||
internal static IEnumerable<HashSet<int>> ClusterNearestNeighbours<T>(IReadOnlyList<T> elements, int k,
|
internal static IEnumerable<HashSet<int>> NearestNeighbours<T>(IReadOnlyList<T> elements, int k,
|
||||||
Func<PdfPoint, PdfPoint, double> distMeasure,
|
Func<PdfPoint, PdfPoint, double> distMeasure,
|
||||||
Func<T, T, double> maxDistanceFunction,
|
Func<T, T, double> maxDistanceFunction,
|
||||||
Func<T, PdfPoint> pivotPoint, Func<T, PdfPoint> candidatesPoint,
|
Func<T, PdfPoint> pivotPoint, Func<T, PdfPoint> candidatesPoint,
|
||||||
@@ -161,7 +161,7 @@
|
|||||||
/// <param name="maxDegreeOfParallelism">Sets the maximum number of concurrent tasks enabled.
|
/// <param name="maxDegreeOfParallelism">Sets the maximum number of concurrent tasks enabled.
|
||||||
/// <para>A positive property value limits the number of concurrent operations to the set value.
|
/// <para>A positive property value limits the number of concurrent operations to the set value.
|
||||||
/// If it is -1, there is no limit on the number of concurrently running operations.</para></param>
|
/// If it is -1, there is no limit on the number of concurrently running operations.</para></param>
|
||||||
internal static IEnumerable<HashSet<int>> ClusterNearestNeighbours<T>(T[] elements,
|
internal static IEnumerable<HashSet<int>> NearestNeighbours<T>(IReadOnlyList<T> elements,
|
||||||
Func<PdfLine, PdfLine, double> distMeasure,
|
Func<PdfLine, PdfLine, double> distMeasure,
|
||||||
Func<T, T, double> maxDistanceFunction,
|
Func<T, T, double> maxDistanceFunction,
|
||||||
Func<T, PdfLine> pivotLine, Func<T, PdfLine> candidatesLine,
|
Func<T, PdfLine> pivotLine, Func<T, PdfLine> candidatesLine,
|
||||||
@@ -185,12 +185,12 @@
|
|||||||
* (i,j,k) will form a group and (m,n) will form another group.
|
* (i,j,k) will form a group and (m,n) will form another group.
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
|
|
||||||
int[] indexes = Enumerable.Repeat(-1, elements.Length).ToArray();
|
int[] indexes = Enumerable.Repeat(-1, elements.Count).ToArray();
|
||||||
|
|
||||||
ParallelOptions parallelOptions = new ParallelOptions() { MaxDegreeOfParallelism = maxDegreeOfParallelism };
|
ParallelOptions parallelOptions = new ParallelOptions() { MaxDegreeOfParallelism = maxDegreeOfParallelism };
|
||||||
|
|
||||||
// 1. Find nearest neighbours indexes
|
// 1. Find nearest neighbours indexes
|
||||||
Parallel.For(0, elements.Length, parallelOptions, e =>
|
Parallel.For(0, elements.Count, parallelOptions, e =>
|
||||||
{
|
{
|
||||||
var pivot = elements[e];
|
var pivot = elements[e];
|
||||||
|
|
@@ -197,7 +197,7 @@
|
|||||||
private static IEnumerable<TextLine> GetLines(List<Word> words, double maxDist, AngleBounds withinLine, int maxDegreeOfParallelism)
|
private static IEnumerable<TextLine> GetLines(List<Word> words, double maxDist, AngleBounds withinLine, int maxDegreeOfParallelism)
|
||||||
{
|
{
|
||||||
TextDirection textDirection = words[0].TextDirection;
|
TextDirection textDirection = words[0].TextDirection;
|
||||||
var groupedIndexes = ClusteringAlgorithms.ClusterNearestNeighbours(words, 2, Distances.Euclidean,
|
var groupedIndexes = Clustering.NearestNeighbours(words, 2, Distances.Euclidean,
|
||||||
(pivot, candidate) => maxDist,
|
(pivot, candidate) => maxDist,
|
||||||
pivot => pivot.BoundingBox.BottomRight, candidate => candidate.BoundingBox.BottomLeft,
|
pivot => pivot.BoundingBox.BottomRight, candidate => candidate.BoundingBox.BottomLeft,
|
||||||
pivot => true,
|
pivot => true,
|
||||||
@@ -246,7 +246,7 @@
|
|||||||
new PdfPoint(left + d / 2, l2.Point1.Y));
|
new PdfPoint(left + d / 2, l2.Point1.Y));
|
||||||
}
|
}
|
||||||
|
|
||||||
var groupedIndexes = ClusteringAlgorithms.ClusterNearestNeighbours(lines,
|
var groupedIndexes = Clustering.NearestNeighbours(lines,
|
||||||
euclidianOverlappingMiddleDistance,
|
euclidianOverlappingMiddleDistance,
|
||||||
(pivot, candidate) => maxDist,
|
(pivot, candidate) => maxDist,
|
||||||
pivot => new PdfLine(pivot.BoundingBox.BottomLeft, pivot.BoundingBox.BottomRight),
|
pivot => new PdfLine(pivot.BoundingBox.BottomLeft, pivot.BoundingBox.BottomRight),
|
||||||
|
@@ -106,7 +106,7 @@
|
|||||||
throw new ArgumentException("NearestNeighbourWordExtractor.GetWords(): Mixed Text Direction.");
|
throw new ArgumentException("NearestNeighbourWordExtractor.GetWords(): Mixed Text Direction.");
|
||||||
}
|
}
|
||||||
|
|
||||||
var groupedIndexes = ClusteringAlgorithms.ClusterNearestNeighbours(pageLetters,
|
var groupedIndexes = Clustering.NearestNeighbours(pageLetters,
|
||||||
distMeasure, maxDistanceFunction,
|
distMeasure, maxDistanceFunction,
|
||||||
l => l.EndBaseLine, l => l.StartBaseLine,
|
l => l.EndBaseLine, l => l.StartBaseLine,
|
||||||
l => !string.IsNullOrWhiteSpace(l.Value),
|
l => !string.IsNullOrWhiteSpace(l.Value),
|
||||||
|
Reference in New Issue
Block a user