Fix usage of List.Contains

This commit is contained in:
theolivenbaum 2025-07-26 21:42:40 +02:00 committed by BobLd
parent 0ebbe0540d
commit febfa4d4b3
3 changed files with 65 additions and 9 deletions

View File

@ -128,13 +128,13 @@
throw new ArgumentException("The algorithm cannot be used with a document of less than 2 pages.", nameof(pagesTextBlocks)); throw new ArgumentException("The algorithm cannot be used with a document of less than 2 pages.", nameof(pagesTextBlocks));
} }
ConcurrentDictionary<int, List<TextBlock>> pageDecorations = new ConcurrentDictionary<int, List<TextBlock>>(); ConcurrentDictionary<int, OrderedSet<TextBlock>> pageDecorations = new ConcurrentDictionary<int, OrderedSet<TextBlock>>();
ParallelOptions parallelOptions = new ParallelOptions() { MaxDegreeOfParallelism = maxDegreeOfParallelism }; ParallelOptions parallelOptions = new ParallelOptions() { MaxDegreeOfParallelism = maxDegreeOfParallelism };
Parallel.For(0, pagesTextBlocks.Count, parallelOptions, p => Parallel.For(0, pagesTextBlocks.Count, parallelOptions, p =>
{ {
if (!pageDecorations.TryAdd(p, new List<TextBlock>())) if (!pageDecorations.TryAdd(p, new OrderedSet<TextBlock>()))
{ {
throw new ArgumentException("Cannot add element with index " + p + " in ConcurrentDictionary."); throw new ArgumentException("Cannot add element with index " + p + " in ConcurrentDictionary.");
} }
@ -165,7 +165,7 @@
var score = Score(current, previousPage, nextPage, minimumEditDistanceNormalised, similarityThreshold, n); var score = Score(current, previousPage, nextPage, minimumEditDistanceNormalised, similarityThreshold, n);
if (score >= similarityThreshold) if (score >= similarityThreshold)
{ {
if (!pageDecorations[p].Contains(current)) pageDecorations[p].Add(current); pageDecorations[p].TryAdd(current);
} }
} }
@ -180,7 +180,7 @@
var score = Score(current, previousPage, nextPage, minimumEditDistanceNormalised, similarityThreshold, n); var score = Score(current, previousPage, nextPage, minimumEditDistanceNormalised, similarityThreshold, n);
if (score >= similarityThreshold) if (score >= similarityThreshold)
{ {
if (!pageDecorations[p].Contains(current)) pageDecorations[p].Add(current); pageDecorations[p].TryAdd(current);
} }
} }
@ -195,7 +195,7 @@
var score = Score(current, previousPage, nextPage, minimumEditDistanceNormalised, similarityThreshold, n); var score = Score(current, previousPage, nextPage, minimumEditDistanceNormalised, similarityThreshold, n);
if (score >= similarityThreshold) if (score >= similarityThreshold)
{ {
if (!pageDecorations[p].Contains(current)) pageDecorations[p].Add(current); pageDecorations[p].TryAdd(current);
} }
} }
@ -210,12 +210,12 @@
var score = Score(current, previousPage, nextPage, minimumEditDistanceNormalised, similarityThreshold, n); var score = Score(current, previousPage, nextPage, minimumEditDistanceNormalised, similarityThreshold, n);
if (score >= similarityThreshold) if (score >= similarityThreshold)
{ {
if (!pageDecorations[p].Contains(current)) pageDecorations[p].Add(current); pageDecorations[p].TryAdd(current);
} }
} }
}); });
return pageDecorations.OrderBy(x => x.Key).Select(x => x.Value).ToList(); return pageDecorations.OrderBy(x => x.Key).Select(x => x.Value.GetList()).ToList();
} }
/// <summary> /// <summary>

View File

@ -0,0 +1,56 @@
namespace UglyToad.PdfPig.DocumentLayoutAnalysis
{
using System;
using System.Collections;
using System.Collections.Generic;
using System.Text;
/// <summary>
/// A collection of unique items that also remembers the order in which the items were first added.
/// Membership is tracked by a <see cref="HashSet{T}"/> and insertion order by a parallel <see cref="List{T}"/>.
/// </summary>
/// <remarks>
/// This type does no locking of its own.
/// </remarks>
/// <typeparam name="T">The type of the items stored in the set.</typeparam>
internal class OrderedSet<T>
{
    // Answers "is this item already present?".
    private readonly HashSet<T> _set;

    // Holds the same items as _set, in the order they were first added.
    private readonly List<T> _list;

    /// <summary>
    /// Creates an empty set that compares items with <see cref="EqualityComparer{T}.Default"/>.
    /// </summary>
    public OrderedSet() : this(EqualityComparer<T>.Default)
    {
    }

    /// <summary>
    /// Creates an empty set that compares items with the given comparer.
    /// </summary>
    /// <param name="comparer">The comparer handed to the underlying <see cref="HashSet{T}"/>.</param>
    public OrderedSet(IEqualityComparer<T> comparer)
    {
        _set = new HashSet<T>(comparer);
        _list = new List<T>();
    }

    /// <summary>
    /// The number of distinct items currently stored.
    /// </summary>
    public int Count => _set.Count;

    /// <summary>
    /// Adds the item to the end of the ordering if it is not already present.
    /// </summary>
    /// <param name="item">The item to add.</param>
    /// <returns><c>true</c> if the item was added; <c>false</c> if an equal item was already present.</returns>
    public bool TryAdd(T item)
    {
        // HashSet<T>.Add already reports whether the item was new, so a separate
        // Contains call (and a second hash computation) is not needed.
        if (!_set.Add(item))
        {
            return false;
        }

        _list.Add(item);
        return true;
    }

    /// <summary>
    /// Removes every item.
    /// </summary>
    public void Clear()
    {
        _list.Clear();
        _set.Clear();
    }

    /// <summary>
    /// Checks whether an equal item has been added.
    /// </summary>
    /// <param name="item">The item to look for.</param>
    /// <returns><c>true</c> if the item is present; otherwise <c>false</c>.</returns>
    public bool Contains(T item)
    {
        // Defer entirely to the set so the answer always agrees with TryAdd,
        // including for a null item (which TryAdd accepts).
        return _set.Contains(item);
    }

    /// <summary>
    /// Copies the items, in insertion order, into <paramref name="array"/> starting at <paramref name="arrayIndex"/>.
    /// </summary>
    /// <param name="array">The destination array.</param>
    /// <param name="arrayIndex">The index in <paramref name="array"/> at which copying begins.</param>
    public void CopyTo(T[] array, int arrayIndex)
    {
        _list.CopyTo(array, arrayIndex);
    }

    /// <summary>
    /// Gets the items in insertion order.
    /// </summary>
    /// <remarks>
    /// The returned list is the internal list itself, not a copy. Adding to or removing from it
    /// directly does not update the membership set, so callers should treat it as read-only.
    /// </remarks>
    /// <returns>The list of items in the order they were first added.</returns>
    public List<T> GetList()
    {
        return _list;
    }
}
}

View File

@ -122,12 +122,12 @@
/// Returns an equivalent token where any indirect references of child objects are /// Returns an equivalent token where any indirect references of child objects are
/// recursively traversed and resolved. /// recursively traversed and resolved.
/// </summary> /// </summary>
internal static T? Resolve<T>(this T? token, IPdfTokenScanner scanner, List<IndirectReference>? visited = null) where T : IToken internal static T? Resolve<T>(this T? token, IPdfTokenScanner scanner, HashSet<IndirectReference>? visited = null) where T : IToken
{ {
return (T?)ResolveInternal(token, scanner, visited ?? []); return (T?)ResolveInternal(token, scanner, visited ?? []);
} }
private static IToken? ResolveInternal(this IToken? token, IPdfTokenScanner scanner, List<IndirectReference> visited) private static IToken? ResolveInternal(this IToken? token, IPdfTokenScanner scanner, HashSet<IndirectReference> visited)
{ {
if (token is StreamToken stream) if (token is StreamToken stream)
{ {