Fix usage of List.Contains

This commit is contained in:
theolivenbaum 2025-07-26 21:42:40 +02:00 committed by BobLd
parent 0ebbe0540d
commit febfa4d4b3
3 changed files with 65 additions and 9 deletions

View File

@ -128,13 +128,13 @@
throw new ArgumentException("The algorithm cannot be used with a document of less than 2 pages.", nameof(pagesTextBlocks));
}
ConcurrentDictionary<int, List<TextBlock>> pageDecorations = new ConcurrentDictionary<int, List<TextBlock>>();
ConcurrentDictionary<int, OrderedSet<TextBlock>> pageDecorations = new ConcurrentDictionary<int, OrderedSet<TextBlock>>();
ParallelOptions parallelOptions = new ParallelOptions() { MaxDegreeOfParallelism = maxDegreeOfParallelism };
Parallel.For(0, pagesTextBlocks.Count, parallelOptions, p =>
{
if (!pageDecorations.TryAdd(p, new List<TextBlock>()))
if (!pageDecorations.TryAdd(p, new OrderedSet<TextBlock>()))
{
throw new ArgumentException("Cannot add element with index " + p + " in ConcurrentDictionary.");
}
@ -165,7 +165,7 @@
var score = Score(current, previousPage, nextPage, minimumEditDistanceNormalised, similarityThreshold, n);
if (score >= similarityThreshold)
{
if (!pageDecorations[p].Contains(current)) pageDecorations[p].Add(current);
pageDecorations[p].TryAdd(current);
}
}
@ -180,7 +180,7 @@
var score = Score(current, previousPage, nextPage, minimumEditDistanceNormalised, similarityThreshold, n);
if (score >= similarityThreshold)
{
if (!pageDecorations[p].Contains(current)) pageDecorations[p].Add(current);
pageDecorations[p].TryAdd(current);
}
}
@ -195,7 +195,7 @@
var score = Score(current, previousPage, nextPage, minimumEditDistanceNormalised, similarityThreshold, n);
if (score >= similarityThreshold)
{
if (!pageDecorations[p].Contains(current)) pageDecorations[p].Add(current);
pageDecorations[p].TryAdd(current);
}
}
@ -210,12 +210,12 @@
var score = Score(current, previousPage, nextPage, minimumEditDistanceNormalised, similarityThreshold, n);
if (score >= similarityThreshold)
{
if (!pageDecorations[p].Contains(current)) pageDecorations[p].Add(current);
pageDecorations[p].TryAdd(current);
}
}
});
return pageDecorations.OrderBy(x => x.Key).Select(x => x.Value).ToList();
return pageDecorations.OrderBy(x => x.Key).Select(x => x.Value.GetList()).ToList();
}
/// <summary>

View File

@ -0,0 +1,56 @@
namespace UglyToad.PdfPig.DocumentLayoutAnalysis
{
using System;
using System.Collections;
using System.Collections.Generic;
using System.Text;
/// <summary>
/// A collection of distinct items that remembers the order in which each item was first added.
/// Membership is tracked by a <see cref="HashSet{T}"/> and insertion order by a <see cref="List{T}"/>;
/// both are always updated together.
/// </summary>
/// <remarks>
/// No locking is performed by this type.
/// </remarks>
/// <typeparam name="T">The type of the items stored in the set.</typeparam>
internal class OrderedSet<T>
{
    // Answers "is this item already present?" using the configured comparer.
    private readonly HashSet<T> _set;

    // Holds the same items as _set, in first-insertion order.
    private readonly List<T> _list;

    /// <summary>
    /// Creates an empty set that compares items with <see cref="EqualityComparer{T}.Default"/>.
    /// </summary>
    public OrderedSet() : this(EqualityComparer<T>.Default)
    {
    }

    /// <summary>
    /// Creates an empty set that compares items with the supplied comparer.
    /// </summary>
    /// <param name="comparer">The comparer handed to the underlying <see cref="HashSet{T}"/>.</param>
    public OrderedSet(IEqualityComparer<T> comparer)
    {
        _set = new HashSet<T>(comparer);
        _list = new List<T>();
    }

    /// <summary>
    /// The number of distinct items currently stored.
    /// </summary>
    public int Count => _set.Count;

    /// <summary>
    /// Adds the item at the end of the ordering if it is not already present.
    /// </summary>
    /// <param name="item">The item to add.</param>
    /// <returns><c>true</c> if the item was added; <c>false</c> if an equal item was already present.</returns>
    public bool TryAdd(T item)
    {
        // HashSet<T>.Add already reports whether the item was new, so a separate
        // Contains call (a second hash lookup) is unnecessary.
        if (!_set.Add(item))
        {
            return false;
        }

        _list.Add(item);
        return true;
    }

    /// <summary>
    /// Removes every item from the set.
    /// </summary>
    public void Clear()
    {
        _list.Clear();
        _set.Clear();
    }

    /// <summary>
    /// Checks whether an equal item has been added to the set.
    /// </summary>
    /// <param name="item">The item to look for.</param>
    /// <returns><c>true</c> if an equal item is present; otherwise <c>false</c>.</returns>
    public bool Contains(T item)
    {
        // TryAdd accepts null items, so null must be looked up like any other value;
        // otherwise Contains would disagree with what the set actually holds.
        return _set.Contains(item);
    }

    /// <summary>
    /// Copies the items, in insertion order, into <paramref name="array"/> starting at
    /// <paramref name="arrayIndex"/>. Delegates to <see cref="List{T}.CopyTo(T[], int)"/>.
    /// </summary>
    /// <param name="array">The destination array.</param>
    /// <param name="arrayIndex">The index in <paramref name="array"/> at which copying begins.</param>
    public void CopyTo(T[] array, int arrayIndex)
    {
        _list.CopyTo(array, arrayIndex);
    }

    /// <summary>
    /// Returns the items in insertion order.
    /// </summary>
    /// <remarks>
    /// This is the internal backing list, not a copy. Adding to or removing from it directly
    /// bypasses the membership set and leaves this instance in an inconsistent state.
    /// </remarks>
    /// <returns>The live list of items, in the order they were first added.</returns>
    public List<T> GetList()
    {
        return _list;
    }
}
}

View File

@ -122,12 +122,12 @@
/// Returns an equivalent token where any indirect references of child objects are
/// recursively traversed and resolved.
/// </summary>
internal static T? Resolve<T>(this T? token, IPdfTokenScanner scanner, List<IndirectReference>? visited = null) where T : IToken
internal static T? Resolve<T>(this T? token, IPdfTokenScanner scanner, HashSet<IndirectReference>? visited = null) where T : IToken
{
// When the caller supplies no visited collection, start from an empty one;
// the result of ResolveInternal is cast back to the caller's token type.
return (T?)ResolveInternal(token, scanner, visited ?? []);
}
private static IToken? ResolveInternal(this IToken? token, IPdfTokenScanner scanner, List<IndirectReference> visited)
private static IToken? ResolveInternal(this IToken? token, IPdfTokenScanner scanner, HashSet<IndirectReference> visited)
{
if (token is StreamToken stream)
{