Update NearestNeighbourWordExtractor.cs

Remove the font name check (`string.Equals(l1.FontName, l2.FontName, StringComparison.OrdinalIgnoreCase)`), because some special characters or ligatures may belong to different font subsets.
This commit is contained in:
BobLd
2019-12-27 11:14:40 +00:00
committed by Eliot Jones
parent 3b79ebc5d5
commit 5e3f5651b8

View File

@@ -117,7 +117,7 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis
distMeasure, maxDistanceFunction,
l => l.EndBaseLine, l => l.StartBaseLine,
l => !string.IsNullOrWhiteSpace(l.Value),
(l1, l2) => string.Equals(l1.FontName, l2.FontName, StringComparison.OrdinalIgnoreCase) && !string.IsNullOrWhiteSpace(l2.Value),
(l1, l2) => !string.IsNullOrWhiteSpace(l2.Value),
maxDegreeOfParallelism).ToList();
List<Word> words = new List<Word>();