mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-10-15 19:54:52 +08:00
Update NearestNeighbourWordExtractor.cs
Removing the font name check (`string.Equals(l1.FontName, l2.FontName, StringComparison.OrdinalIgnoreCase)`) because some special characters or ligatures may belong to different font subsets, and so have different font names even though they are part of the same word.
This commit is contained in:
@@ -117,7 +117,7 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis
|
||||
distMeasure, maxDistanceFunction,
|
||||
l => l.EndBaseLine, l => l.StartBaseLine,
|
||||
l => !string.IsNullOrWhiteSpace(l.Value),
|
||||
(l1, l2) => string.Equals(l1.FontName, l2.FontName, StringComparison.OrdinalIgnoreCase) && !string.IsNullOrWhiteSpace(l2.Value),
|
||||
(l1, l2) => !string.IsNullOrWhiteSpace(l2.Value),
|
||||
maxDegreeOfParallelism).ToList();
|
||||
|
||||
List<Word> words = new List<Word>();
|
||||
|
Reference in New Issue
Block a user