mirror of
https://github.com/UglyToad/PdfPig.git
synced 2026-03-10 00:23:29 +08:00
Updated Document Layout Analysis (markdown)
@@ -72,13 +72,16 @@ using (var document = PdfDocument.Open(@"document.pdf"))
|
||||
{
|
||||
// Ignore letters that are whitespace or belong to the 'punctuation' array
|
||||
// These letters will be put in a single word
|
||||
FilterPivot = letter => !string.IsNullOrWhiteSpace(letter.Value) && !punctuation.Contains(letter.Value),
|
||||
FilterPivot = letter => !string.IsNullOrWhiteSpace(letter.Value) &&
|
||||
!punctuation.Contains(letter.Value),
|
||||
|
||||
Filter = (pivot, candidate) =>
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(candidate.Value) || cannotEndWord.Contains(candidate.Value))
|
||||
if (string.IsNullOrWhiteSpace(candidate.Value) ||
|
||||
cannotEndWord.Contains(candidate.Value))
|
||||
{
|
||||
// start a new word if the candidate neighbour is a space or belongs to the 'cannotEndWord' array
|
||||
// start new word if the candidate neighbour is
|
||||
// a space or belongs to the 'cannotEndWord' array
|
||||
return false;
|
||||
}
|
||||
else if (cannotStartWord.Contains(pivot.Value))
|
||||
|
||||
Reference in New Issue
Block a user