From 71f4b6f40f74c0cdc5e8d461c449940dec9d791f Mon Sep 17 00:00:00 2001 From: BobLd <38405645+BobLd@users.noreply.github.com> Date: Sat, 20 Jun 2020 16:34:34 +0100 Subject: [PATCH] Updated Document Layout Analysis (markdown) --- Document-Layout-Analysis.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Document-Layout-Analysis.md b/Document-Layout-Analysis.md index 28db605..d911570 100644 --- a/Document-Layout-Analysis.md +++ b/Document-Layout-Analysis.md @@ -72,13 +72,16 @@ using (var document = PdfDocument.Open(@"document.pdf")) { // Ignore the letters that are space or belong to 'punctuation' array // These letters will be put in a single word - FilterPivot = letter => !string.IsNullOrWhiteSpace(letter.Value) && !punctuation.Contains(letter.Value), + FilterPivot = letter => !string.IsNullOrWhiteSpace(letter.Value) && + !punctuation.Contains(letter.Value), Filter = (pivot, candidate) => { - if (string.IsNullOrWhiteSpace(candidate.Value) || cannotEndWord.Contains(candidate.Value)) + if (string.IsNullOrWhiteSpace(candidate.Value) || + cannotEndWord.Contains(candidate.Value)) { - // start new word if the candidate neighbour is a space or belongs to 'cannotEndWord' array + // start new word if the candidate neighbour is + // a space or belongs to 'cannotEndWord' array return false; } else if (cannotStartWord.Contains(pivot.Value))