From 0d8e113f50308e3ee3f272b5a5316e2d5cd09dbc Mon Sep 17 00:00:00 2001 From: davebrokit <87085235+davebrokit@users.noreply.github.com> Date: Thu, 30 May 2024 09:27:54 +0100 Subject: [PATCH] Updated Document Layout Analysis (markdown) --- Document-Layout-Analysis.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Document-Layout-Analysis.md b/Document-Layout-Analysis.md index 45ff89a..6662ac0 100644 --- a/Document-Layout-Analysis.md +++ b/Document-Layout-Analysis.md @@ -177,12 +177,23 @@ using (var document = PdfDocument.Open(“document.pdf”)) foreach (var block in blocks) { // Do something + // E.g. Output the blocks + foreach (TextLine line in block.TextLines) + { + foreach (Word word in line.Words) + { + Console.Write(word.Text + " "); + } + } + } } } ``` + #### Advanced cases + The method can be tailored by providing a __minimum block width__, and __horizontal and vertical gap sizes/functions__: - Minimum block width is set to 1/3 of page width: