From a16f377d5a9a922968eb24c53c858d74942bc9b9 Mon Sep 17 00:00:00 2001 From: BobLd Date: Sat, 23 May 2020 20:08:40 +0100 Subject: [PATCH] update DefaultPageSegmenter to use DlaOptions --- .../PageSegmenter/DefaultPageSegmenter.cs | 44 ++++++++++++++++--- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/PageSegmenter/DefaultPageSegmenter.cs b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/PageSegmenter/DefaultPageSegmenter.cs index bd8fe89a..ce0c0944 100644 --- a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/PageSegmenter/DefaultPageSegmenter.cs +++ b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/PageSegmenter/DefaultPageSegmenter.cs @@ -2,9 +2,11 @@ { using Content; using Core; + using System; using System.Collections.Generic; using System.Linq; + /// /// /// Default Page Segmenter. All words are included in one block. /// @@ -15,15 +17,43 @@ /// public static DefaultPageSegmenter Instance { get; } = new DefaultPageSegmenter(); + /// /// - /// Get the blocks. + /// Get the blocks using default options values. /// - /// The words in the page. - public IReadOnlyList GetBlocks(IEnumerable pageWords) + /// The page's words to generate text blocks for. + public IReadOnlyList GetBlocks(IEnumerable words) { - if (pageWords.Count() == 0) return EmptyArray.Instance; - - return new List() { new TextBlock(new XYLeaf(pageWords).GetLines()) }; + return GetBlocks(words, new DefaultPageSegmenterOptions()); } + + /// + /// Get the text blocks using options. + /// + /// The page's words to generate text blocks for. + /// The to use. + /// The s generated by the default method. + public IReadOnlyList GetBlocks(IEnumerable words, DlaOptions options) + { + if (options is DefaultPageSegmenterOptions dOptions) + { + if (words?.Any() != true) + { + return EmptyArray.Instance; + } + + return new List() { new TextBlock(new XYLeaf(words).GetLines(dOptions.WordSeparator), dOptions.LineSeparator) }; + } + else + { + throw new ArgumentException("Options provided must be of type " + nameof(DefaultPageSegmenterOptions) + ".", nameof(options)); + } + } + + /// + /// Default page segmenter options. + /// + public class DefaultPageSegmenterOptions : PageSegmenterOptions + { } } -} +} \ No newline at end of file