update DefaultPageSegmenter to use DlaOptions

This commit is contained in:
BobLd
2020-05-23 20:08:40 +01:00
committed by Eliot Jones
parent 1438fec741
commit a16f377d5a

View File

@@ -2,9 +2,11 @@
{
using Content;
using Core;
using System;
using System.Collections.Generic;
using System.Linq;
/// <inheritdoc />
/// <summary>
/// Default Page Segmenter. All words are included in one block.
/// </summary>
@@ -15,15 +17,43 @@
/// </summary>
public static DefaultPageSegmenter Instance { get; } = new DefaultPageSegmenter();
/// <inheritdoc />
/// <summary>
/// Get the blocks.
/// Get the blocks using default options values.
/// </summary>
/// <param name="pageWords">The words in the page.</param>
public IReadOnlyList<TextBlock> GetBlocks(IEnumerable<Word> pageWords)
/// <param name="words">The page's words to generate text blocks for.</param>
/// <returns>The text blocks produced by the options-taking overload when it is given a newly constructed <see cref="DefaultPageSegmenterOptions"/>.</returns>
public IReadOnlyList<TextBlock> GetBlocks(IEnumerable<Word> words)
{
if (pageWords.Count() == 0) return EmptyArray<TextBlock>.Instance;
return new List<TextBlock>() { new TextBlock(new XYLeaf(pageWords).GetLines()) };
return GetBlocks(words, new DefaultPageSegmenterOptions());
}
/// <summary>
/// Get the text blocks using options. All the words are placed in a single block.
/// </summary>
/// <param name="words">The page's words to generate text blocks for.</param>
/// <param name="options">The <see cref="DefaultPageSegmenterOptions"/> to use.</param>
/// <returns>
/// A list holding one <see cref="TextBlock"/> built from all the words,
/// or an empty list when <paramref name="words"/> is <c>null</c> or has no elements.
/// </returns>
/// <exception cref="ArgumentException">
/// <paramref name="options"/> is not a <see cref="DefaultPageSegmenterOptions"/> (this includes <c>null</c>).
/// </exception>
public IReadOnlyList<TextBlock> GetBlocks(IEnumerable<Word> words, DlaOptions options)
{
    // The options type is validated before the words are inspected, so an unsupported
    // options object is rejected even when the page has no words.
    if (!(options is DefaultPageSegmenterOptions dOptions))
    {
        throw new ArgumentException("Options provided must be of type " + nameof(DefaultPageSegmenterOptions) + ".", nameof(options));
    }

    if (words == null)
    {
        return EmptyArray<TextBlock>.Instance;
    }

    // Take a single snapshot of the sequence: the emptiness check and the leaf construction
    // then see the same elements, and a deferred query is not evaluated twice.
    var pageWords = words as IReadOnlyCollection<Word> ?? words.ToList();
    if (pageWords.Count == 0)
    {
        return EmptyArray<TextBlock>.Instance;
    }

    return new List<TextBlock> { new TextBlock(new XYLeaf(pageWords).GetLines(dOptions.WordSeparator), dOptions.LineSeparator) };
}
/// <summary>
/// Options accepted by <see cref="DefaultPageSegmenter"/>.
/// Declares no members of its own; everything it exposes comes from <see cref="PageSegmenterOptions"/>.
/// </summary>
public class DefaultPageSegmenterOptions : PageSegmenterOptions
{
}
}
}
}