Files
PdfPig/src/UglyToad.PdfPig/DocumentLayoutAnalysis/IPageSegmenter.cs
BobLd eb9a9fd00e Document Layout Analysis - IPageSegmenter, Docstrum
- Create a TextBlock class
- Creates IPageSegmenter
- Add other useful distances: angle, etc.
- Update RecursiveXYCut
 - With IPageSegmenter and TextBlock
 - Make XYNode and XYLeaf internal
- Optimise (faster) NearestNeighbourWordExtractor and isolate the clustering algorithms for use outside of this class
- Implement a Docstrum inspired page segmentation algorithm
2019-08-10 16:01:27 +01:00

20 lines
776 B
C#

using System.Collections.Generic;
using UglyToad.PdfPig.Content;
namespace UglyToad.PdfPig.DocumentLayoutAnalysis
{
/// <summary>
/// Contract for page segmentation algorithms.
/// Page segmentation divides a page into areas, each consisting of a layout structure (blocks, lines, etc.).
/// <para>See 'Performance Comparison of Six Algorithms for Page Segmentation' by Faisal Shafait, Daniel Keysers, and Thomas M. Breuel.</para>
/// </summary>
/// <remarks>
/// The interface only fixes the input (a sequence of words) and the output (a read-only list of text blocks);
/// how words are grouped into blocks is left entirely to the implementing class.
/// </remarks>
public interface IPageSegmenter
{
/// <summary>
/// Groups the supplied words into text blocks.
/// </summary>
/// <remarks>
/// NOTE(review): this contract does not state how a null or empty <paramref name="pageWords"/> is handled,
/// nor whether the returned blocks follow any particular order - confirm against the implementations.
/// </remarks>
/// <param name="pageWords">The words to generate text blocks for.</param>
/// <returns>A read-only list of the text blocks produced by this segmentation approach.</returns>
IReadOnlyList<TextBlock> GetBlocks(IEnumerable<Word> pageWords);
}
}