using System.Collections.Generic;
using System.Linq;
using UglyToad.PdfPig.Geometry;
using UglyToad.PdfPig.Util;

namespace UglyToad.PdfPig.DocumentLayoutAnalysis
{
    /// <summary>
    /// A node of the tree used by the layout analysis algorithm. A node is either a leaf
    /// (see <see cref="IsLeaf"/>) or an inner node that holds further <see cref="Children"/>.
    /// </summary>
    internal class XYNode
    {
        /// <summary>
        /// Returns true if this node is a leaf, false otherwise.
        /// </summary>
        public virtual bool IsLeaf => false;

        /// <summary>
        /// The rectangle completely containing the node.
        /// </summary>
        public PdfRectangle BoundingBox { get; set; }

        /// <summary>
        /// The children of the node.
        /// </summary>
        public XYNode[] Children { get; set; }

        /// <summary>
        /// Create a new <see cref="XYNode"/>.
        /// </summary>
        /// <param name="children">The node's children.</param>
        public XYNode(params XYNode[] children)
            : this((IEnumerable<XYNode>)children)
        {
            // The cast selects the IEnumerable overload; without it the call would bind to this constructor again.
        }

        /// <summary>
        /// Create a new <see cref="XYNode"/>.
        /// <para>
        /// When <paramref name="children"/> is null or empty, <see cref="Children"/> is set to an empty array
        /// and <see cref="BoundingBox"/> is left at its default value. Otherwise the bounding box is the
        /// smallest rectangle enclosing the bounding boxes of all children.
        /// </para>
        /// </summary>
        /// <param name="children">The node's children.</param>
        public XYNode(IEnumerable<XYNode> children)
        {
            // Materialize once: the sequence may be lazy, and enumerating it repeatedly could yield
            // a bounding box that does not match the stored children.
            XYNode[] nodes = children?.ToArray();

            if (nodes == null || nodes.Length == 0)
            {
                Children = EmptyArray<XYNode>.Instance;
                return;
            }

            Children = nodes;

            decimal left = nodes.Min(b => b.BoundingBox.Left);
            decimal right = nodes.Max(b => b.BoundingBox.Right);
            decimal bottom = nodes.Min(b => b.BoundingBox.Bottom);
            decimal top = nodes.Max(b => b.BoundingBox.Top);

            BoundingBox = new PdfRectangle(left, bottom, right, top);
        }

        /// <summary>
        /// Recursively counts the words included in this node.
        /// </summary>
        /// <returns>The sum of <see cref="CountWords"/> over every leaf below this node; 0 if there are no children.</returns>
        public virtual int CountWords()
        {
            return RecursiveCount(Children);
        }

        /// <summary>
        /// Recursively gets the leafs (last nodes) of this node.
        /// </summary>
        /// <returns>
        /// The leafs below this node. At each level the direct leafs are listed first, followed by the leafs
        /// of the inner nodes visited left to right on even levels and top to bottom on odd levels.
        /// </returns>
        public virtual List<XYLeaf> GetLeafs()
        {
            List<XYLeaf> leafs = new List<XYLeaf>();
            RecursiveGetLeafs(Children, leafs, 0);
            return leafs;
        }

        /// <summary>
        /// Sums the word counts of all leafs reachable from <paramref name="children"/>.
        /// A null collection counts as empty.
        /// </summary>
        private static int RecursiveCount(IEnumerable<XYNode> children)
        {
            if (children == null)
            {
                return 0;
            }

            int count = 0;
            foreach (XYNode node in children)
            {
                // Leafs report their own count; inner nodes are traversed through their children.
                count += node.IsLeaf ? node.CountWords() : RecursiveCount(node.Children);
            }

            return count;
        }

        /// <summary>
        /// Appends the leafs reachable from <paramref name="children"/> to <paramref name="leafs"/>.
        /// A null collection counts as empty.
        /// </summary>
        /// <param name="children">The nodes to inspect.</param>
        /// <param name="leafs">The list receiving the leafs.</param>
        /// <param name="level">The depth of <paramref name="children"/>; its parity selects the visiting order.</param>
        private static void RecursiveGetLeafs(XYNode[] children, List<XYLeaf> leafs, int level)
        {
            if (children == null || children.Length == 0)
            {
                return;
            }

            List<XYNode> notLeafs = new List<XYNode>();
            foreach (XYNode node in children)
            {
                if (node.IsLeaf)
                {
                    leafs.Add((XYLeaf)node);
                }
                else
                {
                    notLeafs.Add(node);
                }
            }

            // Even levels are treated as vertical cuts (order by left edge),
            // odd levels as horizontal cuts (order by top edge, highest first).
            bool isVerticalCut = level % 2 == 0;
            IEnumerable<XYNode> ordered = isVerticalCut
                ? notLeafs.OrderBy(x => x.BoundingBox.Left)
                : notLeafs.OrderByDescending(x => x.BoundingBox.Top);

            foreach (XYNode node in ordered)
            {
                RecursiveGetLeafs(node.Children, leafs, level + 1);
            }
        }

        /// <inheritdoc />
        public override string ToString()
        {
            return IsLeaf ? "Leaf" : "Node";
        }
    }
}