namespace UglyToad.PdfPig.DocumentLayoutAnalysis
{
    using Content;
    using Core;
    using System;
    using System.Collections.Generic;
    using System.Linq;

    /// <summary>
    /// A block of text.
    /// </summary>
    public class TextBlock
    {
        /// <summary>
        /// The separator used between lines in the block.
        /// </summary>
        public readonly string Separator;

        /// <summary>
        /// The text of the block.
        /// </summary>
        public string Text { get; }

        /// <summary>
        /// The text orientation of the block.
        /// </summary>
        public TextOrientation TextOrientation { get; }

        /// <summary>
        /// The rectangle completely containing the block.
        /// </summary>
        public PdfRectangle BoundingBox { get; }

        /// <summary>
        /// The text lines contained in the block.
        /// </summary>
        public IReadOnlyList<TextLine> TextLines { get; }

        /// <summary>
        /// The reading order index. Starts at 0. A value of -1 means the block is not ordered.
        /// </summary>
        public int ReadingOrder { get; private set; }

        /// <summary>
        /// Create a new <see cref="TextBlock"/>.
        /// </summary>
        /// <param name="lines">The lines contained in the block, in the correct order.</param>
        /// <param name="separator">The separator used between lines in the block.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="lines"/> is null.</exception>
        /// <exception cref="ArgumentException">Thrown when <paramref name="lines"/> is empty.</exception>
        public TextBlock(IReadOnlyList<TextLine> lines, string separator = "\n")
        {
            if (lines == null)
            {
                throw new ArgumentNullException(nameof(lines));
            }

            if (lines.Count == 0)
            {
                throw new ArgumentException("Empty lines provided.", nameof(lines));
            }

            Separator = separator;

            // A freshly created block is not ordered yet.
            ReadingOrder = -1;

            TextLines = lines;

            if (lines.Count == 1)
            {
                // Single line: the block simply mirrors that line.
                BoundingBox = lines[0].BoundingBox;
                Text = lines[0].Text;
                TextOrientation = lines[0].TextOrientation;
            }
            else
            {
                // The block keeps the orientation of its lines only if they all agree;
                // any disagreement downgrades the block to 'Other'.
                var tempTextOrientation = lines[0].TextOrientation;
                if (tempTextOrientation != TextOrientation.Other)
                {
                    foreach (var line in lines)
                    {
                        if (line.TextOrientation != tempTextOrientation)
                        {
                            tempTextOrientation = TextOrientation.Other;
                            break;
                        }
                    }
                }

                switch (tempTextOrientation)
                {
                    case TextOrientation.Horizontal:
                        BoundingBox = GetBoundingBoxH(lines);
                        break;

                    case TextOrientation.Rotate180:
                        BoundingBox = GetBoundingBox180(lines);
                        break;

                    case TextOrientation.Rotate90:
                        BoundingBox = GetBoundingBox90(lines);
                        break;

                    case TextOrientation.Rotate270:
                        BoundingBox = GetBoundingBox270(lines);
                        break;

                    case TextOrientation.Other:
                    default:
                        BoundingBox = GetBoundingBoxOther(lines);
                        break;
                }

                Text = string.Join(separator, lines.Select(x => x.Text));
                TextOrientation = tempTextOrientation;
            }
        }

        #region Bounding box
        /// <summary>
        /// Bounding box for horizontal lines: smallest bottom-left X and Y, largest
        /// right edge (bottom-left X plus width) and largest top-left Y over all lines.
        /// </summary>
        private static PdfRectangle GetBoundingBoxH(IReadOnlyList<TextLine> lines)
        {
            var blX = double.MaxValue;
            var trX = double.MinValue;

            var blY = double.MaxValue;
            var trY = double.MinValue;

            for (var i = 0; i < lines.Count; i++)
            {
                var line = lines[i];
                if (line.BoundingBox.BottomLeft.X < blX)
                {
                    blX = line.BoundingBox.BottomLeft.X;
                }

                if (line.BoundingBox.BottomLeft.Y < blY)
                {
                    blY = line.BoundingBox.BottomLeft.Y;
                }

                var right = line.BoundingBox.BottomLeft.X + line.BoundingBox.Width;
                if (right > trX)
                {
                    trX = right;
                }

                if (line.BoundingBox.TopLeft.Y > trY)
                {
                    trY = line.BoundingBox.TopLeft.Y;
                }
            }

            return new PdfRectangle(blX, blY, trX, trY);
        }

        /// <summary>
        /// Bounding box for lines rotated by 180 degrees. Mirrors <see cref="GetBoundingBoxH"/>:
        /// keeps the largest bottom-left X and Y, and the smallest far edge
        /// (bottom-left X minus width) and top-right Y over all lines.
        /// </summary>
        private static PdfRectangle GetBoundingBox180(IReadOnlyList<TextLine> lines)
        {
            var blX = double.MinValue;
            var blY = double.MinValue;

            var trX = double.MaxValue;
            var trY = double.MaxValue;

            for (var i = 0; i < lines.Count; i++)
            {
                var line = lines[i];
                if (line.BoundingBox.BottomLeft.X > blX)
                {
                    blX = line.BoundingBox.BottomLeft.X;
                }

                if (line.BoundingBox.BottomLeft.Y > blY)
                {
                    blY = line.BoundingBox.BottomLeft.Y;
                }

                // The width is subtracted because the line extends towards smaller X.
                var right = line.BoundingBox.BottomLeft.X - line.BoundingBox.Width;
                if (right < trX)
                {
                    trX = right;
                }

                if (line.BoundingBox.TopRight.Y < trY)
                {
                    trY = line.BoundingBox.TopRight.Y;
                }
            }

            return new PdfRectangle(blX, blY, trX, trY);
        }

        /// <summary>
        /// Bounding box for lines rotated by 90 degrees. Here <c>b</c> and <c>t</c> are X coordinates
        /// (smallest bottom-left X, largest bottom-left X plus height) while <c>l</c> and <c>r</c> are
        /// Y coordinates (largest bottom-left Y, smallest bottom-right Y).
        /// </summary>
        private static PdfRectangle GetBoundingBox90(IReadOnlyList<TextLine> lines)
        {
            var b = double.MaxValue;
            var r = double.MaxValue;

            var t = double.MinValue;
            var l = double.MinValue;

            for (var i = 0; i < lines.Count; i++)
            {
                var line = lines[i];
                if (line.BoundingBox.BottomLeft.X < b)
                {
                    b = line.BoundingBox.BottomLeft.X;
                }

                if (line.BoundingBox.BottomRight.Y < r)
                {
                    r = line.BoundingBox.BottomRight.Y;
                }

                // For a rotated line the height extends along the X axis.
                var right = line.BoundingBox.BottomLeft.X + line.BoundingBox.Height;
                if (right > t)
                {
                    t = right;
                }

                if (line.BoundingBox.BottomLeft.Y > l)
                {
                    l = line.BoundingBox.BottomLeft.Y;
                }
            }

            // NOTE(review): corner order is presumably (topLeft, topRight, bottomLeft, bottomRight)
            // - confirm against the PdfRectangle constructor.
            return new PdfRectangle(new PdfPoint(t, l), new PdfPoint(t, r),
                                    new PdfPoint(b, l), new PdfPoint(b, r));
        }

        /// <summary>
        /// Bounding box for lines rotated by 270 degrees. Mirrors <see cref="GetBoundingBox90"/>:
        /// <c>b</c> is the largest bottom-left X, <c>t</c> the smallest bottom-left X minus height,
        /// <c>l</c> the smallest bottom-left Y and <c>r</c> the largest bottom-right Y.
        /// </summary>
        private static PdfRectangle GetBoundingBox270(IReadOnlyList<TextLine> lines)
        {
            var t = double.MaxValue;
            var b = double.MinValue;
            var l = double.MaxValue;
            var r = double.MinValue;

            for (var i = 0; i < lines.Count; i++)
            {
                var line = lines[i];
                if (line.BoundingBox.BottomLeft.X > b)
                {
                    b = line.BoundingBox.BottomLeft.X;
                }

                if (line.BoundingBox.BottomLeft.Y < l)
                {
                    l = line.BoundingBox.BottomLeft.Y;
                }

                // For a rotated line the height extends along the X axis, towards smaller X here.
                var right = line.BoundingBox.BottomLeft.X - line.BoundingBox.Height;
                if (right < t)
                {
                    t = right;
                }

                if (line.BoundingBox.BottomRight.Y > r)
                {
                    r = line.BoundingBox.BottomRight.Y;
                }
            }

            // NOTE(review): corner order is presumably (topLeft, topRight, bottomLeft, bottomRight)
            // - confirm against the PdfRectangle constructor.
            return new PdfRectangle(new PdfPoint(t, l), new PdfPoint(t, r),
                                    new PdfPoint(b, l), new PdfPoint(b, r));
        }

        /// <summary>
        /// Bounding box for lines without a common axis-aligned orientation: the minimum area
        /// rectangle of all line corners, with its corners labelled so that its rotation is as
        /// close as possible to the baseline angle of the last line.
        /// </summary>
        private static PdfRectangle GetBoundingBoxOther(IReadOnlyList<TextLine> lines)
        {
            var points = lines.SelectMany(l => new[]
            {
                l.BoundingBox.BottomLeft,
                l.BoundingBox.BottomRight,
                l.BoundingBox.TopLeft,
                l.BoundingBox.TopRight
            });

            // Candidates bounding boxes: the same rectangle with its corners relabelled.
            var obb = Geometry.GeometryExtensions.MinimumAreaRectangle(points);
            var obb1 = new PdfRectangle(obb.BottomLeft, obb.TopLeft, obb.BottomRight, obb.TopRight);
            var obb2 = new PdfRectangle(obb.BottomRight, obb.BottomLeft, obb.TopRight, obb.TopLeft);
            var obb3 = new PdfRectangle(obb.TopRight, obb.BottomRight, obb.TopLeft, obb.BottomLeft);

            // Find the orientation of the OBB, using the baseline angle
            // Assumes line order is correct
            var lastLine = lines[lines.Count - 1];

            var baseLineAngle = Distances.BoundAngle180(Distances.Angle(lastLine.BoundingBox.BottomLeft, lastLine.BoundingBox.BottomRight));

            // Keep the candidate whose rotation differs least from the baseline angle.
            double deltaAngle = Math.Abs(Distances.BoundAngle180(obb.Rotation - baseLineAngle));
            double deltaAngle1 = Math.Abs(Distances.BoundAngle180(obb1.Rotation - baseLineAngle));
            if (deltaAngle1 < deltaAngle)
            {
                deltaAngle = deltaAngle1;
                obb = obb1;
            }

            double deltaAngle2 = Math.Abs(Distances.BoundAngle180(obb2.Rotation - baseLineAngle));
            if (deltaAngle2 < deltaAngle)
            {
                deltaAngle = deltaAngle2;
                obb = obb2;
            }

            double deltaAngle3 = Math.Abs(Distances.BoundAngle180(obb3.Rotation - baseLineAngle));
            if (deltaAngle3 < deltaAngle)
            {
                obb = obb3;
            }

            return obb;
        }
        #endregion

        /// <summary>
        /// Sets the <see cref="TextBlock"/>'s reading order.
        /// </summary>
        /// <param name="readingOrder">The reading order index. Must be greater than or equal to -1; -1 means the block is not ordered.</param>
        /// <exception cref="ArgumentException">Thrown when <paramref name="readingOrder"/> is less than -1.</exception>
        public void SetReadingOrder(int readingOrder)
        {
            if (readingOrder < -1)
            {
                throw new ArgumentException("The reading order should be more or equal to -1. A value of -1 means the block is not ordered.", nameof(readingOrder));
            }

            this.ReadingOrder = readingOrder;
        }

        /// <inheritdoc />
        public override string ToString()
        {
            return Text;
        }
    }
}