mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-10-14 10:55:04 +08:00
allow oriented bounding box for TextLine
This commit is contained in:
@@ -57,34 +57,295 @@
|
||||
|
||||
Words = words;
|
||||
|
||||
Text = string.Join(" ", words.Where(s => !string.IsNullOrWhiteSpace(s.Text)).Select(x => x.Text));
|
||||
|
||||
var normalisedBoundingBoxes = words.Select(x => NormaliseRectangle(x.BoundingBox)).ToList();
|
||||
var minX = normalisedBoundingBoxes.Min(x => x.Left);
|
||||
var minY = normalisedBoundingBoxes.Min(x => x.Bottom);
|
||||
var maxX = normalisedBoundingBoxes.Max(x => x.Right);
|
||||
var maxY = normalisedBoundingBoxes.Max(x => x.Top);
|
||||
|
||||
BoundingBox = new PdfRectangle(minX, minY, maxX, maxY);
|
||||
|
||||
if (words.All(x => x.TextOrientation == words[0].TextOrientation))
|
||||
if (Words.Count == 1)
|
||||
{
|
||||
BoundingBox = Words[0].BoundingBox;
|
||||
Text = Words[0].Text;
|
||||
TextOrientation = words[0].TextOrientation;
|
||||
}
|
||||
else
|
||||
{
|
||||
TextOrientation = TextOrientation.Other;
|
||||
var tempTextOrientation = words[0].TextOrientation;
|
||||
if (tempTextOrientation != TextOrientation.Other)
|
||||
{
|
||||
foreach (var letter in words)
|
||||
{
|
||||
if (letter.TextOrientation != tempTextOrientation)
|
||||
{
|
||||
tempTextOrientation = TextOrientation.Other;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
switch (tempTextOrientation)
|
||||
{
|
||||
case TextOrientation.Horizontal:
|
||||
BoundingBox = GetBoundingBoxH(words);
|
||||
break;
|
||||
|
||||
case TextOrientation.Rotate180:
|
||||
BoundingBox = GetBoundingBox180(words);
|
||||
break;
|
||||
|
||||
case TextOrientation.Rotate90:
|
||||
BoundingBox = GetBoundingBox90(words);
|
||||
break;
|
||||
|
||||
case TextOrientation.Rotate270:
|
||||
BoundingBox = GetBoundingBox270(words);
|
||||
break;
|
||||
|
||||
case TextOrientation.Other:
|
||||
default:
|
||||
BoundingBox = GetBoundingBoxOther(words);
|
||||
break;
|
||||
}
|
||||
|
||||
Text = string.Join(Separator, words.Where(s => !string.IsNullOrWhiteSpace(s.Text)).Select(x => x.Text));
|
||||
TextOrientation = tempTextOrientation;
|
||||
}
|
||||
}
|
||||
|
||||
private PdfRectangle NormaliseRectangle(PdfRectangle rectangle)
|
||||
#region Bounding box
|
||||
/// <summary>
/// Builds the bounding box of a line from its words using the extreme
/// coordinates of each word's bounding box. Selected by the constructor
/// when every word is <see cref="TextOrientation.Horizontal"/>.
/// </summary>
/// <param name="words">The words that make up the line.</param>
/// <returns>
/// A rectangle created from the smallest <c>BottomLeft.X</c>, the smallest
/// <c>BottomLeft.Y</c>, the largest <c>BottomLeft.X + Width</c> and the largest
/// <c>TopLeft.Y</c> found across <paramref name="words"/>.
/// </returns>
private PdfRectangle GetBoundingBoxH(IReadOnlyList<Word> words)
{
    // Sentinels: any real coordinate replaces them on the first iteration.
    var blX = double.MaxValue;
    var trX = double.MinValue;
    var blY = double.MaxValue;
    var trY = double.MinValue;

    for (var i = 0; i < words.Count; i++)
    {
        var word = words[i];
        if (word.BoundingBox.BottomLeft.X < blX)
        {
            blX = word.BoundingBox.BottomLeft.X;
        }

        if (word.BoundingBox.BottomLeft.Y < blY)
        {
            blY = word.BoundingBox.BottomLeft.Y;
        }

        // The far edge is derived from the bottom-left corner plus the width.
        var right = word.BoundingBox.BottomLeft.X + word.BoundingBox.Width;
        if (right > trX)
        {
            trX = right;
        }

        if (word.BoundingBox.TopLeft.Y > trY)
        {
            trY = word.BoundingBox.TopLeft.Y;
        }
    }

    return new PdfRectangle(blX, blY, trX, trY);
}
|
||||
|
||||
/// <summary>
/// Builds the bounding box of a line from its words, mirroring the horizontal
/// case: maxima are tracked where the horizontal variant tracks minima and
/// vice versa. Selected by the constructor when every word is
/// <see cref="TextOrientation.Rotate180"/>.
/// </summary>
/// <param name="words">The words that make up the line.</param>
/// <returns>
/// A rectangle created from the largest <c>BottomLeft.X</c>, the largest
/// <c>BottomLeft.Y</c>, the smallest <c>BottomLeft.X - Width</c> and the smallest
/// <c>TopRight.Y</c> found across <paramref name="words"/>.
/// </returns>
private PdfRectangle GetBoundingBox180(IReadOnlyList<Word> words)
{
    // Sentinels: any real coordinate replaces them on the first iteration.
    var largestAnchorX = double.MinValue;
    var largestAnchorY = double.MinValue;
    var smallestFarX = double.MaxValue;
    var smallestFarY = double.MaxValue;

    foreach (var current in words)
    {
        var box = current.BoundingBox;
        var anchor = box.BottomLeft;

        largestAnchorX = anchor.X > largestAnchorX ? anchor.X : largestAnchorX;
        largestAnchorY = anchor.Y > largestAnchorY ? anchor.Y : largestAnchorY;

        // The far edge is derived from the bottom-left corner minus the width.
        var farX = anchor.X - box.Width;
        smallestFarX = farX < smallestFarX ? farX : smallestFarX;

        var farY = box.TopRight.Y;
        smallestFarY = farY < smallestFarY ? farY : smallestFarY;
    }

    return new PdfRectangle(largestAnchorX, largestAnchorY, smallestFarX, smallestFarY);
}
|
||||
|
||||
/// <summary>
/// Builds the bounding box of a line from its words. Selected by the
/// constructor when every word is <see cref="TextOrientation.Rotate90"/>.
/// </summary>
/// <param name="words">The words that make up the line.</param>
/// <returns>
/// A rectangle built from four points combining: the smallest <c>BottomLeft.X</c>,
/// the largest <c>BottomLeft.X + Height</c>, the largest <c>BottomLeft.Y</c> and
/// the smallest <c>BottomRight.Y</c> found across <paramref name="words"/>.
/// </returns>
private PdfRectangle GetBoundingBox90(IReadOnlyList<Word> words)
{
    // Sentinels: any real coordinate replaces them on the first iteration.
    var smallestAnchorX = double.MaxValue;
    var smallestEndY = double.MaxValue;
    var largestFarX = double.MinValue;
    var largestAnchorY = double.MinValue;

    foreach (var current in words)
    {
        var box = current.BoundingBox;
        var anchor = box.BottomLeft;

        smallestAnchorX = anchor.X < smallestAnchorX ? anchor.X : smallestAnchorX;

        var endY = box.BottomRight.Y;
        smallestEndY = endY < smallestEndY ? endY : smallestEndY;

        // For this orientation the extent along X is the word's height.
        var farX = anchor.X + box.Height;
        largestFarX = farX > largestFarX ? farX : largestFarX;

        largestAnchorY = anchor.Y > largestAnchorY ? anchor.Y : largestAnchorY;
    }

    // Corner order is kept exactly as the four-point constructor is called in the original.
    var first = new PdfPoint(smallestAnchorX, largestAnchorY);
    var second = new PdfPoint(largestFarX, largestAnchorY);
    var third = new PdfPoint(largestFarX, smallestEndY);
    var fourth = new PdfPoint(smallestAnchorX, smallestEndY);

    return new PdfRectangle(first, second, third, fourth);
}
|
||||
|
||||
/// <summary>
/// Builds the bounding box of a line from its words. Selected by the
/// constructor when every word is <see cref="TextOrientation.Rotate270"/>.
/// </summary>
/// <param name="words">The words that make up the line.</param>
/// <returns>
/// A rectangle built from four points combining: the largest <c>BottomLeft.X</c>,
/// the smallest <c>BottomLeft.X - Height</c>, the smallest <c>BottomLeft.Y</c> and
/// the largest <c>BottomRight.Y</c> found across <paramref name="words"/>.
/// </returns>
private PdfRectangle GetBoundingBox270(IReadOnlyList<Word> words)
{
    // Sentinels: any real coordinate replaces them on the first iteration.
    var smallestFarX = double.MaxValue;
    var largestAnchorX = double.MinValue;
    var smallestAnchorY = double.MaxValue;
    var largestEndY = double.MinValue;

    foreach (var current in words)
    {
        var box = current.BoundingBox;
        var anchor = box.BottomLeft;

        largestAnchorX = anchor.X > largestAnchorX ? anchor.X : largestAnchorX;
        smallestAnchorY = anchor.Y < smallestAnchorY ? anchor.Y : smallestAnchorY;

        // For this orientation the extent along X is the word's height, measured backwards.
        var farX = anchor.X - box.Height;
        smallestFarX = farX < smallestFarX ? farX : smallestFarX;

        var endY = box.BottomRight.Y;
        largestEndY = endY > largestEndY ? endY : largestEndY;
    }

    // Corner order is kept exactly as the four-point constructor is called in the original.
    var first = new PdfPoint(largestAnchorX, smallestAnchorY);
    var second = new PdfPoint(smallestFarX, smallestAnchorY);
    var third = new PdfPoint(smallestFarX, largestEndY);
    var fourth = new PdfPoint(largestAnchorX, largestEndY);

    return new PdfRectangle(first, second, third, fourth);
}
|
||||
|
||||
/// <summary>
/// Computes an oriented bounding box for a line whose words do not all share
/// one of the four axis-aligned text orientations.
/// </summary>
/// <remarks>
/// <para>1. A straight line is fitted (least squares, y on x) through the bottom-left
/// and bottom-right corners of every word to estimate the slope of the text.</para>
/// <para>2. All word corners are rotated by the opposite of that angle, an axis-aligned
/// box is taken around them, and the box is rotated back.</para>
/// <para>3. The same box is expressed with four different corner orderings and the one whose
/// <c>Rotation</c> is closest to the angle between the first word's bottom-left and the
/// last word's bottom-right is returned. This relies on the words being in reading order.</para>
/// </remarks>
/// <param name="words">
/// The words that make up the line. Expected to be non-empty: <c>Average</c> and
/// <c>words[0]</c> are applied without a guard.
/// </param>
/// <returns>The oriented bounding box enclosing every word's corners.</returns>
private static PdfRectangle GetBoundingBoxOther(IReadOnlyList<Word> words)
{
    var baseLinePoints = words.SelectMany(r => new[]
    {
        r.BoundingBox.BottomLeft,
        r.BoundingBox.BottomRight,
    }).ToList();

    // Fitting a line through the base line points
    // to find the orientation (slope)
    double x0 = baseLinePoints.Average(p => p.X);
    double y0 = baseLinePoints.Average(p => p.Y);
    double sumProduct = 0;
    double sumDiffSquaredX = 0;

    for (int i = 0; i < baseLinePoints.Count; i++)
    {
        var point = baseLinePoints[i];
        var x_diff = point.X - x0;
        var y_diff = point.Y - y0;
        sumProduct += x_diff * y_diff;
        sumDiffSquaredX += x_diff * x_diff;
    }

    // Defaults describe a vertical line; they are kept when the points have
    // (almost) no spread along X, where the slope would be undefined.
    double cos = 0;
    double sin = 1;
    if (sumDiffSquaredX > 1e-3)
    {
        // not a vertical line
        double angleRad = Math.Atan(sumProduct / sumDiffSquaredX); // -π/2 ≤ θ ≤ π/2
        cos = Math.Cos(angleRad);
        sin = Math.Sin(angleRad);
    }

    // Rotate the points to build the axis-aligned bounding box (AABB)
    var inverseRotation = new TransformationMatrix(
        cos, -sin, 0,
        sin, cos, 0,
        0, 0, 1);

    // Materialised once: the sequence is scanned four times below, and a deferred
    // query would redo the SelectMany/Distinct/Transform work on every scan.
    var transformedPoints = words.SelectMany(r => new[]
    {
        r.BoundingBox.BottomLeft,
        r.BoundingBox.BottomRight,
        r.BoundingBox.TopLeft,
        r.BoundingBox.TopRight
    }).Distinct().Select(p => inverseRotation.Transform(p)).ToList();

    var aabb = new PdfRectangle(transformedPoints.Min(p => p.X),
                                transformedPoints.Min(p => p.Y),
                                transformedPoints.Max(p => p.X),
                                transformedPoints.Max(p => p.Y));

    // Rotate back the AABB to obtain the oriented bounding box (OBB)
    var rotateBack = new TransformationMatrix(
        cos, sin, 0,
        -sin, cos, 0,
        0, 0, 1);

    // Candidate bounding boxes: the same four corners in each cyclic ordering
    var obb = rotateBack.Transform(aabb);
    var obb1 = new PdfRectangle(obb.BottomRight, obb.BottomLeft, obb.TopLeft, obb.TopRight);
    var obb2 = new PdfRectangle(obb.TopRight, obb.BottomRight, obb.BottomLeft, obb.TopLeft);
    var obb3 = new PdfRectangle(obb.TopLeft, obb.TopRight, obb.BottomRight, obb.BottomLeft);

    // Find the orientation of the OBB, using the baseline angle
    // Assumes word order is correct
    var firstWord = words[0];
    var lastWord = words[words.Count - 1];

    var baseLineAngle = Distances.Angle(firstWord.BoundingBox.BottomLeft, lastWord.BoundingBox.BottomRight);

    // Keep the candidate whose rotation deviates least from the baseline angle.
    double deltaAngle = Math.Abs(Distances.BoundAngle180(obb.Rotation - baseLineAngle));
    double deltaAngle1 = Math.Abs(Distances.BoundAngle180(obb1.Rotation - baseLineAngle));
    if (deltaAngle1 < deltaAngle)
    {
        deltaAngle = deltaAngle1;
        obb = obb1;
    }

    double deltaAngle2 = Math.Abs(Distances.BoundAngle180(obb2.Rotation - baseLineAngle));
    if (deltaAngle2 < deltaAngle)
    {
        deltaAngle = deltaAngle2;
        obb = obb2;
    }

    double deltaAngle3 = Math.Abs(Distances.BoundAngle180(obb3.Rotation - baseLineAngle));
    if (deltaAngle3 < deltaAngle)
    {
        obb = obb3;
    }

    return obb;
}
|
||||
#endregion
|
||||
|
||||
/// <inheritdoc />
|
||||
public override string ToString()
|
||||
{
|
||||
|
Reference in New Issue
Block a user