// PdfPig/src/UglyToad.PdfPig/Content/Word.cs
// (C# source; the repository viewer reported 373 lines, 12 KiB.)

namespace UglyToad.PdfPig.Content
{
using Core;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using UglyToad.PdfPig.Geometry;
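/// <summary>
/// A word: a non-empty group of <see cref="Letter"/>s together with the text,
/// text orientation, bounding box and font name derived from them.
/// </summary>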
public class Word
{
/// <summary>
/// The text of the word: the <c>Value</c> of every letter, appended in the order
/// the letters were supplied to the constructor.
/// </summary>
public string Text { get; }
/// <summary>
/// The text orientation of the word. This is the orientation shared by all of the
/// word's letters, or <see cref="Content.TextOrientation.Other"/> when the letters
/// do not all have the same orientation.
/// </summary>
public TextOrientation TextOrientation { get; }
/// <summary>
/// The rectangle completely containing the word. It is computed in the constructor
/// from the letters' base line points and glyph rectangles, using a routine chosen
/// according to <see cref="TextOrientation"/>.
/// </summary>
public PdfRectangle BoundingBox { get; }
/// <summary>
/// The name of the font for the word. This is taken from the first letter only;
/// the other letters' fonts are not inspected.
/// </summary>
public string FontName { get; }
/// <summary>
/// The letters contained in the word. This is the same list instance that was
/// passed to the constructor (it is not copied).
/// </summary>
public IReadOnlyList<Letter> Letters { get; }
/// <summary>
/// Create a new <see cref="Word"/>.
/// </summary>
/// <param name="letters">The letters contained in the word. Must contain at least one letter.</param>
/// <exception cref="ArgumentNullException"><paramref name="letters"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentException"><paramref name="letters"/> is empty.</exception>
public Word(IReadOnlyList<Letter> letters)
{
    if (letters == null)
    {
        throw new ArgumentNullException(nameof(letters));
    }

    if (letters.Count == 0)
    {
        throw new ArgumentException("Empty letters provided.", nameof(letters));
    }

    // The word keeps the orientation of its first letter only if every letter agrees
    // with it; any disagreement demotes the whole word to 'Other'.
    var commonOrientation = letters[0].TextOrientation;
    if (commonOrientation != TextOrientation.Other
        && letters.Any(l => l.TextOrientation != commonOrientation))
    {
        commonOrientation = TextOrientation.Other;
    }

    // Each orientation has its own routine producing both the text and the bounding box.
    Tuple<string, PdfRectangle> textAndBox;
    if (commonOrientation == TextOrientation.Horizontal)
    {
        textAndBox = GetBoundingBoxH(letters);
    }
    else if (commonOrientation == TextOrientation.Rotate180)
    {
        textAndBox = GetBoundingBox180(letters);
    }
    else if (commonOrientation == TextOrientation.Rotate90)
    {
        textAndBox = GetBoundingBox90(letters);
    }
    else if (commonOrientation == TextOrientation.Rotate270)
    {
        textAndBox = GetBoundingBox270(letters);
    }
    else
    {
        // TextOrientation.Other and any value not handled above.
        textAndBox = GetBoundingBoxOther(letters);
    }

    Letters = letters;
    TextOrientation = commonOrientation;
    Text = textAndBox.Item1;
    BoundingBox = textAndBox.Item2;
    FontName = letters[0].FontName;
}
#region Bounding box
/// <summary>
/// Builds the text and the bounding box of a word whose letters are all
/// <see cref="Content.TextOrientation.Horizontal"/>.
/// </summary>
/// <param name="letters">The letters of the word.</param>
/// <returns>The concatenated letter values and the rectangle enclosing the letters.</returns>
private Tuple<string, PdfRectangle> GetBoundingBoxH(IReadOnlyList<Letter> letters)
{
    var text = new StringBuilder();

    var left = double.MaxValue;
    var bottom = double.MaxValue;
    var right = double.MinValue;
    var top = double.MinValue;

    foreach (var current in letters)
    {
        text.Append(current.Value);

        var start = current.StartBaseLine;
        var glyph = current.GlyphRectangle;

        // Left and bottom edges come from the start of each letter's base line.
        left = start.X < left ? start.X : left;
        bottom = start.Y < bottom ? start.Y : bottom;

        // The right edge uses the larger of the letter's width and its glyph rectangle's width.
        var letterRight = start.X + Math.Max(current.Width, glyph.Width);
        right = letterRight > right ? letterRight : right;

        // The top edge comes from the glyph rectangle.
        top = glyph.Top > top ? glyph.Top : top;
    }

    return Tuple.Create(text.ToString(), new PdfRectangle(left, bottom, right, top));
}
/// <summary>
/// Builds the text and the bounding box of a word whose letters are all
/// <see cref="Content.TextOrientation.Rotate180"/>.
/// </summary>
/// <param name="letters">The letters of the word.</param>
/// <returns>The concatenated letter values and the rectangle enclosing the letters.</returns>
private Tuple<string, PdfRectangle> GetBoundingBox180(IReadOnlyList<Letter> letters)
{
    var text = new StringBuilder();

    // Mirror image of the horizontal case: the base line starts supply the largest
    // X/Y, the far edge and the glyph top supply the smallest.
    var largestX = double.MinValue;
    var largestY = double.MinValue;
    var smallestX = double.MaxValue;
    var smallestY = double.MaxValue;

    foreach (var current in letters)
    {
        text.Append(current.Value);

        var start = current.StartBaseLine;
        var glyph = current.GlyphRectangle;

        largestX = start.X > largestX ? start.X : largestX;
        largestY = start.Y > largestY ? start.Y : largestY;

        // Far edge: base line start plus the smaller of the letter width and glyph width.
        var farEdge = start.X + Math.Min(current.Width, glyph.Width);
        smallestX = farEdge < smallestX ? farEdge : smallestX;

        smallestY = glyph.Top < smallestY ? glyph.Top : smallestY;
    }

    // The corners are deliberately passed largest-first, then smallest.
    return Tuple.Create(text.ToString(), new PdfRectangle(largestX, largestY, smallestX, smallestY));
}
/// <summary>
/// Builds the text and the bounding box of a word whose letters are all
/// <see cref="Content.TextOrientation.Rotate90"/>.
/// </summary>
/// <param name="letters">The letters of the word.</param>
/// <returns>The concatenated letter values and the four-corner rectangle enclosing the letters.</returns>
private Tuple<string, PdfRectangle> GetBoundingBox90(IReadOnlyList<Letter> letters)
{
    var text = new StringBuilder();

    var lowX = double.MaxValue;
    var lowY = double.MaxValue;
    var highX = double.MinValue;
    var highY = double.MinValue;

    foreach (var current in letters)
    {
        text.Append(current.Value);

        var startX = current.StartBaseLine.X;
        var endY = current.EndBaseLine.Y;
        var glyph = current.GlyphRectangle;

        lowX = startX < lowX ? startX : lowX;

        // Note: the low Y bound uses the END of the base line, unlike the other bounds.
        lowY = endY < lowY ? endY : lowY;

        // Base line start offset by the glyph rectangle's height.
        var offsetX = startX - glyph.Height;
        highX = offsetX > highX ? offsetX : highX;

        highY = glyph.Top > highY ? glyph.Top : highY;
    }

    var rectangle = new PdfRectangle(
        new PdfPoint(highX, highY),
        new PdfPoint(highX, lowY),
        new PdfPoint(lowX, highY),
        new PdfPoint(lowX, lowY));

    return Tuple.Create(text.ToString(), rectangle);
}
/// <summary>
/// Builds the text and the bounding box of a word whose letters are all
/// <see cref="Content.TextOrientation.Rotate270"/>.
/// </summary>
/// <param name="letters">The letters of the word.</param>
/// <returns>The concatenated letter values and the four-corner rectangle enclosing the letters.</returns>
private Tuple<string, PdfRectangle> GetBoundingBox270(IReadOnlyList<Letter> letters)
{
    var builder = new StringBuilder();

    var minX = double.MaxValue;
    var maxX = double.MinValue;
    var minY = double.MaxValue;
    var maxY = double.MinValue;

    for (var i = 0; i < letters.Count; i++)
    {
        var letter = letters[i];
        builder.Append(letter.Value);

        // Largest X and smallest Y come from the start of each letter's base line.
        if (letter.StartBaseLine.X > maxX)
        {
            maxX = letter.StartBaseLine.X;
        }

        if (letter.StartBaseLine.Y < minY)
        {
            minY = letter.StartBaseLine.Y;
        }

        // Smallest X: base line start offset by the glyph rectangle's height.
        var right = letter.StartBaseLine.X - letter.GlyphRectangle.Height;
        if (right < minX)
        {
            minX = right;
        }

        // Largest Y is taken from the glyph rectangle's Bottom (not Top) for this orientation.
        if (letter.GlyphRectangle.Bottom > maxY)
        {
            maxY = letter.GlyphRectangle.Bottom;
        }
    }

    return new Tuple<string, PdfRectangle>(
        builder.ToString(),
        new PdfRectangle(
            new PdfPoint(minX, minY),
            new PdfPoint(minX, maxY),
            new PdfPoint(maxX, minY),
            new PdfPoint(maxX, maxY)));
}
/// <summary>
/// Builds the text and an oriented bounding box for a word whose letters do not all
/// share one of the four axis-aligned text orientations.
/// </summary>
/// <remarks>
/// A least-squares line is fitted through the start and end base line points of every
/// letter to estimate the word's direction. The letters' points are rotated so that this
/// direction becomes horizontal, an axis-aligned box is taken around them, and the box is
/// rotated back. Of the four possible corner orderings of that box, the one whose rotation
/// is closest to the angle from the first letter's base line start to the last letter's
/// base line end is returned.
/// </remarks>
/// <param name="letters">The letters of the word. Must contain at least one letter.</param>
/// <returns>The concatenated letter values and the oriented bounding box.</returns>
private Tuple<string, PdfRectangle> GetBoundingBoxOther(IReadOnlyList<Letter> letters)
{
    // When the summed squared X deviation of the base line points is at or below this
    // value the points are treated as lying on a vertical line. Without this guard the
    // slope is 0/0 = NaN for a perfectly vertical word, and every coordinate of the
    // resulting box becomes NaN.
    const double verticalLineTolerance = 1e-3;

    var builder = new StringBuilder();
    for (var i = 0; i < letters.Count; i++)
    {
        builder.Append(letters[i].Value);
    }

    var baseLinePoints = letters.SelectMany(r => new[]
    {
        r.StartBaseLine,
        r.EndBaseLine,
    }).ToList();

    // Fitting a line through the base line points to find the orientation (slope).
    double x0 = baseLinePoints.Average(p => p.X);
    double y0 = baseLinePoints.Average(p => p.Y);
    double sumProduct = 0;
    double sumDiffSquaredX = 0;

    for (int i = 0; i < baseLinePoints.Count; i++)
    {
        var point = baseLinePoints[i];
        var xDiff = point.X - x0;
        var yDiff = point.Y - y0;
        sumProduct += xDiff * yDiff;
        sumDiffSquaredX += xDiff * xDiff;
    }

    // Default to a vertical line (angle of 90 degrees); only compute the slope when the
    // points actually spread out along X.
    double cos = 0;
    double sin = 1;
    if (sumDiffSquaredX > verticalLineTolerance)
    {
        var angleRad = Math.Atan(sumProduct / sumDiffSquaredX);
        cos = Math.Cos(angleRad);
        sin = Math.Sin(angleRad);
    }

    // Rotate the points to build the axis-aligned bounding box (AABB).
    var inverseRotation = new TransformationMatrix(
        cos, -sin, 0,
        sin, cos, 0,
        0, 0, 1);

    // Materialized once: the sequence is scanned four times below for min/max.
    var transformedPoints = letters.SelectMany(r => new[]
    {
        r.StartBaseLine,
        r.EndBaseLine,
        r.GlyphRectangle.TopLeft,
        r.GlyphRectangle.TopRight
    }).Distinct().Select(p => inverseRotation.Transform(p)).ToList();

    var aabb = new PdfRectangle(transformedPoints.Min(p => p.X),
        transformedPoints.Min(p => p.Y),
        transformedPoints.Max(p => p.X),
        transformedPoints.Max(p => p.Y));

    // Rotate back the AABB to obtain the oriented bounding box (OBB).
    var rotateBack = new TransformationMatrix(
        cos, sin, 0,
        -sin, cos, 0,
        0, 0, 1);

    // Candidate bounding boxes: the same four corners in each possible ordering.
    var obb = rotateBack.Transform(aabb);
    var obb1 = new PdfRectangle(obb.BottomLeft, obb.TopLeft, obb.BottomRight, obb.TopRight);
    var obb2 = new PdfRectangle(obb.TopRight, obb.BottomRight, obb.TopLeft, obb.BottomLeft);
    var obb3 = new PdfRectangle(obb.BottomRight, obb.BottomLeft, obb.TopRight, obb.TopLeft);

    // Find the orientation of the OBB, using the base line angle (in degrees).
    var firstLetter = letters[0];
    var lastLetter = letters[letters.Count - 1];
    var baseLineAngle = Math.Atan2(
        lastLetter.EndBaseLine.Y - firstLetter.StartBaseLine.Y,
        lastLetter.EndBaseLine.X - firstLetter.StartBaseLine.X) * 180 / Math.PI;

    // Keep the first candidate with the strictly smallest angular distance.
    var best = obb;
    var bestDelta = AngleDifference(baseLineAngle, obb.Rotation);
    foreach (var candidate in new[] { obb1, obb2, obb3 })
    {
        var candidateDelta = AngleDifference(baseLineAngle, candidate.Rotation);
        if (candidateDelta < bestDelta)
        {
            bestDelta = candidateDelta;
            best = candidate;
        }
    }

    return new Tuple<string, PdfRectangle>(builder.ToString(), best);
}

/// <summary>
/// Returns the smallest absolute difference between two angles, taking wrap-around
/// into account, so that e.g. 179 and -179 are 2 apart rather than 358.
/// </summary>
/// <remarks>
/// NOTE(review): assumes both angles are expressed in degrees. The base line angle is
/// converted to degrees before being compared with PdfRectangle.Rotation - confirm that
/// Rotation is in degrees too.
/// </remarks>
private static double AngleDifference(double first, double second)
{
    var difference = Math.Abs(first - second) % 360.0;
    return difference > 180.0 ? 360.0 - difference : difference;
}
#endregion
/// <summary>
/// Returns the <see cref="Text"/> of the word.
/// </summary>
/// <returns>The text of the word.</returns>
public override string ToString() => Text;
}
}