Lazily evaluate page text and remove LINQ from the Word constructor

This commit is contained in:
Eliot Jones
2019-08-25 15:06:37 +01:00
parent 0cd7795bff
commit d089a34aa4
2 changed files with 40 additions and 9 deletions

View File

@@ -16,6 +16,8 @@
/// </summary>
public class Page
{
private readonly Lazy<string> textLazy;
/// <summary>
/// The raw PDF dictionary token for this page in the document.
/// </summary>
@@ -41,11 +43,11 @@
/// The set of <see cref="Letter"/>s drawn by the PDF content.
/// </summary>
public IReadOnlyList<Letter> Letters => Content?.Letters ?? new Letter[0];
/// <summary>
/// The full text of all characters on the page in the order they are presented in the PDF content.
/// </summary>
public string Text { get; }
public string Text => textLazy.Value;
/// <summary>
/// Gets the width of the page in points.
@@ -88,7 +90,7 @@
CropBox = cropBox;
Rotation = rotation;
Content = content;
Text = GetText(content);
textLazy = new Lazy<string>(() => GetText(Content));
Width = mediaBox.Bounds.Width;
Height = mediaBox.Bounds.Height;

View File

@@ -2,7 +2,7 @@
{
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Geometry;
/// <summary>
@@ -53,12 +53,41 @@
Letters = letters;
Text = string.Join(string.Empty, letters.Select(x => x.Value));
var builder = new StringBuilder();
var minX = letters.Min(x => x.Location.X);
var minY = letters.Min(x => x.Location.Y);
var maxX = letters.Max(x => x.Location.X + x.Width);
var maxY = letters.Max(x => x.GlyphRectangle.Top);
var minX = decimal.MaxValue;
var minY = decimal.MaxValue;
var maxX = decimal.MinValue;
var maxY = decimal.MinValue;
for (var i = 0; i < letters.Count; i++)
{
var letter = letters[i];
builder.Append(letter.Value);
if (letter.Location.X < minX)
{
minX = letter.Location.X;
}
if (letter.Location.Y < minY)
{
minY = letter.Location.Y;
}
var right = letter.Location.X + letter.Width;
if (right > maxX)
{
maxX = right;
}
if (letter.GlyphRectangle.Top > maxY)
{
maxY = letter.GlyphRectangle.Top;
}
}
Text = builder.ToString();
BoundingBox = new PdfRectangle(minX, minY, maxX, maxY);
FontName = letters[0].FontName;