mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-10-20 19:03:20 +08:00
@@ -42,7 +42,7 @@ New in v0.0.5 - To create documents use the class ```PdfDocumentBuilder```. Thou
|
|||||||
|
|
||||||
byte[] documentBytes = builder.Build();
|
byte[] documentBytes = builder.Build();
|
||||||
|
|
||||||
File.WriteAllBytes(@"C:\git\newPdf.pdf");
|
File.WriteAllBytes(@"C:\git\newPdf.pdf", documentBytes);
|
||||||
|
|
||||||
Each font must be registered with the PdfDocumentBuilder prior to use, to enable pages to share the font resources. Currently only Standard 14 fonts and TrueType fonts (.ttf) are supported.
|
Each font must be registered with the PdfDocumentBuilder prior to use, to enable pages to share the font resources. Currently only Standard 14 fonts and TrueType fonts (.ttf) are supported.
|
||||||
|
|
||||||
|
@@ -6,6 +6,7 @@
|
|||||||
using System.Linq;
|
using System.Linq;
|
||||||
using System.Reflection;
|
using System.Reflection;
|
||||||
using PdfPig.Graphics.Operations;
|
using PdfPig.Graphics.Operations;
|
||||||
|
using PdfPig.Graphics.Operations.InlineImages;
|
||||||
using PdfPig.Tokens;
|
using PdfPig.Tokens;
|
||||||
using Xunit;
|
using Xunit;
|
||||||
|
|
||||||
@@ -41,6 +42,10 @@
|
|||||||
|
|
||||||
operation = (IGraphicsStateOperation)field.GetValue(null);
|
operation = (IGraphicsStateOperation)field.GetValue(null);
|
||||||
}
|
}
|
||||||
|
else if (operationType == typeof(EndInlineImage))
|
||||||
|
{
|
||||||
|
operation = new EndInlineImage(new List<IToken>(), new List<byte>());
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
var constructor = constructors[0];
|
var constructor = constructors[0];
|
||||||
|
@@ -64,6 +64,12 @@
|
|||||||
|
|
||||||
Assert.True(stream.IsAtEnd());
|
Assert.True(stream.IsAtEnd());
|
||||||
Assert.True(array.IsAtEnd());
|
Assert.True(array.IsAtEnd());
|
||||||
|
|
||||||
|
stream.Seek(0);
|
||||||
|
array.Seek(0);
|
||||||
|
|
||||||
|
Assert.False(stream.IsAtEnd());
|
||||||
|
Assert.False(array.IsAtEnd());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -65,6 +65,8 @@
|
|||||||
"UglyToad.PdfPig.DocumentLayoutAnalysis.RecursiveXYCut",
|
"UglyToad.PdfPig.DocumentLayoutAnalysis.RecursiveXYCut",
|
||||||
"UglyToad.PdfPig.DocumentLayoutAnalysis.XYNode",
|
"UglyToad.PdfPig.DocumentLayoutAnalysis.XYNode",
|
||||||
"UglyToad.PdfPig.DocumentLayoutAnalysis.XYLeaf",
|
"UglyToad.PdfPig.DocumentLayoutAnalysis.XYLeaf",
|
||||||
|
"UglyToad.PdfPig.DocumentLayoutAnalysis.TextEdgesExtractor",
|
||||||
|
"UglyToad.PdfPig.DocumentLayoutAnalysis.EdgeType",
|
||||||
"UglyToad.PdfPig.Exceptions.PdfDocumentEncryptedException",
|
"UglyToad.PdfPig.Exceptions.PdfDocumentEncryptedException",
|
||||||
"UglyToad.PdfPig.Exceptions.PdfDocumentFormatException",
|
"UglyToad.PdfPig.Exceptions.PdfDocumentFormatException",
|
||||||
"UglyToad.PdfPig.Fonts.DescriptorFontFile",
|
"UglyToad.PdfPig.Fonts.DescriptorFontFile",
|
||||||
@@ -174,6 +176,7 @@
|
|||||||
"UglyToad.PdfPig.Tokens.HexToken",
|
"UglyToad.PdfPig.Tokens.HexToken",
|
||||||
"UglyToad.PdfPig.Tokens.IDataToken`1",
|
"UglyToad.PdfPig.Tokens.IDataToken`1",
|
||||||
"UglyToad.PdfPig.Tokens.IndirectReferenceToken",
|
"UglyToad.PdfPig.Tokens.IndirectReferenceToken",
|
||||||
|
"UglyToad.PdfPig.Tokens.InlineImageDataToken",
|
||||||
"UglyToad.PdfPig.Tokens.IToken",
|
"UglyToad.PdfPig.Tokens.IToken",
|
||||||
"UglyToad.PdfPig.Tokens.NameToken",
|
"UglyToad.PdfPig.Tokens.NameToken",
|
||||||
"UglyToad.PdfPig.Tokens.NullToken",
|
"UglyToad.PdfPig.Tokens.NullToken",
|
||||||
|
@@ -9,7 +9,7 @@
|
|||||||
using Util;
|
using Util;
|
||||||
using Util.JetBrains.Annotations;
|
using Util.JetBrains.Annotations;
|
||||||
using XObjects;
|
using XObjects;
|
||||||
using UglyToad.PdfPig.Geometry;
|
using Geometry;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Contains the content and provides access to methods of a single page in the <see cref="PdfDocument"/>.
|
/// Contains the content and provides access to methods of a single page in the <see cref="PdfDocument"/>.
|
||||||
@@ -30,23 +30,18 @@
|
|||||||
|
|
||||||
internal CropBox CropBox { get; }
|
internal CropBox CropBox { get; }
|
||||||
|
|
||||||
|
internal PageContent Content { get; }
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// The rotation of the page in degrees (clockwise). Valid values are 0, 90, 180 and 270.
|
/// The rotation of the page in degrees (clockwise). Valid values are 0, 90, 180 and 270.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public PageRotationDegrees Rotation { get; }
|
public PageRotationDegrees Rotation { get; }
|
||||||
|
|
||||||
internal PageContent Content { get; }
|
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// The set of <see cref="Letter"/>s drawn by the PDF content.
|
/// The set of <see cref="Letter"/>s drawn by the PDF content.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public IReadOnlyList<Letter> Letters => Content?.Letters ?? new Letter[0];
|
public IReadOnlyList<Letter> Letters => Content?.Letters ?? new Letter[0];
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// The set of <see cref="PdfPath"/>s drawn by the PDF content.
|
|
||||||
/// </summary>
|
|
||||||
public IReadOnlyList<PdfPath> Paths => Content?.Paths ?? new List<PdfPath>();
|
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// The full text of all characters on the page in the order they are presented in the PDF content.
|
/// The full text of all characters on the page in the order they are presented in the PDF content.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
@@ -136,6 +131,11 @@
|
|||||||
private readonly Page page;
|
private readonly Page page;
|
||||||
private readonly AnnotationProvider annotationProvider;
|
private readonly AnnotationProvider annotationProvider;
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// The set of <see cref="PdfPath"/>s drawn by the PDF content.
|
||||||
|
/// </summary>
|
||||||
|
public IReadOnlyList<PdfPath> Paths => page.Content?.Paths ?? new List<PdfPath>();
|
||||||
|
|
||||||
internal Experimental(Page page, AnnotationProvider annotationProvider)
|
internal Experimental(Page page, AnnotationProvider annotationProvider)
|
||||||
{
|
{
|
||||||
this.page = page;
|
this.page = page;
|
||||||
|
@@ -11,7 +11,7 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis
|
|||||||
/// https://en.wikipedia.org/wiki/Recursive_X-Y_cut
|
/// https://en.wikipedia.org/wiki/Recursive_X-Y_cut
|
||||||
/// <para>See 'Recursive X-Y Cut using Bounding Boxes of Connected Components' by Jaekyu Ha, Robert M.Haralick and Ihsin T. Phillips</para>
|
/// <para>See 'Recursive X-Y Cut using Bounding Boxes of Connected Components' by Jaekyu Ha, Robert M.Haralick and Ihsin T. Phillips</para>
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public class RecursiveXYCut
|
public static class RecursiveXYCut
|
||||||
{
|
{
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Get the blocks.
|
/// Get the blocks.
|
||||||
|
109
src/UglyToad.PdfPig/DocumentLayoutAnalysis/TextEdgesExtractor.cs
Normal file
109
src/UglyToad.PdfPig/DocumentLayoutAnalysis/TextEdgesExtractor.cs
Normal file
@@ -0,0 +1,109 @@
|
|||||||
|
using System;
|
||||||
|
using System.Collections.Concurrent;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
using UglyToad.PdfPig.Content;
|
||||||
|
using UglyToad.PdfPig.Geometry;
|
||||||
|
|
||||||
|
namespace UglyToad.PdfPig.DocumentLayoutAnalysis
|
||||||
|
{
|
||||||
|
/// <summary>
/// Text edges extractor. Text edges are where words have either their BoundingBox's left, right or mid coordinates aligned on the same vertical line.
/// <para>Useful to detect text columns, tables, justified text, lists, etc.</para>
/// </summary>
public static class TextEdgesExtractor
{
    /// <summary>
    /// Functions used to define left, middle and right edges.
    /// Each function maps a word's bounding box to the x coordinate (rounded to 0 decimal places) used to group words.
    /// </summary>
    private static readonly Tuple<EdgeType, Func<PdfRectangle, decimal>>[] edgesFuncs = new Tuple<EdgeType, Func<PdfRectangle, decimal>>[]
    {
        Tuple.Create<EdgeType, Func<PdfRectangle, decimal>>(EdgeType.Left, x => Math.Round(x.Left, 0)),                 // use BoundingBox's left coordinate
        Tuple.Create<EdgeType, Func<PdfRectangle, decimal>>(EdgeType.Mid, x => Math.Round(x.Left + x.Width / 2, 0)),    // use BoundingBox's mid coordinate
        Tuple.Create<EdgeType, Func<PdfRectangle, decimal>>(EdgeType.Right, x => Math.Round(x.Right, 0))                // use BoundingBox's right coordinate
    };

    /// <summary>
    /// Get the text edges.
    /// </summary>
    /// <param name="pageWords">The words in the page. Words whose text is null or white space are ignored.</param>
    /// <param name="minimumElements">The minimum number of elements to define a text edge. Must not be negative.</param>
    /// <returns>For each <see cref="EdgeType"/>, the vertical lines found for that kind of alignment.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="pageWords"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="minimumElements"/> is negative.</exception>
    public static IReadOnlyDictionary<EdgeType, List<PdfLine>> GetEdges(IEnumerable<Word> pageWords, int minimumElements = 4)
    {
        if (pageWords == null)
        {
            throw new ArgumentNullException(nameof(pageWords));
        }

        if (minimumElements < 0)
        {
            throw new ArgumentException("TextEdgesExtractor.GetEdges(): The minimum number of elements should not be negative.", nameof(minimumElements));
        }

        // Materialise once: the words are read by three parallel workers and re-scanned for every
        // candidate edge, so a lazy query would be re-evaluated (and its source re-enumerated) each time.
        var cleanWords = pageWords.Where(x => !string.IsNullOrWhiteSpace(x.Text)).ToList();

        var dictionary = new ConcurrentDictionary<EdgeType, List<PdfLine>>();

        Parallel.ForEach(edgesFuncs, f =>
        {
            dictionary.TryAdd(f.Item1, GetVerticalEdges(cleanWords, f.Item2, minimumElements));
        });

        return dictionary.ToDictionary(x => x.Key, x => x.Value);
    }

    /// <summary>
    /// Finds the vertical edges for a single alignment function.
    /// Words are grouped by the coordinate returned by <paramref name="func"/>; each group is then split wherever
    /// another word crosses the vertical line, and every resulting run with enough words becomes a line.
    /// </summary>
    private static List<PdfLine> GetVerticalEdges(IEnumerable<Word> pageWords, Func<PdfRectangle, decimal> func, int minimumElements)
    {
        var lines = new List<PdfLine>();

        Dictionary<decimal, List<Word>> edges = pageWords.GroupBy(x => func(x.BoundingBox))
            .Where(x => x.Count() >= minimumElements)
            .ToDictionary(gdc => gdc.Key, gdc => gdc.ToList());

        foreach (var edge in edges)
        {
            var position = edge.Key;
            var remaining = edge.Value.OrderBy(w => w.BoundingBox.Bottom).ToList();

            // Vertical extent of the whole group, computed once instead of once per candidate word.
            var edgeBottom = edge.Value.Min(w => w.BoundingBox.Bottom);
            var edgeTop = edge.Value.Max(w => w.BoundingBox.Top);

            var cuttings = pageWords.Except(edge.Value) // remove selected words
                // words that cut the vertical line
                .Where(w => w.BoundingBox.Left < position && w.BoundingBox.Right > position)
                // and that are within the boundaries of the edge
                .Where(w => w.BoundingBox.Bottom > edgeBottom && w.BoundingBox.Top < edgeTop)
                .OrderBy(w => w.BoundingBox.Bottom)
                .ToList();

            foreach (var cut in cuttings)
            {
                // Words of the edge lying entirely below the cutting word form their own run.
                var below = remaining.Where(w => w.BoundingBox.Top < cut.BoundingBox.Bottom).ToList();
                AddLineIfLargeEnough(lines, position, below, minimumElements);
                remaining = remaining.Except(below).ToList();
            }

            // Whatever is left above the last cut (or the whole group when nothing cuts it).
            AddLineIfLargeEnough(lines, position, remaining, minimumElements);
        }

        return lines;
    }

    /// <summary>
    /// Adds a vertical line spanning <paramref name="words"/> at <paramref name="position"/> when the run has at least
    /// <paramref name="minimumElements"/> words. Empty runs are always skipped since they have no extent.
    /// </summary>
    private static void AddLineIfLargeEnough(List<PdfLine> lines, decimal position, List<Word> words, int minimumElements)
    {
        if (words.Count == 0 || words.Count < minimumElements)
        {
            return;
        }

        lines.Add(new PdfLine(position, words.Min(w => w.BoundingBox.Bottom), position, words.Max(w => w.BoundingBox.Top)));
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Identifies which coordinate of the words' BoundingBox is aligned to form a text edge.
/// </summary>
public enum EdgeType
{
    /// <summary>
    /// Edge formed by words whose BoundingBox's left coordinates share the same vertical line.
    /// </summary>
    Left = 0,

    /// <summary>
    /// Edge formed by words whose BoundingBox's mid coordinates share the same vertical line.
    /// </summary>
    Mid = 1,

    /// <summary>
    /// Edge formed by words whose BoundingBox's right coordinates share the same vertical line.
    /// </summary>
    Right = 2
}
|
||||||
|
}
|
@@ -48,6 +48,11 @@
|
|||||||
|
|
||||||
public decimal GetWidthFromFont(int characterIdentifier)
|
public decimal GetWidthFromFont(int characterIdentifier)
|
||||||
{
|
{
|
||||||
|
if (fontProgram == null)
|
||||||
|
{
|
||||||
|
return GetWidthFromDictionary(characterIdentifier);
|
||||||
|
}
|
||||||
|
|
||||||
if (fontProgram.TryGetBoundingAdvancedWidth(characterIdentifier, cidToGid.GetGlyphIndex, out var width))
|
if (fontProgram.TryGetBoundingAdvancedWidth(characterIdentifier, cidToGid.GetGlyphIndex, out var width))
|
||||||
{
|
{
|
||||||
return width;
|
return width;
|
||||||
|
@@ -5,19 +5,28 @@ namespace UglyToad.PdfPig.Geometry
|
|||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using System.Linq;
|
using System.Linq;
|
||||||
using System.Text;
|
using System.Text;
|
||||||
using UglyToad.PdfPig.Core;
|
using Core;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// A path in a PDF document, used by glyphs and page content.
|
/// A path in a PDF document, used by glyphs and page content. Can contain multiple sub-paths.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public class PdfPath
|
public class PdfPath
|
||||||
{
|
{
|
||||||
private readonly List<IPathCommand> commands = new List<IPathCommand>();
|
private readonly List<IPathCommand> commands = new List<IPathCommand>();
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// The sequence of sub-paths which form this <see cref="PdfPath"/>.
|
||||||
|
/// </summary>
|
||||||
public IReadOnlyList<IPathCommand> Commands => commands;
|
public IReadOnlyList<IPathCommand> Commands => commands;
|
||||||
|
|
||||||
private PdfPoint? currentPosition;
|
private PdfPoint? currentPosition;
|
||||||
private TransformationMatrix currentTransformationMatrix = TransformationMatrix.Identity;
|
|
||||||
|
|
||||||
|
private readonly TransformationMatrix currentTransformationMatrix;
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Create a new <see cref="PdfPath"/>.
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="transformationMatrix">The transformation to apply to all points in this path.</param>
|
||||||
public PdfPath(TransformationMatrix transformationMatrix)
|
public PdfPath(TransformationMatrix transformationMatrix)
|
||||||
{
|
{
|
||||||
currentTransformationMatrix = transformationMatrix;
|
currentTransformationMatrix = transformationMatrix;
|
||||||
@@ -162,79 +171,140 @@ namespace UglyToad.PdfPig.Geometry
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// A command in a <see cref="PdfPath"/>.
|
||||||
|
/// </summary>
|
||||||
public interface IPathCommand
|
public interface IPathCommand
|
||||||
{
|
{
|
||||||
|
/// <summary>
|
||||||
|
/// Returns the smallest rectangle which contains the path region given by this command.
|
||||||
|
/// </summary>
|
||||||
|
/// <returns></returns>
|
||||||
PdfRectangle? GetBoundingRectangle();
|
PdfRectangle? GetBoundingRectangle();
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Converts from the path command to an SVG string representing the path operation.
|
||||||
|
/// </summary>
|
||||||
void WriteSvg(StringBuilder builder);
|
void WriteSvg(StringBuilder builder);
|
||||||
}
|
}
|
||||||
|
|
||||||
private class Close : IPathCommand
|
/// <summary>
|
||||||
|
/// Close the current <see cref="PdfPath"/>.
|
||||||
|
/// </summary>
|
||||||
|
public class Close : IPathCommand
|
||||||
{
|
{
|
||||||
|
/// <inheritdoc />
|
||||||
public PdfRectangle? GetBoundingRectangle()
|
public PdfRectangle? GetBoundingRectangle()
|
||||||
{
|
{
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <inheritdoc />
|
||||||
public void WriteSvg(StringBuilder builder)
|
public void WriteSvg(StringBuilder builder)
|
||||||
{
|
{
|
||||||
builder.Append("Z ");
|
builder.Append("Z ");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Move drawing of the current <see cref="PdfPath"/> to the specified location.
|
||||||
|
/// </summary>
|
||||||
public class Move : IPathCommand
|
public class Move : IPathCommand
|
||||||
{
|
{
|
||||||
|
/// <summary>
|
||||||
|
/// The location to move to.
|
||||||
|
/// </summary>
|
||||||
public PdfPoint Location { get; }
|
public PdfPoint Location { get; }
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Create a new <see cref="Move"/> path command.
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="location"></param>
|
||||||
public Move(PdfPoint location)
|
public Move(PdfPoint location)
|
||||||
{
|
{
|
||||||
Location = location;
|
Location = location;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Returns <see langword="null"/> since this generates no visible path.
|
||||||
|
/// </summary>
|
||||||
public PdfRectangle? GetBoundingRectangle()
|
public PdfRectangle? GetBoundingRectangle()
|
||||||
{
|
{
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <inheritdoc />
|
||||||
public void WriteSvg(StringBuilder builder)
|
public void WriteSvg(StringBuilder builder)
|
||||||
{
|
{
|
||||||
builder.Append("M ").Append(Location.X).Append(' ').Append(Location.Y).Append(' ');
|
builder.Append("M ").Append(Location.X).Append(' ').Append(Location.Y).Append(' ');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Draw a straight line between two points.
|
||||||
|
/// </summary>
|
||||||
public class Line : IPathCommand
|
public class Line : IPathCommand
|
||||||
{
|
{
|
||||||
|
/// <summary>
|
||||||
|
/// The start of the line.
|
||||||
|
/// </summary>
|
||||||
public PdfPoint From { get; }
|
public PdfPoint From { get; }
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// The end of the line.
|
||||||
|
/// </summary>
|
||||||
public PdfPoint To { get; }
|
public PdfPoint To { get; }
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Create a new <see cref="Line"/>.
|
||||||
|
/// </summary>
|
||||||
public Line(PdfPoint from, PdfPoint to)
|
public Line(PdfPoint from, PdfPoint to)
|
||||||
{
|
{
|
||||||
From = from;
|
From = from;
|
||||||
To = to;
|
To = to;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <inheritdoc />
|
||||||
public PdfRectangle? GetBoundingRectangle()
|
public PdfRectangle? GetBoundingRectangle()
|
||||||
{
|
{
|
||||||
return new PdfRectangle(From, To);
|
return new PdfRectangle(From, To);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <inheritdoc />
|
||||||
public void WriteSvg(StringBuilder builder)
|
public void WriteSvg(StringBuilder builder)
|
||||||
{
|
{
|
||||||
builder.AppendFormat("L {0} {1} ", To.X, To.Y);
|
builder.AppendFormat("L {0} {1} ", To.X, To.Y);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Draw a Bezier curve given by the start, control and end points.
|
||||||
|
/// </summary>
|
||||||
public class BezierCurve : IPathCommand
|
public class BezierCurve : IPathCommand
|
||||||
{
|
{
|
||||||
|
/// <summary>
|
||||||
|
/// The start point of the Bezier curve.
|
||||||
|
/// </summary>
|
||||||
public PdfPoint StartPoint { get; }
|
public PdfPoint StartPoint { get; }
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// The first control point of the curve.
|
||||||
|
/// </summary>
|
||||||
public PdfPoint FirstControlPoint { get; }
|
public PdfPoint FirstControlPoint { get; }
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// The second control point of the curve.
|
||||||
|
/// </summary>
|
||||||
public PdfPoint SecondControlPoint { get; }
|
public PdfPoint SecondControlPoint { get; }
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// The end point of the curve.
|
||||||
|
/// </summary>
|
||||||
public PdfPoint EndPoint { get; }
|
public PdfPoint EndPoint { get; }
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Create a Bezier curve at the provided points.
|
||||||
|
/// </summary>
|
||||||
public BezierCurve(PdfPoint startPoint, PdfPoint firstControlPoint, PdfPoint secondControlPoint, PdfPoint endPoint)
|
public BezierCurve(PdfPoint startPoint, PdfPoint firstControlPoint, PdfPoint secondControlPoint, PdfPoint endPoint)
|
||||||
{
|
{
|
||||||
StartPoint = startPoint;
|
StartPoint = startPoint;
|
||||||
@@ -243,6 +313,7 @@ namespace UglyToad.PdfPig.Geometry
|
|||||||
EndPoint = endPoint;
|
EndPoint = endPoint;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <inheritdoc />
|
||||||
public PdfRectangle? GetBoundingRectangle()
|
public PdfRectangle? GetBoundingRectangle()
|
||||||
{
|
{
|
||||||
// Optimised
|
// Optimised
|
||||||
@@ -287,6 +358,13 @@ namespace UglyToad.PdfPig.Geometry
|
|||||||
return new PdfRectangle((decimal)minX, (decimal)minY, (decimal)maxX, (decimal)maxY);
|
return new PdfRectangle((decimal)minX, (decimal)minY, (decimal)maxX, (decimal)maxY);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <inheritdoc />
|
||||||
|
public void WriteSvg(StringBuilder builder)
|
||||||
|
{
|
||||||
|
builder.AppendFormat("C {0} {1}, {2} {3}, {4} {5} ", FirstControlPoint.X, FirstControlPoint.Y, SecondControlPoint.X, SecondControlPoint.Y,
|
||||||
|
EndPoint.X, EndPoint.Y);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
private bool TrySolveQuadratic(bool isX, double currentMin, double currentMax, out (double min, double max) solutions)
|
private bool TrySolveQuadratic(bool isX, double currentMin, double currentMax, out (double min, double max) solutions)
|
||||||
{
|
{
|
||||||
@@ -378,12 +456,6 @@ namespace UglyToad.PdfPig.Geometry
|
|||||||
|
|
||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void WriteSvg(StringBuilder builder)
|
|
||||||
{
|
|
||||||
builder.AppendFormat("C {0} {1}, {2} {3}, {4} {5} ", FirstControlPoint.X, FirstControlPoint.Y, SecondControlPoint.X, SecondControlPoint.Y,
|
|
||||||
EndPoint.X, EndPoint.Y);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
internal void Rectangle(decimal x, decimal y, decimal width, decimal height)
|
internal void Rectangle(decimal x, decimal y, decimal width, decimal height)
|
||||||
|
@@ -82,6 +82,27 @@
|
|||||||
return new PdfVector(X, Y);
|
return new PdfVector(X, Y);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Determines whether the given object is a <see cref="PdfPoint"/> with the same X and Y values as this one.
/// </summary>
/// <param name="obj">The object to compare with this point.</param>
/// <returns><see langword="true"/> if <paramref name="obj"/> is a <see cref="PdfPoint"/> with equal coordinates, otherwise <see langword="false"/>.</returns>
public override bool Equals(object obj)
    => obj is PdfPoint other && other.X == X && other.Y == Y;
|
||||||
|
|
||||||
|
/// <summary>
/// Computes a hash code for this <see cref="PdfPoint"/> from its X and Y values.
/// </summary>
/// <returns>The hash code of the (X, Y) pair.</returns>
public override int GetHashCode() => (X, Y).GetHashCode();
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Get a string representation of this point.
|
/// Get a string representation of this point.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
|
@@ -30,6 +30,11 @@
|
|||||||
/// </summary>
|
/// </summary>
|
||||||
public PdfPoint BottomLeft { get; }
|
public PdfPoint BottomLeft { get; }
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Centroid point of the rectangle.
|
||||||
|
/// </summary>
|
||||||
|
public PdfPoint Centroid { get; }
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Width of the rectangle.
|
/// Width of the rectangle.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
@@ -105,15 +110,14 @@
|
|||||||
|
|
||||||
BottomLeft = new PdfPoint(left, bottom);
|
BottomLeft = new PdfPoint(left, bottom);
|
||||||
BottomRight = new PdfPoint(right, bottom);
|
BottomRight = new PdfPoint(right, bottom);
|
||||||
|
|
||||||
|
Centroid = new PdfPoint(left + (right - left) / 2, bottom + (top - bottom) / 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
internal PdfRectangle(PdfVector topLeft, PdfVector topRight, PdfVector bottomLeft, PdfVector bottomRight)
|
internal PdfRectangle(PdfVector topLeft, PdfVector topRight, PdfVector bottomLeft, PdfVector bottomRight)
|
||||||
|
: this(topLeft.ToPoint(), topRight.ToPoint(), bottomLeft.ToPoint(), bottomRight.ToPoint())
|
||||||
{
|
{
|
||||||
TopLeft = topLeft.ToPoint();
|
|
||||||
TopRight = topRight.ToPoint();
|
|
||||||
|
|
||||||
BottomLeft = bottomLeft.ToPoint();
|
|
||||||
BottomRight = bottomRight.ToPoint();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
internal PdfRectangle(PdfPoint topLeft, PdfPoint topRight, PdfPoint bottomLeft, PdfPoint bottomRight)
|
internal PdfRectangle(PdfPoint topLeft, PdfPoint topRight, PdfPoint bottomLeft, PdfPoint bottomRight)
|
||||||
@@ -123,6 +127,8 @@
|
|||||||
|
|
||||||
BottomLeft = bottomLeft;
|
BottomLeft = bottomLeft;
|
||||||
BottomRight = bottomRight;
|
BottomRight = bottomRight;
|
||||||
|
|
||||||
|
Centroid = new PdfPoint(topLeft.X + (topRight.X - topLeft.X) / 2, bottomLeft.Y + (topLeft.Y - bottomLeft.Y) / 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
|
@@ -1,6 +1,9 @@
|
|||||||
namespace UglyToad.PdfPig.Graphics.Operations.InlineImages
|
namespace UglyToad.PdfPig.Graphics.Operations.InlineImages
|
||||||
{
|
{
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
using System.IO;
|
using System.IO;
|
||||||
|
using Tokens;
|
||||||
|
|
||||||
/// <inheritdoc />
|
/// <inheritdoc />
|
||||||
/// <summary>
|
/// <summary>
|
||||||
@@ -14,15 +17,27 @@
|
|||||||
public const string Symbol = "EI";
|
public const string Symbol = "EI";
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// The instance of the <see cref="EndInlineImage"/> operation.
|
/// The tokens declared in order for this inline image object.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public static readonly EndInlineImage Value = new EndInlineImage();
|
public IReadOnlyList<IToken> ImageTokens { get; }
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// The raw data for the inline image which should be interpreted according to the <see cref="ImageTokens"/>.
|
||||||
|
/// </summary>
|
||||||
|
public IReadOnlyList<byte> ImageData { get; }
|
||||||
|
|
||||||
/// <inheritdoc />
|
/// <inheritdoc />
|
||||||
public string Operator => Symbol;
|
public string Operator => Symbol;
|
||||||
|
|
||||||
private EndInlineImage()
|
/// <summary>
|
||||||
|
/// Create a new <see cref="EndInlineImage"/> operation.
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="imageTokens">The tokens which were set during the declaration of this image.</param>
|
||||||
|
/// <param name="imageData">The raw byte data of this image.</param>
|
||||||
|
public EndInlineImage(IReadOnlyList<IToken> imageTokens, IReadOnlyList<byte> imageData)
|
||||||
{
|
{
|
||||||
|
ImageTokens = imageTokens ?? throw new ArgumentNullException(nameof(imageTokens));
|
||||||
|
ImageData = imageData ?? throw new ArgumentNullException(nameof(imageData));
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <inheritdoc />
|
/// <inheritdoc />
|
||||||
|
@@ -75,6 +75,8 @@
|
|||||||
|
|
||||||
public void Seek(long position)
|
public void Seek(long position)
|
||||||
{
|
{
|
||||||
|
isAtEnd = false;
|
||||||
|
|
||||||
if (position == 0)
|
if (position == 0)
|
||||||
{
|
{
|
||||||
stream.Seek(0, SeekOrigin.Begin);
|
stream.Seek(0, SeekOrigin.Begin);
|
||||||
|
@@ -3,6 +3,7 @@
|
|||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using Graphics;
|
using Graphics;
|
||||||
using Graphics.Operations;
|
using Graphics.Operations;
|
||||||
|
using Graphics.Operations.InlineImages;
|
||||||
using IO;
|
using IO;
|
||||||
using Tokenization.Scanner;
|
using Tokenization.Scanner;
|
||||||
using Tokens;
|
using Tokens;
|
||||||
@@ -27,7 +28,13 @@
|
|||||||
{
|
{
|
||||||
var token = scanner.CurrentToken;
|
var token = scanner.CurrentToken;
|
||||||
|
|
||||||
if (token is OperatorToken op)
|
if (token is InlineImageDataToken inlineImageData)
|
||||||
|
{
|
||||||
|
graphicsStateOperations.Add(BeginInlineImageData.Value);
|
||||||
|
graphicsStateOperations.Add(new EndInlineImage(precedingTokens, inlineImageData.Data));
|
||||||
|
precedingTokens.Clear();
|
||||||
|
}
|
||||||
|
else if (token is OperatorToken op)
|
||||||
{
|
{
|
||||||
var operation = operationFactory.Create(op, precedingTokens);
|
var operation = operationFactory.Create(op, precedingTokens);
|
||||||
|
|
||||||
|
@@ -20,11 +20,12 @@
|
|||||||
|
|
||||||
private readonly ScannerScope scope;
|
private readonly ScannerScope scope;
|
||||||
private readonly IInputBytes inputBytes;
|
private readonly IInputBytes inputBytes;
|
||||||
private readonly List<byte> currentBuffer = new List<byte>();
|
|
||||||
private readonly List<(byte firstByte, ITokenizer tokenizer)> customTokenizers = new List<(byte, ITokenizer)>();
|
private readonly List<(byte firstByte, ITokenizer tokenizer)> customTokenizers = new List<(byte, ITokenizer)>();
|
||||||
|
|
||||||
internal long CurrentTokenStart { get; private set; }
|
internal long CurrentTokenStart { get; private set; }
|
||||||
|
|
||||||
public IToken CurrentToken { get; private set; }
|
public IToken CurrentToken { get; private set; }
|
||||||
|
|
||||||
public bool TryReadToken<T>(out T token) where T : class, IToken
|
public bool TryReadToken<T>(out T token) where T : class, IToken
|
||||||
{
|
{
|
||||||
token = default(T);
|
token = default(T);
|
||||||
@@ -51,6 +52,7 @@
|
|||||||
public long CurrentPosition => inputBytes.CurrentOffset;
|
public long CurrentPosition => inputBytes.CurrentOffset;
|
||||||
|
|
||||||
private bool hasBytePreRead;
|
private bool hasBytePreRead;
|
||||||
|
private bool isInInlineImage;
|
||||||
|
|
||||||
internal CoreTokenScanner(IInputBytes inputBytes, ScannerScope scope = ScannerScope.None)
|
internal CoreTokenScanner(IInputBytes inputBytes, ScannerScope scope = ScannerScope.None)
|
||||||
{
|
{
|
||||||
@@ -60,8 +62,6 @@
|
|||||||
|
|
||||||
public bool MoveNext()
|
public bool MoveNext()
|
||||||
{
|
{
|
||||||
currentBuffer.Clear();
|
|
||||||
|
|
||||||
var endAngleBracesRead = 0;
|
var endAngleBracesRead = 0;
|
||||||
|
|
||||||
bool isSkippingSymbol = false;
|
bool isSkippingSymbol = false;
|
||||||
@@ -89,7 +89,6 @@
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// If we failed to read the symbol for whatever reason we pass over it.
|
// If we failed to read the symbol for whatever reason we pass over it.
|
||||||
if (isSkippingSymbol && c != '>')
|
if (isSkippingSymbol && c != '>')
|
||||||
{
|
{
|
||||||
@@ -161,6 +160,23 @@
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (token is OperatorToken op)
|
||||||
|
{
|
||||||
|
if (op.Data == "BI")
|
||||||
|
{
|
||||||
|
isInInlineImage = true;
|
||||||
|
}
|
||||||
|
else if (isInInlineImage && op.Data == "ID")
|
||||||
|
{
|
||||||
|
// Special case handling for inline images.
|
||||||
|
var imageData = ReadInlineImageData();
|
||||||
|
isInInlineImage = false;
|
||||||
|
CurrentToken = new InlineImageDataToken(imageData);
|
||||||
|
hasBytePreRead = false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
CurrentToken = token;
|
CurrentToken = token;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -190,6 +206,35 @@
|
|||||||
customTokenizers.RemoveAll(x => ReferenceEquals(x.tokenizer, tokenizer));
|
customTokenizers.RemoveAll(x => ReferenceEquals(x.tokenizer, tokenizer));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Reads the raw bytes of an inline image. Reading starts at the byte following the current one and stops at the
/// first occurrence of the byte pair 'E' 'I'.
/// </summary>
/// <returns>
/// The bytes read up to, but not including, the terminating 'E' 'I' pair. Any bytes preceding the pair
/// (including white-space) are kept in the result.
/// </returns>
/// <exception cref="PdfDocumentFormatException">
/// The current byte is not a white-space character, or the input ended before an 'E' 'I' pair was found.
/// </exception>
private IReadOnlyList<byte> ReadInlineImageData()
{
    // The ID operator should be followed by a single white-space character, and the next character is interpreted
    // as the first byte of image data. Any of the PDF white-space characters (NUL, tab, line feed, form feed,
    // carriage return, space) is accepted rather than the space character only.
    var separator = inputBytes.CurrentByte;
    var isWhiteSpace = separator == 0
        || separator == '\t'
        || separator == '\n'
        || separator == '\f'
        || separator == '\r'
        || separator == ' ';

    if (!isWhiteSpace)
    {
        throw new PdfDocumentFormatException($"No whitespace character following the image data (ID) operator. Position: {inputBytes.CurrentOffset}.");
    }

    // Offset reported if the image is never terminated.
    var startsAt = inputBytes.CurrentOffset - 2;

    var imageData = new List<byte>();
    byte prevByte = 0;
    while (inputBytes.MoveNext())
    {
        // NOTE(review): the first 'E' 'I' byte pair ends the image, so binary data that happens to contain
        // these two bytes in sequence would be cut short - confirm whether surrounding white-space should be required.
        if (inputBytes.CurrentByte == 'I' && prevByte == 'E')
        {
            // The 'E' was already stored on the previous iteration; drop it so only image bytes are returned.
            imageData.RemoveAt(imageData.Count - 1);
            return imageData;
        }

        imageData.Add(inputBytes.CurrentByte);

        prevByte = inputBytes.CurrentByte;
    }

    throw new PdfDocumentFormatException($"No end of inline image data (EI) was found for image data at position {startsAt}.");
}
|
||||||
|
|
||||||
private static bool IsEmpty(byte b)
|
private static bool IsEmpty(byte b)
|
||||||
{
|
{
|
||||||
return b == ' ' || b == '\r' || b == '\n' || b == 0;
|
return b == ' ' || b == '\r' || b == '\n' || b == 0;
|
||||||
|
22
src/UglyToad.PdfPig/Tokens/InlineImageDataToken.cs
Normal file
22
src/UglyToad.PdfPig/Tokens/InlineImageDataToken.cs
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
namespace UglyToad.PdfPig.Tokens
{
    using System.Collections.Generic;

    /// <summary>
    /// Inline image data is used to embed images in PDF content streams.
    /// The bytes sit between the ID (image data) and EI (end image) operators of an inline image started by a BI operator.
    /// </summary>
    public class InlineImageDataToken : IDataToken<IReadOnlyList<byte>>
    {
        /// <summary>
        /// Create a new <see cref="InlineImageDataToken"/>.
        /// </summary>
        /// <param name="data">The bytes of the inline image.</param>
        public InlineImageDataToken(IReadOnlyList<byte> data) => Data = data;

        /// <inheritdoc />
        public IReadOnlyList<byte> Data { get; }
    }
}
|
@@ -6,15 +6,15 @@
|
|||||||
<DebugType>full</DebugType>
|
<DebugType>full</DebugType>
|
||||||
<Authors>UglyToad</Authors>
|
<Authors>UglyToad</Authors>
|
||||||
<Title>PdfPig</Title>
|
<Title>PdfPig</Title>
|
||||||
<Description>Reads text content from PDF documents and supports document creation.</Description>
|
<Description>Reads text content from PDF documents and supports document creation. Apache 2.0 licensed.</Description>
|
||||||
<PackageLicenseUrl>https://raw.githubusercontent.com/UglyToad/PdfPig/master/LICENSE</PackageLicenseUrl>
|
<PackageLicenseUrl>https://raw.githubusercontent.com/UglyToad/PdfPig/master/LICENSE</PackageLicenseUrl>
|
||||||
<PackageProjectUrl>https://github.com/UglyToad/PdfPig</PackageProjectUrl>
|
<PackageProjectUrl>https://github.com/UglyToad/PdfPig</PackageProjectUrl>
|
||||||
<PackageTags>PDF;Reader;Document;Adobe;PDFBox;PdfPig;pdf-extract</PackageTags>
|
<PackageTags>PDF;Reader;Document;Adobe;PDFBox;PdfPig;pdf-extract</PackageTags>
|
||||||
<RepositoryUrl>https://github.com/UglyToad/PdfPig</RepositoryUrl>
|
<RepositoryUrl>https://github.com/UglyToad/PdfPig</RepositoryUrl>
|
||||||
<GenerateDocumentationFile>true</GenerateDocumentationFile>
|
<GenerateDocumentationFile>true</GenerateDocumentationFile>
|
||||||
<Version>0.0.6</Version>
|
<Version>0.0.7</Version>
|
||||||
<AssemblyVersion>0.0.6.0</AssemblyVersion>
|
<AssemblyVersion>0.0.7.0</AssemblyVersion>
|
||||||
<FileVersion>0.0.6.0</FileVersion>
|
<FileVersion>0.0.7.0</FileVersion>
|
||||||
<PackageIconUrl>https://raw.githubusercontent.com/UglyToad/PdfPig/master/documentation/pdfpig.png</PackageIconUrl>
|
<PackageIconUrl>https://raw.githubusercontent.com/UglyToad/PdfPig/master/documentation/pdfpig.png</PackageIconUrl>
|
||||||
<Product>PdfPig</Product>
|
<Product>PdfPig</Product>
|
||||||
<PublishRepositoryUrl>true</PublishRepositoryUrl>
|
<PublishRepositoryUrl>true</PublishRepositoryUrl>
|
||||||
|
Reference in New Issue
Block a user