diff --git a/src/UglyToad.PdfPig.Tests/Graphics/Operations/GraphicsStateOperationTests.cs b/src/UglyToad.PdfPig.Tests/Graphics/Operations/GraphicsStateOperationTests.cs index ca310063..1aacdee0 100644 --- a/src/UglyToad.PdfPig.Tests/Graphics/Operations/GraphicsStateOperationTests.cs +++ b/src/UglyToad.PdfPig.Tests/Graphics/Operations/GraphicsStateOperationTests.cs @@ -6,6 +6,7 @@ using System.Linq; using System.Reflection; using PdfPig.Graphics.Operations; + using PdfPig.Graphics.Operations.InlineImages; using PdfPig.Tokens; using Xunit; @@ -41,6 +42,10 @@ operation = (IGraphicsStateOperation)field.GetValue(null); } + else if (operationType == typeof(EndInlineImage)) + { + operation = new EndInlineImage(new List(), new List()); + } else { var constructor = constructors[0]; diff --git a/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs b/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs index 1c3c5e96..2b26f516 100644 --- a/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs +++ b/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs @@ -174,6 +174,7 @@ "UglyToad.PdfPig.Tokens.HexToken", "UglyToad.PdfPig.Tokens.IDataToken`1", "UglyToad.PdfPig.Tokens.IndirectReferenceToken", + "UglyToad.PdfPig.Tokens.InlineImageDataToken", "UglyToad.PdfPig.Tokens.IToken", "UglyToad.PdfPig.Tokens.NameToken", "UglyToad.PdfPig.Tokens.NullToken", diff --git a/src/UglyToad.PdfPig/Content/Page.cs b/src/UglyToad.PdfPig/Content/Page.cs index f28c1834..6ce7b9ff 100644 --- a/src/UglyToad.PdfPig/Content/Page.cs +++ b/src/UglyToad.PdfPig/Content/Page.cs @@ -9,7 +9,7 @@ using Util; using Util.JetBrains.Annotations; using XObjects; - using UglyToad.PdfPig.Geometry; + using Geometry; /// /// Contains the content and provides access to methods of a single page in the . @@ -30,23 +30,18 @@ internal CropBox CropBox { get; } + internal PageContent Content { get; } + /// /// The rotation of the page in degrees (clockwise). Valid values are 0, 90, 180 and 270. 
/// public PageRotationDegrees Rotation { get; } - internal PageContent Content { get; } - /// /// The set of s drawn by the PDF content. /// public IReadOnlyList Letters => Content?.Letters ?? new Letter[0]; - - /// - /// The set of s drawn by the PDF content. - /// - public IReadOnlyList Paths => Content?.Paths ?? new List(); - + /// /// The full text of all characters on the page in the order they are presented in the PDF content. /// @@ -136,6 +131,11 @@ private readonly Page page; private readonly AnnotationProvider annotationProvider; + /// + /// The set of paths drawn by the PDF content. + /// + public IReadOnlyList Paths => page.Content?.Paths ?? new List(); + internal Experimental(Page page, AnnotationProvider annotationProvider) { this.page = page; diff --git a/src/UglyToad.PdfPig/Geometry/PdfPath.cs b/src/UglyToad.PdfPig/Geometry/PdfPath.cs index 5d261692..7b4061ed 100644 --- a/src/UglyToad.PdfPig/Geometry/PdfPath.cs +++ b/src/UglyToad.PdfPig/Geometry/PdfPath.cs @@ -5,19 +5,28 @@ namespace UglyToad.PdfPig.Geometry using System.Collections.Generic; using System.Linq; using System.Text; - using UglyToad.PdfPig.Core; + using Core; /// - /// A path in a PDF document, used by glyphs and page content. + /// A path in a PDF document, used by glyphs and page content. Can contain multiple sub-paths. /// public class PdfPath { private readonly List commands = new List(); + + /// + /// The sequence of commands which form this path. + /// public IReadOnlyList Commands => commands; private PdfPoint? currentPosition; - private TransformationMatrix currentTransformationMatrix = TransformationMatrix.Identity; + private readonly TransformationMatrix currentTransformationMatrix; + + /// + /// Create a new path. + /// + /// The transformation to apply to all points in this path. 
public PdfPath(TransformationMatrix transformationMatrix) { currentTransformationMatrix = transformationMatrix; @@ -162,79 +171,140 @@ namespace UglyToad.PdfPig.Geometry return result; } + /// + /// A command in a path. + /// public interface IPathCommand { + /// + /// Returns the smallest rectangle which contains the path region given by this command. + /// + /// PdfRectangle? GetBoundingRectangle(); + /// + /// Converts from the path command to an SVG string representing the path operation. + /// void WriteSvg(StringBuilder builder); } - private class Close : IPathCommand + /// + /// Close the current path. + /// + public class Close : IPathCommand { + /// public PdfRectangle? GetBoundingRectangle() { return null; } + /// public void WriteSvg(StringBuilder builder) { builder.Append("Z "); } } + /// + /// Move drawing of the current path to the specified location. + /// public class Move : IPathCommand { + /// + /// The location to move to. + /// public PdfPoint Location { get; } + /// + /// Create a new move path command. + /// + /// public Move(PdfPoint location) { Location = location; } + /// + /// Returns null since this generates no visible path. + /// public PdfRectangle? GetBoundingRectangle() { return null; } + /// public void WriteSvg(StringBuilder builder) { builder.Append("M ").Append(Location.X).Append(' ').Append(Location.Y).Append(' '); } } + /// + /// Draw a straight line between two points. + /// public class Line : IPathCommand { + /// + /// The start of the line. + /// public PdfPoint From { get; } + /// + /// The end of the line. + /// public PdfPoint To { get; } + /// + /// Create a new line. + /// public Line(PdfPoint from, PdfPoint to) { From = from; To = to; } + /// public PdfRectangle? GetBoundingRectangle() { return new PdfRectangle(From, To); } + /// public void WriteSvg(StringBuilder builder) { builder.AppendFormat("L {0} {1} ", To.X, To.Y); } } + /// + /// Draw a Bezier curve given by the start, control and end points. 
+ /// public class BezierCurve : IPathCommand { + /// + /// The start point of the Bezier curve. + /// public PdfPoint StartPoint { get; } + /// + /// The first control point of the curve. + /// public PdfPoint FirstControlPoint { get; } + /// + /// The second control point of the curve. + /// public PdfPoint SecondControlPoint { get; } + /// + /// The end point of the curve. + /// public PdfPoint EndPoint { get; } + /// + /// Create a Bezier curve at the provided points. + /// public BezierCurve(PdfPoint startPoint, PdfPoint firstControlPoint, PdfPoint secondControlPoint, PdfPoint endPoint) { StartPoint = startPoint; @@ -243,6 +313,7 @@ namespace UglyToad.PdfPig.Geometry EndPoint = endPoint; } + /// public PdfRectangle? GetBoundingRectangle() { // Optimised @@ -287,6 +358,13 @@ namespace UglyToad.PdfPig.Geometry return new PdfRectangle((decimal)minX, (decimal)minY, (decimal)maxX, (decimal)maxY); } + /// Writes this curve as an SVG "C" (cubic Bezier) command; numbers are formatted with the invariant culture so the decimal separator is always '.'. + public void WriteSvg(StringBuilder builder) + { + builder.AppendFormat(System.Globalization.CultureInfo.InvariantCulture, "C {0} {1}, {2} {3}, {4} {5} ", FirstControlPoint.X, FirstControlPoint.Y, SecondControlPoint.X, SecondControlPoint.Y, + EndPoint.X, EndPoint.Y); + } + private bool TrySolveQuadratic(bool isX, double currentMin, double currentMax, out (double min, double max) solutions) { @@ -378,12 +456,6 @@ namespace UglyToad.PdfPig.Geometry return p; } - - public void WriteSvg(StringBuilder builder) - { - builder.AppendFormat("C {0} {1}, {2} {3}, {4} {5} ", FirstControlPoint.X, FirstControlPoint.Y, SecondControlPoint.X, SecondControlPoint.Y, - EndPoint.X, EndPoint.Y); - } } internal void Rectangle(decimal x, decimal y, decimal width, decimal height) diff --git a/src/UglyToad.PdfPig/Graphics/Operations/InlineImages/EndInlineImage.cs b/src/UglyToad.PdfPig/Graphics/Operations/InlineImages/EndInlineImage.cs index 87eacfec..f6b7ceb3 100644 --- a/src/UglyToad.PdfPig/Graphics/Operations/InlineImages/EndInlineImage.cs +++ b/src/UglyToad.PdfPig/Graphics/Operations/InlineImages/EndInlineImage.cs @@ -1,6 +1,9 @@ 
namespace UglyToad.PdfPig.Graphics.Operations.InlineImages { + using System; + using System.Collections.Generic; using System.IO; + using Tokens; /// /// @@ -14,15 +17,27 @@ public const string Symbol = "EI"; /// - /// The instance of the operation. + /// The tokens declared in order for this inline image object. /// - public static readonly EndInlineImage Value = new EndInlineImage(); + public IReadOnlyList ImageTokens { get; } + /// + /// The raw data for the inline image which should be interpreted according to the image tokens. + /// + public IReadOnlyList ImageData { get; } + /// public string Operator => Symbol; - private EndInlineImage() + /// + /// Create a new end inline image operation. + /// + /// The tokens which were set during the declaration of this image. + /// The raw byte data of this image. + public EndInlineImage(IReadOnlyList imageTokens, IReadOnlyList imageData) { + ImageTokens = imageTokens ?? throw new ArgumentNullException(nameof(imageTokens)); + ImageData = imageData ?? throw new ArgumentNullException(nameof(imageData)); } /// diff --git a/src/UglyToad.PdfPig/Parser/PageContentParser.cs b/src/UglyToad.PdfPig/Parser/PageContentParser.cs index 4c3712c8..a777a0be 100644 --- a/src/UglyToad.PdfPig/Parser/PageContentParser.cs +++ b/src/UglyToad.PdfPig/Parser/PageContentParser.cs @@ -3,6 +3,7 @@ using System.Collections.Generic; using Graphics; using Graphics.Operations; + using Graphics.Operations.InlineImages; using IO; using Tokenization.Scanner; using Tokens; @@ -27,7 +28,13 @@ { var token = scanner.CurrentToken; - if (token is OperatorToken op) + if (token is InlineImageDataToken inlineImageData) + { + graphicsStateOperations.Add(BeginInlineImageData.Value); + graphicsStateOperations.Add(new EndInlineImage(precedingTokens.ToArray(), inlineImageData.Data)); // Copy the tokens because the list is cleared on the next line. + precedingTokens.Clear(); + } + else if (token is OperatorToken op) { var operation = operationFactory.Create(op, precedingTokens); diff --git a/src/UglyToad.PdfPig/Tokenization/Scanner/CoreTokenScanner.cs 
b/src/UglyToad.PdfPig/Tokenization/Scanner/CoreTokenScanner.cs index 07563135..2d792592 100644 --- a/src/UglyToad.PdfPig/Tokenization/Scanner/CoreTokenScanner.cs +++ b/src/UglyToad.PdfPig/Tokenization/Scanner/CoreTokenScanner.cs @@ -20,11 +20,12 @@ private readonly ScannerScope scope; private readonly IInputBytes inputBytes; - private readonly List currentBuffer = new List(); private readonly List<(byte firstByte, ITokenizer tokenizer)> customTokenizers = new List<(byte, ITokenizer)>(); internal long CurrentTokenStart { get; private set; } + public IToken CurrentToken { get; private set; } + public bool TryReadToken(out T token) where T : class, IToken { token = default(T); @@ -51,6 +52,7 @@ public long CurrentPosition => inputBytes.CurrentOffset; private bool hasBytePreRead; + private bool isInInlineImage; internal CoreTokenScanner(IInputBytes inputBytes, ScannerScope scope = ScannerScope.None) { @@ -60,8 +62,6 @@ public bool MoveNext() { - currentBuffer.Clear(); - var endAngleBracesRead = 0; bool isSkippingSymbol = false; @@ -89,7 +89,6 @@ continue; } - // If we failed to read the symbol for whatever reason we pass over it. if (isSkippingSymbol && c != '>') { @@ -161,6 +160,23 @@ continue; } + if (token is OperatorToken op) + { + if (op.Data == "BI") + { + isInInlineImage = true; + } + else if (isInInlineImage && op.Data == "ID") + { + // Special case handling for inline images. + var imageData = ReadInlineImageData(); + isInInlineImage = false; + CurrentToken = new InlineImageDataToken(imageData); + hasBytePreRead = false; + return true; + } + } + CurrentToken = token; /* @@ -190,6 +206,35 @@ customTokenizers.RemoveAll(x => ReferenceEquals(x.tokenizer, tokenizer)); } + private IReadOnlyList ReadInlineImageData() + { + // The ID operator should be followed by a single white-space character, and the next character is interpreted + // as the first byte of image data. NOTE(review): the scan below stops at the first "EI" byte pair, which could also occur inside binary image data - confirm whether delimiting white-space should be required. 
+ if (!IsEmpty(inputBytes.CurrentByte)) + { + throw new PdfDocumentFormatException($"No whitespace character following the image data (ID) operator. Position: {inputBytes.CurrentOffset}."); + } + + var startsAt = inputBytes.CurrentOffset - 2; + + var imageData = new List(); + byte prevByte = 0; + while (inputBytes.MoveNext()) + { + if (inputBytes.CurrentByte == 'I' && prevByte == 'E') + { + imageData.RemoveAt(imageData.Count - 1); + return imageData; + } + + imageData.Add(inputBytes.CurrentByte); + + prevByte = inputBytes.CurrentByte; + } + + throw new PdfDocumentFormatException($"No end of inline image data (EI) was found for image data at position {startsAt}."); + } + private static bool IsEmpty(byte b) { return b == ' ' || b == '\r' || b == '\n' || b == 0; diff --git a/src/UglyToad.PdfPig/Tokens/InlineImageDataToken.cs b/src/UglyToad.PdfPig/Tokens/InlineImageDataToken.cs new file mode 100644 index 00000000..19b3083e --- /dev/null +++ b/src/UglyToad.PdfPig/Tokens/InlineImageDataToken.cs @@ -0,0 +1,22 @@ +namespace UglyToad.PdfPig.Tokens +{ + using System.Collections.Generic; + + /// + /// Inline image data is used to embed images in PDF content streams. The content is wrapped by ID and EI tags in a BI operation. + /// + public class InlineImageDataToken : IDataToken> + { + /// + public IReadOnlyList Data { get; } + + /// + /// Create a new inline image data token. + /// + /// + public InlineImageDataToken(IReadOnlyList data) + { + Data = data; + } + } +} \ No newline at end of file