diff --git a/src/UglyToad.PdfPig.Tests/Graphics/TestOperationContext.cs b/src/UglyToad.PdfPig.Tests/Graphics/TestOperationContext.cs
index d977f911..2e6dce19 100644
--- a/src/UglyToad.PdfPig.Tests/Graphics/TestOperationContext.cs
+++ b/src/UglyToad.PdfPig.Tests/Graphics/TestOperationContext.cs
@@ -91,6 +91,16 @@
         {
         }
 
+        public void BeginMarkedContent(NameToken name, NameToken propertyDictionaryName, DictionaryToken properties)
+        {
+            // Intentionally empty: this test context does not record marked content.
+        }
+
+        public void EndMarkedContent()
+        {
+            // Intentionally empty: this test context does not record marked content.
+        }
+
         private class TestFontFactory : IFontFactory
         {
             public IFont Get(DictionaryToken dictionary, bool isLenientParsing)
diff --git a/src/UglyToad.PdfPig/Content/Page.cs b/src/UglyToad.PdfPig/Content/Page.cs
index 7729930b..4ebc1f07 100644
--- a/src/UglyToad.PdfPig/Content/Page.cs
+++ b/src/UglyToad.PdfPig/Content/Page.cs
@@ -153,6 +153,12 @@
         ///
         public IEnumerable GetImages() => Content.GetImages();
 
+        /// <summary>
+        /// Gets the marked contents recorded for this page; nested ones are reachable through their parent's child contents.
+        /// </summary>
+        /// <returns>The list exposed by the page content's <c>GetMarkedContents()</c>.</returns>
+        public IReadOnlyList<PdfMarkedContent> GetMarkedContents() => Content.GetMarkedContents();
+
         ///
         /// Provides access to useful members which will change in future releases.
         ///
diff --git a/src/UglyToad.PdfPig/Content/PageContent.cs b/src/UglyToad.PdfPig/Content/PageContent.cs
index d7f63a3d..0ae84f41 100644
--- a/src/UglyToad.PdfPig/Content/PageContent.cs
+++ b/src/UglyToad.PdfPig/Content/PageContent.cs
@@ -20,6 +20,7 @@
     internal class PageContent
     {
         private readonly IReadOnlyList> images;
+        private readonly IReadOnlyList<PdfMarkedContent> markedContents; // stored as passed in; not null-checked
         private readonly IPdfTokenScanner pdfScanner;
         private readonly IFilterProvider filterProvider;
         private readonly IResourceStore resourceStore;
@@ -34,6 +35,7 @@
         internal PageContent(IReadOnlyList graphicsStateOperations, IReadOnlyList letters,
             IReadOnlyList paths,
             IReadOnlyList> images,
+            IReadOnlyList<PdfMarkedContent> markedContents,
             IPdfTokenScanner pdfScanner,
             IFilterProvider filterProvider,
             IResourceStore resourceStore,
@@ -43,6 +45,7 @@
             Letters = letters;
             Paths = paths;
             this.images = images;
+            this.markedContents = markedContents;
             this.pdfScanner = pdfScanner ?? throw new ArgumentNullException(nameof(pdfScanner));
             this.filterProvider = filterProvider ?? throw new ArgumentNullException(nameof(filterProvider));
             this.resourceStore = resourceStore ?? throw new ArgumentNullException(nameof(resourceStore));
@@ -61,5 +64,7 @@
                 yield return result;
             }
         }
+
+        public IReadOnlyList<PdfMarkedContent> GetMarkedContents() => markedContents;
     }
 }
diff --git a/src/UglyToad.PdfPig/Content/PdfArtifactMarkedContent.cs b/src/UglyToad.PdfPig/Content/PdfArtifactMarkedContent.cs
new file mode 100644
index 00000000..72d94e25
--- /dev/null
+++ b/src/UglyToad.PdfPig/Content/PdfArtifactMarkedContent.cs
@@ -0,0 +1,159 @@
+namespace UglyToad.PdfPig.Content
+{
+    using System;
+    using System.Linq;
+    using UglyToad.PdfPig.Core;
+    using UglyToad.PdfPig.Tokens;
+
+    /// <summary>
+    /// Artifacts are graphics objects that are not part of the author’s original content but rather are
+    /// generated by the conforming writer in the course of pagination, layout, or other strictly mechanical
+    /// processes.
+    /// Artifacts may also be used to describe areas of the document where the author uses a graphical
+    /// background, with the goal of enhancing the visual experience. In such a case, the background is not
+    /// required for understanding the content. - PDF 32000-1:2008, Section 14.8.2.2
+    /// </summary>
+    public class PdfArtifactMarkedContent : PdfMarkedContent
+    {
+        internal PdfArtifactMarkedContent(int id, DictionaryToken properties) : base(id, NameToken.Artifact, properties)
+        {
+            IsArtifact = true;
+        }
+
+        /// <summary>
+        /// The artifact's type: Pagination, Layout, Page, or (PDF 1.7) Background; Unknown when absent or not recognised.
+        /// </summary>
+        public ArtifactType Type
+        {
+            get
+            {
+                // IsDefined rejects numeric strings such as "7", which TryParse would accept as an undefined value.
+                if (Properties != null
+                    && Properties.TryGet(NameToken.Type, out IDataToken<string> typeToken)
+                    && Enum.TryParse(typeToken.Data, true, out ArtifactType parsed)
+                    && Enum.IsDefined(typeof(ArtifactType), parsed))
+                {
+                    return parsed;
+                }
+                return ArtifactType.Unknown;
+            }
+        }
+
+        ///
+        /// The artifact's attribute owners.
+        ///
+        public string AttributeOwners
+        {
+            get
+            {
+                // A missing property list and a missing O entry both yield null.
+                if (Properties != null && Properties.TryGet(NameToken.O, out IDataToken<string> ownerToken))
+                {
+                    return ownerToken.Data;
+                }
+                return null;
+            }
+        }
+
+        /// <summary>
+        /// The artifact's bounding box, or null when the entry is missing, too short or not numeric.
+        /// </summary>
+        public PdfRectangle? BoundingBox
+        {
+            get
+            {
+                if (Properties == null || !Properties.TryGet(NameToken.Bbox, out ArrayToken arrayToken)) return null;
+                var count = arrayToken.Data.Count();
+                var first = count == 4 ? 0 : 2; // 4 entries: left bottom right top; otherwise keep the original slots 2..5
+                if (count < first + 4) return null;
+                var left = arrayToken[first] as NumericToken;
+                var bottom = arrayToken[first + 1] as NumericToken;
+                var right = arrayToken[first + 2] as NumericToken;
+                var top = arrayToken[first + 3] as NumericToken;
+                if (left == null || bottom == null || right == null || top == null) return null;
+                return new PdfRectangle((double)left.Data, (double)bottom.Data, (double)right.Data, (double)top.Data);
+            }
+        }
+
+        /// <summary>
+        /// Is the artifact attached to the top edge?
+        /// </summary>
+        public bool IsTopAttached => IsAttached("Top");
+
+        /// <summary>
+        /// Is the artifact attached to the bottom edge?
+        /// </summary>
+        public bool IsBottomAttached => IsAttached("Bottom");
+
+        /// <summary>
+        /// Is the artifact attached to the left edge?
+        /// </summary>
+        public bool IsLeftAttached => IsAttached("Left");
+
+        /// <summary>
+        /// Is the artifact attached to the right edge?
+        /// </summary>
+        public bool IsRightAttached => IsAttached("Right");
+
+        /// <summary>
+        /// The artifact's subtype. Standard values are Header, Footer, and Watermark. Additional values may be specified for this entry, provided they comply with the naming conventions.
+        /// </summary>
+        public string SubType
+        {
+            get
+            {
+                // A missing property list and a missing Subtype entry both yield null.
+                if (Properties != null && Properties.TryGet(NameToken.Subtype, out IDataToken<string> subTypeToken))
+                {
+                    return subTypeToken.Data;
+                }
+                return null;
+            }
+        }
+
+        private bool IsAttached(string edge)
+        {
+            // True only when the Attached array exists and contains the name token for the requested edge.
+            if (Properties != null && Properties.TryGet(NameToken.Attached, out ArrayToken edges))
+            {
+                return edges.Data.Contains(NameToken.Create(edge));
+            }
+            return false;
+        }
+    }
+
+    ///
+    /// If present, shall be one of the names Pagination, Layout, Page, or (PDF 1.7) Background.
+    ///
+    public enum ArtifactType
+    {
+        /// <summary>
+        /// Unknown artifact type.
+        /// </summary>
+        Unknown,
+
+        /// <summary>
+        /// Ancillary page features such as running heads and folios (page numbers).
+        /// </summary>
+        Pagination,
+
+        /// <summary>
+        /// Purely cosmetic typographical or design elements such as footnote rules or background screens.
+        /// </summary>
+        Layout,
+
+        /// <summary>
+        /// Production aids extraneous to the document itself, such as cut marks and colour bars.
+        /// </summary>
+        Page,
+
+        /// <summary>
+        /// (PDF 1.7) Images, patterns or coloured blocks that either run the entire length and/or
+        /// width of the page or the entire dimensions of a structural element. Background artifacts
+        /// typically serve as a background for content shown either on top of or placed adjacent to
+        /// that background.
+        /// A background artifact can further be classified as visual content that serves to enhance the user experience, that lies under the actual content, and that is not required except to retain visual fidelity.
+        /// </summary>
+        Background
+    }
+}
diff --git a/src/UglyToad.PdfPig/Content/PdfMarkedContent.cs b/src/UglyToad.PdfPig/Content/PdfMarkedContent.cs
new file mode 100644
index 00000000..49729fcd
--- /dev/null
+++ b/src/UglyToad.PdfPig/Content/PdfMarkedContent.cs
@@ -0,0 +1,190 @@
+namespace UglyToad.PdfPig.Content
+{
+    using System.Collections.Generic;
+    using UglyToad.PdfPig.Core;
+    using UglyToad.PdfPig.Graphics;
+    using UglyToad.PdfPig.Tokens;
+    using UglyToad.PdfPig.Util;
+
+    /// <summary>
+    /// A marked content together with the letters, paths, images and child marked contents added to it.
+    /// </summary>
+    public class PdfMarkedContent
+    {
+        private readonly List<IPdfImage> images = new List<IPdfImage>();
+        private readonly List<PdfPath> pdfPaths = new List<PdfPath>();
+        private readonly List<Letter> letters = new List<Letter>();
+        private readonly List<XObjectContentRecord> xObjectContentRecords = new List<XObjectContentRecord>(); // collected, but no public accessor in this class
+
+        internal PdfMarkedContent(int id, NameToken tag, DictionaryToken properties)
+        {
+            Id = id;
+            Tag = tag;
+            Properties = properties; // may be null; the property getters treat null as "no properties"
+            ChildContents = new List<PdfMarkedContent>();
+        }
+
+        ///
+        /// Is the marked content an artifact.
+        ///
+        public bool IsArtifact { get; internal set; }
+
+        /// <summary>
+        /// Internal Id for top marked content. Child marked contents will share the same Id as the parent.
+        /// </summary>
+        public int Id { get; }
+
+        /// <summary>
+        /// Marked-content identifier, or -1 when there are no properties or no MCID entry.
+        /// </summary>
+        public int MCID
+        {
+            get
+            {
+                // -1 stands for "no identifier" in both the missing-properties and the missing-entry case.
+                if (Properties == null || !Properties.ContainsKey(NameToken.Mcid))
+                {
+                    return -1;
+                }
+                return Properties.GetInt(NameToken.Mcid);
+            }
+        }
+
+        /// <summary>
+        /// The tag the marked content was created with.
+        /// </summary>
+        public string Tag { get; }
+
+        /// <summary>
+        /// The properties the marked content was created with; may be null.
+        /// </summary>
+        public DictionaryToken Properties { get; }
+
+        /// <summary>
+        /// Marked contents added as children of this one.
+        /// </summary>
+        public List<PdfMarkedContent> ChildContents { get; }
+
+        /// <summary>
+        /// The natural language specification, or null when not present.
+        /// </summary>
+        public string Language
+        {
+            get
+            {
+                // A missing property list and a missing Lang entry both yield null.
+                if (Properties != null && Properties.TryGet(NameToken.Lang, out IDataToken<string> langToken))
+                {
+                    return langToken.Data;
+                }
+                return null;
+            }
+        }
+
+        /// <summary>
+        /// The replacement text, or null when not present.
+        /// </summary>
+        public string ActualText
+        {
+            get
+            {
+                // A missing property list and a missing ActualText entry both yield null.
+                if (Properties != null && Properties.TryGet(NameToken.ActualText, out IDataToken<string> actualTextToken))
+                {
+                    return actualTextToken.Data;
+                }
+                return null;
+            }
+        }
+
+        /// <summary>
+        /// The alternate description, or null when not present.
+        /// </summary>
+        public string AlternateDescription
+        {
+            get
+            {
+                // A missing property list and a missing alternate-description entry both yield null.
+                if (Properties != null && Properties.TryGet(NameToken.Alternate, out IDataToken<string> alternateToken))
+                {
+                    return alternateToken.Data;
+                }
+                return null;
+            }
+        }
+
+        /// <summary>
+        /// The abbreviation expansion text, or null when not present.
+        /// </summary>
+        public string ExpandedForm
+        {
+            get
+            {
+                // A missing property list and a missing E entry both yield null.
+                if (Properties != null && Properties.TryGet(NameToken.E, out IDataToken<string> expansionToken))
+                {
+                    return expansionToken.Data;
+                }
+                return null;
+            }
+        }
+
+        /// <summary>
+        /// The marked content's images.
+        /// </summary>
+        public IReadOnlyList<IPdfImage> Images => images;
+
+        /// <summary>
+        /// The marked content's paths.
+        /// </summary>
+        public IReadOnlyList<PdfPath> PdfPaths => pdfPaths;
+
+        ///
+        /// The marked content's letters.
+        ///
+        public IReadOnlyList<Letter> Letters => letters;
+
+        internal void Add(IPdfImage pdfImage)
+        {
+            images.Add(pdfImage); // surfaces through Images
+        }
+
+        internal void Add(PdfPath pdfPath)
+        {
+            pdfPaths.Add(pdfPath); // surfaces through PdfPaths
+        }
+
+        internal void Add(Letter letter)
+        {
+            letters.Add(letter); // surfaces through Letters
+        }
+
+        internal void Add(XObjectContentRecord xObjectContentRecord)
+        {
+            xObjectContentRecords.Add(xObjectContentRecord); // kept internally only; not part of Images
+        }
+
+        internal void Add(PdfMarkedContent markedContent)
+        {
+            ChildContents.Add(markedContent); // nested marked content
+        }
+
+        internal static PdfMarkedContent Create(int id, NameToken name, DictionaryToken properties)
+        {
+            // The Artifact tag gets the specialised subclass, which fixes the tag and sets IsArtifact.
+            if (name.Equals(NameToken.Artifact))
+            {
+                return new PdfArtifactMarkedContent(id, properties);
+            }
+
+            // Every other tag is kept as a plain marked content.
+            return new PdfMarkedContent(id, name, properties);
+        }
+
+        /// <inheritdoc />
+        public override string ToString()
+        {
+            return $"Id={Id}, Tag={Tag}, Properties={Properties}, Contents={ChildContents.Count}";
+        }
+    }
+}
diff --git a/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs b/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs
index 6b7302ca..fb501d7d 100644
--- a/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs
+++ b/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs
@@ -24,6 +24,14 @@
 
     internal class ContentStreamProcessor : IOperationContext
     {
+        private readonly Stack<PdfMarkedContent> queuedMarkedContents = new Stack<PdfMarkedContent>(); // begun but not yet ended, innermost on top
+        private int currentMarkedContentId; // incremented each time a top-level marked content begins
+
+        /// <summary>
+        /// Stores each top-level marked content once its end has been processed.
+        /// </summary>
+        private readonly List<PdfMarkedContent> markedContents = new List<PdfMarkedContent>();
+
         ///
         /// Stores each letter as it is encountered in the content stream.
/// @@ -103,7 +111,7 @@ ProcessOperations(operations); - return new PageContent(operations, letters, paths, images, pdfScanner, filterProvider, resourceStore, isLenientParsing); + return new PageContent(operations, letters, paths, images, markedContents, pdfScanner, filterProvider, resourceStore, isLenientParsing); } private void ProcessOperations(IReadOnlyList operations) @@ -227,6 +235,11 @@ pointSize, textSequence); + if (queuedMarkedContents.Any()) + { + queuedMarkedContents.Peek().Add(letter); + } + letters.Add(letter); double tx, ty; @@ -312,11 +325,21 @@ if (subType.Equals(NameToken.Ps)) { - xObjects[XObjectType.PostScript].Add(new XObjectContentRecord(XObjectType.PostScript, xObjectStream, matrix, state.RenderingIntent)); + var contentRecord = new XObjectContentRecord(XObjectType.PostScript, xObjectStream, matrix, state.RenderingIntent); + if (queuedMarkedContents.Any()) + { + queuedMarkedContents.Peek().Add(contentRecord); + } + xObjects[XObjectType.PostScript].Add(contentRecord); } else if (subType.Equals(NameToken.Image)) { - images.Add(Union.One(new XObjectContentRecord(XObjectType.Image, xObjectStream, matrix, state.RenderingIntent))); + var contentRecord = new XObjectContentRecord(XObjectType.Image, xObjectStream, matrix, state.RenderingIntent); + if (queuedMarkedContents.Any()) + { + queuedMarkedContents.Peek().Add(contentRecord); + } + images.Add(Union.One(contentRecord)); } else if (subType.Equals(NameToken.Form)) { @@ -384,6 +407,10 @@ { if (CurrentPath != null && CurrentPath.Commands.Count > 0 && !currentPathAdded) { + if (queuedMarkedContents.Any()) + { + queuedMarkedContents.Peek().Add(CurrentPath); + } paths.Add(CurrentPath); } @@ -399,6 +426,10 @@ } else { + if (queuedMarkedContents.Any()) + { + queuedMarkedContents.Peek().Add(CurrentPath); + } paths.Add(CurrentPath); currentPathAdded = true; } @@ -412,6 +443,10 @@ } else { + if (queuedMarkedContents.Any()) + { + queuedMarkedContents.Peek().Add(CurrentPath); + } paths.Add(CurrentPath); 
currentPathAdded = true; } @@ -420,6 +455,10 @@ public void ClosePath() { CurrentPath.ClosePath(); + if (queuedMarkedContents.Any()) + { + queuedMarkedContents.Peek().Add(CurrentPath); + } paths.Add(CurrentPath); CurrentPath = null; currentPathAdded = false; @@ -496,11 +535,54 @@ var image = inlineImageBuilder.CreateInlineImage(CurrentTransformationMatrix, filterProvider, pdfScanner, GetCurrentState().RenderingIntent, resourceStore); + if (queuedMarkedContents.Any()) + { + queuedMarkedContents.Peek().Add(image); + } + images.Add(Union.Two(image)); inlineImageBuilder = null; } + public void BeginMarkedContent(NameToken name, NameToken propertyDictionaryName, DictionaryToken properties) + { + if (!queuedMarkedContents.Any()) currentMarkedContentId++; // top parent id only + + var markedContent = PdfMarkedContent.Create(currentMarkedContentId, name, properties); + + if (propertyDictionaryName != null) + { + log.Error("BeginMarkedContent(): propertyDictionaryName not null to implement, name=" + + name.Data + ", propertyDictionaryName=" + propertyDictionaryName); + markedContent = PdfMarkedContent.Create(currentMarkedContentId, propertyDictionaryName, properties); + } + + if (queuedMarkedContents.Any()) + { + var currentMarkedContent = queuedMarkedContents.Peek(); + if (currentMarkedContent != null) + { + currentMarkedContent.Add(markedContent); + } + } + + queuedMarkedContents.Push(markedContent); + } + + public void EndMarkedContent() + { + if (queuedMarkedContents.Any()) + { + var mc = queuedMarkedContents.Pop(); + + if (!queuedMarkedContents.Any()) + { + markedContents.Add(mc); + } + } + } + private void AdjustTextMatrix(double tx, double ty) { var matrix = TransformationMatrix.GetTranslationMatrix(tx, ty); diff --git a/src/UglyToad.PdfPig/Graphics/IOperationContext.cs b/src/UglyToad.PdfPig/Graphics/IOperationContext.cs index e278b0a6..d8b6a09d 100644 --- a/src/UglyToad.PdfPig/Graphics/IOperationContext.cs +++ b/src/UglyToad.PdfPig/Graphics/IOperationContext.cs @@ 
-1,9 +1,8 @@ namespace UglyToad.PdfPig.Graphics { - using System.Collections.Generic; - using Geometry; - using Tokens; using PdfPig.Core; + using System.Collections.Generic; + using Tokens; using Util.JetBrains.Annotations; /// @@ -98,6 +97,16 @@ /// void ClosePath(); + /// + /// + /// + void BeginMarkedContent(NameToken name, NameToken propertyDictionaryName, DictionaryToken Properties); + + /// + /// + /// + void EndMarkedContent(); + /// /// Update the graphics state to apply the state from the named ExtGState dictionary. /// diff --git a/src/UglyToad.PdfPig/Graphics/Operations/MarkedContent/BeginMarkedContent.cs b/src/UglyToad.PdfPig/Graphics/Operations/MarkedContent/BeginMarkedContent.cs index da2da861..17121168 100644 --- a/src/UglyToad.PdfPig/Graphics/Operations/MarkedContent/BeginMarkedContent.cs +++ b/src/UglyToad.PdfPig/Graphics/Operations/MarkedContent/BeginMarkedContent.cs @@ -35,6 +35,7 @@ /// public void Run(IOperationContext operationContext) { + operationContext.BeginMarkedContent(Name, null, null); } /// diff --git a/src/UglyToad.PdfPig/Graphics/Operations/MarkedContent/BeginMarkedContentWithProperties.cs b/src/UglyToad.PdfPig/Graphics/Operations/MarkedContent/BeginMarkedContentWithProperties.cs index 8c45a508..73e7cdbe 100644 --- a/src/UglyToad.PdfPig/Graphics/Operations/MarkedContent/BeginMarkedContentWithProperties.cs +++ b/src/UglyToad.PdfPig/Graphics/Operations/MarkedContent/BeginMarkedContentWithProperties.cs @@ -63,6 +63,7 @@ /// public void Run(IOperationContext operationContext) { + operationContext.BeginMarkedContent(Name, PropertyDictionaryName, Properties); } /// diff --git a/src/UglyToad.PdfPig/Graphics/Operations/MarkedContent/EndMarkedContent.cs b/src/UglyToad.PdfPig/Graphics/Operations/MarkedContent/EndMarkedContent.cs index 326f27ac..5da0a454 100644 --- a/src/UglyToad.PdfPig/Graphics/Operations/MarkedContent/EndMarkedContent.cs +++ b/src/UglyToad.PdfPig/Graphics/Operations/MarkedContent/EndMarkedContent.cs @@ -28,6 +28,7 @@ /// 
         public void Run(IOperationContext operationContext)
         {
+            operationContext.EndMarkedContent(); // hand the end of the marked content to the operation context
         }
 
         ///