rename marked content elements and use factory

Since the properties in marked content may be indirect references or belong to the page resources dictionary, the values should be calculated during content processing. This change tidies up the marked content classes so they do not expose mutable data and uses the PDF token scanner overloads to load dictionary data.
This commit is contained in:
Eliot Jones
2020-01-09 15:30:16 +00:00
parent 097692f1cb
commit 43574097f1
11 changed files with 431 additions and 344 deletions

View File

@@ -64,6 +64,7 @@
"UglyToad.PdfPig.Annotations.AnnotationFlags",
"UglyToad.PdfPig.Annotations.AnnotationType",
"UglyToad.PdfPig.Annotations.QuadPointsQuadrilateral",
"UglyToad.PdfPig.Content.ArtifactMarkedContentElement",
"UglyToad.PdfPig.Content.Catalog",
"UglyToad.PdfPig.Content.CropBox",
"UglyToad.PdfPig.Content.DocumentInformation",
@@ -72,12 +73,11 @@
"UglyToad.PdfPig.Content.InlineImage",
"UglyToad.PdfPig.Content.IPdfImage",
"UglyToad.PdfPig.Content.Letter",
"UglyToad.PdfPig.Content.MarkedContentElement",
"UglyToad.PdfPig.Content.Page",
"UglyToad.PdfPig.Content.PageRotationDegrees",
"UglyToad.PdfPig.Content.PageSize",
"UglyToad.PdfPig.Content.PageTreeNode",
"UglyToad.PdfPig.Content.PdfMarkedContent",
"UglyToad.PdfPig.Content.PdfArtifactMarkedContent",
"UglyToad.PdfPig.Content.Word",
"UglyToad.PdfPig.Content.TextDirection",
"UglyToad.PdfPig.Content.XmpMetadata",

View File

@@ -65,6 +65,7 @@
public static readonly NameToken BleedBox = new NameToken("BleedBox");
public static readonly NameToken Bm = new NameToken("BM");
public static readonly NameToken Border = new NameToken("Border");
public static readonly NameToken Bottom = new NameToken("Bottom");
public static readonly NameToken Bounds = new NameToken("Bounds");
public static readonly NameToken Bpc = new NameToken("BPC");
public static readonly NameToken Bs = new NameToken("BS");
@@ -307,6 +308,7 @@
public static readonly NameToken Lc = new NameToken("LC");
public static readonly NameToken Le = new NameToken("LE");
public static readonly NameToken Leading = new NameToken("Leading");
public static readonly NameToken Left = new NameToken("Left");
public static readonly NameToken LegalAttestation = new NameToken("LegalAttestation");
public static readonly NameToken Length = new NameToken("Length");
public static readonly NameToken Length1 = new NameToken("Length1");
@@ -448,6 +450,7 @@
public static readonly NameToken Resources = new NameToken("Resources");
public static readonly NameToken Rgb = new NameToken("RGB");
public static readonly NameToken Ri = new NameToken("RI");
public static readonly NameToken Right = new NameToken("Right");
public static readonly NameToken RoleMap = new NameToken("RoleMap");
public static readonly NameToken Root = new NameToken("Root");
public static readonly NameToken Rotate = new NameToken("Rotate");
@@ -513,6 +516,7 @@
public static readonly NameToken Title = new NameToken("Title");
public static readonly NameToken Tk = new NameToken("TK");
public static readonly NameToken Tm = new NameToken("TM");
public static readonly NameToken Top = new NameToken("Top");
public static readonly NameToken ToUnicode = new NameToken("ToUnicode");
public static readonly NameToken Tr = new NameToken("TR");
public static readonly NameToken Tr2 = new NameToken("TR2");

View File

@@ -1,123 +1,107 @@
namespace UglyToad.PdfPig.Content
{
using System;
using System.Linq;
using UglyToad.PdfPig.Core;
using UglyToad.PdfPig.Tokens;
using System.Collections.Generic;
using Core;
using Tokens;
/// <inheritdoc />
/// <summary>
/// Artifacts are graphics objects that are not part of the authors original content but rather are
/// Artifacts are graphics objects that are not part of the author's original content but rather are
/// generated by the conforming writer in the course of pagination, layout, or other strictly mechanical
/// processes.
/// <para>Artifacts may also be used to describe areas of the document where the author uses a graphical
/// background, with the goal of enhancing the visual experience. In such a case, the background is not
/// required for understanding the content. - PDF 32000-1:2008, Section 14.8.2.2</para>
/// </summary>
public class PdfArtifactMarkedContent : PdfMarkedContent
public class ArtifactMarkedContentElement : MarkedContentElement
{
internal PdfArtifactMarkedContent(int id, DictionaryToken properties) : base(id, NameToken.Artifact, properties)
{
IsArtifact = true;
}
/// <summary>
/// The artifact's type: Pagination, Layout, Page, or (PDF 1.7) Background.
/// </summary>
public ArtifactType Type
{
get
{
if (Properties == null) return ArtifactType.Unknown;
if (Properties.TryGet(NameToken.Type, out IDataToken<string> typeToken))
{
if (Enum.TryParse(typeToken.Data, true, out ArtifactType result))
{
return result;
}
}
return ArtifactType.Unknown;
}
}
public ArtifactType Type { get; }
/// <summary>
/// The artifact's subtype. Standard values are Header, Footer, and Watermark.
/// Additional values may be specified for this entry, provided they comply with the naming conventions.
/// </summary>
public string SubType { get; }
/// <summary>
/// The artifact's attribute owners.
/// </summary>
public string AttributeOwners
{
get
{
if (Properties == null) return null;
if (Properties.TryGet(NameToken.O, out IDataToken<string> typeToken))
{
return typeToken.Data;
}
return null;
}
}
public string AttributeOwners { get; }
/// <summary>
/// The artifact's bounding box.
/// </summary>
public PdfRectangle? BoundingBox
{
get
{
if (Properties == null) return null;
if (Properties.TryGet(NameToken.Bbox, out ArrayToken arrayToken))
{
var left = arrayToken[2] as NumericToken;
var bottom = arrayToken[3] as NumericToken;
var right = arrayToken[4] as NumericToken;
var top = arrayToken[5] as NumericToken;
return new PdfRectangle(left.Double, bottom.Double, right.Double, top.Double);
}
return null;
}
}
public PdfRectangle? BoundingBox { get; }
// NOTE(review): this property has a public setter, unlike the other properties of this class which are
// get-only and assigned in the constructor. Confirm whether external mutation is intended; if not,
// the setter should be removed.
/// <summary>
/// The names of regions this element is attached to.
/// The <c>Is*Attached</c> properties of this class test this list for the Top, Bottom, Left and Right names.
/// </summary>
public IReadOnlyList<NameToken> Attached { get; set; }
/// <summary>
/// Is the artifact attached to the top edge?
/// </summary>
public bool IsTopAttached => IsAttached("Top");
public bool IsTopAttached => IsAttached(NameToken.Top);
/// <summary>
/// Is the artifact attached to the bottom edge?
/// </summary>
public bool IsBottomAttached => IsAttached("Bottom");
public bool IsBottomAttached => IsAttached(NameToken.Bottom);
/// <summary>
/// Is the artifact attached to the left edge?
/// </summary>
public bool IsLeftAttached => IsAttached("Left");
public bool IsLeftAttached => IsAttached(NameToken.Left);
/// <summary>
/// Is the artifact attached to the right edge?
/// </summary>
public bool IsRightAttached => IsAttached("Right");
public bool IsRightAttached => IsAttached(NameToken.Right);
/// <summary>
/// The artifact's subtype. Standard values are Header, Footer, and Watermark. Additional values may be specified for this entry, provided they comply with the naming conventions.
/// </summary>
public string SubType
internal ArtifactMarkedContentElement(int markedContentIdentifier, NameToken tag, DictionaryToken properties,
string language,
string actualText,
string alternateDescription,
string expandedForm,
ArtifactType artifactType,
string subType,
string attributeOwners,
PdfRectangle? boundingBox,
IReadOnlyList<NameToken> attached,
IReadOnlyList<MarkedContentElement> children,
IReadOnlyList<Letter> letters,
IReadOnlyList<PdfPath> paths,
int index)
: base(markedContentIdentifier, tag, properties, language,
actualText,
alternateDescription,
expandedForm,
true,
children,
letters,
paths,
index)
{
get
{
if (Properties == null) return null;
if (Properties.TryGet(NameToken.Subtype, out IDataToken<string> subTypeToken))
{
return subTypeToken.Data;
Type = artifactType;
SubType = subType;
AttributeOwners = attributeOwners;
BoundingBox = boundingBox;
Attached = attached ?? EmptyArray<NameToken>.Instance;
}
return null;
private bool IsAttached(NameToken edge)
{
foreach (var name in Attached)
{
if (name == edge)
{
return true;
}
}
private bool IsAttached(string edge)
{
if (Properties == null) return false;
if (this.Properties.TryGet(NameToken.Attached, out ArrayToken arrayToken))
{
return arrayToken.Data.Contains(NameToken.Create(edge));
}
return false;
}

View File

@@ -23,5 +23,7 @@
IFont GetFontDirectly(IndirectReferenceToken fontReferenceToken, bool isLenientParsing);
bool TryGetNamedColorSpace(NameToken name, out ResourceColorSpace namedColorSpace);
DictionaryToken GetMarkedContentPropertiesDictionary(NameToken name);
}
}

View File

@@ -0,0 +1,108 @@
namespace UglyToad.PdfPig.Content
{
using System;
using System.Collections.Generic;
using Core;
using Tokens;
/// <summary>
/// Application specific data embedded in a page's content stream as marked content.
/// How the marked content should be interpreted is outside of the PDF specification.
/// </summary>
public class MarkedContentElement
{
    /// <summary>
    /// Create a new <see cref="MarkedContentElement"/>.
    /// </summary>
    /// <param name="markedContentIdentifier">The marked-content identifier.</param>
    /// <param name="tag">The name indicating the role or significance of the element.</param>
    /// <param name="properties">The properties for the element, an empty dictionary is used if this is <see langword="null"/>.</param>
    /// <param name="language">The natural language specification.</param>
    /// <param name="actualText">The replacement text.</param>
    /// <param name="alternateDescription">The alternate description.</param>
    /// <param name="expandedForm">The abbreviation expansion text.</param>
    /// <param name="isArtifact">Whether the element is an artifact.</param>
    /// <param name="children">The child elements, must not be <see langword="null"/>.</param>
    /// <param name="letters">The letters in the element, must not be <see langword="null"/>.</param>
    /// <param name="paths">The paths in the element, must not be <see langword="null"/>.</param>
    /// <param name="index">The index of the element in the page's marked content.</param>
    /// <exception cref="ArgumentNullException">
    /// If <paramref name="children"/>, <paramref name="letters"/> or <paramref name="paths"/> is <see langword="null"/>.
    /// </exception>
    public MarkedContentElement(int markedContentIdentifier, NameToken tag, DictionaryToken properties,
        string language,
        string actualText,
        string alternateDescription,
        string expandedForm,
        bool isArtifact,
        IReadOnlyList<MarkedContentElement> children,
        IReadOnlyList<Letter> letters,
        IReadOnlyList<PdfPath> paths,
        int index)
    {
        // Reject missing collections up-front, checked in the order children, letters, paths.
        if (children == null)
        {
            throw new ArgumentNullException(nameof(children));
        }

        if (letters == null)
        {
            throw new ArgumentNullException(nameof(letters));
        }

        if (paths == null)
        {
            throw new ArgumentNullException(nameof(paths));
        }

        Index = index;
        MarkedContentIdentifier = markedContentIdentifier;
        Tag = tag;
        IsArtifact = isArtifact;

        // Never expose a null dictionary: substitute an empty one.
        if (properties == null)
        {
            properties = new DictionaryToken(new Dictionary<NameToken, IToken>());
        }

        Properties = properties;

        Language = language;
        ActualText = actualText;
        AlternateDescription = alternateDescription;
        ExpandedForm = expandedForm;

        Children = children;
        Letters = letters;
        Paths = paths;
    }

    /// <summary>
    /// The index of this element in the set of marked content in the page.
    /// <see cref="Children"/> marked content elements will have the same index as the parent.
    /// </summary>
    public int Index { get; }

    /// <summary>
    /// The marked-content identifier.
    /// </summary>
    public int MarkedContentIdentifier { get; }

    /// <summary>
    /// The name indicating the role or significance of the point.
    /// </summary>
    public string Tag { get; }

    /// <summary>
    /// Whether this marked content is an artifact, see <see cref="ArtifactMarkedContentElement"/>.
    /// </summary>
    public bool IsArtifact { get; }

    /// <summary>
    /// The properties dictionary for this element, never <see langword="null"/>.
    /// </summary>
    public DictionaryToken Properties { get; }

    /// <summary>
    /// The natural language specification.
    /// </summary>
    public string Language { get; }

    /// <summary>
    /// The replacement text.
    /// </summary>
    public string ActualText { get; }

    /// <summary>
    /// The alternate description.
    /// </summary>
    public string AlternateDescription { get; }

    /// <summary>
    /// The abbreviation expansion text.
    /// </summary>
    public string ExpandedForm { get; }

    /// <summary>
    /// The marked content elements nested in this one.
    /// </summary>
    public IReadOnlyList<MarkedContentElement> Children { get; }

    /// <summary>
    /// The letters contained in this marked content.
    /// </summary>
    public IReadOnlyList<Letter> Letters { get; }

    /// <summary>
    /// The paths contained in this marked content.
    /// </summary>
    public IReadOnlyList<PdfPath> Paths { get; }

    /// <inheritdoc />
    public override string ToString() =>
        $"Index={Index}, MCID={MarkedContentIdentifier}, Tag={Tag}, Properties={Properties}, Contents={Children.Count}";
}
}

View File

@@ -156,8 +156,7 @@
/// <summary>
/// Gets any marked content on the page.
/// </summary>
/// <returns></returns>
public IReadOnlyList<PdfMarkedContent> GetMarkedContents() => Content.GetMarkedContents();
public IReadOnlyList<MarkedContentElement> GetMarkedContents() => Content.GetMarkedContents();
/// <summary>
/// Provides access to useful members which will change in future releases.

View File

@@ -20,7 +20,7 @@
internal class PageContent
{
private readonly IReadOnlyList<Union<XObjectContentRecord, InlineImage>> images;
private readonly IReadOnlyList<PdfMarkedContent> markedContents;
private readonly IReadOnlyList<MarkedContentElement> markedContents;
private readonly IPdfTokenScanner pdfScanner;
private readonly IFilterProvider filterProvider;
private readonly IResourceStore resourceStore;
@@ -35,7 +35,7 @@
internal PageContent(IReadOnlyList<IGraphicsStateOperation> graphicsStateOperations, IReadOnlyList<Letter> letters,
IReadOnlyList<PdfPath> paths,
IReadOnlyList<Union<XObjectContentRecord, InlineImage>> images,
IReadOnlyList<PdfMarkedContent> markedContents,
IReadOnlyList<MarkedContentElement> markedContents,
IPdfTokenScanner pdfScanner,
IFilterProvider filterProvider,
IResourceStore resourceStore,
@@ -65,6 +65,6 @@
}
}
public IReadOnlyList<PdfMarkedContent> GetMarkedContents() => markedContents;
public IReadOnlyList<MarkedContentElement> GetMarkedContents() => markedContents;
}
}

View File

@@ -1,190 +0,0 @@
namespace UglyToad.PdfPig.Content
{
using System.Collections.Generic;
using UglyToad.PdfPig.Core;
using UglyToad.PdfPig.Graphics;
using UglyToad.PdfPig.Tokens;
using UglyToad.PdfPig.Util;
/// <summary>
/// A marked content section together with the images, paths, letters and
/// child marked contents which were added to it.
/// </summary>
public class PdfMarkedContent
{
    private readonly List<Letter> letters = new List<Letter>();
    private readonly List<PdfPath> pdfPaths = new List<PdfPath>();
    private readonly List<IPdfImage> images = new List<IPdfImage>();

    // Collected through Add but not exposed by any member of this class.
    private readonly List<XObjectContentRecord> xObjectContentRecords = new List<XObjectContentRecord>();

    internal PdfMarkedContent(int id, NameToken tag, DictionaryToken properties)
    {
        Id = id;
        Tag = tag;
        Properties = properties;
        ChildContents = new List<PdfMarkedContent>();
    }

    /// <summary>
    /// Internal Id for top marked content. Child marked contents will share the same Id as the parent.
    /// </summary>
    public int Id { get; }

    /// <summary>
    /// The tag of the marked content.
    /// </summary>
    public string Tag { get; }

    /// <summary>
    /// The properties dictionary, may be <see langword="null"/>.
    /// </summary>
    public DictionaryToken Properties { get; }

    /// <summary>
    /// Is the marked content an artifact.
    /// </summary>
    public bool IsArtifact { get; internal set; }

    /// <summary>
    /// Marked-content identifier, or -1 when there are no properties or they contain no MCID entry.
    /// </summary>
    public int MCID
    {
        get
        {
            if (Properties == null || !Properties.ContainsKey(NameToken.Mcid))
            {
                return -1;
            }

            return Properties.GetInt(NameToken.Mcid);
        }
    }

    /// <summary>
    /// Child contents.
    /// </summary>
    public List<PdfMarkedContent> ChildContents { get; }

    /// <summary>
    /// The natural language specification.
    /// </summary>
    public string Language => GetStringOrNull(NameToken.Lang);

    /// <summary>
    /// The replacement text.
    /// </summary>
    public string ActualText => GetStringOrNull(NameToken.ActualText);

    /// <summary>
    /// The alternate description.
    /// </summary>
    public string AlternateDescription => GetStringOrNull(NameToken.Alternate);

    /// <summary>
    /// The abbreviation expansion text.
    /// </summary>
    public string ExpandedForm => GetStringOrNull(NameToken.E);

    /// <summary>
    /// The marked content's images.
    /// </summary>
    public IReadOnlyList<IPdfImage> Images => images;

    /// <summary>
    /// The marked content's paths.
    /// </summary>
    public IReadOnlyList<PdfPath> PdfPaths => pdfPaths;

    /// <summary>
    /// The marked content's letters.
    /// </summary>
    public IReadOnlyList<Letter> Letters => letters;

    internal void Add(IPdfImage pdfImage) => images.Add(pdfImage);

    internal void Add(PdfPath pdfPath) => pdfPaths.Add(pdfPath);

    internal void Add(Letter letter) => letters.Add(letter);

    internal void Add(XObjectContentRecord xObjectContentRecord) => xObjectContentRecords.Add(xObjectContentRecord);

    internal void Add(PdfMarkedContent markedContent) => ChildContents.Add(markedContent);

    /// <summary>
    /// Creates the artifact subtype when the name is the Artifact name, otherwise a plain marked content.
    /// </summary>
    internal static PdfMarkedContent Create(int id, NameToken name, DictionaryToken properties)
    {
        if (!name.Equals(NameToken.Artifact))
        {
            return new PdfMarkedContent(id, name, properties);
        }

        return new PdfArtifactMarkedContent(id, properties);
    }

    /// <inheritdoc />
    public override string ToString() =>
        $"Id={Id}, Tag={Tag}, Properties={Properties}, Contents={ChildContents.Count}";

    /// <summary>
    /// Reads a string valued entry from the properties, returning <see langword="null"/>
    /// when there are no properties or no string data token is stored under the key.
    /// </summary>
    private string GetStringOrNull(NameToken key)
    {
        if (Properties != null && Properties.TryGet(key, out IDataToken<string> dataToken))
        {
            return dataToken.Data;
        }

        return null;
    }
}
}

View File

@@ -22,6 +22,8 @@
private readonly Dictionary<NameToken, ResourceColorSpace> namedColorSpaces = new Dictionary<NameToken, ResourceColorSpace>();
private readonly Dictionary<NameToken, DictionaryToken> markedContentProperties = new Dictionary<NameToken, DictionaryToken>();
private (NameToken name, IFont font) lastLoadedFont;
public ResourceStore(IPdfTokenScanner scanner, IFontFactory fontFactory)
@@ -101,6 +103,21 @@
}
}
}
if (resourceDictionary.TryGet(NameToken.Properties, scanner, out DictionaryToken markedContentPropertiesList))
{
foreach (var pair in markedContentPropertiesList.Data)
{
var key = NameToken.Create(pair.Key);
if (!DirectObjectFinder.TryGet(pair.Value, scanner, out DictionaryToken namedProperties))
{
continue;
}
markedContentProperties[key] = namedProperties;
}
}
}
public void UnloadResourceDictionary()
@@ -207,5 +224,10 @@
{
return extendedGraphicsStates[name];
}
/// <summary>
/// Looks up the marked content properties dictionary stored under the given name.
/// </summary>
/// <param name="name">The name the properties dictionary was stored under.</param>
/// <returns>The stored dictionary, or <see langword="null"/> if nothing is stored under the name.</returns>
public DictionaryToken GetMarkedContentPropertiesDictionary(NameToken name)
{
    if (markedContentProperties.TryGetValue(name, out var namedProperties))
    {
        return namedProperties;
    }

    return null;
}
}
}

View File

@@ -4,14 +4,10 @@
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using Colors;
using Content;
using Core;
using Exceptions;
using Filters;
using Fonts;
using Geometry;
using IO;
using Logging;
using Operations;
using Parser;
@@ -19,19 +15,10 @@
using PdfPig.Core;
using Tokenization.Scanner;
using Tokens;
using Util;
using XObjects;
internal class ContentStreamProcessor : IOperationContext
{
private readonly Stack<PdfMarkedContent> queuedMarkedContents = new Stack<PdfMarkedContent>();
private int currentMarkedContentId;
/// <summary>
/// Stores each marked content as it is encountered in the content stream.
/// </summary>
private readonly List<PdfMarkedContent> markedContents = new List<PdfMarkedContent>();
/// <summary>
/// Stores each letter as it is encountered in the content stream.
/// </summary>
@@ -47,6 +34,11 @@
/// </summary>
private readonly List<Union<XObjectContentRecord, InlineImage>> images = new List<Union<XObjectContentRecord, InlineImage>>();
/// <summary>
/// Stores each marked content as it is encountered in the content stream.
/// </summary>
private readonly List<MarkedContentElement> markedContents = new List<MarkedContentElement>();
private readonly IResourceStore resourceStore;
private readonly UserSpaceUnit userSpaceUnit;
private readonly PageRotationDegrees rotation;
@@ -55,6 +47,7 @@
private readonly IPageContentParser pageContentParser;
private readonly IFilterProvider filterProvider;
private readonly ILog log;
private readonly MarkedContentStack markedContentStack = new MarkedContentStack();
private Stack<CurrentGraphicsState> graphicsStack = new Stack<CurrentGraphicsState>();
private IFont activeExtendedGraphicsStateFont;
@@ -235,13 +228,10 @@
pointSize,
textSequence);
if (queuedMarkedContents.Any())
{
queuedMarkedContents.Peek().Add(letter);
}
letters.Add(letter);
markedContentStack.AddLetter(letter);
double tx, ty;
if (font.IsVertical)
{
@@ -326,20 +316,18 @@
if (subType.Equals(NameToken.Ps))
{
var contentRecord = new XObjectContentRecord(XObjectType.PostScript, xObjectStream, matrix, state.RenderingIntent);
if (queuedMarkedContents.Any())
{
queuedMarkedContents.Peek().Add(contentRecord);
}
xObjects[XObjectType.PostScript].Add(contentRecord);
markedContentStack.AddXObject(contentRecord);
}
else if (subType.Equals(NameToken.Image))
{
var contentRecord = new XObjectContentRecord(XObjectType.Image, xObjectStream, matrix, state.RenderingIntent);
if (queuedMarkedContents.Any())
{
queuedMarkedContents.Peek().Add(contentRecord);
}
images.Add(Union<XObjectContentRecord, InlineImage>.One(contentRecord));
markedContentStack.AddXObject(contentRecord);
}
else if (subType.Equals(NameToken.Form))
{
@@ -407,11 +395,8 @@
{
if (CurrentPath != null && CurrentPath.Commands.Count > 0 && !currentPathAdded)
{
if (queuedMarkedContents.Any())
{
queuedMarkedContents.Peek().Add(CurrentPath);
}
paths.Add(CurrentPath);
markedContentStack.AddPath(CurrentPath);
}
CurrentPath = new PdfPath();
@@ -426,11 +411,8 @@
}
else
{
if (queuedMarkedContents.Any())
{
queuedMarkedContents.Peek().Add(CurrentPath);
}
paths.Add(CurrentPath);
markedContentStack.AddPath(CurrentPath);
currentPathAdded = true;
}
}
@@ -443,11 +425,8 @@
}
else
{
if (queuedMarkedContents.Any())
{
queuedMarkedContents.Peek().Add(CurrentPath);
}
paths.Add(CurrentPath);
markedContentStack.AddPath(CurrentPath);
currentPathAdded = true;
}
}
@@ -455,11 +434,8 @@
public void ClosePath()
{
CurrentPath.ClosePath();
if (queuedMarkedContents.Any())
{
queuedMarkedContents.Peek().Add(CurrentPath);
}
paths.Add(CurrentPath);
markedContentStack.AddPath(CurrentPath);
CurrentPath = null;
currentPathAdded = false;
}
@@ -535,51 +511,30 @@
var image = inlineImageBuilder.CreateInlineImage(CurrentTransformationMatrix, filterProvider, pdfScanner, GetCurrentState().RenderingIntent, resourceStore);
if (queuedMarkedContents.Any())
{
queuedMarkedContents.Peek().Add(image);
}
images.Add(Union<XObjectContentRecord, InlineImage>.Two(image));
markedContentStack.AddImage(image);
inlineImageBuilder = null;
}
public void BeginMarkedContent(NameToken name, NameToken propertyDictionaryName, DictionaryToken properties)
{
if (!queuedMarkedContents.Any()) currentMarkedContentId++; // top parent id only
var markedContent = PdfMarkedContent.Create(currentMarkedContentId, name, properties);
if (propertyDictionaryName != null)
{
log.Error("BeginMarkedContent(): propertyDictionaryName not null to implement, name="
+ name.Data + ", propertyDictionaryName=" + propertyDictionaryName);
markedContent = PdfMarkedContent.Create(currentMarkedContentId, propertyDictionaryName, properties);
var actual = resourceStore.GetMarkedContentPropertiesDictionary(propertyDictionaryName);
properties = actual ?? properties;
}
if (queuedMarkedContents.Any())
{
var currentMarkedContent = queuedMarkedContents.Peek();
if (currentMarkedContent != null)
{
currentMarkedContent.Add(markedContent);
}
}
queuedMarkedContents.Push(markedContent);
markedContentStack.Push(name, properties);
}
public void EndMarkedContent()
{
if (queuedMarkedContents.Any())
if (markedContentStack.CanPop)
{
var mc = queuedMarkedContents.Pop();
if (!queuedMarkedContents.Any())
{
markedContents.Add(mc);
}
markedContents.Add(markedContentStack.Pop(pdfScanner));
}
}

View File

@@ -0,0 +1,203 @@
namespace UglyToad.PdfPig.Graphics
{
using System;
using System.Collections.Generic;
using Content;
using PdfPig.Core;
using Tokenization.Scanner;
using Tokens;
/// <summary>
/// Handles building <see cref="MarkedContentElement"/>s.
/// Elements are opened with <see cref="Push"/>, receive content through the Add methods
/// while they are the innermost open element, and are completed by <see cref="Pop"/>.
/// </summary>
internal class MarkedContentStack
{
    private readonly Stack<MarkedContentElementBuilder> builderStack = new Stack<MarkedContentElementBuilder>();

    // Index of the current root element. Starts at -1 so the first root element receives index 0.
    private int number = -1;

    // The innermost open element, null when no element is open.
    private MarkedContentElementBuilder top;

    /// <summary>
    /// Whether there is an open element which can be completed with <see cref="Pop"/>.
    /// </summary>
    public bool CanPop => top != null;

    /// <summary>
    /// Opens a new element which becomes the target for subsequently added content.
    /// </summary>
    /// <param name="name">The tag of the element.</param>
    /// <param name="properties">The properties of the element, may be <see langword="null"/>.</param>
    public void Push(NameToken name, DictionaryToken properties)
    {
        // Only a new root element advances the index, nested elements share the index of their root.
        if (builderStack.Count == 0)
        {
            number++;
        }

        top = new MarkedContentElementBuilder(number, name, properties);
        builderStack.Push(top);
    }

    /// <summary>
    /// Completes the innermost open element. If another element is still open the result
    /// is also added to the children of that element.
    /// </summary>
    /// <remarks>
    /// The built element is returned for nested elements as well as root elements. After this call
    /// <see cref="CanPop"/> is <see langword="true"/> only if the returned element was nested.
    /// </remarks>
    /// <param name="pdfScanner">The scanner passed to the property lookups while building the element.</param>
    /// <returns>The completed element.</returns>
    public MarkedContentElement Pop(IPdfTokenScanner pdfScanner)
    {
        var builder = builderStack.Pop();

        var result = builder.Build(pdfScanner);

        if (builderStack.Count > 0)
        {
            top = builderStack.Peek();
            top.Children.Add(result);
        }
        else
        {
            top = null;
        }

        return result;
    }

    /// <summary>
    /// Adds the letter to the innermost open element, does nothing if no element is open.
    /// </summary>
    public void AddLetter(Letter letter)
    {
        top?.AddLetter(letter);
    }

    /// <summary>
    /// Adds the path to the innermost open element, does nothing if no element is open.
    /// </summary>
    public void AddPath(PdfPath path)
    {
        top?.AddPath(path);
    }

    /// <summary>
    /// Adds the image to the innermost open element, does nothing if no element is open.
    /// </summary>
    public void AddImage(IPdfImage image)
    {
        top?.AddImage(image);
    }

    /// <summary>
    /// Adds the XObject to the innermost open element, does nothing if no element is open.
    /// </summary>
    public void AddXObject(XObjectContentRecord xObject)
    {
        top?.AddXObject(xObject);
    }

    /// <summary>
    /// Accumulates the content of a single open element until it is built.
    /// </summary>
    private class MarkedContentElementBuilder
    {
        private readonly int number;
        private readonly NameToken name;
        private readonly DictionaryToken properties;

        private readonly List<Letter> letters = new List<Letter>();
        private readonly List<PdfPath> paths = new List<PdfPath>();

        // Images and XObjects are collected but Build does not currently pass them to the element.
        private readonly List<IPdfImage> images = new List<IPdfImage>();
        private readonly List<XObjectContentRecord> xobjects = new List<XObjectContentRecord>();

        public List<MarkedContentElement> Children { get; } = new List<MarkedContentElement>();

        public MarkedContentElementBuilder(int number, NameToken name, DictionaryToken properties)
        {
            this.number = number;
            this.name = name;
            this.properties = properties ?? new DictionaryToken(new Dictionary<NameToken, IToken>());
        }

        public void AddLetter(Letter letter)
        {
            letters.Add(letter);
        }

        public void AddImage(IPdfImage image)
        {
            images.Add(image);
        }

        public void AddPath(PdfPath path)
        {
            paths.Add(path);
        }

        public void AddXObject(XObjectContentRecord xObject)
        {
            xobjects.Add(xObject);
        }

        /// <summary>
        /// Reads the property values using the scanner overloads and creates the element,
        /// an <see cref="ArtifactMarkedContentElement"/> when the tag is the Artifact name.
        /// </summary>
        public MarkedContentElement Build(IPdfTokenScanner pdfScanner)
        {
            // -1 is used when the properties contain no MCID.
            var mcid = -1;
            if (properties.TryGet(NameToken.Mcid, pdfScanner, out NumericToken mcidToken))
            {
                mcid = mcidToken.Int;
            }

            var language = GetOptional(NameToken.Lang, pdfScanner);
            var actualText = GetOptional(NameToken.ActualText, pdfScanner);
            var alternateDescription = GetOptional(NameToken.Alternate, pdfScanner);
            var expandedForm = GetOptional(NameToken.E, pdfScanner);

            if (name != NameToken.Artifact)
            {
                return new MarkedContentElement(mcid, name, properties,
                    language,
                    actualText,
                    alternateDescription,
                    expandedForm,
                    false,
                    Children,
                    letters,
                    paths,
                    number);
            }

            var artifactType = ArtifactMarkedContentElement.ArtifactType.Unknown;
            if (properties.TryGet(NameToken.Type, pdfScanner, out IDataToken<string> typeToken)
                && Enum.TryParse(typeToken.Data, true, out ArtifactMarkedContentElement.ArtifactType parsedType))
            {
                artifactType = parsedType;
            }

            var subType = GetOptional(NameToken.Subtype, pdfScanner);
            var attributeOwners = GetOptional(NameToken.O, pdfScanner);

            var boundingBox = default(PdfRectangle?);
            if (properties.TryGet(NameToken.Bbox, pdfScanner, out ArrayToken boundingBoxToken))
            {
                boundingBox = ReadBoundingBox(boundingBoxToken);
            }

            var attached = new List<NameToken>();

            // Use the scanner overload, like every other lookup here, so an indirect array is resolved.
            if (properties.TryGet(NameToken.Attached, pdfScanner, out ArrayToken attachedToken))
            {
                foreach (var token in attachedToken.Data)
                {
                    if (token is NameToken attachedName)
                    {
                        attached.Add(attachedName);
                    }
                }
            }

            return new ArtifactMarkedContentElement(mcid, name, properties, language,
                actualText,
                alternateDescription,
                expandedForm,
                artifactType,
                subType,
                attributeOwners,
                boundingBox,
                attached,
                Children,
                letters,
                paths,
                number);
        }

        /// <summary>
        /// Reads the left, bottom, right and top values of a bounding box array.
        /// Returns <see langword="null"/> if the array has an unsupported length or a value is not numeric.
        /// </summary>
        private static PdfRectangle? ReadBoundingBox(ArrayToken array)
        {
            int offset;
            if (array.Length == 4)
            {
                // The rectangle form of four numbers described by the PDF specification.
                offset = 0;
            }
            else if (array.Length == 6)
            {
                // Six element form with the coordinates in the last four positions, kept for compatibility.
                // NOTE(review): it is not clear which producers write this layout - confirm it is still needed.
                offset = 2;
            }
            else
            {
                return null;
            }

            if (array[offset] is NumericToken left
                && array[offset + 1] is NumericToken bottom
                && array[offset + 2] is NumericToken right
                && array[offset + 3] is NumericToken upper)
            {
                return new PdfRectangle(left.Double, bottom.Double, right.Double, upper.Double);
            }

            return null;
        }

        /// <summary>
        /// Reads an optional string valued property, returning <see langword="null"/> if it is not found.
        /// </summary>
        private string GetOptional(NameToken optionName, IPdfTokenScanner pdfScanner)
        {
            var result = default(string);
            if (properties.TryGet(optionName, pdfScanner, out IDataToken<string> token))
            {
                result = token.Data;
            }

            return result;
        }
    }
}
}