mirror of
https://github.com/UglyToad/PdfPig.git
synced 2026-03-10 00:23:29 +08:00
load images eagerly for marked content
When a marked content region contains an image, we load it eagerly, since we won't have access to the necessary classes at evaluation time. We also default the image color space to the active graphics state's color space if the dictionary doesn't contain a valid entry.
This commit is contained in:
@@ -74,6 +74,7 @@
|
|||||||
IReadOnlyList<MarkedContentElement> children,
|
IReadOnlyList<MarkedContentElement> children,
|
||||||
IReadOnlyList<Letter> letters,
|
IReadOnlyList<Letter> letters,
|
||||||
IReadOnlyList<PdfPath> paths,
|
IReadOnlyList<PdfPath> paths,
|
||||||
|
IReadOnlyList<IPdfImage> images,
|
||||||
int index)
|
int index)
|
||||||
: base(markedContentIdentifier, tag, properties, language,
|
: base(markedContentIdentifier, tag, properties, language,
|
||||||
actualText,
|
actualText,
|
||||||
@@ -83,6 +84,7 @@
|
|||||||
children,
|
children,
|
||||||
letters,
|
letters,
|
||||||
paths,
|
paths,
|
||||||
|
images,
|
||||||
index)
|
index)
|
||||||
{
|
{
|
||||||
Type = artifactType;
|
Type = artifactType;
|
||||||
|
|||||||
@@ -52,6 +52,11 @@
|
|||||||
/// </summary>
|
/// </summary>
|
||||||
public IReadOnlyList<PdfPath> Paths { get; }
|
public IReadOnlyList<PdfPath> Paths { get; }
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Images contained in this marked content.
|
||||||
|
/// </summary>
|
||||||
|
public IReadOnlyList<IPdfImage> Images { get; }
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// The natural language specification.
|
/// The natural language specification.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
@@ -81,7 +86,8 @@
|
|||||||
IReadOnlyList<MarkedContentElement> children,
|
IReadOnlyList<MarkedContentElement> children,
|
||||||
IReadOnlyList<Letter> letters,
|
IReadOnlyList<Letter> letters,
|
||||||
IReadOnlyList<PdfPath> paths,
|
IReadOnlyList<PdfPath> paths,
|
||||||
int index)
|
IReadOnlyList<IPdfImage> images,
|
||||||
|
int index)
|
||||||
{
|
{
|
||||||
MarkedContentIdentifier = markedContentIdentifier;
|
MarkedContentIdentifier = markedContentIdentifier;
|
||||||
Tag = tag;
|
Tag = tag;
|
||||||
@@ -95,6 +101,7 @@
|
|||||||
Children = children ?? throw new ArgumentNullException(nameof(children));
|
Children = children ?? throw new ArgumentNullException(nameof(children));
|
||||||
Letters = letters ?? throw new ArgumentNullException(nameof(letters));
|
Letters = letters ?? throw new ArgumentNullException(nameof(letters));
|
||||||
Paths = paths ?? throw new ArgumentNullException(nameof(paths));
|
Paths = paths ?? throw new ArgumentNullException(nameof(paths));
|
||||||
|
Images = images ?? throw new ArgumentNullException(nameof(images));
|
||||||
|
|
||||||
Index = index;
|
Index = index;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,7 +8,6 @@
|
|||||||
using Graphics.Operations;
|
using Graphics.Operations;
|
||||||
using Tokenization.Scanner;
|
using Tokenization.Scanner;
|
||||||
using XObjects;
|
using XObjects;
|
||||||
using Geometry;
|
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Wraps content parsed from a page content stream for access.
|
/// Wraps content parsed from a page content stream for access.
|
||||||
@@ -24,7 +23,6 @@
|
|||||||
private readonly IPdfTokenScanner pdfScanner;
|
private readonly IPdfTokenScanner pdfScanner;
|
||||||
private readonly IFilterProvider filterProvider;
|
private readonly IFilterProvider filterProvider;
|
||||||
private readonly IResourceStore resourceStore;
|
private readonly IResourceStore resourceStore;
|
||||||
private readonly bool isLenientParsing;
|
|
||||||
|
|
||||||
internal IReadOnlyList<IGraphicsStateOperation> GraphicsStateOperations { get; }
|
internal IReadOnlyList<IGraphicsStateOperation> GraphicsStateOperations { get; }
|
||||||
|
|
||||||
@@ -38,8 +36,7 @@
|
|||||||
IReadOnlyList<MarkedContentElement> markedContents,
|
IReadOnlyList<MarkedContentElement> markedContents,
|
||||||
IPdfTokenScanner pdfScanner,
|
IPdfTokenScanner pdfScanner,
|
||||||
IFilterProvider filterProvider,
|
IFilterProvider filterProvider,
|
||||||
IResourceStore resourceStore,
|
IResourceStore resourceStore)
|
||||||
bool isLenientParsing)
|
|
||||||
{
|
{
|
||||||
GraphicsStateOperations = graphicsStateOperations;
|
GraphicsStateOperations = graphicsStateOperations;
|
||||||
Letters = letters;
|
Letters = letters;
|
||||||
@@ -49,17 +46,14 @@
|
|||||||
this.pdfScanner = pdfScanner ?? throw new ArgumentNullException(nameof(pdfScanner));
|
this.pdfScanner = pdfScanner ?? throw new ArgumentNullException(nameof(pdfScanner));
|
||||||
this.filterProvider = filterProvider ?? throw new ArgumentNullException(nameof(filterProvider));
|
this.filterProvider = filterProvider ?? throw new ArgumentNullException(nameof(filterProvider));
|
||||||
this.resourceStore = resourceStore ?? throw new ArgumentNullException(nameof(resourceStore));
|
this.resourceStore = resourceStore ?? throw new ArgumentNullException(nameof(resourceStore));
|
||||||
this.isLenientParsing = isLenientParsing;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public IEnumerable<IPdfImage> GetImages()
|
public IEnumerable<IPdfImage> GetImages()
|
||||||
{
|
{
|
||||||
foreach (var image in images)
|
foreach (var image in images)
|
||||||
{
|
{
|
||||||
|
var result = image.Match<IPdfImage>(x => XObjectFactory.ReadImage(x, pdfScanner, filterProvider, resourceStore),
|
||||||
IPdfImage result = null;
|
x => x);
|
||||||
image.Match(x => { result = XObjectFactory.ReadImage(x, pdfScanner, filterProvider, resourceStore, isLenientParsing); },
|
|
||||||
x => { result = x; });
|
|
||||||
|
|
||||||
yield return result;
|
yield return result;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,6 +4,7 @@
|
|||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using System.Diagnostics;
|
using System.Diagnostics;
|
||||||
using System.Linq;
|
using System.Linq;
|
||||||
|
using Colors;
|
||||||
using Content;
|
using Content;
|
||||||
using Core;
|
using Core;
|
||||||
using Filters;
|
using Filters;
|
||||||
@@ -106,7 +107,7 @@
|
|||||||
|
|
||||||
ProcessOperations(operations);
|
ProcessOperations(operations);
|
||||||
|
|
||||||
return new PageContent(operations, letters, paths, images, markedContents, pdfScanner, filterProvider, resourceStore, isLenientParsing);
|
return new PageContent(operations, letters, paths, images, markedContents, pdfScanner, filterProvider, resourceStore);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void ProcessOperations(IReadOnlyList<IGraphicsStateOperation> operations)
|
private void ProcessOperations(IReadOnlyList<IGraphicsStateOperation> operations)
|
||||||
@@ -317,19 +318,19 @@
|
|||||||
|
|
||||||
if (subType.Equals(NameToken.Ps))
|
if (subType.Equals(NameToken.Ps))
|
||||||
{
|
{
|
||||||
var contentRecord = new XObjectContentRecord(XObjectType.PostScript, xObjectStream, matrix, state.RenderingIntent);
|
var contentRecord = new XObjectContentRecord(XObjectType.PostScript, xObjectStream, matrix, state.RenderingIntent,
|
||||||
|
state.CurrentStrokingColor?.ColorSpace ?? ColorSpace.DeviceRGB);
|
||||||
|
|
||||||
xObjects[XObjectType.PostScript].Add(contentRecord);
|
xObjects[XObjectType.PostScript].Add(contentRecord);
|
||||||
|
|
||||||
markedContentStack.AddXObject(contentRecord);
|
|
||||||
}
|
}
|
||||||
else if (subType.Equals(NameToken.Image))
|
else if (subType.Equals(NameToken.Image))
|
||||||
{
|
{
|
||||||
var contentRecord = new XObjectContentRecord(XObjectType.Image, xObjectStream, matrix, state.RenderingIntent);
|
var contentRecord = new XObjectContentRecord(XObjectType.Image, xObjectStream, matrix, state.RenderingIntent,
|
||||||
|
state.CurrentStrokingColor?.ColorSpace ?? ColorSpace.DeviceRGB);
|
||||||
|
|
||||||
images.Add(Union<XObjectContentRecord, InlineImage>.One(contentRecord));
|
images.Add(Union<XObjectContentRecord, InlineImage>.One(contentRecord));
|
||||||
|
|
||||||
markedContentStack.AddXObject(contentRecord);
|
markedContentStack.AddXObject(contentRecord, pdfScanner, filterProvider, resourceStore);
|
||||||
}
|
}
|
||||||
else if (subType.Equals(NameToken.Form))
|
else if (subType.Equals(NameToken.Form))
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -3,19 +3,21 @@
|
|||||||
using System;
|
using System;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using Content;
|
using Content;
|
||||||
|
using Filters;
|
||||||
using PdfPig.Core;
|
using PdfPig.Core;
|
||||||
using Tokenization.Scanner;
|
using Tokenization.Scanner;
|
||||||
using Tokens;
|
using Tokens;
|
||||||
|
using XObjects;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Handles building <see cref="MarkedContentElement"/>s.
|
/// Handles building <see cref="MarkedContentElement"/>s.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
internal class MarkedContentStack
|
internal class MarkedContentStack
|
||||||
{
|
{
|
||||||
private readonly Stack<MarkedContentElementBuilder> builderStack = new Stack<MarkedContentElementBuilder>();
|
private readonly Stack<MarkedContentElementActiveBuilder> builderStack = new Stack<MarkedContentElementActiveBuilder>();
|
||||||
|
|
||||||
private int number;
|
private int number;
|
||||||
private MarkedContentElementBuilder top;
|
private MarkedContentElementActiveBuilder top;
|
||||||
|
|
||||||
public bool CanPop => top != null;
|
public bool CanPop => top != null;
|
||||||
|
|
||||||
@@ -26,7 +28,7 @@
|
|||||||
number++;
|
number++;
|
||||||
}
|
}
|
||||||
|
|
||||||
top = new MarkedContentElementBuilder(number, name, properties);
|
top = new MarkedContentElementActiveBuilder(number, name, properties);
|
||||||
builderStack.Push(top);
|
builderStack.Push(top);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -64,25 +66,31 @@
|
|||||||
top?.AddImage(image);
|
top?.AddImage(image);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void AddXObject(XObjectContentRecord xObject)
|
public void AddXObject(XObjectContentRecord xObject,
|
||||||
|
IPdfTokenScanner scanner,
|
||||||
|
IFilterProvider filterProvider,
|
||||||
|
IResourceStore resourceStore)
|
||||||
{
|
{
|
||||||
top?.AddXObject(xObject);
|
if (top != null && xObject.Type == XObjectType.Image)
|
||||||
|
{
|
||||||
|
var image = XObjectFactory.ReadImage(xObject, scanner, filterProvider, resourceStore);
|
||||||
|
top?.AddImage(image);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private class MarkedContentElementBuilder
|
private class MarkedContentElementActiveBuilder
|
||||||
{
|
{
|
||||||
private readonly int number;
|
private readonly int number;
|
||||||
private readonly NameToken name;
|
private readonly NameToken name;
|
||||||
private readonly DictionaryToken properties;
|
private readonly DictionaryToken properties;
|
||||||
|
|
||||||
private readonly List<Letter> letters = new List<Letter>();
|
private readonly List<Letter> letters = new List<Letter>();
|
||||||
private readonly List<IPdfImage> images = new List<IPdfImage>();
|
|
||||||
private readonly List<PdfPath> paths = new List<PdfPath>();
|
private readonly List<PdfPath> paths = new List<PdfPath>();
|
||||||
private readonly List<XObjectContentRecord> xobjects = new List<XObjectContentRecord>();
|
private readonly List<IPdfImage> images = new List<IPdfImage>();
|
||||||
|
|
||||||
public List<MarkedContentElement> Children { get; } = new List<MarkedContentElement>();
|
public List<MarkedContentElement> Children { get; } = new List<MarkedContentElement>();
|
||||||
|
|
||||||
public MarkedContentElementBuilder(int number, NameToken name, DictionaryToken properties)
|
public MarkedContentElementActiveBuilder(int number, NameToken name, DictionaryToken properties)
|
||||||
{
|
{
|
||||||
this.number = number;
|
this.number = number;
|
||||||
this.name = name;
|
this.name = name;
|
||||||
@@ -104,11 +112,6 @@
|
|||||||
paths.Add(path);
|
paths.Add(path);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void AddXObject(XObjectContentRecord xobjext)
|
|
||||||
{
|
|
||||||
xobjects.Add(xobjext);
|
|
||||||
}
|
|
||||||
|
|
||||||
public MarkedContentElement Build(IPdfTokenScanner pdfScanner)
|
public MarkedContentElement Build(IPdfTokenScanner pdfScanner)
|
||||||
{
|
{
|
||||||
var mcid = -1;
|
var mcid = -1;
|
||||||
@@ -133,6 +136,7 @@
|
|||||||
Children,
|
Children,
|
||||||
letters,
|
letters,
|
||||||
paths,
|
paths,
|
||||||
|
images,
|
||||||
number);
|
number);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -185,6 +189,7 @@
|
|||||||
Children,
|
Children,
|
||||||
letters,
|
letters,
|
||||||
paths,
|
paths,
|
||||||
|
images,
|
||||||
number);
|
number);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
namespace UglyToad.PdfPig.Graphics
|
namespace UglyToad.PdfPig.Graphics
|
||||||
{
|
{
|
||||||
using System;
|
using System;
|
||||||
|
using Colors;
|
||||||
using Core;
|
using Core;
|
||||||
using PdfPig.Core;
|
using PdfPig.Core;
|
||||||
using Tokens;
|
using Tokens;
|
||||||
@@ -18,13 +19,17 @@
|
|||||||
|
|
||||||
public RenderingIntent DefaultRenderingIntent { get; }
|
public RenderingIntent DefaultRenderingIntent { get; }
|
||||||
|
|
||||||
|
public ColorSpace DefaultColorSpace { get; }
|
||||||
|
|
||||||
public XObjectContentRecord(XObjectType type, StreamToken stream, TransformationMatrix appliedTransformation,
|
public XObjectContentRecord(XObjectType type, StreamToken stream, TransformationMatrix appliedTransformation,
|
||||||
RenderingIntent defaultRenderingIntent)
|
RenderingIntent defaultRenderingIntent,
|
||||||
|
ColorSpace defaultColorSpace)
|
||||||
{
|
{
|
||||||
Type = type;
|
Type = type;
|
||||||
Stream = stream ?? throw new ArgumentNullException(nameof(stream));
|
Stream = stream ?? throw new ArgumentNullException(nameof(stream));
|
||||||
AppliedTransformation = appliedTransformation;
|
AppliedTransformation = appliedTransformation;
|
||||||
DefaultRenderingIntent = defaultRenderingIntent;
|
DefaultRenderingIntent = defaultRenderingIntent;
|
||||||
|
DefaultColorSpace = defaultColorSpace;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -16,8 +16,7 @@
|
|||||||
{
|
{
|
||||||
public static XObjectImage ReadImage(XObjectContentRecord xObject, IPdfTokenScanner pdfScanner,
|
public static XObjectImage ReadImage(XObjectContentRecord xObject, IPdfTokenScanner pdfScanner,
|
||||||
IFilterProvider filterProvider,
|
IFilterProvider filterProvider,
|
||||||
IResourceStore resourceStore,
|
IResourceStore resourceStore)
|
||||||
bool isLenientParsing)
|
|
||||||
{
|
{
|
||||||
if (xObject == null)
|
if (xObject == null)
|
||||||
{
|
{
|
||||||
@@ -87,25 +86,19 @@
|
|||||||
{
|
{
|
||||||
colorSpace = colorSpaceResult;
|
colorSpace = colorSpaceResult;
|
||||||
}
|
}
|
||||||
else if (dictionary.TryGet(NameToken.ColorSpace, pdfScanner, out ArrayToken colorSpaceArrayToken))
|
else if (dictionary.TryGet(NameToken.ColorSpace, pdfScanner, out ArrayToken colorSpaceArrayToken)
|
||||||
|
&& colorSpaceArrayToken.Length > 0)
|
||||||
{
|
{
|
||||||
if (colorSpaceArrayToken.Length == 0)
|
|
||||||
{
|
|
||||||
throw new PdfDocumentFormatException($"Empty ColorSpace array defined for image XObject: {dictionary}.");
|
|
||||||
}
|
|
||||||
|
|
||||||
var first = colorSpaceArrayToken.Data[0];
|
var first = colorSpaceArrayToken.Data[0];
|
||||||
|
|
||||||
if (!(first is NameToken firstColorSpaceName) || !TryMapColorSpace(firstColorSpaceName, resourceStore, out colorSpaceResult))
|
if ((first is NameToken firstColorSpaceName) && TryMapColorSpace(firstColorSpaceName, resourceStore, out colorSpaceResult))
|
||||||
{
|
{
|
||||||
throw new PdfDocumentFormatException($"Invalid ColorSpace array defined for image XObject: {colorSpaceArrayToken}.");
|
colorSpace = colorSpaceResult;
|
||||||
}
|
}
|
||||||
|
|
||||||
colorSpace = colorSpaceResult;
|
|
||||||
}
|
}
|
||||||
else if (!isJpxDecode)
|
else if (!isJpxDecode)
|
||||||
{
|
{
|
||||||
throw new PdfDocumentFormatException($"No ColorSpace defined for image XObject: {dictionary}.");
|
colorSpace = xObject.DefaultColorSpace;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user