mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-09-19 02:37:56 +08:00
load images eagerly for marked content
When a marked content region contains an image, we load it eagerly, since we won't have access to the necessary classes at evaluation time. We also default the image color space to the active graphics state color space if the dictionary doesn't contain a valid entry.
This commit is contained in:
@@ -74,6 +74,7 @@
|
||||
IReadOnlyList<MarkedContentElement> children,
|
||||
IReadOnlyList<Letter> letters,
|
||||
IReadOnlyList<PdfPath> paths,
|
||||
IReadOnlyList<IPdfImage> images,
|
||||
int index)
|
||||
: base(markedContentIdentifier, tag, properties, language,
|
||||
actualText,
|
||||
@@ -83,6 +84,7 @@
|
||||
children,
|
||||
letters,
|
||||
paths,
|
||||
images,
|
||||
index)
|
||||
{
|
||||
Type = artifactType;
|
||||
|
@@ -52,6 +52,11 @@
|
||||
/// </summary>
|
||||
public IReadOnlyList<PdfPath> Paths { get; }
|
||||
|
||||
/// <summary>
|
||||
/// Images contained in this marked content.
|
||||
/// </summary>
|
||||
public IReadOnlyList<IPdfImage> Images { get; }
|
||||
|
||||
/// <summary>
|
||||
/// The natural language specification.
|
||||
/// </summary>
|
||||
@@ -81,7 +86,8 @@
|
||||
IReadOnlyList<MarkedContentElement> children,
|
||||
IReadOnlyList<Letter> letters,
|
||||
IReadOnlyList<PdfPath> paths,
|
||||
int index)
|
||||
IReadOnlyList<IPdfImage> images,
|
||||
int index)
|
||||
{
|
||||
MarkedContentIdentifier = markedContentIdentifier;
|
||||
Tag = tag;
|
||||
@@ -95,6 +101,7 @@
|
||||
Children = children ?? throw new ArgumentNullException(nameof(children));
|
||||
Letters = letters ?? throw new ArgumentNullException(nameof(letters));
|
||||
Paths = paths ?? throw new ArgumentNullException(nameof(paths));
|
||||
Images = images ?? throw new ArgumentNullException(nameof(images));
|
||||
|
||||
Index = index;
|
||||
}
|
||||
|
@@ -8,7 +8,6 @@
|
||||
using Graphics.Operations;
|
||||
using Tokenization.Scanner;
|
||||
using XObjects;
|
||||
using Geometry;
|
||||
|
||||
/// <summary>
|
||||
/// Wraps content parsed from a page content stream for access.
|
||||
@@ -24,7 +23,6 @@
|
||||
private readonly IPdfTokenScanner pdfScanner;
|
||||
private readonly IFilterProvider filterProvider;
|
||||
private readonly IResourceStore resourceStore;
|
||||
private readonly bool isLenientParsing;
|
||||
|
||||
internal IReadOnlyList<IGraphicsStateOperation> GraphicsStateOperations { get; }
|
||||
|
||||
@@ -38,8 +36,7 @@
|
||||
IReadOnlyList<MarkedContentElement> markedContents,
|
||||
IPdfTokenScanner pdfScanner,
|
||||
IFilterProvider filterProvider,
|
||||
IResourceStore resourceStore,
|
||||
bool isLenientParsing)
|
||||
IResourceStore resourceStore)
|
||||
{
|
||||
GraphicsStateOperations = graphicsStateOperations;
|
||||
Letters = letters;
|
||||
@@ -49,17 +46,14 @@
|
||||
this.pdfScanner = pdfScanner ?? throw new ArgumentNullException(nameof(pdfScanner));
|
||||
this.filterProvider = filterProvider ?? throw new ArgumentNullException(nameof(filterProvider));
|
||||
this.resourceStore = resourceStore ?? throw new ArgumentNullException(nameof(resourceStore));
|
||||
this.isLenientParsing = isLenientParsing;
|
||||
}
|
||||
|
||||
public IEnumerable<IPdfImage> GetImages()
|
||||
{
|
||||
foreach (var image in images)
|
||||
{
|
||||
|
||||
IPdfImage result = null;
|
||||
image.Match(x => { result = XObjectFactory.ReadImage(x, pdfScanner, filterProvider, resourceStore, isLenientParsing); },
|
||||
x => { result = x; });
|
||||
var result = image.Match<IPdfImage>(x => XObjectFactory.ReadImage(x, pdfScanner, filterProvider, resourceStore),
|
||||
x => x);
|
||||
|
||||
yield return result;
|
||||
}
|
||||
|
@@ -4,6 +4,7 @@
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Linq;
|
||||
using Colors;
|
||||
using Content;
|
||||
using Core;
|
||||
using Filters;
|
||||
@@ -106,7 +107,7 @@
|
||||
|
||||
ProcessOperations(operations);
|
||||
|
||||
return new PageContent(operations, letters, paths, images, markedContents, pdfScanner, filterProvider, resourceStore, isLenientParsing);
|
||||
return new PageContent(operations, letters, paths, images, markedContents, pdfScanner, filterProvider, resourceStore);
|
||||
}
|
||||
|
||||
private void ProcessOperations(IReadOnlyList<IGraphicsStateOperation> operations)
|
||||
@@ -317,19 +318,19 @@
|
||||
|
||||
if (subType.Equals(NameToken.Ps))
|
||||
{
|
||||
var contentRecord = new XObjectContentRecord(XObjectType.PostScript, xObjectStream, matrix, state.RenderingIntent);
|
||||
var contentRecord = new XObjectContentRecord(XObjectType.PostScript, xObjectStream, matrix, state.RenderingIntent,
|
||||
state.CurrentStrokingColor?.ColorSpace ?? ColorSpace.DeviceRGB);
|
||||
|
||||
xObjects[XObjectType.PostScript].Add(contentRecord);
|
||||
|
||||
markedContentStack.AddXObject(contentRecord);
|
||||
}
|
||||
else if (subType.Equals(NameToken.Image))
|
||||
{
|
||||
var contentRecord = new XObjectContentRecord(XObjectType.Image, xObjectStream, matrix, state.RenderingIntent);
|
||||
var contentRecord = new XObjectContentRecord(XObjectType.Image, xObjectStream, matrix, state.RenderingIntent,
|
||||
state.CurrentStrokingColor?.ColorSpace ?? ColorSpace.DeviceRGB);
|
||||
|
||||
images.Add(Union<XObjectContentRecord, InlineImage>.One(contentRecord));
|
||||
|
||||
markedContentStack.AddXObject(contentRecord);
|
||||
markedContentStack.AddXObject(contentRecord, pdfScanner, filterProvider, resourceStore);
|
||||
}
|
||||
else if (subType.Equals(NameToken.Form))
|
||||
{
|
||||
|
@@ -3,19 +3,21 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using Content;
|
||||
using Filters;
|
||||
using PdfPig.Core;
|
||||
using Tokenization.Scanner;
|
||||
using Tokens;
|
||||
using XObjects;
|
||||
|
||||
/// <summary>
|
||||
/// Handles building <see cref="MarkedContentElement"/>s.
|
||||
/// </summary>
|
||||
internal class MarkedContentStack
|
||||
{
|
||||
private readonly Stack<MarkedContentElementBuilder> builderStack = new Stack<MarkedContentElementBuilder>();
|
||||
private readonly Stack<MarkedContentElementActiveBuilder> builderStack = new Stack<MarkedContentElementActiveBuilder>();
|
||||
|
||||
private int number;
|
||||
private MarkedContentElementBuilder top;
|
||||
private MarkedContentElementActiveBuilder top;
|
||||
|
||||
public bool CanPop => top != null;
|
||||
|
||||
@@ -26,7 +28,7 @@
|
||||
number++;
|
||||
}
|
||||
|
||||
top = new MarkedContentElementBuilder(number, name, properties);
|
||||
top = new MarkedContentElementActiveBuilder(number, name, properties);
|
||||
builderStack.Push(top);
|
||||
}
|
||||
|
||||
@@ -64,25 +66,31 @@
|
||||
top?.AddImage(image);
|
||||
}
|
||||
|
||||
public void AddXObject(XObjectContentRecord xObject)
|
||||
public void AddXObject(XObjectContentRecord xObject,
|
||||
IPdfTokenScanner scanner,
|
||||
IFilterProvider filterProvider,
|
||||
IResourceStore resourceStore)
|
||||
{
|
||||
top?.AddXObject(xObject);
|
||||
if (top != null && xObject.Type == XObjectType.Image)
|
||||
{
|
||||
var image = XObjectFactory.ReadImage(xObject, scanner, filterProvider, resourceStore);
|
||||
top?.AddImage(image);
|
||||
}
|
||||
}
|
||||
|
||||
private class MarkedContentElementBuilder
|
||||
private class MarkedContentElementActiveBuilder
|
||||
{
|
||||
private readonly int number;
|
||||
private readonly NameToken name;
|
||||
private readonly DictionaryToken properties;
|
||||
|
||||
private readonly List<Letter> letters = new List<Letter>();
|
||||
private readonly List<IPdfImage> images = new List<IPdfImage>();
|
||||
private readonly List<PdfPath> paths = new List<PdfPath>();
|
||||
private readonly List<XObjectContentRecord> xobjects = new List<XObjectContentRecord>();
|
||||
private readonly List<IPdfImage> images = new List<IPdfImage>();
|
||||
|
||||
public List<MarkedContentElement> Children { get; } = new List<MarkedContentElement>();
|
||||
|
||||
public MarkedContentElementBuilder(int number, NameToken name, DictionaryToken properties)
|
||||
public MarkedContentElementActiveBuilder(int number, NameToken name, DictionaryToken properties)
|
||||
{
|
||||
this.number = number;
|
||||
this.name = name;
|
||||
@@ -104,11 +112,6 @@
|
||||
paths.Add(path);
|
||||
}
|
||||
|
||||
public void AddXObject(XObjectContentRecord xobjext)
|
||||
{
|
||||
xobjects.Add(xobjext);
|
||||
}
|
||||
|
||||
public MarkedContentElement Build(IPdfTokenScanner pdfScanner)
|
||||
{
|
||||
var mcid = -1;
|
||||
@@ -133,6 +136,7 @@
|
||||
Children,
|
||||
letters,
|
||||
paths,
|
||||
images,
|
||||
number);
|
||||
}
|
||||
|
||||
@@ -185,6 +189,7 @@
|
||||
Children,
|
||||
letters,
|
||||
paths,
|
||||
images,
|
||||
number);
|
||||
}
|
||||
|
||||
|
@@ -1,6 +1,7 @@
|
||||
namespace UglyToad.PdfPig.Graphics
|
||||
{
|
||||
using System;
|
||||
using Colors;
|
||||
using Core;
|
||||
using PdfPig.Core;
|
||||
using Tokens;
|
||||
@@ -18,13 +19,17 @@
|
||||
|
||||
public RenderingIntent DefaultRenderingIntent { get; }
|
||||
|
||||
public ColorSpace DefaultColorSpace { get; }
|
||||
|
||||
public XObjectContentRecord(XObjectType type, StreamToken stream, TransformationMatrix appliedTransformation,
|
||||
RenderingIntent defaultRenderingIntent)
|
||||
RenderingIntent defaultRenderingIntent,
|
||||
ColorSpace defaultColorSpace)
|
||||
{
|
||||
Type = type;
|
||||
Stream = stream ?? throw new ArgumentNullException(nameof(stream));
|
||||
AppliedTransformation = appliedTransformation;
|
||||
DefaultRenderingIntent = defaultRenderingIntent;
|
||||
DefaultColorSpace = defaultColorSpace;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -16,8 +16,7 @@
|
||||
{
|
||||
public static XObjectImage ReadImage(XObjectContentRecord xObject, IPdfTokenScanner pdfScanner,
|
||||
IFilterProvider filterProvider,
|
||||
IResourceStore resourceStore,
|
||||
bool isLenientParsing)
|
||||
IResourceStore resourceStore)
|
||||
{
|
||||
if (xObject == null)
|
||||
{
|
||||
@@ -87,25 +86,19 @@
|
||||
{
|
||||
colorSpace = colorSpaceResult;
|
||||
}
|
||||
else if (dictionary.TryGet(NameToken.ColorSpace, pdfScanner, out ArrayToken colorSpaceArrayToken))
|
||||
else if (dictionary.TryGet(NameToken.ColorSpace, pdfScanner, out ArrayToken colorSpaceArrayToken)
|
||||
&& colorSpaceArrayToken.Length > 0)
|
||||
{
|
||||
if (colorSpaceArrayToken.Length == 0)
|
||||
{
|
||||
throw new PdfDocumentFormatException($"Empty ColorSpace array defined for image XObject: {dictionary}.");
|
||||
}
|
||||
|
||||
var first = colorSpaceArrayToken.Data[0];
|
||||
|
||||
if (!(first is NameToken firstColorSpaceName) || !TryMapColorSpace(firstColorSpaceName, resourceStore, out colorSpaceResult))
|
||||
if ((first is NameToken firstColorSpaceName) && TryMapColorSpace(firstColorSpaceName, resourceStore, out colorSpaceResult))
|
||||
{
|
||||
throw new PdfDocumentFormatException($"Invalid ColorSpace array defined for image XObject: {colorSpaceArrayToken}.");
|
||||
colorSpace = colorSpaceResult;
|
||||
}
|
||||
|
||||
colorSpace = colorSpaceResult;
|
||||
}
|
||||
else if (!isJpxDecode)
|
||||
{
|
||||
throw new PdfDocumentFormatException($"No ColorSpace defined for image XObject: {dictionary}.");
|
||||
colorSpace = xObject.DefaultColorSpace;
|
||||
}
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user