Resolve image data (implementation from @kasperdaff)
Some checks are pending
Build and test / build (push) Waiting to run
Run Integration Tests / build (push) Waiting to run

This commit is contained in:
BobLd 2025-02-22 20:47:07 +00:00
parent c4a235fb62
commit d973e03206
2 changed files with 141 additions and 135 deletions

View File

@ -1,87 +1,130 @@
namespace UglyToad.PdfPig namespace UglyToad.PdfPig
{ {
using System; using System;
using System.Diagnostics.CodeAnalysis; using System.Diagnostics.CodeAnalysis;
using Core; using Core;
using Filters; using Filters;
using Parser.Parts; using Parser.Parts;
using Tokenization.Scanner; using Tokenization.Scanner;
using Tokens; using Tokens;
/// <summary>
/// Extensions for PDF types.
/// </summary>
public static class PdfExtensions
{
/// <summary>
/// Try and get the entry with a given name and type or look-up the object if it's an indirect reference.
/// </summary>
public static bool TryGet<T>(this DictionaryToken dictionary, NameToken name, IPdfTokenScanner tokenScanner, [NotNullWhen(true)] out T? token)
where T : class, IToken
{
token = default;
if (!dictionary.TryGet(name, out var t) || !(t is T typedToken))
{
if (t is IndirectReferenceToken reference)
{
return DirectObjectFinder.TryGet(reference, tokenScanner, out token);
}
return false;
}
token = typedToken;
return true;
}
/// <summary> /// <summary>
/// Get the entry with a given name and type or look-up the object if it's an indirect reference. /// Extensions for PDF types.
/// </summary> /// </summary>
public static T Get<T>(this DictionaryToken dictionary, NameToken name, IPdfTokenScanner scanner) where T : class, IToken public static class PdfExtensions
{ {
if (!dictionary.TryGet(name, out var token) || !(token is T typedToken)) /// <summary>
{ /// Try and get the entry with a given name and type or look-up the object if it's an indirect reference.
if (!(token is IndirectReferenceToken indirectReference)) /// </summary>
{ public static bool TryGet<T>(this DictionaryToken dictionary, NameToken name, IPdfTokenScanner tokenScanner, [NotNullWhen(true)] out T? token)
throw new PdfDocumentFormatException($"Dictionary does not contain token with name {name} of type {typeof(T).Name}."); where T : class, IToken
} {
token = default;
typedToken = DirectObjectFinder.Get<T>(indirectReference, scanner); if (!dictionary.TryGet(name, out var t) || !(t is T typedToken))
} {
if (t is IndirectReferenceToken reference)
return typedToken; {
return DirectObjectFinder.TryGet(reference, tokenScanner, out token);
}
return false;
}
token = typedToken;
return true;
} }
/// <summary> /// <summary>
/// Get the decoded data from this stream. /// Get the entry with a given name and type or look-up the object if it's an indirect reference.
/// </summary> /// </summary>
public static ReadOnlyMemory<byte> Decode(this StreamToken stream, IFilterProvider filterProvider) public static T Get<T>(this DictionaryToken dictionary, NameToken name, IPdfTokenScanner scanner) where T : class, IToken
{ {
var filters = filterProvider.GetFilters(stream.StreamDictionary); if (!dictionary.TryGet(name, out var token) || !(token is T typedToken))
{
var transform = stream.Data; if (!(token is IndirectReferenceToken indirectReference))
for (var i = 0; i < filters.Count; i++) {
{ throw new PdfDocumentFormatException($"Dictionary does not contain token with name {name} of type {typeof(T).Name}.");
transform = filters[i].Decode(transform.Span, stream.StreamDictionary, i); }
}
typedToken = DirectObjectFinder.Get<T>(indirectReference, scanner);
return transform; }
return typedToken;
} }
/// <summary> /// <summary>
/// Get the decoded data from this stream. /// Get the decoded data from this stream.
/// </summary> /// </summary>
public static ReadOnlyMemory<byte> Decode(this StreamToken stream, ILookupFilterProvider filterProvider, IPdfTokenScanner scanner) public static ReadOnlyMemory<byte> Decode(this StreamToken stream, IFilterProvider filterProvider)
{ {
var filters = filterProvider.GetFilters(stream.StreamDictionary, scanner); var filters = filterProvider.GetFilters(stream.StreamDictionary);
var transform = stream.Data; var transform = stream.Data;
for (var i = 0; i < filters.Count; i++) for (var i = 0; i < filters.Count; i++)
{ {
transform = filters[i].Decode(transform.Span, stream.StreamDictionary, i); transform = filters[i].Decode(transform.Span, stream.StreamDictionary, i);
} }
return transform; return transform;
} }
}
} /// <summary>
/// Get the decoded data from this stream.
/// </summary>
public static ReadOnlyMemory<byte> Decode(this StreamToken stream, ILookupFilterProvider filterProvider, IPdfTokenScanner scanner)
{
    var streamDictionary = stream.StreamDictionary;
    var filterChain = filterProvider.GetFilters(streamDictionary, scanner);

    // Apply the filters in the order the provider returned them; each filter
    // receives the previous filter's output together with its own position.
    var decoded = stream.Data;
    for (var filterIndex = 0; filterIndex < filterChain.Count; filterIndex++)
    {
        var filter = filterChain[filterIndex];
        decoded = filter.Decode(decoded.Span, streamDictionary, filterIndex);
    }

    return decoded;
}
/// <summary>
/// Builds a resolved counterpart of <paramref name="token"/> by delegating to
/// <c>ResolveInternal</c>, which walks nested tokens and looks up indirect
/// references through <paramref name="scanner"/>.
/// </summary>
/// <param name="token">The token to resolve.</param>
/// <param name="scanner">The scanner used to look up indirect references.</param>
/// <returns>The resolved token, cast back to <typeparamref name="T"/>.</returns>
internal static T Resolve<T>(this T token, IPdfTokenScanner scanner) where T : IToken
{
    IToken resolved = ResolveInternal(token, scanner);

    // The cast throws if resolution produced a token that is not a T.
    return (T)resolved;
}
/// <summary>
/// Returns a token equivalent to <paramref name="token"/> in which indirect references,
/// at the top level and at any depth inside streams, dictionaries and arrays, have been
/// replaced by the objects they point to.
/// </summary>
/// <param name="token">The token to resolve.</param>
/// <param name="scanner">The scanner used to look up indirect references.</param>
/// <returns>
/// A newly built stream/dictionary/array token for container inputs, the referenced object
/// for an indirect reference, or <paramref name="token"/> itself for any other token.
/// </returns>
private static IToken ResolveInternal(this IToken token, IPdfTokenScanner scanner)
{
    return ResolveWithCycleGuard(token, scanner, new HashSet<IndirectReference>());
}

/// <summary>
/// Recursive worker for <c>ResolveInternal</c>.
/// <paramref name="activeReferences"/> holds the references currently being expanded on the
/// path from the root to <paramref name="token"/>. A reference that is already on that path
/// is left unresolved so a self-referential object graph cannot recurse without end.
/// </summary>
private static IToken ResolveWithCycleGuard(IToken token, IPdfTokenScanner scanner, HashSet<IndirectReference> activeReferences)
{
    if (token is IndirectReferenceToken reference)
    {
        if (!activeReferences.Add(reference.Data))
        {
            // This reference is already being expanded further up the call chain:
            // keep it as a reference instead of looping.
            return token;
        }

        try
        {
            // Recurse into the referenced object so that a referenced container
            // (or a reference to another reference) is resolved as well.
            return ResolveWithCycleGuard(scanner.Get(reference.Data).Data, scanner, activeReferences);
        }
        finally
        {
            // Only the current path is tracked, so the same object may still be
            // resolved again when it is reached through a sibling entry.
            activeReferences.Remove(reference.Data);
        }
    }

    if (token is StreamToken stream)
    {
        // The dictionary branch below always produces a DictionaryToken.
        var resolvedDictionary = (DictionaryToken)ResolveWithCycleGuard(stream.StreamDictionary, scanner, activeReferences);
        return new StreamToken(resolvedDictionary, stream.Data);
    }

    if (token is DictionaryToken dict)
    {
        var resolvedItems = new Dictionary<NameToken, IToken>();
        foreach (var kvp in dict.Data)
        {
            resolvedItems[NameToken.Create(kvp.Key)] = ResolveWithCycleGuard(kvp.Value, scanner, activeReferences);
        }

        return new DictionaryToken(resolvedItems);
    }

    if (token is ArrayToken arr)
    {
        var resolvedItems = new List<IToken>(arr.Length);
        for (var i = 0; i < arr.Length; i++)
        {
            resolvedItems.Add(ResolveWithCycleGuard(arr.Data[i], scanner, activeReferences));
        }

        return new ArrayToken(resolvedItems);
    }

    // Any other token has nothing to resolve.
    return token;
}
}
}

View File

@ -34,24 +34,18 @@
if (xObject.Type != XObjectType.Image) if (xObject.Type != XObjectType.Image)
{ {
throw new InvalidOperationException($"Cannot create an image from an XObject with type: {xObject.Type}."); throw new InvalidOperationException($"Cannot create an image from an XObject with type: {xObject.Type}.");
} }
var dictionary = xObject.Stream.StreamDictionary; var dictionary = xObject.Stream.StreamDictionary.Resolve(pdfScanner);
var bounds = xObject.AppliedTransformation.Transform(new PdfRectangle(new PdfPoint(0, 0), new PdfPoint(1, 1))); var bounds = xObject.AppliedTransformation.Transform(new PdfRectangle(new PdfPoint(0, 0), new PdfPoint(1, 1)));
var width = dictionary.Get<NumericToken>(NameToken.Width, pdfScanner).Int; var width = dictionary.GetInt(NameToken.Width);
var height = dictionary.Get<NumericToken>(NameToken.Height, pdfScanner).Int; var height = dictionary.GetInt(NameToken.Height);
bool isImageMask = false; var isImageMask = dictionary.TryGet(NameToken.ImageMask, out BooleanToken isMaskToken) && isMaskToken.Data;
if (dictionary.TryGet(NameToken.ImageMask, pdfScanner, out BooleanToken? isMaskToken))
{
dictionary = dictionary.With(NameToken.ImageMask, isMaskToken);
isImageMask = isMaskToken.Data;
}
var isJpxDecode = dictionary.TryGet(NameToken.Filter, pdfScanner, out NameToken filterName) var isJpxDecode = dictionary.TryGet(NameToken.Filter, out NameToken filterName) && filterName.Equals(NameToken.JpxDecode);
&& filterName.Equals(NameToken.JpxDecode);
int bitsPerComponent; int bitsPerComponent;
if (isImageMask) if (isImageMask)
@ -63,7 +57,7 @@
if (isJpxDecode) if (isJpxDecode)
{ {
// Optional for JPX // Optional for JPX
if (dictionary.TryGet(NameToken.BitsPerComponent, pdfScanner, out NumericToken? bitsPerComponentToken)) if (dictionary.TryGet(NameToken.BitsPerComponent, out NumericToken? bitsPerComponentToken))
{ {
bitsPerComponent = bitsPerComponentToken.Int; bitsPerComponent = bitsPerComponentToken.Int;
System.Diagnostics.Debug.Assert(bitsPerComponent == Jpeg2000Helper.GetJp2BitsPerComponent(xObject.Stream.Data.Span)); System.Diagnostics.Debug.Assert(bitsPerComponent == Jpeg2000Helper.GetJp2BitsPerComponent(xObject.Stream.Data.Span));
@ -76,7 +70,7 @@
} }
else else
{ {
if (!dictionary.TryGet(NameToken.BitsPerComponent, pdfScanner, out NumericToken? bitsPerComponentToken)) if (!dictionary.TryGet(NameToken.BitsPerComponent, out NumericToken? bitsPerComponentToken))
{ {
throw new PdfDocumentFormatException($"No bits per component defined for image: {dictionary}."); throw new PdfDocumentFormatException($"No bits per component defined for image: {dictionary}.");
} }
@ -86,26 +80,14 @@
} }
var intent = xObject.DefaultRenderingIntent; var intent = xObject.DefaultRenderingIntent;
if (dictionary.TryGet(NameToken.Intent, pdfScanner, out NameToken renderingIntentToken)) if (dictionary.TryGet(NameToken.Intent, out NameToken renderingIntentToken))
{ {
intent = renderingIntentToken.Data.ToRenderingIntent(); intent = renderingIntentToken.Data.ToRenderingIntent();
} }
var interpolate = dictionary.TryGet(NameToken.Interpolate, pdfScanner, out BooleanToken? interpolateToken) var interpolate = dictionary.TryGet(NameToken.Interpolate, out BooleanToken? interpolateToken)
&& interpolateToken.Data; && interpolateToken.Data;
if (dictionary.TryGet(NameToken.Filter, out var filterToken) && filterToken is IndirectReferenceToken)
{
if (dictionary.TryGet(NameToken.Filter, pdfScanner, out ArrayToken? filterArray))
{
dictionary = dictionary.With(NameToken.Filter, filterArray);
}
else if (dictionary.TryGet(NameToken.Filter, pdfScanner, out NameToken? filterNameToken))
{
dictionary = dictionary.With(NameToken.Filter, filterNameToken);
}
}
var supportsFilters = true; var supportsFilters = true;
var filters = filterProvider.GetFilters(dictionary, pdfScanner); var filters = filterProvider.GetFilters(dictionary, pdfScanner);
foreach (var filter in filters) foreach (var filter in filters)
@ -115,34 +97,15 @@
supportsFilters = false; supportsFilters = false;
break; break;
} }
}
var decodeParams = dictionary.GetObjectOrDefault(NameToken.DecodeParms, NameToken.Dp);
if (decodeParams is IndirectReferenceToken refToken)
{
dictionary = dictionary.With(NameToken.DecodeParms, pdfScanner.Get(refToken.Data).Data);
} }
var jbig2GlobalsParams = dictionary.GetObjectOrDefault(NameToken.Jbig2Globals);
if (jbig2GlobalsParams is IndirectReferenceToken jbig2RefToken)
{
dictionary = dictionary.With(NameToken.Jbig2Globals, pdfScanner.Get(jbig2RefToken.Data).Data);
}
var imParams = dictionary.GetObjectOrDefault(NameToken.Im);
if (imParams is IndirectReferenceToken imRefToken)
{
dictionary = dictionary.With(NameToken.Im, pdfScanner.Get(imRefToken.Data).Data);
}
var streamToken = new StreamToken(dictionary, xObject.Stream.Data); var streamToken = new StreamToken(dictionary, xObject.Stream.Data);
var decodedBytes = supportsFilters ? new Lazy<ReadOnlyMemory<byte>>(() => streamToken.Decode(filterProvider, pdfScanner)) var decodedBytes = supportsFilters ? new Lazy<ReadOnlyMemory<byte>>(() => streamToken.Decode(filterProvider, pdfScanner))
: null; : null;
var decode = Array.Empty<double>(); var decode = Array.Empty<double>();
if (dictionary.TryGet(NameToken.Decode, out ArrayToken decodeArrayToken))
if (dictionary.TryGet(NameToken.Decode, pdfScanner, out ArrayToken? decodeArrayToken))
{ {
decode = decodeArrayToken.Data.OfType<NumericToken>() decode = decodeArrayToken.Data.OfType<NumericToken>()
.Select(x => x.Double) .Select(x => x.Double)
@ -152,11 +115,11 @@
ColorSpaceDetails? details = null; ColorSpaceDetails? details = null;
if (!isImageMask) if (!isImageMask)
{ {
if (dictionary.TryGet(NameToken.ColorSpace, pdfScanner, out NameToken? colorSpaceNameToken)) if (dictionary.TryGet(NameToken.ColorSpace, out NameToken? colorSpaceNameToken))
{ {
details = resourceStore.GetColorSpaceDetails(colorSpaceNameToken, dictionary); details = resourceStore.GetColorSpaceDetails(colorSpaceNameToken, dictionary);
} }
else if (dictionary.TryGet(NameToken.ColorSpace, pdfScanner, out ArrayToken? colorSpaceArrayToken) else if (dictionary.TryGet(NameToken.ColorSpace, out ArrayToken? colorSpaceArrayToken)
&& colorSpaceArrayToken.Length > 0 && colorSpaceArrayToken.Data[0] is NameToken firstColorSpaceName) && colorSpaceArrayToken.Length > 0 && colorSpaceArrayToken.Data[0] is NameToken firstColorSpaceName)
{ {
details = resourceStore.GetColorSpaceDetails(firstColorSpaceName, dictionary); details = resourceStore.GetColorSpaceDetails(firstColorSpaceName, dictionary);
@ -164,7 +127,7 @@
else if (!isJpxDecode) else if (!isJpxDecode)
{ {
details = xObject.DefaultColorSpace; details = xObject.DefaultColorSpace;
} }
} }
else else
{ {
@ -184,7 +147,7 @@
dictionary, dictionary,
xObject.Stream.Data, xObject.Stream.Data,
decodedBytes, decodedBytes,
details); details);
} }
} }
} }