Resolve image data (implementation from @kasperdaff)
Some checks are pending
Build and test / build (push) Waiting to run
Run Integration Tests / build (push) Waiting to run

This commit is contained in:
BobLd 2025-02-22 20:47:07 +00:00
parent c4a235fb62
commit d973e03206
2 changed files with 141 additions and 135 deletions

View File

@ -83,5 +83,48 @@
return transform;
}
/// <summary>
/// Builds a counterpart of <paramref name="token"/> in which the indirect references held by
/// its child objects have been followed, recursively, and replaced by the objects they point to.
/// </summary>
/// <typeparam name="T">The token type passed in, which is also the type handed back.</typeparam>
/// <param name="token">The token whose children should be resolved.</param>
/// <param name="scanner">The scanner used to look up the targets of indirect references.</param>
/// <returns>The resolved token, cast back to <typeparamref name="T"/>.</returns>
/// <exception cref="InvalidCastException">
/// The resolved token is not a <typeparamref name="T"/>.
/// </exception>
internal static T Resolve<T>(this T token, IPdfTokenScanner scanner) where T : IToken
    => (T)ResolveInternal(token, scanner);
/// <summary>
/// Resolves <paramref name="token"/> and everything reachable from it, starting with no
/// indirect reference in progress.
/// </summary>
/// <param name="token">The token to resolve.</param>
/// <param name="scanner">The scanner used to look up the targets of indirect references.</param>
/// <returns>The resolved token.</returns>
private static IToken ResolveInternal(this IToken token, IPdfTokenScanner scanner)
{
    return ResolveInternal(token, scanner, new HashSet<object>());
}

/// <summary>
/// Recursively replaces indirect references with the objects they point to.
/// <list type="bullet">
/// <item>An indirect reference is looked up through the scanner and the object found is resolved in turn.</item>
/// <item>A stream is rebuilt around its resolved dictionary, keeping the same data.</item>
/// <item>A dictionary or array is rebuilt from its resolved entries.</item>
/// <item>Any other token is returned unchanged.</item>
/// </list>
/// </summary>
/// <param name="token">The token to resolve.</param>
/// <param name="scanner">The scanner used to look up the targets of indirect references.</param>
/// <param name="activeReferences">
/// The indirect references currently being resolved further up the call chain. It is used to
/// detect objects that (directly or indirectly) refer back to themselves.
/// </param>
/// <returns>
/// The resolved token. An indirect reference that closes a cycle is left in place unresolved,
/// because following it again would recurse without end.
/// </returns>
private static IToken ResolveInternal(IToken token, IPdfTokenScanner scanner, HashSet<object> activeReferences)
{
    if (token is IndirectReferenceToken reference)
    {
        // NOTE(review): relies on reference.Data comparing by value (Equals/GetHashCode) so that
        // two tokens pointing at the same object are recognised as the same reference - confirm.
        if (!activeReferences.Add(reference.Data))
        {
            // Already being resolved higher up: this is a cycle, so stop here.
            return token;
        }

        try
        {
            // Resolve the target as well, so chains of references and referenced containers
            // come back fully resolved.
            // NOTE(review): the lookup result is dereferenced without a null check, as before;
            // confirm the scanner cannot return null for a dangling reference.
            return ResolveInternal(scanner.Get(reference.Data).Data, scanner, activeReferences);
        }
        finally
        {
            // Only references on the current path count as a cycle; the same object may
            // legitimately be referenced again from a sibling entry.
            activeReferences.Remove(reference.Data);
        }
    }

    if (token is StreamToken stream)
    {
        // Resolving a dictionary always yields a dictionary, so the cast cannot fail.
        var streamDictionary = (DictionaryToken)ResolveInternal(stream.StreamDictionary, scanner, activeReferences);
        return new StreamToken(streamDictionary, stream.Data);
    }

    if (token is DictionaryToken dict)
    {
        var resolvedItems = new Dictionary<NameToken, IToken>();
        foreach (var kvp in dict.Data)
        {
            resolvedItems[NameToken.Create(kvp.Key)] = ResolveInternal(kvp.Value, scanner, activeReferences);
        }

        return new DictionaryToken(resolvedItems);
    }

    if (token is ArrayToken arr)
    {
        var resolvedItems = new List<IToken>(arr.Length);
        for (var i = 0; i < arr.Length; i++)
        {
            resolvedItems.Add(ResolveInternal(arr.Data[i], scanner, activeReferences));
        }

        return new ArrayToken(resolvedItems);
    }

    // Tokens with no children need no resolution.
    return token;
}
}
}

View File

@ -36,22 +36,16 @@
throw new InvalidOperationException($"Cannot create an image from an XObject with type: {xObject.Type}.");
}
var dictionary = xObject.Stream.StreamDictionary;
var dictionary = xObject.Stream.StreamDictionary.Resolve(pdfScanner);
var bounds = xObject.AppliedTransformation.Transform(new PdfRectangle(new PdfPoint(0, 0), new PdfPoint(1, 1)));
var width = dictionary.Get<NumericToken>(NameToken.Width, pdfScanner).Int;
var height = dictionary.Get<NumericToken>(NameToken.Height, pdfScanner).Int;
var width = dictionary.GetInt(NameToken.Width);
var height = dictionary.GetInt(NameToken.Height);
bool isImageMask = false;
if (dictionary.TryGet(NameToken.ImageMask, pdfScanner, out BooleanToken? isMaskToken))
{
dictionary = dictionary.With(NameToken.ImageMask, isMaskToken);
isImageMask = isMaskToken.Data;
}
var isImageMask = dictionary.TryGet(NameToken.ImageMask, out BooleanToken isMaskToken) && isMaskToken.Data;
var isJpxDecode = dictionary.TryGet(NameToken.Filter, pdfScanner, out NameToken filterName)
&& filterName.Equals(NameToken.JpxDecode);
var isJpxDecode = dictionary.TryGet(NameToken.Filter, out NameToken filterName) && filterName.Equals(NameToken.JpxDecode);
int bitsPerComponent;
if (isImageMask)
@ -63,7 +57,7 @@
if (isJpxDecode)
{
// Optional for JPX
if (dictionary.TryGet(NameToken.BitsPerComponent, pdfScanner, out NumericToken? bitsPerComponentToken))
if (dictionary.TryGet(NameToken.BitsPerComponent, out NumericToken? bitsPerComponentToken))
{
bitsPerComponent = bitsPerComponentToken.Int;
System.Diagnostics.Debug.Assert(bitsPerComponent == Jpeg2000Helper.GetJp2BitsPerComponent(xObject.Stream.Data.Span));
@ -76,7 +70,7 @@
}
else
{
if (!dictionary.TryGet(NameToken.BitsPerComponent, pdfScanner, out NumericToken? bitsPerComponentToken))
if (!dictionary.TryGet(NameToken.BitsPerComponent, out NumericToken? bitsPerComponentToken))
{
throw new PdfDocumentFormatException($"No bits per component defined for image: {dictionary}.");
}
@ -86,26 +80,14 @@
}
var intent = xObject.DefaultRenderingIntent;
if (dictionary.TryGet(NameToken.Intent, pdfScanner, out NameToken renderingIntentToken))
if (dictionary.TryGet(NameToken.Intent, out NameToken renderingIntentToken))
{
intent = renderingIntentToken.Data.ToRenderingIntent();
}
var interpolate = dictionary.TryGet(NameToken.Interpolate, pdfScanner, out BooleanToken? interpolateToken)
var interpolate = dictionary.TryGet(NameToken.Interpolate, out BooleanToken? interpolateToken)
&& interpolateToken.Data;
if (dictionary.TryGet(NameToken.Filter, out var filterToken) && filterToken is IndirectReferenceToken)
{
if (dictionary.TryGet(NameToken.Filter, pdfScanner, out ArrayToken? filterArray))
{
dictionary = dictionary.With(NameToken.Filter, filterArray);
}
else if (dictionary.TryGet(NameToken.Filter, pdfScanner, out NameToken? filterNameToken))
{
dictionary = dictionary.With(NameToken.Filter, filterNameToken);
}
}
var supportsFilters = true;
var filters = filterProvider.GetFilters(dictionary, pdfScanner);
foreach (var filter in filters)
@ -117,32 +99,13 @@
}
}
var decodeParams = dictionary.GetObjectOrDefault(NameToken.DecodeParms, NameToken.Dp);
if (decodeParams is IndirectReferenceToken refToken)
{
dictionary = dictionary.With(NameToken.DecodeParms, pdfScanner.Get(refToken.Data).Data);
}
var jbig2GlobalsParams = dictionary.GetObjectOrDefault(NameToken.Jbig2Globals);
if (jbig2GlobalsParams is IndirectReferenceToken jbig2RefToken)
{
dictionary = dictionary.With(NameToken.Jbig2Globals, pdfScanner.Get(jbig2RefToken.Data).Data);
}
var imParams = dictionary.GetObjectOrDefault(NameToken.Im);
if (imParams is IndirectReferenceToken imRefToken)
{
dictionary = dictionary.With(NameToken.Im, pdfScanner.Get(imRefToken.Data).Data);
}
var streamToken = new StreamToken(dictionary, xObject.Stream.Data);
var decodedBytes = supportsFilters ? new Lazy<ReadOnlyMemory<byte>>(() => streamToken.Decode(filterProvider, pdfScanner))
: null;
var decode = Array.Empty<double>();
if (dictionary.TryGet(NameToken.Decode, pdfScanner, out ArrayToken? decodeArrayToken))
if (dictionary.TryGet(NameToken.Decode, out ArrayToken decodeArrayToken))
{
decode = decodeArrayToken.Data.OfType<NumericToken>()
.Select(x => x.Double)
@ -152,11 +115,11 @@
ColorSpaceDetails? details = null;
if (!isImageMask)
{
if (dictionary.TryGet(NameToken.ColorSpace, pdfScanner, out NameToken? colorSpaceNameToken))
if (dictionary.TryGet(NameToken.ColorSpace, out NameToken? colorSpaceNameToken))
{
details = resourceStore.GetColorSpaceDetails(colorSpaceNameToken, dictionary);
}
else if (dictionary.TryGet(NameToken.ColorSpace, pdfScanner, out ArrayToken? colorSpaceArrayToken)
else if (dictionary.TryGet(NameToken.ColorSpace, out ArrayToken? colorSpaceArrayToken)
&& colorSpaceArrayToken.Length > 0 && colorSpaceArrayToken.Data[0] is NameToken firstColorSpaceName)
{
details = resourceStore.GetColorSpaceDetails(firstColorSpaceName, dictionary);