merge pull request #70 from uglytoad/add-images

#55 move support for images to page and add inline images
This commit is contained in:
Eliot Jones
2019-10-08 14:11:19 +01:00
committed by GitHub
41 changed files with 1083 additions and 169 deletions

View File

@@ -44,7 +44,11 @@
}
else if (operationType == typeof(EndInlineImage))
{
operation = new EndInlineImage(new List<IToken>(), new List<byte>());
operation = new EndInlineImage(new List<byte>());
}
else if (operationType == typeof(BeginInlineImageData))
{
operation = new BeginInlineImageData(new Dictionary<NameToken, IToken>());
}
else
{

View File

@@ -77,5 +77,17 @@
// No-op: this implementation ignores the supplied graphics state name.
public void SetNamedGraphicsState(NameToken stateName)
{
}
// No-op: this implementation does nothing when an inline image begins.
public void BeginInlineImage()
{
}
// No-op: the supplied inline image properties are discarded.
public void SetInlineImageProperties(IReadOnlyDictionary<NameToken, IToken> properties)
{
}
// No-op: the supplied inline image bytes are discarded.
public void EndInlineImage(IReadOnlyList<byte> bytes)
{
}
}
}

View File

@@ -1,33 +0,0 @@
namespace UglyToad.PdfPig.Tests.Graphics
{
    using Content;
    using PdfPig.Fonts;
    using PdfPig.Tokens;

    /// <summary>
    /// A stub <see cref="IResourceStore"/> which loads nothing and answers every lookup with <see langword="null"/>.
    /// </summary>
    internal class TestResourceStore : IResourceStore
    {
        /// <summary>
        /// Does nothing; the supplied dictionary is ignored.
        /// </summary>
        public void LoadResourceDictionary(DictionaryToken dictionary, bool isLenientParsing)
        {
            // Nothing to load for the stub.
        }

        /// <summary>
        /// Always returns <see langword="null"/>.
        /// </summary>
        public IFont GetFont(NameToken name) => null;

        /// <summary>
        /// Always returns <see langword="null"/>.
        /// </summary>
        public StreamToken GetXObject(NameToken name) => null;

        /// <summary>
        /// Always returns <see langword="null"/>.
        /// </summary>
        public DictionaryToken GetExtendedGraphicsStateDictionary(NameToken name) => null;

        /// <summary>
        /// Always returns <see langword="null"/>.
        /// </summary>
        public IFont GetFontDirectly(IndirectReferenceToken fontReferenceToken, bool isLenientParsing) => null;
    }
}

View File

@@ -81,14 +81,14 @@
{
var page = document.GetPage(i + 1);
var images = page.ExperimentalAccess.GetRawImages();
var images = page.GetImages();
Assert.NotNull(images);
foreach (var image in images)
{
Assert.True(image.Width > 0, $"Image had width of zero on page {i + 1}.");
Assert.True(image.Height > 0, $"Image had height of zero on page {i + 1}.");
Assert.True(image.WidthInSamples > 0, $"Image had width of zero on page {i + 1}.");
Assert.True(image.HeightInSamples > 0, $"Image had height of zero on page {i + 1}.");
}
}
}

View File

@@ -41,7 +41,7 @@
{
var page = document.GetPage(1);
var images = page.ExperimentalAccess.GetRawImages().ToList();
var images = page.GetImages().ToList();
Assert.Single(images);
}
}

View File

@@ -0,0 +1,87 @@
namespace UglyToad.PdfPig.Tests.Integration
{
    using System;
    using System.Linq;
    using Xunit;

    /// <summary>
    /// Integration tests reading the images, their placement and the text from the
    /// "Single Page Images - from libre office.pdf" test document.
    /// </summary>
    public class SinglePageLibreOfficeImages
    {
        private static string GetFilePath() => IntegrationHelpers.GetDocumentPath(@"Single Page Images - from libre office.pdf");

        /// <summary>
        /// The first page should expose exactly three images.
        /// </summary>
        [Fact]
        public void Has3Images()
        {
            using (var document = PdfDocument.Open(GetFilePath(), ParsingOptions.LenientParsingOff))
            {
                var firstPage = document.GetPage(1);

                var found = firstPage.GetImages().ToList();

                Assert.Equal(3, found.Count);
            }
        }

        /// <summary>
        /// Checks the placement rectangle of each image, visiting the images in order of increasing width.
        /// </summary>
        [Fact]
        public void ImagesHaveCorrectDimensionsAndLocations()
        {
            // One row per image, ordered by width: { width, height, left, top }.
            var expected = new[]
            {
                new[] { 148.3m, 148.3m, 60.1m, 765.8m }, // PdfPig square
                new[] { 206.8m, 83.2m, 309.8m, 552.1m }, // PdfPig squished
                new[] { 391m, 267.1m, 102.2m, 426.3m }   // birthday pigs
            };

            using (var document = PdfDocument.Open(GetFilePath(), ParsingOptions.LenientParsingOff))
            {
                var firstPage = document.GetPage(1);

                var byWidth = firstPage.GetImages().OrderBy(x => x.Bounds.Width).ToList();

                for (var i = 0; i < expected.Length; i++)
                {
                    var row = expected[i];
                    var bounds = byWidth[i].Bounds;

                    Assert.Equal(row[0], bounds.Width);
                    Assert.Equal(row[1], bounds.Height);
                    Assert.Equal(row[2], bounds.Left);
                    Assert.Equal(row[3], bounds.Top);
                }
            }
        }

        /// <summary>
        /// The only text on the page should be "Oink oink".
        /// </summary>
        [Fact]
        public void HasCorrectText()
        {
            using (var document = PdfDocument.Open(GetFilePath(), ParsingOptions.LenientParsingOff))
            {
                var firstPage = document.GetPage(1);

                Assert.Equal("Oink oink", firstPage.Text);
            }
        }

        /// <summary>
        /// Every image must expose decoded bytes, or raw bytes where decoding reports it is not supported.
        /// </summary>
        [Fact]
        public void CanAccessImageBytesExceptUnsupported()
        {
            using (var document = PdfDocument.Open(GetFilePath(), ParsingOptions.LenientParsingOff))
            {
                var firstPage = document.GetPage(1);

                foreach (var pdfImage in firstPage.GetImages())
                {
                    try
                    {
                        Assert.NotNull(pdfImage.Bytes);
                    }
                    catch (NotSupportedException)
                    {
                        // Should allow access to raw bytes.
                        Assert.NotNull(pdfImage.RawBytes);
                    }
                }
            }
        }
    }
}

View File

@@ -53,6 +53,8 @@
"UglyToad.PdfPig.Annotations.AnnotationType",
"UglyToad.PdfPig.Content.Catalog",
"UglyToad.PdfPig.Content.DocumentInformation",
"UglyToad.PdfPig.Content.InlineImage",
"UglyToad.PdfPig.Content.IPdfImage",
"UglyToad.PdfPig.Content.Letter",
"UglyToad.PdfPig.Content.Page",
"UglyToad.PdfPig.Content.PageRotationDegrees",

View File

@@ -11,6 +11,11 @@
return new List<IFilter>();
}
// Stub: ignores the requested names and always hands back a new, empty list.
public IReadOnlyList<IFilter> GetNamedFilters(IReadOnlyList<NameToken> names) => new List<IFilter>();
public IReadOnlyList<IFilter> GetAllFilters()
{
return new List<IFilter>();

View File

@@ -42,5 +42,9 @@ namespace UglyToad.PdfPig.Tests.Tokens
{
return Objects[reference];
}
// No-op: this implementation releases nothing on dispose.
public void Dispose()
{
}
}
}

View File

@@ -0,0 +1,94 @@
namespace UglyToad.PdfPig.Content
{
using System.Collections.Generic;
using Geometry;
using Graphics.Colors;
using Graphics.Core;
using XObjects;
/// <summary>
/// An image in a PDF document, may be an <see cref="InlineImage"/> or an image XObject (<see cref="XObjectImage"/>).
/// </summary>
public interface IPdfImage
{
/// <summary>
/// The placement rectangle of the image in PDF coordinates.
/// </summary>
PdfRectangle Bounds { get; }
/// <summary>
/// The width of the image in samples.
/// </summary>
int WidthInSamples { get; }
/// <summary>
/// The height of the image in samples.
/// </summary>
int HeightInSamples { get; }
/// <summary>
/// The <see cref="ColorSpace"/> used to interpret the image.
/// This defines the number of color components per sample, e.g.
/// 1 component for <see cref="Graphics.Colors.ColorSpace.DeviceGray"/>,
/// 3 components for <see cref="Graphics.Colors.ColorSpace.DeviceRGB"/>,
/// 4 components for <see cref="Graphics.Colors.ColorSpace.DeviceCMYK"/>,
/// etc.
/// This is not defined (<see langword="null"/>) where <see cref="IsImageMask"/> is <see langword="true"/> and is optional where the image is JPXEncoded for <see cref="XObjectImage"/>.
/// </summary>
ColorSpace? ColorSpace { get; }
/// <summary>
/// The number of bits used to represent each color component.
/// </summary>
int BitsPerComponent { get; }
/// <summary>
/// The bytes of the image with any filters decoded.
/// If a filter used to encode the bytes is not supported, accessing this property will throw
/// (for example a <see cref="System.NotSupportedException"/> raised by the filter); access the <see cref="RawBytes"/>
/// instead.
/// </summary>
IReadOnlyList<byte> Bytes { get; }
/// <summary>
/// The encoded bytes of the image with all filters still applied.
/// </summary>
IReadOnlyList<byte> RawBytes { get; }
/// <summary>
/// The color rendering intent to be used when rendering the image.
/// </summary>
RenderingIntent RenderingIntent { get; }
/// <summary>
/// Indicates whether the image is to be treated as an image mask.
/// If <see langword="true"/> the image is a monochrome image in which each sample
/// is specified by a single bit (<see cref="BitsPerComponent"/> is 1).
/// The image represents a stencil where sample values represent places on the page
/// that should be marked with the current color or masked (not marked).
/// </summary>
bool IsImageMask { get; }
/// <summary>
/// Describes how to map image samples into the values appropriate for the
/// <see cref="ColorSpace"/>.
/// The image data is initially composed of values in the range 0 to 2^n - 1
/// where n is <see cref="BitsPerComponent"/>.
/// The decode array contains a pair of numbers for each component in the <see cref="ColorSpace"/>.
/// The value from the image data is then interpolated into the values relevant to the <see cref="ColorSpace"/>
/// using the corresponding values of the decode array.
/// </summary>
IReadOnlyList<decimal> Decode { get; }
/// <summary>
/// Specifies whether interpolation is to be performed. Interpolation smooths images where a single sample in the image
/// as defined may correspond to many pixels on the output device. The interpolation algorithm is implementation
/// dependent and is not defined by the specification.
/// </summary>
bool Interpolate { get; }
/// <summary>
/// Whether this image is an <see cref="InlineImage"/> (<see langword="true"/>) or a <see cref="XObjectImage"/>.
/// </summary>
bool IsInlineImage { get; }
}
}

View File

@@ -14,5 +14,7 @@
DictionaryToken GetExtendedGraphicsStateDictionary(NameToken name);
IFont GetFontDirectly(IndirectReferenceToken fontReferenceToken, bool isLenientParsing);
bool TryGetNamedColorSpace(NameToken name, out IToken namedColorSpace);
}
}

View File

@@ -0,0 +1,98 @@
namespace UglyToad.PdfPig.Content
{
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using Filters;
    using Geometry;
    using Graphics.Colors;
    using Graphics.Core;
    using Tokens;

    /// <inheritdoc />
    /// <summary>
    /// An image whose definition and data are embedded directly in the content stream of a
    /// <see cref="T:UglyToad.PdfPig.Content.Page" /> rather than stored as a separate object.
    /// </summary>
    public class InlineImage : IPdfImage
    {
        // Decoding is deferred until Bytes is first read so that unsupported filters only fail on access.
        private readonly Lazy<IReadOnlyList<byte>> bytesFactory;

        /// <inheritdoc />
        public PdfRectangle Bounds { get; }

        /// <inheritdoc />
        public int WidthInSamples { get; }

        /// <inheritdoc />
        public int HeightInSamples { get; }

        /// <inheritdoc />
        public ColorSpace? ColorSpace { get; }

        /// <inheritdoc />
        public int BitsPerComponent { get; }

        /// <inheritdoc />
        public bool IsImageMask { get; }

        /// <inheritdoc />
        public IReadOnlyList<decimal> Decode { get; }

        /// <inheritdoc />
        public bool IsInlineImage => true;

        /// <inheritdoc />
        public RenderingIntent RenderingIntent { get; }

        /// <inheritdoc />
        public bool Interpolate { get; }

        /// <inheritdoc />
        public IReadOnlyList<byte> Bytes => bytesFactory.Value;

        /// <inheritdoc />
        public IReadOnlyList<byte> RawBytes { get; }

        /// <summary>
        /// Create a new <see cref="InlineImage"/>.
        /// </summary>
        internal InlineImage(PdfRectangle bounds, int widthInSamples, int heightInSamples, int bitsPerComponent, bool isImageMask,
            RenderingIntent renderingIntent,
            bool interpolate,
            ColorSpace? colorSpace,
            IReadOnlyList<decimal> decode,
            IReadOnlyList<byte> bytes,
            IReadOnlyList<IFilter> filters,
            DictionaryToken streamDictionary)
        {
            Bounds = bounds;
            WidthInSamples = widthInSamples;
            HeightInSamples = heightInSamples;
            BitsPerComponent = bitsPerComponent;
            IsImageMask = isImageMask;
            RenderingIntent = renderingIntent;
            Interpolate = interpolate;
            ColorSpace = colorSpace;
            Decode = decode;
            RawBytes = bytes;

            bytesFactory = new Lazy<IReadOnlyList<byte>>(() => ApplyFilters(bytes, filters, streamDictionary));
        }

        /// <inheritdoc />
        public override string ToString() => $"Inline Image (w {Bounds.Width}, h {Bounds.Height})";

        /// <summary>
        /// Copies the encoded bytes and runs each filter over them in list order, passing the filter's index.
        /// </summary>
        private static IReadOnlyList<byte> ApplyFilters(IReadOnlyList<byte> encoded, IReadOnlyList<IFilter> filters,
            DictionaryToken streamDictionary)
        {
            var decoded = encoded.ToArray();

            for (var filterIndex = 0; filterIndex < filters.Count; filterIndex++)
            {
                decoded = filters[filterIndex].Decode(decoded, streamDictionary, filterIndex);
            }

            return decoded;
        }
    }
}

View File

@@ -8,7 +8,6 @@
using Tokens;
using Util;
using Util.JetBrains.Annotations;
using XObjects;
using Geometry;
/// <summary>
@@ -60,7 +59,7 @@
public decimal Height { get; }
/// <summary>
/// The size of the page according to the standard page sizes or Custom if no matching standard size found.
/// The size of the page according to the standard page sizes or <see cref="PageSize.Custom"/> if no matching standard size found.
/// </summary>
public PageSize Size { get; }
@@ -131,6 +130,11 @@
return (wordExtractor ?? DefaultWordExtractor.Instance).GetWords(Letters);
}
/// <summary>
/// Gets any images on the page.
/// </summary>
public IEnumerable<IPdfImage> GetImages() => Content.GetImages();
/// <summary>
/// Provides access to useful members which will change in future releases.
/// </summary>
@@ -150,16 +154,6 @@
this.annotationProvider = annotationProvider;
}
/// <summary>
/// Retrieve any images referenced in this page's content.
/// These are returned as <see cref="XObjectImage"/>s which are
/// raw data from the PDF's content rather than images.
/// </summary>
public IEnumerable<XObjectImage> GetRawImages()
{
return page.Content.GetImages();
}
/// <summary>
/// Get the annotation objects from the page.
/// </summary>

View File

@@ -1,14 +1,17 @@
namespace UglyToad.PdfPig.Content
{
using System;
using System.Collections.Generic;
using Filters;
using Graphics;
using Graphics.Operations;
using Tokenization.Scanner;
using XObjects;
using UglyToad.PdfPig.Geometry;
using Geometry;
using Util;
/// <summary>
///
/// Wraps content parsed from a page content stream for access.
/// </summary>
/// <remarks>
/// This should contain a replayable stack of drawing instructions for page content
@@ -16,36 +19,46 @@
/// </remarks>
internal class PageContent
{
private readonly IReadOnlyDictionary<XObjectType, List<XObjectContentRecord>> xObjects;
private readonly IReadOnlyList<Union<XObjectContentRecord, InlineImage>> images;
private readonly IPdfTokenScanner pdfScanner;
private readonly XObjectFactory xObjectFactory;
private readonly IFilterProvider filterProvider;
private readonly IResourceStore resourceStore;
private readonly bool isLenientParsing;
internal IReadOnlyList<IGraphicsStateOperation> GraphicsStateOperations { get; }
public IReadOnlyList<Letter> Letters { get; }
public IReadOnlyList<PdfPath> Paths { get; }
internal PageContent(IReadOnlyList<IGraphicsStateOperation> graphicsStateOperations, IReadOnlyList<Letter> letters, List<PdfPath> paths,
IReadOnlyDictionary<XObjectType, List<XObjectContentRecord>> xObjects,
internal PageContent(IReadOnlyList<IGraphicsStateOperation> graphicsStateOperations, IReadOnlyList<Letter> letters,
IReadOnlyList<PdfPath> paths,
IReadOnlyList<Union<XObjectContentRecord, InlineImage>> images,
IPdfTokenScanner pdfScanner,
XObjectFactory xObjectFactory,
IFilterProvider filterProvider,
IResourceStore resourceStore,
bool isLenientParsing)
{
GraphicsStateOperations = graphicsStateOperations;
Letters = letters;
Paths = paths;
this.xObjects = xObjects;
this.pdfScanner = pdfScanner;
this.xObjectFactory = xObjectFactory;
this.images = images;
this.pdfScanner = pdfScanner ?? throw new ArgumentNullException(nameof(pdfScanner));
this.filterProvider = filterProvider ?? throw new ArgumentNullException(nameof(filterProvider));
this.resourceStore = resourceStore ?? throw new ArgumentNullException(nameof(resourceStore));
this.isLenientParsing = isLenientParsing;
}
public IEnumerable<XObjectImage> GetImages()
public IEnumerable<IPdfImage> GetImages()
{
foreach (var contentRecord in xObjects[XObjectType.Image])
foreach (var image in images)
{
yield return xObjectFactory.CreateImage(contentRecord, pdfScanner, isLenientParsing);
IPdfImage result = null;
image.Match(x => { result = XObjectFactory.ReadImage(x, pdfScanner, filterProvider, resourceStore, isLenientParsing); },
x => { result = x; });
yield return result;
}
}
}

View File

@@ -8,7 +8,7 @@
using Tokenization.Scanner;
using Tokens;
internal class ResourceContainer : IResourceStore
internal class ResourceStore : IResourceStore
{
private readonly IPdfTokenScanner scanner;
private readonly IFontFactory fontFactory;
@@ -18,7 +18,9 @@
private readonly Dictionary<NameToken, DictionaryToken> extendedGraphicsStates = new Dictionary<NameToken, DictionaryToken>();
public ResourceContainer(IPdfTokenScanner scanner, IFontFactory fontFactory)
private readonly Dictionary<NameToken, NameToken> colorSpaceNames = new Dictionary<NameToken, NameToken>();
public ResourceStore(IPdfTokenScanner scanner, IFontFactory fontFactory)
{
this.scanner = scanner;
this.fontFactory = fontFactory;
@@ -58,6 +60,39 @@
extendedGraphicsStates[name] = state;
}
}
if (resourceDictionary.TryGet(NameToken.ColorSpace, scanner, out DictionaryToken colorSpaceDictionary))
{
foreach (var nameColorSpacePair in colorSpaceDictionary.Data)
{
var name = NameToken.Create(nameColorSpacePair.Key);
if (DirectObjectFinder.TryGet(nameColorSpacePair.Value, scanner, out NameToken colorSpaceName))
{
colorSpaceNames[name] = colorSpaceName;
}
else if (DirectObjectFinder.TryGet(nameColorSpacePair.Value, scanner, out ArrayToken colorSpaceArray))
{
if (colorSpaceArray.Length == 0)
{
throw new PdfDocumentFormatException($"Empty ColorSpace array encountered in page resource dictionary: {resourceDictionary}.");
}
var first = colorSpaceArray.Data[0];
if (!(first is NameToken arrayNamedColorSpace))
{
throw new PdfDocumentFormatException($"Invalid ColorSpace array encountered in page resource dictionary: {colorSpaceArray}.");
}
colorSpaceNames[name] = arrayNamedColorSpace;
}
else
{
throw new PdfDocumentFormatException($"Invalid ColorSpace token encountered in page resource dictionary: {nameColorSpacePair.Value}.");
}
}
}
}
private void LoadFontDictionary(DictionaryToken fontDictionary, bool isLenientParsing)
@@ -115,6 +150,25 @@
return font;
}
/// <summary>
/// Looks up the color space recorded for a resource name.
/// </summary>
/// <param name="name">The resource name to look up; must not be <see langword="null"/>.</param>
/// <param name="namedToken">The recorded color space name when found, otherwise <see langword="null"/>.</param>
/// <returns><see langword="true"/> if an entry exists for <paramref name="name"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="name"/> is <see langword="null"/>.</exception>
public bool TryGetNamedColorSpace(NameToken name, out IToken namedToken)
{
    if (name == null)
    {
        throw new ArgumentNullException(nameof(name));
    }

    var found = colorSpaceNames.TryGetValue(name, out var mapped);

    namedToken = found ? mapped : null;

    return found;
}
public StreamToken GetXObject(NameToken name)
{
var reference = currentResourceState[name];

View File

@@ -33,8 +33,6 @@
using (var stream = new MemoryStream())
using (var writer = new BinaryWriter(stream))
{
for (var i = 0; i < input.Count; i++)
{
var value = input[i];

View File

@@ -0,0 +1,15 @@
namespace UglyToad.PdfPig.Filters
{
    using System;
    using System.Collections.Generic;
    using Tokens;

    /// <summary>
    /// Filter registered for CCITT fax encoded data. Decoding is not implemented and always throws.
    /// </summary>
    internal class CcittFaxDecodeFilter : IFilter
    {
        private const string NotSupportedMessage = "The CCITT Fax Filter for image data is not currently supported. " +
                                                   "Try accessing the raw compressed data directly.";

        /// <summary>
        /// Always throws <see cref="NotSupportedException"/>.
        /// </summary>
        public byte[] Decode(IReadOnlyList<byte> input, DictionaryToken streamDictionary, int filterIndex)
            => throw new NotSupportedException(NotSupportedMessage);
    }
}

View File

@@ -8,7 +8,8 @@
{
/// <summary>
/// Always throws: DCT (JPEG) encoded data is not decoded by this filter.
/// </summary>
/// <param name="input">The encoded bytes (unused).</param>
/// <param name="streamDictionary">The dictionary of the stream being decoded (unused).</param>
/// <param name="filterIndex">The position of this filter in the stream's filter list (unused).</param>
/// <exception cref="NotSupportedException">Thrown on every call.</exception>
public byte[] Decode(IReadOnlyList<byte> input, DictionaryToken streamDictionary, int filterIndex)
{
    // NotSupportedException (not NotImplementedException) matches the other unsupported image filters
    // so callers can catch a single exception type and fall back to the raw bytes.
    throw new NotSupportedException("The DCT (Discrete Cosine Transform) Filter indicates data is encoded in JPEG format. " +
                                    "This filter is not currently supported but the raw data can be supplied to JPEG supporting libraries.");
}
}
}

View File

@@ -7,6 +7,8 @@
{
IReadOnlyList<IFilter> GetFilters(DictionaryToken dictionary);
IReadOnlyList<IFilter> GetNamedFilters(IReadOnlyList<NameToken> names);
IReadOnlyList<IFilter> GetAllFilters();
}
}

View File

@@ -0,0 +1,15 @@
namespace UglyToad.PdfPig.Filters
{
    using System;
    using System.Collections.Generic;
    using Tokens;

    /// <summary>
    /// Filter registered for JBIG2 encoded data. Decoding is not implemented and always throws.
    /// </summary>
    internal class Jbig2DecodeFilter : IFilter
    {
        private const string NotSupportedMessage = "The JBIG2 Filter for monochrome image data is not currently supported. " +
                                                   "Try accessing the raw compressed data directly.";

        /// <summary>
        /// Always throws <see cref="NotSupportedException"/>.
        /// </summary>
        public byte[] Decode(IReadOnlyList<byte> input, DictionaryToken streamDictionary, int filterIndex)
            => throw new NotSupportedException(NotSupportedMessage);
    }
}

View File

@@ -0,0 +1,15 @@
namespace UglyToad.PdfPig.Filters
{
    using System;
    using System.Collections.Generic;
    using Tokens;

    /// <summary>
    /// Filter registered for JPX (JPEG2000) encoded data. Decoding is not implemented and always throws.
    /// </summary>
    internal class JpxDecodeFilter : IFilter
    {
        private const string NotSupportedMessage = "The JPX Filter (JPEG2000) for image data is not currently supported. " +
                                                   "Try accessing the raw compressed data directly.";

        /// <summary>
        /// Always throws <see cref="NotSupportedException"/>.
        /// </summary>
        public byte[] Decode(IReadOnlyList<byte> input, DictionaryToken streamDictionary, int filterIndex)
            => throw new NotSupportedException(NotSupportedMessage);
    }
}

View File

@@ -2,6 +2,7 @@
{
using System;
using System.Collections.Generic;
using System.Linq;
using Exceptions;
using Logging;
using Tokens;
@@ -15,7 +16,11 @@
{
var ascii85 = new Ascii85Filter();
var asciiHex = new AsciiHexDecodeFilter();
var ccitt = new CcittFaxDecodeFilter();
var dct = new DctDecodeFilter();
var flate = new FlateFilter(decodeParameterResolver, pngPredictor, log);
var jbig2 = new Jbig2DecodeFilter();
var jpx = new JpxDecodeFilter();
var runLength = new RunLengthFilter();
var lzw = new LzwFilter(decodeParameterResolver, pngPredictor);
@@ -25,8 +30,14 @@
{NameToken.Ascii85DecodeAbbreviation.Data, ascii85},
{NameToken.AsciiHexDecode.Data, asciiHex},
{NameToken.AsciiHexDecodeAbbreviation.Data, asciiHex},
{NameToken.CcittfaxDecode.Data, ccitt},
{NameToken.CcittfaxDecodeAbbreviation.Data, ccitt},
{NameToken.DctDecode.Data, dct},
{NameToken.DctDecodeAbbreviation.Data, dct},
{NameToken.FlateDecode.Data, flate},
{NameToken.FlateDecodeAbbreviation.Data, flate},
{NameToken.Jbig2Decode.Data, jbig2},
{NameToken.JpxDecode.Data, jpx},
{NameToken.RunLengthDecode.Data, runLength},
{NameToken.RunLengthDecodeAbbreviation.Data, runLength},
{NameToken.LzwDecode, lzw},
@@ -65,6 +76,23 @@
}
}
/// <summary>
/// Resolves each filter name, in order, to its filter instance via <c>GetFilterStrict</c>.
/// </summary>
/// <param name="names">The filter names to resolve; must not be <see langword="null"/>.</param>
/// <returns>One filter per supplied name, in the same order.</returns>
/// <exception cref="ArgumentNullException"><paramref name="names"/> is <see langword="null"/>.</exception>
public IReadOnlyList<IFilter> GetNamedFilters(IReadOnlyList<NameToken> names)
{
    if (names == null)
    {
        throw new ArgumentNullException(nameof(names));
    }

    var resolved = new List<IFilter>();

    for (var i = 0; i < names.Count; i++)
    {
        var filterName = names[i];
        resolved.Add(GetFilterStrict(filterName));
    }

    return resolved;
}
private IFilter GetFilterStrict(string name)
{
if (!filterInstances.TryGetValue(name, out var factory))
@@ -77,7 +105,7 @@
public IReadOnlyList<IFilter> GetAllFilters()
{
throw new System.NotImplementedException();
return filterInstances.Values.Distinct().ToList();
}
}
}

View File

@@ -6,6 +6,8 @@
using Colors;
using Content;
using Core;
using Exceptions;
using Filters;
using Fonts;
using Geometry;
using IO;
@@ -19,27 +21,43 @@
internal class ContentStreamProcessor : IOperationContext
{
/// <summary>
/// Stores each letter as it is encountered in the content stream.
/// </summary>
private readonly List<Letter> letters = new List<Letter>();
/// <summary>
/// Stores each path as it is encountered in the content stream.
/// </summary>
private readonly List<PdfPath> paths = new List<PdfPath>();
/// <summary>
/// Stores a link to each image (either inline or XObject) as it is encountered in the content stream.
/// </summary>
private readonly List<Union<XObjectContentRecord, InlineImage>> images = new List<Union<XObjectContentRecord, InlineImage>>();
private readonly IResourceStore resourceStore;
private readonly UserSpaceUnit userSpaceUnit;
private readonly PageRotationDegrees rotation;
private readonly bool isLenientParsing;
private readonly IPdfTokenScanner pdfScanner;
private readonly XObjectFactory xObjectFactory;
private readonly IFilterProvider filterProvider;
private readonly ILog log;
private Stack<CurrentGraphicsState> graphicsStack = new Stack<CurrentGraphicsState>();
private IFont activeExtendedGraphicsStateFont = null;
private IFont activeExtendedGraphicsStateFont;
private InlineImageBuilder inlineImageBuilder;
// A sequence number of the ShowText operation, used to determine whether letters belong to the same operation or not (letters that belong to different operations have less chance of belonging to the same word).
private int textSequence = 0;
/// <summary>
/// A counter to track individual calls to <see cref="ShowText"/> operations used to determine if letters are likely to be
/// in the same word/group. This exposes internal grouping of letters used by the PDF creator which may correspond to the
/// intended grouping of letters into words.
/// </summary>
private int textSequence;
public TextMatrices TextMatrices { get; } = new TextMatrices();
public TransformationMatrix CurrentTransformationMatrix
{
get { return GetCurrentState().CurrentTransformationMatrix; }
}
public TransformationMatrix CurrentTransformationMatrix => GetCurrentState().CurrentTransformationMatrix;
public PdfPath CurrentPath { get; private set; }
@@ -56,18 +74,18 @@
{XObjectType.PostScript, new List<XObjectContentRecord>()}
};
public List<Letter> Letters = new List<Letter>();
public ContentStreamProcessor(PdfRectangle cropBox, IResourceStore resourceStore, UserSpaceUnit userSpaceUnit, PageRotationDegrees rotation, bool isLenientParsing,
public ContentStreamProcessor(PdfRectangle cropBox, IResourceStore resourceStore, UserSpaceUnit userSpaceUnit, PageRotationDegrees rotation,
bool isLenientParsing,
IPdfTokenScanner pdfScanner,
XObjectFactory xObjectFactory,
IFilterProvider filterProvider,
ILog log)
{
this.resourceStore = resourceStore;
this.userSpaceUnit = userSpaceUnit;
this.rotation = rotation;
this.isLenientParsing = isLenientParsing;
this.pdfScanner = pdfScanner;
this.xObjectFactory = xObjectFactory;
this.pdfScanner = pdfScanner ?? throw new ArgumentNullException(nameof(pdfScanner));
this.filterProvider = filterProvider ?? throw new ArgumentNullException(nameof(filterProvider));
this.log = log;
graphicsStack.Push(new CurrentGraphicsState());
ColorSpaceContext = new ColorSpaceContext(GetCurrentState);
@@ -75,11 +93,11 @@
public PageContent Process(IReadOnlyList<IGraphicsStateOperation> operations)
{
var currentState = CloneAllStates();
CloneAllStates();
ProcessOperations(operations);
return new PageContent(operations, Letters, paths, xObjects, pdfScanner, xObjectFactory, isLenientParsing);
return new PageContent(operations, letters, paths, images, pdfScanner, filterProvider, resourceStore, isLenientParsing);
}
private void ProcessOperations(IReadOnlyList<IGraphicsStateOperation> operations)
@@ -265,7 +283,7 @@
var xObjectStream = resourceStore.GetXObject(xObjectName);
// For now we will determine the type and store the object with the graphics state information preceding it.
// Then consumers of the page can request the object/s to be retrieved by type.
// Then consumers of the page can request the object(s) to be retrieved by type.
var subType = (NameToken)xObjectStream.StreamDictionary.Data[NameToken.Subtype.Data];
var state = GetCurrentState();
@@ -274,15 +292,15 @@
if (subType.Equals(NameToken.Ps))
{
xObjects[XObjectType.PostScript].Add(new XObjectContentRecord(XObjectType.PostScript, xObjectStream, matrix));
xObjects[XObjectType.PostScript].Add(new XObjectContentRecord(XObjectType.PostScript, xObjectStream, matrix, state.RenderingIntent));
}
else if (subType.Equals(NameToken.Image))
{
xObjects[XObjectType.Image].Add(new XObjectContentRecord(XObjectType.Image, xObjectStream, matrix));
images.Add(Union<XObjectContentRecord, InlineImage>.One(new XObjectContentRecord(XObjectType.Image, xObjectStream, matrix, state.RenderingIntent)));
}
else if (subType.Equals(NameToken.Form))
{
xObjects[XObjectType.Form].Add(new XObjectContentRecord(XObjectType.Form, xObjectStream, matrix));
xObjects[XObjectType.Form].Add(new XObjectContentRecord(XObjectType.Form, xObjectStream, matrix, state.RenderingIntent));
}
else
{
@@ -361,6 +379,52 @@
}
}
/// <summary>
/// Starts a fresh inline image builder. When parsing is not lenient, starting one while another is
/// still active is an error.
/// </summary>
public void BeginInlineImage()
{
    var anotherImageActive = inlineImageBuilder != null;

    if (anotherImageActive && !isLenientParsing)
    {
        throw new PdfDocumentFormatException("Begin inline image (BI) command encountered while another inline image was active.");
    }

    inlineImageBuilder = new InlineImageBuilder();
}
/// <summary>
/// Stores the property dictionary on the active inline image builder. With no active builder the call
/// is ignored under lenient parsing and is an error otherwise.
/// </summary>
public void SetInlineImageProperties(IReadOnlyDictionary<NameToken, IToken> properties)
{
    if (inlineImageBuilder != null)
    {
        inlineImageBuilder.Properties = properties;
        return;
    }

    if (!isLenientParsing)
    {
        throw new PdfDocumentFormatException("Begin inline image data (ID) command encountered without a corresponding begin inline image (BI) command.");
    }
}
/// <summary>
/// Completes the active inline image with its data bytes, records it in the image list and clears the
/// builder. With no active builder the call is ignored under lenient parsing and is an error otherwise.
/// </summary>
public void EndInlineImage(IReadOnlyList<byte> bytes)
{
    if (inlineImageBuilder != null)
    {
        inlineImageBuilder.Bytes = bytes;

        var matrix = CurrentTransformationMatrix;
        var intent = GetCurrentState().RenderingIntent;

        var inlineImage = inlineImageBuilder.CreateInlineImage(matrix, filterProvider, pdfScanner, intent, resourceStore);

        images.Add(Union<XObjectContentRecord, InlineImage>.Two(inlineImage));

        inlineImageBuilder = null;
        return;
    }

    if (!isLenientParsing)
    {
        throw new PdfDocumentFormatException("End inline image (EI) command encountered without a corresponding begin inline image (BI) command.");
    }
}
private void AdjustTextMatrix(decimal tx, decimal ty)
{
var matrix = TransformationMatrix.GetTranslationMatrix(tx, ty);
@@ -390,7 +454,7 @@
pointSize,
textSequence);
Letters.Add(letter);
letters.Add(letter);
}
}
}

View File

@@ -4,7 +4,7 @@
using Geometry;
using IO;
using Tokens;
using UglyToad.PdfPig.Core;
using PdfPig.Core;
using Util.JetBrains.Annotations;
/// <summary>
@@ -104,5 +104,20 @@
/// </summary>
/// <param name="stateName">The name of the state to apply.</param>
void SetNamedGraphicsState(NameToken stateName);
/// <summary>
/// Indicate that an inline image is being defined.
/// </summary>
void BeginInlineImage();
/// <summary>
/// Define the properties of the inline image currently being drawn.
/// </summary>
void SetInlineImageProperties(IReadOnlyDictionary<NameToken, IToken> properties);
/// <summary>
/// Indicates that the current inline image is complete.
/// </summary>
void EndInlineImage(IReadOnlyList<byte> bytes);
}
}

View File

@@ -0,0 +1,213 @@
namespace UglyToad.PdfPig.Graphics
{
using System;
using System.Collections.Generic;
using System.Linq;
using Colors;
using Content;
using Core;
using Exceptions;
using Filters;
using Geometry;
using PdfPig.Core;
using Tokenization.Scanner;
using Tokens;
using Util;
internal class InlineImageBuilder
{
public IReadOnlyDictionary<NameToken, IToken> Properties { get; set; }
public IReadOnlyList<byte> Bytes { get; set; }
/// <summary>
/// Builds an <see cref="InlineImage"/> from the <see cref="Properties"/> and <see cref="Bytes"/> set on this builder.
/// Each entry is looked up under its full key first and then under its abbreviated key.
/// </summary>
/// <param name="transformationMatrix">The matrix applied to the rectangle spanning (0, 0) to (1, 1) to produce the image bounds.</param>
/// <param name="filterProvider">Used to resolve the named filters declared for the image data.</param>
/// <param name="tokenScanner">Not used by this method body.</param>
/// <param name="defaultRenderingIntent">The rendering intent used when the image does not declare a usable Intent name.</param>
/// <param name="resourceStore">Used to resolve color space names which are not recognised directly.</param>
/// <returns>The inline image; its decoded bytes are produced later from the resolved filters.</returns>
/// <exception cref="InvalidOperationException"><see cref="Properties"/> or <see cref="Bytes"/> has not been set.</exception>
/// <exception cref="PdfDocumentFormatException">A required entry is missing or the color space is empty, invalid or unrecognised.</exception>
public InlineImage CreateInlineImage(TransformationMatrix transformationMatrix, IFilterProvider filterProvider,
IPdfTokenScanner tokenScanner,
RenderingIntent defaultRenderingIntent,
IResourceStore resourceStore)
{
if (Properties == null || Bytes == null)
{
throw new InvalidOperationException($"Inline image builder not completely defined before calling {nameof(CreateInlineImage)}.");
}
// Resolves a color space name by trying, in order: the direct mapping, the abbreviated names
// (G, RGB, CMYK, I), then a lookup in the resource store whose result is mapped the same two ways.
bool TryMapColorSpace(NameToken name, out ColorSpace colorSpaceResult)
{
if (name.TryMapToColorSpace(out colorSpaceResult))
{
return true;
}
if (TryExtendedColorSpaceNameMapping(name, out colorSpaceResult))
{
return true;
}
if (!resourceStore.TryGetNamedColorSpace(name, out var colorSpaceNamedToken) || !(colorSpaceNamedToken is NameToken newName))
{
return false;
}
if (newName.TryMapToColorSpace(out colorSpaceResult))
{
return true;
}
if (TryExtendedColorSpaceNameMapping(newName, out colorSpaceResult))
{
return true;
}
return false;
}
// The placement rectangle is the rectangle between (0, 0) and (1, 1) run through the supplied matrix.
var bounds = transformationMatrix.Transform(new PdfRectangle(new PdfPoint(1, 1),
new PdfPoint(0, 0)));
// Width and height are required entries.
var width = GetByKeys<NumericToken>(NameToken.Width, NameToken.W, true).Int;
var height = GetByKeys<NumericToken>(NameToken.Height, NameToken.H, true).Int;
var maskToken = GetByKeys<BooleanToken>(NameToken.ImageMask, NameToken.Im, false);
var isMask = maskToken?.Data == true;
// Bits per component is only required for non-mask images; a mask without the entry uses 1.
var bitsPerComponent = GetByKeys<NumericToken>(NameToken.BitsPerComponent, NameToken.Bpc, !isMask)?.Int ?? 1;
// The color space is left null for image masks.
var colorSpace = default(ColorSpace?);
if (!isMask)
{
// The color space entry may be a name or an array; the name form is tried first.
var colorSpaceName = GetByKeys<NameToken>(NameToken.ColorSpace, NameToken.Cs, false);
if (colorSpaceName == null)
{
// Array form: required at this point, and only its first element (which must be a name) is mapped.
var colorSpaceArray = GetByKeys<ArrayToken>(NameToken.ColorSpace, NameToken.Cs, true);
if (colorSpaceArray.Length == 0)
{
throw new PdfDocumentFormatException("Empty ColorSpace array defined for inline image.");
}
if (!(colorSpaceArray.Data[0] is NameToken firstColorSpaceName))
{
throw new PdfDocumentFormatException($"Invalid ColorSpace array defined for inline image: {colorSpaceArray}.");
}
if (!TryMapColorSpace(firstColorSpaceName, out var colorSpaceMapped))
{
throw new PdfDocumentFormatException($"Invalid ColorSpace defined for inline image: {firstColorSpaceName}.");
}
colorSpace = colorSpaceMapped;
}
else
{
if (!TryMapColorSpace(colorSpaceName, out var colorSpaceMapped))
{
throw new PdfDocumentFormatException($"Invalid ColorSpace defined for inline image: {colorSpaceName}.");
}
colorSpace = colorSpaceMapped;
}
}
// Intent has no abbreviated key; fall back to the supplied default when absent.
var renderingIntent = GetByKeys<NameToken>(NameToken.Intent, null, false)?.Data?.ToRenderingIntent() ?? defaultRenderingIntent;
// The filter entry may be a single name or an array; non-name array entries are skipped.
var filterNames = new List<NameToken>();
var filterName = GetByKeys<NameToken>(NameToken.Filter, NameToken.F, false);
if (filterName == null)
{
var filterArray = GetByKeys<ArrayToken>(NameToken.Filter, NameToken.F, false);
if (filterArray != null)
{
filterNames.AddRange(filterArray.Data.OfType<NameToken>());
}
}
else
{
filterNames.Add(filterName);
}
var filters = filterProvider.GetNamedFilters(filterNames);
// A missing decode array becomes an empty list; only numeric entries are kept.
var decodeRaw = GetByKeys<ArrayToken>(NameToken.Decode, NameToken.D, false) ?? new ArrayToken(EmptyArray<IToken>.Instance);
var decode = decodeRaw.Data.OfType<NumericToken>().Select(x => x.Data).ToArray();
// Build a dictionary carrying only the decode parameters (dictionary or array form, always stored
// under the full DecodeParms key); it is handed to the image as the stream dictionary for its filters.
var filterDictionaryEntries = new Dictionary<NameToken, IToken>();
var decodeParamsDict = GetByKeys<DictionaryToken>(NameToken.DecodeParms, NameToken.Dp, false);
if (decodeParamsDict == null)
{
var decodeParamsArray = GetByKeys<ArrayToken>(NameToken.DecodeParms, NameToken.Dp, false);
if (decodeParamsArray != null)
{
filterDictionaryEntries[NameToken.DecodeParms] = decodeParamsArray;
}
}
else
{
filterDictionaryEntries[NameToken.DecodeParms] = decodeParamsDict;
}
var streamDictionary = new DictionaryToken(filterDictionaryEntries);
// Interpolation is off unless explicitly enabled.
var interpolate = GetByKeys<BooleanToken>(NameToken.Interpolate, NameToken.I, false)?.Data ?? false;
return new InlineImage(bounds, width, height, bitsPerComponent, isMask, renderingIntent, interpolate, colorSpace, decode, Bytes,
filters,
streamDictionary);
}
/// <summary>
/// Maps an abbreviated color space name ("G", "RGB", "CMYK" or "I") to the
/// corresponding <see cref="ColorSpace"/> value.
/// </summary>
/// <param name="name">The name token whose data is compared against the known abbreviations.</param>
/// <param name="result">
/// The mapped color space when the name is recognised; otherwise <see cref="ColorSpace.DeviceGray"/>.
/// </param>
/// <returns><c>true</c> if the name was one of the known abbreviations, otherwise <c>false</c>.</returns>
private static bool TryExtendedColorSpaceNameMapping(NameToken name, out ColorSpace result)
{
    // Stays null unless one of the abbreviations below matches.
    ColorSpace? mapped = null;

    switch (name.Data)
    {
        case "G":
            mapped = ColorSpace.DeviceGray;
            break;
        case "RGB":
            mapped = ColorSpace.DeviceRGB;
            break;
        case "CMYK":
            mapped = ColorSpace.DeviceCMYK;
            break;
        case "I":
            mapped = ColorSpace.Indexed;
            break;
    }

    // Unrecognised names still hand back DeviceGray through the out parameter.
    result = mapped ?? ColorSpace.DeviceGray;
    return mapped.HasValue;
}
/// <summary>
/// Looks up an entry of type <typeparamref name="T"/> in <c>Properties</c>, trying
/// <paramref name="name1"/> first and then, when provided, <paramref name="name2"/>.
/// </summary>
/// <param name="name1">The first key to try.</param>
/// <param name="name2">The second key to try; skipped when <c>null</c>.</param>
/// <param name="required">Whether a missing (or wrongly typed) entry is an error.</param>
/// <returns>The matching token, or the default value of <typeparamref name="T"/> if absent and not required.</returns>
/// <exception cref="PdfDocumentFormatException">No entry of the requested type exists and it was required.</exception>
// ReSharper disable once ParameterOnlyUsedForPreconditionCheck.Local
private T GetByKeys<T>(NameToken name1, NameToken name2, bool required) where T : IToken
{
    var foundPrimary = Properties.TryGetValue(name1, out var primaryValue);
    if (foundPrimary && primaryValue is T primary)
    {
        return primary;
    }

    // An entry under the first key with the wrong type falls through to the second key.
    if (name2 != null && Properties.TryGetValue(name2, out var secondaryValue) && secondaryValue is T secondary)
    {
        return secondary;
    }

    if (!required)
    {
        return default(T);
    }

    throw new PdfDocumentFormatException($"Inline image dictionary missing required entry {name1}/{name2}.");
}
}
}

View File

@@ -28,6 +28,7 @@
/// <inheritdoc />
// Running this operation only notifies the context that an inline image has begun.
public void Run(IOperationContext operationContext) => operationContext.BeginInlineImage();
/// <inheritdoc />

View File

@@ -1,6 +1,9 @@
namespace UglyToad.PdfPig.Graphics.Operations.InlineImages
{
using System;
using System.Collections.Generic;
using System.IO;
using Tokens;
/// <inheritdoc />
/// <summary>
@@ -13,21 +16,26 @@
/// </summary>
public const string Symbol = "ID";
/// <summary>
/// The instance of the <see cref="BeginInlineImageData"/> operation.
/// </summary>
public static readonly BeginInlineImageData Value = new BeginInlineImageData();
/// <inheritdoc />
public string Operator => Symbol;
private BeginInlineImageData()
/// <summary>
/// The key-value pairs which specify attributes of the following image.
/// </summary>
public IReadOnlyDictionary<NameToken, IToken> Dictionary { get; }
/// <summary>
/// Create a new <see cref="BeginInlineImageData"/>.
/// </summary>
public BeginInlineImageData(IReadOnlyDictionary<NameToken, IToken> dictionary)
{
Dictionary = dictionary ?? throw new ArgumentNullException(nameof(dictionary));
}
/// <inheritdoc />
// Hands the stored key-value pairs to the context as the inline image properties.
public void Run(IOperationContext operationContext) => operationContext.SetInlineImageProperties(Dictionary);
/// <inheritdoc />

View File

@@ -3,7 +3,6 @@
using System;
using System.Collections.Generic;
using System.IO;
using Tokens;
/// <inheritdoc />
/// <summary>
@@ -17,12 +16,7 @@
public const string Symbol = "EI";
/// <summary>
/// The tokens declared in order for this inline image object.
/// </summary>
public IReadOnlyList<IToken> ImageTokens { get; }
/// <summary>
/// The raw data for the inline image which should be interpreted according to the <see cref="ImageTokens"/>.
/// The raw data for the inline image which should be interpreted according to the corresponding <see cref="BeginInlineImageData.Dictionary"/>.
/// </summary>
public IReadOnlyList<byte> ImageData { get; }
@@ -32,17 +26,16 @@
/// <summary>
/// Create a new <see cref="EndInlineImage"/> operation.
/// </summary>
/// <param name="imageTokens">The tokens which were set during the declaration of this image.</param>
/// <param name="imageData">The raw byte data of this image.</param>
public EndInlineImage(IReadOnlyList<IToken> imageTokens, IReadOnlyList<byte> imageData)
public EndInlineImage(IReadOnlyList<byte> imageData)
{
ImageTokens = imageTokens ?? throw new ArgumentNullException(nameof(imageTokens));
ImageData = imageData ?? throw new ArgumentNullException(nameof(imageData));
}
/// <inheritdoc />
// Passes the raw image bytes to the context to finish the inline image.
public void Run(IOperationContext operationContext) => operationContext.EndInlineImage(ImageData);
/// <inheritdoc />

View File

@@ -1,6 +1,7 @@
namespace UglyToad.PdfPig.Graphics
{
using System;
using Core;
using PdfPig.Core;
using Tokens;
using Util.JetBrains.Annotations;
@@ -15,11 +16,15 @@
public TransformationMatrix AppliedTransformation { get; }
public XObjectContentRecord(XObjectType type, StreamToken stream, TransformationMatrix appliedTransformation)
public RenderingIntent DefaultRenderingIntent { get; }
public XObjectContentRecord(XObjectType type, StreamToken stream, TransformationMatrix appliedTransformation,
RenderingIntent defaultRenderingIntent)
{
Type = type;
Stream = stream ?? throw new ArgumentNullException(nameof(stream));
AppliedTransformation = appliedTransformation;
DefaultRenderingIntent = defaultRenderingIntent;
}
}
}

View File

@@ -30,8 +30,23 @@
if (token is InlineImageDataToken inlineImageData)
{
graphicsStateOperations.Add(BeginInlineImageData.Value);
graphicsStateOperations.Add(new EndInlineImage(precedingTokens, inlineImageData.Data));
var dictionary = new Dictionary<NameToken, IToken>();
for (var i = 0; i < precedingTokens.Count - 1; i++)
{
var t = precedingTokens[i];
if (!(t is NameToken n))
{
continue;
}
i++;
dictionary[n] = precedingTokens[i];
}
graphicsStateOperations.Add(new BeginInlineImageData(dictionary));
graphicsStateOperations.Add(new EndInlineImage(inlineImageData.Data));
precedingTokens.Clear();
}
else if (token is OperatorToken op)

View File

@@ -14,7 +14,6 @@
using Tokenization.Scanner;
using Tokens;
using Util;
using XObjects;
internal class PageFactory : IPageFactory
{
@@ -22,18 +21,15 @@
private readonly IResourceStore resourceStore;
private readonly IFilterProvider filterProvider;
private readonly IPageContentParser pageContentParser;
private readonly XObjectFactory xObjectFactory;
private readonly ILog log;
public PageFactory(IPdfTokenScanner pdfScanner, IResourceStore resourceStore, IFilterProvider filterProvider,
IPageContentParser pageContentParser,
XObjectFactory xObjectFactory,
ILog log)
{
this.resourceStore = resourceStore;
this.filterProvider = filterProvider;
this.pageContentParser = pageContentParser;
this.xObjectFactory = xObjectFactory;
this.log = log;
this.pdfScanner = pdfScanner;
}
@@ -125,7 +121,7 @@
{
var operations = pageContentParser.Parse(new ByteArrayInputBytes(contentBytes));
var context = new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit, rotation, isLenientParsing, pdfScanner, xObjectFactory, log);
var context = new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit, rotation, isLenientParsing, pdfScanner, filterProvider, log);
return context.Process(operations);
}

View File

@@ -26,7 +26,6 @@
using Tokenization.Scanner;
using Tokens;
using Util;
using XObjects;
internal static class PdfDocumentFactory
{
@@ -123,11 +122,11 @@
new Type1FontParser(new Type1EncryptedPortionParser()), compactFontFormatParser),
new Type3FontHandler(pdfScanner, cMapCache, filterProvider, encodingReader));
var resourceContainer = new ResourceContainer(pdfScanner, fontFactory);
var resourceContainer = new ResourceStore(pdfScanner, fontFactory);
var pageFactory = new PageFactory(pdfScanner, resourceContainer, filterProvider,
new PageContentParser(new ReflectionGraphicsStateOperationFactory()),
new XObjectFactory(), log);
log);
var informationFactory = new DocumentInformationFactory();
var information = informationFactory.Create(pdfScanner, crossReferenceTable.Trailer);

View File

@@ -220,6 +220,7 @@
{
try
{
pdfScanner.Dispose();
inputBytes.Dispose();
}
catch (Exception ex)

View File

@@ -0,0 +1,18 @@
namespace UglyToad.PdfPig.Tokenization.Scanner
{
using System;
using Tokens;
/// <summary>
/// Tokenizes objects from bytes in a PDF file.
/// Adds lookup of an object by its indirect reference on top of <see cref="ISeekableTokenScanner"/>
/// and is <see cref="IDisposable"/>.
/// </summary>
internal interface IPdfTokenScanner : ISeekableTokenScanner, IDisposable
{
/// <summary>
/// Tokenize the object identified by the given indirect reference.
/// </summary>
/// <param name="reference">The indirect reference of the object to tokenize.</param>
/// <returns>The tokenized object.</returns>
ObjectToken Get(IndirectReference reference);
}
}

View File

@@ -13,19 +13,6 @@
using Parser.Parts;
using Tokens;
/// <summary>
/// Tokenizes objects from bytes in a PDF file.
/// </summary>
internal interface IPdfTokenScanner : ISeekableTokenScanner
{
/// <summary>
/// Tokenize the object with a given object number.
/// </summary>
/// <param name="reference">The object number for the object to tokenize.</param>
/// <returns>The tokenized object.</returns>
ObjectToken Get(IndirectReference reference);
}
internal class PdfTokenScanner : IPdfTokenScanner
{
private static readonly byte[] EndstreamBytes =
@@ -41,6 +28,7 @@
private readonly CoreTokenScanner coreTokenScanner;
private IEncryptionHandler encryptionHandler;
private bool isDisposed;
/// <summary>
/// Stores tokens encountered between obj - endobj markers for each <see cref="MoveNext"/> call.
@@ -75,6 +63,11 @@
public bool MoveNext()
{
if (isDisposed)
{
throw new ObjectDisposedException(nameof(PdfTokenScanner));
}
// Read until we find object-number generation obj, e.g. "69 420 obj".
int tokensRead = 0;
while (coreTokenScanner.MoveNext() && !Equals(coreTokenScanner.CurrentToken, OperatorToken.StartObject))
@@ -576,26 +569,51 @@
/// <summary>
/// Attempts to read a token of type <typeparamref name="T"/> by delegating to the core token scanner.
/// </summary>
/// <param name="token">Receives the token produced by the core token scanner.</param>
/// <returns>The result reported by the core token scanner.</returns>
/// <exception cref="ObjectDisposedException">This scanner has already been disposed.</exception>
public bool TryReadToken<T>(out T token) where T : class, IToken
{
    // Only a live scanner may delegate to the core scanner.
    if (!isDisposed)
    {
        return coreTokenScanner.TryReadToken(out token);
    }

    throw new ObjectDisposedException(nameof(PdfTokenScanner));
}
/// <summary>
/// Moves the core token scanner to the given position.
/// </summary>
/// <param name="position">The position passed through to the core token scanner.</param>
/// <exception cref="ObjectDisposedException">This scanner has already been disposed.</exception>
public void Seek(long position)
{
    // Fail loudly rather than seek once the scanner has been disposed.
    if (isDisposed)
    {
        var scannerName = nameof(PdfTokenScanner);
        throw new ObjectDisposedException(scannerName);
    }

    coreTokenScanner.Seek(position);
}
/// <summary>
/// Registers a custom tokenizer for the given first byte on the core token scanner.
/// </summary>
/// <param name="firstByte">The byte passed through to the core token scanner's registration.</param>
/// <param name="tokenizer">The tokenizer to register.</param>
/// <exception cref="ObjectDisposedException">This scanner has already been disposed.</exception>
public void RegisterCustomTokenizer(byte firstByte, ITokenizer tokenizer)
{
    // Registration is refused after disposal.
    if (isDisposed)
    {
        var scannerName = nameof(PdfTokenScanner);
        throw new ObjectDisposedException(scannerName);
    }

    coreTokenScanner.RegisterCustomTokenizer(firstByte, tokenizer);
}
/// <summary>
/// Removes a custom tokenizer from the core token scanner.
/// </summary>
/// <param name="tokenizer">The tokenizer to deregister.</param>
/// <exception cref="ObjectDisposedException">This scanner has already been disposed.</exception>
public void DeregisterCustomTokenizer(ITokenizer tokenizer)
{
    // Deregistration is refused after disposal.
    if (isDisposed)
    {
        var scannerName = nameof(PdfTokenScanner);
        throw new ObjectDisposedException(scannerName);
    }

    coreTokenScanner.DeregisterCustomTokenizer(tokenizer);
}
public ObjectToken Get(IndirectReference reference)
{
if (isDisposed)
{
throw new ObjectDisposedException(nameof(PdfTokenScanner));
}
if (objectLocationProvider.TryGetCached(reference, out var objectToken))
{
return objectToken;
@@ -717,5 +735,11 @@
return results;
}
/// <summary>
/// Disposes the input bytes (if any) and marks this scanner as disposed, after which
/// the guarded members throw <see cref="ObjectDisposedException"/>.
/// Calling this more than once has no further effect.
/// </summary>
public void Dispose()
{
    // Make repeated calls a no-op so the input is not disposed twice through this scanner.
    if (isDisposed)
    {
        return;
    }

    // Set the flag first so the scanner counts as disposed even if releasing the input throws.
    isDisposed = true;
    inputBytes?.Dispose();
}
}
}

View File

@@ -114,7 +114,7 @@
public static readonly NameToken ColorDodge = new NameToken("ColorDodge");
public static readonly NameToken Colorants = new NameToken("Colorants");
public static readonly NameToken Colors = new NameToken("Colors");
public static readonly NameToken Colorspace = new NameToken("ColorSpace");
public static readonly NameToken ColorSpace = new NameToken("ColorSpace");
public static readonly NameToken Columns = new NameToken("Columns");
public static readonly NameToken Compatible = new NameToken("Compatible");
public static readonly NameToken Components = new NameToken("Components");
@@ -272,6 +272,7 @@
public static readonly NameToken Info = new NameToken("Info");
public static readonly NameToken Ink = new NameToken("Ink");
public static readonly NameToken Inklist = new NameToken("InkList");
public static readonly NameToken Intent = new NameToken("Intent");
public static readonly NameToken Interpolate = new NameToken("Interpolate");
public static readonly NameToken It = new NameToken("IT");
public static readonly NameToken ItalicAngle = new NameToken("ItalicAngle");

View File

@@ -23,6 +23,11 @@
<AllowedOutputExtensionsInPackageBuildOutputFolder>$(AllowedOutputExtensionsInPackageBuildOutputFolder);.pdb</AllowedOutputExtensionsInPackageBuildOutputFolder>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(TargetFramework)|$(Platform)'=='Debug|netstandard2.0|AnyCPU'">
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<WarningsAsErrors />
</PropertyGroup>
<ItemGroup>
<None Remove="Resources\AdobeFontMetrics\*" />
<None Remove="Resources\CMap\*" />

View File

@@ -1,5 +1,7 @@
namespace UglyToad.PdfPig.Util
{
using System.Collections.Generic;
using System.Linq;
using System.Text;
internal static class OtherEncodings
@@ -19,6 +21,16 @@
return Iso88591.GetBytes(s);
}
/// <summary>
/// Converts a read-only list of bytes to a string by forwarding to the <c>byte[]</c> overload.
/// </summary>
/// <param name="bytes">The bytes to convert.</param>
/// <returns>The string produced by the <c>byte[]</c> overload.</returns>
public static string BytesAsLatin1String(IReadOnlyList<byte> bytes)
{
    // Reuse the list directly when it already is an array; otherwise copy it into one.
    var buffer = bytes as byte[] ?? bytes.ToArray();
    return BytesAsLatin1String(buffer);
}
public static string BytesAsLatin1String(byte[] bytes)
{
if (bytes == null)

View File

@@ -1,13 +1,25 @@
namespace UglyToad.PdfPig.XObjects
{
using System;
using System.Collections.Generic;
using System.Linq;
using Content;
using Exceptions;
using Filters;
using Geometry;
using Graphics;
using Graphics.Colors;
using Graphics.Core;
using Tokenization.Scanner;
using Tokens;
using Util;
internal class XObjectFactory
internal static class XObjectFactory
{
public XObjectImage CreateImage(XObjectContentRecord xObject, IPdfTokenScanner pdfScanner, bool isLenientParsing)
public static XObjectImage ReadImage(XObjectContentRecord xObject, IPdfTokenScanner pdfScanner,
IFilterProvider filterProvider,
IResourceStore resourceStore,
bool isLenientParsing)
{
if (xObject == null)
{
@@ -19,19 +31,103 @@
throw new InvalidOperationException($"Cannot create an image from an XObject with type: {xObject.Type}.");
}
var width = xObject.Stream.StreamDictionary.Get<NumericToken>(NameToken.Width, pdfScanner).Int;
var height = xObject.Stream.StreamDictionary.Get<NumericToken>(NameToken.Height, pdfScanner).Int;
var dictionary = xObject.Stream.StreamDictionary;
var isJpxDecode = xObject.Stream.StreamDictionary.TryGet(NameToken.Filter, out var token)
var bounds = xObject.AppliedTransformation.Transform(new PdfRectangle(new PdfPoint(0, 0), new PdfPoint(1, 1)));
var width = dictionary.Get<NumericToken>(NameToken.Width, pdfScanner).Int;
var height = dictionary.Get<NumericToken>(NameToken.Height, pdfScanner).Int;
var isImageMask = dictionary.TryGet(NameToken.ImageMask, pdfScanner, out BooleanToken isMaskToken)
&& isMaskToken.Data;
var isJpxDecode = dictionary.TryGet(NameToken.Filter, out var token)
&& token is NameToken filterName
&& filterName.Equals(NameToken.JpxDecode);
var isImageMask = xObject.Stream.StreamDictionary.TryGet(NameToken.ImageMask, out var maskToken)
&& maskToken is BooleanToken maskBoolean
&& maskBoolean.Data;
int bitsPerComponent = 0;
if (!isImageMask && !isJpxDecode)
{
if (!dictionary.TryGet(NameToken.BitsPerComponent, pdfScanner, out NumericToken bitsPerComponentToken))
{
throw new PdfDocumentFormatException($"No bits per component defined for image: {dictionary}.");
}
return new XObjectImage(width, height, isJpxDecode, isImageMask, xObject.Stream.StreamDictionary,
xObject.Stream.Data);
bitsPerComponent = bitsPerComponentToken.Int;
}
else if (isImageMask)
{
bitsPerComponent = 1;
}
var intent = xObject.DefaultRenderingIntent;
if (dictionary.TryGet(NameToken.Intent, out NameToken renderingIntentToken))
{
intent = renderingIntentToken.Data.ToRenderingIntent();
}
var interpolate = dictionary.TryGet(NameToken.Interpolate, pdfScanner, out BooleanToken interpolateToken)
&& interpolateToken.Data;
var decodedBytes = new Lazy<IReadOnlyList<byte>>(() => xObject.Stream.Decode(filterProvider));
var decode = EmptyArray<decimal>.Instance;
if (dictionary.TryGet(NameToken.Decode, pdfScanner, out ArrayToken decodeArrayToken))
{
decode = decodeArrayToken.Data.OfType<NumericToken>()
.Select(x => x.Data)
.ToArray();
}
var colorSpace = default(ColorSpace?);
if (!isImageMask)
{
if (dictionary.TryGet(NameToken.ColorSpace, pdfScanner, out NameToken colorSpaceNameToken)
&& TryMapColorSpace(colorSpaceNameToken, resourceStore, out var colorSpaceResult))
{
colorSpace = colorSpaceResult;
}
else if (dictionary.TryGet(NameToken.ColorSpace, pdfScanner, out ArrayToken colorSpaceArrayToken))
{
if (colorSpaceArrayToken.Length == 0)
{
throw new PdfDocumentFormatException($"Empty ColorSpace array defined for image XObject: {dictionary}.");
}
var first = colorSpaceArrayToken.Data[0];
if (!(first is NameToken firstColorSpaceName) || !TryMapColorSpace(firstColorSpaceName, resourceStore, out colorSpaceResult))
{
throw new PdfDocumentFormatException($"Invalid ColorSpace array defined for image XObject: {colorSpaceArrayToken}.");
}
colorSpace = colorSpaceResult;
}
else if (!isJpxDecode)
{
throw new PdfDocumentFormatException($"No ColorSpace defined for image XObject: {dictionary}.");
}
}
return new XObjectImage(bounds, width, height, bitsPerComponent, colorSpace, isJpxDecode, isImageMask, intent, interpolate, decode,
dictionary, xObject.Stream.Data, decodedBytes);
}
/// <summary>
/// Resolves a color space name, first by mapping the name directly and then, if that fails,
/// by looking the name up in the resource store and mapping the name it refers to.
/// </summary>
/// <param name="name">The color space name taken from the image dictionary.</param>
/// <param name="resourceStore">The store queried for a named color space when direct mapping fails.</param>
/// <param name="colorSpaceResult">The mapped color space when the method returns <c>true</c>.</param>
/// <returns><c>true</c> if the name could be mapped to a color space, otherwise <c>false</c>.</returns>
private static bool TryMapColorSpace(NameToken name, IResourceStore resourceStore, out ColorSpace colorSpaceResult)
{
    var mappedDirectly = name.TryMapToColorSpace(out colorSpaceResult);
    if (mappedDirectly)
    {
        return true;
    }

    // Only a resource-store entry that is itself a name can be mapped.
    if (resourceStore.TryGetNamedColorSpace(name, out var namedEntry) && namedEntry is NameToken resolvedName)
    {
        return resolvedName.TryMapToColorSpace(out colorSpaceResult);
    }

    return false;
}
}
}

View File

@@ -2,23 +2,35 @@
{
using System;
using System.Collections.Generic;
using Content;
using Geometry;
using Graphics.Colors;
using Graphics.Core;
using Tokens;
using Util.JetBrains.Annotations;
/// <inheritdoc />
/// <summary>
/// The raw stream from a PDF document representing an image XObject.
/// A PostScript image XObject.
/// </summary>
public class XObjectImage
public class XObjectImage : IPdfImage
{
/// <summary>
/// The width of the image in samples.
/// </summary>
public int Width { get; }
private readonly Lazy<IReadOnlyList<byte>> bytes;
/// <summary>
/// The height of the image in samples.
/// </summary>
public int Height { get; }
/// <inheritdoc />
public PdfRectangle Bounds { get; }
/// <inheritdoc />
public int WidthInSamples { get; }
/// <inheritdoc />
public int HeightInSamples { get; }
/// <inheritdoc />
public ColorSpace? ColorSpace { get; }
/// <inheritdoc />
public int BitsPerComponent { get; }
/// <summary>
/// The JPX filter encodes data using the JPEG2000 compression method.
@@ -27,41 +39,67 @@
/// </summary>
public bool IsJpxEncoded { get; }
/// <summary>
/// Whether this image should be treated as an image mask.
/// </summary>
/// <inheritdoc />
public RenderingIntent RenderingIntent { get; }
/// <inheritdoc />
public bool IsImageMask { get; }
/// <inheritdoc />
public IReadOnlyList<decimal> Decode { get; }
/// <inheritdoc />
public bool Interpolate { get; }
/// <inheritdoc />
public bool IsInlineImage { get; } = false;
/// <summary>
/// The full dictionary for this Image XObject.
/// </summary>
[NotNull]
public DictionaryToken ImageDictionary { get; }
/// <summary>
/// The encoded bytes of this image, must be decoded via any
/// filters defined in the <see cref="ImageDictionary"/> prior to consumption.
/// </summary>
/// <inheritdoc />
public IReadOnlyList<byte> RawBytes { get; }
/// <inheritdoc />
[NotNull]
public IReadOnlyList<byte> Bytes { get; }
public IReadOnlyList<byte> Bytes => bytes.Value;
/// <summary>
/// Creates a new <see cref="XObjectImage"/>.
/// </summary>
internal XObjectImage(int width, int height, bool isJpxEncoded, bool isImageMask, DictionaryToken imageDictionary, IReadOnlyList<byte> bytes)
internal XObjectImage(PdfRectangle bounds, int widthInSamples, int heightInSamples, int bitsPerComponent,
ColorSpace? colorSpace,
bool isJpxEncoded,
bool isImageMask,
RenderingIntent renderingIntent,
bool interpolate,
IReadOnlyList<decimal> decode,
DictionaryToken imageDictionary,
IReadOnlyList<byte> rawBytes,
Lazy<IReadOnlyList<byte>> bytes)
{
Width = width;
Height = height;
Bounds = bounds;
WidthInSamples = widthInSamples;
HeightInSamples = heightInSamples;
BitsPerComponent = bitsPerComponent;
ColorSpace = colorSpace;
IsJpxEncoded = isJpxEncoded;
IsImageMask = isImageMask;
RenderingIntent = renderingIntent;
Interpolate = interpolate;
Decode = decode;
ImageDictionary = imageDictionary ?? throw new ArgumentNullException(nameof(imageDictionary));
Bytes = bytes ?? throw new ArgumentNullException(nameof(bytes));
RawBytes = rawBytes;
this.bytes = bytes ?? throw new ArgumentNullException(nameof(bytes));
}
/// <inheritdoc />
public override string ToString()
{
return ImageDictionary.ToString();
return $"XObject Image (w {Bounds.Width}, h {Bounds.Height}): {ImageDictionary}";
}
}
}