mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-10-15 19:54:52 +08:00
add xobjects to pages, fix parsing truetype fonts where the glyphs use the repeat flag.
This commit is contained in:
@@ -44,6 +44,10 @@
|
|||||||
public void ShowPositionedText(IReadOnlyList<IToken> tokens)
|
public void ShowPositionedText(IReadOnlyList<IToken> tokens)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void ApplyXObject(StreamToken xObjectStream)
{
    // Deliberately does nothing: this implementation ignores any XObject handed to it.
}
|
||||||
}
|
}
|
||||||
|
|
||||||
internal class TestResourceStore : IResourceStore
|
internal class TestResourceStore : IResourceStore
|
||||||
@@ -56,5 +60,10 @@
|
|||||||
{
|
{
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This store holds no XObjects, so every lookup yields null.
public StreamToken GetXObject(NameToken name) => null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -37,5 +37,16 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[Fact]
public void GetsImageOnPageOne()
{
    // Smoke test: there is no assertion, the test passes as long as retrieving
    // the images of the first page does not throw.
    using (var document = PdfDocument.Open(GetFilename()))
    {
        document.GetPage(1).Content.GetImages();
    }
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
@@ -8,5 +8,7 @@
|
|||||||
void LoadResourceDictionary(DictionaryToken resourceDictionary, bool isLenientParsing);
|
void LoadResourceDictionary(DictionaryToken resourceDictionary, bool isLenientParsing);
|
||||||
|
|
||||||
IFont GetFont(NameToken name);
|
IFont GetFont(NameToken name);
|
||||||
|
|
||||||
|
/// <summary>
/// Gets the XObject stream stored in the resources under the given name.
/// </summary>
StreamToken GetXObject(NameToken name);
|
||||||
}
|
}
|
||||||
}
|
}
|
@@ -1,7 +1,10 @@
|
|||||||
namespace UglyToad.PdfPig.Content
|
namespace UglyToad.PdfPig.Content
|
||||||
{
|
{
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
|
using Graphics;
|
||||||
using Graphics.Operations;
|
using Graphics.Operations;
|
||||||
|
using Tokenization.Scanner;
|
||||||
|
using XObject;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
///
|
///
|
||||||
@@ -12,8 +15,35 @@
|
|||||||
/// </remarks>
|
/// </remarks>
|
||||||
internal class PageContent
|
internal class PageContent
|
||||||
{
|
{
|
||||||
internal IReadOnlyList<IGraphicsStateOperation> GraphicsStateOperations { get; set; }
|
private readonly IReadOnlyDictionary<XObjectType, List<XObjectContentRecord>> xObjects;
|
||||||
|
private readonly IPdfTokenScanner pdfScanner;
|
||||||
|
private readonly XObjectFactory xObjectFactory;
|
||||||
|
private readonly bool isLenientParsing;
|
||||||
|
|
||||||
public IReadOnlyList<Letter> Letters { get; set; }
|
internal IReadOnlyList<IGraphicsStateOperation> GraphicsStateOperations { get; }
|
||||||
|
|
||||||
|
public IReadOnlyList<Letter> Letters { get; }
|
||||||
|
|
||||||
|
/// <summary>
/// Creates the content of a page from the results of processing its content stream.
/// </summary>
/// <param name="graphicsStateOperations">The operations which make up the page content.</param>
/// <param name="letters">The letters found on the page.</param>
/// <param name="xObjects">The XObject records encountered on the page, grouped by XObject type.</param>
/// <param name="pdfScanner">The scanner handed to the XObject factory when images are requested.</param>
/// <param name="xObjectFactory">The factory used to create images from the stored XObject records.</param>
/// <param name="isLenientParsing">The parsing mode flag handed to the XObject factory.</param>
internal PageContent(
    IReadOnlyList<IGraphicsStateOperation> graphicsStateOperations,
    IReadOnlyList<Letter> letters,
    IReadOnlyDictionary<XObjectType, List<XObjectContentRecord>> xObjects,
    IPdfTokenScanner pdfScanner,
    XObjectFactory xObjectFactory,
    bool isLenientParsing)
{
    // Collaborators needed later by GetImages.
    this.xObjects = xObjects;
    this.xObjectFactory = xObjectFactory;
    this.pdfScanner = pdfScanner;
    this.isLenientParsing = isLenientParsing;

    // Page data exposed through the read-only properties.
    Letters = letters;
    GraphicsStateOperations = graphicsStateOperations;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Passes every image XObject record stored for this page to the XObject factory.
/// </summary>
/// <remarks>
/// Nothing is returned yet; the factory call is made for each stored image record only.
/// When no image records are available (no map was supplied, or it has no
/// <see cref="XObjectType.Image"/> entry) the method does nothing instead of throwing.
/// </remarks>
public void GetImages()
{
    // Use TryGetValue rather than the indexer so that a map without an Image
    // bucket is treated as "no images" instead of raising KeyNotFoundException.
    if (xObjects == null
        || !xObjects.TryGetValue(XObjectType.Image, out var imageRecords)
        || imageRecords == null)
    {
        return;
    }

    foreach (var imageRecord in imageRecords)
    {
        xObjectFactory.CreateImage(imageRecord, pdfScanner, isLenientParsing);
    }
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -89,6 +89,15 @@
|
|||||||
|
|
||||||
return font;
|
return font;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Looks up the reference stored for <paramref name="name"/> in the current resource state
/// and resolves it to a stream using the scanner.
/// </summary>
public StreamToken GetXObject(NameToken name)
{
    // NOTE(review): the indexer presumably throws when the name was never loaded - confirm
    // against the type of currentResourceState.
    var xObjectReference = new IndirectReferenceToken(currentResourceState[name]);

    return DirectObjectFinder.Get<StreamToken>(xObjectReference, scanner);
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
13
src/UglyToad.PdfPig/Filters/DctDecodeFilter.cs
Normal file
13
src/UglyToad.PdfPig/Filters/DctDecodeFilter.cs
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
namespace UglyToad.PdfPig.Filters
|
||||||
|
{
|
||||||
|
using System;
|
||||||
|
using Tokenization.Tokens;
|
||||||
|
|
||||||
|
/// <summary>
/// Placeholder for the DCT decode filter. Decoding is not implemented yet.
/// </summary>
internal class DctDecodeFilter : IFilter
{
    /// <summary>
    /// Not implemented: every call throws <see cref="NotImplementedException"/>.
    /// </summary>
    public byte[] Decode(byte[] input, DictionaryToken streamDictionary, int filterIndex)
        => throw new NotImplementedException();
}
|
||||||
|
}
|
@@ -224,6 +224,18 @@
|
|||||||
for (var i = 0; i < pointCount; i++)
|
for (var i = 0; i < pointCount; i++)
|
||||||
{
|
{
|
||||||
result[i] = (SimpleGlyphFlags)data.ReadByte();
|
result[i] = (SimpleGlyphFlags)data.ReadByte();
|
||||||
|
|
||||||
|
if (result[i].HasFlag(SimpleGlyphFlags.Repeat))
|
||||||
|
{
|
||||||
|
var numberOfRepeats = data.ReadByte();
|
||||||
|
|
||||||
|
for (int j = 0; j < numberOfRepeats; j++)
|
||||||
|
{
|
||||||
|
result[i + j + 1] = result[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
i += numberOfRepeats;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
|
@@ -9,28 +9,43 @@
|
|||||||
using IO;
|
using IO;
|
||||||
using Operations;
|
using Operations;
|
||||||
using PdfPig.Core;
|
using PdfPig.Core;
|
||||||
|
using Tokenization.Scanner;
|
||||||
using Tokenization.Tokens;
|
using Tokenization.Tokens;
|
||||||
using Util;
|
using Util;
|
||||||
|
using XObject;
|
||||||
|
|
||||||
internal class ContentStreamProcessor : IOperationContext
|
internal class ContentStreamProcessor : IOperationContext
|
||||||
{
|
{
|
||||||
private readonly IResourceStore resourceStore;
|
private readonly IResourceStore resourceStore;
|
||||||
private readonly UserSpaceUnit userSpaceUnit;
|
private readonly UserSpaceUnit userSpaceUnit;
|
||||||
private readonly bool isLenientParsing;
|
private readonly bool isLenientParsing;
|
||||||
|
private readonly IPdfTokenScanner pdfScanner;
|
||||||
|
private readonly XObjectFactory xObjectFactory;
|
||||||
|
|
||||||
private Stack<CurrentGraphicsState> graphicsStack = new Stack<CurrentGraphicsState>();
|
private Stack<CurrentGraphicsState> graphicsStack = new Stack<CurrentGraphicsState>();
|
||||||
|
|
||||||
public TextMatrices TextMatrices { get; } = new TextMatrices();
|
public TextMatrices TextMatrices { get; } = new TextMatrices();
|
||||||
|
|
||||||
public int StackSize => graphicsStack.Count;
|
public int StackSize => graphicsStack.Count;
|
||||||
|
|
||||||
|
private readonly Dictionary<XObjectType, List<XObjectContentRecord>> xObjects = new Dictionary<XObjectType, List<XObjectContentRecord>>
|
||||||
|
{
|
||||||
|
{XObjectType.Form, new List<XObjectContentRecord>()},
|
||||||
|
{XObjectType.Image, new List<XObjectContentRecord>()},
|
||||||
|
{XObjectType.PostScript, new List<XObjectContentRecord>()}
|
||||||
|
};
|
||||||
|
|
||||||
public List<Letter> Letters = new List<Letter>();
|
public List<Letter> Letters = new List<Letter>();
|
||||||
|
|
||||||
public ContentStreamProcessor(PdfRectangle cropBox, IResourceStore resourceStore, UserSpaceUnit userSpaceUnit, bool isLenientParsing)
|
public ContentStreamProcessor(PdfRectangle cropBox, IResourceStore resourceStore, UserSpaceUnit userSpaceUnit, bool isLenientParsing,
|
||||||
|
IPdfTokenScanner pdfScanner,
|
||||||
|
XObjectFactory xObjectFactory)
|
||||||
{
|
{
|
||||||
this.resourceStore = resourceStore;
|
this.resourceStore = resourceStore;
|
||||||
this.userSpaceUnit = userSpaceUnit;
|
this.userSpaceUnit = userSpaceUnit;
|
||||||
this.isLenientParsing = isLenientParsing;
|
this.isLenientParsing = isLenientParsing;
|
||||||
|
this.pdfScanner = pdfScanner;
|
||||||
|
this.xObjectFactory = xObjectFactory;
|
||||||
graphicsStack.Push(new CurrentGraphicsState());
|
graphicsStack.Push(new CurrentGraphicsState());
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -40,11 +55,7 @@
|
|||||||
|
|
||||||
ProcessOperations(operations);
|
ProcessOperations(operations);
|
||||||
|
|
||||||
return new PageContent
|
return new PageContent(operations, Letters, xObjects, pdfScanner, xObjectFactory, isLenientParsing);
|
||||||
{
|
|
||||||
GraphicsStateOperations = operations,
|
|
||||||
Letters = Letters
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void ProcessOperations(IReadOnlyList<IGraphicsStateOperation> operations)
|
private void ProcessOperations(IReadOnlyList<IGraphicsStateOperation> operations)
|
||||||
@@ -205,6 +216,34 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Classifies the XObject by the Subtype entry of its stream dictionary and stores it,
/// together with the current transformation matrix, in the bucket for that type.
/// </summary>
/// <param name="xObjectStream">The stream of the XObject being applied.</param>
/// <exception cref="InvalidOperationException">
/// The Subtype is not one of PS, Image or Form.
/// </exception>
public void ApplyXObject(StreamToken xObjectStream)
{
    // The XObject is not interpreted here. It is only recorded with the transformation in
    // effect at this point so that consumers of the page can request the objects by type later.
    var subType = (NameToken)xObjectStream.StreamDictionary.Data[NameToken.Subtype.Data];

    var matrix = GetCurrentState().CurrentTransformationMatrix;

    XObjectType type;
    if (subType.Equals(NameToken.Ps))
    {
        type = XObjectType.PostScript;
    }
    else if (subType.Equals(NameToken.Image))
    {
        type = XObjectType.Image;
    }
    else if (subType.Equals(NameToken.Form))
    {
        type = XObjectType.Form;
    }
    else
    {
        throw new InvalidOperationException($"XObject encountered with unexpected SubType {subType}. {xObjectStream.StreamDictionary}.");
    }

    xObjects[type].Add(new XObjectContentRecord(type, xObjectStream, matrix));
}
|
||||||
|
|
||||||
private void AdjustTextMatrix(decimal tx, decimal ty)
|
private void AdjustTextMatrix(decimal tx, decimal ty)
|
||||||
{
|
{
|
||||||
var matrix = TransformationMatrix.GetTranslationMatrix(tx, ty);
|
var matrix = TransformationMatrix.GetTranslationMatrix(tx, ty);
|
||||||
|
@@ -19,5 +19,7 @@
|
|||||||
void ShowText(IInputBytes bytes);
|
void ShowText(IInputBytes bytes);
|
||||||
|
|
||||||
void ShowPositionedText(IReadOnlyList<IToken> tokens);
|
void ShowPositionedText(IReadOnlyList<IToken> tokens);
|
||||||
|
|
||||||
|
/// <summary>
/// Applies the XObject contained in the given stream to this operation context.
/// </summary>
void ApplyXObject(StreamToken xObjectStream);
|
||||||
}
|
}
|
||||||
}
|
}
|
@@ -18,6 +18,9 @@
|
|||||||
|
|
||||||
public void Run(IOperationContext operationContext, IResourceStore resourceStore)
|
public void Run(IOperationContext operationContext, IResourceStore resourceStore)
|
||||||
{
|
{
|
||||||
|
var xobject = resourceStore.GetXObject(Name);
|
||||||
|
|
||||||
|
operationContext.ApplyXObject(xobject);
|
||||||
}
|
}
|
||||||
|
|
||||||
public override string ToString()
|
public override string ToString()
|
||||||
|
25
src/UglyToad.PdfPig/Graphics/XObjectContentRecord.cs
Normal file
25
src/UglyToad.PdfPig/Graphics/XObjectContentRecord.cs
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
namespace UglyToad.PdfPig.Graphics
|
||||||
|
{
|
||||||
|
using System;
|
||||||
|
using PdfPig.Core;
|
||||||
|
using Tokenization.Tokens;
|
||||||
|
using Util.JetBrains.Annotations;
|
||||||
|
using XObject;
|
||||||
|
|
||||||
|
/// <summary>
/// An XObject stream together with its type and the transformation matrix supplied when it was recorded.
/// </summary>
internal class XObjectContentRecord
{
    /// <summary>
    /// The type of the XObject.
    /// </summary>
    public XObjectType Type { get; }

    /// <summary>
    /// The stream of the XObject. Never null.
    /// </summary>
    [NotNull]
    public StreamToken Stream { get; }

    /// <summary>
    /// The transformation matrix recorded for the XObject.
    /// </summary>
    public TransformationMatrix AppliedTransformation { get; }

    /// <summary>
    /// Creates a new <see cref="XObjectContentRecord"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    public XObjectContentRecord(XObjectType type, StreamToken stream, TransformationMatrix appliedTransformation)
    {
        if (stream == null)
        {
            throw new ArgumentNullException(nameof(stream));
        }

        Type = type;
        Stream = stream;
        AppliedTransformation = appliedTransformation;
    }
}
|
||||||
|
}
|
@@ -14,20 +14,24 @@
|
|||||||
using Tokenization.Scanner;
|
using Tokenization.Scanner;
|
||||||
using Tokenization.Tokens;
|
using Tokenization.Tokens;
|
||||||
using Util;
|
using Util;
|
||||||
|
using XObject;
|
||||||
|
|
||||||
internal class PageFactory : IPageFactory
|
internal class PageFactory : IPageFactory
|
||||||
{
|
{
|
||||||
private readonly IResourceStore resourceStore;
|
private readonly IResourceStore resourceStore;
|
||||||
private readonly IFilterProvider filterProvider;
|
private readonly IFilterProvider filterProvider;
|
||||||
private readonly IPageContentParser pageContentParser;
|
private readonly IPageContentParser pageContentParser;
|
||||||
|
private readonly XObjectFactory xObjectFactory;
|
||||||
private readonly IPdfTokenScanner pdfScanner;
|
private readonly IPdfTokenScanner pdfScanner;
|
||||||
|
|
||||||
public PageFactory(IPdfTokenScanner pdfScanner, IResourceStore resourceStore, IFilterProvider filterProvider,
|
public PageFactory(IPdfTokenScanner pdfScanner, IResourceStore resourceStore, IFilterProvider filterProvider,
|
||||||
IPageContentParser pageContentParser)
|
IPageContentParser pageContentParser,
|
||||||
|
XObjectFactory xObjectFactory)
|
||||||
{
|
{
|
||||||
this.resourceStore = resourceStore;
|
this.resourceStore = resourceStore;
|
||||||
this.filterProvider = filterProvider;
|
this.filterProvider = filterProvider;
|
||||||
this.pageContentParser = pageContentParser;
|
this.pageContentParser = pageContentParser;
|
||||||
|
this.xObjectFactory = xObjectFactory;
|
||||||
this.pdfScanner = pdfScanner;
|
this.pdfScanner = pdfScanner;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -111,7 +115,7 @@
|
|||||||
|
|
||||||
var operations = pageContentParser.Parse(new ByteArrayInputBytes(contentBytes));
|
var operations = pageContentParser.Parse(new ByteArrayInputBytes(contentBytes));
|
||||||
|
|
||||||
var context = new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit, isLenientParsing);
|
var context = new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit, isLenientParsing, pdfScanner, xObjectFactory);
|
||||||
|
|
||||||
return context.Process(operations);
|
return context.Process(operations);
|
||||||
}
|
}
|
||||||
|
@@ -21,6 +21,7 @@
|
|||||||
using Tokenization.Scanner;
|
using Tokenization.Scanner;
|
||||||
using Tokenization.Tokens;
|
using Tokenization.Tokens;
|
||||||
using Util;
|
using Util;
|
||||||
|
using XObject;
|
||||||
|
|
||||||
internal static class PdfDocumentFactory
|
internal static class PdfDocumentFactory
|
||||||
{
|
{
|
||||||
@@ -109,7 +110,7 @@
|
|||||||
|
|
||||||
var resourceContainer = new ResourceContainer(pdfScanner, fontFactory);
|
var resourceContainer = new ResourceContainer(pdfScanner, fontFactory);
|
||||||
|
|
||||||
var pageFactory = new PageFactory(pdfScanner, resourceContainer, filterProvider, new PageContentParser(new ReflectionGraphicsStateOperationFactory()));
|
var pageFactory = new PageFactory(pdfScanner, resourceContainer, filterProvider, new PageContentParser(new ReflectionGraphicsStateOperationFactory()), new XObjectFactory());
|
||||||
var informationFactory = new DocumentInformationFactory();
|
var informationFactory = new DocumentInformationFactory();
|
||||||
|
|
||||||
|
|
||||||
|
@@ -10,6 +10,10 @@
|
|||||||
{
|
{
|
||||||
Data = data;
|
Data = data;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
|
/// <summary>
/// Returns the string form of <c>Data</c>.
/// </summary>
public override string ToString() => $"{Data}";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
47
src/UglyToad.PdfPig/XObject/XObjectFactory.cs
Normal file
47
src/UglyToad.PdfPig/XObject/XObjectFactory.cs
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
namespace UglyToad.PdfPig.XObject
|
||||||
|
{
|
||||||
|
using System;
|
||||||
|
using Graphics;
|
||||||
|
using Tokenization.Scanner;
|
||||||
|
using Tokenization.Tokens;
|
||||||
|
|
||||||
|
/// <summary>
/// Inspects XObject records in order to create objects from them.
/// </summary>
internal class XObjectFactory
{
    /// <summary>
    /// Reads the image properties from the stream dictionary of an image XObject.
    /// </summary>
    /// <remarks>
    /// Work in progress: the values are read but nothing is produced yet. Processing stops
    /// early when the Filter entry is the JpxDecode name or when ImageMask is true.
    /// </remarks>
    /// <param name="xObject">The record to inspect, its type must be <see cref="XObjectType.Image"/>.</param>
    /// <param name="pdfScanner">The scanner passed to the dictionary lookups.</param>
    /// <param name="isLenientParsing">Currently unused.</param>
    /// <exception cref="ArgumentNullException"><paramref name="xObject"/> is null.</exception>
    /// <exception cref="InvalidOperationException">The record is not an image.</exception>
    public void CreateImage(XObjectContentRecord xObject, IPdfTokenScanner pdfScanner, bool isLenientParsing)
    {
        if (xObject == null)
        {
            throw new ArgumentNullException(nameof(xObject));
        }

        if (xObject.Type != XObjectType.Image)
        {
            throw new InvalidOperationException($"Cannot create an image from an XObject with type: {xObject.Type}.");
        }

        var dictionary = xObject.Stream.StreamDictionary;

        var imageWidth = dictionary.Get<NumericToken>(NameToken.Width, pdfScanner).Int;
        var imageHeight = dictionary.Get<NumericToken>(NameToken.Height, pdfScanner).Int;

        // Only a Filter given directly as a single name is recognised here.
        if (dictionary.TryGet(NameToken.Filter, out var filterToken)
            && filterToken is NameToken filterName
            && filterName.Equals(NameToken.JpxDecode))
        {
            return;
        }

        if (dictionary.TryGet(NameToken.ImageMask, out var maskToken)
            && maskToken is BooleanToken maskFlag
            && maskFlag.Data)
        {
            return;
        }

        var bitsPerComponent = dictionary.Get<NumericToken>(NameToken.BitsPerComponent, pdfScanner).Int;
    }
}
|
||||||
|
}
|
9
src/UglyToad.PdfPig/XObject/XObjectType.cs
Normal file
9
src/UglyToad.PdfPig/XObject/XObjectType.cs
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
namespace UglyToad.PdfPig.XObject
|
||||||
|
{
|
||||||
|
/// <summary>
/// The kinds of XObject which are distinguished when recording page content.
/// </summary>
internal enum XObjectType
{
    /// <summary>
    /// An image XObject.
    /// </summary>
    Image = 0,

    /// <summary>
    /// A form XObject.
    /// </summary>
    Form = 1,

    /// <summary>
    /// A PostScript XObject.
    /// </summary>
    PostScript = 2
}
|
||||||
|
}
|
Reference in New Issue
Block a user