mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-10-15 19:54:52 +08:00
Add XObjects to pages; fix parsing of TrueType fonts whose glyphs use the repeat flag.
This commit is contained in:
@@ -44,6 +44,10 @@
|
||||
public void ShowPositionedText(IReadOnlyList<IToken> tokens)
|
||||
{
|
||||
}
|
||||
|
||||
public void ApplyXObject(StreamToken xObjectStream)
{
    // Deliberately empty: this implementation ignores any XObject it is handed.
}
|
||||
}
|
||||
|
||||
internal class TestResourceStore : IResourceStore
|
||||
@@ -56,5 +60,10 @@
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
// Never resolves an XObject: the result is always null regardless of the name.
public StreamToken GetXObject(NameToken name) => null;
|
||||
}
|
||||
}
|
||||
|
@@ -37,5 +37,16 @@
|
||||
}
|
||||
}
|
||||
|
||||
[Fact]
public void GetsImageOnPageOne()
{
    // Smoke test: there are no assertions, so it passes as long as
    // requesting the images of page 1 does not throw.
    using (var pdf = PdfDocument.Open(GetFilename()))
    {
        pdf.GetPage(1).Content.GetImages();
    }
}
|
||||
|
||||
}
|
||||
}
|
@@ -8,5 +8,7 @@
|
||||
void LoadResourceDictionary(DictionaryToken resourceDictionary, bool isLenientParsing);
|
||||
|
||||
IFont GetFont(NameToken name);
|
||||
|
||||
/// <summary>
/// Gets the stream of the XObject registered under <paramref name="name"/>.
/// </summary>
/// <param name="name">The resource name of the XObject to look up.</param>
/// <returns>The stream token for the XObject.</returns>
StreamToken GetXObject(NameToken name);
|
||||
}
|
||||
}
|
@@ -1,7 +1,10 @@
|
||||
namespace UglyToad.PdfPig.Content
|
||||
{
|
||||
using System.Collections.Generic;
|
||||
using Graphics;
|
||||
using Graphics.Operations;
|
||||
using Tokenization.Scanner;
|
||||
using XObject;
|
||||
|
||||
/// <summary>
|
||||
///
|
||||
@@ -12,8 +15,35 @@
|
||||
/// </remarks>
|
||||
internal class PageContent
{
    // XObject records gathered while the page was processed, grouped by type.
    private readonly IReadOnlyDictionary<XObjectType, List<XObjectContentRecord>> xObjects;
    private readonly IPdfTokenScanner pdfScanner;
    private readonly XObjectFactory xObjectFactory;
    private readonly bool isLenientParsing;

    /// <summary>
    /// The graphics state operations supplied when this content was created.
    /// </summary>
    internal IReadOnlyList<IGraphicsStateOperation> GraphicsStateOperations { get; }

    /// <summary>
    /// The letters supplied when this content was created.
    /// </summary>
    public IReadOnlyList<Letter> Letters { get; }

    /// <summary>
    /// Creates the content for a page from already-processed data.
    /// </summary>
    /// <param name="graphicsStateOperations">Exposed through <see cref="GraphicsStateOperations"/>.</param>
    /// <param name="letters">Exposed through <see cref="Letters"/>.</param>
    /// <param name="xObjects">XObject records keyed by type; read by <see cref="GetImages"/>.</param>
    /// <param name="pdfScanner">Passed to the XObject factory when images are created.</param>
    /// <param name="xObjectFactory">Factory invoked for each recorded image XObject.</param>
    /// <param name="isLenientParsing">Passed to the XObject factory when images are created.</param>
    internal PageContent(IReadOnlyList<IGraphicsStateOperation> graphicsStateOperations, IReadOnlyList<Letter> letters,
        IReadOnlyDictionary<XObjectType, List<XObjectContentRecord>> xObjects,
        IPdfTokenScanner pdfScanner,
        XObjectFactory xObjectFactory,
        bool isLenientParsing)
    {
        GraphicsStateOperations = graphicsStateOperations;
        Letters = letters;
        this.xObjects = xObjects;
        this.pdfScanner = pdfScanner;
        this.xObjectFactory = xObjectFactory;
        this.isLenientParsing = isLenientParsing;
    }

    /// <summary>
    /// Runs the XObject factory's image creation over every image XObject recorded for this page.
    /// Does nothing when no image records are available.
    /// </summary>
    public void GetImages()
    {
        // A page without an image entry has nothing to do; avoid a KeyNotFoundException.
        if (xObjects == null || !xObjects.TryGetValue(XObjectType.Image, out var imageRecords) || imageRecords == null)
        {
            return;
        }

        foreach (var contentRecord in imageRecords)
        {
            xObjectFactory.CreateImage(contentRecord, pdfScanner, isLenientParsing);
        }
    }
}
|
||||
}
|
||||
|
@@ -89,6 +89,15 @@
|
||||
|
||||
return font;
|
||||
}
|
||||
|
||||
/// <summary>
/// Looks up <paramref name="name"/> in the current resource state and resolves
/// the referenced object to a stream using the scanner.
/// </summary>
public StreamToken GetXObject(NameToken name)
{
    var referenceToken = new IndirectReferenceToken(currentResourceState[name]);

    return DirectObjectFinder.Get<StreamToken>(referenceToken, scanner);
}
|
||||
}
|
||||
}
|
||||
|
||||
|
13
src/UglyToad.PdfPig/Filters/DctDecodeFilter.cs
Normal file
13
src/UglyToad.PdfPig/Filters/DctDecodeFilter.cs
Normal file
@@ -0,0 +1,13 @@
|
||||
namespace UglyToad.PdfPig.Filters
|
||||
{
|
||||
using System;
|
||||
using Tokenization.Tokens;
|
||||
|
||||
/// <summary>
/// Placeholder filter: decoding is not implemented and every call throws.
/// </summary>
internal class DctDecodeFilter : IFilter
{
    /// <summary>
    /// Not implemented.
    /// </summary>
    /// <exception cref="NotImplementedException">Always thrown.</exception>
    public byte[] Decode(byte[] input, DictionaryToken streamDictionary, int filterIndex)
        => throw new NotImplementedException();
}
|
||||
}
|
@@ -224,6 +224,18 @@
|
||||
for (var i = 0; i < pointCount; i++)
|
||||
{
|
||||
result[i] = (SimpleGlyphFlags)data.ReadByte();
|
||||
|
||||
if (result[i].HasFlag(SimpleGlyphFlags.Repeat))
|
||||
{
|
||||
var numberOfRepeats = data.ReadByte();
|
||||
|
||||
for (int j = 0; j < numberOfRepeats; j++)
|
||||
{
|
||||
result[i + j + 1] = result[i];
|
||||
}
|
||||
|
||||
i += numberOfRepeats;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
|
@@ -9,28 +9,43 @@
|
||||
using IO;
|
||||
using Operations;
|
||||
using PdfPig.Core;
|
||||
using Tokenization.Scanner;
|
||||
using Tokenization.Tokens;
|
||||
using Util;
|
||||
using XObject;
|
||||
|
||||
internal class ContentStreamProcessor : IOperationContext
|
||||
{
|
||||
private readonly IResourceStore resourceStore;
|
||||
private readonly UserSpaceUnit userSpaceUnit;
|
||||
private readonly bool isLenientParsing;
|
||||
private readonly IPdfTokenScanner pdfScanner;
|
||||
private readonly XObjectFactory xObjectFactory;
|
||||
|
||||
private Stack<CurrentGraphicsState> graphicsStack = new Stack<CurrentGraphicsState>();
|
||||
|
||||
public TextMatrices TextMatrices { get; } = new TextMatrices();
|
||||
|
||||
public int StackSize => graphicsStack.Count;
|
||||
|
||||
|
||||
private readonly Dictionary<XObjectType, List<XObjectContentRecord>> xObjects = new Dictionary<XObjectType, List<XObjectContentRecord>>
|
||||
{
|
||||
{XObjectType.Form, new List<XObjectContentRecord>()},
|
||||
{XObjectType.Image, new List<XObjectContentRecord>()},
|
||||
{XObjectType.PostScript, new List<XObjectContentRecord>()}
|
||||
};
|
||||
|
||||
public List<Letter> Letters = new List<Letter>();
|
||||
|
||||
/// <summary>
/// Creates a processor and seeds the graphics stack with an initial <see cref="CurrentGraphicsState"/>.
/// </summary>
/// <param name="cropBox">Not read by this constructor.</param>
/// <param name="resourceStore">Stored for later use by the processor.</param>
/// <param name="userSpaceUnit">Stored for later use by the processor.</param>
/// <param name="isLenientParsing">Stored and forwarded to the page content this processor produces.</param>
/// <param name="pdfScanner">Stored and forwarded to the page content this processor produces.</param>
/// <param name="xObjectFactory">Stored and forwarded to the page content this processor produces.</param>
public ContentStreamProcessor(PdfRectangle cropBox, IResourceStore resourceStore, UserSpaceUnit userSpaceUnit, bool isLenientParsing,
    IPdfTokenScanner pdfScanner,
    XObjectFactory xObjectFactory)
{
    this.resourceStore = resourceStore;
    this.userSpaceUnit = userSpaceUnit;
    this.isLenientParsing = isLenientParsing;
    this.pdfScanner = pdfScanner;
    this.xObjectFactory = xObjectFactory;

    // The stack must never be empty: start with a default graphics state.
    graphicsStack.Push(new CurrentGraphicsState());
}
|
||||
|
||||
@@ -40,11 +55,7 @@
|
||||
|
||||
ProcessOperations(operations);
|
||||
|
||||
return new PageContent
|
||||
{
|
||||
GraphicsStateOperations = operations,
|
||||
Letters = Letters
|
||||
};
|
||||
return new PageContent(operations, Letters, xObjects, pdfScanner, xObjectFactory, isLenientParsing);
|
||||
}
|
||||
|
||||
private void ProcessOperations(IReadOnlyList<IGraphicsStateOperation> operations)
|
||||
@@ -205,6 +216,34 @@
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Records an XObject together with the transformation matrix in effect when it was invoked.
/// The XObject is not rendered here; it is stored by type so it can be retrieved later.
/// </summary>
/// <param name="xObjectStream">The XObject stream; its dictionary must contain a Subtype name.</param>
/// <exception cref="ArgumentNullException"><paramref name="xObjectStream"/> is null.</exception>
/// <exception cref="InvalidOperationException">
/// The Subtype entry is missing, is not a name, or is not one of PS, Image or Form.
/// </exception>
public void ApplyXObject(StreamToken xObjectStream)
{
    if (xObjectStream == null)
    {
        throw new ArgumentNullException(nameof(xObjectStream));
    }

    // Read the subtype defensively so a malformed dictionary produces a descriptive error
    // rather than a KeyNotFoundException or InvalidCastException.
    NameToken subType = null;
    if (xObjectStream.StreamDictionary.TryGet(NameToken.Subtype, out var subTypeToken))
    {
        subType = subTypeToken as NameToken;
    }

    if (subType == null)
    {
        throw new InvalidOperationException($"XObject encountered without a SubType name. {xObjectStream.StreamDictionary}.");
    }

    XObjectType type;
    if (subType.Equals(NameToken.Ps))
    {
        type = XObjectType.PostScript;
    }
    else if (subType.Equals(NameToken.Image))
    {
        type = XObjectType.Image;
    }
    else if (subType.Equals(NameToken.Form))
    {
        type = XObjectType.Form;
    }
    else
    {
        throw new InvalidOperationException($"XObject encountered with unexpected SubType {subType}. {xObjectStream.StreamDictionary}.");
    }

    // Capture the transformation matrix that is current at the point of invocation.
    var matrix = GetCurrentState().CurrentTransformationMatrix;

    xObjects[type].Add(new XObjectContentRecord(type, xObjectStream, matrix));
}
|
||||
|
||||
private void AdjustTextMatrix(decimal tx, decimal ty)
|
||||
{
|
||||
var matrix = TransformationMatrix.GetTranslationMatrix(tx, ty);
|
||||
|
@@ -19,5 +19,7 @@
|
||||
void ShowText(IInputBytes bytes);
|
||||
|
||||
void ShowPositionedText(IReadOnlyList<IToken> tokens);
|
||||
|
||||
/// <summary>
/// Handles an XObject stream invoked from the content being processed.
/// </summary>
/// <param name="xObjectStream">The stream of the invoked XObject.</param>
void ApplyXObject(StreamToken xObjectStream);
|
||||
}
|
||||
}
|
@@ -18,6 +18,9 @@
|
||||
|
||||
/// <summary>
/// Resolves the XObject named by <c>Name</c> from the resource store and
/// hands it to the operation context.
/// </summary>
public void Run(IOperationContext operationContext, IResourceStore resourceStore)
{
    operationContext.ApplyXObject(resourceStore.GetXObject(Name));
}
|
||||
|
||||
public override string ToString()
|
||||
|
25
src/UglyToad.PdfPig/Graphics/XObjectContentRecord.cs
Normal file
25
src/UglyToad.PdfPig/Graphics/XObjectContentRecord.cs
Normal file
@@ -0,0 +1,25 @@
|
||||
namespace UglyToad.PdfPig.Graphics
|
||||
{
|
||||
using System;
|
||||
using PdfPig.Core;
|
||||
using Tokenization.Tokens;
|
||||
using Util.JetBrains.Annotations;
|
||||
using XObject;
|
||||
|
||||
/// <summary>
/// An XObject stream paired with its type and the transformation matrix supplied at creation.
/// </summary>
internal class XObjectContentRecord
{
    /// <summary>
    /// The kind of XObject this record holds.
    /// </summary>
    public XObjectType Type { get; }

    /// <summary>
    /// The XObject's stream. Never null.
    /// </summary>
    [NotNull]
    public StreamToken Stream { get; }

    /// <summary>
    /// The transformation matrix supplied when the record was created.
    /// </summary>
    public TransformationMatrix AppliedTransformation { get; }

    /// <summary>
    /// Creates a new record.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    public XObjectContentRecord(XObjectType type, StreamToken stream, TransformationMatrix appliedTransformation)
    {
        if (stream == null)
        {
            throw new ArgumentNullException(nameof(stream));
        }

        Type = type;
        Stream = stream;
        AppliedTransformation = appliedTransformation;
    }
}
|
||||
}
|
@@ -14,20 +14,24 @@
|
||||
using Tokenization.Scanner;
|
||||
using Tokenization.Tokens;
|
||||
using Util;
|
||||
using XObject;
|
||||
|
||||
internal class PageFactory : IPageFactory
|
||||
{
|
||||
private readonly IResourceStore resourceStore;
|
||||
private readonly IFilterProvider filterProvider;
|
||||
private readonly IPageContentParser pageContentParser;
|
||||
private readonly XObjectFactory xObjectFactory;
|
||||
private readonly IPdfTokenScanner pdfScanner;
|
||||
|
||||
/// <summary>
/// Creates a page factory from its collaborators; each argument is stored for later use.
/// </summary>
/// <param name="pdfScanner">Scanner passed on to the content stream processor.</param>
/// <param name="resourceStore">Resource store passed on to the content stream processor.</param>
/// <param name="filterProvider">Filter provider retained by the factory.</param>
/// <param name="pageContentParser">Parser used to turn content bytes into operations.</param>
/// <param name="xObjectFactory">XObject factory passed on to the content stream processor.</param>
public PageFactory(IPdfTokenScanner pdfScanner, IResourceStore resourceStore, IFilterProvider filterProvider,
    IPageContentParser pageContentParser,
    XObjectFactory xObjectFactory)
{
    this.resourceStore = resourceStore;
    this.filterProvider = filterProvider;
    this.pageContentParser = pageContentParser;
    this.xObjectFactory = xObjectFactory;
    this.pdfScanner = pdfScanner;
}
|
||||
|
||||
@@ -111,7 +115,7 @@
|
||||
|
||||
var operations = pageContentParser.Parse(new ByteArrayInputBytes(contentBytes));
|
||||
|
||||
var context = new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit, isLenientParsing);
|
||||
var context = new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit, isLenientParsing, pdfScanner, xObjectFactory);
|
||||
|
||||
return context.Process(operations);
|
||||
}
|
||||
|
@@ -21,6 +21,7 @@
|
||||
using Tokenization.Scanner;
|
||||
using Tokenization.Tokens;
|
||||
using Util;
|
||||
using XObject;
|
||||
|
||||
internal static class PdfDocumentFactory
|
||||
{
|
||||
@@ -109,7 +110,7 @@
|
||||
|
||||
var resourceContainer = new ResourceContainer(pdfScanner, fontFactory);
|
||||
|
||||
var pageFactory = new PageFactory(pdfScanner, resourceContainer, filterProvider, new PageContentParser(new ReflectionGraphicsStateOperationFactory()));
|
||||
var pageFactory = new PageFactory(pdfScanner, resourceContainer, filterProvider, new PageContentParser(new ReflectionGraphicsStateOperationFactory()), new XObjectFactory());
|
||||
var informationFactory = new DocumentInformationFactory();
|
||||
|
||||
|
||||
|
@@ -10,6 +10,10 @@
|
||||
{
|
||||
Data = data;
|
||||
}
|
||||
}
|
||||
|
||||
// Renders the token as the string form of its data.
public override string ToString() => $"{Data}";
|
||||
}
|
||||
}
|
||||
|
47
src/UglyToad.PdfPig/XObject/XObjectFactory.cs
Normal file
47
src/UglyToad.PdfPig/XObject/XObjectFactory.cs
Normal file
@@ -0,0 +1,47 @@
|
||||
namespace UglyToad.PdfPig.XObject
|
||||
{
|
||||
using System;
|
||||
using Graphics;
|
||||
using Tokenization.Scanner;
|
||||
using Tokenization.Tokens;
|
||||
|
||||
/// <summary>
/// Reads image XObjects. At present it only reads the image dictionary entries
/// and produces no result.
/// </summary>
internal class XObjectFactory
{
    /// <summary>
    /// Reads the width, height and, where applicable, bits per component of an image XObject.
    /// </summary>
    /// <param name="xObject">The record to read; must be of type <see cref="XObjectType.Image"/>.</param>
    /// <param name="pdfScanner">Scanner passed to the dictionary lookups.</param>
    /// <param name="isLenientParsing">Currently unused.</param>
    /// <exception cref="ArgumentNullException"><paramref name="xObject"/> is null.</exception>
    /// <exception cref="InvalidOperationException">The record is not an image.</exception>
    public void CreateImage(XObjectContentRecord xObject, IPdfTokenScanner pdfScanner, bool isLenientParsing)
    {
        if (xObject == null)
        {
            throw new ArgumentNullException(nameof(xObject));
        }

        if (xObject.Type != XObjectType.Image)
        {
            throw new InvalidOperationException($"Cannot create an image from an XObject with type: {xObject.Type}.");
        }

        var dictionary = xObject.Stream.StreamDictionary;

        // Values are read but not yet used.
        var imageWidth = dictionary.Get<NumericToken>(NameToken.Width, pdfScanner).Int;
        var imageHeight = dictionary.Get<NumericToken>(NameToken.Height, pdfScanner).Int;

        // Only a single filter name is recognised here, matching the original check.
        if (dictionary.TryGet(NameToken.Filter, out var filterToken)
            && filterToken is NameToken filterName
            && filterName.Equals(NameToken.JpxDecode))
        {
            return;
        }

        // NOTE(review): early exits presumably exist because BitsPerComponent is optional
        // for JPXDecode images and image masks; confirm against the PDF specification.
        if (dictionary.TryGet(NameToken.ImageMask, out var maskToken)
            && maskToken is BooleanToken maskFlag
            && maskFlag.Data)
        {
            return;
        }

        var bitsPerComponent = dictionary.Get<NumericToken>(NameToken.BitsPerComponent, pdfScanner).Int;
    }
}
|
||||
}
|
9
src/UglyToad.PdfPig/XObject/XObjectType.cs
Normal file
9
src/UglyToad.PdfPig/XObject/XObjectType.cs
Normal file
@@ -0,0 +1,9 @@
|
||||
namespace UglyToad.PdfPig.XObject
|
||||
{
|
||||
/// <summary>
/// The kinds of XObject distinguished when processing page content.
/// </summary>
internal enum XObjectType
{
    /// <summary>An image XObject.</summary>
    Image = 0,

    /// <summary>A form XObject.</summary>
    Form = 1,

    /// <summary>A PostScript XObject.</summary>
    PostScript = 2
}
|
||||
}
|
Reference in New Issue
Block a user