Add XObjects to pages; fix parsing of TrueType fonts whose glyphs use the repeat flag.

This commit is contained in:
Eliot Jones
2018-04-26 22:22:29 +01:00
parent b6585292fb
commit 1fe54c5f49
16 changed files with 233 additions and 13 deletions

View File

@@ -44,6 +44,10 @@
public void ShowPositionedText(IReadOnlyList<IToken> tokens) public void ShowPositionedText(IReadOnlyList<IToken> tokens)
{ {
} }
/// <summary>
/// No-op implementation: the supplied XObject stream is ignored.
/// </summary>
/// <param name="xObjectStream">The XObject stream passed to the context; unused here.</param>
public void ApplyXObject(StreamToken xObjectStream)
{
}
} }
internal class TestResourceStore : IResourceStore internal class TestResourceStore : IResourceStore
@@ -56,5 +60,10 @@
{ {
return null; return null;
} }
/// <summary>
/// Stub lookup which never resolves an XObject.
/// </summary>
/// <param name="name">The resource name being requested; unused here.</param>
/// <returns>Always <see langword="null"/>.</returns>
public StreamToken GetXObject(NameToken name) => null;
} }
} }

View File

@@ -37,5 +37,16 @@
} }
} }
/// <summary>
/// Smoke test: opens the test document and requests the images on the first page.
/// There is no assertion; the test passes as long as nothing throws.
/// </summary>
[Fact]
public void GetsImageOnPageOne()
{
    var path = GetFilename();

    using (var pdf = PdfDocument.Open(path))
    {
        pdf.GetPage(1).Content.GetImages();
    }
}
} }
} }

View File

@@ -8,5 +8,7 @@
void LoadResourceDictionary(DictionaryToken resourceDictionary, bool isLenientParsing); void LoadResourceDictionary(DictionaryToken resourceDictionary, bool isLenientParsing);
IFont GetFont(NameToken name); IFont GetFont(NameToken name);
/// <summary>
/// Gets the XObject stream for the given resource name.
/// </summary>
/// <param name="name">The name identifying the XObject.</param>
/// <returns>The stream token for the XObject.</returns>
StreamToken GetXObject(NameToken name);
} }
} }

View File

@@ -1,7 +1,10 @@
namespace UglyToad.PdfPig.Content namespace UglyToad.PdfPig.Content
{ {
using System.Collections.Generic; using System.Collections.Generic;
using Graphics;
using Graphics.Operations; using Graphics.Operations;
using Tokenization.Scanner;
using XObject;
/// <summary> /// <summary>
/// ///
@@ -12,8 +15,35 @@
/// </remarks> /// </remarks>
internal class PageContent internal class PageContent
{ {
internal IReadOnlyList<IGraphicsStateOperation> GraphicsStateOperations { get; set; } private readonly IReadOnlyDictionary<XObjectType, List<XObjectContentRecord>> xObjects;
private readonly IPdfTokenScanner pdfScanner;
private readonly XObjectFactory xObjectFactory;
private readonly bool isLenientParsing;
public IReadOnlyList<Letter> Letters { get; set; } internal IReadOnlyList<IGraphicsStateOperation> GraphicsStateOperations { get; }
public IReadOnlyList<Letter> Letters { get; }
/// <summary>
/// Creates the page content from the processed operations, the letters and the XObject records
/// grouped by type.
/// </summary>
/// <param name="graphicsStateOperations">The operations exposed through <see cref="GraphicsStateOperations"/>.</param>
/// <param name="letters">The letters exposed through <see cref="Letters"/>.</param>
/// <param name="xObjects">The XObject records keyed by their type.</param>
/// <param name="pdfScanner">The scanner handed to the XObject factory when images are requested.</param>
/// <param name="xObjectFactory">The factory used by <see cref="GetImages"/>.</param>
/// <param name="isLenientParsing">The parsing mode flag forwarded to the XObject factory.</param>
internal PageContent(
    IReadOnlyList<IGraphicsStateOperation> graphicsStateOperations,
    IReadOnlyList<Letter> letters,
    IReadOnlyDictionary<XObjectType, List<XObjectContentRecord>> xObjects,
    IPdfTokenScanner pdfScanner,
    XObjectFactory xObjectFactory,
    bool isLenientParsing)
{
    // Collaborators needed later for XObject retrieval.
    this.xObjects = xObjects;
    this.xObjectFactory = xObjectFactory;
    this.pdfScanner = pdfScanner;
    this.isLenientParsing = isLenientParsing;

    // Content exposed directly through the properties.
    Letters = letters;
    GraphicsStateOperations = graphicsStateOperations;
}
/// <summary>
/// Passes every image XObject record stored for this page to the XObject factory.
/// </summary>
public void GetImages()
{
    var imageRecords = xObjects[XObjectType.Image];

    imageRecords.ForEach(record => xObjectFactory.CreateImage(record, pdfScanner, isLenientParsing));
}
} }
} }

View File

@@ -89,6 +89,15 @@
return font; return font;
} }
/// <summary>
/// Looks up the reference stored for <paramref name="name"/> in the current resource state
/// and resolves it to a stream using the scanner.
/// </summary>
/// <param name="name">The resource name of the XObject.</param>
/// <returns>The stream token found for the reference.</returns>
public StreamToken GetXObject(NameToken name)
{
    var indirectReference = new IndirectReferenceToken(currentResourceState[name]);

    return DirectObjectFinder.Get<StreamToken>(indirectReference, scanner);
}
} }
} }

View File

@@ -0,0 +1,13 @@
namespace UglyToad.PdfPig.Filters
{
using System;
using Tokenization.Tokens;
/// <summary>
/// Placeholder <see cref="IFilter"/> for DctDecode; decoding is not implemented.
/// </summary>
internal class DctDecodeFilter : IFilter
{
    /// <summary>
    /// Not implemented.
    /// </summary>
    /// <param name="input">The encoded bytes; unused.</param>
    /// <param name="streamDictionary">The dictionary of the stream being decoded; unused.</param>
    /// <param name="filterIndex">The index of this filter; unused.</param>
    /// <returns>Never returns.</returns>
    /// <exception cref="NotImplementedException">Always thrown.</exception>
    public byte[] Decode(byte[] input, DictionaryToken streamDictionary, int filterIndex)
        => throw new NotImplementedException();
}
}

View File

@@ -224,6 +224,18 @@
for (var i = 0; i < pointCount; i++) for (var i = 0; i < pointCount; i++)
{ {
result[i] = (SimpleGlyphFlags)data.ReadByte(); result[i] = (SimpleGlyphFlags)data.ReadByte();
if (result[i].HasFlag(SimpleGlyphFlags.Repeat))
{
var numberOfRepeats = data.ReadByte();
for (int j = 0; j < numberOfRepeats; j++)
{
result[i + j + 1] = result[i];
}
i += numberOfRepeats;
}
} }
return result; return result;

View File

@@ -9,28 +9,43 @@
using IO; using IO;
using Operations; using Operations;
using PdfPig.Core; using PdfPig.Core;
using Tokenization.Scanner;
using Tokenization.Tokens; using Tokenization.Tokens;
using Util; using Util;
using XObject;
internal class ContentStreamProcessor : IOperationContext internal class ContentStreamProcessor : IOperationContext
{ {
private readonly IResourceStore resourceStore; private readonly IResourceStore resourceStore;
private readonly UserSpaceUnit userSpaceUnit; private readonly UserSpaceUnit userSpaceUnit;
private readonly bool isLenientParsing; private readonly bool isLenientParsing;
private readonly IPdfTokenScanner pdfScanner;
private readonly XObjectFactory xObjectFactory;
private Stack<CurrentGraphicsState> graphicsStack = new Stack<CurrentGraphicsState>(); private Stack<CurrentGraphicsState> graphicsStack = new Stack<CurrentGraphicsState>();
public TextMatrices TextMatrices { get; } = new TextMatrices(); public TextMatrices TextMatrices { get; } = new TextMatrices();
public int StackSize => graphicsStack.Count; public int StackSize => graphicsStack.Count;
private readonly Dictionary<XObjectType, List<XObjectContentRecord>> xObjects = new Dictionary<XObjectType, List<XObjectContentRecord>>
{
{XObjectType.Form, new List<XObjectContentRecord>()},
{XObjectType.Image, new List<XObjectContentRecord>()},
{XObjectType.PostScript, new List<XObjectContentRecord>()}
};
public List<Letter> Letters = new List<Letter>(); public List<Letter> Letters = new List<Letter>();
public ContentStreamProcessor(PdfRectangle cropBox, IResourceStore resourceStore, UserSpaceUnit userSpaceUnit, bool isLenientParsing) public ContentStreamProcessor(PdfRectangle cropBox, IResourceStore resourceStore, UserSpaceUnit userSpaceUnit, bool isLenientParsing,
IPdfTokenScanner pdfScanner,
XObjectFactory xObjectFactory)
{ {
this.resourceStore = resourceStore; this.resourceStore = resourceStore;
this.userSpaceUnit = userSpaceUnit; this.userSpaceUnit = userSpaceUnit;
this.isLenientParsing = isLenientParsing; this.isLenientParsing = isLenientParsing;
this.pdfScanner = pdfScanner;
this.xObjectFactory = xObjectFactory;
graphicsStack.Push(new CurrentGraphicsState()); graphicsStack.Push(new CurrentGraphicsState());
} }
@@ -40,11 +55,7 @@
ProcessOperations(operations); ProcessOperations(operations);
return new PageContent return new PageContent(operations, Letters, xObjects, pdfScanner, xObjectFactory, isLenientParsing);
{
GraphicsStateOperations = operations,
Letters = Letters
};
} }
private void ProcessOperations(IReadOnlyList<IGraphicsStateOperation> operations) private void ProcessOperations(IReadOnlyList<IGraphicsStateOperation> operations)
@@ -205,6 +216,34 @@
} }
} }
/// <summary>
/// Records the XObject, along with the transformation matrix of the current graphics state,
/// in the list for its type so that it can be retrieved by type later.
/// </summary>
/// <param name="xObjectStream">The stream of the XObject being applied.</param>
/// <exception cref="InvalidOperationException">The stream's Subtype is not PS, Image or Form.</exception>
public void ApplyXObject(StreamToken xObjectStream)
{
    var subType = (NameToken)xObjectStream.StreamDictionary.Data[NameToken.Subtype.Data];

    // Capture the transformation in effect at the point the XObject is applied.
    var matrix = GetCurrentState().CurrentTransformationMatrix;

    XObjectType type;
    if (subType.Equals(NameToken.Ps))
    {
        type = XObjectType.PostScript;
    }
    else if (subType.Equals(NameToken.Image))
    {
        type = XObjectType.Image;
    }
    else if (subType.Equals(NameToken.Form))
    {
        type = XObjectType.Form;
    }
    else
    {
        throw new InvalidOperationException($"XObject encountered with unexpected SubType {subType}. {xObjectStream.StreamDictionary}.");
    }

    xObjects[type].Add(new XObjectContentRecord(type, xObjectStream, matrix));
}
private void AdjustTextMatrix(decimal tx, decimal ty) private void AdjustTextMatrix(decimal tx, decimal ty)
{ {
var matrix = TransformationMatrix.GetTranslationMatrix(tx, ty); var matrix = TransformationMatrix.GetTranslationMatrix(tx, ty);

View File

@@ -19,5 +19,7 @@
void ShowText(IInputBytes bytes); void ShowText(IInputBytes bytes);
void ShowPositionedText(IReadOnlyList<IToken> tokens); void ShowPositionedText(IReadOnlyList<IToken> tokens);
/// <summary>
/// Applies the given XObject stream to this context.
/// </summary>
/// <param name="xObjectStream">The stream of the XObject to apply.</param>
void ApplyXObject(StreamToken xObjectStream);
} }
} }

View File

@@ -18,6 +18,9 @@
public void Run(IOperationContext operationContext, IResourceStore resourceStore) public void Run(IOperationContext operationContext, IResourceStore resourceStore)
{ {
var xobject = resourceStore.GetXObject(Name);
operationContext.ApplyXObject(xobject);
} }
public override string ToString() public override string ToString()

View File

@@ -0,0 +1,25 @@
namespace UglyToad.PdfPig.Graphics
{
using System;
using PdfPig.Core;
using Tokenization.Tokens;
using Util.JetBrains.Annotations;
using XObject;
/// <summary>
/// An XObject together with its type and the transformation matrix supplied when it was recorded.
/// </summary>
internal class XObjectContentRecord
{
    /// <summary>
    /// The type of the XObject.
    /// </summary>
    public XObjectType Type { get; }

    /// <summary>
    /// The stream of the XObject; never <see langword="null"/>.
    /// </summary>
    [NotNull]
    public StreamToken Stream { get; }

    /// <summary>
    /// The transformation matrix supplied for this XObject.
    /// </summary>
    public TransformationMatrix AppliedTransformation { get; }

    /// <summary>
    /// Creates a new <see cref="XObjectContentRecord"/>.
    /// </summary>
    /// <param name="type">The type of the XObject.</param>
    /// <param name="stream">The stream of the XObject.</param>
    /// <param name="appliedTransformation">The transformation matrix to store with the record.</param>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is <see langword="null"/>.</exception>
    public XObjectContentRecord(XObjectType type, StreamToken stream, TransformationMatrix appliedTransformation)
    {
        if (stream == null)
        {
            throw new ArgumentNullException(nameof(stream));
        }

        Type = type;
        Stream = stream;
        AppliedTransformation = appliedTransformation;
    }
}
}

View File

@@ -14,20 +14,24 @@
using Tokenization.Scanner; using Tokenization.Scanner;
using Tokenization.Tokens; using Tokenization.Tokens;
using Util; using Util;
using XObject;
internal class PageFactory : IPageFactory internal class PageFactory : IPageFactory
{ {
private readonly IResourceStore resourceStore; private readonly IResourceStore resourceStore;
private readonly IFilterProvider filterProvider; private readonly IFilterProvider filterProvider;
private readonly IPageContentParser pageContentParser; private readonly IPageContentParser pageContentParser;
private readonly XObjectFactory xObjectFactory;
private readonly IPdfTokenScanner pdfScanner; private readonly IPdfTokenScanner pdfScanner;
public PageFactory(IPdfTokenScanner pdfScanner, IResourceStore resourceStore, IFilterProvider filterProvider, public PageFactory(IPdfTokenScanner pdfScanner, IResourceStore resourceStore, IFilterProvider filterProvider,
IPageContentParser pageContentParser) IPageContentParser pageContentParser,
XObjectFactory xObjectFactory)
{ {
this.resourceStore = resourceStore; this.resourceStore = resourceStore;
this.filterProvider = filterProvider; this.filterProvider = filterProvider;
this.pageContentParser = pageContentParser; this.pageContentParser = pageContentParser;
this.xObjectFactory = xObjectFactory;
this.pdfScanner = pdfScanner; this.pdfScanner = pdfScanner;
} }
@@ -111,7 +115,7 @@
var operations = pageContentParser.Parse(new ByteArrayInputBytes(contentBytes)); var operations = pageContentParser.Parse(new ByteArrayInputBytes(contentBytes));
var context = new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit, isLenientParsing); var context = new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit, isLenientParsing, pdfScanner, xObjectFactory);
return context.Process(operations); return context.Process(operations);
} }

View File

@@ -21,6 +21,7 @@
using Tokenization.Scanner; using Tokenization.Scanner;
using Tokenization.Tokens; using Tokenization.Tokens;
using Util; using Util;
using XObject;
internal static class PdfDocumentFactory internal static class PdfDocumentFactory
{ {
@@ -109,7 +110,7 @@
var resourceContainer = new ResourceContainer(pdfScanner, fontFactory); var resourceContainer = new ResourceContainer(pdfScanner, fontFactory);
var pageFactory = new PageFactory(pdfScanner, resourceContainer, filterProvider, new PageContentParser(new ReflectionGraphicsStateOperationFactory())); var pageFactory = new PageFactory(pdfScanner, resourceContainer, filterProvider, new PageContentParser(new ReflectionGraphicsStateOperationFactory()), new XObjectFactory());
var informationFactory = new DocumentInformationFactory(); var informationFactory = new DocumentInformationFactory();

View File

@@ -10,6 +10,10 @@
{ {
Data = data; Data = data;
} }
}
/// <summary>
/// Returns the string form of <c>Data</c>.
/// </summary>
public override string ToString() => $"{Data}";
}
} }

View File

@@ -0,0 +1,47 @@
namespace UglyToad.PdfPig.XObject
{
using System;
using Graphics;
using Tokenization.Scanner;
using Tokenization.Tokens;
/// <summary>
/// Reads XObject records; image creation is work in progress and currently produces no result.
/// </summary>
internal class XObjectFactory
{
    /// <summary>
    /// Reads the image properties from the stream dictionary of an image XObject.
    /// Returns early for streams whose Filter is the single name JpxDecode and for image masks.
    /// </summary>
    /// <param name="xObject">The record to read; its type must be <see cref="XObjectType.Image"/>.</param>
    /// <param name="pdfScanner">The scanner passed to the dictionary lookups.</param>
    /// <param name="isLenientParsing">The parsing mode flag; not read by the current implementation.</param>
    /// <exception cref="ArgumentNullException"><paramref name="xObject"/> is <see langword="null"/>.</exception>
    /// <exception cref="InvalidOperationException">The record is not an image.</exception>
    public void CreateImage(XObjectContentRecord xObject, IPdfTokenScanner pdfScanner, bool isLenientParsing)
    {
        if (xObject == null)
        {
            throw new ArgumentNullException(nameof(xObject));
        }

        if (xObject.Type != XObjectType.Image)
        {
            throw new InvalidOperationException($"Cannot create an image from an XObject with type: {xObject.Type}.");
        }

        var dictionary = xObject.Stream.StreamDictionary;

        // Width and Height are read before the early exits below.
        var width = dictionary.Get<NumericToken>(NameToken.Width, pdfScanner).Int;
        var height = dictionary.Get<NumericToken>(NameToken.Height, pdfScanner).Int;

        // Only a Filter entry that is a single name is inspected here.
        if (dictionary.TryGet(NameToken.Filter, out var filterToken)
            && filterToken is NameToken filterName
            && filterName.Equals(NameToken.JpxDecode))
        {
            return;
        }

        if (dictionary.TryGet(NameToken.ImageMask, out var maskToken)
            && maskToken is BooleanToken maskBoolean
            && maskBoolean.Data)
        {
            return;
        }

        var bitsPerComponents = dictionary.Get<NumericToken>(NameToken.BitsPerComponent, pdfScanner).Int;
    }
}
}

View File

@@ -0,0 +1,9 @@
namespace UglyToad.PdfPig.XObject
{
/// <summary>
/// The kinds of XObject distinguished when recording page content.
/// </summary>
internal enum XObjectType
{
    /// <summary>
    /// An image XObject.
    /// </summary>
    Image = 0,

    /// <summary>
    /// A form XObject.
    /// </summary>
    Form = 1,

    /// <summary>
    /// A PostScript XObject.
    /// </summary>
    PostScript = 2
}
}