Add XObjects to pages; fix parsing of TrueType fonts whose glyphs use the repeat flag.

This commit is contained in:
Eliot Jones
2018-04-26 22:22:29 +01:00
parent b6585292fb
commit 1fe54c5f49
16 changed files with 233 additions and 13 deletions

View File

@@ -44,6 +44,10 @@
public void ShowPositionedText(IReadOnlyList<IToken> tokens)
{
}
/// <summary>
/// Test stub: accepts the XObject stream and does nothing with it.
/// </summary>
public void ApplyXObject(StreamToken xObjectStream)
{
}
}
internal class TestResourceStore : IResourceStore
@@ -56,5 +60,10 @@
{
return null;
}
/// <summary>
/// Test stub: never resolves an XObject, always yields <c>null</c>.
/// </summary>
public StreamToken GetXObject(NameToken name) => null;
}
}

View File

@@ -37,5 +37,16 @@
}
}
/// <summary>
/// Smoke test: requesting the images of page 1 must complete without throwing.
/// There is no assertion because <c>GetImages</c> does not return anything.
/// </summary>
[Fact]
public void GetsImageOnPageOne()
{
    using (var pdf = PdfDocument.Open(GetFilename()))
    {
        pdf.GetPage(1).Content.GetImages();
    }
}
}
}

View File

@@ -8,5 +8,7 @@
void LoadResourceDictionary(DictionaryToken resourceDictionary, bool isLenientParsing);
IFont GetFont(NameToken name);
/// <summary>
/// Gets the stream of the XObject identified by the given resource name.
/// </summary>
/// <param name="name">The name the XObject is referred to by.</param>
/// <returns>The XObject's stream token.</returns>
StreamToken GetXObject(NameToken name);
}
}

View File

@@ -1,7 +1,10 @@
namespace UglyToad.PdfPig.Content
{
using System.Collections.Generic;
using Graphics;
using Graphics.Operations;
using Tokenization.Scanner;
using XObject;
/// <summary>
///
@@ -12,8 +15,35 @@
/// </remarks>
internal class PageContent
{
internal IReadOnlyList<IGraphicsStateOperation> GraphicsStateOperations { get; set; }
private readonly IReadOnlyDictionary<XObjectType, List<XObjectContentRecord>> xObjects;
private readonly IPdfTokenScanner pdfScanner;
private readonly XObjectFactory xObjectFactory;
private readonly bool isLenientParsing;
public IReadOnlyList<Letter> Letters { get; set; }
internal IReadOnlyList<IGraphicsStateOperation> GraphicsStateOperations { get; }
public IReadOnlyList<Letter> Letters { get; }
/// <summary>
/// Creates the content of a page from the results of processing its content stream.
/// </summary>
/// <param name="graphicsStateOperations">The operations exposed through <see cref="GraphicsStateOperations"/>.</param>
/// <param name="letters">The letters exposed through <see cref="Letters"/>.</param>
/// <param name="xObjects">The XObject records of the page, grouped by XObject type.</param>
/// <param name="pdfScanner">The scanner handed to the XObject factory when images are requested.</param>
/// <param name="xObjectFactory">The factory used by <see cref="GetImages"/>.</param>
/// <param name="isLenientParsing">The parsing mode flag handed to the XObject factory.</param>
internal PageContent(
    IReadOnlyList<IGraphicsStateOperation> graphicsStateOperations,
    IReadOnlyList<Letter> letters,
    IReadOnlyDictionary<XObjectType, List<XObjectContentRecord>> xObjects,
    IPdfTokenScanner pdfScanner,
    XObjectFactory xObjectFactory,
    bool isLenientParsing)
{
    // Collaborators needed later for lazily building XObjects.
    this.isLenientParsing = isLenientParsing;
    this.xObjectFactory = xObjectFactory;
    this.pdfScanner = pdfScanner;
    this.xObjects = xObjects;

    // Publicly visible page content.
    Letters = letters;
    GraphicsStateOperations = graphicsStateOperations;
}
/// <summary>
/// Passes every image XObject record stored for this page to the XObject factory's
/// <c>CreateImage</c> method.
/// </summary>
/// <remarks>
/// <c>CreateImage</c> has no return value, so nothing is handed back to the caller.
/// </remarks>
public void GetImages()
{
    var imageRecords = xObjects[XObjectType.Image];

    foreach (var imageRecord in imageRecords)
    {
        xObjectFactory.CreateImage(imageRecord, pdfScanner, isLenientParsing);
    }
}
}
}

View File

@@ -89,6 +89,15 @@
return font;
}
/// <summary>
/// Looks up the reference stored for <paramref name="name"/> in the current resource state
/// and resolves it to a stream using the scanner.
/// </summary>
/// <param name="name">The resource name of the XObject.</param>
/// <returns>The stream found for the stored reference.</returns>
public StreamToken GetXObject(NameToken name)
{
    var xObjectReference = new IndirectReferenceToken(currentResourceState[name]);

    return DirectObjectFinder.Get<StreamToken>(xObjectReference, scanner);
}
}
}

View File

@@ -0,0 +1,13 @@
namespace UglyToad.PdfPig.Filters
{
    using System;
    using Tokenization.Tokens;

    /// <summary>
    /// Placeholder for the DCT decode filter. Decoding is not implemented yet.
    /// </summary>
    internal class DctDecodeFilter : IFilter
    {
        /// <summary>
        /// Not implemented; always throws.
        /// </summary>
        /// <exception cref="NotImplementedException">Thrown on every call.</exception>
        public byte[] Decode(byte[] input, DictionaryToken streamDictionary, int filterIndex)
            => throw new NotImplementedException();
    }
}

View File

@@ -224,6 +224,18 @@
// Read one flag byte per point. When a flag has the Repeat bit set, the byte that
// follows it is a count, and the same flag value is copied into that many
// subsequent entries instead of being read from the data again.
// NOTE(review): result[i + j + 1] is not checked against the size of result; this
// presumably relies on well-formed font data and result holding pointCount entries - confirm.
for (var i = 0; i < pointCount; i++)
{
result[i] = (SimpleGlyphFlags)data.ReadByte();
if (result[i].HasFlag(SimpleGlyphFlags.Repeat))
{
// Number of additional entries that share the flag just read.
var numberOfRepeats = data.ReadByte();
for (int j = 0; j < numberOfRepeats; j++)
{
result[i + j + 1] = result[i];
}
// Skip the entries filled in above; the loop's own i++ then moves to the next unread flag.
i += numberOfRepeats;
}
}
return result;

View File

@@ -9,28 +9,43 @@
using IO;
using Operations;
using PdfPig.Core;
using Tokenization.Scanner;
using Tokenization.Tokens;
using Util;
using XObject;
internal class ContentStreamProcessor : IOperationContext
{
private readonly IResourceStore resourceStore;
private readonly UserSpaceUnit userSpaceUnit;
private readonly bool isLenientParsing;
private readonly IPdfTokenScanner pdfScanner;
private readonly XObjectFactory xObjectFactory;
private Stack<CurrentGraphicsState> graphicsStack = new Stack<CurrentGraphicsState>();
public TextMatrices TextMatrices { get; } = new TextMatrices();
public int StackSize => graphicsStack.Count;
private readonly Dictionary<XObjectType, List<XObjectContentRecord>> xObjects = new Dictionary<XObjectType, List<XObjectContentRecord>>
{
{XObjectType.Form, new List<XObjectContentRecord>()},
{XObjectType.Image, new List<XObjectContentRecord>()},
{XObjectType.PostScript, new List<XObjectContentRecord>()}
};
public List<Letter> Letters = new List<Letter>();
public ContentStreamProcessor(PdfRectangle cropBox, IResourceStore resourceStore, UserSpaceUnit userSpaceUnit, bool isLenientParsing)
/// <summary>
/// Creates a processor with a single default graphics state on its stack.
/// </summary>
/// <param name="cropBox">Not used by this constructor.</param>
/// <param name="resourceStore">The store used to resolve named resources.</param>
/// <param name="userSpaceUnit">The user space unit of the page being processed.</param>
/// <param name="isLenientParsing">The parsing mode flag passed on to the resulting page content.</param>
/// <param name="pdfScanner">The scanner passed on to the resulting page content.</param>
/// <param name="xObjectFactory">The XObject factory passed on to the resulting page content.</param>
public ContentStreamProcessor(
    PdfRectangle cropBox,
    IResourceStore resourceStore,
    UserSpaceUnit userSpaceUnit,
    bool isLenientParsing,
    IPdfTokenScanner pdfScanner,
    XObjectFactory xObjectFactory)
{
    this.xObjectFactory = xObjectFactory;
    this.pdfScanner = pdfScanner;
    this.isLenientParsing = isLenientParsing;
    this.userSpaceUnit = userSpaceUnit;
    this.resourceStore = resourceStore;

    // Processing always starts from a fresh graphics state.
    var initialState = new CurrentGraphicsState();
    graphicsStack.Push(initialState);
}
@@ -40,11 +55,7 @@
ProcessOperations(operations);
return new PageContent
{
GraphicsStateOperations = operations,
Letters = Letters
};
return new PageContent(operations, Letters, xObjects, pdfScanner, xObjectFactory, isLenientParsing);
}
private void ProcessOperations(IReadOnlyList<IGraphicsStateOperation> operations)
@@ -205,6 +216,34 @@
}
}
/// <summary>
/// Records an XObject together with the transformation matrix that is current when it is invoked.
/// </summary>
/// <remarks>
/// For now the XObject is only classified by its subtype and stored with the graphics state
/// information preceding it. Consumers of the page can then request the objects by type.
/// </remarks>
/// <param name="xObjectStream">The stream of the XObject being invoked.</param>
/// <exception cref="ArgumentNullException"><paramref name="xObjectStream"/> is <c>null</c>.</exception>
/// <exception cref="InvalidOperationException">
/// The stream dictionary has no name-valued subtype entry, or the subtype is not one of the supported kinds.
/// </exception>
public void ApplyXObject(StreamToken xObjectStream)
{
    if (xObjectStream == null)
    {
        throw new ArgumentNullException(nameof(xObjectStream));
    }

    // A missing or non-name subtype is reported explicitly rather than surfacing as a
    // KeyNotFoundException / InvalidCastException from a raw dictionary lookup and cast.
    if (!xObjectStream.StreamDictionary.TryGet(NameToken.Subtype, out var subTypeToken)
        || !(subTypeToken is NameToken subType))
    {
        throw new InvalidOperationException($"XObject encountered without a SubType name. {xObjectStream.StreamDictionary}.");
    }

    XObjectType type;
    if (subType.Equals(NameToken.Ps))
    {
        type = XObjectType.PostScript;
    }
    else if (subType.Equals(NameToken.Image))
    {
        type = XObjectType.Image;
    }
    else if (subType.Equals(NameToken.Form))
    {
        type = XObjectType.Form;
    }
    else
    {
        throw new InvalidOperationException($"XObject encountered with unexpected SubType {subType}. {xObjectStream.StreamDictionary}.");
    }

    // Capture the transformation in effect at the point the XObject is invoked.
    var matrix = GetCurrentState().CurrentTransformationMatrix;

    xObjects[type].Add(new XObjectContentRecord(type, xObjectStream, matrix));
}
private void AdjustTextMatrix(decimal tx, decimal ty)
{
var matrix = TransformationMatrix.GetTranslationMatrix(tx, ty);

View File

@@ -19,5 +19,7 @@
void ShowText(IInputBytes bytes);
void ShowPositionedText(IReadOnlyList<IToken> tokens);
/// <summary>
/// Applies the XObject held in the given stream to this operation context.
/// </summary>
/// <param name="xObjectStream">The stream of the XObject to apply.</param>
void ApplyXObject(StreamToken xObjectStream);
}
}

View File

@@ -18,6 +18,9 @@
/// <summary>
/// Resolves the XObject named by this operation from the resource store and applies it to the context.
/// </summary>
/// <param name="operationContext">The context the XObject is applied to.</param>
/// <param name="resourceStore">The store used to look up the XObject by <c>Name</c>.</param>
public void Run(IOperationContext operationContext, IResourceStore resourceStore)
{
    operationContext.ApplyXObject(resourceStore.GetXObject(Name));
}
public override string ToString()

View File

@@ -0,0 +1,25 @@
namespace UglyToad.PdfPig.Graphics
{
    using System;
    using PdfPig.Core;
    using Tokenization.Tokens;
    using Util.JetBrains.Annotations;
    using XObject;

    /// <summary>
    /// An XObject stream paired with its type and the transformation matrix recorded for it.
    /// </summary>
    internal class XObjectContentRecord
    {
        /// <summary>
        /// The kind of XObject this record holds.
        /// </summary>
        public XObjectType Type { get; }

        /// <summary>
        /// The stream of the XObject. Never <c>null</c>.
        /// </summary>
        [NotNull]
        public StreamToken Stream { get; }

        /// <summary>
        /// The transformation matrix stored alongside the XObject.
        /// </summary>
        public TransformationMatrix AppliedTransformation { get; }

        /// <summary>
        /// Creates a new record.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is <c>null</c>.</exception>
        public XObjectContentRecord(XObjectType type, StreamToken stream, TransformationMatrix appliedTransformation)
        {
            if (stream == null)
            {
                throw new ArgumentNullException(nameof(stream));
            }

            Stream = stream;
            Type = type;
            AppliedTransformation = appliedTransformation;
        }
    }
}

View File

@@ -14,20 +14,24 @@
using Tokenization.Scanner;
using Tokenization.Tokens;
using Util;
using XObject;
internal class PageFactory : IPageFactory
{
private readonly IResourceStore resourceStore;
private readonly IFilterProvider filterProvider;
private readonly IPageContentParser pageContentParser;
private readonly XObjectFactory xObjectFactory;
private readonly IPdfTokenScanner pdfScanner;
public PageFactory(IPdfTokenScanner pdfScanner, IResourceStore resourceStore, IFilterProvider filterProvider,
IPageContentParser pageContentParser)
IPageContentParser pageContentParser,
XObjectFactory xObjectFactory)
{
this.resourceStore = resourceStore;
this.filterProvider = filterProvider;
this.pageContentParser = pageContentParser;
this.xObjectFactory = xObjectFactory;
this.pdfScanner = pdfScanner;
}
@@ -111,7 +115,7 @@
var operations = pageContentParser.Parse(new ByteArrayInputBytes(contentBytes));
var context = new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit, isLenientParsing);
var context = new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit, isLenientParsing, pdfScanner, xObjectFactory);
return context.Process(operations);
}

View File

@@ -21,6 +21,7 @@
using Tokenization.Scanner;
using Tokenization.Tokens;
using Util;
using XObject;
internal static class PdfDocumentFactory
{
@@ -109,7 +110,7 @@
var resourceContainer = new ResourceContainer(pdfScanner, fontFactory);
var pageFactory = new PageFactory(pdfScanner, resourceContainer, filterProvider, new PageContentParser(new ReflectionGraphicsStateOperationFactory()));
var pageFactory = new PageFactory(pdfScanner, resourceContainer, filterProvider, new PageContentParser(new ReflectionGraphicsStateOperationFactory()), new XObjectFactory());
var informationFactory = new DocumentInformationFactory();

View File

@@ -10,6 +10,10 @@
{
Data = data;
}
}
/// <summary>
/// Returns the string form of the token's <c>Data</c>.
/// </summary>
public override string ToString() => $"{Data}";
}
}

View File

@@ -0,0 +1,47 @@
namespace UglyToad.PdfPig.XObject
{
    using System;
    using Graphics;
    using Tokenization.Scanner;
    using Tokenization.Tokens;

    /// <summary>
    /// Reads XObject records. Image handling is a work in progress: the image properties
    /// are read from the stream dictionary but no image is produced yet.
    /// </summary>
    internal class XObjectFactory
    {
        /// <summary>
        /// Reads the image properties of an image XObject from its stream dictionary.
        /// </summary>
        /// <param name="xObject">The record to read; its type must be <see cref="XObjectType.Image"/>.</param>
        /// <param name="pdfScanner">The scanner passed to the dictionary lookups.</param>
        /// <param name="isLenientParsing">Not used by this method at present.</param>
        /// <exception cref="ArgumentNullException"><paramref name="xObject"/> is <c>null</c>.</exception>
        /// <exception cref="InvalidOperationException">The record is not an image XObject.</exception>
        public void CreateImage(XObjectContentRecord xObject, IPdfTokenScanner pdfScanner, bool isLenientParsing)
        {
            if (xObject == null)
            {
                throw new ArgumentNullException(nameof(xObject));
            }

            if (xObject.Type != XObjectType.Image)
            {
                throw new InvalidOperationException($"Cannot create an image from an XObject with type: {xObject.Type}.");
            }

            var dictionary = xObject.Stream.StreamDictionary;

            // Width and height are read before any early exit below.
            var imageWidth = dictionary.Get<NumericToken>(NameToken.Width, pdfScanner).Int;
            var imageHeight = dictionary.Get<NumericToken>(NameToken.Height, pdfScanner).Int;

            // Streams whose filter entry is the JpxDecode name are not processed further.
            if (dictionary.TryGet(NameToken.Filter, out var filterToken)
                && filterToken is NameToken filterName
                && filterName.Equals(NameToken.JpxDecode))
            {
                return;
            }

            // Image masks are not processed further either.
            if (dictionary.TryGet(NameToken.ImageMask, out var maskToken)
                && maskToken is BooleanToken maskFlag
                && maskFlag.Data)
            {
                return;
            }

            var componentBits = dictionary.Get<NumericToken>(NameToken.BitsPerComponent, pdfScanner).Int;
        }
    }
}

View File

@@ -0,0 +1,9 @@
namespace UglyToad.PdfPig.XObject
{
/// <summary>
/// The kinds of XObject that are distinguished when a page's content is processed.
/// </summary>
internal enum XObjectType
{
/// <summary>
/// An image XObject.
/// </summary>
Image,
/// <summary>
/// A form XObject.
/// </summary>
Form,
/// <summary>
/// A PostScript XObject.
/// </summary>
PostScript
}
}