Implement the show text with positioning operator. Fix bugs with parsing stream lengths contained in indirect objects. Fix a bug with parsing font dictionaries contained in indirect objects.

This commit is contained in:
Eliot Jones
2017-12-31 14:11:13 +00:00
parent 33c628e0c8
commit a77e8e1a56
14 changed files with 268 additions and 55 deletions

View File

@@ -7,6 +7,7 @@
using Pdf.Cos;
using Pdf.Fonts;
using Pdf.Graphics;
using Pdf.Tokenization.Tokens;
internal class TestOperationContext : IOperationContext
{
@@ -41,6 +42,10 @@
// Test stub: the body is empty, so the supplied bytes are ignored.
public void ShowText(IInputBytes bytes)
{
}
// Test stub: the body is empty, so the supplied tokens are ignored.
public void ShowPositionedText(IReadOnlyList<IToken> tokens)
{
}
}
internal class TestResourceStore : IResourceStore

View File

@@ -0,0 +1,39 @@
namespace UglyToad.Pdf.Tests.Integration
{
using System;
using System.IO;
using Content;
using Xunit;
/// <summary>
/// Integration tests which open the "Font Size Test - from libre office.pdf" sample document.
/// </summary>
public class FontSizeTestFromLibreOfficeTests
{
    /// <summary>
    /// Builds the full path of the sample document, resolved relative to the
    /// application base directory (three levels up, then Integration/Documents).
    /// </summary>
    private static string GetFilename()
    {
        var baseDirectory = AppDomain.CurrentDomain.BaseDirectory;
        var relativeFolder = Path.Combine(baseDirectory, "..", "..", "..", "Integration", "Documents");

        return Path.Combine(Path.GetFullPath(relativeFolder), "Font Size Test - from libre office.pdf");
    }

    /// <summary>
    /// The sample document is expected to report exactly one page.
    /// </summary>
    [Fact]
    public void GetsCorrectNumberOfPages()
    {
        using (var document = PdfDocument.Open(GetFilename()))
        {
            Assert.Equal(1, document.NumberOfPages);
        }
    }

    /// <summary>
    /// Opens and disposes the sample document. The page size assertion is
    /// currently commented out, so this test only fails if opening throws.
    /// </summary>
    [Fact]
    public void GetsCorrectPageSize()
    {
        using (var document = PdfDocument.Open(GetFilename()))
        {
            // Disabled assertion, kept for reference:
            //var page = document.GetPage(1);
            //Assert.Equal(PageSize.A4, page.Size);
        }
    }
}
}

View File

@@ -269,11 +269,29 @@ trailer
trailer
<<>>");
var result = parser.TryParse(input, 0, false, objectPool, out var table);
var result = parser.TryParse(input, 0, false, objectPool, out var _);
Assert.False(result);
}
// Parses a cross reference table declaring two entries (first object number 15)
// followed by an empty trailer dictionary, and expects parsing to succeed.
// NOTE(review): the test name refers to blank lines preceding the trailer, but the
// input as shown here contains none - confirm the literal still matches the intent.
[Fact]
public void SkipsBlankLinesPrecedingTrailer()
{
var input = GetReader(@"xref
15 2
0000000190 00000 n
0000000250 00032 n
trailer
<<>>");
// Parse from offset 0; the third argument is passed as false.
var result = parser.TryParse(input, 0, false, objectPool, out var table);
Assert.True(result);
// Both declared entries should be present in the resulting table part.
Assert.Equal(2, table.AsCrossReferenceTablePart().ObjectOffsets.Count);
}
private static IRandomAccessRead GetReader(string input)
{
return new RandomAccessBuffer(OtherEncodings.StringAsLatin1Bytes(input));

View File

@@ -42,14 +42,11 @@
MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers, isLenientParsing);
CropBox cropBox = GetCropBox(dictionary, pageTreeMembers, mediaBox);
if (dictionary.GetItemOrDefault(CosName.RESOURCES) is PdfDictionary resource)
{
resourceStore.LoadResourceDictionary(resource, reader, isLenientParsing);
}
UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary);
LoadResources(number, dictionary, reader, isLenientParsing);
PageContent content = default(PageContent);
var contentObject = dictionary.GetItemOrDefault(CosName.CONTENTS) as CosObject;
@@ -66,7 +63,7 @@
var operations = pageContentParser.Parse(new ByteArrayInputBytes(contents));
var context = new ContentStreamProcessor(mediaBox.Bounds, resourceStore, userSpaceUnit);
var context = new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit);
content = context.Process(operations);
}
@@ -138,5 +135,33 @@
return mediaBox;
}
/// <summary>
/// Loads the page's resource dictionary into the resource store.
/// The Resources entry may be a dictionary held directly in the page dictionary or
/// a reference which is resolved through the PDF object parser.
/// </summary>
/// <param name="pageNumber">The page number, used only in the error message.</param>
/// <param name="dictionary">The page dictionary to read the Resources entry from.</param>
/// <param name="reader">The reader passed on to the object parser and the resource store.</param>
/// <param name="isLenientParsing">Passed on to the object parser and the resource store.</param>
/// <exception cref="InvalidOperationException">
/// The Resources entry is missing, is of another type, or resolves to something other than a dictionary.
/// </exception>
private void LoadResources(int pageNumber, PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
{
    // NOTE(review): the PDF specification lists Resources as an inheritable page attribute;
    // a page with no entry of its own ends up throwing here - confirm whether that is intended.
    PdfDictionary located = null;

    switch (dictionary.GetItemOrDefault(CosName.RESOURCES))
    {
        case PdfDictionary direct:
            located = direct;
            break;
        case CosObject reference:
            // The entry is a reference, resolve it and keep the result only if it is a dictionary.
            located = pdfObjectParser.Parse(reference.ToIndirectReference(), reader, isLenientParsing) as PdfDictionary;
            break;
    }

    if (located is null)
    {
        throw new InvalidOperationException(
            $"No resource dictionary was found for this page ({pageNumber}), the page dictionary was {dictionary}.");
    }

    resourceStore.LoadResourceDictionary(located, reader, isLenientParsing);
}
}
}

View File

@@ -23,8 +23,31 @@
public void LoadResourceDictionary(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
{
if (dictionary.TryGetValue(CosName.FONT, out var fontBase) && fontBase is PdfDictionary fontDictionary)
if (dictionary.TryGetValue(CosName.FONT, out var fontBase))
{
PdfDictionary fontDictionary;
if (fontBase is CosObject obj)
{
var parsedObj = pdfObjectParser.Parse(obj.ToIndirectReference(), reader, isLenientParsing);
if (parsedObj is PdfDictionary indirectFontDictionary)
{
fontDictionary = indirectFontDictionary;
}
else
{
throw new InvalidOperationException($"No font dictionary could be found for the dictionary {dictionary}.");
}
}
else if (fontBase is PdfDictionary directDictionary)
{
fontDictionary = directDictionary;
}
else
{
throw new InvalidOperationException($"No font dictionary could be found for the dictionary {dictionary}");
}
LoadFontDictionary(fontDictionary, reader, isLenientParsing);
}
}

View File

@@ -9,6 +9,8 @@
using IO;
using Operations;
using Pdf.Core;
using Tokenization.Tokens;
using Util;
internal class ContentStreamProcessor : IOperationContext
{
@@ -134,6 +136,64 @@
}
}
/// <summary>
/// Processes the operands of a show text with positioning (TJ) operation in order.
/// A numeric token shifts the text matrix; any other token is shown as text.
/// </summary>
/// <param name="tokens">
/// The tokens to process. Anything which is neither a <see cref="NumericToken"/> nor a
/// <see cref="HexToken"/> is cast to <see cref="StringToken"/>.
/// </param>
public void ShowPositionedText(IReadOnlyList<IToken> tokens)
{
    var fontState = GetCurrentState().FontState;

    var size = fontState.FontSize;
    var scaling = fontState.HorizontalScaling;
    var vertical = resourceStore.GetFont(fontState.FontName).IsVertical;

    foreach (var token in tokens)
    {
        IReadOnlyList<byte> content;

        switch (token)
        {
            case NumericToken adjustment:
                // The adjustment is in thousandths of a unit and is subtracted from the position.
                decimal shift = -adjustment.Data / 1000 * size;

                if (vertical)
                {
                    AdjustTextMatrix(0, shift);
                }
                else
                {
                    // Horizontal shifts are additionally multiplied by the horizontal scaling.
                    AdjustTextMatrix(shift * scaling, 0);
                }

                continue;
            case HexToken hex:
                content = hex.Bytes;
                break;
            default:
                // Everything else is treated as a string token and converted to bytes.
                content = OtherEncodings.StringAsLatin1Bytes(((StringToken) token).Data);
                break;
        }

        ShowText(new ByteArrayInputBytes(content));
    }
}
/// <summary>
/// Replaces the current text matrix with the product of a translation by
/// (<paramref name="tx"/>, <paramref name="ty"/>) and the existing text matrix.
/// </summary>
/// <param name="tx">The translation passed as the first argument to the translation matrix.</param>
/// <param name="ty">The translation passed as the second argument to the translation matrix.</param>
private void AdjustTextMatrix(decimal tx, decimal ty)
{
    var translation = TransformationMatrix.GetTranslationMatrix(tx, ty);

    // The translation is the left operand of the multiplication.
    TextMatrices.TextMatrix = translation.Multiply(TextMatrices.TextMatrix);
}
private void ShowGlyph(TransformationMatrix renderingMatrix, IFont font, int characterCode, string unicode, decimal width, decimal fontSize,
decimal pointSize)
{

View File

@@ -1,6 +1,8 @@
namespace UglyToad.Pdf.Graphics
{
using System.Collections.Generic;
using IO;
using Tokenization.Tokens;
internal interface IOperationContext
{
@@ -15,5 +17,7 @@
void PushState();
void ShowText(IInputBytes bytes);
void ShowPositionedText(IReadOnlyList<IToken> tokens);
}
}

View File

@@ -1,7 +1,9 @@
namespace UglyToad.Pdf.Graphics.Operations.TextShowing
{
using System;
using System.Collections.Generic;
using Content;
using Tokenization.Tokens;
internal class ShowTextsWithPositioning : IGraphicsStateOperation
{
@@ -9,16 +11,30 @@
public string Operator => Symbol;
public object[] Array { get; }
public IReadOnlyList<IToken> Array { get; }
public ShowTextsWithPositioning(object[] array)
public ShowTextsWithPositioning(IReadOnlyList<IToken> array)
{
if (array == null)
{
throw new ArgumentNullException(nameof(array));
}
foreach (var token in array)
{
if (!(token is NumericToken) && !(token is HexToken)
&& !(token is StringToken))
{
throw new ArgumentException($"Found invalid token for showing texts with position: {token}");
}
}
Array = array;
}
public void Run(IOperationContext operationContext, IResourceStore resourceStore)
{
throw new NotImplementedException();
operationContext.ShowPositionedText(Array);
}
}
}

View File

@@ -61,6 +61,20 @@ namespace UglyToad.Pdf.Graphics
{
throw new InvalidOperationException($"Tried to create a show text operation with operand type: {operands[0]?.GetType().Name ?? "null"}");
}
case ShowTextsWithPositioning.Symbol:
if (operands.Count == 0)
{
throw new InvalidOperationException("Cannot have 0 parameters for a TJ operator.");
}
if (operands.Count == 1 && operands[0] is ArrayToken arrayToken)
{
return new ShowTextsWithPositioning(arrayToken.Data);
}
var array = operands.ToArray();
return new ShowTextsWithPositioning(array);
}
if (!operations.TryGetValue(op.Data, out Type operationType))

View File

@@ -166,7 +166,7 @@
{
if (currentBase is PdfDictionary dictionary)
{
PdfRawStream stream = streamParser.Parse(reader, dictionary, isLenientParsing);
PdfRawStream stream = streamParser.Parse(reader, dictionary, isLenientParsing, null);
currentBase = stream;
}

View File

@@ -147,7 +147,7 @@
{
if (currentBase is PdfDictionary dictionary)
{
PdfRawStream stream = streamParser.Parse(reader, dictionary, isLenientParsing);
PdfRawStream stream = streamParser.Parse(reader, dictionary, isLenientParsing, this);
currentBase = stream;
}

View File

@@ -32,7 +32,7 @@
this.log = log;
}
public PdfRawStream Parse(IRandomAccessRead reader, PdfDictionary streamDictionary, bool isLenientParsing)
public PdfRawStream Parse(IRandomAccessRead reader, PdfDictionary streamDictionary, bool isLenientParsing, IPdfObjectParser parser)
{
PdfRawStream result;
@@ -42,7 +42,7 @@
skipWhiteSpaces(reader);
// This needs to be streamDictionary.getItem because when we are parsing, the underlying object might still be null.
ICosNumber streamLength = getLength(reader, streamDictionary.GetItemOrDefault(CosName.LENGTH), streamDictionary.GetName(CosName.TYPE));
ICosNumber streamLength = GetLength(reader, streamDictionary.GetItemOrDefault(CosName.LENGTH), streamDictionary.GetName(CosName.TYPE), isLenientParsing, parser);
ValidateStreamLength(reader, isLenientParsing, streamLength);
@@ -87,63 +87,72 @@
private void ValidateStreamLength(IRandomAccessRead reader, bool isLenientParsing, ICosNumber streamLength)
{
if (streamLength == null)
if (streamLength != null)
{
if (isLenientParsing)
{
log.Warn("The stream doesn't provide any stream length, using fallback readUntilEnd, at offset " +
reader.GetPosition());
}
else
{
throw new InvalidOperationException("Missing length for stream.");
}
return;
}
if (isLenientParsing)
{
log.Warn("The stream doesn't provide any stream length, using fallback readUntilEnd, at offset " +
reader.GetPosition());
}
else
{
throw new InvalidOperationException("Missing length for stream.");
}
}
private ICosNumber getLength(IRandomAccessRead source, CosBase lengthBaseObj, CosName streamType)
private ICosNumber GetLength(IRandomAccessRead source, CosBase lengthBaseObj, CosName streamType, bool isLenientParsing, IPdfObjectParser parser)
{
if (lengthBaseObj == null)
{
return null;
}
ICosNumber retVal = null;
// maybe length was given directly
if (lengthBaseObj is ICosNumber)
// Length is given directly in the stream dictionary
if (lengthBaseObj is ICosNumber number)
{
retVal = (ICosNumber)lengthBaseObj;
return number;
}
// length in referenced object
else if (lengthBaseObj is CosObject)
if (lengthBaseObj is CosObject lengthObj)
{
CosObject lengthObj = (CosObject)lengthBaseObj;
if (lengthObj.GetObject() == null)
var currentObject = lengthObj.GetObject();
if (currentObject == null)
{
// not read so far, keep current stream position
long curFileOffset = source.GetPosition();
bool isObjectStream = CosName.OBJ_STM.Equals(streamType);
throw new NotImplementedException();
//parseObjectDynamically(lengthObj, isObjectStream);
// reset current stream position
source.Seek(curFileOffset);
if (lengthObj.GetObject() == null)
if (parser == null)
{
throw new InvalidOperationException("Length object content was not read.");
throw new InvalidOperationException("This method required access to the PDF object parser but it was not created yet. Figure out how to fix this.");
}
var currentOffset = source.GetPosition();
var obj = parser.Parse(lengthObj.ToIndirectReference(), source, isLenientParsing);
source.Seek(currentOffset);
if (obj is ICosNumber referenceNumber)
{
return referenceNumber;
}
throw new InvalidOperationException("Length object content was not read.");
}
if (!(lengthObj.GetObject() is ICosNumber))
if (currentObject is ICosNumber objectNumber)
{
throw new InvalidOperationException("Wrong type of referenced length object " + lengthObj
+ ": " + lengthObj.GetObject().GetType().Name);
return objectNumber;
}
retVal = (ICosNumber)lengthObj.GetObject();
throw new InvalidOperationException("Wrong type of referenced length object " + lengthObj
+ ": " + lengthObj.GetObject().GetType().Name);
}
else
{
throw new InvalidOperationException("Wrong type of length object: "
+ lengthBaseObj.GetType().Name);
}
return retVal;
throw new InvalidOperationException($"Wrong type of length object: {lengthBaseObj.GetType().Name}");
}
private void ReadValidStream(IRandomAccessRead reader, BinaryWriter output, ICosNumber streamLengthObj)

View File

@@ -179,7 +179,7 @@
PdfDictionary dict = dictionaryParser.Parse(reader, baseParser, pool);
PdfRawStream xrefStream = streamParser.Parse(reader, dict, isLenientParsing);
PdfRawStream xrefStream = streamParser.Parse(reader, dict, isLenientParsing, null);
CrossReferenceTablePart xrefTablePart = crossReferenceStreamParser.Parse(objByteOffset, xrefStream);
return xrefTablePart;

View File

@@ -52,7 +52,7 @@
var crossReferenceOffset = container.Get<FileTrailerParser>().GetXrefOffset(reader, isLenientParsing);
var pool = new CosObjectPool();
var crossReferenceTable = container.Get<FileCrossReferenceTableParser>()
.Parse(reader, isLenientParsing, crossReferenceOffset, pool);