mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-10-15 11:44:51 +08:00
Implement the show-text-with-positioning (TJ) operator. Fix bugs with parsing stream lengths contained in indirect objects. Fix a bug with parsing font dictionaries contained in indirect objects.
This commit is contained in:
@@ -7,6 +7,7 @@
|
||||
using Pdf.Cos;
|
||||
using Pdf.Fonts;
|
||||
using Pdf.Graphics;
|
||||
using Pdf.Tokenization.Tokens;
|
||||
|
||||
internal class TestOperationContext : IOperationContext
|
||||
{
|
||||
@@ -41,6 +42,10 @@
|
||||
public void ShowText(IInputBytes bytes)
|
||||
{
|
||||
}
|
||||
|
||||
// No-op implementation of the positioned-text operation; the supplied tokens are ignored.
public void ShowPositionedText(IReadOnlyList<IToken> tokens)
|
||||
{
|
||||
}
|
||||
}
|
||||
|
||||
internal class TestResourceStore : IResourceStore
|
||||
|
@@ -0,0 +1,39 @@
|
||||
namespace UglyToad.Pdf.Tests.Integration
|
||||
{
|
||||
using System;
|
||||
using System.IO;
|
||||
using Content;
|
||||
using Xunit;
|
||||
|
||||
/// <summary>
/// Integration tests that open the "Font Size Test - from libre office.pdf" sample document.
/// </summary>
public class FontSizeTestFromLibreOfficeTests
{
    // Builds the absolute path of the sample PDF: Integration/Documents, resolved three
    // directory levels above the application base directory.
    private static string GetFilename()
    {
        var baseDirectory = AppDomain.CurrentDomain.BaseDirectory;
        var relativeFolder = Path.Combine(baseDirectory, "..", "..", "..", "Integration", "Documents");

        return Path.Combine(Path.GetFullPath(relativeFolder), "Font Size Test - from libre office.pdf");
    }

    // The sample document is expected to contain exactly one page.
    [Fact]
    public void GetsCorrectNumberOfPages()
    {
        using (var document = PdfDocument.Open(GetFilename()))
        {
            Assert.Equal(1, document.NumberOfPages);
        }
    }

    // NOTE(review): the page-size assertion is commented out, so this test currently only
    // checks that the document can be opened and disposed without throwing.
    [Fact]
    public void GetsCorrectPageSize()
    {
        using (var document = PdfDocument.Open(GetFilename()))
        {
            //var page = document.GetPage(1);
            //Assert.Equal(PageSize.A4, page.Size);
        }
    }
}
|
||||
}
|
@@ -269,11 +269,29 @@ trailer
|
||||
trailer
|
||||
<<>>");
|
||||
|
||||
var result = parser.TryParse(input, 0, false, objectPool, out var table);
|
||||
var result = parser.TryParse(input, 0, false, objectPool, out var _);
|
||||
|
||||
Assert.False(result);
|
||||
}
|
||||
|
||||
// Feeds the parser a two-entry xref section whose last entry is separated from the
// "trailer" keyword by a blank line, and expects parsing to succeed.
[Fact]
|
||||
public void SkipsBlankLinesPrecedingTrailer()
|
||||
{
|
||||
var input = GetReader(@"xref
|
||||
15 2
|
||||
0000000190 00000 n
|
||||
0000000250 00032 n
|
||||
|
||||
trailer
|
||||
<<>>");
|
||||
|
||||
var result = parser.TryParse(input, 0, false, objectPool, out var table);
|
||||
|
||||
Assert.True(result);
|
||||
|
||||
// Both entries of the "15 2" subsection must be present in the parsed table part.
Assert.Equal(2, table.AsCrossReferenceTablePart().ObjectOffsets.Count);
|
||||
}
|
||||
|
||||
private static IRandomAccessRead GetReader(string input)
|
||||
{
|
||||
return new RandomAccessBuffer(OtherEncodings.StringAsLatin1Bytes(input));
|
||||
|
@@ -42,14 +42,11 @@
|
||||
|
||||
MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers, isLenientParsing);
|
||||
CropBox cropBox = GetCropBox(dictionary, pageTreeMembers, mediaBox);
|
||||
|
||||
if (dictionary.GetItemOrDefault(CosName.RESOURCES) is PdfDictionary resource)
|
||||
{
|
||||
resourceStore.LoadResourceDictionary(resource, reader, isLenientParsing);
|
||||
}
|
||||
|
||||
|
||||
UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary);
|
||||
|
||||
LoadResources(number, dictionary, reader, isLenientParsing);
|
||||
|
||||
PageContent content = default(PageContent);
|
||||
|
||||
var contentObject = dictionary.GetItemOrDefault(CosName.CONTENTS) as CosObject;
|
||||
@@ -66,7 +63,7 @@
|
||||
|
||||
var operations = pageContentParser.Parse(new ByteArrayInputBytes(contents));
|
||||
|
||||
var context = new ContentStreamProcessor(mediaBox.Bounds, resourceStore, userSpaceUnit);
|
||||
var context = new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit);
|
||||
|
||||
content = context.Process(operations);
|
||||
}
|
||||
@@ -138,5 +135,33 @@
|
||||
|
||||
return mediaBox;
|
||||
}
|
||||
|
||||
/// <summary>
/// Loads the page's resource dictionary into the resource store. The Resources entry of the
/// page dictionary may be a dictionary itself or an object that is resolved through the object parser.
/// </summary>
/// <param name="pageNumber">Page number; used only in the error message.</param>
/// <param name="dictionary">The page dictionary whose Resources entry is read.</param>
/// <param name="reader">Passed through to the object parser and the resource store.</param>
/// <param name="isLenientParsing">Passed through to the object parser and the resource store.</param>
/// <exception cref="InvalidOperationException">
/// The Resources entry is neither a dictionary nor an object that parses to a dictionary.
/// </exception>
private void LoadResources(int pageNumber, PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
{
    PdfDictionary located = null;

    switch (dictionary.GetItemOrDefault(CosName.RESOURCES))
    {
        case PdfDictionary direct:
            located = direct;
            break;
        case CosObject reference:
            // Resolve the reference; anything other than a dictionary counts as "not found".
            located = pdfObjectParser.Parse(reference.ToIndirectReference(), reader, isLenientParsing) as PdfDictionary;
            break;
    }

    if (located == null)
    {
        throw new InvalidOperationException(
            $"No resource dictionary was found for this page ({pageNumber}), the page dictionary was {dictionary}.");
    }

    resourceStore.LoadResourceDictionary(located, reader, isLenientParsing);
}
|
||||
}
|
||||
}
|
||||
|
@@ -23,8 +23,31 @@
|
||||
|
||||
/// <summary>
/// Loads the fonts declared by the Font entry of a resource dictionary. The entry may be a
/// dictionary itself or an object that is resolved through the object parser.
/// </summary>
/// <param name="dictionary">The resource dictionary to inspect.</param>
/// <param name="reader">Passed through to the object parser and the font loader.</param>
/// <param name="isLenientParsing">Passed through to the object parser and the font loader.</param>
/// <exception cref="InvalidOperationException">
/// A Font entry exists but is neither a dictionary nor an object that parses to a dictionary.
/// </exception>
public void LoadResourceDictionary(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
{
    if (!dictionary.TryGetValue(CosName.FONT, out var fontBase))
    {
        // No Font entry: nothing to load.
        return;
    }

    PdfDictionary fontDictionary;

    switch (fontBase)
    {
        case CosObject reference:
            fontDictionary = pdfObjectParser.Parse(reference.ToIndirectReference(), reader, isLenientParsing) as PdfDictionary;
            break;
        case PdfDictionary direct:
            fontDictionary = direct;
            break;
        default:
            fontDictionary = null;
            break;
    }

    // Single failure path so both "wrong type" cases report the same message.
    if (fontDictionary == null)
    {
        throw new InvalidOperationException($"No font dictionary could be found for the dictionary {dictionary}.");
    }

    LoadFontDictionary(fontDictionary, reader, isLenientParsing);
}
|
||||
|
@@ -9,6 +9,8 @@
|
||||
using IO;
|
||||
using Operations;
|
||||
using Pdf.Core;
|
||||
using Tokenization.Tokens;
|
||||
using Util;
|
||||
|
||||
internal class ContentStreamProcessor : IOperationContext
|
||||
{
|
||||
@@ -134,6 +136,64 @@
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Processes an array of positioned text: each numeric token shifts the text matrix, every
/// other token is converted to bytes and passed to <c>ShowText</c>.
/// </summary>
/// <param name="tokens">
/// Numeric, hex and string tokens in display order. Any token that is neither numeric nor hex
/// is cast to <c>StringToken</c>.
/// </param>
public void ShowPositionedText(IReadOnlyList<IToken> tokens)
{
    var fontState = GetCurrentState().FontState;

    var size = fontState.FontSize;
    // NOTE(review): the value is used as a plain multiplier below — confirm HorizontalScaling
    // is stored as a fraction rather than a percentage.
    var scaling = fontState.HorizontalScaling;
    var vertical = resourceStore.GetFont(fontState.FontName).IsVertical;

    foreach (var token in tokens)
    {
        switch (token)
        {
            case NumericToken adjustment:
            {
                // The adjustment is negated, divided by 1000 and scaled by the font size.
                var offset = -adjustment.Data / 1000 * size;

                if (vertical)
                {
                    // Vertical fonts move along y only.
                    AdjustTextMatrix(0, offset);
                }
                else
                {
                    // Horizontal fonts move along x and additionally apply horizontal scaling.
                    AdjustTextMatrix(offset * scaling, 0);
                }

                break;
            }
            case HexToken hex:
            {
                IReadOnlyList<byte> hexBytes = hex.Bytes;

                ShowText(new ByteArrayInputBytes(hexBytes));

                break;
            }
            default:
            {
                IReadOnlyList<byte> textBytes = OtherEncodings.StringAsLatin1Bytes(((StringToken) token).Data);

                ShowText(new ByteArrayInputBytes(textBytes));

                break;
            }
        }
    }
}
|
||||
|
||||
// Multiplies a translation matrix for (tx, ty) by the current text matrix and stores the
// product as the new text matrix.
private void AdjustTextMatrix(decimal tx, decimal ty)
{
    var translation = TransformationMatrix.GetTranslationMatrix(tx, ty);

    TextMatrices.TextMatrix = translation.Multiply(TextMatrices.TextMatrix);
}
|
||||
|
||||
private void ShowGlyph(TransformationMatrix renderingMatrix, IFont font, int characterCode, string unicode, decimal width, decimal fontSize,
|
||||
decimal pointSize)
|
||||
{
|
||||
|
@@ -1,6 +1,8 @@
|
||||
namespace UglyToad.Pdf.Graphics
|
||||
{
|
||||
using System.Collections.Generic;
|
||||
using IO;
|
||||
using Tokenization.Tokens;
|
||||
|
||||
internal interface IOperationContext
|
||||
{
|
||||
@@ -15,5 +17,7 @@
|
||||
void PushState();
|
||||
|
||||
void ShowText(IInputBytes bytes);
|
||||
|
||||
void ShowPositionedText(IReadOnlyList<IToken> tokens);
|
||||
}
|
||||
}
|
@@ -1,7 +1,9 @@
|
||||
namespace UglyToad.Pdf.Graphics.Operations.TextShowing
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using Content;
|
||||
using Tokenization.Tokens;
|
||||
|
||||
internal class ShowTextsWithPositioning : IGraphicsStateOperation
|
||||
{
|
||||
@@ -9,16 +11,30 @@
|
||||
|
||||
public string Operator => Symbol;
|
||||
|
||||
public object[] Array { get; }
|
||||
public IReadOnlyList<IToken> Array { get; }
|
||||
|
||||
/// <summary>
/// Creates the operation from its operand array.
/// </summary>
/// <param name="array">Tokens to show; each must be a numeric, hex or string token.</param>
/// <exception cref="ArgumentNullException"><paramref name="array"/> is null.</exception>
/// <exception cref="ArgumentException">The array contains a token of any other kind.</exception>
public ShowTextsWithPositioning(IReadOnlyList<IToken> array)
{
    if (array == null)
    {
        throw new ArgumentNullException(nameof(array));
    }

    foreach (var token in array)
    {
        var isSupported = token is NumericToken || token is HexToken || token is StringToken;

        if (!isSupported)
        {
            throw new ArgumentException($"Found invalid token for showing texts with position: {token}", nameof(array));
        }
    }

    Array = array;
}
|
||||
|
||||
/// <summary>
/// Executes the operation by passing the stored token array to the operation context.
/// </summary>
/// <param name="operationContext">Receives the positioned-text call.</param>
/// <param name="resourceStore">Not used by this operation.</param>
public void Run(IOperationContext operationContext, IResourceStore resourceStore)
{
    operationContext.ShowPositionedText(Array);
}
|
||||
}
|
||||
}
|
@@ -61,6 +61,20 @@ namespace UglyToad.Pdf.Graphics
|
||||
{
|
||||
throw new InvalidOperationException($"Tried to create a show text operation with operand type: {operands[0]?.GetType().Name ?? "null"}");
|
||||
}
|
||||
case ShowTextsWithPositioning.Symbol:
|
||||
if (operands.Count == 0)
|
||||
{
|
||||
throw new InvalidOperationException("Cannot have 0 parameters for a TJ operator.");
|
||||
}
|
||||
|
||||
if (operands.Count == 1 && operands[0] is ArrayToken arrayToken)
|
||||
{
|
||||
return new ShowTextsWithPositioning(arrayToken.Data);
|
||||
}
|
||||
|
||||
var array = operands.ToArray();
|
||||
|
||||
return new ShowTextsWithPositioning(array);
|
||||
}
|
||||
|
||||
if (!operations.TryGetValue(op.Data, out Type operationType))
|
||||
|
@@ -166,7 +166,7 @@
|
||||
{
|
||||
if (currentBase is PdfDictionary dictionary)
|
||||
{
|
||||
PdfRawStream stream = streamParser.Parse(reader, dictionary, isLenientParsing);
|
||||
PdfRawStream stream = streamParser.Parse(reader, dictionary, isLenientParsing, null);
|
||||
|
||||
currentBase = stream;
|
||||
}
|
||||
|
@@ -147,7 +147,7 @@
|
||||
{
|
||||
if (currentBase is PdfDictionary dictionary)
|
||||
{
|
||||
PdfRawStream stream = streamParser.Parse(reader, dictionary, isLenientParsing);
|
||||
PdfRawStream stream = streamParser.Parse(reader, dictionary, isLenientParsing, this);
|
||||
|
||||
currentBase = stream;
|
||||
}
|
||||
|
@@ -32,7 +32,7 @@
|
||||
this.log = log;
|
||||
}
|
||||
|
||||
public PdfRawStream Parse(IRandomAccessRead reader, PdfDictionary streamDictionary, bool isLenientParsing)
|
||||
public PdfRawStream Parse(IRandomAccessRead reader, PdfDictionary streamDictionary, bool isLenientParsing, IPdfObjectParser parser)
|
||||
{
|
||||
PdfRawStream result;
|
||||
|
||||
@@ -42,7 +42,7 @@
|
||||
skipWhiteSpaces(reader);
|
||||
|
||||
// This needs to be streamDictionary.getItem because when we are parsing, the underlying object might still be null.
|
||||
ICosNumber streamLength = getLength(reader, streamDictionary.GetItemOrDefault(CosName.LENGTH), streamDictionary.GetName(CosName.TYPE));
|
||||
ICosNumber streamLength = GetLength(reader, streamDictionary.GetItemOrDefault(CosName.LENGTH), streamDictionary.GetName(CosName.TYPE), isLenientParsing, parser);
|
||||
|
||||
ValidateStreamLength(reader, isLenientParsing, streamLength);
|
||||
|
||||
@@ -87,63 +87,72 @@
|
||||
|
||||
/// <summary>
/// Reacts to a missing stream length: logs a warning when parsing leniently, throws otherwise.
/// Does nothing when a length is present.
/// </summary>
/// <param name="reader">Used only to report the current position in the warning.</param>
/// <param name="isLenientParsing">Whether a missing length is tolerated.</param>
/// <param name="streamLength">The resolved length, or null when none was found.</param>
/// <exception cref="InvalidOperationException">The length is missing and parsing is strict.</exception>
private void ValidateStreamLength(IRandomAccessRead reader, bool isLenientParsing, ICosNumber streamLength)
{
    if (streamLength != null)
    {
        return;
    }

    if (!isLenientParsing)
    {
        throw new InvalidOperationException("Missing length for stream.");
    }

    log.Warn("The stream doesn't provide any stream length, using fallback readUntilEnd, at offset " +
             reader.GetPosition());
}
|
||||
|
||||
/// <summary>
/// Resolves the value of a stream's Length entry, which may be a number stored directly in
/// the stream dictionary or an object reference that has to be parsed.
/// </summary>
/// <param name="source">The reader; its position is saved and restored around parsing a referenced length.</param>
/// <param name="lengthBaseObj">The raw Length entry, or null when the dictionary has none.</param>
/// <param name="streamType">The stream's Type name; not used by this implementation.</param>
/// <param name="isLenientParsing">Passed through to the object parser.</param>
/// <param name="parser">Parser used to load a referenced length that has not been read yet; may be null.</param>
/// <returns>The length, or null when <paramref name="lengthBaseObj"/> is null.</returns>
/// <exception cref="InvalidOperationException">
/// The entry (or the object it refers to) is not a number, or a referenced length must be
/// parsed but no parser was supplied.
/// </exception>
private ICosNumber GetLength(IRandomAccessRead source, CosBase lengthBaseObj, CosName streamType, bool isLenientParsing, IPdfObjectParser parser)
{
    if (lengthBaseObj == null)
    {
        return null;
    }

    // Length is given directly in the stream dictionary
    if (lengthBaseObj is ICosNumber number)
    {
        return number;
    }

    // Length in referenced object
    if (lengthBaseObj is CosObject lengthObj)
    {
        var currentObject = lengthObj.GetObject();

        if (currentObject == null)
        {
            if (parser == null)
            {
                throw new InvalidOperationException("This method required access to the PDF object parser but it was not created yet. Figure out how to fix this.");
            }

            // Parsing the referenced object moves the reader, so remember where we were and
            // always go back there, even when parsing throws.
            var currentOffset = source.GetPosition();

            try
            {
                if (parser.Parse(lengthObj.ToIndirectReference(), source, isLenientParsing) is ICosNumber referenceNumber)
                {
                    return referenceNumber;
                }
            }
            finally
            {
                source.Seek(currentOffset);
            }

            throw new InvalidOperationException("Length object content was not read.");
        }

        if (currentObject is ICosNumber objectNumber)
        {
            return objectNumber;
        }

        throw new InvalidOperationException("Wrong type of referenced length object " + lengthObj
                                            + ": " + currentObject.GetType().Name);
    }

    throw new InvalidOperationException($"Wrong type of length object: {lengthBaseObj.GetType().Name}");
}
|
||||
|
||||
private void ReadValidStream(IRandomAccessRead reader, BinaryWriter output, ICosNumber streamLengthObj)
|
||||
|
@@ -179,7 +179,7 @@
|
||||
|
||||
PdfDictionary dict = dictionaryParser.Parse(reader, baseParser, pool);
|
||||
|
||||
PdfRawStream xrefStream = streamParser.Parse(reader, dict, isLenientParsing);
|
||||
PdfRawStream xrefStream = streamParser.Parse(reader, dict, isLenientParsing, null);
|
||||
CrossReferenceTablePart xrefTablePart = crossReferenceStreamParser.Parse(objByteOffset, xrefStream);
|
||||
|
||||
return xrefTablePart;
|
||||
|
@@ -52,7 +52,7 @@
|
||||
var crossReferenceOffset = container.Get<FileTrailerParser>().GetXrefOffset(reader, isLenientParsing);
|
||||
|
||||
var pool = new CosObjectPool();
|
||||
|
||||
|
||||
var crossReferenceTable = container.Get<FileCrossReferenceTableParser>()
|
||||
.Parse(reader, isLenientParsing, crossReferenceOffset, pool);
|
||||
|
||||
|
Reference in New Issue
Block a user