Merge pull request #561 from mvantzet/PageSizesAndRotation

Page sizes and rotation
This commit is contained in:
Eliot Jones
2023-03-17 20:48:32 +01:00
committed by GitHub
15 changed files with 375 additions and 87 deletions

View File

@@ -38,7 +38,13 @@
double cos; double cos;
double sin; double sin;
switch (degreesCounterclockwise) var deg = degreesCounterclockwise % 360;
if (deg < 0)
{
deg += 360;
}
switch (deg)
{ {
case 0: case 0:
case 360: case 360:

View File

@@ -1,9 +1,11 @@
namespace UglyToad.PdfPig.Tests.Geometry namespace UglyToad.PdfPig.Tests.Geometry
{ {
using Content;
using PdfPig.Geometry; using PdfPig.Geometry;
using PdfPig.Core; using PdfPig.Core;
using Xunit; using Xunit;
using System.Collections.Generic; using System.Collections.Generic;
using System.Drawing;
public class PdfRectangleTests public class PdfRectangleTests
{ {
@@ -1694,5 +1696,20 @@
Assert.True(rect.Height > 0); Assert.True(rect.Height > 0);
} }
/// <summary>
/// Checks that a rectangle anchored at the origin with the given width and height
/// is reported as the expected named page size by <c>GetPageSize()</c>.
/// The cases cover exact sizes, swapped width/height, fractional sizes and
/// sizes that are one point off (expected to be <c>PageSize.Custom</c>).
/// </summary>
[Theory]
[InlineData(595, 842, PageSize.A4)]
[InlineData(594, 843, PageSize.Custom)]
[InlineData(596, 841, PageSize.Custom)]
[InlineData(842, 595, PageSize.A4)]
[InlineData(595.3, 841.5, PageSize.A4)]
[InlineData(841.5, 595.3, PageSize.A4)]
[InlineData(1224, 792, PageSize.Ledger)]
[InlineData(792, 1224, PageSize.Tabloid)]
public void Parse(double w, double h, PageSize expectedPageSize)
{
    var pageBounds = new PdfRectangle(0, 0, w, h);

    var actualPageSize = pageBounds.GetPageSize();

    Assert.Equal(expectedPageSize, actualPageSize);
}
} }
} }

View File

@@ -0,0 +1,169 @@
namespace UglyToad.PdfPig.Tests.Graphics
{
using Content;
using Logging;
using PdfPig.Core;
using PdfPig.Geometry;
using PdfPig.Graphics;
using System.Linq;
using Xunit;
public class ContentStreamProcessorTests
{
/// <summary>
/// With identical media and crop boxes at the origin and no rotation, both the
/// initial matrix and its inverse are expected to leave a glyph rectangle where it is.
/// </summary>
[Fact]
public void InitialMatrixHandlesDefaultCase()
{
    // The usual layout: crop box equal to the media box, both starting at (0,0).
    // A4 dimensions are used as the sample page size.
    var mediaBox = new PdfRectangle(0, 0, 595, 842);
    var cropBox = new PdfRectangle(0, 0, 595, 842);

    // A 10x20 glyph placed in the top-left corner of the crop box.
    var glyph = new PdfRectangle(cropBox.Left, cropBox.Top - 20, cropBox.Left + 10, cropBox.Top);

    TransformationMatrix initialMatrix;
    TransformationMatrix inverseMatrix;
    GetInitialTransformationMatrices(mediaBox, cropBox, new PageRotationDegrees(0), out initialMatrix, out inverseMatrix);

    var forward = initialMatrix.Transform(glyph);
    var roundTrip = inverseMatrix.Transform(forward);

    // Neither the forward transform nor the round trip may move the glyph.
    AssertAreEqual(glyph, forward);
    AssertAreEqual(glyph, roundTrip);
}
/// <summary>
/// Uses a crop box that extends beyond the media box and checks, for 0 and 180 degrees
/// of rotation, that points inside the overlapping region map into the 195x442 area,
/// that points outside it receive a negative coordinate, and that the inverse matrix
/// maps every transformed point back to its source.
/// </summary>
[Fact]
public void InitialMatrixHandlesCropBoxOutsideMediaBox()
{
    // An A4 sized media box paired with a crop box that sticks out beyond it.
    var mediaBox = new PdfRectangle(0, 0, 595, 842);
    var cropBox = new PdfRectangle(400, 400, 1000, 1000);

    // The overlap of both boxes (the "view box") spans x=[400..595] y=[400..842],
    // so it is 195 wide and 442 high.
    // Probe points: one inside the view box and one beyond each of its edges.
    var inside = new PdfPoint(500, 500);
    var below = new PdfPoint(500, 100);
    var toTheLeft = new PdfPoint(200, 500);
    var above = new PdfPoint(500, 1000);
    var toTheRight = new PdfPoint(1000, 500);

    // No rotation: points below / left of the view box should get a negative coordinate.
    GetInitialTransformationMatrices(mediaBox, cropBox, new PageRotationDegrees(0), out var forward, out var backward);

    var mapped = forward.Transform(inside);
    var restored = backward.Transform(mapped);
    AssertAreEqual(inside, restored);
    Assert.True(0 < mapped.X && mapped.X < 195 && 0 < mapped.Y && mapped.Y < 442);

    mapped = forward.Transform(below);
    restored = backward.Transform(mapped);
    AssertAreEqual(below, restored);
    Assert.True(0 < mapped.X && mapped.X < 195 && mapped.Y < 0);

    mapped = forward.Transform(toTheLeft);
    restored = backward.Transform(mapped);
    AssertAreEqual(toTheLeft, restored);
    Assert.True(mapped.X < 0 && 0 < mapped.Y && mapped.Y < 442);

    // Rotated by 180 degrees: now the points above / right of the view box
    // should get a negative coordinate.
    GetInitialTransformationMatrices(mediaBox, cropBox, new PageRotationDegrees(180), out forward, out backward);

    mapped = forward.Transform(inside);
    restored = backward.Transform(mapped);
    AssertAreEqual(inside, restored);
    Assert.True(0 < mapped.X && mapped.X < 195 && 0 < mapped.Y && mapped.Y < 442);

    mapped = forward.Transform(above);
    restored = backward.Transform(mapped);
    AssertAreEqual(above, restored);
    Assert.True(0 < mapped.X && mapped.X < 195 && mapped.Y < 0);

    mapped = forward.Transform(toTheRight);
    restored = backward.Transform(mapped);
    AssertAreEqual(toTheRight, restored);
    Assert.True(mapped.X < 0 && 0 < mapped.Y && mapped.Y < 442);
}
/// <summary>
/// Transforms a glyph in the top-left corner of an offset crop box for each supported
/// page rotation (0, 90, 180 and 270 degrees) and checks both the transformed corner
/// coordinates and that the inverse matrix restores the original glyph.
/// </summary>
[Fact]
public void InitialMatrixHandlesCropBoxAndRotation()
{
    var mediaBox = new PdfRectangle(0, 0, 595, 842);

    // Crop box of size 300x400 whose bottom left corner is at (100,200).
    var cropBox = new PdfRectangle(100, 200, 400, 600);

    // A 10x20 glyph placed in the top-left corner of the crop box.
    var glyph = new PdfRectangle(cropBox.Left, cropBox.Top - 20, cropBox.Left + 10, cropBox.Top);

    // Dimensions the expected coordinates below are expressed in.
    var cropWidth = cropBox.Width;
    var cropHeight = cropBox.Height;
    var glyphWidth = glyph.Width;
    var glyphHeight = glyph.Height;

    // 0 degrees (no rotation)
    GetInitialTransformationMatrices(mediaBox, cropBox, new PageRotationDegrees(0), out var matrix, out var inverse);
    var onPage = matrix.Transform(glyph);
    var restored = inverse.Transform(onPage);
    AssertAreEqual(glyph, restored);
    Assert.Equal(0, onPage.BottomLeft.X, 0);
    Assert.Equal(cropHeight - glyphHeight, onPage.BottomLeft.Y, 0);
    Assert.Equal(glyphWidth, onPage.TopRight.X, 0);
    Assert.Equal(cropHeight, onPage.TopRight.Y, 0);

    // 90 degrees
    GetInitialTransformationMatrices(mediaBox, cropBox, new PageRotationDegrees(90), out matrix, out inverse);
    onPage = matrix.Transform(glyph);
    restored = inverse.Transform(onPage);
    AssertAreEqual(glyph, restored);
    Assert.Equal(cropHeight - glyphHeight, onPage.BottomLeft.X, 0);
    Assert.Equal(cropWidth, onPage.BottomLeft.Y, 0);
    Assert.Equal(cropHeight, onPage.TopRight.X, 0);
    Assert.Equal(cropWidth - glyphWidth, onPage.TopRight.Y, 0);

    // 180 degrees
    GetInitialTransformationMatrices(mediaBox, cropBox, new PageRotationDegrees(180), out matrix, out inverse);
    onPage = matrix.Transform(glyph);
    restored = inverse.Transform(onPage);
    AssertAreEqual(glyph, restored);
    Assert.Equal(cropWidth, onPage.BottomLeft.X, 0);
    Assert.Equal(glyphHeight, onPage.BottomLeft.Y, 0);
    Assert.Equal(cropWidth - glyphWidth, onPage.TopRight.X, 0);
    Assert.Equal(0, onPage.TopRight.Y, 0);

    // 270 degrees
    GetInitialTransformationMatrices(mediaBox, cropBox, new PageRotationDegrees(270), out matrix, out inverse);
    onPage = matrix.Transform(glyph);
    restored = inverse.Transform(onPage);
    AssertAreEqual(glyph, restored);
    Assert.Equal(glyphHeight, onPage.BottomLeft.X, 0);
    Assert.Equal(0, onPage.BottomLeft.Y, 0);
    Assert.Equal(0, onPage.TopRight.X, 0);
    Assert.Equal(glyphWidth, onPage.TopRight.Y, 0);
}
/// <summary>
/// Asks <c>ContentStreamProcessor.GetInitialMatrix</c> for the initial matrix of a page
/// with the default user space unit and the given boxes and rotation, and also
/// returns the inverse of that matrix.
/// </summary>
/// <param name="mediaBox">The media box of the page.</param>
/// <param name="cropBox">The crop box of the page.</param>
/// <param name="rotation">The page rotation.</param>
/// <param name="initialMatrix">Receives the matrix returned by <c>GetInitialMatrix</c>.</param>
/// <param name="inverseMatrix">Receives the inverse of <paramref name="initialMatrix"/>.</param>
private static void GetInitialTransformationMatrices(
    MediaBox mediaBox,
    CropBox cropBox,
    PageRotationDegrees rotation,
    out TransformationMatrix initialMatrix,
    out TransformationMatrix inverseMatrix)
{
    var userSpaceUnit = UserSpaceUnit.Default;
    var log = new TestingLog();

    var forward = ContentStreamProcessor.GetInitialMatrix(userSpaceUnit, mediaBox, cropBox, rotation, log);

    initialMatrix = forward;
    inverseMatrix = forward.Inverse();
}
/// <summary>
/// Convenience overload taking plain rectangles: wraps them in a <c>MediaBox</c> and a
/// <c>CropBox</c> and forwards to the overload that accepts those types.
/// </summary>
/// <param name="mediaBox">Bounds used to build the media box.</param>
/// <param name="cropBox">Bounds used to build the crop box.</param>
/// <param name="rotation">The page rotation.</param>
/// <param name="initialMatrix">Receives the initial matrix.</param>
/// <param name="inverseMatrix">Receives the inverse of the initial matrix.</param>
private static void GetInitialTransformationMatrices(
    PdfRectangle mediaBox,
    PdfRectangle cropBox,
    PageRotationDegrees rotation,
    out TransformationMatrix initialMatrix,
    out TransformationMatrix inverseMatrix)
{
    var wrappedMediaBox = new MediaBox(mediaBox);
    var wrappedCropBox = new CropBox(cropBox);

    GetInitialTransformationMatrices(wrappedMediaBox, wrappedCropBox, rotation, out initialMatrix, out inverseMatrix);
}
/// <summary>
/// Asserts that two rectangles match by comparing their bottom left corners
/// and then their top right corners with the point overload of this helper.
/// </summary>
/// <param name="r1">The expected rectangle.</param>
/// <param name="r2">The actual rectangle.</param>
private static void AssertAreEqual(PdfRectangle r1, PdfRectangle r2)
{
    var expectedBottomLeft = r1.BottomLeft;
    var actualBottomLeft = r2.BottomLeft;
    AssertAreEqual(expectedBottomLeft, actualBottomLeft);

    var expectedTopRight = r1.TopRight;
    var actualTopRight = r2.TopRight;
    AssertAreEqual(expectedTopRight, actualTopRight);
}
/// <summary>
/// Asserts that two points match, comparing X and then Y with a precision of
/// zero decimal places.
/// </summary>
/// <param name="p1">The expected point.</param>
/// <param name="p2">The actual point.</param>
private static void AssertAreEqual(PdfPoint p1, PdfPoint p2)
{
    // Number of decimal places passed to Assert.Equal for the comparison.
    const int precision = 0;

    Assert.Equal(p1.X, p2.X, precision);
    Assert.Equal(p1.Y, p2.Y, precision);
}
}
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 51 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.3 KiB

View File

@@ -0,0 +1,30 @@
namespace UglyToad.PdfPig.Tests.Integration
{
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Xunit;
public class RotationAndCroppingTests
{
/// <summary>
/// Opens the "SPARC - v9 Architecture Manual" sample and verifies the size of its first
/// page as well as the horizontal extent of the letters on that page.
/// </summary>
[Fact]
public void CroppedPageHasCorrectTextCoordinates()
{
    var documentPath = IntegrationHelpers.GetDocumentPath("SPARC - v9 Architecture Manual");

    using (var document = PdfDocument.Open(documentPath))
    {
        var firstPage = document.GetPage(1);

        // The reported page size is the result of cropping.
        Assert.Equal(612, firstPage.Width);
        Assert.Equal(792, firstPage.Height);

        var leftMost = firstPage.Letters.Min(letter => letter.GlyphRectangle.Left);
        var rightMost = firstPage.Letters.Max(letter => letter.GlyphRectangle.Right);

        // These values will be off when cropping is not applied correctly.
        Assert.Equal(72, leftMost, 0);
        Assert.Equal(540, rightMost, 0);

        // The cropped page must still expose its content.
        Assert.NotNull(firstPage.Content);
    }
}
}
}

View File

@@ -19,7 +19,9 @@
private const string SinglePage90ClockwiseRotation = "SinglePage90ClockwiseRotation - from PdfPig"; private const string SinglePage90ClockwiseRotation = "SinglePage90ClockwiseRotation - from PdfPig";
private const string SinglePage180ClockwiseRotation = "SinglePage180ClockwiseRotation - from PdfPig"; private const string SinglePage180ClockwiseRotation = "SinglePage180ClockwiseRotation - from PdfPig";
private const string SinglePage270ClockwiseRotation = "SinglePage270ClockwiseRotation - from PdfPig"; private const string SinglePage270ClockwiseRotation = "SinglePage270ClockwiseRotation - from PdfPig";
private const string SPARCv9ArchitectureManual = "SPARC - v9 Architecture Manual";
private const string CroppedAndRotatedFile = "cropped-and-rotated";
private static string GetFilename(string name) private static string GetFilename(string name)
{ {
var documentFolder = Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "Integration", "Documents")); var documentFolder = Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "Integration", "Documents"));
@@ -116,6 +118,18 @@
Run(SinglePage270ClockwiseRotation, 595); Run(SinglePage270ClockwiseRotation, 595);
} }
/// <summary>
/// Runs the shared <c>Run</c> routine against the SPARC v9 architecture manual sample,
/// relying on the default image height of <c>Run</c>.
/// </summary>
[Fact]
public void SPARCv9ArchitectureManualTest() => Run(SPARCv9ArchitectureManual);
/// <summary>
/// Runs the shared <c>Run</c> routine against the cropped and rotated sample,
/// using an image height of 205.
/// </summary>
[Fact]
public void CroppedAndRotatedTest() => Run(CroppedAndRotatedFile, imageHeight: 205);
private static void Run(string file, int imageHeight = 792) private static void Run(string file, int imageHeight = 792)
{ {
var pdfFileName = GetFilename(file); var pdfFileName = GetFilename(file);
@@ -127,6 +141,7 @@
var violetPen = new Pen(Color.BlueViolet, 1); var violetPen = new Pen(Color.BlueViolet, 1);
var redPen = new Pen(Color.Crimson, 1); var redPen = new Pen(Color.Crimson, 1);
var bluePen = new Pen(Color.GreenYellow, 1);
using (var bitmap = new Bitmap(image)) using (var bitmap = new Bitmap(image))
using (var graphics = Graphics.FromImage(bitmap)) using (var graphics = Graphics.FromImage(bitmap))
@@ -141,6 +156,11 @@
DrawRectangle(letter.GlyphRectangle, graphics, violetPen, imageHeight); DrawRectangle(letter.GlyphRectangle, graphics, violetPen, imageHeight);
} }
foreach (var annotation in page.ExperimentalAccess.GetAnnotations())
{
DrawRectangle(annotation.Rectangle, graphics, bluePen, imageHeight);
}
var imageName = $"{file}.jpg"; var imageName = $"{file}.jpg";
if (!Directory.Exists("Images")) if (!Directory.Exists("Images"))

View File

@@ -11,6 +11,10 @@
/// </summary> /// </summary>
public class Annotation public class Annotation
{ {
private readonly StreamToken normalAppearanceStream;
private readonly StreamToken rollOverAppearanceStream;
private readonly StreamToken downAppearanceStream;
/// <summary> /// <summary>
/// The underlying PDF dictionary which this annotation was created from. /// The underlying PDF dictionary which this annotation was created from.
/// </summary> /// </summary>
@@ -62,11 +66,22 @@
/// </summary> /// </summary>
public IReadOnlyList<QuadPointsQuadrilateral> QuadPoints { get; } public IReadOnlyList<QuadPointsQuadrilateral> QuadPoints { get; }
/// <summary>
/// Whether a roll over appearance is present for this annotation
/// (the appearance shown when you hover over this annotation).
/// </summary>
public bool HasRollOverAppearance
{
    get { return rollOverAppearanceStream != null; }
}
/// <summary>
/// Whether a down appearance is present for this annotation
/// (the appearance shown when you click on this annotation).
/// </summary>
public bool HasDownAppearance
{
    get { return downAppearanceStream != null; }
}
/// <summary> /// <summary>
/// Create a new <see cref="Annotation"/>. /// Create a new <see cref="Annotation"/>.
/// </summary> /// </summary>
public Annotation(DictionaryToken annotationDictionary, AnnotationType type, PdfRectangle rectangle, string content, string name, string modifiedDate, public Annotation(DictionaryToken annotationDictionary, AnnotationType type, PdfRectangle rectangle, string content, string name, string modifiedDate,
AnnotationFlags flags, AnnotationBorder border, IReadOnlyList<QuadPointsQuadrilateral> quadPoints) AnnotationFlags flags, AnnotationBorder border, IReadOnlyList<QuadPointsQuadrilateral> quadPoints,
StreamToken normalAppearanceStream, StreamToken rollOverAppearanceStream, StreamToken downAppearanceStream)
{ {
AnnotationDictionary = annotationDictionary ?? throw new ArgumentNullException(nameof(annotationDictionary)); AnnotationDictionary = annotationDictionary ?? throw new ArgumentNullException(nameof(annotationDictionary));
Type = type; Type = type;
@@ -77,6 +92,9 @@
Flags = flags; Flags = flags;
Border = border; Border = border;
QuadPoints = quadPoints ?? EmptyArray<QuadPointsQuadrilateral>.Instance; QuadPoints = quadPoints ?? EmptyArray<QuadPointsQuadrilateral>.Instance;
this.normalAppearanceStream = normalAppearanceStream;
this.rollOverAppearanceStream = rollOverAppearanceStream;
this.downAppearanceStream = downAppearanceStream;
} }
/// <inheritdoc /> /// <inheritdoc />

View File

@@ -13,9 +13,12 @@
{ {
private readonly IPdfTokenScanner tokenScanner; private readonly IPdfTokenScanner tokenScanner;
private readonly DictionaryToken pageDictionary; private readonly DictionaryToken pageDictionary;
private readonly TransformationMatrix matrix;
public AnnotationProvider(IPdfTokenScanner tokenScanner, DictionaryToken pageDictionary) public AnnotationProvider(IPdfTokenScanner tokenScanner, DictionaryToken pageDictionary,
TransformationMatrix matrix)
{ {
this.matrix = matrix;
this.tokenScanner = tokenScanner ?? throw new ArgumentNullException(nameof(tokenScanner)); this.tokenScanner = tokenScanner ?? throw new ArgumentNullException(nameof(tokenScanner));
this.pageDictionary = pageDictionary ?? throw new ArgumentNullException(nameof(pageDictionary)); this.pageDictionary = pageDictionary ?? throw new ArgumentNullException(nameof(pageDictionary));
} }
@@ -37,10 +40,11 @@
var type = annotationDictionary.Get<NameToken>(NameToken.Subtype, tokenScanner); var type = annotationDictionary.Get<NameToken>(NameToken.Subtype, tokenScanner);
var annotationType = type.ToAnnotationType(); var annotationType = type.ToAnnotationType();
var rectangle = annotationDictionary.Get<ArrayToken>(NameToken.Rect, tokenScanner).ToRectangle(tokenScanner); var rectangle = matrix.Transform(annotationDictionary.Get<ArrayToken>(NameToken.Rect, tokenScanner).ToRectangle(tokenScanner));
var contents = GetNamedString(NameToken.Contents, annotationDictionary); var contents = GetNamedString(NameToken.Contents, annotationDictionary);
var name = GetNamedString(NameToken.Nm, annotationDictionary); var name = GetNamedString(NameToken.Nm, annotationDictionary);
// As indicated in PDF reference 8.4.1, the modified date can be anything, but is usually a date formatted according to sec. 3.8.3
var modifiedDate = GetNamedString(NameToken.M, annotationDictionary); var modifiedDate = GetNamedString(NameToken.M, annotationDictionary);
var flags = (AnnotationFlags)0; var flags = (AnnotationFlags)0;
@@ -83,10 +87,10 @@
{ {
quadPointRectangles.Add(new QuadPointsQuadrilateral(new[] quadPointRectangles.Add(new QuadPointsQuadrilateral(new[]
{ {
new PdfPoint(values[0], values[1]), matrix.Transform(new PdfPoint(values[0], values[1])),
new PdfPoint(values[2], values[3]), matrix.Transform(new PdfPoint(values[2], values[3])),
new PdfPoint(values[4], values[5]), matrix.Transform(new PdfPoint(values[4], values[5])),
new PdfPoint(values[6], values[7]) matrix.Transform(new PdfPoint(values[6], values[7]))
})); }));
values.Clear(); values.Clear();
@@ -94,8 +98,29 @@
} }
} }
yield return new Annotation(annotationDictionary, annotationType, rectangle, contents, name, modifiedDate, flags, border, StreamToken normalAppearanceStream = null, downAppearanceStream = null, rollOverAppearanceStream = null;
quadPointRectangles); if (annotationDictionary.TryGet(NameToken.Ap, out DictionaryToken appearanceDictionary))
{
// The normal appearance of this annotation
if (appearanceDictionary.TryGet(NameToken.N, out IndirectReferenceToken normalAppearanceRef))
{
normalAppearanceStream = tokenScanner.Get(normalAppearanceRef.Data)?.Data as StreamToken;
}
// If present, the 'roll over' appearance of this annotation (when hovering the mouse pointer over this annotation)
if (appearanceDictionary.TryGet(NameToken.R, out IndirectReferenceToken rollOverAppearanceRef))
{
rollOverAppearanceStream = tokenScanner.Get(rollOverAppearanceRef.Data)?.Data as StreamToken;
}
// If present, the 'down' appearance of this annotation (when you click on it)
if (appearanceDictionary.TryGet(NameToken.D, out IndirectReferenceToken downAppearanceRef))
{
downAppearanceStream = tokenScanner.Get(downAppearanceRef.Data)?.Data as StreamToken;
}
}
yield return new Annotation(annotationDictionary, annotationType, rectangle,
contents, name, modifiedDate, flags, border, quadPointRectangles,
normalAppearanceStream, rollOverAppearanceStream, downAppearanceStream);
} }
} }

View File

@@ -4,6 +4,7 @@
using System.Collections.Generic; using System.Collections.Generic;
using System.Text; using System.Text;
using Annotations; using Annotations;
using Geometry;
using Graphics.Operations; using Graphics.Operations;
using Tokens; using Tokens;
using Util; using Util;
@@ -107,10 +108,13 @@
Content = content; Content = content;
textLazy = new Lazy<string>(() => GetText(Content)); textLazy = new Lazy<string>(() => GetText(Content));
Width = mediaBox.Bounds.Width; // Special case where cropbox is outside mediabox: use cropbox instead of intersection
Height = mediaBox.Bounds.Height; var viewBox = mediaBox.Bounds.Intersect(cropBox.Bounds) ?? cropBox.Bounds;
Width = rotation.SwapsAxis ? viewBox.Height : viewBox.Width;
Height = rotation.SwapsAxis ? viewBox.Width : viewBox.Height;
Size = viewBox.GetPageSize();
Size = mediaBox.Bounds.GetPageSize();
ExperimentalAccess = new Experimental(this, annotationProvider); ExperimentalAccess = new Experimental(this, annotationProvider);
this.annotationProvider = annotationProvider; this.annotationProvider = annotationProvider;
this.pdfScanner = pdfScanner ?? throw new ArgumentNullException(nameof(pdfScanner)); this.pdfScanner = pdfScanner ?? throw new ArgumentNullException(nameof(pdfScanner));

View File

@@ -3,6 +3,7 @@
using System.Collections.Generic; using System.Collections.Generic;
using System.Linq; using System.Linq;
using Core; using Core;
using System;
/// <summary> /// <summary>
/// The corresponding named size of the <see cref="Page"/>. /// The corresponding named size of the <see cref="Page"/>.
@@ -102,6 +103,7 @@
{new WidthHeight(74, 105), PageSize.A10}, {new WidthHeight(74, 105), PageSize.A10},
{new WidthHeight(612, 792), PageSize.Letter}, {new WidthHeight(612, 792), PageSize.Letter},
{new WidthHeight(612, 1008), PageSize.Legal}, {new WidthHeight(612, 1008), PageSize.Legal},
// Ledger and Tabloid differ by orientation
{new WidthHeight(1224, 792), PageSize.Ledger}, {new WidthHeight(1224, 792), PageSize.Ledger},
{new WidthHeight(792, 1224), PageSize.Tabloid}, {new WidthHeight(792, 1224), PageSize.Tabloid},
// Again there is disagreement here // Again there is disagreement here
@@ -111,11 +113,11 @@
public static PageSize GetPageSize(this PdfRectangle rectangle) public static PageSize GetPageSize(this PdfRectangle rectangle)
{ {
if (!Lookup.TryGetValue(new WidthHeight(rectangle.Width, rectangle.Height), out var size)) if (!Lookup.TryGetValue(new WidthHeight(rectangle.Width, rectangle.Height), out var size)
&& !Lookup.TryGetValue(new WidthHeight(rectangle.Height, rectangle.Width), out size))
{ {
return PageSize.Custom; return PageSize.Custom;
} }
return size; return size;
} }
@@ -148,15 +150,15 @@
public override bool Equals(object obj) public override bool Equals(object obj)
{ {
return obj is WidthHeight height && return obj is WidthHeight height &&
Width == height.Width && Math.Round(Width) == Math.Round(height.Width) &&
Height == height.Height; Math.Round(Height) == Math.Round(height.Height);
} }
public override int GetHashCode() public override int GetHashCode()
{ {
var hashCode = 859600377; var hashCode = 859600377;
hashCode = hashCode * -1521134295 + Width.GetHashCode(); hashCode = hashCode * -1521134295 + Math.Round(Width).GetHashCode();
hashCode = hashCode * -1521134295 + Height.GetHashCode(); hashCode = hashCode * -1521134295 + Math.Round(Height).GetHashCode();
return hashCode; return hashCode;
} }
} }

View File

@@ -5,6 +5,7 @@
using Core; using Core;
using Filters; using Filters;
using Geometry; using Geometry;
using Logging;
using Operations; using Operations;
using Parser; using Parser;
using PdfFonts; using PdfFonts;
@@ -48,7 +49,6 @@
private readonly IPdfTokenScanner pdfScanner; private readonly IPdfTokenScanner pdfScanner;
private readonly IPageContentParser pageContentParser; private readonly IPageContentParser pageContentParser;
private readonly ILookupFilterProvider filterProvider; private readonly ILookupFilterProvider filterProvider;
private readonly PdfVector pageSize;
private readonly InternalParsingOptions parsingOptions; private readonly InternalParsingOptions parsingOptions;
private readonly MarkedContentStack markedContentStack = new MarkedContentStack(); private readonly MarkedContentStack markedContentStack = new MarkedContentStack();
@@ -84,11 +84,14 @@
{XObjectType.PostScript, new List<XObjectContentRecord>()} {XObjectType.PostScript, new List<XObjectContentRecord>()}
}; };
public ContentStreamProcessor(PdfRectangle cropBox, IResourceStore resourceStore, UserSpaceUnit userSpaceUnit, PageRotationDegrees rotation, public ContentStreamProcessor(IResourceStore resourceStore,
UserSpaceUnit userSpaceUnit,
MediaBox mediaBox,
CropBox cropBox,
PageRotationDegrees rotation,
IPdfTokenScanner pdfScanner, IPdfTokenScanner pdfScanner,
IPageContentParser pageContentParser, IPageContentParser pageContentParser,
ILookupFilterProvider filterProvider, ILookupFilterProvider filterProvider,
PdfVector pageSize,
InternalParsingOptions parsingOptions) InternalParsingOptions parsingOptions)
{ {
this.resourceStore = resourceStore; this.resourceStore = resourceStore;
@@ -97,18 +100,17 @@
this.pdfScanner = pdfScanner ?? throw new ArgumentNullException(nameof(pdfScanner)); this.pdfScanner = pdfScanner ?? throw new ArgumentNullException(nameof(pdfScanner));
this.pageContentParser = pageContentParser ?? throw new ArgumentNullException(nameof(pageContentParser)); this.pageContentParser = pageContentParser ?? throw new ArgumentNullException(nameof(pageContentParser));
this.filterProvider = filterProvider ?? throw new ArgumentNullException(nameof(filterProvider)); this.filterProvider = filterProvider ?? throw new ArgumentNullException(nameof(filterProvider));
this.pageSize = pageSize;
this.parsingOptions = parsingOptions; this.parsingOptions = parsingOptions;
// initiate CurrentClippingPath to cropBox // initiate CurrentClippingPath to cropBox
var clippingSubpath = new PdfSubpath(); var clippingSubpath = new PdfSubpath();
clippingSubpath.Rectangle(cropBox.BottomLeft.X, cropBox.BottomLeft.Y, cropBox.Width, cropBox.Height); clippingSubpath.Rectangle(cropBox.Bounds.BottomLeft.X, cropBox.Bounds.BottomLeft.Y, cropBox.Bounds.Width, cropBox.Bounds.Height);
var clippingPath = new PdfPath() { clippingSubpath }; var clippingPath = new PdfPath() { clippingSubpath };
clippingPath.SetClipping(FillingRule.EvenOdd); clippingPath.SetClipping(FillingRule.EvenOdd);
graphicsStack.Push(new CurrentGraphicsState() graphicsStack.Push(new CurrentGraphicsState()
{ {
CurrentTransformationMatrix = GetInitialMatrix(), CurrentTransformationMatrix = GetInitialMatrix(userSpaceUnit, mediaBox, cropBox, rotation, parsingOptions.Logger),
CurrentClippingPath = clippingPath CurrentClippingPath = clippingPath
}); });
@@ -116,63 +118,69 @@
} }
[System.Diagnostics.Contracts.Pure] [System.Diagnostics.Contracts.Pure]
private TransformationMatrix GetInitialMatrix() internal static TransformationMatrix GetInitialMatrix(UserSpaceUnit userSpaceUnit,
MediaBox mediaBox,
CropBox cropBox,
PageRotationDegrees rotation,
ILog log)
{ {
// TODO: this is a bit of a hack because I don't understand matrices // Cater for scenario where the cropbox is larger than the mediabox.
// TODO: use MediaBox (i.e. pageSize) or CropBox? // If there is no intersection (method returns null), fall back to the cropbox.
var viewBox = mediaBox.Bounds.Intersect(cropBox.Bounds) ?? cropBox.Bounds;
/* if (rotation.Value == 0
* There should be a single Affine Transform we can apply to any point resulting && viewBox.Left == 0
* from a content stream operation which will rotate the point and translate it back to && viewBox.Bottom == 0
* a point where the origin is in the page's lower left corner. && userSpaceUnit.PointMultiples == 1)
* {
* For example this matrix represents a (clockwise) rotation and translation: return TransformationMatrix.Identity;
* [ cos sin tx ] }
* [ -sin cos ty ]
* [ 0 0 1 ]
* Warning: rotation is counter-clockwise here
*
* The values of tx and ty are those required to move the origin back to the expected origin (lower-left).
* The corresponding values should be:
* Rotation: 0 90 180 270
* tx: 0 0 w w
* ty: 0 h h 0
*
* Where w and h are the page width and height after rotation.
*/
double cos, sin; // Move points so that (0,0) is equal to the viewbox bottom left corner.
double dx = 0, dy = 0; var t1 = TransformationMatrix.GetTranslationMatrix(-viewBox.Left, -viewBox.Bottom);
if (userSpaceUnit.PointMultiples != 1)
{
log.Warn("User space unit other than 1 is not implemented");
}
// After rotating around the origin, our points will have negative x/y coordinates.
// Fix this by translating them by a certain dx/dy after rotation based on the viewbox.
double dx, dy;
switch (rotation.Value) switch (rotation.Value)
{ {
case 0: case 0:
cos = 1; // No need to rotate / translate after rotation, just return the initial
sin = 0; // translation matrix.
break; return t1;
case 90: case 90:
cos = 0; // Move rotated points up by our (unrotated) viewbox width
sin = 1; dx = 0;
dy = pageSize.Y; dy = viewBox.Width;
break; break;
case 180: case 180:
cos = -1; // Move rotated points up/right using the (unrotated) viewbox width/height
sin = 0; dx = viewBox.Width;
dx = pageSize.X; dy = viewBox.Height;
dy = pageSize.Y;
break; break;
case 270: case 270:
cos = 0; // Move rotated points right using the (unrotated) viewbox height
sin = -1; dx = viewBox.Height;
dx = pageSize.X; dy = 0;
break; break;
default: default:
throw new InvalidOperationException($"Invalid value for page rotation: {rotation.Value}."); throw new InvalidOperationException($"Invalid value for page rotation: {rotation.Value}.");
} }
return new TransformationMatrix( // GetRotationMatrix uses counter clockwise angles, whereas our page rotation
cos, -sin, 0, // is a clockwise angle, so flip the sign.
sin, cos, 0, var r = TransformationMatrix.GetRotationMatrix(-rotation.Value);
dx, dy, 1);
// Fix up negative coordinates after rotation
var t2 = TransformationMatrix.GetTranslationMatrix(dx, dy);
// Now get the final combined matrix T1 > R > T2
return t1.Multiply(r.Multiply(t2));
} }
public PageContent Process(int pageNumberCurrent, IReadOnlyList<IGraphicsStateOperation> operations) public PageContent Process(int pageNumberCurrent, IReadOnlyList<IGraphicsStateOperation> operations)

View File

@@ -73,19 +73,6 @@
stackDepth++; stackDepth++;
} }
// Apply rotation.
if (rotation.SwapsAxis)
{
mediaBox = new MediaBox(new PdfRectangle(mediaBox.Bounds.Bottom,
mediaBox.Bounds.Left,
mediaBox.Bounds.Top,
mediaBox.Bounds.Right));
cropBox = new CropBox(new PdfRectangle(cropBox.Bounds.Bottom,
cropBox.Bounds.Left,
cropBox.Bounds.Top,
cropBox.Bounds.Right));
}
UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary); UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary);
PageContent content; PageContent content;
@@ -146,8 +133,10 @@
content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, mediaBox, parsingOptions); content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, mediaBox, parsingOptions);
} }
var page = new Page(number, dictionary, mediaBox, cropBox, rotation, content, var initialMatrix = ContentStreamProcessor.GetInitialMatrix(userSpaceUnit, mediaBox, cropBox, rotation, parsingOptions.Logger);
new AnnotationProvider(pdfScanner, dictionary),
var page = new Page(number, dictionary, mediaBox, cropBox, rotation, content,
new AnnotationProvider(pdfScanner, dictionary, initialMatrix),
pdfScanner); pdfScanner);
for (var i = 0; i < stackDepth; i++) for (var i = 0; i < stackDepth; i++)
@@ -171,14 +160,14 @@
parsingOptions.Logger); parsingOptions.Logger);
var context = new ContentStreamProcessor( var context = new ContentStreamProcessor(
cropBox.Bounds,
resourceStore, resourceStore,
userSpaceUnit, userSpaceUnit,
mediaBox,
cropBox,
rotation, rotation,
pdfScanner, pdfScanner,
pageContentParser, pageContentParser,
filterProvider, filterProvider,
new PdfVector(mediaBox.Bounds.Width, mediaBox.Bounds.Height),
parsingOptions); parsingOptions);
return context.Process(pageNumber, operations); return context.Process(pageNumber, operations);
@@ -214,7 +203,7 @@
return cropBox; return cropBox;
} }
cropBox = new CropBox(cropBoxArray.ToIntRectangle(pdfScanner)); cropBox = new CropBox(cropBoxArray.ToRectangle(pdfScanner));
} }
else else
{ {
@@ -243,7 +232,7 @@
return mediaBox; return mediaBox;
} }
mediaBox = new MediaBox(mediaboxArray.ToIntRectangle(pdfScanner)); mediaBox = new MediaBox(mediaboxArray.ToRectangle(pdfScanner));
} }
else else
{ {

View File

@@ -8,7 +8,7 @@
public static class DateFormatHelper public static class DateFormatHelper
{ {
/// <summary> /// <summary>
/// Try parsing a pdf formated date string into a <see cref="DateTimeOffset"/>. /// Try parsing a pdf formatted date string into a <see cref="DateTimeOffset"/>.
/// <para>Date values used in a PDF shall conform to a standard date format, which closely /// <para>Date values used in a PDF shall conform to a standard date format, which closely
/// follows that of the international standard ASN.1, defined in ISO/IEC 8824. A date shall be a text string /// follows that of the international standard ASN.1, defined in ISO/IEC 8824. A date shall be a text string
/// of the form (D:YYYYMMDDHHmmSSOHH'mm).</para> /// of the form (D:YYYYMMDDHHmmSSOHH'mm).</para>