Properly handle page rotation for crop box and media box and fix #665

This commit is contained in:
BobLd
2023-08-26 18:34:48 +01:00
parent 81cdb474e1
commit fe0e4db419
9 changed files with 201 additions and 136 deletions

View File

@@ -1,7 +1,6 @@
namespace UglyToad.PdfPig.Core
{
using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics.Contracts;
using System.Linq;
@@ -10,22 +9,22 @@
/// <summary>
/// Specifies the conversion from the transformed coordinate space to the original untransformed coordinate space.
/// </summary>
public struct TransformationMatrix
public readonly struct TransformationMatrix
{
/// <summary>
/// The default <see cref="TransformationMatrix"/>.
/// </summary>
public static TransformationMatrix Identity = new TransformationMatrix(1,0,0,
0,1,0,
0,0,1);
public static readonly TransformationMatrix Identity = new TransformationMatrix(1, 0, 0,
0, 1, 0,
0, 0, 1);
/// <summary>
/// Build a <see cref="TransformationMatrix"/> that performs a pure translation:
/// the first two rows are those of the identity matrix and the third row is (x, y, 1).
/// </summary>
/// <param name="x">The translation along the X axis.</param>
/// <param name="y">The translation along the Y axis.</param>
/// <returns>A new matrix carrying only the requested translation.</returns>
public static TransformationMatrix GetTranslationMatrix(double x, double y)
{
    return new TransformationMatrix(
        1, 0, 0,
        0, 1, 0,
        x, y, 1);
}
/// <summary>
/// Create a new <see cref="TransformationMatrix"/> with the X and Y scaling values set.
/// </summary>
@@ -105,7 +104,7 @@
/// The value at (2, 1) - translation in Y.
/// </summary>
public readonly double F;
/// <summary>
/// Get the value at the specific row and column.
/// </summary>
@@ -353,7 +352,7 @@
/// <param name="values">Either all 9 values of the matrix, 6 values in the default PDF order or the 4 values of the top left square.</param>
/// <returns></returns>
public static TransformationMatrix FromArray(decimal[] values)
=> FromArray(values.Select(x => (double) x).ToArray());
=> FromArray(values.Select(x => (double)x).ToArray());
/// <summary>
/// Create a new <see cref="TransformationMatrix"/> from the values.
@@ -404,8 +403,8 @@
var f = (E * matrix.B) + (F * matrix.D) + (row3 * matrix.F);
var r3 = (E * matrix.row1) + (F * matrix.row2) + (row3 * matrix.row3);
return new TransformationMatrix(a, b, r1,
c, d, r2,
return new TransformationMatrix(a, b, r1,
c, d, r2,
e, f, r3);
}
@@ -529,5 +528,17 @@
{
return $"{A}, {B}, {row1}\r\n{C}, {D}, {row2}\r\n{E}, {F}, {row3}";
}
/// <summary>
/// Equality operator; the result is whatever <c>left.Equals(right)</c> returns.
/// </summary>
/// <param name="left">The matrix on the left-hand side of the operator.</param>
/// <param name="right">The matrix on the right-hand side of the operator.</param>
/// <returns><c>true</c> when <paramref name="left"/> reports itself equal to <paramref name="right"/>.</returns>
public static bool operator ==(TransformationMatrix left, TransformationMatrix right)
    => left.Equals(right);
/// <summary>
/// Inequality operator; always the logical negation of the equality check between the two matrices.
/// </summary>
/// <param name="left">The matrix on the left-hand side of the operator.</param>
/// <param name="right">The matrix on the right-hand side of the operator.</param>
/// <returns><c>true</c> when the two matrices do not compare equal.</returns>
public static bool operator !=(TransformationMatrix left, TransformationMatrix right)
{
    var areEqual = left == right;
    return !areEqual;
}
}
}

View File

@@ -1,11 +1,9 @@
namespace UglyToad.PdfPig.Tests.Graphics
{
using Content;
using Logging;
using PdfPig.Core;
using PdfPig.Geometry;
using PdfPig.Graphics;
using System.Linq;
using Xunit;
public class ContentStreamProcessorTests
@@ -54,7 +52,7 @@
p0 = i.Transform(pt);
AssertAreEqual(pointBelowViewBox, p0);
Assert.True(pt.X > 0 && pt.X < 195 && pt.Y < 0);
pt = m.Transform(pointLeftOfViewBox);
p0 = i.Transform(pt);
AssertAreEqual(pointLeftOfViewBox, p0);
@@ -72,7 +70,7 @@
p0 = i.Transform(pt);
AssertAreEqual(pointAboveViewBox, p0);
Assert.True(pt.X > 0 && pt.X < 195 && pt.Y < 0);
pt = m.Transform(pointRightOfViewBox);
p0 = i.Transform(pt);
AssertAreEqual(pointRightOfViewBox, p0);
@@ -99,7 +97,6 @@
Assert.Equal(cropBox.Height - glyph.Height, transformedGlyph.BottomLeft.Y, 0);
Assert.Equal(glyph.Width, transformedGlyph.TopRight.X, 0);
Assert.Equal(cropBox.Height, transformedGlyph.TopRight.Y, 0);
// Test with 90 degrees
GetInitialTransformationMatrices(mediaBox, cropBox, new PageRotationDegrees(90), out initialMatrix, out inverseMatrix);
@@ -110,7 +107,7 @@
Assert.Equal(cropBox.Width, transformedGlyph.BottomLeft.Y, 0);
Assert.Equal(cropBox.Height, transformedGlyph.TopRight.X, 0);
Assert.Equal(cropBox.Width - glyph.Width, transformedGlyph.TopRight.Y, 0);
// Test with 180 degrees
GetInitialTransformationMatrices(mediaBox, cropBox, new PageRotationDegrees(180), out initialMatrix, out inverseMatrix);
transformedGlyph = initialMatrix.Transform(glyph);
@@ -120,7 +117,7 @@
Assert.Equal(glyph.Height, transformedGlyph.BottomLeft.Y, 0);
Assert.Equal(cropBox.Width - glyph.Width, transformedGlyph.TopRight.X, 0);
Assert.Equal(0, transformedGlyph.TopRight.Y, 0);
// Test with 270 degrees
GetInitialTransformationMatrices(mediaBox, cropBox, new PageRotationDegrees(270), out initialMatrix, out inverseMatrix);
transformedGlyph = initialMatrix.Transform(glyph);
@@ -130,7 +127,6 @@
Assert.Equal(0, transformedGlyph.BottomLeft.Y, 0);
Assert.Equal(0, transformedGlyph.TopRight.X, 0);
Assert.Equal(glyph.Width, transformedGlyph.TopRight.Y, 0);
}
private static void GetInitialTransformationMatrices(
@@ -140,7 +136,7 @@
out TransformationMatrix initialMatrix,
out TransformationMatrix inverseMatrix)
{
initialMatrix = ContentStreamProcessor.GetInitialMatrix(UserSpaceUnit.Default, mediaBox, cropBox, rotation, new TestingLog());
initialMatrix = OperationContextHelper.GetInitialMatrix(UserSpaceUnit.Default, mediaBox, cropBox, rotation, new TestingLog());
inverseMatrix = initialMatrix.Inverse();
}

View File

@@ -1,9 +1,6 @@
namespace UglyToad.PdfPig.Tests.Integration
{
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Xunit;
public class RotationAndCroppingTests
@@ -26,5 +23,52 @@
Assert.NotNull(page.Content);
}
}
[Fact]
public void WrongPathCount()
{
    // Open the document with path clipping switched on.
    var path = IntegrationHelpers.GetDocumentPath("Publication_of_award_of_Bids_for_Transport_Sector__August_2016.pdf");
    var options = new ParsingOptions()
    {
        ClipPaths = true
    };

    using (var pdf = PdfDocument.Open(path, options))
    {
        var firstPage = pdf.GetPage(1);

        // Expected page height and number of paths on the first page.
        Assert.Equal(612, firstPage.Height);
        Assert.Equal(224, firstPage.ExperimentalAccess.Paths.Count);
    }
}
[Fact]
public void Issue665()
{
    var path = IntegrationHelpers.GetDocumentPath("SmallCropbox.pdf");

    using (var pdf = PdfDocument.Open(path))
    {
        var firstPage = pdf.GetPage(1);

        // Page-level values.
        Assert.Equal(270, firstPage.Rotation.Value); // Clockwise
        Assert.Equal(680, (int)firstPage.Height);
        Assert.Equal(433, (int)firstPage.Width);
        Assert.Equal(Content.PageSize.Custom, firstPage.Size);
        Assert.Equal(2429, firstPage.Letters.Count);

        // The crop box is expected unrotated, anchored at the origin, 433 wide and 680 high.
        var crop = firstPage.CropBox.Bounds;
        Assert.Equal(0, crop.Rotation);
        Assert.Equal(680, (int)crop.Height);
        Assert.Equal(433, (int)crop.Width);
        Assert.Equal(0, (int)crop.Bottom);
        Assert.Equal(0, (int)crop.Left);
        Assert.Equal(433, (int)crop.Right);
        Assert.Equal(680, (int)crop.Top);

        // The media box is expected to have exactly the same geometry.
        var media = firstPage.MediaBox.Bounds;
        Assert.Equal(0, media.Rotation);
        Assert.Equal(680, (int)media.Height);
        Assert.Equal(433, (int)media.Width);
        Assert.Equal(0, (int)media.Bottom);
        Assert.Equal(0, (int)media.Left);
        Assert.Equal(433, (int)media.Right);
        Assert.Equal(680, (int)media.Top);
    }
}
}
}

View File

@@ -1,22 +0,0 @@
namespace UglyToad.PdfPig.Tests.Integration
{
    using Xunit;

    /// <summary>
    /// Integration test checking the path count of a document opened with path clipping enabled.
    /// </summary>
    public class WrongPathCountClippingTests
    {
        [Fact]
        public void WrongPathCount()
        {
            // Open the document with path clipping switched on.
            var path = IntegrationHelpers.GetDocumentPath("Publication_of_award_of_Bids_for_Transport_Sector__August_2016.pdf");
            var options = new ParsingOptions()
            {
                ClipPaths = true
            };

            using (var pdf = PdfDocument.Open(path, options))
            {
                var firstPage = pdf.GetPage(1);

                // Expected page height and number of paths on the first page.
                Assert.Equal(612, firstPage.Height);
                Assert.Equal(224, firstPage.ExperimentalAccess.Paths.Count);
            }
        }
    }
}

View File

@@ -111,8 +111,8 @@
// Special case where cropbox is outside mediabox: use cropbox instead of intersection
var viewBox = mediaBox.Bounds.Intersect(cropBox.Bounds) ?? cropBox.Bounds;
Width = rotation.SwapsAxis ? viewBox.Height : viewBox.Width;
Height = rotation.SwapsAxis ? viewBox.Width : viewBox.Height;
Width = viewBox.Width;
Height = viewBox.Height;
Size = viewBox.GetPageSize();
ExperimentalAccess = new Experimental(this, annotationProvider);

View File

@@ -5,8 +5,8 @@
using Core;
using Filters;
using Geometry;
using Logging;
using Operations;
using Operations.TextPositioning;
using Parser;
using PdfFonts;
using PdfPig.Core;
@@ -16,7 +16,6 @@
using System.Linq;
using Tokenization.Scanner;
using Tokens;
using Operations.TextPositioning;
using Util;
using XObjects;
using static PdfPig.Core.PdfSubpath;
@@ -86,8 +85,8 @@
int pageNumber,
IResourceStore resourceStore,
UserSpaceUnit userSpaceUnit,
MediaBox mediaBox,
CropBox cropBox,
TransformationMatrix initialMatrix,
PageRotationDegrees rotation,
IPdfTokenScanner pdfScanner,
IPageContentParser pageContentParser,
@@ -103,12 +102,10 @@
this.filterProvider = filterProvider ?? throw new ArgumentNullException(nameof(filterProvider));
this.parsingOptions = parsingOptions;
TransformationMatrix initialMatrix = GetInitialMatrix(userSpaceUnit, mediaBox, cropBox, rotation, parsingOptions.Logger);
graphicsStack.Push(new CurrentGraphicsState()
{
CurrentTransformationMatrix = initialMatrix,
CurrentClippingPath = GetInitialClipping(cropBox, initialMatrix),
CurrentClippingPath = GetInitialClipping(cropBox),
ColorSpaceContext = new ColorSpaceContext(GetCurrentState, resourceStore)
});
}
@@ -116,88 +113,18 @@
/// <summary>
/// Get the initial clipping path: a single rectangular subpath built from the crop box
/// and flagged as a clipping path using the even-odd filling rule.
/// </summary>
/// <param name="cropBox">The crop box whose bounds define the clipping rectangle.</param>
/// <returns>A <see cref="PdfPath"/> containing the crop box rectangle, set as a clipping path.</returns>
private static PdfPath GetInitialClipping(CropBox cropBox, TransformationMatrix initialMatrix)
private static PdfPath GetInitialClipping(CropBox cropBox)
{
var transformedCropBox = initialMatrix.Transform(cropBox.Bounds);
// We re-compute width and height to get possible negative values.
double width = transformedCropBox.TopRight.X - transformedCropBox.BottomLeft.X;
double height = transformedCropBox.TopRight.Y - transformedCropBox.BottomLeft.Y;
var cropBoxBounds = cropBox.Bounds;
// Build the rectangle that the current clipping path is initialised to (the crop box).
var clippingSubpath = new PdfSubpath();
clippingSubpath.Rectangle(transformedCropBox.BottomLeft.X, transformedCropBox.BottomLeft.Y, width, height);
clippingSubpath.Rectangle(cropBoxBounds.BottomLeft.X, cropBoxBounds.BottomLeft.Y, cropBoxBounds.Width, cropBoxBounds.Height);
var clippingPath = new PdfPath() { clippingSubpath };
clippingPath.SetClipping(FillingRule.EvenOdd);
return clippingPath;
}
/// <summary>
/// Build the initial transformation matrix for a page from its boxes and rotation.
/// The view box is the intersection of the media box and crop box (or the crop box alone when they do not intersect).
/// Points are first moved so the view box bottom left corner sits at (0,0), then rotated clockwise by the
/// page rotation, then shifted back so the rotated coordinates are not negative.
/// </summary>
/// <param name="userSpaceUnit">User space unit; a value other than 1 only triggers a warning.</param>
/// <param name="mediaBox">The page media box.</param>
/// <param name="cropBox">The page crop box.</param>
/// <param name="rotation">The page rotation (0, 90, 180 or 270).</param>
/// <param name="log">Receives the warning about unsupported user space units.</param>
/// <exception cref="InvalidOperationException">The rotation is not 0, 90, 180 or 270.</exception>
[System.Diagnostics.Contracts.Pure]
internal static TransformationMatrix GetInitialMatrix(UserSpaceUnit userSpaceUnit,
    MediaBox mediaBox,
    CropBox cropBox,
    PageRotationDegrees rotation,
    ILog log)
{
    // The crop box may be larger than the media box; Intersect returns null when
    // the two do not overlap at all, in which case the crop box wins.
    var visibleArea = mediaBox.Bounds.Intersect(cropBox.Bounds) ?? cropBox.Bounds;

    var degrees = rotation.Value;
    var isDefaultUnit = userSpaceUnit.PointMultiples == 1;

    // Nothing to move, rotate or scale.
    if (degrees == 0 && visibleArea.Left == 0 && visibleArea.Bottom == 0 && isDefaultUnit)
    {
        return TransformationMatrix.Identity;
    }

    // Bring the bottom left corner of the visible area onto the origin.
    var toOrigin = TransformationMatrix.GetTranslationMatrix(-visibleArea.Left, -visibleArea.Bottom);

    if (!isDefaultUnit)
    {
        log.Warn("User space unit other than 1 is not implemented");
    }

    // Without rotation the translation alone is the answer.
    if (degrees == 0)
    {
        return toOrigin;
    }

    // Rotating about the origin pushes points into negative x/y; these offsets,
    // taken from the unrotated visible area, bring them back.
    double shiftX = 0;
    double shiftY = 0;
    if (degrees == 90)
    {
        shiftY = visibleArea.Width;
    }
    else if (degrees == 180)
    {
        shiftX = visibleArea.Width;
        shiftY = visibleArea.Height;
    }
    else if (degrees == 270)
    {
        shiftX = visibleArea.Height;
    }
    else
    {
        throw new InvalidOperationException($"Invalid value for page rotation: {rotation.Value}.");
    }

    // Page rotation is clockwise while GetRotationMatrix is counter clockwise, hence the negation.
    var clockwiseRotation = TransformationMatrix.GetRotationMatrix(-degrees);
    var backIntoView = TransformationMatrix.GetTranslationMatrix(shiftX, shiftY);

    // Combined as: translate to origin > rotate > translate back.
    return toOrigin.Multiply(clockwiseRotation.Multiply(backIntoView));
}
public PageContent Process(int pageNumberCurrent, IReadOnlyList<IGraphicsStateOperation> operations)
{
pageNumber = pageNumberCurrent;

View File

@@ -0,0 +1,89 @@
namespace UglyToad.PdfPig.Graphics
{
    using System;
    using System.Linq;
    using Content;
    using Geometry;
    using Logging;
    using PdfPig.Core;

    /// <summary>
    /// Helper methods shared by operation contexts.
    /// </summary>
    internal static class OperationContextHelper
    {
        /// <summary>
        /// Build the initial transformation matrix for a page from its boxes and rotation.
        /// The view box is the intersection of the media box and crop box (or the crop box alone when they do not intersect).
        /// Points are first moved so the view box bottom left corner sits at (0,0), then rotated clockwise by the
        /// page rotation, then shifted back so the rotated coordinates are not negative.
        /// </summary>
        /// <param name="userSpaceUnit">User space unit; a value other than 1 only triggers a warning.</param>
        /// <param name="mediaBox">The Media box as defined in the document, without any applied transform.</param>
        /// <param name="cropBox">The Crop box as defined in the document, without any applied transform.</param>
        /// <param name="rotation">The page rotation (0, 90, 180 or 270).</param>
        /// <param name="log">Receives the warning about unsupported user space units.</param>
        /// <returns>The identity matrix when nothing needs to change, otherwise the combined translate/rotate/translate matrix.</returns>
        /// <exception cref="InvalidOperationException">The rotation is not 0, 90, 180 or 270.</exception>
        [System.Diagnostics.Contracts.Pure]
        internal static TransformationMatrix GetInitialMatrix(UserSpaceUnit userSpaceUnit,
            MediaBox mediaBox,
            CropBox cropBox,
            PageRotationDegrees rotation,
            ILog log)
        {
            // The crop box may be larger than the media box; Intersect returns null when
            // the two do not overlap at all, in which case the crop box wins.
            var visibleArea = mediaBox.Bounds.Intersect(cropBox.Bounds) ?? cropBox.Bounds;

            var degrees = rotation.Value;
            var isDefaultUnit = userSpaceUnit.PointMultiples == 1;

            // Nothing to move, rotate or scale.
            if (degrees == 0 && visibleArea.Left == 0 && visibleArea.Bottom == 0 && isDefaultUnit)
            {
                return TransformationMatrix.Identity;
            }

            // Bring the bottom left corner of the visible area onto the origin.
            var toOrigin = TransformationMatrix.GetTranslationMatrix(-visibleArea.Left, -visibleArea.Bottom);

            if (!isDefaultUnit)
            {
                log.Warn("User space unit other than 1 is not implemented");
            }

            // Without rotation the translation alone is the answer.
            if (degrees == 0)
            {
                return toOrigin;
            }

            // Rotating about the origin pushes points into negative x/y; these offsets,
            // taken from the unrotated visible area, bring them back.
            double shiftX = 0;
            double shiftY = 0;
            if (degrees == 90)
            {
                shiftY = visibleArea.Width;
            }
            else if (degrees == 180)
            {
                shiftX = visibleArea.Width;
                shiftY = visibleArea.Height;
            }
            else if (degrees == 270)
            {
                shiftX = visibleArea.Height;
            }
            else
            {
                throw new InvalidOperationException($"Invalid value for page rotation: {rotation.Value}.");
            }

            // Page rotation is clockwise while GetRotationMatrix is counter clockwise, hence the negation.
            var clockwiseRotation = TransformationMatrix.GetRotationMatrix(-degrees);
            var backIntoView = TransformationMatrix.GetTranslationMatrix(shiftX, shiftY);

            // Combined as: translate to origin > rotate > translate back.
            return toOrigin.Multiply(clockwiseRotation.Multiply(backIntoView));
        }
    }
}

View File

@@ -53,9 +53,6 @@
parsingOptions.Logger.Error($"Page {number} had its type specified as {type} rather than 'Page'.");
}
MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers);
CropBox cropBox = GetCropBox(dictionary, pageTreeMembers, mediaBox);
var rotation = new PageRotationDegrees(pageTreeMembers.Rotation);
if (dictionary.TryGet(NameToken.Rotate, pdfScanner, out NumericToken rotateToken))
{
@@ -80,6 +77,13 @@
UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary);
MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers);
CropBox cropBox = GetCropBox(dictionary, pageTreeMembers, mediaBox);
var initialMatrix = OperationContextHelper.GetInitialMatrix(userSpaceUnit, mediaBox, cropBox, rotation, log);
ApplyTransformNormalise(initialMatrix, ref mediaBox, ref cropBox);
PageContent content;
if (!dictionary.TryGet(NameToken.Contents, out var contents))
@@ -122,7 +126,7 @@
}
}
content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, mediaBox, parsingOptions);
content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, initialMatrix, parsingOptions);
}
else
{
@@ -135,10 +139,9 @@
var bytes = contentStream.Decode(filterProvider, pdfScanner);
content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, mediaBox, parsingOptions);
content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, initialMatrix, parsingOptions);
}
var initialMatrix = ContentStreamProcessor.GetInitialMatrix(userSpaceUnit, mediaBox, cropBox, rotation, log);
var annotationProvider = new AnnotationProvider(pdfScanner, dictionary, initialMatrix, namedDestinations, log);
var page = new Page(number, dictionary, mediaBox, cropBox, rotation, content, annotationProvider, pdfScanner);
@@ -156,7 +159,7 @@
CropBox cropBox,
UserSpaceUnit userSpaceUnit,
PageRotationDegrees rotation,
MediaBox mediaBox,
TransformationMatrix initialMatrix,
InternalParsingOptions parsingOptions)
{
var operations = pageContentParser.Parse(pageNumber, new ByteArrayInputBytes(contentBytes),
@@ -166,8 +169,8 @@
pageNumber,
resourceStore,
userSpaceUnit,
mediaBox,
cropBox,
initialMatrix,
rotation,
pdfScanner,
pageContentParser,
@@ -200,7 +203,7 @@
if (cropBoxArray.Length != 4)
{
log.Error($"The CropBox was the wrong length in the dictionary: {dictionary}. Array was: {cropBoxArray}. Using MediaBox.");
cropBox = new CropBox(mediaBox.Bounds);
return cropBox;
@@ -222,7 +225,7 @@
PageTreeMembers pageTreeMembers)
{
MediaBox mediaBox;
if (dictionary.TryGet(NameToken.MediaBox, out var mediaBoxObject)
if (dictionary.TryGet(NameToken.MediaBox, out var mediaBoxObject)
&& DirectObjectFinder.TryGet(mediaBoxObject, pdfScanner, out ArrayToken mediaBoxArray))
{
if (mediaBoxArray.Length != 4)
@@ -251,5 +254,22 @@
return mediaBox;
}
/// <summary>
/// Replace the media box and the crop box by their bounds transformed with
/// <paramref name="transformationMatrix"/> and then passed through Normalise(),
/// the intent being rectangles with rotation=0 whose width and height are as viewed on screen.
/// When the matrix equals <see cref="TransformationMatrix.Identity"/> both boxes are left untouched.
/// </summary>
/// <param name="transformationMatrix">The matrix to apply to both boxes.</param>
/// <param name="mediaBox">The media box; overwritten with the transformed, normalised box.</param>
/// <param name="cropBox">The crop box; overwritten with the transformed, normalised box.</param>
private static void ApplyTransformNormalise(TransformationMatrix transformationMatrix, ref MediaBox mediaBox, ref CropBox cropBox)
{
    // An identity transform cannot change the boxes, so skip the work.
    if (transformationMatrix == TransformationMatrix.Identity)
    {
        return;
    }

    var normalisedMediaBounds = transformationMatrix.Transform(mediaBox.Bounds).Normalise();
    mediaBox = new MediaBox(normalisedMediaBounds);

    var normalisedCropBounds = transformationMatrix.Transform(cropBox.Bounds).Normalise();
    cropBox = new CropBox(normalisedCropBounds);
}
}
}