From 1d551d6de3866a7085504d20d8b90b23099b99fb Mon Sep 17 00:00:00 2001 From: Eliot Jones Date: Sun, 4 Aug 2019 12:57:06 +0100 Subject: [PATCH 01/13] add and document core classes for colorspace information --- .../PublicApiScannerTests.cs | 3 + src/UglyToad.PdfPig.sln.DotSettings | 4 + src/UglyToad.PdfPig/Core/ICosStreamWriter.cs | 9 -- .../Graphics/Colors/ColorSpace.cs | 74 ++++++++++ .../Graphics/Colors/ColorSpaceExtensions.cs | 130 ++++++++++++++++++ .../Graphics/Colors/ColorSpaceFamily.cs | 26 ++++ 6 files changed, 237 insertions(+), 9 deletions(-) delete mode 100644 src/UglyToad.PdfPig/Core/ICosStreamWriter.cs create mode 100644 src/UglyToad.PdfPig/Graphics/Colors/ColorSpace.cs create mode 100644 src/UglyToad.PdfPig/Graphics/Colors/ColorSpaceExtensions.cs create mode 100644 src/UglyToad.PdfPig/Graphics/Colors/ColorSpaceFamily.cs diff --git a/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs b/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs index 2b26f516..5f3e20a3 100644 --- a/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs +++ b/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs @@ -77,6 +77,9 @@ "UglyToad.PdfPig.Geometry.PdfPoint", "UglyToad.PdfPig.Geometry.PdfLine", "UglyToad.PdfPig.Geometry.PdfRectangle", + "UglyToad.PdfPig.Graphics.Colors.ColorSpace", + "UglyToad.PdfPig.Graphics.Colors.ColorSpaceExtensions", + "UglyToad.PdfPig.Graphics.Colors.ColorSpaceFamily", "UglyToad.PdfPig.Graphics.Core.LineCapStyle", "UglyToad.PdfPig.Graphics.Core.LineDashPattern", "UglyToad.PdfPig.Graphics.Core.LineJoinStyle", diff --git a/src/UglyToad.PdfPig.sln.DotSettings b/src/UglyToad.PdfPig.sln.DotSettings index 39e42e16..9934bcf8 100644 --- a/src/UglyToad.PdfPig.sln.DotSettings +++ b/src/UglyToad.PdfPig.sln.DotSettings @@ -1,2 +1,6 @@  + CIE + CMYK + ICC + RGB XY \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Core/ICosStreamWriter.cs b/src/UglyToad.PdfPig/Core/ICosStreamWriter.cs deleted file mode 100644 index ea6a7b51..00000000 --- 
a/src/UglyToad.PdfPig/Core/ICosStreamWriter.cs +++ /dev/null @@ -1,9 +0,0 @@ -namespace UglyToad.PdfPig.Core -{ - using System.IO; - - internal interface ICosStreamWriter - { - void WriteToPdfStream(BinaryWriter output); - } -} diff --git a/src/UglyToad.PdfPig/Graphics/Colors/ColorSpace.cs b/src/UglyToad.PdfPig/Graphics/Colors/ColorSpace.cs new file mode 100644 index 00000000..30b0aa76 --- /dev/null +++ b/src/UglyToad.PdfPig/Graphics/Colors/ColorSpace.cs @@ -0,0 +1,74 @@ +namespace UglyToad.PdfPig.Graphics.Colors +{ + /// + /// Color values in a PDF are interpreted according to the current color space. + /// Color spaces enable a PDF to specify abstract colors in a device independent way. + /// + public enum ColorSpace + { + /// + /// Grayscale. Controls the intensity of achromatic light on a scale from black to white. + /// + DeviceGray = 0, + /// + /// RGB. Controls the intensities of red, green and blue light. + /// + DeviceRGB = 1, + /// + /// CMYK. Controls the concentrations of cyan, magenta, yellow and black (K) inks. + /// + DeviceCMYK = 2, + /// + /// CIE (Commission Internationale de l'Éclairage) colorspace. + /// Specifies color related to human visual perception with the aim of producing consistent color on different output devices. + /// CalGray - Special case of the CIE colorspace using a single channel (A) and a single transformation. + /// A represents the gray component of a calibrated gray space in the range 0 to 1. + /// + CalGray = 3, + /// + /// CIE (Commission Internationale de l'Éclairage) colorspace. + /// Specifies color related to human visual perception with the aim of producing consistent color on different output devices. + /// CalRGB - A CIE ABC color space with a single transformation. + /// A, B and C represent red, green and blue color values in the range 0 to 1. + /// + CalRGB = 4, + /// + /// CIE (Commission Internationale de l'Éclairage) colorspace. 
+ /// Specifies color related to human visual perception with the aim of producing consistent color on different output devices. + /// Lab - A CIE ABC color space with two transforms. A, B and C represent the L*, a* and b* + /// components of a CIE 1976 L*a*b* space. The range of A (L*) is 0 to 100. + /// The range of B (a*) and C (b*) are defined by the Range of the color space. + /// + Lab = 5, + /// + /// ICC (International Color Consortium) colorspace. + /// ICC - Colorspace specified by a sequence of bytes which are interpreted according to the + /// ICC specification. + /// + ICCBased = 6, + /// + /// An Indexed color space allows a PDF content stream to use small integers as indices into a color map or color table of arbitrary colors in some other space. + /// A PDF consumer application treats each sample value as an index into the color table and uses the color value it finds there. + /// + Indexed = 7, + /// + /// Enables a PDF content stream to paint an area with a pattern rather than a single color. + /// The pattern may be either a tiling pattern (type 1) or a shading pattern (type 2). + /// + Pattern = 8, + /// + /// Provides a means for specifying the use of additional colorants or for isolating the control of individual color components of + /// a device color space for a subtractive device. + /// When such a space is the current color space, the current color is a single-component value, called a tint, + /// that controls the application of the given colorant or color components only. + /// + Separation = 9, + /// + /// Can contain an arbitrary number of color components. They provide greater flexibility than is possible with standard device color + /// spaces such as or with individual color spaces. + /// For example, it is possible to create a DeviceN color space consisting of only the cyan, magenta, and yellow color components, + /// with the black component excluded. 
+ /// + DeviceN = 10 + } +} diff --git a/src/UglyToad.PdfPig/Graphics/Colors/ColorSpaceExtensions.cs b/src/UglyToad.PdfPig/Graphics/Colors/ColorSpaceExtensions.cs new file mode 100644 index 00000000..0bb6461f --- /dev/null +++ b/src/UglyToad.PdfPig/Graphics/Colors/ColorSpaceExtensions.cs @@ -0,0 +1,130 @@ +namespace UglyToad.PdfPig.Graphics.Colors +{ + using System; + using Tokens; + + /// + /// Provides utility extension methods for dealing with s. + /// + public static class ColorSpaceExtensions + { + /// + /// Gets the corresponding for a given . + /// + public static ColorSpaceFamily GetFamily(this ColorSpace colorSpace) + { + switch (colorSpace) + { + case ColorSpace.DeviceGray: + case ColorSpace.DeviceRGB: + case ColorSpace.DeviceCMYK: + return ColorSpaceFamily.Device; + case ColorSpace.CalGray: + case ColorSpace.CalRGB: + case ColorSpace.Lab: + case ColorSpace.ICCBased: + return ColorSpaceFamily.CIEBased; + case ColorSpace.Indexed: + case ColorSpace.Pattern: + case ColorSpace.Separation: + case ColorSpace.DeviceN: + return ColorSpaceFamily.Special; + default: + throw new ArgumentException($"Unrecognized colorspace: {colorSpace}."); + } + } + + /// + /// Maps from a to the corresponding if one exists. 
+ /// + public static bool TryMapToColorSpace(this NameToken name, out ColorSpace colorspace) + { + colorspace = ColorSpace.DeviceGray; + + if (name.Data == NameToken.Devicegray.Data) + { + colorspace = ColorSpace.DeviceGray; + } + else if (name.Data == NameToken.Devicergb.Data) + { + colorspace = ColorSpace.DeviceRGB; + } + else if (name.Data == NameToken.Devicecmyk.Data) + { + colorspace = ColorSpace.DeviceCMYK; + } + else if (name.Data == NameToken.Calgray.Data) + { + colorspace = ColorSpace.CalGray; + } + else if (name.Data == NameToken.Calrgb.Data) + { + colorspace = ColorSpace.CalRGB; + } + else if (name.Data == NameToken.Lab.Data) + { + colorspace = ColorSpace.Lab; + } + else if (name.Data == NameToken.Iccbased.Data) + { + colorspace = ColorSpace.ICCBased; + } + else if (name.Data == NameToken.Indexed.Data) + { + colorspace = ColorSpace.Indexed; + } + else if (name.Data == NameToken.Pattern.Data) + { + colorspace = ColorSpace.Pattern; + } + else if (name.Data == NameToken.Separation.Data) + { + colorspace = ColorSpace.Separation; + } + else if (name.Data == NameToken.Devicen.Data) + { + colorspace = ColorSpace.DeviceN; + } + else + { + return false; + } + + return true; + } + + /// + /// Gets the corresponding for a given . 
+ /// + public static NameToken ToNameToken(this ColorSpace colorSpace) + { + switch (colorSpace) + { + case ColorSpace.DeviceGray: + return NameToken.Devicegray; + case ColorSpace.DeviceRGB: + return NameToken.Devicergb; + case ColorSpace.DeviceCMYK: + return NameToken.Devicecmyk; + case ColorSpace.CalGray: + return NameToken.Calgray; + case ColorSpace.CalRGB: + return NameToken.Calrgb; + case ColorSpace.Lab: + return NameToken.Lab; + case ColorSpace.ICCBased: + return NameToken.Iccbased; + case ColorSpace.Indexed: + return NameToken.Indexed; + case ColorSpace.Pattern: + return NameToken.Pattern; + case ColorSpace.Separation: + return NameToken.Separation; + case ColorSpace.DeviceN: + return NameToken.Devicen; + default: + throw new ArgumentException($"Unrecognized colorspace: {colorSpace}."); + } + } + } +} \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Graphics/Colors/ColorSpaceFamily.cs b/src/UglyToad.PdfPig/Graphics/Colors/ColorSpaceFamily.cs new file mode 100644 index 00000000..7c7c6166 --- /dev/null +++ b/src/UglyToad.PdfPig/Graphics/Colors/ColorSpaceFamily.cs @@ -0,0 +1,26 @@ +namespace UglyToad.PdfPig.Graphics.Colors +{ + /// + /// s can be classified into colorspace families. + /// s within the same family share general characteristics. + /// + public enum ColorSpaceFamily + { + /// + /// Device colorspaces directly specify colors or shades of gray that the output device + /// should produce. + /// + Device, + /// + /// CIE-based color spaces are based on an international standard for color specification created by + /// the Commission Internationale de l'Éclairage (International Commission on Illumination) (CIE). + /// These spaces specify colors in a way that is independent of the characteristics of any particular output device. + /// + CIEBased, + /// + /// Special color spaces add features or properties to an underlying color space. 
+ /// They include facilities for patterns, color mapping, separations, and high-fidelity and multitone color. + /// + Special + } +} \ No newline at end of file From 0b9ae1db131705d4ddff38d1b866eaf5f3bb2b24 Mon Sep 17 00:00:00 2001 From: Eliot Jones Date: Sun, 4 Aug 2019 16:47:47 +0100 Subject: [PATCH 02/13] add color information to the operation context. create color classes for letters and paths to use --- .../Graphics/TestOperationContext.cs | 20 ++++-- .../PublicApiScannerTests.cs | 3 +- .../Graphics/Colors/CMYKColor.cs | 53 ++++++++++++++ .../Graphics/Colors/GrayColor.cs | 33 +++++++++ src/UglyToad.PdfPig/Graphics/Colors/IColor.cs | 18 +++++ .../Graphics/Colors/RGBColor.cs | 45 ++++++++++++ .../Graphics/ColorspaceContext.cs | 69 ++++++++++++++++++- .../Graphics/ContentStreamProcessor.cs | 2 +- .../Graphics/IColorspaceContext.cs | 25 ++++++- .../Graphics/IOperationContext.cs | 2 +- .../Graphics/Operations/SetNonStrokeColor.cs | 14 ++++ .../Operations/SetNonStrokeColorAdvanced.cs | 20 ++++++ .../Operations/SetNonStrokeColorDeviceCmyk.cs | 2 +- .../Operations/SetNonStrokeColorDeviceGray.cs | 2 +- .../Operations/SetNonStrokeColorDeviceRgb.cs | 2 +- .../Operations/SetNonStrokeColorSpace.cs | 2 +- .../Graphics/Operations/SetStrokeColor.cs | 6 +- .../Operations/SetStrokeColorAdvanced.cs | 22 +++++- .../Operations/SetStrokeColorDeviceCmyk.cs | 2 +- .../Operations/SetStrokeColorDeviceGray.cs | 2 +- .../Operations/SetStrokeColorDeviceRgb.cs | 2 +- .../Operations/SetStrokeColorSpace.cs | 2 +- 22 files changed, 323 insertions(+), 25 deletions(-) create mode 100644 src/UglyToad.PdfPig/Graphics/Colors/CMYKColor.cs create mode 100644 src/UglyToad.PdfPig/Graphics/Colors/GrayColor.cs create mode 100644 src/UglyToad.PdfPig/Graphics/Colors/IColor.cs create mode 100644 src/UglyToad.PdfPig/Graphics/Colors/RGBColor.cs diff --git a/src/UglyToad.PdfPig.Tests/Graphics/TestOperationContext.cs b/src/UglyToad.PdfPig.Tests/Graphics/TestOperationContext.cs index eec41f81..a88e530d 100644 
--- a/src/UglyToad.PdfPig.Tests/Graphics/TestOperationContext.cs +++ b/src/UglyToad.PdfPig.Tests/Graphics/TestOperationContext.cs @@ -3,9 +3,10 @@ using System.Collections.Generic; using PdfPig.Geometry; using PdfPig.Graphics; + using PdfPig.Graphics.Colors; using PdfPig.IO; using PdfPig.Tokens; - using UglyToad.PdfPig.Core; + using PdfPig.Core; internal class TestOperationContext : IOperationContext { @@ -17,14 +18,11 @@ public TextMatrices TextMatrices { get; set; } = new TextMatrices(); - public TransformationMatrix CurrentTransformationMatrix - { - get { return GetCurrentState().CurrentTransformationMatrix; } - } + public TransformationMatrix CurrentTransformationMatrix => GetCurrentState().CurrentTransformationMatrix; public PdfPath CurrentPath { get; set; } - public IColorspaceContext ColorspaceContext { get; } = new ColorspaceContext(); + public IColorSpaceContext ColorSpaceContext { get; } = new ColorSpaceContext(); public PdfPoint CurrentPosition { get; set; } @@ -81,8 +79,16 @@ } } - public class TestColorspaceContext : IColorspaceContext + public class TestColorSpaceContext : IColorSpaceContext { + public ColorSpace CurrentStrokingColorSpace { get; } = ColorSpace.DeviceGray; + + public ColorSpace CurrentNonStrokingColorSpace { get; } = ColorSpace.DeviceGray; + + public IColor CurrentStrokingColor { get; } = GrayColor.Black; + + public IColor CurrentNonStrokingColor { get; } = GrayColor.Black; + public void SetStrokingColorspace(NameToken colorspace) { } diff --git a/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs b/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs index 5f3e20a3..184dd905 100644 --- a/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs +++ b/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs @@ -80,6 +80,7 @@ "UglyToad.PdfPig.Graphics.Colors.ColorSpace", "UglyToad.PdfPig.Graphics.Colors.ColorSpaceExtensions", "UglyToad.PdfPig.Graphics.Colors.ColorSpaceFamily", + "UglyToad.PdfPig.Graphics.Colors.IColor", 
"UglyToad.PdfPig.Graphics.Core.LineCapStyle", "UglyToad.PdfPig.Graphics.Core.LineDashPattern", "UglyToad.PdfPig.Graphics.Core.LineJoinStyle", @@ -87,7 +88,7 @@ "UglyToad.PdfPig.Graphics.Core.TextRenderingMode", "UglyToad.PdfPig.Graphics.CurrentFontState", "UglyToad.PdfPig.Graphics.CurrentGraphicsState", - "UglyToad.PdfPig.Graphics.IColorspaceContext", + "UglyToad.PdfPig.Graphics.IColorSpaceContext", "UglyToad.PdfPig.Graphics.IOperationContext", "UglyToad.PdfPig.Graphics.Operations.ClippingPaths.ModifyClippingByEvenOddIntersect", "UglyToad.PdfPig.Graphics.Operations.ClippingPaths.ModifyClippingByNonZeroWindingIntersect", diff --git a/src/UglyToad.PdfPig/Graphics/Colors/CMYKColor.cs b/src/UglyToad.PdfPig/Graphics/Colors/CMYKColor.cs new file mode 100644 index 00000000..2f6f8369 --- /dev/null +++ b/src/UglyToad.PdfPig/Graphics/Colors/CMYKColor.cs @@ -0,0 +1,53 @@ +namespace UglyToad.PdfPig.Graphics.Colors +{ + /// + /// A color with cyan, magenta, yellow and black (K) components. + /// + internal class CMYKColor : IColor + { + public static IColor Black { get; } = new CMYKColor(0, 0, 0, 1); + public static IColor White { get; } = new CMYKColor(0, 0, 0, 0); + + /// + public ColorSpace ColorSpace { get; } = ColorSpace.DeviceCMYK; + + /// + /// The cyan value. + /// + public decimal C { get; } + + /// + /// The magenta value. + /// + public decimal M { get; } + + /// + /// The yellow value. + /// + public decimal Y { get; } + + /// + /// The black value. + /// + public decimal K { get; } + + /// + /// Create a new . 
+ /// + public CMYKColor(decimal c, decimal m, decimal y, decimal k) + { + C = c; + M = m; + Y = y; + K = k; + } + + /// + public (decimal r, decimal g, decimal b) ToRGBValues() + { + return ((255 * (1 - C) * (1 - K)) / 255m, + (255 * (1 - M) * (1 - K)) / 255m, + (255 * (1 - Y) * (1 - K)) / 255m); + } + } +} \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Graphics/Colors/GrayColor.cs b/src/UglyToad.PdfPig/Graphics/Colors/GrayColor.cs new file mode 100644 index 00000000..f375507f --- /dev/null +++ b/src/UglyToad.PdfPig/Graphics/Colors/GrayColor.cs @@ -0,0 +1,33 @@ +namespace UglyToad.PdfPig.Graphics.Colors +{ + /// + /// A grayscale color with a single gray component. + /// + internal class GrayColor : IColor + { + public static GrayColor Black { get; } = new GrayColor(0); + public static GrayColor White { get; } = new GrayColor(1); + + /// + public ColorSpace ColorSpace { get; } = ColorSpace.DeviceGray; + + /// + /// The gray value between 0 and 1. + /// + public decimal Gray { get; } + + /// + /// Create a new . + /// + public GrayColor(decimal gray) + { + Gray = gray; + } + + /// + public (decimal r, decimal g, decimal b) ToRGBValues() + { + return (Gray, Gray, Gray); + } + } +} \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Graphics/Colors/IColor.cs b/src/UglyToad.PdfPig/Graphics/Colors/IColor.cs new file mode 100644 index 00000000..a3fcaf4f --- /dev/null +++ b/src/UglyToad.PdfPig/Graphics/Colors/IColor.cs @@ -0,0 +1,18 @@ +namespace UglyToad.PdfPig.Graphics.Colors +{ + /// + /// A color used for text or paths in a PDF. + /// + public interface IColor + { + /// + /// The colorspace used for this color. + /// + ColorSpace ColorSpace { get; } + + /// + /// The color as RGB values (between 0 and 1). 
+ /// + (decimal r, decimal g, decimal b) ToRGBValues(); + } +} diff --git a/src/UglyToad.PdfPig/Graphics/Colors/RGBColor.cs b/src/UglyToad.PdfPig/Graphics/Colors/RGBColor.cs new file mode 100644 index 00000000..99bb407e --- /dev/null +++ b/src/UglyToad.PdfPig/Graphics/Colors/RGBColor.cs @@ -0,0 +1,45 @@ +namespace UglyToad.PdfPig.Graphics.Colors +{ + /// + /// A color with red, green and blue components. + /// + internal class RGBColor : IColor + { + public static RGBColor Black = new RGBColor(0, 0, 0); + public static RGBColor White = new RGBColor(1, 1, 1); + + /// + public ColorSpace ColorSpace { get; } = ColorSpace.DeviceRGB; + + /// + /// The red value. + /// + public decimal R { get; } + + /// + /// The green value. + /// + public decimal G { get; } + + /// + /// The blue value. + /// + public decimal B { get; } + + /// + /// Create a new . + /// + public RGBColor(decimal r, decimal g, decimal b) + { + R = r; + G = g; + B = b; + } + + /// + public (decimal r, decimal g, decimal b) ToRGBValues() + { + return (R, G, B); + } + } +} \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Graphics/ColorspaceContext.cs b/src/UglyToad.PdfPig/Graphics/ColorspaceContext.cs index 3ef2a7bc..dd57c50f 100644 --- a/src/UglyToad.PdfPig/Graphics/ColorspaceContext.cs +++ b/src/UglyToad.PdfPig/Graphics/ColorspaceContext.cs @@ -1,39 +1,106 @@ namespace UglyToad.PdfPig.Graphics { + using Colors; using Tokens; - internal class ColorspaceContext : IColorspaceContext + internal class ColorSpaceContext : IColorSpaceContext { + public ColorSpace CurrentStrokingColorSpace { get; private set; } = ColorSpace.DeviceGray; + public ColorSpace CurrentNonStrokingColorSpace { get; private set; } = ColorSpace.DeviceGray; + + public IColor CurrentStrokingColor { get; private set; } = GrayColor.Black; + public IColor CurrentNonStrokingColor { get; private set; } = GrayColor.Black; + public void SetStrokingColorspace(NameToken colorspace) { + if (colorspace.TryMapToColorSpace(out var 
colorspaceActual)) + { + CurrentStrokingColorSpace = colorspaceActual; + switch (colorspaceActual) + { + case ColorSpace.DeviceGray: + CurrentStrokingColor = GrayColor.Black; + break; + case ColorSpace.DeviceRGB: + CurrentStrokingColor = RGBColor.Black; + break; + case ColorSpace.DeviceCMYK: + CurrentStrokingColor = CMYKColor.Black; + break; + default: + CurrentStrokingColor = GrayColor.Black; + break; + } + } + else + { + CurrentStrokingColorSpace = ColorSpace.DeviceGray; + CurrentStrokingColor = GrayColor.Black; + } } public void SetNonStrokingColorspace(NameToken colorspace) { + if (colorspace.TryMapToColorSpace(out var colorspaceActual)) + { + CurrentNonStrokingColorSpace = colorspaceActual; + switch (colorspaceActual) + { + case ColorSpace.DeviceGray: + CurrentNonStrokingColor = GrayColor.Black; + break; + case ColorSpace.DeviceRGB: + CurrentNonStrokingColor = RGBColor.Black; + break; + case ColorSpace.DeviceCMYK: + CurrentNonStrokingColor = CMYKColor.Black; + break; + default: + CurrentNonStrokingColor = GrayColor.Black; + break; + } + } + else + { + CurrentNonStrokingColorSpace = ColorSpace.DeviceGray; + CurrentNonStrokingColor = GrayColor.Black; + } } public void SetStrokingColorGray(decimal gray) { + CurrentStrokingColorSpace = ColorSpace.DeviceGray; + CurrentStrokingColor = new GrayColor(gray); } public void SetStrokingColorRgb(decimal r, decimal g, decimal b) { + CurrentStrokingColorSpace = ColorSpace.DeviceRGB; + CurrentStrokingColor = new RGBColor(r, g, b); } public void SetStrokingColorCmyk(decimal c, decimal m, decimal y, decimal k) { + CurrentStrokingColorSpace = ColorSpace.DeviceCMYK; + CurrentStrokingColor = new CMYKColor(c, m, y, k); } public void SetNonStrokingColorGray(decimal gray) { + CurrentNonStrokingColorSpace = ColorSpace.DeviceGray; + CurrentNonStrokingColor = new GrayColor(gray); } public void SetNonStrokingColorRgb(decimal r, decimal g, decimal b) { + CurrentNonStrokingColorSpace = ColorSpace.DeviceRGB; + CurrentNonStrokingColor = new 
RGBColor(r, g, b); } public void SetNonStrokingColorCmyk(decimal c, decimal m, decimal y, decimal k) { + CurrentNonStrokingColorSpace = ColorSpace.DeviceCMYK; + CurrentNonStrokingColor = new CMYKColor(c, m, y, k); } } } \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs b/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs index 6d4d9aa2..1ee653ca 100644 --- a/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs +++ b/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs @@ -39,7 +39,7 @@ public PdfPath CurrentPath { get; private set; } - public IColorspaceContext ColorspaceContext { get; } = new ColorspaceContext(); + public IColorSpaceContext ColorSpaceContext { get; } = new ColorSpaceContext(); public PdfPoint CurrentPosition { get; set; } diff --git a/src/UglyToad.PdfPig/Graphics/IColorspaceContext.cs b/src/UglyToad.PdfPig/Graphics/IColorspaceContext.cs index 8d336038..594b675e 100644 --- a/src/UglyToad.PdfPig/Graphics/IColorspaceContext.cs +++ b/src/UglyToad.PdfPig/Graphics/IColorspaceContext.cs @@ -1,12 +1,33 @@ namespace UglyToad.PdfPig.Graphics { + using Colors; using Tokens; /// /// Methods for manipulating and retrieving the current color state for a PDF content stream. /// - public interface IColorspaceContext + public interface IColorSpaceContext { + /// + /// The used for stroking operations. + /// + ColorSpace CurrentStrokingColorSpace { get; } + + /// + /// The used for non-stroking operations. + /// + ColorSpace CurrentNonStrokingColorSpace { get; } + + /// + /// The used for stroking operations. + /// + IColor CurrentStrokingColor { get; } + + /// + /// The used for non-stroking operations. + /// + IColor CurrentNonStrokingColor { get; } + /// /// Set the current color space to use for stroking operations. /// @@ -39,7 +60,7 @@ /// Yellow - A number between 0 (minimum concentration) and 1 (maximum concentration). /// Black - A number between 0 (minimum concentration) and 1 (maximum concentration). 
void SetStrokingColorCmyk(decimal c, decimal m, decimal y, decimal k); - + /// /// Set the nonstroking color space to DeviceGray and set the gray level to use for nonstroking operations. /// diff --git a/src/UglyToad.PdfPig/Graphics/IOperationContext.cs b/src/UglyToad.PdfPig/Graphics/IOperationContext.cs index eb6b6cfa..b77a415a 100644 --- a/src/UglyToad.PdfPig/Graphics/IOperationContext.cs +++ b/src/UglyToad.PdfPig/Graphics/IOperationContext.cs @@ -21,7 +21,7 @@ /// /// The active colorspaces for this content stream. /// - IColorspaceContext ColorspaceContext { get; } + IColorSpaceContext ColorSpaceContext { get; } /// /// The current position. diff --git a/src/UglyToad.PdfPig/Graphics/Operations/SetNonStrokeColor.cs b/src/UglyToad.PdfPig/Graphics/Operations/SetNonStrokeColor.cs index 09789070..0be5d155 100644 --- a/src/UglyToad.PdfPig/Graphics/Operations/SetNonStrokeColor.cs +++ b/src/UglyToad.PdfPig/Graphics/Operations/SetNonStrokeColor.cs @@ -35,6 +35,20 @@ /// public void Run(IOperationContext operationContext) { + switch (Operands.Count) + { + case 1: + operationContext.ColorSpaceContext.SetNonStrokingColorGray(Operands[0]); + break; + case 3: + operationContext.ColorSpaceContext.SetNonStrokingColorRgb(Operands[0], Operands[1], Operands[2]); + break; + case 4: + operationContext.ColorSpaceContext.SetNonStrokingColorCmyk(Operands[0], Operands[1], Operands[2], Operands[3]); + break; + default: + return; + } } /// diff --git a/src/UglyToad.PdfPig/Graphics/Operations/SetNonStrokeColorAdvanced.cs b/src/UglyToad.PdfPig/Graphics/Operations/SetNonStrokeColorAdvanced.cs index ccc43157..07bc465e 100644 --- a/src/UglyToad.PdfPig/Graphics/Operations/SetNonStrokeColorAdvanced.cs +++ b/src/UglyToad.PdfPig/Graphics/Operations/SetNonStrokeColorAdvanced.cs @@ -3,6 +3,7 @@ using System.Collections.Generic; using System.IO; using System.Linq; + using Colors; using Tokens; using Writer; @@ -53,6 +54,25 @@ /// public void Run(IOperationContext operationContext) { + if 
(operationContext.ColorSpaceContext.CurrentNonStrokingColorSpace.GetFamily() != ColorSpaceFamily.Device) + { + return; + } + + switch (Operands.Count) + { + case 1: + operationContext.ColorSpaceContext.SetNonStrokingColorGray(Operands[0]); + break; + case 3: + operationContext.ColorSpaceContext.SetNonStrokingColorRgb(Operands[0], Operands[1], Operands[2]); + break; + case 4: + operationContext.ColorSpaceContext.SetNonStrokingColorCmyk(Operands[0], Operands[1], Operands[2], Operands[3]); + break; + default: + return; + } } /// diff --git a/src/UglyToad.PdfPig/Graphics/Operations/SetNonStrokeColorDeviceCmyk.cs b/src/UglyToad.PdfPig/Graphics/Operations/SetNonStrokeColorDeviceCmyk.cs index 8d0ae3be..cc813718 100644 --- a/src/UglyToad.PdfPig/Graphics/Operations/SetNonStrokeColorDeviceCmyk.cs +++ b/src/UglyToad.PdfPig/Graphics/Operations/SetNonStrokeColorDeviceCmyk.cs @@ -54,7 +54,7 @@ /// public void Run(IOperationContext operationContext) { - operationContext.ColorspaceContext.SetNonStrokingColorCmyk(C, M, Y, K); + operationContext.ColorSpaceContext.SetNonStrokingColorCmyk(C, M, Y, K); } /// diff --git a/src/UglyToad.PdfPig/Graphics/Operations/SetNonStrokeColorDeviceGray.cs b/src/UglyToad.PdfPig/Graphics/Operations/SetNonStrokeColorDeviceGray.cs index 3c256ceb..6267efb8 100644 --- a/src/UglyToad.PdfPig/Graphics/Operations/SetNonStrokeColorDeviceGray.cs +++ b/src/UglyToad.PdfPig/Graphics/Operations/SetNonStrokeColorDeviceGray.cs @@ -33,7 +33,7 @@ /// public void Run(IOperationContext operationContext) { - operationContext.ColorspaceContext.SetNonStrokingColorGray(Gray); + operationContext.ColorSpaceContext.SetNonStrokingColorGray(Gray); } /// diff --git a/src/UglyToad.PdfPig/Graphics/Operations/SetNonStrokeColorDeviceRgb.cs b/src/UglyToad.PdfPig/Graphics/Operations/SetNonStrokeColorDeviceRgb.cs index cc063792..71f45b82 100644 --- a/src/UglyToad.PdfPig/Graphics/Operations/SetNonStrokeColorDeviceRgb.cs +++ 
b/src/UglyToad.PdfPig/Graphics/Operations/SetNonStrokeColorDeviceRgb.cs @@ -47,7 +47,7 @@ /// public void Run(IOperationContext operationContext) { - operationContext.ColorspaceContext.SetNonStrokingColorRgb(R, G, B); + operationContext.ColorSpaceContext.SetNonStrokingColorRgb(R, G, B); } /// diff --git a/src/UglyToad.PdfPig/Graphics/Operations/SetNonStrokeColorSpace.cs b/src/UglyToad.PdfPig/Graphics/Operations/SetNonStrokeColorSpace.cs index b057637b..7715e82d 100644 --- a/src/UglyToad.PdfPig/Graphics/Operations/SetNonStrokeColorSpace.cs +++ b/src/UglyToad.PdfPig/Graphics/Operations/SetNonStrokeColorSpace.cs @@ -36,7 +36,7 @@ /// public void Run(IOperationContext operationContext) { - operationContext.ColorspaceContext.SetNonStrokingColorspace(Name); + operationContext.ColorSpaceContext.SetNonStrokingColorspace(Name); } /// diff --git a/src/UglyToad.PdfPig/Graphics/Operations/SetStrokeColor.cs b/src/UglyToad.PdfPig/Graphics/Operations/SetStrokeColor.cs index 9e7b0fbb..5061de31 100644 --- a/src/UglyToad.PdfPig/Graphics/Operations/SetStrokeColor.cs +++ b/src/UglyToad.PdfPig/Graphics/Operations/SetStrokeColor.cs @@ -38,13 +38,13 @@ switch (Operands.Count) { case 1: - operationContext.ColorspaceContext.SetStrokingColorGray(Operands[0]); + operationContext.ColorSpaceContext.SetStrokingColorGray(Operands[0]); break; case 3: - operationContext.ColorspaceContext.SetStrokingColorRgb(Operands[0], Operands[1], Operands[2]); + operationContext.ColorSpaceContext.SetStrokingColorRgb(Operands[0], Operands[1], Operands[2]); break; case 4: - operationContext.ColorspaceContext.SetStrokingColorCmyk(Operands[0], Operands[1], Operands[2], Operands[3]); + operationContext.ColorSpaceContext.SetStrokingColorCmyk(Operands[0], Operands[1], Operands[2], Operands[3]); break; default: return; diff --git a/src/UglyToad.PdfPig/Graphics/Operations/SetStrokeColorAdvanced.cs b/src/UglyToad.PdfPig/Graphics/Operations/SetStrokeColorAdvanced.cs index 60a90ab3..3bd5dcf9 100644 --- 
a/src/UglyToad.PdfPig/Graphics/Operations/SetStrokeColorAdvanced.cs +++ b/src/UglyToad.PdfPig/Graphics/Operations/SetStrokeColorAdvanced.cs @@ -3,6 +3,7 @@ using System.Collections.Generic; using System.IO; using System.Linq; + using Colors; using Tokens; using Writer; @@ -40,7 +41,7 @@ } /// - /// Create a new . + /// Create a new . /// /// The color operands. /// The pattern name. @@ -53,6 +54,25 @@ /// public void Run(IOperationContext operationContext) { + if (operationContext.ColorSpaceContext.CurrentStrokingColorSpace.GetFamily() != ColorSpaceFamily.Device) + { + return; + } + + switch (Operands.Count) + { + case 1: + operationContext.ColorSpaceContext.SetStrokingColorGray(Operands[0]); + break; + case 3: + operationContext.ColorSpaceContext.SetStrokingColorRgb(Operands[0], Operands[1], Operands[2]); + break; + case 4: + operationContext.ColorSpaceContext.SetStrokingColorCmyk(Operands[0], Operands[1], Operands[2], Operands[3]); + break; + default: + return; + } } /// diff --git a/src/UglyToad.PdfPig/Graphics/Operations/SetStrokeColorDeviceCmyk.cs b/src/UglyToad.PdfPig/Graphics/Operations/SetStrokeColorDeviceCmyk.cs index f5ce5880..8976c946 100644 --- a/src/UglyToad.PdfPig/Graphics/Operations/SetStrokeColorDeviceCmyk.cs +++ b/src/UglyToad.PdfPig/Graphics/Operations/SetStrokeColorDeviceCmyk.cs @@ -54,7 +54,7 @@ /// public void Run(IOperationContext operationContext) { - operationContext.ColorspaceContext.SetStrokingColorCmyk(C, M, Y, K); + operationContext.ColorSpaceContext.SetStrokingColorCmyk(C, M, Y, K); } /// diff --git a/src/UglyToad.PdfPig/Graphics/Operations/SetStrokeColorDeviceGray.cs b/src/UglyToad.PdfPig/Graphics/Operations/SetStrokeColorDeviceGray.cs index 397cab25..c5dc26c7 100644 --- a/src/UglyToad.PdfPig/Graphics/Operations/SetStrokeColorDeviceGray.cs +++ b/src/UglyToad.PdfPig/Graphics/Operations/SetStrokeColorDeviceGray.cs @@ -33,7 +33,7 @@ /// public void Run(IOperationContext operationContext) { - 
operationContext.ColorspaceContext.SetStrokingColorGray(Gray); + operationContext.ColorSpaceContext.SetStrokingColorGray(Gray); } /// diff --git a/src/UglyToad.PdfPig/Graphics/Operations/SetStrokeColorDeviceRgb.cs b/src/UglyToad.PdfPig/Graphics/Operations/SetStrokeColorDeviceRgb.cs index fe9bd5f0..d1e5bb40 100644 --- a/src/UglyToad.PdfPig/Graphics/Operations/SetStrokeColorDeviceRgb.cs +++ b/src/UglyToad.PdfPig/Graphics/Operations/SetStrokeColorDeviceRgb.cs @@ -47,7 +47,7 @@ /// public void Run(IOperationContext operationContext) { - operationContext.ColorspaceContext.SetStrokingColorRgb(R, G, B); + operationContext.ColorSpaceContext.SetStrokingColorRgb(R, G, B); } /// diff --git a/src/UglyToad.PdfPig/Graphics/Operations/SetStrokeColorSpace.cs b/src/UglyToad.PdfPig/Graphics/Operations/SetStrokeColorSpace.cs index a0369ecc..8c03e731 100644 --- a/src/UglyToad.PdfPig/Graphics/Operations/SetStrokeColorSpace.cs +++ b/src/UglyToad.PdfPig/Graphics/Operations/SetStrokeColorSpace.cs @@ -36,7 +36,7 @@ /// public void Run(IOperationContext operationContext) { - operationContext.ColorspaceContext.SetStrokingColorspace(Name); + operationContext.ColorSpaceContext.SetStrokingColorspace(Name); } /// From 0df35b8488551c563c6eefc2688e846e5405a369 Mon Sep 17 00:00:00 2001 From: Eliot Jones Date: Mon, 5 Aug 2019 18:30:46 +0100 Subject: [PATCH 03/13] fix naming of color space to be 2 words --- .../Graphics/{ColorspaceContext.cs => ColorSpaceContext.cs} | 0 .../Graphics/{IColorspaceContext.cs => IColorSpaceContext.cs} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename src/UglyToad.PdfPig/Graphics/{ColorspaceContext.cs => ColorSpaceContext.cs} (100%) rename src/UglyToad.PdfPig/Graphics/{IColorspaceContext.cs => IColorSpaceContext.cs} (100%) diff --git a/src/UglyToad.PdfPig/Graphics/ColorspaceContext.cs b/src/UglyToad.PdfPig/Graphics/ColorSpaceContext.cs similarity index 100% rename from src/UglyToad.PdfPig/Graphics/ColorspaceContext.cs rename to 
src/UglyToad.PdfPig/Graphics/ColorSpaceContext.cs diff --git a/src/UglyToad.PdfPig/Graphics/IColorspaceContext.cs b/src/UglyToad.PdfPig/Graphics/IColorSpaceContext.cs similarity index 100% rename from src/UglyToad.PdfPig/Graphics/IColorspaceContext.cs rename to src/UglyToad.PdfPig/Graphics/IColorSpaceContext.cs From 4dde4ca0c1c0603579887e8548d78b4ea758af98 Mon Sep 17 00:00:00 2001 From: Eliot Jones Date: Mon, 5 Aug 2019 19:26:10 +0100 Subject: [PATCH 04/13] add colors to letters based on current font and graphics state --- .../Graphics/TestOperationContext.cs | 47 +------------------ .../Integration/PigProductionHandbookTests.cs | 33 +++++++++++++ src/UglyToad.PdfPig/Content/Letter.cs | 46 ++++++++++++------ .../Graphics/ColorSpaceContext.cs | 46 ++++++++++-------- .../Graphics/Colors/CMYKColor.cs | 12 +++-- .../Graphics/Colors/GrayColor.cs | 6 +++ .../Graphics/Colors/RGBColor.cs | 6 +++ .../Graphics/ContentStreamProcessor.cs | 40 ++++++++++++++-- .../Graphics/CurrentGraphicsState.cs | 15 +++++- .../Graphics/IColorSpaceContext.cs | 12 +---- ...ReflectionGraphicsStateOperationFactory.cs | 4 +- src/UglyToad.PdfPig/Writer/PdfPageBuilder.cs | 5 +- 12 files changed, 171 insertions(+), 101 deletions(-) diff --git a/src/UglyToad.PdfPig.Tests/Graphics/TestOperationContext.cs b/src/UglyToad.PdfPig.Tests/Graphics/TestOperationContext.cs index a88e530d..ba2e478a 100644 --- a/src/UglyToad.PdfPig.Tests/Graphics/TestOperationContext.cs +++ b/src/UglyToad.PdfPig.Tests/Graphics/TestOperationContext.cs @@ -3,7 +3,6 @@ using System.Collections.Generic; using PdfPig.Geometry; using PdfPig.Graphics; - using PdfPig.Graphics.Colors; using PdfPig.IO; using PdfPig.Tokens; using PdfPig.Core; @@ -22,7 +21,7 @@ public PdfPath CurrentPath { get; set; } - public IColorSpaceContext ColorSpaceContext { get; } = new ColorSpaceContext(); + public IColorSpaceContext ColorSpaceContext { get; } public PdfPoint CurrentPosition { get; set; } @@ -30,6 +29,7 @@ { StateStack.Push(new 
CurrentGraphicsState()); CurrentPath = new PdfPath(CurrentTransformationMatrix); + ColorSpaceContext = new ColorSpaceContext(GetCurrentState); } public CurrentGraphicsState GetCurrentState() @@ -78,47 +78,4 @@ { } } - - public class TestColorSpaceContext : IColorSpaceContext - { - public ColorSpace CurrentStrokingColorSpace { get; } = ColorSpace.DeviceGray; - - public ColorSpace CurrentNonStrokingColorSpace { get; } = ColorSpace.DeviceGray; - - public IColor CurrentStrokingColor { get; } = GrayColor.Black; - - public IColor CurrentNonStrokingColor { get; } = GrayColor.Black; - - public void SetStrokingColorspace(NameToken colorspace) - { - } - - public void SetNonStrokingColorspace(NameToken colorspace) - { - } - - public void SetStrokingColorGray(decimal gray) - { - } - - public void SetStrokingColorRgb(decimal r, decimal g, decimal b) - { - } - - public void SetStrokingColorCmyk(decimal c, decimal m, decimal y, decimal k) - { - } - - public void SetNonStrokingColorGray(decimal gray) - { - } - - public void SetNonStrokingColorRgb(decimal r, decimal g, decimal b) - { - } - - public void SetNonStrokingColorCmyk(decimal c, decimal m, decimal y, decimal k) - { - } - } } diff --git a/src/UglyToad.PdfPig.Tests/Integration/PigProductionHandbookTests.cs b/src/UglyToad.PdfPig.Tests/Integration/PigProductionHandbookTests.cs index 7d97984b..a6d49d1e 100644 --- a/src/UglyToad.PdfPig.Tests/Integration/PigProductionHandbookTests.cs +++ b/src/UglyToad.PdfPig.Tests/Integration/PigProductionHandbookTests.cs @@ -27,6 +27,39 @@ } } + [Fact] + public void LettersHaveCorrectColors() + { + using (var document = PdfDocument.Open(GetFilename(), new ParsingOptions + { + UseLenientParsing = false + })) + { + var page = document.GetPage(1); + + // Pinkish. + var (r, g , b) = page.Letters[0].Color.ToRGBValues(); + + Assert.Equal(1, r); + Assert.Equal(0.914m, g); + Assert.Equal(0.765m, b); + + // White. 
+ (r, g, b) = page.Letters[37].Color.ToRGBValues(); + + Assert.Equal(1, r); + Assert.Equal(1, g); + Assert.Equal(1, b); + + // Blackish. + (r, g, b) = page.Letters[76].Color.ToRGBValues(); + + Assert.Equal(0.137m, r); + Assert.Equal(0.122m, g); + Assert.Equal(0.125m, b); + } + } + [Fact] public void Page1HasCorrectWords() { diff --git a/src/UglyToad.PdfPig/Content/Letter.cs b/src/UglyToad.PdfPig/Content/Letter.cs index b3bb10ed..dc34f367 100644 --- a/src/UglyToad.PdfPig/Content/Letter.cs +++ b/src/UglyToad.PdfPig/Content/Letter.cs @@ -1,6 +1,7 @@ namespace UglyToad.PdfPig.Content { using Geometry; + using Graphics.Colors; /// /// A glyph or combination of glyphs (characters) drawn by a PDF content stream. @@ -54,6 +55,11 @@ /// public string FontName { get; } + /// + /// The color of the letter. + /// + public IColor Color { get; } + /// /// The size of the font in points. This is not ready for public consumption as the calculation is incorrect. /// @@ -62,27 +68,27 @@ /// /// Create a new letter to represent some text drawn by the Tj operator. /// - internal Letter(string value, PdfRectangle glyphRectangle, PdfPoint startBaseLine, PdfPoint endBaseLine, decimal width, decimal fontSize, string fontName, decimal pointSize) + internal Letter(string value, PdfRectangle glyphRectangle, + PdfPoint startBaseLine, + PdfPoint endBaseLine, + decimal width, + decimal fontSize, + string fontName, + IColor color, + decimal pointSize) { Value = value; GlyphRectangle = glyphRectangle; - FontSize = fontSize; - FontName = fontName; - PointSize = pointSize; - Width = width; StartBaseLine = startBaseLine; EndBaseLine = endBaseLine; + Width = width; + FontSize = fontSize; + FontName = fontName; + Color = color ?? GrayColor.Black; + PointSize = pointSize; TextDirection = GetTextDirection(); } - /// - /// Produces a string representation of the letter and its position. 
- /// - public override string ToString() - { - return $"{Value} {Location} {FontName} {PointSize}"; - } - private TextDirection GetTextDirection() { if (System.Math.Abs(StartBaseLine.Y - EndBaseLine.Y) < 10e-5m) @@ -91,17 +97,29 @@ { return TextDirection.Rotate180; } + return TextDirection.Horizontal; } - else if (System.Math.Abs(StartBaseLine.X - EndBaseLine.X) < 10e-5m) + + if (System.Math.Abs(StartBaseLine.X - EndBaseLine.X) < 10e-5m) { if (StartBaseLine.Y > EndBaseLine.Y) { return TextDirection.Rotate90; } + return TextDirection.Rotate270; } + return TextDirection.Unknown; } + + /// + /// Produces a string representation of the letter and its position. + /// + public override string ToString() + { + return $"{Value} {Location} {FontName} {PointSize}"; + } } } diff --git a/src/UglyToad.PdfPig/Graphics/ColorSpaceContext.cs b/src/UglyToad.PdfPig/Graphics/ColorSpaceContext.cs index dd57c50f..160bf342 100644 --- a/src/UglyToad.PdfPig/Graphics/ColorSpaceContext.cs +++ b/src/UglyToad.PdfPig/Graphics/ColorSpaceContext.cs @@ -1,15 +1,21 @@ namespace UglyToad.PdfPig.Graphics { + using System; using Colors; using Tokens; internal class ColorSpaceContext : IColorSpaceContext { - public ColorSpace CurrentStrokingColorSpace { get; private set; } = ColorSpace.DeviceGray; - public ColorSpace CurrentNonStrokingColorSpace { get; private set; } = ColorSpace.DeviceGray; + private readonly Func currentStateFunc; - public IColor CurrentStrokingColor { get; private set; } = GrayColor.Black; - public IColor CurrentNonStrokingColor { get; private set; } = GrayColor.Black; + public ColorSpace CurrentStrokingColorSpace { get; private set; } = ColorSpace.DeviceGray; + + public ColorSpace CurrentNonStrokingColorSpace { get; private set; } = ColorSpace.DeviceGray; + + public ColorSpaceContext(Func currentStateFunc) + { + this.currentStateFunc = currentStateFunc ?? 
throw new ArgumentNullException(nameof(currentStateFunc)); + } public void SetStrokingColorspace(NameToken colorspace) { @@ -19,23 +25,23 @@ switch (colorspaceActual) { case ColorSpace.DeviceGray: - CurrentStrokingColor = GrayColor.Black; + currentStateFunc().CurrentStrokingColor = GrayColor.Black; break; case ColorSpace.DeviceRGB: - CurrentStrokingColor = RGBColor.Black; + currentStateFunc().CurrentStrokingColor = RGBColor.Black; break; case ColorSpace.DeviceCMYK: - CurrentStrokingColor = CMYKColor.Black; + currentStateFunc().CurrentStrokingColor = CMYKColor.Black; break; default: - CurrentStrokingColor = GrayColor.Black; + currentStateFunc().CurrentStrokingColor = GrayColor.Black; break; } } else { CurrentStrokingColorSpace = ColorSpace.DeviceGray; - CurrentStrokingColor = GrayColor.Black; + currentStateFunc().CurrentStrokingColor = GrayColor.Black; } } @@ -47,60 +53,60 @@ switch (colorspaceActual) { case ColorSpace.DeviceGray: - CurrentNonStrokingColor = GrayColor.Black; + currentStateFunc().CurrentNonStrokingColor = GrayColor.Black; break; case ColorSpace.DeviceRGB: - CurrentNonStrokingColor = RGBColor.Black; + currentStateFunc().CurrentNonStrokingColor = RGBColor.Black; break; case ColorSpace.DeviceCMYK: - CurrentNonStrokingColor = CMYKColor.Black; + currentStateFunc().CurrentNonStrokingColor = CMYKColor.Black; break; default: - CurrentNonStrokingColor = GrayColor.Black; + currentStateFunc().CurrentNonStrokingColor = GrayColor.Black; break; } } else { CurrentNonStrokingColorSpace = ColorSpace.DeviceGray; - CurrentNonStrokingColor = GrayColor.Black; + currentStateFunc().CurrentNonStrokingColor = GrayColor.Black; } } public void SetStrokingColorGray(decimal gray) { CurrentStrokingColorSpace = ColorSpace.DeviceGray; - CurrentStrokingColor = new GrayColor(gray); + currentStateFunc().CurrentStrokingColor = new GrayColor(gray); } public void SetStrokingColorRgb(decimal r, decimal g, decimal b) { CurrentStrokingColorSpace = ColorSpace.DeviceRGB; - 
CurrentStrokingColor = new RGBColor(r, g, b); + currentStateFunc().CurrentStrokingColor = new RGBColor(r, g, b); } public void SetStrokingColorCmyk(decimal c, decimal m, decimal y, decimal k) { CurrentStrokingColorSpace = ColorSpace.DeviceCMYK; - CurrentStrokingColor = new CMYKColor(c, m, y, k); + currentStateFunc().CurrentStrokingColor = new CMYKColor(c, m, y, k); } public void SetNonStrokingColorGray(decimal gray) { CurrentNonStrokingColorSpace = ColorSpace.DeviceGray; - CurrentNonStrokingColor = new GrayColor(gray); + currentStateFunc().CurrentNonStrokingColor = new GrayColor(gray); } public void SetNonStrokingColorRgb(decimal r, decimal g, decimal b) { CurrentNonStrokingColorSpace = ColorSpace.DeviceRGB; - CurrentNonStrokingColor = new RGBColor(r, g, b); + currentStateFunc().CurrentNonStrokingColor = new RGBColor(r, g, b); } public void SetNonStrokingColorCmyk(decimal c, decimal m, decimal y, decimal k) { CurrentNonStrokingColorSpace = ColorSpace.DeviceCMYK; - CurrentNonStrokingColor = new CMYKColor(c, m, y, k); + currentStateFunc().CurrentNonStrokingColor = new CMYKColor(c, m, y, k); } } } \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Graphics/Colors/CMYKColor.cs b/src/UglyToad.PdfPig/Graphics/Colors/CMYKColor.cs index 2f6f8369..a4238cff 100644 --- a/src/UglyToad.PdfPig/Graphics/Colors/CMYKColor.cs +++ b/src/UglyToad.PdfPig/Graphics/Colors/CMYKColor.cs @@ -45,9 +45,15 @@ /// public (decimal r, decimal g, decimal b) ToRGBValues() { - return ((255 * (1 - C) * (1 - K)) / 255m, - (255 * (1 - M) * (1 - K)) / 255m, - (255 * (1 - Y) * (1 - K)) / 255m); + return ((1 - C) * (1 - K), + (1 - M) * (1 - K), + (1 - Y) * (1 - K)); + } + + /// + public override string ToString() + { + return $"CMYK: ({C}, {M}, {Y}, {K})"; } } } \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Graphics/Colors/GrayColor.cs b/src/UglyToad.PdfPig/Graphics/Colors/GrayColor.cs index f375507f..8ef14a00 100644 --- a/src/UglyToad.PdfPig/Graphics/Colors/GrayColor.cs +++ 
b/src/UglyToad.PdfPig/Graphics/Colors/GrayColor.cs @@ -29,5 +29,11 @@ { return (Gray, Gray, Gray); } + + /// + public override string ToString() + { + return $"Gray: {Gray}"; + } } } \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Graphics/Colors/RGBColor.cs b/src/UglyToad.PdfPig/Graphics/Colors/RGBColor.cs index 99bb407e..ce8e5a11 100644 --- a/src/UglyToad.PdfPig/Graphics/Colors/RGBColor.cs +++ b/src/UglyToad.PdfPig/Graphics/Colors/RGBColor.cs @@ -41,5 +41,11 @@ { return (R, G, B); } + + /// + public override string ToString() + { + return $"RGB: ({R}, {G}, {B})"; + } } } \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs b/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs index 1ee653ca..7e921474 100644 --- a/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs +++ b/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; using System.Diagnostics; + using Colors; using Content; using Core; using Fonts; @@ -39,7 +40,7 @@ public PdfPath CurrentPath { get; private set; } - public IColorSpaceContext ColorSpaceContext { get; } = new ColorSpaceContext(); + public IColorSpaceContext ColorSpaceContext { get; } public PdfPoint CurrentPosition { get; set; } @@ -66,6 +67,7 @@ this.xObjectFactory = xObjectFactory; this.log = log; graphicsStack.Push(new CurrentGraphicsState()); + ColorSpaceContext = new ColorSpaceContext(GetCurrentState); } public PageContent Process(IReadOnlyList operations) @@ -165,11 +167,27 @@ .Transform(TextMatrices.TextMatrix .Transform(renderingMatrix .Transform(boundingBox.GlyphBounds))); + var transformedPdfBounds = rotation.Rotate(transformationMatrix) .Transform(TextMatrices.TextMatrix .Transform(renderingMatrix.Transform(new PdfRectangle(0, 0, boundingBox.Width, 0)))); - ShowGlyph(font, transformedGlyphBounds, transformedPdfBounds.BottomLeft, transformedPdfBounds.BottomRight, transformedPdfBounds.Width, unicode, fontSize, 
pointSize); + // If the text rendering mode calls for filling, the current nonstroking color in the graphics state is used; + // if it calls for stroking, the current stroking color is used. + // In modes that perform both filling and stroking, the effect is as if each glyph outline were filled and then stroked in separate operations. + // TODO: expose color as something more advanced + var color = currentState.FontState.TextRenderingMode != TextRenderingMode.Stroke + ? currentState.CurrentNonStrokingColor + : currentState.CurrentStrokingColor; + + ShowGlyph(font, transformedGlyphBounds, + transformedPdfBounds.BottomLeft, + transformedPdfBounds.BottomRight, + transformedPdfBounds.Width, + unicode, + fontSize, + color, + pointSize); decimal tx, ty; if (font.IsVertical) @@ -336,9 +354,23 @@ TextMatrices.TextMatrix = newMatrix; } - private void ShowGlyph(IFont font, PdfRectangle glyphRectangle, PdfPoint startBaseLine, PdfPoint endBaseLine, decimal width, string unicode, decimal fontSize, decimal pointSize) + private void ShowGlyph(IFont font, PdfRectangle glyphRectangle, + PdfPoint startBaseLine, + PdfPoint endBaseLine, + decimal width, + string unicode, + decimal fontSize, + IColor color, + decimal pointSize) { - var letter = new Letter(unicode, glyphRectangle, startBaseLine, endBaseLine, width, fontSize, font.Name.Data, pointSize); + var letter = new Letter(unicode, glyphRectangle, + startBaseLine, + endBaseLine, + width, + fontSize, + font.Name.Data, + color, + pointSize); Letters.Add(letter); } diff --git a/src/UglyToad.PdfPig/Graphics/CurrentGraphicsState.cs b/src/UglyToad.PdfPig/Graphics/CurrentGraphicsState.cs index f3270926..223babbc 100644 --- a/src/UglyToad.PdfPig/Graphics/CurrentGraphicsState.cs +++ b/src/UglyToad.PdfPig/Graphics/CurrentGraphicsState.cs @@ -1,6 +1,7 @@ // ReSharper disable RedundantDefaultMemberInitializer namespace UglyToad.PdfPig.Graphics { + using Colors; using Core; using PdfPig.Core; @@ -96,6 +97,16 @@ namespace 
UglyToad.PdfPig.Graphics /// public decimal Smoothness { get; set; } = 0; + /// + /// The current active stroking color for paths. + /// + public IColor CurrentStrokingColor { get; set; } + + /// + /// The current active non-stroking color for text and fill. + /// + public IColor CurrentNonStrokingColor { get; set; } + #endregion /// @@ -118,7 +129,9 @@ namespace UglyToad.PdfPig.Graphics NonStrokingOverprint = NonStrokingOverprint, OverprintMode = OverprintMode, Smoothness = Smoothness, - StrokeAdjustment = StrokeAdjustment + StrokeAdjustment = StrokeAdjustment, + CurrentStrokingColor = CurrentStrokingColor, + CurrentNonStrokingColor = CurrentNonStrokingColor }; } } diff --git a/src/UglyToad.PdfPig/Graphics/IColorSpaceContext.cs b/src/UglyToad.PdfPig/Graphics/IColorSpaceContext.cs index 594b675e..d059e18a 100644 --- a/src/UglyToad.PdfPig/Graphics/IColorSpaceContext.cs +++ b/src/UglyToad.PdfPig/Graphics/IColorSpaceContext.cs @@ -17,17 +17,7 @@ /// The used for non-stroking operations. /// ColorSpace CurrentNonStrokingColorSpace { get; } - - /// - /// The used for stroking operations. - /// - IColor CurrentStrokingColor { get; } - - /// - /// The used for non-stroking operations. - /// - IColor CurrentNonStrokingColor { get; } - + /// /// Set the current color space to use for stroking operations. 
/// diff --git a/src/UglyToad.PdfPig/Graphics/ReflectionGraphicsStateOperationFactory.cs b/src/UglyToad.PdfPig/Graphics/ReflectionGraphicsStateOperationFactory.cs index 854f7e16..5d576c00 100644 --- a/src/UglyToad.PdfPig/Graphics/ReflectionGraphicsStateOperationFactory.cs +++ b/src/UglyToad.PdfPig/Graphics/ReflectionGraphicsStateOperationFactory.cs @@ -126,11 +126,11 @@ namespace UglyToad.PdfPig.Graphics case SetNonStrokeColorAdvanced.Symbol: if (operands[operands.Count - 1] is NameToken scnLowerPatternName) { - return new SetStrokeColorAdvanced(operands.Take(operands.Count - 1).Select(x => ((NumericToken)x).Data).ToList(), scnLowerPatternName); + return new SetNonStrokeColorAdvanced(operands.Take(operands.Count - 1).Select(x => ((NumericToken)x).Data).ToList(), scnLowerPatternName); } else if (operands.All(x => x is NumericToken)) { - return new SetStrokeColorAdvanced(operands.Select(x => ((NumericToken)x).Data).ToList()); + return new SetNonStrokeColorAdvanced(operands.Select(x => ((NumericToken)x).Data).ToList()); } var errorMessageScnLower = string.Join(", ", operands.Select(x => x.ToString())); diff --git a/src/UglyToad.PdfPig/Writer/PdfPageBuilder.cs b/src/UglyToad.PdfPig/Writer/PdfPageBuilder.cs index c1c447dc..0288496a 100644 --- a/src/UglyToad.PdfPig/Writer/PdfPageBuilder.cs +++ b/src/UglyToad.PdfPig/Writer/PdfPageBuilder.cs @@ -5,6 +5,7 @@ using Content; using Core; using Geometry; + using Graphics.Colors; using Graphics.Operations; using Graphics.Operations.General; using Graphics.Operations.PathConstruction; @@ -258,7 +259,9 @@ var documentSpace = textMatrix.Transform(renderingMatrix.Transform(fontMatrix.Transform(rect))); - var letter = new Letter(c.ToString(), documentSpace, advanceRect.BottomLeft, advanceRect.BottomRight, width, fontSize, font.Name, fontSize); + var letter = new Letter(c.ToString(), documentSpace, advanceRect.BottomLeft, advanceRect.BottomRight, width, fontSize, font.Name, + GrayColor.Black, + fontSize); letters.Add(letter); var tx = 
advanceRect.Width * horizontalScaling; From c5d03bca9703ffe4d20d2c2ab1c60b8fd383fa0b Mon Sep 17 00:00:00 2001 From: Eliot Jones Date: Thu, 8 Aug 2019 21:19:18 +0100 Subject: [PATCH 05/13] move application of transformation matrix outside path --- .../CharStrings/Type2BuildCharContext.cs | 2 +- .../Commands/Type1BuildCharContext.cs | 2 +- src/UglyToad.PdfPig/Geometry/PdfPath.cs | 25 ++++++------------- .../Graphics/ContentStreamProcessor.cs | 2 +- .../AppendDualControlPointBezierCurve.cs | 11 +++++--- .../AppendEndControlPointBezierCurve.cs | 14 ++++++----- .../PathConstruction/AppendRectangle.cs | 3 ++- .../AppendStartControlPointBezierCurve.cs | 12 +++++---- .../AppendStraightLineSegment.cs | 5 ++-- .../PathConstruction/BeginNewSubpath.cs | 5 ++-- 10 files changed, 40 insertions(+), 41 deletions(-) diff --git a/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CharStrings/Type2BuildCharContext.cs b/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CharStrings/Type2BuildCharContext.cs index 3be3094b..538556e3 100644 --- a/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CharStrings/Type2BuildCharContext.cs +++ b/src/UglyToad.PdfPig/Fonts/CompactFontFormat/CharStrings/Type2BuildCharContext.cs @@ -18,7 +18,7 @@ /// /// The current path. /// - public PdfPath Path { get; } = new PdfPath(Core.TransformationMatrix.Identity); + public PdfPath Path { get; } = new PdfPath(); /// /// The current location of the active point. 
diff --git a/src/UglyToad.PdfPig/Fonts/Type1/CharStrings/Commands/Type1BuildCharContext.cs b/src/UglyToad.PdfPig/Fonts/Type1/CharStrings/Commands/Type1BuildCharContext.cs index 35ec3a10..c6f421ae 100644 --- a/src/UglyToad.PdfPig/Fonts/Type1/CharStrings/Commands/Type1BuildCharContext.cs +++ b/src/UglyToad.PdfPig/Fonts/Type1/CharStrings/Commands/Type1BuildCharContext.cs @@ -22,7 +22,7 @@ public bool IsFlexing { get; set; } [NotNull] - public PdfPath Path { get; private set; } = new PdfPath(Core.TransformationMatrix.Identity); + public PdfPath Path { get; private set; } = new PdfPath(); public PdfPoint CurrentPosition { get; set; } diff --git a/src/UglyToad.PdfPig/Geometry/PdfPath.cs b/src/UglyToad.PdfPig/Geometry/PdfPath.cs index 7b4061ed..89a2861f 100644 --- a/src/UglyToad.PdfPig/Geometry/PdfPath.cs +++ b/src/UglyToad.PdfPig/Geometry/PdfPath.cs @@ -5,7 +5,6 @@ namespace UglyToad.PdfPig.Geometry using System.Collections.Generic; using System.Linq; using System.Text; - using Core; /// /// A path in a PDF document, used by glyphs and page content. Can contain multiple sub-paths. @@ -20,21 +19,10 @@ namespace UglyToad.PdfPig.Geometry public IReadOnlyList Commands => commands; private PdfPoint? currentPosition; - - private readonly TransformationMatrix currentTransformationMatrix; - - /// - /// Create a new . - /// - /// The transformation to apply to all points in this path. 
- public PdfPath(TransformationMatrix transformationMatrix) - { - currentTransformationMatrix = transformationMatrix; - } - + internal void MoveTo(decimal x, decimal y) { - currentPosition = currentTransformationMatrix.Transform(new PdfPoint(x, y)); + currentPosition = new PdfPoint(x, y); commands.Add(new Move(currentPosition.Value)); } @@ -42,12 +30,13 @@ namespace UglyToad.PdfPig.Geometry { if (currentPosition.HasValue) { - var to = currentTransformationMatrix.Transform(new PdfPoint(x, y)); + var to = new PdfPoint(x, y); commands.Add(new Line(currentPosition.Value, to)); currentPosition = to; } else { + // TODO: probably the wrong behaviour here, maybe line starts from (0, 0)? MoveTo(x, y); } } @@ -58,9 +47,9 @@ namespace UglyToad.PdfPig.Geometry { if (currentPosition.HasValue) { - var to = currentTransformationMatrix.Transform(new PdfPoint(x3, y3)); + var to = new PdfPoint(x3, y3); commands.Add(new BezierCurve(currentPosition.Value, - currentTransformationMatrix.Transform(new PdfPoint(x1, y1)), currentTransformationMatrix.Transform(new PdfPoint(x2, y2)), to)); + new PdfPoint(x1, y1), new PdfPoint(x2, y2), to)); currentPosition = to; } else @@ -460,7 +449,7 @@ namespace UglyToad.PdfPig.Geometry internal void Rectangle(decimal x, decimal y, decimal width, decimal height) { - currentPosition = currentTransformationMatrix.Transform(new PdfPoint(x, y)); + currentPosition = new PdfPoint(x, y); LineTo(x + width, y); LineTo(x + width, y + height); LineTo(x, y + height); diff --git a/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs b/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs index 7e921474..972049bf 100644 --- a/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs +++ b/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs @@ -288,7 +288,7 @@ public void BeginSubpath() { - CurrentPath = new PdfPath(CurrentTransformationMatrix); + CurrentPath = new PdfPath(); } public void StrokePath(bool close) diff --git 
a/src/UglyToad.PdfPig/Graphics/Operations/PathConstruction/AppendDualControlPointBezierCurve.cs b/src/UglyToad.PdfPig/Graphics/Operations/PathConstruction/AppendDualControlPointBezierCurve.cs index cf191f9c..39e5945f 100644 --- a/src/UglyToad.PdfPig/Graphics/Operations/PathConstruction/AppendDualControlPointBezierCurve.cs +++ b/src/UglyToad.PdfPig/Graphics/Operations/PathConstruction/AppendDualControlPointBezierCurve.cs @@ -52,10 +52,13 @@ /// public void Run(IOperationContext operationContext) { - operationContext.CurrentPath.BezierCurveTo(ControlPoint1.X, ControlPoint1.Y, - ControlPoint2.X, ControlPoint2.Y, - End.X, End.Y); - operationContext.CurrentPosition = End; + var controlPoint1Transform = operationContext.CurrentTransformationMatrix.Transform(ControlPoint1); + var controlPoint2Transform = operationContext.CurrentTransformationMatrix.Transform(ControlPoint2); + var endTransform = operationContext.CurrentTransformationMatrix.Transform(End); + operationContext.CurrentPath.BezierCurveTo(controlPoint1Transform.X, controlPoint1Transform.Y, + controlPoint2Transform.X, controlPoint2Transform.Y, + endTransform.X, endTransform.Y); + operationContext.CurrentPosition = endTransform; } /// diff --git a/src/UglyToad.PdfPig/Graphics/Operations/PathConstruction/AppendEndControlPointBezierCurve.cs b/src/UglyToad.PdfPig/Graphics/Operations/PathConstruction/AppendEndControlPointBezierCurve.cs index 5c1ebfc5..f1d300c3 100644 --- a/src/UglyToad.PdfPig/Graphics/Operations/PathConstruction/AppendEndControlPointBezierCurve.cs +++ b/src/UglyToad.PdfPig/Graphics/Operations/PathConstruction/AppendEndControlPointBezierCurve.cs @@ -44,12 +44,14 @@ /// public void Run(IOperationContext operationContext) { - operationContext.CurrentPath.BezierCurveTo(ControlPoint1.X, ControlPoint1.Y, - End.X, - End.Y, - End.X, - End.Y); - operationContext.CurrentPosition = End; + var controlPoint1Transform = operationContext.CurrentTransformationMatrix.Transform(ControlPoint1); + var endTransform = 
operationContext.CurrentTransformationMatrix.Transform(End); + operationContext.CurrentPath.BezierCurveTo(controlPoint1Transform.X, controlPoint1Transform.Y, + endTransform.X, + endTransform.Y, + endTransform.X, + endTransform.Y); + operationContext.CurrentPosition = endTransform; } /// diff --git a/src/UglyToad.PdfPig/Graphics/Operations/PathConstruction/AppendRectangle.cs b/src/UglyToad.PdfPig/Graphics/Operations/PathConstruction/AppendRectangle.cs index 87d721cc..ab27dc48 100644 --- a/src/UglyToad.PdfPig/Graphics/Operations/PathConstruction/AppendRectangle.cs +++ b/src/UglyToad.PdfPig/Graphics/Operations/PathConstruction/AppendRectangle.cs @@ -51,7 +51,8 @@ public void Run(IOperationContext operationContext) { operationContext.BeginSubpath(); - operationContext.CurrentPath.Rectangle(LowerLeft.X, LowerLeft.Y, Width, Height); + var lowerLeftTransform = operationContext.CurrentTransformationMatrix.Transform(LowerLeft); + operationContext.CurrentPath.Rectangle(lowerLeftTransform.X, lowerLeftTransform.Y, Width, Height); } /// diff --git a/src/UglyToad.PdfPig/Graphics/Operations/PathConstruction/AppendStartControlPointBezierCurve.cs b/src/UglyToad.PdfPig/Graphics/Operations/PathConstruction/AppendStartControlPointBezierCurve.cs index 9895670c..55851649 100644 --- a/src/UglyToad.PdfPig/Graphics/Operations/PathConstruction/AppendStartControlPointBezierCurve.cs +++ b/src/UglyToad.PdfPig/Graphics/Operations/PathConstruction/AppendStartControlPointBezierCurve.cs @@ -44,13 +44,15 @@ /// public void Run(IOperationContext operationContext) { + var controlPoint2Transform = operationContext.CurrentTransformationMatrix.Transform(ControlPoint2); + var endTransform = operationContext.CurrentTransformationMatrix.Transform(End); operationContext.CurrentPath.BezierCurveTo(operationContext.CurrentPosition.X, operationContext.CurrentPosition.Y, - ControlPoint2.X, - ControlPoint2.Y, - End.X, - End.Y); - operationContext.CurrentPosition = End; + controlPoint2Transform.X, + 
controlPoint2Transform.Y, + endTransform.X, + endTransform.Y); + operationContext.CurrentPosition = endTransform; } /// diff --git a/src/UglyToad.PdfPig/Graphics/Operations/PathConstruction/AppendStraightLineSegment.cs b/src/UglyToad.PdfPig/Graphics/Operations/PathConstruction/AppendStraightLineSegment.cs index 22ccd217..b6d513e4 100644 --- a/src/UglyToad.PdfPig/Graphics/Operations/PathConstruction/AppendStraightLineSegment.cs +++ b/src/UglyToad.PdfPig/Graphics/Operations/PathConstruction/AppendStraightLineSegment.cs @@ -35,8 +35,9 @@ /// public void Run(IOperationContext operationContext) { - operationContext.CurrentPath.LineTo(End.X, End.Y); - operationContext.CurrentPosition = End; + var endPoint = operationContext.CurrentTransformationMatrix.Transform(new PdfPoint(End.X, End.Y)); + operationContext.CurrentPath.LineTo(endPoint.X, endPoint.Y); + operationContext.CurrentPosition = endPoint; } /// diff --git a/src/UglyToad.PdfPig/Graphics/Operations/PathConstruction/BeginNewSubpath.cs b/src/UglyToad.PdfPig/Graphics/Operations/PathConstruction/BeginNewSubpath.cs index b08d83ba..5c28dc74 100644 --- a/src/UglyToad.PdfPig/Graphics/Operations/PathConstruction/BeginNewSubpath.cs +++ b/src/UglyToad.PdfPig/Graphics/Operations/PathConstruction/BeginNewSubpath.cs @@ -36,8 +36,9 @@ public void Run(IOperationContext operationContext) { operationContext.BeginSubpath(); - operationContext.CurrentPosition = Point; - operationContext.CurrentPath.LineTo(Point.X, Point.Y); + var pointTransform = operationContext.CurrentTransformationMatrix.Transform(Point); + operationContext.CurrentPosition = pointTransform; + operationContext.CurrentPath.MoveTo(pointTransform.X, pointTransform.Y); } /// From fc2d532b82709ea2efd4406bf5cae96da63dbc5c Mon Sep 17 00:00:00 2001 From: Eliot Jones Date: Sat, 10 Aug 2019 14:58:02 +0100 Subject: [PATCH 06/13] use single instances of black and white for rgb/gray colors --- .../Graphics/TestOperationContext.cs | 2 +- .../Graphics/ColorSpaceContext.cs | 56 
+++++++++++++++++-- .../Graphics/Colors/RGBColor.cs | 53 ++++++++++++++++-- 3 files changed, 100 insertions(+), 11 deletions(-) diff --git a/src/UglyToad.PdfPig.Tests/Graphics/TestOperationContext.cs b/src/UglyToad.PdfPig.Tests/Graphics/TestOperationContext.cs index ba2e478a..508e8c20 100644 --- a/src/UglyToad.PdfPig.Tests/Graphics/TestOperationContext.cs +++ b/src/UglyToad.PdfPig.Tests/Graphics/TestOperationContext.cs @@ -28,7 +28,7 @@ public TestOperationContext() { StateStack.Push(new CurrentGraphicsState()); - CurrentPath = new PdfPath(CurrentTransformationMatrix); + CurrentPath = new PdfPath(); ColorSpaceContext = new ColorSpaceContext(GetCurrentState); } diff --git a/src/UglyToad.PdfPig/Graphics/ColorSpaceContext.cs b/src/UglyToad.PdfPig/Graphics/ColorSpaceContext.cs index 160bf342..eb380e85 100644 --- a/src/UglyToad.PdfPig/Graphics/ColorSpaceContext.cs +++ b/src/UglyToad.PdfPig/Graphics/ColorSpaceContext.cs @@ -76,13 +76,37 @@ public void SetStrokingColorGray(decimal gray) { CurrentStrokingColorSpace = ColorSpace.DeviceGray; - currentStateFunc().CurrentStrokingColor = new GrayColor(gray); + + if (gray == 0) + { + currentStateFunc().CurrentStrokingColor = GrayColor.Black; + } + else if (gray == 1) + { + currentStateFunc().CurrentStrokingColor = GrayColor.White; + } + else + { + currentStateFunc().CurrentStrokingColor = new GrayColor(gray); + } } public void SetStrokingColorRgb(decimal r, decimal g, decimal b) { CurrentStrokingColorSpace = ColorSpace.DeviceRGB; - currentStateFunc().CurrentStrokingColor = new RGBColor(r, g, b); + + if (r == 0 && g == 0 && b == 0) + { + currentStateFunc().CurrentStrokingColor = RGBColor.Black; + } + else if (r == 1 && g == 1 && b == 1) + { + currentStateFunc().CurrentStrokingColor = RGBColor.White; + } + else + { + currentStateFunc().CurrentStrokingColor = new RGBColor(r, g, b); + } } public void SetStrokingColorCmyk(decimal c, decimal m, decimal y, decimal k) @@ -94,13 +118,37 @@ public void SetNonStrokingColorGray(decimal 
gray) { CurrentNonStrokingColorSpace = ColorSpace.DeviceGray; - currentStateFunc().CurrentNonStrokingColor = new GrayColor(gray); + + if (gray == 0) + { + currentStateFunc().CurrentNonStrokingColor = GrayColor.Black; + } + else if (gray == 1) + { + currentStateFunc().CurrentNonStrokingColor = GrayColor.White; + } + else + { + currentStateFunc().CurrentNonStrokingColor = new GrayColor(gray); + } } public void SetNonStrokingColorRgb(decimal r, decimal g, decimal b) { CurrentNonStrokingColorSpace = ColorSpace.DeviceRGB; - currentStateFunc().CurrentNonStrokingColor = new RGBColor(r, g, b); + + if (r == 0 && g == 0 && b == 0) + { + currentStateFunc().CurrentNonStrokingColor = RGBColor.Black; + } + else if (r == 1 && g == 1 && b == 1) + { + currentStateFunc().CurrentNonStrokingColor = RGBColor.White; + } + else + { + currentStateFunc().CurrentNonStrokingColor = new RGBColor(r, g, b); + } } public void SetNonStrokingColorCmyk(decimal c, decimal m, decimal y, decimal k) diff --git a/src/UglyToad.PdfPig/Graphics/Colors/RGBColor.cs b/src/UglyToad.PdfPig/Graphics/Colors/RGBColor.cs index ce8e5a11..12775c56 100644 --- a/src/UglyToad.PdfPig/Graphics/Colors/RGBColor.cs +++ b/src/UglyToad.PdfPig/Graphics/Colors/RGBColor.cs @@ -1,34 +1,47 @@ namespace UglyToad.PdfPig.Graphics.Colors { + using System; + using System.Collections.Generic; + /// /// A color with red, green and blue components. /// - internal class RGBColor : IColor + internal class RGBColor : IColor, IEquatable { + /// + /// RGB Black value (all 0). + /// public static RGBColor Black = new RGBColor(0, 0, 0); + + /// + /// RGB White value (all 1). + /// public static RGBColor White = new RGBColor(1, 1, 1); /// public ColorSpace ColorSpace { get; } = ColorSpace.DeviceRGB; /// - /// The red value. + /// The red value between 0 and 1. /// public decimal R { get; } /// - /// The green value. + /// The green value between 0 and 1. /// public decimal G { get; } /// - /// The blue value. + /// The blue value between 0 and 1. 
/// public decimal B { get; } /// /// Create a new . /// + /// The red value between 0 and 1. + /// The green value between 0 and 1. + /// The blue value between 0 and 1. public RGBColor(decimal r, decimal g, decimal b) { R = r; @@ -37,11 +50,39 @@ } /// - public (decimal r, decimal g, decimal b) ToRGBValues() + public (decimal r, decimal g, decimal b) ToRGBValues() => (R, G, B); + + /// + public override bool Equals(object obj) { - return (R, G, B); + if (obj is RGBColor color) + { + return Equals(color); + } + + return false; } + /// + /// + /// Whether 2 RGB colors are equal across all channels. + /// + public bool Equals(RGBColor other) + { + return other != null && + R == other.R && + G == other.G && + B == other.B; + } + + /// + public override int GetHashCode() => (R, G, B).GetHashCode(); + + public static bool operator ==(RGBColor color1, RGBColor color2) => + EqualityComparer.Default.Equals(color1, color2); + + public static bool operator !=(RGBColor color1, RGBColor color2) => !(color1 == color2); + /// public override string ToString() { From eb9a9fd00ecb14409a132e0d20e07c4fbce24acc Mon Sep 17 00:00:00 2001 From: BobLd Date: Sat, 10 Aug 2019 16:01:27 +0100 Subject: [PATCH 07/13] Document Layout Analysis - IPageSegmenter, Docstrum - Create a TextBlock class - Creates IPageSegmenter - Add other useful distances: angle, etc. 
- Update RecursiveXYCut - With IPageSegmenter and TextBlock - Make XYNode and XYLeaf internal - Optimise (faster) NearestNeighbourWordExtractor and isolate the clustering algorithms for use outside of this class - Implement a Docstrum inspired page segmentation algorithm --- .../PublicApiScannerTests.cs | 6 +- src/UglyToad.PdfPig/Content/TextBlock.cs | 68 ++++++ .../ClusteringAlgorithms.cs | 164 ++++++++++++++ .../DocumentLayoutAnalysis/Distances.cs | 33 +++ .../DocumentLayoutAnalysis/DocstrumBB.cs | 212 ++++++++++++++++++ .../DocumentLayoutAnalysis/IPageSegmenter.cs | 19 ++ .../NearestNeighbourWordExtractor .cs | 115 +--------- .../DocumentLayoutAnalysis/RecursiveXYCut.cs | 42 +++- .../DocumentLayoutAnalysis/XYLeaf.cs | 2 +- .../DocumentLayoutAnalysis/XYNode.cs | 2 +- 10 files changed, 544 insertions(+), 119 deletions(-) create mode 100644 src/UglyToad.PdfPig/Content/TextBlock.cs create mode 100644 src/UglyToad.PdfPig/DocumentLayoutAnalysis/ClusteringAlgorithms.cs create mode 100644 src/UglyToad.PdfPig/DocumentLayoutAnalysis/DocstrumBB.cs create mode 100644 src/UglyToad.PdfPig/DocumentLayoutAnalysis/IPageSegmenter.cs diff --git a/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs b/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs index 865a6240..273f59f7 100644 --- a/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs +++ b/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs @@ -54,17 +54,19 @@ "UglyToad.PdfPig.Content.PageSize", "UglyToad.PdfPig.Content.Word", "UglyToad.PdfPig.Content.TextLine", + "UglyToad.PdfPig.Content.TextBlock", "UglyToad.PdfPig.Content.TextDirection", "UglyToad.PdfPig.Core.TransformationMatrix", "UglyToad.PdfPig.CrossReference.CrossReferenceTable", "UglyToad.PdfPig.CrossReference.CrossReferenceType", "UglyToad.PdfPig.CrossReference.TrailerDictionary", + "UglyToad.PdfPig.DocumentLayoutAnalysis.ClusteringAlgorithms", "UglyToad.PdfPig.DocumentLayoutAnalysis.Distances", + "UglyToad.PdfPig.DocumentLayoutAnalysis.DocstrumBB", + 
"UglyToad.PdfPig.DocumentLayoutAnalysis.IPageSegmenter", "UglyToad.PdfPig.DocumentLayoutAnalysis.MathExtensions", "UglyToad.PdfPig.DocumentLayoutAnalysis.NearestNeighbourWordExtractor", "UglyToad.PdfPig.DocumentLayoutAnalysis.RecursiveXYCut", - "UglyToad.PdfPig.DocumentLayoutAnalysis.XYNode", - "UglyToad.PdfPig.DocumentLayoutAnalysis.XYLeaf", "UglyToad.PdfPig.DocumentLayoutAnalysis.TextEdgesExtractor", "UglyToad.PdfPig.DocumentLayoutAnalysis.EdgeType", "UglyToad.PdfPig.Exceptions.PdfDocumentEncryptedException", diff --git a/src/UglyToad.PdfPig/Content/TextBlock.cs b/src/UglyToad.PdfPig/Content/TextBlock.cs new file mode 100644 index 00000000..85c10b63 --- /dev/null +++ b/src/UglyToad.PdfPig/Content/TextBlock.cs @@ -0,0 +1,68 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using UglyToad.PdfPig.Geometry; + +namespace UglyToad.PdfPig.Content +{ + /// + /// A block of text. + /// + public class TextBlock + { + /// + /// The text of the block. + /// + public string Text { get; } + + /// + /// The text direction of the block. + /// + public TextDirection TextDirection { get; } + + /// + /// The rectangle completely containing the block. + /// + public PdfRectangle BoundingBox { get; } + + /// + /// The text lines contained in the block. + /// + public IReadOnlyList TextLines { get; } + + /// + /// Create a new . 
+ /// + /// + public TextBlock(IReadOnlyList lines) + { + if (lines == null) + { + throw new ArgumentNullException(nameof(lines)); + } + + if (lines.Count == 0) + { + throw new ArgumentException("Empty lines provided.", nameof(lines)); + } + + TextLines = lines; + + Text = string.Join(" ", lines.Select(x => x.Text)); + + var minX = lines.Min(x => x.BoundingBox.Left); + var minY = lines.Min(x => x.BoundingBox.Bottom); + var maxX = lines.Max(x => x.BoundingBox.Right); + var maxY = lines.Max(x => x.BoundingBox.Top); + BoundingBox = new PdfRectangle(minX, minY, maxX, maxY); + + TextDirection = lines[0].TextDirection; + } + + /// + public override string ToString() + { + return Text; + } + } +} diff --git a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/ClusteringAlgorithms.cs b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/ClusteringAlgorithms.cs new file mode 100644 index 00000000..be7a8299 --- /dev/null +++ b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/ClusteringAlgorithms.cs @@ -0,0 +1,164 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +using UglyToad.PdfPig.Geometry; + +namespace UglyToad.PdfPig.DocumentLayoutAnalysis +{ + /// + /// Clustering Algorithms. + /// + internal class ClusteringAlgorithms + { + /// + /// Algorithm to group elements via transitive closure, using nearest neighbours and maximum distance. + /// https://en.wikipedia.org/wiki/Transitive_closure + /// + /// Letter, Word, TextLine, etc. + /// Array of elements to group. + /// The distance measure between two points. + /// The function that determines the distance between two points in the same cluster. + /// The pivot's point to use. + /// The candidates to pair point to use. + /// Filter to apply to the pivot point. + /// Filter to apply to both the pivot and the paired point.
+ internal static IEnumerable> SimpleTransitiveClosure(T[] elements, + Func distMeasure, + Func maxDistanceFunction, + Func pivotPoint, Func candidatesPoint, + Func filterPivot, Func filterFinal) + { + /************************************************************************************* + * Algorithm steps + * 1. Find nearest neighbours indexes (done in parallel) + * Iterate every point (pivot) and put its nearest neighbour's index in an array + * e.g. if nearest neighbour of point i is point j, then indexes[i] = j. + * Only considers a neighbour if it is within the maximum distance. + * If not within the maximum distance, index will be set to -1. + * NB: Given the possible asymmetry in the relationship, it is possible + * that if indexes[i] = j then indexes[j] != i. + * + * 2. Group indexes + * Group indexes if share neighbours in common - Transitive closure + * e.g. if we have indexes[i] = j, indexes[j] = k, indexes[m] = n and indexes[n] = -1 + * (i,j,k) will form a group and (m,n) will form another group. + * + * 3. Merge groups that have indexes in common - If any + * If there are groups with indexes in common, merge them. + * (Could be improved and put in step 2) + *************************************************************************************/ + + int[] indexes = Enumerable.Repeat((int)-1, elements.Length).ToArray(); + var candidatesPoints = elements.Select(x => candidatesPoint(x)).ToList(); + + // 1. Find nearest neighbours indexes + Parallel.For(0, elements.Length, e => + { + var pivot = elements[e]; + + if (filterPivot(pivot)) + { + int index = pivotPoint(pivot).FindIndexNearest(candidatesPoints, distMeasure, out double dist); + var paired = elements[index]; + + if (filterFinal(pivot, paired) && dist < maxDistanceFunction(pivot, paired)) + { + indexes[e] = index; + } + } + }); + + // 2.
Group indexes + List> groupedIndexes = new List>(); + HashSet indexDone = new HashSet(); + + for (int e = 0; e < elements.Length; e++) + { + int index = indexes[e]; + + if (index == -1) // This element is not connected + { + // Check if another element index is connected to this element (nb: distance measure is asymmetric) + if (!indexes.Contains(e)) + { + // If no other element is connected to this element, add it as a standalone element + groupedIndexes.Add(new HashSet() { e }); + indexDone.Add(e); + } + continue; + } + + bool isDoneC = indexDone.Contains(e); + bool isDoneI = indexDone.Contains(index); + if (isDoneC || isDoneI) + { + if (isDoneC && !isDoneI) + { + foreach (var pair in groupedIndexes.Where(x => x.Contains(e))) + { + pair.Add(index); + } + indexDone.Add(index); + } + else if (!isDoneC && isDoneI) + { + foreach (var pair in groupedIndexes.Where(x => x.Contains(index))) + { + pair.Add(e); + } + indexDone.Add(e); + } + else // isDoneC && isDoneI + { + foreach (var pair in groupedIndexes.Where(x => x.Contains(index))) + { + if (!pair.Contains(e)) pair.Add(e); + } + + foreach (var pair in groupedIndexes.Where(x => x.Contains(e))) + { + if (!pair.Contains(index)) pair.Add(index); + } + } + } + else + { + groupedIndexes.Add(new HashSet() { e, index }); + indexDone.Add(e); + indexDone.Add(index); + } + } + + // Check that all elements are done + if (elements.Length != indexDone.Count) + { + throw new Exception("ClusteringAlgorithms.GetNNGroupedIndexes(): Some elements were not done."); + } + + // 3.
Merge groups that have indexes in common + // Check if duplicates (if duplicates, then same index in different groups) + if (indexDone.Count != groupedIndexes.SelectMany(x => x).Count()) + { + for (int e = 0; e < elements.Length; e++) + { + List> candidates = groupedIndexes.Where(x => x.Contains(e)).ToList(); + int count = candidates.Count(); + if (count < 2) continue; // Only one group with this index + + HashSet merged = candidates.First(); + groupedIndexes.Remove(merged); + for (int i = 1; i < count; i++) + { + var current = candidates.ElementAt(i); + merged.UnionWith(current); + groupedIndexes.Remove(current); + } + groupedIndexes.Add(merged); + } + } + + return groupedIndexes; + } + } +} diff --git a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/Distances.cs b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/Distances.cs index 2b06eea9..8921392f 100644 --- a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/Distances.cs +++ b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/Distances.cs @@ -47,6 +47,39 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis return (double)(Math.Abs(point1.X - point2.X) + Math.Abs(point1.Y - point2.Y)); } + /// + /// The angle in degrees between the horizontal axis and the line between two points. + /// + /// The first point. + /// The second point. + /// + public static double Angle(PdfPoint point1, PdfPoint point2) + { + return Math.Atan2((float)(point2.Y - point1.Y), (float)(point2.X - point1.X)) * 180.0 / Math.PI; + } + + /// + /// The absolute distance between the Y coordinates of two points. + /// + /// The first point. + /// The second point. + /// + public static double Vertical(PdfPoint point1, PdfPoint point2) + { + return Math.Abs((double)(point2.Y - point1.Y)); + } + + /// + /// The absolute distance between the X coordinates of two points. + /// + /// The first point. + /// The second point. 
+ /// + public static double Horizontal(PdfPoint point1, PdfPoint point2) + { + return Math.Abs((double)(point2.X - point1.X)); + } + /// /// Find the nearest point. /// diff --git a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/DocstrumBB.cs b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/DocstrumBB.cs new file mode 100644 index 00000000..3ced0778 --- /dev/null +++ b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/DocstrumBB.cs @@ -0,0 +1,212 @@ +using System; +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +using UglyToad.PdfPig.Content; +using UglyToad.PdfPig.Geometry; + +namespace UglyToad.PdfPig.DocumentLayoutAnalysis +{ + /// + /// The Docstrum algorithm is a bottom-up page segmentation technique based on nearest-neighborhood + /// clustering of connected components extracted from the document. + /// This implementation leverages bounding boxes and does not exactly replicate the original algorithm. + /// See 'The document spectrum for page layout analysis.' by L. O’Gorman. + /// + public class DocstrumBB : IPageSegmenter + { + /// + /// Create an instance of Docstrum for bounding boxes page segmenter, . + /// + public static DocstrumBB Instance { get; } = new DocstrumBB(); + + /// + /// Get the blocks. + /// Uses wlAngleLB = -30, wlAngleUB = 30, blAngleLB = -135, blAngleUB = -45, blMultiplier = 1.3. + /// + /// + /// + public IReadOnlyList GetBlocks(IEnumerable pageWords) + { + return GetBlocks(pageWords, -30, 30, -135, -45, 1.3); + } + + /// + /// Get the blocks. See original paper for more information. + /// + /// + /// Within-line lower bound angle. + /// Within-line upper bound angle. + /// Between-line lower bound angle. + /// Between-line upper bound angle. + /// Multiplier that gives the maximum perpendicular distance between + /// text lines for blocking. Maximum distance will be this number times the between-line + /// distance found by the analysis.
+ /// + public IReadOnlyList GetBlocks(IEnumerable pageWords, double wlAngleLB, double wlAngleUB, + double blAngleLB, double blAngleUB, double blMultiplier) + { + var pageWordsArr = pageWords.Where(w => !string.IsNullOrWhiteSpace(w.Text)).ToArray(); // remove white spaces + + var withinLineDistList = new ConcurrentBag(); + var betweenLineDistList = new ConcurrentBag(); + + // 1. Estimate in line and between line spacing + Parallel.For(0, pageWordsArr.Length, i => + { + var word = pageWordsArr[i]; + + // Within-line distance + var pointWL = GetNearestPointData(pageWordsArr, word, + bb => bb.BottomRight, bb => bb.BottomRight, + bb => bb.BottomLeft, bb => bb.BottomLeft, + wlAngleLB, wlAngleUB, Distances.Horizontal); + if (pointWL != null) withinLineDistList.Add(pointWL); + + // Between-line distance + var pointBL = GetNearestPointData(pageWordsArr, word, + bb => bb.BottomLeft, bb => bb.Centroid, + bb => bb.TopLeft, bb => bb.Centroid, + blAngleLB, blAngleUB, Distances.Vertical); + if (pointBL != null) betweenLineDistList.Add(pointBL); + }); + + double withinLineDistance = GetPeakAverageDistance(withinLineDistList); + double betweenLineDistance = GetPeakAverageDistance(betweenLineDistList); + + // 2. Find lines of text + double maxDistWL = Math.Min(3 * withinLineDistance, Math.Sqrt(2) * betweenLineDistance); + var lines = GetLines(pageWordsArr, maxDistWL).ToArray(); + + // 3. Find blocks of text + double maxDistBL = blMultiplier * betweenLineDistance; + return GetLinesGroups(lines, maxDistBL).ToList(); + } + + /// + /// Get information on the nearest point, filtered for angle. 
+ /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + private double[] GetNearestPointData(Word[] words, Word pivot, Func funcPivotDist, Func funcPivotAngle, + Func funcPointsDist, Func funcPointsAngle, + double angleStart, double angleEnd, + Func finalDistMEasure) + { + var pointR = funcPivotDist(pivot.BoundingBox); + var filtered = words.Where(w => + { + var angleWL = Distances.Angle(funcPivotAngle(pivot.BoundingBox), funcPointsAngle(w.BoundingBox)); + return (angleWL >= angleStart && angleWL <= angleEnd); + }).ToList(); + filtered.Remove(pivot); // remove itself + + if (filtered.Count > 0) + { + int index = pointR.FindIndexNearest( + filtered.Select(w => funcPointsDist(w.BoundingBox)).ToList(), + Distances.Euclidean, out double distWL); + + if (index >= 0) + { + var matchWL = filtered[index]; + return new double[] + { + (double)pivot.Letters.Select(l => l.FontSize).Mode(), + finalDistMEasure(pointR, funcPointsDist(matchWL.BoundingBox)) + }; + } + } + return null; + } + + /// + /// Build lines via transitive closure. 
+ /// + /// + /// + /// + private IEnumerable GetLines(Word[] words, double maxDist) + { + TextDirection textDirection = words[0].TextDirection; + var groupedIndexes = ClusteringAlgorithms.SimpleTransitiveClosure(words, Distances.Euclidean, + (w1, w2) => maxDist, + w => w.BoundingBox.BottomRight, w => w.BoundingBox.BottomLeft, + w => true, + (w1, w2) => + { + var angleWL = Distances.Angle(w1.BoundingBox.BottomRight, w2.BoundingBox.BottomLeft); // compare bottom right with bottom left for angle + return (angleWL >= -30 && angleWL <= 30); + }).ToList(); + + Func, IReadOnlyList> orderFunc = l => l.OrderBy(x => x.BoundingBox.Left).ToList(); + if (textDirection == TextDirection.Rotate180) + { + orderFunc = l => l.OrderByDescending(x => x.BoundingBox.Right).ToList(); + } + else if (textDirection == TextDirection.Rotate90) + { + orderFunc = l => l.OrderByDescending(x => x.BoundingBox.Top).ToList(); + } + else if (textDirection == TextDirection.Rotate270) + { + orderFunc = l => l.OrderBy(x => x.BoundingBox.Bottom).ToList(); + } + + for (int a = 0; a < groupedIndexes.Count(); a++) + { + yield return new TextLine(orderFunc(groupedIndexes[a].Select(i => words[i]))); + } + } + + /// + /// Build blocks via transitive closure. + /// + /// + /// + /// + private IEnumerable GetLinesGroups(TextLine[] lines, double maxDist) + { + var groupedIndexes = ClusteringAlgorithms.SimpleTransitiveClosure(lines, Distances.Euclidean, + (l1, l2) => maxDist, + l => l.BoundingBox.TopLeft, l => l.BoundingBox.BottomLeft, + l => true, (l1, l2) => true).ToList(); + + for (int a = 0; a < groupedIndexes.Count(); a++) + { + yield return new TextBlock(groupedIndexes[a].Select(i => lines[i]).ToList()); + } + } + + /// + /// Get the average distance value of the peak bucket of the histogram. 
+ /// + /// array[0]=font size, array[1]=distance + /// + private double GetPeakAverageDistance(IEnumerable values) + { + int max = (int)values.Max(x => x[1]) + 1; + int[] distrib = new int[max]; + + // Create histogram with buckets of size 1. + for (int i = 0; i < max; i++) + { + distrib[i] = values.Where(x => x[1] > i && x[1] <= i + 1).Count(); + } + + var peakIndex = Array.IndexOf(distrib, distrib.Max()); + + return values.Where(v => v[1] > peakIndex && v[1] <= peakIndex + 1).Average(x => x[1]); + } + } +} diff --git a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/IPageSegmenter.cs b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/IPageSegmenter.cs new file mode 100644 index 00000000..27511786 --- /dev/null +++ b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/IPageSegmenter.cs @@ -0,0 +1,19 @@ +using System.Collections.Generic; +using UglyToad.PdfPig.Content; + +namespace UglyToad.PdfPig.DocumentLayoutAnalysis +{ + /// + /// Page segmentation divides a page into areas, each consisting of a layout structure (blocks, lines, etc.). + /// See 'Performance Comparison of Six Algorithms for Page Segmentation' by Faisal Shafait, Daniel Keysers, and Thomas M. Breuel. + /// + public interface IPageSegmenter + { + /// + /// Get the text blocks. + /// + /// The words to generate text blocks for. + /// A list of text blocks from this approach. 
+ IReadOnlyList GetBlocks(IEnumerable pageWords); + } +} diff --git a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/NearestNeighbourWordExtractor .cs b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/NearestNeighbourWordExtractor .cs index 34455cda..3efb19be 100644 --- a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/NearestNeighbourWordExtractor .cs +++ b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/NearestNeighbourWordExtractor .cs @@ -1,7 +1,6 @@ using System; using System.Collections.Generic; using System.Linq; -using System.Threading.Tasks; using UglyToad.PdfPig.Content; using UglyToad.PdfPig.Geometry; using UglyToad.PdfPig.Util; @@ -71,7 +70,7 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis /// between 2 letters, e.g. GlyphRectangle.Width or GlyphRectangle.Height. /// The distance measure between two start and end base line points, /// e.g. the Manhattan distance. - private static List GetWords(IEnumerable pageLetters, + private List GetWords(IEnumerable pageLetters, Func metric, Func distMeasure) { if (pageLetters == null || pageLetters.Count() == 0) return new List(); @@ -97,116 +96,18 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis } Letter[] letters = pageLetters.ToArray(); - int lettersCount = letters.Length; - List startBaseLines = letters.Select(x => x.StartBaseLine).ToList(); - int[] indexes = Enumerable.Repeat((int)-1, lettersCount).ToArray(); - - // Find nearest neighbours indexes - Parallel.For(0, lettersCount, c => - { - var currentLetter = letters[c]; - // only check neighbours if not a white space - if (!string.IsNullOrWhiteSpace(currentLetter.Value)) - { - int index = currentLetter.EndBaseLine.FindIndexNearest(startBaseLines, distMeasure, out double dist); - var pairedLetter = letters[index]; - - if (!string.IsNullOrWhiteSpace(pairedLetter.Value) && - string.Equals(currentLetter.FontName, pairedLetter.FontName, StringComparison.OrdinalIgnoreCase)) - { - decimal minDist = Math.Max(Math.Abs(metric(currentLetter)), Math.Abs(metric(pairedLetter))) * 
0.60m; - if ((decimal)dist < minDist) - { - indexes[c] = index; - } - } - } - }); - - // Group indexes - List> groupedIndexes = new List>(); - List indexDone = new List(); - for (int c = 0; c < lettersCount; c++) - { - int i = indexes[c]; - if (i == -1) continue; - - bool isDoneC = indexDone.Contains(c); - bool isDoneI = indexDone.Contains(i); - if (isDoneC || isDoneI) - { - if (isDoneC && !isDoneI) - { - foreach (var pair in groupedIndexes.Where(x => x.Contains(c))) - { - pair.Add(i); - } - indexDone.Add(i); - } - else if (!isDoneC && isDoneI) - { - foreach (var pair in groupedIndexes.Where(x => x.Contains(i))) - { - pair.Add(c); - } - indexDone.Add(c); - } - else - { - foreach (var pair in groupedIndexes.Where(x => x.Contains(i))) - { - if (!pair.Contains(c)) pair.Add(c); - } - - foreach (var pair in groupedIndexes.Where(x => x.Contains(c))) - { - if (!pair.Contains(i)) pair.Add(i); - } - } - } - else - { - List pair = new List() { c, i }; - groupedIndexes.Add(pair); - indexDone.AddRange(pair); - } - } - - // Merge lists with common index - for (int c = 0; c < lettersCount; c++) - { - List> candidates = groupedIndexes.Where(x => x.Any(t => t == c)).ToList(); - if (candidates.Count < 2) continue; // only one group with this index - - List merged = candidates.First(); - groupedIndexes.Remove(merged); - for (int i = 1; i < candidates.Count; i++) - { - var current = candidates[i]; - merged = merged.Union(current).ToList(); - groupedIndexes.Remove(current); - } - groupedIndexes.Add(merged); - } + var groupedIndexes = ClusteringAlgorithms.SimpleTransitiveClosure(letters, + distMeasure, + (l1, l2) => Math.Max((double)metric(l1), (double)metric(l2)) * 0.60, + l => l.EndBaseLine, l => l.StartBaseLine, + l => !string.IsNullOrWhiteSpace(l.Value), + (l1, l2) => string.Equals(l1.FontName, l2.FontName, StringComparison.OrdinalIgnoreCase) && !string.IsNullOrWhiteSpace(l2.Value)).ToList(); List words = new List(); for (int a = 0; a < groupedIndexes.Count(); a++) { - List 
groupedLetters = new List(); - foreach (int s in groupedIndexes[a]) - { - groupedLetters.Add(letters[s]); - } - - words.Add(new Word(orderFunc(groupedLetters))); - } - - List indexesNotDone = Enumerable.Range(0, lettersCount).Except(groupedIndexes.SelectMany(x => x)).ToList(); - for (int n = 0; n < indexesNotDone.Count(); n++) - { - Letter letter = letters[indexesNotDone[n]]; - words.Add(new Word(new Letter[] { letter })); + words.Add(new Word(orderFunc(groupedIndexes[a].Select(i => letters[i])))); } return words; diff --git a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/RecursiveXYCut.cs b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/RecursiveXYCut.cs index ecaa610b..16258f1f 100644 --- a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/RecursiveXYCut.cs +++ b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/RecursiveXYCut.cs @@ -11,14 +11,31 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis /// https://en.wikipedia.org/wiki/Recursive_X-Y_cut /// See 'Recursive X-Y Cut using Bounding Boxes of Connected Components' by Jaekyu Ha, Robert M.Haralick and Ihsin T. Phillips /// - public static class RecursiveXYCut + public class RecursiveXYCut : IPageSegmenter { + /// + /// Create an instance of Recursive X-Y Cut page segmenter, . + /// + public static RecursiveXYCut Instance { get; } = new RecursiveXYCut(); + /// /// Get the blocks. + /// Uses 'minimumWidth' = 0, 'dominantFontWidthFunc' = Mode(Width), 'dominantFontHeightFunc' = 1.5 x Mode(Height) + /// + /// The words in the page. + /// + public IReadOnlyList GetBlocks(IEnumerable pageWords) + { + return GetBlocks(pageWords, 0); + } + + /// + /// Get the blocks. + /// Uses 'dominantFontWidthFunc' = Mode(Width), 'dominantFontHeightFunc' = 1.5 x Mode(Height) /// /// The words in the page. /// The minimum width for a block. 
- public static XYNode GetBlocks(IEnumerable pageWords, decimal minimumWidth = 0) + public IReadOnlyList GetBlocks(IEnumerable pageWords, decimal minimumWidth) { return GetBlocks(pageWords, minimumWidth, k => Math.Round(k.Mode(), 3), k => Math.Round(k.Mode() * 1.5m, 3)); } @@ -30,7 +47,7 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis /// The minimum width for a block. /// The dominant font width. /// The dominant font height. - public static XYNode GetBlocks(IEnumerable pageWords, decimal minimumWidth, + public IReadOnlyList GetBlocks(IEnumerable pageWords, decimal minimumWidth, decimal dominantFontWidth, decimal dominantFontHeight) { return GetBlocks(pageWords, minimumWidth, k => dominantFontWidth, k => dominantFontHeight); @@ -43,15 +60,24 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis /// The minimum width for a block. /// The function that determines the dominant font width. /// The function that determines the dominant font height. - public static XYNode GetBlocks(IEnumerable pageWords, decimal minimumWidth, + public IReadOnlyList GetBlocks(IEnumerable pageWords, decimal minimumWidth, Func, decimal> dominantFontWidthFunc, Func, decimal> dominantFontHeightFunc) { - var root = new XYLeaf(pageWords); // Create a root node. - return VerticalCut(root, minimumWidth, dominantFontWidthFunc, dominantFontHeightFunc); + XYLeaf root = new XYLeaf(pageWords); // Create a root node. 
+ XYNode node = VerticalCut(root, minimumWidth, dominantFontWidthFunc, dominantFontHeightFunc); + + var leafs = node.GetLeafs(); + + if (leafs.Count > 0) + { + return leafs.Select(l => new TextBlock(l.GetLines())).ToList(); + } + + return new List(); } - private static XYNode VerticalCut(XYLeaf leaf, decimal minimumWidth, + private XYNode VerticalCut(XYLeaf leaf, decimal minimumWidth, Func, decimal> dominantFontWidthFunc, Func, decimal> dominantFontHeightFunc, int level = 0) { @@ -144,7 +170,7 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis return new XYNode(newNodes); } - private static XYNode HorizontalCut(XYLeaf leaf, decimal minimumWidth, + private XYNode HorizontalCut(XYLeaf leaf, decimal minimumWidth, Func, decimal> dominantFontWidthFunc, Func, decimal> dominantFontHeightFunc, int level = 0) { diff --git a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/XYLeaf.cs b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/XYLeaf.cs index 9dab8daf..a5970693 100644 --- a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/XYLeaf.cs +++ b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/XYLeaf.cs @@ -9,7 +9,7 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis /// /// A Leaf node used in the algorithm, i.e. a block. /// - public class XYLeaf : XYNode + internal class XYLeaf : XYNode { /// /// Returns true if this node is a leaf, false otherwise. diff --git a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/XYNode.cs b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/XYNode.cs index db423c55..9bac97fb 100644 --- a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/XYNode.cs +++ b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/XYNode.cs @@ -8,7 +8,7 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis /// /// A Node used in the algorithm. /// - public class XYNode + internal class XYNode { /// /// Returns true if this node is a leaf, false otherwise. 
From c14d77e414e115c0f8b7f30efb156f07d675aab4 Mon Sep 17 00:00:00 2001 From: BobLd Date: Sat, 10 Aug 2019 16:36:50 +0100 Subject: [PATCH 08/13] PublicApiScannerTests updated --- src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs b/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs index 273f59f7..b0de7ab1 100644 --- a/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs +++ b/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs @@ -60,7 +60,6 @@ "UglyToad.PdfPig.CrossReference.CrossReferenceTable", "UglyToad.PdfPig.CrossReference.CrossReferenceType", "UglyToad.PdfPig.CrossReference.TrailerDictionary", - "UglyToad.PdfPig.DocumentLayoutAnalysis.ClusteringAlgorithms", "UglyToad.PdfPig.DocumentLayoutAnalysis.Distances", "UglyToad.PdfPig.DocumentLayoutAnalysis.DocstrumBB", "UglyToad.PdfPig.DocumentLayoutAnalysis.IPageSegmenter", From 0349bedd3e2c1da702b0a394e4325bcfa79502e2 Mon Sep 17 00:00:00 2001 From: Eliot Jones Date: Sun, 11 Aug 2019 12:41:51 +0100 Subject: [PATCH 09/13] #57 add access to document metadata and expose wrapper type --- .../Integration/CatGeneticsTests.cs | 5 +- .../Integration/LaTexTests.cs | 22 +++++++- .../PublicApiScannerTests.cs | 1 + src/UglyToad.PdfPig/Content/XmpMetadata.cs | 50 +++++++++++++++++++ .../Parser/PdfDocumentFactory.cs | 3 +- src/UglyToad.PdfPig/ParsingOptions.cs | 8 +++ src/UglyToad.PdfPig/PdfDocument.cs | 35 ++++++++++++- .../Tokens/NameToken.Constants.cs | 3 +- 8 files changed, 120 insertions(+), 7 deletions(-) create mode 100644 src/UglyToad.PdfPig/Content/XmpMetadata.cs diff --git a/src/UglyToad.PdfPig.Tests/Integration/CatGeneticsTests.cs b/src/UglyToad.PdfPig.Tests/Integration/CatGeneticsTests.cs index 67c824fe..453927b4 100644 --- a/src/UglyToad.PdfPig.Tests/Integration/CatGeneticsTests.cs +++ b/src/UglyToad.PdfPig.Tests/Integration/CatGeneticsTests.cs @@ -20,12 +20,11 @@ Assert.Contains("catus", page.Text); } } - - + [Fact] 
public void CanGetAnnotations() { - using (var document = PdfDocument.Open(GetFilename(), new ParsingOptions { UseLenientParsing = false })) + using (var document = PdfDocument.Open(GetFilename(), ParsingOptions.LenientParsingOff)) { var page = document.GetPage(1); diff --git a/src/UglyToad.PdfPig.Tests/Integration/LaTexTests.cs b/src/UglyToad.PdfPig.Tests/Integration/LaTexTests.cs index c1f028be..71db0cf3 100644 --- a/src/UglyToad.PdfPig.Tests/Integration/LaTexTests.cs +++ b/src/UglyToad.PdfPig.Tests/Integration/LaTexTests.cs @@ -4,6 +4,7 @@ using System.Collections.Generic; using System.IO; using System.Linq; + using PdfPig.Util; using Xunit; public class LaTexTests @@ -16,7 +17,7 @@ [Fact] public void CanReadContent() { - using (var document = PdfDocument.Open(GetFilename())) + using (var document = PdfDocument.Open(GetFilename(), ParsingOptions.LenientParsingOff)) { var page = document.GetPage(1); @@ -125,6 +126,25 @@ used per estimate, we introduce a “complement class” Naive Bayes is often us } } + [Fact] + public void CanGetMetadata() + { + using (var document = PdfDocument.Open(GetFilename(), ParsingOptions.LenientParsingOff)) + { + var hasMetadata = document.TryGetXmpMetadata(out var metadata); + + Assert.True(hasMetadata); + + var xDocument = metadata.GetXDocument(); + + Assert.NotNull(xDocument); + + var text = OtherEncodings.BytesAsLatin1String(metadata.GetXmlBytes().ToArray()); + + Assert.StartsWith(" GetPdfBoxPositionData() { var path = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Integration", "Documents", "ICML03-081.Page1.Positions.txt"); diff --git a/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs b/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs index 865a6240..19461919 100644 --- a/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs +++ b/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs @@ -55,6 +55,7 @@ "UglyToad.PdfPig.Content.Word", "UglyToad.PdfPig.Content.TextLine", "UglyToad.PdfPig.Content.TextDirection", + 
"UglyToad.PdfPig.Content.XmpMetadata", "UglyToad.PdfPig.Core.TransformationMatrix", "UglyToad.PdfPig.CrossReference.CrossReferenceTable", "UglyToad.PdfPig.CrossReference.CrossReferenceType", diff --git a/src/UglyToad.PdfPig/Content/XmpMetadata.cs b/src/UglyToad.PdfPig/Content/XmpMetadata.cs new file mode 100644 index 00000000..a2a88f44 --- /dev/null +++ b/src/UglyToad.PdfPig/Content/XmpMetadata.cs @@ -0,0 +1,50 @@ +namespace UglyToad.PdfPig.Content +{ + using System; + using System.Collections.Generic; + using System.Linq; + using System.Xml.Linq; + using Filters; + using Tokens; + using Util; + using Util.JetBrains.Annotations; + + /// + /// Wraps an XML based Extensible Metadata Platform (XMP) document. These XML documents are embedded in PDFs to provide metadata + /// about objects (the entire document, images, etc). They can be present as plain text or encoded/encrypted streams. + /// + public class XmpMetadata + { + private readonly IFilterProvider filterProvider; + + /// + /// The underlying for this metadata. + /// + [NotNull] + public StreamToken MetadataStreamToken { get; } + + internal XmpMetadata(StreamToken stream, IFilterProvider filterProvider) + { + this.filterProvider = filterProvider ?? throw new ArgumentNullException(nameof(filterProvider)); + MetadataStreamToken = stream ?? throw new ArgumentNullException(nameof(stream)); + } + + /// + /// Get the decoded bytes for the metadata stream. This can be interpreted as a sequence of plain-text bytes. + /// + /// The bytes for the metadata object with any filters removed. + public IReadOnlyList GetXmlBytes() + { + return MetadataStreamToken.Decode(filterProvider); + } + + /// + /// Gets the metadata stream as an . + /// + /// The for the XMP XML. 
+ public XDocument GetXDocument() + { + return XDocument.Parse(OtherEncodings.BytesAsLatin1String(GetXmlBytes().ToArray())); + } + } +} diff --git a/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs b/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs index 32df6bd8..453253ec 100644 --- a/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs +++ b/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs @@ -140,7 +140,8 @@ return new PdfDocument(log, inputBytes, version, crossReferenceTable, isLenientParsing, caching, pageFactory, catalog, information, encryptionDictionary, - pdfScanner, + pdfScanner, + filterProvider, acroFormFactory); } diff --git a/src/UglyToad.PdfPig/ParsingOptions.cs b/src/UglyToad.PdfPig/ParsingOptions.cs index 43db9dcf..8e9511bf 100644 --- a/src/UglyToad.PdfPig/ParsingOptions.cs +++ b/src/UglyToad.PdfPig/ParsingOptions.cs @@ -7,6 +7,14 @@ /// public class ParsingOptions { + /// + /// A default with set to false. + /// + public static ParsingOptions LenientParsingOff { get; } = new ParsingOptions + { + UseLenientParsing = false + }; + /// /// Should the parser ignore issues where the document does not conform to the PDF specification? 
/// diff --git a/src/UglyToad.PdfPig/PdfDocument.cs b/src/UglyToad.PdfPig/PdfDocument.cs index d2ad6850..3398ce1e 100644 --- a/src/UglyToad.PdfPig/PdfDocument.cs +++ b/src/UglyToad.PdfPig/PdfDocument.cs @@ -7,10 +7,12 @@ using CrossReference; using Encryption; using Exceptions; + using Filters; using IO; using Logging; using Parser; using Tokenization.Scanner; + using Tokens; using Util.JetBrains.Annotations; /// @@ -39,7 +41,9 @@ [NotNull] private readonly IPdfTokenScanner pdfScanner; - + + private readonly IFilterProvider filterProvider; + [NotNull] private readonly Pages pages; @@ -81,6 +85,7 @@ DocumentInformation information, EncryptionDictionary encryptionDictionary, IPdfTokenScanner pdfScanner, + IFilterProvider filterProvider, AcroFormFactory acroFormFactory) { this.log = log; @@ -90,6 +95,7 @@ this.cachingProviders = cachingProviders ?? throw new ArgumentNullException(nameof(cachingProviders)); this.encryptionDictionary = encryptionDictionary; this.pdfScanner = pdfScanner ?? throw new ArgumentNullException(nameof(pdfScanner)); + this.filterProvider = filterProvider ?? throw new ArgumentNullException(nameof(filterProvider)); Information = information ?? throw new ArgumentNullException(nameof(information)); pages = new Pages(log, catalog, pageFactory, isLenientParsing, pdfScanner); Structure = new Structure(catalog, crossReferenceTable, pdfScanner); @@ -153,9 +159,36 @@ } } + /// + /// Get the document level metadata if present. + /// The metadata is XML in the (Extensible Metadata Platform) XMP format. + /// + /// This will throw a if called on a disposed . + /// The metadata stream if it exists. + /// if the metadata is present, otherwise. 
+ public bool TryGetXmpMetadata(out XmpMetadata metadata) + { + if (isDisposed) + { + throw new ObjectDisposedException("Cannot access the document metadata after the document is disposed."); + } + + metadata = null; + + if (!Structure.Catalog.CatalogDictionary.TryGet(NameToken.Metadata, pdfScanner, out StreamToken xmpStreamToken)) + { + return false; + } + + metadata = new XmpMetadata(xmpStreamToken, filterProvider); + + return true; + } + /// /// Gets the form if this document contains one. /// + /// This will throw a if called on a disposed . /// An from the document or if not present. internal AcroForm GetForm() { diff --git a/src/UglyToad.PdfPig/Tokens/NameToken.Constants.cs b/src/UglyToad.PdfPig/Tokens/NameToken.Constants.cs index 23177074..2054f372 100644 --- a/src/UglyToad.PdfPig/Tokens/NameToken.Constants.cs +++ b/src/UglyToad.PdfPig/Tokens/NameToken.Constants.cs @@ -552,11 +552,12 @@ public static readonly NameToken WinAnsiEncoding = new NameToken("WinAnsiEncoding"); // X public static readonly NameToken Xfa = new NameToken("XFA"); - public static readonly NameToken XStep = new NameToken("XStep"); public static readonly NameToken Xheight = new NameToken("XHeight"); + public static readonly NameToken Xml = new NameToken("XML"); public static readonly NameToken Xobject = new NameToken("XObject"); public static readonly NameToken Xref = new NameToken("XRef"); public static readonly NameToken XrefStm = new NameToken("XRefStm"); + public static readonly NameToken XStep = new NameToken("XStep"); // Y public static readonly NameToken YStep = new NameToken("YStep"); public static readonly NameToken Yes = new NameToken("Yes"); From 7e8b3bdc854eb56be34ba4f4432a4d279a9ac337 Mon Sep 17 00:00:00 2001 From: BobLd Date: Sun, 11 Aug 2019 13:45:08 +0100 Subject: [PATCH 10/13] Update DocstrumBB to account for middle point of the overlapping area distance. For this, using distance between 2 lines. 
--- .../ClusteringAlgorithms.cs | 99 +++++++++++++++-- .../DocumentLayoutAnalysis/Distances.cs | 40 ++++++- .../DocumentLayoutAnalysis/DocstrumBB.cs | 101 +++++++++++++++--- 3 files changed, 215 insertions(+), 25 deletions(-) diff --git a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/ClusteringAlgorithms.cs b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/ClusteringAlgorithms.cs index be7a8299..4e9e6182 100644 --- a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/ClusteringAlgorithms.cs +++ b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/ClusteringAlgorithms.cs @@ -18,11 +18,11 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis /// Letter, Word, TextLine, etc. /// Array of elements to group. /// The distance measure between two points. - /// The function that determines the distance between to points in the same cluster. - /// The pivot's point to use. - /// The candidates to pair point to use. - /// Filter to apply to the pivot point. - /// Filter to apply to both the pivot and the paired point. + /// The function that determines the maximum distance between two points in the same cluster. + /// The pivot's point to use for pairing, e.g. BottomLeft, TopLeft. + /// The candidates' point to use for pairing, e.g. BottomLeft, TopLeft. + /// Filter to apply to the pivot point. If false, point will not be paired at all, e.g. is white space. + /// Filter to apply to both the pivot and the paired point. If false, point will not be paired at all, e.g. pivot and paired point have same font. internal static IEnumerable> SimpleTransitiveClosure(T[] elements, Func distMeasure, Func maxDistanceFunction, @@ -69,17 +69,97 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis } }); + // 2. Group indexes + // 3. Merge groups that have indexes in common + var groupedIndexes = GroupMergeIndexes(indexes); + + return groupedIndexes; + } + + /// + /// Algorithm to group elements via transitive closure, using nearest neighbours and maximum distance. 
+ /// https://en.wikipedia.org/wiki/Transitive_closure + /// + /// Letter, Word, TextLine, etc. + /// Array of elements to group. + /// The distance measure between two lines. + /// The function that determines the maximum distance between two points in the same cluster. + /// The pivot's line to use for pairing. + /// The candidates' line to use for pairing. + /// Filter to apply to the pivot point. If false, point will not be paired at all, e.g. is white space. + /// Filter to apply to both the pivot and the paired point. If false, point will not be paired at all, e.g. pivot and paired point have same font. + internal static IEnumerable> SimpleTransitiveClosure(T[] elements, + Func distMeasure, + Func maxDistanceFunction, + Func pivotLine, Func candidatesLine, + Func filterPivot, Func filterFinal) + { + /************************************************************************************* + * Algorithm steps + * 1. Find nearest neighbours indexes (done in parallel) + * Iterate every point (pivot) and put its nearest neighbour's index in an array + * e.g. if nearest neighbour of point i is point j, then indexes[i] = j. + * Only considers a neighbour if it is within the maximum distance. + * If not within the maximum distance, index will be set to -1. + * NB: Given the possible asymmetry in the relationship, it is possible + * that if indexes[i] = j then indexes[j] != i. + * + * 2. Group indexes + * Group indexes if they share neighbours in common - Transitive closure + * e.g. if we have indexes[i] = j, indexes[j] = k, indexes[m] = n and indexes[n] = -1 + * (i,j,k) will form a group and (m,n) will form another group. + * + * 3. Merge groups that have indexes in common - If any + * If there are groups with indexes in common, merge them.
+ * (Could be improved and put in step 2) + *************************************************************************************/ + + int[] indexes = Enumerable.Repeat((int)-1, elements.Length).ToArray(); + var candidatesLines = elements.Select(x => candidatesLine(x)).ToList(); + + // 1. Find nearest neighbours indexes + Parallel.For(0, elements.Length, e => + { + var pivot = elements[e]; + + if (filterPivot(pivot)) + { + int index = pivotLine(pivot).FindIndexNearest(candidatesLines, distMeasure, out double dist); + var paired = elements[index]; + + if (filterFinal(pivot, paired) && dist < maxDistanceFunction(pivot, paired)) + { + indexes[e] = index; + } + } + }); + + // 2. Group indexes + // 3. Merge groups that have indexes in common + var groupedIndexes = GroupMergeIndexes(indexes); + + return groupedIndexes; + } + + /// + /// Group elements via transitive closure. + /// https://en.wikipedia.org/wiki/Transitive_closure + /// + /// Array of paired elements index. + /// + internal static List> GroupMergeIndexes(int[] indexes) + { // 2. 
Group indexes List> groupedIndexes = new List>(); HashSet indexDone = new HashSet(); - for (int e = 0; e < elements.Length; e++) + for (int e = 0; e < indexes.Length; e++) { int index = indexes[e]; if (index == -1) // This element is not connected { - // Check if another element index is connected to this element (nb: distance measure is asymetric) + // Check if another element's index is connected to this element (nb: distance measure is asymmetric) if (!indexes.Contains(e)) { // If no other element is connected to this element, add it as a standalone element @@ -131,7 +211,7 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis } // Check that all elements are done - if (elements.Length != indexDone.Count) + if (indexes.Length != indexDone.Count) { throw new Exception("ClusteringAlgorithms.GetNNGroupedIndexes(): Some elements were not done."); } @@ -140,7 +220,7 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis // Check if duplicates (if duplicates, then same index in different groups) if (indexDone.Count != groupedIndexes.SelectMany(x => x).Count()) { - for (int e = 0; e < elements.Length; e++) + for (int e = 0; e < indexes.Length; e++) { List> candidates = groupedIndexes.Where(x => x.Contains(e)).ToList(); int count = candidates.Count(); @@ -157,7 +237,6 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis groupedIndexes.Add(merged); } } - return groupedIndexes; } } diff --git a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/Distances.cs b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/Distances.cs index 8921392f..f099c175 100644 --- a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/Distances.cs +++ b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/Distances.cs @@ -86,7 +86,7 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis /// The reference point, for which to find the nearest neighbour. /// The list of neighbours candidates. /// The distance measure to use. 
- /// The distance between reference point, and its nearest neighbour + /// The distance between reference point, and its nearest neighbour. public static PdfPoint FindNearest(this PdfPoint pdfPoint, IReadOnlyList points, Func distanceMeasure, out double distance) { @@ -122,7 +122,7 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis /// The reference point, for which to find the nearest neighbour. /// The list of neighbours candidates. /// The distance measure to use. - /// The distance between reference point, and its nearest neighbour + /// The distance between reference point, and its nearest neighbour. public static int FindIndexNearest(this PdfPoint pdfPoint, IReadOnlyList points, Func distanceMeasure, out double distance) { @@ -151,5 +151,41 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis return closestPointIndex; } + + /// + /// Find the index of the nearest line. + /// + /// The reference line, for which to find the nearest neighbour. + /// The list of neighbours candidates. + /// The distance measure between two lines to use. + /// The distance between reference line, and its nearest neighbour. 
+ public static int FindIndexNearest(this PdfLine pdfLine, IReadOnlyList lines, + Func distanceMeasure, out double distance) + { + if (lines == null || lines.Count == 0) + { + throw new ArgumentException("Distances.FindIndexNearest(): The list of neighbours candidates is either null or empty.", "lines"); + } + + if (distanceMeasure == null) + { + throw new ArgumentException("Distances.FindIndexNearest(): The distance measure must not be null.", "distanceMeasure"); + } + + distance = double.MaxValue; + int closestLineIndex = -1; + + for (var i = 0; i < lines.Count; i++) + { + double currentDistance = distanceMeasure(lines[i], pdfLine); + if (currentDistance < distance) + { + distance = currentDistance; + closestLineIndex = i; + } + } + + return closestLineIndex; + } } } diff --git a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/DocstrumBB.cs b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/DocstrumBB.cs index 3ced0778..43d083a5 100644 --- a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/DocstrumBB.cs +++ b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/DocstrumBB.cs @@ -77,11 +77,48 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis // 2. Find lines of text double maxDistWL = Math.Min(3 * withinLineDistance, Math.Sqrt(2) * betweenLineDistance); - var lines = GetLines(pageWordsArr, maxDistWL).ToArray(); + var lines = GetLines(pageWordsArr, maxDistWL, wlAngleLB, wlAngleUB).ToArray(); // 3. Find blocks of text double maxDistBL = blMultiplier * betweenLineDistance; - return GetLinesGroups(lines, maxDistBL).ToList(); + var blocks = GetLinesGroups(lines, maxDistBL).ToList(); + + // 4. Merge overlapping blocks - might happen in certain conditions, e.g. justified text. + for (int b = 0; b < blocks.Count; b++) + { + if (blocks[b] == null) continue; + + for (int c = 0; c < blocks.Count; c++) + { + if (b == c) continue; + if (blocks[c] == null) continue; + + if (AreRectangleOverlapping(blocks[b].BoundingBox, blocks[c].BoundingBox)) + { + // Merge + // 1. 
Merge all words + var mergedWords = new List(blocks[b].TextLines.SelectMany(l => l.Words)); + mergedWords.AddRange(blocks[c].TextLines.SelectMany(l => l.Words)); + + // 2. Rebuild lines, using max distance = +Inf as we know all words will be in the + // same block. Filtering will still be done based on angle. + var mergedLines = GetLines(mergedWords.ToArray(), wlAngleLB, wlAngleUB, double.MaxValue); + blocks[b] = new TextBlock(mergedLines.ToList()); + + // Remove + blocks[c] = null; + } + } + } + + return blocks.Where(b => b != null).ToList(); + } + + private bool AreRectangleOverlapping(PdfRectangle rectangle1, PdfRectangle rectangle2) + { + if (rectangle1.Left > rectangle2.Right || rectangle2.Left > rectangle1.Right) return false; + if (rectangle1.Top < rectangle2.Bottom || rectangle2.Top < rectangle1.Bottom) return false; + return true; } /// @@ -104,6 +141,8 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis Func finalDistMEasure) { var pointR = funcPivotDist(pivot.BoundingBox); + + // Filter by angle var filtered = words.Where(w => { var angleWL = Distances.Angle(funcPivotAngle(pivot.BoundingBox), funcPointsAngle(w.BoundingBox)); @@ -135,18 +174,27 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis /// /// /// + /// + /// /// - private IEnumerable GetLines(Word[] words, double maxDist) + private IEnumerable GetLines(Word[] words, double maxDist, double wlAngleLB, double wlAngleUB) { + /*************************************************************************************************** + * /!\ WARNING: Given how FindIndexNearest() works, if 'maxDist' > 'word Width', the algo might not + * work as the FindIndexNearest() function might pair the pivot with itself (the pivot's right point + * (distance = width) is closer than other words' left point). + * -> Solution would be to find more than one nearest neighbours. Use KDTree? 
+ ***************************************************************************************************/ + TextDirection textDirection = words[0].TextDirection; var groupedIndexes = ClusteringAlgorithms.SimpleTransitiveClosure(words, Distances.Euclidean, - (w1, w2) => maxDist, - w => w.BoundingBox.BottomRight, w => w.BoundingBox.BottomLeft, - w => true, - (w1, w2) => + (pivot, candidate) => maxDist, + pivot => pivot.BoundingBox.BottomRight, candidate => candidate.BoundingBox.BottomLeft, + pivot => true, + (pivot, candidate) => { - var angleWL = Distances.Angle(w1.BoundingBox.BottomRight, w2.BoundingBox.BottomLeft); // compare bottom right with bottom left for angle - return (angleWL >= -30 && angleWL <= 30); + var angleWL = Distances.Angle(pivot.BoundingBox.BottomRight, candidate.BoundingBox.BottomLeft); // compare bottom right with bottom left for angle + return (angleWL >= wlAngleLB && angleWL <= wlAngleUB); }).ToList(); Func, IReadOnlyList> orderFunc = l => l.OrderBy(x => x.BoundingBox.Left).ToList(); @@ -177,10 +225,37 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis /// private IEnumerable GetLinesGroups(TextLine[] lines, double maxDist) { - var groupedIndexes = ClusteringAlgorithms.SimpleTransitiveClosure(lines, Distances.Euclidean, - (l1, l2) => maxDist, - l => l.BoundingBox.TopLeft, l => l.BoundingBox.BottomLeft, - l => true, (l1, l2) => true).ToList(); + /************************************************************************************************** + * We want to measure the distance between two lines using the following method: + * We check if two lines are overlapping horizontally. + * If they are overlapping, we compute the middle point (new X coordinate) of the overlapping area. + * We finally compute the Euclidean distance between these two middle points. + * If the two lines are not overlapping, the distance is set to the max distance. 
+ * + * /!\ WARNING: Given how FindIndexNearest() works, if 'maxDist' > 'line Height', the algo won't + * work as the FindIndexNearest() function will always pair the pivot with itself (the pivot's top + * point (distance = height) is closer than other lines' top point). + * -> Solution would be to find more than one nearest neighbours. Use KDTree? + **************************************************************************************************/ + + Func euclidianOverlappingMiddleDistance = (l1, l2) => + { + var left = Math.Max(l1.Point1.X, l2.Point1.X); + var d = (Math.Min(l1.Point2.X, l2.Point2.X) - left); + + if (d < 0) return double.MaxValue; // not overlapping -> max distance + + return Distances.Euclidean( + new PdfPoint(left + d / 2, l1.Point1.Y), + new PdfPoint(left + d / 2, l2.Point1.Y)); + }; + + var groupedIndexes = ClusteringAlgorithms.SimpleTransitiveClosure(lines, + euclidianOverlappingMiddleDistance, + (pivot, candidate) => maxDist, + pivot => new PdfLine(pivot.BoundingBox.BottomLeft, pivot.BoundingBox.BottomRight), + candidate => new PdfLine(candidate.BoundingBox.TopLeft, candidate.BoundingBox.TopRight), + pivot => true, (pivot, candidate) => true).ToList(); for (int a = 0; a < groupedIndexes.Count(); a++) { From 9f13739addc80e2ea5becea23be9aeeea964b00e Mon Sep 17 00:00:00 2001 From: BobLd Date: Sun, 11 Aug 2019 13:54:47 +0100 Subject: [PATCH 11/13] correcting typo --- src/UglyToad.PdfPig/DocumentLayoutAnalysis/DocstrumBB.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/DocstrumBB.cs b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/DocstrumBB.cs index 43d083a5..b2fca5c3 100644 --- a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/DocstrumBB.cs +++ b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/DocstrumBB.cs @@ -9,7 +9,7 @@ using UglyToad.PdfPig.Geometry; namespace UglyToad.PdfPig.DocumentLayoutAnalysis { /// - /// The Docstrum algorithm is a bottom-up page segmentation technique based on 
nearest-neighborhood + /// The Docstrum algorithm is a bottom-up page segmentation technique based on nearest-neighbourhood /// clustering of connected components extracted from the document. /// This implementation leverages bounding boxes and does not exactly replicates the original algorithm. /// See 'The document spectrum for page layout analysis.' by L. O’Gorman. From f55091f3d2729065ea172b1723280f2741596a74 Mon Sep 17 00:00:00 2001 From: Eliot Jones Date: Tue, 13 Aug 2019 20:48:22 +0100 Subject: [PATCH 12/13] make color types public and add stream based tests to prevent future breaking as observed in #52 --- .../Integration/IntegrationDocumentTests.cs | 21 ++++++++ .../PublicApiScannerTests.cs | 13 +++++ .../Graphics/Colors/CMYKColor.cs | 51 ++++++++++++++++++- .../Graphics/Colors/GrayColor.cs | 40 ++++++++++++++- .../Graphics/Colors/RGBColor.cs | 8 ++- 5 files changed, 128 insertions(+), 5 deletions(-) diff --git a/src/UglyToad.PdfPig.Tests/Integration/IntegrationDocumentTests.cs b/src/UglyToad.PdfPig.Tests/Integration/IntegrationDocumentTests.cs index 6087461e..b4847cb9 100644 --- a/src/UglyToad.PdfPig.Tests/Integration/IntegrationDocumentTests.cs +++ b/src/UglyToad.PdfPig.Tests/Integration/IntegrationDocumentTests.cs @@ -28,6 +28,27 @@ } } + [Theory] + [MemberData(nameof(GetAllDocuments))] + public void CanUseStreamForFirstPage(string documentName) + { + // Add the full path back on, we removed it so we could see it in the test explorer. 
+ documentName = Path.Combine(DocumentFolder.Value, documentName); + + var bytes = File.ReadAllBytes(documentName); + + using (var memoryStream = new MemoryStream(bytes)) + using (var document = PdfDocument.Open(memoryStream, new ParsingOptions { UseLenientParsing = false })) + { + for (var i = 0; i < document.NumberOfPages; i++) + { + var page = document.GetPage(i + 1); + + Assert.NotNull(page.ExperimentalAccess.GetAnnotations().ToList()); + } + } + } + [Theory] [MemberData(nameof(GetAllDocuments))] public void CanTokenizeAllAccessibleObjects(string documentName) diff --git a/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs b/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs index 184dd905..a5e6d7f2 100644 --- a/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs +++ b/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs @@ -20,6 +20,11 @@ foreach (var type in types) { + if (type.FullName == null) + { + continue; + } + // Skip coverage measuring instrumentation classes. if (type.FullName.StartsWith("Coverlet", StringComparison.OrdinalIgnoreCase)) { @@ -77,10 +82,13 @@ "UglyToad.PdfPig.Geometry.PdfPoint", "UglyToad.PdfPig.Geometry.PdfLine", "UglyToad.PdfPig.Geometry.PdfRectangle", + "UglyToad.PdfPig.Graphics.Colors.CMYKColor", "UglyToad.PdfPig.Graphics.Colors.ColorSpace", "UglyToad.PdfPig.Graphics.Colors.ColorSpaceExtensions", "UglyToad.PdfPig.Graphics.Colors.ColorSpaceFamily", + "UglyToad.PdfPig.Graphics.Colors.GrayColor", "UglyToad.PdfPig.Graphics.Colors.IColor", + "UglyToad.PdfPig.Graphics.Colors.RGBColor", "UglyToad.PdfPig.Graphics.Core.LineCapStyle", "UglyToad.PdfPig.Graphics.Core.LineDashPattern", "UglyToad.PdfPig.Graphics.Core.LineJoinStyle", @@ -293,6 +301,11 @@ foreach (var type in types) { var symbol = type.GetField("Symbol", BindingFlags.Public | BindingFlags.Static); + if (symbol == null) + { + continue; + } + symbols.Add(symbol.GetValue(null).ToString()); } diff --git a/src/UglyToad.PdfPig/Graphics/Colors/CMYKColor.cs 
b/src/UglyToad.PdfPig/Graphics/Colors/CMYKColor.cs index a4238cff..e8919011 100644 --- a/src/UglyToad.PdfPig/Graphics/Colors/CMYKColor.cs +++ b/src/UglyToad.PdfPig/Graphics/Colors/CMYKColor.cs @@ -1,11 +1,21 @@ -namespace UglyToad.PdfPig.Graphics.Colors +using System; +using System.Collections.Generic; + +namespace UglyToad.PdfPig.Graphics.Colors { /// /// A color with cyan, magenta, yellow and black (K) components. /// - internal class CMYKColor : IColor + public class CMYKColor : IColor, IEquatable { + /// + /// CMYK Black value (0, 0, 0, 1). + /// public static IColor Black { get; } = new CMYKColor(0, 0, 0, 1); + + /// + /// CMYK White value (all 0). + /// public static IColor White { get; } = new CMYKColor(0, 0, 0, 0); /// @@ -50,6 +60,43 @@ (1 - Y) * (1 - K)); } + /// + public override bool Equals(object obj) + { + return Equals(obj as CMYKColor); + } + + /// + public bool Equals(CMYKColor other) + { + return other != null && + C == other.C && + M == other.M && + Y == other.Y && + K == other.K; + } + + /// + public override int GetHashCode() + { + var hashCode = -492570696; + hashCode = hashCode * -1521134295 + C.GetHashCode(); + hashCode = hashCode * -1521134295 + M.GetHashCode(); + hashCode = hashCode * -1521134295 + Y.GetHashCode(); + hashCode = hashCode * -1521134295 + K.GetHashCode(); + return hashCode; + } + + /// + /// Equals. + /// + public static bool operator ==(CMYKColor color1, CMYKColor color2) => EqualityComparer.Default.Equals(color1, color2); + + /// + /// Not Equals. 
+ /// + public static bool operator !=(CMYKColor color1, CMYKColor color2) => !(color1 == color2); + /// public override string ToString() { diff --git a/src/UglyToad.PdfPig/Graphics/Colors/GrayColor.cs b/src/UglyToad.PdfPig/Graphics/Colors/GrayColor.cs index 8ef14a00..1826575b 100644 --- a/src/UglyToad.PdfPig/Graphics/Colors/GrayColor.cs +++ b/src/UglyToad.PdfPig/Graphics/Colors/GrayColor.cs @@ -1,11 +1,21 @@ -namespace UglyToad.PdfPig.Graphics.Colors +using System; +using System.Collections.Generic; + +namespace UglyToad.PdfPig.Graphics.Colors { /// /// A grayscale color with a single gray component. /// - internal class GrayColor : IColor + public class GrayColor : IColor, IEquatable { + /// + /// Gray Black value (0). + /// public static GrayColor Black { get; } = new GrayColor(0); + + /// + /// Gray White value (1). + /// public static GrayColor White { get; } = new GrayColor(1); /// @@ -30,6 +40,32 @@ return (Gray, Gray, Gray); } + /// + public override bool Equals(object obj) + { + return Equals(obj as GrayColor); + } + + /// + public bool Equals(GrayColor other) + { + return other != null && + Gray == other.Gray; + } + + /// + public override int GetHashCode() => Gray.GetHashCode(); + + /// + /// Equals. + /// + public static bool operator ==(GrayColor color1, GrayColor color2) => EqualityComparer.Default.Equals(color1, color2); + + /// + /// Not Equals. + /// + public static bool operator !=(GrayColor color1, GrayColor color2) => !(color1 == color2); + /// public override string ToString() { diff --git a/src/UglyToad.PdfPig/Graphics/Colors/RGBColor.cs b/src/UglyToad.PdfPig/Graphics/Colors/RGBColor.cs index 12775c56..8ab79a09 100644 --- a/src/UglyToad.PdfPig/Graphics/Colors/RGBColor.cs +++ b/src/UglyToad.PdfPig/Graphics/Colors/RGBColor.cs @@ -6,7 +6,7 @@ /// /// A color with red, green and blue components. /// - internal class RGBColor : IColor, IEquatable + public class RGBColor : IColor, IEquatable { /// /// RGB Black value (all 0). 
@@ -78,9 +78,15 @@ /// public override int GetHashCode() => (R, G, B).GetHashCode(); + /// + /// Equals. + /// public static bool operator ==(RGBColor color1, RGBColor color2) => EqualityComparer.Default.Equals(color1, color2); + /// + /// Not Equals. + /// public static bool operator !=(RGBColor color1, RGBColor color2) => !(color1 == color2); /// From ac62b7247b11d6126bb5160529305df4f54a0a2f Mon Sep 17 00:00:00 2001 From: Eliot Jones Date: Tue, 13 Aug 2019 21:24:54 +0100 Subject: [PATCH 13/13] version 0.0.9 --- src/UglyToad.PdfPig/UglyToad.PdfPig.csproj | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/UglyToad.PdfPig/UglyToad.PdfPig.csproj b/src/UglyToad.PdfPig/UglyToad.PdfPig.csproj index 8e78c4aa..3c87f619 100644 --- a/src/UglyToad.PdfPig/UglyToad.PdfPig.csproj +++ b/src/UglyToad.PdfPig/UglyToad.PdfPig.csproj @@ -7,14 +7,14 @@ UglyToad PdfPig Reads text content from PDF documents and supports document creation. Apache 2.0 licensed. - https://raw.githubusercontent.com/UglyToad/PdfPig/master/LICENSE + Apache-2.0 https://github.com/UglyToad/PdfPig PDF;Reader;Document;Adobe;PDFBox;PdfPig;pdf-extract https://github.com/UglyToad/PdfPig true - 0.0.7 - 0.0.7.0 - 0.0.7.0 + 0.0.9 + 0.0.9.0 + 0.0.9.0 https://raw.githubusercontent.com/UglyToad/PdfPig/master/documentation/pdfpig.png PdfPig true