diff --git a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/ClusteringAlgorithms.cs b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/ClusteringAlgorithms.cs index 5b1740d3..4bcdd146 100644 --- a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/ClusteringAlgorithms.cs +++ b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/ClusteringAlgorithms.cs @@ -49,7 +49,7 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis * (i,j,k) will form a group and (m,n) will form another group. *************************************************************************************/ - int[] indexes = Enumerable.Repeat((int)-1, elements.Count).ToArray(); + int[] indexes = Enumerable.Repeat(-1, elements.Count).ToArray(); var candidatesPoints = elements.Select(candidatesPoint).ToList(); ParallelOptions parallelOptions = new ParallelOptions() { MaxDegreeOfParallelism = maxDegreeOfParallelism }; @@ -118,7 +118,7 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis * (i,j,k) will form a group and (m,n) will form another group. *************************************************************************************/ - int[] indexes = Enumerable.Repeat((int)-1, elements.Length).ToArray(); + int[] indexes = Enumerable.Repeat(-1, elements.Length).ToArray(); var candidatesPoints = elements.Select(candidatesPoint).ToList(); ParallelOptions parallelOptions = new ParallelOptions() { MaxDegreeOfParallelism = maxDegreeOfParallelism }; @@ -187,7 +187,7 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis * (i,j,k) will form a group and (m,n) will form another group. 
*************************************************************************************/ - int[] indexes = Enumerable.Repeat((int)-1, elements.Length).ToArray(); + int[] indexes = Enumerable.Repeat(-1, elements.Length).ToArray(); var candidatesLines = elements.Select(x => candidatesLine(x)).ToList(); ParallelOptions parallelOptions = new ParallelOptions() { MaxDegreeOfParallelism = maxDegreeOfParallelism }; diff --git a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/Distances.cs b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/Distances.cs index b1a3cdd0..b47e1f21 100644 --- a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/Distances.cs +++ b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/Distances.cs @@ -17,8 +17,8 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis /// The second point. public static double Euclidean(PdfPoint point1, PdfPoint point2) { - double dx = (double)(point1.X - point2.X); - double dy = (double)(point1.Y - point2.Y); + double dx = point1.X - point2.X; + double dy = point1.Y - point2.Y; return Math.Sqrt(dx * dx + dy * dy); } @@ -31,8 +31,8 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis /// The weight of the Y coordinates. Default is 1. public static double WeightedEuclidean(PdfPoint point1, PdfPoint point2, double wX = 1.0, double wY = 1.0) { - double dx = (double)(point1.X - point2.X); - double dy = (double)(point1.Y - point2.Y); + double dx = point1.X - point2.X; + double dy = point1.Y - point2.Y; return Math.Sqrt(wX * dx * dx + wY * dy * dy); } @@ -44,7 +44,7 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis /// The second point. 
public static double Manhattan(PdfPoint point1, PdfPoint point2) { - return (double)(Math.Abs(point1.X - point2.X) + Math.Abs(point1.Y - point2.Y)); + return Math.Abs(point1.X - point2.X) + Math.Abs(point1.Y - point2.Y); } /// @@ -55,7 +55,7 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis /// public static double Angle(PdfPoint point1, PdfPoint point2) { - return Math.Atan2((float)(point2.Y - point1.Y), (float)(point2.X - point1.X)) * 180.0 / Math.PI; + return Math.Atan2(point2.Y - point1.Y, point2.X - point1.X) * 180.0 / Math.PI; } /// @@ -66,7 +66,7 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis /// public static double Vertical(PdfPoint point1, PdfPoint point2) { - return Math.Abs((double)(point2.Y - point1.Y)); + return Math.Abs(point2.Y - point1.Y); } /// @@ -77,7 +77,7 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis /// public static double Horizontal(PdfPoint point1, PdfPoint point2) { - return Math.Abs((double)(point2.X - point1.X)); + return Math.Abs(point2.X - point1.X); } /// diff --git a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/NearestNeighbourWordExtractor.cs b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/NearestNeighbourWordExtractor.cs index e9eed907..ca6562d8 100644 --- a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/NearestNeighbourWordExtractor.cs +++ b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/NearestNeighbourWordExtractor.cs @@ -33,14 +33,14 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis { List wordsH = GetWords( letters.Where(l => l.TextDirection == TextDirection.Horizontal), - (l1, l2) => Math.Max((double)(l1.GlyphRectangle.Width), (double)(l2.GlyphRectangle.Width)) * 0.2, + (l1, l2) => Math.Max(l1.GlyphRectangle.Width, l2.GlyphRectangle.Width) * 0.2, Distances.Manhattan, MaxDegreeOfParallelism) .OrderByDescending(x => x.BoundingBox.Bottom) .ThenBy(x => x.BoundingBox.Left).ToList(); List words180 = GetWords( letters.Where(l => l.TextDirection == TextDirection.Rotate180), - (l1, l2) => Math.Max((double)(l1.GlyphRectangle.Width), 
(double)(l2.GlyphRectangle.Width)) * 0.2, + (l1, l2) => Math.Max(l1.GlyphRectangle.Width, l2.GlyphRectangle.Width) * 0.2, Distances.Manhattan, MaxDegreeOfParallelism) .OrderBy(x => x.BoundingBox.Top) .ThenByDescending(x => x.BoundingBox.Right).ToList(); @@ -48,7 +48,7 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis List words90 = GetWords( letters.Where(l => l.TextDirection == TextDirection.Rotate90), - (l1, l2) => Math.Max((double)(l1.GlyphRectangle.Height), (double)(l2.GlyphRectangle.Height)) * 0.2, + (l1, l2) => Math.Max(l1.GlyphRectangle.Height, l2.GlyphRectangle.Height) * 0.2, Distances.Manhattan, MaxDegreeOfParallelism) .OrderByDescending(x => x.BoundingBox.Left) .ThenBy(x => x.BoundingBox.Top).ToList(); @@ -56,7 +56,7 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis List words270 = GetWords( letters.Where(l => l.TextDirection == TextDirection.Rotate270), - (l1, l2) => Math.Max((double)(l1.GlyphRectangle.Height), (double)(l2.GlyphRectangle.Height)) * 0.2, + (l1, l2) => Math.Max(l1.GlyphRectangle.Height, l2.GlyphRectangle.Height) * 0.2, Distances.Manhattan, MaxDegreeOfParallelism) .OrderBy(x => x.BoundingBox.Right) .ThenByDescending(x => x.BoundingBox.Bottom).ToList(); @@ -64,7 +64,7 @@ namespace UglyToad.PdfPig.DocumentLayoutAnalysis List wordsU = GetWords( letters.Where(l => l.TextDirection == TextDirection.Unknown), - (l1, l2) => Math.Max((double)(l1.GlyphRectangle.Width), (double)(l2.GlyphRectangle.Width)) * 0.2, + (l1, l2) => Math.Max(l1.GlyphRectangle.Width, l2.GlyphRectangle.Width) * 0.2, Distances.Manhattan, MaxDegreeOfParallelism) .OrderByDescending(x => x.BoundingBox.Bottom) .ThenBy(x => x.BoundingBox.Left).ToList(); diff --git a/src/UglyToad.PdfPig/Export/PageXmlTextExporter.cs b/src/UglyToad.PdfPig/Export/PageXmlTextExporter.cs index 75167a82..60e3a6c8 100644 --- a/src/UglyToad.PdfPig/Export/PageXmlTextExporter.cs +++ b/src/UglyToad.PdfPig/Export/PageXmlTextExporter.cs @@ -249,16 +249,6 @@ namespace UglyToad.PdfPig.Export }; } - private 
static PageXmlDocument Deserialize(string xmlPath) - { - XmlSerializer serializer = new XmlSerializer(typeof(PageXmlDocument)); - - using (var reader = XmlReader.Create(xmlPath)) - { - return (PageXmlDocument)serializer.Deserialize(reader); - } - } - private string Serialize(PageXmlDocument pageXmlDocument) { XmlSerializer serializer = new XmlSerializer(typeof(PageXmlDocument)); @@ -276,5 +266,18 @@ namespace UglyToad.PdfPig.Export return System.Text.Encoding.UTF8.GetString(memoryStream.ToArray()); } } + + /// <summary> + /// Deserialize a <see cref="PageXmlDocument"/> from a given PAGE format XML document. + /// </summary> + public static PageXmlDocument Deserialize(string xmlPath) + { + XmlSerializer serializer = new XmlSerializer(typeof(PageXmlDocument)); + + using (var reader = XmlReader.Create(xmlPath)) + { + return (PageXmlDocument)serializer.Deserialize(reader); + } + } } } diff --git a/src/UglyToad.PdfPig/Geometry/GeometryExtensions.cs b/src/UglyToad.PdfPig/Geometry/GeometryExtensions.cs index a73c53d8..34063f3f 100644 --- a/src/UglyToad.PdfPig/Geometry/GeometryExtensions.cs +++ b/src/UglyToad.PdfPig/Geometry/GeometryExtensions.cs @@ -336,15 +336,15 @@ namespace UglyToad.PdfPig.Geometry foreach (var t in ts) { PdfPoint point = new PdfPoint( - BezierCurve.ValueWithT((double)bezierCurve.StartPoint.X, - (double)bezierCurve.FirstControlPoint.X, - (double)bezierCurve.SecondControlPoint.X, - (double)bezierCurve.EndPoint.X, + BezierCurve.ValueWithT(bezierCurve.StartPoint.X, + bezierCurve.FirstControlPoint.X, + bezierCurve.SecondControlPoint.X, + bezierCurve.EndPoint.X, t), - BezierCurve.ValueWithT((double)bezierCurve.StartPoint.Y, - (double)bezierCurve.FirstControlPoint.Y, - (double)bezierCurve.SecondControlPoint.Y, - (double)bezierCurve.EndPoint.Y, + BezierCurve.ValueWithT(bezierCurve.StartPoint.Y, + bezierCurve.FirstControlPoint.Y, + bezierCurve.SecondControlPoint.Y, + bezierCurve.EndPoint.Y, t) ); points.Add(point); @@ -368,10 +368,10 @@ namespace UglyToad.PdfPig.Geometry return null; } - double x1 = 
(double)line.Point1.X; - double y1 = (double)line.Point1.Y; - double x2 = (double)line.Point2.X; - double y2 = (double)line.Point2.Y; + double x1 = line.Point1.X; + double y1 = line.Point1.Y; + double x2 = line.Point2.X; + double y2 = line.Point2.Y; return FindIntersectionT(bezierCurve, x1, y1, x2, y2); } @@ -392,10 +392,10 @@ namespace UglyToad.PdfPig.Geometry return null; } - double x1 = (double)line.From.X; - double y1 = (double)line.From.Y; - double x2 = (double)line.To.X; - double y2 = (double)line.To.Y; + double x1 = line.From.X; + double y1 = line.From.Y; + double x2 = line.To.X; + double y2 = line.To.Y; return FindIntersectionT(bezierCurve, x1, y1, x2, y2); } @@ -405,10 +405,10 @@ namespace UglyToad.PdfPig.Geometry double B = (x1 - x2); double C = x1 * (y1 - y2) + y1 * (x2 - x1); - double alpha = (double)bezierCurve.StartPoint.X * A + (double)bezierCurve.StartPoint.Y * B; - double beta = 3.0 * ((double)bezierCurve.FirstControlPoint.X * A + (double)bezierCurve.FirstControlPoint.Y * B); - double gamma = 3.0 * ((double)bezierCurve.SecondControlPoint.X * A + (double)bezierCurve.SecondControlPoint.Y * B); - double delta = (double)bezierCurve.EndPoint.X * A + (double)bezierCurve.EndPoint.Y * B; + double alpha = bezierCurve.StartPoint.X * A + bezierCurve.StartPoint.Y * B; + double beta = 3.0 * (bezierCurve.FirstControlPoint.X * A + bezierCurve.FirstControlPoint.Y * B); + double gamma = 3.0 * (bezierCurve.SecondControlPoint.X * A + bezierCurve.SecondControlPoint.Y * B); + double delta = bezierCurve.EndPoint.X * A + bezierCurve.EndPoint.Y * B; double a = (-alpha + beta - gamma + delta); double b = (3 * alpha - 2 * beta + gamma); @@ -427,13 +427,13 @@ namespace UglyToad.PdfPig.Geometry { if ((point1.X - point2.X) != 0) // vertical line special case { - var slope = (double)((point2.Y - point1.Y) / (point2.X - point1.X)); - var intercept = (double)point2.Y - slope * (double)point2.X; + var slope = (point2.Y - point1.Y) / (point2.X - point1.X); + var intercept = 
point2.Y - slope * point2.X; return (slope, intercept); } else { - return (double.NaN, (double)point1.X); + return (double.NaN, point1.X); } } diff --git a/src/UglyToad.PdfPig/Geometry/PdfPath.cs b/src/UglyToad.PdfPig/Geometry/PdfPath.cs index 9e8114a3..9074067b 100644 --- a/src/UglyToad.PdfPig/Geometry/PdfPath.cs +++ b/src/UglyToad.PdfPig/Geometry/PdfPath.cs @@ -186,9 +186,9 @@ namespace UglyToad.PdfPig.Geometry { if (currentPosition.HasValue) { - shoeLaceSum += ((x1 - currentPosition.Value.X) * (y1 + currentPosition.Value.Y)); - shoeLaceSum += ((x2 - x1) * (y2 + y1)); - shoeLaceSum += ((x3 - x2) * (y3 + y2)); + shoeLaceSum += (x1 - currentPosition.Value.X) * (y1 + currentPosition.Value.Y); + shoeLaceSum += (x2 - x1) * (y2 + y1); + shoeLaceSum += (x3 - x2) * (y3 + y2); var to = new PdfPoint(x3, y3); commands.Add(new BezierCurve(currentPosition.Value, new PdfPoint(x1, y1), new PdfPoint(x2, y2), to)); @@ -563,7 +563,6 @@ namespace UglyToad.PdfPig.Geometry EndPoint.X, EndPoint.Y); } - private bool TrySolveQuadratic(bool isX, double currentMin, double currentMax, out (double min, double max) solutions) { solutions = default((double, double));