diff --git a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/ClusteringAlgorithms.cs b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/Clustering.cs similarity index 96% rename from src/UglyToad.PdfPig.DocumentLayoutAnalysis/ClusteringAlgorithms.cs rename to src/UglyToad.PdfPig.DocumentLayoutAnalysis/Clustering.cs index 36b62120..7048b078 100644 --- a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/ClusteringAlgorithms.cs +++ b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/Clustering.cs @@ -9,7 +9,7 @@ /// /// Clustering Algorithms. /// - internal class ClusteringAlgorithms + public static class Clustering { /// /// Algorithm to group elements using nearest neighbours. @@ -26,7 +26,7 @@ /// Sets the maximum number of concurrent tasks enabled. /// A positive property value limits the number of concurrent operations to the set value. /// If it is -1, there is no limit on the number of concurrently running operations. - internal static IEnumerable> ClusterNearestNeighbours(IReadOnlyList elements, + public static IEnumerable> NearestNeighbours(IReadOnlyList elements, Func distMeasure, Func maxDistanceFunction, Func pivotPoint, Func candidatesPoint, @@ -94,7 +94,7 @@ /// Sets the maximum number of concurrent tasks enabled. /// A positive property value limits the number of concurrent operations to the set value. /// If it is -1, there is no limit on the number of concurrently running operations. - internal static IEnumerable> ClusterNearestNeighbours(IReadOnlyList elements, int k, + public static IEnumerable> NearestNeighbours(IReadOnlyList elements, int k, Func distMeasure, Func maxDistanceFunction, Func pivotPoint, Func candidatesPoint, @@ -161,7 +161,7 @@ /// Sets the maximum number of concurrent tasks enabled. /// A positive property value limits the number of concurrent operations to the set value. /// If it is -1, there is no limit on the number of concurrently running operations. - internal static IEnumerable> ClusterNearestNeighbours(T[] elements, + public static IEnumerable> NearestNeighbours(IReadOnlyList elements, Func distMeasure, Func maxDistanceFunction, Func pivotLine, Func candidatesLine, @@ -185,12 +185,12 @@ * (i,j,k) will form a group and (m,n) will form another group. *************************************************************************************/ - int[] indexes = Enumerable.Repeat(-1, elements.Length).ToArray(); + int[] indexes = Enumerable.Repeat(-1, elements.Count).ToArray(); ParallelOptions parallelOptions = new ParallelOptions() { MaxDegreeOfParallelism = maxDegreeOfParallelism }; // 1. Find nearest neighbours indexes - Parallel.For(0, elements.Length, parallelOptions, e => + Parallel.For(0, elements.Count, parallelOptions, e => { var pivot = elements[e]; diff --git a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/PageSegmenter/DocstrumBoundingBoxes.cs b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/PageSegmenter/DocstrumBoundingBoxes.cs index 9a624a45..2488014b 100644 --- a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/PageSegmenter/DocstrumBoundingBoxes.cs +++ b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/PageSegmenter/DocstrumBoundingBoxes.cs @@ -197,7 +197,7 @@ private static IEnumerable GetLines(List words, double maxDist, AngleBounds withinLine, int maxDegreeOfParallelism) { TextDirection textDirection = words[0].TextDirection; - var groupedIndexes = ClusteringAlgorithms.ClusterNearestNeighbours(words, 2, Distances.Euclidean, + var groupedIndexes = Clustering.NearestNeighbours(words, 2, Distances.Euclidean, (pivot, candidate) => maxDist, pivot => pivot.BoundingBox.BottomRight, candidate => candidate.BoundingBox.BottomLeft, pivot => true, @@ -246,7 +246,7 @@ new PdfPoint(left + d / 2, l2.Point1.Y)); } - var groupedIndexes = ClusteringAlgorithms.ClusterNearestNeighbours(lines, + var groupedIndexes = Clustering.NearestNeighbours(lines, euclidianOverlappingMiddleDistance, (pivot, candidate) => maxDist, pivot => new PdfLine(pivot.BoundingBox.BottomLeft, pivot.BoundingBox.BottomRight), diff --git a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/WordExtractor/NearestNeighbourWordExtractor.cs b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/WordExtractor/NearestNeighbourWordExtractor.cs index be3f87c6..89ded8ce 100644 --- a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/WordExtractor/NearestNeighbourWordExtractor.cs +++ b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/WordExtractor/NearestNeighbourWordExtractor.cs @@ -108,7 +108,7 @@ { if (pageLetters == null || pageLetters.Count == 0) return new List(); - var groupedIndexes = ClusteringAlgorithms.ClusterNearestNeighbours(pageLetters, + var groupedIndexes = Clustering.NearestNeighbours(pageLetters, distMeasure, maxDistanceFunction, l => l.EndBaseLine, l => l.StartBaseLine, l => !string.IsNullOrWhiteSpace(l.Value), diff --git a/src/UglyToad.PdfPig.Fonts/TrueType/Parser/HorizontalMetricsTableParser.cs b/src/UglyToad.PdfPig.Fonts/TrueType/Parser/HorizontalMetricsTableParser.cs index 98ca5926..cb1f8ba7 100644 --- a/src/UglyToad.PdfPig.Fonts/TrueType/Parser/HorizontalMetricsTableParser.cs +++ b/src/UglyToad.PdfPig.Fonts/TrueType/Parser/HorizontalMetricsTableParser.cs @@ -26,10 +26,18 @@ bytesRead += 4; } - + + int numberNonHorizontal = glyphCount - metricCount; + + // handle bad fonts with too many hmetrics + if (numberNonHorizontal < 0) + { + numberNonHorizontal = glyphCount; + } + // The number of entries in the left side bearing field per entry is number of glyphs - number of metrics // For bearings over the metric count, the width is the same as the last width in advanced widths. - var additionalLeftSideBearings = new short[glyphCount - metricCount]; + var additionalLeftSideBearings = new short[numberNonHorizontal]; for (var i = 0; i < additionalLeftSideBearings.Length; i++) { diff --git a/src/UglyToad.PdfPig.sln.DotSettings b/src/UglyToad.PdfPig.sln.DotSettings index 8306290d..c7263dcd 100644 --- a/src/UglyToad.PdfPig.sln.DotSettings +++ b/src/UglyToad.PdfPig.sln.DotSettings @@ -1,4 +1,5 @@  + True BE CIE CMYK diff --git a/src/UglyToad.PdfPig/Content/Page.cs b/src/UglyToad.PdfPig/Content/Page.cs index faf297a9..f611bd1e 100644 --- a/src/UglyToad.PdfPig/Content/Page.cs +++ b/src/UglyToad.PdfPig/Content/Page.cs @@ -4,13 +4,12 @@ using System.Collections.Generic; using System.Text; using Annotations; - using Core; using Graphics.Operations; using Tokens; using Util; using Util.JetBrains.Annotations; using Tokenization.Scanner; - using UglyToad.PdfPig.Graphics; + using Graphics; /// /// Contains the content and provides access to methods of a single page in the .