diff --git a/examples/AdvancedTextExtraction.cs b/examples/AdvancedTextExtraction.cs index ff605fb5..036ed1aa 100644 --- a/examples/AdvancedTextExtraction.cs +++ b/examples/AdvancedTextExtraction.cs @@ -10,7 +10,8 @@ public static class AdvancedTextExtraction { public static void Run(string filePath) - { + { +#if YET_TO_BE_DONE var sb = new StringBuilder(); using (var document = PdfDocument.Open(filePath)) @@ -86,6 +87,7 @@ } Console.WriteLine(sb.ToString()); +#endif } } } diff --git a/examples/Program.cs b/examples/Program.cs index 5297a657..00e9136a 100644 --- a/examples/Program.cs +++ b/examples/Program.cs @@ -45,9 +45,14 @@ }, {7, ("Advance text extraction using layout analysis algorithms", - () => AdvancedTextExtraction.Run(Path.Combine(filesDirectory, "ICML03-081.pdf"))) - } - }; + () => AdvancedTextExtraction.Run(Path.Combine(filesDirectory, "ICML03-081.pdf"))) + }, + { + 8, + ("Extract Words with newline detection (example with algorithm). Issue 512", + () => OpenDocumentAndExtractWords.Run(Path.Combine(filesDirectory, "OPEN.RABBIT.ENGLISH.LOP.pdf"))) + } + }; var choices = string.Join(Environment.NewLine, examples.Select(x => $"{x.Key}: {x.Value.name}")); diff --git a/src/UglyToad.PdfPig.Tests/Integration/Documents/OPEN.RABBIT.ENGLISH.LOP.pdf b/src/UglyToad.PdfPig.Tests/Integration/Documents/OPEN.RABBIT.ENGLISH.LOP.pdf new file mode 100644 index 00000000..0ca6c9a6 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Integration/Documents/OPEN.RABBIT.ENGLISH.LOP.pdf differ diff --git a/src/UglyToad.PdfPig/Parser/Parts/BruteForceSearcher.cs b/src/UglyToad.PdfPig/Parser/Parts/BruteForceSearcher.cs index 2dda4ecc..ae91268c 100644 --- a/src/UglyToad.PdfPig/Parser/Parts/BruteForceSearcher.cs +++ b/src/UglyToad.PdfPig/Parser/Parts/BruteForceSearcher.cs @@ -176,7 +176,7 @@ const string searchTerm = "%%EOF"; - var minimumEndOffset = bytes.Length - searchTerm.Length; + var minimumEndOffset = bytes.Length - searchTerm.Length + 1; // Issue #512 - Unable to open PDF 
- BruteForceScan starts from the earlier of two EOF markers due to the min end offset being off by 1 bytes.Seek(minimumEndOffset);