mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-08-20 08:25:09 +08:00
Fix for Issue #512: Unable to open PDF. The minimumEndOffset computed in BruteForceSearcher::GetLastEndOfFileMarker() was off by 1.
This commit is contained in:
parent
9c9c7c99ea
commit
afe473e10e
@ -10,7 +10,8 @@
|
|||||||
public static class AdvancedTextExtraction
|
public static class AdvancedTextExtraction
|
||||||
{
|
{
|
||||||
public static void Run(string filePath)
|
public static void Run(string filePath)
|
||||||
{
|
{
|
||||||
|
#if YET_TO_BE_DONE
|
||||||
var sb = new StringBuilder();
|
var sb = new StringBuilder();
|
||||||
|
|
||||||
using (var document = PdfDocument.Open(filePath))
|
using (var document = PdfDocument.Open(filePath))
|
||||||
@ -86,6 +87,7 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
Console.WriteLine(sb.ToString());
|
Console.WriteLine(sb.ToString());
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -45,9 +45,14 @@
|
|||||||
},
|
},
|
||||||
{7,
|
{7,
|
||||||
("Advance text extraction using layout analysis algorithms",
|
("Advance text extraction using layout analysis algorithms",
|
||||||
() => AdvancedTextExtraction.Run(Path.Combine(filesDirectory, "ICML03-081.pdf")))
|
() => AdvancedTextExtraction.Run(Path.Combine(filesDirectory, "ICML03-081.pdf")))
|
||||||
}
|
},
|
||||||
};
|
{
|
||||||
|
8,
|
||||||
|
("Extract Words with newline detection (example with algorithm). Issue 512",
|
||||||
|
() => OpenDocumentAndExtractWords.Run(Path.Combine(filesDirectory, "OPEN.RABBIT.ENGLISH.LOP.pdf")))
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
var choices = string.Join(Environment.NewLine, examples.Select(x => $"{x.Key}: {x.Value.name}"));
|
var choices = string.Join(Environment.NewLine, examples.Select(x => $"{x.Key}: {x.Value.name}"));
|
||||||
|
|
||||||
|
|||||||
Binary file not shown.
@ -176,7 +176,7 @@
|
|||||||
|
|
||||||
const string searchTerm = "%%EOF";
|
const string searchTerm = "%%EOF";
|
||||||
|
|
||||||
var minimumEndOffset = bytes.Length - searchTerm.Length;
|
var minimumEndOffset = bytes.Length - searchTerm.Length + 1; // Issue #512 - Unable to open PDF - BruteForceScan starts from earlier of two EOF marker due to min end offset off by 1
|
||||||
|
|
||||||
bytes.Seek(minimumEndOffset);
|
bytes.Seek(minimumEndOffset);
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user