mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-08-20 06:38:07 +08:00
Skip control chars in CoreTokenScanner.MoveNext() and fix #1048
This commit is contained in:
parent
67d3dde04a
commit
ca9f70ffb0
@@ -7,6 +7,24 @@
|
||||
|
||||
public class GithubIssuesTests
|
||||
{
|
||||
[Fact]
|
||||
public void Issue1048()
|
||||
{
|
||||
var path = IntegrationHelpers.GetSpecificTestDocumentPath("InvalidCast.pdf");
|
||||
|
||||
using (var document = PdfDocument.Open(path, new ParsingOptions() { UseLenientParsing = true }))
|
||||
{
|
||||
var page = document.GetPage(1);
|
||||
Assert.NotNull(page.Letters);
|
||||
|
||||
var words = NearestNeighbourWordExtractor.Instance.GetWords(page.Letters);
|
||||
var blocks = DocstrumBoundingBoxes.Instance.GetBlocks(words);
|
||||
|
||||
Assert.Single(blocks);
|
||||
Assert.Equal("hey, i'm a bug.", blocks[0].Text);
|
||||
}
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Issue554()
|
||||
{
|
||||
|
Binary file not shown.
@@ -113,7 +113,7 @@
|
||||
|
||||
if (tokenizer == null)
|
||||
{
|
||||
- if (ReadHelper.IsWhitespace(currentByte))
|
||||
+ if (ReadHelper.IsWhitespace(currentByte) || char.IsControl(c))
|
||||
{
|
||||
isSkippingSymbol = false;
|
||||
continue;
|
||||
|
Loading…
Reference in New Issue
Block a user