tests for end of line tokenizer and branch coverage for string tokenizer

Eliot Jones
2018-01-06 12:08:52 +00:00
parent bbcb5af2be
commit 03f31a84e5
7 changed files with 142 additions and 16 deletions


@@ -2,6 +2,7 @@
{
using System;
using System.IO;
using Content;
using Xunit;
public class MultiplePageMortalityStatisticsTests
@@ -43,6 +44,30 @@
Assert.Contains("Mortality Statistics: Metadata", page.Text);
Assert.Contains("Notification to the registrar by the coroner that he does not consider it necessary to hold an inquest no post-mortem held (Form 100A salmon pink)", page.Text);
Assert.Contains("Presumption of death certificate", page.Text);
Assert.Equal(PageSize.Letter, page.Size);
}
}
[Fact]
public void GetsPagesContent()
{
using (var document = PdfDocument.Open(GetFilename()))
{
var pages = new[]
{
document.GetPage(1),
document.GetPage(2),
document.GetPage(3),
document.GetPage(4),
document.GetPage(5),
document.GetPage(6)
};
Assert.Contains(@"Up to 1992, publications gave numbers of deaths registered in the period concerned. From 1993 to 2005, the figures in annual reference volumes relate to the number of deaths that "
+ "occurred in the reference period. From 2006 onwards, all tables in Series DR are based on "
+ "deaths registered in a calendar period. More details on these changes can be found in the "
+ "publication Mortality Statistics: Deaths Registered in 2006 (ONS, 2008)", pages[5].Text);
}
}
}
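
For readers unfamiliar with the API these tests exercise, a minimal consumer-side sketch follows. It assumes PdfDocument sits in the UglyToad.Pdf root namespace (inferred from the test namespaces above) and uses a placeholder file path; only PdfDocument.Open, GetPage, Text and Size are taken directly from the tests.

using System;
using UglyToad.Pdf;

public static class Program
{
    public static void Main()
    {
        // Placeholder path; substitute any locally available PDF.
        using (var document = PdfDocument.Open(@"C:\docs\example.pdf"))
        {
            // Pages are addressed 1-based, matching document.GetPage(1) in the tests.
            var page = document.GetPage(1);

            Console.WriteLine(page.Size); // e.g. Letter
            Console.WriteLine(page.Text); // extracted text for the whole page
        }
    }
}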


@@ -0,0 +1,45 @@
namespace UglyToad.Pdf.Tests.Tokenization
{
using Pdf.Tokenization;
using Pdf.Tokenization.Tokens;
using Xunit;
public class EndOfLineTokenizerTests
{
private readonly EndOfLineTokenizer tokenizer = new EndOfLineTokenizer();
[Fact]
public void CurrentByteIsNotEndOfLineFalse()
{
var input = StringBytesTestConverter.Convert("\r something \n", false);
var result = tokenizer.TryTokenize((byte)'\0', input.Bytes, out var _);
Assert.False(result);
}
[Fact]
public void CurrentByteIsCarriageReturnTrue()
{
var input = StringBytesTestConverter.Convert("\r", false);
var result = tokenizer.TryTokenize((byte)'\r', input.Bytes, out var token);
Assert.True(result);
Assert.Same(EndOfLineToken.Token, token);
}
[Fact]
public void CurrentByteIsEndOfLineTrue()
{
var input = StringBytesTestConverter.Convert("\n", false);
var result = tokenizer.TryTokenize((byte)'\n', input.Bytes, out var token);
Assert.True(result);
Assert.Same(EndOfLineToken.Token, token);
}
}
}


@@ -188,6 +188,65 @@ are the same.)";
Assert.Equal("This string has two +Öctals", AssertStringToken(token).Data);
}
[Fact]
public void HandlesEscapedBackslash()
{
const string s = @"(listen\\learn)";
var input = StringBytesTestConverter.Convert(s);
var result = tokenizer.TryTokenize(input.First, input.Bytes, out var token);
Assert.True(result);
Assert.Equal(@"listen\learn", AssertStringToken(token).Data);
}
[Theory]
[InlineData(@"(new line \n)", "new line \n")]
[InlineData(@"(carriage return \r)", "carriage return \r")]
[InlineData(@"(tab \t)", "tab \t")]
[InlineData(@"(bell \b)", "bell \b")]
[InlineData(@"(uhmmm \f)", "uhmmm \f")]
public void WritesEscapedCharactersToOutput(string input, string expected)
{
var bytes = StringBytesTestConverter.Convert(input);
var result = tokenizer.TryTokenize(bytes.First, bytes.Bytes, out var token);
Assert.True(result);
Assert.Equal(expected, AssertStringToken(token).Data);
}
[Fact]
public void EscapedNonEscapeCharacterWritesPlainCharacter()
{
const string s = @"(this does not need escaping \e)";
var input = StringBytesTestConverter.Convert(s);
var result = tokenizer.TryTokenize(input.First, input.Bytes, out var token);
Assert.True(result);
Assert.Equal(@"this does not need escaping e", AssertStringToken(token).Data);
}
[Fact]
public void ReachesEndOfInputAssumesEndOfString()
{
const string s = @"(this does not end with bracket";
var input = StringBytesTestConverter.Convert(s);
var result = tokenizer.TryTokenize(input.First, input.Bytes, out var token);
Assert.True(result);
Assert.Equal(@"this does not end with bracket", AssertStringToken(token).Data);
}
private static StringToken AssertStringToken(IToken token)
{
Assert.NotNull(token);

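The escape handling these new cases pin down follows the PDF string rules: a backslash before n, r, t, b or f produces the matching control character, an escaped backslash collapses to a single backslash, any other escaped character is written out unchanged, and running off the end of the input simply closes the string. The sketch below illustrates that switch only; the class and method names are made up for the example and are not the StringTokenizer's real members.

using System.Text;

internal static class EscapeSketch
{
    // Illustrative only: 'c' is the character that followed a backslash.
    public static void AppendEscaped(char c, StringBuilder builder)
    {
        switch (c)
        {
            case 'n': builder.Append('\n'); break;  // (new line \n)        -> "new line \n"
            case 'r': builder.Append('\r'); break;  // (carriage return \r) -> "carriage return \r"
            case 't': builder.Append('\t'); break;  // (tab \t)             -> "tab \t"
            case 'b': builder.Append('\b'); break;  // (bell \b)            -> "bell \b"
            case 'f': builder.Append('\f'); break;  // (uhmmm \f)           -> "uhmmm \f"
            case '\\': builder.Append('\\'); break; // (listen\\learn)      -> "listen\learn"
            default:
                // A character that needs no escaping is written as-is,
                // so \e in the test above becomes a plain "e".
                builder.Append(c);
                break;
        }
    }
}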

@@ -51,7 +51,7 @@
var definition = new TableSubsectionDefinition(firstObjectNumber.Long, objectCount.Int);
-var tokenizer = new CrossReferenceEndOfLineTokenizer();
+var tokenizer = new EndOfLineTokenizer();
scanner.RegisterCustomTokenizer((byte)'\r', tokenizer);
scanner.RegisterCustomTokenizer((byte)'\n', tokenizer);


@@ -3,7 +3,7 @@
using IO;
using Tokens;
-internal class CrossReferenceEndOfLineTokenizer : ITokenizer
+internal class EndOfLineTokenizer : ITokenizer
{
public bool ReadsNextByte { get; } = false;
@@ -20,13 +20,4 @@
return true;
}
}
-internal class EndOfLineToken : IToken
-{
-public static EndOfLineToken Token { get; } = new EndOfLineToken();
-private EndOfLineToken()
-{
-}
-}
}
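
Reading this hunk together with the new EndOfLineTokenizerTests above, the renamed tokenizer emits the shared EndOfLineToken whenever the current byte is a carriage return or line feed and reports failure for anything else. The following is a reconstruction for illustration, not the committed source; in particular the TryTokenize signature and the IInputBytes parameter type are assumptions based on the using directives and the test calls.

namespace UglyToad.Pdf.Tokenization
{
    using IO;
    using Tokens;

    // Illustrative reconstruction; the exact signature is an assumption.
    internal class EndOfLineTokenizer : ITokenizer
    {
        // The decision depends only on the current byte, so the next byte is never consumed.
        public bool ReadsNextByte { get; } = false;

        public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
        {
            token = null;

            if (currentByte != '\r' && currentByte != '\n')
            {
                // Covered by CurrentByteIsNotEndOfLineFalse.
                return false;
            }

            // Covered by CurrentByteIsCarriageReturnTrue and CurrentByteIsEndOfLineTrue:
            // both bytes map to the single shared token instance.
            token = EndOfLineToken.Token;

            return true;
        }
    }
}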


@@ -222,11 +222,6 @@
isOctalActive = true;
octalsRead = 1;
break;
-case ')':
-// TODO: Handle the weird malformed use case "/Something (C:\)"
-// numberOfBrackets = CheckForEndOfString(inputBytes, numberOfBrackets);
-builder.Append(c);
-break;
default:
if (c == ReadHelper.AsciiCarriageReturn || c == ReadHelper.AsciiLineFeed)
{


@@ -0,0 +1,11 @@
namespace UglyToad.Pdf.Tokenization.Tokens
{
internal class EndOfLineToken : IToken
{
public static EndOfLineToken Token { get; } = new EndOfLineToken();
private EndOfLineToken()
{
}
}
}
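
Because the constructor is private, EndOfLineToken.Token is the only instance that can ever exist, which is what lets the tests above use Assert.Same. Consumers can therefore detect the marker by reference; the helper below is hypothetical and only shows the pattern.

namespace UglyToad.Pdf.Tokenization
{
    using Tokens;

    // Hypothetical helper, not part of this commit.
    internal static class TokenChecks
    {
        public static bool IsEndOfLine(IToken token)
        {
            // Reference equality is sufficient because there is a single shared instance.
            return ReferenceEquals(token, EndOfLineToken.Token);
        }
    }
}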