mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-10-14 19:05:01 +08:00
Add tests for the end-of-line tokenizer and branch coverage for the string tokenizer
This commit is contained in:
@@ -2,6 +2,7 @@
|
||||
{
|
||||
using System;
|
||||
using System.IO;
|
||||
using Content;
|
||||
using Xunit;
|
||||
|
||||
public class MultiplePageMortalityStatisticsTests
|
||||
@@ -43,6 +44,30 @@
|
||||
Assert.Contains("Mortality Statistics: Metadata", page.Text);
|
||||
Assert.Contains("Notification to the registrar by the coroner that he does not consider it necessary to hold an inquest – no post-mortem held (Form 100A – salmon pink)", page.Text);
|
||||
Assert.Contains("Presumption of death certificate", page.Text);
|
||||
|
||||
Assert.Equal(PageSize.Letter, page.Size);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Opens the test document, requests pages 1 to 6 and checks that the text of the
/// sixth page contains a known passage.
/// </summary>
[Fact]
public void GetsPagesContent()
{
    // Passage expected somewhere in the sixth page's text.
    const string expectedPassage =
        @"Up to 1992, publications gave numbers of deaths registered in the period concerned. From 1993 to 2005, the figures in annual reference volumes relate to the number of deaths that "
        + "occurred in the reference period. From 2006 onwards, all tables in Series DR are based on "
        + "deaths registered in a calendar period. More details on these changes can be found in the "
        + "publication Mortality Statistics: Deaths Registered in 2006 (ONS, 2008)";

    using (var document = PdfDocument.Open(GetFilename()))
    {
        // All six pages are requested in order, although only the last one is inspected.
        var firstSixPages = new[]
        {
            document.GetPage(1),
            document.GetPage(2),
            document.GetPage(3),
            document.GetPage(4),
            document.GetPage(5),
            document.GetPage(6)
        };

        var sixthPageText = firstSixPages[5].Text;

        Assert.Contains(expectedPassage, sixthPageText);
    }
}
|
||||
}
|
||||
|
@@ -0,0 +1,45 @@
|
||||
namespace UglyToad.Pdf.Tests.Tokenization
|
||||
{
|
||||
using Pdf.Tokenization;
|
||||
using Pdf.Tokenization.Tokens;
|
||||
using Xunit;
|
||||
|
||||
/// <summary>
/// Tests for <see cref="EndOfLineTokenizer"/>: which current bytes it accepts and
/// which token it produces.
/// </summary>
public class EndOfLineTokenizerTests
{
    private readonly EndOfLineTokenizer tokenizer = new EndOfLineTokenizer();

    /// <summary>
    /// A current byte of '\0' must be rejected, even though the input bytes
    /// themselves contain a carriage return and a line feed.
    /// </summary>
    [Fact]
    public void CurrentByteIsNotEndOfLineFalse()
    {
        var converted = StringBytesTestConverter.Convert("\r something \n", false);

        // The produced token is irrelevant here, so it is discarded.
        var tokenized = tokenizer.TryTokenize((byte)'\0', converted.Bytes, out _);

        Assert.False(tokenized);
    }

    /// <summary>
    /// A carriage return as the current byte yields the shared end of line token.
    /// </summary>
    [Fact]
    public void CurrentByteIsCarriageReturnTrue()
    {
        var converted = StringBytesTestConverter.Convert("\r", false);

        var tokenized = tokenizer.TryTokenize((byte)'\r', converted.Bytes, out var produced);

        Assert.True(tokenized);
        // Reference equality: the exact instance exposed by EndOfLineToken.Token is expected.
        Assert.Same(EndOfLineToken.Token, produced);
    }

    /// <summary>
    /// A line feed as the current byte yields the shared end of line token.
    /// </summary>
    [Fact]
    public void CurrentByteIsEndOfLineTrue()
    {
        var converted = StringBytesTestConverter.Convert("\n", false);

        var tokenized = tokenizer.TryTokenize((byte)'\n', converted.Bytes, out var produced);

        Assert.True(tokenized);
        // Reference equality: the exact instance exposed by EndOfLineToken.Token is expected.
        Assert.Same(EndOfLineToken.Token, produced);
    }
}
|
||||
}
|
@@ -188,6 +188,65 @@ are the same.)";
|
||||
Assert.Equal("This string has two +Öctals", AssertStringToken(token).Data);
|
||||
}
|
||||
|
||||
/// <summary>
/// An escaped backslash in the raw string must come out as a single backslash
/// in the token data.
/// </summary>
[Fact]
public void HandlesEscapedBackslash()
{
    // Verbatim literal: the raw input holds two backslash characters between the words.
    var bytes = StringBytesTestConverter.Convert(@"(listen\\learn)");

    var tokenized = tokenizer.TryTokenize(bytes.First, bytes.Bytes, out var token);

    Assert.True(tokenized);

    var stringToken = AssertStringToken(token);
    Assert.Equal(@"listen\learn", stringToken.Data);
}
|
||||
|
||||
/// <summary>
/// Each escape sequence in the raw input must be written to the token data as the
/// corresponding control character.
/// </summary>
/// <remarks>
/// The inputs are verbatim literals (a backslash followed by a letter); the expected
/// values are regular C# literals, so they contain the actual control characters.
/// Note that "\b" is the backspace character, despite the "bell" wording in that case.
/// </remarks>
/// <param name="input">The raw string, including its surrounding brackets.</param>
/// <param name="expected">The text the resulting string token should contain.</param>
[Theory]
[InlineData(@"(new line \n)", "new line \n")]
[InlineData(@"(carriage return \r)", "carriage return \r")]
[InlineData(@"(tab \t)", "tab \t")]
[InlineData(@"(bell \b)", "bell \b")]
[InlineData(@"(uhmmm \f)", "uhmmm \f")]
public void WritesEscapedCharactersToOutput(string input, string expected)
{
    var converted = StringBytesTestConverter.Convert(input);

    var tokenized = tokenizer.TryTokenize(converted.First, converted.Bytes, out var token);

    Assert.True(tokenized);

    var stringToken = AssertStringToken(token);
    Assert.Equal(expected, stringToken.Data);
}
|
||||
|
||||
/// <summary>
/// A backslash before a character that is not a recognised escape ("\e" here) is
/// expected to be dropped, leaving only the plain character in the token data.
/// </summary>
[Fact]
public void EscapedNonEscapeCharacterWritesPlainCharacter()
{
    // Verbatim literal: the raw input ends with a backslash followed by 'e'.
    var bytes = StringBytesTestConverter.Convert(@"(this does not need escaping \e)");

    var tokenized = tokenizer.TryTokenize(bytes.First, bytes.Bytes, out var token);

    Assert.True(tokenized);

    var stringToken = AssertStringToken(token);
    Assert.Equal(@"this does not need escaping e", stringToken.Data);
}
|
||||
|
||||
/// <summary>
/// When the input runs out before a closing bracket is seen, tokenization is still
/// expected to succeed and to return everything read after the opening bracket.
/// </summary>
[Fact]
public void ReachesEndOfInputAssumesEndOfString()
{
    // Deliberately missing the closing bracket.
    var bytes = StringBytesTestConverter.Convert(@"(this does not end with bracket");

    var tokenized = tokenizer.TryTokenize(bytes.First, bytes.Bytes, out var token);

    Assert.True(tokenized);

    var stringToken = AssertStringToken(token);
    Assert.Equal(@"this does not end with bracket", stringToken.Data);
}
|
||||
|
||||
private static StringToken AssertStringToken(IToken token)
|
||||
{
|
||||
Assert.NotNull(token);
|
||||
|
@@ -51,7 +51,7 @@
|
||||
|
||||
var definition = new TableSubsectionDefinition(firstObjectNumber.Long, objectCount.Int);
|
||||
|
||||
var tokenizer = new CrossReferenceEndOfLineTokenizer();
|
||||
var tokenizer = new EndOfLineTokenizer();
|
||||
|
||||
scanner.RegisterCustomTokenizer((byte)'\r', tokenizer);
|
||||
scanner.RegisterCustomTokenizer((byte)'\n', tokenizer);
|
||||
|
@@ -3,7 +3,7 @@
|
||||
using IO;
|
||||
using Tokens;
|
||||
|
||||
internal class CrossReferenceEndOfLineTokenizer : ITokenizer
|
||||
internal class EndOfLineTokenizer : ITokenizer
|
||||
{
|
||||
public bool ReadsNextByte { get; } = false;
|
||||
|
||||
@@ -20,13 +20,4 @@
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Token representing an end of line. It carries no data and cannot be constructed
/// from outside the class; the single instance is exposed through <see cref="Token"/>.
/// </summary>
internal class EndOfLineToken : IToken
{
    private static readonly EndOfLineToken Instance = new EndOfLineToken();

    // Private so that the only instance is the one created above.
    private EndOfLineToken()
    {
    }

    /// <summary>
    /// The shared <see cref="EndOfLineToken"/> instance.
    /// </summary>
    public static EndOfLineToken Token => Instance;
}
|
||||
}
|
@@ -222,11 +222,6 @@
|
||||
isOctalActive = true;
|
||||
octalsRead = 1;
|
||||
break;
|
||||
case ')':
|
||||
// TODO: Handle the weird malformed use case "/Something (C:\)"
|
||||
// numberOfBrackets = CheckForEndOfString(inputBytes, numberOfBrackets);
|
||||
builder.Append(c);
|
||||
break;
|
||||
default:
|
||||
if (c == ReadHelper.AsciiCarriageReturn || c == ReadHelper.AsciiLineFeed)
|
||||
{
|
||||
|
11
src/UglyToad.Pdf/Tokenization/Tokens/EndOfLineToken.cs
Normal file
11
src/UglyToad.Pdf/Tokenization/Tokens/EndOfLineToken.cs
Normal file
@@ -0,0 +1,11 @@
|
||||
namespace UglyToad.Pdf.Tokenization.Tokens
|
||||
{
|
||||
/// <summary>
/// Token representing an end of line. It carries no data and cannot be constructed
/// from outside the class; the single instance is exposed through <see cref="Token"/>.
/// </summary>
internal class EndOfLineToken : IToken
{
    private static readonly EndOfLineToken Instance = new EndOfLineToken();

    // Private so that the only instance is the one created above.
    private EndOfLineToken()
    {
    }

    /// <summary>
    /// The shared <see cref="EndOfLineToken"/> instance.
    /// </summary>
    public static EndOfLineToken Token => Instance;
}
|
||||
}
|
Reference in New Issue
Block a user