rename hex tokenizer and add numeric tokenizer

This commit is contained in:
Eliot Jones 2017-11-10 21:13:45 +00:00
parent 8dbeb4b822
commit f986e16c97
7 changed files with 223 additions and 26 deletions

View File

@ -4,9 +4,9 @@
using Pdf.Tokenization.Tokens;
using Xunit;
public class HexStringTokenizerTests
public class HexTokenizerTests
{
private readonly HexStringTokenizer tokenizer = new HexStringTokenizer();
private readonly HexTokenizer tokenizer = new HexTokenizer();
[Theory]
[InlineData(">not hex")]

View File

@ -0,0 +1,119 @@
namespace UglyToad.Pdf.Tests.Tokenization
{
    using System.Collections.Generic;
    using Pdf.Tokenization;
    using Pdf.Tokenization.Tokens;
    using Xunit;

    /// <summary>
    /// Unit tests for <see cref="NumericTokenizer"/>: rejection of non-numeric first bytes,
    /// parsing of valid numbers, and the position of the input after a number has been read.
    /// </summary>
    public class NumericTokenizerTests
    {
        private readonly NumericTokenizer tokenizer = new NumericTokenizer();

        /// <summary>
        /// A first byte which cannot begin a number must be rejected and no token produced.
        /// </summary>
        [Theory]
        [InlineData("a")]
        [InlineData("b")]
        [InlineData("A")]
        [InlineData("|")]
        [InlineData("z")]
        [InlineData("e")]
        [InlineData("E")]
        [InlineData("\n")]
        public void FirstByteInvalid_ReturnsFalse(string s)
        {
            var converted = StringBytesTestConverter.Convert(s);

            var succeeded = tokenizer.TryTokenize(converted.First, converted.Bytes, out var token);

            Assert.False(succeeded);
            Assert.Null(token);
        }

        /// <summary>
        /// Pairs of (raw text, expected decimal value) consumed by <see cref="ParsesValidNumbers"/>.
        /// </summary>
        public static IEnumerable<object[]> ValidNumberTestData => new[]
        {
            // Single digits.
            new object[] { "0", 0m },
            new object[] { "1", 1m },
            new object[] { "2", 2m },
            new object[] { "3", 3m },
            new object[] { "4", 4m },
            new object[] { "5", 5m },
            new object[] { "6", 6m },
            new object[] { "7", 7m },
            new object[] { "8", 8m },
            new object[] { "9", 9m },
            // Multiple digits, signs, leading and trailing zeros.
            new object[] { "10", 10m },
            new object[] { "11", 11m },
            new object[] { "29", 29m },
            new object[] { "-0", 0m },
            new object[] { "-0123", -123m },
            new object[] { "-6.9000", -6.9m },
            new object[] { "57473.3458382", 57473.3458382m },
            new object[] { "123", 123m },
            new object[] { "43445", 43445m },
            new object[] { "+17", 17m },
            new object[] { "-98", -98m },
            new object[] { "34.5", 34.5m },
            new object[] { "-3.62", -3.62m },
            new object[] { "+123.6", 123.6m },
            // Missing integer or fractional part.
            new object[] { "4.", 4m },
            new object[] { "-.002", -0.002m },
            new object[] { "0.0", 0m },
            // Exponent notation.
            new object[] { "1.57e3", 1570m }
        };

        /// <summary>
        /// Every entry of <see cref="ValidNumberTestData"/> must tokenize to the expected value.
        /// </summary>
        [Theory]
        [MemberData(nameof(ValidNumberTestData))]
        public void ParsesValidNumbers(string s, decimal expected)
        {
            var converted = StringBytesTestConverter.Convert(s);

            var succeeded = tokenizer.TryTokenize(converted.First, converted.Bytes, out var token);

            Assert.True(succeeded);
            var numeric = AssertNumericToken(token);
            Assert.Equal(expected, numeric.Data);
        }

        /// <summary>
        /// Reading stops at the first byte that is not part of the number and the input is left on that byte.
        /// </summary>
        [Fact]
        public void OnlyParsesNumberPart()
        {
            var converted = StringBytesTestConverter.Convert("135.6654/Type");

            var succeeded = tokenizer.TryTokenize(converted.First, converted.Bytes, out var token);

            Assert.True(succeeded);
            var numeric = AssertNumericToken(token);
            Assert.Equal(135.6654m, numeric.Data);
            Assert.Equal('/', (char)converted.Bytes.CurrentByte);
        }

        /// <summary>
        /// A lone minus sign is read as zero.
        /// </summary>
        [Fact]
        public void HandlesDash()
        {
            var converted = StringBytesTestConverter.Convert("-");

            var succeeded = tokenizer.TryTokenize(converted.First, converted.Bytes, out var token);

            Assert.True(succeeded);
            var numeric = AssertNumericToken(token);
            Assert.Equal(0m, numeric.Data);
        }

        /// <summary>
        /// A lone decimal point is read as zero.
        /// </summary>
        [Fact]
        public void HandlesDot()
        {
            var converted = StringBytesTestConverter.Convert(".");

            var succeeded = tokenizer.TryTokenize(converted.First, converted.Bytes, out var token);

            Assert.True(succeeded);
            var numeric = AssertNumericToken(token);
            Assert.Equal(0m, numeric.Data);
        }

        /// <summary>
        /// Asserts the token is a non-null <see cref="NumericToken"/> and returns it typed.
        /// </summary>
        private static NumericToken AssertNumericToken(IToken token)
        {
            Assert.NotNull(token);

            return Assert.IsType<NumericToken>(token);
        }
    }
}

View File

@ -15,7 +15,7 @@
private readonly IInputBytes inputBytes;
private readonly List<byte> currentBuffer = new List<byte>();
private static readonly HexStringTokenizer HexStringTokenizer = new HexStringTokenizer();
private static readonly HexTokenizer HexTokenizer = new HexTokenizer();
private static readonly StringTokenizer StringTokenizer = new StringTokenizer();
private static readonly NumericTokenizer NumericTokenizer = new NumericTokenizer();
private static readonly NameTokenizer NameTokenizer = new NameTokenizer();
@ -68,7 +68,7 @@
}
else
{
tokenizer = HexStringTokenizer;
tokenizer = HexTokenizer;
}
break;
case '/':
@ -87,7 +87,7 @@
case '-':
case '+':
case '.':
tokenizer = NumericTokenizer;
tokenizer = null;
break;
}

View File

@ -6,7 +6,7 @@
using Text.Operators;
using Tokenization.Tokens;
public class NumericTokenizer : ITokenizer
public class NumericTokenizer
{
private static readonly HashSet<byte> SupportedCharacterSet = new HashSet<byte>
{
@ -51,24 +51,6 @@
return new OperandComponent(new NumericOperand(bytes), TextObjectComponentType.Numeric);
}
/// <summary>
/// Scans the bytes which follow <paramref name="currentByte"/> and reports whether each of them
/// is contained in <c>SupportedCharacterSet</c>.
/// </summary>
/// <param name="currentByte">The byte already read by the caller; it is stored but not validated here.</param>
/// <param name="inputBytes">The input which is advanced while scanning.</param>
/// <param name="token">Always <c>null</c>: this method never assigns a token, even when it returns <c>true</c>.</param>
/// <returns>
/// <c>false</c> as soon as a byte outside <c>SupportedCharacterSet</c> is met; otherwise <c>true</c>
/// once the input is exhausted or <c>BaseTextComponentApproach.IsEmpty</c> reports true for the current byte.
/// </returns>
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
{
token = null;
// NOTE(review): the collected bytes are never read within this method.
var bytes = new List<byte> { currentByte };
// NOTE(review): IsEmpty presumably detects whitespace / end-of-token bytes - confirm against its definition.
while (inputBytes.MoveNext() && !BaseTextComponentApproach.IsEmpty(inputBytes.CurrentByte))
{
if (!SupportedCharacterSet.Contains(inputBytes.CurrentByte))
{
return false;
}
bytes.Add(inputBytes.CurrentByte);
}
// NOTE(review): returns success while token is still null; callers cannot use the result as a token.
return true;
}
}
}

View File

@ -5,7 +5,7 @@
using Parser.Parts;
using Tokens;
public class HexStringTokenizer : ITokenizer
public class HexTokenizer : ITokenizer
{
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
{

View File

@ -0,0 +1,75 @@
namespace UglyToad.Pdf.Tokenization
{
    using System;
    using System.Globalization;
    using System.Text;
    using IO;
    using Tokens;

    /// <summary>
    /// Reads a number (optionally signed, optionally containing a decimal point and/or an exponent)
    /// from the input and produces a <see cref="NumericToken"/>.
    /// </summary>
    public class NumericTokenizer : ITokenizer
    {
        /// <summary>
        /// Attempts to read a numeric token beginning at <paramref name="currentByte"/>.
        /// </summary>
        /// <param name="currentByte">The first byte of the candidate number: a digit, '-', '+' or '.'.</param>
        /// <param name="inputBytes">
        /// The input to continue reading from. It is advanced until a byte which cannot be part of a number
        /// is found (the input is left positioned on that byte) or until the input is exhausted.
        /// </param>
        /// <param name="token">The <see cref="NumericToken"/> which was read, or <c>null</c> on failure.</param>
        /// <returns>
        /// <c>true</c> if a number was read; <c>false</c> if the first byte cannot start a number
        /// or the collected characters do not form a valid <see cref="decimal"/>.
        /// </returns>
        public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
        {
            token = null;

            if (!IsDigit(currentByte) && currentByte != '-' && currentByte != '+' && currentByte != '.')
            {
                return false;
            }

            var characters = new StringBuilder();
            characters.Append((char)currentByte);

            while (inputBytes.MoveNext())
            {
                var b = inputBytes.CurrentByte;

                if (!IsDigit(b) && b != '-' && b != '+' && b != '.' && b != 'E' && b != 'e')
                {
                    break;
                }

                characters.Append((char)b);
            }

            decimal value;
            if (characters.Length == 1 && (characters[0] == '-' || characters[0] == '.'))
            {
                // A lone sign or decimal point is treated as zero rather than as a failure.
                value = 0;
            }
            else if (!decimal.TryParse(characters.ToString(), NumberStyles.Any, CultureInfo.InvariantCulture, out value))
            {
                // The invariant culture is required: the file format always uses '.' as the decimal
                // separator, whereas the current culture may treat '.' as a group separator.
                // TryParse fails for malformed and out-of-range text alike, without throwing.
                return false;
            }

            token = new NumericToken(value);

            return true;
        }

        /// <summary>
        /// Whether the byte is an ASCII digit, '0' to '9'.
        /// </summary>
        private static bool IsDigit(byte b)
        {
            return b >= '0' && b <= '9';
        }
    }
}

View File

@ -0,0 +1,21 @@
namespace UglyToad.Pdf.Tokenization.Tokens
{
    /// <summary>
    /// A token holding a numeric value as a <see cref="decimal"/>.
    /// </summary>
    public class NumericToken : IDataToken<decimal>
    {
        /// <summary>
        /// The numeric value of this token.
        /// </summary>
        public decimal Data { get; }

        /// <summary>
        /// Whether <see cref="Data"/> has no fractional part.
        /// </summary>
        public bool IsWhole { get; }

        /// <summary>
        /// <see cref="Data"/> converted to an <see cref="int"/>, any fractional part is truncated towards zero.
        /// </summary>
        /// <exception cref="System.OverflowException">The value is outside the range of <see cref="int"/>.</exception>
        public int Int => (int)Data;

        /// <summary>
        /// <see cref="Data"/> converted to a <see cref="long"/>, any fractional part is truncated towards zero.
        /// </summary>
        /// <exception cref="System.OverflowException">The value is outside the range of <see cref="long"/>.</exception>
        public long Long => (long)Data;

        /// <summary>
        /// Create a new <see cref="NumericToken"/>.
        /// </summary>
        /// <param name="value">The numeric value this token represents.</param>
        public NumericToken(decimal value)
        {
            Data = value;
            IsWhole = decimal.Floor(value) == value;

            // The integral conversions are deliberately not performed here: an explicit decimal to
            // int/long conversion throws for out-of-range values, which would make it impossible to
            // construct a token for any number larger than int.MaxValue.
        }
    }
}