mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-09-21 04:17:57 +08:00
cache some more common values and improve performance of tokenizers
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
namespace UglyToad.PdfPig.Tokenization
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Globalization;
|
||||
using System.Text;
|
||||
using IO;
|
||||
@@ -8,6 +9,11 @@
|
||||
|
||||
internal class NumericTokenizer : ITokenizer
|
||||
{
|
||||
private const byte Zero = 48;
|
||||
private const byte Nine = 57;
|
||||
|
||||
private readonly Dictionary<string, NumericToken> cachedTokens = new Dictionary<string, NumericToken>();
|
||||
|
||||
public bool ReadsNextByte { get; } = true;
|
||||
|
||||
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
|
||||
@@ -16,7 +22,7 @@
|
||||
|
||||
StringBuilder characters;
|
||||
|
||||
if ((currentByte >= '0' && currentByte <= '9') || currentByte == '-' || currentByte == '+' || currentByte == '.')
|
||||
if ((currentByte >= Zero && currentByte <= Nine) || currentByte == '-' || currentByte == '+' || currentByte == '.')
|
||||
{
|
||||
characters = new StringBuilder();
|
||||
characters.Append((char)currentByte);
|
||||
@@ -29,16 +35,15 @@
|
||||
while (inputBytes.MoveNext())
|
||||
{
|
||||
var b = inputBytes.CurrentByte;
|
||||
var c = (char) b;
|
||||
|
||||
if (char.IsDigit(c) ||
|
||||
c == '-' ||
|
||||
c == '+' ||
|
||||
c == '.' ||
|
||||
c == 'E' ||
|
||||
c == 'e')
|
||||
if ((b >= Zero && b <= Nine) ||
|
||||
b == '-' ||
|
||||
b == '+' ||
|
||||
b == '.' ||
|
||||
b == 'E' ||
|
||||
b == 'e')
|
||||
{
|
||||
characters.Append(c);
|
||||
characters.Append((char)b);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -56,7 +61,38 @@
|
||||
}
|
||||
else
|
||||
{
|
||||
value = decimal.Parse(characters.ToString(), NumberStyles.Any, CultureInfo.InvariantCulture);
|
||||
var str = characters.ToString();
|
||||
|
||||
switch (str)
|
||||
{
|
||||
case "0":
|
||||
token = NumericToken.Zero;
|
||||
return true;
|
||||
case "1":
|
||||
token = NumericToken.One;
|
||||
return true;
|
||||
case "2":
|
||||
token = NumericToken.Two;
|
||||
return true;
|
||||
case "3":
|
||||
token = NumericToken.Three;
|
||||
return true;
|
||||
case "8":
|
||||
token = NumericToken.Eight;
|
||||
return true;
|
||||
default:
|
||||
{
|
||||
if (!cachedTokens.TryGetValue(str, out var result))
|
||||
{
|
||||
value = decimal.Parse(str, NumberStyles.Any, CultureInfo.InvariantCulture);
|
||||
result = new NumericToken(value);
|
||||
cachedTokens[str] = result;
|
||||
}
|
||||
|
||||
token = result;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (FormatException)
|
||||
|
@@ -11,13 +11,13 @@
|
||||
{
|
||||
private static readonly HexTokenizer HexTokenizer = new HexTokenizer();
|
||||
private static readonly StringTokenizer StringTokenizer = new StringTokenizer();
|
||||
private static readonly NumericTokenizer NumericTokenizer = new NumericTokenizer();
|
||||
private static readonly NameTokenizer NameTokenizer = new NameTokenizer();
|
||||
private static readonly PlainTokenizer PlainTokenizer = new PlainTokenizer();
|
||||
private static readonly ArrayTokenizer ArrayTokenizer = new ArrayTokenizer();
|
||||
private static readonly DictionaryTokenizer DictionaryTokenizer = new DictionaryTokenizer();
|
||||
private static readonly CommentTokenizer CommentTokenizer = new CommentTokenizer();
|
||||
|
||||
private readonly NumericTokenizer numericTokenizer = new NumericTokenizer();
|
||||
private readonly ScannerScope scope;
|
||||
private readonly IInputBytes inputBytes;
|
||||
private readonly List<(byte firstByte, ITokenizer tokenizer)> customTokenizers = new List<(byte, ITokenizer)>();
|
||||
@@ -83,7 +83,7 @@
|
||||
|
||||
if (tokenizer == null)
|
||||
{
|
||||
if (IsEmpty(currentByte) || ReadHelper.IsWhitespace(currentByte))
|
||||
if (ReadHelper.IsWhitespace(currentByte))
|
||||
{
|
||||
isSkippingSymbol = false;
|
||||
continue;
|
||||
@@ -143,7 +143,7 @@
|
||||
case '-':
|
||||
case '+':
|
||||
case '.':
|
||||
tokenizer = NumericTokenizer;
|
||||
tokenizer = numericTokenizer;
|
||||
break;
|
||||
default:
|
||||
tokenizer = PlainTokenizer;
|
||||
@@ -284,10 +284,5 @@
|
||||
|
||||
throw new PdfDocumentFormatException($"No end of inline image data (EI) was found for image data at position {startsAt}.");
|
||||
}
|
||||
|
||||
private static bool IsEmpty(byte b)
|
||||
{
|
||||
return b == ' ' || b == '\r' || b == '\n' || b == 0;
|
||||
}
|
||||
}
|
||||
}
|
@@ -10,6 +10,12 @@
|
||||
/// </summary>
|
||||
public class NumericToken : IDataToken<decimal>
|
||||
{
|
||||
internal static readonly NumericToken Zero = new NumericToken(0);
|
||||
internal static readonly NumericToken One = new NumericToken(1);
|
||||
internal static readonly NumericToken Two = new NumericToken(2);
|
||||
internal static readonly NumericToken Three = new NumericToken(3);
|
||||
internal static readonly NumericToken Eight = new NumericToken(8);
|
||||
|
||||
/// <inheritdoc />
|
||||
public decimal Data { get; }
|
||||
|
||||
@@ -42,6 +48,12 @@
|
||||
Data = value;
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public override int GetHashCode()
|
||||
{
|
||||
return Data.GetHashCode();
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public override string ToString()
|
||||
{
|
||||
|
@@ -7,18 +7,26 @@
|
||||
private static readonly object Lock = new object();
|
||||
private static readonly Dictionary<string, string> PooledNames = new Dictionary<string, string>();
|
||||
|
||||
public static readonly OperatorToken R = new OperatorToken("R");
|
||||
public static readonly OperatorToken StartObject = new OperatorToken("obj");
|
||||
public static readonly OperatorToken EndObject = new OperatorToken("endobj");
|
||||
public static readonly OperatorToken StartStream = new OperatorToken("stream");
|
||||
public static readonly OperatorToken EndStream = new OperatorToken("endstream");
|
||||
public static readonly OperatorToken Eexec = new OperatorToken("eexec");
|
||||
public static readonly OperatorToken Bt = new OperatorToken("BT");
|
||||
public static readonly OperatorToken Def = new OperatorToken("def");
|
||||
public static readonly OperatorToken Dict = new OperatorToken("dict");
|
||||
public static readonly OperatorToken Readonly = new OperatorToken("readonly");
|
||||
public static readonly OperatorToken Dup = new OperatorToken("dup");
|
||||
public static readonly OperatorToken Eexec = new OperatorToken("eexec");
|
||||
public static readonly OperatorToken EndObject = new OperatorToken("endobj");
|
||||
public static readonly OperatorToken EndStream = new OperatorToken("endstream");
|
||||
public static readonly OperatorToken Et = new OperatorToken("ET");
|
||||
public static readonly OperatorToken For = new OperatorToken("for");
|
||||
public static readonly OperatorToken N = new OperatorToken("n");
|
||||
public static readonly OperatorToken Put = new OperatorToken("put");
|
||||
public static readonly OperatorToken QPop = new OperatorToken("Q");
|
||||
public static readonly OperatorToken QPush = new OperatorToken("q");
|
||||
public static readonly OperatorToken R = new OperatorToken("R");
|
||||
public static readonly OperatorToken Re = new OperatorToken("re");
|
||||
public static readonly OperatorToken Readonly = new OperatorToken("readonly");
|
||||
public static readonly OperatorToken StartObject = new OperatorToken("obj");
|
||||
public static readonly OperatorToken StartStream = new OperatorToken("stream");
|
||||
public static readonly OperatorToken Tf = new OperatorToken("Tf");
|
||||
public static readonly OperatorToken WStar = new OperatorToken("W*");
|
||||
public static readonly OperatorToken Xref = new OperatorToken("xref");
|
||||
|
||||
public string Data { get; }
|
||||
@@ -43,30 +51,46 @@
|
||||
{
|
||||
switch (data)
|
||||
{
|
||||
case "R":
|
||||
return R;
|
||||
case "obj":
|
||||
return StartObject;
|
||||
case "endobj":
|
||||
return EndObject;
|
||||
case "stream":
|
||||
return StartStream;
|
||||
case "endstream":
|
||||
return EndStream;
|
||||
case "BT":
|
||||
return Bt;
|
||||
case "eexec":
|
||||
return Eexec;
|
||||
case "endobj":
|
||||
return EndObject;
|
||||
case "endstream":
|
||||
return EndStream;
|
||||
case "ET":
|
||||
return Et;
|
||||
case "def":
|
||||
return Def;
|
||||
case "dict":
|
||||
return Dict;
|
||||
case "readonly":
|
||||
return Readonly;
|
||||
case "dup":
|
||||
return Dup;
|
||||
case "for":
|
||||
return For;
|
||||
case "dup":
|
||||
return Dup;
|
||||
case "n":
|
||||
return N;
|
||||
case "obj":
|
||||
return StartObject;
|
||||
case "put":
|
||||
return Put;
|
||||
case "Q":
|
||||
return QPop;
|
||||
case "q":
|
||||
return QPush;
|
||||
case "R":
|
||||
return R;
|
||||
case "re":
|
||||
return Re;
|
||||
case "readonly":
|
||||
return Readonly;
|
||||
case "stream":
|
||||
return StartStream;
|
||||
case "Tf":
|
||||
return Tf;
|
||||
case "W*":
|
||||
return WStar;
|
||||
case "xref":
|
||||
return Xref;
|
||||
default:
|
||||
@@ -74,6 +98,13 @@
|
||||
}
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public override int GetHashCode()
|
||||
{
|
||||
return Data.GetHashCode();
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public override string ToString()
|
||||
{
|
||||
return Data;
|
||||
|
Reference in New Issue
Block a user