mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-09-21 04:17:57 +08:00
cache some more common values and improve performance of tokenizers
This commit is contained in:
@@ -1,6 +1,7 @@
|
|||||||
namespace UglyToad.PdfPig.Tokenization
|
namespace UglyToad.PdfPig.Tokenization
|
||||||
{
|
{
|
||||||
using System;
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
using System.Globalization;
|
using System.Globalization;
|
||||||
using System.Text;
|
using System.Text;
|
||||||
using IO;
|
using IO;
|
||||||
@@ -8,6 +9,11 @@
|
|||||||
|
|
||||||
internal class NumericTokenizer : ITokenizer
|
internal class NumericTokenizer : ITokenizer
|
||||||
{
|
{
|
||||||
|
private const byte Zero = 48;
|
||||||
|
private const byte Nine = 57;
|
||||||
|
|
||||||
|
private readonly Dictionary<string, NumericToken> cachedTokens = new Dictionary<string, NumericToken>();
|
||||||
|
|
||||||
public bool ReadsNextByte { get; } = true;
|
public bool ReadsNextByte { get; } = true;
|
||||||
|
|
||||||
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
|
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
|
||||||
@@ -16,7 +22,7 @@
|
|||||||
|
|
||||||
StringBuilder characters;
|
StringBuilder characters;
|
||||||
|
|
||||||
if ((currentByte >= '0' && currentByte <= '9') || currentByte == '-' || currentByte == '+' || currentByte == '.')
|
if ((currentByte >= Zero && currentByte <= Nine) || currentByte == '-' || currentByte == '+' || currentByte == '.')
|
||||||
{
|
{
|
||||||
characters = new StringBuilder();
|
characters = new StringBuilder();
|
||||||
characters.Append((char)currentByte);
|
characters.Append((char)currentByte);
|
||||||
@@ -29,16 +35,15 @@
|
|||||||
while (inputBytes.MoveNext())
|
while (inputBytes.MoveNext())
|
||||||
{
|
{
|
||||||
var b = inputBytes.CurrentByte;
|
var b = inputBytes.CurrentByte;
|
||||||
var c = (char) b;
|
|
||||||
|
|
||||||
if (char.IsDigit(c) ||
|
if ((b >= Zero && b <= Nine) ||
|
||||||
c == '-' ||
|
b == '-' ||
|
||||||
c == '+' ||
|
b == '+' ||
|
||||||
c == '.' ||
|
b == '.' ||
|
||||||
c == 'E' ||
|
b == 'E' ||
|
||||||
c == 'e')
|
b == 'e')
|
||||||
{
|
{
|
||||||
characters.Append(c);
|
characters.Append((char)b);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -56,7 +61,38 @@
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
value = decimal.Parse(characters.ToString(), NumberStyles.Any, CultureInfo.InvariantCulture);
|
var str = characters.ToString();
|
||||||
|
|
||||||
|
switch (str)
|
||||||
|
{
|
||||||
|
case "0":
|
||||||
|
token = NumericToken.Zero;
|
||||||
|
return true;
|
||||||
|
case "1":
|
||||||
|
token = NumericToken.One;
|
||||||
|
return true;
|
||||||
|
case "2":
|
||||||
|
token = NumericToken.Two;
|
||||||
|
return true;
|
||||||
|
case "3":
|
||||||
|
token = NumericToken.Three;
|
||||||
|
return true;
|
||||||
|
case "8":
|
||||||
|
token = NumericToken.Eight;
|
||||||
|
return true;
|
||||||
|
default:
|
||||||
|
{
|
||||||
|
if (!cachedTokens.TryGetValue(str, out var result))
|
||||||
|
{
|
||||||
|
value = decimal.Parse(str, NumberStyles.Any, CultureInfo.InvariantCulture);
|
||||||
|
result = new NumericToken(value);
|
||||||
|
cachedTokens[str] = result;
|
||||||
|
}
|
||||||
|
|
||||||
|
token = result;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (FormatException)
|
catch (FormatException)
|
||||||
|
@@ -11,13 +11,13 @@
|
|||||||
{
|
{
|
||||||
private static readonly HexTokenizer HexTokenizer = new HexTokenizer();
|
private static readonly HexTokenizer HexTokenizer = new HexTokenizer();
|
||||||
private static readonly StringTokenizer StringTokenizer = new StringTokenizer();
|
private static readonly StringTokenizer StringTokenizer = new StringTokenizer();
|
||||||
private static readonly NumericTokenizer NumericTokenizer = new NumericTokenizer();
|
|
||||||
private static readonly NameTokenizer NameTokenizer = new NameTokenizer();
|
private static readonly NameTokenizer NameTokenizer = new NameTokenizer();
|
||||||
private static readonly PlainTokenizer PlainTokenizer = new PlainTokenizer();
|
private static readonly PlainTokenizer PlainTokenizer = new PlainTokenizer();
|
||||||
private static readonly ArrayTokenizer ArrayTokenizer = new ArrayTokenizer();
|
private static readonly ArrayTokenizer ArrayTokenizer = new ArrayTokenizer();
|
||||||
private static readonly DictionaryTokenizer DictionaryTokenizer = new DictionaryTokenizer();
|
private static readonly DictionaryTokenizer DictionaryTokenizer = new DictionaryTokenizer();
|
||||||
private static readonly CommentTokenizer CommentTokenizer = new CommentTokenizer();
|
private static readonly CommentTokenizer CommentTokenizer = new CommentTokenizer();
|
||||||
|
|
||||||
|
private readonly NumericTokenizer numericTokenizer = new NumericTokenizer();
|
||||||
private readonly ScannerScope scope;
|
private readonly ScannerScope scope;
|
||||||
private readonly IInputBytes inputBytes;
|
private readonly IInputBytes inputBytes;
|
||||||
private readonly List<(byte firstByte, ITokenizer tokenizer)> customTokenizers = new List<(byte, ITokenizer)>();
|
private readonly List<(byte firstByte, ITokenizer tokenizer)> customTokenizers = new List<(byte, ITokenizer)>();
|
||||||
@@ -83,7 +83,7 @@
|
|||||||
|
|
||||||
if (tokenizer == null)
|
if (tokenizer == null)
|
||||||
{
|
{
|
||||||
if (IsEmpty(currentByte) || ReadHelper.IsWhitespace(currentByte))
|
if (ReadHelper.IsWhitespace(currentByte))
|
||||||
{
|
{
|
||||||
isSkippingSymbol = false;
|
isSkippingSymbol = false;
|
||||||
continue;
|
continue;
|
||||||
@@ -143,7 +143,7 @@
|
|||||||
case '-':
|
case '-':
|
||||||
case '+':
|
case '+':
|
||||||
case '.':
|
case '.':
|
||||||
tokenizer = NumericTokenizer;
|
tokenizer = numericTokenizer;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
tokenizer = PlainTokenizer;
|
tokenizer = PlainTokenizer;
|
||||||
@@ -284,10 +284,5 @@
|
|||||||
|
|
||||||
throw new PdfDocumentFormatException($"No end of inline image data (EI) was found for image data at position {startsAt}.");
|
throw new PdfDocumentFormatException($"No end of inline image data (EI) was found for image data at position {startsAt}.");
|
||||||
}
|
}
|
||||||
|
|
||||||
private static bool IsEmpty(byte b)
|
|
||||||
{
|
|
||||||
return b == ' ' || b == '\r' || b == '\n' || b == 0;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
@@ -10,6 +10,12 @@
|
|||||||
/// </summary>
|
/// </summary>
|
||||||
public class NumericToken : IDataToken<decimal>
|
public class NumericToken : IDataToken<decimal>
|
||||||
{
|
{
|
||||||
|
internal static readonly NumericToken Zero = new NumericToken(0);
|
||||||
|
internal static readonly NumericToken One = new NumericToken(1);
|
||||||
|
internal static readonly NumericToken Two = new NumericToken(2);
|
||||||
|
internal static readonly NumericToken Three = new NumericToken(3);
|
||||||
|
internal static readonly NumericToken Eight = new NumericToken(8);
|
||||||
|
|
||||||
/// <inheritdoc />
|
/// <inheritdoc />
|
||||||
public decimal Data { get; }
|
public decimal Data { get; }
|
||||||
|
|
||||||
@@ -21,17 +27,17 @@
|
|||||||
/// <summary>
|
/// <summary>
|
||||||
/// The value of this number as an <see langword="int"/>.
|
/// The value of this number as an <see langword="int"/>.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public int Int => (int) Data;
|
public int Int => (int)Data;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// The value of this number as a <see langword="long"/>.
|
/// The value of this number as a <see langword="long"/>.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public long Long => (long) Data;
|
public long Long => (long)Data;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// The value of this number as a <see langword="double"/>.
|
/// The value of this number as a <see langword="double"/>.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public double Double => (double) Data;
|
public double Double => (double)Data;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Create a <see cref="NumericToken"/>.
|
/// Create a <see cref="NumericToken"/>.
|
||||||
@@ -42,6 +48,12 @@
|
|||||||
Data = value;
|
Data = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <inheritdoc />
|
||||||
|
public override int GetHashCode()
|
||||||
|
{
|
||||||
|
return Data.GetHashCode();
|
||||||
|
}
|
||||||
|
|
||||||
/// <inheritdoc />
|
/// <inheritdoc />
|
||||||
public override string ToString()
|
public override string ToString()
|
||||||
{
|
{
|
||||||
|
@@ -7,18 +7,26 @@
|
|||||||
private static readonly object Lock = new object();
|
private static readonly object Lock = new object();
|
||||||
private static readonly Dictionary<string, string> PooledNames = new Dictionary<string, string>();
|
private static readonly Dictionary<string, string> PooledNames = new Dictionary<string, string>();
|
||||||
|
|
||||||
public static readonly OperatorToken R = new OperatorToken("R");
|
public static readonly OperatorToken Bt = new OperatorToken("BT");
|
||||||
public static readonly OperatorToken StartObject = new OperatorToken("obj");
|
|
||||||
public static readonly OperatorToken EndObject = new OperatorToken("endobj");
|
|
||||||
public static readonly OperatorToken StartStream = new OperatorToken("stream");
|
|
||||||
public static readonly OperatorToken EndStream = new OperatorToken("endstream");
|
|
||||||
public static readonly OperatorToken Eexec = new OperatorToken("eexec");
|
|
||||||
public static readonly OperatorToken Def = new OperatorToken("def");
|
public static readonly OperatorToken Def = new OperatorToken("def");
|
||||||
public static readonly OperatorToken Dict = new OperatorToken("dict");
|
public static readonly OperatorToken Dict = new OperatorToken("dict");
|
||||||
public static readonly OperatorToken Readonly = new OperatorToken("readonly");
|
|
||||||
public static readonly OperatorToken Dup = new OperatorToken("dup");
|
public static readonly OperatorToken Dup = new OperatorToken("dup");
|
||||||
|
public static readonly OperatorToken Eexec = new OperatorToken("eexec");
|
||||||
|
public static readonly OperatorToken EndObject = new OperatorToken("endobj");
|
||||||
|
public static readonly OperatorToken EndStream = new OperatorToken("endstream");
|
||||||
|
public static readonly OperatorToken Et = new OperatorToken("ET");
|
||||||
public static readonly OperatorToken For = new OperatorToken("for");
|
public static readonly OperatorToken For = new OperatorToken("for");
|
||||||
|
public static readonly OperatorToken N = new OperatorToken("n");
|
||||||
public static readonly OperatorToken Put = new OperatorToken("put");
|
public static readonly OperatorToken Put = new OperatorToken("put");
|
||||||
|
public static readonly OperatorToken QPop = new OperatorToken("Q");
|
||||||
|
public static readonly OperatorToken QPush = new OperatorToken("q");
|
||||||
|
public static readonly OperatorToken R = new OperatorToken("R");
|
||||||
|
public static readonly OperatorToken Re = new OperatorToken("re");
|
||||||
|
public static readonly OperatorToken Readonly = new OperatorToken("readonly");
|
||||||
|
public static readonly OperatorToken StartObject = new OperatorToken("obj");
|
||||||
|
public static readonly OperatorToken StartStream = new OperatorToken("stream");
|
||||||
|
public static readonly OperatorToken Tf = new OperatorToken("Tf");
|
||||||
|
public static readonly OperatorToken WStar = new OperatorToken("W*");
|
||||||
public static readonly OperatorToken Xref = new OperatorToken("xref");
|
public static readonly OperatorToken Xref = new OperatorToken("xref");
|
||||||
|
|
||||||
public string Data { get; }
|
public string Data { get; }
|
||||||
@@ -43,30 +51,46 @@
|
|||||||
{
|
{
|
||||||
switch (data)
|
switch (data)
|
||||||
{
|
{
|
||||||
case "R":
|
case "BT":
|
||||||
return R;
|
return Bt;
|
||||||
case "obj":
|
|
||||||
return StartObject;
|
|
||||||
case "endobj":
|
|
||||||
return EndObject;
|
|
||||||
case "stream":
|
|
||||||
return StartStream;
|
|
||||||
case "endstream":
|
|
||||||
return EndStream;
|
|
||||||
case "eexec":
|
case "eexec":
|
||||||
return Eexec;
|
return Eexec;
|
||||||
|
case "endobj":
|
||||||
|
return EndObject;
|
||||||
|
case "endstream":
|
||||||
|
return EndStream;
|
||||||
|
case "ET":
|
||||||
|
return Et;
|
||||||
case "def":
|
case "def":
|
||||||
return Def;
|
return Def;
|
||||||
case "dict":
|
case "dict":
|
||||||
return Dict;
|
return Dict;
|
||||||
case "readonly":
|
|
||||||
return Readonly;
|
|
||||||
case "dup":
|
|
||||||
return Dup;
|
|
||||||
case "for":
|
case "for":
|
||||||
return For;
|
return For;
|
||||||
|
case "dup":
|
||||||
|
return Dup;
|
||||||
|
case "n":
|
||||||
|
return N;
|
||||||
|
case "obj":
|
||||||
|
return StartObject;
|
||||||
case "put":
|
case "put":
|
||||||
return Put;
|
return Put;
|
||||||
|
case "Q":
|
||||||
|
return QPop;
|
||||||
|
case "q":
|
||||||
|
return QPush;
|
||||||
|
case "R":
|
||||||
|
return R;
|
||||||
|
case "re":
|
||||||
|
return Re;
|
||||||
|
case "readonly":
|
||||||
|
return Readonly;
|
||||||
|
case "stream":
|
||||||
|
return StartStream;
|
||||||
|
case "Tf":
|
||||||
|
return Tf;
|
||||||
|
case "W*":
|
||||||
|
return WStar;
|
||||||
case "xref":
|
case "xref":
|
||||||
return Xref;
|
return Xref;
|
||||||
default:
|
default:
|
||||||
@@ -74,6 +98,13 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <inheritdoc />
|
||||||
|
public override int GetHashCode()
|
||||||
|
{
|
||||||
|
return Data.GetHashCode();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <inheritdoc />
|
||||||
public override string ToString()
|
public override string ToString()
|
||||||
{
|
{
|
||||||
return Data;
|
return Data;
|
||||||
|
Reference in New Issue
Block a user