Reduce Allocations (#821)

* Introduce ValueStringBuilder

* Make NumericTokenizer and PlainTokenizer thread-safe

* Replace ListPool with ArrayPoolBufferWriter

* Seal ITokenizer classes

* Eliminate array allocation in Type1ArrayTokenizer

* Eliminate array allocation in AcroFormFactory

* Eliminate StringBuilder allocation in Page.GetText

* Optimize PdfSubpath.ToLines

* Eliminate various allocations when parsing CompactFontFormat

* Remove unused FromOctalInt helper

* Ensure Pdf.Content is not null

* Write ASCII values directly to stream (avoiding allocations)

* Avoid encoding additional ASCII values

* Eliminate allocations in TokenWriter.WriteName

* Eliminate allocation in TokenWriter.WriteNumber

* Add System.Memory reference to Fonts
This commit is contained in:
Jason Nelson
2024-04-28 10:55:58 -07:00
committed by GitHub
parent 1ef2e127a6
commit 7f42a8d60c
32 changed files with 516 additions and 471 deletions

View File

@@ -42,14 +42,6 @@
return sum;
}
/// <summary>
/// Interpret an int as octal.
/// </summary>
public static int FromOctalInt(int input)
{
return Convert.ToInt32($"{input}", 8);
}
private static int QuickPower(int x, int pow)
{
int ret = 1;

View File

@@ -634,7 +634,7 @@
throw new ArgumentException("BezierCurve.ToLines(): n must be greater than 0.");
}
List<Line> lines = new List<Line>();
var lines = new Line[n];
var previousPoint = StartPoint;
for (int p = 1; p <= n; p++)
@@ -642,7 +642,7 @@
double t = p / (double)n;
var currentPoint = new PdfPoint(ValueWithT(StartPoint.X, ControlPoint.X, EndPoint.X, t),
ValueWithT(StartPoint.Y, ControlPoint.Y, EndPoint.Y, t));
lines.Add(new Line(previousPoint, currentPoint));
lines[p - 1] = new Line(previousPoint, currentPoint);
previousPoint = currentPoint;
}
@@ -793,7 +793,7 @@
throw new ArgumentException("BezierCurve.ToLines(): n must be greater than 0.");
}
List<Line> lines = new List<Line>();
var lines = new Line[n];
var previousPoint = StartPoint;
for (int p = 1; p <= n; p++)
@@ -801,7 +801,7 @@
double t = p / (double)n;
var currentPoint = new PdfPoint(ValueWithT(StartPoint.X, FirstControlPoint.X, SecondControlPoint.X, EndPoint.X, t),
ValueWithT(StartPoint.Y, FirstControlPoint.Y, SecondControlPoint.Y, EndPoint.Y, t));
lines.Add(new Line(previousPoint, currentPoint));
lines[p - 1] = new Line(previousPoint, currentPoint);
previousPoint = currentPoint;
}
return lines;

View File

@@ -10,13 +10,8 @@
public bool IsCidCharset { get; } = false;
protected CompactFontFormatCharset(IReadOnlyList<(int glyphId, int stringId, string name)> data)
protected CompactFontFormatCharset(ReadOnlySpan<(int glyphId, int stringId, string name)> data)
{
if (data == null)
{
throw new ArgumentNullException(nameof(data));
}
var dictionary = new Dictionary<int, (int stringId, string name)>
{
{0, (0, ".notdef")}

View File

@@ -1,14 +1,14 @@
namespace UglyToad.PdfPig.Fonts.CompactFontFormat.Charsets
{
using System.Collections.Generic;
using System;
/// <summary>
/// A Charset from a Compact Font Format font file best for fonts with relatively unordered string ids.
/// </summary>
internal class CompactFontFormatFormat0Charset : CompactFontFormatCharset
internal sealed class CompactFontFormatFormat0Charset : CompactFontFormatCharset
{
public CompactFontFormatFormat0Charset(IReadOnlyList<(int glyphId, int stringId, string name)> data)
:base(data)
public CompactFontFormatFormat0Charset(ReadOnlySpan<(int glyphId, int stringId, string name)> data)
: base(data)
{
}
}

View File

@@ -1,13 +1,13 @@
namespace UglyToad.PdfPig.Fonts.CompactFontFormat.Charsets
{
using System.Collections.Generic;
using System;
/// <summary>
/// A Charset from a Compact Font Format font file best for fonts with well ordered string ids.
/// </summary>
internal class CompactFontFormatFormat1Charset : CompactFontFormatCharset
internal sealed class CompactFontFormatFormat1Charset : CompactFontFormatCharset
{
public CompactFontFormatFormat1Charset(IReadOnlyList<(int glyphId, int stringId, string name)> data)
public CompactFontFormatFormat1Charset(ReadOnlySpan<(int glyphId, int stringId, string name)> data)
: base(data)
{
}

View File

@@ -1,13 +1,13 @@
namespace UglyToad.PdfPig.Fonts.CompactFontFormat.Charsets
{
using System.Collections.Generic;
using System;
/// <summary>
/// A Charset from a Compact Font Format font file best for fonts with a large number of well ordered string ids.
/// </summary>
internal class CompactFontFormatFormat2Charset : CompactFontFormatCharset
{
public CompactFontFormatFormat2Charset(IReadOnlyList<(int glyphId, int stringId, string name)> data)
public CompactFontFormatFormat2Charset(ReadOnlySpan<(int glyphId, int stringId, string name)> data)
: base(data)
{
}

View File

@@ -5,6 +5,7 @@
using Charsets;
using Encodings;
using Fonts;
using UglyToad.PdfPig.Core;
internal static class CompactFontFormatEncodingReader
{
@@ -36,13 +37,13 @@
{
var numberOfCodes = data.ReadCard8();
var values = new List<(int code, int sid, string str)>();
using var values = new ArrayPoolBufferWriter<(int code, int sid, string str)>();
for (var i = 1; i <= numberOfCodes; i++)
{
var code = data.ReadCard8();
var sid = charset.GetStringIdByGlyphId(i);
var str = ReadString(sid, stringIndex);
values.Add((code, sid, str));
values.Write((code, sid, str));
}
IReadOnlyList<CompactFontFormatBuiltInEncoding.Supplement> supplements = [];
@@ -51,7 +52,7 @@
supplements = ReadSupplement(data, stringIndex);
}
return new CompactFontFormatFormat0Encoding(values, supplements);
return new CompactFontFormatFormat0Encoding(values.WrittenSpan, supplements);
}
private static CompactFontFormatFormat1Encoding ReadFormat1Encoding(CompactFontFormatData data, ICompactFontFormatCharset charset, ReadOnlySpan<string> stringIndex, byte format)

View File

@@ -1,10 +1,12 @@
namespace UglyToad.PdfPig.Fonts.CompactFontFormat
{
using System;
using System.Collections.Generic;
internal sealed class CompactFontFormatFormat0Encoding : CompactFontFormatBuiltInEncoding
{
public CompactFontFormatFormat0Encoding(IReadOnlyList<(int code, int sid, string str)> values,
public CompactFontFormatFormat0Encoding(
ReadOnlySpan<(int code, int sid, string str)> values,
IReadOnlyList<Supplement> supplements) : base(supplements)
{
Add(0, 0, NotDefined);

View File

@@ -139,42 +139,42 @@
{
case 0:
{
var glyphToNamesAndStringId = new List<(int glyphId, int stringId, string name)>();
using var glyphToNamesAndStringId = new ArrayPoolBufferWriter<(int glyphId, int stringId, string name)>();
for (var glyphId = 1; glyphId < charStringIndex.Count; glyphId++)
{
var stringId = data.ReadSid();
glyphToNamesAndStringId.Add((glyphId, stringId, ReadString(stringId, stringIndex)));
glyphToNamesAndStringId.Write((glyphId, stringId, ReadString(stringId, stringIndex)));
}
return new CompactFontFormatFormat0Charset(glyphToNamesAndStringId);
return new CompactFontFormatFormat0Charset(glyphToNamesAndStringId.WrittenSpan);
}
case 1:
case 2:
{
var glyphToNamesAndStringId = new List<(int glyphId, int stringId, string name)>();
using var glyphToNamesAndStringId = new ArrayPoolBufferWriter<(int glyphId, int stringId, string name)>();
for (var glyphId = 1; glyphId < charStringIndex.Count; glyphId++)
{
var firstSid = data.ReadSid();
var numberInRange = format == 1 ? data.ReadCard8() : data.ReadCard16();
glyphToNamesAndStringId.Add((glyphId, firstSid, ReadString(firstSid, stringIndex)));
glyphToNamesAndStringId.Write((glyphId, firstSid, ReadString(firstSid, stringIndex)));
for (var i = 0; i < numberInRange; i++)
{
glyphId++;
var sid = firstSid + i + 1;
glyphToNamesAndStringId.Add((glyphId, sid, ReadString(sid, stringIndex)));
glyphToNamesAndStringId.Write((glyphId, sid, ReadString(sid, stringIndex)));
}
}
if (format == 1)
{
return new CompactFontFormatFormat1Charset(glyphToNamesAndStringId);
return new CompactFontFormatFormat1Charset(glyphToNamesAndStringId.WrittenSpan);
}
return new CompactFontFormatFormat2Charset(glyphToNamesAndStringId);
return new CompactFontFormatFormat2Charset(glyphToNamesAndStringId.WrittenSpan);
}
default:
throw new InvalidOperationException($"Unrecognized format for the Charset table in a CFF font. Got: {format}.");

View File

@@ -9,11 +9,13 @@
using Tokenization;
/// <inheritdoc />
public class Type1ArrayTokenizer : ITokenizer
public sealed class Type1ArrayTokenizer : ITokenizer
{
/// <inheritdoc />
public bool ReadsNextByte { get; } = false;
private static readonly string[] Space = [" "];
/// <inheritdoc />
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
{
@@ -36,7 +38,7 @@
builder.Append((char) inputBytes.CurrentByte);
}
var parts = builder.ToString().Split(new[] {" "}, StringSplitOptions.RemoveEmptyEntries);
var parts = builder.ToString().Split(Space, StringSplitOptions.RemoveEmptyEntries);
var tokens = new List<IToken>();
@@ -50,7 +52,7 @@
}
else
{
tokens.Add(OperatorToken.Create(part));
tokens.Add(OperatorToken.Create(part.AsSpan()));
}
continue;
@@ -68,8 +70,7 @@
continue;
}
tokens.Add(OperatorToken.Create(part));
tokens.Add(OperatorToken.Create(part.AsSpan()));
}
token = new ArrayToken(tokens);

View File

@@ -38,4 +38,7 @@
<ItemGroup Condition="'$(TargetFramework)'=='net462'">
<PackageReference Include="System.ValueTuple" Version="4.5.0" />
</ItemGroup>
<ItemGroup Condition="'$(TargetFramework)'=='netstandard2.0' or '$(TargetFramework)'=='net462' or '$(TargetFramework)'=='net471'">
<PackageReference Include="System.Memory" Version="4.5.5" />
</ItemGroup>
</Project>

View File

@@ -1,15 +0,0 @@
namespace UglyToad.PdfPig.Tests.Util
{
    using PdfPig.Core;

    /// <summary>
    /// Tests for <see cref="OctalHelpers"/>.
    /// </summary>
    public class OctalHelpersTests
    {
        /// <summary>
        /// The digits "110" interpreted as an octal numeral are 1*64 + 1*8 + 0 = 72.
        /// </summary>
        [Fact]
        public void CorrectlyConverts()
        {
            const int octalDigits = 110;
            const int expected = 72;

            Assert.Equal(expected, OctalHelpers.FromOctalInt(octalDigits));
        }
    }
}

View File

@@ -5,7 +5,7 @@
using Scanner;
using Tokens;
internal class ArrayTokenizer : ITokenizer
internal sealed class ArrayTokenizer : ITokenizer
{
private readonly bool usePdfDocEncoding;

View File

@@ -1,10 +1,10 @@
namespace UglyToad.PdfPig.Tokenization
{
using System.Text;
using Core;
using System.Text;
using Tokens;
internal class CommentTokenizer : ITokenizer
internal sealed class CommentTokenizer : ITokenizer
{
public bool ReadsNextByte { get; } = true;
@@ -17,7 +17,7 @@
return false;
}
var builder = new StringBuilder();
using var builder = new ValueStringBuilder();
while (inputBytes.MoveNext() && !ReadHelper.IsEndOfLine(inputBytes.CurrentByte))
{

View File

@@ -6,7 +6,7 @@
/// <summary>
/// Read an <see cref="EndOfLineToken"/>.
/// </summary>
public class EndOfLineTokenizer : ITokenizer
public sealed class EndOfLineTokenizer : ITokenizer
{
/// <inheritdoc />
public bool ReadsNextByte { get; } = false;

View File

@@ -1,64 +0,0 @@
using System.Collections.Generic;
namespace UglyToad.PdfPig.Tokenization
{
    /// <summary>
    /// A bounded pool of reusable <see cref="List{T}"/> instances.
    /// Access to the pooled lists is serialized with a private lock.
    /// </summary>
    public class ListPool<T>
    {
        private readonly int capacity;
        private readonly object gate = new object();
        private readonly Stack<List<T>> available = new Stack<List<T>>();

        /// <summary>
        /// Create a pool that holds at most <paramref name="capacity"/> lists and
        /// pre-fill it with that many empty lists (each with an initial capacity of 10).
        /// </summary>
        public ListPool(int capacity = 5)
        {
            this.capacity = capacity;

            while (available.Count < capacity)
            {
                available.Push(new List<T>(10));
            }
        }

        /// <summary>
        /// Take a list from the pool, or a freshly created one when the pool is empty.
        /// Hand it back with <see cref="Return"/> once finished.
        /// </summary>
        public List<T> Borrow()
        {
            lock (gate)
            {
                return available.Count > 0 ? available.Pop() : new List<T>();
            }
        }

        /// <summary>
        /// Clear <paramref name="instance"/> and put it back in the pool.
        /// A <c>null</c> argument is ignored; when the pool is already full the
        /// (cleared) list is simply not retained.
        /// </summary>
        public void Return(List<T> instance)
        {
            if (instance is null)
            {
                return;
            }

            // The list is emptied before the lock is taken, whether or not it ends up pooled.
            instance.Clear();

            lock (gate)
            {
                if (available.Count >= capacity)
                {
                    return;
                }

                available.Push(instance);
            }
        }
    }
}

View File

@@ -9,7 +9,7 @@
using System.Text.Unicode;
#endif
internal class NameTokenizer : ITokenizer
internal sealed class NameTokenizer : ITokenizer
{
static NameTokenizer()
{

View File

@@ -6,10 +6,8 @@
using Core;
using Tokens;
internal class NumericTokenizer : ITokenizer
internal sealed class NumericTokenizer : ITokenizer
{
private readonly StringBuilder stringBuilder = new StringBuilder();
private const byte Zero = 48;
private const byte Nine = 57;
@@ -19,17 +17,16 @@
{
token = null;
StringBuilder characters;
var initialSymbol = currentByte == '-' || currentByte == '+';
using var characters = new ValueStringBuilder(stackalloc char[32]);
var initialSymbol = currentByte is (byte)'-' or (byte)'+';
if ((currentByte >= Zero && currentByte <= Nine) || currentByte == '.')
{
characters = stringBuilder;
characters.Append((char)currentByte);
}
else if (initialSymbol)
{
characters = stringBuilder;
characters.Append((char) currentByte);
}
else
@@ -67,115 +64,103 @@
}
}
try
var str = characters.ToString();
switch (str)
{
var str = characters.ToString();
characters.Clear();
switch (str)
{
case "-1":
token = NumericToken.MinusOne;
return true;
case "-":
case ".":
case "0":
case "0000":
token = NumericToken.Zero;
return true;
case "1":
token = NumericToken.One;
return true;
case "2":
token = NumericToken.Two;
return true;
case "3":
token = NumericToken.Three;
return true;
case "4":
token = NumericToken.Four;
return true;
case "5":
token = NumericToken.Five;
return true;
case "6":
token = NumericToken.Six;
return true;
case "7":
token = NumericToken.Seven;
return true;
case "8":
token = NumericToken.Eight;
return true;
case "9":
token = NumericToken.Nine;
return true;
case "10":
token = NumericToken.Ten;
return true;
case "11":
token = NumericToken.Eleven;
return true;
case "12":
token = NumericToken.Twelve;
return true;
case "13":
token = NumericToken.Thirteen;
return true;
case "14":
token = NumericToken.Fourteen;
return true;
case "15":
token = NumericToken.Fifteen;
return true;
case "16":
token = NumericToken.Sixteen;
return true;
case "17":
token = NumericToken.Seventeen;
return true;
case "18":
token = NumericToken.Eighteen;
return true;
case "19":
token = NumericToken.Nineteen;
return true;
case "20":
token = NumericToken.Twenty;
return true;
case "100":
token = NumericToken.OneHundred;
return true;
case "500":
token = NumericToken.FiveHundred;
return true;
case "1000":
token = NumericToken.OneThousand;
return true;
default:
if (!double.TryParse(str, NumberStyles.Any, CultureInfo.InvariantCulture, out var value))
case "-1":
token = NumericToken.MinusOne;
return true;
case "-":
case ".":
case "0":
case "0000":
token = NumericToken.Zero;
return true;
case "1":
token = NumericToken.One;
return true;
case "2":
token = NumericToken.Two;
return true;
case "3":
token = NumericToken.Three;
return true;
case "4":
token = NumericToken.Four;
return true;
case "5":
token = NumericToken.Five;
return true;
case "6":
token = NumericToken.Six;
return true;
case "7":
token = NumericToken.Seven;
return true;
case "8":
token = NumericToken.Eight;
return true;
case "9":
token = NumericToken.Nine;
return true;
case "10":
token = NumericToken.Ten;
return true;
case "11":
token = NumericToken.Eleven;
return true;
case "12":
token = NumericToken.Twelve;
return true;
case "13":
token = NumericToken.Thirteen;
return true;
case "14":
token = NumericToken.Fourteen;
return true;
case "15":
token = NumericToken.Fifteen;
return true;
case "16":
token = NumericToken.Sixteen;
return true;
case "17":
token = NumericToken.Seventeen;
return true;
case "18":
token = NumericToken.Eighteen;
return true;
case "19":
token = NumericToken.Nineteen;
return true;
case "20":
token = NumericToken.Twenty;
return true;
case "100":
token = NumericToken.OneHundred;
return true;
case "500":
token = NumericToken.FiveHundred;
return true;
case "1000":
token = NumericToken.OneThousand;
return true;
default:
if (!double.TryParse(str, NumberStyles.Any, CultureInfo.InvariantCulture, out var value))
{
if (TryParseInvalidNumber(str, out value))
{
if (TryParseInvalidNumber(str, out value))
{
token = new NumericToken(value);
return true;
}
return false;
token = new NumericToken(value);
return true;
}
token = new NumericToken(value);
return true;
}
}
catch (FormatException)
{
return false;
}
catch (OverflowException)
{
return false;
}
return false;
}
token = new NumericToken(value);
return true;
}
}
private static bool TryParseInvalidNumber(string numeric, out double result)

View File

@@ -4,23 +4,23 @@
using System.Text;
using Tokens;
internal class PlainTokenizer : ITokenizer
internal sealed class PlainTokenizer : ITokenizer
{
private readonly StringBuilder stringBuilder = new StringBuilder();
public bool ReadsNextByte { get; } = true;
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
{
token = null;
if (ReadHelper.IsWhitespace(currentByte))
{
token = null;
return false;
}
var builder = stringBuilder;
using var builder = new ValueStringBuilder(stackalloc char[16]);
builder.Append((char)currentByte);
while (inputBytes.MoveNext())
{
if (ReadHelper.IsWhitespace(inputBytes.CurrentByte))
@@ -28,10 +28,7 @@
break;
}
if (inputBytes.CurrentByte == '<' || inputBytes.CurrentByte == '['
|| inputBytes.CurrentByte == '/' || inputBytes.CurrentByte == ']'
|| inputBytes.CurrentByte == '>' || inputBytes.CurrentByte == '('
|| inputBytes.CurrentByte == ')')
if (inputBytes.CurrentByte is (byte)'<' or (byte)'[' or (byte)'/' or (byte)']' or (byte)'>' or (byte)'(' or (byte)')')
{
break;
}
@@ -39,24 +36,14 @@
builder.Append((char) inputBytes.CurrentByte);
}
var text = builder.ToString();
builder.Clear();
var text = builder.AsSpan();
switch (text)
{
case "true":
token = BooleanToken.True;
break;
case "false":
token = BooleanToken.False;
break;
case "null":
token = NullToken.Instance;
break;
default:
token = OperatorToken.Create(text);
break;
}
token = text switch {
"true" => BooleanToken.True,
"false" => BooleanToken.False,
"null" => NullToken.Instance,
_ => OperatorToken.Create(text),
};
return true;
}

View File

@@ -13,11 +13,9 @@
private static readonly CommentTokenizer CommentTokenizer = new CommentTokenizer();
private static readonly HexTokenizer HexTokenizer = new HexTokenizer();
private static readonly NameTokenizer NameTokenizer = new NameTokenizer();
private static readonly PlainTokenizer PlainTokenizer = new PlainTokenizer();
private static readonly NumericTokenizer NumericTokenizer = new NumericTokenizer();
// NOTE: these are not thread safe so should not be static. Each instance includes a
// StringBuilder it re-uses.
private readonly PlainTokenizer PlainTokenizer = new PlainTokenizer();
private readonly NumericTokenizer NumericTokenizer = new NumericTokenizer();
private readonly StringTokenizer stringTokenizer;
private readonly ArrayTokenizer arrayTokenizer;
private readonly DictionaryTokenizer dictionaryTokenizer;

View File

@@ -1,65 +0,0 @@
namespace UglyToad.PdfPig.Tokenization
{
    using System.Collections.Generic;
    using System.Text;

    /// <summary>
    /// A bounded pool of reusable <see cref="StringBuilder"/>s, intended to reduce allocations during tokenization.
    /// Access to the pooled builders is serialized with a private lock.
    /// </summary>
    internal class StringBuilderPool
    {
        private readonly int capacity;
        private readonly object gate = new object();
        private readonly Stack<StringBuilder> available = new Stack<StringBuilder>();

        /// <summary>
        /// Create a new <see cref="StringBuilderPool"/> that holds at most <paramref name="capacity"/>
        /// builders and pre-fill it with that many empty builders.
        /// </summary>
        public StringBuilderPool(int capacity = 5)
        {
            this.capacity = capacity;

            while (available.Count < capacity)
            {
                available.Push(new StringBuilder());
            }
        }

        /// <summary>
        /// Take a builder from the pool, or a freshly created one when the pool is empty.
        /// Hand it back with <see cref="Return"/> once finished.
        /// </summary>
        public StringBuilder Borrow()
        {
            lock (gate)
            {
                return available.Count > 0 ? available.Pop() : new StringBuilder();
            }
        }

        /// <summary>
        /// Clear <paramref name="instance"/> and put it back in the pool.
        /// A <c>null</c> argument is ignored; when the pool is already full the
        /// (cleared) builder is simply not retained.
        /// </summary>
        public void Return(StringBuilder instance)
        {
            if (instance is null)
            {
                return;
            }

            // The builder is emptied before the lock is taken, whether or not it ends up pooled.
            instance.Clear();

            lock (gate)
            {
                if (available.Count >= capacity)
                {
                    return;
                }

                available.Push(instance);
            }
        }
    }
}

View File

@@ -0,0 +1,207 @@
#nullable enable
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// Via: https://github.com/dotnet/runtime/blob/ab7492c4a77f0315d384b4c7648d0cefa36b18d1/src/libraries/Common/src/System/Text/ValueStringBuilder.cs
using System.Buffers;
using System.Diagnostics;
using System.Runtime.CompilerServices;
namespace System.Text;
/// <summary>
/// A stack-only string builder. Characters are written into the buffer supplied at construction
/// (or an array rented from <see cref="ArrayPool{T}.Shared"/>); when that buffer is full the
/// contents move to a larger rented array. Any rented array is handed back by <see cref="Dispose"/>.
/// </summary>
internal ref struct ValueStringBuilder
{
    private char[]? _arrayToReturnToPool;
    private Span<char> _chars;
    private int _pos;

    /// <summary>
    /// Start building in <paramref name="initialBuffer"/>. Nothing is rented until the buffer overflows.
    /// </summary>
    public ValueStringBuilder(Span<char> initialBuffer)
    {
        _pos = 0;
        _chars = initialBuffer;
        _arrayToReturnToPool = null;
    }

    /// <summary>
    /// Start building in an array of at least <paramref name="initialCapacity"/> chars rented from the shared pool.
    /// </summary>
    public ValueStringBuilder(int initialCapacity)
    {
        char[] rented = ArrayPool<char>.Shared.Rent(initialCapacity);

        _arrayToReturnToPool = rented;
        _chars = rented;
        _pos = 0;
    }

    /// <summary>
    /// Number of characters written so far. Setting it moves the write position
    /// (only checked against the buffer bounds by debug assertions).
    /// </summary>
    public int Length
    {
        get
        {
            return _pos;
        }
        set
        {
            Debug.Assert(value >= 0);
            Debug.Assert(value <= _chars.Length);
            _pos = value;
        }
    }

    /// <summary>Size of the current backing buffer.</summary>
    public int Capacity
    {
        get { return _chars.Length; }
    }

    /// <summary>Reference to the character at <paramref name="index"/> in the backing buffer.</summary>
    public ref char this[int index]
    {
        get
        {
            Debug.Assert(index < _pos);
            return ref _chars[index];
        }
    }

    /// <summary>
    /// Materialize the written characters as a string and then <see cref="Dispose"/> this builder,
    /// so the builder is empty (and any rented array returned) after the call.
    /// </summary>
    public override string ToString()
    {
        Span<char> written = _chars.Slice(0, _pos);
        string result = written.ToString();

        Dispose();

        return result;
    }

    /// <summary>The characters written so far.</summary>
    public readonly ReadOnlySpan<char> AsSpan()
    {
        return _chars.Slice(0, _pos);
    }

    /// <summary>The written characters from <paramref name="start"/> to the current position.</summary>
    public readonly ReadOnlySpan<char> AsSpan(int start)
    {
        return _chars.Slice(start, _pos - start);
    }

    /// <summary><paramref name="length"/> characters of the backing buffer beginning at <paramref name="start"/>.</summary>
    public readonly ReadOnlySpan<char> AsSpan(int start, int length)
    {
        return _chars.Slice(start, length);
    }

    /// <summary>Append a single character, growing the buffer if it is full.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public void Append(char c)
    {
        int index = _pos;

        if ((uint)index >= (uint)_chars.Length)
        {
            GrowAndAppend(c);
            return;
        }

        _chars[index] = c;
        _pos = index + 1;
    }

    /// <summary>Append a string; <c>null</c> appends nothing.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public void Append(string? s)
    {
        if (s == null)
        {
            return;
        }

        int index = _pos;

        // very common case, e.g. appending strings from NumberFormatInfo like separators, percent symbols, etc.
        bool singleCharFits = s.Length == 1 && (uint)index < (uint)_chars.Length;

        if (!singleCharFits)
        {
            AppendSlow(s);
            return;
        }

        _chars[index] = s[0];
        _pos = index + 1;
    }

    // General string append: make room if needed, then copy the whole string in.
    private void AppendSlow(string s)
    {
        int start = _pos;
        int needed = s.Length;

        if (start > _chars.Length - needed)
        {
            Grow(needed);
        }

        s.AsSpan().CopyTo(_chars.Slice(start));
        _pos += needed;
    }

    /// <summary>Append <paramref name="c"/> repeated <paramref name="count"/> times.</summary>
    public void Append(char c, int count)
    {
        if (_pos > _chars.Length - count)
        {
            Grow(count);
        }

        _chars.Slice(_pos, count).Fill(c);
        _pos += count;
    }

    /// <summary>Append all characters of <paramref name="value"/>.</summary>
    public void Append(ReadOnlySpan<char> value)
    {
        int needed = value.Length;

        if (_pos > _chars.Length - needed)
        {
            Grow(needed);
        }

        value.CopyTo(_chars.Slice(_pos));
        _pos += needed;
    }

    /// <summary>
    /// Reserve <paramref name="length"/> characters at the current position, advance past them,
    /// and return the reserved region for the caller to fill.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public Span<char> AppendSpan(int length)
    {
        int start = _pos;

        if (start > _chars.Length - length)
        {
            Grow(length);
        }

        _pos = start + length;

        return _chars.Slice(start, length);
    }

    // Out-of-line slow path for Append(char) when the buffer is full.
    [MethodImpl(MethodImplOptions.NoInlining)]
    private void GrowAndAppend(char c)
    {
        Grow(1);
        Append(c);
    }

    /// <summary>
    /// Resize the internal buffer either by doubling current buffer size or
    /// by adding <paramref name="additionalCapacityBeyondPos"/> to
    /// <see cref="_pos"/> whichever is greater.
    /// </summary>
    /// <param name="additionalCapacityBeyondPos">
    /// Number of chars requested beyond current position.
    /// </param>
    [MethodImpl(MethodImplOptions.NoInlining)]
    private void Grow(int additionalCapacityBeyondPos)
    {
        Debug.Assert(additionalCapacityBeyondPos > 0);
        Debug.Assert(_pos > _chars.Length - additionalCapacityBeyondPos, "Grow called incorrectly, no resize is needed.");

        const uint ArrayMaxLength = 0x7FFFFFC7; // same as Array.MaxLength

        // Increase to at least the required size (_pos + additionalCapacityBeyondPos), but try
        // to double the size if possible, bounding the doubling to not go beyond the max array length.
        uint required = (uint)(_pos + additionalCapacityBeyondPos);
        uint doubled = Math.Min((uint)_chars.Length * 2, ArrayMaxLength);
        int newCapacity = (int)Math.Max(required, doubled);

        // Make sure to let Rent throw an exception if the caller has a bug and the desired capacity is negative.
        // This could also go negative if the actual required length wraps around.
        char[] replacement = ArrayPool<char>.Shared.Rent(newCapacity);

        _chars.Slice(0, _pos).CopyTo(replacement);

        char[]? previous = _arrayToReturnToPool;
        _arrayToReturnToPool = replacement;
        _chars = replacement;

        if (previous != null)
        {
            ArrayPool<char>.Shared.Return(previous);
        }
    }

    /// <summary>
    /// Reset this builder to its default (empty) state and return any rented array to the shared pool.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public void Dispose()
    {
        char[]? rented = _arrayToReturnToPool;

        // for safety, to avoid using pooled array if this instance is erroneously appended to again
        this = default;

        if (rented != null)
        {
            ArrayPool<char>.Shared.Return(rented);
        }
    }
}

View File

@@ -139,60 +139,32 @@
/// <summary>
/// Create a new <see cref="OperatorToken"/>.
/// </summary>
public static OperatorToken Create(string data)
public static OperatorToken Create(ReadOnlySpan<char> data)
{
if (data == null)
{
throw new ArgumentNullException(nameof(data));
}
switch (data)
{
case "BT":
return Bt;
case "eexec":
return Eexec;
case "endobj":
return EndObject;
case "endstream":
return EndStream;
case "ET":
return Et;
case "def":
return Def;
case "dict":
return Dict;
case "for":
return For;
case "dup":
return Dup;
case "n":
return N;
case "obj":
return StartObject;
case "put":
return Put;
case "Q":
return QPop;
case "q":
return QPush;
case "R":
return R;
case "re":
return Re;
case "readonly":
return Readonly;
case "stream":
return StartStream;
case "Tf":
return Tf;
case "W*":
return WStar;
case "xref":
return Xref;
default:
return new OperatorToken(data);
}
return data switch {
"BT" => Bt,
"eexec" => Eexec,
"endobj" => EndObject,
"endstream" => EndStream,
"ET" => Et,
"def" => Def,
"dict" => Dict,
"for" => For,
"dup" => Dup,
"n" => N,
"obj" => StartObject,
"put" => Put,
"Q" => QPop,
"q" => QPush,
"R" => R,
"re" => Re,
"readonly" => Readonly,
"stream" => StartStream,
"Tf" => Tf,
"W*" => WStar,
"xref" => Xref,
_ => new OperatorToken(data.ToString())
};
}
/// <inheritdoc />

View File

@@ -311,7 +311,7 @@
}
else if (DirectObjectFinder.TryGet(textValueToken, tokenScanner, out StreamToken? valueStreamToken))
{
textValue = OtherEncodings.BytesAsLatin1String(valueStreamToken.Decode(filterProvider, tokenScanner).ToArray());
textValue = OtherEncodings.BytesAsLatin1String(valueStreamToken.Decode(filterProvider, tokenScanner).Span);
}
}

View File

@@ -51,7 +51,7 @@
/// <summary>
/// The set of <see cref="Letter"/>s drawn by the PDF content.
/// </summary>
public IReadOnlyList<Letter> Letters => Content?.Letters ?? new Letter[0];
public IReadOnlyList<Letter> Letters => Content.Letters;
/// <summary>
/// The full text of all characters on the page in the order they are presented in the PDF content.
@@ -88,7 +88,8 @@
/// </summary>
public Experimental ExperimentalAccess { get; }
internal Page(int number, DictionaryToken dictionary, MediaBox mediaBox, CropBox cropBox, PageRotationDegrees rotation, PageContent content,
internal Page(int number, DictionaryToken dictionary, MediaBox mediaBox, CropBox cropBox, PageRotationDegrees rotation,
PageContent content,
AnnotationProvider annotationProvider,
IPdfTokenScanner pdfScanner)
{
@@ -97,6 +98,11 @@
throw new ArgumentOutOfRangeException(nameof(number), "Page number cannot be 0 or negative.");
}
if (content is null)
{
throw new ArgumentNullException(nameof(content));
}
Dictionary = dictionary ?? throw new ArgumentNullException(nameof(dictionary));
Number = number;
@@ -125,6 +131,27 @@
return string.Empty;
}
#if NET6_0_OR_GREATER
int length = 0;
for (var i = 0; i < content.Letters.Count; i++)
{
length += content.Letters[i].Value.Length;
}
return string.Create(length, content, static (buffer, content) => {
int position = 0;
for (var i = 0; i < content.Letters.Count; i++)
{
var value = content.Letters[i].Value;
value.AsSpan().CopyTo(buffer[position..]);
position += value.Length;
}
});
#else
var builder = new StringBuilder();
for (var i = 0; i < content.Letters.Count; i++)
{
@@ -132,6 +159,7 @@
}
return builder.ToString();
#endif
}
/// <summary>

View File

@@ -39,7 +39,7 @@
/// <inheritdoc />
public void Write(Stream stream)
{
stream.WriteText("[");
stream.WriteText("["u8);
for (var i = 0; i < Pattern.Array.Count; i++)
{
@@ -52,7 +52,7 @@
}
}
stream.WriteText("]");
stream.WriteText("]"u8);
stream.WriteWhiteSpace();

View File

@@ -1,26 +1,50 @@
namespace UglyToad.PdfPig.Graphics.Operations
{
using System;
using System.Globalization;
using System.IO;
using PdfPig.Core;
using System;
using System.Buffers.Text;
using System.IO;
using System.Text;
using Util;
internal static class OperationWriteHelper
{
private static readonly byte WhiteSpace = OtherEncodings.StringAsLatin1Bytes(" ")[0];
private static readonly byte NewLine = OtherEncodings.StringAsLatin1Bytes("\n")[0];
private const byte WhiteSpace = (byte)' ';
private const byte NewLine = (byte)'\n';
public static void WriteText(this Stream stream, string text, bool appendWhitespace = false)
{
#if NET8_0_OR_GREATER
if (Ascii.IsValid(text))
{
Span<byte> buffer = text.Length <= 64
? stackalloc byte[text.Length]
: new byte[text.Length];
Ascii.FromUtf16(text, buffer, out _);
stream.Write(buffer);
}
else
{
var bytes = OtherEncodings.StringAsLatin1Bytes(text);
stream.Write(bytes);
}
#else
var bytes = OtherEncodings.StringAsLatin1Bytes(text);
stream.Write(bytes, 0, bytes.Length);
#endif
if (appendWhitespace)
{
stream.WriteWhiteSpace();
stream.WriteByte(WhiteSpace);
}
}
/// <summary>
/// Write <paramref name="asciiBytes"/> to <paramref name="stream"/> exactly as given;
/// no encoding step is performed and no trailing whitespace is added.
/// </summary>
public static void WriteText(this Stream stream, ReadOnlySpan<byte> asciiBytes)
    => stream.Write(asciiBytes);
public static void WriteHex(this Stream stream, ReadOnlySpan<byte> bytes)
{
Span<byte> hex = bytes.Length <= 64
@@ -46,7 +70,11 @@
public static void WriteDouble(this Stream stream, double value)
{
stream.WriteText(value.ToString("G", CultureInfo.InvariantCulture));
Span<byte> buffer = stackalloc byte[32]; // matches dotnet Number.CharStackBufferSize
Utf8Formatter.TryFormat(value, buffer, out int bytesWritten);
stream.Write(buffer.Slice(0, bytesWritten));
}
public static void WriteNumberText(this Stream stream, int number, string text)
@@ -57,6 +85,14 @@
stream.WriteNewLine();
}
/// <summary>
/// Write <paramref name="number"/>, a whitespace, <paramref name="asciiBytes"/> and a new line
/// to <paramref name="stream"/>, in that order.
/// </summary>
/// <param name="stream">The stream to write to.</param>
/// <param name="number">The number written first; it is passed to <c>WriteDouble</c>.</param>
/// <param name="asciiBytes">Bytes written verbatim after the separating whitespace.</param>
public static void WriteNumberText(this Stream stream, int number, ReadOnlySpan<byte> asciiBytes)
{
stream.WriteDouble(number);
stream.WriteWhiteSpace();
stream.WriteText(asciiBytes);
stream.WriteNewLine();
}
public static void WriteNumberText(this Stream stream, double number, string text)
{
stream.WriteDouble(number);

View File

@@ -62,7 +62,7 @@
/// <inheritdoc />
public void Write(Stream stream)
{
stream.WriteText("[");
stream.WriteText("["u8);
for (var i = 0; i < Array.Count; i++)
{
TokenWriter.WriteToken(Array[i], stream);
@@ -72,7 +72,7 @@
stream.WriteWhiteSpace();
}
}
stream.WriteText("]");
stream.WriteText("]"u8);
stream.WriteWhiteSpace();
stream.WriteText(Symbol);
stream.WriteNewLine();

View File

@@ -24,8 +24,6 @@ namespace UglyToad.PdfPig.Graphics
internal class ReflectionGraphicsStateOperationFactory : IGraphicsStateOperationFactory
{
private static readonly ListPool<double> DoubleListPool = new ListPool<double>(10);
private readonly IReadOnlyDictionary<string, Type> operations;
public ReflectionGraphicsStateOperationFactory()
@@ -56,7 +54,7 @@ namespace UglyToad.PdfPig.Graphics
private static double[] TokensToDoubleArray(IReadOnlyList<IToken> tokens, bool exceptLast = false)
{
var result = DoubleListPool.Borrow();
using var result = new ArrayPoolBufferWriter<double>(16);
for (var i = 0; i < tokens.Count - (exceptLast ? 1 : 0); i++)
{
@@ -70,28 +68,22 @@ namespace UglyToad.PdfPig.Graphics
if (!(innerOperand is NumericToken innerNumeric))
{
var val = result.ToArray();
DoubleListPool.Return(result);
return val.ToArray();
return result.WrittenSpan.ToArray();
}
result.Add(innerNumeric.Data);
result.Write(innerNumeric.Data);
}
}
if (!(operand is NumericToken numeric))
{
var val = result.ToArray();
DoubleListPool.Return(result);
return val.ToArray();
return result.WrittenSpan.ToArray();
}
result.Add(numeric.Data);
result.Write(numeric.Data);
}
var returnValue = result.ToArray();
DoubleListPool.Return(result);
return returnValue;
return result.WrittenSpan.ToArray();
}
private static int OperandToInt(IToken token)

View File

@@ -63,18 +63,18 @@
TokenWriter.WriteToken(NameToken.CmapType, memoryStream);
memoryStream.WriteNumberText(2, DefToken);
memoryStream.WriteNumberText(1, "begincodespacerange");
memoryStream.WriteNumberText(1, "begincodespacerange"u8);
TokenWriter.WriteToken(new HexToken(['0', '0']), memoryStream);
TokenWriter.WriteToken(new HexToken(['F', 'F']), memoryStream);
memoryStream.WriteNewLine();
memoryStream.WriteText("endcodespacerange");
memoryStream.WriteText("endcodespacerange"u8);
memoryStream.WriteNewLine();
memoryStream.WriteNumberText(unicodeToCharacterCode.Count, "beginbfchar");
memoryStream.WriteNumberText(unicodeToCharacterCode.Count, "beginbfchar"u8);
foreach (var keyValuePair in unicodeToCharacterCode)
{
@@ -90,23 +90,23 @@
memoryStream.WriteNewLine();
}
memoryStream.WriteText("endbfchar");
memoryStream.WriteText("endbfchar"u8);
memoryStream.WriteNewLine();
memoryStream.WriteText("endcmap");
memoryStream.WriteText("endcmap"u8);
memoryStream.WriteNewLine();
memoryStream.WriteText("CMapName currentdict /CMap defineresource pop");
memoryStream.WriteText("CMapName currentdict /CMap defineresource pop"u8);
memoryStream.WriteNewLine();
memoryStream.WriteText("end");
memoryStream.WriteText("end"u8);
memoryStream.WriteNewLine();
memoryStream.WriteText("end");
memoryStream.WriteText("end"u8);
memoryStream.WriteNewLine();

View File

@@ -87,11 +87,8 @@
WriteString($"%PDF-{version.ToString("0.0", CultureInfo.InvariantCulture)}", Stream);
Stream.WriteText("%");
Stream.WriteByte(169);
Stream.WriteByte(205);
Stream.WriteByte(196);
Stream.WriteByte(210);
Stream.WriteText("%"u8);
Stream.Write([169, 205, 196, 210]);
Stream.WriteNewLine();
Initialized = true;
}

View File

@@ -3,6 +3,7 @@
using Core;
using Graphics.Operations;
using System;
using System.Buffers;
using System.Buffers.Text;
using System.Collections.Generic;
using System.Globalization;
@@ -17,31 +18,31 @@
/// </summary>
public class TokenWriter : ITokenWriter
{
private static readonly byte ArrayStart = GetByte("[");
private static readonly byte ArrayEnd = GetByte("]");
private const byte ArrayStart = (byte)'[';
private const byte ArrayEnd = (byte)']';
private static readonly byte[] DictionaryStart = OtherEncodings.StringAsLatin1Bytes("<<");
private static readonly byte[] DictionaryEnd = OtherEncodings.StringAsLatin1Bytes(">>");
private static ReadOnlySpan<byte> DictionaryStart => "<<"u8;
private static ReadOnlySpan<byte> DictionaryEnd => ">>"u8;
private static readonly byte Comment = GetByte("%");
private const byte Comment = (byte)'%';
private static readonly byte[] Eof = OtherEncodings.StringAsLatin1Bytes("%%EOF");
private static ReadOnlySpan<byte> Eof => "%%EOF"u8;
private static ReadOnlySpan<byte> FalseBytes => "false"u8;
private static readonly byte HexStart = GetByte("<");
private static readonly byte HexEnd = GetByte(">");
private static readonly byte HexStart = (byte)'<';
private static readonly byte HexEnd = (byte)'>';
private static readonly byte InUseEntry = GetByte("n");
private const byte InUseEntry = (byte)'n';
private static readonly byte NameStart = GetByte("/");
private const byte NameStart = (byte)'/';
private static ReadOnlySpan<byte> Null => "null"u8;
private static ReadOnlySpan<byte> ObjStart => "obj"u8;
private static ReadOnlySpan<byte> ObjEnd => "endobj"u8;
private static readonly byte RByte = GetByte("R");
private const byte RByte = (byte)'R';
private static ReadOnlySpan<byte> StartXref => "startxref"u8;
@@ -55,15 +56,15 @@
/// </summary>
protected static ReadOnlySpan<byte> StreamEnd => "endstream"u8;
private static readonly byte StringStart = GetByte("(");
private const byte StringStart = (byte)'(';
private static readonly byte StringEnd = GetByte(")");
private const byte StringEnd = (byte)')';
private static ReadOnlySpan<byte> Trailer => "trailer"u8;
private static ReadOnlySpan<byte> TrueBytes => "true"u8;
private static readonly byte Whitespace = GetByte(" ");
private static readonly byte Whitespace = (byte)' ';
private static ReadOnlySpan<byte> Xref => "xref"u8;
@@ -369,7 +370,7 @@
/// <param name="outputStream"></param>
protected void WriteDictionary(DictionaryToken dictionary, Stream outputStream)
{
outputStream.Write(DictionaryStart, 0, DictionaryStart.Length);
outputStream.Write(DictionaryStart);
foreach (var pair in dictionary.Data)
{
@@ -386,7 +387,7 @@
}
}
outputStream.Write(DictionaryEnd, 0, DictionaryEnd.Length);
outputStream.Write(DictionaryEnd);
}
/// <summary>
@@ -425,26 +426,27 @@
* This is recommended for characters whose codes are outside the range 33 (!) to 126 (~).
*/
var sb = new StringBuilder();
using var sb = new ArrayPoolBufferWriter<byte>((name.Length * 2) + 1);
Span<byte> hexBuffer = stackalloc byte[2];
foreach (var c in name)
{
if (c < 33 || c > 126 || DelimiterChars.Contains(c))
{
var str = Hex.GetString([(byte)c]);
sb.Append('#');
sb.Append(str);
Hex.GetUtf8Chars([(byte)c], hexBuffer);
sb.Write((byte)'#');
sb.Write(hexBuffer);
}
else
{
sb.Append(c);
sb.Write((byte)c); // between 33 and 126 (ASCII is 0 - 127)
}
}
var bytes = OtherEncodings.StringAsLatin1Bytes(sb.ToString());
outputStream.WriteByte(NameStart);
outputStream.Write(bytes);
outputStream.Write(sb.WrittenSpan);
WriteWhitespace(outputStream);
}
@@ -461,8 +463,11 @@
}
else
{
var bytes = OtherEncodings.StringAsLatin1Bytes(number.Data.ToString("G", CultureInfo.InvariantCulture));
outputStream.Write(bytes);
Span<byte> buffer = stackalloc byte[32]; // matches dotnet Number.CharStackBufferSize
Utf8Formatter.TryFormat(number.Data, buffer, out int bytesWritten);
outputStream.Write(buffer.Slice(0, bytesWritten));
}
WriteWhitespace(outputStream);
@@ -639,25 +644,13 @@
outputStream.WriteText(new string('0', 10));
outputStream.WriteWhiteSpace();
outputStream.WriteText("65535");
outputStream.WriteText("65535"u8);
outputStream.WriteWhiteSpace();
outputStream.WriteText("f");
outputStream.WriteText("f"u8);
outputStream.WriteWhiteSpace();
outputStream.WriteNewLine();
}
/// <summary>
/// Converts a string to the single byte it encodes to via <c>OtherEncodings.StringAsLatin1Bytes</c>.
/// </summary>
/// <param name="value">The text to encode; it must produce exactly one byte.</param>
/// <returns>The only byte of the encoded <paramref name="value"/>.</returns>
/// <exception cref="InvalidOperationException">
/// The encoded form of <paramref name="value"/> is not exactly one byte long.
/// </exception>
private static byte GetByte(string value)
{
    var bytes = OtherEncodings.StringAsLatin1Bytes(value);

    // Reject the empty result as well as the multi-byte one, so a bad input fails with the same
    // descriptive exception instead of an IndexOutOfRangeException from bytes[0] below.
    if (bytes.Length != 1)
    {
        throw new InvalidOperationException(
            $"Expected '{value}' to encode to exactly one byte but it produced {bytes.Length}.");
    }

    return bytes[0];
}
private class XrefSeries
{
public long First { get; }