add a bunch more performance improvements

filter provider becomes single instance and no longer has constructor parameters.

tokenizers use list and stringbuilder pools to reduce allocations.

system font finder becomes static to preserve file cache across all documents.
This commit is contained in:
Eliot Jones
2020-04-05 15:34:47 +01:00
parent 7baa18b5dd
commit f1be6634a7
20 changed files with 174 additions and 136 deletions

View File

@@ -1,4 +1,6 @@
namespace UglyToad.PdfPig.Fonts.SystemFonts
using System.Collections.Concurrent;
namespace UglyToad.PdfPig.Fonts.SystemFonts
{
using System;
using System.Collections.Generic;
@@ -14,10 +16,16 @@
public class SystemFontFinder : ISystemFontFinder
{
private static readonly IReadOnlyDictionary<string, string[]> NameSubstitutes;
private static readonly Lazy<IReadOnlyList<SystemFontRecord>> AvailableFonts;
private static readonly object CacheLock = new object();
private static readonly Dictionary<string, TrueTypeFont> Cache = new Dictionary<string, TrueTypeFont>(StringComparer.OrdinalIgnoreCase);
/// <summary>
/// The instance of <see cref="SystemFontFinder"/>.
/// </summary>
public static readonly ISystemFontFinder Instance = new SystemFontFinder();
static SystemFontFinder()
{
var dict = new Dictionary<string, string[]>
@@ -66,18 +74,7 @@
}
NameSubstitutes = dict;
}
private readonly Lazy<IReadOnlyList<SystemFontRecord>> availableFonts;
private readonly Dictionary<string, string> nameToFileNameMap = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
private readonly HashSet<string> readFiles = new HashSet<string>();
/// <summary>
/// Create a new <see cref="SystemFontFinder"/>.
/// </summary>
public SystemFontFinder()
{
ISystemFontLister lister;
#if NETSTANDARD2_0
if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
@@ -100,7 +97,18 @@
lister = new WindowsSystemFontLister();
#endif
availableFonts = new Lazy<IReadOnlyList<SystemFontRecord>>(() => lister.GetAllFonts().ToList());
AvailableFonts = new Lazy<IReadOnlyList<SystemFontRecord>>(() => lister.GetAllFonts().ToList());
}
private readonly ConcurrentDictionary<string, string> nameToFileNameMap = new ConcurrentDictionary<string, string>(StringComparer.OrdinalIgnoreCase);
private readonly object readFilesLock = new object();
private readonly HashSet<string> readFiles = new HashSet<string>();
/// <summary>
/// Create a new <see cref="SystemFontFinder"/>.
/// </summary>
private SystemFontFinder()
{
}
/// <inheritdoc />
@@ -179,7 +187,7 @@
return null;
}
var nameCandidates = availableFonts.Value.Where(x => Path.GetFileName(x.Path)?.StartsWith(name[0].ToString(), StringComparison.OrdinalIgnoreCase) == true);
var nameCandidates = AvailableFonts.Value.Where(x => Path.GetFileName(x.Path)?.StartsWith(name[0].ToString(), StringComparison.OrdinalIgnoreCase) == true);
foreach (var systemFontRecord in nameCandidates)
{
@@ -189,7 +197,7 @@
}
}
foreach (var record in availableFonts.Value)
foreach (var record in AvailableFonts.Value)
{
if (TryGetTrueTypeFont(name, record, out var font))
{
@@ -206,11 +214,14 @@
{
font = null;
if (record.Type == SystemFontType.TrueType)
{
lock (readFilesLock)
{
if (readFiles.Contains(record.Path))
{
return false;
}
}
return TryReadFile(record.Path, true, name, out font);
}
@@ -221,7 +232,6 @@
private bool TryReadFile(string fileName, bool readNameFirst, string fontName, out TrueTypeFont font)
{
font = null;
readFiles.Add(fileName);
var bytes = File.ReadAllBytes(fileName);
@@ -233,15 +243,25 @@
if (name == null)
{
lock (readFilesLock)
{
readFiles.Add(fileName);
}
return false;
}
var fontNameFromFile = name.GetPostscriptName() ?? name.FontName;
nameToFileNameMap[fontNameFromFile] = fileName;
nameToFileNameMap.TryAdd(fontNameFromFile, fileName);
if (!string.Equals(fontNameFromFile, fontName, StringComparison.OrdinalIgnoreCase))
{
lock (readFilesLock)
{
readFiles.Add(fileName);
}
return false;
}
}
@@ -258,6 +278,11 @@
}
}
lock (readFilesLock)
{
readFiles.Add(fileName);
}
return true;
}
}

View File

@@ -2,18 +2,16 @@
{
using System;
using System.Collections.Generic;
using PdfPig.Filters;
using PdfPig.Tokens;
using PdfPig.Filters;
using Xunit;
public class DecodeParameterResolverTests
{
private readonly DecodeParameterResolver resolver= new DecodeParameterResolver(new TestingLog());
[Fact]
public void NullDictionary_Throws()
{
Action action = () => resolver.GetFilterParameters(null, 0);
Action action = () => DecodeParameterResolver.GetFilterParameters(null, 0);
Assert.Throws<ArgumentNullException>(action);
}
@@ -21,7 +19,7 @@
[Fact]
public void NegativeIndex_Throws()
{
Action action = () => resolver.GetFilterParameters(new DictionaryToken(new Dictionary<NameToken, IToken>()), -1);
Action action = () => DecodeParameterResolver.GetFilterParameters(new DictionaryToken(new Dictionary<NameToken, IToken>()), -1);
Assert.Throws<ArgumentOutOfRangeException>(action);
}
@@ -29,7 +27,7 @@
[Fact]
public void EmptyDictionary_ReturnsEmptyDictionary()
{
var result = resolver.GetFilterParameters(new DictionaryToken(new Dictionary<NameToken, IToken>()), 0);
var result = DecodeParameterResolver.GetFilterParameters(new DictionaryToken(new Dictionary<NameToken, IToken>()), 0);
Assert.Empty(result.Data);
}

View File

@@ -8,7 +8,7 @@
public class FlateFilterTests
{
private readonly FlateFilter filter = new FlateFilter(new DecodeParameterResolver(new TestingLog()), new PngPredictor(), new TestingLog());
private readonly FlateFilter filter = new FlateFilter();
[Fact]
public void EncodeAndDecodePreservesInput()

View File

@@ -100,7 +100,7 @@
{NameToken.DecodeParms, paramsDict}
});
var filter = new FlateFilter(new DecodeParameterResolver(null), new PngPredictor(), null);
var filter = new FlateFilter();
var filtered = filter.Decode(streamBytes, dictionary, 0);
var expected =

View File

@@ -0,0 +1,64 @@
using System.Collections.Generic;
namespace UglyToad.PdfPig.Tokenization
{
    /// <summary>
    /// An object pool of reusable <see cref="List{T}"/> instances, used to reduce allocations.
    /// <see cref="Borrow"/> and <see cref="Return"/> synchronize on a private lock.
    /// </summary>
    /// <typeparam name="T">The type of item held by the pooled lists.</typeparam>
    public class ListPool<T>
    {
        /// <summary>
        /// Initial capacity given to every list this pool creates, whether pre-populated
        /// in the constructor or created on demand when the pool is exhausted.
        /// </summary>
        private const int InitialListCapacity = 10;

        private readonly int capacity;
        private readonly object locker = new object();
        private readonly Stack<List<T>> pool = new Stack<List<T>>();

        /// <summary>
        /// Create a new <see cref="ListPool{T}"/> pre-populated with, and retaining at most,
        /// the number of lists specified by <paramref name="capacity"/>.
        /// </summary>
        /// <param name="capacity">
        /// The maximum number of lists the pool retains. For zero or negative values nothing
        /// is pre-populated and returned lists are never retained.
        /// </param>
        public ListPool(int capacity = 5)
        {
            this.capacity = capacity;

            for (var i = 0; i < capacity; i++)
            {
                pool.Push(new List<T>(InitialListCapacity));
            }
        }

        /// <summary>
        /// Get a list from the pool, remember to return it using <see cref="Return"/> at the end.
        /// </summary>
        /// <returns>A pooled list if one is available, otherwise a newly created list.</returns>
        public List<T> Borrow()
        {
            lock (locker)
            {
                if (pool.Count > 0)
                {
                    return pool.Pop();
                }
            }

            // Pool exhausted: allocate outside the lock, using the same initial capacity as pooled lists.
            return new List<T>(InitialListCapacity);
        }

        /// <summary>
        /// Clears the list and returns it to the pool of available lists.
        /// </summary>
        /// <param name="instance">
        /// The list to return. A <see langword="null"/> value is ignored. The list is always cleared,
        /// but it is only retained if the pool is below capacity and does not already contain it.
        /// </param>
        public void Return(List<T> instance)
        {
            if (instance == null)
            {
                return;
            }

            instance.Clear();

            lock (locker)
            {
                // Reject a list that is already pooled: pushing it twice would hand the same
                // instance to two separate borrowers. The scan is bounded by the pool capacity.
                if (pool.Count < capacity && !pool.Contains(instance))
                {
                    pool.Push(instance);
                }
            }
        }
    }
}

View File

@@ -8,6 +8,8 @@
internal class NameTokenizer : ITokenizer
{
private static readonly ListPool<byte> ListPool = new ListPool<byte>(10);
public bool ReadsNextByte { get; } = true;
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
@@ -19,7 +21,7 @@
return false;
}
var bytes = new List<byte>();
var bytes = ListPool.Borrow();
bool escapeActive = false;
int postEscapeRead = 0;
@@ -90,7 +92,9 @@
}
}
byte[] byteArray = bytes.ToArray();
var byteArray = bytes.ToArray();
ListPool.Return(bytes);
var str = ReadHelper.IsValidUtf8(byteArray)
? Encoding.UTF8.GetString(byteArray)

View File

@@ -6,7 +6,7 @@
/// <summary>
/// A pool for <see cref="StringBuilder"/>s to reduce allocations during tokenization.
/// </summary>
public class StringBuilderPool
internal class StringBuilderPool
{
private readonly int capacity;
private readonly object locker = new object();

View File

@@ -38,14 +38,12 @@
throw new ArgumentNullException(nameof(data));
}
var previousPrevious = default(IToken);
var previous = default(IToken);
var result = new List<IToken>();
foreach (var token in data)
var result = new List<IToken>(data.Count);
for (var i = 0; i < data.Count; i++)
{
// Roll any "number number R" sequence into an indirect reference
if (ReferenceEquals(token, OperatorToken.R) && previous is NumericToken generation && previousPrevious is NumericToken objectNumber)
var token = data[i];
if (i >= 2 && ReferenceEquals(token, OperatorToken.R) && (data[i - 1] is NumericToken generation) && (data[i - 2] is NumericToken objectNumber))
{
// Clear the previous 2 tokens.
result.RemoveRange(result.Count - 2, 2);
@@ -56,9 +54,6 @@
{
result.Add(token);
}
previousPrevious = previous;
previous = token;
}
Data = result;

View File

@@ -2,19 +2,11 @@
{
using System;
using System.Collections.Generic;
using Logging;
using Tokens;
internal class DecodeParameterResolver : IDecodeParameterResolver
internal static class DecodeParameterResolver
{
private readonly ILog log;
public DecodeParameterResolver(ILog log)
{
this.log = log;
}
public DictionaryToken GetFilterParameters(DictionaryToken streamDictionary, int index)
public static DictionaryToken GetFilterParameters(DictionaryToken streamDictionary, int index)
{
if (streamDictionary == null)
{
@@ -48,10 +40,6 @@
}
break;
default:
if (parameters != null)
{
log?.Error("Expected the decode parameters for the stream to be either an array or dictionary");
}
break;
}

View File

@@ -5,7 +5,6 @@
using System.IO;
using System.IO.Compression;
using System.Linq;
using Logging;
using Tokens;
using Util;
@@ -30,17 +29,6 @@
private const byte Deflate32KbWindow = 120;
private const byte ChecksumBits = 1;
private readonly IDecodeParameterResolver decodeParameterResolver;
private readonly IPngPredictor pngPredictor;
private readonly ILog log;
public FlateFilter(IDecodeParameterResolver decodeParameterResolver, IPngPredictor pngPredictor, ILog log)
{
this.decodeParameterResolver = decodeParameterResolver;
this.pngPredictor = pngPredictor;
this.log = log;
}
/// <inheritdoc />
public bool IsSupported { get; } = true;
@@ -52,7 +40,7 @@
throw new ArgumentNullException(nameof(input));
}
var parameters = decodeParameterResolver.GetFilterParameters(streamDictionary, filterIndex);
var parameters = DecodeParameterResolver.GetFilterParameters(streamDictionary, filterIndex);
var predictor = parameters.GetIntOrDefault(NameToken.Predictor, -1);
@@ -70,21 +58,19 @@
var bitsPerComponent = parameters.GetIntOrDefault(NameToken.BitsPerComponent, DefaultBitsPerComponent);
var columns = parameters.GetIntOrDefault(NameToken.Columns, DefaultColumns);
var result = pngPredictor.Decode(decompressed, predictor, colors, bitsPerComponent, columns);
var result = PngPredictor.Decode(decompressed, predictor, colors, bitsPerComponent, columns);
return result;
}
catch (Exception ex)
catch
{
log.Error("Could not decode a flate stream due to an error.", ex);
// ignored.
}
return bytes;
}
private byte[] Decompress(byte[] input)
{
try
{
using (var memoryStream = new MemoryStream(input))
using (var output = new MemoryStream())
@@ -100,12 +86,6 @@
}
}
}
catch (Exception ex)
{
log?.Error("Could not decode the input using the deflate stream. Input was: " + input, ex);
throw;
}
}
public byte[] Encode(Stream input, DictionaryToken streamDictionary, int index)
{

View File

@@ -1,9 +0,0 @@
namespace UglyToad.PdfPig.Filters
{
using Tokens;
internal interface IDecodeParameterResolver
{
DictionaryToken GetFilterParameters(DictionaryToken streamDictionary, int index);
}
}

View File

@@ -1,7 +0,0 @@
namespace UglyToad.PdfPig.Filters
{
internal interface IPngPredictor
{
byte[] Decode(byte[] input, int predictor, int colors, int bitsPerComponent, int columns);
}
}

View File

@@ -23,22 +23,13 @@
private const int TenBitBoundary = 1023;
private const int ElevenBitBoundary = 2047;
private readonly IDecodeParameterResolver decodeParameterResolver;
private readonly IPngPredictor pngPredictor;
public LzwFilter(IDecodeParameterResolver decodeParameterResolver, IPngPredictor pngPredictor)
{
this.decodeParameterResolver = decodeParameterResolver ?? throw new ArgumentNullException(nameof(decodeParameterResolver));
this.pngPredictor = pngPredictor ?? throw new ArgumentNullException(nameof(pngPredictor));
}
/// <inheritdoc />
public bool IsSupported { get; } = true;
/// <inheritdoc />
public byte[] Decode(IReadOnlyList<byte> input, DictionaryToken streamDictionary, int filterIndex)
{
var parameters = decodeParameterResolver.GetFilterParameters(streamDictionary, filterIndex);
var parameters = DecodeParameterResolver.GetFilterParameters(streamDictionary, filterIndex);
var predictor = parameters.GetIntOrDefault(NameToken.Predictor, -1);
@@ -52,7 +43,7 @@
var bitsPerComponent = parameters.GetIntOrDefault(NameToken.BitsPerComponent, DefaultBitsPerComponent);
var columns = parameters.GetIntOrDefault(NameToken.Columns, DefaultColumns);
var result = pngPredictor.Decode(decompressed, predictor, colors, bitsPerComponent, columns);
var result = PngPredictor.Decode(decompressed, predictor, colors, bitsPerComponent, columns);
return result;
}
@@ -64,7 +55,8 @@
private static byte[] Decode(IReadOnlyList<byte> input, bool isEarlyChange)
{
var result = new List<byte>();
// A guess.
var result = new List<byte>((int)(input.Count * 1.5));
var table = GetDefaultTable();

View File

@@ -4,25 +4,25 @@
using System.Collections.Generic;
using System.Linq;
using Core;
using Exceptions;
using Logging;
using Tokens;
internal class MemoryFilterProvider : IFilterProvider
{
private readonly IReadOnlyDictionary<string, IFilter> filterInstances;
public MemoryFilterProvider(IDecodeParameterResolver decodeParameterResolver, IPngPredictor pngPredictor, ILog log)
public static readonly IFilterProvider Instance = new MemoryFilterProvider();
private MemoryFilterProvider()
{
var ascii85 = new Ascii85Filter();
var asciiHex = new AsciiHexDecodeFilter();
var ccitt = new CcittFaxDecodeFilter();
var dct = new DctDecodeFilter();
var flate = new FlateFilter(decodeParameterResolver, pngPredictor, log);
var flate = new FlateFilter();
var jbig2 = new Jbig2DecodeFilter();
var jpx = new JpxDecodeFilter();
var runLength = new RunLengthFilter();
var lzw = new LzwFilter(decodeParameterResolver, pngPredictor);
var lzw = new LzwFilter();
filterInstances = new Dictionary<string, IFilter>
{

View File

@@ -5,9 +5,9 @@
using System.IO;
using IO;
internal class PngPredictor : IPngPredictor
internal static class PngPredictor
{
public byte[] Decode(byte[] inputBytes, int predictor, int colors, int bitsPerComponent, int columns)
public static byte[] Decode(byte[] inputBytes, int predictor, int colors, int bitsPerComponent, int columns)
{
if (inputBytes == null)
{

View File

@@ -39,10 +39,10 @@
throw new ArgumentNullException(nameof(array));
}
foreach (var token in array)
for (var i = 0; i < array.Count; i++)
{
if (!(token is NumericToken) && !(token is HexToken)
&& !(token is StringToken))
var token = array[i];
if (!(token is StringToken) && !(token is NumericToken) && !(token is HexToken))
{
throw new ArgumentException($"Found invalid token for showing texts with position: {token}");
}

View File

@@ -1,3 +1,5 @@
using UglyToad.PdfPig.Tokenization;
namespace UglyToad.PdfPig.Graphics
{
using System;
@@ -21,6 +23,8 @@ namespace UglyToad.PdfPig.Graphics
internal class ReflectionGraphicsStateOperationFactory : IGraphicsStateOperationFactory
{
private static readonly ListPool<decimal> DecimalListPool = new ListPool<decimal>(10);
private readonly IReadOnlyDictionary<string, Type> operations;
public ReflectionGraphicsStateOperationFactory()
@@ -51,7 +55,7 @@ namespace UglyToad.PdfPig.Graphics
private static decimal[] TokensToDecimalArray(IReadOnlyList<IToken> tokens, bool exceptLast = false)
{
var result = new List<decimal>();
var result = DecimalListPool.Borrow();
for (var i = 0; i < tokens.Count - (exceptLast ? 1 : 0); i++)
{
@@ -65,7 +69,9 @@ namespace UglyToad.PdfPig.Graphics
if (!(innerOperand is NumericToken innerNumeric))
{
return result.ToArray();
var val = result.ToArray();
DecimalListPool.Return(result);
return val.ToArray();
}
result.Add(innerNumeric.Data);
@@ -74,13 +80,17 @@ namespace UglyToad.PdfPig.Graphics
if (!(operand is NumericToken numeric))
{
return result.ToArray();
var val = result.ToArray();
DecimalListPool.Return(result);
return val.ToArray();
}
result.Add(numeric.Data);
}
return result.ToArray();
var returnValue = result.ToArray();
DecimalListPool.Return(result);
return returnValue;
}
private static int OperandToInt(IToken token)

View File

@@ -80,7 +80,7 @@
private static PdfDocument OpenDocument(IInputBytes inputBytes, ISeekableTokenScanner scanner, ILog log, bool isLenientParsing, IReadOnlyList<string> passwords)
{
var filterProvider = new MemoryFilterProvider(new DecodeParameterResolver(log), new PngPredictor(), log);
var filterProvider = MemoryFilterProvider.Instance;
CrossReferenceTable crossReferenceTable = null;
@@ -127,7 +127,7 @@
var fontFactory = new FontFactory(log, new Type0FontHandler(cidFontFactory,
filterProvider, pdfScanner),
new TrueTypeFontHandler(log, pdfScanner, filterProvider, encodingReader, new SystemFontFinder(),
new TrueTypeFontHandler(log, pdfScanner, filterProvider, encodingReader, SystemFontFinder.Instance,
type1Handler),
type1Handler,
new Type3FontHandler(pdfScanner, filterProvider, encodingReader));

View File

@@ -4,7 +4,6 @@
using System.IO;
using System.Linq;
using Filters;
using Logging;
using Tokens;
internal static class DataCompresser
@@ -15,7 +14,7 @@
using (var memoryStream = new MemoryStream(bytes))
{
var parameters = new DictionaryToken(new Dictionary<NameToken, IToken>());
var flater = new FlateFilter(new DecodeParameterResolver(new NoOpLog()), new PngPredictor(), new NoOpLog());
var flater = new FlateFilter();
var result = flater.Encode(memoryStream, parameters, 0);
return result;
}

View File

@@ -24,8 +24,7 @@
{
private static readonly ILog Log = new NoOpLog();
private static readonly IFilterProvider FilterProvider = new MemoryFilterProvider(new DecodeParameterResolver(Log),
new PngPredictor(), Log);
private static readonly IFilterProvider FilterProvider = MemoryFilterProvider.Instance;
/// <summary>
/// Merge two PDF documents together with the pages from <paramref name="file1"/> followed by <paramref name="file2"/>.