Files
PdfPig/tools/UglyToad.PdfPig.Benchmarks/LayoutAnalysisBenchmarks.cs
BobLd 665623561a
Some checks failed
Build, test and publish draft / build (push) Has been cancelled
Build and test [MacOS] / build (push) Has been cancelled
Run Common Crawl Tests / build (0000-0001) (push) Has been cancelled
Run Common Crawl Tests / build (0002-0003) (push) Has been cancelled
Run Common Crawl Tests / build (0004-0005) (push) Has been cancelled
Run Common Crawl Tests / build (0006-0007) (push) Has been cancelled
Run Integration Tests / build (push) Has been cancelled
Tag Release / tag_if_version_changed (push) Has been cancelled
Nightly Release / Check if this commit has already been published (push) Has been cancelled
Nightly Release / tests (push) Has been cancelled
Nightly Release / build_and_publish_nightly (push) Has been cancelled
Add UglyToad.PdfPig.Benchmarks and misc performance improvements
2026-02-17 11:53:09 +00:00

42 lines
1.2 KiB
C#

using BenchmarkDotNet.Attributes;
using UglyToad.PdfPig.Content;
using UglyToad.PdfPig.DocumentLayoutAnalysis;
using UglyToad.PdfPig.DocumentLayoutAnalysis.PageSegmenter;
using UglyToad.PdfPig.DocumentLayoutAnalysis.WordExtractor;
namespace UglyToad.PdfPig.Benchmarks;
/// <summary>
/// Benchmarks for layout analysis operations, run against the letters of page 1 of
/// <c>fseprd1102849.pdf</c>.
/// </summary>
/// <remarks>
/// The letters and words used as benchmark input are extracted once in the constructor,
/// so the benchmark methods operate on cached arrays instead of re-reading the document.
/// </remarks>
[Config(typeof(NuGetPackageConfig))]
[MemoryDiagnoser(displayGenColumns: false)]
public class LayoutAnalysisBenchmarks
{
    // Letters read from page 1 of the input document; input for the letter-based benchmarks.
    private readonly Letter[] _letters;

    // Words built from _letters by the nearest neighbour extractor; input for the block benchmark.
    private readonly Word[] _words;

    /// <summary>
    /// Opens <c>fseprd1102849.pdf</c>, caches the letters of page 1 and the words extracted
    /// from those letters, then disposes the document.
    /// </summary>
    public LayoutAnalysisBenchmarks()
    {
        // Disposed when the constructor exits; only the materialised arrays are kept.
        using var document = PdfDocument.Open("fseprd1102849.pdf");

        var page = document.GetPage(1);
        _letters = page.Letters.ToArray();
        _words = NearestNeighbourWordExtractor.Instance.GetWords(_letters).ToArray();
    }

    /// <summary>
    /// Extracts words from the cached letters using <see cref="NearestNeighbourWordExtractor"/>.
    /// </summary>
    /// <returns>The extracted words, materialised into an array.</returns>
    [Benchmark]
    public IReadOnlyList<Word> GetWords_NearestNeighbourWord()
        // ToArray forces full enumeration of the returned sequence inside the measured call.
        => NearestNeighbourWordExtractor.Instance.GetWords(_letters).ToArray();

    /// <summary>
    /// Segments the cached words into text blocks using <see cref="DocstrumBoundingBoxes"/>.
    /// </summary>
    /// <returns>The text blocks returned by the segmenter.</returns>
    [Benchmark]
    public IReadOnlyList<TextBlock> GetBlocks_Docstrum()
        => DocstrumBoundingBoxes.Instance.GetBlocks(_words);

    /// <summary>
    /// Runs <see cref="DuplicateOverlappingTextProcessor"/> over the cached letters.
    /// </summary>
    /// <returns>The processed letters, materialised into an array.</returns>
    [Benchmark]
    public IReadOnlyList<Letter> DuplicateOverlappingText()
        // ToArray forces full enumeration of the returned sequence inside the measured call.
        => DuplicateOverlappingTextProcessor.Get(_letters).ToArray();
}