From 52c0635273b53f8f4a126b06730a5dedb3a947d7 Mon Sep 17 00:00:00 2001
From: EliotJones
Date: Sat, 26 Jul 2025 15:04:03 -0500
Subject: [PATCH] support performance profiling information in console runner

---
 .../UglyToad.PdfPig.ConsoleRunner/Program.cs  | 302 +++++++++++++++++-
 .../Properties/launchSettings.json            |   4 +-
 .../UglyToad.PdfPig.ConsoleRunner.csproj      |   1 +
 3 files changed, 292 insertions(+), 15 deletions(-)

diff --git a/tools/UglyToad.PdfPig.ConsoleRunner/Program.cs b/tools/UglyToad.PdfPig.ConsoleRunner/Program.cs
index 8cde2d77..1fe36cdc 100644
--- a/tools/UglyToad.PdfPig.ConsoleRunner/Program.cs
+++ b/tools/UglyToad.PdfPig.ConsoleRunner/Program.cs
@@ -1,5 +1,10 @@
 using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Diagnostics.CodeAnalysis;
+using System.Globalization;
 using System.IO;
+using System.Linq;
 using System.Text;
 using Console = System.Console;
 
@@ -7,6 +12,135 @@ namespace UglyToad.PdfPig.ConsoleRunner
 {
     public static class Program
     {
+        /// <summary>
+        /// A command line option the runner understands, together with the value supplied for it (if any).
+        /// </summary>
+        private class OptionalArg
+        {
+            public required string ShortSymbol { get; init; }
+
+            public required string Symbol { get; init; }
+
+            public required bool SupportsValue { get; init; }
+
+            public string? Value { get; set; }
+        }
+
+        /// <summary>
+        /// The outcome of a successful parse: the input directory plus the options that were supplied.
+        /// </summary>
+        private class ParsedArgs
+        {
+            public required IReadOnlyList<OptionalArg> SuppliedArgs { get; init; }
+
+            public required string SuppliedDirectoryPath { get; init; }
+        }
+
+        /// <summary>
+        /// Creates the list of supported options; every call builds new instances, so a value set while parsing is never shared.
+        /// </summary>
+        private static IReadOnlyList<OptionalArg> GetSupportedArgs() =>
+        [
+            new OptionalArg
+            {
+                SupportsValue = false,
+                ShortSymbol = "nr",
+                Symbol = "no-recursion"
+            },
+            new OptionalArg
+            {
+                SupportsValue = true,
+                ShortSymbol = "o",
+                Symbol = "output"
+            },
+            new OptionalArg
+            {
+                SupportsValue = true,
+                ShortSymbol = "l",
+                Symbol = "limit"
+            }
+        ];
+
+        /// <summary>
+        /// Parses the arguments into exactly one directory path plus any recognized options.
+        /// </summary>
+        /// <returns>
+        /// <c>false</c> when the path is missing or given twice, an option is unknown or repeated,
+        /// or an option that takes a value is the last argument.
+        /// </returns>
+        private static bool TryParseArgs(
+            string[] args,
+            [NotNullWhen(true)] out ParsedArgs? parsed)
+        {
+            parsed = null;
+            string? path = null;
+            var suppliedOpts = new List<OptionalArg>();
+
+            var opts = GetSupportedArgs();
+
+            for (var i = 0; i < args.Length; i++)
+            {
+                var str = args[i];
+
+                var isOptFlag = str.StartsWith('-');
+
+                if (!isOptFlag)
+                {
+                    if (path == null)
+                    {
+                        path = str;
+                    }
+                    else
+                    {
+                        return false;
+                    }
+                }
+                else
+                {
+                    var item = opts.SingleOrDefault(x =>
+                        string.Equals("-" + x.ShortSymbol, str, StringComparison.OrdinalIgnoreCase)
+                        || string.Equals("--" + x.Symbol, str, StringComparison.OrdinalIgnoreCase));
+
+                    if (item == null)
+                    {
+                        return false;
+                    }
+
+                    // A repeated option would be listed twice and make the SingleOrDefault lookups in Main throw.
+                    if (suppliedOpts.Contains(item))
+                    {
+                        return false;
+                    }
+
+                    if (item.SupportsValue)
+                    {
+                        if (i == args.Length - 1)
+                        {
+                            return false;
+                        }
+
+                        i++;
+                        item.Value = args[i];
+                    }
+
+                    suppliedOpts.Add(item);
+                }
+            }
+
+            if (path == null)
+            {
+                return false;
+            }
+
+            parsed = new ParsedArgs
+            {
+                SuppliedArgs = suppliedOpts,
+                SuppliedDirectoryPath = path
+            };
+
+            return true;
+        }
+
         public static int Main(string[] args)
         {
             if (args.Length == 0)
@@ -15,30 +149,47 @@ namespace UglyToad.PdfPig.ConsoleRunner
                 return 7;
             }
 
-            var path = args[0];
-
-            if (!Directory.Exists(path))
+            if (!TryParseArgs(args, out var parsed))
             {
-                Console.WriteLine($"The provided path is not a valid directory: {path}.");
+                var strJoined = string.Join(" ", args);
+                Console.WriteLine($"Unrecognized arguments passed: {strJoined}");
                 return 7;
             }
 
-            var maxCount = default(int?);
-
-            if (args.Length > 1 && int.TryParse(args[1], out var countIn))
+            if (!Directory.Exists(parsed.SuppliedDirectoryPath))
             {
-                maxCount = countIn;
+                Console.WriteLine($"The provided path is not a valid directory: {parsed.SuppliedDirectoryPath}.");
+                return 7;
             }
 
+            int? maxCount = null;
+            var limit = parsed.SuppliedArgs.SingleOrDefault(x => x.ShortSymbol == "l");
+            if (limit?.Value != null && int.TryParse(limit.Value, CultureInfo.InvariantCulture, out var maxCountArg))
+            {
+                Console.WriteLine($"Limiting input files to first: {maxCountArg}");
+                maxCount = maxCountArg;
+            }
+
+            var noRecursionMode = parsed.SuppliedArgs.Any(x => x.ShortSymbol == "nr");
+            var outputOpt = parsed.SuppliedArgs.SingleOrDefault(x => x.ShortSymbol == "o" && x.Value != null);
+
             var hasError = false;
             var errorBuilder = new StringBuilder();
-            var fileList = Directory.GetFiles(path, "*.pdf", SearchOption.AllDirectories);
+            var fileList = Directory.GetFiles(
+                    parsed.SuppliedDirectoryPath,
+                    "*.pdf",
+                    noRecursionMode ? SearchOption.TopDirectoryOnly : SearchOption.AllDirectories)
+                .OrderBy(x => x).ToList();
             var runningCount = 0;
 
-            Console.WriteLine($"Found {fileList.Length} files.");
+            Console.WriteLine($"Found {fileList.Count} files.");
+            Console.WriteLine();
 
-            Console.WriteLine($"{GetCleanFilename("File")}| Size\t| Words\t| Pages");
+            PrintTableColumns("File", "Size", "Words", "Pages", "Open cost (μs)", "Total cost (μs)", "Page cost (μs)");
 
+            var dataList = new List<DataRecord>();
+
+            var sw = new Stopwatch();
             foreach (var file in fileList)
             {
                 if (maxCount.HasValue && runningCount >= maxCount)
@@ -50,8 +201,22 @@ namespace UglyToad.PdfPig.ConsoleRunner
                 {
                     var numWords = 0;
                     var numPages = 0;
+                    long openMicros;
+                    long totalPageMicros;
+
+                    sw.Reset();
+                    sw.Start();
+
                     using (var pdfDocument = PdfDocument.Open(file))
                     {
+                        sw.Stop();
+
+                        // TotalMicroseconds is the whole elapsed time; Microseconds is only the 0-999 component.
+                        openMicros = (long)sw.Elapsed.TotalMicroseconds;
+
+                        // Restart rather than Start so the page timing below excludes the time spent opening.
+                        sw.Restart();
+
                         foreach (var page in pdfDocument.GetPages())
                         {
                             numPages++;
@@ -63,13 +228,36 @@ namespace UglyToad.PdfPig.ConsoleRunner
                                 }
                             }
                         }
+
+                        sw.Stop();
+                        totalPageMicros = (long)sw.Elapsed.TotalMicroseconds;
                     }
 
                     var filename = Path.GetFileName(file);
 
                     var size = new FileInfo(file);
 
-                    Console.WriteLine($"{GetCleanFilename(filename)}| {size.Length}\t| {numWords}\t| {numPages}");
+                    var item = new DataRecord
+                    {
+                        FileName = filename,
+                        OpenCostMicros = openMicros,
+                        Pages = numPages,
+                        Size = size.Length,
+                        Words = numWords,
+                        TotalCostMicros = totalPageMicros + openMicros,
+                        PerPageMicros = Math.Round(totalPageMicros / (double)Math.Max(numPages, 1), 2)
+                    };
+
+                    dataList.Add(item);
+
+                    PrintTableColumns(
+                        item.FileName,
+                        item.Size,
+                        item.Words,
+                        item.Pages,
+                        item.OpenCostMicros,
+                        item.TotalCostMicros,
+                        item.PerPageMicros);
                 }
                 catch (Exception ex)
                 {
@@ -88,12 +276,80 @@ namespace UglyToad.PdfPig.ConsoleRunner
                 return 5;
             }
 
+            if (outputOpt != null && outputOpt.Value != null)
+            {
+                WriteOutput(outputOpt.Value, dataList);
+            }
+
             Console.WriteLine("Complete! :)");
 
             return 0;
         }
 
-        private static string GetCleanFilename(string name, int maxLength = 30)
+        /// <summary>
+        /// Writes the collected records to <paramref name="outPath"/> as CSV with a header row.
+        /// </summary>
+        private static void WriteOutput(string outPath, IReadOnlyList<DataRecord> records)
+        {
+            // File.Create truncates an existing file; File.OpenWrite would leave stale bytes after a shorter write.
+            using var fs = File.Create(outPath);
+            using var sw = new StreamWriter(fs);
+
+            sw.WriteLine("File,Size,Words,Pages,Open Cost,Total Cost,Per Page");
+            foreach (var record in records)
+            {
+                // Double any embedded quotes so the quoted file name field stays valid CSV.
+                var fileNameStr = record.FileName.Replace("\"", "\"\"", StringComparison.Ordinal);
+                var sizeStr = record.Size.ToString("D", CultureInfo.InvariantCulture);
+                var wordsStr = record.Words.ToString("D", CultureInfo.InvariantCulture);
+                var pagesStr = record.Pages.ToString("D", CultureInfo.InvariantCulture);
+                var openCostStr = record.OpenCostMicros.ToString("D", CultureInfo.InvariantCulture);
+                var totalCostStr = record.TotalCostMicros.ToString("D", CultureInfo.InvariantCulture);
+                var ppcStr = record.PerPageMicros.ToString("F2", CultureInfo.InvariantCulture);
+
+                var numericPartsStr = string.Join(",",
+                [
+                    sizeStr,
+                    wordsStr,
+                    pagesStr,
+                    openCostStr,
+                    totalCostStr,
+                    ppcStr
+                ]);
+
+                sw.WriteLine($"\"{fileNameStr}\",{numericPartsStr}");
+            }
+
+            sw.Flush();
+        }
+
+        /// <summary>
+        /// Writes each value to the console as a "| "-prefixed column, cleaned by GetCleanStr and padded with spaces to 16 characters, then ends the line.
+        /// </summary>
+        private static void PrintTableColumns(params object[] values)
+        {
+            for (var i = 0; i < values.Length; i++)
+            {
+                var value = values[i];
+                var valueStr = value.ToString();
+
+                var cleaned = GetCleanStr(valueStr ?? string.Empty);
+
+                var padChars = 16 - cleaned.Length;
+
+                var padding = padChars > 0 ? new string(' ', padChars) : string.Empty;
+
+                var padded = cleaned + padding;
+
+                Console.Write("| ");
+
+                Console.Write(padded);
+            }
+
+            Console.WriteLine();
+        }
+
+        private static string GetCleanStr(string name, int maxLength = 16)
         {
             if (name.Length <= maxLength)
             {
@@ -105,4 +361,24 @@ namespace UglyToad.PdfPig.ConsoleRunner
             return name.Substring(0, maxLength);
         }
     }
+
+    /// <summary>
+    /// The measurements recorded for one processed PDF file; costs are in microseconds.
+    /// </summary>
+    internal class DataRecord
+    {
+        public required string FileName { get; init; }
+
+        public required long Size { get; init; }
+
+        public required int Words { get; init; }
+
+        public required int Pages { get; init; }
+
+        public required long OpenCostMicros { get; init; }
+
+        public required long TotalCostMicros { get; init; }
+
+        public required double PerPageMicros { get; init; }
+    }
 }
diff --git a/tools/UglyToad.PdfPig.ConsoleRunner/Properties/launchSettings.json b/tools/UglyToad.PdfPig.ConsoleRunner/Properties/launchSettings.json
index 0f28b296..d08c46ea 100644
--- a/tools/UglyToad.PdfPig.ConsoleRunner/Properties/launchSettings.json
+++ b/tools/UglyToad.PdfPig.ConsoleRunner/Properties/launchSettings.json
@@ -1,8 +1,8 @@
 {
   "profiles": {
     "UglyToad.PdfPig.ConsoleRunner": {
-      "commandName": "Project",
-      "commandLineArgs": "\"C:\\temp\\pdfs\\archive\""
+      "commandName": "Project",
+      "commandLineArgs": "\"C:\\temp\\pdfs\\archive\""
     }
   }
 }
\ No newline at end of file
diff --git a/tools/UglyToad.PdfPig.ConsoleRunner/UglyToad.PdfPig.ConsoleRunner.csproj b/tools/UglyToad.PdfPig.ConsoleRunner/UglyToad.PdfPig.ConsoleRunner.csproj
index c2a33530..e9e26983 100644
--- a/tools/UglyToad.PdfPig.ConsoleRunner/UglyToad.PdfPig.ConsoleRunner.csproj
+++ b/tools/UglyToad.PdfPig.ConsoleRunner/UglyToad.PdfPig.ConsoleRunner.csproj
@@ -4,6 +4,7 @@
     <LangVersion>latest</LangVersion>
     <OutputType>Exe</OutputType>
     <TargetFramework>net8</TargetFramework>
+    <Nullable>enable</Nullable>
   </PropertyGroup>
 
   <ItemGroup>