using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text;
using Console = System.Console;

namespace UglyToad.PdfPig.ConsoleRunner
{
    /// <summary>
    /// Console entry point that opens every PDF file in a directory, counts pages and words,
    /// times the work, prints a table of the results and optionally writes them to a CSV file.
    /// </summary>
    public static class Program
    {
        /// <summary>Exit code returned when every file was processed without error.</summary>
        private const int ExitSuccess = 0;

        /// <summary>Exit code returned when at least one document threw while being processed.</summary>
        private const int ExitDocumentFailure = 5;

        /// <summary>Exit code returned when the command line arguments are missing or invalid.</summary>
        private const int ExitBadArguments = 7;

        /// <summary>Fixed width, in characters, of each printed table cell.</summary>
        private const int ColumnWidth = 16;

        /// <summary>
        /// Describes one optional command line switch and, once parsed, the value supplied for it.
        /// </summary>
        private sealed class OptionalArg
        {
            /// <summary>Short form, matched on the command line as <c>-{ShortSymbol}</c>.</summary>
            public required string ShortSymbol { get; init; }

            /// <summary>Long form, matched on the command line as <c>--{Symbol}</c>.</summary>
            public required string Symbol { get; init; }

            /// <summary>Whether the argument following the switch is consumed as its value.</summary>
            public required bool SupportsValue { get; init; }

            /// <summary>The value consumed for this switch, or <c>null</c> if none was read.</summary>
            public string? Value { get; set; }
        }

        /// <summary>
        /// Result of successfully parsing the command line.
        /// </summary>
        private sealed class ParsedArgs
        {
            /// <summary>The optional switches that were present on the command line.</summary>
            public required IReadOnlyList<OptionalArg> SuppliedArgs { get; init; }

            /// <summary>The single positional argument: the directory to scan.</summary>
            public required string SuppliedDirectoryPath { get; init; }
        }

        /// <summary>
        /// Creates a fresh set of the switches this program understands.
        /// </summary>
        private static IReadOnlyList<OptionalArg> GetSupportedArgs() =>
        [
            new OptionalArg
            {
                SupportsValue = false,
                ShortSymbol = "nr",
                Symbol = "no-recursion"
            },
            new OptionalArg
            {
                SupportsValue = true,
                ShortSymbol = "o",
                Symbol = "output"
            },
            new OptionalArg
            {
                SupportsValue = true,
                ShortSymbol = "l",
                Symbol = "limit"
            }
        ];

        /// <summary>
        /// Parses the raw command line into a directory path plus the supplied switches.
        /// </summary>
        /// <param name="args">The raw command line arguments.</param>
        /// <param name="parsed">The parsed result; non-null only when the method returns <c>true</c>.</param>
        /// <returns>
        /// <c>false</c> when no path or more than one path is given, a switch is unknown,
        /// a switch is repeated, or a value-taking switch is the last argument; otherwise <c>true</c>.
        /// </returns>
        private static bool TryParseArgs(
            string[] args,
            [NotNullWhen(true)] out ParsedArgs? parsed)
        {
            parsed = null;

            string? path = null;
            var suppliedOpts = new List<OptionalArg>();
            var opts = GetSupportedArgs();

            for (var i = 0; i < args.Length; i++)
            {
                var str = args[i];

                if (!str.StartsWith('-'))
                {
                    // Exactly one positional argument (the directory) is allowed.
                    if (path != null)
                    {
                        return false;
                    }

                    path = str;
                    continue;
                }

                var item = opts.SingleOrDefault(x =>
                    string.Equals("-" + x.ShortSymbol, str, StringComparison.OrdinalIgnoreCase)
                    || string.Equals("--" + x.Symbol, str, StringComparison.OrdinalIgnoreCase));

                if (item == null)
                {
                    return false;
                }

                // A repeated switch would otherwise be added twice and make the
                // SingleOrDefault lookups in Main throw InvalidOperationException.
                if (suppliedOpts.Contains(item))
                {
                    return false;
                }

                if (item.SupportsValue)
                {
                    if (i == args.Length - 1)
                    {
                        return false;
                    }

                    // Consume the next argument as this switch's value.
                    i++;
                    item.Value = args[i];
                }

                suppliedOpts.Add(item);
            }

            if (path == null)
            {
                return false;
            }

            parsed = new ParsedArgs
            {
                SuppliedArgs = suppliedOpts,
                SuppliedDirectoryPath = path
            };

            return true;
        }

        /// <summary>
        /// Program entry point.
        /// </summary>
        /// <param name="args">
        /// A directory path plus optional switches: <c>-nr</c>/<c>--no-recursion</c>,
        /// <c>-o</c>/<c>--output</c> &lt;file&gt; and <c>-l</c>/<c>--limit</c> &lt;count&gt;.
        /// </param>
        /// <returns>
        /// 0 on success, 5 if any document failed to process, 7 if the arguments are invalid
        /// or the directory does not exist.
        /// </returns>
        public static int Main(string[] args)
        {
            if (args.Length == 0)
            {
                Console.WriteLine("At least 1 argument, path to test file directory, must be provided.");
                return ExitBadArguments;
            }

            if (!TryParseArgs(args, out var parsed))
            {
                var strJoined = string.Join(" ", args);
                Console.WriteLine($"Unrecognized arguments passed: {strJoined}");
                return ExitBadArguments;
            }

            if (!Directory.Exists(parsed.SuppliedDirectoryPath))
            {
                Console.WriteLine($"The provided path is not a valid directory: {parsed.SuppliedDirectoryPath}.");
                return ExitBadArguments;
            }

            int? maxCount = null;
            var limit = parsed.SuppliedArgs.SingleOrDefault(x => x.ShortSymbol == "l");
            if (limit?.Value != null)
            {
                if (int.TryParse(limit.Value, CultureInfo.InvariantCulture, out var maxCountArg))
                {
                    Console.WriteLine($"Limiting input files to first: {maxCountArg}");
                    maxCount = maxCountArg;
                }
                else
                {
                    // Keep running without a limit, but do not hide the bad input.
                    Console.WriteLine($"Ignoring invalid limit value: {limit.Value}");
                }
            }

            var noRecursionMode = parsed.SuppliedArgs.Any(x => x.ShortSymbol == "nr");
            var outputOpt = parsed.SuppliedArgs.SingleOrDefault(x => x.ShortSymbol == "o" && x.Value != null);

            var hasError = false;
            var errorBuilder = new StringBuilder();

            var fileList = Directory.GetFiles(
                    parsed.SuppliedDirectoryPath,
                    "*.pdf",
                    noRecursionMode ? SearchOption.TopDirectoryOnly : SearchOption.AllDirectories)
                .OrderBy(x => x)
                .ToList();

            var runningCount = 0;

            Console.WriteLine($"Found {fileList.Count} files.");
            Console.WriteLine();

            PrintTableColumns("File", "Size", "Words", "Pages", "Open cost (μs)", "Total cost (μs)", "Page cost (μs)");

            var dataList = new List<DataRecord>();
            var sw = new Stopwatch();

            foreach (var file in fileList)
            {
                if (maxCount.HasValue && runningCount >= maxCount)
                {
                    break;
                }

                try
                {
                    var numWords = 0;
                    var numPages = 0;
                    long openMicros;
                    long totalPageMicros;

                    sw.Restart();

                    using (var pdfDocument = PdfDocument.Open(file))
                    {
                        sw.Stop();

                        // TotalMicroseconds is the whole elapsed interval; the Microseconds
                        // property is only the 0-999 sub-millisecond component.
                        openMicros = (long)sw.Elapsed.TotalMicroseconds;

                        // Restart (not Start) so the page timing excludes the open time;
                        // Start would resume and the open cost would be counted twice below.
                        sw.Restart();

                        foreach (var page in pdfDocument.GetPages())
                        {
                            numPages++;
                            foreach (var word in page.GetWords())
                            {
                                if (word != null)
                                {
                                    numWords++;
                                }
                            }
                        }

                        sw.Stop();
                        totalPageMicros = (long)sw.Elapsed.TotalMicroseconds;
                    }

                    var filename = Path.GetFileName(file);
                    var size = new FileInfo(file);

                    var item = new DataRecord
                    {
                        FileName = filename,
                        OpenCostMicros = openMicros,
                        Pages = numPages,
                        Size = size.Length,
                        Words = numWords,
                        TotalCostMicros = totalPageMicros + openMicros,
                        // Math.Max guards against division by zero for documents with no pages.
                        PerPageMicros = Math.Round(totalPageMicros / (double)Math.Max(numPages, 1), 2)
                    };

                    dataList.Add(item);

                    PrintTableColumns(
                        item.FileName,
                        item.Size,
                        item.Words,
                        item.Pages,
                        item.OpenCostMicros,
                        item.TotalCostMicros,
                        item.PerPageMicros);
                }
                catch (Exception ex)
                {
                    // Record the failure and keep going so every file is attempted.
                    hasError = true;
                    errorBuilder.AppendLine($"Parsing document {file} failed due to an error.")
                        .Append(ex)
                        .AppendLine();
                }

                // Failed files count towards the limit as well.
                runningCount++;
            }

            if (hasError)
            {
                Console.WriteLine(errorBuilder.ToString());
                return ExitDocumentFailure;
            }

            if (outputOpt != null && outputOpt.Value != null)
            {
                WriteOutput(outputOpt.Value, dataList);
            }

            Console.WriteLine("Complete! :)");

            return ExitSuccess;
        }

        /// <summary>
        /// Writes the collected records to <paramref name="outPath"/> as CSV with a header row.
        /// </summary>
        /// <param name="outPath">Destination file; created, or overwritten if it already exists.</param>
        /// <param name="records">The records to write, one row each.</param>
        private static void WriteOutput(string outPath, IReadOnlyList<DataRecord> records)
        {
            // File.Create truncates an existing file. File.OpenWrite does not, which would
            // leave stale trailing bytes when the new report is shorter than the old one.
            using var fs = File.Create(outPath);
            using var sw = new StreamWriter(fs);

            sw.WriteLine("File,Size,Words,Pages,Open Cost,Total Cost,Per Page");

            foreach (var record in records)
            {
                var sizeStr = record.Size.ToString("D", CultureInfo.InvariantCulture);
                var wordsStr = record.Words.ToString("D", CultureInfo.InvariantCulture);
                var pagesStr = record.Pages.ToString("D", CultureInfo.InvariantCulture);
                var openCostStr = record.OpenCostMicros.ToString("D", CultureInfo.InvariantCulture);
                var totalCostStr = record.TotalCostMicros.ToString("D", CultureInfo.InvariantCulture);
                var ppcStr = record.PerPageMicros.ToString("F2", CultureInfo.InvariantCulture);

                var numericPartsStr = string.Join(
                    ",",
                    sizeStr,
                    wordsStr,
                    pagesStr,
                    openCostStr,
                    totalCostStr,
                    ppcStr);

                // Double any embedded quotes so the quoted field stays well-formed.
                var escapedName = record.FileName.Replace("\"", "\"\"", StringComparison.Ordinal);

                sw.WriteLine($"\"{escapedName}\",{numericPartsStr}");
            }

            sw.Flush();
        }

        /// <summary>
        /// Writes one table row to the console; each value becomes a fixed-width cell
        /// prefixed with <c>"| "</c>.
        /// </summary>
        /// <param name="values">The cell values, converted with <see cref="object.ToString"/>.</param>
        private static void PrintTableColumns(params object[] values)
        {
            foreach (var value in values)
            {
                // GetCleanStr already pads or truncates to exactly the column width.
                var cell = GetCleanStr(value?.ToString() ?? string.Empty);

                Console.Write("| ");
                Console.Write(cell);
            }

            Console.WriteLine();
        }

        /// <summary>
        /// Returns <paramref name="name"/> right-padded with spaces, or truncated,
        /// to exactly <paramref name="maxLength"/> characters.
        /// </summary>
        /// <param name="name">The text to fit.</param>
        /// <param name="maxLength">The exact length of the returned string.</param>
        private static string GetCleanStr(string name, int maxLength = ColumnWidth)
        {
            return name.Length <= maxLength
                ? name.PadRight(maxLength)
                : name.Substring(0, maxLength);
        }
    }

    /// <summary>
    /// Measurements collected for a single processed PDF file.
    /// </summary>
    internal sealed class DataRecord
    {
        /// <summary>File name without its directory.</summary>
        public required string FileName { get; init; }

        /// <summary>File size in bytes.</summary>
        public required long Size { get; init; }

        /// <summary>Number of words counted across all pages.</summary>
        public required int Words { get; init; }

        /// <summary>Number of pages enumerated.</summary>
        public required int Pages { get; init; }

        /// <summary>Microseconds spent opening the document.</summary>
        public required long OpenCostMicros { get; init; }

        /// <summary>Microseconds spent opening the document plus reading all pages.</summary>
        public required long TotalCostMicros { get; init; }

        /// <summary>Page-reading microseconds divided by the page count, rounded to 2 decimals.</summary>
        public required double PerPageMicros { get; init; }
    }
}