From 6fba565d666270d6182495bb5c6c3a4088170159 Mon Sep 17 00:00:00 2001 From: Bert Huijben Date: Thu, 16 Oct 2025 10:32:14 +0200 Subject: [PATCH] Avoid doing a true file seek for simple peeking the next char in the token parser --- src/UglyToad.PdfPig.Core/StreamInputBytes.cs | 36 ++++++++++++------- .../Parser/PdfDocumentFactory.cs | 4 +-- src/UglyToad.PdfPig/PdfDocument.cs | 8 +++++ .../Tokenization/Scanner/PdfTokenScanner.cs | 9 ++--- 4 files changed, 36 insertions(+), 21 deletions(-) diff --git a/src/UglyToad.PdfPig.Core/StreamInputBytes.cs b/src/UglyToad.PdfPig.Core/StreamInputBytes.cs index b7bf639a..d2986874 100644 --- a/src/UglyToad.PdfPig.Core/StreamInputBytes.cs +++ b/src/UglyToad.PdfPig.Core/StreamInputBytes.cs @@ -11,11 +11,12 @@ { private readonly Stream stream; private readonly bool shouldDispose; + private byte? peekByte; private bool isAtEnd; /// - public long CurrentOffset => stream.Position; + public long CurrentOffset => peekByte.HasValue ? stream.Position - 1 : stream.Position; /// public byte CurrentByte { get; private set; } @@ -52,7 +53,8 @@ /// public bool MoveNext() { - var b = stream.ReadByte(); + var b = peekByte ?? stream.ReadByte(); + peekByte = null; if (b == -1) { @@ -68,18 +70,21 @@ /// public byte? 
Peek() { - var current = CurrentOffset; - - var b = stream.ReadByte(); - - stream.Seek(current, SeekOrigin.Begin); - - if (b == -1) + if (!peekByte.HasValue) { - return null; + var v = stream.ReadByte(); + + if (v >= 0) + { + peekByte = (byte)v; + } + else + { + return null; + } } - return (byte)b; + return peekByte; } /// @@ -92,6 +97,7 @@ public void Seek(long position) { isAtEnd = false; + peekByte = null; if (position == 0) { @@ -112,9 +118,15 @@ { return 0; } + else if (peekByte.HasValue) + { + buffer[0] = peekByte.Value; + peekByte = null; + + return Read(buffer.Slice(1)) + 1; + } int read = stream.Read(buffer); - if (read > 0) { CurrentByte = buffer[read - 1]; diff --git a/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs b/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs index 7d383a30..209a2570 100644 --- a/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs +++ b/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs @@ -25,9 +25,9 @@ internal static class PdfDocumentFactory { - public static PdfDocument Open(byte[] fileBytes, ParsingOptions? options = null) + public static PdfDocument Open(ReadOnlyMemory<byte> memory, ParsingOptions? options = null) { - var inputBytes = new MemoryInputBytes(fileBytes); + var inputBytes = new MemoryInputBytes(memory); return Open(inputBytes, options); } diff --git a/src/UglyToad.PdfPig/PdfDocument.cs b/src/UglyToad.PdfPig/PdfDocument.cs index 7aa394ed..5077410c 100644 --- a/src/UglyToad.PdfPig/PdfDocument.cs +++ b/src/UglyToad.PdfPig/PdfDocument.cs @@ -102,6 +102,14 @@ /// <returns>A <see cref="PdfDocument"/> providing access to the file contents.</returns> public static PdfDocument Open(byte[] fileBytes, ParsingOptions? options = null) => PdfDocumentFactory.Open(fileBytes, options); + /// <summary> + /// Creates a <see cref="PdfDocument"/> for reading from the provided file bytes. + /// </summary> + /// <param name="memory">The bytes of the PDF file.</param> + /// <param name="options">Optional parameters controlling parsing.</param> + /// <returns>A <see cref="PdfDocument"/> providing access to the file contents.</returns> + public static PdfDocument Open(ReadOnlyMemory<byte> memory, ParsingOptions? 
options = null) => PdfDocumentFactory.Open(memory, options); + /// <summary> /// Opens a file and creates a <see cref="PdfDocument"/> for reading from the provided file path. /// </summary> diff --git a/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs b/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs index c5fa7a6a..8fbcf418 100644 --- a/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs +++ b/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs @@ -342,14 +342,9 @@ if ((char)inputBytes.CurrentByte == '\r') { - if (!inputBytes.MoveNext()) + if (inputBytes.Peek() == '\n') { - return false; - } - - if ((char)inputBytes.CurrentByte != '\n') - { - inputBytes.Seek(inputBytes.CurrentOffset - 1); + inputBytes.MoveNext(); } break; }