From f84caa04931eea5438a4ec642e1f8caf75b88ba1 Mon Sep 17 00:00:00 2001 From: EliotJones Date: Sun, 3 Aug 2025 18:20:24 -0500 Subject: [PATCH] only treat line breaks and spaces as whitespace for stream content --- .../Parser/FileStructure/XrefStreamParser.cs | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/UglyToad.PdfPig/Parser/FileStructure/XrefStreamParser.cs b/src/UglyToad.PdfPig/Parser/FileStructure/XrefStreamParser.cs index fe7985a6..8e5a460f 100644 --- a/src/UglyToad.PdfPig/Parser/FileStructure/XrefStreamParser.cs +++ b/src/UglyToad.PdfPig/Parser/FileStructure/XrefStreamParser.cs @@ -26,11 +26,8 @@ internal static class XrefStreamParser var offsetCorrection = 0L; bytes.Seek(xrefOffset); - if (!scanner.TryReadToken(out NumericToken _) - || !scanner.TryReadToken(out NumericToken _) - || !scanner.TryReadToken(out OperatorToken opToken) - || !ReferenceEquals(opToken, OperatorToken.StartObject) - || !scanner.TryReadToken(out DictionaryToken dictToken)) + if (!TryReadStreamObjAt(xrefOffset, scanner, out var dictToken) + || dictToken == null) { log.Debug($"Did not find the stream at {xrefOffset} attempting correction"); var recovered = TryRecoverOffset(fileHeaderOffset, xrefOffset, scanner); @@ -241,12 +238,20 @@ internal static class XrefStreamParser { } - var isWhitespaceActive = ReadHelper.IsWhitespace(bytes.CurrentByte); + bool IsStreamWhitespace() + { + return bytes.CurrentByte == (byte)' ' + || bytes.CurrentByte == (byte)'\r' + || bytes.CurrentByte == (byte)'\n'; + } + + var isWhitespaceActive = IsStreamWhitespace(); do { + // Normalize whitespace. - if (ReadHelper.IsWhitespace(bytes.CurrentByte)) + if (IsStreamWhitespace()) { buffer.Add((byte)' '); @@ -271,7 +276,7 @@ internal static class XrefStreamParser { startMarker = bytes.CurrentOffset; - isWhitespaceActive = ReadHelper.IsWhitespace(bytes.CurrentByte); + isWhitespaceActive = IsStreamWhitespace(); } else if (buffer.EndsWith("endobj ")) {