From 47a0a62eee77202ce139fb58bdca203811ca45a5 Mon Sep 17 00:00:00 2001 From: romain v Date: Tue, 17 Aug 2021 16:14:59 +0200 Subject: [PATCH] \r only in token scanner An edge case was lost with this commit https://github.com/UglyToad/PdfPig/commit/31ca3640d2c161e3635187bc1bd94158214a6452?branch=31ca3640d2c161e3635187bc1bd94158214a6452&diff=split when the stream operator is followed only by \r (without \n) --- .../Tokenization/Scanner/PdfTokenScanner.cs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs b/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs index fb903e3a..2d5a570c 100644 --- a/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs +++ b/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs @@ -262,6 +262,7 @@ } // From the specification: The stream operator should be followed by \r\n or \n, not just \r. + // While the specification demands a \n we have seen files with \r only in the wild. // While the specification demands a \n we have seen files with `garbage` before the actual data do { @@ -269,6 +270,21 @@ { return false; } + + if ((char)inputBytes.CurrentByte == '\r') + { + if (!inputBytes.MoveNext()) + { + return false; + } + + if ((char)inputBytes.CurrentByte != '\n') + { + inputBytes.Seek(inputBytes.CurrentOffset - 1); + } + break; + } + } while ((char)inputBytes.CurrentByte != '\n'); // Store where we started reading the first byte of data.