From 3084a9aab6a18d6cf98ef5f14400cf9fab813011 Mon Sep 17 00:00:00 2001
From: Eliot Jones <elioty@hotmail.co.uk>
Date: Fri, 20 Dec 2019 14:04:58 +0000
Subject: [PATCH] support streams containing only carriage returns. handle
 comments in arrays and dictionaries

* While the PDF specification says stream data should begin after a newline following the stream operator, some files have only a carriage return after the stream operator.
* Comment tokens may appear inside an array or dictionary; we ignore them when they occur there because they would otherwise break interpretation of the dictionary or array contents.
---
 src/UglyToad.PdfPig/Tokenization/ArrayTokenizer.cs     |  5 +++++
 .../Tokenization/DictionaryTokenizer.cs                |  5 +++++
 .../Tokenization/Scanner/PdfTokenScanner.cs            | 10 +++++++++-
 3 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/src/UglyToad.PdfPig/Tokenization/ArrayTokenizer.cs b/src/UglyToad.PdfPig/Tokenization/ArrayTokenizer.cs
index 31e8436a..b07fccce 100644
--- a/src/UglyToad.PdfPig/Tokenization/ArrayTokenizer.cs
+++ b/src/UglyToad.PdfPig/Tokenization/ArrayTokenizer.cs
@@ -26,6 +26,11 @@
             while (!CurrentByteEndsCurrentArray(inputBytes, previousToken) && scanner.MoveNext())
             {
                 previousToken = scanner.CurrentToken;
+
+                if (scanner.CurrentToken is CommentToken)
+                {
+                    continue;
+                }
                 
                 contents.Add(scanner.CurrentToken);
             }
diff --git a/src/UglyToad.PdfPig/Tokenization/DictionaryTokenizer.cs b/src/UglyToad.PdfPig/Tokenization/DictionaryTokenizer.cs
index 68498ec9..b64b855f 100644
--- a/src/UglyToad.PdfPig/Tokenization/DictionaryTokenizer.cs
+++ b/src/UglyToad.PdfPig/Tokenization/DictionaryTokenizer.cs
@@ -48,6 +48,11 @@
 
             while (coreScanner.MoveNext())
             {
+                if (coreScanner.CurrentToken is CommentToken)
+                {
+                    continue;
+                }
+
                 tokens.Add(coreScanner.CurrentToken);
             }
 
diff --git a/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs b/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs
index 28e8f575..06bce197 100644
--- a/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs
+++ b/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs
@@ -229,14 +229,22 @@
                 return false;
             }
 
+            // While the specification demands a \n we have seen files with \r only in the wild.
+            var hadWhiteSpace = false;
             if (inputBytes.CurrentByte == '\r')
             {
+                hadWhiteSpace = true;
                 inputBytes.MoveNext();
             }
 
             if (inputBytes.CurrentByte != '\n')
             {
-                return false;
+                if (!hadWhiteSpace)
+                {
+                    return false;
+                }
+
+                inputBytes.Seek(inputBytes.CurrentOffset - 1);
             }
 
             // Store where we started reading the first byte of data.