From a561c8954e57bfc52a118844957a802c21efc39c Mon Sep 17 00:00:00 2001
From: Eliot Jones <elioty@hotmail.co.uk>
Date: Sat, 25 Jan 2020 16:53:41 +0000
Subject: [PATCH] handle the format header being preceded by nonsense

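Some files seem to have the format header preceded by large amounts of junk, but this appears to be accepted by Chrome and Acrobat Reader. This change raises the number of junk tokens that may be skipped before the version header from 2 (lenient parsing only) to 25, and the tolerance now applies whether or not lenient parsing is enabled.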

Also makes parsing of the version header culture-invariant, which may be related to #85.
---
 .../Parser/Parts/FileHeaderParserTests.cs            | 12 ++++++------
 .../Parser/FileStructure/FileHeaderParser.cs         | 10 +++++++---
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/src/UglyToad.PdfPig.Tests/Parser/Parts/FileHeaderParserTests.cs b/src/UglyToad.PdfPig.Tests/Parser/Parts/FileHeaderParserTests.cs
index febebbb5..522f410e 100644
--- a/src/UglyToad.PdfPig.Tests/Parser/Parts/FileHeaderParserTests.cs
+++ b/src/UglyToad.PdfPig.Tests/Parser/Parts/FileHeaderParserTests.cs
@@ -60,14 +60,14 @@
         }
 
         [Fact]
-        public void HeaderPrecededByJunkNonLenientThrows()
+        public void HeaderPrecededByJunkNonLenientDoesNotThrow()
         {
             var scanner = StringBytesTestConverter.Scanner(@"one    
     %PDF-1.2");
 
-            Action action = () => FileHeaderParser.Parse(scanner, false, log);
+            var result = FileHeaderParser.Parse(scanner, false, log);
 
-            Assert.Throws<PdfDocumentFormatException>(action);
+            Assert.Equal(1.2m, result.Version);
         }
 
         [Fact]
@@ -82,14 +82,14 @@
         }
 
         [Fact]
-        public void HeaderPrecededByTooMuchJunkThrows()
+        public void HeaderPrecededByJunkDoesNotThrow()
         {
             var scanner = StringBytesTestConverter.Scanner(@"one two
 three %PDF-1.6");
 
-            Action action = () => FileHeaderParser.Parse(scanner, true, log);
+            var result = FileHeaderParser.Parse(scanner, true, log);
 
-            Assert.Throws<PdfDocumentFormatException>(action);
+            Assert.Equal(1.6m, result.Version);
         }
 
         [Fact]
diff --git a/src/UglyToad.PdfPig/Parser/FileStructure/FileHeaderParser.cs b/src/UglyToad.PdfPig/Parser/FileStructure/FileHeaderParser.cs
index 095bc2a5..ba3ad139 100644
--- a/src/UglyToad.PdfPig/Parser/FileStructure/FileHeaderParser.cs
+++ b/src/UglyToad.PdfPig/Parser/FileStructure/FileHeaderParser.cs
@@ -1,6 +1,7 @@
 ﻿namespace UglyToad.PdfPig.Parser.FileStructure
 {
     using System;
+    using System.Globalization;
     using Content;
     using Core;
     using Logging;
@@ -43,11 +44,11 @@
 
             var comment = scanner.CurrentToken as CommentToken;
 
-            var junkSkip = isLenientParsing ? 2 : 0;
+            const int junkTokensTolerance = 25;
             var attempts = 0;
             while (comment == null)
             {
-                if (attempts == junkSkip)
+                if (attempts == junkTokensTolerance)
                 {
                     throw new PdfDocumentFormatException("Could not find the version header comment at the start of the document.");
                 }
@@ -69,7 +70,10 @@
 
             const int toDecimalStartLength = 4;
 
-            if (!decimal.TryParse(comment.Data.Substring(toDecimalStartLength), out var version))
+            if (!decimal.TryParse(comment.Data.Substring(toDecimalStartLength), 
+                NumberStyles.Number,
+                CultureInfo.InvariantCulture,
+                out var version))
             {
                 return HandleMissingVersion(comment, isLenientParsing, log);
             }