diff --git a/src/UglyToad.PdfPig.Tests/Integration/GithubIssuesTests.cs b/src/UglyToad.PdfPig.Tests/Integration/GithubIssuesTests.cs index e122ca40..25ae26a6 100644 --- a/src/UglyToad.PdfPig.Tests/Integration/GithubIssuesTests.cs +++ b/src/UglyToad.PdfPig.Tests/Integration/GithubIssuesTests.cs @@ -11,6 +11,19 @@ public class GithubIssuesTests { + [Fact] + public void Issue1223() + { + var path = IntegrationHelpers.GetSpecificTestDocumentPath("23056.PMC2132516.pdf"); + using (var document = PdfDocument.Open(path, new ParsingOptions() { UseLenientParsing = true })) + { + Assert.NotNull(document); + var firstPage = document.GetPage(1); + Assert.NotNull(firstPage); + Assert.Contains("The Rockefeller University Press", firstPage.Text); + } + } + [Fact] public void Issue1213() { diff --git a/src/UglyToad.PdfPig.Tests/Integration/SpecificTestDocuments/23056.PMC2132516.pdf b/src/UglyToad.PdfPig.Tests/Integration/SpecificTestDocuments/23056.PMC2132516.pdf new file mode 100644 index 00000000..8598b82f Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Integration/SpecificTestDocuments/23056.PMC2132516.pdf differ diff --git a/src/UglyToad.PdfPig.Tests/Tokenization/Scanner/PdfTokenScannerTests.cs b/src/UglyToad.PdfPig.Tests/Tokenization/Scanner/PdfTokenScannerTests.cs index 1e867d71..2e83a370 100644 --- a/src/UglyToad.PdfPig.Tests/Tokenization/Scanner/PdfTokenScannerTests.cs +++ b/src/UglyToad.PdfPig.Tests/Tokenization/Scanner/PdfTokenScannerTests.cs @@ -3,6 +3,7 @@ using System.Text; using PdfPig.Core; using PdfPig.Encryption; + using PdfPig.Parser.FileStructure; using PdfPig.Tokenization.Scanner; using PdfPig.Tokens; @@ -720,8 +721,12 @@ endobj"; { var input = StringBytesTestConverter.Convert(s, false); - return new PdfTokenScanner(input.Bytes, locationProvider ?? new TestObjectLocationProvider(), - new TestFilterProvider(), NoOpEncryptionHandler.Instance, useLenientParsing ? new ParsingOptions() : ParsingOptions.LenientParsingOff); + return new PdfTokenScanner(input.Bytes, + locationProvider ?? new TestObjectLocationProvider(), + new TestFilterProvider(), + NoOpEncryptionHandler.Instance, + new FileHeaderOffset(0), + useLenientParsing ? new ParsingOptions() : ParsingOptions.LenientParsingOff); } private static IReadOnlyList ReadToEnd(PdfTokenScanner scanner) diff --git a/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs b/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs index 654d6a72..491f7965 100644 --- a/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs +++ b/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs @@ -124,8 +124,10 @@ var version = FileHeaderParser.Parse(scanner, inputBytes, parsingOptions.UseLenientParsing, parsingOptions.Logger); + var fileHeaderOffset = new FileHeaderOffset((int)version.OffsetInFile); + var initialParse = FirstPassParser.Parse( - new FileHeaderOffset((int)version.OffsetInFile), + fileHeaderOffset, inputBytes, scanner, parsingOptions.Logger); @@ -143,7 +145,7 @@ initialParse.BruteForceOffsets, inputBytes); - var pdfScanner = new PdfTokenScanner(inputBytes, locationProvider, filterProvider, NoOpEncryptionHandler.Instance, parsingOptions); + var pdfScanner = new PdfTokenScanner(inputBytes, locationProvider, filterProvider, NoOpEncryptionHandler.Instance, fileHeaderOffset, parsingOptions); var (rootReference, rootDictionary) = ParseTrailer( trailer, diff --git a/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs b/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs index 8fbcf418..e6a9ce9a 100644 --- a/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs +++ b/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs @@ -11,6 +11,7 @@ using Encryption; using Filters; using Tokens; + using UglyToad.PdfPig.Parser.FileStructure; internal class PdfTokenScanner : IPdfTokenScanner { @@ -23,6 +24,7 @@ private readonly ILookupFilterProvider filterProvider; private readonly CoreTokenScanner coreTokenScanner; private readonly ParsingOptions parsingOptions; + private readonly FileHeaderOffset fileHeaderOffset; private IEncryptionHandler encryptionHandler; private bool isDisposed; @@ -54,12 +56,14 @@ IObjectLocationProvider objectLocationProvider, ILookupFilterProvider filterProvider, IEncryptionHandler encryptionHandler, + FileHeaderOffset fileHeaderOffset, ParsingOptions parsingOptions) { this.inputBytes = inputBytes; this.objectLocationProvider = objectLocationProvider; this.filterProvider = filterProvider; this.encryptionHandler = encryptionHandler; + this.fileHeaderOffset = fileHeaderOffset; this.parsingOptions = parsingOptions; coreTokenScanner = new CoreTokenScanner(inputBytes, true, useLenientParsing: parsingOptions.UseLenientParsing); } @@ -795,8 +799,8 @@ { // Brute force read the entire file isBruteForcing = true; - - Seek(0); + + Seek(fileHeaderOffset.Value); while (MoveNext()) {