mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-12-27 07:05:48 +08:00
Use file header offset when doing brute force find and fix #1223
Some checks failed
Build, test and publish draft / build (push) Has been cancelled
Build and test [MacOS] / build (push) Has been cancelled
Run Common Crawl Tests / build (0000-0001) (push) Has been cancelled
Run Common Crawl Tests / build (0002-0003) (push) Has been cancelled
Run Common Crawl Tests / build (0004-0005) (push) Has been cancelled
Run Common Crawl Tests / build (0006-0007) (push) Has been cancelled
Run Integration Tests / build (push) Has been cancelled
Nightly Release / Check if this commit has already been published (push) Has been cancelled
Nightly Release / tests (push) Has been cancelled
Nightly Release / build_and_publish_nightly (push) Has been cancelled
Some checks failed
Build, test and publish draft / build (push) Has been cancelled
Build and test [MacOS] / build (push) Has been cancelled
Run Common Crawl Tests / build (0000-0001) (push) Has been cancelled
Run Common Crawl Tests / build (0002-0003) (push) Has been cancelled
Run Common Crawl Tests / build (0004-0005) (push) Has been cancelled
Run Common Crawl Tests / build (0006-0007) (push) Has been cancelled
Run Integration Tests / build (push) Has been cancelled
Nightly Release / Check if this commit has already been published (push) Has been cancelled
Nightly Release / tests (push) Has been cancelled
Nightly Release / build_and_publish_nightly (push) Has been cancelled
This commit is contained in:
@@ -11,6 +11,19 @@
|
||||
|
||||
public class GithubIssuesTests
|
||||
{
|
||||
[Fact]
|
||||
public void Issue1223()
|
||||
{
|
||||
var path = IntegrationHelpers.GetSpecificTestDocumentPath("23056.PMC2132516.pdf");
|
||||
using (var document = PdfDocument.Open(path, new ParsingOptions() { UseLenientParsing = true }))
|
||||
{
|
||||
Assert.NotNull(document);
|
||||
var firstPage = document.GetPage(1);
|
||||
Assert.NotNull(firstPage);
|
||||
Assert.Contains("The Rockefeller University Press", firstPage.Text);
|
||||
}
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Issue1213()
|
||||
{
|
||||
|
||||
Binary file not shown.
@@ -3,6 +3,7 @@
|
||||
using System.Text;
|
||||
using PdfPig.Core;
|
||||
using PdfPig.Encryption;
|
||||
using PdfPig.Parser.FileStructure;
|
||||
using PdfPig.Tokenization.Scanner;
|
||||
using PdfPig.Tokens;
|
||||
|
||||
@@ -720,8 +721,12 @@ endobj";
|
||||
{
|
||||
var input = StringBytesTestConverter.Convert(s, false);
|
||||
|
||||
return new PdfTokenScanner(input.Bytes, locationProvider ?? new TestObjectLocationProvider(),
|
||||
new TestFilterProvider(), NoOpEncryptionHandler.Instance, useLenientParsing ? new ParsingOptions() : ParsingOptions.LenientParsingOff);
|
||||
return new PdfTokenScanner(input.Bytes,
|
||||
locationProvider ?? new TestObjectLocationProvider(),
|
||||
new TestFilterProvider(),
|
||||
NoOpEncryptionHandler.Instance,
|
||||
new FileHeaderOffset(0),
|
||||
useLenientParsing ? new ParsingOptions() : ParsingOptions.LenientParsingOff);
|
||||
}
|
||||
|
||||
private static IReadOnlyList<ObjectToken> ReadToEnd(PdfTokenScanner scanner)
|
||||
|
||||
@@ -124,8 +124,10 @@
|
||||
|
||||
var version = FileHeaderParser.Parse(scanner, inputBytes, parsingOptions.UseLenientParsing, parsingOptions.Logger);
|
||||
|
||||
var fileHeaderOffset = new FileHeaderOffset((int)version.OffsetInFile);
|
||||
|
||||
var initialParse = FirstPassParser.Parse(
|
||||
new FileHeaderOffset((int)version.OffsetInFile),
|
||||
fileHeaderOffset,
|
||||
inputBytes,
|
||||
scanner,
|
||||
parsingOptions.Logger);
|
||||
@@ -143,7 +145,7 @@
|
||||
initialParse.BruteForceOffsets,
|
||||
inputBytes);
|
||||
|
||||
var pdfScanner = new PdfTokenScanner(inputBytes, locationProvider, filterProvider, NoOpEncryptionHandler.Instance, parsingOptions);
|
||||
var pdfScanner = new PdfTokenScanner(inputBytes, locationProvider, filterProvider, NoOpEncryptionHandler.Instance, fileHeaderOffset, parsingOptions);
|
||||
|
||||
var (rootReference, rootDictionary) = ParseTrailer(
|
||||
trailer,
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
using Encryption;
|
||||
using Filters;
|
||||
using Tokens;
|
||||
using UglyToad.PdfPig.Parser.FileStructure;
|
||||
|
||||
internal class PdfTokenScanner : IPdfTokenScanner
|
||||
{
|
||||
@@ -23,6 +24,7 @@
|
||||
private readonly ILookupFilterProvider filterProvider;
|
||||
private readonly CoreTokenScanner coreTokenScanner;
|
||||
private readonly ParsingOptions parsingOptions;
|
||||
private readonly FileHeaderOffset fileHeaderOffset;
|
||||
|
||||
private IEncryptionHandler encryptionHandler;
|
||||
private bool isDisposed;
|
||||
@@ -54,12 +56,14 @@
|
||||
IObjectLocationProvider objectLocationProvider,
|
||||
ILookupFilterProvider filterProvider,
|
||||
IEncryptionHandler encryptionHandler,
|
||||
FileHeaderOffset fileHeaderOffset,
|
||||
ParsingOptions parsingOptions)
|
||||
{
|
||||
this.inputBytes = inputBytes;
|
||||
this.objectLocationProvider = objectLocationProvider;
|
||||
this.filterProvider = filterProvider;
|
||||
this.encryptionHandler = encryptionHandler;
|
||||
this.fileHeaderOffset = fileHeaderOffset;
|
||||
this.parsingOptions = parsingOptions;
|
||||
coreTokenScanner = new CoreTokenScanner(inputBytes, true, useLenientParsing: parsingOptions.UseLenientParsing);
|
||||
}
|
||||
@@ -795,8 +799,8 @@
|
||||
{
|
||||
// Brute force read the entire file
|
||||
isBruteForcing = true;
|
||||
|
||||
Seek(0);
|
||||
|
||||
Seek(fileHeaderOffset.Value);
|
||||
|
||||
while (MoveNext())
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user