handle objects without endobj markers #198

This commit is contained in:
Eliot Jones 2020-08-21 18:15:30 +01:00
parent 6f26b274c4
commit 6359ba5df1
2 changed files with 16 additions and 3 deletions

View File

@@ -8,9 +8,7 @@
using System.Text.RegularExpressions; using System.Text.RegularExpressions;
using Core; using Core;
using Encryption; using Encryption;
using Exceptions;
using Filters; using Filters;
using Parser.Parts;
using Tokens; using Tokens;
internal class PdfTokenScanner : IPdfTokenScanner internal class PdfTokenScanner : IPdfTokenScanner
@@ -144,6 +142,21 @@
coreTokenScanner.Seek(previousTokenPositions[0]); coreTokenScanner.Seek(previousTokenPositions[0]);
break; break;
} }
if (readTokens.Count == 3 && readTokens[1] is NumericToken extraObjNum && readTokens[2] is NumericToken extraGenNum)
{
// A new 'obj' operator was encountered after reading the actual token of the current object followed by the object number and generation number of the next object, meaning the current object has no 'endobj' marker.
var actualReference = new IndirectReference(objectNumber.Int, generation.Int);
var actualToken = encryptionHandler.Decrypt(actualReference, readTokens[0]);
CurrentToken = new ObjectToken(startPosition, actualReference, actualToken);
readTokens.Clear();
coreTokenScanner.Seek(previousTokenPositions[0]);
return true;
}
// This should never happen. // This should never happen.
Debug.Assert(false, "Encountered a start object 'obj' operator before the end of the previous object."); Debug.Assert(false, "Encountered a start object 'obj' operator before the end of the previous object.");
return false; return false;

View File

@@ -9,7 +9,7 @@
<Description>Reads text content from PDF documents and supports document creation. Apache 2.0 licensed.</Description> <Description>Reads text content from PDF documents and supports document creation. Apache 2.0 licensed.</Description>
<PackageLicenseExpression>Apache-2.0</PackageLicenseExpression> <PackageLicenseExpression>Apache-2.0</PackageLicenseExpression>
<PackageProjectUrl>https://github.com/UglyToad/PdfPig</PackageProjectUrl> <PackageProjectUrl>https://github.com/UglyToad/PdfPig</PackageProjectUrl>
<PackageTags>PDF;Reader;Document;Adobe;PDFBox;PdfPig;pdf-extract</PackageTags> <PackageTags>PDF;Reader;Document;Adobe;PDFBox;PdfPig;pdf-extract;pdf-to-text;pdf;file;text;C#;dotnet;.NET</PackageTags>
<RepositoryUrl>https://github.com/UglyToad/PdfPig</RepositoryUrl> <RepositoryUrl>https://github.com/UglyToad/PdfPig</RepositoryUrl>
<GenerateDocumentationFile>true</GenerateDocumentationFile> <GenerateDocumentationFile>true</GenerateDocumentationFile>
<Version>0.1.2</Version> <Version>0.1.2</Version>