mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-06-28 15:30:17 +08:00
Fix for #662: when encountering invalid content, try to continue parsing
if option "useLenientParsing" is in effect.
This commit is contained in:
parent
d59d2c61a0
commit
e3f281435a
33
src/UglyToad.PdfPig.Tests/Integration/InvalidOperator.cs
Normal file
33
src/UglyToad.PdfPig.Tests/Integration/InvalidOperator.cs
Normal file
@ -0,0 +1,33 @@
|
||||
namespace UglyToad.PdfPig.Tests.Integration
{
    using System;
    using Xunit;

    /// <summary>
    /// Integration tests which open the "invalid-operator.pdf" test document
    /// with lenient parsing switched off and on.
    /// </summary>
    public class InvalidOperatorTests
    {
        /// <summary>
        /// With <c>UseLenientParsing = false</c>, reading page 1 is expected to
        /// fail with an <see cref="ArgumentException"/>.
        /// </summary>
        [Fact]
        public void InvalidOperatorThrowsExceptionIfNotUsingLenientParsing()
        {
            var path = IntegrationHelpers.GetSpecificTestDocumentPath("invalid-operator.pdf");

            using (var document = PdfDocument.Open(path, new ParsingOptions { UseLenientParsing = false }))
            {
                // Opening the document succeeds; the failure is only expected once the page is requested.
                Assert.Throws<ArgumentException>(() => document.GetPage(1));
            }
        }

        /// <summary>
        /// With <c>UseLenientParsing = true</c>, reading page 1 must succeed and
        /// the page text must contain both expected text lines.
        /// </summary>
        [Fact]
        public void InvalidOperatorDoesNotThrowExceptionIfUsingLenientParsing()
        {
            var path = IntegrationHelpers.GetSpecificTestDocumentPath("invalid-operator.pdf");

            using (var document = PdfDocument.Open(path, new ParsingOptions { UseLenientParsing = true }))
            {
                var page = document.GetPage(1);

                var text = page.Text;

                // Assert.Contains reports the actual page text on failure, unlike Assert.True(text.Contains(...)).
                // Ordinal comparison keeps the same matching semantics as string.Contains(string).
                Assert.Contains("Text line 1", text, StringComparison.Ordinal);
                Assert.Contains("Text line 2", text, StringComparison.Ordinal);
            }
        }
    }
}
|
Binary file not shown.
@ -15,10 +15,12 @@
|
||||
internal class PageContentParser : IPageContentParser
|
||||
{
|
||||
private readonly IGraphicsStateOperationFactory operationFactory;
|
||||
private readonly bool useLenientParsing;
|
||||
|
||||
public PageContentParser(IGraphicsStateOperationFactory operationFactory)
|
||||
public PageContentParser(IGraphicsStateOperationFactory operationFactory, bool useLenientParsing = false)
|
||||
{
|
||||
this.operationFactory = operationFactory;
|
||||
this.useLenientParsing = useLenientParsing;
|
||||
}
|
||||
|
||||
public IReadOnlyList<IGraphicsStateOperation> Parse(
|
||||
@ -116,9 +118,10 @@
|
||||
catch (Exception ex)
|
||||
{
|
||||
// End images can cause weird state if the "EI" appears inside the inline data stream.
|
||||
if (TryGetLastEndImage(graphicsStateOperations, out _, out _))
|
||||
log.Error($"Failed reading operation at offset {inputBytes.CurrentOffset} for page {pageNumber}, data: '{op.Data}'", ex);
|
||||
if (TryGetLastEndImage(graphicsStateOperations, out _, out _)
|
||||
|| useLenientParsing)
|
||||
{
|
||||
log.Error($"Failed reading an operation at offset {inputBytes.CurrentOffset} for page {pageNumber}.", ex);
|
||||
operation = null;
|
||||
}
|
||||
else
|
||||
|
@ -195,7 +195,7 @@
|
||||
parsingOptions.UseLenientParsing);
|
||||
|
||||
var pageFactory = new PageFactory(pdfScanner, resourceContainer, filterProvider,
|
||||
new PageContentParser(new ReflectionGraphicsStateOperationFactory()), parsingOptions.Logger);
|
||||
new PageContentParser(new ReflectionGraphicsStateOperationFactory(), parsingOptions.UseLenientParsing), parsingOptions.Logger);
|
||||
|
||||
var catalog = CatalogFactory.Create(
|
||||
rootReference,
|
||||
|
Loading…
Reference in New Issue
Block a user