mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-06-28 15:30:17 +08:00
Fix for #662: when encountering invalid content, try to continue parsing
if option "useLenientParsing" is in effect.
This commit is contained in:
parent
d59d2c61a0
commit
e3f281435a
33
src/UglyToad.PdfPig.Tests/Integration/InvalidOperator.cs
Normal file
33
src/UglyToad.PdfPig.Tests/Integration/InvalidOperator.cs
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
namespace UglyToad.PdfPig.Tests.Integration
{
    using System;
    using Xunit;

    /// <summary>
    /// Integration tests that open the "invalid-operator.pdf" test document with
    /// <c>UseLenientParsing</c> switched off and on, and check how reading page 1 behaves.
    /// </summary>
    public class InvalidOperatorTests
    {
        /// <summary>
        /// With <c>UseLenientParsing = false</c>, requesting page 1 is expected to throw
        /// an <see cref="ArgumentException"/>.
        /// </summary>
        [Fact]
        public void InvalidOperatorThrowsExceptionIfNotUsingLenientParsing()
        {
            var path = IntegrationHelpers.GetSpecificTestDocumentPath("invalid-operator.pdf");

            using (var document = PdfDocument.Open(path, new ParsingOptions { UseLenientParsing = false }))
            {
                Assert.Throws<ArgumentException>(() => document.GetPage(1));
            }
        }

        /// <summary>
        /// With <c>UseLenientParsing = true</c>, page 1 can be read and its text contains
        /// both "Text line 1" and "Text line 2".
        /// </summary>
        [Fact]
        public void InvalidOperatorDoesNotThrowExceptionIfUsingLenientParsing()
        {
            var path = IntegrationHelpers.GetSpecificTestDocumentPath("invalid-operator.pdf");

            using (var document = PdfDocument.Open(path, new ParsingOptions { UseLenientParsing = true }))
            {
                var page = document.GetPage(1);
                var text = page.Text;

                // Assert.Contains reports the expected substring and the actual text on failure,
                // unlike Assert.True(text.Contains(...)) which only reports "False".
                // Ordinal comparison matches what string.Contains(string) does.
                Assert.Contains("Text line 1", text, StringComparison.Ordinal);
                Assert.Contains("Text line 2", text, StringComparison.Ordinal);
            }
        }
    }
}
|
Binary file not shown.
@ -15,10 +15,12 @@
|
|||||||
internal class PageContentParser : IPageContentParser
|
internal class PageContentParser : IPageContentParser
|
||||||
{
|
{
|
||||||
private readonly IGraphicsStateOperationFactory operationFactory;
|
private readonly IGraphicsStateOperationFactory operationFactory;
|
||||||
|
private readonly bool useLenientParsing;
|
||||||
|
|
||||||
public PageContentParser(IGraphicsStateOperationFactory operationFactory)
|
public PageContentParser(IGraphicsStateOperationFactory operationFactory, bool useLenientParsing = false)
|
||||||
{
|
{
|
||||||
this.operationFactory = operationFactory;
|
this.operationFactory = operationFactory;
|
||||||
|
this.useLenientParsing = useLenientParsing;
|
||||||
}
|
}
|
||||||
|
|
||||||
public IReadOnlyList<IGraphicsStateOperation> Parse(
|
public IReadOnlyList<IGraphicsStateOperation> Parse(
|
||||||
@ -116,9 +118,10 @@
|
|||||||
catch (Exception ex)
|
catch (Exception ex)
|
||||||
{
|
{
|
||||||
// End images can cause weird state if the "EI" appears inside the inline data stream.
|
// End images can cause weird state if the "EI" appears inside the inline data stream.
|
||||||
if (TryGetLastEndImage(graphicsStateOperations, out _, out _))
|
log.Error($"Failed reading operation at offset {inputBytes.CurrentOffset} for page {pageNumber}, data: '{op.Data}'", ex);
|
||||||
|
if (TryGetLastEndImage(graphicsStateOperations, out _, out _)
|
||||||
|
|| useLenientParsing)
|
||||||
{
|
{
|
||||||
log.Error($"Failed reading an operation at offset {inputBytes.CurrentOffset} for page {pageNumber}.", ex);
|
|
||||||
operation = null;
|
operation = null;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -195,7 +195,7 @@
|
|||||||
parsingOptions.UseLenientParsing);
|
parsingOptions.UseLenientParsing);
|
||||||
|
|
||||||
var pageFactory = new PageFactory(pdfScanner, resourceContainer, filterProvider,
|
var pageFactory = new PageFactory(pdfScanner, resourceContainer, filterProvider,
|
||||||
new PageContentParser(new ReflectionGraphicsStateOperationFactory()), parsingOptions.Logger);
|
new PageContentParser(new ReflectionGraphicsStateOperationFactory(), parsingOptions.UseLenientParsing), parsingOptions.Logger);
|
||||||
|
|
||||||
var catalog = CatalogFactory.Create(
|
var catalog = CatalogFactory.Create(
|
||||||
rootReference,
|
rootReference,
|
||||||
|
Loading…
Reference in New Issue
Block a user