Check and handle circular references when processing XObject forms and fix #671

This commit is contained in:
BobLd
2023-08-05 15:30:13 +01:00
parent e99fa4eef6
commit d1e8b42877
4 changed files with 72 additions and 3 deletions

View File

@@ -9,6 +9,10 @@
public class IntegrationDocumentTests
{
private static readonly Lazy<string> DocumentFolder = new Lazy<string>(() => Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "Integration", "Documents")));
// File names to leave out of the enumerated "*.pdf" test documents: any file whose path
// ends with one of these entries is filtered out before the document list is returned.
// NOTE(review): issue_671.pdf is presumably excluded because it is covered by dedicated
// XObject form tests instead - confirm.
private static readonly HashSet<string> _documentsToIgnore = new HashSet<string>()
{
"issue_671.pdf"
};
[Theory]
[MemberData(nameof(GetAllDocuments))]
@@ -101,7 +105,7 @@
var files = Directory.GetFiles(DocumentFolder.Value, "*.pdf");
// Return the shortname so we can see it in the test explorer.
return files.Select(x => new object[] { Path.GetFileName(x) });
return files.Where(x => !_documentsToIgnore.Any(i => x.EndsWith(i))).Select(x => new object[] { Path.GetFileName(x) });
}
}
}

View File

@@ -0,0 +1,37 @@
namespace UglyToad.PdfPig.Tests.Integration
{
    using UglyToad.PdfPig.Core;
    using Xunit;

    /// <summary>
    /// Integration tests covering XObject forms that reference themselves (issue #671).
    /// </summary>
    public class XObjectFormTests
    {
        /// <summary>
        /// Opening the document without explicit parsing options must let the first page be read
        /// to completion instead of recursing endlessly on the self-referencing form.
        /// </summary>
        [Fact]
        public void CanReadDocumentWithoutStackOverflowIssue671()
        {
            using (var document = PdfDocument.Open(IntegrationHelpers.GetDocumentPath("issue_671")))
            {
                var page = document.GetPage(1);

                // Make the expected outcome explicit rather than relying on "did not crash".
                Assert.NotNull(page);
            }
        }

        /// <summary>
        /// With lenient parsing switched off the self reference must surface as a
        /// <see cref="PdfDocumentFormatException"/> carrying the documented message.
        /// </summary>
        [Fact]
        public void CanReadDocumentThrowsIssue671()
        {
            using (var document = PdfDocument.Open(IntegrationHelpers.GetDocumentPath("issue_671"), ParsingOptions.LenientParsingOff))
            {
                var exception = Assert.Throws<PdfDocumentFormatException>(() => document.GetPage(1));

                Assert.Contains("is referencing itself which can cause unexpected behaviour", exception.Message);
            }
        }

        /// <summary>
        /// Guards against false positives: this document must still load with lenient parsing off.
        /// </summary>
        [Fact]
        public void CanReadDocumentMOZILLA_3136_0()
        {
            // This document does not actually contain circular references
            using (var document = PdfDocument.Open(IntegrationHelpers.GetDocumentPath("MOZILLA-3136-0"), ParsingOptions.LenientParsingOff))
            {
                var page = document.GetPage(1);

                // Make the expected outcome explicit rather than relying on "did not throw".
                Assert.NotNull(page);
            }
        }
    }
}

View File

@@ -486,7 +486,7 @@
}
else if (subType.Equals(NameToken.Form))
{
ProcessFormXObject(xObjectStream);
ProcessFormXObject(xObjectStream, xObjectName);
}
else
{
@@ -494,7 +494,7 @@
}
}
private void ProcessFormXObject(StreamToken formStream)
private void ProcessFormXObject(StreamToken formStream, NameToken xObjectName)
{
/*
* When a form XObject is invoked the following should happen:
@@ -603,6 +603,20 @@
// 3. We don't respect clipping currently.
// 4. Paint the objects.
bool hasCircularReference = HasFormXObjectCircularReference(formStream, xObjectName, operations);
if (hasCircularReference)
{
if (parsingOptions.UseLenientParsing)
{
operations = operations.Where(o => o is not InvokeNamedXObject xo || xo.Name != xObjectName).ToArray();
parsingOptions.Logger.Warn($"An XObject form named '{xObjectName}' is referencing itself which can cause unexpected behaviour. The self reference was removed from the operations before further processing.");
}
else
{
throw new PdfDocumentFormatException($"An XObject form named '{xObjectName}' is referencing itself which can cause unexpected behaviour.");
}
}
ProcessOperations(operations);
// 5. Restore saved state.
@@ -614,6 +628,20 @@
}
}
/// <summary>
/// Determines whether an XObject form directly invokes itself: its operations invoke an XObject
/// with the form's own name, and that name resolves to a stream whose data is identical to the form's.
/// </summary>
/// <param name="formStream">The stream of the form currently being processed.</param>
/// <param name="xObjectName">The name of the form; when <see langword="null"/> the result is <see langword="false"/>.</param>
/// <param name="operations">The operations parsed from <paramref name="formStream"/>.</param>
/// <returns><see langword="true"/> when a self reference is detected; otherwise <see langword="false"/>.</returns>
private bool HasFormXObjectCircularReference(StreamToken formStream, NameToken xObjectName, IReadOnlyList<IGraphicsStateOperation> operations)
{
    // Without a name there is nothing to match the invocations against.
    if (xObjectName == null)
    {
        return false;
    }

    // Look for an invocation of an XObject carrying the form's own name.
    bool invokesSameName = operations
        .OfType<InvokeNamedXObject>()
        .Any(invocation => invocation.Name == xObjectName);

    if (!invokesSameName)
    {
        return false;
    }

    // A matching name alone is not enough: it must resolve to an XObject in the resource store.
    if (!resourceStore.TryGetXObject(xObjectName, out var resolved))
    {
        return false;
    }

    // Only identical stream data counts as the form referencing itself.
    return resolved.Data.SequenceEqual(formStream.Data);
}
public void BeginSubpath()
{
if (CurrentPath == null)