diff --git a/src/UglyToad.PdfPig.Tests/Integration/Documents/SPE8EF26T0545.pdf b/src/UglyToad.PdfPig.Tests/Integration/Documents/SPE8EF26T0545.pdf new file mode 100644 index 00000000..63798378 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Integration/Documents/SPE8EF26T0545.pdf differ diff --git a/src/UglyToad.PdfPig.Tests/Integration/GithubIssuesTests.cs b/src/UglyToad.PdfPig.Tests/Integration/GithubIssuesTests.cs index f0d5b00a..f8c46454 100644 --- a/src/UglyToad.PdfPig.Tests/Integration/GithubIssuesTests.cs +++ b/src/UglyToad.PdfPig.Tests/Integration/GithubIssuesTests.cs @@ -11,6 +11,31 @@ public class GithubIssuesTests { + [Fact] + public void Issues1250() + { + // Issue comes from HasFormXObjectCircularReference + var path = IntegrationHelpers.GetDocumentPath("SPE8EF26T0545.pdf"); + using (var document = PdfDocument.Open(path, new ParsingOptions() { UseLenientParsing = true })) + { + var page = document.GetPage(1); + Assert.NotNull(page); + Assert.NotEmpty(page.Letters); + + page = document.GetPage(7); + Assert.NotNull(page); + Assert.NotEmpty(page.Letters); + } + + // Ensure still no StackOverflowException + using (var document = PdfDocument.Open(IntegrationHelpers.GetDocumentPath("issue_671"))) + { + var page = document.GetPage(1); + Assert.NotNull(page); + Assert.NotEmpty(page.Letters); + } + } + [Fact] public void Issues1248() { @@ -29,7 +54,7 @@ } } } - + [Fact] public void Issues1238() { diff --git a/src/UglyToad.PdfPig/Graphics/BaseStreamProcessor.cs b/src/UglyToad.PdfPig/Graphics/BaseStreamProcessor.cs index f72f3ddf..3b7360f0 100644 --- a/src/UglyToad.PdfPig/Graphics/BaseStreamProcessor.cs +++ b/src/UglyToad.PdfPig/Graphics/BaseStreamProcessor.cs @@ -587,7 +587,8 @@ if (hasCircularReference) { if (ParsingOptions.UseLenientParsing) - { + { + // TODO - We might be removing too much, good for the moment. See Issues1250() for examples operations = operations.Where(o => o is not InvokeNamedXObject xo || xo.Name != xObjectName) .ToArray(); ParsingOptions.Logger.Warn( @@ -618,14 +619,56 @@ /// The form's name. /// The form operations parsed from original form stream. protected virtual bool HasFormXObjectCircularReference(StreamToken formStream, - NameToken xObjectName, + NameToken? xObjectName, IReadOnlyList operations) { - return xObjectName != null - && operations.OfType()?.Any(o => o.Name == xObjectName) == - true // operations contain another form with same name - && ResourceStore.TryGetXObject(xObjectName, out var result) - && result.Data.Span.SequenceEqual(formStream.Data.Span); // The form contained in the operations has identical data to current form + if (xObjectName is null) + { + return false; + } + + if (operations.OfType()?.Any(o => o.Name == xObjectName) != true) + { + return false; + } + + if (!TryGetXObjectToken(formStream, xObjectName, PdfScanner, out var t1)) + { + return false; + } + + if (!ResourceStore.TryGetXObject(xObjectName, out var resourceStream)) + { + return false; + } + + if (!TryGetXObjectToken(resourceStream, xObjectName, PdfScanner, out var t2)) + { + return false; + } + + if (t1 is null || t2 is null) + { + return false; + } + + return t1.Equals(t2); + + static bool TryGetXObjectToken(StreamToken streamToken, NameToken xObjectName, IPdfTokenScanner scanner, out IToken? token) + { + token = null; + if (!streamToken.StreamDictionary.TryGet(NameToken.Resources, scanner, out var formResources)) + { + return false; + } + + if (!formResources.TryGet(NameToken.Xobject, out var xObjectBase) || !xObjectBase.TryGet(xObjectName, out token)) + { + return false; + } + + return token is not null; + } } ///