diff --git a/src/UglyToad.PdfPig/Content/PageTreeNode.cs b/src/UglyToad.PdfPig/Content/PageTreeNode.cs
index f9cc505d..2b48d303 100644
--- a/src/UglyToad.PdfPig/Content/PageTreeNode.cs
+++ b/src/UglyToad.PdfPig/Content/PageTreeNode.cs
@@ -38,7 +38,7 @@
/// The child nodes of this node if <see cref="IsPage"/> is <see langword="false"/>.
///
[NotNull]
- public IReadOnlyList Children { get; }
+ public IReadOnlyList Children { get; private set; }
///
/// The parent node of this node, unless it is the root node.
@@ -56,29 +56,33 @@
///
internal PageTreeNode(DictionaryToken nodeDictionary, IndirectReference reference,
bool isPage,
- int? pageNumber,
- IReadOnlyList children)
+ int? pageNumber) // Children is no longer set here; it stays unassigned until WithChildren is called.
{
NodeDictionary = nodeDictionary ?? throw new ArgumentNullException(nameof(nodeDictionary));
Reference = reference;
IsPage = isPage;
PageNumber = pageNumber;
+ // Only a page node may carry a page number; a pages (non-page) node given one is rejected.
+ if (!IsPage && PageNumber.HasValue)
+ {
+ throw new ArgumentException("Cannot define page number for a pages node.", nameof(pageNumber));
+ }
+ }
+
+ internal PageTreeNode WithChildren(IReadOnlyList children) // Second construction step: stores the children and points each child's Parent at this node.
+ { // NOTE(review): Children is assigned before the page-node check below, so a call that throws still leaves it set.
Children = children ?? throw new ArgumentNullException(nameof(children));
if (IsPage && Children.Count > 0)
{
- throw new ArgumentException("Cannot define children on a page node.", nameof(children));
- }
-
- if (!IsPage && pageNumber.HasValue)
- {
- throw new ArgumentException("Cannot define page number for a pages node.", nameof(pageNumber));
+ throw new ArgumentException("Cannot define children on a page node.", nameof(children));
}
foreach (var child in Children)
{
child.Parent = this;
}
+ return this; // Returns this same instance so the call can be chained directly onto the constructor.
}
///
diff --git a/src/UglyToad.PdfPig/Parser/CatalogFactory.cs b/src/UglyToad.PdfPig/Parser/CatalogFactory.cs
index 71e6f79a..9f272f88 100644
--- a/src/UglyToad.PdfPig/Parser/CatalogFactory.cs
+++ b/src/UglyToad.PdfPig/Parser/CatalogFactory.cs
@@ -11,9 +11,14 @@
internal static class CatalogFactory
{
- // Keep the algorithm below from throwing a StackOverflow exception.
- // It probably should be refactored to not be recursive
- private const ushort MAX_TREE_DEPTH = 1024;
+        /// <summary>
+        /// Mutable running total of the pages found so far. A single instance is passed through the
+        /// page tree walk so that every call updates the same count.
+        /// </summary>
+        private class PageCounter
+        {
+            private int count;
+
+            /// <summary>
+            /// The number of times <see cref="Increment"/> has been called on this instance.
+            /// </summary>
+            public int PageCount => count;
+
+            /// <summary>
+            /// Raises <see cref="PageCount"/> by one.
+            /// </summary>
+            public void Increment() => count++;
+        }
public static Catalog Create(IndirectReference rootReference, DictionaryToken dictionary,
IPdfTokenScanner scanner,
@@ -51,81 +56,157 @@
pages = DirectObjectFinder.Get(value, scanner);
}
- var pageNumber = 0;
+ var pageNumber = new PageCounter();
var pageTree = ProcessPagesNode(pagesReference, pages, new IndirectReference(1, 0), true,
- scanner, isLenientParsing, ref pageNumber, 0);
+ scanner, isLenientParsing, pageNumber);
return new Catalog(dictionary, pages, pageTree);
}
- private static PageTreeNode ProcessPagesNode(IndirectReference reference, DictionaryToken nodeDictionary,
- IndirectReference parentReference,
- bool isRoot,
- IPdfTokenScanner pdfTokenScanner,
- bool isLenientParsing,
- ref int pageNumber,
- ushort depth)
+#if NETSTANDARD2_0_OR_GREATER
+
+        /// <summary>
+        /// Builds the page tree rooted at the given node without recursion, so the nesting depth of the
+        /// document's pages tree is not limited by the call stack.
+        /// </summary>
+        /// <remarks>
+        /// Nodes are visited depth-first, following each Kids array in order, so page numbers are handed out in
+        /// the same sequence as by the recursive overload: every page below a nested pages node is numbered before
+        /// the siblings that follow that node. A pages node that appears again below itself is rejected, because
+        /// such a cycle would otherwise keep the walk running until memory is exhausted.
+        /// </remarks>
+        /// <param name="referenceInput">The indirect reference of the node to start from.</param>
+        /// <param name="nodeDictionaryInput">The dictionary of the node to start from.</param>
+        /// <param name="parentReferenceInput">The reference the start node's Parent entry is compared against (ignored when <paramref name="isRoot"/> is true or parsing is lenient).</param>
+        /// <param name="isRoot">Whether the start node is the root of the pages tree; the root is exempt from the parent check.</param>
+        /// <param name="pdfTokenScanner">The scanner used to resolve indirect references.</param>
+        /// <param name="isLenientParsing">When true, missing Type/Kids entries and parent mismatches are tolerated instead of throwing.</param>
+        /// <param name="pageNumber">The shared counter that supplies page numbers; incremented once per page found.</param>
+        /// <returns>The node for the start dictionary, with all descendants attached.</returns>
+        /// <exception cref="PdfDocumentFormatException">
+        /// A Kids entry is not an indirect reference, cannot be resolved to a dictionary, the tree contains a cycle,
+        /// or (when not lenient) a node fails the type, kids or parent checks.
+        /// </exception>
+        private static PageTreeNode ProcessPagesNode(IndirectReference referenceInput, DictionaryToken nodeDictionaryInput, IndirectReference parentReferenceInput, bool isRoot, IPdfTokenScanner pdfTokenScanner, bool isLenientParsing, PageCounter pageNumber)
+        {
+            bool isPage = CheckIfIsPage(nodeDictionaryInput, parentReferenceInput, isRoot, pdfTokenScanner, isLenientParsing);
+
+            if (isPage)
+            {
+                pageNumber.Increment();
+
+                return new PageTreeNode(nodeDictionaryInput, referenceInput, true, pageNumber.PageCount).WithChildren(EmptyArray<PageTreeNode>.Instance);
+            }
+
+            var rootNode = new PageTreeNode(nodeDictionaryInput, referenceInput, false, null);
+
+            // References of the pages nodes on the path from the root to the node being processed.
+            // Meeting one of them again as a kid means the tree loops back on itself.
+            var openReferences = new HashSet<IndirectReference> { referenceInput };
+
+            // Explicit depth-first stack. Each frame keeps the kids that still have to be visited, which lets the
+            // walk descend into a nested pages node and later resume with that node's following siblings.
+            var open = new Stack<(PageTreeNode node, IndirectReference reference, Queue<IToken> pendingKids, List<PageTreeNode> children)>();
+            open.Push((rootNode, referenceInput, GetKids(nodeDictionaryInput), new List<PageTreeNode>()));
+
+            while (open.Count > 0)
+            {
+                var current = open.Peek();
+
+                if (current.pendingKids.Count == 0)
+                {
+                    // Every kid of this node has been handled, so its child list is complete.
+                    open.Pop();
+                    openReferences.Remove(current.reference);
+                    current.node.WithChildren(current.children);
+                    continue;
+                }
+
+                var kid = current.pendingKids.Dequeue();
+
+                if (!(kid is IndirectReferenceToken kidRef))
+                {
+                    throw new PdfDocumentFormatException($"Kids array contained invalid entry (must be indirect reference): {kid}.");
+                }
+
+                if (!DirectObjectFinder.TryGet(kidRef, pdfTokenScanner, out DictionaryToken kidDictionaryToken))
+                {
+                    throw new PdfDocumentFormatException($"Could not find dictionary associated with reference in pages kids array: {kidRef}.");
+                }
+
+                bool isChildPage = CheckIfIsPage(kidDictionaryToken, current.reference, false, pdfTokenScanner, isLenientParsing);
+
+                if (isChildPage)
+                {
+                    pageNumber.Increment();
+
+                    var kidPageNode = new PageTreeNode(kidDictionaryToken, kidRef.Data, true, pageNumber.PageCount).WithChildren(EmptyArray<PageTreeNode>.Instance);
+                    current.children.Add(kidPageNode);
+                    continue;
+                }
+
+                if (!openReferences.Add(kidRef.Data))
+                {
+                    throw new PdfDocumentFormatException($"Pages tree contains a cycle: pages node {kidRef.Data} is listed below itself.");
+                }
+
+                // Descend: the nested pages node is handled completely before the remaining kids of the current node.
+                var kidChildNode = new PageTreeNode(kidDictionaryToken, kidRef.Data, false, null);
+                current.children.Add(kidChildNode);
+                open.Push((kidChildNode, kidRef.Data, GetKids(kidDictionaryToken), new List<PageTreeNode>()));
+            }
+
+            return rootNode;
+
+            // Reads the Kids array of a pages node into a queue of entries still to be visited.
+            Queue<IToken> GetKids(DictionaryToken pagesDictionary)
+            {
+                if (!pagesDictionary.TryGet(NameToken.Kids, pdfTokenScanner, out ArrayToken kids))
+                {
+                    if (!isLenientParsing)
+                    {
+                        throw new PdfDocumentFormatException($"Pages node in the document pages tree did not define a kids array: {pagesDictionary}.");
+                    }
+
+                    kids = new ArrayToken(EmptyArray<IToken>.Instance);
+                }
+
+                return new Queue<IToken>(kids.Data);
+            }
+
+            // Decides whether a node is a page (true) or a pages node (false), applying the strict-mode
+            // type and parent checks along the way.
+            static bool CheckIfIsPage(DictionaryToken nodeDictionary, IndirectReference parentReference, bool isRoot, IPdfTokenScanner pdfTokenScanner, bool isLenientParsing)
+            {
+                var isPage = false;
+
+                if (!nodeDictionary.TryGet(NameToken.Type, pdfTokenScanner, out NameToken type))
+                {
+                    if (!isLenientParsing)
+                    {
+                        throw new PdfDocumentFormatException($"Node in the document pages tree did not define a type: {nodeDictionary}.");
+                    }
+
+                    // Without a type, a node that has no Kids array is taken to be a page.
+                    if (!nodeDictionary.TryGet(NameToken.Kids, pdfTokenScanner, out ArrayToken _))
+                    {
+                        isPage = true;
+                    }
+                }
+                else
+                {
+                    isPage = type.Equals(NameToken.Page);
+
+                    if (!isPage && !type.Equals(NameToken.Pages) && !isLenientParsing)
+                    {
+                        throw new PdfDocumentFormatException($"Node in the document pages tree defined invalid type: {nodeDictionary}.");
+                    }
+                }
+
+                if (!isLenientParsing && !isRoot)
+                {
+                    if (!nodeDictionary.TryGet(NameToken.Parent, pdfTokenScanner, out IndirectReferenceToken parentReferenceToken))
+                    {
+                        throw new PdfDocumentFormatException($"Could not find parent indirect reference token on pages tree node: {nodeDictionary}.");
+                    }
+
+                    if (!parentReferenceToken.Data.Equals(parentReference))
+                    {
+                        throw new PdfDocumentFormatException($"Pages tree node parent reference {parentReferenceToken.Data} did not match actual parent {parentReference}.");
+                    }
+                }
+
+                return isPage;
+            }
+        }
+
+#endif
+
+ // Keep the algorithm below from throwing a StackOverflow exception.
+ // It probably should be refactored to not be recursive
+ private const ushort MAX_TREE_DEPTH = 1024;
+
+ private static PageTreeNode ProcessPagesNode(IndirectReference reference, DictionaryToken nodeDictionary, IndirectReference parentReference, bool isRoot, IPdfTokenScanner pdfTokenScanner, bool isLenientParsing, PageCounter pageNumber, int depth = 0)
+ {
+ depth++;
+
+ if (depth > MAX_TREE_DEPTH) { throw new PdfDocumentFormatException($"Tree exceeded maximum depth of {MAX_TREE_DEPTH}, aborting."); }
+
var isPage = false;
if (!nodeDictionary.TryGet(NameToken.Type, pdfTokenScanner, out NameToken type))
{
- if (!isLenientParsing)
- {
- throw new PdfDocumentFormatException($"Node in the document pages tree did not define a type: {nodeDictionary}.");
- }
+ if (!isLenientParsing) { throw new PdfDocumentFormatException($"Node in the document pages tree did not define a type: {nodeDictionary}."); }
- if (!nodeDictionary.TryGet(NameToken.Kids, pdfTokenScanner, out ArrayToken _))
- {
- isPage = true;
- }
+ if (!nodeDictionary.TryGet(NameToken.Kids, pdfTokenScanner, out ArrayToken _)) { isPage = true; }
}
else
{
isPage = type.Equals(NameToken.Page);
- if (!isPage && !type.Equals(NameToken.Pages) && !isLenientParsing)
- {
- throw new PdfDocumentFormatException($"Node in the document pages tree defined invalid type: {nodeDictionary}.");
- }
+ if (!isPage && !type.Equals(NameToken.Pages) && !isLenientParsing) { throw new PdfDocumentFormatException($"Node in the document pages tree defined invalid type: {nodeDictionary}."); }
}
if (!isLenientParsing && !isRoot)
{
- if (!nodeDictionary.TryGet(NameToken.Parent, pdfTokenScanner, out IndirectReferenceToken parentReferenceToken))
- {
- throw new PdfDocumentFormatException($"Could not find parent indirect reference token on pages tree node: {nodeDictionary}.");
- }
+ if (!nodeDictionary.TryGet(NameToken.Parent, pdfTokenScanner, out IndirectReferenceToken parentReferenceToken)) { throw new PdfDocumentFormatException($"Could not find parent indirect reference token on pages tree node: {nodeDictionary}."); }
- if (!parentReferenceToken.Data.Equals(parentReference))
- {
- throw new PdfDocumentFormatException($"Pages tree node parent reference {parentReferenceToken.Data} did not match actual parent {parentReference}.");
- }
+ if (!parentReferenceToken.Data.Equals(parentReference)) { throw new PdfDocumentFormatException($"Pages tree node parent reference {parentReferenceToken.Data} did not match actual parent {parentReference}."); }
}
if (isPage)
{
- pageNumber++;
-
- var thisNode = new PageTreeNode(nodeDictionary, reference, true,
- pageNumber,
- EmptyArray.Instance);
-
- return thisNode;
+ pageNumber.Increment();
+ var newPage = new PageTreeNode(nodeDictionary, reference, true, pageNumber.PageCount).WithChildren(EmptyArray.Instance);
+ return newPage;
}
if (!nodeDictionary.TryGet(NameToken.Kids, pdfTokenScanner, out ArrayToken kids))
{
- if (!isLenientParsing)
- {
- throw new PdfDocumentFormatException($"Pages node in the document pages tree did not define a kids array: {nodeDictionary}.");
- }
+ if (!isLenientParsing) { throw new PdfDocumentFormatException($"Pages node in the document pages tree did not define a kids array: {nodeDictionary}."); }
kids = new ArrayToken(EmptyArray.Instance);
}
@@ -134,22 +215,16 @@
foreach (var kid in kids.Data)
{
- if (!(kid is IndirectReferenceToken kidRef))
- {
- throw new PdfDocumentFormatException($"Kids array contained invalid entry (must be indirect reference): {kid}.");
- }
+ if (!(kid is IndirectReferenceToken kidRef)) { throw new PdfDocumentFormatException($"Kids array contained invalid entry (must be indirect reference): {kid}."); }
- if (!DirectObjectFinder.TryGet(kidRef, pdfTokenScanner, out DictionaryToken kidDictionaryToken))
- {
- throw new PdfDocumentFormatException($"Could not find dictionary associated with reference in pages kids array: {kidRef}.");
- }
+ if (!DirectObjectFinder.TryGet(kidRef, pdfTokenScanner, out DictionaryToken kidDictionaryToken)) { throw new PdfDocumentFormatException($"Could not find dictionary associated with reference in pages kids array: {kidRef}."); }
- var kidNode = ProcessPagesNode(kidRef.Data, kidDictionaryToken, reference, false, pdfTokenScanner, isLenientParsing, ref pageNumber, depth + 1);
+ var kidNode = ProcessPagesNode(kidRef.Data, kidDictionaryToken, reference, false, pdfTokenScanner, isLenientParsing, pageNumber, depth);
nodeChildren.Add(kidNode);
}
- return new PageTreeNode(nodeDictionary, reference, false, null, nodeChildren);
+ return new PageTreeNode(nodeDictionary, reference, false, null).WithChildren(nodeChildren);
}
}
}