mirror of
https://github.com/UglyToad/PdfPig.git
synced 2026-03-10 00:23:29 +08:00
Merge pull request #435 from theolivenbaum/master
Remove recursive algorithm on netstandard2.0 and above
This commit is contained in:
@@ -38,7 +38,7 @@
|
|||||||
/// The child nodes of this node if <see cref="IsPage"/> is <see langword="false" />
|
/// The child nodes of this node if <see cref="IsPage"/> is <see langword="false" />
|
||||||
/// </summary>
|
/// </summary>
|
||||||
[NotNull]
|
[NotNull]
|
||||||
public IReadOnlyList<PageTreeNode> Children { get; }
|
public IReadOnlyList<PageTreeNode> Children { get; private set; }
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// The parent node of this node, unless it is the root node.
|
/// The parent node of this node, unless it is the root node.
|
||||||
@@ -56,13 +56,21 @@
|
|||||||
/// </summary>
|
/// </summary>
|
||||||
internal PageTreeNode(DictionaryToken nodeDictionary, IndirectReference reference,
|
internal PageTreeNode(DictionaryToken nodeDictionary, IndirectReference reference,
    bool isPage,
    int? pageNumber)
{
    NodeDictionary = nodeDictionary ?? throw new ArgumentNullException(nameof(nodeDictionary));
    Reference = reference;
    IsPage = isPage;
    PageNumber = pageNumber;

    // Children is annotated [NotNull] but is only filled in later by WithChildren.
    // Start from an empty list so a node that is inspected before (or without)
    // WithChildren being called never exposes a null collection.
    Children = new PageTreeNode[0];

    // Only leaf (page) nodes carry a page number; intermediate pages nodes must not.
    if (!IsPage && PageNumber.HasValue)
    {
        throw new ArgumentException("Cannot define page number for a pages node.", nameof(pageNumber));
    }
}
|
||||||
|
|
||||||
|
internal PageTreeNode WithChildren(IReadOnlyList<PageTreeNode> children)
|
||||||
|
{
|
||||||
Children = children ?? throw new ArgumentNullException(nameof(children));
|
Children = children ?? throw new ArgumentNullException(nameof(children));
|
||||||
|
|
||||||
if (IsPage && Children.Count > 0)
|
if (IsPage && Children.Count > 0)
|
||||||
@@ -70,15 +78,11 @@
|
|||||||
throw new ArgumentException("Cannot define children on a page node.", nameof(children));
|
throw new ArgumentException("Cannot define children on a page node.", nameof(children));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!IsPage && pageNumber.HasValue)
|
|
||||||
{
|
|
||||||
throw new ArgumentException("Cannot define page number for a pages node.", nameof(pageNumber));
|
|
||||||
}
|
|
||||||
|
|
||||||
foreach (var child in Children)
|
foreach (var child in Children)
|
||||||
{
|
{
|
||||||
child.Parent = this;
|
child.Parent = this;
|
||||||
}
|
}
|
||||||
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <inheritdoc />
|
/// <inheritdoc />
|
||||||
|
|||||||
@@ -10,6 +10,16 @@
|
|||||||
|
|
||||||
internal static class CatalogFactory
|
internal static class CatalogFactory
|
||||||
{
|
{
|
||||||
|
|
||||||
|
// Mutable running tally of the pages found so far. It is a reference type, so a
// single instance created in Create can be handed to ProcessPagesNode and every
// increment is observed by all holders of that instance.
private class PageCounter
{
    private int count;

    // Number of times Increment has been called on this instance.
    public int PageCount => count;

    // Advances the tally by exactly one page.
    public void Increment() => count += 1;
}
|
||||||
|
|
||||||
public static Catalog Create(IndirectReference rootReference, DictionaryToken dictionary,
|
public static Catalog Create(IndirectReference rootReference, DictionaryToken dictionary,
|
||||||
IPdfTokenScanner scanner,
|
IPdfTokenScanner scanner,
|
||||||
bool isLenientParsing)
|
bool isLenientParsing)
|
||||||
@@ -46,75 +56,157 @@
|
|||||||
pages = DirectObjectFinder.Get<DictionaryToken>(value, scanner);
|
pages = DirectObjectFinder.Get<DictionaryToken>(value, scanner);
|
||||||
}
|
}
|
||||||
|
|
||||||
var pageNumber = 0;
|
var pageNumber = new PageCounter();
|
||||||
|
|
||||||
var pageTree = ProcessPagesNode(pagesReference, pages, new IndirectReference(1, 0), true,
|
var pageTree = ProcessPagesNode(pagesReference, pages, new IndirectReference(1, 0), true,
|
||||||
scanner, isLenientParsing, ref pageNumber);
|
scanner, isLenientParsing, pageNumber);
|
||||||
|
|
||||||
return new Catalog(dictionary, pages, pageTree);
|
return new Catalog(dictionary, pages, pageTree);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static PageTreeNode ProcessPagesNode(IndirectReference reference, DictionaryToken nodeDictionary,
|
#if NETSTANDARD2_0_OR_GREATER
|
||||||
IndirectReference parentReference,
|
|
||||||
bool isRoot,
|
/// <summary>
/// Builds the <see cref="PageTreeNode"/> hierarchy below the given node without using recursion.
/// </summary>
/// <remarks>
/// The tree is walked depth-first with an explicit stack so that page numbers are handed out
/// in the order the pages are reached when following each Kids array from first to last entry
/// and fully descending into a nested pages node before moving on to its next sibling.
/// A breadth-first walk would number all direct page kids of a node before any page that sits
/// inside an earlier nested pages node, which changes the resulting page order.
/// </remarks>
/// <param name="referenceInput">Reference of the node to start from.</param>
/// <param name="nodeDictionaryInput">Dictionary of the node to start from.</param>
/// <param name="parentReferenceInput">Reference the start node is expected to name as its parent (checked only when not lenient and not root).</param>
/// <param name="isRoot">Whether the start node is the root, in which case the parent check is skipped.</param>
/// <param name="pdfTokenScanner">Scanner used to resolve indirect objects.</param>
/// <param name="isLenientParsing">When true, missing type/kids/parent entries are tolerated instead of throwing.</param>
/// <param name="pageNumber">Shared counter; incremented once for every page node created.</param>
/// <returns>The node for <paramref name="referenceInput"/> with all descendants attached.</returns>
/// <exception cref="PdfDocumentFormatException">
/// A node is malformed (see the individual messages) or a pages node appears among its own ancestors.
/// </exception>
private static PageTreeNode ProcessPagesNode(IndirectReference referenceInput, DictionaryToken nodeDictionaryInput, IndirectReference parentReferenceInput, bool isRoot, IPdfTokenScanner pdfTokenScanner, bool isLenientParsing, PageCounter pageNumber)
{
    bool isPage = CheckIfIsPage(nodeDictionaryInput, parentReferenceInput, isRoot, pdfTokenScanner, isLenientParsing);

    if (isPage)
    {
        pageNumber.Increment();

        return new PageTreeNode(nodeDictionaryInput, referenceInput, true, pageNumber.PageCount).WithChildren(EmptyArray<PageTreeNode>.Instance);
    }

    var rootNode = new PageTreeNode(nodeDictionaryInput, referenceInput, false, null);

    // One frame per pages node that still has kids left to visit. The enumerator is a
    // reference, so advancing it through a copy of the tuple advances the stored frame too.
    var open = new Stack<(PageTreeNode node, IndirectReference reference, IEnumerator<IToken> kids, List<PageTreeNode> children)>();

    // References of the pages nodes currently on the stack, used to reject a Kids entry that
    // points back at one of its own ancestors (which would otherwise never terminate).
    // NOTE(review): relies on IndirectReference having value equality - confirm.
    var ancestors = new HashSet<IndirectReference>();

    open.Push((rootNode, referenceInput, GetKids(nodeDictionaryInput, pdfTokenScanner, isLenientParsing), new List<PageTreeNode>()));
    ancestors.Add(referenceInput);

    while (open.Count > 0)
    {
        var current = open.Peek();

        if (!current.kids.MoveNext())
        {
            // Every kid of this pages node has been handled: attach them and go back to the parent.
            open.Pop();
            current.kids.Dispose();
            ancestors.Remove(current.reference);
            current.node.WithChildren(current.children);
            continue;
        }

        var kid = current.kids.Current;

        if (!(kid is IndirectReferenceToken kidRef))
        {
            throw new PdfDocumentFormatException($"Kids array contained invalid entry (must be indirect reference): {kid}.");
        }

        if (!DirectObjectFinder.TryGet(kidRef, pdfTokenScanner, out DictionaryToken kidDictionaryToken))
        {
            throw new PdfDocumentFormatException($"Could not find dictionary associated with reference in pages kids array: {kidRef}.");
        }

        bool isChildPage = CheckIfIsPage(kidDictionaryToken, current.reference, false, pdfTokenScanner, isLenientParsing);

        if (isChildPage)
        {
            pageNumber.Increment();

            var kidPageNode = new PageTreeNode(kidDictionaryToken, kidRef.Data, true, pageNumber.PageCount).WithChildren(EmptyArray<PageTreeNode>.Instance);
            current.children.Add(kidPageNode);
            continue;
        }

        if (!ancestors.Add(kidRef.Data))
        {
            throw new PdfDocumentFormatException($"Pages tree contained a cycle: pages node {kidRef.Data} is listed below itself.");
        }

        // Nested pages node: record it under its parent now and descend into it before
        // looking at the parent's remaining kids.
        var kidChildNode = new PageTreeNode(kidDictionaryToken, kidRef.Data, false, null);
        current.children.Add(kidChildNode);
        open.Push((kidChildNode, kidRef.Data, GetKids(kidDictionaryToken, pdfTokenScanner, isLenientParsing), new List<PageTreeNode>()));
    }

    return rootNode;

    // Returns an enumerator over the Kids array of a pages node; a missing array is an
    // error unless parsing leniently, in which case the node is treated as having no kids.
    static IEnumerator<IToken> GetKids(DictionaryToken nodeDictionary, IPdfTokenScanner pdfTokenScanner, bool isLenientParsing)
    {
        if (!nodeDictionary.TryGet(NameToken.Kids, pdfTokenScanner, out ArrayToken kids))
        {
            if (!isLenientParsing)
            {
                throw new PdfDocumentFormatException($"Pages node in the document pages tree did not define a kids array: {nodeDictionary}.");
            }

            kids = new ArrayToken(EmptyArray<IToken>.Instance);
        }

        IEnumerable<IToken> kidTokens = kids.Data;

        return kidTokens.GetEnumerator();
    }

    // Decides whether a tree node is a leaf page (true) or an intermediate pages node (false),
    // validating its Type and, when strict and not root, its Parent entry.
    static bool CheckIfIsPage(DictionaryToken nodeDictionary, IndirectReference parentReference, bool isRoot, IPdfTokenScanner pdfTokenScanner, bool isLenientParsing)
    {
        var isPage = false;

        if (!nodeDictionary.TryGet(NameToken.Type, pdfTokenScanner, out NameToken type))
        {
            if (!isLenientParsing)
            {
                throw new PdfDocumentFormatException($"Node in the document pages tree did not define a type: {nodeDictionary}.");
            }

            // Lenient fallback: an untyped node without a Kids array is taken to be a page.
            if (!nodeDictionary.TryGet(NameToken.Kids, pdfTokenScanner, out ArrayToken _))
            {
                isPage = true;
            }
        }
        else
        {
            isPage = type.Equals(NameToken.Page);

            if (!isPage && !type.Equals(NameToken.Pages) && !isLenientParsing)
            {
                throw new PdfDocumentFormatException($"Node in the document pages tree defined invalid type: {nodeDictionary}.");
            }
        }

        if (!isLenientParsing && !isRoot)
        {
            if (!nodeDictionary.TryGet(NameToken.Parent, pdfTokenScanner, out IndirectReferenceToken parentReferenceToken))
            {
                throw new PdfDocumentFormatException($"Could not find parent indirect reference token on pages tree node: {nodeDictionary}.");
            }

            if (!parentReferenceToken.Data.Equals(parentReference))
            {
                throw new PdfDocumentFormatException($"Pages tree node parent reference {parentReferenceToken.Data} did not match actual parent {parentReference}.");
            }
        }

        return isPage;
    }
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Keep the algorithm below from throwing a StackOverflow exception.
|
||||||
|
// It probably should be refactored to not be recursive
|
||||||
|
private const ushort MAX_TREE_DEPTH = 1024;
|
||||||
|
|
||||||
|
private static PageTreeNode ProcessPagesNode(IndirectReference reference, DictionaryToken nodeDictionary, IndirectReference parentReference, bool isRoot, IPdfTokenScanner pdfTokenScanner, bool isLenientParsing, PageCounter pageNumber, int depth = 0)
|
||||||
|
{
|
||||||
|
depth++;
|
||||||
|
|
||||||
|
if (depth > MAX_TREE_DEPTH) { throw new PdfDocumentFormatException($"Tree exceeded maximum depth of {MAX_TREE_DEPTH}, aborting."); }
|
||||||
|
|
||||||
var isPage = false;
|
var isPage = false;
|
||||||
|
|
||||||
if (!nodeDictionary.TryGet(NameToken.Type, pdfTokenScanner, out NameToken type))
|
if (!nodeDictionary.TryGet(NameToken.Type, pdfTokenScanner, out NameToken type))
|
||||||
{
|
{
|
||||||
if (!isLenientParsing)
|
if (!isLenientParsing) { throw new PdfDocumentFormatException($"Node in the document pages tree did not define a type: {nodeDictionary}."); }
|
||||||
{
|
|
||||||
throw new PdfDocumentFormatException($"Node in the document pages tree did not define a type: {nodeDictionary}.");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!nodeDictionary.TryGet(NameToken.Kids, pdfTokenScanner, out ArrayToken _))
|
if (!nodeDictionary.TryGet(NameToken.Kids, pdfTokenScanner, out ArrayToken _)) { isPage = true; }
|
||||||
{
|
|
||||||
isPage = true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
isPage = type.Equals(NameToken.Page);
|
isPage = type.Equals(NameToken.Page);
|
||||||
|
|
||||||
if (!isPage && !type.Equals(NameToken.Pages) && !isLenientParsing)
|
if (!isPage && !type.Equals(NameToken.Pages) && !isLenientParsing) { throw new PdfDocumentFormatException($"Node in the document pages tree defined invalid type: {nodeDictionary}."); }
|
||||||
{
|
|
||||||
throw new PdfDocumentFormatException($"Node in the document pages tree defined invalid type: {nodeDictionary}.");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!isLenientParsing && !isRoot)
|
if (!isLenientParsing && !isRoot)
|
||||||
{
|
{
|
||||||
if (!nodeDictionary.TryGet(NameToken.Parent, pdfTokenScanner, out IndirectReferenceToken parentReferenceToken))
|
if (!nodeDictionary.TryGet(NameToken.Parent, pdfTokenScanner, out IndirectReferenceToken parentReferenceToken)) { throw new PdfDocumentFormatException($"Could not find parent indirect reference token on pages tree node: {nodeDictionary}."); }
|
||||||
{
|
|
||||||
throw new PdfDocumentFormatException($"Could not find parent indirect reference token on pages tree node: {nodeDictionary}.");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!parentReferenceToken.Data.Equals(parentReference))
|
if (!parentReferenceToken.Data.Equals(parentReference)) { throw new PdfDocumentFormatException($"Pages tree node parent reference {parentReferenceToken.Data} did not match actual parent {parentReference}."); }
|
||||||
{
|
|
||||||
throw new PdfDocumentFormatException($"Pages tree node parent reference {parentReferenceToken.Data} did not match actual parent {parentReference}.");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isPage)
|
if (isPage)
|
||||||
{
|
{
|
||||||
pageNumber++;
|
pageNumber.Increment();
|
||||||
|
var newPage = new PageTreeNode(nodeDictionary, reference, true, pageNumber.PageCount).WithChildren(EmptyArray<PageTreeNode>.Instance);
|
||||||
var thisNode = new PageTreeNode(nodeDictionary, reference, true,
|
return newPage;
|
||||||
pageNumber,
|
|
||||||
EmptyArray<PageTreeNode>.Instance);
|
|
||||||
|
|
||||||
return thisNode;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!nodeDictionary.TryGet(NameToken.Kids, pdfTokenScanner, out ArrayToken kids))
|
if (!nodeDictionary.TryGet(NameToken.Kids, pdfTokenScanner, out ArrayToken kids))
|
||||||
{
|
{
|
||||||
if (!isLenientParsing)
|
if (!isLenientParsing) { throw new PdfDocumentFormatException($"Pages node in the document pages tree did not define a kids array: {nodeDictionary}."); }
|
||||||
{
|
|
||||||
throw new PdfDocumentFormatException($"Pages node in the document pages tree did not define a kids array: {nodeDictionary}.");
|
|
||||||
}
|
|
||||||
|
|
||||||
kids = new ArrayToken(EmptyArray<IToken>.Instance);
|
kids = new ArrayToken(EmptyArray<IToken>.Instance);
|
||||||
}
|
}
|
||||||
@@ -123,22 +215,16 @@
|
|||||||
|
|
||||||
foreach (var kid in kids.Data)
|
foreach (var kid in kids.Data)
|
||||||
{
|
{
|
||||||
if (!(kid is IndirectReferenceToken kidRef))
|
if (!(kid is IndirectReferenceToken kidRef)) { throw new PdfDocumentFormatException($"Kids array contained invalid entry (must be indirect reference): {kid}."); }
|
||||||
{
|
|
||||||
throw new PdfDocumentFormatException($"Kids array contained invalid entry (must be indirect reference): {kid}.");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!DirectObjectFinder.TryGet(kidRef, pdfTokenScanner, out DictionaryToken kidDictionaryToken))
|
if (!DirectObjectFinder.TryGet(kidRef, pdfTokenScanner, out DictionaryToken kidDictionaryToken)) { throw new PdfDocumentFormatException($"Could not find dictionary associated with reference in pages kids array: {kidRef}."); }
|
||||||
{
|
|
||||||
throw new PdfDocumentFormatException($"Could not find dictionary associated with reference in pages kids array: {kidRef}.");
|
|
||||||
}
|
|
||||||
|
|
||||||
var kidNode = ProcessPagesNode(kidRef.Data, kidDictionaryToken, reference, false, pdfTokenScanner, isLenientParsing, ref pageNumber);
|
var kidNode = ProcessPagesNode(kidRef.Data, kidDictionaryToken, reference, false, pdfTokenScanner, isLenientParsing, pageNumber, depth);
|
||||||
|
|
||||||
nodeChildren.Add(kidNode);
|
nodeChildren.Add(kidNode);
|
||||||
}
|
}
|
||||||
|
|
||||||
return new PageTreeNode(nodeDictionary, reference, false, null, nodeChildren);
|
return new PageTreeNode(nodeDictionary, reference, false, null).WithChildren(nodeChildren);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user