Fix integration tests for #579 (3)

This commit is contained in:
BobLd 2023-04-13 19:08:41 +01:00
parent 906066ec58
commit 42e4171c31
6 changed files with 548 additions and 546 deletions

View File

@ -1,66 +1,67 @@
namespace UglyToad.PdfPig.Annotations;
using System;
using System.Collections.Generic;
using Tokens;
/// <summary>
/// Appearance stream (PDF Reference 8.4.4) that describes what an annotation looks like. Each stream is a Form XObject.
/// The appearance stream is either stateless (in which case <see cref="IsStateless"/> is true)
/// or stateful, in which case <see cref="IsStateless"/> is false and the states can be retrieved via <see cref="GetStates"/>.
/// The states can then be used to retrieve the state-specific appearances using <see cref="Get"/>.
/// </summary>
public class AppearanceStream
namespace UglyToad.PdfPig.Annotations
{
private readonly IDictionary<string, StreamToken> appearanceStreamsByState;
private readonly StreamToken statelessAppearanceStream;
using System;
using System.Collections.Generic;
using Tokens;
/// <summary>
/// Indicates if this appearance stream is stateless, or whether you can get appearances by state.
/// Appearance stream (PDF Reference 8.4.4) that describes what an annotation looks like. Each stream is a Form XObject.
/// The appearance stream is either stateless (in which case <see cref="IsStateless"/> is true)
/// or stateful, in which case <see cref="IsStateless"/> is false and the states can be retrieved via <see cref="GetStates"/>.
/// The states can then be used to retrieve the state-specific appearances using <see cref="Get"/>.
/// </summary>
public bool IsStateless => statelessAppearanceStream != null;
/// <summary>
/// Get list of states. If this is a stateless appearance stream, an empty collection is returned.
/// </summary>
public ICollection<string> GetStates => appearanceStreamsByState != null ? appearanceStreamsByState.Keys : new string[0];
/// <summary>
/// Constructor for stateless appearance stream
/// </summary>
/// <param name="streamToken"></param>
internal AppearanceStream(StreamToken streamToken)
public class AppearanceStream
{
statelessAppearanceStream = streamToken;
}
private readonly IDictionary<string, StreamToken> appearanceStreamsByState;
/// <summary>
/// Constructor for stateful appearance stream
/// </summary>
/// <param name="appearanceStreamsByState"></param>
internal AppearanceStream(IDictionary<string, StreamToken> appearanceStreamsByState)
{
this.appearanceStreamsByState = appearanceStreamsByState;
}
private readonly StreamToken statelessAppearanceStream;
/// <summary>
/// Get appearance stream for particular state
/// </summary>
/// <param name="state"></param>
/// <returns></returns>
/// <exception cref="Exception"></exception>
/// <exception cref="ArgumentOutOfRangeException"></exception>
public StreamToken Get(string state)
{
if (appearanceStreamsByState == null)
/// <summary>
/// <c>true</c> when a stateless appearance stream is held (the stateless stream field is non-null);
/// <c>false</c> otherwise, in which case appearances are looked up by state.
/// </summary>
public bool IsStateless
{
    get { return statelessAppearanceStream != null; }
}
/// <summary>
/// The names of all states that have an appearance. When no per-state dictionary is held
/// (a stateless appearance stream) a new empty array is returned instead.
/// </summary>
public ICollection<string> GetStates
{
    get
    {
        if (appearanceStreamsByState == null)
        {
            return new string[0];
        }

        return appearanceStreamsByState.Keys;
    }
}
/// <summary>
/// Constructor for stateless appearance stream
/// </summary>
/// <param name="streamToken"></param>
internal AppearanceStream(StreamToken streamToken)
{
throw new Exception("Cannot get appearance by state when this is a stateless appearance stream");
statelessAppearanceStream = streamToken;
}
if (!appearanceStreamsByState.ContainsKey(state))
/// <summary>
/// Constructor for stateful appearance stream
/// </summary>
/// <param name="appearanceStreamsByState"></param>
internal AppearanceStream(IDictionary<string, StreamToken> appearanceStreamsByState)
{
throw new ArgumentOutOfRangeException(nameof(state), $"Appearance stream does not have state '{state}' (available states: {string.Join(",", appearanceStreamsByState.Keys)})");
this.appearanceStreamsByState = appearanceStreamsByState;
}
/// <summary>
/// Gets the appearance stream stored for a particular state.
/// </summary>
/// <param name="state">Name of the state to look up; the available names are exposed by <see cref="GetStates"/>.</param>
/// <returns>The <see cref="StreamToken"/> stored under <paramref name="state"/>.</returns>
/// <exception cref="Exception">Thrown when no per-state dictionary is held, i.e. this is a stateless appearance stream.</exception>
/// <exception cref="ArgumentOutOfRangeException">Thrown when no appearance is stored under <paramref name="state"/>.</exception>
public StreamToken Get(string state)
{
    if (appearanceStreamsByState == null)
    {
        throw new Exception("Cannot get appearance by state when this is a stateless appearance stream");
    }

    // One dictionary lookup instead of ContainsKey followed by the indexer.
    if (appearanceStreamsByState.TryGetValue(state, out var appearance))
    {
        return appearance;
    }

    // The message lists the known states so the caller can see what would have been valid.
    throw new ArgumentOutOfRangeException(nameof(state), $"Appearance stream does not have state '{state}' (available states: {string.Join(",", appearanceStreamsByState.Keys)})");
}
return appearanceStreamsByState[state];
}
}
}

View File

@ -1,42 +1,42 @@
namespace UglyToad.PdfPig.Annotations;
using System.Collections.Generic;
using Tokenization.Scanner;
using Tokens;
internal static class AppearanceStreamFactory
namespace UglyToad.PdfPig.Annotations
{
public static bool TryCreate(DictionaryToken appearanceDictionary, NameToken name, IPdfTokenScanner tokenScanner, out AppearanceStream appearanceStream)
using System.Collections.Generic;
using Tokenization.Scanner;
using Tokens;
internal static class AppearanceStreamFactory
{
if (appearanceDictionary.TryGet(name, out IndirectReferenceToken appearanceReference))
public static bool TryCreate(DictionaryToken appearanceDictionary, NameToken name, IPdfTokenScanner tokenScanner, out AppearanceStream appearanceStream)
{
var streamToken = tokenScanner.Get(appearanceReference.Data)?.Data as StreamToken;
appearanceStream = new AppearanceStream(streamToken);
return true;
}
if (appearanceDictionary.TryGet(name, out DictionaryToken stateDictionary))
{
var dict = new Dictionary<string, StreamToken>();
foreach (var state in stateDictionary.Data.Keys)
if (appearanceDictionary.TryGet(name, out IndirectReferenceToken appearanceReference))
{
if (stateDictionary.Data.TryGetValue(state, out var stateRef) &&
stateRef is IndirectReferenceToken appearanceRef)
{
var streamToken = tokenScanner.Get(appearanceRef.Data)?.Data as StreamToken;
dict[state] = streamToken;
}
}
if (dict.Count > 0)
{
appearanceStream = new AppearanceStream(dict);
var streamToken = tokenScanner.Get(appearanceReference.Data)?.Data as StreamToken;
appearanceStream = new AppearanceStream(streamToken);
return true;
}
}
appearanceStream = null;
return false;
if (appearanceDictionary.TryGet(name, out DictionaryToken stateDictionary))
{
var dict = new Dictionary<string, StreamToken>();
foreach (var state in stateDictionary.Data.Keys)
{
if (stateDictionary.Data.TryGetValue(state, out var stateRef) &&
stateRef is IndirectReferenceToken appearanceRef)
{
var streamToken = tokenScanner.Get(appearanceRef.Data)?.Data as StreamToken;
dict[state] = streamToken;
}
}
if (dict.Count > 0)
{
appearanceStream = new AppearanceStream(dict);
return true;
}
}
appearanceStream = null;
return false;
}
}
}
}

View File

@ -1,241 +1,239 @@
namespace UglyToad.PdfPig.Content;
using Core;
using Logging;
using Parser.Parts;
using System;
using System.Collections.Generic;
using System.Linq;
using Tokenization.Scanner;
using Tokens;
using Util;
internal class PagesFactory
namespace UglyToad.PdfPig.Content
{
private class PageCounter
using Core;
using Logging;
using Parser.Parts;
using System;
using System.Collections.Generic;
using System.Linq;
using Tokenization.Scanner;
using Tokens;
using Util;
internal class PagesFactory
{
public int PageCount { get; private set; }
public void Increment()
private class PageCounter
{
PageCount++;
}
}
public static Pages Create(IndirectReference pagesReference, DictionaryToken pagesDictionary, IPdfTokenScanner scanner, IPageFactory pageFactory, ILog log, bool isLenientParsing)
{
var pageNumber = new PageCounter();
var pageTree = ProcessPagesNode(pagesReference, pagesDictionary, new IndirectReference(1, 0), true,
scanner, isLenientParsing, pageNumber);
if (!pageTree.IsRoot)
{
throw new ArgumentException("Page tree must be the root page tree node.", nameof(pageTree));
}
var pagesByNumber = new Dictionary<int, PageTreeNode>();
PopulatePageByNumberDictionary(pageTree, pagesByNumber);
var dictionaryPageCount = pagesDictionary.GetIntOrDefault(NameToken.Count);
if (dictionaryPageCount != pagesByNumber.Count)
{
log.Warn($"Dictionary Page Count {dictionaryPageCount} different to discovered pages {pagesByNumber.Count}. Using {pagesByNumber.Count}.");
}
return new Pages(pageFactory, scanner, pageTree, pagesByNumber);
}
private static PageTreeNode ProcessPagesNode(IndirectReference referenceInput,
DictionaryToken nodeDictionaryInput,
IndirectReference parentReferenceInput,
bool isRoot,
IPdfTokenScanner pdfTokenScanner,
bool isLenientParsing,
PageCounter pageNumber)
{
bool isPage = CheckIfIsPage(nodeDictionaryInput, parentReferenceInput, isRoot, pdfTokenScanner, isLenientParsing);
if (isPage)
{
pageNumber.Increment();
return new PageTreeNode(nodeDictionaryInput, referenceInput, true, pageNumber.PageCount).WithChildren(EmptyArray<PageTreeNode>.Instance);
}
//If we got here, we have to iterate till we manage to exit
// Attempt to detect (and break) any infinite loop (IL) by recording the ids of the last 1000 (by default) tokens processed.
const int InfiniteLoopWorkingWindow = 1000;
var visitedTokens = new Dictionary<long, HashSet<int>>(); // Quick lookup containing ids (object number, generation) of tokens already processed (trimmed as we go to last 1000 (by default))
var visitedTokensWorkingWindow = new Queue<(long ObjectNumber, int Generation)>(InfiniteLoopWorkingWindow);
var toProcess =
new Queue<(PageTreeNode thisPage, IndirectReference reference, DictionaryToken nodeDictionary, IndirectReference parentReference,
List<PageTreeNode> nodeChildren)>();
var firstPage = new PageTreeNode(nodeDictionaryInput, referenceInput, false, null);
var setChildren = new List<Action>();
var firstPageChildren = new List<PageTreeNode>();
setChildren.Add(() => firstPage.WithChildren(firstPageChildren));
toProcess.Enqueue(
(thisPage: firstPage, reference: referenceInput, nodeDictionary: nodeDictionaryInput, parentReference: parentReferenceInput,
nodeChildren: firstPageChildren));
do
{
var current = toProcess.Dequeue();
#region Break any potential infinite loop
// Remember the last 1000 (by default) tokens and if we attempt to process again break out of loop
var currentReferenceObjectNumber = current.reference.ObjectNumber;
var currentReferenceGeneration = current.reference.Generation;
if (visitedTokens.ContainsKey(currentReferenceObjectNumber))
public int PageCount { get; private set; }
public void Increment()
{
var generations = visitedTokens[currentReferenceObjectNumber];
if (generations.Contains(currentReferenceGeneration))
{
var listOfLastVisitedToken = visitedTokensWorkingWindow.ToList();
var indexOfCurrentTokenInListOfLastVisitedToken = listOfLastVisitedToken.IndexOf((currentReferenceObjectNumber, currentReferenceGeneration));
var howManyTokensBack = Math.Abs(indexOfCurrentTokenInListOfLastVisitedToken - listOfLastVisitedToken.Count); //eg initate loop is taking us back to last token or five token back
System.Diagnostics.Debug.WriteLine($"Break infinite loop while processing page {pageNumber.PageCount+1} tokens. Token with object number {currentReferenceObjectNumber} and generation {currentReferenceGeneration} processed {howManyTokensBack} token(s) back. ");
continue; // don't reprocess token already processed. break infinite loop. Issue #519
}
else
{
generations.Add(currentReferenceGeneration);
visitedTokens[currentReferenceObjectNumber] = generations;
}
PageCount++;
}
else
{
visitedTokens.Add(currentReferenceObjectNumber, new HashSet<int>() { currentReferenceGeneration });
}
visitedTokensWorkingWindow.Enqueue((currentReferenceObjectNumber, currentReferenceGeneration));
if (visitedTokensWorkingWindow.Count >= InfiniteLoopWorkingWindow)
/// <summary>
/// Builds the <see cref="Pages"/> object for a document: walks the page tree starting at the given
/// root node, indexes every page node by its page number and warns when the count declared in the
/// pages dictionary differs from the number of pages actually found.
/// </summary>
/// <param name="pagesReference">Reference of the root pages node.</param>
/// <param name="pagesDictionary">Dictionary of the root pages node; also read for its declared page count.</param>
/// <param name="scanner">Token scanner used while walking the tree and handed on to <see cref="Pages"/>.</param>
/// <param name="pageFactory">Page factory handed on to <see cref="Pages"/>.</param>
/// <param name="log">Receives the warning about a page-count mismatch.</param>
/// <param name="isLenientParsing">Forwarded to the tree walk.</param>
/// <exception cref="ArgumentException">Thrown when the node returned by the tree walk does not report itself as the root.</exception>
public static Pages Create(IndirectReference pagesReference, DictionaryToken pagesDictionary, IPdfTokenScanner scanner, IPageFactory pageFactory, ILog log, bool isLenientParsing)
{
    PageCounter counter = new PageCounter();

    // NOTE(review): (1, 0) is passed as the parent reference of the root; it looks like a
    // placeholder because the node is flagged as root - confirm against ProcessPagesNode.
    IndirectReference rootParent = new IndirectReference(1, 0);

    // The local keeps the name "pageTree" because nameof(pageTree) is part of the exception below.
    PageTreeNode pageTree = ProcessPagesNode(
        pagesReference,
        pagesDictionary,
        rootParent,
        true,
        scanner,
        isLenientParsing,
        counter);

    if (pageTree.IsRoot == false)
    {
        throw new ArgumentException("Page tree must be the root page tree node.", nameof(pageTree));
    }

    Dictionary<int, PageTreeNode> lookup = new Dictionary<int, PageTreeNode>();
    PopulatePageByNumberDictionary(pageTree, lookup);

    // The discovered pages win over the declared count; the mismatch is only reported.
    var declaredCount = pagesDictionary.GetIntOrDefault(NameToken.Count);
    if (declaredCount != lookup.Count)
    {
        log.Warn($"Dictionary Page Count {declaredCount} different to discovered pages {lookup.Count}. Using {lookup.Count}.");
    }

    return new Pages(pageFactory, scanner, pageTree, lookup);
}
private static PageTreeNode ProcessPagesNode(IndirectReference referenceInput,
DictionaryToken nodeDictionaryInput,
IndirectReference parentReferenceInput,
bool isRoot,
IPdfTokenScanner pdfTokenScanner,
bool isLenientParsing,
PageCounter pageNumber)
{
bool isPage = CheckIfIsPage(nodeDictionaryInput, parentReferenceInput, isRoot, pdfTokenScanner, isLenientParsing);
if (isPage)
{
pageNumber.Increment();
return new PageTreeNode(nodeDictionaryInput, referenceInput, true, pageNumber.PageCount).WithChildren(EmptyArray<PageTreeNode>.Instance);
}
//If we got here, we have to iterate till we manage to exit
// Attempt to detect (and break) any infinite loop (IL) by recording the ids of the last 1000 (by default) tokens processed.
const int InfiniteLoopWorkingWindow = 1000;
var visitedTokens = new Dictionary<long, HashSet<int>>(); // Quick lookup containing ids (object number, generation) of tokens already processed (trimmed as we go to last 1000 (by default))
var visitedTokensWorkingWindow = new Queue<(long ObjectNumber, int Generation)>(InfiniteLoopWorkingWindow);
var toProcess =
new Queue<(PageTreeNode thisPage, IndirectReference reference, DictionaryToken nodeDictionary, IndirectReference parentReference,
List<PageTreeNode> nodeChildren)>();
var firstPage = new PageTreeNode(nodeDictionaryInput, referenceInput, false, null);
var setChildren = new List<Action>();
var firstPageChildren = new List<PageTreeNode>();
setChildren.Add(() => firstPage.WithChildren(firstPageChildren));
toProcess.Enqueue(
(thisPage: firstPage, reference: referenceInput, nodeDictionary: nodeDictionaryInput, parentReference: parentReferenceInput,
nodeChildren: firstPageChildren));
do
{
var current = toProcess.Dequeue();
#region Break any potential infinite loop
// Remember the last 1000 (by default) tokens and if we attempt to process again break out of loop
var currentReferenceObjectNumber = current.reference.ObjectNumber;
var currentReferenceGeneration = current.reference.Generation;
if (visitedTokens.ContainsKey(currentReferenceObjectNumber))
{
var toBeRemovedFromWorkingHashset = visitedTokensWorkingWindow.Dequeue();
var toBeRemovedObjectNumber = toBeRemovedFromWorkingHashset.ObjectNumber;
var toBeRemovedGeneration = toBeRemovedFromWorkingHashset.Generation;
var generations = visitedTokens[toBeRemovedObjectNumber];
generations.Remove(toBeRemovedGeneration);
if (generations.Count == 0)
var generations = visitedTokens[currentReferenceObjectNumber];
if (generations.Contains(currentReferenceGeneration))
{
visitedTokens.Remove(toBeRemovedObjectNumber);
var listOfLastVisitedToken = visitedTokensWorkingWindow.ToList();
var indexOfCurrentTokenInListOfLastVisitedToken = listOfLastVisitedToken.IndexOf((currentReferenceObjectNumber, currentReferenceGeneration));
var howManyTokensBack = Math.Abs(indexOfCurrentTokenInListOfLastVisitedToken - listOfLastVisitedToken.Count); //eg initate loop is taking us back to last token or five token back
System.Diagnostics.Debug.WriteLine($"Break infinite loop while processing page {pageNumber.PageCount + 1} tokens. Token with object number {currentReferenceObjectNumber} and generation {currentReferenceGeneration} processed {howManyTokensBack} token(s) back.");
continue; // don't reprocess token already processed. break infinite loop. Issue #519
}
else
{
visitedTokens[toBeRemovedObjectNumber] = generations;
generations.Add(currentReferenceGeneration);
visitedTokens[currentReferenceObjectNumber] = generations;
}
}
}
#endregion
if (!current.nodeDictionary.TryGet(NameToken.Kids, pdfTokenScanner, out ArrayToken kids))
{
if (!isLenientParsing)
{
throw new PdfDocumentFormatException($"Pages node in the document pages tree did not define a kids array: {current.nodeDictionary}.");
}
kids = new ArrayToken(EmptyArray<IToken>.Instance);
}
foreach (var kid in kids.Data)
{
if (!(kid is IndirectReferenceToken kidRef))
{
throw new PdfDocumentFormatException($"Kids array contained invalid entry (must be indirect reference): {kid}.");
}
if (!DirectObjectFinder.TryGet(kidRef, pdfTokenScanner, out DictionaryToken kidDictionaryToken))
{
throw new PdfDocumentFormatException($"Could not find dictionary associated with reference in pages kids array: {kidRef}.");
}
bool isChildPage = CheckIfIsPage(kidDictionaryToken, current.reference, false, pdfTokenScanner, isLenientParsing);
if (isChildPage)
{
var kidPageNode =
new PageTreeNode(kidDictionaryToken, kidRef.Data, true, pageNumber.PageCount).WithChildren(EmptyArray<PageTreeNode>.Instance);
current.nodeChildren.Add(kidPageNode);
}
else
{
var kidChildNode = new PageTreeNode(kidDictionaryToken, kidRef.Data, false, null);
var kidChildren = new List<PageTreeNode>();
toProcess.Enqueue(
(thisPage: kidChildNode, reference: kidRef.Data, nodeDictionary: kidDictionaryToken, parentReference: current.reference,
nodeChildren: kidChildren));
visitedTokens.Add(currentReferenceObjectNumber, new HashSet<int>() { currentReferenceGeneration });
setChildren.Add(() => kidChildNode.WithChildren(kidChildren));
current.nodeChildren.Add(kidChildNode);
visitedTokensWorkingWindow.Enqueue((currentReferenceObjectNumber, currentReferenceGeneration));
if (visitedTokensWorkingWindow.Count >= InfiniteLoopWorkingWindow)
{
var toBeRemovedFromWorkingHashset = visitedTokensWorkingWindow.Dequeue();
var toBeRemovedObjectNumber = toBeRemovedFromWorkingHashset.ObjectNumber;
var toBeRemovedGeneration = toBeRemovedFromWorkingHashset.Generation;
var generations = visitedTokens[toBeRemovedObjectNumber];
generations.Remove(toBeRemovedGeneration);
if (generations.Count == 0)
{
visitedTokens.Remove(toBeRemovedObjectNumber);
}
else
{
visitedTokens[toBeRemovedObjectNumber] = generations;
}
}
}
}
} while (toProcess.Count > 0);
#endregion
if (!current.nodeDictionary.TryGet(NameToken.Kids, pdfTokenScanner, out ArrayToken kids))
{
if (!isLenientParsing)
{
throw new PdfDocumentFormatException($"Pages node in the document pages tree did not define a kids array: {current.nodeDictionary}.");
}
foreach (var action in setChildren)
{
action();
}
kids = new ArrayToken(EmptyArray<IToken>.Instance);
}
foreach (var child in firstPage.Children.ToRecursiveOrderList(x=>x.Children).Where(child => child.IsPage))
{
pageNumber.Increment();
child.PageNumber = pageNumber.PageCount;
}
foreach (var kid in kids.Data)
{
if (!(kid is IndirectReferenceToken kidRef))
{
throw new PdfDocumentFormatException($"Kids array contained invalid entry (must be indirect reference): {kid}.");
}
return firstPage;
}
if (!DirectObjectFinder.TryGet(kidRef, pdfTokenScanner, out DictionaryToken kidDictionaryToken))
{
throw new PdfDocumentFormatException($"Could not find dictionary associated with reference in pages kids array: {kidRef}.");
}
private static bool CheckIfIsPage(DictionaryToken nodeDictionary, IndirectReference parentReference, bool isRoot, IPdfTokenScanner pdfTokenScanner, bool isLenientParsing)
{
var isPage = false;
bool isChildPage = CheckIfIsPage(kidDictionaryToken, current.reference, false, pdfTokenScanner, isLenientParsing);
if (!nodeDictionary.TryGet(NameToken.Type, pdfTokenScanner, out NameToken type))
{
if (!isLenientParsing) { throw new PdfDocumentFormatException($"Node in the document pages tree did not define a type: {nodeDictionary}."); }
if (isChildPage)
{
var kidPageNode =
new PageTreeNode(kidDictionaryToken, kidRef.Data, true, pageNumber.PageCount).WithChildren(EmptyArray<PageTreeNode>.Instance);
current.nodeChildren.Add(kidPageNode);
}
else
{
var kidChildNode = new PageTreeNode(kidDictionaryToken, kidRef.Data, false, null);
var kidChildren = new List<PageTreeNode>();
toProcess.Enqueue(
(thisPage: kidChildNode, reference: kidRef.Data, nodeDictionary: kidDictionaryToken, parentReference: current.reference,
nodeChildren: kidChildren));
if (!nodeDictionary.TryGet(NameToken.Kids, pdfTokenScanner, out ArrayToken _)) { isPage = true; }
}
else
{
isPage = type.Equals(NameToken.Page);
setChildren.Add(() => kidChildNode.WithChildren(kidChildren));
if (!isPage && !type.Equals(NameToken.Pages) && !isLenientParsing) { throw new PdfDocumentFormatException($"Node in the document pages tree defined invalid type: {nodeDictionary}."); }
}
current.nodeChildren.Add(kidChildNode);
}
}
} while (toProcess.Count > 0);
if (!isLenientParsing && !isRoot)
{
if (!nodeDictionary.TryGet(NameToken.Parent, pdfTokenScanner, out IndirectReferenceToken parentReferenceToken)) { throw new PdfDocumentFormatException($"Could not find parent indirect reference token on pages tree node: {nodeDictionary}."); }
if (!parentReferenceToken.Data.Equals(parentReference)) { throw new PdfDocumentFormatException($"Pages tree node parent reference {parentReferenceToken.Data} did not match actual parent {parentReference}."); }
}
return isPage;
}
private static void PopulatePageByNumberDictionary(PageTreeNode node, Dictionary<int, PageTreeNode> result)
{
if (node.IsPage)
{
if (!node.PageNumber.HasValue)
foreach (var action in setChildren)
{
throw new InvalidOperationException($"Node was page but did not have page number: {node}.");
action();
}
result[node.PageNumber.Value] = node;
return;
foreach (var child in firstPage.Children.ToRecursiveOrderList(x => x.Children).Where(child => child.IsPage))
{
pageNumber.Increment();
child.PageNumber = pageNumber.PageCount;
}
return firstPage;
}
foreach (var child in node.Children)
private static bool CheckIfIsPage(DictionaryToken nodeDictionary, IndirectReference parentReference, bool isRoot, IPdfTokenScanner pdfTokenScanner, bool isLenientParsing)
{
PopulatePageByNumberDictionary(child, result);
var isPage = false;
if (!nodeDictionary.TryGet(NameToken.Type, pdfTokenScanner, out NameToken type))
{
if (!isLenientParsing) { throw new PdfDocumentFormatException($"Node in the document pages tree did not define a type: {nodeDictionary}."); }
if (!nodeDictionary.TryGet(NameToken.Kids, pdfTokenScanner, out ArrayToken _)) { isPage = true; }
}
else
{
isPage = type.Equals(NameToken.Page);
if (!isPage && !type.Equals(NameToken.Pages) && !isLenientParsing) { throw new PdfDocumentFormatException($"Node in the document pages tree defined invalid type: {nodeDictionary}."); }
}
if (!isLenientParsing && !isRoot)
{
if (!nodeDictionary.TryGet(NameToken.Parent, pdfTokenScanner, out IndirectReferenceToken parentReferenceToken)) { throw new PdfDocumentFormatException($"Could not find parent indirect reference token on pages tree node: {nodeDictionary}."); }
if (!parentReferenceToken.Data.Equals(parentReference)) { throw new PdfDocumentFormatException($"Pages tree node parent reference {parentReferenceToken.Data} did not match actual parent {parentReference}."); }
}
return isPage;
}
/// <summary>
/// Recursively walks the tree below <paramref name="node"/> and records every page node in
/// <paramref name="result"/> under its page number. Non-page nodes are only descended into.
/// </summary>
/// <param name="node">Node to start from.</param>
/// <param name="result">Dictionary that receives the page nodes, keyed by page number.</param>
/// <exception cref="InvalidOperationException">Thrown when a page node has no page number assigned.</exception>
private static void PopulatePageByNumberDictionary(PageTreeNode node, Dictionary<int, PageTreeNode> result)
{
    if (!node.IsPage)
    {
        // Intermediate node: visit the children in order.
        foreach (var descendant in node.Children)
        {
            PopulatePageByNumberDictionary(descendant, result);
        }

        return;
    }

    if (node.PageNumber.HasValue)
    {
        // The indexer is used, so a later node with the same number replaces an earlier one.
        result[node.PageNumber.Value] = node;
        return;
    }

    throw new InvalidOperationException($"Node was page but did not have page number: {node}.");
}
}
}
}

View File

@ -1,44 +1,45 @@
namespace UglyToad.PdfPig.Outline;
using Content;
using Destinations;
using Logging;
using System.Collections.Generic;
using Tokens;
/// <summary>
/// Named destinations in a PDF document
/// </summary>
internal class NamedDestinations
namespace UglyToad.PdfPig.Outline
{
/// <summary>
/// Dictionary containing explicit destinations, keyed by name
/// </summary>
private readonly IReadOnlyDictionary<string, ExplicitDestination> namedDestinations;
using Content;
using Destinations;
using Logging;
using System.Collections.Generic;
using Tokens;
/// <summary>
/// Pages are required for getting explicit destinations
/// Named destinations in a PDF document
/// </summary>
private readonly Pages pages;
internal class NamedDestinations
{
/// <summary>
/// Dictionary containing explicit destinations, keyed by name
/// </summary>
private readonly IReadOnlyDictionary<string, ExplicitDestination> namedDestinations;
/// <summary>
/// Constructor
/// </summary>
/// <param name="namedDestinations"></param>
/// <param name="pages"></param>
internal NamedDestinations(IReadOnlyDictionary<string, ExplicitDestination> namedDestinations, Pages pages)
{
this.namedDestinations = namedDestinations;
this.pages = pages;
}
internal bool TryGet(string name, out ExplicitDestination destination)
{
return namedDestinations.TryGetValue(name, out destination);
}
/// <summary>
/// Pages are required for getting explicit destinations
/// </summary>
private readonly Pages pages;
internal bool TryGetExplicitDestination(ArrayToken explicitDestinationArray, ILog log, bool isRemoteDestination, out ExplicitDestination destination)
{
return NamedDestinationsProvider.TryGetExplicitDestination(explicitDestinationArray, pages, log, isRemoteDestination, out destination);
/// <summary>
/// Creates the named destinations wrapper from an already built lookup and the document pages.
/// </summary>
/// <param name="namedDestinations">Explicit destinations keyed by name.</param>
/// <param name="pages">Document pages, passed on when explicit destinations are resolved.</param>
internal NamedDestinations(IReadOnlyDictionary<string, ExplicitDestination> namedDestinations, Pages pages)
    => (this.namedDestinations, this.pages) = (namedDestinations, pages);
/// <summary>
/// Looks up the explicit destination stored under <paramref name="name"/>.
/// </summary>
/// <param name="name">Name of the destination.</param>
/// <param name="destination">Receives the destination when the name is known.</param>
/// <returns><c>true</c> when the name was found in the lookup.</returns>
internal bool TryGet(string name, out ExplicitDestination destination)
    => namedDestinations.TryGetValue(name, out destination);
/// <summary>
/// Resolves an explicit destination array by delegating to
/// <see cref="NamedDestinationsProvider.TryGetExplicitDestination"/>, supplying the pages held by this instance.
/// </summary>
/// <param name="explicitDestinationArray">Array describing the destination.</param>
/// <param name="log">Log passed through to the provider.</param>
/// <param name="isRemoteDestination">Passed through to the provider.</param>
/// <param name="destination">Receives the resolved destination on success.</param>
/// <returns>The result reported by the provider.</returns>
internal bool TryGetExplicitDestination(ArrayToken explicitDestinationArray, ILog log, bool isRemoteDestination, out ExplicitDestination destination)
    => NamedDestinationsProvider.TryGetExplicitDestination(explicitDestinationArray, pages, log, isRemoteDestination, out destination);
}
}
}

View File

@ -1,220 +1,221 @@
namespace UglyToad.PdfPig.Outline;
using Content;
using Destinations;
using Logging;
using Parser.Parts;
using System.Collections.Generic;
using Tokenization.Scanner;
using Tokens;
internal static class NamedDestinationsProvider
namespace UglyToad.PdfPig.Outline
{
internal static NamedDestinations Read(DictionaryToken catalogDictionary, IPdfTokenScanner pdfScanner, Pages pages, ILog log)
using Content;
using Destinations;
using Logging;
using Parser.Parts;
using System.Collections.Generic;
using Tokenization.Scanner;
using Tokens;
internal static class NamedDestinationsProvider
{
var destinationsByName = new Dictionary<string, ExplicitDestination>();
if (catalogDictionary.TryGet(NameToken.Dests, pdfScanner, out DictionaryToken destinations))
internal static NamedDestinations Read(DictionaryToken catalogDictionary, IPdfTokenScanner pdfScanner, Pages pages, ILog log)
{
/*
* In PDF 1.1, the correspondence between name objects and destinations is defined by the /Dests entry in the document catalog.
* The value of this entry is a dictionary in which each key is a destination name and the corresponding value is either an array
* defining the destination, using the explicit destination syntax, or a dictionary with a /D entry whose value is such an array.
*/
foreach (var kvp in destinations.Data)
{
var value = kvp.Value;
var destinationsByName = new Dictionary<string, ExplicitDestination>();
if (TryReadExplicitDestination(value, pdfScanner, pages, log, false, out var destination))
if (catalogDictionary.TryGet(NameToken.Dests, pdfScanner, out DictionaryToken destinations))
{
/*
* In PDF 1.1, the correspondence between name objects and destinations is defined by the /Dests entry in the document catalog.
* The value of this entry is a dictionary in which each key is a destination name and the corresponding value is either an array
* defining the destination, using the explicit destination syntax, or a dictionary with a /D entry whose value is such an array.
*/
foreach (var kvp in destinations.Data)
{
destinationsByName[kvp.Key] = destination;
var value = kvp.Value;
if (TryReadExplicitDestination(value, pdfScanner, pages, log, false, out var destination))
{
destinationsByName[kvp.Key] = destination;
}
}
}
}
else if (catalogDictionary.TryGet(NameToken.Names, pdfScanner, out DictionaryToken names)
&& names.TryGet(NameToken.Dests, pdfScanner, out destinations))
{
/*
* In PDF 1.2, the correspondence between strings and destinations is defined by the /Dests entry in the document's name dictionary.
* The value of the /Dests entry is a name tree mapping name strings to destinations.
* The keys in the name tree may be treated as text strings for display purposes.
* The destination value associated with a key in the name tree may be either an array or a dictionary.
*/
NameTreeParser.FlattenNameTree(destinations, pdfScanner, value =>
else if (catalogDictionary.TryGet(NameToken.Names, pdfScanner, out DictionaryToken names)
&& names.TryGet(NameToken.Dests, pdfScanner, out destinations))
{
if (TryReadExplicitDestination(value, pdfScanner, pages, log, false, out var destination))
/*
* In PDF 1.2, the correspondence between strings and destinations is defined by the /Dests entry in the document's name dictionary.
* The value of the /Dests entry is a name tree mapping name strings to destinations.
* The keys in the name tree may be treated as text strings for display purposes.
* The destination value associated with a key in the name tree may be either an array or a dictionary.
*/
NameTreeParser.FlattenNameTree(destinations, pdfScanner, value =>
{
return destination;
}
if (TryReadExplicitDestination(value, pdfScanner, pages, log, false, out var destination))
{
return destination;
}
return null;
}, destinationsByName);
return null;
}, destinationsByName);
}
return new NamedDestinations(destinationsByName, pages);
}
return new NamedDestinations(destinationsByName, pages);
}
private static bool TryReadExplicitDestination(IToken value, IPdfTokenScanner pdfScanner, Pages pages, ILog log, bool isRemoteDestination, out ExplicitDestination destination)
{
destination = null;
if (DirectObjectFinder.TryGet(value, pdfScanner, out ArrayToken valueArray)
&& TryGetExplicitDestination(valueArray, pages, log, isRemoteDestination, out destination))
private static bool TryReadExplicitDestination(IToken value, IPdfTokenScanner pdfScanner, Pages pages, ILog log, bool isRemoteDestination, out ExplicitDestination destination)
{
return true;
}
destination = null;
if (DirectObjectFinder.TryGet(value, pdfScanner, out DictionaryToken valueDictionary)
&& valueDictionary.TryGet(NameToken.D, pdfScanner, out valueArray)
&& TryGetExplicitDestination(valueArray, pages, log, isRemoteDestination, out destination))
{
return true;
}
if (DirectObjectFinder.TryGet(value, pdfScanner, out ArrayToken valueArray)
&& TryGetExplicitDestination(valueArray, pages, log, isRemoteDestination, out destination))
{
return true;
}
return false;
}
/// <summary>
/// Attempts to convert an explicit destination array (<c>[page /Type operands...]</c>) into an
/// <see cref="ExplicitDestination"/>.
/// </summary>
/// <param name="explicitDestinationArray">The destination array; <c>null</c> or an empty array yields <c>false</c>.</param>
/// <param name="pages">Used to resolve an indirect page reference to its page number.</param>
/// <param name="log">Receives error messages for malformed destinations; may be <c>null</c>.</param>
/// <param name="isRemoteDestination">
/// When <c>true</c>, an indirect page reference as first element is rejected and only a numeric page index is accepted.
/// </param>
/// <param name="destination">The parsed destination on success, otherwise <c>null</c>.</param>
/// <returns><c>true</c> if a destination was produced, otherwise <c>false</c>.</returns>
internal static bool TryGetExplicitDestination(ArrayToken explicitDestinationArray, Pages pages, ILog log, bool isRemoteDestination, out ExplicitDestination destination)
{
    destination = null;

    if (explicitDestinationArray == null || explicitDestinationArray.Length == 0)
    {
        return false;
    }

    // Operands after the destination type can be missing in malformed files. A missing operand is
    // treated like a non-numeric one (null coordinate) rather than indexing past the end of the array.
    NumericToken NumberAt(int index)
    {
        return index < explicitDestinationArray.Length
            ? explicitDestinationArray[index] as NumericToken
            : null;
    }

    int pageNumber;
    var pageToken = explicitDestinationArray[0];
    if (pageToken is IndirectReferenceToken pageIndirectReferenceToken)
    {
        if (isRemoteDestination)
        {
            // Table 8.50 Remote Go-To Actions
            var errorMessage = $"{nameof(TryGetExplicitDestination)} Cannot use indirect reference for remote destination.";
            log?.Error(errorMessage);
            return false;
        }

        var page = pages.GetPageByReference(pageIndirectReferenceToken.Data);
        if (page?.PageNumber == null)
        {
            return false;
        }

        pageNumber = page.PageNumber.Value;
    }
    else if (pageToken is NumericToken pageNumericToken)
    {
        // The numeric value is incremented by one to obtain the page number.
        pageNumber = pageNumericToken.Int + 1;
    }
    else
    {
        var errorMessage = $"{nameof(TryGetExplicitDestination)} No page number given in 'Dest': '{explicitDestinationArray}'.";
        log?.Error(errorMessage);
        return false;
    }

    NameToken destTypeToken = null;
    if (explicitDestinationArray.Length > 1)
    {
        destTypeToken = explicitDestinationArray[1] as NameToken;
    }

    if (destTypeToken == null)
    {
        // No usable destination type: log it and fall back to fitting the whole page.
        var errorMessage = $"Missing name token as second argument to explicit destination: {explicitDestinationArray}.";
        log?.Error(errorMessage);
        destination = new ExplicitDestination(pageNumber, ExplicitDestinationType.FitPage, ExplicitDestinationCoordinates.Empty);
        return true;
    }

    if (destTypeToken.Equals(NameToken.XYZ))
    {
        // [page /XYZ left top zoom]
        var left = NumberAt(2);
        var top = NumberAt(3);
        destination = new ExplicitDestination(pageNumber, ExplicitDestinationType.XyzCoordinates,
            new ExplicitDestinationCoordinates(left?.Data, top?.Data));
        return true;
    }

    if (destTypeToken.Equals(NameToken.Fit))
    {
        // [page /Fit]
        destination = new ExplicitDestination(pageNumber, ExplicitDestinationType.FitPage,
            ExplicitDestinationCoordinates.Empty);
        return true;
    }

    if (destTypeToken.Equals(NameToken.FitH))
    {
        // [page /FitH top]
        var top = NumberAt(2);
        destination = new ExplicitDestination(pageNumber, ExplicitDestinationType.FitHorizontally,
            new ExplicitDestinationCoordinates(null, top?.Data));
        return true;
    }

    if (destTypeToken.Equals(NameToken.FitV))
    {
        // [page /FitV left]
        var left = NumberAt(2);
        destination = new ExplicitDestination(pageNumber, ExplicitDestinationType.FitVertically,
            new ExplicitDestinationCoordinates(left?.Data));
        return true;
    }

    if (destTypeToken.Equals(NameToken.FitR))
    {
        // [page /FitR left bottom right top]
        var left = NumberAt(2);
        var bottom = NumberAt(3);
        var right = NumberAt(4);
        var top = NumberAt(5);
        destination = new ExplicitDestination(pageNumber, ExplicitDestinationType.FitRectangle,
            new ExplicitDestinationCoordinates(left?.Data, top?.Data, right?.Data, bottom?.Data));
        return true;
    }

    if (destTypeToken.Equals(NameToken.FitB))
    {
        // [page /FitB]
        destination = new ExplicitDestination(pageNumber, ExplicitDestinationType.FitBoundingBox,
            ExplicitDestinationCoordinates.Empty);
        return true;
    }

    if (destTypeToken.Equals(NameToken.FitBH))
    {
        // [page /FitBH top]
        destination = new ExplicitDestination(pageNumber, ExplicitDestinationType.FitBoundingBoxHorizontally,
            new ExplicitDestinationCoordinates(null, NumberAt(2)?.Data));
        return true;
    }

    if (destTypeToken.Equals(NameToken.FitBV))
    {
        // [page /FitBV left]
        destination = new ExplicitDestination(pageNumber, ExplicitDestinationType.FitBoundingBoxVertically,
            new ExplicitDestinationCoordinates(NumberAt(2)?.Data));
        return true;
    }

    // Unrecognised destination type name.
    return false;
}
}
}

View File

@ -1,33 +1,34 @@
namespace UglyToad.PdfPig.Outline
{
    using Destinations;
    using System;
    using System.Collections.Generic;

    /// <inheritdoc />
    /// <summary>
    /// A node in the <see cref="Bookmarks" /> of a PDF document which corresponds
    /// to a location in an embedded file.
    /// </summary>
    public class EmbeddedBookmarkNode : DocumentBookmarkNode
    {
        /// <summary>
        /// The file specification for the embedded file
        /// </summary>
        public string FileSpecification { get; }

        /// <inheritdoc />
        /// <summary>
        /// Create a new <see cref="EmbeddedBookmarkNode" />.
        /// </summary>
        /// <param name="title">Forwarded to the <see cref="DocumentBookmarkNode"/> constructor.</param>
        /// <param name="level">Forwarded to the <see cref="DocumentBookmarkNode"/> constructor.</param>
        /// <param name="destination">Forwarded to the <see cref="DocumentBookmarkNode"/> constructor.</param>
        /// <param name="children">Forwarded to the <see cref="DocumentBookmarkNode"/> constructor.</param>
        /// <param name="fileSpecification">The file specification for the embedded file; must not be <c>null</c>.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="fileSpecification"/> is <c>null</c>.</exception>
        public EmbeddedBookmarkNode(string title, int level, ExplicitDestination destination, IReadOnlyList<BookmarkNode> children, string fileSpecification) : base(title, level, destination, children)
        {
            FileSpecification = fileSpecification ?? throw new ArgumentNullException(nameof(fileSpecification));
        }

        /// <inheritdoc />
        public override string ToString()
        {
            return $"Embedded file '{FileSpecification}', {Level}, {Title}";
        }
    }
}