From ecf0b8743b36f77acba1cb01b0703db4fa0a36e8 Mon Sep 17 00:00:00 2001 From: Eliot Jones Date: Thu, 5 Dec 2019 12:03:30 +0000 Subject: [PATCH] make bookmarknode immutable and use scanner when retrieving bookmarks --- .../PigReproductionPowerpointTests.cs | 10 + src/UglyToad.PdfPig/Outline/BookmarkNode.cs | 43 +- .../Outline/BookmarksProvider.cs | 999 +++++++++++------- .../Parser/PdfDocumentFactory.cs | 5 +- src/UglyToad.PdfPig/PdfDocument.cs | 10 +- 5 files changed, 684 insertions(+), 383 deletions(-) diff --git a/src/UglyToad.PdfPig.Tests/Integration/PigReproductionPowerpointTests.cs b/src/UglyToad.PdfPig.Tests/Integration/PigReproductionPowerpointTests.cs index 5e09d83d..e3f9ac20 100644 --- a/src/UglyToad.PdfPig.Tests/Integration/PigReproductionPowerpointTests.cs +++ b/src/UglyToad.PdfPig.Tests/Integration/PigReproductionPowerpointTests.cs @@ -40,5 +40,15 @@ } } } + + [Fact] + public void CanGetBookmarks() + { + using (var document = PdfDocument.Open(GetFilename())) + { + var foundBookmarks = document.TryGetBookmarks(out var bookmarks); + Assert.True(foundBookmarks); + } + } } } \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Outline/BookmarkNode.cs b/src/UglyToad.PdfPig/Outline/BookmarkNode.cs index 2fd88d0d..2273aca0 100644 --- a/src/UglyToad.PdfPig/Outline/BookmarkNode.cs +++ b/src/UglyToad.PdfPig/Outline/BookmarkNode.cs @@ -3,6 +3,8 @@ using UglyToad.PdfPig.Geometry; namespace UglyToad.PdfPig.Outline { + using System; + /// /// A node in the of a PDF document. /// @@ -11,55 +13,60 @@ namespace UglyToad.PdfPig.Outline /// /// The text displayed for this node. /// - public string Title { get; internal set; } + public string Title { get; } /// /// The bookmark's coordinates in the pdf page. /// - public PdfPoint TopLeft { get; internal set; } + public PdfPoint TopLeft { get; } /// /// The bookmark's bounding box in the pdf page. /// - public PdfRectangle BoundingBox { get; internal set; } + public PdfRectangle BoundingBox { get; } /// /// The node's hierarchical level. /// - public int Level { get; internal set; } + public int Level { get; } /// - /// The page's number where the bookmark is located. + /// The page number where the bookmark is located. /// - public int PageNumber { get; internal set; } + public int PageNumber { get; } /// /// The link to an external source. /// - public string ExternalLink { get; internal set; } + public string ExternalLink { get; } /// /// True if bookmark refers to an external source. /// - public bool IsExternal { get; internal set; } + public bool IsExternal { get; } /// /// The bookmark's sub-bookmark. /// - public List Children { get; internal set; } + public IReadOnlyList Children { get; } - /// - /// Create a new instance of BookmarkNode. - /// - public BookmarkNode() + /// + public BookmarkNode(string title, PdfPoint topLeft, PdfRectangle boundingBox, int level, int pageNumber, + string externalLink, + bool isExternal, + IReadOnlyList children) { - Title = string.Empty; - Children = new List(); + Title = title; + TopLeft = topLeft; + BoundingBox = boundingBox; + Level = level; + PageNumber = pageNumber; + ExternalLink = externalLink; + IsExternal = isExternal; + Children = children ?? throw new ArgumentNullException(nameof(children)); } - /// - /// - /// + /// public override string ToString() { return "page #" + PageNumber + ", " + Level + ", " + Title; diff --git a/src/UglyToad.PdfPig/Outline/BookmarksProvider.cs b/src/UglyToad.PdfPig/Outline/BookmarksProvider.cs index 99378f77..72d2645d 100644 --- a/src/UglyToad.PdfPig/Outline/BookmarksProvider.cs +++ b/src/UglyToad.PdfPig/Outline/BookmarksProvider.cs @@ -5,122 +5,373 @@ using UglyToad.PdfPig.Tokens; namespace UglyToad.PdfPig.Outline { + using System.Collections.Generic; + using Content; + using Exceptions; + using Parser.Parts; + using Tokenization.Scanner; + using Util; + + internal class ExplicitDestination + { + public int? PageNumber { get; } + + public ExplicitDestinationType Type { get; } + + public ExplicitDestinationCoordinates Coordinates { get; } + + public ExplicitDestination(int? pageNumber, + ExplicitDestinationType type, + ExplicitDestinationCoordinates coordinates) + { + PageNumber = pageNumber; + Type = type; + Coordinates = coordinates; + } + } + + /// + /// The display type for opening an . + /// + internal enum ExplicitDestinationType + { + /// + /// Display the page with the given top left coordinates and + /// zoom level. + /// + XyzCoordinates = 0, + /// + /// Fit the entire page within the window. + /// + FitPage = 1, + /// + /// Fit the entire page width within the window. + /// + FitHorizontally = 2, + /// + /// Fit the entire page height within the window. + /// + FitVertically = 3, + /// + /// Fit the rectangle specified by the + /// within the window. + /// + FitRectangle = 4, + /// + /// Fit the page's bounding box within the window. + /// + FitBoundingBox = 5, + /// + /// Fit the page's bounding box width within the window. + /// + FitBoundingBoxHorizontally = 6, + /// + /// Fit the page's bounding box height within the window. + /// + FitBoundingBoxVertically = 7 + } + + /// + /// The coordinates of the region to display for a . + /// + internal class ExplicitDestinationCoordinates + { + public static ExplicitDestinationCoordinates Empty { get; } = new ExplicitDestinationCoordinates(null, null, null, null); + /// + /// The left side of the region to display. + /// + public decimal? Left { get; } + + /// + /// The top edge of the region to display. + /// + public decimal? Top { get; } + + /// + /// The right side of the region to display + /// + public decimal? Right { get; } + + /// + /// The bottom edge of the region to display. + /// + public decimal? Bottom { get; } + + public ExplicitDestinationCoordinates(decimal? left) + { + Left = left; + } + + public ExplicitDestinationCoordinates(decimal? left, decimal? top) + { + Left = left; + Top = top; + } + + public ExplicitDestinationCoordinates(decimal? left, decimal? top, decimal? right, decimal? bottom) + { + Left = left; + Top = top; + Right = right; + Bottom = bottom; + } + } + internal class BookmarksProvider { private readonly ILog log; - private readonly Structure structure; + private readonly IPdfTokenScanner pdfScanner; + private readonly bool isLenientParsing; - public BookmarksProvider(ILog log, Structure structure) + public BookmarksProvider(ILog log, IPdfTokenScanner pdfScanner, bool isLenientParsing) { this.log = log; - this.structure = structure; + this.pdfScanner = pdfScanner; + this.isLenientParsing = isLenientParsing; } /// /// Extract bookmarks, if any. /// - public Bookmarks GetBookmarks() + public Bookmarks GetBookmarks(Catalog catalog) { - if (structure.Catalog.CatalogDictionary.Data.TryGetValue(NameToken.Outlines, out IToken outlinesToken)) + if (!catalog.CatalogDictionary.TryGet(NameToken.Outlines, pdfScanner, out DictionaryToken outlinesDictionary)) { - var outlines = this.structure.GetObject(((IndirectReferenceToken)outlinesToken).Data).Data as DictionaryToken; - if (outlines.TryGet(NameToken.First, out IndirectReferenceToken firstToken)) + return null; + } + + if (!isLenientParsing && outlinesDictionary.TryGet(NameToken.Type, pdfScanner, out NameToken typeName) + && typeName != NameToken.Outlines) + { + throw new PdfDocumentFormatException($"Outlines (bookmarks) dictionary did not have correct type specified: {typeName}."); + } + + if (!outlinesDictionary.TryGet(NameToken.First, pdfScanner, out DictionaryToken next)) + { + return null; + } + + var namedDestinations = ReadNamedDestinations(catalog, pdfScanner, isLenientParsing, log); + + var roots = new List(); + var seen = new HashSet(); + + while (next != null) + { + ReadBookmarksRecursively(next, 0, false, seen, namedDestinations, catalog, roots); + + if (!next.TryGet(NameToken.Next, out IndirectReferenceToken nextReference) + || !seen.Add(nextReference.Data)) { - var rootNode = new BookmarkNode(); - RecursiveBookmarks(firstToken, ref rootNode); - return new Bookmarks(rootNode.Children); + break; + } + + next = DirectObjectFinder.Get(nextReference, pdfScanner); + } + + return null; + } + + private static IReadOnlyDictionary ReadNamedDestinations(Catalog catalog, IPdfTokenScanner pdfScanner, + bool isLenientParsing, ILog log) + { + var result = new Dictionary(); + + if (catalog.CatalogDictionary.TryGet(NameToken.Dests, pdfScanner, out DictionaryToken dests)) + { + /* + * In PDF 1.1, the correspondence between name objects and destinations is defined by the /Dests entry in the document catalog. + * The value of this entry is a dictionary in which each key is a destination name and the corresponding value is either an array + * defining the destination, using the explicit destination syntax, or a dictionary with a /D entry whose value is such an array. + */ + foreach (var kvp in dests.Data) + { + var value = kvp.Value; + + if (TryReadExplicitDestination(value, catalog, pdfScanner, isLenientParsing, log, out var destination)) + { + result[kvp.Key] = destination; + } + else if (!isLenientParsing) + { + throw new PdfDocumentFormatException($"Failed to find explicit destination for value '{value}' in: {dests}."); + } } } - return null; + else if (catalog.CatalogDictionary.TryGet(NameToken.Names, pdfScanner, out DictionaryToken names) + && names.TryGet(NameToken.Dests, pdfScanner, out dests)) + { + /* + * In PDF 1.2, the correspondence between strings and destinations is defined by the /Dests entry in the document's name dictionary. + * The value of the /Dests entry is a name tree mapping name strings to destinations. + * The keys in the name tree may be treated as text strings for display purposes. + * The destination value associated with a key in the name tree may be either an array or a dictionary. + */ + ExtractNameTree(dests, catalog, pdfScanner, isLenientParsing, log, result); + } + + return result; + } + + private static void ExtractNameTree(DictionaryToken nameTreeNodeDictionary, Catalog catalog, IPdfTokenScanner pdfScanner, + bool isLenientParsing, + ILog log, + Dictionary explicitDestinations) + { + if (nameTreeNodeDictionary.TryGet(NameToken.Names, pdfScanner, out ArrayToken nodeNames)) + { + for (var i = 0; i < nodeNames.Length; i += 2) + { + var key = nodeNames[i] as IDataToken; + + if (key == null) + { + if (isLenientParsing) + { + continue; + } + + throw new PdfDocumentFormatException($"Invalid key '{nodeNames[i]}' in names tree for explicit destinations: {nameTreeNodeDictionary}."); + } + + var value = nodeNames[i + 1]; + + if (TryReadExplicitDestination(value, catalog, pdfScanner, isLenientParsing, log, out var destination)) + { + explicitDestinations[key.Data] = destination; + } + else if (!isLenientParsing) + { + throw new PdfDocumentFormatException($"Failed to find explicit destination for value '{value}' in: {nameTreeNodeDictionary}."); + } + } + } + + if (nameTreeNodeDictionary.TryGet(NameToken.Kids, pdfScanner, out ArrayToken kids)) + { + foreach (var kid in kids.Data) + { + if (DirectObjectFinder.TryGet(kid, pdfScanner, out DictionaryToken kidDictionary)) + { + ExtractNameTree(kidDictionary, catalog, pdfScanner, isLenientParsing, log, explicitDestinations); + } + else if (!isLenientParsing) + { + throw new PdfDocumentFormatException($"Invalid kids entry in PDF name tree: {kid} in {kids}."); + } + } + } + } + + private static bool TryReadExplicitDestination(IToken value, Catalog catalog, IPdfTokenScanner pdfScanner, + bool isLenientParsing, ILog log, out ExplicitDestination destination) + { + if (DirectObjectFinder.TryGet(value, pdfScanner, out ArrayToken valueArray)) + { + destination = GetExplicitDestination(valueArray, catalog, isLenientParsing, log); + return true; + } + + if (DirectObjectFinder.TryGet(value, pdfScanner, out DictionaryToken valueDictionary) + && valueDictionary.TryGet(NameToken.D, pdfScanner, out valueArray)) + { + destination = GetExplicitDestination(valueArray, catalog, isLenientParsing, log); + return true; + } + + destination = null; + return false; } /// /// Extract bookmarks recursively. /// - /// The outlines' location token, e.g. First, Next. - /// The current . - private void RecursiveBookmarks(IndirectReferenceToken locationToken, ref BookmarkNode node) + private void ReadBookmarksRecursively(DictionaryToken nodeDictionary, int level, bool readSiblings, HashSet seen, + IReadOnlyDictionary namedDestinations, + Catalog catalog, + List list) { // 12.3 Document-Level Navigation - BookmarkNode newNode = new BookmarkNode() { Level = node.Level + 1 }; - node.Children.Add(newNode); - - var dictionary = structure.GetObject(locationToken.Data).Data as DictionaryToken; - if (dictionary == null) - { - throw new ArgumentNullException("BookmarksProvider.RecursiveBookmarks(): DictionaryToken is null."); - } // 12.3.3 Document Outline - Title // (Required) The text that shall be displayed on the screen for this item. - newNode.Title = GetString(NameToken.Title, locationToken); - - // 12.3.2 Destinations - if (dictionary.TryGet(NameToken.Dest, out ArrayToken destToken)) + if (!nodeDictionary.TryGetOptionalStringDirect(NameToken.Title, pdfScanner, out var title)) { - // 12.3.2.2 Explicit Destinations - GetDestination(destToken, newNode); + throw new PdfDocumentFormatException($"Invalid title for outline (bookmark) node: {nodeDictionary}."); } - else if (dictionary.TryGet(NameToken.Dest, out IDataToken destStringToken)) + + if (nodeDictionary.TryGet(NameToken.Dest, pdfScanner, out ArrayToken destArray)) + { + var desti = GetExplicitDestination(destArray, catalog, isLenientParsing, log); + } + else if (nodeDictionary.TryGet(NameToken.Dest, pdfScanner, out IDataToken destStringToken)) { // 12.3.2.3 Named Destinations - GetNamedDestination(destStringToken, ref newNode); - } - else if (dictionary.TryGet(NameToken.A, out IToken actionToken)) - { - // 12.6 Actions - GetActions(actionToken, ref newNode); - } - else - { - log.Error("BookmarksProvider.RecursiveBookmark(): No 'Dest' or 'Action' token found."); + if (namedDestinations.TryGetValue(destStringToken.Data, out var destination)) + { + + } + else if (!isLenientParsing) + { + throw new PdfDocumentFormatException($"Invalid destination name for bookmark node: {destStringToken.Data}."); + } } - // Look for children - if (dictionary.TryGet(NameToken.First, out IndirectReferenceToken firstToken)) + var children = new List(); + if (nodeDictionary.TryGet(NameToken.First, pdfScanner, out DictionaryToken firstChild)) { - RecursiveBookmarks(firstToken, ref newNode); + ReadBookmarksRecursively(firstChild, level + 1, true, seen, namedDestinations, catalog, children); } - // Move to next - if (dictionary.TryGet(NameToken.Next, out IndirectReferenceToken nextToken)) + list.Add(new BookmarkNode(title, PdfPoint.Origin, new PdfRectangle(), level, 1, string.Empty, false, children)); + + if (!readSiblings) { - RecursiveBookmarks(nextToken, ref node); + return; } + + // Walk all siblings if this was the first child. + var current = nodeDictionary; + while (true) + { + if (!current.TryGet(NameToken.Next, out IndirectReferenceToken nextReference) + || !seen.Add(nextReference.Data)) + { + break; + } + + current = DirectObjectFinder.Get(nextReference, pdfScanner); + + if (current == null) + { + break; + } + + ReadBookmarksRecursively(current, level, false, seen, namedDestinations, catalog, list); + } + + //// 12.3.2 Destinations + //if (nodeDictionary.TryGet(NameToken.Dest, out ArrayToken destToken)) + //{ + // // 12.3.2.2 Explicit Destinations + // GetDestination(destToken, newNode); + //} + //else if (dictionary.TryGet(NameToken.Dest, out IDataToken destStringToken)) + //{ + // // 12.3.2.3 Named Destinations + // GetNamedDestination(destStringToken, ref newNode); + //} + //else if (dictionary.TryGet(NameToken.A, out IToken actionToken)) + //{ + // // 12.6 Actions + // GetActions(actionToken, ref newNode); + //} + //else + //{ + // log.Error("BookmarksProvider.RecursiveBookmark(): No 'Dest' or 'Action' token found."); + //} } - private string GetString(NameToken nameToken, IToken locationToken) - { - if (locationToken is IDataToken stringDataToken) - { - return stringDataToken.Data; - } - else if (locationToken is DictionaryToken dictionaryToken) - { - if (dictionaryToken.TryGet(nameToken, out IToken token)) - { - return GetString(nameToken, token); - } - else - { - throw new NotImplementedException("BookmarksProvider.GetString(): Unknown nameToken '" + nameToken + "'."); - } - } - else if (locationToken is IndirectReferenceToken indirectReferenceToken) - { - var tempToken = structure.GetObject(indirectReferenceToken.Data)?.Data; - - if (tempToken == null) - { - throw new ArgumentNullException("BookmarksProvider.GetString(): Cannot find '" + indirectReferenceToken.Data + "'."); - } - return GetString(nameToken, tempToken); - } - else - { - throw new NotImplementedException("BookmarksProvider.GetString(): Unknown string type '" + locationToken.GetType() + "'."); - } - } private static int ParsePageNumber(string goToStr) { @@ -131,340 +382,368 @@ namespace UglyToad.PdfPig.Outline return 0; } - #region Destinations - private void GetDestination(ArrayToken destToken, BookmarkNode currentNode) + //#region Destinations + private static ExplicitDestination GetExplicitDestination(ArrayToken explicitDestinationArray, Catalog catalog, + bool isLenientParsing, + ILog log) { - if (destToken == null || destToken.Length == 0) + if (explicitDestinationArray == null) { - throw new ArgumentNullException(nameof(destToken), "BookmarksProvider.GetDestination()"); + throw new ArgumentNullException(nameof(explicitDestinationArray)); } - // 12.3.2.2 Explicit Destinations - // Table 151 – Destination syntax - var pageToken = destToken[0]; + if (explicitDestinationArray.Length == 0) + { + throw new ArgumentException("Invalid (empty) array for an explicit destination.", nameof(explicitDestinationArray)); + } + + var pageNumber = default(int?); + + var pageToken = explicitDestinationArray[0]; if (pageToken is IndirectReferenceToken pageIndirectReferenceToken) { - var pageNumber = structure.Catalog.GetPageByReference(pageIndirectReferenceToken.Data).PageNumber; - if (pageNumber.HasValue) - { - currentNode.PageNumber = pageNumber.Value; - } - else - { - log.Error("BookmarksProvider.GetDestination(): Cannot find page number."); - } + pageNumber = catalog.GetPageByReference(pageIndirectReferenceToken.Data).PageNumber ?? 1; } else if (pageToken is NumericToken pageNumericToken) { - currentNode.PageNumber = pageNumericToken.Int + 1; + pageNumber = pageNumericToken.Int + 1; } else { - log.Error("BookmarksProvider.GetDestination(): No page number given in 'Dest': '" + destToken + "'."); + var errorMessage = $"{nameof(GetExplicitDestination)} No page number given in 'Dest': '{explicitDestinationArray}'."; + if (!isLenientParsing) + { + throw new PdfDocumentFormatException(errorMessage); + } + + log.Error(errorMessage); } - var destTypeToken = destToken[1] as NameToken; - if (destTypeToken == null) return; + var destTypeToken = explicitDestinationArray[1] as NameToken; + if (destTypeToken == null) + { + var errorMessage = $"Missing name token as second argument to explicit destination: {explicitDestinationArray}."; + if (!isLenientParsing) + { + throw new PdfDocumentFormatException(errorMessage); + } + + log.Error(errorMessage); + + return new ExplicitDestination(pageNumber, ExplicitDestinationType.FitPage, ExplicitDestinationCoordinates.Empty); + } if (destTypeToken.Equals(NameToken.XYZ)) { // [page /XYZ left top zoom] - var left = destToken[2] as NumericToken; - var top = destToken[3] as NumericToken; - var zoom = destToken[4] as NumericToken; - currentNode.TopLeft = new PdfPoint(left?.Data ?? 0, top?.Data ?? 0); + var left = explicitDestinationArray[2] as NumericToken; + var top = explicitDestinationArray[3] as NumericToken; + + return new ExplicitDestination(pageNumber, ExplicitDestinationType.XyzCoordinates, + new ExplicitDestinationCoordinates(left?.Data, top?.Data)); } - else if (destTypeToken.Equals(NameToken.Fit)) + + if (destTypeToken.Equals(NameToken.Fit)) { // [page /Fit] + return new ExplicitDestination(pageNumber, ExplicitDestinationType.FitPage, + ExplicitDestinationCoordinates.Empty); } - else if (destTypeToken.Equals(NameToken.FitH)) + + if (destTypeToken.Equals(NameToken.FitH)) { // [page /FitH top] - var top = destToken[2] as NumericToken; - currentNode.TopLeft = new PdfPoint(0, top?.Data ?? 0); + var top = explicitDestinationArray[2] as NumericToken; + return new ExplicitDestination(pageNumber, ExplicitDestinationType.FitHorizontally, + new ExplicitDestinationCoordinates(null, top?.Data)); } - else if (destTypeToken.Equals(NameToken.FitV)) + + if (destTypeToken.Equals(NameToken.FitV)) { // [page /FitV left] - var left = destToken[2] as NumericToken; - currentNode.TopLeft = new PdfPoint(left?.Data ?? 0, 0); + var left = explicitDestinationArray[2] as NumericToken; + return new ExplicitDestination(pageNumber, ExplicitDestinationType.FitVertically, + new ExplicitDestinationCoordinates(left?.Data)); } - else if (destTypeToken.Equals(NameToken.FitR)) + + if (destTypeToken.Equals(NameToken.FitR)) { // [page /FitR left bottom right top] - var left = destToken[2] as NumericToken; - var bottom = destToken[3] as NumericToken; - var right = destToken[4] as NumericToken; - var top = destToken[5] as NumericToken; - currentNode.TopLeft = new PdfPoint(left?.Data ?? 0, top?.Data ?? 0); - currentNode.BoundingBox = new PdfRectangle(left?.Data ?? 0, - bottom?.Data ?? 0, - right?.Data ?? 0, - top?.Data ?? 0); + var left = explicitDestinationArray[2] as NumericToken; + var bottom = explicitDestinationArray[3] as NumericToken; + var right = explicitDestinationArray[4] as NumericToken; + var top = explicitDestinationArray[5] as NumericToken; + + return new ExplicitDestination(pageNumber, ExplicitDestinationType.FitRectangle, + new ExplicitDestinationCoordinates(left?.Data, top?.Data, right?.Data, bottom?.Data)); } - else if (destTypeToken.Equals(NameToken.FitB)) + + if (destTypeToken.Equals(NameToken.FitB)) { // [page /FitB] + return new ExplicitDestination(pageNumber, ExplicitDestinationType.FitBoundingBox, + ExplicitDestinationCoordinates.Empty); } - else if (destTypeToken.Equals(NameToken.FitBH)) + + if (destTypeToken.Equals(NameToken.FitBH)) { // [page /FitBH top] + return new ExplicitDestination(pageNumber, ExplicitDestinationType.FitBoundingBoxHorizontally, + new ExplicitDestinationCoordinates(null, (explicitDestinationArray[2] as NumericToken)?.Data)); } - else if (destTypeToken.Equals(NameToken.FitBV)) + + if (destTypeToken.Equals(NameToken.FitBV)) { // [page /FitBV left] - var top = destToken[2] as NumericToken; - currentNode.TopLeft = new PdfPoint(0, top?.Data ?? 0); - } - else - { - throw new NotImplementedException("BookmarksProvider.GetDestination(): Unknown type '" + destTypeToken + "'."); + return new ExplicitDestination(pageNumber, ExplicitDestinationType.FitBoundingBoxVertically, + new ExplicitDestinationCoordinates((explicitDestinationArray[2] as NumericToken)?.Data)); } + + throw new PdfDocumentFormatException($"Unknown explicit destination type: {destTypeToken}."); } - private void GetNamedDestination(IDataToken destStringToken, ref BookmarkNode currentNode) - { - if (destStringToken == null) - { - throw new ArgumentNullException(nameof(destStringToken), "BookmarksProvider.GetNamedDestination()"); - } + //private void GetNamedDestination(IDataToken destStringToken, ref BookmarkNode currentNode) + //{ + // if (destStringToken == null) + // { + // throw new ArgumentNullException(nameof(destStringToken), "BookmarksProvider.GetNamedDestination()"); + // } - // 12.3.2.3 Named Destinations - if (structure.Catalog.CatalogDictionary.TryGet(NameToken.Dests, out IndirectReferenceToken destsToken11)) - { - // In PDF 1.1, the correspondence between name objects and destinations shall be defined by the - // Dests entry in the document catalogue (see 7.7.2, “Document Catalog”). The value of this entry - // shall be a dictionary in which each key is a destination name and the corresponding value is - // either an array defining the destination, using the syntax shown in Table 151, or a dictionary - // with a D entry whose value is such an array. - throw new NotImplementedException("BookmarksProvider.GetNamedDestination(): PDF 1.1."); - } - else if (structure.Catalog.CatalogDictionary.TryGet(NameToken.Names, out IndirectReferenceToken namesToken)) - { - // In PDF 1.2 and later, the correspondence between strings and destinations may alternatively be - // defined by the Dests entry in the document’s name dictionary (see 7.7.4, “Name Dictionary”). - // The value of this entry shall be a name tree (7.9.6, “Name Trees”) mapping name strings to - // destinations. (The keys in the name tree may be treated as text strings for display purposes.) - // The destination value associated with a key in the name tree may be either an array or a - // dictionary, as described in the preceding paragraph. - var namesDictionary = structure.GetObject(namesToken.Data).Data as DictionaryToken; - if (namesDictionary == null) - { - throw new ArgumentNullException(nameof(namesDictionary), "BookmarksProvider.GetNamedDestination()"); - } + // // 12.3.2.3 Named Destinations + // if (structure.Catalog.CatalogDictionary.TryGet(NameToken.Dests, out IndirectReferenceToken destsToken11)) + // { + // // In PDF 1.1, the correspondence between name objects and destinations shall be defined by the + // // Dests entry in the document catalogue (see 7.7.2, “Document Catalog”). The value of this entry + // // shall be a dictionary in which each key is a destination name and the corresponding value is + // // either an array defining the destination, using the syntax shown in Table 151, or a dictionary + // // with a D entry whose value is such an array. + // throw new NotImplementedException("BookmarksProvider.GetNamedDestination(): PDF 1.1."); + // } + // else if (structure.Catalog.CatalogDictionary.TryGet(NameToken.Names, out IndirectReferenceToken namesToken)) + // { + // // In PDF 1.2 and later, the correspondence between strings and destinations may alternatively be + // // defined by the Dests entry in the document’s name dictionary (see 7.7.4, “Name Dictionary”). + // // The value of this entry shall be a name tree (7.9.6, “Name Trees”) mapping name strings to + // // destinations. (The keys in the name tree may be treated as text strings for display purposes.) + // // The destination value associated with a key in the name tree may be either an array or a + // // dictionary, as described in the preceding paragraph. + // var namesDictionary = structure.GetObject(namesToken.Data).Data as DictionaryToken; + // if (namesDictionary == null) + // { + // throw new ArgumentNullException(nameof(namesDictionary), "BookmarksProvider.GetNamedDestination()"); + // } - if (namesDictionary.TryGet(NameToken.Dests, out IndirectReferenceToken destsToken)) - { - var destsDictionary = structure.GetObject(destsToken.Data).Data as DictionaryToken; - if (destsDictionary == null) - { - throw new ArgumentNullException(nameof(destsDictionary), "BookmarksProvider.GetNamedDestination()"); - } + // if (namesDictionary.TryGet(NameToken.Dests, out IndirectReferenceToken destsToken)) + // { + // var destsDictionary = structure.GetObject(destsToken.Data).Data as DictionaryToken; + // if (destsDictionary == null) + // { + // throw new ArgumentNullException(nameof(destsDictionary), "BookmarksProvider.GetNamedDestination()"); + // } - IToken found = FindInNameTree(destStringToken, destsDictionary); - if (found != null) - { - ArrayToken destToken = null; - if (found is IndirectReferenceToken indirect) - { - var pageObject = structure.GetObject(indirect.Data); - if (pageObject.Data is DictionaryToken dictionaryToken) - { - if (!dictionaryToken.TryGet(NameToken.D, out destToken)) - { - throw new ArgumentException("BookmarksProvider.GetNamedDestination(): Cannot find token 'D'."); - } - } - else if (pageObject.Data is ArrayToken arrayToken) - { - destToken = arrayToken; - } - else - { - throw new NotImplementedException("BookmarksProvider.GetNamedDestination(): Token type '" + pageObject.Data + "'."); - } - } - else if (found is ArrayToken arrayToken) - { - destToken = arrayToken; - } - else if (found is DictionaryToken) - { - throw new NotImplementedException("BookmarksProvider.GetNamedDestination(): Token type 'DictionaryToken'."); - } - else - { - throw new NotImplementedException("BookmarksProvider.GetNamedDestination(): Token type '" + found.GetType() + "'."); - } + // IToken found = FindInNameTree(destStringToken, destsDictionary); + // if (found != null) + // { + // ArrayToken destToken = null; + // if (found is IndirectReferenceToken indirect) + // { + // var pageObject = structure.GetObject(indirect.Data); + // if (pageObject.Data is DictionaryToken dictionaryToken) + // { + // if (!dictionaryToken.TryGet(NameToken.D, out destToken)) + // { + // throw new ArgumentException("BookmarksProvider.GetNamedDestination(): Cannot find token 'D'."); + // } + // } + // else if (pageObject.Data is ArrayToken arrayToken) + // { + // destToken = arrayToken; + // } + // else + // { + // throw new NotImplementedException("BookmarksProvider.GetNamedDestination(): Token type '" + pageObject.Data + "'."); + // } + // } + // else if (found is ArrayToken arrayToken) + // { + // destToken = arrayToken; + // } + // else if (found is DictionaryToken) + // { + // throw new NotImplementedException("BookmarksProvider.GetNamedDestination(): Token type 'DictionaryToken'."); + // } + // else + // { + // throw new NotImplementedException("BookmarksProvider.GetNamedDestination(): Token type '" + found.GetType() + "'."); + // } - var pageNumber = structure.Catalog.GetPageByReference(((IndirectReferenceToken)destToken[0]).Data).PageNumber; - if (pageNumber.HasValue) - { - currentNode.PageNumber = pageNumber.Value; - } - GetDestination(destToken, currentNode); - } - } - } - } + // var pageNumber = structure.Catalog.GetPageByReference(((IndirectReferenceToken)destToken[0]).Data).PageNumber; + // if (pageNumber.HasValue) + // { + // currentNode.PageNumber = pageNumber.Value; + // } + // GetDestination(destToken, currentNode); + // } + // } + // } + //} - private IToken FindInNameTree(T find, DictionaryToken dictionaryToken) where T : IDataToken - { - // 7.9.6 Name Trees - // Intermediate node - if (dictionaryToken.TryGet(NameToken.Kids, out ArrayToken kidsToken)) - { - foreach (var kid in kidsToken.Data) - { - var dictionary = structure.GetObject(((IndirectReferenceToken)kid).Data).Data as DictionaryToken; - if (dictionary != null && dictionary.TryGet(NameToken.Limits, out ArrayToken limits)) - { - // (Intermediate and leaf nodes only; required) Shall be an array of two strings, - // that shall specify the (lexically) least and greatest keys included in the - // Names array of a leaf node or in the Names arrays of any leaf nodes that are - // descendants of an intermediate node. - var least = limits[0] as IDataToken; - var greatest = limits[1] as IDataToken; + //private IToken FindInNameTree(T find, DictionaryToken dictionaryToken) where T : IDataToken + //{ + // // 7.9.6 Name Trees + // // Intermediate node + // if (dictionaryToken.TryGet(NameToken.Kids, out ArrayToken kidsToken)) + // { + // foreach (var kid in kidsToken.Data) + // { + // var dictionary = structure.GetObject(((IndirectReferenceToken)kid).Data).Data as DictionaryToken; + // if (dictionary != null && dictionary.TryGet(NameToken.Limits, out ArrayToken limits)) + // { + // // (Intermediate and leaf nodes only; required) Shall be an array of two strings, + // // that shall specify the (lexically) least and greatest keys included in the + // // Names array of a leaf node or in the Names arrays of any leaf nodes that are + // // descendants of an intermediate node. + // var least = limits[0] as IDataToken; + // var greatest = limits[1] as IDataToken; - if (IsStringBetween(find.Data, least.Data, greatest.Data)) - { - var indRef = FindInNameTree(find, dictionary); - if (indRef != null) - { - return indRef; - } - else - { - throw new ArgumentException("BookmarksProvider.FindNamedDestination(): Did no find the key '" + find.Data + "' in Name Tree."); - } - } - } - } - } - else - { - // Leaf node - if (dictionaryToken.TryGet(NameToken.Names, out ArrayToken names)) - { - // Names - // Shall be an array of the form [key_1, value_1, key_2, value_2, …, key_n, value_n] - // where each key_i shall be a string and the corresponding value_i shall be the object - // associated with that key. The keys shall be sorted in lexical order, as described below. - for (int i = 0; i < names.Length; i += 2) - { - if (names[i] is IDataToken n && n.Data.Equals(find.Data)) - { - return names[i + 1]; - } - } - } - else - { - throw new ArgumentNullException("BookmarksProvider.FindNamedDestination(): Could not find ArrayToken 'Names' in dictionary."); - } - } - throw new ArgumentException("BookmarksProvider.FindNamedDestination(): Did no find the key '" + find.Data + "' in Name Tree."); - } + // if (IsStringBetween(find.Data, least.Data, greatest.Data)) + // { + // var indRef = FindInNameTree(find, dictionary); + // if (indRef != null) + // { + // return indRef; + // } + // else + // { + // throw new ArgumentException("BookmarksProvider.FindNamedDestination(): Did no find the key '" + find.Data + "' in Name Tree."); + // } + // } + // } + // } + // } + // else + // { + // // Leaf node + // if (dictionaryToken.TryGet(NameToken.Names, out ArrayToken names)) + // { + // // Names + // // Shall be an array of the form [key_1, value_1, key_2, value_2, …, key_n, value_n] + // // where each key_i shall be a string and the corresponding value_i shall be the object + // // associated with that key. The keys shall be sorted in lexical order, as described below. + // for (int i = 0; i < names.Length; i += 2) + // { + // if (names[i] is IDataToken n && n.Data.Equals(find.Data)) + // { + // return names[i + 1]; + // } + // } + // } + // else + // { + // throw new ArgumentNullException("BookmarksProvider.FindNamedDestination(): Could not find ArrayToken 'Names' in dictionary."); + // } + // } + // throw new ArgumentException("BookmarksProvider.FindNamedDestination(): Did no find the key '" + find.Data + "' in Name Tree."); + //} - private bool IsStringBetween(string str, string least, string greatest) - { - return (string.Compare(str, least, StringComparison.Ordinal) >= 0 && - string.Compare(str, greatest, StringComparison.Ordinal) <= 0); - } - #endregion + //private bool IsStringBetween(string str, string least, string greatest) + //{ + // return (string.Compare(str, least, StringComparison.Ordinal) >= 0 && + // string.Compare(str, greatest, StringComparison.Ordinal) <= 0); + //} + //#endregion - #region Actions - private void GetActions(IToken actionToken, ref BookmarkNode currentNode) - { - if (actionToken is DictionaryToken dictionaryToken) - { - if (dictionaryToken.TryGet(NameToken.S, out NameToken sToken)) - { - if (sToken.Equals(NameToken.GoTo)) // 12.6.4.2, Go-To Actions - { - if (dictionaryToken.TryGet(NameToken.D, out IToken goToToken)) - { - HandleGoToAction(goToToken, ref currentNode); - } - else - { - throw new ArgumentException("BookmarksProvider.GetActions(): Could not find token 'D' in 'GoTo'."); - } - } - else if (sToken.Equals(NameToken.GoToR)) // 12.6.4.3, Remote Go-To Actions - { - if (dictionaryToken.TryGet(NameToken.D, out IToken goToRToken)) - { - if (dictionaryToken.TryGet(NameToken.F, out IToken remoteFileToken)) - { - currentNode.ExternalLink = GetString(NameToken.F, remoteFileToken); - } - HandleGoToRAction(goToRToken, ref currentNode); - } - else - { - throw new ArgumentException("BookmarksProvider.GetActions(): Could not find token 'D' in 'GoToR'."); - } - } - else - { - currentNode.IsExternal = true; - log.Debug("BookmarksProvider.GetActions(): Ignoring unknown token '" + sToken.Data + "'."); - } - } - else - { - throw new ArgumentException("BookmarksProvider.GetActions(): Could not find token 'S' in 'Action'."); - } - } - else if (actionToken is IndirectReferenceToken indirectReferenceToken) - { - var tempToken = structure.GetObject(indirectReferenceToken.Data).Data; - if (tempToken is DictionaryToken dictionaryAction) - { - GetActions(dictionaryAction, ref currentNode); - } - else - { - throw new NotImplementedException("BookmarksProvider.GetActions(): " + nameof(tempToken) + " of type " + tempToken.GetType() + "."); - } - } - else - { - throw new NotImplementedException("BookmarksProvider.GetActions(): " + nameof(actionToken) + " of type " + actionToken.GetType() + "."); - } - } + //#region Actions + //private void GetActions(IToken actionToken, ref BookmarkNode currentNode) + //{ + // if (actionToken is DictionaryToken dictionaryToken) + // { + // if (dictionaryToken.TryGet(NameToken.S, out NameToken sToken)) + // { + // if (sToken.Equals(NameToken.GoTo)) // 12.6.4.2, Go-To Actions + // { + // if (dictionaryToken.TryGet(NameToken.D, out IToken goToToken)) + // { + // HandleGoToAction(goToToken, ref currentNode); + // } + // else + // { + // throw new ArgumentException("BookmarksProvider.GetActions(): Could not find token 'D' in 'GoTo'."); + // } + // } + // else if (sToken.Equals(NameToken.GoToR)) // 12.6.4.3, Remote Go-To Actions + // { + // if (dictionaryToken.TryGet(NameToken.D, out IToken goToRToken)) + // { + // if (dictionaryToken.TryGet(NameToken.F, out IToken remoteFileToken)) + // { + // currentNode.ExternalLink = GetString(NameToken.F, remoteFileToken); + // } + // HandleGoToRAction(goToRToken, ref currentNode); + // } + // else + // { + // throw new ArgumentException("BookmarksProvider.GetActions(): Could not find token 'D' in 'GoToR'."); + // } + // } + // else + // { + // currentNode.IsExternal = true; + // log.Debug("BookmarksProvider.GetActions(): Ignoring unknown token '" + sToken.Data + "'."); + // } + // } + // else + // { + // throw new ArgumentException("BookmarksProvider.GetActions(): Could not find token 'S' in 'Action'."); + // } + // } + // else if (actionToken is IndirectReferenceToken indirectReferenceToken) + // { + // var tempToken = structure.GetObject(indirectReferenceToken.Data).Data; + // if (tempToken is DictionaryToken dictionaryAction) + // { + // GetActions(dictionaryAction, ref currentNode); + // } + // else + // { + // throw new NotImplementedException("BookmarksProvider.GetActions(): " + nameof(tempToken) + " of type " + tempToken.GetType() + "."); + // } + // } + // else + // { + // throw new NotImplementedException("BookmarksProvider.GetActions(): " + nameof(actionToken) + " of type " + actionToken.GetType() + "."); + // } + //} - private void HandleGoToRAction(IToken goToRToken, ref BookmarkNode currentNode) - { - currentNode.IsExternal = true; - HandleGoToAction(goToRToken, ref currentNode); - } + //private void HandleGoToRAction(IToken goToRToken, ref BookmarkNode currentNode) + //{ + // currentNode.IsExternal = true; + // HandleGoToAction(goToRToken, ref currentNode); + //} - private void HandleGoToAction(IToken goToToken, ref BookmarkNode currentNode) - { - if (goToToken is ArrayToken arrayToken) - { - GetDestination(arrayToken, currentNode); - } - else if (goToToken is IDataToken stringToken) - { - GetNamedDestination(stringToken, ref currentNode); - if (currentNode.PageNumber == 0) - { - currentNode.PageNumber = ParsePageNumber(stringToken.Data); - } - } - else if (goToToken is IndirectReferenceToken indirectReferenceToken) - { - HandleGoToAction(structure.GetObject(indirectReferenceToken.Data).Data, ref currentNode); - } - else - { - throw new NotImplementedException("BookmarksProvider.HandleGoToAction(): " + nameof(goToToken) + " of type " + goToToken.GetType()); - } - } - #endregion + //private void HandleGoToAction(IToken goToToken, ref BookmarkNode currentNode) + //{ + // if (goToToken is ArrayToken arrayToken) + // { + // GetDestination(arrayToken, currentNode); + // } + // else if (goToToken is IDataToken stringToken) + // { + // GetNamedDestination(stringToken, ref currentNode); + // if (currentNode.PageNumber == 0) + // { + // currentNode.PageNumber = ParsePageNumber(stringToken.Data); + // } + // } + // else if (goToToken is IndirectReferenceToken indirectReferenceToken) + // { + // HandleGoToAction(structure.GetObject(indirectReferenceToken.Data).Data, ref currentNode); + // } + // else + // { + // throw new NotImplementedException("BookmarksProvider.HandleGoToAction(): " + nameof(goToToken) + " of type " + goToToken.GetType()); + // } + //} + //#endregion } } diff --git a/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs b/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs index 6cedbb45..3d350721 100644 --- a/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs +++ b/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs @@ -21,6 +21,7 @@ using Graphics; using IO; using Logging; + using Outline; using Parts; using Parts.CrossReference; using Tokenization.Scanner; @@ -136,12 +137,14 @@ var caching = new ParsingCachingProviders(bruteForceSearcher, resourceContainer); var acroFormFactory = new AcroFormFactory(pdfScanner, filterProvider); + var bookmarksProvider = new BookmarksProvider(log, pdfScanner, isLenientParsing); return new PdfDocument(log, inputBytes, version, crossReferenceTable, isLenientParsing, caching, pageFactory, catalog, information, encryptionDictionary, pdfScanner, filterProvider, - acroFormFactory); + acroFormFactory, + bookmarksProvider); } private static (IndirectReference, DictionaryToken) ParseTrailer(CrossReferenceTable crossReferenceTable, bool isLenientParsing, IPdfTokenScanner pdfTokenScanner, diff --git a/src/UglyToad.PdfPig/PdfDocument.cs b/src/UglyToad.PdfPig/PdfDocument.cs index bbe40816..5ad5d296 100644 --- a/src/UglyToad.PdfPig/PdfDocument.cs +++ b/src/UglyToad.PdfPig/PdfDocument.cs @@ -14,7 +14,7 @@ using Parser; using Tokenization.Scanner; using Tokens; - using UglyToad.PdfPig.Outline; + using Outline; using Util.JetBrains.Annotations; /// @@ -45,6 +45,7 @@ private readonly IPdfTokenScanner pdfScanner; private readonly IFilterProvider filterProvider; + private readonly BookmarksProvider bookmarksProvider; [NotNull] private readonly Pages pages; @@ -88,7 +89,8 @@ EncryptionDictionary encryptionDictionary, IPdfTokenScanner pdfScanner, IFilterProvider filterProvider, - AcroFormFactory acroFormFactory) + AcroFormFactory acroFormFactory, + BookmarksProvider bookmarksProvider) { this.log = log; this.inputBytes = inputBytes; @@ -98,6 +100,7 @@ this.encryptionDictionary = encryptionDictionary; this.pdfScanner = pdfScanner ?? throw new ArgumentNullException(nameof(pdfScanner)); this.filterProvider = filterProvider ?? throw new ArgumentNullException(nameof(filterProvider)); + this.bookmarksProvider = bookmarksProvider ?? throw new ArgumentNullException(nameof(bookmarksProvider)); Information = information ?? throw new ArgumentNullException(nameof(information)); pages = new Pages(catalog, pageFactory, isLenientParsing, pdfScanner); Structure = new Structure(catalog, crossReferenceTable, pdfScanner); @@ -209,8 +212,7 @@ throw new ObjectDisposedException("Cannot access the bookmarks after the document is disposed."); } - var bookmarksProvider = new BookmarksProvider(this.log, this.Structure); - bookmarks = bookmarksProvider.GetBookmarks(); + bookmarks = bookmarksProvider.GetBookmarks(Structure.Catalog); if (bookmarks != null) return true; return false; }