namespace UglyToad.PdfPig
{
    using System;
    using System.Collections.Generic;
    using System.IO;
    using AcroForms;
    using Content;
    using CrossReference;
    using Encryption;
    using Exceptions;
    using Filters;
    using IO;
    using Logging;
    using Parser;
    using Tokenization.Scanner;
    using Tokens;
    using Outline;
    using Util.JetBrains.Annotations;

    /// <inheritdoc />
    /// <summary>
    /// Provides access to document level information for this PDF document as well as access to the <see cref="Page"/>s contained in the document.
    /// </summary>
    public class PdfDocument : IDisposable
    {
        private bool isDisposed;
        private readonly Lazy<AcroForm> documentForm;

        private readonly bool isLenientParsing;

        [NotNull]
        private readonly HeaderVersion version;

        private readonly ILog log;

        private readonly IInputBytes inputBytes;

        [NotNull]
        private readonly ParsingCachingProviders cachingProviders;

        [CanBeNull]
        private readonly EncryptionDictionary encryptionDictionary;

        [NotNull]
        private readonly IPdfTokenScanner pdfScanner;

        private readonly IFilterProvider filterProvider;

        private readonly BookmarksProvider bookmarksProvider;

        [NotNull]
        private readonly Pages pages;

        /// <summary>
        /// The metadata associated with this document.
        /// </summary>
        [NotNull]
        public DocumentInformation Information { get; }

        /// <summary>
        /// Access to the underlying raw structure of the document.
        /// </summary>
        [NotNull]
        public Structure Structure { get; }

        /// <summary>
        /// The version number of the PDF specification which this file conforms to, for example 1.4.
        /// </summary>
        public decimal Version => version.Version;

        /// <summary>
        /// Get the number of pages in this document.
        /// </summary>
        public int NumberOfPages => pages.Count;

        /// <summary>
        /// Whether the document content is encrypted.
        /// </summary>
        public bool IsEncrypted => encryptionDictionary != null;

        internal PdfDocument(ILog log,
            IInputBytes inputBytes,
            HeaderVersion version,
            CrossReferenceTable crossReferenceTable,
            bool isLenientParsing,
            ParsingCachingProviders cachingProviders,
            IPageFactory pageFactory,
            Catalog catalog,
            DocumentInformation information,
            EncryptionDictionary encryptionDictionary,
            IPdfTokenScanner pdfScanner,
            IFilterProvider filterProvider,
            AcroFormFactory acroFormFactory,
            BookmarksProvider bookmarksProvider)
        {
            this.log = log;
            this.inputBytes = inputBytes;
            this.version = version ?? throw new ArgumentNullException(nameof(version));
            this.isLenientParsing = isLenientParsing;
            this.cachingProviders = cachingProviders ?? throw new ArgumentNullException(nameof(cachingProviders));
            this.encryptionDictionary = encryptionDictionary;
            this.pdfScanner = pdfScanner ?? throw new ArgumentNullException(nameof(pdfScanner));
            this.filterProvider = filterProvider ?? throw new ArgumentNullException(nameof(filterProvider));
            this.bookmarksProvider = bookmarksProvider ?? throw new ArgumentNullException(nameof(bookmarksProvider));
            Information = information ?? throw new ArgumentNullException(nameof(information));
            pages = new Pages(catalog, pageFactory, isLenientParsing, pdfScanner);
            Structure = new Structure(catalog, crossReferenceTable, pdfScanner);

            // The form is only built the first time TryGetForm is called.
            documentForm = new Lazy<AcroForm>(() => acroFormFactory.GetAcroForm(catalog));
        }

        /// <summary>
        /// Creates a <see cref="PdfDocument"/> for reading from the provided file bytes.
        /// </summary>
        /// <param name="fileBytes">The bytes of the PDF file.</param>
        /// <param name="options">Optional parameters controlling parsing.</param>
        /// <returns>A <see cref="PdfDocument"/> providing access to the file contents.</returns>
        public static PdfDocument Open(byte[] fileBytes, ParsingOptions options = null) => PdfDocumentFactory.Open(fileBytes, options);

        /// <summary>
        /// Opens a file and creates a <see cref="PdfDocument"/> for reading from the provided file path.
        /// </summary>
        /// <param name="filePath">The full path to the file location of the PDF file.</param>
        /// <param name="options">Optional parameters controlling parsing.</param>
        /// <returns>A <see cref="PdfDocument"/> providing access to the file contents.</returns>
        public static PdfDocument Open(string filePath, ParsingOptions options = null) => PdfDocumentFactory.Open(filePath, options);

        /// <summary>
        /// Creates a <see cref="PdfDocument"/> for reading from the provided stream.
        /// The caller must manage disposing the stream. The created PdfDocument will not dispose the stream.
        /// </summary>
        /// <param name="stream">
        /// A stream of the file contents, this must support reading and seeking.
        /// The PdfDocument will not dispose of the provided stream.
        /// </param>
        /// <param name="options">Optional parameters controlling parsing.</param>
        /// <returns>A <see cref="PdfDocument"/> providing access to the file contents.</returns>
        public static PdfDocument Open(Stream stream, ParsingOptions options = null) => PdfDocumentFactory.Open(stream, options);

        /// <summary>
        /// Get the page with the specified page number (1 indexed).
        /// </summary>
        /// <param name="pageNumber">The number of the page to return, this starts from 1.</param>
        /// <returns>The page.</returns>
        /// <exception cref="ObjectDisposedException">The document has already been disposed.</exception>
        /// <exception cref="PdfDocumentEncryptedException">
        /// Retrieving the page failed and the document is encrypted; the original error is the inner exception.
        /// </exception>
        public Page GetPage(int pageNumber)
        {
            ThrowIfDisposed("Cannot access page after the document is disposed.");

            log.Debug($"Accessing page {pageNumber}.");

            try
            {
                return pages.GetPage(pageNumber);
            }
            catch (Exception ex) when (IsEncrypted)
            {
                // Only wrap failures for encrypted documents; any other failure propagates untouched.
                throw new PdfDocumentEncryptedException("Document was encrypted which may have caused error when retrieving page.", encryptionDictionary, ex);
            }
        }

        /// <summary>
        /// Gets all pages in this document in order.
        /// </summary>
        /// <returns>
        /// A lazily evaluated sequence; each page is retrieved through <see cref="GetPage"/> as it is enumerated.
        /// </returns>
        public IEnumerable<Page> GetPages()
        {
            for (var i = 0; i < NumberOfPages; i++)
            {
                // Page numbers are 1 indexed.
                yield return GetPage(i + 1);
            }
        }

        /// <summary>
        /// Get the document level metadata if present.
        /// The metadata is XML in the (Extensible Metadata Platform) XMP format.
        /// </summary>
        /// <remarks>This will throw a <see cref="ObjectDisposedException"/> if called on a disposed <see cref="PdfDocument"/>.</remarks>
        /// <param name="metadata">The metadata stream if it exists.</param>
        /// <returns><see langword="true"/> if the metadata is present, <see langword="false"/> otherwise.</returns>
        public bool TryGetXmpMetadata(out XmpMetadata metadata)
        {
            ThrowIfDisposed("Cannot access the document metadata after the document is disposed.");

            metadata = null;

            if (!Structure.Catalog.CatalogDictionary.TryGet(NameToken.Metadata, pdfScanner, out StreamToken xmpStreamToken))
            {
                return false;
            }

            metadata = new XmpMetadata(xmpStreamToken, filterProvider);

            return true;
        }

        /// <summary>
        /// Gets the bookmarks if this document contains some.
        /// </summary>
        /// <remarks>This will throw a <see cref="ObjectDisposedException"/> if called on a disposed <see cref="PdfDocument"/>.</remarks>
        /// <param name="bookmarks">The bookmarks returned by the bookmarks provider, <see langword="null"/> if it returned none.</param>
        /// <returns><see langword="true"/> if <paramref name="bookmarks"/> is not <see langword="null"/>, <see langword="false"/> otherwise.</returns>
        public bool TryGetBookmarks(out Bookmarks bookmarks)
        {
            ThrowIfDisposed("Cannot access the bookmarks after the document is disposed.");

            bookmarks = bookmarksProvider.GetBookmarks(Structure.Catalog);

            return bookmarks != null;
        }

        /// <summary>
        /// Gets the form if this document contains one.
        /// </summary>
        /// <remarks>This will throw a <see cref="ObjectDisposedException"/> if called on a disposed <see cref="PdfDocument"/>.</remarks>
        /// <param name="form">An <see cref="AcroForm"/> from the document or <see langword="null"/> if not present.</param>
        /// <returns><see langword="true"/> if <paramref name="form"/> is not <see langword="null"/>, <see langword="false"/> otherwise.</returns>
        public bool TryGetForm(out AcroForm form)
        {
            ThrowIfDisposed("Cannot access the form after the document is disposed.");

            form = documentForm.Value;

            return form != null;
        }

        /// <inheritdoc />
        /// <summary>
        /// Dispose the <see cref="PdfDocument"/> and close any unmanaged resources.
        /// </summary>
        /// <remarks>
        /// Calling this more than once is a no-op. Errors raised while disposing the scanner or the
        /// input bytes are logged rather than thrown.
        /// </remarks>
        public void Dispose()
        {
            if (isDisposed)
            {
                return;
            }

            // Mark as disposed up-front so the flag is set even if logging a failure below throws.
            isDisposed = true;

            // Each resource gets its own try/catch so a failure disposing the scanner
            // cannot prevent the input bytes from being released.
            try
            {
                pdfScanner.Dispose();
            }
            catch (Exception ex)
            {
                log.Error("Failed disposing the PdfDocument due to an error.", ex);
            }

            try
            {
                inputBytes.Dispose();
            }
            catch (Exception ex)
            {
                log.Error("Failed disposing the PdfDocument due to an error.", ex);
            }
        }

        /// <summary>
        /// Throws an <see cref="ObjectDisposedException"/> carrying <paramref name="message"/> if this document has been disposed.
        /// </summary>
        /// <param name="message">The message describing the operation which is not permitted after disposal.</param>
        private void ThrowIfDisposed(string message)
        {
            if (isDisposed)
            {
                // The single-string constructor treats its argument as the object name, not the message,
                // so the two-argument overload is used to surface the intended message text.
                throw new ObjectDisposedException(nameof(PdfDocument), message);
            }
        }
    }
}