namespace UglyToad.PdfPig
{
using System;
using System.Collections.Generic;
using System.IO;
using AcroForms;
using Content;
using CrossReference;
using Encryption;
using Exceptions;
using Filters;
using IO;
using Logging;
using Parser;
using Tokenization.Scanner;
using Tokens;
using Outline;
using Util.JetBrains.Annotations;
/// <summary>
/// Provides access to document level information for this PDF document as well as access to the <see cref="Page"/>s contained in the document.
/// </summary>
public class PdfDocument : IDisposable
{
    private bool isDisposed;

    // The form is resolved on first access through TryGetForm rather than at construction.
    private readonly Lazy<AcroForm> documentForm;

    private readonly bool isLenientParsing;

    [NotNull]
    private readonly HeaderVersion version;

    private readonly ILog log;

    private readonly IInputBytes inputBytes;

    [NotNull]
    private readonly ParsingCachingProviders cachingProviders;

    // Null when the document is not encrypted, see IsEncrypted.
    [CanBeNull]
    private readonly EncryptionDictionary encryptionDictionary;

    [NotNull]
    private readonly IPdfTokenScanner pdfScanner;

    private readonly IFilterProvider filterProvider;

    private readonly BookmarksProvider bookmarksProvider;

    [NotNull]
    private readonly Pages pages;

    /// <summary>
    /// The metadata associated with this document.
    /// </summary>
    [NotNull]
    public DocumentInformation Information { get; }

    /// <summary>
    /// Access to the underlying raw structure of the document.
    /// </summary>
    [NotNull]
    public Structure Structure { get; }

    /// <summary>
    /// The version number of the PDF specification which this file conforms to, for example 1.4.
    /// </summary>
    public decimal Version => version.Version;

    /// <summary>
    /// Get the number of pages in this document.
    /// </summary>
    public int NumberOfPages => pages.Count;

    /// <summary>
    /// Whether the document content is encrypted.
    /// </summary>
    public bool IsEncrypted => encryptionDictionary != null;

    /// <summary>
    /// Creates a <see cref="PdfDocument"/> from already parsed document components.
    /// </summary>
    /// <exception cref="ArgumentNullException">
    /// Thrown if <paramref name="version"/>, <paramref name="cachingProviders"/>, <paramref name="pdfScanner"/>,
    /// <paramref name="filterProvider"/>, <paramref name="bookmarksProvider"/> or <paramref name="information"/> is <see langword="null"/>.
    /// </exception>
    internal PdfDocument(ILog log,
        IInputBytes inputBytes,
        HeaderVersion version,
        CrossReferenceTable crossReferenceTable,
        bool isLenientParsing,
        ParsingCachingProviders cachingProviders,
        IPageFactory pageFactory,
        Catalog catalog,
        DocumentInformation information,
        EncryptionDictionary encryptionDictionary,
        IPdfTokenScanner pdfScanner,
        IFilterProvider filterProvider,
        AcroFormFactory acroFormFactory,
        BookmarksProvider bookmarksProvider)
    {
        this.log = log;
        this.inputBytes = inputBytes;
        this.version = version ?? throw new ArgumentNullException(nameof(version));
        this.isLenientParsing = isLenientParsing;
        this.cachingProviders = cachingProviders ?? throw new ArgumentNullException(nameof(cachingProviders));
        this.encryptionDictionary = encryptionDictionary;
        this.pdfScanner = pdfScanner ?? throw new ArgumentNullException(nameof(pdfScanner));
        this.filterProvider = filterProvider ?? throw new ArgumentNullException(nameof(filterProvider));
        this.bookmarksProvider = bookmarksProvider ?? throw new ArgumentNullException(nameof(bookmarksProvider));
        Information = information ?? throw new ArgumentNullException(nameof(information));
        pages = new Pages(catalog, pageFactory, isLenientParsing, pdfScanner);
        Structure = new Structure(catalog, crossReferenceTable, pdfScanner);
        documentForm = new Lazy<AcroForm>(() => acroFormFactory.GetAcroForm(catalog));
    }

    /// <summary>
    /// Creates a <see cref="PdfDocument"/> for reading from the provided file bytes.
    /// </summary>
    /// <param name="fileBytes">The bytes of the PDF file.</param>
    /// <param name="options">Optional parameters controlling parsing.</param>
    /// <returns>A <see cref="PdfDocument"/> providing access to the file contents.</returns>
    public static PdfDocument Open(byte[] fileBytes, ParsingOptions options = null) => PdfDocumentFactory.Open(fileBytes, options);

    /// <summary>
    /// Opens a file and creates a <see cref="PdfDocument"/> for reading from the provided file path.
    /// </summary>
    /// <param name="filePath">The full path to the file location of the PDF file.</param>
    /// <param name="options">Optional parameters controlling parsing.</param>
    /// <returns>A <see cref="PdfDocument"/> providing access to the file contents.</returns>
    public static PdfDocument Open(string filePath, ParsingOptions options = null) => PdfDocumentFactory.Open(filePath, options);

    /// <summary>
    /// Creates a <see cref="PdfDocument"/> for reading from the provided stream.
    /// The caller must manage disposing the stream. The created PdfDocument will not dispose the stream.
    /// </summary>
    /// <param name="stream">
    /// A stream of the file contents, this must support reading and seeking.
    /// The PdfDocument will not dispose of the provided stream.
    /// </param>
    /// <param name="options">Optional parameters controlling parsing.</param>
    /// <returns>A <see cref="PdfDocument"/> providing access to the file contents.</returns>
    public static PdfDocument Open(Stream stream, ParsingOptions options = null) => PdfDocumentFactory.Open(stream, options);

    /// <summary>
    /// Get the page with the specified page number (1 indexed).
    /// </summary>
    /// <param name="pageNumber">The number of the page to return, this starts from 1.</param>
    /// <returns>The page.</returns>
    /// <exception cref="ObjectDisposedException">Thrown if called on a disposed <see cref="PdfDocument"/>.</exception>
    /// <exception cref="PdfDocumentEncryptedException">
    /// Thrown if retrieving the page fails and the document is encrypted; the original error is passed as the inner exception.
    /// </exception>
    public Page GetPage(int pageNumber)
    {
        ThrowIfDisposed("Cannot access page after the document is disposed.");

        log.Debug($"Accessing page {pageNumber}.");

        try
        {
            return pages.GetPage(pageNumber);
        }
        catch (Exception ex) when (IsEncrypted)
        {
            // For encrypted documents the failure is wrapped so callers can see encryption as the likely cause;
            // for unencrypted documents the filter does not match and the original exception propagates untouched.
            throw new PdfDocumentEncryptedException("Document was encrypted which may have caused error when retrieving page.", encryptionDictionary, ex);
        }
    }

    /// <summary>
    /// Gets all pages in this document in order.
    /// </summary>
    /// <returns>A lazily evaluated sequence which loads each page via <see cref="GetPage"/> as it is enumerated.</returns>
    public IEnumerable<Page> GetPages()
    {
        // Page numbers are 1 indexed.
        for (var pageNumber = 1; pageNumber <= NumberOfPages; pageNumber++)
        {
            yield return GetPage(pageNumber);
        }
    }

    /// <summary>
    /// Get the document level metadata if present.
    /// The metadata is XML in the (Extensible Metadata Platform) XMP format.
    /// </summary>
    /// <param name="metadata">The metadata stream if it exists.</param>
    /// <returns><see langword="true"/> if the metadata is present, <see langword="false"/> otherwise.</returns>
    /// <exception cref="ObjectDisposedException">Thrown if called on a disposed <see cref="PdfDocument"/>.</exception>
    public bool TryGetXmpMetadata(out XmpMetadata metadata)
    {
        ThrowIfDisposed("Cannot access the document metadata after the document is disposed.");

        metadata = null;

        if (!Structure.Catalog.CatalogDictionary.TryGet(NameToken.Metadata, pdfScanner, out StreamToken xmpStreamToken))
        {
            return false;
        }

        metadata = new XmpMetadata(xmpStreamToken, filterProvider);

        return true;
    }

    /// <summary>
    /// Gets the bookmarks if this document contains some.
    /// </summary>
    /// <param name="bookmarks">The bookmarks returned by the bookmarks provider, <see langword="null"/> if there are none.</param>
    /// <returns><see langword="true"/> if <paramref name="bookmarks"/> is not <see langword="null"/>, <see langword="false"/> otherwise.</returns>
    /// <exception cref="ObjectDisposedException">Thrown if called on a disposed <see cref="PdfDocument"/>.</exception>
    public bool TryGetBookmarks(out Bookmarks bookmarks)
    {
        ThrowIfDisposed("Cannot access the bookmarks after the document is disposed.");

        bookmarks = bookmarksProvider.GetBookmarks(Structure.Catalog);

        return bookmarks != null;
    }

    /// <summary>
    /// Gets the form if this document contains one.
    /// </summary>
    /// <param name="form">An <see cref="AcroForm"/> from the document or <see langword="null"/> if not present.</param>
    /// <returns><see langword="true"/> if <paramref name="form"/> is not <see langword="null"/>, <see langword="false"/> otherwise.</returns>
    /// <exception cref="ObjectDisposedException">Thrown if called on a disposed <see cref="PdfDocument"/>.</exception>
    public bool TryGetForm(out AcroForm form)
    {
        ThrowIfDisposed("Cannot access the form after the document is disposed.");

        form = documentForm.Value;

        return form != null;
    }

    /// <summary>
    /// Dispose the <see cref="PdfDocument"/> and close any unmanaged resources.
    /// </summary>
    /// <remarks>
    /// Calling this more than once has no further effect. Errors raised while disposing the token scanner
    /// or the input bytes are logged rather than thrown.
    /// </remarks>
    public void Dispose()
    {
        if (isDisposed)
        {
            return;
        }

        // Flag first so the document is treated as disposed regardless of how cleanup goes.
        isDisposed = true;

        // Each resource is released independently so that a failure in one does not leak the other.
        DisposeAndLogFailure(() => pdfScanner.Dispose());
        DisposeAndLogFailure(() => inputBytes.Dispose());
    }

    /// <summary>
    /// Throws an <see cref="ObjectDisposedException"/> carrying <paramref name="message"/> if this document has been disposed.
    /// </summary>
    private void ThrowIfDisposed(string message)
    {
        if (isDisposed)
        {
            // The single string constructor treats its argument as the object name, so pass both explicitly.
            throw new ObjectDisposedException(nameof(PdfDocument), message);
        }
    }

    /// <summary>
    /// Runs a single dispose action, logging instead of propagating any error it raises.
    /// </summary>
    private void DisposeAndLogFailure(Action dispose)
    {
        try
        {
            dispose();
        }
        catch (Exception ex)
        {
            log.Error("Failed disposing the PdfDocument due to an error.", ex);
        }
    }
}
}