2018-01-11 03:49:32 +08:00
namespace UglyToad.PdfPig
{
    using System;
    using System.IO;
    using AcroForms;
    using Content;
    using CrossReference;
    using Encryption;
    using Exceptions;
    using IO;
    using Logging;
    using Parser;
    using Tokenization.Scanner;
    using Util.JetBrains.Annotations;

    /// <inheritdoc />
    /// <summary>
    /// Provides access to document level information for this PDF document as well as access to the <see cref="T:UglyToad.PdfPig.Content.Page"/>s contained in the document.
    /// </summary>
    public class PdfDocument : IDisposable
    {
        // Set by Dispose(); guards GetPage/GetForm and makes repeated Dispose calls a no-op.
        private bool isDisposed;

        // The form is only built on first access through GetForm().
        private readonly Lazy<AcroForm> documentForm;

        private readonly bool isLenientParsing;

        [NotNull]
        private readonly HeaderVersion version;

        private readonly ILog log;

        // Disposed together with this document.
        private readonly IInputBytes inputBytes;

        [NotNull]
        private readonly ParsingCachingProviders cachingProviders;

        // Null when the document is not encrypted, see IsEncrypted.
        [CanBeNull]
        private readonly EncryptionDictionary encryptionDictionary;

        [NotNull]
        private readonly IPdfTokenScanner pdfScanner;

        [NotNull]
        private readonly Pages pages;

        /// <summary>
        /// The metadata associated with this document.
        /// </summary>
        [NotNull]
        public DocumentInformation Information { get; }

        /// <summary>
        /// Access to the underlying raw structure of the document.
        /// </summary>
        [NotNull]
        public Structure Structure { get; }

        /// <summary>
        /// The version number of the PDF specification which this file conforms to, for example 1.4.
        /// </summary>
        public decimal Version => version.Version;

        /// <summary>
        /// Get the number of pages in this document.
        /// </summary>
        public int NumberOfPages => pages.Count;

        /// <summary>
        /// Whether the document content is encrypted.
        /// </summary>
        public bool IsEncrypted => encryptionDictionary != null;

        /// <summary>
        /// Creates the document from its already parsed parts.
        /// </summary>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="version"/>, <paramref name="cachingProviders"/>, <paramref name="pdfScanner"/>
        /// or <paramref name="information"/> is <see langword="null"/>.
        /// </exception>
        internal PdfDocument(ILog log,
            IInputBytes inputBytes,
            HeaderVersion version,
            CrossReferenceTable crossReferenceTable,
            bool isLenientParsing,
            ParsingCachingProviders cachingProviders,
            IPageFactory pageFactory,
            Catalog catalog,
            DocumentInformation information,
            EncryptionDictionary encryptionDictionary,
            IPdfTokenScanner pdfScanner,
            AcroFormFactory acroFormFactory)
        {
            this.log = log;
            this.inputBytes = inputBytes;
            this.version = version ?? throw new ArgumentNullException(nameof(version));
            this.isLenientParsing = isLenientParsing;
            this.cachingProviders = cachingProviders ?? throw new ArgumentNullException(nameof(cachingProviders));
            this.encryptionDictionary = encryptionDictionary;
            this.pdfScanner = pdfScanner ?? throw new ArgumentNullException(nameof(pdfScanner));
            Information = information ?? throw new ArgumentNullException(nameof(information));
            pages = new Pages(log, catalog, pageFactory, isLenientParsing, pdfScanner);
            Structure = new Structure(catalog, crossReferenceTable, pdfScanner);
            documentForm = new Lazy<AcroForm>(() => acroFormFactory.GetAcroForm(catalog));
        }

        /// <summary>
        /// Creates a <see cref="PdfDocument"/> for reading from the provided file bytes.
        /// </summary>
        /// <param name="fileBytes">The bytes of the PDF file.</param>
        /// <param name="options">Optional parameters controlling parsing.</param>
        /// <returns>A <see cref="PdfDocument"/> providing access to the file contents.</returns>
        public static PdfDocument Open(byte[] fileBytes, ParsingOptions options = null) => PdfDocumentFactory.Open(fileBytes, options);

        /// <summary>
        /// Opens a file and creates a <see cref="PdfDocument"/> for reading from the provided file path.
        /// </summary>
        /// <param name="filePath">The full path to the file location of the PDF file.</param>
        /// <param name="options">Optional parameters controlling parsing.</param>
        /// <returns>A <see cref="PdfDocument"/> providing access to the file contents.</returns>
        public static PdfDocument Open(string filePath, ParsingOptions options = null) => PdfDocumentFactory.Open(filePath, options);

        /// <summary>
        /// Creates a <see cref="PdfDocument"/> for reading from the provided stream.
        /// The caller must manage disposing the stream. The created PdfDocument will not dispose the stream.
        /// </summary>
        /// <param name="stream">
        /// A stream of the file contents, this must support reading and seeking.
        /// The PdfDocument will not dispose of the provided stream.
        /// </param>
        /// <param name="options">Optional parameters controlling parsing.</param>
        /// <returns>A <see cref="PdfDocument"/> providing access to the file contents.</returns>
        public static PdfDocument Open(Stream stream, ParsingOptions options = null) => PdfDocumentFactory.Open(stream, options);

        /// <summary>
        /// Get the page with the specified page number (1 indexed).
        /// </summary>
        /// <param name="pageNumber">The number of the page to return, this starts from 1.</param>
        /// <returns>The page.</returns>
        /// <exception cref="ObjectDisposedException">The document has already been disposed.</exception>
        /// <exception cref="PdfDocumentEncryptedException">
        /// Retrieving the page failed and the document is encrypted; the original error is the inner exception.
        /// </exception>
        public Page GetPage(int pageNumber)
        {
            if (isDisposed)
            {
                // The single-argument constructor treats its argument as the object name,
                // so the two-argument overload is required to surface this text as the message.
                throw new ObjectDisposedException(nameof(PdfDocument), "Cannot access page after the document is disposed.");
            }

            log.Debug($"Accessing page {pageNumber}.");

            try
            {
                return pages.GetPage(pageNumber);
            }
            catch (Exception ex) when (IsEncrypted)
            {
                // Only failures on encrypted documents are wrapped; all others propagate untouched.
                throw new PdfDocumentEncryptedException("Document was encrypted which may have caused error when retrieving page.", encryptionDictionary, ex);
            }
        }

        /// <summary>
        /// Gets the form if this document contains one.
        /// </summary>
        /// <returns>An <see cref="AcroForm"/> from the document or <see langword="null"/> if not present.</returns>
        /// <exception cref="ObjectDisposedException">The document has already been disposed.</exception>
        internal AcroForm GetForm()
        {
            if (isDisposed)
            {
                throw new ObjectDisposedException(nameof(PdfDocument), "Cannot access the form after the document is disposed.");
            }

            return documentForm.Value;
        }

        /// <inheritdoc />
        /// <summary>
        /// Dispose the <see cref="T:UglyToad.PdfPig.PdfDocument" /> and close any unmanaged resources.
        /// Errors raised while disposing the input bytes are logged instead of being thrown.
        /// Calling this more than once has no further effect.
        /// </summary>
        public void Dispose()
        {
            if (isDisposed)
            {
                return;
            }

            try
            {
                inputBytes.Dispose();
            }
            catch (Exception ex)
            {
                log.Error("Failed disposing the PdfDocument due to an error.", ex);
            }
            finally
            {
                // Mark as disposed even when disposing the input failed so the document is not used afterwards.
                isDisposed = true;
            }
        }
    }
}