diff --git a/src/UglyToad.PdfPig.Tests/Integration/EmbeddedFileAttachmentTests.cs b/src/UglyToad.PdfPig.Tests/Integration/EmbeddedFileAttachmentTests.cs
new file mode 100644
index 00000000..30b2fd9f
--- /dev/null
+++ b/src/UglyToad.PdfPig.Tests/Integration/EmbeddedFileAttachmentTests.cs
@@ -0,0 +1,38 @@
+namespace UglyToad.PdfPig.Tests.Integration
+{
+ using Xunit;
+
+    public class EmbeddedFileAttachmentTests
+    {
+        // Fixture PDF: the tests expect the same opening sentence on every page and exactly one attachment.
+        private const string FileName = "embedded-file-attachment.pdf";
+
+        [Fact]
+        public void HasCorrectText()
+        {
+            using (var document = PdfDocument.Open(IntegrationHelpers.GetSpecificTestDocumentPath(FileName)))
+            {
+                // Pages are requested by number, starting at 1.
+                var pageNumber = 1;
+                while (pageNumber <= document.NumberOfPages)
+                {
+                    var text = document.GetPage(pageNumber++).Text;
+                    Assert.StartsWith("This is a test document. It contains a file attachment.", text);
+                }
+            }
+        }
+
+        [Fact]
+        public void HasEmbeddedFiles()
+        {
+            using (var document = PdfDocument.Open(IntegrationHelpers.GetSpecificTestDocumentPath(FileName)))
+            {
+                var found = document.Advanced.TryGetEmbeddedFiles(out var attachments);
+                // A single attachment whose byte list holds 20668 entries.
+                Assert.True(found);
+                Assert.Equal(1, attachments.Count);
+                Assert.Equal(20668, attachments[0].Bytes.Count);
+            }
+        }
+    }
+}
diff --git a/src/UglyToad.PdfPig.Tests/Integration/SpecificTestDocuments/embedded-file-attachment.pdf b/src/UglyToad.PdfPig.Tests/Integration/SpecificTestDocuments/embedded-file-attachment.pdf
new file mode 100644
index 00000000..bfd6078b
Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Integration/SpecificTestDocuments/embedded-file-attachment.pdf differ
diff --git a/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs b/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs
index 4e4d643a..3f39e11d 100644
--- a/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs
+++ b/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs
@@ -58,6 +58,7 @@
"UglyToad.PdfPig.AcroForms.Fields.AcroSignatureField",
"UglyToad.PdfPig.AcroForms.Fields.AcroTextField",
"UglyToad.PdfPig.AcroForms.Fields.AcroTextFieldFlags",
+ "UglyToad.PdfPig.AdvancedPdfDocumentAccess",
"UglyToad.PdfPig.Annotations.Annotation",
"UglyToad.PdfPig.Annotations.AnnotationBorder",
"UglyToad.PdfPig.Annotations.AnnotationFlags",
@@ -65,6 +66,7 @@
"UglyToad.PdfPig.Content.Catalog",
"UglyToad.PdfPig.Content.CropBox",
"UglyToad.PdfPig.Content.DocumentInformation",
+ "UglyToad.PdfPig.Content.EmbeddedFile",
"UglyToad.PdfPig.Content.Hyperlink",
"UglyToad.PdfPig.Content.InlineImage",
"UglyToad.PdfPig.Content.IPdfImage",
diff --git a/src/UglyToad.PdfPig/AdvancedPdfDocumentAccess.cs b/src/UglyToad.PdfPig/AdvancedPdfDocumentAccess.cs
new file mode 100644
index 00000000..60c0b643
--- /dev/null
+++ b/src/UglyToad.PdfPig/AdvancedPdfDocumentAccess.cs
@@ -0,0 +1,104 @@
+namespace UglyToad.PdfPig
+{
+ using System;
+ using System.Collections.Generic;
+ using Content;
+ using Filters;
+ using Parser.Parts;
+ using Tokenization.Scanner;
+ using Tokens;
+
+    /// <inheritdoc />
+    /// <summary>
+    /// Provides access to rare or advanced features from the PDF specification.
+    /// </summary>
+    public class AdvancedPdfDocumentAccess : IDisposable
+    {
+        private readonly IPdfTokenScanner pdfScanner;
+        private readonly IFilterProvider filterProvider;
+        private readonly Catalog catalog;
+        private readonly bool isLenientParsing;
+
+        private bool isDisposed;
+
+        internal AdvancedPdfDocumentAccess(IPdfTokenScanner pdfScanner,
+            IFilterProvider filterProvider,
+            Catalog catalog,
+            bool isLenientParsing)
+        {
+            this.pdfScanner = pdfScanner ?? throw new ArgumentNullException(nameof(pdfScanner));
+            this.filterProvider = filterProvider ?? throw new ArgumentNullException(nameof(filterProvider));
+            this.catalog = catalog ?? throw new ArgumentNullException(nameof(catalog));
+            this.isLenientParsing = isLenientParsing;
+        }
+
+        /// <summary>
+        /// Get any embedded files contained in this PDF document.
+        /// Since PDF 1.3 any external file referenced by the document may have its contents embedded within the referring PDF file,
+        /// allowing its contents to be stored or transmitted along with the PDF file.
+        /// </summary>
+        /// <param name="embeddedFiles">The set of embedded files in this document, or <see langword="null"/> when there are none.</param>
+        /// <returns><see langword="true"/> if this document contains more than zero embedded files, otherwise <see langword="false"/>.</returns>
+        /// <exception cref="ObjectDisposedException">This instance has been disposed.</exception>
+        public bool TryGetEmbeddedFiles(out IReadOnlyList<EmbeddedFile> embeddedFiles)
+        {
+            GuardDisposed();
+            embeddedFiles = null;
+
+            if (!catalog.CatalogDictionary.TryGet(NameToken.Names, pdfScanner, out DictionaryToken namesDictionary)
+                || !namesDictionary.TryGet(NameToken.EmbeddedFiles, pdfScanner, out DictionaryToken embeddedFileNamesDictionary))
+            {
+                return false;
+            }
+
+            // The identity factory keeps the raw value tokens; each one is resolved to a file specification below.
+            var embeddedFileNames = NameTreeParser.FlattenNameTreeToDictionary(embeddedFileNamesDictionary, pdfScanner, isLenientParsing,
+                x => x);
+            if (embeddedFileNames.Count == 0)
+            {
+                return false;
+            }
+
+            var result = new List<EmbeddedFile>();
+            foreach (var keyValuePair in embeddedFileNames)
+            {
+                if (!DirectObjectFinder.TryGet(keyValuePair.Value, pdfScanner, out DictionaryToken fileDescriptorDictionaryToken)
+                    || !fileDescriptorDictionaryToken.TryGet(NameToken.Ef, pdfScanner, out DictionaryToken efDictionary)
+                    || !efDictionary.TryGet(NameToken.F, pdfScanner, out StreamToken fileStreamToken))
+                {
+                    continue;
+                }
+
+                // The descriptor's own /F string is treated as optional; without it the specification is empty.
+                var fileSpecification = string.Empty;
+                if (fileDescriptorDictionaryToken.TryGet(NameToken.F, pdfScanner, out IDataToken<string> fileSpecificationToken))
+                {
+                    fileSpecification = fileSpecificationToken.Data;
+                }
+
+                var fileBytes = fileStreamToken.Decode(filterProvider);
+                result.Add(new EmbeddedFile(keyValuePair.Key, fileSpecification, fileBytes, fileStreamToken));
+            }
+
+            // Every failure path leaves the out value null, including the case where no entry had an embedded stream.
+            embeddedFiles = result.Count > 0 ? result : null;
+            return embeddedFiles != null;
+        }
+
+        private void GuardDisposed()
+        {
+            if (isDisposed)
+            {
+                throw new ObjectDisposedException(nameof(AdvancedPdfDocumentAccess));
+            }
+        }
+
+        /// <inheritdoc />
+        public void Dispose()
+        {
+            // NOTE(review): the scanner is supplied by the creator, which may dispose it too - confirm double disposal is safe.
+            pdfScanner?.Dispose();
+            isDisposed = true;
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/UglyToad.PdfPig/Content/EmbeddedFile.cs b/src/UglyToad.PdfPig/Content/EmbeddedFile.cs
new file mode 100644
index 00000000..291eec5d
--- /dev/null
+++ b/src/UglyToad.PdfPig/Content/EmbeddedFile.cs
@@ -0,0 +1,46 @@
+namespace UglyToad.PdfPig.Content
+{
+ using System;
+ using System.Collections.Generic;
+ using Tokens;
+
+    /// <summary>
+    /// A file embedded in a PDF document for document references.
+    /// </summary>
+    public class EmbeddedFile
+    {
+        /// <summary>
+        /// The name given to this embedded file in the document's name tree.
+        /// </summary>
+        public string Name { get; }
+
+        /// <summary>
+        /// The specification of the path to the file.
+        /// </summary>
+        public string FileSpecification { get; }
+
+        /// <summary>
+        /// The decrypted bytes of the file.
+        /// </summary>
+        public IReadOnlyList<byte> Bytes { get; }
+
+        /// <summary>
+        /// The underlying embedded file stream.
+        /// </summary>
+        public StreamToken Stream { get; }
+
+        internal EmbeddedFile(string name, string fileSpecification, IReadOnlyList<byte> bytes, StreamToken stream)
+        {
+            Name = name ?? throw new ArgumentNullException(nameof(name));
+            FileSpecification = fileSpecification;
+            Bytes = bytes ?? throw new ArgumentNullException(nameof(bytes));
+            Stream = stream ?? throw new ArgumentNullException(nameof(stream));
+        }
+
+        /// <inheritdoc />
+        public override string ToString()
+        {
+            return $"{Name}: {Stream.StreamDictionary}.";
+        }
+    }
+}
diff --git a/src/UglyToad.PdfPig/Outline/BookmarksProvider.cs b/src/UglyToad.PdfPig/Outline/BookmarksProvider.cs
index 6a147bf2..893792c9 100644
--- a/src/UglyToad.PdfPig/Outline/BookmarksProvider.cs
+++ b/src/UglyToad.PdfPig/Outline/BookmarksProvider.cs
@@ -198,51 +198,20 @@
* The keys in the name tree may be treated as text strings for display purposes.
* The destination value associated with a key in the name tree may be either an array or a dictionary.
*/
- ExtractNameTree(dests, catalog, pdfScanner, isLenientParsing, log, result);
+ NameTreeParser.FlattenNameTree(dests, pdfScanner, isLenientParsing, value =>
+ {
+ if (TryReadExplicitDestination(value, catalog, pdfScanner, log, out var destination))
+ {
+ return destination;
+ }
+
+ return null;
+ }, result);
}
return result;
}
- private static void ExtractNameTree(DictionaryToken nameTreeNodeDictionary, Catalog catalog, IPdfTokenScanner pdfScanner,
- bool isLenientParsing,
- ILog log,
- Dictionary explicitDestinations)
- {
- if (nameTreeNodeDictionary.TryGet(NameToken.Names, pdfScanner, out ArrayToken nodeNames))
- {
- for (var i = 0; i < nodeNames.Length; i += 2)
- {
- if (!(nodeNames[i] is IDataToken key))
- {
- continue;
- }
-
- var value = nodeNames[i + 1];
-
- if (TryReadExplicitDestination(value, catalog, pdfScanner, log, out var destination))
- {
- explicitDestinations[key.Data] = destination;
- }
- }
- }
-
- if (nameTreeNodeDictionary.TryGet(NameToken.Kids, pdfScanner, out ArrayToken kids))
- {
- foreach (var kid in kids.Data)
- {
- if (DirectObjectFinder.TryGet(kid, pdfScanner, out DictionaryToken kidDictionary))
- {
- ExtractNameTree(kidDictionary, catalog, pdfScanner, isLenientParsing, log, explicitDestinations);
- }
- else if (!isLenientParsing)
- {
- throw new PdfDocumentFormatException($"Invalid kids entry in PDF name tree: {kid} in {kids}.");
- }
- }
- }
- }
-
private static bool TryReadExplicitDestination(IToken value, Catalog catalog, IPdfTokenScanner pdfScanner,
ILog log, out ExplicitDestination destination)
{
diff --git a/src/UglyToad.PdfPig/Parser/Parts/NameTreeParser.cs b/src/UglyToad.PdfPig/Parser/Parts/NameTreeParser.cs
new file mode 100644
index 00000000..83910b44
--- /dev/null
+++ b/src/UglyToad.PdfPig/Parser/Parts/NameTreeParser.cs
@@ -0,0 +1,65 @@
+namespace UglyToad.PdfPig.Parser.Parts
+{
+ using System;
+ using System.Collections.Generic;
+ using Exceptions;
+ using Tokenization.Scanner;
+ using Tokens;
+
+    /// <summary>Flattens PDF name trees, whose nodes hold /Names key-value pairs and/or /Kids child nodes.</summary>
+    internal static class NameTreeParser
+    {
+        /// <summary>Collects the entries of the tree rooted at <paramref name="nameTreeNodeDictionary"/> into a new dictionary.</summary>
+        public static IReadOnlyDictionary<string, TResult> FlattenNameTreeToDictionary<TResult>(DictionaryToken nameTreeNodeDictionary,
+            IPdfTokenScanner pdfScanner, bool isLenientParsing,
+            Func<IToken, TResult> valuesFactory) where TResult : class
+        {
+            var result = new Dictionary<string, TResult>();
+            FlattenNameTree(nameTreeNodeDictionary, pdfScanner, isLenientParsing, valuesFactory, result);
+            return result;
+        }
+
+        /// <summary>Adds this node's entries, then recursively its kids' entries, to <paramref name="result"/>; null factory results are skipped.</summary>
+        /// <exception cref="PdfDocumentFormatException">Strict parsing only: odd-length /Names array or unresolvable /Kids entry.</exception>
+        public static void FlattenNameTree<TResult>(DictionaryToken nameTreeNodeDictionary,
+            IPdfTokenScanner pdfScanner, bool isLenientParsing,
+            Func<IToken, TResult> valuesFactory,
+            Dictionary<string, TResult> result) where TResult : class
+        {
+            if (nameTreeNodeDictionary.TryGet(NameToken.Names, pdfScanner, out ArrayToken nodeNames))
+            {
+                if (!isLenientParsing && nodeNames.Length % 2 != 0)
+                {
+                    throw new PdfDocumentFormatException($"Odd number of entries in PDF name tree names array: {nodeNames}.");
+                }
+                // Entries alternate key, value; stopping at i + 1 ignores a dangling key instead of indexing past the end.
+                for (var i = 0; i + 1 < nodeNames.Length; i += 2)
+                {
+                    if (nodeNames[i] is IDataToken<string> key)
+                    {
+                        var value = valuesFactory(nodeNames[i + 1]);
+                        if (value != null)
+                        {
+                            result[key.Data] = value; // a repeated key overwrites the earlier value
+                        }
+                    }
+                }
+            }
+
+            if (nameTreeNodeDictionary.TryGet(NameToken.Kids, pdfScanner, out ArrayToken kids))
+            {
+                foreach (var kid in kids.Data)
+                {
+                    if (DirectObjectFinder.TryGet(kid, pdfScanner, out DictionaryToken kidDictionary))
+                    {
+                        FlattenNameTree(kidDictionary, pdfScanner, isLenientParsing, valuesFactory, result);
+                    }
+                    else if (!isLenientParsing)
+                    {
+                        throw new PdfDocumentFormatException($"Invalid kids entry in PDF name tree: {kid} in {kids}.");
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/src/UglyToad.PdfPig/PdfDocument.cs b/src/UglyToad.PdfPig/PdfDocument.cs
index 5ad5d296..ecc08a8f 100644
--- a/src/UglyToad.PdfPig/PdfDocument.cs
+++ b/src/UglyToad.PdfPig/PdfDocument.cs
@@ -62,6 +62,11 @@
[NotNull]
public Structure Structure { get; }
+ /// <summary>
+ /// Access to rare or advanced features of the PDF specification.
+ /// </summary>
+ public AdvancedPdfDocumentAccess Advanced { get; }
+
///
/// The version number of the PDF specification which this file conforms to, for example 1.4.
///
@@ -104,6 +109,7 @@
Information = information ?? throw new ArgumentNullException(nameof(information));
pages = new Pages(catalog, pageFactory, isLenientParsing, pdfScanner);
Structure = new Structure(catalog, crossReferenceTable, pdfScanner);
+ Advanced = new AdvancedPdfDocumentAccess(pdfScanner, filterProvider, catalog, isLenientParsing);
documentForm = new Lazy(() => acroFormFactory.GetAcroForm(catalog));
}
@@ -242,6 +248,7 @@
{
try
{
+ Advanced.Dispose();
pdfScanner.Dispose();
inputBytes.Dispose();
}