mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-09-19 02:37:56 +08:00
add method to retrieve any embedded files
This commit is contained in:
@@ -0,0 +1,38 @@
|
||||
namespace UglyToad.PdfPig.Tests.Integration
|
||||
{
|
||||
using Xunit;
|
||||
|
||||
/// <summary>
/// Integration tests for the "embedded-file-attachment.pdf" test document.
/// </summary>
public class EmbeddedFileAttachmentTests
{
    /// <summary>
    /// Every page of the document should start with the expected sentence.
    /// </summary>
    [Fact]
    public void HasCorrectText()
    {
        var path = IntegrationHelpers.GetSpecificTestDocumentPath("embedded-file-attachment.pdf");

        using (var document = PdfDocument.Open(path))
        {
            // Page numbers are 1-based, hence the inclusive upper bound.
            for (var i = 1; i <= document.NumberOfPages; i++)
            {
                var page = document.GetPage(i);

                Assert.StartsWith("This is a test document. It contains a file attachment.", page.Text);
            }
        }
    }

    /// <summary>
    /// The document should expose exactly one embedded file of the expected size.
    /// </summary>
    [Fact]
    public void HasEmbeddedFiles()
    {
        var path = IntegrationHelpers.GetSpecificTestDocumentPath("embedded-file-attachment.pdf");

        using (var document = PdfDocument.Open(path))
        {
            Assert.True(document.Advanced.TryGetEmbeddedFiles(out var files));

            // Assert.Single checks the collection size (instead of comparing Count to 1, see xUnit2013)
            // and hands back the only element for the following assertion.
            var file = Assert.Single(files);

            Assert.Equal(20668, file.Bytes.Count);
        }
    }
}
|
||||
}
|
Binary file not shown.
@@ -58,6 +58,7 @@
|
||||
"UglyToad.PdfPig.AcroForms.Fields.AcroSignatureField",
|
||||
"UglyToad.PdfPig.AcroForms.Fields.AcroTextField",
|
||||
"UglyToad.PdfPig.AcroForms.Fields.AcroTextFieldFlags",
|
||||
"UglyToad.PdfPig.AdvancedPdfDocumentAccess",
|
||||
"UglyToad.PdfPig.Annotations.Annotation",
|
||||
"UglyToad.PdfPig.Annotations.AnnotationBorder",
|
||||
"UglyToad.PdfPig.Annotations.AnnotationFlags",
|
||||
@@ -65,6 +66,7 @@
|
||||
"UglyToad.PdfPig.Content.Catalog",
|
||||
"UglyToad.PdfPig.Content.CropBox",
|
||||
"UglyToad.PdfPig.Content.DocumentInformation",
|
||||
"UglyToad.PdfPig.Content.EmbeddedFile",
|
||||
"UglyToad.PdfPig.Content.Hyperlink",
|
||||
"UglyToad.PdfPig.Content.InlineImage",
|
||||
"UglyToad.PdfPig.Content.IPdfImage",
|
||||
|
104
src/UglyToad.PdfPig/AdvancedPdfDocumentAccess.cs
Normal file
104
src/UglyToad.PdfPig/AdvancedPdfDocumentAccess.cs
Normal file
@@ -0,0 +1,104 @@
|
||||
namespace UglyToad.PdfPig
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using Content;
|
||||
using Filters;
|
||||
using Parser.Parts;
|
||||
using Tokenization.Scanner;
|
||||
using Tokens;
|
||||
|
||||
/// <summary>
/// Provides access to rare or advanced features from the PDF specification.
/// </summary>
public class AdvancedPdfDocumentAccess : IDisposable
{
    private readonly IPdfTokenScanner pdfScanner;
    private readonly IFilterProvider filterProvider;
    private readonly Catalog catalog;
    private readonly bool isLenientParsing;

    private bool isDisposed;

    /// <summary>
    /// Create a new <see cref="AdvancedPdfDocumentAccess"/>.
    /// </summary>
    /// <param name="pdfScanner">The scanner used to resolve tokens in the document. It is disposed by <see cref="Dispose"/>.</param>
    /// <param name="filterProvider">The filter provider used to decode embedded file streams.</param>
    /// <param name="catalog">The document catalog whose dictionary is searched for embedded files.</param>
    /// <param name="isLenientParsing">Passed through to the name tree parser.</param>
    /// <exception cref="ArgumentNullException">Any of the reference type arguments is <see langword="null"/>.</exception>
    internal AdvancedPdfDocumentAccess(IPdfTokenScanner pdfScanner,
        IFilterProvider filterProvider,
        Catalog catalog,
        bool isLenientParsing)
    {
        this.pdfScanner = pdfScanner ?? throw new ArgumentNullException(nameof(pdfScanner));
        this.filterProvider = filterProvider ?? throw new ArgumentNullException(nameof(filterProvider));
        this.catalog = catalog ?? throw new ArgumentNullException(nameof(catalog));
        this.isLenientParsing = isLenientParsing;
    }

    /// <summary>
    /// Get any embedded files contained in this PDF document.
    /// Since PDF 1.3 any external file referenced by the document may have its contents embedded within the referring PDF file,
    /// allowing its contents to be stored or transmitted along with the PDF file.
    /// </summary>
    /// <param name="embeddedFiles">
    /// The set of embedded files in this document, or <see langword="null"/> if this method returns <see langword="false"/>.
    /// </param>
    /// <returns><see langword="true"/> if this document contains more than zero embedded files, otherwise <see langword="false"/>.</returns>
    /// <exception cref="ObjectDisposedException">This instance has already been disposed.</exception>
    public bool TryGetEmbeddedFiles(out IReadOnlyList<EmbeddedFile> embeddedFiles)
    {
        GuardDisposed();

        embeddedFiles = null;

        // Embedded files are looked up through the catalog's Names dictionary and its EmbeddedFiles entry.
        if (!catalog.CatalogDictionary.TryGet(NameToken.Names, pdfScanner, out DictionaryToken namesDictionary)
            || !namesDictionary.TryGet(NameToken.EmbeddedFiles, pdfScanner, out DictionaryToken embeddedFileNamesDictionary))
        {
            return false;
        }

        // The values are kept as raw tokens here; they are resolved to dictionaries per entry below.
        var embeddedFileNames = NameTreeParser.FlattenNameTreeToDictionary(embeddedFileNamesDictionary, pdfScanner, isLenientParsing,
            x => x);

        if (embeddedFileNames.Count == 0)
        {
            return false;
        }

        var result = new List<EmbeddedFile>();

        foreach (var keyValuePair in embeddedFileNames)
        {
            // Entries without a file descriptor dictionary, an EF dictionary or an F stream are skipped.
            if (!DirectObjectFinder.TryGet(keyValuePair.Value, pdfScanner, out DictionaryToken fileDescriptorDictionaryToken)
                || !fileDescriptorDictionaryToken.TryGet(NameToken.Ef, pdfScanner, out DictionaryToken efDictionary)
                || !efDictionary.TryGet(NameToken.F, pdfScanner, out StreamToken fileStreamToken))
            {
                continue;
            }

            // The file specification string is optional, default to empty when it is absent.
            var fileSpecification = string.Empty;
            if (fileDescriptorDictionaryToken.TryGet(NameToken.F, pdfScanner, out IDataToken<string> fileSpecificationToken))
            {
                fileSpecification = fileSpecificationToken.Data;
            }

            var fileBytes = fileStreamToken.Decode(filterProvider);

            result.Add(new EmbeddedFile(keyValuePair.Key, fileSpecification, fileBytes, fileStreamToken));
        }

        // Keep the out value null on failure so every false return behaves the same way,
        // including the case where all name tree entries were skipped above.
        if (result.Count == 0)
        {
            return false;
        }

        embeddedFiles = result;

        return true;
    }

    /// <summary>
    /// Throws if this instance has been disposed.
    /// </summary>
    private void GuardDisposed()
    {
        if (isDisposed)
        {
            throw new ObjectDisposedException(nameof(AdvancedPdfDocumentAccess));
        }
    }

    /// <inheritdoc />
    public void Dispose()
    {
        // Disposes the scanner passed to the constructor and marks this instance as unusable.
        pdfScanner?.Dispose();
        isDisposed = true;
    }
}
|
||||
}
|
46
src/UglyToad.PdfPig/Content/EmbeddedFile.cs
Normal file
46
src/UglyToad.PdfPig/Content/EmbeddedFile.cs
Normal file
@@ -0,0 +1,46 @@
|
||||
namespace UglyToad.PdfPig.Content
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using Tokens;
|
||||
|
||||
/// <summary>
/// Describes a file which has been embedded in a PDF document so it can be referenced from the document.
/// </summary>
public class EmbeddedFile
{
    /// <summary>
    /// Create a new <see cref="EmbeddedFile"/>.
    /// </summary>
    /// <param name="name">The name of the file in the document's name tree, required.</param>
    /// <param name="fileSpecification">The file specification string, may be <see langword="null"/>.</param>
    /// <param name="bytes">The bytes of the file, required.</param>
    /// <param name="stream">The stream token the file was read from, required.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="name"/>, <paramref name="bytes"/> or <paramref name="stream"/> is <see langword="null"/>.
    /// </exception>
    internal EmbeddedFile(string name, string fileSpecification, IReadOnlyList<byte> bytes, StreamToken stream)
    {
        if (name == null)
        {
            throw new ArgumentNullException(nameof(name));
        }

        if (bytes == null)
        {
            throw new ArgumentNullException(nameof(bytes));
        }

        if (stream == null)
        {
            throw new ArgumentNullException(nameof(stream));
        }

        Name = name;
        FileSpecification = fileSpecification;
        Bytes = bytes;
        Stream = stream;
    }

    /// <summary>
    /// The key under which this embedded file is stored in the document's name tree.
    /// </summary>
    public string Name { get; }

    /// <summary>
    /// The file specification, describing the path to the file.
    /// </summary>
    public string FileSpecification { get; }

    /// <summary>
    /// The decoded bytes of the file.
    /// </summary>
    public IReadOnlyList<byte> Bytes { get; }

    /// <summary>
    /// The stream token which holds the embedded file in the document.
    /// </summary>
    public StreamToken Stream { get; }

    /// <inheritdoc />
    public override string ToString() => $"{Name}: {Stream.StreamDictionary}.";
}
|
||||
}
|
@@ -198,51 +198,20 @@
|
||||
* The keys in the name tree may be treated as text strings for display purposes.
|
||||
* The destination value associated with a key in the name tree may be either an array or a dictionary.
|
||||
*/
|
||||
ExtractNameTree(dests, catalog, pdfScanner, isLenientParsing, log, result);
|
||||
NameTreeParser.FlattenNameTree(dests, pdfScanner, isLenientParsing, value =>
|
||||
{
|
||||
if (TryReadExplicitDestination(value, catalog, pdfScanner, log, out var destination))
|
||||
{
|
||||
return destination;
|
||||
}
|
||||
|
||||
return null;
|
||||
}, result);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private static void ExtractNameTree(DictionaryToken nameTreeNodeDictionary, Catalog catalog, IPdfTokenScanner pdfScanner,
|
||||
bool isLenientParsing,
|
||||
ILog log,
|
||||
Dictionary<string, ExplicitDestination> explicitDestinations)
|
||||
{
|
||||
if (nameTreeNodeDictionary.TryGet(NameToken.Names, pdfScanner, out ArrayToken nodeNames))
|
||||
{
|
||||
for (var i = 0; i < nodeNames.Length; i += 2)
|
||||
{
|
||||
if (!(nodeNames[i] is IDataToken<string> key))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
var value = nodeNames[i + 1];
|
||||
|
||||
if (TryReadExplicitDestination(value, catalog, pdfScanner, log, out var destination))
|
||||
{
|
||||
explicitDestinations[key.Data] = destination;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (nameTreeNodeDictionary.TryGet(NameToken.Kids, pdfScanner, out ArrayToken kids))
|
||||
{
|
||||
foreach (var kid in kids.Data)
|
||||
{
|
||||
if (DirectObjectFinder.TryGet(kid, pdfScanner, out DictionaryToken kidDictionary))
|
||||
{
|
||||
ExtractNameTree(kidDictionary, catalog, pdfScanner, isLenientParsing, log, explicitDestinations);
|
||||
}
|
||||
else if (!isLenientParsing)
|
||||
{
|
||||
throw new PdfDocumentFormatException($"Invalid kids entry in PDF name tree: {kid} in {kids}.");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static bool TryReadExplicitDestination(IToken value, Catalog catalog, IPdfTokenScanner pdfScanner,
|
||||
ILog log, out ExplicitDestination destination)
|
||||
{
|
||||
|
65
src/UglyToad.PdfPig/Parser/Parts/NameTreeParser.cs
Normal file
65
src/UglyToad.PdfPig/Parser/Parts/NameTreeParser.cs
Normal file
@@ -0,0 +1,65 @@
|
||||
namespace UglyToad.PdfPig.Parser.Parts
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using Exceptions;
|
||||
using Tokenization.Scanner;
|
||||
using Tokens;
|
||||
|
||||
/// <summary>
/// Reads the key/value pairs of a PDF name tree into a flat dictionary.
/// </summary>
internal static class NameTreeParser
{
    /// <summary>
    /// Flatten the name tree rooted at <paramref name="nameTreeNodeDictionary"/> into a new dictionary.
    /// </summary>
    /// <typeparam name="TResult">The type of the values produced by <paramref name="valuesFactory"/>.</typeparam>
    /// <param name="nameTreeNodeDictionary">The root node of the name tree.</param>
    /// <param name="pdfScanner">The scanner used to resolve indirect references.</param>
    /// <param name="isLenientParsing">When <see langword="false"/> malformed nodes cause an exception, otherwise they are skipped.</param>
    /// <param name="valuesFactory">Converts a value token to a result, returning <see langword="null"/> omits the entry.</param>
    /// <returns>The entries of the tree keyed by name.</returns>
    public static IReadOnlyDictionary<string, TResult> FlattenNameTreeToDictionary<TResult>(DictionaryToken nameTreeNodeDictionary,
        IPdfTokenScanner pdfScanner,
        bool isLenientParsing,
        Func<IToken, TResult> valuesFactory) where TResult : class
    {
        var result = new Dictionary<string, TResult>();

        FlattenNameTree(nameTreeNodeDictionary, pdfScanner, isLenientParsing, valuesFactory, result);

        return result;
    }

    /// <summary>
    /// Add the entries of the name tree node <paramref name="nameTreeNodeDictionary"/> and all of its kids to <paramref name="result"/>.
    /// </summary>
    /// <typeparam name="TResult">The type of the values produced by <paramref name="valuesFactory"/>.</typeparam>
    /// <param name="nameTreeNodeDictionary">The name tree node to read.</param>
    /// <param name="pdfScanner">The scanner used to resolve indirect references.</param>
    /// <param name="isLenientParsing">When <see langword="false"/> malformed nodes cause an exception, otherwise they are skipped.</param>
    /// <param name="valuesFactory">Converts a value token to a result, returning <see langword="null"/> omits the entry.</param>
    /// <param name="result">The dictionary to add entries to, an existing entry with the same key is overwritten.</param>
    /// <exception cref="PdfDocumentFormatException">
    /// Parsing is not lenient and the node contains an odd number of names items or an invalid kids entry.
    /// </exception>
    public static void FlattenNameTree<TResult>(DictionaryToken nameTreeNodeDictionary,
        IPdfTokenScanner pdfScanner,
        bool isLenientParsing,
        Func<IToken, TResult> valuesFactory,
        Dictionary<string, TResult> result) where TResult : class
    {
        if (nameTreeNodeDictionary.TryGet(NameToken.Names, pdfScanner, out ArrayToken nodeNames))
        {
            // The array is a sequence of key, value pairs so a well-formed array has an even length.
            if (nodeNames.Length % 2 != 0 && !isLenientParsing)
            {
                throw new PdfDocumentFormatException($"Invalid names entry in PDF name tree, expected key/value pairs: {nodeNames}.");
            }

            // Bound the loop by the value index so a trailing key without a value is never read past the end of the array.
            for (var i = 0; i + 1 < nodeNames.Length; i += 2)
            {
                if (!(nodeNames[i] is IDataToken<string> key))
                {
                    continue;
                }

                var valueToken = nodeNames[i + 1];

                var value = valuesFactory(valueToken);

                if (value != null)
                {
                    result[key.Data] = value;
                }
            }
        }

        if (nameTreeNodeDictionary.TryGet(NameToken.Kids, pdfScanner, out ArrayToken kids))
        {
            foreach (var kid in kids.Data)
            {
                if (DirectObjectFinder.TryGet(kid, pdfScanner, out DictionaryToken kidDictionary))
                {
                    FlattenNameTree(kidDictionary, pdfScanner, isLenientParsing, valuesFactory, result);
                }
                else if (!isLenientParsing)
                {
                    throw new PdfDocumentFormatException($"Invalid kids entry in PDF name tree: {kid} in {kids}.");
                }
            }
        }
    }
}
|
||||
}
|
@@ -62,6 +62,11 @@
|
||||
[NotNull]
|
||||
public Structure Structure { get; }
|
||||
|
||||
/// <summary>
|
||||
/// Access to rare or advanced features of the PDF specification.
|
||||
/// </summary>
|
||||
public AdvancedPdfDocumentAccess Advanced { get; }
|
||||
|
||||
/// <summary>
|
||||
/// The version number of the PDF specification which this file conforms to, for example 1.4.
|
||||
/// </summary>
|
||||
@@ -104,6 +109,7 @@
|
||||
Information = information ?? throw new ArgumentNullException(nameof(information));
|
||||
pages = new Pages(catalog, pageFactory, isLenientParsing, pdfScanner);
|
||||
Structure = new Structure(catalog, crossReferenceTable, pdfScanner);
|
||||
Advanced = new AdvancedPdfDocumentAccess(pdfScanner, filterProvider, catalog, isLenientParsing);
|
||||
documentForm = new Lazy<AcroForm>(() => acroFormFactory.GetAcroForm(catalog));
|
||||
}
|
||||
|
||||
@@ -242,6 +248,7 @@
|
||||
{
|
||||
try
|
||||
{
|
||||
Advanced.Dispose();
|
||||
pdfScanner.Dispose();
|
||||
inputBytes.Dispose();
|
||||
}
|
||||
|
Reference in New Issue
Block a user