start adding support for reading encrypted documents

This commit is contained in:
Eliot Jones
2019-05-04 15:36:13 +01:00
parent b2a3ae7a49
commit be394f5bba
21 changed files with 534 additions and 66 deletions

View File

@@ -13,27 +13,27 @@
[Fact]
public void Tests()
{
//var files = Directory.GetFiles("C:\\git\\testdocs", "*.pdf");
// var files = new[]{ @"C:\Users\eliot\Downloads\Encrypted1.pdf" };
//foreach (var file in files)
//{
// try
// {
// using (var document = PdfDocument.Open(file, new ParsingOptions{ UseLenientParsing = false}))
// {
// for (var i = 1; i <= document.NumberOfPages; i++)
// {
// var page = document.GetPage(i);
// var text = page.Text;
// Trace.WriteLine(text);
// }
// }
// }
// catch (Exception ex)
// {
// throw new InvalidOperationException($"Error parsing: {Path.GetFileName(file)}.", ex);
// }
//}
// foreach (var file in files)
// {
// try
// {
// using (var document = PdfDocument.Open(file, new ParsingOptions { UseLenientParsing = false }))
// {
// for (var i = 1; i <= document.NumberOfPages; i++)
// {
// var page = document.GetPage(i);
// var text = page.Text;
// Trace.WriteLine(text);
// }
// }
// }
// catch (Exception ex)
// {
// throw new InvalidOperationException($"Error parsing: {Path.GetFileName(file)}.", ex);
// }
// }
}
}
}

View File

@@ -176,6 +176,23 @@ endobj
Assert.Equal(3, dictionary.Data.Count);
}
/// <summary>
/// Tokenizes a page dictionary whose MediaBox array is split by a line break and whose
/// final key is not followed by a value, then checks the MediaBox still has 4 entries.
/// </summary>
[Fact]
public void SupportTicket29()
{
    const string pageDictionary = "<< /Type /Page /Parent 4 0 R /MediaBox [ 0 0 \r\n 100.28 841.89 ] /Resources >>";
    var converted = StringBytesTestConverter.Convert(pageDictionary);

    tokenizer.TryTokenize(converted.First, converted.Bytes, out var result);

    var dictionaryToken = AssertDictionaryToken(result);
    var box = dictionaryToken.Data["MediaBox"] as ArrayToken;

    // The entry must exist as an array before its length can be inspected.
    Assert.NotNull(box);
    Assert.Equal(4, box.Length);
}
private static void AssertDictionaryEntry<TValue, TValueData>(DictionaryToken dictionary, NameToken key,
TValueData value) where TValue : IDataToken<TValueData>
{

View File

@@ -4,6 +4,7 @@
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Encryption;
using PdfPig.IO;
using PdfPig.Tokenization.Scanner;
using PdfPig.Tokens;
@@ -300,7 +301,7 @@ endobj
stream
%¥×³®í»š}%§X{{tøNåÝž¶ö¢ÖÞgrehtyyy$&%&£$££(*¾–~´¼
endstream
endobj";
endobj";
var scanner = GetScanner(s);
@@ -313,7 +314,7 @@ endobj";
Assert.Equal("1245", stream.StreamDictionary.Data["S"].ToString());
Assert.Equal("%¥×³®í»š}%§X{{tøNåÝž¶ö¢ÖÞgrehtyyy$&%&£$££(*¾–~´¼", Encoding.UTF8.GetString(stream.Data.ToArray()));
}
}
[Fact]
public void ReadsStreamWithoutBreakBeforeEndstream()

View File

@@ -4,6 +4,7 @@
using System.Collections.Generic;
using System.Linq;
using Content;
using Encryption;
using Exceptions;
using Fields;
using Filters;
@@ -20,11 +21,13 @@
{
private readonly IPdfTokenScanner tokenScanner;
private readonly IFilterProvider filterProvider;
private readonly IEncryptionHandler encryptionHandler;
public AcroFormFactory(IPdfTokenScanner tokenScanner, IFilterProvider filterProvider)
public AcroFormFactory(IPdfTokenScanner tokenScanner, IFilterProvider filterProvider, IEncryptionHandler encryptionHandler)
{
this.tokenScanner = tokenScanner ?? throw new ArgumentNullException(nameof(tokenScanner));
this.filterProvider = filterProvider ?? throw new ArgumentNullException(nameof(filterProvider));
this.encryptionHandler = encryptionHandler ?? throw new ArgumentNullException(nameof(encryptionHandler));
}
/// <summary>
@@ -212,7 +215,7 @@
}
else if (DirectObjectFinder.TryGet(textValueToken, tokenScanner, out StreamToken valueStreamToken))
{
textValue = OtherEncodings.BytesAsLatin1String(valueStreamToken.Decode(filterProvider).ToArray());
textValue = OtherEncodings.BytesAsLatin1String(valueStreamToken.Decode(filterProvider, encryptionHandler).ToArray());
}
}

View File

@@ -85,6 +85,10 @@
{
ids.Add(str.Data);
}
else if (token is HexToken hex)
{
ids.Add(hex.Data);
}
}
Identifier = ids;

View File

@@ -0,0 +1,174 @@
namespace UglyToad.PdfPig.Encryption
{
using System;
using Tokenization.Scanner;
using Tokens;
using Util;
/// <summary>
/// Immutable view over the entries of a document's encryption dictionary.
/// </summary>
internal class EncryptionDictionary
{
    /// <summary>
    /// The name stored in the Filter entry of the dictionary.
    /// </summary>
    public string Filter { get; }

    /// <summary>
    /// The algorithm code taken from the V entry.
    /// </summary>
    public EncryptionAlgorithmCode EncryptionAlgorithmCode { get; }

    /// <summary>
    /// The value of the Length entry, <see langword="null"/> when the entry was not present.
    /// </summary>
    public int? KeyLength { get; }

    /// <summary>
    /// The value of the R entry.
    /// </summary>
    public int StandardSecurityHandlerRevision { get; }

    /// <summary>
    /// The string stored in the O entry.
    /// </summary>
    public string OwnerPasswordCheck { get; }

    /// <summary>
    /// The string stored in the U entry.
    /// </summary>
    public string UserPasswordCheck { get; }

    /// <summary>
    /// The permission flags taken from the P entry.
    /// </summary>
    public UserAccessPermissions UserAccessPermissions { get; }

    /// <summary>
    /// Whether <see cref="Filter"/> equals "Standard", compared without regard to case.
    /// </summary>
    public bool IsStandardFilter
    {
        get { return string.Equals(Filter, "Standard", StringComparison.OrdinalIgnoreCase); }
    }

    /// <summary>
    /// The value of the EncryptMetadata entry.
    /// </summary>
    public bool EncryptMetadata { get; }

    /// <summary>
    /// The raw dictionary token these values were read from.
    /// </summary>
    public DictionaryToken Dictionary { get; }

    /// <summary>
    /// Create a new <see cref="EncryptionDictionary"/> from already extracted values.
    /// </summary>
    public EncryptionDictionary(string filter, EncryptionAlgorithmCode encryptionAlgorithmCode,
        int? keyLength,
        int standardSecurityHandlerRevision,
        string ownerPasswordCheck,
        string userPasswordCheck,
        UserAccessPermissions userAccessPermissions,
        DictionaryToken dictionary,
        bool encryptMetadata)
    {
        Dictionary = dictionary;
        Filter = filter;

        EncryptionAlgorithmCode = encryptionAlgorithmCode;
        StandardSecurityHandlerRevision = standardSecurityHandlerRevision;
        KeyLength = keyLength;

        OwnerPasswordCheck = ownerPasswordCheck;
        UserPasswordCheck = userPasswordCheck;

        UserAccessPermissions = userAccessPermissions;
        EncryptMetadata = encryptMetadata;
    }
}
/// <summary>
/// Reads the entries of a raw encryption dictionary token into an <see cref="EncryptionDictionary"/>.
/// </summary>
internal static class EncryptionDictionaryFactory
{
    /// <summary>
    /// Extract the Filter, V, Length, R, O, U, P and EncryptMetadata entries from the dictionary.
    /// </summary>
    /// <param name="encryptionDictionary">The dictionary token to read, must not be null.</param>
    /// <param name="tokenScanner">The scanner passed to the token lookups used to read each entry.</param>
    /// <returns>The values read from the dictionary along with the dictionary token itself.</returns>
    /// <exception cref="ArgumentNullException">If <paramref name="encryptionDictionary"/> is null.</exception>
    public static EncryptionDictionary Read(DictionaryToken encryptionDictionary, IPdfTokenScanner tokenScanner)
    {
        if (encryptionDictionary == null)
        {
            throw new ArgumentNullException(nameof(encryptionDictionary));
        }

        var filter = encryptionDictionary.Get<NameToken>(NameToken.Filter, tokenScanner);

        // V: falls back to the unrecognized algorithm code (0) when the entry is absent.
        var code = EncryptionAlgorithmCode.Unrecognized;
        if (encryptionDictionary.TryGetOptionalTokenDirect(NameToken.V, tokenScanner, out NumericToken vNum))
        {
            code = (EncryptionAlgorithmCode) vNum.Int;
        }

        // Length: left as null when absent so consumers can tell "not specified" apart from a real value.
        var length = default(int?);
        if (encryptionDictionary.TryGetOptionalTokenDirect(NameToken.Length, tokenScanner, out NumericToken lengthToken))
        {
            length = lengthToken.Int;
        }

        var revision = default(int);
        if (encryptionDictionary.TryGetOptionalTokenDirect(NameToken.R, tokenScanner, out NumericToken revisionToken))
        {
            revision = revisionToken.Int;
        }

        encryptionDictionary.TryGetOptionalStringDirect(NameToken.O, tokenScanner, out var ownerString);
        encryptionDictionary.TryGetOptionalStringDirect(NameToken.U, tokenScanner, out var userString);

        var access = default(UserAccessPermissions);
        if (encryptionDictionary.TryGetOptionalTokenDirect(NameToken.P, tokenScanner, out NumericToken accessToken))
        {
            access = (UserAccessPermissions) accessToken.Int;
        }

        encryptionDictionary.TryGetOptionalTokenDirect(NameToken.EncryptMetaData, tokenScanner, out BooleanToken encryptMetadata);

        // EncryptMetadata is optional and the PDF specification defines its default value as true,
        // so a missing entry means the metadata IS encrypted.
        var isMetadataEncrypted = encryptMetadata?.Data ?? true;

        return new EncryptionDictionary(filter.Data, code, length, revision, ownerString, userString, access, encryptionDictionary,
            isMetadataEncrypted);
    }
}
/// <summary>
/// Identifies which algorithm is used to encrypt and decrypt the document.
/// </summary>
internal enum EncryptionAlgorithmCode
{
    /// <summary>
    /// An undocumented algorithm which is no longer supported.
    /// </summary>
    Unrecognized = 0,

    /// <summary>
    /// RC4 or AES encryption with a 40 bit key.
    /// </summary>
    Rc4OrAes40BitKey = 1,

    /// <summary>
    /// RC4 or AES encryption with a key longer than 40 bits.
    /// </summary>
    Rc4OrAesGreaterThan40BitKey = 2,

    /// <summary>
    /// An unpublished algorithm allowing encryption key lengths from 40 to 128 bits.
    /// </summary>
    UnpublishedAlgorithm40To128BitKey = 3,

    /// <summary>
    /// How encryption and decryption are applied is defined by the security handler in the document.
    /// </summary>
    SecurityHandlerInDocument = 4
}
/// <summary>
/// The operations a user is permitted to perform, as encoded in the P entry of the encryption dictionary.
/// The PDF specification numbers the bits of the flag word from 1 (low-order), so the flag for
/// specification bit k has the value <c>1 &lt;&lt; (k - 1)</c>.
/// </summary>
[Flags]
internal enum UserAccessPermissions
{
    /// <summary>
    /// (Revision 2) Print the document.
    /// (Revision 3 or greater) Print the document (possibly not at the highest quality level, see <see cref="PrintHighQuality"/>).
    /// Specification bit 3.
    /// </summary>
    Print = 1 << 2,

    /// <summary>
    /// Modify the contents of the document by operations other than those
    /// controlled by <see cref="AddOrModifyTextAnnotationsAndFillFormFields"/>, <see cref="FillExistingFormFields"/> and <see cref="AssembleDocument"/>.
    /// Specification bit 4.
    /// </summary>
    Modify = 1 << 3,

    /// <summary>
    /// (Revision 2) Copy or otherwise extract text and graphics from the document, including extracting text and graphics
    /// (in support of accessibility to users with disabilities or for other purposes).
    /// (Revision 3 or greater) Copy or otherwise extract text and graphics from the document by operations other
    /// than that controlled by <see cref="ExtractTextAndGraphics"/>.
    /// Specification bit 5.
    /// </summary>
    CopyTextAndGraphics = 1 << 4,

    /// <summary>
    /// Add or modify text annotations, fill in interactive form fields, and, if <see cref="Modify"/> is also set,
    /// create or modify interactive form fields (including signature fields).
    /// Specification bit 6.
    /// </summary>
    AddOrModifyTextAnnotationsAndFillFormFields = 1 << 5,

    /// <summary>
    /// (Revision 3 or greater) Fill in existing interactive form fields (including signature fields),
    /// even if <see cref="AddOrModifyTextAnnotationsAndFillFormFields"/> is clear.
    /// Specification bit 9.
    /// </summary>
    FillExistingFormFields = 1 << 8,

    /// <summary>
    /// (Revision 3 or greater) Extract text and graphics (in support of accessibility to users with disabilities or for other purposes).
    /// Specification bit 10.
    /// </summary>
    ExtractTextAndGraphics = 1 << 9,

    /// <summary>
    /// (Revision 3 or greater) Assemble the document (insert, rotate, or delete pages and create bookmarks or thumbnail images),
    /// even if <see cref="Modify"/> is clear.
    /// Specification bit 11.
    /// </summary>
    AssembleDocument = 1 << 10,

    /// <summary>
    /// (Revision 3 or greater) Print the document to a representation from which a faithful digital copy of the PDF content could be generated.
    /// When this is clear (and <see cref="Print"/> is set), printing is limited to a low-level representation of the appearance,
    /// possibly of degraded quality.
    /// Specification bit 12.
    /// </summary>
    PrintHighQuality = 1 << 11
}
}

View File

@@ -0,0 +1,173 @@
namespace UglyToad.PdfPig.Encryption
{
using System;
using System.Collections.Generic;
using System.Security.Cryptography;
using System.Text;
using CrossReference;
using Tokens;
using Util;
using Util.JetBrains.Annotations;
/// <summary>
/// An <see cref="IEncryptionHandler"/> for documents which may contain an encryption dictionary.
/// Documents without an encryption dictionary have their stream data passed through unchanged.
/// For encrypted documents only the encryption key derivation for security handler revisions 2 to 4
/// is implemented so far; decrypting stream content is not supported yet.
/// </summary>
internal class EncryptionHandler : IEncryptionHandler
{
    /// <summary>
    /// The 32 byte padding string used to pad or truncate a password to exactly 32 bytes.
    /// </summary>
    private static readonly byte[] PaddingBytes =
    {
        0x28, 0xBF, 0x4E, 0x5E,
        0x4E, 0x75, 0x8A, 0x41,
        0x64, 0x00, 0x4E, 0x56,
        0xFF, 0xFA, 0x01, 0x08,
        0x2E, 0x2E, 0x00, 0xB6,
        0xD0, 0x68, 0x3E, 0x80,
        0x2F, 0x0C, 0xA9, 0xFE,
        0x64, 0x53, 0x69, 0x7A
    };

    /// <summary>
    /// Key length in bits used when the encryption dictionary has no Length entry.
    /// </summary>
    private const int DefaultKeyLengthBits = 40;

    [CanBeNull]
    private readonly EncryptionDictionary encryptionDictionary;

    [NotNull]
    private readonly byte[] documentIdBytes;

    [NotNull]
    private readonly string password;

    /// <summary>
    /// Create a new <see cref="EncryptionHandler"/>.
    /// </summary>
    /// <param name="encryptionDictionary">The encryption dictionary of the document, null if the document is not encrypted.</param>
    /// <param name="trailerDictionary">The trailer, used to obtain the first element of the file identifier.</param>
    /// <param name="password">The password to use, null is treated as the empty string.</param>
    /// <exception cref="ArgumentNullException">If <paramref name="trailerDictionary"/> is null.</exception>
    /// <exception cref="NotSupportedException">If the security handler revision is 5 or 6.</exception>
    public EncryptionHandler(EncryptionDictionary encryptionDictionary, TrailerDictionary trailerDictionary, string password)
    {
        if (trailerDictionary == null)
        {
            throw new ArgumentNullException(nameof(trailerDictionary));
        }

        this.encryptionDictionary = encryptionDictionary;

        // Only a well formed two element identifier array is used, anything else is treated as "no identifier".
        documentIdBytes = trailerDictionary.Identifier != null && trailerDictionary.Identifier.Count == 2
            ? OtherEncodings.StringAsLatin1Bytes(trailerDictionary.Identifier[0])
            : EmptyArray<byte>.Instance;

        this.password = password ?? string.Empty;

        if (encryptionDictionary == null)
        {
            return;
        }

        var ownerKey = OtherEncodings.StringAsLatin1Bytes(encryptionDictionary.OwnerPasswordCheck);

        if (encryptionDictionary.StandardSecurityHandlerRevision == 5 || encryptionDictionary.StandardSecurityHandlerRevision == 6)
        {
            throw new NotSupportedException($"Revision of {encryptionDictionary.StandardSecurityHandlerRevision} not supported, please raise an issue.");
        }

        // Revisions 2 to 4 interpret the password bytes as ISO 8859-1.
        var passwordBytes = OtherEncodings.Iso88591.GetBytes(this.password);

        // The Length entry is optional and expressed in bits, the specification default is 40 bits.
        var length = encryptionDictionary.EncryptionAlgorithmCode == EncryptionAlgorithmCode.Rc4OrAes40BitKey
            ? 5
            : encryptionDictionary.KeyLength.GetValueOrDefault(DefaultKeyLengthBits) / 8;

        // NOTE: the key is derived eagerly but not retained because Decrypt cannot use it yet.
        CalculateKeyRevisions2To4(passwordBytes, ownerKey, (int) encryptionDictionary.UserAccessPermissions, encryptionDictionary.StandardSecurityHandlerRevision,
            length, documentIdBytes, encryptionDictionary.EncryptMetadata);
    }

    /// <summary>
    /// Returns the data of the stream unchanged when the document has no encryption dictionary.
    /// </summary>
    /// <param name="stream">The stream to decrypt.</param>
    /// <returns>The stream data, or null if both the encryption dictionary and <paramref name="stream"/> are null.</returns>
    /// <exception cref="ArgumentNullException">If the document is encrypted and <paramref name="stream"/> is null.</exception>
    /// <exception cref="NotImplementedException">If the document is encrypted, decryption is not supported yet.</exception>
    public IReadOnlyList<byte> Decrypt(StreamToken stream)
    {
        if (encryptionDictionary == null)
        {
            return stream?.Data;
        }

        if (stream == null)
        {
            throw new ArgumentNullException(nameof(stream));
        }

        throw new NotImplementedException($"Encryption is not supported yet. Encryption used in document was: {encryptionDictionary.Dictionary}.");
    }

    /// <summary>
    /// Placeholder for validating a user password, not implemented yet: it only validates that the
    /// revision is in the range 2 to 6 and otherwise always returns false.
    /// </summary>
    /// <exception cref="NotSupportedException">If the revision is outside the range 2 to 6.</exception>
    private static bool IsUserPassword(byte[] password, byte[] userKey, byte[] ownerKey, int permissions,
        byte[] documentId, int revision, int length, bool encryptMetadata)
    {
        switch (revision)
        {
            case 2:
            case 3:
            case 4:
                break;
            case 5:
            case 6:
                break;
            default:
                throw new NotSupportedException($"Unsupported encryption revision: {revision}.");
        }

        return false;
    }

    /// <summary>
    /// Derive the encryption key for security handler revisions 2 to 4.
    /// All inputs are fed to a single MD5 computation in order, the digest is optionally re-hashed 50 times
    /// and the first n bytes of the final digest form the key.
    /// </summary>
    /// <param name="password">The password bytes, may be null or empty.</param>
    /// <param name="ownerKey">The bytes of the O entry of the encryption dictionary.</param>
    /// <param name="permissions">The value of the P entry of the encryption dictionary.</param>
    /// <param name="revision">The security handler revision.</param>
    /// <param name="length">The key length in bytes.</param>
    /// <param name="documentId">The bytes of the first element of the file identifier.</param>
    /// <param name="encryptMetadata">Whether the document metadata is encrypted.</param>
    /// <returns>The derived key.</returns>
    /// <exception cref="ArgumentNullException">If <paramref name="ownerKey"/> or <paramref name="documentId"/> is null.</exception>
    private static byte[] CalculateKeyRevisions2To4(byte[] password, byte[] ownerKey,
        int permissions, int revision, int length, byte[] documentId, bool encryptMetadata)
    {
        if (ownerKey == null)
        {
            throw new ArgumentNullException(nameof(ownerKey));
        }

        if (documentId == null)
        {
            throw new ArgumentNullException(nameof(documentId));
        }

        // Pad or truncate the password string to exactly 32 bytes.
        var paddedPassword = GetPaddedPassword(password);

        // Treat the value of the P entry as an unsigned 4-byte integer, low-order byte first.
        var unsigned = (uint) permissions;
        var permissionsBytes = new[]
        {
            (byte) (unsigned),
            (byte) (unsigned >> 8),
            (byte) (unsigned >> 16),
            (byte) (unsigned >> 24)
        };

        // The hash inputs must all go through ONE MD5 computation: ComputeHash resets the algorithm
        // after every call, so the inputs are concatenated first and hashed once.
        var input = new List<byte>(paddedPassword.Length + ownerKey.Length + permissionsBytes.Length + documentId.Length + 4);
        input.AddRange(paddedPassword);
        input.AddRange(ownerKey);
        input.AddRange(permissionsBytes);
        input.AddRange(documentId);

        // (Revision 4 or greater) Only if document metadata is NOT being encrypted,
        // pass 4 bytes with the value 0xFFFFFFFF to the hash.
        if (revision >= 4 && !encryptMetadata)
        {
            input.AddRange(new byte[] { 0xFF, 0xFF, 0xFF, 0xFF });
        }

        using (var md5 = MD5.Create())
        {
            var hash = md5.ComputeHash(input.ToArray());

            // n is always 5 for revision 2, otherwise it is the key length in bytes.
            // Clamp to the digest size so a malformed Length entry cannot cause an out of range copy.
            var requested = revision == 2 ? 5 : length;
            var n = Math.Max(0, Math.Min(requested, hash.Length));

            // (Revision 3 or greater) Do the following 50 times: take the output from the previous
            // MD5 hash and pass the first n bytes of the output as input into a new MD5 hash.
            if (revision >= 3)
            {
                for (var i = 0; i < 50; i++)
                {
                    hash = md5.ComputeHash(hash, 0, n);
                }
            }

            // The key is the first n bytes of the final digest.
            var key = new byte[n];
            Array.Copy(hash, key, n);

            return key;
        }
    }

    /// <summary>
    /// Pad or truncate the password to exactly 32 bytes: the first (up to) 32 bytes of the password
    /// followed by as many bytes from the start of <see cref="PaddingBytes"/> as are needed.
    /// Always returns a new array so the shared padding array is never handed out.
    /// </summary>
    private static byte[] GetPaddedPassword(byte[] password)
    {
        var result = new byte[32];

        var passwordLength = password == null ? 0 : Math.Min(password.Length, 32);

        if (passwordLength > 0)
        {
            Array.Copy(password, 0, result, 0, passwordLength);
        }

        var paddingLength = 32 - passwordLength;

        if (paddingLength > 0)
        {
            Array.Copy(PaddingBytes, 0, result, passwordLength, paddingLength);
        }

        return result;
    }
}
}

View File

@@ -0,0 +1,16 @@
namespace UglyToad.PdfPig.Encryption
{
using System.Collections.Generic;
using Tokens;
/// <summary>
/// Manages decryption of tokens in a PDF document where encryption is used.
/// </summary>
internal interface IEncryptionHandler
{
/// <summary>
/// Decrypt the contents of the stream if encryption is applied.
/// </summary>
/// <param name="stream">The stream token whose data should be decrypted.</param>
/// <returns>The bytes of the stream after any decryption has been applied.</returns>
IReadOnlyList<byte> Decrypt(StreamToken stream);
}
}

View File

@@ -0,0 +1,19 @@
namespace UglyToad.PdfPig.Encryption
{
using System.Collections.Generic;
using Tokens;
/// <summary>
/// An <see cref="IEncryptionHandler"/> which performs no decryption and hands back the stream data as-is.
/// </summary>
internal class NoOpEncryptionHandler : IEncryptionHandler
{
    /// <summary>
    /// The only instance of this handler, the constructor is private.
    /// </summary>
    public static NoOpEncryptionHandler Instance { get; } = new NoOpEncryptionHandler();

    private NoOpEncryptionHandler()
    {
    }

    /// <summary>
    /// Returns the data of the stream without modifying it.
    /// </summary>
    public IReadOnlyList<byte> Decrypt(StreamToken stream) => stream.Data;
}
}

View File

@@ -4,6 +4,7 @@
using SystemFonts;
using Cmap;
using Encodings;
using Encryption;
using Exceptions;
using Filters;
using IO;
@@ -22,6 +23,7 @@
private readonly ILog log;
private readonly IPdfTokenScanner pdfScanner;
private readonly IFilterProvider filterProvider;
private readonly IEncryptionHandler encryptionHandler;
private readonly CMapCache cMapCache;
private readonly FontDescriptorFactory fontDescriptorFactory;
private readonly TrueTypeFontParser trueTypeFontParser;
@@ -29,6 +31,7 @@
private readonly ISystemFontFinder systemFontFinder;
public TrueTypeFontHandler(ILog log, IPdfTokenScanner pdfScanner, IFilterProvider filterProvider,
IEncryptionHandler encryptionHandler,
CMapCache cMapCache,
FontDescriptorFactory fontDescriptorFactory,
TrueTypeFontParser trueTypeFontParser,
@@ -37,6 +40,7 @@
{
this.log = log;
this.filterProvider = filterProvider;
this.encryptionHandler = encryptionHandler;
this.cMapCache = cMapCache;
this.fontDescriptorFactory = fontDescriptorFactory;
this.trueTypeFontParser = trueTypeFontParser;
@@ -85,7 +89,7 @@
{
var toUnicode = DirectObjectFinder.Get<StreamToken>(toUnicodeObj, pdfScanner);
var decodedUnicodeCMap = toUnicode.Decode(filterProvider);
var decodedUnicodeCMap = toUnicode.Decode(filterProvider, encryptionHandler);
if (decodedUnicodeCMap != null)
{
@@ -125,7 +129,7 @@
var fontFileStream = DirectObjectFinder.Get<StreamToken>(descriptor.FontFile.ObjectKey, pdfScanner);
var fontFile = fontFileStream.Decode(filterProvider);
var fontFile = fontFileStream.Decode(filterProvider, encryptionHandler);
var font = trueTypeFontParser.Parse(new TrueTypeDataBytes(new ByteArrayInputBytes(fontFile)));

View File

@@ -4,6 +4,7 @@
using CidFonts;
using Cmap;
using Composite;
using Encryption;
using Exceptions;
using Filters;
using IO;
@@ -18,14 +19,17 @@
private readonly CidFontFactory cidFontFactory;
private readonly CMapCache cMapCache;
private readonly IFilterProvider filterProvider;
private readonly IEncryptionHandler encryptionHandler;
private readonly IPdfTokenScanner scanner;
/// <summary>
/// Create a new <see cref="Type0FontHandler"/> which keeps a reference to each of the supplied dependencies.
/// </summary>
public Type0FontHandler(
    CidFontFactory cidFontFactory,
    CMapCache cMapCache,
    IFilterProvider filterProvider,
    IEncryptionHandler encryptionHandler,
    IPdfTokenScanner scanner)
{
    this.scanner = scanner;

    // The filter provider and encryption handler are passed together when decoding streams.
    this.filterProvider = filterProvider;
    this.encryptionHandler = encryptionHandler;

    this.cMapCache = cMapCache;
    this.cidFontFactory = cidFontFactory;
}
@@ -68,7 +72,7 @@
var toUnicode = DirectObjectFinder.Get<StreamToken>(toUnicodeValue, scanner);
var decodedUnicodeCMap = toUnicode?.Decode(filterProvider);
var decodedUnicodeCMap = toUnicode?.Decode(filterProvider, encryptionHandler);
if (decodedUnicodeCMap != null)
{
@@ -147,7 +151,7 @@
}
else if (value is StreamToken stream)
{
var decoded = stream.Decode(filterProvider);
var decoded = stream.Decode(filterProvider, encryptionHandler);
var cmap = cMapCache.Parse(new ByteArrayInputBytes(decoded), false);

View File

@@ -4,6 +4,7 @@
using Cmap;
using CompactFontFormat;
using Encodings;
using Encryption;
using Exceptions;
using Filters;
using IO;
@@ -21,12 +22,14 @@
private readonly IPdfTokenScanner pdfScanner;
private readonly CMapCache cMapCache;
private readonly IFilterProvider filterProvider;
private readonly IEncryptionHandler encryptionHandler;
private readonly FontDescriptorFactory fontDescriptorFactory;
private readonly IEncodingReader encodingReader;
private readonly Type1FontParser type1FontParser;
private readonly CompactFontFormatParser compactFontFormatParser;
public Type1FontHandler(IPdfTokenScanner pdfScanner, CMapCache cMapCache, IFilterProvider filterProvider,
IEncryptionHandler encryptionHandler,
FontDescriptorFactory fontDescriptorFactory,
IEncodingReader encodingReader,
Type1FontParser type1FontParser,
@@ -35,6 +38,7 @@
this.pdfScanner = pdfScanner;
this.cMapCache = cMapCache;
this.filterProvider = filterProvider;
this.encryptionHandler = encryptionHandler;
this.fontDescriptorFactory = fontDescriptorFactory;
this.encodingReader = encodingReader;
this.type1FontParser = type1FontParser;
@@ -89,7 +93,7 @@
{
var toUnicode = DirectObjectFinder.Get<StreamToken>(toUnicodeObj, pdfScanner);
var decodedUnicodeCMap = toUnicode?.Decode(filterProvider);
var decodedUnicodeCMap = toUnicode?.Decode(filterProvider, encryptionHandler);
if (decodedUnicodeCMap != null)
{
@@ -126,7 +130,7 @@
return null;
}
var bytes = stream.Decode(filterProvider);
var bytes = stream.Decode(filterProvider, encryptionHandler);
// We have a Compact Font Format font rather than an Adobe Type 1 Font.
if (stream.StreamDictionary.TryGet(NameToken.Subtype, out NameToken subTypeName)

View File

@@ -3,6 +3,7 @@
using Cmap;
using Core;
using Encodings;
using Encryption;
using Exceptions;
using Filters;
using Geometry;
@@ -17,14 +18,17 @@
{
private readonly CMapCache cMapCache;
private readonly IFilterProvider filterProvider;
private readonly IEncryptionHandler encryptionHandler;
private readonly IEncodingReader encodingReader;
private readonly IPdfTokenScanner scanner;
/// <summary>
/// Create a new <see cref="Type3FontHandler"/> which keeps a reference to each of the supplied dependencies.
/// </summary>
public Type3FontHandler(
    IPdfTokenScanner scanner,
    CMapCache cMapCache,
    IFilterProvider filterProvider,
    IEncryptionHandler encryptionHandler,
    IEncodingReader encodingReader)
{
    this.scanner = scanner;
    this.cMapCache = cMapCache;

    // The filter provider and encryption handler are passed together when decoding streams.
    this.filterProvider = filterProvider;
    this.encryptionHandler = encryptionHandler;

    this.encodingReader = encodingReader;
}
@@ -46,7 +50,7 @@
{
var toUnicode = DirectObjectFinder.Get<StreamToken>(toUnicodeObj, scanner);
var decodedUnicodeCMap = toUnicode?.Decode(filterProvider);
var decodedUnicodeCMap = toUnicode?.Decode(filterProvider, encryptionHandler);
if (decodedUnicodeCMap != null)
{

View File

@@ -4,6 +4,7 @@
using System.Collections.Generic;
using CidFonts;
using CompactFontFormat;
using Encryption;
using Exceptions;
using Filters;
using Geometry;
@@ -22,17 +23,21 @@
private readonly TrueTypeFontParser trueTypeFontParser;
private readonly CompactFontFormatParser compactFontFormatParser;
private readonly IFilterProvider filterProvider;
private readonly IEncryptionHandler encryptionHandler;
private readonly IPdfTokenScanner pdfScanner;
public CidFontFactory(IPdfTokenScanner pdfScanner, FontDescriptorFactory descriptorFactory, TrueTypeFontParser trueTypeFontParser,
public CidFontFactory(IPdfTokenScanner pdfScanner, FontDescriptorFactory descriptorFactory,
TrueTypeFontParser trueTypeFontParser,
CompactFontFormatParser compactFontFormatParser,
IFilterProvider filterProvider)
IFilterProvider filterProvider,
IEncryptionHandler encryptionHandler)
{
this.pdfScanner = pdfScanner;
this.descriptorFactory = descriptorFactory;
this.trueTypeFontParser = trueTypeFontParser;
this.compactFontFormatParser = compactFontFormatParser;
this.filterProvider = filterProvider;
this.encryptionHandler = encryptionHandler;
}
public ICidFont Generate(DictionaryToken dictionary, bool isLenientParsing)
@@ -104,7 +109,7 @@
return null;
}
var fontFile = fontFileStream.Decode(filterProvider);
var fontFile = fontFileStream.Decode(filterProvider, encryptionHandler);
switch (descriptor.FontFile.FileType)
{
@@ -125,7 +130,7 @@
if (subtypeName == NameToken.CidFontType0C)
{
var bytes = str.Decode(filterProvider);
var bytes = str.Decode(filterProvider, encryptionHandler);
var font = compactFontFormatParser.Parse(new CompactFontFormatData(bytes));
return font;
}
@@ -297,7 +302,7 @@
var stream = DirectObjectFinder.Get<StreamToken>(entry, pdfScanner);
var bytes = stream.Decode(filterProvider);
var bytes = stream.Decode(filterProvider, encryptionHandler);
return new CharacterIdentifierToGlyphIndexMap(bytes);
}

View File

@@ -4,6 +4,7 @@
using System.Collections.Generic;
using Annotations;
using Content;
using Encryption;
using Exceptions;
using Filters;
using Geometry;
@@ -21,17 +22,20 @@
private readonly IPdfTokenScanner pdfScanner;
private readonly IResourceStore resourceStore;
private readonly IFilterProvider filterProvider;
private readonly IEncryptionHandler encryptionHandler;
private readonly IPageContentParser pageContentParser;
private readonly XObjectFactory xObjectFactory;
private readonly ILog log;
public PageFactory(IPdfTokenScanner pdfScanner, IResourceStore resourceStore, IFilterProvider filterProvider,
IEncryptionHandler encryptionHandler,
IPageContentParser pageContentParser,
XObjectFactory xObjectFactory,
ILog log)
{
this.resourceStore = resourceStore;
this.filterProvider = filterProvider;
this.encryptionHandler = encryptionHandler;
this.pageContentParser = pageContentParser;
this.xObjectFactory = xObjectFactory;
this.log = log;
@@ -84,7 +88,7 @@
throw new InvalidOperationException($"Could not find the contents for object {obj}.");
}
bytes.AddRange(contentStream.Decode(filterProvider));
bytes.AddRange(contentStream.Decode(filterProvider, encryptionHandler));
}
content = GetContent(bytes, cropBox, userSpaceUnit, isLenientParsing);
@@ -98,7 +102,7 @@
throw new InvalidOperationException("Failed to parse the content for the page: " + number);
}
var bytes = contentStream.Decode(filterProvider);
var bytes = contentStream.Decode(filterProvider, encryptionHandler);
content = GetContent(bytes, cropBox, userSpaceUnit, isLenientParsing);
}

View File

@@ -1,6 +1,7 @@
namespace UglyToad.PdfPig.Parser.Parts.CrossReference
{
using System.Collections.Generic;
using Encryption;
using Exceptions;
using Filters;
using PdfPig.CrossReference;
@@ -21,7 +22,7 @@
/// </summary>
public CrossReferenceTablePart Parse(long streamOffset, StreamToken stream)
{
var decoded = stream.Decode(filterProvider);
var decoded = stream.Decode(filterProvider, NoOpEncryptionHandler.Instance);
var fieldSizes = new CrossReferenceStreamFieldSize(stream.StreamDictionary);

View File

@@ -5,6 +5,8 @@
using AcroForms;
using Content;
using CrossReference;
using Encryption;
using Exceptions;
using FileStructure;
using Filters;
using Fonts;
@@ -81,8 +83,7 @@
// We're ok with this since our intent is to lazily load the cross reference table.
// ReSharper disable once AccessToModifiedClosure
var locationProvider = new ObjectLocationProvider(() => crossReferenceTable, bruteForceSearcher);
var pdfScanner = new PdfTokenScanner(inputBytes, locationProvider, filterProvider);
var pdfScanner = new PdfTokenScanner(inputBytes, locationProvider, filterProvider, NoOpEncryptionHandler.Instance);
var crossReferenceStreamParser = new CrossReferenceStreamParser(filterProvider);
var crossReferenceParser = new CrossReferenceParser(log, xrefValidator, objectChecker, crossReferenceStreamParser, new CrossReferenceTableParser());
@@ -103,45 +104,61 @@
var compactFontFormatIndexReader = new CompactFontFormatIndexReader();
var compactFontFormatParser = new CompactFontFormatParser(new CompactFontFormatIndividualFontParser(compactFontFormatIndexReader, new CompactFontFormatTopLevelDictionaryReader(),
new CompactFontFormatPrivateDictionaryReader()), compactFontFormatIndexReader);
var rootDictionary = ParseTrailer(crossReferenceTable, isLenientParsing, pdfScanner, out var encryptionDictionary);
var cidFontFactory = new CidFontFactory(pdfScanner, fontDescriptorFactory, trueTypeFontParser, compactFontFormatParser, filterProvider);
var encryptionHandler = new EncryptionHandler(encryptionDictionary, crossReferenceTable.Trailer, string.Empty);
pdfScanner.UpdateEncryptionHandler(encryptionHandler);
var cidFontFactory = new CidFontFactory(pdfScanner, fontDescriptorFactory, trueTypeFontParser, compactFontFormatParser, filterProvider, encryptionHandler);
var encodingReader = new EncodingReader(pdfScanner);
var fontFactory = new FontFactory(log, new Type0FontHandler(cidFontFactory,
cMapCache,
filterProvider, pdfScanner),
new TrueTypeFontHandler(log, pdfScanner, filterProvider, cMapCache, fontDescriptorFactory, trueTypeFontParser, encodingReader, new SystemFontFinder(new TrueTypeFontParser())),
new Type1FontHandler(pdfScanner, cMapCache, filterProvider, fontDescriptorFactory, encodingReader,
filterProvider, encryptionHandler, pdfScanner),
new TrueTypeFontHandler(log, pdfScanner, filterProvider, encryptionHandler, cMapCache, fontDescriptorFactory, trueTypeFontParser, encodingReader, new SystemFontFinder(new TrueTypeFontParser())),
new Type1FontHandler(pdfScanner, cMapCache, filterProvider, encryptionHandler, fontDescriptorFactory, encodingReader,
new Type1FontParser(new Type1EncryptedPortionParser()), compactFontFormatParser),
new Type3FontHandler(pdfScanner, cMapCache, filterProvider, encodingReader));
new Type3FontHandler(pdfScanner, cMapCache, filterProvider, encryptionHandler, encodingReader));
var resourceContainer = new ResourceContainer(pdfScanner, fontFactory);
var pageFactory = new PageFactory(pdfScanner, resourceContainer, filterProvider,
encryptionHandler,
new PageContentParser(new ReflectionGraphicsStateOperationFactory()),
new XObjectFactory(), log);
var informationFactory = new DocumentInformationFactory();
var rootDictionary = ParseTrailer(crossReferenceTable, isLenientParsing, pdfScanner);
var information = informationFactory.Create(pdfScanner, crossReferenceTable.Trailer);
var catalog = catalogFactory.Create(pdfScanner, rootDictionary);
var caching = new ParsingCachingProviders(bruteForceSearcher, resourceContainer);
var acroFormFactory = new AcroFormFactory(pdfScanner, filterProvider);
var acroFormFactory = new AcroFormFactory(pdfScanner, filterProvider, encryptionHandler);
return new PdfDocument(log, inputBytes, version, crossReferenceTable, isLenientParsing, caching, pageFactory, catalog, information,
pdfScanner, acroFormFactory);
encryptionDictionary,
pdfScanner,
acroFormFactory);
}
private static DictionaryToken ParseTrailer(CrossReferenceTable crossReferenceTable, bool isLenientParsing, IPdfTokenScanner pdfTokenScanner)
private static DictionaryToken ParseTrailer(CrossReferenceTable crossReferenceTable, bool isLenientParsing, IPdfTokenScanner pdfTokenScanner,
out EncryptionDictionary encryptionDictionary)
{
encryptionDictionary = null;
if (crossReferenceTable.Trailer.EncryptionToken != null)
{
throw new NotSupportedException("Cannot currently parse a document using encryption: " + crossReferenceTable.Trailer.EncryptionToken);
if (!DirectObjectFinder.TryGet(crossReferenceTable.Trailer.EncryptionToken, pdfTokenScanner, out DictionaryToken encryptionDictionaryToken))
{
throw new PdfDocumentFormatException($"Unrecognized encryption token in trailer: {crossReferenceTable.Trailer.EncryptionToken}.");
}
encryptionDictionary = EncryptionDictionaryFactory.Read(encryptionDictionaryToken, pdfTokenScanner);
//throw new NotSupportedException("Cannot currently parse a document using encryption: " + crossReferenceTable.Trailer.EncryptionToken);
}
var rootDictionary = DirectObjectFinder.Get<DictionaryToken>(crossReferenceTable.Trailer.Root, pdfTokenScanner);

View File

@@ -5,6 +5,7 @@
using AcroForms;
using Content;
using CrossReference;
using Encryption;
using IO;
using Logging;
using Parser;
@@ -32,6 +33,10 @@
[NotNull]
private readonly ParsingCachingProviders cachingProviders;
[CanBeNull]
private readonly EncryptionDictionary encryptionDictionary;
[NotNull]
private readonly IPdfTokenScanner pdfScanner;
[NotNull]
@@ -59,6 +64,11 @@
/// </summary>
public int NumberOfPages => pages.Count;
/// <summary>
/// Whether the document content is encrypted.
/// </summary>
public bool IsEncrypted => encryptionDictionary != null;
internal PdfDocument(ILog log,
IInputBytes inputBytes,
HeaderVersion version,
@@ -67,7 +77,9 @@
ParsingCachingProviders cachingProviders,
IPageFactory pageFactory,
Catalog catalog,
DocumentInformation information, IPdfTokenScanner pdfScanner,
DocumentInformation information,
EncryptionDictionary encryptionDictionary,
IPdfTokenScanner pdfScanner,
AcroFormFactory acroFormFactory)
{
this.log = log;
@@ -75,6 +87,7 @@
this.version = version ?? throw new ArgumentNullException(nameof(version));
this.isLenientParsing = isLenientParsing;
this.cachingProviders = cachingProviders ?? throw new ArgumentNullException(nameof(cachingProviders));
this.encryptionDictionary = encryptionDictionary;
this.pdfScanner = pdfScanner ?? throw new ArgumentNullException(nameof(pdfScanner));
Information = information ?? throw new ArgumentNullException(nameof(information));
pages = new Pages(log, catalog, pageFactory, isLenientParsing, pdfScanner);

View File

@@ -4,6 +4,7 @@
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using Encryption;
using Exceptions;
using Filters;
using IO;
@@ -30,6 +31,8 @@
private readonly IFilterProvider filterProvider;
private readonly CoreTokenScanner coreTokenScanner;
private IEncryptionHandler encryptionHandler;
/// <summary>
/// Stores tokens encountered between obj - endobj markers for each <see cref="MoveNext"/> call.
/// Cleared after each operation.
@@ -44,14 +47,21 @@
public long CurrentPosition => coreTokenScanner.CurrentPosition;
public PdfTokenScanner(IInputBytes inputBytes, IObjectLocationProvider objectLocationProvider, IFilterProvider filterProvider)
public PdfTokenScanner(IInputBytes inputBytes, IObjectLocationProvider objectLocationProvider, IFilterProvider filterProvider,
IEncryptionHandler encryptionHandler)
{
this.inputBytes = inputBytes;
this.objectLocationProvider = objectLocationProvider;
this.filterProvider = filterProvider;
this.encryptionHandler = encryptionHandler;
coreTokenScanner = new CoreTokenScanner(inputBytes);
}
/// <summary>
/// Replace the encryption handler this scanner passes along when decoding streams.
/// </summary>
/// <param name="newHandler">The handler to use from now on, must not be null.</param>
/// <exception cref="ArgumentNullException">If <paramref name="newHandler"/> is null.</exception>
public void UpdateEncryptionHandler(IEncryptionHandler newHandler)
{
    if (newHandler == null)
    {
        throw new ArgumentNullException(nameof(newHandler));
    }

    encryptionHandler = newHandler;
}
public bool MoveNext()
{
// Read until we find object-number generation obj, e.g. "69 420 obj".
@@ -527,7 +537,7 @@
}
// Read the N integers
var bytes = new ByteArrayInputBytes(stream.Decode(filterProvider));
var bytes = new ByteArrayInputBytes(stream.Decode(filterProvider, encryptionHandler));
var scanner = new CoreTokenScanner(bytes);

View File

@@ -2,6 +2,7 @@
{
using System;
using System.Collections.Generic;
using Encryption;
using Filters;
using Util.JetBrains.Annotations;
@@ -38,7 +39,7 @@
Data = data ?? throw new ArgumentNullException(nameof(data));
}
internal IReadOnlyList<byte> Decode(IFilterProvider filterProvider)
internal IReadOnlyList<byte> Decode(IFilterProvider filterProvider, IEncryptionHandler encryptionHandler)
{
lock (lockObject)
{
@@ -49,7 +50,7 @@
var filters = filterProvider.GetFilters(StreamDictionary);
var transform = Data;
var transform = encryptionHandler.Decrypt(this);
for (var i = 0; i < filters.Count; i++)
{
transform = filters[i].Decode(transform, StreamDictionary, i);

View File

@@ -48,13 +48,7 @@
</PackageReference>
</ItemGroup>
<ItemGroup Condition="'$(TargetFramework)'=='net45'
OR '$(TargetFramework)'=='net451'
OR '$(TargetFramework)'=='net452'
OR '$(TargetFramework)'=='net46'
OR '$(TargetFramework)'=='net461'
OR '$(TargetFramework)'=='net462'
OR '$(TargetFramework)'=='net47'">
<ItemGroup Condition="'$(TargetFramework)'=='net45' &#xD;&#xA; OR '$(TargetFramework)'=='net451' &#xD;&#xA; OR '$(TargetFramework)'=='net452' &#xD;&#xA; OR '$(TargetFramework)'=='net46' &#xD;&#xA; OR '$(TargetFramework)'=='net461' &#xD;&#xA; OR '$(TargetFramework)'=='net462' &#xD;&#xA; OR '$(TargetFramework)'=='net47'">
<PackageReference Include="System.ValueTuple" Version="4.5.0" />
</ItemGroup>