add classes to support loading existing documents into builder

This commit is contained in:
Eliot Jones 2020-07-19 17:46:04 +01:00
parent 48522ae1a5
commit 31bcb92c4c
4 changed files with 183 additions and 5 deletions

View File

@ -0,0 +1,24 @@
namespace UglyToad.PdfPig.Tests.Writer
{
using System.IO;
using Integration;
using PdfPig.Writer;
using Xunit;
/// <summary>
/// Tests for creating a <see cref="PdfDocumentBuilder"/> from the bytes of an existing PDF document.
/// </summary>
public class PdfDocumentBuilderFromExistingTests
{
    /// <summary>
    /// Loads an existing document into a builder and builds it again without making any changes.
    /// </summary>
    [Fact]
    public void LoadAndSaveExistingNoModifications()
    {
        var existingDocument = File.ReadAllBytes(
            IntegrationHelpers.GetDocumentPath("Single Page Simple - from open office.pdf"));

        var rebuilt = PdfDocumentBuilder.FromPdf(existingDocument).Build();

        Assert.NotNull(rebuilt);
    }
}
}

View File

@ -22,16 +22,23 @@
Count = catalog.PagesDictionary.GetIntOrDefault(NameToken.Count);
}
/// <summary>
/// Gets the page with the given page number.
/// </summary>
/// <param name="pageNumber">The number of the page to get; must be between 1 and <see cref="Count"/> inclusive.</param>
/// <param name="clipPaths">Passed through unchanged to the page factory when the page is created.</param>
/// <returns>The page created from the page tree node found for <paramref name="pageNumber"/>.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="pageNumber"/> is less than 1 or greater than <see cref="Count"/>.
/// </exception>
public Page GetPage(int pageNumber, bool clipPaths)
{
    if (pageNumber <= 0 || pageNumber > Count)
    {
        // A single throw statement: the opening line of this statement must appear exactly once.
        throw new ArgumentOutOfRangeException(nameof(pageNumber),
            $"Page number {pageNumber} invalid, must be between 1 and {Count}.");
    }

    var pageNode = catalog.GetPageNode(pageNumber);

    return CreateFromPageTreeNode(pageNode, pdfScanner, pageFactory, pageNumber, clipPaths);
}
public static Page CreateFromPageTreeNode(PageTreeNode pageNode, IPdfTokenScanner pdfScanner,
IPageFactory pageFactory,
int pageNumber, bool clipPaths)
{
var pageStack = new Stack<PageTreeNode>();
var currentNode = pageNode;
@ -42,7 +49,7 @@
}
var pageTreeMembers = new PageTreeMembers();
while (pageStack.Count > 0)
{
currentNode = pageStack.Pop();
@ -51,7 +58,7 @@
{
pageTreeMembers.ParentResources.Enqueue(resourcesDictionary);
}
if (currentNode.NodeDictionary.TryGet(NameToken.MediaBox, pdfScanner, out ArrayToken mediaBox))
{
pageTreeMembers.MediaBox = new MediaBox(mediaBox.ToRectangle(pdfScanner));
@ -64,7 +71,7 @@
}
var page = pageFactory.Create(pageNumber, pageNode.NodeDictionary, pageTreeMembers, clipPaths);
return page;
}
}

View File

@ -52,6 +52,11 @@ namespace UglyToad.PdfPig.Writer
/// </summary>
internal IReadOnlyDictionary<Guid, IWritingFont> Fonts
{
    get
    {
        // Project every stored font entry onto its font program, keeping the same key.
        return fonts.ToDictionary(
            entry => entry.Key,
            entry => entry.Value.FontProgram);
    }
}
/// <summary>
/// Create a builder from an existing PDF file.
/// </summary>
/// <param name="bytes">The bytes of the existing PDF file.</param>
/// <returns>The builder produced by converting the existing document.</returns>
/// <exception cref="ArgumentNullException"><paramref name="bytes"/> is <see langword="null"/>.</exception>
public static PdfDocumentBuilder FromPdf(IReadOnlyList<byte> bytes)
{
    // Validate here: the wrapper created below is never null, so a null check
    // on the wrapped input further down cannot detect a null byte list.
    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes));
    }

    return PdfDocumentToPdfDocumentBuilderFactory.Convert(new ByteArrayInputBytes(bytes));
}
/// <summary>
/// Determines whether the bytes of the TrueType font file provided can be used in a PDF document.
/// </summary>

View File

@ -0,0 +1,142 @@
namespace UglyToad.PdfPig.Writer
{
using System;
using System.Collections.Generic;
using Content;
using Core;
using CrossReference;
using Encryption;
using Exceptions;
using Filters;
using Graphics;
using Logging;
using Parser;
using Parser.FileStructure;
using Parser.Parts;
using PdfFonts;
using PdfFonts.Parser;
using PdfFonts.Parser.Handlers;
using PdfFonts.Parser.Parts;
using PdfPig.Fonts.SystemFonts;
using Tokenization.Scanner;
using Tokens;
internal static class PdfDocumentToPdfDocumentBuilderFactory
{
// Logger passed to the parsers, font handlers and page factory created in Convert; a NoOpLog instance.
private static readonly ILog Log = new NoOpLog();
// Filter provider passed to the scanner, parsers, font handlers and page factory created in Convert.
private static readonly IFilterProvider FilterProvider = DefaultFilterProvider.Instance;
/// <summary>
/// Reads the PDF document contained in <paramref name="inputBytes"/> and adds one page to a new
/// <see cref="PdfDocumentBuilder"/> for every page found in the document's page tree, copying the
/// page width, height and operations.
/// </summary>
/// <param name="inputBytes">The bytes of the document to load.</param>
/// <returns>The builder holding the copied pages.</returns>
/// <exception cref="ArgumentNullException"><paramref name="inputBytes"/> is <see langword="null"/>.</exception>
/// <exception cref="PdfDocumentEncryptedException">The trailer yields an encryption dictionary.</exception>
public static PdfDocumentBuilder Convert(IInputBytes inputBytes)
{
    if (inputBytes is null)
    {
        throw new ArgumentNullException(nameof(inputBytes));
    }

    const bool useLenientParsing = false;

    var tokenScanner = new CoreTokenScanner(inputBytes);
    var headerVersion = FileHeaderParser.Parse(tokenScanner, useLenientParsing, Log);

    var offsetValidator = new XrefOffsetValidator(Log);
    var xrefStreamParser = new Parser.Parts.CrossReference.CrossReferenceStreamParser(FilterProvider);
    var xrefParser = new CrossReferenceParser(Log, offsetValidator, xrefStreamParser);

    // The table is only assigned once the scanner exists, so the location provider is handed a
    // delegate returning whatever this variable holds at the time the delegate is invoked.
    CrossReferenceTable xrefTable = null;

    // ReSharper disable once AccessToModifiedClosure
    var objectLocations = new ObjectLocationProvider(() => xrefTable, inputBytes);
    var objectScanner = new PdfTokenScanner(inputBytes, objectLocations, FilterProvider, NoOpEncryptionHandler.Instance);

    var firstXrefOffset = FileTrailerParser.GetFirstCrossReferenceOffset(inputBytes, tokenScanner, useLenientParsing);
    xrefTable = xrefParser.Parse(
        inputBytes,
        useLenientParsing,
        firstXrefOffset,
        headerVersion.OffsetInFile,
        objectScanner,
        tokenScanner);

    var (catalogReference, catalogDictionary) = ParseTrailer(
        xrefTable,
        useLenientParsing,
        objectScanner,
        out var encryption);

    if (encryption != null)
    {
        throw new PdfDocumentEncryptedException("Unable to edit document with password");
    }

    // Font handling: the Type 1 handler is shared by the TrueType handler and the font factory.
    var cidFonts = new CidFontFactory(objectScanner, FilterProvider);
    var encodings = new EncodingReader(objectScanner);
    var type1Fonts = new Type1FontHandler(objectScanner, FilterProvider, encodings);
    var type0Fonts = new Type0FontHandler(cidFonts, FilterProvider, objectScanner);
    var trueTypeFonts = new TrueTypeFontHandler(
        Log,
        objectScanner,
        FilterProvider,
        encodings,
        SystemFontFinder.Instance,
        type1Fonts);
    var type3Fonts = new Type3FontHandler(objectScanner, FilterProvider, encodings);
    var fonts = new FontFactory(Log, type0Fonts, trueTypeFonts, type1Fonts, type3Fonts);

    var resources = new ResourceStore(objectScanner, fonts);
    var documentCatalog = CatalogFactory.Create(catalogReference, catalogDictionary, objectScanner, useLenientParsing);

    var contentParser = new PageContentParser(new ReflectionGraphicsStateOperationFactory());
    var pageCreator = new PageFactory(objectScanner, resources, FilterProvider, contentParser, Log);

    var result = new PdfDocumentBuilder();
    var pageNumber = 0;

    foreach (var pageNode in GetPages(documentCatalog.PageTree))
    {
        // Pages are numbered from 1 in the order the page tree yields them.
        pageNumber++;

        // Outstanding work for this copy step:
        // 1. Work out which resources (fonts, etc.) can be defined.
        // 2. Copy resource and dictionary keys that are not understood.
        // 3. Re-use inherited properties where possible to prevent double work.
        var sourcePage = Pages.CreateFromPageTreeNode(pageNode, objectScanner, pageCreator, pageNumber, false);

        var targetPage = result.AddPage(sourcePage.Width, sourcePage.Height);
        targetPage.Advanced.Operations.AddRange(sourcePage.Operations);
    }

    return result;
}
/// <summary>
/// Resolves the root dictionary referenced by the trailer and, when the trailer carries an
/// encryption token, reads the encryption dictionary it refers to.
/// </summary>
/// <param name="crossReferenceTable">The cross reference table whose trailer is inspected.</param>
/// <param name="isLenientParsing">
/// When <see langword="true"/>, a root dictionary without a Type entry is given the Catalog type.
/// </param>
/// <param name="pdfTokenScanner">The scanner used to resolve the encryption and root dictionaries.</param>
/// <param name="encryptionDictionary">
/// The encryption dictionary read from the trailer, or <see langword="null"/> when the trailer has no encryption token.
/// </param>
/// <returns>The trailer's root reference together with the resolved root dictionary.</returns>
/// <exception cref="PdfDocumentFormatException">
/// The trailer has an encryption token which could not be resolved to a dictionary.
/// </exception>
private static (IndirectReference, DictionaryToken) ParseTrailer(CrossReferenceTable crossReferenceTable, bool isLenientParsing, IPdfTokenScanner pdfTokenScanner,
    out EncryptionDictionary encryptionDictionary)
{
    var trailer = crossReferenceTable.Trailer;
    var encryptionToken = trailer.EncryptionToken;

    if (encryptionToken == null)
    {
        encryptionDictionary = null;
    }
    else if (DirectObjectFinder.TryGet(encryptionToken, pdfTokenScanner, out DictionaryToken encryptionDictionaryToken))
    {
        encryptionDictionary = EncryptionDictionaryFactory.Read(encryptionDictionaryToken, pdfTokenScanner);
    }
    else
    {
        throw new PdfDocumentFormatException($"Unrecognized encryption token in trailer: {encryptionToken}.");
    }

    var root = trailer.Root;
    var catalogDictionary = DirectObjectFinder.Get<DictionaryToken>(root, pdfTokenScanner);

    // Only patch in the missing Type entry when lenient parsing was requested.
    var isTypeMissing = !catalogDictionary.ContainsKey(NameToken.Type);
    if (isTypeMissing && isLenientParsing)
    {
        catalogDictionary = catalogDictionary.With(NameToken.Type, NameToken.Catalog);
    }

    return (root, catalogDictionary);
}
/// <summary>
/// Enumerates the page nodes at or below <paramref name="root"/>, depth-first and in child order.
/// </summary>
/// <param name="root">The node to start from; returned on its own when it is itself a page.</param>
/// <returns>Every node for which <c>IsPage</c> is true, found by descending through <c>Children</c>.</returns>
private static IEnumerable<PageTreeNode> GetPages(PageTreeNode root)
{
    if (!root.IsPage)
    {
        // Not a page: recurse into each child and surface the pages found beneath it.
        foreach (var childNode in root.Children)
        {
            foreach (var pageNode in GetPages(childNode))
            {
                yield return pageNode;
            }
        }

        yield break;
    }

    // A page node is yielded as-is; its children are never visited.
    yield return root;
}
}
}