mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-06-28 15:30:17 +08:00
add classes to support loading existing documents into builder
This commit is contained in:
parent
48522ae1a5
commit
31bcb92c4c
@ -0,0 +1,24 @@
|
||||
namespace UglyToad.PdfPig.Tests.Writer
|
||||
{
|
||||
using System.IO;
|
||||
using Integration;
|
||||
using PdfPig.Writer;
|
||||
using Xunit;
|
||||
|
||||
/// <summary>
/// Tests for creating a <see cref="PdfDocumentBuilder"/> from the bytes of an existing document.
/// </summary>
public class PdfDocumentBuilderFromExistingTests
{
    /// <summary>
    /// Loads an existing document into a builder and builds it again without making any changes.
    /// </summary>
    [Fact]
    public void LoadAndSaveExistingNoModifications()
    {
        // Arrange: read the integration test document from disk.
        var documentBytes = File.ReadAllBytes(
            IntegrationHelpers.GetDocumentPath("Single Page Simple - from open office.pdf"));

        // Act: round-trip the document through the builder.
        var result = PdfDocumentBuilder.FromPdf(documentBytes).Build();

        // Assert: only checks that building produced a result.
        Assert.NotNull(result);
    }
}
|
||||
}
|
@ -22,16 +22,23 @@
|
||||
|
||||
Count = catalog.PagesDictionary.GetIntOrDefault(NameToken.Count);
|
||||
}
|
||||
|
||||
|
||||
/// <summary>
/// Get the page with the given number.
/// </summary>
/// <param name="pageNumber">The number of the page to get, between 1 and <see cref="Count"/> inclusive.</param>
/// <param name="clipPaths">Forwarded unchanged to the page factory when the page is created.</param>
/// <returns>The page created from the page tree node found for <paramref name="pageNumber"/>.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when <paramref name="pageNumber"/> is less than 1 or greater than <see cref="Count"/>.
/// </exception>
public Page GetPage(int pageNumber, bool clipPaths)
{
    if (pageNumber <= 0 || pageNumber > Count)
    {
        throw new ArgumentOutOfRangeException(nameof(pageNumber),
            $"Page number {pageNumber} invalid, must be between 1 and {Count}.");
    }

    var pageNode = catalog.GetPageNode(pageNumber);

    return CreateFromPageTreeNode(pageNode, pdfScanner, pageFactory, pageNumber, clipPaths);
}
|
||||
|
||||
public static Page CreateFromPageTreeNode(PageTreeNode pageNode, IPdfTokenScanner pdfScanner,
|
||||
IPageFactory pageFactory,
|
||||
int pageNumber, bool clipPaths)
|
||||
{
|
||||
var pageStack = new Stack<PageTreeNode>();
|
||||
|
||||
var currentNode = pageNode;
|
||||
@ -42,7 +49,7 @@
|
||||
}
|
||||
|
||||
var pageTreeMembers = new PageTreeMembers();
|
||||
|
||||
|
||||
while (pageStack.Count > 0)
|
||||
{
|
||||
currentNode = pageStack.Pop();
|
||||
@ -51,7 +58,7 @@
|
||||
{
|
||||
pageTreeMembers.ParentResources.Enqueue(resourcesDictionary);
|
||||
}
|
||||
|
||||
|
||||
if (currentNode.NodeDictionary.TryGet(NameToken.MediaBox, pdfScanner, out ArrayToken mediaBox))
|
||||
{
|
||||
pageTreeMembers.MediaBox = new MediaBox(mediaBox.ToRectangle(pdfScanner));
|
||||
@ -64,7 +71,7 @@
|
||||
}
|
||||
|
||||
var page = pageFactory.Create(pageNumber, pageNode.NodeDictionary, pageTreeMembers, clipPaths);
|
||||
|
||||
|
||||
return page;
|
||||
}
|
||||
}
|
||||
|
@ -52,6 +52,11 @@ namespace UglyToad.PdfPig.Writer
|
||||
/// </summary>
|
||||
internal IReadOnlyDictionary<Guid, IWritingFont> Fonts => fonts.ToDictionary(x => x.Key, x => x.Value.FontProgram);
|
||||
|
||||
/// <summary>
/// Create a builder from an existing PDF file.
/// </summary>
/// <param name="bytes">The bytes of the existing PDF document.</param>
/// <returns>The builder produced by converting the document contained in <paramref name="bytes"/>.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="bytes"/> is <see langword="null"/>.</exception>
public static PdfDocumentBuilder FromPdf(IReadOnlyList<byte> bytes)
{
    // Fail fast here: the converter only null-checks the wrapper created below, not the wrapped bytes.
    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes));
    }

    return PdfDocumentToPdfDocumentBuilderFactory.Convert(new ByteArrayInputBytes(bytes));
}
|
||||
|
||||
/// <summary>
|
||||
/// Determines whether the bytes of the TrueType font file provided can be used in a PDF document.
|
||||
/// </summary>
|
||||
|
@ -0,0 +1,142 @@
|
||||
namespace UglyToad.PdfPig.Writer
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using Content;
|
||||
using Core;
|
||||
using CrossReference;
|
||||
using Encryption;
|
||||
using Exceptions;
|
||||
using Filters;
|
||||
using Graphics;
|
||||
using Logging;
|
||||
using Parser;
|
||||
using Parser.FileStructure;
|
||||
using Parser.Parts;
|
||||
using PdfFonts;
|
||||
using PdfFonts.Parser;
|
||||
using PdfFonts.Parser.Handlers;
|
||||
using PdfFonts.Parser.Parts;
|
||||
using PdfPig.Fonts.SystemFonts;
|
||||
using Tokenization.Scanner;
|
||||
using Tokens;
|
||||
|
||||
/// <summary>
/// Creates a <see cref="PdfDocumentBuilder"/> from the bytes of an existing PDF document.
/// </summary>
internal static class PdfDocumentToPdfDocumentBuilderFactory
{
    private static readonly ILog Log = new NoOpLog();
    private static readonly IFilterProvider FilterProvider = DefaultFilterProvider.Instance;

    /// <summary>
    /// Parse the document in <paramref name="inputBytes"/> and add each page found in its page tree to a new builder.
    /// </summary>
    /// <param name="inputBytes">The bytes of the document to load.</param>
    /// <returns>A builder containing one page per page node, each holding the operations of the source page.</returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="inputBytes"/> is null.</exception>
    /// <exception cref="PdfDocumentEncryptedException">Thrown when the trailer yields an encryption dictionary.</exception>
    public static PdfDocumentBuilder Convert(IInputBytes inputBytes)
    {
        if (inputBytes == null)
        {
            throw new ArgumentNullException(nameof(inputBytes));
        }

        // Parsing is always strict when loading a document for editing.
        const bool lenient = false;

        var tokenScanner = new CoreTokenScanner(inputBytes);
        var headerVersion = FileHeaderParser.Parse(tokenScanner, lenient, Log);

        var offsetValidator = new XrefOffsetValidator(Log);
        var xrefStreamParser = new Parser.Parts.CrossReference.CrossReferenceStreamParser(FilterProvider);
        var xrefParser = new CrossReferenceParser(Log, offsetValidator, xrefStreamParser);

        // The table is only assigned once the cross reference has been parsed below, so the
        // location provider is handed a delegate which reads this local rather than its value.
        CrossReferenceTable xrefTable = null;

        // ReSharper disable once AccessToModifiedClosure
        var objectLocations = new ObjectLocationProvider(() => xrefTable, inputBytes);
        var pdfScanner = new PdfTokenScanner(inputBytes, objectLocations, FilterProvider, NoOpEncryptionHandler.Instance);

        var xrefOffset = FileTrailerParser.GetFirstCrossReferenceOffset(inputBytes, tokenScanner, lenient);
        xrefTable = xrefParser.Parse(inputBytes, lenient, xrefOffset, headerVersion.OffsetInFile, pdfScanner, tokenScanner);

        var (rootReference, rootDictionary) = ParseTrailer(xrefTable, lenient, pdfScanner, out var encryptionDictionary);

        // Encrypted documents are rejected outright.
        if (encryptionDictionary != null)
        {
            throw new PdfDocumentEncryptedException("Unable to edit document with password");
        }

        // Font handling, wired up in the same order the handlers depend on each other.
        var cidFontFactory = new CidFontFactory(pdfScanner, FilterProvider);
        var encodingReader = new EncodingReader(pdfScanner);
        var type1Handler = new Type1FontHandler(pdfScanner, FilterProvider, encodingReader);
        var type0Handler = new Type0FontHandler(cidFontFactory, FilterProvider, pdfScanner);
        var trueTypeHandler = new TrueTypeFontHandler(Log, pdfScanner, FilterProvider, encodingReader,
            SystemFontFinder.Instance, type1Handler);
        var type3Handler = new Type3FontHandler(pdfScanner, FilterProvider, encodingReader);
        var fontFactory = new FontFactory(Log, type0Handler, trueTypeHandler, type1Handler, type3Handler);

        var resourceStore = new ResourceStore(pdfScanner, fontFactory);
        var catalog = CatalogFactory.Create(rootReference, rootDictionary, pdfScanner, lenient);
        var contentParser = new PageContentParser(new ReflectionGraphicsStateOperationFactory());
        var pageFactory = new PageFactory(pdfScanner, resourceStore, FilterProvider, contentParser, Log);

        var builder = new PdfDocumentBuilder();

        var pageNumber = 0;
        foreach (var pageNode in GetPages(catalog.PageTree))
        {
            pageNumber++;

            // Outstanding work for each copied page:
            // 1. Work out which resources (fonts, etc.) can be defined.
            // 2. Copy resource and dictionary keys we don't understand.
            // 3. Re-use inherited properties where possible to prevent double work.
            var page = Pages.CreateFromPageTreeNode(pageNode, pdfScanner, pageFactory, pageNumber, false);
            var pageBuilder = builder.AddPage(page.Width, page.Height);
            pageBuilder.Advanced.Operations.AddRange(page.Operations);
        }

        return builder;
    }

    /// <summary>
    /// Read the root reference, the root dictionary and any encryption dictionary from the trailer.
    /// </summary>
    /// <param name="crossReferenceTable">The parsed cross reference table whose trailer is read.</param>
    /// <param name="isLenientParsing">When true, a root dictionary without a type entry is given the catalog type.</param>
    /// <param name="pdfTokenScanner">The scanner used to resolve the trailer's tokens.</param>
    /// <param name="encryptionDictionary">The encryption dictionary, or null when the trailer has no encryption token.</param>
    /// <returns>The root reference from the trailer together with the root dictionary it resolves to.</returns>
    /// <exception cref="PdfDocumentFormatException">Thrown when the encryption token cannot be resolved to a dictionary.</exception>
    private static (IndirectReference, DictionaryToken) ParseTrailer(CrossReferenceTable crossReferenceTable, bool isLenientParsing, IPdfTokenScanner pdfTokenScanner,
        out EncryptionDictionary encryptionDictionary)
    {
        var trailer = crossReferenceTable.Trailer;

        encryptionDictionary = null;

        if (trailer.EncryptionToken != null)
        {
            if (DirectObjectFinder.TryGet(trailer.EncryptionToken, pdfTokenScanner, out DictionaryToken encryptionToken))
            {
                encryptionDictionary = EncryptionDictionaryFactory.Read(encryptionToken, pdfTokenScanner);
            }
            else
            {
                throw new PdfDocumentFormatException($"Unrecognized encryption token in trailer: {crossReferenceTable.Trailer.EncryptionToken}.");
            }
        }

        var root = DirectObjectFinder.Get<DictionaryToken>(trailer.Root, pdfTokenScanner);

        // Only patch in the missing type entry when lenient parsing was requested.
        if (isLenientParsing && !root.ContainsKey(NameToken.Type))
        {
            root = root.With(NameToken.Type, NameToken.Catalog);
        }

        return (trailer.Root, root);
    }

    /// <summary>
    /// Enumerate every page node at or beneath <paramref name="root"/>, visiting children in order.
    /// </summary>
    /// <param name="root">The node to start from.</param>
    /// <returns>The node itself when it is a page, otherwise the page nodes found under each of its children.</returns>
    private static IEnumerable<PageTreeNode> GetPages(PageTreeNode root)
    {
        if (!root.IsPage)
        {
            // Not a page itself: descend into each child in turn.
            foreach (var child in root.Children)
            {
                foreach (var descendant in GetPages(child))
                {
                    yield return descendant;
                }
            }

            yield break;
        }

        yield return root;
    }
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user