mirror of
https://github.com/UglyToad/PdfPig.git
synced 2026-03-10 00:23:29 +08:00
Allow copying pages from another document
This is a naive implementation: if you copy multiple pages from the same document, the recipient document will be bloated with duplicated resources.
This commit is contained in:
@@ -644,6 +644,53 @@
|
||||
}
|
||||
}
|
||||
|
||||
[Fact]
public void CanCopyPage()
{
    // Arrange: a builder whose first page holds text drawn with a font registered on the builder.
    var documentBuilder = new PdfDocumentBuilder();
    var writtenPage = documentBuilder.AddPage(PageSize.A4);
    var andada = documentBuilder.AddTrueTypeFont(TrueTypeTestHelper.GetFileBytes("Andada-Regular.ttf"));

    writtenPage.AddText("Hello", 12, new PdfPoint(30, 50), andada);
    Assert.NotEmpty(writtenPage.CurrentStream.Operations);

    // Act: copy the first page of an existing document onto a second, fresh page.
    using (var sourceDocument = PdfDocument.Open(IntegrationHelpers.GetDocumentPath("bold-italic.pdf")))
    {
        var sourcePage = sourceDocument.GetPage(1);
        var copiedPage = documentBuilder.AddPage(PageSize.A4);
        copiedPage.CopyFrom(sourcePage);
    }

    byte[] b = documentBuilder.Build();
    Assert.NotEmpty(b);

    WriteFile(nameof(CanCopyPage), b);

    // Assert: the built document reads back with both the written and the copied text.
    using (var result = PdfDocument.Open(b))
    {
        Assert.Equal(2, result.NumberOfPages);

        Assert.Equal("Hello", result.GetPage(1).Text);
        Assert.Equal("Lorem ipsum dolor sit amet, consectetur adipiscing elit. ", result.GetPage(2).Text);
    }
}
|
||||
|
||||
private static void WriteFile(string name, byte[] bytes, string extension = "pdf")
|
||||
{
|
||||
try
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
public class Page
|
||||
{
|
||||
private readonly AnnotationProvider annotationProvider;
|
||||
private readonly IPdfTokenScanner pdfScanner;
|
||||
internal readonly IPdfTokenScanner pdfScanner;
|
||||
private readonly Lazy<string> textLazy;
|
||||
|
||||
/// <summary>
|
||||
|
||||
@@ -3,6 +3,7 @@ namespace UglyToad.PdfPig.Writer
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using Content;
|
||||
@@ -10,8 +11,10 @@ namespace UglyToad.PdfPig.Writer
|
||||
using Fonts;
|
||||
using PdfPig.Fonts.TrueType;
|
||||
using Graphics.Operations;
|
||||
using Parser.Parts;
|
||||
using PdfPig.Fonts.Standard14Fonts;
|
||||
using PdfPig.Fonts.TrueType.Parser;
|
||||
using Tokenization.Scanner;
|
||||
using Tokens;
|
||||
|
||||
using Util.JetBrains.Annotations;
|
||||
@@ -25,6 +28,9 @@ namespace UglyToad.PdfPig.Writer
|
||||
private readonly Dictionary<int, PdfPageBuilder> pages = new Dictionary<int, PdfPageBuilder>();
|
||||
private readonly Dictionary<Guid, FontStored> fonts = new Dictionary<Guid, FontStored>();
|
||||
private readonly Dictionary<Guid, ImageStored> images = new Dictionary<Guid, ImageStored>();
|
||||
private readonly Dictionary<IndirectReferenceToken, IToken> unwrittenTokens = new Dictionary<IndirectReferenceToken, IToken>();
|
||||
|
||||
internal int fontId = 0;
|
||||
|
||||
/// <summary>
|
||||
/// The standard of PDF/A compliance of the generated document. Defaults to <see cref="PdfAStandard.None"/>.
|
||||
@@ -50,7 +56,12 @@ namespace UglyToad.PdfPig.Writer
|
||||
/// <summary>
|
||||
/// The fonts currently available in the document builder added via <see cref="AddTrueTypeFont"/> or <see cref="AddStandard14Font"/>. Keyed by id for internal purposes.
|
||||
/// </summary>
|
||||
internal IReadOnlyDictionary<Guid, IWritingFont> Fonts => fonts.ToDictionary(x => x.Key, x => x.Value.FontProgram);
|
||||
internal IReadOnlyDictionary<Guid, FontStored> Fonts => fonts;
|
||||
|
||||
/// <summary>
|
||||
/// The images currently available in the document builder added via <see cref="AddImage"/>. Keyed by id for internal purposes.
|
||||
/// </summary>
|
||||
internal IReadOnlyDictionary<Guid, ImageStored> Images => images;
|
||||
|
||||
/// <summary>
|
||||
/// Determines whether the bytes of the TrueType font file provided can be used in a PDF document.
|
||||
@@ -116,8 +127,7 @@ namespace UglyToad.PdfPig.Writer
|
||||
{
|
||||
var font = TrueTypeFontParser.Parse(new TrueTypeDataBytes(new ByteArrayInputBytes(fontFileBytes)));
|
||||
var id = Guid.NewGuid();
|
||||
var i = fonts.Count;
|
||||
var added = new AddedFont(id, NameToken.Create($"F{i}"));
|
||||
var added = new AddedFont(id, NameToken.Create($"F{fontId++}"));
|
||||
fonts[id] = new FontStored(added, new TrueTypeWritingFont(font, fontFileBytes));
|
||||
|
||||
return added;
|
||||
@@ -141,7 +151,7 @@ namespace UglyToad.PdfPig.Writer
|
||||
}
|
||||
|
||||
var id = Guid.NewGuid();
|
||||
var name = NameToken.Create($"F{fonts.Count}");
|
||||
var name = NameToken.Create($"F{fontId++}");
|
||||
var added = new AddedFont(id, name);
|
||||
fonts[id] = new FontStored(added, new Standard14WritingFont(Standard14.GetAdobeFontMetrics(type)));
|
||||
|
||||
@@ -259,6 +269,11 @@ namespace UglyToad.PdfPig.Writer
|
||||
context.WriteObject(memory, streamToken, image.Value.ObjectNumber);
|
||||
}
|
||||
|
||||
foreach (var tokenSet in unwrittenTokens)
|
||||
{
|
||||
context.WriteObject(memory, tokenSet.Value, (int)tokenSet.Key.Data.ObjectNumber);
|
||||
}
|
||||
|
||||
var procSet = new List<NameToken>
|
||||
{
|
||||
NameToken.Create("PDF"),
|
||||
@@ -278,9 +293,7 @@ namespace UglyToad.PdfPig.Writer
|
||||
var fontsDictionary = new DictionaryToken(fontsWritten.Select(x => (fonts[x.Key].FontKey.Name, (IToken)new IndirectReferenceToken(x.Value.Number)))
|
||||
.ToDictionary(x => x.Item1, x => x.Item2));
|
||||
|
||||
var fontsDictionaryRef = context.WriteObject(memory, fontsDictionary);
|
||||
|
||||
resources.Add(NameToken.Font, new IndirectReferenceToken(fontsDictionaryRef.Number));
|
||||
resources.Add(NameToken.Font, fontsDictionary);
|
||||
}
|
||||
|
||||
var reserved = context.ReserveNumber();
|
||||
@@ -301,8 +314,24 @@ namespace UglyToad.PdfPig.Writer
|
||||
{
|
||||
foreach (var kvp in page.Value.Resources)
|
||||
{
|
||||
// TODO: combine resources if value is dictionary or array, otherwise overwrite.
|
||||
individualResources[kvp.Key] = kvp.Value;
|
||||
var value = kvp.Value;
|
||||
if (individualResources.TryGetValue(kvp.Key, out var pageToken))
|
||||
{
|
||||
if (pageToken is DictionaryToken leftDictionary && value is DictionaryToken rightDictionary)
|
||||
{
|
||||
var merged = leftDictionary.Data.ToDictionary(k => NameToken.Create(k.Key), v => v.Value);
|
||||
foreach (var set in rightDictionary.Data)
|
||||
{
|
||||
merged[NameToken.Create(set.Key)] = set.Value;
|
||||
}
|
||||
|
||||
value = new DictionaryToken(merged);
|
||||
|
||||
}
|
||||
// Else override
|
||||
}
|
||||
|
||||
individualResources[kvp.Key] = value;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -392,6 +421,75 @@ namespace UglyToad.PdfPig.Writer
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Deep-copies a token that was read from another document so it can be written by this builder.
/// Every indirect reference reachable from <paramref name="tokenToCopy"/> is resolved through
/// <paramref name="tokenScanner"/>, its target is copied, and the reference is replaced by a newly
/// reserved reference. The copied targets are queued in <c>unwrittenTokens</c> for later writing.
/// </summary>
/// <remarks>
/// Within a single call each source object is copied at most once: repeated references to the same
/// object map to the same new reference, and reference cycles terminate instead of recursing forever.
/// Separate calls do not share this bookkeeping, so an object reachable from two calls is still copied twice.
/// </remarks>
/// <param name="tokenToCopy">Token to inspect for references.</param>
/// <param name="tokenScanner">Scanner used to read referenced content from the original document.</param>
/// <returns>A copy of the token with all contained references updated, or the token itself when it cannot contain references.</returns>
/// <exception cref="NotSupportedException">An <c>ObjectToken</c> was encountered.</exception>
internal IToken CopyToken(IToken tokenToCopy, IPdfTokenScanner tokenScanner)
{
    return CopyTokenTracked(tokenToCopy, tokenScanner, new Dictionary<IndirectReference, IndirectReferenceToken>());
}

/// <summary>
/// Recursive worker for <see cref="CopyToken"/>; <paramref name="copiedReferences"/> maps each source
/// reference already visited in this copy operation to the reference reserved for it in this document.
/// </summary>
private IToken CopyTokenTracked(IToken tokenToCopy, IPdfTokenScanner tokenScanner,
    Dictionary<IndirectReference, IndirectReferenceToken> copiedReferences)
{
    // Containers need a deep copy because they may hold references which must be rewritten.
    switch (tokenToCopy)
    {
        case DictionaryToken dictionaryToken:
            {
                var newContent = new Dictionary<NameToken, IToken>();
                foreach (var setPair in dictionaryToken.Data)
                {
                    newContent.Add(NameToken.Create(setPair.Key), CopyTokenTracked(setPair.Value, tokenScanner, copiedReferences));
                }

                return new DictionaryToken(newContent);
            }
        case ArrayToken arrayToken:
            {
                var newArray = new List<IToken>(arrayToken.Length);
                foreach (var token in arrayToken.Data)
                {
                    newArray.Add(CopyTokenTracked(token, tokenScanner, copiedReferences));
                }

                return new ArrayToken(newArray);
            }
        case IndirectReferenceToken referenceToken:
            {
                if (copiedReferences.TryGetValue(referenceToken.Data, out var alreadyCopied))
                {
                    // Already copied (or currently being copied, in the case of a cycle): reuse the new reference.
                    return alreadyCopied;
                }

                var tokenObject = DirectObjectFinder.Get<IToken>(referenceToken.Data, tokenScanner);

                Debug.Assert(!(tokenObject is IndirectReferenceToken));

                // Register the mapping BEFORE descending so a cycle back to this object resolves to the
                // new reference rather than recursing again.
                var reserved = context.ReserveNumber();
                var newReference = new IndirectReferenceToken(new IndirectReference(reserved, 0));
                copiedReferences.Add(referenceToken.Data, newReference);

                var newToken = CopyTokenTracked(tokenObject, tokenScanner, copiedReferences);

                unwrittenTokens.Add(newReference, newToken);

                return newReference;
            }
        case StreamToken streamToken:
            {
                var properties = CopyTokenTracked(streamToken.StreamDictionary, tokenScanner, copiedReferences) as DictionaryToken;
                Debug.Assert(properties != null);

                // The stream bytes are reused as-is; only the dictionary can contain references.
                var bytes = streamToken.Data;
                return new StreamToken(properties, bytes);
            }

        case ObjectToken _:
            {
                // Since we don't write tokens directly to the stream we can't know the offset,
                // therefore the copied token would be invalid.
                throw new NotSupportedException("Copying a Object token is not supported");
            }
    }

    // Any other token is returned unchanged.
    return tokenToCopy;
}
|
||||
|
||||
private static StreamToken WriteContentStream(IReadOnlyList<IGraphicsStateOperation> content)
|
||||
{
|
||||
using (var memoryStream = new MemoryStream())
|
||||
|
||||
@@ -15,7 +15,9 @@
|
||||
using Images;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using PdfFonts;
|
||||
using Tokens;
|
||||
using Graphics.Operations.PathPainting;
|
||||
@@ -228,7 +230,7 @@
|
||||
throw new ArgumentNullException(nameof(text));
|
||||
}
|
||||
|
||||
if (!documentBuilder.Fonts.TryGetValue(font.Id, out var fontProgram))
|
||||
if (!documentBuilder.Fonts.TryGetValue(font.Id, out var fontStore))
|
||||
{
|
||||
throw new ArgumentException($"No font has been added to the PdfDocumentBuilder with Id: {font.Id}. " +
|
||||
$"Use {nameof(documentBuilder.AddTrueTypeFont)} to register a font.", nameof(font));
|
||||
@@ -239,6 +241,8 @@
|
||||
throw new ArgumentOutOfRangeException(nameof(fontSize), "Font size must be greater than 0");
|
||||
}
|
||||
|
||||
var fontProgram = fontStore.FontProgram;
|
||||
|
||||
var fm = fontProgram.GetFontMatrix();
|
||||
|
||||
var textMatrix = TransformationMatrix.FromValues(1, 0, 0, 1, position.X, position.Y);
|
||||
@@ -271,7 +275,7 @@
|
||||
throw new ArgumentNullException(nameof(text));
|
||||
}
|
||||
|
||||
if (!documentBuilder.Fonts.TryGetValue(font.Id, out var fontProgram))
|
||||
if (!documentBuilder.Fonts.TryGetValue(font.Id, out var fontStore))
|
||||
{
|
||||
throw new ArgumentException($"No font has been added to the PdfDocumentBuilder with Id: {font.Id}. " +
|
||||
$"Use {nameof(documentBuilder.AddTrueTypeFont)} to register a font.", nameof(font));
|
||||
@@ -282,6 +286,8 @@
|
||||
throw new ArgumentOutOfRangeException(nameof(fontSize), "Font size must be greater than 0");
|
||||
}
|
||||
|
||||
var fontProgram = fontStore.FontProgram;
|
||||
|
||||
var fm = fontProgram.GetFontMatrix();
|
||||
|
||||
var textMatrix = TransformationMatrix.FromValues(1, 0, 0, 1, position.X, position.Y);
|
||||
@@ -497,6 +503,169 @@
|
||||
return new AddedImage(reference, png.Width, png.Height);
|
||||
}
|
||||
|
||||
/// <summary>
/// Copy the content of a page from another document onto this page.
/// The source page's operations are appended to this page (in a new content stream when the current one
/// already has operations) and the resources they use are copied into this page's resource dictionary.
/// Font and XObject names that are already in use are renamed, and the copied operations are updated to match.
/// </summary>
/// <param name="srcPage">Page to be copied</param>
/// <exception cref="ArgumentNullException"><paramref name="srcPage"/> is null.</exception>
/// <exception cref="PdfDocumentFormatException">A font or XObject entry of the source page is not an indirect reference.</exception>
/// <exception cref="InvalidOperationException">This page's existing font or XObject resources are not stored as a dictionary.</exception>
public void CopyFrom(Page srcPage)
{
    if (srcPage == null)
    {
        throw new ArgumentNullException(nameof(srcPage));
    }

    // Keep the copied operations apart from anything already drawn on this page.
    if (CurrentStream.Operations.Count > 0)
    {
        NewContentStreamAfter();
    }

    var destinationStream = CurrentStream;

    if (!srcPage.Dictionary.TryGet(NameToken.Resources, srcPage.pdfScanner, out DictionaryToken srcResourceDictionary))
    {
        // If the page doesn't have resources, then we copy the entire content stream, since no operation would collide
        // with the ones already written.
        // NOTE(review): resources inherited from a parent page-tree node are not looked up here - confirm
        // whether Page.Dictionary already includes them.
        destinationStream.Operations.AddRange(srcPage.Operations);
        return;
    }

    // TODO: How should we handle any other token in the page dictionary (Eg. LastModified, MediaBox, CropBox, BleedBox, TrimBox, ArtBox,
    //      BoxColorInfo, Rotate, Group, Thumb, B, Dur, Trans, Annots, AA, Metadata, PieceInfo, StructParents, ID, PZ, SeparationInfo, Tabs,
    //      TemplateInstantiated, PresSteps, UserUnit, VP)

    var operations = new List<IGraphicsStateOperation>(srcPage.Operations);

    // We need to relocate the resources, and we have to make sure that none of the resources collide with
    // the already written operation's resources.
    foreach (var set in srcResourceDictionary.Data)
    {
        var nameToken = NameToken.Create(set.Key);
        if (nameToken == NameToken.Font || nameToken == NameToken.Xobject)
        {
            // These two are handled separately below because their entries may need renaming.
            continue;
        }

        if (!resourcesDictionary.ContainsKey(nameToken))
        {
            // This type of resource doesn't exist in the page yet, so it can be copied without conflict.
            resourcesDictionary[nameToken] = documentBuilder.CopyToken(set.Value, srcPage.pdfScanner);
        }

        // TODO: a resource type that already exists on this page (ExtGState, ColorSpace, etc.) is currently
        // NOT merged; the source entries are dropped. A test case is needed.
    }

    // Fonts: names may clash with fonts registered on the document builder or already present on this page.
    if (srcResourceDictionary.TryGet(NameToken.Font, srcPage.pdfScanner, out DictionaryToken fontsDictionary))
    {
        var pageFontsDictionary = CopyExistingResourceEntries(NameToken.Font);

        var takenFontNames = new HashSet<string>(documentBuilder.Fonts.Values.Select(f => f.FontKey.Name.Data));
        takenFontNames.UnionWith(pageFontsDictionary.Keys.Select(k => k.Data));

        // Original name -> new name. Applied to the operations in ONE pass afterwards so that a freshly
        // generated name can never be mistaken for a source name that is processed later.
        var fontRenames = new Dictionary<string, string>();

        foreach (var fontSet in fontsDictionary.Data)
        {
            if (!(fontSet.Value is IndirectReferenceToken fontReferenceToken))
            {
                throw new PdfDocumentFormatException($"Expected a IndirectReferenceToken for the font, got a {fontSet.Value.GetType().Name}");
            }

            var fontName = fontSet.Key;
            if (takenFontNames.Contains(fontName))
            {
                // The imported font collides with a name already in use, so it has to be renamed.
                string newName;
                do
                {
                    newName = $"F{documentBuilder.fontId++}";
                }
                while (takenFontNames.Contains(newName));

                fontRenames[fontName] = newName;
                fontName = newName;
            }

            takenFontNames.Add(fontName);
            pageFontsDictionary.Add(NameToken.Create(fontName), documentBuilder.CopyToken(fontReferenceToken, srcPage.pdfScanner));
        }

        if (fontRenames.Count > 0)
        {
            // Point every SetFontAndSize operation that used a renamed font at its new name.
            for (var i = 0; i < operations.Count; i++)
            {
                if (operations[i] is SetFontAndSize setFont && fontRenames.TryGetValue(setFont.Font.Data, out var renamedFont))
                {
                    operations[i] = new SetFontAndSize(NameToken.Create(renamedFont), setFont.Size);
                }
            }
        }

        resourcesDictionary[NameToken.Font] = new DictionaryToken(pageFontsDictionary);
    }

    // XObjects: names may clash with the image names this page hands out ("I0", "I1", ...) or with entries
    // already present on this page.
    if (srcResourceDictionary.TryGet(NameToken.Xobject, srcPage.pdfScanner, out DictionaryToken xobjectsDictionary))
    {
        var pageXobjectsDictionary = CopyExistingResourceEntries(NameToken.Xobject);

        var takenXobjectNames = new HashSet<string>(Enumerable.Range(0, imageKey).Select(i => $"I{i}"));
        takenXobjectNames.UnionWith(pageXobjectsDictionary.Keys.Select(k => k.Data));

        var xobjectRenames = new Dictionary<string, string>();

        foreach (var xobjectSet in xobjectsDictionary.Data)
        {
            if (!(xobjectSet.Value is IndirectReferenceToken xobjectReferenceToken))
            {
                throw new PdfDocumentFormatException($"Expected a IndirectReferenceToken for the XObject, got a {xobjectSet.Value.GetType().Name}");
            }

            var xobjectName = xobjectSet.Key;
            if (takenXobjectNames.Contains(xobjectName))
            {
                // The imported XObject collides with a name already in use, so it has to be renamed.
                string newName;
                do
                {
                    newName = $"I{imageKey++}";
                }
                while (takenXobjectNames.Contains(newName));

                xobjectRenames[xobjectName] = newName;
                xobjectName = newName;
            }

            takenXobjectNames.Add(xobjectName);
            pageXobjectsDictionary.Add(NameToken.Create(xobjectName), documentBuilder.CopyToken(xobjectReferenceToken, srcPage.pdfScanner));
        }

        if (xobjectRenames.Count > 0)
        {
            // Point every InvokeNamedXObject operation that used a renamed XObject at its new name.
            for (var i = 0; i < operations.Count; i++)
            {
                if (operations[i] is InvokeNamedXObject invoke && xobjectRenames.TryGetValue(invoke.Name.Data, out var renamedXobject))
                {
                    operations[i] = new InvokeNamedXObject(NameToken.Create(renamedXobject));
                }
            }
        }

        resourcesDictionary[NameToken.Xobject] = new DictionaryToken(pageXobjectsDictionary);
    }

    destinationStream.Operations.AddRange(operations);
}

/// <summary>
/// Returns a mutable copy of the entries this page already holds for the given resource type,
/// or an empty dictionary when the page has none.
/// </summary>
private Dictionary<NameToken, IToken> CopyExistingResourceEntries(NameToken resourceType)
{
    if (!resourcesDictionary.TryGetValue(resourceType, out var existing))
    {
        return new Dictionary<NameToken, IToken>();
    }

    if (!(existing is DictionaryToken existingDictionary))
    {
        // Previously only a Debug.Assert guarded this, which became a NullReferenceException in release builds.
        throw new InvalidOperationException(
            $"Expected the page's {resourceType.Data} resources to be a DictionaryToken, got a {existing?.GetType().Name ?? "null"}");
    }

    return existingDictionary.Data.ToDictionary(k => NameToken.Create(k.Key), v => v.Value);
}
|
||||
|
||||
private List<Letter> DrawLetters(string text, IWritingFont font, TransformationMatrix fontMatrix, decimal fontSize, TransformationMatrix textMatrix)
|
||||
{
|
||||
var horizontalScaling = 1;
|
||||
|
||||
Reference in New Issue
Block a user