Allow copying pages from another document

This is a naive implementation: if you copy multiple pages from the same document, the recipient document will be bloated with duplicated resources.
This commit is contained in:
InusualZ
2020-12-20 19:13:19 +00:00
parent ba5bc1f031
commit 7126564eef
4 changed files with 326 additions and 12 deletions

View File

@@ -644,6 +644,53 @@
}
}
// Builds a document whose first page is written through the builder and whose second page is
// copied from "bold-italic.pdf", then re-opens the produced bytes and checks both pages' text.
[Fact]
public void CanCopyPage()
{
    var builder = new PdfDocumentBuilder();

    // Page 1: text written directly with a TrueType font registered on the builder.
    var writtenPage = builder.AddPage(PageSize.A4);
    var fontBytes = TrueTypeTestHelper.GetFileBytes("Andada-Regular.ttf");
    var font = builder.AddTrueTypeFont(fontBytes);
    writtenPage.AddText("Hello", 12, new PdfPoint(30, 50), font);
    Assert.NotEmpty(writtenPage.CurrentStream.Operations);

    // Page 2: content copied from the first page of an existing document.
    using (var sourceDocument = PdfDocument.Open(IntegrationHelpers.GetDocumentPath("bold-italic.pdf")))
    {
        var sourcePage = sourceDocument.GetPage(1);
        var copiedPage = builder.AddPage(PageSize.A4);
        copiedPage.CopyFrom(sourcePage);
    }

    byte[] b = builder.Build();
    Assert.NotEmpty(b);

    WriteFile(nameof(CanCopyPage), b);

    using (var document = PdfDocument.Open(b))
    {
        Assert.Equal(2, document.NumberOfPages);

        var firstPage = document.GetPage(1);
        Assert.Equal("Hello", firstPage.Text);

        var secondPage = document.GetPage(2);
        Assert.Equal("Lorem ipsum dolor sit amet, consectetur adipiscing elit. ", secondPage.Text);
    }
}
private static void WriteFile(string name, byte[] bytes, string extension = "pdf")
{
try

View File

@@ -17,7 +17,7 @@
public class Page
{
private readonly AnnotationProvider annotationProvider;
private readonly IPdfTokenScanner pdfScanner;
internal readonly IPdfTokenScanner pdfScanner;
private readonly Lazy<string> textLazy;
/// <summary>

View File

@@ -3,6 +3,7 @@ namespace UglyToad.PdfPig.Writer
{
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using Content;
@@ -10,8 +11,10 @@ namespace UglyToad.PdfPig.Writer
using Fonts;
using PdfPig.Fonts.TrueType;
using Graphics.Operations;
using Parser.Parts;
using PdfPig.Fonts.Standard14Fonts;
using PdfPig.Fonts.TrueType.Parser;
using Tokenization.Scanner;
using Tokens;
using Util.JetBrains.Annotations;
@@ -25,6 +28,9 @@ namespace UglyToad.PdfPig.Writer
private readonly Dictionary<int, PdfPageBuilder> pages = new Dictionary<int, PdfPageBuilder>();
private readonly Dictionary<Guid, FontStored> fonts = new Dictionary<Guid, FontStored>();
private readonly Dictionary<Guid, ImageStored> images = new Dictionary<Guid, ImageStored>();
private readonly Dictionary<IndirectReferenceToken, IToken> unwrittenTokens = new Dictionary<IndirectReferenceToken, IToken>();
internal int fontId = 0;
/// <summary>
/// The standard of PDF/A compliance of the generated document. Defaults to <see cref="PdfAStandard.None"/>.
@@ -50,7 +56,12 @@ namespace UglyToad.PdfPig.Writer
/// <summary>
/// The fonts currently available in the document builder added via <see cref="AddTrueTypeFont"/> or <see cref="AddStandard14Font"/>. Keyed by id for internal purposes.
/// </summary>
internal IReadOnlyDictionary<Guid, IWritingFont> Fonts => fonts.ToDictionary(x => x.Key, x => x.Value.FontProgram);
internal IReadOnlyDictionary<Guid, FontStored> Fonts => fonts;
/// <summary>
/// The images currently available in the document builder added via <see cref="AddImage"/>. Keyed by id for internal purposes.
/// </summary>
internal IReadOnlyDictionary<Guid, ImageStored> Images => images;
/// <summary>
/// Determines whether the bytes of the TrueType font file provided can be used in a PDF document.
@@ -116,8 +127,7 @@ namespace UglyToad.PdfPig.Writer
{
var font = TrueTypeFontParser.Parse(new TrueTypeDataBytes(new ByteArrayInputBytes(fontFileBytes)));
var id = Guid.NewGuid();
var i = fonts.Count;
var added = new AddedFont(id, NameToken.Create($"F{i}"));
var added = new AddedFont(id, NameToken.Create($"F{fontId++}"));
fonts[id] = new FontStored(added, new TrueTypeWritingFont(font, fontFileBytes));
return added;
@@ -141,7 +151,7 @@ namespace UglyToad.PdfPig.Writer
}
var id = Guid.NewGuid();
var name = NameToken.Create($"F{fonts.Count}");
var name = NameToken.Create($"F{fontId++}");
var added = new AddedFont(id, name);
fonts[id] = new FontStored(added, new Standard14WritingFont(Standard14.GetAdobeFontMetrics(type)));
@@ -259,6 +269,11 @@ namespace UglyToad.PdfPig.Writer
context.WriteObject(memory, streamToken, image.Value.ObjectNumber);
}
foreach (var tokenSet in unwrittenTokens)
{
context.WriteObject(memory, tokenSet.Value, (int)tokenSet.Key.Data.ObjectNumber);
}
var procSet = new List<NameToken>
{
NameToken.Create("PDF"),
@@ -278,9 +293,7 @@ namespace UglyToad.PdfPig.Writer
var fontsDictionary = new DictionaryToken(fontsWritten.Select(x => (fonts[x.Key].FontKey.Name, (IToken)new IndirectReferenceToken(x.Value.Number)))
.ToDictionary(x => x.Item1, x => x.Item2));
var fontsDictionaryRef = context.WriteObject(memory, fontsDictionary);
resources.Add(NameToken.Font, new IndirectReferenceToken(fontsDictionaryRef.Number));
resources.Add(NameToken.Font, fontsDictionary);
}
var reserved = context.ReserveNumber();
@@ -301,8 +314,24 @@ namespace UglyToad.PdfPig.Writer
{
foreach (var kvp in page.Value.Resources)
{
// TODO: combine resources if value is dictionary or array, otherwise overwrite.
individualResources[kvp.Key] = kvp.Value;
var value = kvp.Value;
if (individualResources.TryGetValue(kvp.Key, out var pageToken))
{
if (pageToken is DictionaryToken leftDictionary && value is DictionaryToken rightDictionary)
{
var merged = leftDictionary.Data.ToDictionary(k => NameToken.Create(k.Key), v => v.Value);
foreach (var set in rightDictionary.Data)
{
merged[NameToken.Create(set.Key)] = set.Value;
}
value = new DictionaryToken(merged);
}
// Else override
}
individualResources[kvp.Key] = value;
}
}
@@ -392,6 +421,75 @@ namespace UglyToad.PdfPig.Writer
}
}
/// <summary>
/// Deep copies a token that originates from another document so that it can be used in this one.
/// Dictionaries, arrays and streams are rebuilt recursively. Every indirect reference is resolved
/// through <paramref name="tokenScanner"/>, its target is copied, a new object number is reserved
/// for the copy and the copy is queued in <c>unwrittenTokens</c>.
/// </summary>
/// <param name="tokenToCopy">Token to inspect for references.</param>
/// <param name="tokenScanner">Scanner used to read referenced objects from the original document.</param>
/// <returns>
/// The copied token with all contained references replaced by newly reserved ones. Tokens that are
/// neither containers, references nor streams are returned as-is.
/// </returns>
/// <exception cref="NotSupportedException">The token is an <see cref="ObjectToken"/>.</exception>
internal IToken CopyToken(IToken tokenToCopy, IPdfTokenScanner tokenScanner)
{
    // Containers must be rebuilt because they may hold references which need to be remapped.
    if (tokenToCopy is DictionaryToken sourceDictionary)
    {
        var copiedEntries = sourceDictionary.Data.ToDictionary(
            entry => NameToken.Create(entry.Key),
            entry => CopyToken(entry.Value, tokenScanner));

        return new DictionaryToken(copiedEntries);
    }

    if (tokenToCopy is ArrayToken sourceArray)
    {
        var copiedItems = sourceArray.Data
            .Select(item => CopyToken(item, tokenScanner))
            .ToList();

        return new ArrayToken(copiedItems);
    }

    if (tokenToCopy is IndirectReferenceToken sourceReference)
    {
        // Resolve the target in the source document and copy it before reserving the new number,
        // so nested references reserve their numbers first.
        var resolved = DirectObjectFinder.Get<IToken>(sourceReference.Data, tokenScanner);
        Debug.Assert(!(resolved is IndirectReferenceToken));

        var copiedTarget = CopyToken(resolved, tokenScanner);

        var objectNumber = context.ReserveNumber();
        var remappedReference = new IndirectReferenceToken(new IndirectReference(objectNumber, 0));
        unwrittenTokens.Add(remappedReference, copiedTarget);

        return remappedReference;
    }

    if (tokenToCopy is StreamToken sourceStream)
    {
        // Only the stream dictionary can contain references; the stream data is reused.
        var copiedProperties = CopyToken(sourceStream.StreamDictionary, tokenScanner) as DictionaryToken;
        Debug.Assert(copiedProperties != null);

        return new StreamToken(copiedProperties, sourceStream.Data);
    }

    if (tokenToCopy is ObjectToken)
    {
        // Since we don't write tokens directly to the stream we can't know the offset,
        // therefore the copied token would be invalid.
        throw new NotSupportedException("Copying a Object token is not supported");
    }

    // Anything else is returned unchanged.
    return tokenToCopy;
}
private static StreamToken WriteContentStream(IReadOnlyList<IGraphicsStateOperation> content)
{
using (var memoryStream = new MemoryStream())

View File

@@ -15,7 +15,9 @@
using Images;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using PdfFonts;
using Tokens;
using Graphics.Operations.PathPainting;
@@ -228,7 +230,7 @@
throw new ArgumentNullException(nameof(text));
}
if (!documentBuilder.Fonts.TryGetValue(font.Id, out var fontProgram))
if (!documentBuilder.Fonts.TryGetValue(font.Id, out var fontStore))
{
throw new ArgumentException($"No font has been added to the PdfDocumentBuilder with Id: {font.Id}. " +
$"Use {nameof(documentBuilder.AddTrueTypeFont)} to register a font.", nameof(font));
@@ -239,6 +241,8 @@
throw new ArgumentOutOfRangeException(nameof(fontSize), "Font size must be greater than 0");
}
var fontProgram = fontStore.FontProgram;
var fm = fontProgram.GetFontMatrix();
var textMatrix = TransformationMatrix.FromValues(1, 0, 0, 1, position.X, position.Y);
@@ -271,7 +275,7 @@
throw new ArgumentNullException(nameof(text));
}
if (!documentBuilder.Fonts.TryGetValue(font.Id, out var fontProgram))
if (!documentBuilder.Fonts.TryGetValue(font.Id, out var fontStore))
{
throw new ArgumentException($"No font has been added to the PdfDocumentBuilder with Id: {font.Id}. " +
$"Use {nameof(documentBuilder.AddTrueTypeFont)} to register a font.", nameof(font));
@@ -282,6 +286,8 @@
throw new ArgumentOutOfRangeException(nameof(fontSize), "Font size must be greater than 0");
}
var fontProgram = fontStore.FontProgram;
var fm = fontProgram.GetFontMatrix();
var textMatrix = TransformationMatrix.FromValues(1, 0, 0, 1, position.X, position.Y);
@@ -497,6 +503,169 @@
return new AddedImage(reference, png.Width, png.Height);
}
/// <summary>
/// Copy the content of a page from another document to this page.
/// </summary>
/// <remarks>
/// The operations of <paramref name="srcPage"/> are appended to the current content stream (a new
/// content stream is started first when the current one already contains operations). The source
/// page's resources are copied through the document builder. Fonts and XObjects whose names are
/// already in use on this page or by the document builder are renamed, and the copied
/// <see cref="SetFontAndSize"/> / <see cref="InvokeNamedXObject"/> operations are updated to match.
/// Resource categories other than Font and XObject are only copied when this page does not already
/// define that category.
/// </remarks>
/// <param name="srcPage">Page to be copied</param>
/// <exception cref="ArgumentNullException"><paramref name="srcPage"/> is null.</exception>
/// <exception cref="PdfDocumentFormatException">
/// A font or XObject entry of the source page is not an indirect reference.
/// </exception>
/// <exception cref="InvalidOperationException">
/// This page already holds a Font or XObject resource entry which is not a dictionary.
/// </exception>
public void CopyFrom(Page srcPage)
{
    if (srcPage == null)
    {
        throw new ArgumentNullException(nameof(srcPage));
    }

    // Keep the operations already written apart from the imported ones.
    if (CurrentStream.Operations.Count > 0)
    {
        NewContentStreamAfter();
    }

    var destinationStream = CurrentStream;

    if (!srcPage.Dictionary.TryGet(NameToken.Resources, srcPage.pdfScanner, out DictionaryToken srcResourceDictionary))
    {
        // If the page doesn't have resources, then we copy the entire content stream, since no operation
        // would collide with the ones already written
        destinationStream.Operations.AddRange(srcPage.Operations);
        return;
    }

    // TODO: How should we handle any other token in the page dictionary (Eg. LastModified, MediaBox, CropBox, BleedBox, TrimBox, ArtBox,
    //      BoxColorInfo, Rotate, Group, Thumb, B, Dur, Trans, Annots, AA, Metadata, PieceInfo, StructParents, ID, PZ, SeparationInfo, Tabs,
    //      TemplateInstantiated, PresSteps, UserUnit, VP)

    foreach (var set in srcResourceDictionary.Data)
    {
        var nameToken = NameToken.Create(set.Key);
        if (nameToken == NameToken.Font || nameToken == NameToken.Xobject)
        {
            // These two are handled separately below because their names may have to be remapped.
            continue;
        }

        if (!resourcesDictionary.ContainsKey(nameToken))
        {
            // This type of resource doesn't currently exist in the page, so we can copy it with no problem.
            resourcesDictionary[nameToken] = documentBuilder.CopyToken(set.Value, srcPage.pdfScanner);
        }

        // TODO: merge with the existing entry (ExtGState, ColorSpace, etc...); for now the existing entry wins.
    }

    var fontRenames = new Dictionary<string, string>();
    if (srcResourceDictionary.TryGet(NameToken.Font, srcPage.pdfScanner, out DictionaryToken fontsDictionary))
    {
        // Fonts added through the document builder are not stored in the page resources,
        // so their names have to be reserved explicitly.
        fontRenames = CopyNamedResources(
            srcPage,
            fontsDictionary,
            NameToken.Font,
            "font",
            documentBuilder.Fonts.Values.Select(f => f.FontKey.Name.Data),
            () => $"F{documentBuilder.fontId++}");
    }

    var xobjectRenames = new Dictionary<string, string>();
    if (srcResourceDictionary.TryGet(NameToken.Xobject, srcPage.pdfScanner, out DictionaryToken xobjectsDictionary))
    {
        // Every "I{n}" name below the current image key is treated as already used.
        xobjectRenames = CopyNamedResources(
            srcPage,
            xobjectsDictionary,
            NameToken.Xobject,
            "XObject",
            Enumerable.Range(0, imageKey).Select(i => $"I{i}"),
            () => $"I{imageKey++}");
    }

    // Apply all renames in a single pass so that one rename can never be mistaken for another source name.
    var operations = srcPage.Operations
        .Select(operation => ApplyResourceRenames(operation, fontRenames, xobjectRenames))
        .ToList();

    destinationStream.Operations.AddRange(operations);
}

/// <summary>
/// Copies every entry of one named resource category (fonts or XObjects) of the source page into this
/// page's resources, choosing a new name for each entry whose name is already taken.
/// </summary>
/// <param name="srcPage">Page the entries come from; its scanner is used to resolve references.</param>
/// <param name="sourceEntries">The source page's dictionary for the category.</param>
/// <param name="category">Resource category key, e.g. <see cref="NameToken.Font"/>.</param>
/// <param name="description">Category description used in the exception message.</param>
/// <param name="namesReservedByBuilder">Names which must not be reused although they may be absent from the page resources.</param>
/// <param name="nextGeneratedName">Produces the next candidate replacement name.</param>
/// <returns>Map from the original name to the replacement name for every renamed entry.</returns>
private Dictionary<string, string> CopyNamedResources(
    Page srcPage,
    DictionaryToken sourceEntries,
    NameToken category,
    string description,
    IEnumerable<string> namesReservedByBuilder,
    Func<string> nextGeneratedName)
{
    var renames = new Dictionary<string, string>();
    var takenNames = new HashSet<string>(namesReservedByBuilder);
    var mergedEntries = new Dictionary<NameToken, IToken>();

    if (resourcesDictionary.TryGetValue(category, out var existingToken))
    {
        if (!(existingToken is DictionaryToken existingDictionary))
        {
            throw new InvalidOperationException(
                $"Expected the page's {description} resources to be a dictionary, got a {existingToken.GetType().Name}");
        }

        foreach (var existing in existingDictionary.Data)
        {
            mergedEntries[NameToken.Create(existing.Key)] = existing.Value;
            takenNames.Add(existing.Key);
        }
    }

    // A generated name must not shadow any name of the source dictionary either, otherwise two
    // distinct source resources would end up sharing a single name.
    var sourceNames = new HashSet<string>(sourceEntries.Data.Keys);

    foreach (var entry in sourceEntries.Data)
    {
        if (!(entry.Value is IndirectReferenceToken referenceToken))
        {
            throw new PdfDocumentFormatException($"Expected a IndirectReferenceToken for the {description}, got a {entry.Value.GetType().Name}");
        }

        var name = entry.Key;
        if (takenNames.Contains(name))
        {
            string replacement;
            do
            {
                replacement = nextGeneratedName();
            }
            while (takenNames.Contains(replacement) || sourceNames.Contains(replacement));

            renames[name] = replacement;
            name = replacement;
        }

        takenNames.Add(name);
        mergedEntries.Add(NameToken.Create(name), documentBuilder.CopyToken(referenceToken, srcPage.pdfScanner));
    }

    resourcesDictionary[category] = new DictionaryToken(mergedEntries);

    return renames;
}

/// <summary>
/// Returns the operation to write for a copied operation: a new operation with the replacement name when
/// it selects a renamed font or invokes a renamed XObject, otherwise the operation itself.
/// </summary>
private static IGraphicsStateOperation ApplyResourceRenames(
    IGraphicsStateOperation operation,
    Dictionary<string, string> fontRenames,
    Dictionary<string, string> xobjectRenames)
{
    if (operation is SetFontAndSize setFont && fontRenames.TryGetValue(setFont.Font.Data, out var newFontName))
    {
        return new SetFontAndSize(NameToken.Create(newFontName), setFont.Size);
    }

    if (operation is InvokeNamedXObject invoke && xobjectRenames.TryGetValue(invoke.Name.Data, out var newXobjectName))
    {
        return new InvokeNamedXObject(NameToken.Create(newXobjectName));
    }

    return operation;
}
private List<Letter> DrawLetters(string text, IWritingFont font, TransformationMatrix fontMatrix, decimal fontSize, TransformationMatrix textMatrix)
{
var horizontalScaling = 1;