clean up pagebuilder, switch merger to use pdfdocumentbuilder

This commit is contained in:
Plaisted
2021-02-08 12:37:09 -06:00
parent ca0b90523e
commit 6e1cf89cf9
5 changed files with 242 additions and 522 deletions

View File

@@ -74,13 +74,24 @@ namespace UglyToad.PdfPig.Writer
context.InitializePdf(1.7m); context.InitializePdf(1.7m);
} }
/// <summary>
/// Creates a document builder that writes the pdf to an in-memory stream.
/// </summary>
/// <param name="version">Pdf version to use in header.</param>
public PdfDocumentBuilder(decimal version)
{
    // The builder creates this stream itself, so the writer is asked to dispose it.
    var memory = new MemoryStream();
    context = new PdfStreamWriter(memory, true);
    context.InitializePdf(version);
}
/// <summary> /// <summary>
/// Creates a document builder using the supplied stream. /// Creates a document builder using the supplied stream.
/// </summary> /// </summary>
/// <param name="stream">Stream to write pdf to.</param> /// <param name="stream">Stream to write pdf to.</param>
/// <param name="disposeStream">If stream should be disposed when builder is.</param> /// <param name="disposeStream">If stream should be disposed when builder is.</param>
/// <param name="type">Type of pdf stream writer to use</param> /// <param name="type">Type of pdf stream writer to use</param>
public PdfDocumentBuilder(Stream stream, bool disposeStream=false, PdfWriterType type=PdfWriterType.Default) /// <param name="version">Pdf version to use in header.</param>
public PdfDocumentBuilder(Stream stream, bool disposeStream=false, PdfWriterType type=PdfWriterType.Default, decimal version=1.7m)
{ {
switch (type) switch (type)
{ {
@@ -91,7 +102,7 @@ namespace UglyToad.PdfPig.Writer
context = new PdfStreamWriter(stream, disposeStream); context = new PdfStreamWriter(stream, disposeStream);
break; break;
} }
context.InitializePdf(1.7m); context.InitializePdf(version);
} }
/// <summary> /// <summary>
@@ -372,23 +383,12 @@ namespace UglyToad.PdfPig.Writer
WriterUtil.CopyToken(context, kvp.Value, document.Structure.TokenScanner, refs); WriterUtil.CopyToken(context, kvp.Value, document.Structure.TokenScanner, refs);
} }
var builder = new PdfPageBuilder(pages.Count + 1, this, streams, resources, copiedPageDict); copiedPageDict[NameToken.Resources] = new DictionaryToken(resources);
if (resources.TryGetValue(NameToken.Font, out var fonts))
{
var existingFontDict = fonts as DictionaryToken;
foreach (var item in existingFontDict.Data)
{
var key = NameToken.Create(item.Key);
builder.fontDictionary[key] = item.Value;
}
resources.Remove(NameToken.Font);
}
var builder = new PdfPageBuilder(pages.Count + 1, this, streams, copiedPageDict);
pages[builder.PageNumber] = builder; pages[builder.PageNumber] = builder;
return builder; return builder;
void CopyResourceDict(IToken token, Dictionary<NameToken, IToken> destinationDict) void CopyResourceDict(IToken token, Dictionary<NameToken, IToken> destinationDict)
{ {
DictionaryToken dict = GetRemoteDict(token); DictionaryToken dict = GetRemoteDict(token);
@@ -483,7 +483,7 @@ namespace UglyToad.PdfPig.Writer
foreach (var page in pages) foreach (var page in pages)
{ {
var pageDictionary = page.Value.additionalPageProperties; var pageDictionary = page.Value.pageDictionary;
pageDictionary[NameToken.Type] = NameToken.Page; pageDictionary[NameToken.Type] = NameToken.Page;
pageDictionary[NameToken.Parent] = leafRefs[leafNum]; pageDictionary[NameToken.Parent] = leafRefs[leafNum];
pageDictionary[NameToken.ProcSet] = new ArrayToken(procSet); pageDictionary[NameToken.ProcSet] = new ArrayToken(procSet);
@@ -492,16 +492,6 @@ namespace UglyToad.PdfPig.Writer
pageDictionary[NameToken.MediaBox] = RectangleToArray(page.Value.PageSize); pageDictionary[NameToken.MediaBox] = RectangleToArray(page.Value.PageSize);
} }
// combine existing resources (if any) with added
var pageResources = new Dictionary<NameToken, IToken>();
foreach (var existing in page.Value.Resources)
{
pageResources[existing.Key] = existing.Value;
}
pageResources[NameToken.Font] = new DictionaryToken(page.Value.fontDictionary);
pageDictionary[NameToken.Resources] = new DictionaryToken(pageResources);
var toWrite = page.Value.contentStreams.Where(x => x.HasContent).ToList(); var toWrite = page.Value.contentStreams.Where(x => x.HasContent).ToList();
if (toWrite.Count == 0) if (toWrite.Count == 0)
{ {

View File

@@ -2,22 +2,10 @@
{ {
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Diagnostics;
using System.IO; using System.IO;
using Content;
using Core;
using CrossReference;
using Encryption;
using Filters; using Filters;
using Logging; using Logging;
using Parser;
using Parser.FileStructure;
using Parser.Parts;
using Tokenization.Scanner;
using Tokens;
using Exceptions;
using System.Linq; using System.Linq;
using Util;
/// <summary> /// <summary>
/// Merges PDF documents into each other. /// Merges PDF documents into each other.
@@ -48,9 +36,9 @@
_ = file1 ?? throw new ArgumentNullException(nameof(file1)); _ = file1 ?? throw new ArgumentNullException(nameof(file1));
_ = file2 ?? throw new ArgumentNullException(nameof(file2)); _ = file2 ?? throw new ArgumentNullException(nameof(file2));
using (var stream1 = new StreamInputBytes(File.OpenRead(file1))) using (var stream1 = File.OpenRead(file1))
{ {
using (var stream2 = new StreamInputBytes(File.OpenRead(file2))) using (var stream2 = File.OpenRead(file2))
{ {
Merge(new[] { stream1, stream2 }, output, new[] { file1Selection, file2Selection }); Merge(new[] { stream1, stream2 }, output, new[] { file1Selection, file2Selection });
} }
@@ -74,13 +62,13 @@
/// </summary> /// </summary>
public static void Merge(Stream output, params string[] filePaths) public static void Merge(Stream output, params string[] filePaths)
{ {
var streams = new List<StreamInputBytes>(filePaths.Length); var streams = new List<Stream>(filePaths.Length);
try try
{ {
for (var i = 0; i < filePaths.Length; i++) for (var i = 0; i < filePaths.Length; i++)
{ {
var filePath = filePaths[i] ?? throw new ArgumentNullException(nameof(filePaths), $"Null filepath at index {i}."); var filePath = filePaths[i] ?? throw new ArgumentNullException(nameof(filePaths), $"Null filepath at index {i}.");
streams.Add(new StreamInputBytes(File.OpenRead(filePath), true)); streams.Add(File.OpenRead(filePath));
} }
Merge(streams, output, null); Merge(streams, output, null);
@@ -103,7 +91,7 @@
using (var output = new MemoryStream()) using (var output = new MemoryStream())
{ {
Merge(files.Select(f => new ByteArrayInputBytes(f)).ToArray(), output, pagesBundle); Merge(files.Select(f => PdfDocument.Open(f)).ToArray(), output, pagesBundle);
return output.ToArray(); return output.ToArray();
} }
} }
@@ -122,317 +110,39 @@
_ = streams ?? throw new ArgumentNullException(nameof(streams)); _ = streams ?? throw new ArgumentNullException(nameof(streams));
_ = output ?? throw new ArgumentNullException(nameof(output)); _ = output ?? throw new ArgumentNullException(nameof(output));
Merge(streams.Select(f => new StreamInputBytes(f, false)).ToArray(), output, pagesBundle); Merge(streams.Select(f => PdfDocument.Open(f)).ToArray(), output, pagesBundle);
} }
/// <summary>
/// Copies the selected pages of every document in <paramref name="files"/>, in order, into a single pdf
/// written to <paramref name="output"/>.
/// </summary>
/// <param name="files">Documents to merge, in output order.</param>
/// <param name="output">Stream the merged pdf is written to. The builder is created with disposeStream = false.</param>
/// <param name="pagesBundle">
/// Optional page selection per document: the entry at index <c>n</c> lists the page numbers to copy from
/// <c>files[n]</c>. A null bundle, a missing entry or a null entry copies every page of that document.
/// </param>
private static void Merge(IReadOnlyList<PdfDocument> files, Stream output, IReadOnlyList<IReadOnlyList<int>> pagesBundle)
{
    // The merged file is declared with the highest version found among the inputs.
    // Max() throws InvalidOperationException when files is empty.
    var maxVersion = files.Select(x => x.Version).Max();

    // NOTE(review): the PdfDocument instances in files are not disposed here - confirm who owns them.
    using (var document = new PdfDocumentBuilder(output, false, PdfWriterType.Default, maxVersion))
    {
        foreach (var fileIndex in Enumerable.Range(0, files.Count))
        {
            var existing = files[fileIndex];

            IReadOnlyList<int> pages = null;
            if (pagesBundle != null && fileIndex < pagesBundle.Count)
            {
                pages = pagesBundle[fileIndex];
            }

            if (pages == null)
            {
                // No selection for this document: copy every page, numbering from 1.
                for (var i = 1; i <= existing.NumberOfPages; i++)
                {
                    // Pass the loop variable; a constant 1 would copy the first page repeatedly.
                    document.AddPage(existing, i);
                }
            }
            else
            {
                foreach (var i in pages)
                {
                    document.AddPage(existing, i);
                }
            }
        }
    }
}
} }
} }

View File

@@ -28,22 +28,26 @@
/// </summary> /// </summary>
public class PdfPageBuilder public class PdfPageBuilder
{ {
// parent
private readonly PdfDocumentBuilder documentBuilder; private readonly PdfDocumentBuilder documentBuilder;
private IPageContentStream currentStream;
internal readonly List<IPageContentStream> contentStreams;
internal readonly Dictionary<NameToken, IToken> additionalPageProperties = new Dictionary<NameToken, IToken>();
private readonly Dictionary<NameToken, IToken> resourcesDictionary = new Dictionary<NameToken, IToken>();
internal Dictionary<NameToken, IToken> fontDictionary = new Dictionary<NameToken, IToken>();
internal int nextFontId = 1;
private readonly Dictionary<Guid, NameToken> documentFonts = new Dictionary<Guid, NameToken>();
// all page data other than content streams
internal readonly Dictionary<NameToken, IToken> pageDictionary = new Dictionary<NameToken, IToken>();
// streams
internal readonly List<IPageContentStream> contentStreams;
private IPageContentStream currentStream;
// maps fonts added using PdfDocumentBuilder to page font names
private readonly Dictionary<Guid, NameToken> documentFonts = new Dictionary<Guid, NameToken>();
internal int nextFontId = 1;
//a sequence number of ShowText operation to determine whether letters belong to same operation or not (letters that belong to different operations have less chance of belonging to the same word) //a sequence number of ShowText operation to determine whether letters belong to same operation or not (letters that belong to different operations have less chance of belonging to the same word)
private int textSequence; private int textSequence;
private int imageKey = 1; private int imageKey = 1;
internal IReadOnlyDictionary<NameToken, IToken> Resources => resourcesDictionary; internal IReadOnlyDictionary<string, IToken> Resources => pageDictionary.GetOrCreateDict(NameToken.Resources);
/// <summary> /// <summary>
/// The number of this page, 1-indexed. /// The number of this page, 1-indexed.
@@ -75,16 +79,15 @@
} }
/// <summary>
/// Creates a page builder that starts from already-copied content streams and an existing page dictionary.
/// </summary>
/// <param name="number">The number of this page, 1-indexed.</param>
/// <param name="documentBuilder">The document builder this page belongs to. Must not be null.</param>
/// <param name="copied">Content streams copied for this page; they are placed before any newly added content.</param>
/// <param name="pageDict">Page data other than content streams. When null an empty dictionary is used.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="documentBuilder"/> is null.</exception>
internal PdfPageBuilder(int number, PdfDocumentBuilder documentBuilder, IEnumerable<CopiedContentStream> copied,
    Dictionary<NameToken, IToken> pageDict)
{
    this.documentBuilder = documentBuilder ?? throw new ArgumentNullException(nameof(documentBuilder));
    PageNumber = number;

    // Fall back to an empty dictionary so later lookups on pageDictionary never dereference null.
    pageDictionary = pageDict ?? new Dictionary<NameToken, IToken>();

    // Copied streams come first; new content goes into a fresh stream appended after them.
    contentStreams = new List<IPageContentStream>();
    contentStreams.AddRange(copied);
    currentStream = new DefaultContentStream();
    contentStreams.Add(currentStream);
}
/// <summary> /// <summary>
@@ -343,13 +346,15 @@
if (!documentFonts.TryGetValue(font.Id, out NameToken value)) if (!documentFonts.TryGetValue(font.Id, out NameToken value))
{ {
value = NameToken.Create($"F{nextFontId++}"); value = NameToken.Create($"F{nextFontId++}");
while (fontDictionary.ContainsKey(value)) var resources = pageDictionary.GetOrCreateDict(NameToken.Resources);
var fonts = resources.GetOrCreateDict(NameToken.Font);
while (fonts.ContainsKey(value))
{ {
value = NameToken.Create($"F{nextFontId++}"); value = NameToken.Create($"F{nextFontId++}");
} }
documentFonts[font.Id] = value; documentFonts[font.Id] = value;
fontDictionary[value] = font.Reference; fonts[value] = font.Reference;
} }
return value; return value;
@@ -395,17 +400,11 @@
}; };
var reference = documentBuilder.AddImage(new DictionaryToken(imgDictionary), data); var reference = documentBuilder.AddImage(new DictionaryToken(imgDictionary), data);
var resources = pageDictionary.GetOrCreateDict(NameToken.Resources);
if (!resourcesDictionary.TryGetValue(NameToken.Xobject, out var xobjectsDict) var xObjects = resources.GetOrCreateDict(NameToken.Xobject);
|| !(xobjectsDict is DictionaryToken xobjects))
{
xobjects = new DictionaryToken(new Dictionary<NameToken, IToken>());
resourcesDictionary[NameToken.Xobject] = xobjects;
}
var key = NameToken.Create($"I{imageKey++}"); var key = NameToken.Create($"I{imageKey++}");
xObjects[key] = reference;
resourcesDictionary[NameToken.Xobject] = xobjects.With(key, reference);
currentStream.Add(Push.Value); currentStream.Add(Push.Value);
// This needs to be the placement rectangle. // This needs to be the placement rectangle.
@@ -435,16 +434,11 @@
/// </summary> /// </summary>
public void AddImage(AddedImage image, PdfRectangle placementRectangle) public void AddImage(AddedImage image, PdfRectangle placementRectangle)
{ {
if (!resourcesDictionary.TryGetValue(NameToken.Xobject, out var xobjectsDict) var resources = pageDictionary.GetOrCreateDict(NameToken.Resources);
|| !(xobjectsDict is DictionaryToken xobjects)) var xObjects = resources.GetOrCreateDict(NameToken.Xobject);
{
xobjects = new DictionaryToken(new Dictionary<NameToken, IToken>());
resourcesDictionary[NameToken.Xobject] = xobjects;
}
var key = NameToken.Create($"I{imageKey++}"); var key = NameToken.Create($"I{imageKey++}");
xObjects[key] = new IndirectReferenceToken(image.Reference);
resourcesDictionary[NameToken.Xobject] = xobjects.With(key, new IndirectReferenceToken(image.Reference));
currentStream.Add(Push.Value); currentStream.Add(Push.Value);
// This needs to be the placement rectangle. // This needs to be the placement rectangle.
@@ -513,16 +507,12 @@
var reference = documentBuilder.AddImage(new DictionaryToken(imgDictionary), compressed); var reference = documentBuilder.AddImage(new DictionaryToken(imgDictionary), compressed);
if (!resourcesDictionary.TryGetValue(NameToken.Xobject, out var xobjectsDict) var resources = pageDictionary.GetOrCreateDict(NameToken.Resources);
|| !(xobjectsDict is DictionaryToken xobjects)) var xObjects = resources.GetOrCreateDict(NameToken.Xobject);
{
xobjects = new DictionaryToken(new Dictionary<NameToken, IToken>());
resourcesDictionary[NameToken.Xobject] = xobjects;
}
var key = NameToken.Create($"I{imageKey++}"); var key = NameToken.Create($"I{imageKey++}");
resourcesDictionary[NameToken.Xobject] = xobjects.With(key, reference); xObjects[key] = reference;
currentStream.Add(Push.Value); currentStream.Add(Push.Value);
// This needs to be the placement rectangle. // This needs to be the placement rectangle.
@@ -568,6 +558,8 @@
// We need to relocate the resources, and we have to make sure that none of the resources collide with // We need to relocate the resources, and we have to make sure that none of the resources collide with
// the already written operation's resources // the already written operation's resources
var resources = pageDictionary.GetOrCreateDict(NameToken.Resources);
foreach (var set in srcResourceDictionary.Data) foreach (var set in srcResourceDictionary.Data)
{ {
var nameToken = NameToken.Create(set.Key); var nameToken = NameToken.Create(set.Key);
@@ -577,11 +569,11 @@
continue; continue;
} }
if (!resourcesDictionary.TryGetValue(nameToken, out var currentToken)) if (!resources.ContainsKey(nameToken))
{ {
// It means that this type of resources doesn't currently exist in the page, so we can copy it // It means that this type of resources doesn't currently exist in the page, so we can copy it
// with no problem // with no problem
resourcesDictionary[nameToken] = documentBuilder.CopyToken(srcPage.pdfScanner, set.Value); resources[nameToken] = documentBuilder.CopyToken(srcPage.pdfScanner, set.Value);
continue; continue;
} }
@@ -593,25 +585,16 @@
// Since we don't directly add fonts to the page's resources, we have to go look at the document's fonts // Since we don't directly add fonts to the page's resources, we have to go look at the document's fonts
if(srcResourceDictionary.TryGet(NameToken.Font, srcPage.pdfScanner, out DictionaryToken fontsDictionary)) if(srcResourceDictionary.TryGet(NameToken.Font, srcPage.pdfScanner, out DictionaryToken fontsDictionary))
{ {
Dictionary<NameToken, IToken> pageFontsDictionary = null; var pageFontsDictionary = resources.GetOrCreateDict(NameToken.Font);
if (resourcesDictionary.TryGetValue(NameToken.Font, out var pageFontsToken))
{
pageFontsDictionary = (pageFontsToken as DictionaryToken)?.Data.ToDictionary(k => NameToken.Create(k.Key), v => v.Value);
Debug.Assert(pageFontsDictionary != null);
}
else
{
pageFontsDictionary = new Dictionary<NameToken, IToken>();
}
foreach (var fontSet in fontsDictionary.Data) foreach (var fontSet in fontsDictionary.Data)
{ {
var fontName = NameToken.Create(fontSet.Key); var fontName = NameToken.Create(fontSet.Key);
if (fontDictionary.ContainsKey(fontName)) if (pageFontsDictionary.ContainsKey(fontName))
{ {
// This would mean that the imported font collides with one of the added fonts, so we have to rename it // This would mean that the imported font collides with one of the added fonts, so we have to rename it
var newName = NameToken.Create($"F{nextFontId++}"); var newName = NameToken.Create($"F{nextFontId++}");
while (fontDictionary.ContainsKey(newName)) while (pageFontsDictionary.ContainsKey(newName))
{ {
newName = NameToken.Create($"F{nextFontId++}"); newName = NameToken.Create($"F{nextFontId++}");
} }
@@ -642,26 +625,12 @@
pageFontsDictionary.Add(fontName, documentBuilder.CopyToken(srcPage.pdfScanner, fontReferenceToken)); pageFontsDictionary.Add(fontName, documentBuilder.CopyToken(srcPage.pdfScanner, fontReferenceToken));
} }
foreach (var item in pageFontsDictionary)
{
fontDictionary[item.Key] = item.Value;
}
} }
// Since we don't directly add xobjects to the page's resources, we have to go look at the document's xobjects // Since we don't directly add xobjects to the page's resources, we have to go look at the document's xobjects
if (srcResourceDictionary.TryGet(NameToken.Xobject, srcPage.pdfScanner, out DictionaryToken xobjectsDictionary)) if (srcResourceDictionary.TryGet(NameToken.Xobject, srcPage.pdfScanner, out DictionaryToken xobjectsDictionary))
{ {
Dictionary<NameToken, IToken> pageXobjectsDictionary = null; var pageXobjectsDictionary = resources.GetOrCreateDict(NameToken.Xobject);
if (resourcesDictionary.TryGetValue(NameToken.Xobject, out var pageXobjectToken))
{
pageXobjectsDictionary = (pageXobjectToken as DictionaryToken)?.Data.ToDictionary(k => NameToken.Create(k.Key), v => v.Value);
Debug.Assert(pageXobjectsDictionary != null);
}
else
{
pageXobjectsDictionary = new Dictionary<NameToken, IToken>();
}
var xobjectNamesUsed = Enumerable.Range(0, imageKey).Select(i => $"I{i}"); var xobjectNamesUsed = Enumerable.Range(0, imageKey).Select(i => $"I{i}");
foreach (var xobjectSet in xobjectsDictionary.Data) foreach (var xobjectSet in xobjectsDictionary.Data)
@@ -696,10 +665,8 @@
throw new PdfDocumentFormatException($"Expected a IndirectReferenceToken for the XObject, got a {xobjectSet.Value.GetType().Name}"); throw new PdfDocumentFormatException($"Expected a IndirectReferenceToken for the XObject, got a {xobjectSet.Value.GetType().Name}");
} }
pageXobjectsDictionary.Add(NameToken.Create(xobjectName), documentBuilder.CopyToken(srcPage.pdfScanner, fontReferenceToken)); pageXobjectsDictionary[xobjectName] = documentBuilder.CopyToken(srcPage.pdfScanner, fontReferenceToken);
} }
resourcesDictionary[NameToken.Xobject] = new DictionaryToken(pageXobjectsDictionary);
} }
destinationStream.Operations.AddRange(operations); destinationStream.Operations.AddRange(operations);
@@ -905,5 +872,7 @@
Height = height; Height = height;
} }
} }
} }
} }

View File

@@ -7,11 +7,62 @@
using System.Collections.Generic; using System.Collections.Generic;
using System.Diagnostics; using System.Diagnostics;
using System.Linq; using System.Linq;
using System.Text;
using Tokenization.Scanner; using Tokenization.Scanner;
using Tokens; using Tokens;
internal class WriterUtil
internal static class WriterUtil
{ {
/// <summary>
/// Returns a mutable dictionary for the dictionary token stored under <paramref name="key"/>,
/// creating and storing an empty one when the key is absent.
/// </summary>
/// <param name="dict">Dictionary to read from and, when needed, update.</param>
/// <param name="key">Name of the entry holding the nested dictionary.</param>
/// <returns>The mutable dictionary backing the entry.</returns>
/// <exception cref="ApplicationException">Thrown when the entry exists but is not a dictionary token.</exception>
public static Dictionary<string, IToken> GetOrCreateDict(this Dictionary<NameToken, IToken> dict, NameToken key)
{
    return GetOrCreateDictCore(dict, key);
}

/// <summary>
/// Returns a mutable dictionary for the dictionary token stored under <paramref name="key"/>,
/// creating and storing an empty one when the key is absent.
/// </summary>
/// <param name="dict">Dictionary to read from and, when needed, update.</param>
/// <param name="key">Name of the entry holding the nested dictionary.</param>
/// <returns>The mutable dictionary backing the entry.</returns>
/// <exception cref="ApplicationException">Thrown when the entry exists but is not a dictionary token.</exception>
public static Dictionary<string, IToken> GetOrCreateDict(this Dictionary<string, IToken> dict, string key)
{
    return GetOrCreateDictCore(dict, key);
}

// Shared implementation for both GetOrCreateDict overloads; only the key type differs.
private static Dictionary<string, IToken> GetOrCreateDictCore<TKey>(Dictionary<TKey, IToken> dict, TKey key)
{
    // Single lookup instead of ContainsKey followed by the indexer.
    if (!dict.TryGetValue(key, out var item))
    {
        var created = new Dictionary<string, IToken>();
        dict[key] = DictionaryToken.With(created);
        return created;
    }

    if (!(item is DictionaryToken dt))
    {
        // A null entry reports "null" rather than failing with a NullReferenceException.
        throw new ApplicationException("Expected dictionary token, got " + (item?.GetType().ToString() ?? "null"));
    }

    // Reuse the token's own data when it is already a mutable Dictionary.
    if (dt.Data is Dictionary<string, IToken> mutable)
    {
        return mutable;
    }

    // Otherwise copy the data and replace the stored token with one built from the copy.
    // NOTE(review): presumably DictionaryToken.With wraps the dictionary without copying, so edits
    // to the returned dictionary show up in the stored token - confirm.
    mutable = dt.Data.ToDictionary(x => x.Key, x => x.Value);
    dict[key] = DictionaryToken.With(mutable);
    return mutable;
}
/// <summary> /// <summary>
/// The purpose of this method is to resolve indirect reference. That mean copy the reference's content to the new document's stream /// The purpose of this method is to resolve indirect reference. That mean copy the reference's content to the new document's stream
/// and replace the indirect reference with the correct/new one /// and replace the indirect reference with the correct/new one