diff --git a/src/UglyToad.PdfPig.Tests/Writer/PdfMergerTests.cs b/src/UglyToad.PdfPig.Tests/Writer/PdfMergerTests.cs
new file mode 100644
index 00000000..05df66aa
--- /dev/null
+++ b/src/UglyToad.PdfPig.Tests/Writer/PdfMergerTests.cs
@@ -0,0 +1,26 @@
+namespace UglyToad.PdfPig.Tests.Writer
+{
+ using Integration;
+ using PdfPig.Writer;
+ using Xunit;
+
+ public class PdfMergerTests
+ {
+ [Fact]
+ public void CanMerge2SimpleDocuments()
+ {
+ var one = IntegrationHelpers.GetDocumentPath("Single Page Simple - from inkscape.pdf");
+ var two = IntegrationHelpers.GetDocumentPath("Single Page Simple - from open office.pdf");
+
+ var result = PdfMerger.Merge(one, two);
+
+ // FIX: Enable ParsingOptions.LenientParsingOff
+ using (var document = PdfDocument.Open(result/*, ParsingOptions.LenientParsingOff */))
+ {
+ Assert.Equal(2, document.NumberOfPages);
+
+ Assert.Equal(1.7m, document.Version);
+ }
+ }
+ }
+}
diff --git a/src/UglyToad.PdfPig/Writer/Merging/ObjectsTree.cs b/src/UglyToad.PdfPig/Writer/Merging/ObjectsTree.cs
new file mode 100644
index 00000000..8580b99e
--- /dev/null
+++ b/src/UglyToad.PdfPig/Writer/Merging/ObjectsTree.cs
@@ -0,0 +1,24 @@
+namespace UglyToad.PdfPig.Writer.Merging
+{
+ using Content;
+ using CrossReference;
+ using Tokens;
+
+ // I don't see the purpose of this class; @Elliot, maybe you can explain it to me?
+ internal class ObjectsTree
+ {
+ public TrailerDictionary TrailerDictionary { get; }
+
+ public ObjectToken TrailerObject { get; }
+
+ public Catalog Catalog { get; }
+
+ public ObjectsTree(TrailerDictionary trailerDictionary, ObjectToken trailerObject,
+ Catalog catalog)
+ {
+ TrailerDictionary = trailerDictionary;
+ TrailerObject = trailerObject;
+ Catalog = catalog;
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/UglyToad.PdfPig/Writer/PdfMerger.cs b/src/UglyToad.PdfPig/Writer/PdfMerger.cs
new file mode 100644
index 00000000..7d7d3ca1
--- /dev/null
+++ b/src/UglyToad.PdfPig/Writer/PdfMerger.cs
@@ -0,0 +1,421 @@
+namespace UglyToad.PdfPig.Writer
+{
+ using System;
+ using System.Collections.Generic;
+ using System.Diagnostics;
+ using System.IO;
+ using System.Linq;
+ using Content;
+ using Core;
+ using CrossReference;
+ using Encryption;
+ using Filters;
+ using Logging;
+ using Merging;
+ using Parser;
+ using Parser.FileStructure;
+ using Parser.Parts;
+ using Tokenization.Scanner;
+ using Tokens;
+ using UglyToad.PdfPig.Exceptions;
+ using UglyToad.PdfPig.Graphics.Operations;
+ using UglyToad.PdfPig.Writer.Fonts;
+
+ /// <summary>
+ /// Merges PDF documents into each other.
+ /// </summary>
+ public static class PdfMerger
+ {
+ private static readonly ILog Log = new NoOpLog();
+
+ private static readonly IFilterProvider FilterProvider = new MemoryFilterProvider(new DecodeParameterResolver(Log),
+ new PngPredictor(), Log);
+
+ /// <summary>
+ /// Merge two PDF documents together with the pages from <paramref name="file1"/>
+ /// followed by <paramref name="file2"/>.
+ /// </summary>
+ public static byte[] Merge(string file1, string file2)
+ {
+ if (file1 == null)
+ {
+ throw new ArgumentNullException(nameof(file1));
+ }
+
+ if (file2 == null)
+ {
+ throw new ArgumentNullException(nameof(file2));
+ }
+
+ return Merge(new[]
+ {
+ File.ReadAllBytes(file1),
+ File.ReadAllBytes(file2)
+ });
+ }
+
+ /// <summary>
+ /// Merge the set of PDF documents.
+ /// </summary>
+ public static byte[] Merge(IReadOnlyList<byte[]> files)
+ {
+ if (files == null)
+ {
+ throw new ArgumentNullException(nameof(files));
+ }
+
+ const bool isLenientParsing = true;
+
+ using var documentBuilder = new DocumentBuilder();
+
+ foreach (var file in files)
+ {
+ var inputBytes = new ByteArrayInputBytes(file);
+ var coreScanner = new CoreTokenScanner(inputBytes);
+
+ var version = FileHeaderParser.Parse(coreScanner, true, Log);
+
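+ // Re-parse this file from its raw bytes: locate the cross-reference data and set up
+ // a token scanner so every object can be read and copied into the merged output.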
+ var bruteForceSearcher = new BruteForceSearcher(inputBytes);
+ var xrefValidator = new XrefOffsetValidator(Log);
+ var objectChecker = new XrefCosOffsetChecker(Log, bruteForceSearcher);
+
+ var crossReferenceParser = new CrossReferenceParser(Log, xrefValidator, objectChecker, new Parser.Parts.CrossReference.CrossReferenceStreamParser(FilterProvider));
+
+ var crossReferenceOffset = FileTrailerParser.GetFirstCrossReferenceOffset(inputBytes, coreScanner, isLenientParsing);
+
+ var objectLocations = bruteForceSearcher.GetObjectLocations();
+
+ CrossReferenceTable crossReference = null;
+
+ var locationProvider = new ObjectLocationProvider(() => crossReference, bruteForceSearcher);
+ // Not using BruteForcedObjectLocationProvider because the offsets it gives are wrong by +2.
+ // var locationProvider = new BruteForcedObjectLocationProvider(objectLocations);
+
+ var pdfScanner = new PdfTokenScanner(inputBytes, locationProvider, FilterProvider, NoOpEncryptionHandler.Instance);
+
+ crossReference = crossReferenceParser.Parse(inputBytes, isLenientParsing, crossReferenceOffset, version.OffsetInFile, pdfScanner, coreScanner);
+
+ var trailerDictionary = crossReference.Trailer;
+
+ var (trailerRef, catalogDictionaryToken) = ParseCatalog(crossReference, pdfScanner, out var encryptionDictionary);
+
+ if (encryptionDictionary != null)
+ {
+ // TODO: Find option of how to pass password for the documents...
+ throw new PdfDocumentEncryptedException("Unable to merge document with password");
+ // pdfScanner.UpdateEncryptionHandler(new EncryptionHandler(encryptionDictionary, trailerDictionary, new[] { string.Empty }));
+ }
+
+ var objectsTree = new ObjectsTree(trailerDictionary, pdfScanner.Get(trailerRef),
+ CatalogFactory.Create(crossReference.Trailer.Root, catalogDictionaryToken, pdfScanner, isLenientParsing));
+
+ var objectsLocation = bruteForceSearcher.GetObjectLocations();
+
+ var root = pdfScanner.Get(trailerDictionary.Root);
+
+ var tokens = new List<IToken>();
+
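+ // Enumerate every object in the file so the token count can be sanity-checked
+ // against the object locations found by the brute-force search.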
+ pdfScanner.Seek(0);
+ while (pdfScanner.MoveNext())
+ {
+ tokens.Add(pdfScanner.CurrentToken);
+ }
+
+ if (tokens.Count != objectLocations.Count)
+ {
+ // Do we really need to check this?
+ throw new PdfDocumentFormatException("Something went wrong while reading the file");
+ }
+
+ documentBuilder.AppendNewDocument(objectsTree, pdfScanner);
+ }
+
+ return documentBuilder.Build();
+ }
+
+ // This method is basically a copy of UglyToad.PdfPig.Parser.PdfDocumentFactory.ParseTrailer()
+ private static (IndirectReference, DictionaryToken) ParseCatalog(CrossReferenceTable crossReferenceTable,
+ IPdfTokenScanner pdfTokenScanner,
+ out EncryptionDictionary encryptionDictionary)
+ {
+ encryptionDictionary = null;
+
+ if (crossReferenceTable.Trailer.EncryptionToken != null)
+ {
+ if (!DirectObjectFinder.TryGet(crossReferenceTable.Trailer.EncryptionToken, pdfTokenScanner,
+ out DictionaryToken encryptionDictionaryToken))
+ {
+ throw new PdfDocumentFormatException($"Unrecognized encryption token in trailer: {crossReferenceTable.Trailer.EncryptionToken}.");
+ }
+
+ encryptionDictionary = EncryptionDictionaryFactory.Read(encryptionDictionaryToken, pdfTokenScanner);
+ }
+
+ var rootDictionary = DirectObjectFinder.Get<DictionaryToken>(crossReferenceTable.Trailer.Root, pdfTokenScanner);
+
+ if (!rootDictionary.ContainsKey(NameToken.Type))
+ {
+ rootDictionary = rootDictionary.With(NameToken.Type, NameToken.Catalog);
+ }
+
+ return (crossReferenceTable.Trailer.Root, rootDictionary);
+ }
+
+ // Note: I don't think making this disposable is a good idea.
+ // Also, any suggestions for a better name?
+ private class DocumentBuilder : IDisposable
+ {
+ private bool isDisposed = false;
+
+ private MemoryStream Memory = new MemoryStream();
+
+ private readonly BuilderContext Context = new BuilderContext();
+
+ private readonly List<IndirectReferenceToken> DocumentPages = new List<IndirectReferenceToken>();
+
+ private IndirectReferenceToken RootPagesIndirectReference;
+
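+ // Reserve an object number for the root /Pages node up front so that each appended
+ // document's pages dictionary can reference it as /Parent before it is actually written.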
+ public DocumentBuilder()
+ {
+ var reserved = Context.ReserveNumber();
+ RootPagesIndirectReference = new IndirectReferenceToken(new IndirectReference(reserved, 0));
+
+ WriteHeaderToStream();
+ }
+
+ private void WriteHeaderToStream()
+ {
+ // Copied from UglyToad.PdfPig.Writer.PdfDocumentBuilder
+ WriteString("%PDF-1.7", Memory);
+
+ // Files with binary data should contain a 2nd comment line followed by 4 bytes with values > 127
+ Memory.WriteText("%");
+ Memory.WriteByte(169);
+ Memory.WriteByte(205);
+ Memory.WriteByte(196);
+ Memory.WriteByte(210);
+ Memory.WriteNewLine();
+ }
+
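+ // Copies the pages of a parsed source document into the output stream, grouped under
+ // their own /Pages node whose parent is the shared (not yet written) root pages object.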
+ public void AppendNewDocument(ObjectsTree newDocument, IPdfTokenScanner tokenScanner)
+ {
+ if (isDisposed)
+ {
+ throw new ObjectDisposedException("Merger already disposed");
+ }
+
+ /*
+ * I decided to give each document's pages their own /Pages object; that way resource name conflicts are avoided.
+ * But I guess that doesn't matter either way? So that part could be eliminated?
+ */
+ var pageReferences = ConstructPageReferences(newDocument.Catalog.PageTree, tokenScanner);
+
+ var pagesDictionary = new DictionaryToken(new Dictionary<NameToken, IToken>
+ {
+ { NameToken.Type, NameToken.Pages },
+ { NameToken.Kids, new ArrayToken(pageReferences) },
+ { NameToken.Count, new NumericToken(pageReferences.Count) },
+ { NameToken.Parent, RootPagesIndirectReference }
+ });
+
+ var pagesRef = Context.WriteObject(Memory, pagesDictionary);
+ DocumentPages.Add(new IndirectReferenceToken(pagesRef.Number));
+ }
+
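+ // Recursively walks the source document's page tree, writing a copy of each node to the
+ // output stream and returning the indirect references of the copied children.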
+ private IReadOnlyList<IndirectReferenceToken> ConstructPageReferences(PageTreeNode treeNode, IPdfTokenScanner tokenScanner)
+ {
+ var reserved = Context.ReserveNumber();
+ var parentIndirect = new IndirectReferenceToken(new IndirectReference(reserved, 0));
+
+ var pageReferences = new List<IndirectReferenceToken>();
+ foreach (var pageNode in treeNode.Children)
+ {
+ if (!pageNode.IsPage)
+ {
+ var nestedPageReferences = ConstructPageReferences(pageNode, tokenScanner);
+ var pagesDictionary = new DictionaryToken(new Dictionary<NameToken, IToken>
+ {
+ { NameToken.Type, NameToken.Pages },
+ { NameToken.Kids, new ArrayToken(nestedPageReferences) },
+ { NameToken.Count, new NumericToken(nestedPageReferences.Count) },
+ { NameToken.Parent, parentIndirect }
+ });
+
+ var pagesRef = Context.WriteObject(Memory, pagesDictionary);
+ pageReferences.Add(new IndirectReferenceToken(pagesRef.Number));
+ continue;
+ }
+
+ var pageDictionary = new Dictionary<NameToken, IToken>
+ {
+ {NameToken.Parent, parentIndirect},
+ };
+
+ foreach(var setPair in pageNode.NodeDictionary.Data)
+ {
+ var name = setPair.Key;
+ var token = setPair.Value;
+
+ if (name == NameToken.Parent)
+ {
+ // Skip Parent token, since we have to reassign it
+ continue;
+ }
+
+ pageDictionary.Add(NameToken.Create(name), CopyToken(token, tokenScanner));
+ }
+
+ var pageRef = Context.WriteObject(Memory, new DictionaryToken(pageDictionary), reserved);
+ pageReferences.Add(new IndirectReferenceToken(pageRef.Number));
+ }
+
+ return pageReferences;
+ }
+
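+ // Recursively copies a token from the source document. Indirect references are resolved,
+ // the referenced object is written to the output and the reference is replaced with one
+ // pointing at the newly written object; all other tokens are currently returned as-is.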
+ private IToken CopyToken(IToken tokenToCopy, IPdfTokenScanner tokenScanner)
+ {
+ if (tokenToCopy is DictionaryToken dictionaryToken)
+ {
+ var newContent = new Dictionary<NameToken, IToken>();
+ foreach (var setPair in dictionaryToken.Data)
+ {
+ var name = setPair.Key;
+ var token = setPair.Value;
+ newContent.Add(NameToken.Create(name), CopyToken(token, tokenScanner));
+ }
+
+ return new DictionaryToken(newContent);
+ }
+ else if (tokenToCopy is ArrayToken arrayToken)
+ {
+ var newArray = new List<IToken>(arrayToken.Length);
+ foreach (var token in arrayToken.Data)
+ {
+ newArray.Add(CopyToken(token, tokenScanner));
+ }
+
+ return new ArrayToken(newArray);
+ }
+ else if (tokenToCopy is IndirectReferenceToken referenceToken)
+ {
+ var tokenObject = DirectObjectFinder.Get(referenceToken.Data, tokenScanner);
+
+ // Is this even allowed?
+ Debug.Assert(!(tokenObject is IndirectReferenceToken));
+
+ var newToken = CopyToken(tokenObject, tokenScanner);
+ var objToken = Context.WriteObject(Memory, newToken);
+ return new IndirectReferenceToken(objToken.Number);
+ }
+ else
+ {
+ // TODO: Should we do a deep copy of the token?
+ return tokenToCopy;
+ }
+ }
+
+ public byte[] Build()
+ {
+ if (isDisposed)
+ {
+ throw new ObjectDisposedException("Merger already disposed");
+ }
+
+ if (DocumentPages.Count < 1)
+ {
+ throw new PdfDocumentFormatException("Empty document");
+ }
+
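+ // Write the root /Pages node pointing at each appended document's page tree,
+ // followed by the /Catalog and the cross-reference table.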
+ var pagesDictionary = new DictionaryToken(new Dictionary<NameToken, IToken>
+ {
+ { NameToken.Type, NameToken.Pages },
+ { NameToken.Kids, new ArrayToken(DocumentPages) },
+ { NameToken.Count, new NumericToken(DocumentPages.Count) }
+ });
+
+ var pagesRef = Context.WriteObject(Memory, pagesDictionary, (int)RootPagesIndirectReference.Data.ObjectNumber);
+
+ var catalog = new DictionaryToken(new Dictionary<NameToken, IToken>
+ {
+ { NameToken.Type, NameToken.Catalog },
+ { NameToken.Pages, new IndirectReferenceToken(pagesRef.Number) }
+ });
+
+ var catalogRef = Context.WriteObject(Memory, catalog);
+
+ TokenWriter.WriteCrossReferenceTable(Context.ObjectOffsets, catalogRef, Memory, null);
+
+ var bytes = Memory.ToArray();
+
+ Dispose();
+
+ return bytes;
+ }
+
+ // Note: This method is copied from UglyToad.PdfPig.Writer.PdfDocumentBuilder
+ private static void WriteString(string text, MemoryStream stream, bool appendBreak = true)
+ {
+ var bytes = OtherEncodings.StringAsLatin1Bytes(text);
+ stream.Write(bytes, 0, bytes.Length);
+ if (appendBreak)
+ {
+ stream.WriteNewLine();
+ }
+ }
+
+ public void Dispose()
+ {
+ if (isDisposed)
+ return;
+
+ Memory.Dispose();
+ Memory = null;
+ isDisposed = true;
+ }
+ }
+
+ // Currently unused because brute-force search gives offsets that are wrong by +2
+ private class BruteForcedObjectLocationProvider : IObjectLocationProvider
+ {
+ private readonly Dictionary<IndirectReference, long> objectLocations;
+ private readonly Dictionary<IndirectReference, ObjectToken> cache = new Dictionary<IndirectReference, ObjectToken>();
+
+ public BruteForcedObjectLocationProvider(IReadOnlyDictionary<IndirectReference, long> objectLocations)
+ {
+ this.objectLocations = objectLocations.ToDictionary(x => x.Key, x => x.Value);
+ }
+
+ public bool TryGetOffset(IndirectReference reference, out long offset)
+ {
+ var result = objectLocations.TryGetValue(reference, out offset);
+ //offset -= 2;
+ return result;
+ }
+
+ public void UpdateOffset(IndirectReference reference, long offset)
+ {
+ objectLocations[reference] = offset;
+ }
+
+ public bool TryGetCached(IndirectReference reference, out ObjectToken objectToken)
+ {
+ return cache.TryGetValue(reference, out objectToken);
+ }
+
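+ // Cache the object when no expected offset is known or caching is forced; otherwise
+ // only cache it when its position matches the offset reported for its reference.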
+ public void Cache(ObjectToken objectToken, bool force = false)
+ {
+ if (!TryGetOffset(objectToken.Number, out var offsetExpected) || force)
+ {
+ cache[objectToken.Number] = objectToken;
+ }
+
+ if (offsetExpected != objectToken.Position)
+ {
+ return;
+ }
+
+ cache[objectToken.Number] = objectToken;
+ }
+ }
+ }
+}
\ No newline at end of file