From 5dbee43dbb35698027186451bf6b055761c31626 Mon Sep 17 00:00:00 2001 From: romain v Date: Wed, 6 Jan 2021 12:14:12 +0100 Subject: [PATCH] Adds output stream support in pdfmerger methods --- .../Writer/PdfMergerTests.cs | 60 ++++++++---- src/UglyToad.PdfPig/Writer/PdfMerger.cs | 92 +++++++++++-------- src/UglyToad.PdfPig/Writer/PdfStreamWriter.cs | 24 +---- 3 files changed, 99 insertions(+), 77 deletions(-) diff --git a/src/UglyToad.PdfPig.Tests/Writer/PdfMergerTests.cs b/src/UglyToad.PdfPig.Tests/Writer/PdfMergerTests.cs index ce906c43..fd55b02f 100644 --- a/src/UglyToad.PdfPig.Tests/Writer/PdfMergerTests.cs +++ b/src/UglyToad.PdfPig.Tests/Writer/PdfMergerTests.cs @@ -15,20 +15,24 @@ var two = IntegrationHelpers.GetDocumentPath("Single Page Simple - from open office.pdf"); var result = PdfMerger.Merge(one, two); + CanMerge2SimpleDocumentsAssertions(new MemoryStream(result), "Write something inInkscape", "I am a simple pdf."); + } - using (var document = PdfDocument.Open(result, ParsingOptions.LenientParsingOff)) + [Fact] + public void CanMerge2SimpleDocumentsIntoStream() + { + var one = IntegrationHelpers.GetDocumentPath("Single Page Simple - from inkscape.pdf"); + var two = IntegrationHelpers.GetDocumentPath("Single Page Simple - from open office.pdf"); + + using (var outputStream = GetSelfDestructingNewFileStream("merge2")) { - Assert.Equal(2, document.NumberOfPages); + if (outputStream is null) + { + return;//we can't create a file in this test session + } - Assert.Equal(1.5m, document.Version); - - var page1 = document.GetPage(1); - - Assert.Equal("Write something inInkscape", page1.Text); - - var page2 = document.GetPage(2); - - Assert.Equal("I am a simple pdf.", page2.Text); + PdfMerger.Merge(one, two, outputStream); + CanMerge2SimpleDocumentsAssertions(outputStream, "Write something inInkscape", "I am a simple pdf."); } } @@ -39,20 +43,22 @@ var two = IntegrationHelpers.GetDocumentPath("Single Page Simple - from inkscape.pdf"); var result = PdfMerger.Merge(one, two); - - using (var document = PdfDocument.Open(result, ParsingOptions.LenientParsingOff)) + CanMerge2SimpleDocumentsAssertions(new MemoryStream(result), "I am a simple pdf.", "Write something inInkscape"); + } + + private void CanMerge2SimpleDocumentsAssertions(Stream stream, string page1Text, string page2Text) + { + stream.Position = 0; + using (var document = PdfDocument.Open(stream, ParsingOptions.LenientParsingOff)) { Assert.Equal(2, document.NumberOfPages); - Assert.Equal(1.5m, document.Version); var page1 = document.GetPage(1); - - Assert.Equal("I am a simple pdf.", page1.Text); + Assert.Equal(page1Text, page1.Text); var page2 = document.GetPage(2); - - Assert.Equal("Write something inInkscape", page2.Text); + Assert.Equal(page2Text, page2.Text); } } @@ -174,6 +180,24 @@ } } + private static FileStream GetSelfDestructingNewFileStream(string name) + { + try + { + if (!Directory.Exists("Merger")) + { + Directory.CreateDirectory("Merger"); + } + + var output = Path.Combine("Merger", $"{name}.pdf"); + return File.Create(output, 4096, FileOptions.DeleteOnClose); + } + catch + { + return null; + } + } + [Fact] public void NoStackoverflow() { diff --git a/src/UglyToad.PdfPig/Writer/PdfMerger.cs b/src/UglyToad.PdfPig/Writer/PdfMerger.cs index cfc1e89c..654977a5 100644 --- a/src/UglyToad.PdfPig/Writer/PdfMerger.cs +++ b/src/UglyToad.PdfPig/Writer/PdfMerger.cs @@ -33,19 +33,28 @@ /// public static byte[] Merge(string file1, string file2, IReadOnlyList file1Selection = null, IReadOnlyList file2Selection = null) { - if (file1 == null) + using (var output = new MemoryStream()) { - throw new ArgumentNullException(nameof(file1)); + Merge(file1, file2, output, file1Selection, file2Selection); + return output.ToArray(); } + } - if (file2 == null) + /// + /// Merge two PDF documents together with the pages from followed by into the output stream. + /// + public static void Merge(string file1, string file2, Stream output, IReadOnlyList file1Selection = null, IReadOnlyList file2Selection = null) + { + _ = file1 ?? throw new ArgumentNullException(nameof(file1)); + _ = file2 ?? throw new ArgumentNullException(nameof(file2)); + + using (var stream1 = new StreamInputBytes(File.OpenRead(file1))) { - throw new ArgumentNullException(nameof(file2)); + using (var stream2 = new StreamInputBytes(File.OpenRead(file2))) + { + Merge(new[] { stream1, stream2 }, output, new[] { file1Selection, file2Selection }); + } } - - using var stream1 = new StreamInputBytes(File.OpenRead(file1)); - using var stream2 = new StreamInputBytes(File.OpenRead(file2)); - return Merge(new[] { stream1, stream2 }, new [] { file1Selection, file2Selection }); } /// @@ -53,26 +62,32 @@ /// public static byte[] Merge(params string[] filePaths) { - var bytes = new List(filePaths.Length); + using (var output = new MemoryStream()) + { + Merge(output, filePaths); + return output.ToArray(); + } + } + /// + /// Merge multiple PDF documents together with the pages in the order the file paths are provided into the output stream + /// + public static void Merge(Stream output, params string[] filePaths) + { + var streams = new List(filePaths.Length); try { for (var i = 0; i < filePaths.Length; i++) { - var filePath = filePaths[i]; - if (filePath == null) - { - throw new ArgumentNullException(nameof(filePaths), $"Null filepath at index {i}."); - } - - bytes.Add(new StreamInputBytes(File.OpenRead(filePath), true)); + var filePath = filePaths[i] ?? throw new ArgumentNullException(nameof(filePaths), $"Null filepath at index {i}."); + streams.Add(new StreamInputBytes(File.OpenRead(filePath), true)); } - return Merge(bytes, null); + Merge(streams, output, null); } finally { - foreach (var stream in bytes) + foreach (var stream in streams) { stream.Dispose(); } @@ -84,37 +99,37 @@ /// public static byte[] Merge(IReadOnlyList files, IReadOnlyList> pagesBundle = null) { - if (files == null) - { - throw new ArgumentNullException(nameof(files)); - } + _ = files ?? throw new ArgumentNullException(nameof(files)); - return Merge(files.Select(f => new ByteArrayInputBytes(f)).ToArray(), pagesBundle); + using (var output = new MemoryStream()) + { + Merge(files.Select(f => new ByteArrayInputBytes(f)).ToArray(), output, pagesBundle); + return output.ToArray(); + } } /// - /// Merge the set of PDF documents. + /// Merge the set of PDF documents into the output stream /// The caller must manage disposing the stream. The created PdfDocument will not dispose the stream. /// /// A list of streams for the files contents, this must support reading and seeking. /// + /// Must be writable /// /// - public static byte[] Merge(IReadOnlyList streams, IReadOnlyList> pagesBundle = null) + public static void Merge(IReadOnlyList streams, Stream output, IReadOnlyList> pagesBundle = null) { - if (streams == null) - { - throw new ArgumentNullException(nameof(streams)); - } + _ = streams ?? throw new ArgumentNullException(nameof(streams)); + _ = output ?? throw new ArgumentNullException(nameof(output)); - return Merge(streams.Select(f => new StreamInputBytes(f, false)).ToArray(), pagesBundle); + Merge(streams.Select(f => new StreamInputBytes(f, false)).ToArray(), output, pagesBundle); } - private static byte[] Merge(IReadOnlyList files, IReadOnlyList> pagesBundle = null) + private static void Merge(IReadOnlyList files, Stream output, IReadOnlyList> pagesBundle) { const bool isLenientParsing = false; - var documentBuilder = new DocumentMerger(); + var documentBuilder = new DocumentMerger(output); foreach (var fileIndex in Enumerable.Range(0, files.Count)) { @@ -153,7 +168,7 @@ documentBuilder.AppendDocument(documentCatalog, version.Version, pdfScanner, pages); } - return documentBuilder.Build(); + documentBuilder.Build(); } // This method is a basically a copy of the method UglyToad.PdfPig.Parser.PdfDocumentFactory.ParseTrailer() @@ -190,15 +205,16 @@ private const int ARTIFICIAL_NODE_LIMIT = 100; - private readonly PdfStreamWriter context = new PdfStreamWriter(); + private readonly PdfStreamWriter context; private readonly List pagesTokenReferences = new List(); private readonly IndirectReferenceToken rootPagesReference; private decimal currentVersion = DefaultVersion; private int pageCount = 0; - public DocumentMerger() + public DocumentMerger(Stream baseStream) { + context = new PdfStreamWriter(baseStream, false); rootPagesReference = context.ReserveNumberToken(); } @@ -336,7 +352,7 @@ CreateTree(); } - public byte[] Build() + public void Build() { if (pagesTokenReferences.Count < 1) { @@ -362,11 +378,7 @@ context.Flush(currentVersion, catalogRef); - var bytes = context.ToArray(); - Close(); - - return bytes; } public void Close() diff --git a/src/UglyToad.PdfPig/Writer/PdfStreamWriter.cs b/src/UglyToad.PdfPig/Writer/PdfStreamWriter.cs index dec4c269..81f1c77a 100644 --- a/src/UglyToad.PdfPig/Writer/PdfStreamWriter.cs +++ b/src/UglyToad.PdfPig/Writer/PdfStreamWriter.cs @@ -23,11 +23,14 @@ public bool DisposeStream { get; set; } - public PdfStreamWriter() : this(new MemoryStream()) { } - public PdfStreamWriter(Stream baseStream, bool disposeStream = true) { Stream = baseStream ?? throw new ArgumentNullException(nameof(baseStream)); + if (!baseStream.CanWrite) + { + throw new ArgumentException("Output stream must be writable"); + } + DisposeStream = disposeStream; } @@ -109,23 +112,6 @@ return new IndirectReferenceToken(new IndirectReference(ReserveNumber(), 0)); } - public byte[] ToArray() - { - var currentPosition = Stream.Position; - Stream.Seek(0, SeekOrigin.Begin); - - var bytes = new byte[Stream.Length]; - - if (Stream.Read(bytes, 0, bytes.Length) != bytes.Length) - { - throw new Exception("Unable to read all the bytes from stream"); - } - - Stream.Seek(currentPosition, SeekOrigin.Begin); - - return bytes; - } - public void Dispose() { if (!DisposeStream)