Merge pull request #254 from Poltuu/pdfmerger_stream_apis

Pdfmerger stream apis
This commit is contained in:
Eliot Jones
2021-01-15 09:34:45 -04:00
committed by GitHub
3 changed files with 122 additions and 72 deletions

View File

@@ -15,20 +15,24 @@
var two = IntegrationHelpers.GetDocumentPath("Single Page Simple - from open office.pdf");
var result = PdfMerger.Merge(one, two);
CanMerge2SimpleDocumentsAssertions(new MemoryStream(result), "Write something inInkscape", "I am a simple pdf.");
}
using (var document = PdfDocument.Open(result, ParsingOptions.LenientParsingOff))
/// <summary>
/// Merges two single-page documents directly into a file-backed output stream
/// and verifies the merged result read back from that same stream.
/// </summary>
[Fact]
public void CanMerge2SimpleDocumentsIntoStream()
{
    var one = IntegrationHelpers.GetDocumentPath("Single Page Simple - from inkscape.pdf");
    var two = IntegrationHelpers.GetDocumentPath("Single Page Simple - from open office.pdf");

    using (var outputStream = GetSelfDestructingNewFileStream("merge2"))
    {
        if (outputStream is null)
        {
            return;//we can't create a file in this test session
        }

        // Write the merged document first; the assertions helper then re-reads the stream.
        PdfMerger.Merge(one, two, outputStream);
        CanMerge2SimpleDocumentsAssertions(outputStream, "Write something inInkscape", "I am a simple pdf.");
    }
}
@@ -39,20 +43,22 @@
var two = IntegrationHelpers.GetDocumentPath("Single Page Simple - from inkscape.pdf");
var result = PdfMerger.Merge(one, two);
using (var document = PdfDocument.Open(result, ParsingOptions.LenientParsingOff))
CanMerge2SimpleDocumentsAssertions(new MemoryStream(result), "I am a simple pdf.", "Write something inInkscape");
}
/// <summary>
/// Shared assertions for a merge of two single-page documents: the result must have
/// two pages, be version 1.5 and contain the expected text on each page.
/// </summary>
/// <param name="stream">Stream containing the merged document; it is rewound before reading.</param>
/// <param name="page1Text">Expected text of the first page.</param>
/// <param name="page2Text">Expected text of the second page.</param>
private void CanMerge2SimpleDocumentsAssertions(Stream stream, string page1Text, string page2Text)
{
    // Rewind so parsing starts at the beginning even when the caller has just written to the stream.
    stream.Position = 0;

    using (var document = PdfDocument.Open(stream, ParsingOptions.LenientParsingOff))
    {
        Assert.Equal(2, document.NumberOfPages);
        Assert.Equal(1.5m, document.Version);

        // Expectations come from the parameters only, so callers can check either merge order.
        var page1 = document.GetPage(1);
        Assert.Equal(page1Text, page1.Text);

        var page2 = document.GetPage(2);
        Assert.Equal(page2Text, page2.Text);
    }
}
@@ -174,6 +180,24 @@
}
}
/// <summary>
/// Creates "Merger/{name}.pdf" and opens it with <see cref="FileOptions.DeleteOnClose"/>,
/// so the file is removed once the returned stream is closed.
/// </summary>
/// <param name="name">File name without extension.</param>
/// <returns>The open file stream, or <c>null</c> when the directory or file could not be created.</returns>
private static FileStream GetSelfDestructingNewFileStream(string name)
{
    const string folder = "Merger";

    try
    {
        // CreateDirectory does nothing when the directory already exists.
        Directory.CreateDirectory(folder);

        var path = Path.Combine(folder, $"{name}.pdf");
        return File.Create(path, 4096, FileOptions.DeleteOnClose);
    }
    catch
    {
        // Best effort: callers treat null as "cannot create files in this session".
        return null;
    }
}
[Fact]
public void NoStackoverflow()
{

View File

@@ -33,21 +33,28 @@
/// </summary>
public static byte[] Merge(string file1, string file2, IReadOnlyList<int> file1Selection = null, IReadOnlyList<int> file2Selection = null)
{
    // Null checks on the paths are performed by the stream-based overload this delegates to.
    using (var output = new MemoryStream())
    {
        Merge(file1, file2, output, file1Selection, file2Selection);

        // Copy the buffered result out before the MemoryStream is disposed.
        return output.ToArray();
    }
}
if (file2 == null)
/// <summary>
/// Merge two PDF documents together with the pages from <paramref name="file1"/> followed by <paramref name="file2"/> into the output stream.
/// </summary>
/// <param name="file1">Path of the first document.</param>
/// <param name="file2">Path of the second document.</param>
/// <param name="output">Stream the merged document is written to.</param>
/// <param name="file1Selection">Optional page selection for <paramref name="file1"/>, passed through to the merger.</param>
/// <param name="file2Selection">Optional page selection for <paramref name="file2"/>, passed through to the merger.</param>
/// <exception cref="ArgumentNullException"><paramref name="file1"/> or <paramref name="file2"/> is null.</exception>
public static void Merge(string file1, string file2, Stream output, IReadOnlyList<int> file1Selection = null, IReadOnlyList<int> file2Selection = null)
{
    _ = file1 ?? throw new ArgumentNullException(nameof(file1));
    _ = file2 ?? throw new ArgumentNullException(nameof(file2));

    // Both inputs are read from disk as streams and disposed once the merge completes.
    using (var stream1 = new StreamInputBytes(File.OpenRead(file1)))
    {
        using (var stream2 = new StreamInputBytes(File.OpenRead(file2)))
        {
            // Selections are paired with the inputs by index.
            Merge(new[] { stream1, stream2 }, output, new[] { file1Selection, file2Selection });
        }
    }
}
/// <summary>
@@ -55,21 +62,36 @@
/// </summary>
public static byte[] Merge(params string[] filePaths)
{
    // Per-path null checks are performed by the stream-based overload this delegates to.
    using (var output = new MemoryStream())
    {
        Merge(output, filePaths);

        // Copy the buffered result out before the MemoryStream is disposed.
        return output.ToArray();
    }
}
if (filePath == null)
/// <summary>
/// Merge multiple PDF documents together with the pages in the order the file paths are provided into the output stream.
/// </summary>
/// <param name="output">Stream the merged document is written to.</param>
/// <param name="filePaths">Paths of the documents to merge, in output order.</param>
/// <exception cref="ArgumentNullException">An entry of <paramref name="filePaths"/> is null.</exception>
public static void Merge(Stream output, params string[] filePaths)
{
    var streams = new List<StreamInputBytes>(filePaths.Length);
    try
    {
        for (var i = 0; i < filePaths.Length; i++)
        {
            var filePath = filePaths[i] ?? throw new ArgumentNullException(nameof(filePaths), $"Null filepath at index {i}.");
            streams.Add(new StreamInputBytes(File.OpenRead(filePath), true));
        }

        Merge(streams, output, null);
    }
    finally
    {
        // Runs even when a later path is null/unopenable or the merge throws,
        // so every input created so far is disposed.
        foreach (var stream in streams)
        {
            stream.Dispose();
        }
    }
}
/// <summary>
@@ -77,26 +99,47 @@
/// </summary>
public static byte[] Merge(IReadOnlyList<byte[]> files, IReadOnlyList<IReadOnlyList<int>> pagesBundle = null)
{
    _ = files ?? throw new ArgumentNullException(nameof(files));

    using (var output = new MemoryStream())
    {
        // Wrap each in-memory document so it can go through the shared merge routine.
        Merge(files.Select(f => new ByteArrayInputBytes(f)).ToArray(), output, pagesBundle);

        // Copy the buffered result out before the MemoryStream is disposed.
        return output.ToArray();
    }
}
/// <summary>
/// Merge the set of PDF documents into the output stream.
/// The caller must manage disposing the streams; merging does not dispose them.
/// </summary>
/// <param name="streams">
/// A list of streams for the files contents, these must support reading and seeking.
/// </param>
/// <param name="output">Must be writable</param>
/// <param name="pagesBundle">Optional page selections, paired with <paramref name="streams"/> by index.</param>
/// <exception cref="ArgumentNullException"><paramref name="streams"/> or <paramref name="output"/> is null.</exception>
public static void Merge(IReadOnlyList<Stream> streams, Stream output, IReadOnlyList<IReadOnlyList<int>> pagesBundle = null)
{
    if (streams == null)
    {
        throw new ArgumentNullException(nameof(streams));
    }

    if (output == null)
    {
        throw new ArgumentNullException(nameof(output));
    }

    // Wrap every caller-owned stream before handing the batch to the shared merge routine.
    var inputs = streams.Select(source => new StreamInputBytes(source, false)).ToArray();
    Merge(inputs, output, pagesBundle);
}
private static void Merge(IReadOnlyList<IInputBytes> files, Stream output, IReadOnlyList<IReadOnlyList<int>> pagesBundle)
{
const bool isLenientParsing = false;
var documentBuilder = new DocumentMerger();
var documentBuilder = new DocumentMerger(output);
foreach (var fileIndex in Enumerable.Range(0, files.Count))
{
var file = files[fileIndex];
IReadOnlyList<int> pages = null;
if (pagesBundle != null && fileIndex < pagesBundle.Count)
{
pages = pagesBundle[fileIndex];
}
var inputBytes = new ByteArrayInputBytes(file);
var inputBytes = files[fileIndex];
var coreScanner = new CoreTokenScanner(inputBytes);
var version = FileHeaderParser.Parse(coreScanner, isLenientParsing, Log);
@@ -125,7 +168,7 @@
documentBuilder.AppendDocument(documentCatalog, version.Version, pdfScanner, pages);
}
return documentBuilder.Build();
documentBuilder.Build();
}
// This method is basically a copy of the method UglyToad.PdfPig.Parser.PdfDocumentFactory.ParseTrailer()
@@ -162,15 +205,16 @@
private const int ARTIFICIAL_NODE_LIMIT = 100;
private readonly PdfStreamWriter context = new PdfStreamWriter();
private readonly PdfStreamWriter context;
private readonly List<IndirectReferenceToken> pagesTokenReferences = new List<IndirectReferenceToken>();
private readonly IndirectReferenceToken rootPagesReference;
private decimal currentVersion = DefaultVersion;
private int pageCount = 0;
/// <summary>
/// Creates a merger which writes the merged document to <paramref name="baseStream"/>.
/// </summary>
/// <param name="baseStream">Stream the merged document is written to.</param>
public DocumentMerger(Stream baseStream)
{
    // disposeStream is false: the output stream belongs to the caller.
    context = new PdfStreamWriter(baseStream, false);

    // Reserve the object number for the root pages node up front.
    rootPagesReference = context.ReserveNumberToken();
}
@@ -308,7 +352,7 @@
CreateTree();
}
public byte[] Build()
public void Build()
{
if (pagesTokenReferences.Count < 1)
{
@@ -334,11 +378,7 @@
context.Flush(currentVersion, catalogRef);
var bytes = context.ToArray();
Close();
return bytes;
}
public void Close()

View File

@@ -23,11 +23,14 @@
public bool DisposeStream { get; set; }
public PdfStreamWriter() : this(new MemoryStream()) { }
/// <summary>
/// Creates a writer which writes to <paramref name="baseStream"/>.
/// </summary>
/// <param name="baseStream">The stream to write to; must be writable.</param>
/// <param name="disposeStream">Initial value of <see cref="DisposeStream"/>.</param>
/// <exception cref="ArgumentNullException"><paramref name="baseStream"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="baseStream"/> is not writable.</exception>
public PdfStreamWriter(Stream baseStream, bool disposeStream = true)
{
    if (baseStream == null)
    {
        throw new ArgumentNullException(nameof(baseStream));
    }

    if (!baseStream.CanWrite)
    {
        // Name the offending parameter so the failure is traceable from the exception alone.
        throw new ArgumentException("Output stream must be writable", nameof(baseStream));
    }

    Stream = baseStream;
    DisposeStream = disposeStream;
}
@@ -109,23 +112,6 @@
return new IndirectReferenceToken(new IndirectReference(ReserveNumber(), 0));
}
/// <summary>
/// Copies the entire content of the underlying stream into a new array,
/// leaving the stream position where it was before the call.
/// </summary>
/// <returns>All bytes of the stream, from its beginning to its current length.</returns>
/// <exception cref="EndOfStreamException">The stream ended before all of its reported length could be read.</exception>
public byte[] ToArray()
{
    var currentPosition = Stream.Position;
    try
    {
        Stream.Seek(0, SeekOrigin.Begin);

        var bytes = new byte[Stream.Length];
        var offset = 0;

        // Stream.Read may legitimately return fewer bytes than requested, so keep
        // reading until the buffer is full; only a zero-length read means the data ran out.
        while (offset < bytes.Length)
        {
            var read = Stream.Read(bytes, offset, bytes.Length - offset);
            if (read <= 0)
            {
                throw new EndOfStreamException("Unable to read all the bytes from stream");
            }

            offset += read;
        }

        return bytes;
    }
    finally
    {
        // Restore the write position even when reading fails.
        Stream.Seek(currentPosition, SeekOrigin.Begin);
    }
}
public void Dispose()
{
if (!DisposeStream)