Merge pull request #279 from plaisted/edit-docs-v2

Add pages from PdfDocument to PdfDocumentBuilder
This commit is contained in:
Eliot Jones
2021-02-14 09:58:08 -04:00
committed by GitHub
22 changed files with 1611 additions and 976 deletions

View File

@@ -0,0 +1,52 @@
namespace UglyToad.PdfPig.Tests.Integration
{
    using PdfPig.Tokens;
    using System.Collections.Generic;
    using Xunit;

    /// <summary>
    /// Tests for replacing indirect objects through <c>PdfDocument.Advanced</c>.
    /// Both tests swap the first page's content stream for an empty stream and then
    /// check that the page no longer yields any letters.
    /// </summary>
    public class AdvancedPdfDocumentAccessTests
    {
        private const string DocumentName = "Single Page Simple - from inkscape.pdf";

        [Fact]
        public void ReplacesObjectsFunc()
        {
            var path = IntegrationHelpers.GetDocumentPath(DocumentName);
            using (var document = PdfDocument.Open(path))
            {
                var contents = GetFirstPageContentsReference(document);

                // The existing token is ignored on purpose: the replacement is always an empty stream.
                document.Advanced.ReplaceIndirectObject(contents.Data, existing => CreateEmptyStream());

                var page = document.GetPage(1);
                Assert.Empty(page.Letters);
            }
        }

        [Fact]
        public void ReplacesObjects()
        {
            var path = IntegrationHelpers.GetDocumentPath(DocumentName);
            using (var document = PdfDocument.Open(path))
            {
                var replacement = CreateEmptyStream();
                var contents = GetFirstPageContentsReference(document);

                document.Advanced.ReplaceIndirectObject(contents.Data, replacement);

                var page = document.GetPage(1);
                Assert.Empty(page.Letters);
            }
        }

        /// <summary>
        /// Reads the /Contents entry of page 1 and asserts that it is an indirect reference,
        /// so a differently shaped test document fails with a clear assertion rather than a
        /// <see cref="System.NullReferenceException"/>.
        /// </summary>
        private static IndirectReferenceToken GetFirstPageContentsReference(PdfDocument document)
        {
            var pg = document.Structure.Catalog.GetPageNode(1).NodeDictionary;
            var contents = pg.Data[NameToken.Contents] as IndirectReferenceToken;
            Assert.NotNull(contents);
            return contents;
        }

        /// <summary>
        /// Builds a stream token with no data whose dictionary only declares a /Length of 0.
        /// </summary>
        private static StreamToken CreateEmptyStream()
        {
            var dict = new Dictionary<NameToken, IToken>();
            dict[NameToken.Length] = new NumericToken(0);
            return new StreamToken(new DictionaryToken(dict), new List<byte>());
        }
    }
}

View File

@@ -217,6 +217,7 @@
"UglyToad.PdfPig.Writer.PdfAStandard",
"UglyToad.PdfPig.Writer.PdfDocumentBuilder",
"UglyToad.PdfPig.Writer.PdfMerger",
"UglyToad.PdfPig.Writer.PdfWriterType",
"UglyToad.PdfPig.Writer.PdfPageBuilder",
"UglyToad.PdfPig.Writer.TokenWriter",
"UglyToad.PdfPig.XObjects.XObjectImage"

View File

@@ -45,6 +45,11 @@ namespace UglyToad.PdfPig.Tests.Tokens
return Objects[reference];
}
// Token replacement is not supported by this test scanner; any call throws.
public void ReplaceToken(IndirectReference reference, IToken token)
    => throw new NotImplementedException();
public void Dispose()
{
}

View File

@@ -6,6 +6,7 @@
using Integration;
using PdfPig.Core;
using PdfPig.Fonts.Standard14Fonts;
using PdfPig.Tokens;
using PdfPig.Writer;
using Tests.Fonts.TrueType;
using Xunit;
@@ -691,6 +692,297 @@
}
}
[Fact]
public void CanAddHelloWorldToSimplePage()
{
    // Index of the first letter written by this test in the rebuilt page. The expected page text
    // below starts with the 18-character string "I am a simple pdf." coming from the source page.
    const int existingLetterCount = 18;

    var path = IntegrationHelpers.GetDocumentPath("Single Page Simple - from open office.pdf");

    // Both the source document and the builder are disposable; release them when the test ends.
    using (var doc = PdfDocument.Open(path))
    using (var builder = new PdfDocumentBuilder())
    {
        var page = builder.AddPage(doc, 1);

        page.DrawLine(new PdfPoint(30, 520), new PdfPoint(360, 520));
        page.DrawLine(new PdfPoint(360, 520), new PdfPoint(360, 250));

        page.SetStrokeColor(250, 132, 131);
        page.DrawLine(new PdfPoint(25, 70), new PdfPoint(100, 70), 3);
        page.ResetColor();
        page.DrawRectangle(new PdfPoint(30, 200), 250, 100, 0.5m);
        page.DrawRectangle(new PdfPoint(30, 100), 250, 100, 0.5m);

        var file = TrueTypeTestHelper.GetFileBytes("Andada-Regular.ttf");
        var font = builder.AddTrueTypeFont(file);

        var letters = page.AddText("Hello World!", 12, new PdfPoint(30, 50), font);

        Assert.NotEmpty(page.CurrentStream.Operations);

        var b = builder.Build();

        WriteFile(nameof(CanAddHelloWorldToSimplePage), b);

        Assert.NotEmpty(b);

        using (var document = PdfDocument.Open(b))
        {
            var page1 = document.GetPage(1);

            Assert.Equal("I am a simple pdf.Hello World!", page1.Text);

            var h = page1.Letters[existingLetterCount];

            Assert.Equal("H", h.Value);
            Assert.Equal("Andada-Regular", h.FontName);

            var comparer = new DoubleComparer(0.01);
            var pointComparer = new PointComparer(comparer);

            // Every letter reported by the writer must be read back with the same value and geometry.
            for (int i = 0; i < letters.Count; i++)
            {
                var readerLetter = page1.Letters[i + existingLetterCount];
                var writerLetter = letters[i];

                Assert.Equal(readerLetter.Value, writerLetter.Value);
                Assert.Equal(readerLetter.Location, writerLetter.Location, pointComparer);
                Assert.Equal(readerLetter.FontSize, writerLetter.FontSize, comparer);
                Assert.Equal(readerLetter.GlyphRectangle.Width, writerLetter.GlyphRectangle.Width, comparer);
                Assert.Equal(readerLetter.GlyphRectangle.Height, writerLetter.GlyphRectangle.Height, comparer);
                Assert.Equal(readerLetter.GlyphRectangle.BottomLeft, writerLetter.GlyphRectangle.BottomLeft, pointComparer);
            }
        }
    }
}
[Fact]
public void CanMerge2SimpleDocumentsReversed_Builder()
{
    var one = IntegrationHelpers.GetDocumentPath("Single Page Simple - from open office.pdf");
    var two = IntegrationHelpers.GetDocumentPath("Single Page Simple - from inkscape.pdf");

    // The builder is disposable, so it is scoped with the source documents
    // (same shape as CanMerge2SimpleDocuments_Builder).
    using (var docOne = PdfDocument.Open(one))
    using (var docTwo = PdfDocument.Open(two))
    using (var builder = new PdfDocumentBuilder())
    {
        builder.AddPage(docOne, 1);
        builder.AddPage(docTwo, 1);
        var result = builder.Build();

        // Version check is skipped (last argument false).
        PdfMergerTests.CanMerge2SimpleDocumentsAssertions(new MemoryStream(result), "I am a simple pdf.", "Write something inInkscape", false);
    }
}
[Fact]
public void CanMerge2SimpleDocuments_Builder()
{
    var inkscapePath = IntegrationHelpers.GetDocumentPath("Single Page Simple - from inkscape.pdf");
    var openOfficePath = IntegrationHelpers.GetDocumentPath("Single Page Simple - from open office.pdf");

    using (var inkscape = PdfDocument.Open(inkscapePath))
    using (var openOffice = PdfDocument.Open(openOfficePath))
    using (var builder = new PdfDocumentBuilder())
    {
        // Copy the first page of each source, inkscape first.
        builder.AddPage(inkscape, 1);
        builder.AddPage(openOffice, 1);

        var merged = new MemoryStream(builder.Build());

        // Shared merge assertions, with the version check turned off.
        PdfMergerTests.CanMerge2SimpleDocumentsAssertions(merged, "Write something inInkscape", "I am a simple pdf.", false);
    }
}
[Fact]
public void CanDedupObjectsFromSameDoc_Builder()
{
    var one = IntegrationHelpers.GetDocumentPath("Multiple Page - from Mortality Statistics.pdf");

    // The builder is disposable, so it is scoped together with the source document.
    using (var doc = PdfDocument.Open(one))
    using (var builder = new PdfDocumentBuilder())
    {
        // The same page is added twice; objects it references should only be written once.
        builder.AddPage(doc, 1);
        builder.AddPage(doc, 1);

        var result = builder.Build();

        using (var document = PdfDocument.Open(result, ParsingOptions.LenientParsingOff))
        {
            Assert.Equal(2, document.NumberOfPages);
            Assert.True(document.Structure.CrossReferenceTable.ObjectOffsets.Count <= 29,
                "Expected object count to be lower than 30"); // 45 objects with duplicates, 29 with correct re-use
        }
    }
}
[Fact]
public void CanDedupObjectsFromDifferentDoc_HashBuilder()
{
    var sourcePath = IntegrationHelpers.GetDocumentPath("Multiple Page - from Mortality Statistics.pdf");

    // The same file is opened twice so the two pages come from distinct PdfDocument instances.
    using (var firstCopy = PdfDocument.Open(sourcePath))
    using (var secondCopy = PdfDocument.Open(sourcePath))
    using (var builder = new PdfDocumentBuilder(new MemoryStream(), true, PdfWriterType.ObjectInMemoryDedup))
    {
        builder.AddPage(firstCopy, 1);
        builder.AddPage(secondCopy, 1);

        var output = builder.Build();

        using (var rebuilt = PdfDocument.Open(output, ParsingOptions.LenientParsingOff))
        {
            Assert.Equal(2, rebuilt.NumberOfPages);

            // 45 objects with duplicates, 29 with correct re-use
            var objectCount = rebuilt.Structure.CrossReferenceTable.ObjectOffsets.Count;
            Assert.True(objectCount <= 29, "Expected object count to be lower than 30");
        }
    }
}
[Fact]
public void CanCreatePageTree()
{
    // 25^3 + 1 blank pages.
    var expectedPages = 25 * 25 * 25 + 1;

    using (var builder = new PdfDocumentBuilder())
    {
        var added = 0;
        while (added < expectedPages)
        {
            builder.AddPage(PageSize.A4);
            added++;
        }

        var output = builder.Build();
        WriteFile(nameof(CanCreatePageTree), output);

        using (var reopened = PdfDocument.Open(output, ParsingOptions.LenientParsingOff))
        {
            Assert.Equal(expectedPages, reopened.NumberOfPages);
        }
    }
}
[Fact]
public void CanWriteEmptyContentStream()
{
    using (var builder = new PdfDocumentBuilder())
    {
        // A page with nothing drawn on it.
        builder.AddPage(PageSize.A4);

        var output = builder.Build();

        using (var reopened = PdfDocument.Open(output, ParsingOptions.LenientParsingOff))
        {
            Assert.Equal(1, reopened.NumberOfPages);

            // Even an empty page is expected to reference exactly one content stream.
            var firstPage = reopened.GetPage(1);
            var contents = firstPage.Dictionary.Data[NameToken.Contents] as IndirectReferenceToken;
            Assert.NotNull(contents);
        }
    }
}
[Fact]
public void CanWriteSingleContentStream()
{
    using (var builder = new PdfDocumentBuilder())
    {
        var pageBuilder = builder.AddPage(PageSize.A4);
        pageBuilder.DrawLine(new PdfPoint(1, 1), new PdfPoint(2, 2));

        var output = builder.Build();

        using (var reopened = PdfDocument.Open(output, ParsingOptions.LenientParsingOff))
        {
            Assert.Equal(1, reopened.NumberOfPages);

            // A page with a single drawn stream should reference it directly rather than via an array.
            var firstPage = reopened.GetPage(1);
            var contents = firstPage.Dictionary.Data[NameToken.Contents] as IndirectReferenceToken;
            Assert.NotNull(contents);
        }
    }
}
[Fact]
public void CanWriteAndIgnoreEmptyContentStream()
{
    using (var builder = new PdfDocumentBuilder())
    {
        var pageBuilder = builder.AddPage(PageSize.A4);
        pageBuilder.DrawLine(new PdfPoint(1, 1), new PdfPoint(2, 2));

        // Start a second content stream but leave it empty.
        pageBuilder.NewContentStreamAfter();

        var output = builder.Build();

        using (var reopened = PdfDocument.Open(output, ParsingOptions.LenientParsingOff))
        {
            Assert.Equal(1, reopened.NumberOfPages);

            // The empty stream should be dropped, leaving a single directly referenced stream.
            var firstPage = reopened.GetPage(1);
            var contents = firstPage.Dictionary.Data[NameToken.Contents] as IndirectReferenceToken;
            Assert.NotNull(contents);
        }
    }
}
[Fact]
public void CanWriteMultipleContentStream()
{
    using (var builder = new PdfDocumentBuilder())
    {
        var pageBuilder = builder.AddPage(PageSize.A4);

        // Draw into two separate content streams.
        pageBuilder.DrawLine(new PdfPoint(1, 1), new PdfPoint(2, 2));
        pageBuilder.NewContentStreamAfter();
        pageBuilder.DrawLine(new PdfPoint(1, 1), new PdfPoint(2, 2));

        var output = builder.Build();

        using (var reopened = PdfDocument.Open(output, ParsingOptions.LenientParsingOff))
        {
            Assert.Equal(1, reopened.NumberOfPages);

            // With more than one non-empty stream, /Contents should be an array holding both.
            var firstPage = reopened.GetPage(1);
            var contentArray = firstPage.Dictionary.Data[NameToken.Contents] as ArrayToken;
            Assert.NotNull(contentArray);
            Assert.Equal(2, contentArray.Length);
        }
    }
}
[InlineData("Single Page Simple - from google drive.pdf")]
[InlineData("Old Gutnish Internet Explorer.pdf")]
[InlineData("68-1990-01_A.pdf")]
[InlineData("Multiple Page - from Mortality Statistics.pdf")]
[Theory]
public void CopiedPagesResultInSameData(string name)
{
    var sourcePath = IntegrationHelpers.GetDocumentPath(name);

    using (var source = PdfDocument.Open(sourcePath, ParsingOptions.LenientParsingOff))
    using (var builder = new PdfDocumentBuilder())
    {
        var expected = GetCounts(source);

        // Copy every page of the source, in order.
        var pageNumber = 1;
        while (pageNumber <= source.NumberOfPages)
        {
            builder.AddPage(source, pageNumber);
            pageNumber++;
        }

        var output = builder.Build();
        WriteFile(nameof(CopiedPagesResultInSameData) + "_" + name, output);

        using (var copy = PdfDocument.Open(output, ParsingOptions.LenientParsingOff))
        {
            var actual = GetCounts(copy);

            Assert.Equal(expected.Item1, actual.Item1);
            Assert.Equal(expected.Item2, actual.Item2);
        }
    }

    // Cheap fingerprint of a document: the number of letters, plus the sum of every letter's
    // X and Y location and the length of its font name.
    (int, double) GetCounts(PdfDocument toCount)
    {
        var letterCount = 0;
        var positionSum = 0.0;

        foreach (var page in toCount.GetPages())
        {
            foreach (var letter in page.Letters)
            {
                unchecked
                {
                    letterCount += 1;
                    positionSum += letter.Location.X;
                    positionSum += letter.Location.Y;
                    positionSum += letter.Font.Name.Length;
                }
            }
        }

        return (letterCount, positionSum);
    }
}
private static void WriteFile(string name, byte[] bytes, string extension = "pdf")
{
try

View File

@@ -47,13 +47,16 @@
CanMerge2SimpleDocumentsAssertions(new MemoryStream(result), "I am a simple pdf.", "Write something inInkscape");
}
private void CanMerge2SimpleDocumentsAssertions(Stream stream, string page1Text, string page2Text)
internal static void CanMerge2SimpleDocumentsAssertions(Stream stream, string page1Text, string page2Text, bool checkVersion=true)
{
stream.Position = 0;
using (var document = PdfDocument.Open(stream, ParsingOptions.LenientParsingOff))
{
Assert.Equal(2, document.NumberOfPages);
Assert.Equal(1.5m, document.Version);
if (checkVersion)
{
Assert.Equal(1.5m, document.Version);
}
var page1 = document.GetPage(1);
Assert.Equal(page1Text, page1.Text);
@@ -105,7 +108,7 @@
[Fact]
public void DedupsObjectsFromSameDoc()
{
var one = IntegrationHelpers.GetDocumentPath("Multiple Page - from Mortality Statistics.pdf");
var one = IntegrationHelpers.GetDocumentPath("Multiple Page - from Mortality Statistics.pdf");
var result = PdfMerger.Merge(new List<byte[]> { File.ReadAllBytes(one) }, new List<IReadOnlyList<int>> { new List<int> { 1, 2} });
@@ -142,21 +145,29 @@
public void CanMergeWithSelection()
{
    var first = IntegrationHelpers.GetDocumentPath("Multiple Page - from Mortality Statistics.pdf");
    var contents = File.ReadAllBytes(first);

    // 1-based source page numbers, in the order they should appear in the merged output.
    var toCopy = new[] {2, 1, 4, 3, 6, 5};
    var result = PdfMerger.Merge(new [] { contents }, new [] { toCopy });

    WriteFile(nameof(CanMergeWithSelection), result);

    using (var existing = PdfDocument.Open(contents, ParsingOptions.LenientParsingOff))
    using (var merged = PdfDocument.Open(result, ParsingOptions.LenientParsingOff))
    {
        Assert.Equal(6, merged.NumberOfPages);

        // Page numbers are 1-based, so the bound must be inclusive; with '<' the last
        // merged page was never compared against its source page.
        for (var i = 1; i <= merged.NumberOfPages; i++)
        {
            Assert.Equal(
                existing.GetPage(toCopy[i - 1]).Text,
                merged.GetPage(i).Text
            );
        }
    }
}
[Fact]
public void CanMergeMultipleWithSelection()
{

View File

@@ -3,6 +3,7 @@
using System;
using System.Collections.Generic;
using Content;
using Core;
using Filters;
using Parser.Parts;
using Tokenization.Scanner;
@@ -82,6 +83,30 @@
return embeddedFiles.Count > 0;
}
/// <summary>
/// Replaces the token of an indirect object with the value produced by <paramref name="replacer"/>.
/// The replacement is handed to the token scanner, which returns it for future requests instead of
/// scanning the source PDF data.
/// </summary>
/// <param name="reference">The indirect reference of the object to replace.</param>
/// <param name="replacer">Func that takes the existing token as input and returns the new token.</param>
/// <exception cref="ArgumentNullException"><paramref name="replacer"/> is <see langword="null"/>.</exception>
public void ReplaceIndirectObject(IndirectReference reference, Func<IToken, IToken> replacer)
{
    // Fail fast with a descriptive exception instead of a NullReferenceException after the lookup.
    if (replacer == null)
    {
        throw new ArgumentNullException(nameof(replacer));
    }

    var obj = pdfScanner.Get(reference);
    var replacement = replacer(obj.Data);
    pdfScanner.ReplaceToken(reference, replacement);
}
/// <summary>
/// Registers <paramref name="replacement"/> with the token scanner so that it is returned for
/// future requests for <paramref name="reference"/> instead of scanning the source PDF data.
/// </summary>
/// <param name="reference">The indirect reference of the object to replace.</param>
/// <param name="replacement">Replacement token to use.</param>
public void ReplaceIndirectObject(IndirectReference reference, IToken replacement)
    => pdfScanner.ReplaceToken(reference, replacement);
private void GuardDisposed()
{
if (isDisposed)

View File

@@ -16,5 +16,13 @@
/// <param name="reference">The object number for the object to tokenize.</param>
/// <returns>The tokenized object.</returns>
ObjectToken Get(IndirectReference reference);
/// <summary>
/// Adds the token to an internal cache keyed by <paramref name="reference"/>. The cached token is
/// returned for that reference instead of scanning the source PDF data.
/// </summary>
/// <param name="reference">The indirect reference of the object to replace.</param>
/// <param name="token">The token to replace the existing data.</param>
void ReplaceToken(IndirectReference reference, IToken token);
}
}

View File

@@ -29,6 +29,9 @@
private bool isDisposed;
private bool isBruteForcing;
/// <summary>
/// Replacement objects registered through <see cref="ReplaceToken"/>, keyed by indirect reference.
/// </summary>
private readonly Dictionary<IndirectReference, ObjectToken> overwrittenTokens =
new Dictionary<IndirectReference, ObjectToken>();
/// <summary>
/// Stores tokens encountered between obj - endobj markers for each <see cref="MoveNext"/> call.
/// Cleared after each operation.
@@ -670,6 +673,11 @@
throw new ObjectDisposedException(nameof(PdfTokenScanner));
}
if (overwrittenTokens.TryGetValue(reference, out var value))
{
return value;
}
if (objectLocationProvider.TryGetCached(reference, out var objectToken))
{
return objectToken;
@@ -710,6 +718,13 @@
return BruteForceFileToFindReference(reference);
}
public void ReplaceToken(IndirectReference reference, IToken token)
{
    // The replacement has no real location in the source stream, so 0 is used as its position.
    // Nothing appears to read this value for replaced objects today; revisit if that changes.
    var replacement = new ObjectToken(0, reference, token);

    overwrittenTokens[reference] = replacement;
}
private ObjectToken BruteForceFileToFindReference(IndirectReference reference)
{
try

View File

@@ -9,7 +9,7 @@
private const string SrgbIec61966OutputCondition = "sRGB IEC61966-2.1";
private const string RegistryName = "http://www.color.org";
public static ArrayToken GetOutputIntentsArray(Func<IToken, ObjectToken> objectWriter)
public static ArrayToken GetOutputIntentsArray(Func<IToken, IndirectReferenceToken> objectWriter)
{
var rgbColorCondition = new StringToken(SrgbIec61966OutputCondition);
@@ -38,7 +38,7 @@
{NameToken.OutputConditionIdentifier, rgbColorCondition},
{NameToken.RegistryName, new StringToken(RegistryName)},
{NameToken.Info, rgbColorCondition},
{NameToken.DestOutputProfile, new IndirectReferenceToken(written.Number)}
{NameToken.DestOutputProfile, written}
}),
});
}

View File

@@ -3,6 +3,7 @@
using System.IO;
using Core;
using Tokens;
using Util.JetBrains.Annotations;
internal interface IWritingFont
{
@@ -16,7 +17,7 @@
TransformationMatrix GetFontMatrix();
ObjectToken WriteFont(NameToken fontKeyName, Stream outputStream, BuilderContext context);
IndirectReferenceToken WriteFont(IPdfStreamWriter writer, [CanBeNull]IndirectReferenceToken reservedIndirect=null);
byte GetValueForCharacter(char character);
}

View File

@@ -8,6 +8,7 @@
using PdfPig.Fonts.AdobeFontMetrics;
using PdfPig.Fonts.Encodings;
using Tokens;
using Util.JetBrains.Annotations;
internal class Standard14WritingFont : IWritingFont
{
@@ -55,20 +56,24 @@
return TransformationMatrix.FromValues(1/1000.0, 0, 0, 1/1000.0, 0, 0);
}
public ObjectToken WriteFont(NameToken fontKeyName, Stream outputStream, BuilderContext context)
public IndirectReferenceToken WriteFont(IPdfStreamWriter writer, IndirectReferenceToken reservedIndirect=null)
{
var dictionary = new Dictionary<NameToken, IToken>
{
{ NameToken.Type, NameToken.Font },
{ NameToken.Subtype, NameToken.Type1 },
{ NameToken.BaseFont, NameToken.Create(metrics.FontName) },
{ NameToken.Encoding, NameToken.MacRomanEncoding },
{ NameToken.Name, fontKeyName }
{ NameToken.Encoding, NameToken.MacRomanEncoding }
};
var token = new DictionaryToken(dictionary);
var result = context.WriteObject(outputStream, token);
if (reservedIndirect != null)
{
return writer.WriteToken(token, reservedIndirect);
}
var result = writer.WriteToken(token);
return result;
}
@@ -92,46 +97,5 @@
return result;
}
}
/// <summary>
/// Hands out object numbers and records the stream offset of every object it writes.
/// </summary>
internal class BuilderContext
{
    private readonly List<int> reservedNumbers = new List<int>();
    private readonly Dictionary<IndirectReference, long> objectOffsets = new Dictionary<IndirectReference, long>();

    /// <summary>
    /// The next object number that will be handed out. Starts at 1.
    /// </summary>
    public int CurrentNumber { get; private set; } = 1;

    /// <summary>
    /// Stream position of each written object, keyed by its reference.
    /// </summary>
    public IReadOnlyDictionary<IndirectReference, long> ObjectOffsets => objectOffsets;

    /// <summary>
    /// Writes <paramref name="token"/> to <paramref name="stream"/> as an object with generation 0 and records its offset.
    /// </summary>
    /// <param name="stream">Stream to write to; its current position becomes the object's offset.</param>
    /// <param name="token">Token to wrap in an object.</param>
    /// <param name="reservedNumber">A number previously returned by <see cref="ReserveNumber"/>, or null to take the next free number.</param>
    /// <returns>The object that was written.</returns>
    /// <exception cref="InvalidOperationException"><paramref name="reservedNumber"/> is not currently reserved.</exception>
    public ObjectToken WriteObject(Stream stream, IToken token, int? reservedNumber = null)
    {
        int objectNumber;
        if (!reservedNumber.HasValue)
        {
            objectNumber = CurrentNumber;
            CurrentNumber++;
        }
        else if (reservedNumbers.Remove(reservedNumber.Value))
        {
            // A reservation is consumed by the write that uses it.
            objectNumber = reservedNumber.Value;
        }
        else
        {
            throw new InvalidOperationException();
        }

        var key = new IndirectReference(objectNumber, 0);
        var written = new ObjectToken(stream.Position, key, token);
        objectOffsets.Add(key, written.Position);

        TokenWriter.WriteToken(written, stream);

        return written;
    }

    /// <summary>
    /// Sets aside the next object number so it can be passed to <see cref="WriteObject"/> later.
    /// </summary>
    /// <returns>The reserved object number.</returns>
    public int ReserveNumber()
    {
        var next = CurrentNumber++;
        reservedNumbers.Add(next);
        return next;
    }
}
}

View File

@@ -47,14 +47,14 @@
return TransformationMatrix.FromValues(1.0 / unitsPerEm, 0, 0, 1.0 / unitsPerEm, 0, 0);
}
public ObjectToken WriteFont(NameToken fontKeyName, Stream outputStream, BuilderContext context)
public IndirectReferenceToken WriteFont(IPdfStreamWriter writer, IndirectReferenceToken reservedIndirect=null)
{
var newEncoding = new TrueTypeSubsetEncoding(characterMapping.Keys.ToList());
var subsetBytes = TrueTypeSubsetter.Subset(fontFileBytes.ToArray(), newEncoding);
var embeddedFile = DataCompresser.CompressToStream(subsetBytes);
var fileRef = context.WriteObject(outputStream, embeddedFile);
var fileRef = writer.WriteToken(embeddedFile);
var baseFont = NameToken.Create(font.TableRegister.NameTable.GetPostscriptName());
@@ -76,7 +76,7 @@
{ NameToken.Descent, new NumericToken(Math.Round(hhead.Descent * scaling, 2)) },
{ NameToken.CapHeight, new NumericToken(90) },
{ NameToken.StemV, new NumericToken(90) },
{ NameToken.FontFile2, new IndirectReferenceToken(fileRef.Number) }
{ NameToken.FontFile2, fileRef }
};
var os2 = font.TableRegister.Os2Table;
@@ -108,29 +108,32 @@
widths.Add(new NumericToken(width));
}
var descriptor = context.WriteObject(outputStream, new DictionaryToken(descriptorDictionary));
var descriptor = writer.WriteToken(new DictionaryToken(descriptorDictionary));
var toUnicodeCMap = ToUnicodeCMapBuilder.ConvertToCMapStream(characterMapping);
var toUnicodeStream = DataCompresser.CompressToStream(toUnicodeCMap);
var toUnicode = context.WriteObject(outputStream, toUnicodeStream);
var toUnicode = writer.WriteToken(toUnicodeStream);
var dictionary = new Dictionary<NameToken, IToken>
{
{ NameToken.Type, NameToken.Font },
{ NameToken.Subtype, NameToken.TrueType },
{ NameToken.BaseFont, baseFont },
{ NameToken.FontDescriptor, new IndirectReferenceToken(descriptor.Number) },
{ NameToken.FontDescriptor, descriptor },
{ NameToken.FirstChar, new NumericToken(0) },
{ NameToken.LastChar, new NumericToken(lastCharacter) },
{ NameToken.Widths, new ArrayToken(widths) },
{NameToken.ToUnicode, new IndirectReferenceToken(toUnicode.Number) }
{NameToken.ToUnicode, toUnicode }
};
var token = new DictionaryToken(dictionary);
var result = context.WriteObject(outputStream, token);
if (reservedIndirect != null)
{
return writer.WriteToken(token, reservedIndirect);
}
return result;
return writer.WriteToken(token);
}
public byte GetValueForCharacter(char character)

View File

@@ -0,0 +1,52 @@
namespace UglyToad.PdfPig.Writer
{
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using Tokens;
/// <summary>
/// Writes PDF objects to an output stream and hands back indirect references to them.
/// </summary>
internal interface IPdfStreamWriter : IDisposable
{
/// <summary>
/// The underlying stream used by the writer.
/// </summary>
Stream Stream { get; }
/// <summary>
/// Writes a single token to the stream.
/// </summary>
/// <param name="token">Token to write.</param>
/// <returns>
/// Indirect reference to the token. Implementations may return a reference to a previously
/// written token with identical content (see <c>PdfDedupStreamWriter</c>).
/// </returns>
IndirectReferenceToken WriteToken(IToken token);
/// <summary>
/// Writes a token to a reserved object number.
/// </summary>
/// <param name="token">Token to write.</param>
/// <param name="indirectReference">Reserved indirect reference, as returned by <see cref="ReserveObjectNumber"/>.</param>
/// <returns>Reserved indirect reference.</returns>
IndirectReferenceToken WriteToken(IToken token, IndirectReferenceToken indirectReference);
/// <summary>
/// Reserves an object number for an object to be written.
/// Useful with cyclic references where object number must be known before
/// writing.
/// </summary>
/// <returns>A reserved indirect reference.</returns>
IndirectReferenceToken ReserveObjectNumber();
/// <summary>
/// Initializes the PDF stream with the PDF header.
/// </summary>
/// <param name="version">Version of PDF.</param>
void InitializePdf(decimal version);
/// <summary>
/// Completes the PDF by writing the trailing PDF information.
/// </summary>
/// <param name="catalogReference">Indirect reference of catalog.</param>
/// <param name="documentInformationReference">Reference to document information (optional)</param>
void CompletePdf(IndirectReferenceToken catalogReference, IndirectReferenceToken documentInformationReference=null);
}
}

View File

@@ -8,14 +8,14 @@ namespace UglyToad.PdfPig.Writer
{
internal static class PdfABaselineRuleBuilder
{
public static void Obey(Dictionary<NameToken, IToken> catalog, Func<IToken, ObjectToken> writerFunc,
public static void Obey(Dictionary<NameToken, IToken> catalog, Func<IToken, IndirectReferenceToken> writerFunc,
PdfDocumentBuilder.DocumentInformationBuilder documentInformationBuilder,
PdfAStandard archiveStandard)
{
catalog[NameToken.OutputIntents] = OutputIntentsFactory.GetOutputIntentsArray(writerFunc);
var xmpStream = XmpWriter.GenerateXmpStream(documentInformationBuilder, 1.7m, archiveStandard);
var xmpObj = writerFunc(xmpStream);
catalog[NameToken.Metadata] = new IndirectReferenceToken(xmpObj.Number);
catalog[NameToken.Metadata] = xmpObj;
}
}
}

View File

@@ -0,0 +1,151 @@
namespace UglyToad.PdfPig.Writer
{
    using System.Collections.Generic;
    using System.IO;
    using Tokens;

    /// <summary>
    /// A <see cref="PdfStreamWriter"/> that de-duplicates objects by their serialized bytes: when a token
    /// whose serialized form has already been written is written again without a reserved number, the
    /// existing indirect reference is returned and nothing new is written.
    /// The serialized bytes of every written object are kept in memory for the lifetime of the writer.
    /// </summary>
    internal class PdfDedupStreamWriter : PdfStreamWriter
    {
        private readonly Dictionary<byte[], IndirectReferenceToken> hashes = new Dictionary<byte[], IndirectReferenceToken>(new FNVByteComparison());

        // Scratch buffer reused for serializing each token before it is compared / written.
        private readonly MemoryStream ms = new MemoryStream();

        public PdfDedupStreamWriter(Stream stream, bool dispose) : base(stream, dispose)
        {
        }

        /// <summary>
        /// Writes <paramref name="token"/> unless an object with identical serialized content was written before.
        /// </summary>
        /// <param name="token">Token to write.</param>
        /// <returns>Reference to the newly written object, or to the earlier identical one.</returns>
        public override IndirectReferenceToken WriteToken(IToken token)
        {
            var contents = Serialize(token);

            if (hashes.TryGetValue(contents, out var value))
            {
                return value;
            }

            var ir = ReserveObjectNumber();
            hashes.Add(contents, ir);

            offsets.Add(ir.Data, Stream.Position);
            TokenWriter.WriteObject(ir.Data.ObjectNumber, ir.Data.Generation, contents, Stream);

            return ir;
        }

        /// <summary>
        /// Writes <paramref name="token"/> under a reserved object number. The object is always written,
        /// even if identical content exists already, because the reserved reference may already be in use.
        /// </summary>
        /// <param name="token">Token to write.</param>
        /// <param name="indirectReference">Reserved indirect reference.</param>
        /// <returns>The reserved indirect reference.</returns>
        public override IndirectReferenceToken WriteToken(IToken token, IndirectReferenceToken indirectReference)
        {
            var contents = Serialize(token);

            // Identical content may have been written before (e.g. two reserved objects with the same
            // body). Dictionary.Add would throw on the duplicate key, so keep the first registration
            // as the de-duplication target and only register content that is new.
            if (!hashes.ContainsKey(contents))
            {
                hashes.Add(contents, indirectReference);
            }

            offsets.Add(indirectReference.Data, Stream.Position);
            TokenWriter.WriteObject(indirectReference.Data.ObjectNumber, indirectReference.Data.Generation, contents, Stream);

            return indirectReference;
        }

        // NOTE(review): 'new' hides the base Dispose rather than overriding it, so calls made through an
        // IPdfStreamWriter / IDisposable reference presumably run only the base implementation - confirm
        // against PdfStreamWriter and consider making the base method virtual.
        public new void Dispose()
        {
            hashes.Clear();
            base.Dispose();
        }

        /// <summary>
        /// Serializes a token to a fresh byte array via the shared scratch buffer, writing the PDF
        /// header first if the writer has not been initialized yet.
        /// </summary>
        private byte[] Serialize(IToken token)
        {
            if (!Initialized)
            {
                InitializePdf(DefaultVersion);
            }

            ms.SetLength(0);
            TokenWriter.WriteToken(token, ms);
            return ms.ToArray();
        }

        /// <summary>
        /// Compares byte arrays by content and hashes them with FNV-1a.
        /// </summary>
        class FNVByteComparison : IEqualityComparer<byte[]>
        {
            public bool Equals(byte[] x, byte[] y)
            {
                if (x.Length != y.Length)
                {
                    return false;
                }

                for (var i = 0; i < x.Length; i++)
                {
                    if (x[i] != y[i])
                    {
                        return false;
                    }
                }

                return true;
            }

            public int GetHashCode(byte[] obj)
            {
                var hash = FnvHash.Create();
                foreach (var t in obj)
                {
                    hash.Combine(t);
                }

                return hash.HashCode;
            }
        }

        /// <summary>
        /// A hash combiner that is implemented with the Fowler/Noll/Vo algorithm (FNV-1a). This is a mutable struct for performance reasons.
        /// </summary>
        struct FnvHash
        {
            /// <summary>
            /// The starting point of the FNV hash.
            /// </summary>
            public const int Offset = unchecked((int)2166136261);

            /// <summary>
            /// The prime number used to compute the FNV hash.
            /// </summary>
            private const int Prime = 16777619;

            /// <summary>
            /// Gets the current result of the hash function.
            /// </summary>
            public int HashCode { get; private set; }

            /// <summary>
            /// Creates a new FNV hash initialized to <see cref="Offset"/>.
            /// </summary>
            public static FnvHash Create()
            {
                var result = new FnvHash();
                result.HashCode = Offset;
                return result;
            }

            /// <summary>
            /// Adds the specified byte to the hash.
            /// </summary>
            /// <param name="data">The byte to hash.</param>
            public void Combine(byte data)
            {
                unchecked
                {
                    HashCode ^= data;
                    HashCode *= Prime;
                }
            }

            /// <summary>
            /// Adds the specified integer to this hash, in little-endian order.
            /// </summary>
            /// <param name="data">The integer to hash.</param>
            public void Combine(int data)
            {
                Combine(unchecked((byte)data));
                Combine(unchecked((byte)(data >> 8)));
                Combine(unchecked((byte)(data >> 16)));
                Combine(unchecked((byte)(data >> 24)));
            }
        }
    }
}

View File

@@ -10,10 +10,9 @@ namespace UglyToad.PdfPig.Writer
using Core;
using Fonts;
using PdfPig.Fonts.TrueType;
using Graphics.Operations;
using Parser.Parts;
using PdfPig.Fonts.Standard14Fonts;
using PdfPig.Fonts.TrueType.Parser;
using System.Runtime.CompilerServices;
using Tokenization.Scanner;
using Tokens;
@@ -22,16 +21,23 @@ namespace UglyToad.PdfPig.Writer
/// <summary>
/// Provides methods to construct new PDF documents.
/// </summary>
public class PdfDocumentBuilder
public class PdfDocumentBuilder : IDisposable
{
private readonly BuilderContext context = new BuilderContext();
private readonly IPdfStreamWriter context;
private readonly Dictionary<int, PdfPageBuilder> pages = new Dictionary<int, PdfPageBuilder>();
private readonly Dictionary<Guid, FontStored> fonts = new Dictionary<Guid, FontStored>();
private readonly Dictionary<Guid, ImageStored> images = new Dictionary<Guid, ImageStored>();
private readonly Dictionary<IndirectReferenceToken, IToken> unwrittenTokens = new Dictionary<IndirectReferenceToken, IToken>();
private bool completed = false;
internal int fontId = 0;
private readonly static ArrayToken DefaultProcSet = new ArrayToken(new List<NameToken>
{
NameToken.Create("PDF"),
NameToken.Text,
NameToken.ImageB,
NameToken.ImageC,
NameToken.ImageI
});
/// <summary>
/// The standard of PDF/A compliance of the generated document. Defaults to <see cref="PdfAStandard.None"/>.
/// </summary>
@@ -59,9 +65,44 @@ namespace UglyToad.PdfPig.Writer
internal IReadOnlyDictionary<Guid, FontStored> Fonts => fonts;
/// <summary>
/// The images currently available in the document builder added via <see cref="AddImage"/>. Keyed by id for internal purposes.
/// Creates a document builder keeping resources in memory.
/// </summary>
internal IReadOnlyDictionary<Guid, ImageStored> Images => images;
public PdfDocumentBuilder() : this(1.7m)
{
    // Delegates to the version overload: in-memory stream, PDF header version 1.7.
}
/// <summary>
/// Creates a document builder that writes to an in-memory stream owned by the builder.
/// </summary>
/// <param name="version">PDF version to write in the header.</param>
public PdfDocumentBuilder(decimal version)
{
    var memory = new MemoryStream();

    // 'true': the writer disposes the stream it was given.
    context = new PdfStreamWriter(memory, true);
    context.InitializePdf(version);
}
/// <summary>
/// Creates a document builder that writes to the supplied stream.
/// </summary>
/// <param name="stream">Stream to write the PDF to.</param>
/// <param name="disposeStream">Whether the stream should be disposed when the builder is.</param>
/// <param name="type">Type of PDF stream writer to use.</param>
/// <param name="version">PDF version to write in the header.</param>
public PdfDocumentBuilder(Stream stream, bool disposeStream=false, PdfWriterType type=PdfWriterType.Default, decimal version=1.7m)
{
    if (type == PdfWriterType.ObjectInMemoryDedup)
    {
        context = new PdfDedupStreamWriter(stream, disposeStream);
    }
    else
    {
        // Every other writer type uses the plain stream writer.
        context = new PdfStreamWriter(stream, disposeStream);
    }

    context.InitializePdf(version);
}
/// <summary>
/// Determines whether the bytes of the TrueType font file provided can be used in a PDF document.
@@ -127,9 +168,8 @@ namespace UglyToad.PdfPig.Writer
{
var font = TrueTypeFontParser.Parse(new TrueTypeDataBytes(new ByteArrayInputBytes(fontFileBytes)));
var id = Guid.NewGuid();
var added = new AddedFont(id, NameToken.Create($"F{fontId++}"));
var added = new AddedFont(id, context.ReserveObjectNumber());
fonts[id] = new FontStored(added, new TrueTypeWritingFont(font, fontFileBytes));
return added;
}
catch (Exception ex)
@@ -152,21 +192,15 @@ namespace UglyToad.PdfPig.Writer
var id = Guid.NewGuid();
var name = NameToken.Create($"F{fontId++}");
var added = new AddedFont(id, name);
var added = new AddedFont(id, context.ReserveObjectNumber());
fonts[id] = new FontStored(added, new Standard14WritingFont(Standard14.GetAdobeFontMetrics(type)));
return added;
}
internal IndirectReference AddImage(DictionaryToken dictionary, byte[] bytes)
internal IndirectReferenceToken AddImage(DictionaryToken dictionary, byte[] bytes)
{
var reserved = context.ReserveNumber();
var stored = new ImageStored(dictionary, bytes, reserved);
images[stored.Id] = stored;
return new IndirectReference(reserved, 0);
var streamToken = new StreamToken(dictionary, bytes);
return context.WriteToken(streamToken);
}
/// <summary>
@@ -235,275 +269,386 @@ namespace UglyToad.PdfPig.Writer
return AddPage(rectangle.Width, rectangle.Height);
}
// Copies a token read from 'source' into this builder's output. The per-scanner map of
// already-copied references is reused (and created on first use) so that repeated copies
// from the same scanner share it.
internal IToken CopyToken(IPdfTokenScanner source, IToken token)
{
    var copiedReferences = existingCopies.GetOrCreateValue(source);

    return WriterUtil.CopyToken(context, token, source, copiedReferences);
}
/// <summary>
/// A page dictionary from a source document together with its parent dictionaries,
/// as produced when walking the source document's page tree.
/// </summary>
internal class PageInfo
{
/// <summary>
/// The page's own dictionary.
/// </summary>
public DictionaryToken Page { get; set; }
/// <summary>
/// The parent dictionaries of the page in the page tree.
/// </summary>
public IReadOnlyList<DictionaryToken> Parents { get; set; }
}
private readonly ConditionalWeakTable<IPdfTokenScanner, Dictionary<IndirectReference, IndirectReferenceToken>> existingCopies =
new ConditionalWeakTable<IPdfTokenScanner, Dictionary<IndirectReference, IndirectReferenceToken>>();
private readonly ConditionalWeakTable<PdfDocument, Dictionary<int, PageInfo>> existingTrees =
new ConditionalWeakTable<PdfDocument, Dictionary<int, PageInfo>>();
/// <summary>
/// Add a copy of a page from an existing document, this page will be included in the output when <see cref="Build"/> is called.
/// </summary>
/// <remarks>
/// Content streams and all other page entries are copied into this builder. Resources found on the page's
/// parent nodes and on the page itself are merged into a single direct resources dictionary on the new page.
/// </remarks>
/// <param name="document">Source document.</param>
/// <param name="pageNumber">Page to copy.</param>
/// <returns>A builder for editing the page.</returns>
/// <exception cref="ArgumentNullException"><paramref name="document"/> is null.</exception>
/// <exception cref="KeyNotFoundException">The source document has no page with the given number.</exception>
public PdfPageBuilder AddPage(PdfDocument document, int pageNumber)
{
    if (document == null)
    {
        throw new ArgumentNullException(nameof(document));
    }

    var scanner = document.Structure.TokenScanner;

    // map of references already copied from this source, shared across all pages taken from it
    if (!existingCopies.TryGetValue(scanner, out var refs))
    {
        refs = new Dictionary<IndirectReference, IndirectReferenceToken>();
        existingCopies.Add(scanner, refs);
    }

    // the source page tree is walked once per document and cached by page number
    if (!existingTrees.TryGetValue(document, out var pagesInfos))
    {
        pagesInfos = new Dictionary<int, PageInfo>();
        int i = 1;
        foreach (var (pageDict, parents) in WriterUtil.WalkTree(document.Structure.Catalog.PageTree))
        {
            pagesInfos[i] = new PageInfo
            {
                Page = pageDict, Parents = parents
            };
            i++;
        }

        existingTrees.Add(document, pagesInfos);
    }

    if (!pagesInfos.TryGetValue(pageNumber, out PageInfo pageInfo))
    {
        throw new KeyNotFoundException($"Page {pageNumber} was not found in the source document.");
    }

    // copy content streams
    var streams = new List<PdfPageBuilder.CopiedContentStream>();
    if (pageInfo.Page.TryGet(NameToken.Contents, out IToken contentsToken))
    {
        if (contentsToken is ArrayToken array)
        {
            foreach (var item in array.Data)
            {
                if (item is IndirectReferenceToken ir)
                {
                    streams.Add(new PdfPageBuilder.CopiedContentStream(
                        WriterUtil.CopyToken(context, ir, scanner, refs) as IndirectReferenceToken));
                }
            }
        }
        else if (contentsToken is IndirectReferenceToken ir)
        {
            streams.Add(new PdfPageBuilder.CopiedContentStream(
                WriterUtil.CopyToken(context, ir, scanner, refs) as IndirectReferenceToken));
        }
    }

    // manually copy page dict / resources as we need to modify some
    var copiedPageDict = new Dictionary<NameToken, IToken>();
    Dictionary<NameToken, IToken> resources = new Dictionary<NameToken, IToken>();

    // just put all parent resources into new page
    foreach (var dict in pageInfo.Parents)
    {
        if (dict.TryGet(NameToken.Resources, out var resourceToken))
        {
            CopyResourceDict(resourceToken, resources);
        }
    }

    foreach (var kvp in pageInfo.Page.Data)
    {
        // contents were copied above, parent and type are assigned when the document is completed
        if (kvp.Key == NameToken.Contents || kvp.Key == NameToken.Parent || kvp.Key == NameToken.Type)
        {
            continue;
        }

        if (kvp.Key == NameToken.Resources)
        {
            CopyResourceDict(kvp.Value, resources);
            continue;
        }

        copiedPageDict[NameToken.Create(kvp.Key)] =
            WriterUtil.CopyToken(context, kvp.Value, scanner, refs);
    }

    copiedPageDict[NameToken.Resources] = new DictionaryToken(resources);

    var builder = new PdfPageBuilder(pages.Count + 1, this, streams, copiedPageDict);
    pages[builder.PageNumber] = builder;
    return builder;

    // Merges the resource dictionary behind 'token' into 'destinationDict'; entries merged later win.
    void CopyResourceDict(IToken token, Dictionary<NameToken, IToken> destinationDict)
    {
        DictionaryToken dict = GetRemoteDict(token);
        if (dict == null)
        {
            return;
        }

        foreach (var item in dict.Data)
        {
            var key = NameToken.Create(item.Key);

            if (destinationDict.TryGetValue(key, out var existing) && existing is DictionaryToken destSubDict)
            {
                var subDict = GetRemoteDict(item.Value);
                if (subDict != null)
                {
                    // both sides are dictionaries (e.g. /Font): merge entry by entry INTO the sub-dictionary.
                    // DictionaryToken is rebuilt rather than mutated; existing entries were already copied.
                    var merged = new Dictionary<NameToken, IToken>();
                    foreach (var existingItem in destSubDict.Data)
                    {
                        merged[NameToken.Create(existingItem.Key)] = existingItem.Value;
                    }

                    foreach (var subItem in subDict.Data)
                    {
                        // last copied is most important
                        merged[NameToken.Create(subItem.Key)] = WriterUtil.CopyToken(context, subItem.Value, scanner, refs);
                    }

                    destinationDict[key] = new DictionaryToken(merged);
                    continue;
                }
            }

            // new key, or not a dict on one side: just (over)write with the more important one
            destinationDict[key] = CopyAsDirect(item.Value);
        }
    }

    // Copies a resource entry; an indirect reference is resolved first so the entry is stored directly,
    // as PdfPageBuilder needs to modify resource entries.
    IToken CopyAsDirect(IToken value)
    {
        if (value is IndirectReferenceToken ir)
        {
            return WriterUtil.CopyToken(context, scanner.Get(ir.Data).Data, scanner, refs);
        }

        return WriterUtil.CopyToken(context, value, scanner, refs);
    }

    // Returns the dictionary for a token that is either a dictionary or a reference to one, otherwise null.
    DictionaryToken GetRemoteDict(IToken token)
    {
        DictionaryToken dict = null;
        if (token is IndirectReferenceToken ir)
        {
            dict = scanner.Get(ir.Data).Data as DictionaryToken;
        }
        else if (token is DictionaryToken dt)
        {
            dict = dt;
        }

        return dict;
    }
}
/// <summary>
/// Writes everything that is still pending to the output: fonts at their reserved object numbers, every page
/// with its content streams, the page tree, the catalog and (when enabled and non-empty) the document
/// information dictionary. Finally completes the PDF on the writer context and marks this builder as completed.
/// </summary>
private void CompleteDocument()
{
    // write fonts to reserved object numbers
    foreach (var font in fonts)
    {
        font.Value.FontProgram.WriteFont(context, font.Value.FontKey.Reference);
    }

    const int desiredLeafSize = 25; // allow customization at some point?
    var numLeafs = (int) Math.Ceiling(Decimal.Divide(Pages.Count, desiredLeafSize));

    // one "Pages" leaf node per group of up to desiredLeafSize pages; object numbers are reserved
    // up front so pages can reference their parent before the parent itself is written
    var leafRefs = new List<IndirectReferenceToken>();
    var leafChildren = new List<List<IndirectReferenceToken>>();
    var leafs = new List<Dictionary<NameToken, IToken>>();
    for (var i = 0; i < numLeafs; i++)
    {
        leafs.Add(new Dictionary<NameToken, IToken>()
        {
            {NameToken.Type, NameToken.Pages},
        });
        leafChildren.Add(new List<IndirectReferenceToken>());
        leafRefs.Add(context.ReserveObjectNumber());
    }

    int leafNum = 0;
    foreach (var page in pages)
    {
        var pageDictionary = page.Value.pageDictionary;
        pageDictionary[NameToken.Type] = NameToken.Page;
        pageDictionary[NameToken.Parent] = leafRefs[leafNum];
        pageDictionary[NameToken.ProcSet] = DefaultProcSet;
        if (!pageDictionary.ContainsKey(NameToken.MediaBox))
        {
            pageDictionary[NameToken.MediaBox] = RectangleToArray(page.Value.PageSize);
        }

        var toWrite = page.Value.contentStreams.Where(x => x.HasContent).ToList();
        if (toWrite.Count == 0)
        {
            // write empty
            pageDictionary[NameToken.Contents] = new PdfPageBuilder.DefaultContentStream().Write(context);
        }
        else if (toWrite.Count == 1)
        {
            // write single
            pageDictionary[NameToken.Contents] = toWrite[0].Write(context);
        }
        else
        {
            // write array
            var streams = new List<IToken>();
            foreach (var stream in toWrite)
            {
                streams.Add(stream.Write(context));
            }

            pageDictionary[NameToken.Contents] = new ArrayToken(streams);
        }

        leafChildren[leafNum].Add(context.WriteToken(new DictionaryToken(pageDictionary)));

        // current leaf is full, continue with the next one
        if (leafChildren[leafNum].Count >= desiredLeafSize)
        {
            leafNum += 1;
        }
    }

    // temporary entry carrying the reserved reference of each leaf; removed again before the leaf is written
    var dummyName = NameToken.Create("ObjIdToUse");
    for (var i = 0; i < leafs.Count; i++)
    {
        leafs[i][NameToken.Kids] = new ArrayToken(leafChildren[i]);
        leafs[i][NameToken.Count] = new NumericToken(leafChildren[i].Count);
        leafs[i][dummyName] = leafRefs[i];
    }

    var catalogDictionary = new Dictionary<NameToken, IToken>
    {
        {NameToken.Type, NameToken.Catalog},
    };

    if (leafs.Count == 1)
    {
        // a single leaf is the root of the page tree itself
        var leaf = leafs[0];
        var id = leaf[dummyName] as IndirectReferenceToken;
        leaf.Remove(dummyName);
        catalogDictionary[NameToken.Pages] = context.WriteToken(new DictionaryToken(leaf), id);
    }
    else
    {
        var rootPageInfo = CreatePageTree(leafs, null);
        catalogDictionary[NameToken.Pages] = rootPageInfo.Ref;
    }

    if (ArchiveStandard != PdfAStandard.None)
    {
        Func<IToken, IndirectReferenceToken> writerFunc = x => context.WriteToken(x);

        PdfABaselineRuleBuilder.Obey(catalogDictionary, writerFunc, DocumentInformation, ArchiveStandard);

        switch (ArchiveStandard)
        {
            case PdfAStandard.A1A:
            case PdfAStandard.A2A:
                PdfA1ARuleBuilder.Obey(catalogDictionary);
                break;
            case PdfAStandard.A2B:
                break;
        }
    }

    var catalog = new DictionaryToken(catalogDictionary);

    var catalogRef = context.WriteToken(catalog);

    var informationReference = default(IndirectReferenceToken);
    if (IncludeDocumentInformation)
    {
        var informationDictionary = DocumentInformation.ToDictionary();
        if (informationDictionary.Count > 0)
        {
            var dictionary = new DictionaryToken(informationDictionary);
            informationReference = context.WriteToken(dictionary);
        }
    }

    context.CompletePdf(catalogRef, informationReference);

    completed = true;

    // Writes a "Pages" node covering 'pagesNodes' (recursing into intermediate nodes when there are more
    // than desiredLeafSize of them) and returns the number of pages below it and the node's reference.
    (int Count, IndirectReferenceToken Ref) CreatePageTree(List<Dictionary<NameToken, IToken>> pagesNodes, IndirectReferenceToken parent)
    {
        // TODO shorten page tree when there is a single or small number of pages left in a branch
        var count = 0;
        var thisObj = context.ReserveObjectNumber();

        var children = new List<IndirectReferenceToken>();
        if (pagesNodes.Count > desiredLeafSize)
        {
            // Branch size is the largest power of desiredLeafSize that is strictly smaller than the node count.
            // Computed with integers: a floating point logarithm can round up at exact powers, which would
            // yield a single branch holding every node and therefore a recursion that never shrinks.
            long perBranchSize = desiredLeafSize;
            while (perBranchSize * desiredLeafSize < pagesNodes.Count)
            {
                perBranchSize *= desiredLeafSize;
            }

            var perBranch = (int) perBranchSize;
            var branches = (int)Math.Ceiling(decimal.Divide(pagesNodes.Count, (decimal)perBranch));
            for (var i = 0; i < branches; i++)
            {
                var part = pagesNodes.Skip(i*perBranch).Take(perBranch).ToList();
                var result = CreatePageTree(part, thisObj);
                count += result.Count;
                children.Add(result.Ref);
            }
        }
        else
        {
            foreach (var page in pagesNodes)
            {
                page[NameToken.Parent] = thisObj;
                var id = page[dummyName] as IndirectReferenceToken;
                page.Remove(dummyName);
                count += (page[NameToken.Count] as NumericToken).Int;
                children.Add(context.WriteToken(new DictionaryToken(page), id));
            }
        }

        var node = new Dictionary<NameToken, IToken>
        {
            {NameToken.Type, NameToken.Pages},
            {NameToken.Kids, new ArrayToken(children)},
            {NameToken.Count, new NumericToken(count)}
        };

        // the root node has no parent entry
        if (parent != null)
        {
            node[NameToken.Parent] = parent;
        }

        return (count, context.WriteToken(new DictionaryToken(node), thisObj));
    }
}
/// <summary>
/// Builds a PDF document from the current content of this builder and its pages.
/// </summary>
/// <returns>The bytes of the resulting PDF document.</returns>
public byte[] Build()
{
var fontsWritten = new Dictionary<Guid, ObjectToken>();
using (var memory = new MemoryStream())
CompleteDocument();
if (context.Stream is MemoryStream ms)
{
// Header
WriteString("%PDF-1.7", memory);
// Files with binary data should contain a 2nd comment line followed by 4 bytes with values > 127
memory.WriteText("%");
memory.WriteByte(169);
memory.WriteByte(205);
memory.WriteByte(196);
memory.WriteByte(210);
memory.WriteNewLine();
// Body
foreach (var font in fonts)
{
var fontObj = font.Value.FontProgram.WriteFont(font.Value.FontKey.Name, memory, context);
fontsWritten.Add(font.Key, fontObj);
}
foreach (var image in images)
{
var streamToken = new StreamToken(image.Value.StreamDictionary, image.Value.StreamData);
context.WriteObject(memory, streamToken, image.Value.ObjectNumber);
}
foreach (var tokenSet in unwrittenTokens)
{
context.WriteObject(memory, tokenSet.Value, (int)tokenSet.Key.Data.ObjectNumber);
}
var procSet = new List<NameToken>
{
NameToken.Create("PDF"),
NameToken.Text,
NameToken.ImageB,
NameToken.ImageC,
NameToken.ImageI
};
var resources = new Dictionary<NameToken, IToken>
{
{ NameToken.ProcSet, new ArrayToken(procSet) }
};
if (fontsWritten.Count > 0)
{
var fontsDictionary = new DictionaryToken(fontsWritten.Select(x => (fonts[x.Key].FontKey.Name, (IToken)new IndirectReferenceToken(x.Value.Number)))
.ToDictionary(x => x.Item1, x => x.Item2));
resources.Add(NameToken.Font, fontsDictionary);
}
var reserved = context.ReserveNumber();
var parentIndirect = new IndirectReferenceToken(new IndirectReference(reserved, 0));
var pageReferences = new List<IndirectReferenceToken>();
foreach (var page in pages)
{
var individualResources = new Dictionary<NameToken, IToken>(resources);
var pageDictionary = new Dictionary<NameToken, IToken>
{
{NameToken.Type, NameToken.Page},
{NameToken.MediaBox, RectangleToArray(page.Value.PageSize)},
{NameToken.Parent, parentIndirect}
};
if (page.Value.Resources.Count > 0)
{
foreach (var kvp in page.Value.Resources)
{
var value = kvp.Value;
if (individualResources.TryGetValue(kvp.Key, out var pageToken))
{
if (pageToken is DictionaryToken leftDictionary && value is DictionaryToken rightDictionary)
{
var merged = leftDictionary.Data.ToDictionary(k => NameToken.Create(k.Key), v => v.Value);
foreach (var set in rightDictionary.Data)
{
merged[NameToken.Create(set.Key)] = set.Value;
}
value = new DictionaryToken(merged);
}
// Else override
}
individualResources[kvp.Key] = value;
}
}
pageDictionary[NameToken.Resources] = new DictionaryToken(individualResources);
if (page.Value.ContentStreams.Count == 1)
{
var contentStream = WriteContentStream(page.Value.CurrentStream.Operations);
var contentStreamObj = context.WriteObject(memory, contentStream);
pageDictionary[NameToken.Contents] = new IndirectReferenceToken(contentStreamObj.Number);
}
else if (page.Value.ContentStreams.Count > 1)
{
var streamTokens = page.Value.ContentStreams.Select(contentStream =>
{
var streamToken = WriteContentStream(contentStream.Operations);
var contentStreamObj = context.WriteObject(memory, streamToken);
return new IndirectReferenceToken(contentStreamObj.Number);
}).ToList();
pageDictionary[NameToken.Contents] = new ArrayToken(streamTokens);
}
var pageRef = context.WriteObject(memory, new DictionaryToken(pageDictionary));
pageReferences.Add(new IndirectReferenceToken(pageRef.Number));
}
var pagesDictionaryData = new Dictionary<NameToken, IToken>
{
{NameToken.Type, NameToken.Pages},
{NameToken.Kids, new ArrayToken(pageReferences)},
{NameToken.Count, new NumericToken(pageReferences.Count)}
};
var pagesDictionary = new DictionaryToken(pagesDictionaryData);
var pagesRef = context.WriteObject(memory, pagesDictionary, reserved);
var catalogDictionary = new Dictionary<NameToken, IToken>
{
{NameToken.Type, NameToken.Catalog},
{NameToken.Pages, new IndirectReferenceToken(pagesRef.Number)}
};
if (ArchiveStandard != PdfAStandard.None)
{
Func<IToken, ObjectToken> writerFunc = x => context.WriteObject(memory, x);
PdfABaselineRuleBuilder.Obey(catalogDictionary, writerFunc, DocumentInformation, ArchiveStandard);
switch (ArchiveStandard)
{
case PdfAStandard.A1A:
PdfA1ARuleBuilder.Obey(catalogDictionary);
break;
case PdfAStandard.A2B:
break;
case PdfAStandard.A2A:
PdfA1ARuleBuilder.Obey(catalogDictionary);
break;
}
}
var catalog = new DictionaryToken(catalogDictionary);
var catalogRef = context.WriteObject(memory, catalog);
var informationReference = default(IndirectReference?);
if (IncludeDocumentInformation)
{
var informationDictionary = DocumentInformation.ToDictionary();
if (informationDictionary.Count > 0)
{
var dictionary = new DictionaryToken(informationDictionary);
informationReference = context.WriteObject(memory, dictionary).Number;
}
}
TokenWriter.WriteCrossReferenceTable(context.ObjectOffsets, catalogRef, memory, informationReference);
return memory.ToArray();
}
}
/// <summary>
/// The purpose of this method is to resolve indirect reference. That mean copy the reference's content to the new document's stream
/// and replace the indirect reference with the correct/new one
/// </summary>
/// <param name="tokenToCopy">Token to inspect for reference</param>
/// <param name="tokenScanner">scanner get the content from the original document</param>
/// <returns>A reference of the token that was copied. With all the reference updated</returns>
internal IToken CopyToken(IToken tokenToCopy, IPdfTokenScanner tokenScanner)
{
// This token need to be deep copied, because they could contain reference. So we have to update them.
switch (tokenToCopy)
{
case DictionaryToken dictionaryToken:
{
var newContent = new Dictionary<NameToken, IToken>();
foreach (var setPair in dictionaryToken.Data)
{
var name = setPair.Key;
var token = setPair.Value;
newContent.Add(NameToken.Create(name), CopyToken(token, tokenScanner));
}
return new DictionaryToken(newContent);
}
case ArrayToken arrayToken:
{
var newArray = new List<IToken>(arrayToken.Length);
foreach (var token in arrayToken.Data)
{
newArray.Add(CopyToken(token, tokenScanner));
}
return new ArrayToken(newArray);
}
case IndirectReferenceToken referenceToken:
{
var tokenObject = DirectObjectFinder.Get<IToken>(referenceToken.Data, tokenScanner);
Debug.Assert(!(tokenObject is IndirectReferenceToken));
var newToken = CopyToken(tokenObject, tokenScanner);
var reserved = context.ReserveNumber();
var newReference = new IndirectReferenceToken(new IndirectReference(reserved, 0));
unwrittenTokens.Add(newReference, newToken);
return newReference;
}
case StreamToken streamToken:
{
var properties = CopyToken(streamToken.StreamDictionary, tokenScanner) as DictionaryToken;
Debug.Assert(properties != null);
var bytes = streamToken.Data;
return new StreamToken(properties, bytes);
}
case ObjectToken _:
{
// Since we don't write token directly to the stream.
// We can't know the offset. Therefore the token would be invalid
throw new NotSupportedException("Copying a Object token is not supported");
}
return ms.ToArray();
}
return tokenToCopy;
}
private static StreamToken WriteContentStream(IReadOnlyList<IGraphicsStateOperation> content)
{
using (var memoryStream = new MemoryStream())
if (!context.Stream.CanSeek)
{
foreach (var operation in content)
{
operation.Write(memoryStream);
}
throw new InvalidOperationException("PdfDocument.Build() called with non-seekable stream.");
}
var bytes = memoryStream.ToArray();
var stream = DataCompresser.CompressToStream(bytes);
return stream;
using (var temp = new MemoryStream())
{
context.Stream.Seek(0, SeekOrigin.Begin);
context.Stream.CopyTo(temp);
return temp.ToArray();
}
}
@@ -518,15 +663,6 @@ namespace UglyToad.PdfPig.Writer
});
}
private static void WriteString(string text, MemoryStream stream, bool appendBreak = true)
{
var bytes = OtherEncodings.StringAsLatin1Bytes(text);
stream.Write(bytes, 0, bytes.Length);
if (appendBreak)
{
stream.WriteNewLine();
}
}
internal class FontStored
{
@@ -543,25 +679,6 @@ namespace UglyToad.PdfPig.Writer
}
}
internal class ImageStored
{
public Guid Id { get; }
public DictionaryToken StreamDictionary { get; }
public byte[] StreamData { get; }
public int ObjectNumber { get; }
public ImageStored(DictionaryToken streamDictionary, byte[] streamData, int objectNumber)
{
Id = Guid.NewGuid();
StreamDictionary = streamDictionary;
StreamData = streamData;
ObjectNumber = objectNumber;
}
}
/// <summary>
/// A key representing a font available to use on the current document builder. Create by adding a font to a document using either
/// <see cref="AddStandard14Font"/> or <see cref="AddTrueTypeFont"/>.
@@ -574,17 +691,17 @@ namespace UglyToad.PdfPig.Writer
internal Guid Id { get; }
/// <summary>
/// The name of this font.
/// Reference to the added font.
/// </summary>
public NameToken Name { get; }
internal IndirectReferenceToken Reference { get; }
/// <summary>
/// Create a new <see cref="AddedFont"/>.
/// </summary>
internal AddedFont(Guid id, NameToken name)
internal AddedFont(Guid id, IndirectReferenceToken reference)
{
Id = id;
Name = name ?? throw new ArgumentNullException(nameof(name));
Reference = reference;
}
}
@@ -661,5 +778,18 @@ namespace UglyToad.PdfPig.Writer
return result;
}
}
/// <summary>
/// Completes the document if that has not happened yet, then disposes the writer context
/// (which disposes the underlying stream if set to do so).
/// </summary>
public void Dispose()
{
    try
    {
        if (!completed)
        {
            CompleteDocument();
        }
    }
    finally
    {
        // release the context even when completing the document fails
        context.Dispose();
    }
}
}
}

View File

@@ -2,22 +2,10 @@
{
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using Content;
using Core;
using CrossReference;
using Encryption;
using Filters;
using Logging;
using Parser;
using Parser.FileStructure;
using Parser.Parts;
using Tokenization.Scanner;
using Tokens;
using Exceptions;
using System.Linq;
using Util;
/// <summary>
/// Merges PDF documents into each other.
@@ -48,9 +36,9 @@
_ = file1 ?? throw new ArgumentNullException(nameof(file1));
_ = file2 ?? throw new ArgumentNullException(nameof(file2));
using (var stream1 = new StreamInputBytes(File.OpenRead(file1)))
using (var stream1 = File.OpenRead(file1))
{
using (var stream2 = new StreamInputBytes(File.OpenRead(file2)))
using (var stream2 = File.OpenRead(file2))
{
Merge(new[] { stream1, stream2 }, output, new[] { file1Selection, file2Selection });
}
@@ -74,13 +62,13 @@
/// </summary>
public static void Merge(Stream output, params string[] filePaths)
{
var streams = new List<StreamInputBytes>(filePaths.Length);
var streams = new List<Stream>(filePaths.Length);
try
{
for (var i = 0; i < filePaths.Length; i++)
{
var filePath = filePaths[i] ?? throw new ArgumentNullException(nameof(filePaths), $"Null filepath at index {i}.");
streams.Add(new StreamInputBytes(File.OpenRead(filePath), true));
streams.Add(File.OpenRead(filePath));
}
Merge(streams, output, null);
@@ -103,7 +91,7 @@
using (var output = new MemoryStream())
{
Merge(files.Select(f => new ByteArrayInputBytes(f)).ToArray(), output, pagesBundle);
Merge(files.Select(f => PdfDocument.Open(f)).ToArray(), output, pagesBundle);
return output.ToArray();
}
}
@@ -122,369 +110,39 @@
_ = streams ?? throw new ArgumentNullException(nameof(streams));
_ = output ?? throw new ArgumentNullException(nameof(output));
Merge(streams.Select(f => new StreamInputBytes(f, false)).ToArray(), output, pagesBundle);
Merge(streams.Select(f => PdfDocument.Open(f)).ToArray(), output, pagesBundle);
}
private static void Merge(IReadOnlyList<IInputBytes> files, Stream output, IReadOnlyList<IReadOnlyList<int>> pagesBundle)
private static void Merge(IReadOnlyList<PdfDocument> files, Stream output, IReadOnlyList<IReadOnlyList<int>> pagesBundle)
{
const bool isLenientParsing = false;
var documentBuilder = new DocumentMerger(output);
foreach (var fileIndex in Enumerable.Range(0, files.Count))
var maxVersion = files.Select(x=>x.Version).Max();
using (var document = new PdfDocumentBuilder(output, false, PdfWriterType.Default, maxVersion))
{
IReadOnlyList<int> pages = null;
if (pagesBundle != null && fileIndex < pagesBundle.Count)
foreach (var fileIndex in Enumerable.Range(0, files.Count))
{
pages = pagesBundle[fileIndex];
var existing = files[fileIndex];
IReadOnlyList<int> pages = null;
if (pagesBundle != null && fileIndex < pagesBundle.Count)
{
pages = pagesBundle[fileIndex];
}
if (pages == null)
{
for (var i = 1; i <= existing.NumberOfPages; i++)
{
document.AddPage(existing, i);
}
} else
{
foreach (var i in pages)
{
document.AddPage(existing, i);
}
}
}
var inputBytes = files[fileIndex];
var coreScanner = new CoreTokenScanner(inputBytes);
var version = FileHeaderParser.Parse(coreScanner, isLenientParsing, Log);
var crossReferenceParser = new CrossReferenceParser(Log, new XrefOffsetValidator(Log),
new Parser.Parts.CrossReference.CrossReferenceStreamParser(FilterProvider));
CrossReferenceTable crossReference = null;
// ReSharper disable once AccessToModifiedClosure
var locationProvider = new ObjectLocationProvider(() => crossReference, inputBytes);
var pdfScanner = new PdfTokenScanner(inputBytes, locationProvider, FilterProvider, NoOpEncryptionHandler.Instance);
var crossReferenceOffset = FileTrailerParser.GetFirstCrossReferenceOffset(inputBytes, coreScanner, isLenientParsing);
crossReference = crossReferenceParser.Parse(inputBytes, isLenientParsing, crossReferenceOffset, version.OffsetInFile, pdfScanner, coreScanner);
var catalogDictionaryToken = ParseCatalog(crossReference, pdfScanner, out var encryptionDictionary);
if (encryptionDictionary != null)
{
throw new PdfDocumentEncryptedException("Unable to merge document with password");
}
var documentCatalog = CatalogFactory.Create(crossReference.Trailer.Root, catalogDictionaryToken, pdfScanner, isLenientParsing);
documentBuilder.AppendDocument(documentCatalog, version.Version, pdfScanner, pages);
}
documentBuilder.Build();
}
// This method is a basically a copy of the method UglyToad.PdfPig.Parser.PdfDocumentFactory.ParseTrailer()
private static DictionaryToken ParseCatalog(CrossReferenceTable crossReferenceTable,
IPdfTokenScanner pdfTokenScanner,
out EncryptionDictionary encryptionDictionary)
{
encryptionDictionary = null;
if (crossReferenceTable.Trailer.EncryptionToken != null)
{
if (!DirectObjectFinder.TryGet(crossReferenceTable.Trailer.EncryptionToken, pdfTokenScanner,
out DictionaryToken encryptionDictionaryToken))
{
throw new PdfDocumentFormatException($"Unrecognized encryption token in trailer: {crossReferenceTable.Trailer.EncryptionToken}.");
}
encryptionDictionary = EncryptionDictionaryFactory.Read(encryptionDictionaryToken, pdfTokenScanner);
}
var rootDictionary = DirectObjectFinder.Get<DictionaryToken>(crossReferenceTable.Trailer.Root, pdfTokenScanner);
if (!rootDictionary.ContainsKey(NameToken.Type))
{
rootDictionary = rootDictionary.With(NameToken.Type, NameToken.Catalog);
}
return rootDictionary;
}
private class DocumentMerger
{
private const decimal DefaultVersion = 1.2m;
private const int ARTIFICIAL_NODE_LIMIT = 100;
private readonly PdfStreamWriter context;
private readonly List<IndirectReferenceToken> pagesTokenReferences = new List<IndirectReferenceToken>();
private readonly IndirectReferenceToken rootPagesReference;
private decimal currentVersion = DefaultVersion;
private int pageCount = 0;
public DocumentMerger(Stream baseStream)
{
context = new PdfStreamWriter(baseStream, false);
rootPagesReference = context.ReserveNumberToken();
}
public void AppendDocument(Catalog catalog, decimal version, IPdfTokenScanner tokenScanner, IReadOnlyList<int> pages)
{
IEnumerable<int> pageIndices;
if (pages == null)
{
var pagesCount = catalog.PagesDictionary.GetIntOrDefault(NameToken.Count);
if (pagesCount < 1)
{
return;
}
pageIndices = Enumerable.Range(1, pagesCount);
}
else if (pages.Count < 1)
{
return;
}
else
{
pageIndices = pages;
}
currentVersion = Math.Max(version, currentVersion);
var referencesFromDocument = new Dictionary<IndirectReference, IndirectReferenceToken>();
var currentNodeReference = context.ReserveNumberToken();
var pagesReferences = new List<IndirectReferenceToken>();
var resources = new Dictionary<string, IToken>();
bool DoesAEntryCollide(PageTreeNode node)
{
while (node != null)
{
var dictionary = node.NodeDictionary;
if (dictionary.TryGet(NameToken.Resources, tokenScanner, out DictionaryToken resourcesDictionary))
{
var nonCollidingResources = resourcesDictionary.Data.Keys.Except(resources.Keys);
if (nonCollidingResources.Count() != resourcesDictionary.Data.Count)
{
// This means that at least one of the resources collided
return true;
}
}
/* TODO: How to handle?
* `Rotate`
* `CropBox`
* `MediaBox`
*/
// No colliding entry was found, in this node
// Keep walking up into the tree
node = node.Parent;
}
return false;
}
void CopyEntries(PageTreeNode node)
{
while (node != null)
{
var dictionary = node.NodeDictionary;
if (dictionary.TryGet(NameToken.Resources, tokenScanner, out DictionaryToken resourcesDictionary))
{
foreach (var pair in resourcesDictionary.Data)
{
resources.Add(pair.Key, CopyToken(pair.Value, tokenScanner, referencesFromDocument));
}
}
/* TODO: How to handle?
* `Rotate`
* `CropBox`
* `MediaBox`
*/
// Keep walking up into the tree
node = node.Parent;
}
}
void CreateTree()
{
if (pagesReferences.Count < 1)
{
throw new InvalidOperationException("Pages reference should always be more than 1 when executing this function");
}
var newPagesNode = new Dictionary<NameToken, IToken>
{
{ NameToken.Type, NameToken.Pages },
{ NameToken.Kids, new ArrayToken(pagesReferences) },
{ NameToken.Count, new NumericToken(pagesReferences.Count) },
{ NameToken.Parent, rootPagesReference }
};
if (resources.Count > 0)
{
newPagesNode.Add(NameToken.Resources, DictionaryToken.With(resources));
}
var pagesDictionary = new DictionaryToken(newPagesNode);
pagesTokenReferences.Add(context.WriteToken(pagesDictionary, (int)currentNodeReference.Data.ObjectNumber));
pageCount += pagesReferences.Count;
};
foreach (var pageIndex in pageIndices)
{
var pageNode = catalog.GetPageNode(pageIndex);
if (pagesReferences.Count >= ARTIFICIAL_NODE_LIMIT || DoesAEntryCollide(pageNode))
{
CreateTree();
currentNodeReference = context.ReserveNumberToken();
pagesReferences = new List<IndirectReferenceToken>();
resources = new Dictionary<string, IToken>();
}
CopyEntries(pageNode.Parent);
pagesReferences.Add(CopyPageNode(pageNode, currentNodeReference, tokenScanner, referencesFromDocument));
}
if (pagesReferences.Count < 1)
{
throw new InvalidOperationException("Pages reference couldn't be less than 1 because we have reserved a indirect reference token");
}
CreateTree();
}
public void Build()
{
if (pagesTokenReferences.Count < 1)
{
throw new PdfDocumentFormatException("Empty document");
}
var pagesDictionary = new DictionaryToken(new Dictionary<NameToken, IToken>
{
{ NameToken.Type, NameToken.Pages },
{ NameToken.Kids, new ArrayToken(pagesTokenReferences) },
{ NameToken.Count, new NumericToken(pageCount) }
});
var pagesRef = context.WriteToken(pagesDictionary, (int)rootPagesReference.Data.ObjectNumber);
var catalog = new DictionaryToken(new Dictionary<NameToken, IToken>
{
{ NameToken.Type, NameToken.Catalog },
{ NameToken.Pages, pagesRef }
});
var catalogRef = context.WriteToken(catalog);
context.Flush(currentVersion, catalogRef);
Close();
}
public void Close()
{
context.Dispose();
}
private IndirectReferenceToken CopyPageNode(PageTreeNode pageNode, IndirectReferenceToken parentPagesObject, IPdfTokenScanner tokenScanner,
IDictionary<IndirectReference, IndirectReferenceToken> referencesFromDocument)
{
Debug.Assert(pageNode.IsPage);
var pageDictionary = new Dictionary<NameToken, IToken>
{
{NameToken.Parent, parentPagesObject},
};
foreach (var setPair in pageNode.NodeDictionary.Data)
{
var name = setPair.Key;
var token = setPair.Value;
if (name == NameToken.Parent)
{
// Skip Parent token, since we have to reassign it
continue;
}
pageDictionary.Add(NameToken.Create(name), CopyToken(token, tokenScanner, referencesFromDocument));
}
return context.WriteToken(new DictionaryToken(pageDictionary));
}
/// <summary>
/// The purpose of this method is to resolve indirect reference. That mean copy the reference's content to the new document's stream
/// and replace the indirect reference with the correct/new one
/// </summary>
/// <param name="tokenToCopy">Token to inspect for reference</param>
/// <param name="tokenScanner">scanner get the content from the original document</param>
/// <param name="referencesFromDocument">Map of previously copied</param>
/// <returns>A reference of the token that was copied. With all the reference updated</returns>
private IToken CopyToken(IToken tokenToCopy, IPdfTokenScanner tokenScanner, IDictionary<IndirectReference, IndirectReferenceToken> referencesFromDocument)
{
    // Tokens that can hold references are rebuilt so that every nested reference
    // can be re-targeted; any other kind of token is handed back unchanged.
    if (tokenToCopy is DictionaryToken sourceDictionary)
    {
        var copiedEntries = new Dictionary<NameToken, IToken>();
        foreach (var entry in sourceDictionary.Data)
        {
            var copiedKey = NameToken.Create(entry.Key);
            var copiedValue = CopyToken(entry.Value, tokenScanner, referencesFromDocument);
            copiedEntries.Add(copiedKey, copiedValue);
        }

        return new DictionaryToken(copiedEntries);
    }

    if (tokenToCopy is ArrayToken sourceArray)
    {
        var copiedItems = new List<IToken>(sourceArray.Length);
        foreach (var item in sourceArray.Data)
        {
            copiedItems.Add(CopyToken(item, tokenScanner, referencesFromDocument));
        }

        return new ArrayToken(copiedItems);
    }

    if (tokenToCopy is IndirectReferenceToken sourceReference)
    {
        // A reference that was already copied maps straight to its new reference.
        if (referencesFromDocument.TryGetValue(sourceReference.Data, out var knownReference))
        {
            return knownReference;
        }

        // Register the reserved reference before descending into the referenced object,
        // so that a reference cycle hits the lookup above instead of recursing forever.
        var reservedReference = context.ReserveNumberToken();
        referencesFromDocument.Add(sourceReference.Data, reservedReference);

        var resolved = DirectObjectFinder.Get<IToken>(sourceReference.Data, tokenScanner);
        Debug.Assert(!(resolved is IndirectReferenceToken));

        var copiedObject = CopyToken(resolved, tokenScanner, referencesFromDocument);
        context.WriteToken(reservedReference, copiedObject);
        return reservedReference;
    }

    if (tokenToCopy is StreamToken sourceStream)
    {
        // Only the stream dictionary can contain references; the data is reused as-is.
        var copiedDictionary = CopyToken(sourceStream.StreamDictionary, tokenScanner, referencesFromDocument) as DictionaryToken;
        Debug.Assert(copiedDictionary != null);
        return new StreamToken(copiedDictionary, sourceStream.Data);
    }

    if (tokenToCopy is ObjectToken)
    {
        // Tokens are not written directly to the stream here, so the offset an
        // object token needs is unknown and the copy would be invalid.
        throw new NotSupportedException("Copying a Object token is not supported");
    }

    return tokenToCopy;
}
}
}
}

View File

@@ -28,16 +28,26 @@
/// </summary>
public class PdfPageBuilder
{
// parent
private readonly PdfDocumentBuilder documentBuilder;
private readonly List<ContentStream> contentStreams;
private readonly Dictionary<NameToken, IToken> resourcesDictionary = new Dictionary<NameToken, IToken>();
// all page data other than content streams
internal readonly Dictionary<NameToken, IToken> pageDictionary = new Dictionary<NameToken, IToken>();
// streams
internal readonly List<IPageContentStream> contentStreams;
private IPageContentStream currentStream;
// maps fonts added using PdfDocumentBuilder to page font names
private readonly Dictionary<Guid, NameToken> documentFonts = new Dictionary<Guid, NameToken>();
internal int nextFontId = 1;
// A sequence number of the ShowText operation, used to determine whether letters belong to the same operation or not (letters that belong to different operations have a lower chance of belonging to the same word).
private int textSequence;
private int imageKey = 1;
internal IReadOnlyDictionary<NameToken, IToken> Resources => resourcesDictionary;
internal IReadOnlyDictionary<string, IToken> Resources => pageDictionary.GetOrCreateDict(NameToken.Resources);
/// <summary>
/// The number of this page, 1-indexed.
@@ -52,34 +62,43 @@
/// <summary>
/// Access to the underlying data structures for advanced use cases.
/// </summary>
public ContentStream CurrentStream { get; private set; }
public IContentStream CurrentStream => currentStream;
/// <summary>
/// Access to the content streams of this page.
/// </summary>
public IReadOnlyList<ContentStream> ContentStreams { get; }
public IReadOnlyList<IContentStream> ContentStreams => contentStreams;
internal PdfPageBuilder(int number, PdfDocumentBuilder documentBuilder)
{
this.documentBuilder = documentBuilder ?? throw new ArgumentNullException(nameof(documentBuilder));
PageNumber = number;
CurrentStream = new ContentStream();
ContentStreams = contentStreams = new List<ContentStream>()
{
CurrentStream
};
currentStream = new DefaultContentStream();
contentStreams = new List<IPageContentStream>() {currentStream};
}
/// <summary>
/// Create a page builder that starts from existing page data: the supplied page dictionary
/// and a set of copied content streams. A new, empty writable stream is appended after the
/// copied streams and becomes the current stream.
/// </summary>
/// <param name="number">The number of this page.</param>
/// <param name="documentBuilder">The document builder that owns this page.</param>
/// <param name="copied">Content streams to place ahead of the new writable stream.</param>
/// <param name="pageDict">The dictionary holding all page data other than content streams.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="documentBuilder"/>, <paramref name="copied"/> or <paramref name="pageDict"/> is null.
/// </exception>
internal PdfPageBuilder(int number, PdfDocumentBuilder documentBuilder, IEnumerable<CopiedContentStream> copied,
    Dictionary<NameToken, IToken> pageDict)
{
    this.documentBuilder = documentBuilder ?? throw new ArgumentNullException(nameof(documentBuilder));
    if (copied == null)
    {
        throw new ArgumentNullException(nameof(copied));
    }

    // Fail here rather than with a NullReferenceException on the first resource lookup.
    pageDictionary = pageDict ?? throw new ArgumentNullException(nameof(pageDict));
    PageNumber = number;

    contentStreams = new List<IPageContentStream>();
    contentStreams.AddRange(copied);

    // New operations are written to a fresh stream placed after the copied ones.
    currentStream = new DefaultContentStream();
    contentStreams.Add(currentStream);
}
/// <summary>
/// Allow to append a new content stream before the current one and select it
/// </summary>
public void NewContentStreamBefore()
{
var index = Math.Max(contentStreams.IndexOf(CurrentStream) - 1, 0);
var index = Math.Max(contentStreams.IndexOf(currentStream) - 1, 0);
CurrentStream = new ContentStream();
contentStreams.Insert(index, CurrentStream);
currentStream = new DefaultContentStream();
contentStreams.Insert(index, currentStream);
}
/// <summary>
@@ -87,10 +106,10 @@
/// </summary>
public void NewContentStreamAfter()
{
var index = Math.Min(contentStreams.IndexOf(CurrentStream) + 1, contentStreams.Count);
var index = Math.Min(contentStreams.IndexOf(currentStream) + 1, contentStreams.Count);
CurrentStream = new ContentStream();
contentStreams.Insert(index, CurrentStream);
currentStream = new DefaultContentStream();
contentStreams.Insert(index, currentStream);
}
/// <summary>
@@ -99,12 +118,12 @@
/// <param name="index">index of the content stream to be selected</param>
public void SelectContentStream(int index)
{
if (index < 0 || index >= ContentStreams.Count)
if (index < 0 || index >= contentStreams.Count)
{
throw new IndexOutOfRangeException(nameof(index));
}
CurrentStream = ContentStreams[index];
currentStream = contentStreams[index];
}
/// <summary>
@@ -117,16 +136,16 @@
{
if (lineWidth != 1)
{
CurrentStream.Add(new SetLineWidth(lineWidth));
currentStream.Add(new SetLineWidth(lineWidth));
}
CurrentStream.Add(new BeginNewSubpath((decimal)from.X, (decimal)from.Y));
CurrentStream.Add(new AppendStraightLineSegment((decimal)to.X, (decimal)to.Y));
CurrentStream.Add(StrokePath.Value);
currentStream.Add(new BeginNewSubpath((decimal)from.X, (decimal)from.Y));
currentStream.Add(new AppendStraightLineSegment((decimal)to.X, (decimal)to.Y));
currentStream.Add(StrokePath.Value);
if (lineWidth != 1)
{
CurrentStream.Add(new SetLineWidth(1));
currentStream.Add(new SetLineWidth(1));
}
}
@@ -142,23 +161,23 @@
{
if (lineWidth != 1)
{
CurrentStream.Add(new SetLineWidth(lineWidth));
currentStream.Add(new SetLineWidth(lineWidth));
}
CurrentStream.Add(new AppendRectangle((decimal)position.X, (decimal)position.Y, width, height));
currentStream.Add(new AppendRectangle((decimal)position.X, (decimal)position.Y, width, height));
if (fill)
{
CurrentStream.Add(FillPathEvenOddRuleAndStroke.Value);
currentStream.Add(FillPathEvenOddRuleAndStroke.Value);
}
else
{
CurrentStream.Add(StrokePath.Value);
currentStream.Add(StrokePath.Value);
}
if (lineWidth != 1)
{
CurrentStream.Add(new SetLineWidth(lineWidth));
currentStream.Add(new SetLineWidth(lineWidth));
}
}
@@ -170,8 +189,8 @@
/// <param name="b">Blue - 0 to 255</param>
public void SetStrokeColor(byte r, byte g, byte b)
{
CurrentStream.Add(Push.Value);
CurrentStream.Add(new SetStrokeColorDeviceRgb(RgbToDecimal(r), RgbToDecimal(g), RgbToDecimal(b)));
currentStream.Add(Push.Value);
currentStream.Add(new SetStrokeColorDeviceRgb(RgbToDecimal(r), RgbToDecimal(g), RgbToDecimal(b)));
}
/// <summary>
@@ -182,8 +201,8 @@
/// <param name="b">Blue - 0 to 1</param>
internal void SetStrokeColorExact(decimal r, decimal g, decimal b)
{
CurrentStream.Add(Push.Value);
CurrentStream.Add(new SetStrokeColorDeviceRgb(CheckRgbDecimal(r, nameof(r)),
currentStream.Add(Push.Value);
currentStream.Add(new SetStrokeColorDeviceRgb(CheckRgbDecimal(r, nameof(r)),
CheckRgbDecimal(g, nameof(g)), CheckRgbDecimal(b, nameof(b))));
}
@@ -195,8 +214,8 @@
/// <param name="b">Blue - 0 to 255</param>
public void SetTextAndFillColor(byte r, byte g, byte b)
{
CurrentStream.Add(Push.Value);
CurrentStream.Add(new SetNonStrokeColorDeviceRgb(RgbToDecimal(r), RgbToDecimal(g), RgbToDecimal(b)));
currentStream.Add(Push.Value);
currentStream.Add(new SetNonStrokeColorDeviceRgb(RgbToDecimal(r), RgbToDecimal(g), RgbToDecimal(b)));
}
/// <summary>
@@ -204,7 +223,7 @@
/// </summary>
public void ResetColor()
{
CurrentStream.Add(Pop.Value);
currentStream.Add(Pop.Value);
}
/// <summary>
@@ -247,7 +266,7 @@
var textMatrix = TransformationMatrix.FromValues(1, 0, 0, 1, position.X, position.Y);
var letters = DrawLetters(text, fontProgram, fm, fontSize, textMatrix);
var letters = DrawLetters(null, text, fontProgram, fm, fontSize, textMatrix);
return letters;
}
@@ -286,23 +305,25 @@
throw new ArgumentOutOfRangeException(nameof(fontSize), "Font size must be greater than 0");
}
var fontName = GetAddedFont(font);
var fontProgram = fontStore.FontProgram;
var fm = fontProgram.GetFontMatrix();
var textMatrix = TransformationMatrix.FromValues(1, 0, 0, 1, position.X, position.Y);
var letters = DrawLetters(text, fontProgram, fm, fontSize, textMatrix);
var letters = DrawLetters(fontName, text, fontProgram, fm, fontSize, textMatrix);
CurrentStream.Add(BeginText.Value);
CurrentStream.Add(new SetFontAndSize(font.Name, fontSize));
CurrentStream.Add(new MoveToNextLineWithOffset((decimal)position.X, (decimal)position.Y));
currentStream.Add(BeginText.Value);
currentStream.Add(new SetFontAndSize(fontName, fontSize));
currentStream.Add(new MoveToNextLineWithOffset((decimal)position.X, (decimal)position.Y));
var bytesPerShow = new List<byte>();
foreach (var letter in text)
{
if (char.IsWhiteSpace(letter))
{
CurrentStream.Add(new ShowText(bytesPerShow.ToArray()));
currentStream.Add(new ShowText(bytesPerShow.ToArray()));
bytesPerShow.Clear();
}
@@ -312,14 +333,33 @@
if (bytesPerShow.Count > 0)
{
CurrentStream.Add(new ShowText(bytesPerShow.ToArray()));
currentStream.Add(new ShowText(bytesPerShow.ToArray()));
}
CurrentStream.Add(EndText.Value);
currentStream.Add(EndText.Value);
return letters;
}
/// <summary>
/// Gets the name under which a document-level font is registered in this page's
/// font resources, registering it under a new unused "F{n}" name on first use.
/// </summary>
/// <param name="font">The font previously added to the document builder.</param>
/// <returns>The page-level resource name for the font.</returns>
private NameToken GetAddedFont(PdfDocumentBuilder.AddedFont font)
{
    if (documentFonts.TryGetValue(font.Id, out NameToken registeredName))
    {
        return registeredName;
    }

    var candidate = NameToken.Create($"F{nextFontId++}");
    var pageFonts = pageDictionary
        .GetOrCreateDict(NameToken.Resources)
        .GetOrCreateDict(NameToken.Font);

    // Skip over names that are already present in the page's font dictionary.
    while (pageFonts.ContainsKey(candidate))
    {
        candidate = NameToken.Create($"F{nextFontId++}");
    }

    documentFonts[font.Id] = candidate;
    pageFonts[candidate] = font.Reference;
    return candidate;
}
/// <summary>
/// Adds the JPEG image represented by the input bytes at the specified location.
/// </summary>
@@ -360,30 +400,24 @@
};
var reference = documentBuilder.AddImage(new DictionaryToken(imgDictionary), data);
if (!resourcesDictionary.TryGetValue(NameToken.Xobject, out var xobjectsDict)
|| !(xobjectsDict is DictionaryToken xobjects))
{
xobjects = new DictionaryToken(new Dictionary<NameToken, IToken>());
resourcesDictionary[NameToken.Xobject] = xobjects;
}
var resources = pageDictionary.GetOrCreateDict(NameToken.Resources);
var xObjects = resources.GetOrCreateDict(NameToken.Xobject);
var key = NameToken.Create($"I{imageKey++}");
xObjects[key] = reference;
resourcesDictionary[NameToken.Xobject] = xobjects.With(key, new IndirectReferenceToken(reference));
CurrentStream.Add(Push.Value);
currentStream.Add(Push.Value);
// This needs to be the placement rectangle.
CurrentStream.Add(new ModifyCurrentTransformationMatrix(new []
currentStream.Add(new ModifyCurrentTransformationMatrix(new []
{
(decimal)placementRectangle.Width, 0,
0, (decimal)placementRectangle.Height,
(decimal)placementRectangle.BottomLeft.X, (decimal)placementRectangle.BottomLeft.Y
}));
CurrentStream.Add(new InvokeNamedXObject(key));
CurrentStream.Add(Pop.Value);
currentStream.Add(new InvokeNamedXObject(key));
currentStream.Add(Pop.Value);
return new AddedImage(reference, info.Width, info.Height);
return new AddedImage(reference.Data, info.Width, info.Height);
}
/// <summary>
@@ -400,27 +434,22 @@
/// </summary>
public void AddImage(AddedImage image, PdfRectangle placementRectangle)
{
if (!resourcesDictionary.TryGetValue(NameToken.Xobject, out var xobjectsDict)
|| !(xobjectsDict is DictionaryToken xobjects))
{
xobjects = new DictionaryToken(new Dictionary<NameToken, IToken>());
resourcesDictionary[NameToken.Xobject] = xobjects;
}
var resources = pageDictionary.GetOrCreateDict(NameToken.Resources);
var xObjects = resources.GetOrCreateDict(NameToken.Xobject);
var key = NameToken.Create($"I{imageKey++}");
xObjects[key] = new IndirectReferenceToken(image.Reference);
resourcesDictionary[NameToken.Xobject] = xobjects.With(key, new IndirectReferenceToken(image.Reference));
CurrentStream.Add(Push.Value);
currentStream.Add(Push.Value);
// This needs to be the placement rectangle.
CurrentStream.Add(new ModifyCurrentTransformationMatrix(new[]
currentStream.Add(new ModifyCurrentTransformationMatrix(new[]
{
(decimal)placementRectangle.Width, 0,
0, (decimal)placementRectangle.Height,
(decimal)placementRectangle.BottomLeft.X, (decimal)placementRectangle.BottomLeft.Y
}));
CurrentStream.Add(new InvokeNamedXObject(key));
CurrentStream.Add(Pop.Value);
currentStream.Add(new InvokeNamedXObject(key));
currentStream.Add(Pop.Value);
}
/// <summary>
@@ -478,29 +507,25 @@
var reference = documentBuilder.AddImage(new DictionaryToken(imgDictionary), compressed);
if (!resourcesDictionary.TryGetValue(NameToken.Xobject, out var xobjectsDict)
|| !(xobjectsDict is DictionaryToken xobjects))
{
xobjects = new DictionaryToken(new Dictionary<NameToken, IToken>());
resourcesDictionary[NameToken.Xobject] = xobjects;
}
var resources = pageDictionary.GetOrCreateDict(NameToken.Resources);
var xObjects = resources.GetOrCreateDict(NameToken.Xobject);
var key = NameToken.Create($"I{imageKey++}");
resourcesDictionary[NameToken.Xobject] = xobjects.With(key, new IndirectReferenceToken(reference));
xObjects[key] = reference;
CurrentStream.Add(Push.Value);
currentStream.Add(Push.Value);
// This needs to be the placement rectangle.
CurrentStream.Add(new ModifyCurrentTransformationMatrix(new[]
currentStream.Add(new ModifyCurrentTransformationMatrix(new[]
{
(decimal)placementRectangle.Width, 0,
0, (decimal)placementRectangle.Height,
(decimal)placementRectangle.BottomLeft.X, (decimal)placementRectangle.BottomLeft.Y
}));
CurrentStream.Add(new InvokeNamedXObject(key));
CurrentStream.Add(Pop.Value);
currentStream.Add(new InvokeNamedXObject(key));
currentStream.Add(Pop.Value);
return new AddedImage(reference, png.Width, png.Height);
return new AddedImage(reference.Data, png.Width, png.Height);
}
/// <summary>
@@ -509,13 +534,12 @@
/// <param name="srcPage">Page to be copied</param>
public void CopyFrom(Page srcPage)
{
ContentStream destinationStream = null;
if (CurrentStream.Operations.Count > 0)
if (currentStream.Operations.Count > 0)
{
NewContentStreamAfter();
}
destinationStream = CurrentStream;
var destinationStream = currentStream;
if (!srcPage.Dictionary.TryGet(NameToken.Resources, srcPage.pdfScanner, out DictionaryToken srcResourceDictionary))
{
@@ -534,6 +558,8 @@
// We need to relocate the resources, and we have to make sure that none of the resources collide with
// the already written operation's resources
var resources = pageDictionary.GetOrCreateDict(NameToken.Resources);
foreach (var set in srcResourceDictionary.Data)
{
var nameToken = NameToken.Create(set.Key);
@@ -543,11 +569,11 @@
continue;
}
if (!resourcesDictionary.TryGetValue(nameToken, out var currentToken))
if (!resources.ContainsKey(nameToken))
{
// It means that this type of resources doesn't currently exist in the page, so we can copy it
// with no problem
resourcesDictionary[nameToken] = documentBuilder.CopyToken(set.Value, srcPage.pdfScanner);
resources[nameToken] = documentBuilder.CopyToken(srcPage.pdfScanner, set.Value);
continue;
}
@@ -559,26 +585,19 @@
// Since we don't directly add font's to the pages resources, we have to go look at the document's font
if(srcResourceDictionary.TryGet(NameToken.Font, srcPage.pdfScanner, out DictionaryToken fontsDictionary))
{
Dictionary<NameToken, IToken> pageFontsDictionary = null;
if (resourcesDictionary.TryGetValue(NameToken.Font, out var pageFontsToken))
{
pageFontsDictionary = (pageFontsToken as DictionaryToken)?.Data.ToDictionary(k => NameToken.Create(k.Key), v => v.Value);
Debug.Assert(pageFontsDictionary != null);
}
else
{
pageFontsDictionary = new Dictionary<NameToken, IToken>();
}
var pageFontsDictionary = resources.GetOrCreateDict(NameToken.Font);
foreach (var fontSet in fontsDictionary.Data)
{
var fontName = fontSet.Key;
var addedFont = documentBuilder.Fonts.Values.FirstOrDefault(f => f.FontKey.Name.Data == fontName);
if (addedFont != default)
var fontName = NameToken.Create(fontSet.Key);
if (pageFontsDictionary.ContainsKey(fontName))
{
// This would mean that the imported font collide with one of the added font. so we have to rename it
var newName = $"F{documentBuilder.fontId++}";
var newName = NameToken.Create($"F{nextFontId++}");
while (pageFontsDictionary.ContainsKey(newName))
{
newName = NameToken.Create($"F{nextFontId++}");
}
// Set all the pertinent SetFontAndSize operations with the new name
operations = operations.Select(op =>
@@ -590,7 +609,7 @@
if (fontAndSizeOperation.Font.Data == fontName)
{
return new SetFontAndSize(NameToken.Create(newName), fontAndSizeOperation.Size);
return new SetFontAndSize(newName, fontAndSizeOperation.Size);
}
return op;
@@ -604,25 +623,14 @@
throw new PdfDocumentFormatException($"Expected a IndirectReferenceToken for the font, got a {fontSet.Value.GetType().Name}");
}
pageFontsDictionary.Add(NameToken.Create(fontName), documentBuilder.CopyToken(fontReferenceToken, srcPage.pdfScanner));
pageFontsDictionary.Add(fontName, documentBuilder.CopyToken(srcPage.pdfScanner, fontReferenceToken));
}
resourcesDictionary[NameToken.Font] = new DictionaryToken(pageFontsDictionary);
}
// Since we don't directly add xobjects's to the pages resources, we have to go look at the document's xobjects
if (srcResourceDictionary.TryGet(NameToken.Xobject, srcPage.pdfScanner, out DictionaryToken xobjectsDictionary))
{
Dictionary<NameToken, IToken> pageXobjectsDictionary = null;
if (resourcesDictionary.TryGetValue(NameToken.Xobject, out var pageXobjectToken))
{
pageXobjectsDictionary = (pageXobjectToken as DictionaryToken)?.Data.ToDictionary(k => NameToken.Create(k.Key), v => v.Value);
Debug.Assert(pageXobjectsDictionary != null);
}
else
{
pageXobjectsDictionary = new Dictionary<NameToken, IToken>();
}
var pageXobjectsDictionary = resources.GetOrCreateDict(NameToken.Xobject);
var xobjectNamesUsed = Enumerable.Range(0, imageKey).Select(i => $"I{i}");
foreach (var xobjectSet in xobjectsDictionary.Data)
@@ -657,16 +665,14 @@
throw new PdfDocumentFormatException($"Expected a IndirectReferenceToken for the XObject, got a {xobjectSet.Value.GetType().Name}");
}
pageXobjectsDictionary.Add(NameToken.Create(xobjectName), documentBuilder.CopyToken(fontReferenceToken, srcPage.pdfScanner));
pageXobjectsDictionary[xobjectName] = documentBuilder.CopyToken(srcPage.pdfScanner, fontReferenceToken);
}
resourcesDictionary[NameToken.Xobject] = new DictionaryToken(pageXobjectsDictionary);
}
destinationStream.Operations.AddRange(operations);
}
private List<Letter> DrawLetters(string text, IWritingFont font, TransformationMatrix fontMatrix, decimal fontSize, TransformationMatrix textMatrix)
private List<Letter> DrawLetters(NameToken name, string text, IWritingFont font, TransformationMatrix fontMatrix, decimal fontSize, TransformationMatrix textMatrix)
{
var horizontalScaling = 1;
var rise = 0;
@@ -698,7 +704,7 @@
var documentSpace = textMatrix.Transform(renderingMatrix.Transform(fontMatrix.Transform(rect)));
var letter = new Letter(c.ToString(), documentSpace, advanceRect.BottomLeft, advanceRect.BottomRight, width, (double)fontSize, FontDetails.GetDefault(font.Name),
var letter = new Letter(c.ToString(), documentSpace, advanceRect.BottomLeft, advanceRect.BottomRight, width, (double)fontSize, FontDetails.GetDefault(name),
GrayColor.Black,
(double)fontSize,
textSequence);
@@ -744,27 +750,91 @@
/// <summary>
/// Provides access to the raw page data structures for advanced editing use cases.
/// </summary>
public class ContentStream
public interface IContentStream
{
/// <summary>
/// The operations making up the page content stream.
/// </summary>
public List<IGraphicsStateOperation> Operations { get; }
List<IGraphicsStateOperation> Operations { get; }
}
/// <summary>
/// Create a new <see cref="ContentStream"/>.
/// </summary>
internal ContentStream()
internal interface IPageContentStream : IContentStream
{
bool ReadOnly { get; }
bool HasContent { get; }
void Add(IGraphicsStateOperation operation);
IndirectReferenceToken Write(IPdfStreamWriter writer);
}
internal class DefaultContentStream : IPageContentStream
{
private readonly List<IGraphicsStateOperation> operations;
public DefaultContentStream() : this(new List<IGraphicsStateOperation>())
{
Operations = new List<IGraphicsStateOperation>();
}
public DefaultContentStream(List<IGraphicsStateOperation> operations)
{
this.operations = operations;
}
internal void Add(IGraphicsStateOperation newOperation)
public bool ReadOnly => false;
public bool HasContent => operations.Any();
public void Add(IGraphicsStateOperation operation)
{
Operations.Add(newOperation);
operations.Add(operation);
}
public List<IGraphicsStateOperation> Operations => operations;
public IndirectReferenceToken Write(IPdfStreamWriter writer)
{
using (var memoryStream = new MemoryStream())
{
foreach (var operation in operations)
{
operation.Write(memoryStream);
}
var bytes = memoryStream.ToArray();
var stream = DataCompresser.CompressToStream(bytes);
return writer.WriteToken(stream);
}
}
}
/// <summary>
/// A read-only content stream represented solely by an existing indirect reference.
/// It cannot be appended to and its operations cannot be inspected.
/// </summary>
internal class CopiedContentStream : IPageContentStream
{
    private readonly IndirectReferenceToken token;

    /// <summary>
    /// Always <c>true</c>: operations cannot be added to this stream.
    /// </summary>
    public bool ReadOnly => true;

    /// <summary>
    /// Always <c>true</c> for this stream type.
    /// </summary>
    public bool HasContent => true;

    /// <summary>
    /// Create a new <see cref="CopiedContentStream"/>.
    /// </summary>
    /// <param name="indirectReferenceToken">The reference returned by <see cref="Write"/>.</param>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="indirectReferenceToken"/> is null.</exception>
    public CopiedContentStream(IndirectReferenceToken indirectReferenceToken)
    {
        // Reject null here so Write can never hand a null reference to its caller.
        token = indirectReferenceToken ?? throw new ArgumentNullException(nameof(indirectReferenceToken));
    }

    /// <summary>
    /// Returns the stored reference; nothing is written to <paramref name="writer"/>.
    /// </summary>
    public IndirectReferenceToken Write(IPdfStreamWriter writer)
    {
        return token;
    }

    /// <summary>
    /// Not supported for this stream type.
    /// </summary>
    /// <exception cref="NotSupportedException">Always thrown.</exception>
    public void Add(IGraphicsStateOperation operation)
    {
        throw new NotSupportedException("Writing to a copied content stream is not supported.");
    }

    /// <summary>
    /// Not supported for this stream type; reading this property always throws <see cref="NotSupportedException"/>.
    /// </summary>
    public List<IGraphicsStateOperation> Operations =>
        throw new NotSupportedException("Reading raw operations is not supported from a copied content stream.");
}
/// <summary>
/// A key representing an image available to use for the current document builder.
/// Create it by adding an image to a page using <see cref="AddJpeg(byte[],PdfRectangle)"/>.
@@ -802,5 +872,7 @@
Height = height;
}
}
}
}

View File

@@ -11,19 +11,15 @@
/// <summary>
/// This class would lazily flush all token. Allowing us to make changes to references without need to rewrite the whole stream
/// </summary>
internal class PdfStreamWriter : IDisposable
internal class PdfStreamWriter : IPdfStreamWriter
{
private readonly List<int> reservedNumbers = new List<int>();
protected const decimal DefaultVersion = 1.2m;
protected Dictionary<IndirectReference, long> offsets = new Dictionary<IndirectReference, long>();
protected bool DisposeStream { get; set; }
protected bool Initialized { get; set; }
protected int CurrentNumber { get; set; } = 1;
private readonly Dictionary<IndirectReferenceToken, IToken> tokenReferences = new Dictionary<IndirectReferenceToken, IToken>();
public int CurrentNumber { get; private set; } = 1;
public Stream Stream { get; private set; }
public bool DisposeStream { get; set; }
public PdfStreamWriter(Stream baseStream, bool disposeStream = true)
internal PdfStreamWriter(Stream baseStream, bool disposeStream = true)
{
Stream = baseStream ?? throw new ArgumentNullException(nameof(baseStream));
if (!baseStream.CanWrite)
@@ -34,13 +30,42 @@
DisposeStream = disposeStream;
}
public void Flush(decimal version, IndirectReferenceToken catalogReference)
public Stream Stream { get; protected set; }
public virtual IndirectReferenceToken WriteToken(IToken token)
{
if (catalogReference == null)
if (!Initialized)
{
throw new ArgumentNullException(nameof(catalogReference));
InitializePdf(DefaultVersion);
}
var ir = ReserveObjectNumber();
offsets.Add(ir.Data, Stream.Position);
var obj = new ObjectToken(Stream.Position, ir.Data, token);
TokenWriter.WriteToken(obj, Stream);
return ir;
}
public virtual IndirectReferenceToken WriteToken(IToken token, IndirectReferenceToken indirectReference)
{
if (!Initialized)
{
InitializePdf(DefaultVersion);
}
offsets.Add(indirectReference.Data, Stream.Position);
var obj = new ObjectToken(Stream.Position, indirectReference.Data, token);
TokenWriter.WriteToken(obj, Stream);
return indirectReference;
}
public IndirectReferenceToken ReserveObjectNumber()
{
return new IndirectReferenceToken(new IndirectReference(CurrentNumber++, 0));
}
public void InitializePdf(decimal version)
{
WriteString($"%PDF-{version.ToString("0.0", CultureInfo.InvariantCulture)}", Stream);
Stream.WriteText("%");
@@ -49,87 +74,12 @@
Stream.WriteByte(196);
Stream.WriteByte(210);
Stream.WriteNewLine();
var offsets = new Dictionary<IndirectReference, long>();
ObjectToken catalogToken = null;
foreach (var pair in tokenReferences)
{
var referenceToken = pair.Key;
var token = pair.Value;
var offset = Stream.Position;
var obj = new ObjectToken(offset, referenceToken.Data, token);
TokenWriter.WriteToken(obj, Stream);
offsets.Add(referenceToken.Data, offset);
if (catalogToken == null && referenceToken == catalogReference)
{
catalogToken = obj;
}
}
if (catalogToken == null)
{
throw new Exception("Catalog object wasn't found");
}
// TODO: Support document information
TokenWriter.WriteCrossReferenceTable(offsets, catalogToken, Stream, null);
Initialized = true;
}
public IndirectReferenceToken WriteToken(IToken token, int? reservedNumber = null)
public void CompletePdf(IndirectReferenceToken catalogReference, IndirectReferenceToken documentInformationReference = null)
{
if (!reservedNumber.HasValue)
{
return AddToken(token, CurrentNumber++);
}
if (!reservedNumbers.Remove(reservedNumber.Value))
{
throw new InvalidOperationException("You can't reuse a reserved number");
}
// When we end up writing this token, all of his child would already have been added and checked for duplicate
return AddToken(token, reservedNumber.Value);
}
public void WriteToken(IndirectReferenceToken referenceToken, IToken token)
{
tokenReferences.Add(referenceToken, token);
}
public int ReserveNumber()
{
var reserved = CurrentNumber;
reservedNumbers.Add(reserved);
CurrentNumber++;
return reserved;
}
public IndirectReferenceToken ReserveNumberToken()
{
return new IndirectReferenceToken(new IndirectReference(ReserveNumber(), 0));
}
public void Dispose()
{
if (!DisposeStream)
{
Stream = null;
return;
}
Stream?.Dispose();
Stream = null;
}
private IndirectReferenceToken AddToken(IToken token, int reservedNumber)
{
var reference = new IndirectReference(reservedNumber, 0);
var referenceToken = new IndirectReferenceToken(reference);
tokenReferences.Add(referenceToken, token);
return referenceToken;
TokenWriter.WriteCrossReferenceTable(offsets, catalogReference.Data, Stream, documentInformationReference?.Data);
}
private static void WriteString(string text, Stream stream)
@@ -138,5 +88,15 @@
stream.Write(bytes, 0, bytes.Length);
stream.WriteNewLine();
}
public void Dispose()
{
if (DisposeStream)
{
Stream?.Dispose();
}
Stream = null;
}
}
}

View File

@@ -0,0 +1,21 @@
namespace UglyToad.PdfPig.Writer
{
using System;
using System.Collections.Generic;
using System.Text;
/// <summary>
/// The type of PDF writer to use when producing output.
/// </summary>
public enum PdfWriterType
{
/// <summary>
/// The default output writer.
/// </summary>
Default,
/// <summary>
/// A writer that de-duplicates objects while writing, at the cost of keeping
/// references to the written objects in memory.
/// </summary>
ObjectInMemoryDedup
}
}

View File

@@ -131,7 +131,7 @@
/// <param name="outputStream">The output stream to write to.</param>
/// <param name="documentInformationReference">The object reference for the document information dictionary if present.</param>
internal static void WriteCrossReferenceTable(IReadOnlyDictionary<IndirectReference, long> objectOffsets,
ObjectToken catalogToken,
IndirectReference catalogToken,
Stream outputStream,
IndirectReference? documentInformationReference)
{
@@ -201,7 +201,7 @@
{
// 1 for the free entry.
{NameToken.Size, new NumericToken(objectOffsets.Count + 1)},
{NameToken.Root, new IndirectReferenceToken(catalogToken.Number)},
{NameToken.Root, new IndirectReferenceToken(catalogToken)},
{NameToken.Id, identifier}
};
@@ -225,6 +225,32 @@
outputStream.Write(Eof, 0, Eof.Length);
}
/// <summary>
/// Writes a pre-serialized token as an object token to the output stream.
/// The output is the object number and generation, the object start marker, the raw
/// <paramref name="data"/> bytes and the object end marker, with a line break after the
/// start marker, after the data and after the end marker.
/// </summary>
/// <param name="objectNumber">Object number of the indirect object.</param>
/// <param name="generation">Generation of the indirect object.</param>
/// <param name="data">Pre-serialized object contents, written verbatim.</param>
/// <param name="outputStream">The stream to write the token to.</param>
/// <exception cref="System.ArgumentNullException">
/// Thrown when <paramref name="data"/> or <paramref name="outputStream"/> is null.
/// </exception>
internal static void WriteObject(long objectNumber, int generation, byte[] data, Stream outputStream)
{
    // Validate before the first write so a bad argument cannot leave a partially
    // written object header in the output.
    if (data == null)
    {
        throw new System.ArgumentNullException(nameof(data));
    }

    if (outputStream == null)
    {
        throw new System.ArgumentNullException(nameof(outputStream));
    }

    WriteLong(objectNumber, outputStream);
    WriteWhitespace(outputStream);

    WriteInt(generation, outputStream);
    WriteWhitespace(outputStream);

    outputStream.Write(ObjStart, 0, ObjStart.Length);
    WriteLineBreak(outputStream);

    outputStream.Write(data, 0, data.Length);

    WriteLineBreak(outputStream);
    outputStream.Write(ObjEnd, 0, ObjEnd.Length);

    WriteLineBreak(outputStream);
}
private static void WriteHex(HexToken hex, Stream stream)
{
stream.WriteByte(HexStart);

View File

@@ -0,0 +1,188 @@
namespace UglyToad.PdfPig.Writer
{
using Content;
using Core;
using Parser.Parts;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using Tokenization.Scanner;
using Tokens;
/// <summary>
/// Helpers used when writing documents: mutable access to nested dictionaries,
/// deep copying of tokens between documents and page tree traversal.
/// </summary>
internal static class WriterUtil
{
    /// <summary>
    /// Gets the nested dictionary stored under <paramref name="key"/> as a mutable dictionary,
    /// creating and storing an empty one if the key is absent.
    /// </summary>
    /// <param name="dict">The dictionary holding the nested dictionary token.</param>
    /// <param name="key">The key of the nested dictionary.</param>
    /// <returns>A mutable dictionary backing the token stored under <paramref name="key"/>.</returns>
    /// <exception cref="ApplicationException">The existing entry is not a <see cref="DictionaryToken"/>.</exception>
    public static Dictionary<string, IToken> GetOrCreateDict(this Dictionary<NameToken, IToken> dict, NameToken key)
    {
        return GetOrCreateMutableDict(dict, key);
    }

    /// <summary>
    /// Gets the nested dictionary stored under <paramref name="key"/> as a mutable dictionary,
    /// creating and storing an empty one if the key is absent.
    /// </summary>
    /// <param name="dict">The dictionary holding the nested dictionary token.</param>
    /// <param name="key">The key of the nested dictionary.</param>
    /// <returns>A mutable dictionary backing the token stored under <paramref name="key"/>.</returns>
    /// <exception cref="ApplicationException">The existing entry is not a <see cref="DictionaryToken"/>.</exception>
    public static Dictionary<string, IToken> GetOrCreateDict(this Dictionary<string, IToken> dict, string key)
    {
        return GetOrCreateMutableDict(dict, key);
    }

    /// <summary>
    /// Shared implementation of the <c>GetOrCreateDict</c> overloads.
    /// </summary>
    private static Dictionary<string, IToken> GetOrCreateMutableDict<TKey>(Dictionary<TKey, IToken> dict, TKey key)
    {
        if (!dict.TryGetValue(key, out var item))
        {
            var created = new Dictionary<string, IToken>();
            dict[key] = DictionaryToken.With(created);
            return created;
        }

        if (!(item is DictionaryToken dt))
        {
            // item may be null here, so avoid dereferencing it while building the message.
            throw new ApplicationException("Expected dictionary token, got " + item?.GetType());
        }

        if (dt.Data is Dictionary<string, IToken> existing)
        {
            return existing;
        }

        // The token's data is not a mutable dictionary: copy the entries into one and
        // store a token built from the copy back under the same key.
        var copy = dt.Data.ToDictionary(x => x.Key, x => x.Value);
        dict[key] = DictionaryToken.With(copy);
        return copy;
    }

    /// <summary>
    /// The purpose of this method is to resolve indirect references. That means copying the reference's
    /// content to the new document's stream and replacing the indirect reference with the correct/new one.
    /// </summary>
    /// <param name="writer">PDF stream writer</param>
    /// <param name="tokenToCopy">Token to inspect for references</param>
    /// <param name="tokenScanner">Scanner used to get the content from the original document</param>
    /// <param name="referencesFromDocument">Map of previously copied tokens for the original document.</param>
    /// <param name="callstack">
    /// Indirect references whose copy has been started. A null value means no object number has been
    /// reserved for that reference yet. Leave as null for the outermost call.
    /// </param>
    /// <returns>A copy of the token with all references updated.</returns>
    public static IToken CopyToken(IPdfStreamWriter writer, IToken tokenToCopy, IPdfTokenScanner tokenScanner,
        IDictionary<IndirectReference, IndirectReferenceToken> referencesFromDocument, Dictionary<IndirectReference, IndirectReferenceToken> callstack = null)
    {
        if (callstack == null)
        {
            callstack = new Dictionary<IndirectReference, IndirectReferenceToken>();
        }

        // These tokens need to be deep copied because they could contain references, which have to be updated.
        switch (tokenToCopy)
        {
            case DictionaryToken dictionaryToken:
            {
                var newContent = new Dictionary<NameToken, IToken>();
                foreach (var setPair in dictionaryToken.Data)
                {
                    var name = setPair.Key;
                    var token = setPair.Value;
                    newContent.Add(NameToken.Create(name), CopyToken(writer, token, tokenScanner, referencesFromDocument, callstack));
                }

                return new DictionaryToken(newContent);
            }
            case ArrayToken arrayToken:
            {
                var newArray = new List<IToken>(arrayToken.Length);
                foreach (var token in arrayToken.Data)
                {
                    newArray.Add(CopyToken(writer, token, tokenScanner, referencesFromDocument, callstack));
                }

                return new ArrayToken(newArray);
            }
            case IndirectReferenceToken referenceToken:
            {
                var reference = referenceToken.Data;

                if (referencesFromDocument.TryGetValue(reference, out var newReferenceToken))
                {
                    return newReferenceToken;
                }

                if (callstack.TryGetValue(reference, out var pending) && pending == null)
                {
                    // The reference is already being copied further up the call chain, so this is a cycle.
                    // Reserve an object number now; the outer call writes the object under it once
                    // its content has been copied.
                    newReferenceToken = writer.ReserveObjectNumber();
                    callstack[reference] = newReferenceToken;
                    referencesFromDocument.Add(reference, newReferenceToken);
                    return newReferenceToken;
                }

                // Mark the reference as in progress without reserving a number yet.
                callstack.Add(reference, null);

                var tokenObject = DirectObjectFinder.Get<IToken>(reference, tokenScanner);
                Debug.Assert(!(tokenObject is IndirectReferenceToken));
                var result = CopyToken(writer, tokenObject, tokenScanner, referencesFromDocument, callstack);

                var reserved = callstack[reference];
                if (reserved != null)
                {
                    // A cycle reserved a number while the content was being copied; write under that number.
                    return writer.WriteToken(result, reserved);
                }

                newReferenceToken = writer.WriteToken(result);
                referencesFromDocument.Add(reference, newReferenceToken);
                return newReferenceToken;
            }
            case StreamToken streamToken:
            {
                var properties = CopyToken(writer, streamToken.StreamDictionary, tokenScanner, referencesFromDocument, callstack) as DictionaryToken;
                Debug.Assert(properties != null);

                var bytes = streamToken.Data;
                return new StreamToken(properties, bytes);
            }
            case ObjectToken _:
            {
                // Since we don't write tokens directly to the stream,
                // we can't know the offset. Therefore the token would be invalid.
                throw new NotSupportedException("Copying a Object token is not supported");
            }
        }

        return tokenToCopy;
    }

    /// <summary>
    /// Walks the page tree depth-first starting at <paramref name="node"/>, yielding each page's
    /// dictionary together with the dictionaries of the non-page nodes visited on the way to it.
    /// </summary>
    /// <param name="node">The node to start from.</param>
    /// <param name="parents">Dictionaries of the nodes above <paramref name="node"/>; null is treated as empty.</param>
    /// <returns>One tuple per page: the page dictionary and the dictionaries of the nodes above it.</returns>
    internal static IEnumerable<(DictionaryToken, IReadOnlyList<DictionaryToken>)> WalkTree(PageTreeNode node, List<DictionaryToken> parents = null)
    {
        if (parents == null)
        {
            parents = new List<DictionaryToken>();
        }

        if (node.IsPage)
        {
            yield return (node.NodeDictionary, parents);
            yield break;
        }

        // Copy before appending so the caller's list is left untouched.
        parents = parents.ToList();
        parents.Add(node.NodeDictionary);
        foreach (var child in node.Children)
        {
            foreach (var item in WalkTree(child, parents))
            {
                yield return item;
            }
        }
    }
}
}