From 2df5ec9b1021b87fae6392a9b5e88c1d3ecfd4c7 Mon Sep 17 00:00:00 2001 From: Eliot Jones Date: Sat, 7 Aug 2021 13:49:01 -0400 Subject: [PATCH] handle transparency when adding pdfs in builder #345 #346 --- .../Writer/PdfDocumentBuilder.cs | 1592 ++++++++--------- src/UglyToad.PdfPig/Writer/PdfPageBuilder.cs | 52 +- 2 files changed, 843 insertions(+), 801 deletions(-) diff --git a/src/UglyToad.PdfPig/Writer/PdfDocumentBuilder.cs b/src/UglyToad.PdfPig/Writer/PdfDocumentBuilder.cs index 0682cceb..0620dfa9 100644 --- a/src/UglyToad.PdfPig/Writer/PdfDocumentBuilder.cs +++ b/src/UglyToad.PdfPig/Writer/PdfDocumentBuilder.cs @@ -1,414 +1,414 @@ - -namespace UglyToad.PdfPig.Writer -{ - using System; - using System.Collections.Generic; - using System.Diagnostics; - using System.IO; - using System.Linq; - using Content; - using Core; - using Fonts; - using PdfPig.Fonts.TrueType; - using PdfPig.Fonts.Standard14Fonts; - using PdfPig.Fonts.TrueType.Parser; - using System.Runtime.CompilerServices; - using Tokenization.Scanner; - using Tokens; - - using Util.JetBrains.Annotations; - - /// - /// Provides methods to construct new PDF documents. - /// - public class PdfDocumentBuilder : IDisposable - { - private readonly IPdfStreamWriter context; - private readonly Dictionary pages = new Dictionary(); - private readonly Dictionary fonts = new Dictionary(); - private bool completed = false; - internal int fontId = 0; - - private readonly static ArrayToken DefaultProcSet = new ArrayToken(new List - { - NameToken.Create("PDF"), - NameToken.Text, - NameToken.ImageB, - NameToken.ImageC, - NameToken.ImageI - }); - - /// - /// The standard of PDF/A compliance of the generated document. Defaults to . - /// - public PdfAStandard ArchiveStandard { get; set; } = PdfAStandard.None; - - /// - /// Whether to include the document information dictionary in the produced document. - /// - public bool IncludeDocumentInformation { get; set; } = true; - - /// - /// The values of the fields to include in the document information dictionary. - /// - public DocumentInformationBuilder DocumentInformation { get; } = new DocumentInformationBuilder(); - - /// - /// The current page builders in the document and the corresponding 1 indexed page numbers. Use - /// or to add a new page. - /// - public IReadOnlyDictionary Pages => pages; - - /// - /// The fonts currently available in the document builder added via or . Keyed by id for internal purposes. - /// - internal IReadOnlyDictionary Fonts => fonts; - - /// - /// Creates a document builder keeping resources in memory. - /// - public PdfDocumentBuilder() - { - context = new PdfStreamWriter(new MemoryStream(), true); - context.InitializePdf(1.7m); - } - - /// - /// Creates a document builder keeping resources in memory. - /// - /// Pdf version to use in header. - public PdfDocumentBuilder(decimal version) - { - context = new PdfStreamWriter(new MemoryStream(), true); - context.InitializePdf(version); - } - - /// - /// Creates a document builder using the supplied stream. - /// - /// Steam to write pdf to. - /// If stream should be disposed when builder is. - /// Type of pdf stream writer to use - /// Pdf version to use in header. - public PdfDocumentBuilder(Stream stream, bool disposeStream = false, PdfWriterType type = PdfWriterType.Default, decimal version = 1.7m) - { - switch (type) - { - case PdfWriterType.ObjectInMemoryDedup: - context = new PdfDedupStreamWriter(stream, disposeStream); - break; - default: - context = new PdfStreamWriter(stream, disposeStream); - break; - } - context.InitializePdf(version); - } - - /// - /// Determines whether the bytes of the TrueType font file provided can be used in a PDF document. - /// - /// The bytes of a TrueType font file. - /// Any reason messages explaining why the file can't be used, if applicable. - /// if the file can be used, otherwise. - public bool CanUseTrueTypeFont(IReadOnlyList fontFileBytes, out IReadOnlyList reasons) - { - var reasonsMutable = new List(); - reasons = reasonsMutable; - try - { - if (fontFileBytes == null) - { - reasonsMutable.Add("Provided bytes were null."); - return false; - } - - if (fontFileBytes.Count == 0) - { - reasonsMutable.Add("Provided bytes were empty."); - return false; - } - - var font = TrueTypeFontParser.Parse(new TrueTypeDataBytes(new ByteArrayInputBytes(fontFileBytes))); - - if (font.TableRegister.CMapTable == null) - { - reasonsMutable.Add("The provided font did not contain a cmap table, used to map character codes to glyph codes."); - return false; - } - - if (font.TableRegister.Os2Table == null) - { - reasonsMutable.Add("The provided font did not contain an OS/2 table, used to fill in the font descriptor dictionary."); - return false; - } - - if (font.TableRegister.PostScriptTable == null) - { - reasonsMutable.Add("The provided font did not contain a post PostScript table, used to map character codes to glyph codes."); - return false; - } - - return true; - } - catch (Exception ex) - { - reasonsMutable.Add(ex.Message); - return false; - } - } - - /// - /// Adds a TrueType font to the builder so that pages in this document can use it. - /// - /// The bytes of a TrueType font. - /// An identifier which can be passed to . - public AddedFont AddTrueTypeFont(IReadOnlyList fontFileBytes) - { - try - { - var font = TrueTypeFontParser.Parse(new TrueTypeDataBytes(new ByteArrayInputBytes(fontFileBytes))); - var id = Guid.NewGuid(); - var added = new AddedFont(id, context.ReserveObjectNumber()); - fonts[id] = new FontStored(added, new TrueTypeWritingFont(font, fontFileBytes)); - return added; - } - catch (Exception ex) - { - throw new InvalidOperationException("Writing only supports TrueType fonts, please provide a valid TrueType font.", ex); - } - } - - /// - /// Adds one of the Standard 14 fonts which are included by default in PDF programs so that pages in this document can use it. These Standard 14 fonts are old and possibly obsolete. - /// - /// The type of the Standard 14 font to use. - /// An identifier which can be passed to . - public AddedFont AddStandard14Font(Standard14Font type) - { - if (ArchiveStandard != PdfAStandard.None) - { - throw new NotSupportedException($"PDF/A {ArchiveStandard} requires the font to be embedded in the file, only {nameof(AddTrueTypeFont)} is supported."); - } - - var id = Guid.NewGuid(); - var name = NameToken.Create($"F{fontId++}"); - var added = new AddedFont(id, context.ReserveObjectNumber()); - fonts[id] = new FontStored(added, new Standard14WritingFont(Standard14.GetAdobeFontMetrics(type))); - return added; - } - - internal IndirectReferenceToken AddImage(DictionaryToken dictionary, byte[] bytes) - { - var streamToken = new StreamToken(dictionary, bytes); - return context.WriteToken(streamToken); - } - - /// - /// Add a new page with the specified size, this page will be included in the output when is called. - /// - /// The width of the page in points. - /// The height of the page in points. - /// A builder for editing the new page. - public PdfPageBuilder AddPage(double width, double height) - { - if (width < 0) - { - throw new ArgumentOutOfRangeException(nameof(width), $"Width cannot be negative, got: {width}."); - } - - if (height < 0) - { - throw new ArgumentOutOfRangeException(nameof(height), $"Height cannot be negative, got: {height}."); - } - - PdfPageBuilder builder = null; - for (var i = 0; i < pages.Count; i++) - { - if (!pages.ContainsKey(i + 1)) - { - builder = new PdfPageBuilder(i + 1, this); - break; - } - } - - if (builder == null) - { - builder = new PdfPageBuilder(pages.Count + 1, this); - } - - builder.PageSize = new PdfRectangle(0, 0, width, height); - pages[builder.PageNumber] = builder; - - return builder; - } - - /// - /// Add a new page with the specified size, this page will be included in the output when is called. - /// - /// The size of the page to add. - /// Whether the page is in portait or landscape orientation. - /// A builder for editing the new page. - public PdfPageBuilder AddPage(PageSize size, bool isPortrait = true) - { - if (size == PageSize.Custom) - { - throw new ArgumentException($"Cannot use ${nameof(PageSize.Custom)} for ${nameof(AddPage)} using the ${nameof(PageSize)} enum, call the overload with width and height instead.", - nameof(size)); - } - - if (!size.TryGetPdfRectangle(out var rectangle)) - { - throw new ArgumentException($"No rectangle found for Page Size {size}."); - } - - if (!isPortrait) - { - return AddPage(rectangle.Height, rectangle.Width); - } - - return AddPage(rectangle.Width, rectangle.Height); - } - - - internal IToken CopyToken(IPdfTokenScanner source, IToken token) - { - if (!existingCopies.TryGetValue(source, out var refs)) - { - refs = new Dictionary(); - existingCopies.Add(source, refs); - } - - return WriterUtil.CopyToken(context, token, source, refs); - } - - internal class PageInfo - { - public DictionaryToken Page { get; set; } - public IReadOnlyList Parents { get; set; } - } - private readonly ConditionalWeakTable> existingCopies = - new ConditionalWeakTable>(); - private readonly ConditionalWeakTable> existingTrees = - new ConditionalWeakTable>(); - /// - /// Add a new page with the specified size, this page will be included in the output when is called. - /// - /// Source document. - /// Page to copy. - /// A builder for editing the page. - public PdfPageBuilder AddPage(PdfDocument document, int pageNumber) - { - if (!existingCopies.TryGetValue(document.Structure.TokenScanner, out var refs)) - { - refs = new Dictionary(); - existingCopies.Add(document.Structure.TokenScanner, refs); - } - - if (!existingTrees.TryGetValue(document, out var pagesInfos)) - { - pagesInfos = new Dictionary(); - int i = 1; - foreach (var (pageDict, parents) in WriterUtil.WalkTree(document.Structure.Catalog.PageTree)) - { - pagesInfos[i] = new PageInfo - { - Page = pageDict, - Parents = parents - }; - i++; - } - - existingTrees.Add(document, pagesInfos); - } - - if (!pagesInfos.TryGetValue(pageNumber, out PageInfo pageInfo)) - { - throw new KeyNotFoundException($"Page {pageNumber} was not found in the source document."); - } - - // copy content streams - var streams = new List(); - if (pageInfo.Page.TryGet(NameToken.Contents, out IToken contentsToken)) - { - if (contentsToken is ArrayToken array) - { - foreach (var item in array.Data) - { - if (item is IndirectReferenceToken ir) - { - streams.Add(new PdfPageBuilder.CopiedContentStream( - WriterUtil.CopyToken(context, ir, document.Structure.TokenScanner, refs) as IndirectReferenceToken)); - } - - } - } - else if (contentsToken is IndirectReferenceToken ir) - { - streams.Add(new PdfPageBuilder.CopiedContentStream( - WriterUtil.CopyToken(context, ir, document.Structure.TokenScanner, refs) as IndirectReferenceToken)); - } - } - - // manually copy page dict / resources as we need to modify some - var copiedPageDict = new Dictionary(); - Dictionary resources = new Dictionary(); - - // just put all parent resources into new page - foreach (var dict in pageInfo.Parents) - { - if (dict.TryGet(NameToken.Resources, out var resourceToken)) - { - CopyResourceDict(resourceToken, resources); - } - if (dict.TryGet(NameToken.MediaBox, out var mb)) + +namespace UglyToad.PdfPig.Writer +{ + using System; + using System.Collections.Generic; + using System.Diagnostics; + using System.IO; + using System.Linq; + using Content; + using Core; + using Fonts; + using PdfPig.Fonts.TrueType; + using PdfPig.Fonts.Standard14Fonts; + using PdfPig.Fonts.TrueType.Parser; + using System.Runtime.CompilerServices; + using Tokenization.Scanner; + using Tokens; + + using Util.JetBrains.Annotations; + + /// + /// Provides methods to construct new PDF documents. + /// + public class PdfDocumentBuilder : IDisposable + { + private readonly IPdfStreamWriter context; + private readonly Dictionary pages = new Dictionary(); + private readonly Dictionary fonts = new Dictionary(); + private bool completed = false; + internal int fontId = 0; + + private readonly static ArrayToken DefaultProcSet = new ArrayToken(new List + { + NameToken.Create("PDF"), + NameToken.Text, + NameToken.ImageB, + NameToken.ImageC, + NameToken.ImageI + }); + + /// + /// The standard of PDF/A compliance of the generated document. Defaults to . + /// + public PdfAStandard ArchiveStandard { get; set; } = PdfAStandard.None; + + /// + /// Whether to include the document information dictionary in the produced document. + /// + public bool IncludeDocumentInformation { get; set; } = true; + + /// + /// The values of the fields to include in the document information dictionary. + /// + public DocumentInformationBuilder DocumentInformation { get; } = new DocumentInformationBuilder(); + + /// + /// The current page builders in the document and the corresponding 1 indexed page numbers. Use + /// or to add a new page. + /// + public IReadOnlyDictionary Pages => pages; + + /// + /// The fonts currently available in the document builder added via or . Keyed by id for internal purposes. + /// + internal IReadOnlyDictionary Fonts => fonts; + + /// + /// Creates a document builder keeping resources in memory. + /// + public PdfDocumentBuilder() + { + context = new PdfStreamWriter(new MemoryStream(), true); + context.InitializePdf(1.7m); + } + + /// + /// Creates a document builder keeping resources in memory. + /// + /// Pdf version to use in header. + public PdfDocumentBuilder(decimal version) + { + context = new PdfStreamWriter(new MemoryStream(), true); + context.InitializePdf(version); + } + + /// + /// Creates a document builder using the supplied stream. + /// + /// Steam to write pdf to. + /// If stream should be disposed when builder is. + /// Type of pdf stream writer to use + /// Pdf version to use in header. + public PdfDocumentBuilder(Stream stream, bool disposeStream = false, PdfWriterType type = PdfWriterType.Default, decimal version = 1.7m) + { + switch (type) + { + case PdfWriterType.ObjectInMemoryDedup: + context = new PdfDedupStreamWriter(stream, disposeStream); + break; + default: + context = new PdfStreamWriter(stream, disposeStream); + break; + } + context.InitializePdf(version); + } + + /// + /// Determines whether the bytes of the TrueType font file provided can be used in a PDF document. + /// + /// The bytes of a TrueType font file. + /// Any reason messages explaining why the file can't be used, if applicable. + /// if the file can be used, otherwise. + public bool CanUseTrueTypeFont(IReadOnlyList fontFileBytes, out IReadOnlyList reasons) + { + var reasonsMutable = new List(); + reasons = reasonsMutable; + try + { + if (fontFileBytes == null) { - copiedPageDict[NameToken.MediaBox] = WriterUtil.CopyToken(context, mb, document.Structure.TokenScanner, refs); + reasonsMutable.Add("Provided bytes were null."); + return false; } - if (dict.TryGet(NameToken.CropBox, out var cb)) + + if (fontFileBytes.Count == 0) + { + reasonsMutable.Add("Provided bytes were empty."); + return false; + } + + var font = TrueTypeFontParser.Parse(new TrueTypeDataBytes(new ByteArrayInputBytes(fontFileBytes))); + + if (font.TableRegister.CMapTable == null) + { + reasonsMutable.Add("The provided font did not contain a cmap table, used to map character codes to glyph codes."); + return false; + } + + if (font.TableRegister.Os2Table == null) + { + reasonsMutable.Add("The provided font did not contain an OS/2 table, used to fill in the font descriptor dictionary."); + return false; + } + + if (font.TableRegister.PostScriptTable == null) + { + reasonsMutable.Add("The provided font did not contain a post PostScript table, used to map character codes to glyph codes."); + return false; + } + + return true; + } + catch (Exception ex) + { + reasonsMutable.Add(ex.Message); + return false; + } + } + + /// + /// Adds a TrueType font to the builder so that pages in this document can use it. + /// + /// The bytes of a TrueType font. + /// An identifier which can be passed to . + public AddedFont AddTrueTypeFont(IReadOnlyList fontFileBytes) + { + try + { + var font = TrueTypeFontParser.Parse(new TrueTypeDataBytes(new ByteArrayInputBytes(fontFileBytes))); + var id = Guid.NewGuid(); + var added = new AddedFont(id, context.ReserveObjectNumber()); + fonts[id] = new FontStored(added, new TrueTypeWritingFont(font, fontFileBytes)); + return added; + } + catch (Exception ex) + { + throw new InvalidOperationException("Writing only supports TrueType fonts, please provide a valid TrueType font.", ex); + } + } + + /// + /// Adds one of the Standard 14 fonts which are included by default in PDF programs so that pages in this document can use it. These Standard 14 fonts are old and possibly obsolete. + /// + /// The type of the Standard 14 font to use. + /// An identifier which can be passed to . + public AddedFont AddStandard14Font(Standard14Font type) + { + if (ArchiveStandard != PdfAStandard.None) + { + throw new NotSupportedException($"PDF/A {ArchiveStandard} requires the font to be embedded in the file, only {nameof(AddTrueTypeFont)} is supported."); + } + + var id = Guid.NewGuid(); + var name = NameToken.Create($"F{fontId++}"); + var added = new AddedFont(id, context.ReserveObjectNumber()); + fonts[id] = new FontStored(added, new Standard14WritingFont(Standard14.GetAdobeFontMetrics(type))); + return added; + } + + internal IndirectReferenceToken AddImage(DictionaryToken dictionary, byte[] bytes) + { + var streamToken = new StreamToken(dictionary, bytes); + return context.WriteToken(streamToken); + } + + /// + /// Add a new page with the specified size, this page will be included in the output when is called. + /// + /// The width of the page in points. + /// The height of the page in points. + /// A builder for editing the new page. + public PdfPageBuilder AddPage(double width, double height) + { + if (width < 0) + { + throw new ArgumentOutOfRangeException(nameof(width), $"Width cannot be negative, got: {width}."); + } + + if (height < 0) + { + throw new ArgumentOutOfRangeException(nameof(height), $"Height cannot be negative, got: {height}."); + } + + PdfPageBuilder builder = null; + for (var i = 0; i < pages.Count; i++) + { + if (!pages.ContainsKey(i + 1)) + { + builder = new PdfPageBuilder(i + 1, this); + break; + } + } + + if (builder == null) + { + builder = new PdfPageBuilder(pages.Count + 1, this); + } + + builder.PageSize = new PdfRectangle(0, 0, width, height); + pages[builder.PageNumber] = builder; + + return builder; + } + + /// + /// Add a new page with the specified size, this page will be included in the output when is called. + /// + /// The size of the page to add. + /// Whether the page is in portait or landscape orientation. + /// A builder for editing the new page. + public PdfPageBuilder AddPage(PageSize size, bool isPortrait = true) + { + if (size == PageSize.Custom) + { + throw new ArgumentException($"Cannot use ${nameof(PageSize.Custom)} for ${nameof(AddPage)} using the ${nameof(PageSize)} enum, call the overload with width and height instead.", + nameof(size)); + } + + if (!size.TryGetPdfRectangle(out var rectangle)) + { + throw new ArgumentException($"No rectangle found for Page Size {size}."); + } + + if (!isPortrait) + { + return AddPage(rectangle.Height, rectangle.Width); + } + + return AddPage(rectangle.Width, rectangle.Height); + } + + + internal IToken CopyToken(IPdfTokenScanner source, IToken token) + { + if (!existingCopies.TryGetValue(source, out var refs)) + { + refs = new Dictionary(); + existingCopies.Add(source, refs); + } + + return WriterUtil.CopyToken(context, token, source, refs); + } + + internal class PageInfo + { + public DictionaryToken Page { get; set; } + public IReadOnlyList Parents { get; set; } + } + private readonly ConditionalWeakTable> existingCopies = + new ConditionalWeakTable>(); + private readonly ConditionalWeakTable> existingTrees = + new ConditionalWeakTable>(); + /// + /// Add a new page with the specified size, this page will be included in the output when is called. + /// + /// Source document. + /// Page to copy. + /// A builder for editing the page. + public PdfPageBuilder AddPage(PdfDocument document, int pageNumber) + { + if (!existingCopies.TryGetValue(document.Structure.TokenScanner, out var refs)) + { + refs = new Dictionary(); + existingCopies.Add(document.Structure.TokenScanner, refs); + } + + if (!existingTrees.TryGetValue(document, out var pagesInfos)) + { + pagesInfos = new Dictionary(); + int i = 1; + foreach (var (pageDict, parents) in WriterUtil.WalkTree(document.Structure.Catalog.PageTree)) + { + pagesInfos[i] = new PageInfo + { + Page = pageDict, + Parents = parents + }; + i++; + } + + existingTrees.Add(document, pagesInfos); + } + + if (!pagesInfos.TryGetValue(pageNumber, out PageInfo pageInfo)) + { + throw new KeyNotFoundException($"Page {pageNumber} was not found in the source document."); + } + + // copy content streams + var streams = new List(); + if (pageInfo.Page.TryGet(NameToken.Contents, out IToken contentsToken)) + { + if (contentsToken is ArrayToken array) + { + foreach (var item in array.Data) + { + if (item is IndirectReferenceToken ir) + { + streams.Add(new PdfPageBuilder.CopiedContentStream( + WriterUtil.CopyToken(context, ir, document.Structure.TokenScanner, refs) as IndirectReferenceToken)); + } + + } + } + else if (contentsToken is IndirectReferenceToken ir) + { + streams.Add(new PdfPageBuilder.CopiedContentStream( + WriterUtil.CopyToken(context, ir, document.Structure.TokenScanner, refs) as IndirectReferenceToken)); + } + } + + // manually copy page dict / resources as we need to modify some + var copiedPageDict = new Dictionary(); + Dictionary resources = new Dictionary(); + + // just put all parent resources into new page + foreach (var dict in pageInfo.Parents) + { + if (dict.TryGet(NameToken.Resources, out var resourceToken)) + { + CopyResourceDict(resourceToken, resources); + } + if (dict.TryGet(NameToken.MediaBox, out var mb)) + { + copiedPageDict[NameToken.MediaBox] = WriterUtil.CopyToken(context, mb, document.Structure.TokenScanner, refs); + } + if (dict.TryGet(NameToken.CropBox, out var cb)) { copiedPageDict[NameToken.CropBox] = WriterUtil.CopyToken(context, cb, document.Structure.TokenScanner, refs); } - if (dict.TryGet(NameToken.Rotate, out var rt)) - { - copiedPageDict[NameToken.Rotate] = WriterUtil.CopyToken(context, rt, document.Structure.TokenScanner, refs); - } - } - - - foreach (var kvp in pageInfo.Page.Data) - { - if (kvp.Key == NameToken.Contents || kvp.Key == NameToken.Parent || kvp.Key == NameToken.Type) - { - continue; - } - - if (kvp.Key == NameToken.Resources) - { - CopyResourceDict(kvp.Value, resources); - continue; - } - - copiedPageDict[NameToken.Create(kvp.Key)] = - WriterUtil.CopyToken(context, kvp.Value, document.Structure.TokenScanner, refs); - } - - copiedPageDict[NameToken.Resources] = new DictionaryToken(resources); - - var builder = new PdfPageBuilder(pages.Count + 1, this, streams, copiedPageDict); - pages[builder.PageNumber] = builder; - return builder; - - void CopyResourceDict(IToken token, Dictionary destinationDict) - { - DictionaryToken dict = GetRemoteDict(token); - if (dict == null) - { - return; - } - foreach (var item in dict.Data) - { - if (!destinationDict.ContainsKey(NameToken.Create(item.Key))) - { + if (dict.TryGet(NameToken.Rotate, out var rt)) + { + copiedPageDict[NameToken.Rotate] = WriterUtil.CopyToken(context, rt, document.Structure.TokenScanner, refs); + } + } + + + foreach (var kvp in pageInfo.Page.Data) + { + if (kvp.Key == NameToken.Contents || kvp.Key == NameToken.Parent || kvp.Key == NameToken.Type) + { + continue; + } + + if (kvp.Key == NameToken.Resources) + { + CopyResourceDict(kvp.Value, resources); + continue; + } + + copiedPageDict[NameToken.Create(kvp.Key)] = + WriterUtil.CopyToken(context, kvp.Value, document.Structure.TokenScanner, refs); + } + + copiedPageDict[NameToken.Resources] = new DictionaryToken(resources); + + var builder = new PdfPageBuilder(pages.Count + 1, this, streams, copiedPageDict); + pages[builder.PageNumber] = builder; + return builder; + + void CopyResourceDict(IToken token, Dictionary destinationDict) + { + DictionaryToken dict = GetRemoteDict(token); + if (dict == null) + { + return; + } + foreach (var item in dict.Data) + { + if (!destinationDict.ContainsKey(NameToken.Create(item.Key))) + { if (item.Value is IndirectReferenceToken ir) { // convert indirect to direct as PdfPageBuilder needs to modify resource entries @@ -422,396 +422,396 @@ namespace UglyToad.PdfPig.Writer { destinationDict[NameToken.Create(item.Key)] = WriterUtil.CopyToken(context, obj.Data, document.Structure.TokenScanner, refs); } - } - else - { - destinationDict[NameToken.Create(item.Key)] = WriterUtil.CopyToken(context, item.Value, document.Structure.TokenScanner, refs); - } - - continue; - } - - var subDict = GetRemoteDict(item.Value); - var destSubDict = destinationDict[NameToken.Create(item.Key)] as DictionaryToken; - if (destSubDict == null || subDict == null) - { - // not a dict.. just overwrite with more important one? should maybe check arrays? - if (item.Value is IndirectReferenceToken ir) - { - // convert indirect to direct as PdfPageBuilder needs to modify resource entries - destinationDict[NameToken.Create(item.Key)] = WriterUtil.CopyToken(context, document.Structure.TokenScanner.Get(ir.Data).Data, document.Structure.TokenScanner, refs); - } - else - { - destinationDict[NameToken.Create(item.Key)] = WriterUtil.CopyToken(context, item.Value, document.Structure.TokenScanner, refs); - } - continue; - } - foreach (var subItem in subDict.Data) - { - // last copied most important important - destinationDict[NameToken.Create(subItem.Key)] = WriterUtil.CopyToken(context, subItem.Value, - document.Structure.TokenScanner, refs); - } - } - } - - DictionaryToken GetRemoteDict(IToken token) - { - DictionaryToken dict = null; - if (token is IndirectReferenceToken ir) - { - dict = document.Structure.TokenScanner.Get(ir.Data).Data as DictionaryToken; - } - else if (token is DictionaryToken dt) - { - dict = dt; - } - return dict; - } - } - - private void CompleteDocument() - { - // write fonts to reserved object numbers - foreach (var font in fonts) - { - font.Value.FontProgram.WriteFont(context, font.Value.FontKey.Reference); - } - - const int desiredLeafSize = 25; // allow customization at some point? - var numLeafs = (int)Math.Ceiling(Decimal.Divide(Pages.Count, desiredLeafSize)); - - var leafRefs = new List(); - var leafChildren = new List>(); - var leafs = new List>(); - for (var i = 0; i < numLeafs; i++) - { - leafs.Add(new Dictionary() - { - {NameToken.Type, NameToken.Pages}, - }); - leafChildren.Add(new List()); - leafRefs.Add(context.ReserveObjectNumber()); - } - - int leafNum = 0; - - foreach (var page in pages) - { - var pageDictionary = page.Value.pageDictionary; - pageDictionary[NameToken.Type] = NameToken.Page; - pageDictionary[NameToken.Parent] = leafRefs[leafNum]; - pageDictionary[NameToken.ProcSet] = DefaultProcSet; - if (!pageDictionary.ContainsKey(NameToken.MediaBox)) - { - pageDictionary[NameToken.MediaBox] = RectangleToArray(page.Value.PageSize); - } - - var toWrite = page.Value.contentStreams.Where(x => x.HasContent).ToList(); - if (toWrite.Count == 0) - { - // write empty - pageDictionary[NameToken.Contents] = new PdfPageBuilder.DefaultContentStream().Write(context); - } - else if (toWrite.Count == 1) - { - // write single - pageDictionary[NameToken.Contents] = toWrite[0].Write(context); - } - else - { - // write array - var streams = new List(); - foreach (var stream in toWrite) - { - streams.Add(stream.Write(context)); - } - pageDictionary[NameToken.Contents] = new ArrayToken(streams); - } - - - leafChildren[leafNum].Add(context.WriteToken(new DictionaryToken(pageDictionary))); - - if (leafChildren[leafNum].Count >= desiredLeafSize) - { - leafNum += 1; - } - } - - var dummyName = NameToken.Create("ObjIdToUse"); - for (var i = 0; i < leafs.Count; i++) - { - leafs[i][NameToken.Kids] = new ArrayToken(leafChildren[i]); - leafs[i][NameToken.Count] = new NumericToken(leafChildren[i].Count); - leafs[i][dummyName] = leafRefs[i]; - } - - var catalogDictionary = new Dictionary - { - {NameToken.Type, NameToken.Catalog}, - }; - if (leafs.Count == 1) - { - var leaf = leafs[0]; - var id = leaf[dummyName] as IndirectReferenceToken; - leaf.Remove(dummyName); - catalogDictionary[NameToken.Pages] = context.WriteToken(new DictionaryToken(leaf), id); - } - else - { - var rootPageInfo = CreatePageTree(leafs, null); - catalogDictionary[NameToken.Pages] = rootPageInfo.Ref; - } - - if (ArchiveStandard != PdfAStandard.None) - { - Func writerFunc = x => context.WriteToken(x); - - PdfABaselineRuleBuilder.Obey(catalogDictionary, writerFunc, DocumentInformation, ArchiveStandard); - - switch (ArchiveStandard) - { - case PdfAStandard.A1A: - PdfA1ARuleBuilder.Obey(catalogDictionary); - break; - case PdfAStandard.A2B: - break; - case PdfAStandard.A2A: - PdfA1ARuleBuilder.Obey(catalogDictionary); - break; - } - } - - var catalog = new DictionaryToken(catalogDictionary); - - var catalogRef = context.WriteToken(catalog); - - var informationReference = default(IndirectReferenceToken); - if (IncludeDocumentInformation) - { - var informationDictionary = DocumentInformation.ToDictionary(); - if (informationDictionary.Count > 0) - { - var dictionary = new DictionaryToken(informationDictionary); - informationReference = context.WriteToken(dictionary); - } - } - - context.CompletePdf(catalogRef, informationReference); - - completed = true; - - (int Count, IndirectReferenceToken Ref) CreatePageTree(List> pagesNodes, IndirectReferenceToken parent) - { - // TODO shorten page tree when there is a single or small number of pages left in a branch - var count = 0; - var thisObj = context.ReserveObjectNumber(); + } + else + { + destinationDict[NameToken.Create(item.Key)] = WriterUtil.CopyToken(context, item.Value, document.Structure.TokenScanner, refs); + } - var children = new List(); - if (pagesNodes.Count > desiredLeafSize) - { - var currentTreeDepth = (int)Math.Ceiling(Math.Log(pagesNodes.Count, desiredLeafSize)); - var perBranch = (int)Math.Ceiling(Math.Pow(desiredLeafSize, currentTreeDepth - 1)); - var branches = (int)Math.Ceiling(decimal.Divide(pagesNodes.Count, (decimal)perBranch)); - for (var i = 0; i < branches; i++) - { - var part = pagesNodes.Skip(i * perBranch).Take(perBranch).ToList(); - var result = CreatePageTree(part, thisObj); - count += result.Count; - children.Add(result.Ref); - } - } - else - { - foreach (var page in pagesNodes) - { - page[NameToken.Parent] = thisObj; - var id = page[dummyName] as IndirectReferenceToken; - page.Remove(dummyName); - count += (page[NameToken.Count] as NumericToken).Int; - children.Add(context.WriteToken(new DictionaryToken(page), id)); - } - } - - var node = new Dictionary - { - {NameToken.Type, NameToken.Pages}, - {NameToken.Kids, new ArrayToken(children)}, - {NameToken.Count, new NumericToken(count)} - }; - if (parent != null) - { - node[NameToken.Parent] = parent; - } - return (count, context.WriteToken(new DictionaryToken(node), thisObj)); - } - } - - /// - /// Builds a PDF document from the current content of this builder and its pages. - /// - /// The bytes of the resulting PDF document. - public byte[] Build() - { - CompleteDocument(); - - if (context.Stream is MemoryStream ms) - { - return ms.ToArray(); - } - - if (!context.Stream.CanSeek) - { - throw new InvalidOperationException("PdfDocument.Build() called with non-seekable stream."); - } - - using (var temp = new MemoryStream()) - { - context.Stream.Seek(0, SeekOrigin.Begin); - context.Stream.CopyTo(temp); - return temp.ToArray(); - } - } - - private static ArrayToken RectangleToArray(PdfRectangle rectangle) - { - return new ArrayToken(new[] - { - new NumericToken((decimal)rectangle.BottomLeft.X), - new NumericToken((decimal)rectangle.BottomLeft.Y), - new NumericToken((decimal)rectangle.TopRight.X), - new NumericToken((decimal)rectangle.TopRight.Y) - }); - } - - - internal class FontStored - { - [NotNull] - public AddedFont FontKey { get; } - - [NotNull] - public IWritingFont FontProgram { get; } - - public FontStored(AddedFont fontKey, IWritingFont fontProgram) - { - FontKey = fontKey ?? throw new ArgumentNullException(nameof(fontKey)); - FontProgram = fontProgram ?? throw new ArgumentNullException(nameof(fontProgram)); - } - } - - /// - /// A key representing a font available to use on the current document builder. Create by adding a font to a document using either - /// or . - /// - public class AddedFont - { - /// - /// The Id uniquely identifying this font on the builder. - /// - internal Guid Id { get; } - - /// - /// Reference to the added font. - /// - internal IndirectReferenceToken Reference { get; } - - /// - /// Create a new . - /// - internal AddedFont(Guid id, IndirectReferenceToken reference) - { - Id = id; - Reference = reference; - } - } - - /// - /// Sets the values of the dictionary for the document being created. - /// Control inclusion of the document information dictionary on the output with . - /// - public class DocumentInformationBuilder - { - /// - /// . - /// - public string Title { get; set; } - - /// - /// . - /// - public string Author { get; set; } - - /// - /// . - /// - public string Subject { get; set; } - - /// - /// . - /// - public string Keywords { get; set; } - - /// - /// . - /// - public string Creator { get; set; } - - /// - /// . - /// - public string Producer { get; set; } = "PdfPig"; - - internal Dictionary ToDictionary() - { - var result = new Dictionary(); - - if (Title != null) - { - result[NameToken.Title] = new StringToken(Title); - } - - if (Author != null) - { - result[NameToken.Author] = new StringToken(Author); - } - - if (Subject != null) - { - result[NameToken.Subject] = new StringToken(Subject); - } - - if (Keywords != null) - { - result[NameToken.Keywords] = new StringToken(Keywords); - } - - if (Creator != null) - { - result[NameToken.Creator] = new StringToken(Creator); - } - - if (Producer != null) - { - result[NameToken.Producer] = new StringToken(Producer); - } - - return result; - } - } - - /// - /// Disposes underlying stream if set to do so. - /// - public void Dispose() - { - if (!completed) - { - CompleteDocument(); + continue; + } + + var subDict = GetRemoteDict(item.Value); + var destSubDict = destinationDict[NameToken.Create(item.Key)] as DictionaryToken; + if (destSubDict == null || subDict == null) + { + // not a dict.. just overwrite with more important one? should maybe check arrays? + if (item.Value is IndirectReferenceToken ir) + { + // convert indirect to direct as PdfPageBuilder needs to modify resource entries + destinationDict[NameToken.Create(item.Key)] = WriterUtil.CopyToken(context, document.Structure.TokenScanner.Get(ir.Data).Data, document.Structure.TokenScanner, refs); + } + else + { + destinationDict[NameToken.Create(item.Key)] = WriterUtil.CopyToken(context, item.Value, document.Structure.TokenScanner, refs); + } + continue; + } + foreach (var subItem in subDict.Data) + { + // last copied most important important + destinationDict[NameToken.Create(subItem.Key)] = WriterUtil.CopyToken(context, subItem.Value, + document.Structure.TokenScanner, refs); + } + } } - context.Dispose(); - } - } -} + DictionaryToken GetRemoteDict(IToken token) + { + DictionaryToken dict = null; + if (token is IndirectReferenceToken ir) + { + dict = document.Structure.TokenScanner.Get(ir.Data).Data as DictionaryToken; + } + else if (token is DictionaryToken dt) + { + dict = dt; + } + return dict; + } + } + + private void CompleteDocument() + { + // write fonts to reserved object numbers + foreach (var font in fonts) + { + font.Value.FontProgram.WriteFont(context, font.Value.FontKey.Reference); + } + + const int desiredLeafSize = 25; // allow customization at some point? + var numLeafs = (int)Math.Ceiling(Decimal.Divide(Pages.Count, desiredLeafSize)); + + var leafRefs = new List(); + var leafChildren = new List>(); + var leafs = new List>(); + for (var i = 0; i < numLeafs; i++) + { + leafs.Add(new Dictionary() + { + {NameToken.Type, NameToken.Pages}, + }); + leafChildren.Add(new List()); + leafRefs.Add(context.ReserveObjectNumber()); + } + + int leafNum = 0; + + foreach (var page in pages) + { + var pageDictionary = page.Value.pageDictionary; + pageDictionary[NameToken.Type] = NameToken.Page; + pageDictionary[NameToken.Parent] = leafRefs[leafNum]; + pageDictionary[NameToken.ProcSet] = DefaultProcSet; + if (!pageDictionary.ContainsKey(NameToken.MediaBox)) + { + pageDictionary[NameToken.MediaBox] = RectangleToArray(page.Value.PageSize); + } + + var toWrite = page.Value.contentStreams.Where(x => x.HasContent).ToList(); + if (toWrite.Count == 0) + { + // write empty + pageDictionary[NameToken.Contents] = new PdfPageBuilder.DefaultContentStream().Write(context); + } + else if (toWrite.Count == 1) + { + // write single + pageDictionary[NameToken.Contents] = toWrite[0].Write(context); + } + else + { + // write array + var streams = new List(); + foreach (var stream in toWrite) + { + streams.Add(stream.Write(context)); + } + pageDictionary[NameToken.Contents] = new ArrayToken(streams); + } + + + leafChildren[leafNum].Add(context.WriteToken(new DictionaryToken(pageDictionary))); + + if (leafChildren[leafNum].Count >= desiredLeafSize) + { + leafNum += 1; + } + } + + var dummyName = NameToken.Create("ObjIdToUse"); + for (var i = 0; i < leafs.Count; i++) + { + leafs[i][NameToken.Kids] = new ArrayToken(leafChildren[i]); + leafs[i][NameToken.Count] = new NumericToken(leafChildren[i].Count); + leafs[i][dummyName] = leafRefs[i]; + } + + var catalogDictionary = new Dictionary + { + {NameToken.Type, NameToken.Catalog}, + }; + if (leafs.Count == 1) + { + var leaf = leafs[0]; + var id = leaf[dummyName] as IndirectReferenceToken; + leaf.Remove(dummyName); + catalogDictionary[NameToken.Pages] = context.WriteToken(new DictionaryToken(leaf), id); + } + else + { + var rootPageInfo = CreatePageTree(leafs, null); + catalogDictionary[NameToken.Pages] = rootPageInfo.Ref; + } + + if (ArchiveStandard != PdfAStandard.None) + { + Func writerFunc = x => context.WriteToken(x); + + PdfABaselineRuleBuilder.Obey(catalogDictionary, writerFunc, DocumentInformation, ArchiveStandard); + + switch (ArchiveStandard) + { + case PdfAStandard.A1A: + PdfA1ARuleBuilder.Obey(catalogDictionary); + break; + case PdfAStandard.A2B: + break; + case PdfAStandard.A2A: + PdfA1ARuleBuilder.Obey(catalogDictionary); + break; + } + } + + var catalog = new DictionaryToken(catalogDictionary); + + var catalogRef = context.WriteToken(catalog); + + var informationReference = default(IndirectReferenceToken); + if (IncludeDocumentInformation) + { + var informationDictionary = DocumentInformation.ToDictionary(); + if (informationDictionary.Count > 0) + { + var dictionary = new DictionaryToken(informationDictionary); + informationReference = context.WriteToken(dictionary); + } + } + + context.CompletePdf(catalogRef, informationReference); + + completed = true; + + (int Count, IndirectReferenceToken Ref) CreatePageTree(List> pagesNodes, IndirectReferenceToken parent) + { + // TODO shorten page tree when there is a single or small number of pages left in a branch + var count = 0; + var thisObj = context.ReserveObjectNumber(); + + var children = new List(); + if (pagesNodes.Count > desiredLeafSize) + { + var currentTreeDepth = (int)Math.Ceiling(Math.Log(pagesNodes.Count, desiredLeafSize)); + var perBranch = (int)Math.Ceiling(Math.Pow(desiredLeafSize, currentTreeDepth - 1)); + var branches = (int)Math.Ceiling(decimal.Divide(pagesNodes.Count, (decimal)perBranch)); + for (var i = 0; i < branches; i++) + { + var part = pagesNodes.Skip(i * perBranch).Take(perBranch).ToList(); + var result = CreatePageTree(part, thisObj); + count += result.Count; + children.Add(result.Ref); + } + } + else + { + foreach (var page in pagesNodes) + { + page[NameToken.Parent] = thisObj; + var id = page[dummyName] as IndirectReferenceToken; + page.Remove(dummyName); + count += (page[NameToken.Count] as NumericToken).Int; + children.Add(context.WriteToken(new DictionaryToken(page), id)); + } + } + + var node = new Dictionary + { + {NameToken.Type, NameToken.Pages}, + {NameToken.Kids, new ArrayToken(children)}, + {NameToken.Count, new NumericToken(count)} + }; + if (parent != null) + { + node[NameToken.Parent] = parent; + } + return (count, context.WriteToken(new DictionaryToken(node), thisObj)); + } + } + + /// + /// Builds a PDF document from the current content of this builder and its pages. + /// + /// The bytes of the resulting PDF document. + public byte[] Build() + { + CompleteDocument(); + + if (context.Stream is MemoryStream ms) + { + return ms.ToArray(); + } + + if (!context.Stream.CanSeek) + { + throw new InvalidOperationException("PdfDocument.Build() called with non-seekable stream."); + } + + using (var temp = new MemoryStream()) + { + context.Stream.Seek(0, SeekOrigin.Begin); + context.Stream.CopyTo(temp); + return temp.ToArray(); + } + } + + private static ArrayToken RectangleToArray(PdfRectangle rectangle) + { + return new ArrayToken(new[] + { + new NumericToken((decimal)rectangle.BottomLeft.X), + new NumericToken((decimal)rectangle.BottomLeft.Y), + new NumericToken((decimal)rectangle.TopRight.X), + new NumericToken((decimal)rectangle.TopRight.Y) + }); + } + + + internal class FontStored + { + [NotNull] + public AddedFont FontKey { get; } + + [NotNull] + public IWritingFont FontProgram { get; } + + public FontStored(AddedFont fontKey, IWritingFont fontProgram) + { + FontKey = fontKey ?? throw new ArgumentNullException(nameof(fontKey)); + FontProgram = fontProgram ?? throw new ArgumentNullException(nameof(fontProgram)); + } + } + + /// + /// A key representing a font available to use on the current document builder. Create by adding a font to a document using either + /// or . + /// + public class AddedFont + { + /// + /// The Id uniquely identifying this font on the builder. + /// + internal Guid Id { get; } + + /// + /// Reference to the added font. + /// + internal IndirectReferenceToken Reference { get; } + + /// + /// Create a new . + /// + internal AddedFont(Guid id, IndirectReferenceToken reference) + { + Id = id; + Reference = reference; + } + } + + /// + /// Sets the values of the dictionary for the document being created. + /// Control inclusion of the document information dictionary on the output with . + /// + public class DocumentInformationBuilder + { + /// + /// . + /// + public string Title { get; set; } + + /// + /// . + /// + public string Author { get; set; } + + /// + /// . + /// + public string Subject { get; set; } + + /// + /// . + /// + public string Keywords { get; set; } + + /// + /// . + /// + public string Creator { get; set; } + + /// + /// . + /// + public string Producer { get; set; } = "PdfPig"; + + internal Dictionary ToDictionary() + { + var result = new Dictionary(); + + if (Title != null) + { + result[NameToken.Title] = new StringToken(Title); + } + + if (Author != null) + { + result[NameToken.Author] = new StringToken(Author); + } + + if (Subject != null) + { + result[NameToken.Subject] = new StringToken(Subject); + } + + if (Keywords != null) + { + result[NameToken.Keywords] = new StringToken(Keywords); + } + + if (Creator != null) + { + result[NameToken.Creator] = new StringToken(Creator); + } + + if (Producer != null) + { + result[NameToken.Producer] = new StringToken(Producer); + } + + return result; + } + } + + /// + /// Disposes underlying stream if set to do so. + /// + public void Dispose() + { + if (!completed) + { + CompleteDocument(); + } + + context.Dispose(); + } + } +} diff --git a/src/UglyToad.PdfPig/Writer/PdfPageBuilder.cs b/src/UglyToad.PdfPig/Writer/PdfPageBuilder.cs index 9a754d16..da2de186 100644 --- a/src/UglyToad.PdfPig/Writer/PdfPageBuilder.cs +++ b/src/UglyToad.PdfPig/Writer/PdfPageBuilder.cs @@ -15,7 +15,6 @@ using Images; using System; using System.Collections.Generic; - using System.Diagnostics; using System.IO; using System.Linq; using PdfFonts; @@ -491,19 +490,62 @@ data = memoryStream.ToArray(); } + var widthToken = new NumericToken(png.Width); + var heightToken = new NumericToken(png.Height); + + IndirectReferenceToken smaskReference = null; + + if (png.HasAlphaChannel) + { + var smaskData = new byte[data.Length / 3]; + for (var rowIndex = 0; rowIndex < png.Height; rowIndex++) + { + for (var colIndex = 0; colIndex < png.Width; colIndex++) + { + var pixel = png.GetPixel(colIndex, rowIndex); + + var index = rowIndex * png.Width + colIndex; + smaskData[index] = pixel.A; + } + } + + var compressedSmask = DataCompresser.CompressBytes(smaskData); + + // Create a soft-mask. + var smaskDictionary = new Dictionary + { + {NameToken.Type, NameToken.Xobject}, + {NameToken.Subtype, NameToken.Image}, + {NameToken.Width, widthToken}, + {NameToken.Height, heightToken}, + {NameToken.ColorSpace, NameToken.Devicegray}, + {NameToken.BitsPerComponent, new NumericToken(png.Header.BitDepth)}, + {NameToken.Decode, new ArrayToken(new IToken[] { new NumericToken(0), new NumericToken(1) })}, + {NameToken.Length, new NumericToken(compressedSmask.Length)}, + {NameToken.Filter, NameToken.FlateDecode} + }; + + smaskReference = documentBuilder.AddImage(new DictionaryToken(smaskDictionary), compressedSmask); + } + var compressed = DataCompresser.CompressBytes(data); var imgDictionary = new Dictionary { - {NameToken.Type, NameToken.Xobject }, - {NameToken.Subtype, NameToken.Image }, - {NameToken.Width, new NumericToken(png.Width) }, - {NameToken.Height, new NumericToken(png.Height) }, + {NameToken.Type, NameToken.Xobject}, + {NameToken.Subtype, NameToken.Image}, + {NameToken.Width, widthToken}, + {NameToken.Height, heightToken}, {NameToken.BitsPerComponent, new NumericToken(png.Header.BitDepth)}, {NameToken.ColorSpace, NameToken.Devicergb}, {NameToken.Filter, NameToken.FlateDecode}, {NameToken.Length, new NumericToken(compressed.Length)} }; + + if (smaskReference != null) + { + imgDictionary.Add(NameToken.Smask, smaskReference); + } var reference = documentBuilder.AddImage(new DictionaryToken(imgDictionary), compressed);