diff --git a/src/UglyToad.PdfPig.Tests/Writer/XmpTests.cs b/src/UglyToad.PdfPig.Tests/Writer/XmpTests.cs new file mode 100644 index 00000000..e9568f7c --- /dev/null +++ b/src/UglyToad.PdfPig.Tests/Writer/XmpTests.cs @@ -0,0 +1,108 @@ +namespace UglyToad.PdfPig.Tests.Writer +{ + using Integration; + using PdfPig.Content; + using PdfPig.Writer; + using System.Linq; + using System.Xml.Linq; + using Xunit; + + public class XmpTests + { + const string RdfNamespace = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; + const string PdfaidNamespace = "http://www.aiim.org/pdfa/ns/id/"; + const string FtxFormsNamespace = "http://ns.ftx.com/forms/1.0/"; + const string FtxControldataNamespace = "http://ns.ftx.com/forms/1.0/controldata/"; + + [Fact] + public void XmpInfoIsWrittenToPdfADocument() + { + byte[] pdfA2aDocument = BuildPdfA2aDocument(); + + using (PdfDocument newDocument = PdfDocument.Open(pdfA2aDocument)) + { + Assert.True(newDocument.TryGetXmpMetadata(out XmpMetadata xmpMetadata)); + XDocument xmp = xmpMetadata.GetXDocument(); + /* Should contain the PDF/A-2a XMP nodes. 
+ * 2 + * A + */ + Assert.Equal("2", xmp.Descendants(XNamespace.Get(PdfaidNamespace) + "part").First().Value); + Assert.Equal("A", xmp.Descendants(XNamespace.Get(PdfaidNamespace) + "conformance").First().Value); + } + } + + [Fact] + public void CustomXmpInfoIsMergedIntoPdfADocumentXmp() + { + using (PdfDocument simpleDocument = PdfDocument.Open(BuildPdfA2aDocument())) + { + PdfDocumentBuilder pdfDocumentBuilder = new() + { + ArchiveStandard = PdfAStandard.A2A, + IncludeDocumentInformation = true, + XmpMetadata = XDocument.Parse(@" + + + + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 10 + 10.2020 + 6 + + 1 + B + + + "), + }; + pdfDocumentBuilder.AddPage(simpleDocument, 1); + + using (PdfDocument xmpDocument = PdfDocument.Open(pdfDocumentBuilder.Build())) + { + Assert.True(xmpDocument.TryGetXmpMetadata(out XmpMetadata xmpMetadata)); + XDocument xmp = xmpMetadata.GetXDocument(); + /* Should still contain exactly one each of the correct PDF/A-2a XMP nodes. + * PDF/A-1b from the added XMP document must not be there. 
+ * 2 + * A + */ + Assert.Equal("2", xmp.Descendants(XNamespace.Get(PdfaidNamespace) + "part").First().Value); + Assert.Single(xmp.Descendants(XNamespace.Get(PdfaidNamespace) + "part")); + Assert.Equal("A", xmp.Descendants(XNamespace.Get(PdfaidNamespace) + "conformance").First().Value); + Assert.Single(xmp.Descendants(XNamespace.Get(PdfaidNamespace) + "conformance")); + + // Should also contain the nodes from the added XMP document + Assert.Single(xmp.Descendants(XNamespace.Get(FtxFormsNamespace) + "ControlData")); + Assert.Equal("0", xmp.Descendants(XNamespace.Get(FtxControldataNamespace) + "Anzahl_Zeichen_Titel").First().Value); + } + } + } + + private byte[] BuildPdfA2aDocument() + { + string simpleDoc = IntegrationHelpers.GetDocumentPath("Single Page Simple - from inkscape.pdf"); + using (PdfDocument pdfPigDocument = PdfDocument.Open(simpleDoc)) + { + PdfDocumentBuilder pdfDocumentBuilder = new() + { + ArchiveStandard = PdfAStandard.A2A, + }; + pdfDocumentBuilder.AddPage(pdfPigDocument, 1); + + return pdfDocumentBuilder.Build(); + } + } + } +} diff --git a/src/UglyToad.PdfPig/Writer/PdfABaselineRuleBuilder.cs b/src/UglyToad.PdfPig/Writer/PdfABaselineRuleBuilder.cs index ce67c191..4ca39d35 100644 --- a/src/UglyToad.PdfPig/Writer/PdfABaselineRuleBuilder.cs +++ b/src/UglyToad.PdfPig/Writer/PdfABaselineRuleBuilder.cs @@ -1,5 +1,6 @@ using System; using System.Collections.Generic; +using System.Xml.Linq; using UglyToad.PdfPig.Tokens; using UglyToad.PdfPig.Writer.Colors; using UglyToad.PdfPig.Writer.Xmp; @@ -13,10 +14,11 @@ namespace UglyToad.PdfPig.Writer Func writerFunc, PdfDocumentBuilder.DocumentInformationBuilder documentInformationBuilder, PdfAStandard archiveStandard, - decimal version) + decimal version, + XDocument xmpMetadata) { catalog[NameToken.OutputIntents] = OutputIntentsFactory.GetOutputIntentsArray(writerFunc); - var xmpStream = XmpWriter.GenerateXmpStream(documentInformationBuilder, version, archiveStandard); + var xmpStream = 
XmpWriter.GenerateXmpStream(documentInformationBuilder, version, archiveStandard, xmpMetadata); var xmpObj = writerFunc(xmpStream); catalog[NameToken.Metadata] = xmpObj; } diff --git a/src/UglyToad.PdfPig/Writer/PdfDocumentBuilder.cs b/src/UglyToad.PdfPig/Writer/PdfDocumentBuilder.cs index 9f710260..bf52ff3f 100644 --- a/src/UglyToad.PdfPig/Writer/PdfDocumentBuilder.cs +++ b/src/UglyToad.PdfPig/Writer/PdfDocumentBuilder.cs @@ -19,6 +19,7 @@ namespace UglyToad.PdfPig.Writer using Tokens; using Util.JetBrains.Annotations; + using System.Xml.Linq; /// /// Provides methods to construct new PDF documents. @@ -61,6 +62,12 @@ namespace UglyToad.PdfPig.Writer /// public Bookmarks Bookmarks { get; set; } + /// + /// Additional document level metadata, which is XML in the XMP (Extensible Metadata Platform) format. It is only added if the PDF is + /// created with an ArchiveStandard other than PdfAStandard.None; it is passed to the XMP writer and merged with the generated PDF/A XMP. + /// + public XDocument XmpMetadata { get; set; } + /// /// The current page builders in the document and the corresponding 1 indexed page numbers. Use /// or to add a new page. 
@@ -715,7 +722,7 @@ namespace UglyToad.PdfPig.Writer { Func writerFunc = x => context.WriteToken(x); - PdfABaselineRuleBuilder.Obey(catalogDictionary, writerFunc, DocumentInformation, ArchiveStandard, version); + PdfABaselineRuleBuilder.Obey(catalogDictionary, writerFunc, DocumentInformation, ArchiveStandard, version, XmpMetadata); switch (ArchiveStandard) { diff --git a/src/UglyToad.PdfPig/Writer/Xmp/XmpWriter.cs b/src/UglyToad.PdfPig/Writer/Xmp/XmpWriter.cs index ae669284..f56d0717 100644 --- a/src/UglyToad.PdfPig/Writer/Xmp/XmpWriter.cs +++ b/src/UglyToad.PdfPig/Writer/Xmp/XmpWriter.cs @@ -39,7 +39,7 @@ namespace UglyToad.PdfPig.Writer.Xmp private const string PdfAIdentificationExtensionNamespace = "http://www.aiim.org/pdfa/ns/id/"; public static StreamToken GenerateXmpStream(PdfDocumentBuilder.DocumentInformationBuilder builder, decimal version, - PdfAStandard standard) + PdfAStandard standard, XDocument additionalXmpMetadata) { XNamespace xmpMeta = XmpMetaNamespace; XNamespace rdf = RdfNamespace; @@ -74,8 +74,8 @@ namespace UglyToad.PdfPig.Writer.Xmp }); var pdfAIdContainer = GetVersionAndConformanceLevelIdentificationElement(rdf, emptyRdfAbout, standard); - - var document = new XDocument( + + var document = MergeXmpXdocuments(new XDocument( new XElement(xmpMeta + "xmpmeta", GetNamespaceAttribute(XmpMetaPrefix, XmpMetaNamespace), new XAttribute(xmpMeta + "xmptk", Xmptk), new XElement(rdf + "RDF", @@ -84,7 +84,7 @@ namespace UglyToad.PdfPig.Writer.Xmp pdfAIdContainer ) ) - ); + ), additionalXmpMetadata); var xml = document.ToString(SaveOptions.None).Replace("\r\n", "\n"); xml = $"\n{xml}\n"; @@ -238,5 +238,42 @@ namespace UglyToad.PdfPig.Writer.Xmp ValueFunc = valueFunc; } } + + /// + /// Merges multiple System.Xml.Linq.XDocument objects into a copy of the first one; null documents after the first are skipped. + /// Child elements of a later document's rdf:RDF children (normally rdf:Description) whose names already occur below an rdf:Description of the merged result + /// are removed before the remaining content is appended to the merged rdf:RDF node. 
This leads to a simple XML merge, where no duplicate pdf:PDFVersion or pdfaid:conformance + /// nodes will occur. Every merged document must contain an rdf:RDF element, because it is located with First(). + /// + private static XDocument MergeXmpXdocuments(params XDocument[] xDocuments) + { + XDocument document = new XDocument(xDocuments.FirstOrDefault()); + foreach (XDocument xdocOriginal in xDocuments.Skip(1).Where(doc => doc != null)) + { + XDocument xdoc = new XDocument(xdocOriginal); + XElement rdfMainNode = document.Descendants(XNamespace.Get(RdfNamespace) + "RDF").First(); + XElement rdfCurrentNode = xdoc.Descendants(XNamespace.Get(RdfNamespace) + "RDF").First(); + + // Remove all children of rdf:Description which already exist in the main node (matched by element name only) + var allDescriptions = rdfCurrentNode.Elements().ToList(); + foreach (var description in allDescriptions) + { + foreach (XElement descriptionElement in description.Elements().ToList()) + { + if (rdfMainNode. + Descendants(XNamespace.Get(RdfNamespace) + "Description"). + SelectMany(d => d.Descendants()). + Select(mx => mx.Name). + Contains(descriptionElement.Name)) + { + descriptionElement.Remove(); + } + } + } + + rdfMainNode.Add(allDescriptions); + } + return document; + } } }