Add support for custom xmp metadata to PdfDocumentBuilder #490

2026-03-10 00:23:29 +08:00 · 2023-10-06 22:58:02 +02:00
parent 06ac832616
commit 81cdb474e1
4 changed files with 161 additions and 7 deletions
--- a/src/UglyToad.PdfPig.Tests/Writer/XmpTests.cs
+++ b/src/UglyToad.PdfPig.Tests/Writer/XmpTests.cs
@@ -0,0 +1,108 @@
+namespace UglyToad.PdfPig.Tests.Writer
+{
+    using Integration;
+    using PdfPig.Content;
+    using PdfPig.Writer;
+    using System.Linq;
+    using System.Xml.Linq;
+    using Xunit;
+
+    public class XmpTests
+    {
+        const string RdfNamespace = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
+        const string PdfaidNamespace = "http://www.aiim.org/pdfa/ns/id/";
+        const string FtxFormsNamespace = "http://ns.ftx.com/forms/1.0/";
+        const string FtxControldataNamespace = "http://ns.ftx.com/forms/1.0/controldata/";
+
+        [Fact]
+        public void XmpInfoIsWrittenToPdfADocument()
+        {
+            byte[] pdfBytes = BuildPdfA2aDocument();
+            XNamespace pdfaid = PdfaidNamespace;
+            using (PdfDocument reopened = PdfDocument.Open(pdfBytes))
+            {
+                Assert.True(reopened.TryGetXmpMetadata(out XmpMetadata metadata));
+                XDocument xmp = metadata.GetXDocument();
+                /* The packet should contain the PDF/A-2a identification nodes:
+                 * <pdfaid:part>2</pdfaid:part>
+                 * <pdfaid:conformance>A</pdfaid:conformance>
+                 */
+                Assert.Equal("2", xmp.Descendants(pdfaid + "part").First().Value);
+                Assert.Equal("A", xmp.Descendants(pdfaid + "conformance").First().Value);
+            }
+        }
+
+        [Fact]
+        public void CustomXmpInfoIsMergedIntoPdfADocumentXmp()
+        {
+            using (PdfDocument simpleDocument = PdfDocument.Open(BuildPdfA2aDocument()))
+            {
+                PdfDocumentBuilder pdfDocumentBuilder = new()
+                {
+                    ArchiveStandard = PdfAStandard.A2A,
+                    IncludeDocumentInformation = true,
+                    XmpMetadata = XDocument.Parse(@"<x:xmpmeta xmlns:x=""adobe:ns:meta/"" x:xmptk=""Adobe XMP Core 5.6-c014 79.156797, 2014/08/20-09:53:02        "">
+                          <rdf:RDF xmlns:rdf=""http://www.w3.org/1999/02/22-rdf-syntax-ns#"">
+                            <rdf:Description
+	                         xmlns:ftx=""http://ns.ftx.com/forms/1.0/""
+	                         xmlns:control=""http://ns.ftx.com/forms/1.0/controldata/""
+	                         xmlns:pdfaid=""http://www.aiim.org/pdfa/ns/id/""
+	                         xmlns:pdf=""http://ns.adobe.com/pdf/1.3/""
+	                         >
+                              <ftx:ControlData rdf:parseType=""Resource"">
+                                <control:Anzahl_Zeichen_Titel>0</control:Anzahl_Zeichen_Titel>
+                                <control:Anzahl_Zeichen_Vorname>0</control:Anzahl_Zeichen_Vorname>
+                                <control:Anzahl_Zeichen_Namenszusatz>0</control:Anzahl_Zeichen_Namenszusatz>
+                                <control:Anzahl_Zeichen_Hausnummer>0</control:Anzahl_Zeichen_Hausnummer>
+                                <control:Anzahl_Zeichen_Postleitzahl>0</control:Anzahl_Zeichen_Postleitzahl>
+                                <control:Anzahl_Zeichen_Wohnsitzlaendercode>0</control:Anzahl_Zeichen_Wohnsitzlaendercode>
+                                <control:Auftragsnummer_Einsender>0</control:Auftragsnummer_Einsender>
+                                <control:Formularnummer>10</control:Formularnummer>
+                                <control:Formularversion>10.2020</control:Formularversion>
+                                <control:Technische_Version>6</control:Technische_Version>
+                              </ftx:ControlData>
+                              <pdfaid:part>1</pdfaid:part>
+                              <pdfaid:conformance>B</pdfaid:conformance>
+                            </rdf:Description>
+                          </rdf:RDF>
+                        </x:xmpmeta>"),
+                };
+                pdfDocumentBuilder.AddPage(simpleDocument, 1);
+
+                using (PdfDocument xmpDocument = PdfDocument.Open(pdfDocumentBuilder.Build()))
+                {
+                    Assert.True(xmpDocument.TryGetXmpMetadata(out XmpMetadata xmpMetadata));
+                    XDocument xmp = xmpMetadata.GetXDocument();
+                    /* Should still contain exactly one each of the correct PDF/A-2a XMP nodes.
+                     * PDF/A-1b from the added XMP document must not be there.
+                     * <pdfaid:part>2</pdfaid:part>
+                     * <pdfaid:conformance>A</pdfaid:conformance>
+                     */
+                    Assert.Equal("2", xmp.Descendants(XNamespace.Get(PdfaidNamespace) + "part").First().Value);
+                    Assert.Single(xmp.Descendants(XNamespace.Get(PdfaidNamespace) + "part"));
+                    Assert.Equal("A", xmp.Descendants(XNamespace.Get(PdfaidNamespace) + "conformance").First().Value);
+                    Assert.Single(xmp.Descendants(XNamespace.Get(PdfaidNamespace) + "conformance"));
+
+                    // Should also contain the nodes from the added XMP document
+                    Assert.Single(xmp.Descendants(XNamespace.Get(FtxFormsNamespace) + "ControlData"));
+                    Assert.Equal("0", xmp.Descendants(XNamespace.Get(FtxControldataNamespace) + "Anzahl_Zeichen_Titel").First().Value);
+                }
+            }
+        }
+
+        private byte[] BuildPdfA2aDocument()
+        {
+            // Copies page 1 of a sample PDF into a builder configured for PDF/A-2a and returns the built bytes.
+            string samplePath = IntegrationHelpers.GetDocumentPath("Single Page Simple - from inkscape.pdf");
+            using (PdfDocument source = PdfDocument.Open(samplePath))
+            {
+                var builder = new PdfDocumentBuilder
+                {
+                    ArchiveStandard = PdfAStandard.A2A,
+                };
+                builder.AddPage(source, 1);
+                return builder.Build();
+            }
+        }
+    }
+}
--- a/src/UglyToad.PdfPig/Writer/PdfABaselineRuleBuilder.cs
+++ b/src/UglyToad.PdfPig/Writer/PdfABaselineRuleBuilder.cs
@@ -1,5 +1,6 @@
 using System;
 using System.Collections.Generic;
+using System.Xml.Linq;
 using UglyToad.PdfPig.Tokens;
 using UglyToad.PdfPig.Writer.Colors;
 using UglyToad.PdfPig.Writer.Xmp;
@@ -13,10 +14,11 @@ namespace UglyToad.PdfPig.Writer
            Func<IToken, IndirectReferenceToken> writerFunc,
            PdfDocumentBuilder.DocumentInformationBuilder documentInformationBuilder,
            PdfAStandard archiveStandard,
-            decimal version)
+            decimal version,
+            XDocument xmpMetadata)
        {
            catalog[NameToken.OutputIntents] = OutputIntentsFactory.GetOutputIntentsArray(writerFunc);
-            var xmpStream = XmpWriter.GenerateXmpStream(documentInformationBuilder, version, archiveStandard);
+            var xmpStream = XmpWriter.GenerateXmpStream(documentInformationBuilder, version, archiveStandard, xmpMetadata);
            var xmpObj = writerFunc(xmpStream);
            catalog[NameToken.Metadata] = xmpObj;
        }
--- a/src/UglyToad.PdfPig/Writer/PdfDocumentBuilder.cs
+++ b/src/UglyToad.PdfPig/Writer/PdfDocumentBuilder.cs
@@ -19,6 +19,7 @@ namespace UglyToad.PdfPig.Writer
    using Tokens;

    using Util.JetBrains.Annotations;
+    using System.Xml.Linq;

    /// <summary>
    /// Provides methods to construct new PDF documents.
@@ -61,6 +62,12 @@ namespace UglyToad.PdfPig.Writer
        /// </summary>
        public Bookmarks Bookmarks { get; set; }

+        /// <summary>
+        /// Custom document level metadata, which is XML in the XMP (Extensible Metadata Platform) format. It is merged into the generated
+        /// XMP packet (generated properties win) and is only added if the PDF is created with an ArchiveStandard other than PdfAStandard.None.
+        /// </summary>
+        public XDocument XmpMetadata { get; set; }
+
        /// <summary>
        /// The current page builders in the document and the corresponding 1 indexed page numbers. Use <see cref="AddPage(double,double)"/>
        /// or <see cref="AddPage(PageSize,bool)"/> to add a new page.
@@ -715,7 +722,7 @@ namespace UglyToad.PdfPig.Writer
            {
                Func<IToken, IndirectReferenceToken> writerFunc = x => context.WriteToken(x);

-                PdfABaselineRuleBuilder.Obey(catalogDictionary, writerFunc, DocumentInformation, ArchiveStandard, version);
+                PdfABaselineRuleBuilder.Obey(catalogDictionary, writerFunc, DocumentInformation, ArchiveStandard, version, XmpMetadata);

                switch (ArchiveStandard)
                {
--- a/src/UglyToad.PdfPig/Writer/Xmp/XmpWriter.cs
+++ b/src/UglyToad.PdfPig/Writer/Xmp/XmpWriter.cs
@@ -39,7 +39,7 @@ namespace UglyToad.PdfPig.Writer.Xmp
        private const string PdfAIdentificationExtensionNamespace = "http://www.aiim.org/pdfa/ns/id/";
        
        public static StreamToken GenerateXmpStream(PdfDocumentBuilder.DocumentInformationBuilder builder, decimal version,
-            PdfAStandard standard)
+            PdfAStandard standard, XDocument additionalXmpMetadata)
        {
            XNamespace xmpMeta = XmpMetaNamespace;
            XNamespace rdf = RdfNamespace;
@@ -74,8 +74,8 @@ namespace UglyToad.PdfPig.Writer.Xmp
                });

            var pdfAIdContainer = GetVersionAndConformanceLevelIdentificationElement(rdf, emptyRdfAbout, standard);
-            
-            var document = new XDocument(
+
+            var document = MergeXmpXdocuments(new XDocument(
                new XElement(xmpMeta + "xmpmeta", GetNamespaceAttribute(XmpMetaPrefix, XmpMetaNamespace),
                    new XAttribute(xmpMeta + "xmptk", Xmptk),
                    new XElement(rdf + "RDF",
@@ -84,7 +84,7 @@ namespace UglyToad.PdfPig.Writer.Xmp
                        pdfAIdContainer
                    )
                )
-            );
+            ), additionalXmpMetadata);

            var xml = document.ToString(SaveOptions.None).Replace("\r\n", "\n");
            xml = $"<?xpacket begin=\"\ufeff\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n{xml}\n<?xpacket end=\"r\"?>";
@@ -238,5 +238,42 @@ namespace UglyToad.PdfPig.Writer.Xmp
                ValueFunc = valueFunc;
            }
        }
+
+        /// <summary>
+        /// Merges XMP packets into a copy of the first one. For every later (non-null) document, all children of its rdf:RDF node
+        /// (normally rdf:Description) are appended to the first document's rdf:RDF node, after dropping every property - child
+        /// element or shorthand attribute - whose name already occurs inside an rdf:Description of the result. Earlier documents
+        /// therefore win, so no duplicate pdf:PDFVersion or pdfaid:conformance properties can be introduced by later ones.
+        /// </summary>
+        /// <exception cref="System.InvalidOperationException">A document to merge does not contain an rdf:RDF element.</exception>
+        private static XDocument MergeXmpXdocuments(params XDocument[] xDocuments)
+        {
+            XNamespace rdf = RdfNamespace;
+            XDocument document = new XDocument(xDocuments.FirstOrDefault());
+            foreach (XDocument xdocOriginal in xDocuments.Skip(1).Where(doc => doc != null))
+            {
+                // Work on a copy so the caller's document is never modified.
+                XElement rdfMainNode = document.Descendants(rdf + "RDF").First();
+                XElement rdfCurrentNode = new XDocument(xdocOriginal).Descendants(rdf + "RDF").FirstOrDefault();
+                if (rdfCurrentNode == null)
+                {
+                    throw new System.InvalidOperationException("The XMP metadata to merge does not contain an rdf:RDF element.");
+                }
+
+                // Property names already present; computed once per document because the main node only grows after filtering.
+                var existingNames = new System.Collections.Generic.HashSet<XName>(
+                    rdfMainNode.Descendants(rdf + "Description").SelectMany(d => d.Descendants()).Select(e => e.Name));
+                var allDescriptions = rdfCurrentNode.Elements().ToList();
+                foreach (XElement description in allDescriptions)
+                {
+                    description.Elements().Where(e => existingNames.Contains(e.Name)).Remove();
+                    // XMP also allows simple properties in attribute form (e.g. pdfaid:part="1"); drop clashing ones as well.
+                    description.Attributes().Where(a => existingNames.Contains(a.Name)).Remove();
+                }
+
+                rdfMainNode.Add(allDescriptions);
+            }
+            return document;
+        }
    }
 }