Add support for custom xmp metadata to PdfDocumentBuilder #490

This commit is contained in:
mli
2023-10-06 22:58:02 +02:00
committed by BobLd
parent 06ac832616
commit 81cdb474e1
4 changed files with 161 additions and 7 deletions

View File

@@ -0,0 +1,108 @@
namespace UglyToad.PdfPig.Tests.Writer
{
using Integration;
using PdfPig.Content;
using PdfPig.Writer;
using System.Linq;
using System.Xml.Linq;
using Xunit;
/// <summary>
/// Verifies the XMP metadata that <see cref="PdfDocumentBuilder"/> writes for PDF/A documents,
/// both on its own and when caller supplied XMP metadata has to be merged in.
/// </summary>
public class XmpTests
{
    const string RdfNamespace = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
    const string PdfaidNamespace = "http://www.aiim.org/pdfa/ns/id/";
    const string FtxFormsNamespace = "http://ns.ftx.com/forms/1.0/";
    const string FtxControldataNamespace = "http://ns.ftx.com/forms/1.0/controldata/";

    /// <summary>
    /// A document built as PDF/A-2a must expose the matching pdfaid identification in its XMP packet.
    /// </summary>
    [Fact]
    public void XmpInfoIsWrittenToPdfADocument()
    {
        XNamespace pdfaid = PdfaidNamespace;
        byte[] pdfBytes = BuildPdfA2aDocument();

        using PdfDocument reopened = PdfDocument.Open(pdfBytes);

        bool hasXmp = reopened.TryGetXmpMetadata(out XmpMetadata metadata);
        Assert.True(hasXmp);
        XDocument packet = metadata.GetXDocument();

        // Expected PDF/A-2a identification nodes:
        //   <pdfaid:part>2</pdfaid:part>
        //   <pdfaid:conformance>A</pdfaid:conformance>
        Assert.Equal("2", packet.Descendants(pdfaid + "part").First().Value);
        Assert.Equal("A", packet.Descendants(pdfaid + "conformance").First().Value);
    }

    /// <summary>
    /// Custom XMP metadata set on the builder must end up in the written packet, while the PDF/A
    /// identification generated by the builder must not be replaced or duplicated by the custom one.
    /// </summary>
    [Fact]
    public void CustomXmpInfoIsMergedIntoPdfADocumentXmp()
    {
        // Custom packet carrying vendor specific control data plus a deliberately conflicting
        // PDF/A-1b identification (part 1, conformance B).
        const string customXmpPacket = @"<x:xmpmeta xmlns:x=""adobe:ns:meta/"" x:xmptk=""Adobe XMP Core 5.6-c014 79.156797, 2014/08/20-09:53:02 "">
<rdf:RDF xmlns:rdf=""http://www.w3.org/1999/02/22-rdf-syntax-ns#"">
<rdf:Description
xmlns:ftx=""http://ns.ftx.com/forms/1.0/""
xmlns:control=""http://ns.ftx.com/forms/1.0/controldata/""
xmlns:pdfaid=""http://www.aiim.org/pdfa/ns/id/""
xmlns:pdf=""http://ns.adobe.com/pdf/1.3/""
>
<ftx:ControlData rdf:parseType=""Resource"">
<control:Anzahl_Zeichen_Titel>0</control:Anzahl_Zeichen_Titel>
<control:Anzahl_Zeichen_Vorname>0</control:Anzahl_Zeichen_Vorname>
<control:Anzahl_Zeichen_Namenszusatz>0</control:Anzahl_Zeichen_Namenszusatz>
<control:Anzahl_Zeichen_Hausnummer>0</control:Anzahl_Zeichen_Hausnummer>
<control:Anzahl_Zeichen_Postleitzahl>0</control:Anzahl_Zeichen_Postleitzahl>
<control:Anzahl_Zeichen_Wohnsitzlaendercode>0</control:Anzahl_Zeichen_Wohnsitzlaendercode>
<control:Auftragsnummer_Einsender>0</control:Auftragsnummer_Einsender>
<control:Formularnummer>10</control:Formularnummer>
<control:Formularversion>10.2020</control:Formularversion>
<control:Technische_Version>6</control:Technische_Version>
</ftx:ControlData>
<pdfaid:part>1</pdfaid:part>
<pdfaid:conformance>B</pdfaid:conformance>
</rdf:Description>
</rdf:RDF>
</x:xmpmeta>";

        XNamespace pdfaid = PdfaidNamespace;
        XNamespace ftxForms = FtxFormsNamespace;
        XNamespace ftxControlData = FtxControldataNamespace;

        using PdfDocument sourceDocument = PdfDocument.Open(BuildPdfA2aDocument());

        var builder = new PdfDocumentBuilder
        {
            ArchiveStandard = PdfAStandard.A2A,
            IncludeDocumentInformation = true,
            XmpMetadata = XDocument.Parse(customXmpPacket),
        };
        builder.AddPage(sourceDocument, 1);

        using PdfDocument mergedDocument = PdfDocument.Open(builder.Build());

        bool hasXmp = mergedDocument.TryGetXmpMetadata(out XmpMetadata metadata);
        Assert.True(hasXmp);
        XDocument packet = metadata.GetXDocument();

        // Exactly one of each PDF/A-2a identification node is expected; the PDF/A-1b values from
        // the custom packet must not show up:
        //   <pdfaid:part>2</pdfaid:part>
        //   <pdfaid:conformance>A</pdfaid:conformance>
        var partNodes = packet.Descendants(pdfaid + "part").ToList();
        Assert.Equal("2", partNodes.First().Value);
        Assert.Single(partNodes);

        var conformanceNodes = packet.Descendants(pdfaid + "conformance").ToList();
        Assert.Equal("A", conformanceNodes.First().Value);
        Assert.Single(conformanceNodes);

        // The vendor specific nodes of the custom packet have to be present as well.
        Assert.Single(packet.Descendants(ftxForms + "ControlData"));
        Assert.Equal("0", packet.Descendants(ftxControlData + "Anzahl_Zeichen_Titel").First().Value);
    }

    /// <summary>
    /// Copies page 1 of the simple inkscape sample document into a new document built as PDF/A-2a.
    /// </summary>
    /// <returns>The bytes of the newly built PDF document.</returns>
    private byte[] BuildPdfA2aDocument()
    {
        string samplePath = IntegrationHelpers.GetDocumentPath("Single Page Simple - from inkscape.pdf");

        using PdfDocument sample = PdfDocument.Open(samplePath);

        var builder = new PdfDocumentBuilder { ArchiveStandard = PdfAStandard.A2A };
        builder.AddPage(sample, 1);
        return builder.Build();
    }
}
}

View File

@@ -1,5 +1,6 @@
using System;
using System.Collections.Generic;
using System.Xml.Linq;
using UglyToad.PdfPig.Tokens;
using UglyToad.PdfPig.Writer.Colors;
using UglyToad.PdfPig.Writer.Xmp;
@@ -13,10 +14,11 @@ namespace UglyToad.PdfPig.Writer
Func<IToken, IndirectReferenceToken> writerFunc,
PdfDocumentBuilder.DocumentInformationBuilder documentInformationBuilder,
PdfAStandard archiveStandard,
decimal version)
decimal version,
XDocument xmpMetadata)
{
catalog[NameToken.OutputIntents] = OutputIntentsFactory.GetOutputIntentsArray(writerFunc);
var xmpStream = XmpWriter.GenerateXmpStream(documentInformationBuilder, version, archiveStandard);
var xmpStream = XmpWriter.GenerateXmpStream(documentInformationBuilder, version, archiveStandard, xmpMetadata);
var xmpObj = writerFunc(xmpStream);
catalog[NameToken.Metadata] = xmpObj;
}

View File

@@ -19,6 +19,7 @@ namespace UglyToad.PdfPig.Writer
using Tokens;
using Util.JetBrains.Annotations;
using System.Xml.Linq;
/// <summary>
/// Provides methods to construct new PDF documents.
@@ -61,6 +62,12 @@ namespace UglyToad.PdfPig.Writer
/// </summary>
public Bookmarks Bookmarks { get; set; }
/// <summary>
/// Optional custom document level metadata, which is XML in the XMP (Extensible Metadata Platform) format.
/// It is only written if the PDF is created with an <see cref="ArchiveStandard"/> other than <see cref="PdfAStandard.None"/>.
/// In that case it is merged into the XMP metadata generated by the builder: elements whose names already occur in the
/// generated metadata (for example pdfaid:part and pdfaid:conformance) are not taken over from this document.
/// May be left <c>null</c> if no custom metadata is required.
/// </summary>
public XDocument XmpMetadata { get; set; }
/// <summary>
/// The current page builders in the document and the corresponding 1 indexed page numbers. Use <see cref="AddPage(double,double)"/>
/// or <see cref="AddPage(PageSize,bool)"/> to add a new page.
@@ -715,7 +722,7 @@ namespace UglyToad.PdfPig.Writer
{
Func<IToken, IndirectReferenceToken> writerFunc = x => context.WriteToken(x);
PdfABaselineRuleBuilder.Obey(catalogDictionary, writerFunc, DocumentInformation, ArchiveStandard, version);
PdfABaselineRuleBuilder.Obey(catalogDictionary, writerFunc, DocumentInformation, ArchiveStandard, version, XmpMetadata);
switch (ArchiveStandard)
{

View File

@@ -39,7 +39,7 @@ namespace UglyToad.PdfPig.Writer.Xmp
private const string PdfAIdentificationExtensionNamespace = "http://www.aiim.org/pdfa/ns/id/";
public static StreamToken GenerateXmpStream(PdfDocumentBuilder.DocumentInformationBuilder builder, decimal version,
PdfAStandard standard)
PdfAStandard standard, XDocument additionalXmpMetadata)
{
XNamespace xmpMeta = XmpMetaNamespace;
XNamespace rdf = RdfNamespace;
@@ -74,8 +74,8 @@ namespace UglyToad.PdfPig.Writer.Xmp
});
var pdfAIdContainer = GetVersionAndConformanceLevelIdentificationElement(rdf, emptyRdfAbout, standard);
var document = new XDocument(
var document = MergeXmpXdocuments(new XDocument(
new XElement(xmpMeta + "xmpmeta", GetNamespaceAttribute(XmpMetaPrefix, XmpMetaNamespace),
new XAttribute(xmpMeta + "xmptk", Xmptk),
new XElement(rdf + "RDF",
@@ -84,7 +84,7 @@ namespace UglyToad.PdfPig.Writer.Xmp
pdfAIdContainer
)
)
);
), additionalXmpMetadata);
var xml = document.ToString(SaveOptions.None).Replace("\r\n", "\n");
xml = $"<?xpacket begin=\"\ufeff\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n{xml}\n<?xpacket end=\"r\"?>";
@@ -238,5 +238,42 @@ namespace UglyToad.PdfPig.Writer.Xmp
ValueFunc = valueFunc;
}
}
/// <summary>
/// Merges several XMP packets, given as <see cref="XDocument"/> objects, into one new document.
/// The first document is the base. For every further non-null document the child elements of its first
/// rdf:RDF node (normally rdf:Description elements) are appended to the first rdf:RDF node of the base.
/// Before appending, each direct child of such an element is dropped if an element with the same qualified
/// name already occurs anywhere below an rdf:Description of the merged document. Documents with a lower
/// array index therefore win, which leads to a simple XML merge where no duplicate pdf:PDFVersion or
/// pdfaid:conformance nodes will occur.
/// The documents passed in are copied and never modified.
/// </summary>
/// <param name="xDocuments">
/// The documents to merge. The first entry is the base and must not be null; null entries after the first are skipped.
/// </param>
/// <returns>A new document holding the merged content.</returns>
/// <exception cref="System.ArgumentNullException">No document was passed or the base document is null.</exception>
/// <exception cref="System.InvalidOperationException">
/// An additional document has to be merged and either it or the base document contains no rdf:RDF element.
/// </exception>
private static XDocument MergeXmpXdocuments(params XDocument[] xDocuments)
{
    if (xDocuments == null || xDocuments.Length == 0 || xDocuments[0] == null)
    {
        throw new System.ArgumentNullException(nameof(xDocuments), "A non-null base XMP document is required as first document.");
    }

    XName rdfRootName = XNamespace.Get(RdfNamespace) + "RDF";
    XName rdfDescriptionName = XNamespace.Get(RdfNamespace) + "Description";

    // Work on a copy so the base document of the caller stays untouched.
    XDocument document = new XDocument(xDocuments[0]);

    foreach (XDocument xdocOriginal in xDocuments.Skip(1).Where(doc => doc != null))
    {
        // Copy as well: elements are removed from the additional document below.
        XDocument xdoc = new XDocument(xdocOriginal);

        XElement rdfMainNode = document.Descendants(rdfRootName).FirstOrDefault();
        XElement rdfCurrentNode = xdoc.Descendants(rdfRootName).FirstOrDefault();
        if (rdfMainNode == null || rdfCurrentNode == null)
        {
            throw new System.InvalidOperationException(
                "Cannot merge XMP metadata: every XMP document must contain an rdf:RDF element.");
        }

        // Names already present in the merged document. The merged document is not changed before the
        // Add call at the end of this iteration, so the set is computed once per additional document
        // instead of once per candidate element.
        var existingNames = new System.Collections.Generic.HashSet<XName>(
            rdfMainNode
                .Descendants(rdfDescriptionName)
                .SelectMany(d => d.Descendants())
                .Select(e => e.Name));

        // All children of rdf:RDF of the additional document, normally rdf:Description elements.
        var allDescriptions = rdfCurrentNode.Elements().ToList();
        foreach (XElement description in allDescriptions)
        {
            // Remove all children which already exist in the main node. The Remove() extension
            // snapshots the query first, so removing while filtering the live sequence is safe.
            description.Elements().Where(e => existingNames.Contains(e.Name)).Remove();
        }

        rdfMainNode.Add(allDescriptions);
    }

    return document;
}
}
}