#127 add basic pdf/a-1b level compliance to the document builder

Adds color profiles/output intents and an XMP metadata stream to the document in order to be compliant with PDF/A-1b (basic). This compliance level is toggled on the builder and is set to 'off/none' by default, since enabling it generates larger files. PDF/A documents also cannot use the Standard 14 fonts because fonts must be embedded in the file, so adding a Standard 14 font when the compliance level is not none will throw.
This commit is contained in:
Eliot Jones
2020-03-29 16:43:52 +01:00
parent 7d52bc8be4
commit 5f45ee53bd
9 changed files with 378 additions and 6 deletions

View File

@@ -195,6 +195,7 @@
"UglyToad.PdfPig.Util.Adler32Checksum", "UglyToad.PdfPig.Util.Adler32Checksum",
"UglyToad.PdfPig.Util.IWordExtractor", "UglyToad.PdfPig.Util.IWordExtractor",
"UglyToad.PdfPig.Util.DefaultWordExtractor", "UglyToad.PdfPig.Util.DefaultWordExtractor",
"UglyToad.PdfPig.Writer.PdfAStandard",
"UglyToad.PdfPig.Writer.PdfDocumentBuilder", "UglyToad.PdfPig.Writer.PdfDocumentBuilder",
"UglyToad.PdfPig.Writer.PdfMerger", "UglyToad.PdfPig.Writer.PdfMerger",
"UglyToad.PdfPig.Writer.PdfPageBuilder", "UglyToad.PdfPig.Writer.PdfPageBuilder",

View File

@@ -494,6 +494,34 @@
} }
} }
[Fact]
public void CanGeneratePdfA1BFile()
{
    // Arrange: switch the builder to PDF/A-1b; text is drawn with a TrueType font added from file bytes.
    var documentBuilder = new PdfDocumentBuilder();
    documentBuilder.ArchiveStandard = PdfAStandard.A1B;

    var firstPage = documentBuilder.AddPage(PageSize.A4);
    var roboto = documentBuilder.AddTrueTypeFont(TrueTypeTestHelper.GetFileBytes("Roboto-Regular.ttf"));
    firstPage.AddText("Howdy!", 12, new PdfPoint(25, 670), roboto);

    // Act: build the document and keep a copy on disk for manual inspection.
    var output = documentBuilder.Build();
    WriteFile(nameof(CanGeneratePdfA1BFile), output);

    // Assert: the result parses strictly, has one page and carries readable XMP metadata.
    using (var document = PdfDocument.Open(output, ParsingOptions.LenientParsingOff))
    {
        Assert.Equal(1, document.NumberOfPages);

        var hasXmp = document.TryGetXmpMetadata(out var metadata);
        Assert.True(hasXmp);
        Assert.NotNull(metadata.GetXDocument());
    }
}
private static void WriteFile(string name, byte[] bytes) private static void WriteFile(string name, byte[] bytes)
{ {
try try

Binary file not shown.

View File

@@ -1,4 +1,4 @@
<Project Sdk="Microsoft.NET.Sdk"> <Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup> <PropertyGroup>
<TargetFrameworks>netstandard2.0;net45;net451;net452;net46;net461;net462;net47</TargetFrameworks> <TargetFrameworks>netstandard2.0;net45;net451;net452;net46;net461;net462;net47</TargetFrameworks>
<LangVersion>latest</LangVersion> <LangVersion>latest</LangVersion>
@@ -12,9 +12,11 @@
</PropertyGroup> </PropertyGroup>
<ItemGroup> <ItemGroup>
<None Remove="Resources\CMap\*" /> <None Remove="Resources\CMap\*" />
<None Remove="Resources\ICC\*" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<EmbeddedResource Include="Resources\CMap\*" /> <EmbeddedResource Include="Resources\CMap\*" />
<EmbeddedResource Include="Resources\ICC\*" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<PackageReference Include="Microsoft.SourceLink.GitHub" Version="1.0.0"> <PackageReference Include="Microsoft.SourceLink.GitHub" Version="1.0.0">

View File

@@ -0,0 +1,46 @@
namespace UglyToad.PdfPig.Writer.Colors
{
    using System;
    using System.Collections.Generic;
    using Tokens;

    /// <summary>
    /// Creates the output intents array placed in the document catalog, backed by the embedded sRGB ICC profile.
    /// </summary>
    internal static class OutputIntentsFactory
    {
        private const string SrgbIec61966OutputCondition = "sRGB IEC61966-2.1";
        private const string RegistryName = "http://www.color.org";

        /// <summary>
        /// Writes the compressed sRGB profile as a stream object and returns a single-element array
        /// containing the output intent dictionary which references it.
        /// </summary>
        /// <param name="objectWriter">Callback which writes a token as an object and returns the written object.</param>
        /// <returns>The array holding the one output intent dictionary.</returns>
        public static ArrayToken GetOutputIntentsArray(Func<IToken, ObjectToken> objectWriter)
        {
            var profileReference = WriteSrgbProfile(objectWriter);
            var condition = new StringToken(SrgbIec61966OutputCondition);

            // Entries are added in a fixed order so the dictionary is enumerated the same way every time.
            var intent = new Dictionary<NameToken, IToken>();
            intent.Add(NameToken.Type, NameToken.OutputIntent);
            intent.Add(NameToken.S, NameToken.GtsPdfa1);
            intent.Add(NameToken.OutputCondition, condition);
            intent.Add(NameToken.OutputConditionIdentifier, condition);
            intent.Add(NameToken.RegistryName, new StringToken(RegistryName));
            intent.Add(NameToken.Info, condition);
            intent.Add(NameToken.DestOutputProfile, profileReference);

            return new ArrayToken(new IToken[] { new DictionaryToken(intent) });
        }

        /// <summary>
        /// Compresses the sRGB profile bytes, writes them as a stream object via <paramref name="objectWriter"/>
        /// and returns an indirect reference to the written object.
        /// </summary>
        private static IndirectReferenceToken WriteSrgbProfile(Func<IToken, ObjectToken> objectWriter)
        {
            var compressed = DataCompresser.CompressBytes(ProfileStreamReader.GetSRgb2014());

            var header = new Dictionary<NameToken, IToken>();
            header.Add(NameToken.Length, new NumericToken(compressed.Length));
            header.Add(NameToken.N, new NumericToken(3));
            header.Add(NameToken.Filter, NameToken.FlateDecode);

            var profileObject = objectWriter(new StreamToken(new DictionaryToken(header), compressed));

            return new IndirectReferenceToken(profileObject.Number);
        }
    }
}

View File

@@ -0,0 +1,34 @@
namespace UglyToad.PdfPig.Writer.Colors
{
    using System;
    using System.IO;
    using System.Linq;
    using PdfFonts.Parser;

    /// <summary>
    /// Reads ICC color profiles which are embedded in this assembly as manifest resources.
    /// </summary>
    internal static class ProfileStreamReader
    {
        /// <summary>
        /// Gets the raw bytes of the embedded <c>sRGB2014.icc</c> color profile.
        /// </summary>
        /// <returns>The uncompressed bytes of the ICC profile.</returns>
        /// <exception cref="InvalidOperationException">
        /// The profile is not present among the assembly's manifest resources or its stream could not be opened.
        /// </exception>
        public static byte[] GetSRgb2014()
        {
            // Look the name up and open the stream on the same assembly so the two can never disagree.
            var assembly = typeof(ProfileStreamReader).Assembly;

            // Resource names are identifiers rather than linguistic text, so compare ordinally.
            var resource = assembly.GetManifestResourceNames()
                .FirstOrDefault(x => x.EndsWith("sRGB2014.icc", StringComparison.OrdinalIgnoreCase));

            if (resource == null)
            {
                throw new InvalidOperationException("Could not find the sRGB ICC color profile stream.");
            }

            using (var stream = assembly.GetManifestResourceStream(resource))
            {
                // Fail loudly rather than returning an empty profile.
                if (stream == null)
                {
                    throw new InvalidOperationException("Could not find the sRGB ICC color profile stream.");
                }

                using (var memoryStream = new MemoryStream())
                {
                    stream.CopyTo(memoryStream);
                    return memoryStream.ToArray();
                }
            }
        }
    }
}

View File

@@ -0,0 +1,17 @@
namespace UglyToad.PdfPig.Writer
{
/// <summary>
/// The standard of PDF/A compliance for generated documents.
/// </summary>
public enum PdfAStandard
{
/// <summary>
/// No PDF/A compliance.
/// </summary>
None = 0,
/// <summary>
/// Compliance with PDF/A-1b. Level B (basic) conformance covers the requirements necessary for the reliable reproduction of a document's visual appearance.
/// </summary>
A1B = 1
}
}

View File

@@ -1,4 +1,5 @@
namespace UglyToad.PdfPig.Writer 
namespace UglyToad.PdfPig.Writer
{ {
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
@@ -12,6 +13,9 @@
using PdfPig.Fonts.Standard14Fonts; using PdfPig.Fonts.Standard14Fonts;
using PdfPig.Fonts.TrueType.Parser; using PdfPig.Fonts.TrueType.Parser;
using Tokens; using Tokens;
using Colors;
using Xmp;
using Util.JetBrains.Annotations; using Util.JetBrains.Annotations;
/// <summary> /// <summary>
@@ -24,6 +28,11 @@
private readonly Dictionary<Guid, FontStored> fonts = new Dictionary<Guid, FontStored>(); private readonly Dictionary<Guid, FontStored> fonts = new Dictionary<Guid, FontStored>();
private readonly Dictionary<Guid, ImageStored> images = new Dictionary<Guid, ImageStored>(); private readonly Dictionary<Guid, ImageStored> images = new Dictionary<Guid, ImageStored>();
/// <summary>
/// The standard of PDF/A compliance of the generated document. Defaults to <see cref="PdfAStandard.None"/>.
/// </summary>
/// <remarks>
/// When set to anything other than <see cref="PdfAStandard.None"/>, <see cref="AddStandard14Font"/> throws a
/// <see cref="NotSupportedException"/> and the built document's catalog gains an output intents array and an
/// XMP metadata stream.
/// </remarks>
public PdfAStandard ArchiveStandard { get; set; } = PdfAStandard.None;
/// <summary> /// <summary>
/// Whether to include the document information dictionary in the produced document. /// Whether to include the document information dictionary in the produced document.
/// </summary> /// </summary>
@@ -128,6 +137,11 @@
/// <returns>An identifier which can be passed to <see cref="PdfPageBuilder.AddText"/>.</returns> /// <returns>An identifier which can be passed to <see cref="PdfPageBuilder.AddText"/>.</returns>
public AddedFont AddStandard14Font(Standard14Font type) public AddedFont AddStandard14Font(Standard14Font type)
{ {
if (ArchiveStandard != PdfAStandard.None)
{
throw new NotSupportedException($"PDF/A {ArchiveStandard} requires the font to be embedded in the file, only {nameof(AddTrueTypeFont)} is supported.");
}
var id = Guid.NewGuid(); var id = Guid.NewGuid();
var name = NameToken.Create($"F{fonts.Count}"); var name = NameToken.Create($"F{fonts.Count}");
var added = new AddedFont(id, name); var added = new AddedFont(id, name);
@@ -321,11 +335,21 @@
var pagesRef = context.WriteObject(memory, pagesDictionary, reserved); var pagesRef = context.WriteObject(memory, pagesDictionary, reserved);
var catalog = new DictionaryToken(new Dictionary<NameToken, IToken> var catalogDictionary = new Dictionary<NameToken, IToken>
{ {
{ NameToken.Type, NameToken.Catalog }, {NameToken.Type, NameToken.Catalog},
{ NameToken.Pages, new IndirectReferenceToken(pagesRef.Number) } {NameToken.Pages, new IndirectReferenceToken(pagesRef.Number)}
}); };
if (ArchiveStandard != PdfAStandard.None)
{
catalogDictionary[NameToken.OutputIntents] = OutputIntentsFactory.GetOutputIntentsArray(x => context.WriteObject(memory, x));
var xmpStream = XmpWriter.GenerateXmpStream(DocumentInformation, 1.7m, ArchiveStandard);
var xmpObj = context.WriteObject(memory, xmpStream);
catalogDictionary[NameToken.Metadata] = new IndirectReferenceToken(xmpObj.Number);
}
var catalog = new DictionaryToken(catalogDictionary);
var catalogRef = context.WriteObject(memory, catalog); var catalogRef = context.WriteObject(memory, catalog);

View File

@@ -0,0 +1,220 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml.Linq;
using UglyToad.PdfPig.Tokens;
namespace UglyToad.PdfPig.Writer.Xmp
{
    /// <summary>
    /// Generates the XMP metadata stream which is referenced from the document catalog for PDF/A output.
    /// </summary>
    internal static class XmpWriter
    {
        // Value written to the x:xmptk attribute of the x:xmpmeta element.
        private const string Xmptk = "Adobe XMP Core 5.6-c014 79.156797, 2014/08/20-09:53:02 ";

        private const string RdfNamespace = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";

        private const string XmpMetaPrefix = "x";
        private const string XmpMetaNamespace = "adobe:ns:meta/";

        private const string DublinCorePrefix = "dc";
        private const string DublinCoreNamespace = "http://purl.org/dc/elements/1.1/";

        private const string XmpBasicPrefix = "xmp";
        private const string XmpBasicNamespace = "http://ns.adobe.com/xap/1.0/";

        // ReSharper disable UnusedMember.Local
        private const string XmpRightsManagementPrefix = "xmpRights";
        private const string XmpRightsManagementNamespace = "http://ns.adobe.com/xap/1.0/rights/";

        private const string XmpMediaManagementPrefix = "xmpMM";
        private const string XmpMediaManagementNamespace = "http://ns.adobe.com/xap/1.0/mm/";
        // ReSharper restore UnusedMember.Local

        private const string AdobePdfPrefix = "pdf";
        private const string AdobePdfNamespace = "http://ns.adobe.com/pdf/1.3/";

        private const string PdfAIdentificationExtensionPrefix = "pdfaid";
        private const string PdfAIdentificationExtensionNamespace = "http://www.aiim.org/pdfa/ns/id/";

        /// <summary>
        /// Builds the XMP packet describing the document and wraps it in a metadata stream.
        /// </summary>
        /// <param name="builder">The document information whose values are mirrored into the XMP properties.</param>
        /// <param name="version">The PDF version of the document, written as <c>pdf:PDFVersion</c> (for example 1.7).</param>
        /// <param name="standard">The PDF/A standard to declare using the PDF/A identification schema.</param>
        /// <returns>An uncompressed stream of type Metadata and subtype XML containing the UTF-8 encoded packet.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="builder"/> is <see langword="null"/>.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="standard"/> is not a supported PDF/A standard.</exception>
        public static StreamToken GenerateXmpStream(PdfDocumentBuilder.DocumentInformationBuilder builder, decimal version,
            PdfAStandard standard)
        {
            if (builder == null)
            {
                throw new ArgumentNullException(nameof(builder));
            }

            XNamespace xmpMeta = XmpMetaNamespace;
            XNamespace rdf = RdfNamespace;

            var emptyRdfAbout = new XAttribute(rdf + "about", string.Empty);

            var rdfDescriptionElement = new XElement(rdf + "Description", emptyRdfAbout);

            // Format with the invariant culture so the decimal separator is always '.'.
            var pdfVersion = version.ToString("0.0", System.Globalization.CultureInfo.InvariantCulture);

            // Dublin Core Schema
            // XMP defines dc:creator as an ordered array and dc:description / dc:title as language alternatives.
            AddElementsForSchema(rdfDescriptionElement, DublinCorePrefix, DublinCoreNamespace, builder,
                new List<SchemaMapper>
                {
                    new SchemaMapper("format", b => "application/pdf"),
                    new SchemaMapper("creator", b => b.Author, XmpValueForm.OrderedArray),
                    new SchemaMapper("description", b => b.Subject, XmpValueForm.LanguageAlternative),
                    new SchemaMapper("title", b => b.Title, XmpValueForm.LanguageAlternative)
                });

            // XMP Basic Schema
            AddElementsForSchema(rdfDescriptionElement, XmpBasicPrefix, XmpBasicNamespace, builder,
                new List<SchemaMapper>
                {
                    new SchemaMapper("CreatorTool", b => b.Creator)
                });

            // Adobe PDF Schema
            AddElementsForSchema(rdfDescriptionElement, AdobePdfPrefix, AdobePdfNamespace, builder,
                new List<SchemaMapper>
                {
                    new SchemaMapper("PDFVersion", b => pdfVersion),
                    new SchemaMapper("Producer", b => b.Producer)
                });

            // emptyRdfAbout already belongs to rdfDescriptionElement; LINQ to XML clones it when it is added to a second element.
            var pdfAIdContainer = GetVersionAndConformanceLevelIdentificationElement(rdf, emptyRdfAbout, standard);

            var document = new XDocument(
                new XElement(xmpMeta + "xmpmeta", GetNamespaceAttribute(XmpMetaPrefix, XmpMetaNamespace),
                    new XAttribute(xmpMeta + "xmptk", Xmptk),
                    new XElement(rdf + "RDF",
                        GetNamespaceAttribute("rdf", rdf),
                        rdfDescriptionElement,
                        pdfAIdContainer
                    )
                )
            );

            // Normalise line endings so the packet is identical regardless of platform.
            var xml = document.ToString(SaveOptions.None).Replace("\r\n", "\n");
            xml = $"<?xpacket begin=\"\ufeff\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n{xml}\n<?xpacket end=\"r\"?>";

            var bytes = Encoding.UTF8.GetBytes(xml);

            return new StreamToken(new DictionaryToken(new Dictionary<NameToken, IToken>
            {
                {NameToken.Type, NameToken.Metadata},
                {NameToken.Subtype, NameToken.Xml},
                {NameToken.Length, new NumericToken(bytes.Length)}
            }), bytes);
        }

        /// <summary>
        /// Creates an <c>xmlns:prefix</c> declaration attribute for the namespace.
        /// </summary>
        private static XAttribute GetNamespaceAttribute(string prefix, XNamespace ns) => new XAttribute(XNamespace.Xmlns + prefix, ns);

        /// <summary>
        /// Declares the schema namespace on <paramref name="parent"/> and appends one property element for
        /// every mapper which yields a non-null value.
        /// </summary>
        private static void AddElementsForSchema(XElement parent, string prefix, string ns, PdfDocumentBuilder.DocumentInformationBuilder builder,
            List<SchemaMapper> mappers)
        {
            var xns = XNamespace.Get(ns);
            parent.Add(GetNamespaceAttribute(prefix, xns));

            foreach (var mapper in mappers)
            {
                var value = mapper.ValueFunc(builder);

                // Properties without a value are left out entirely.
                if (value == null)
                {
                    continue;
                }

                parent.Add(CreatePropertyElement(xns + mapper.Name, value, mapper.Form));
            }
        }

        /// <summary>
        /// Creates the element for a single property, wrapping the value in the RDF container its form requires.
        /// </summary>
        private static XElement CreatePropertyElement(XName name, string value, XmpValueForm form)
        {
            XNamespace rdf = RdfNamespace;

            switch (form)
            {
                case XmpValueForm.OrderedArray:
                    return new XElement(name,
                        new XElement(rdf + "Seq",
                            new XElement(rdf + "li", value)));
                case XmpValueForm.LanguageAlternative:
                    return new XElement(name,
                        new XElement(rdf + "Alt",
                            new XElement(rdf + "li", new XAttribute(XNamespace.Xml + "lang", "x-default"), value)));
                default:
                    return new XElement(name, value);
            }
        }

        /// <summary>
        /// Creates the rdf:Description element holding the pdfaid:part and pdfaid:conformance entries.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="standard"/> is not a supported PDF/A standard.</exception>
        private static XElement GetVersionAndConformanceLevelIdentificationElement(XNamespace rdf, XAttribute emptyRdfAbout, PdfAStandard standard)
        {
            /*
             * The only mandatory XMP entries are those which indicate that the file is a PDF/A-1 file and its conformance level.
             * The PDF/A version and conformance level of a file shall be specified using the PDF/A Identification extension schema.
             */
            XNamespace pdfaid = PdfAIdentificationExtensionNamespace;
            var pdfAidContainer = new XElement(rdf + "Description", emptyRdfAbout, GetNamespaceAttribute(PdfAIdentificationExtensionPrefix, pdfaid));

            int part;
            string conformance;
            switch (standard)
            {
                case PdfAStandard.A1B:
                    part = 1;
                    conformance = "B";
                    break;
                default:
                    throw new ArgumentOutOfRangeException(nameof(standard), standard, null);
            }

            pdfAidContainer.Add(new XElement(pdfaid + "part", part));
            pdfAidContainer.Add(new XElement(pdfaid + "conformance", conformance));

            return pdfAidContainer;
        }

        // Potentially required for further PDF/A versions.
        // ReSharper disable once UnusedMember.Local
        /// <summary>
        /// Creates the rdf:Description element which describes the PDF/A identification schema
        /// (its part, amd and conformance properties) as an extension schema.
        /// </summary>
        private static XElement GetExtensionSchemasElement(XNamespace rdf, XAttribute emptyRdfAbout)
        {
            const string pdfAExtensionSchemaContainerSchemaPrefix = "pdfaExtension";
            const string pdfAExtensionSchemaContainerSchemaUri = "http://www.aiim.org/pdfa/ns/extension/";

            const string pdfASchemaValueTypePrefix = "pdfaSchema";
            const string pdfASchemaValueTypeUri = "http://www.aiim.org/pdfa/ns/schema#";

            const string pdfAPropertyValueTypePrefix = "pdfaProperty";
            const string pdfAPropertyValueTypeUri = "http://www.aiim.org/pdfa/ns/property#";

            XNamespace pdfaExtension = pdfAExtensionSchemaContainerSchemaUri;
            XNamespace pdfaSchema = pdfASchemaValueTypeUri;
            XNamespace pdfaProperty = pdfAPropertyValueTypeUri;

            var pdfaSchemaContainer = new XElement(rdf + "Description", emptyRdfAbout,
                GetNamespaceAttribute(pdfAExtensionSchemaContainerSchemaPrefix, pdfaExtension),
                GetNamespaceAttribute(pdfASchemaValueTypePrefix, pdfaSchema),
                GetNamespaceAttribute(pdfAPropertyValueTypePrefix, pdfaProperty));

            var schemaBag = new XElement(pdfaExtension + "schemas",
                new XElement(rdf + "Bag"));

            var individualSchemaContainer = new XElement(rdf + "li", new XAttribute(rdf + "parseType", "Resource"));

            individualSchemaContainer.Add(new XElement(pdfaSchema + "namespaceURI", PdfAIdentificationExtensionNamespace));
            individualSchemaContainer.Add(new XElement(pdfaSchema + "prefix", PdfAIdentificationExtensionPrefix));
            individualSchemaContainer.Add(new XElement(pdfaSchema + "schema", "PDF/A ID Schema"));

            var seqContainer = new XElement(pdfaSchema + "property", new XElement(rdf + "Seq"));

            // The rdf:Seq created above is the only child of the property element.
            var seq = seqContainer.Elements().Last();

            seq.Add(GetSchemaPropertyListItem(rdf, pdfaProperty, "part", "Part of PDF/A standard", "internal", "Integer"));
            seq.Add(GetSchemaPropertyListItem(rdf, pdfaProperty, "amd", "Amendment of PDF/A standard"));
            seq.Add(GetSchemaPropertyListItem(rdf, pdfaProperty, "conformance", "Conformance level of PDF/A standard"));

            individualSchemaContainer.Add(seqContainer);

            // Likewise the rdf:Bag is the only child of the schemas element.
            schemaBag.Elements().Last().Add(individualSchemaContainer);

            pdfaSchemaContainer.Add(schemaBag);

            return pdfaSchemaContainer;
        }

        /// <summary>
        /// Creates an rdf:li resource describing one property of an extension schema.
        /// </summary>
        private static XElement GetSchemaPropertyListItem(XNamespace rdfNs,
            XNamespace pdfaPropertyNs, string name, string description, string category = "internal", string valueType = "Text")
        {
            var li = new XElement(rdfNs + "li", new XAttribute(rdfNs + "parseType", "Resource"));

            li.Add(new XElement(pdfaPropertyNs + "category", category));
            li.Add(new XElement(pdfaPropertyNs + "description", description));
            li.Add(new XElement(pdfaPropertyNs + "name", name));
            li.Add(new XElement(pdfaPropertyNs + "valueType", valueType));

            return li;
        }

        /// <summary>
        /// The RDF shape used to serialize a property value.
        /// </summary>
        private enum XmpValueForm
        {
            /// <summary>
            /// The value is written directly as the element's text.
            /// </summary>
            Text = 0,
            /// <summary>
            /// The value is written as the single item of an rdf:Seq.
            /// </summary>
            OrderedArray = 1,
            /// <summary>
            /// The value is written as the x-default item of an rdf:Alt.
            /// </summary>
            LanguageAlternative = 2
        }

        /// <summary>
        /// Maps a property name to the function which reads its value from the document information.
        /// </summary>
        private class SchemaMapper
        {
            /// <summary>
            /// The local name of the property within its schema.
            /// </summary>
            public string Name { get; }

            /// <summary>
            /// Reads the property value; a null result means the property is omitted.
            /// </summary>
            public Func<PdfDocumentBuilder.DocumentInformationBuilder, string> ValueFunc { get; }

            /// <summary>
            /// The RDF shape used when writing the value.
            /// </summary>
            public XmpValueForm Form { get; }

            public SchemaMapper(string name, Func<PdfDocumentBuilder.DocumentInformationBuilder, string> valueFunc,
                XmpValueForm form = XmpValueForm.Text)
            {
                Name = name;
                ValueFunc = valueFunc;
                Form = form;
            }
        }
    }
}