diff --git a/src/UglyToad.PdfPig.Tests/Integration/Documents/outline.pdf b/src/UglyToad.PdfPig.Tests/Integration/Documents/outline.pdf
new file mode 100644
index 00000000..33ac0de7
Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Integration/Documents/outline.pdf differ
diff --git a/src/UglyToad.PdfPig.Tests/Writer/PdfDocumentBuilderTests.cs b/src/UglyToad.PdfPig.Tests/Writer/PdfDocumentBuilderTests.cs
index a9ca12da..fbadf78e 100644
--- a/src/UglyToad.PdfPig.Tests/Writer/PdfDocumentBuilderTests.cs
+++ b/src/UglyToad.PdfPig.Tests/Writer/PdfDocumentBuilderTests.cs
@@ -101,6 +101,33 @@
}
}
+ [Fact]
+ public void CanFastAddPageAndStripLinkAnnots()
+ {
+     // Copying a page into a new document must drop its link annotations (their
+     // targets may be pages that are not copied) while keeping every other annotation.
+     var first = IntegrationHelpers.GetDocumentPath("outline.pdf");
+     var contents = File.ReadAllBytes(first);
+
+     var annotCount = 0;
+     var linkCount = 0;
+     byte[] results = null;
+     using (var existing = PdfDocument.Open(contents, ParsingOptions.LenientParsingOff))
+     using (var output = new PdfDocumentBuilder())
+     {
+         output.AddPage(existing, 1);
+         results = output.Build();
+
+         var pg = existing.GetPage(1);
+         var annots = pg.ExperimentalAccess.GetAnnotations().ToList();
+         annotCount = annots.Count;
+
+         // The source page must contain at least one link for the test to be meaningful.
+         Assert.Contains(annots, x => x.Type == Annotations.AnnotationType.Link);
+
+         // Count the links instead of assuming there is exactly one, so the
+         // expectation below stays correct if the fixture gains more links.
+         linkCount = annots.Count(x => x.Type == Annotations.AnnotationType.Link);
+     }
+
+     using (var rewritten = PdfDocument.Open(results, ParsingOptions.LenientParsingOff))
+     {
+         var pg = rewritten.GetPage(1);
+         var annots = pg.ExperimentalAccess.GetAnnotations().ToList();
+
+         // Only the link annotations may be missing from the copied page.
+         Assert.Equal(annotCount - linkCount, annots.Count);
+         Assert.DoesNotContain(annots, x => x.Type == Annotations.AnnotationType.Link);
+     }
+ }
[Fact]
public void CanReadSingleBlankPage()
diff --git a/src/UglyToad.PdfPig/Writer/IPdfStreamWriter.cs b/src/UglyToad.PdfPig/Writer/IPdfStreamWriter.cs
index 13281f7a..035260bc 100644
--- a/src/UglyToad.PdfPig/Writer/IPdfStreamWriter.cs
+++ b/src/UglyToad.PdfPig/Writer/IPdfStreamWriter.cs
@@ -8,6 +8,13 @@
internal interface IPdfStreamWriter : IDisposable
{
+ /// <summary>
+ /// Gets or sets whether the stream writer should attempt to deduplicate objects.
+ /// Setting this may have no effect if the implementing writer does not
+ /// support deduplication.
+ /// </summary>
+ bool AttemptDeduplication { get; set; }
+
///
/// The underlying stream used by the writer.
///
diff --git a/src/UglyToad.PdfPig/Writer/PdfDedupStreamWriter.cs b/src/UglyToad.PdfPig/Writer/PdfDedupStreamWriter.cs
index 38c77cbc..0d1cb1af 100644
--- a/src/UglyToad.PdfPig/Writer/PdfDedupStreamWriter.cs
+++ b/src/UglyToad.PdfPig/Writer/PdfDedupStreamWriter.cs
@@ -23,13 +23,16 @@
ms.SetLength(0);
TokenWriter.WriteToken(token, ms);
var contents = ms.ToArray();
- if (hashes.TryGetValue(contents, out var value))
+ if (AttemptDeduplication && hashes.TryGetValue(contents, out var value))
{
return value;
}
var ir = ReserveObjectNumber();
- hashes.Add(contents, ir);
+ if (AttemptDeduplication)
+ {
+ hashes.Add(contents, ir);
+ }
offsets.Add(ir.Data, Stream.Position);
TokenWriter.WriteObject(ir.Data.ObjectNumber, ir.Data.Generation, contents, Stream);
diff --git a/src/UglyToad.PdfPig/Writer/PdfDocumentBuilder.cs b/src/UglyToad.PdfPig/Writer/PdfDocumentBuilder.cs
index 0620dfa9..a50b3870 100644
--- a/src/UglyToad.PdfPig/Writer/PdfDocumentBuilder.cs
+++ b/src/UglyToad.PdfPig/Writer/PdfDocumentBuilder.cs
@@ -330,6 +330,10 @@ namespace UglyToad.PdfPig.Writer
var streams = new List();
if (pageInfo.Page.TryGet(NameToken.Contents, out IToken contentsToken))
{
+ // Adobe Acrobat reports an error if a content stream is referenced by multiple pages,
+ // so temporarily disable deduplication while the content streams are copied
+ var prev = context.AttemptDeduplication;
+ context.AttemptDeduplication = false;
if (contentsToken is ArrayToken array)
{
foreach (var item in array.Data)
@@ -347,6 +351,7 @@ namespace UglyToad.PdfPig.Writer
streams.Add(new PdfPageBuilder.CopiedContentStream(
WriterUtil.CopyToken(context, ir, document.Structure.TokenScanner, refs) as IndirectReferenceToken));
}
+ context.AttemptDeduplication = prev;
}
// manually copy page dict / resources as we need to modify some
@@ -379,15 +384,55 @@ namespace UglyToad.PdfPig.Writer
{
if (kvp.Key == NameToken.Contents || kvp.Key == NameToken.Parent || kvp.Key == NameToken.Type)
{
+ // don't copy these as they'll be handled during page tree writing
continue;
}
if (kvp.Key == NameToken.Resources)
{
+ // merge parent resources into child
CopyResourceDict(kvp.Value, resources);
continue;
}
+ if (kvp.Key == NameToken.Annots)
+ {
+ var val = kvp.Value;
+ if (kvp.Value is IndirectReferenceToken ir)
+ {
+ val = document.Structure.TokenScanner.Get(ir.Data).Data;
+ }
+
+ if (!(val is ArrayToken arr))
+ {
+ // should be array... ignore and remove bad dict
+ continue;
+ }
+
+ // -> ignore links to avoid referencing non-existent pages.
+ // At some point support should be added for copying links when the
+ // target pages are copied as well, but for now just avoid the corruption
+ var toAdd = new List();
+ foreach (var annot in arr.Data)
+ {
+ DictionaryToken tk = GetRemoteDict(annot);
+ if (tk == null)
+ {
+ // malformed
+ continue;
+ }
+ if (tk.TryGet(NameToken.Subtype, out var st) && st is NameToken nm && nm == NameToken.Link)
+ {
+ // link -> ignore
+ continue;
+ }
+ toAdd.Add(WriterUtil.CopyToken(context, tk, document.Structure.TokenScanner, refs));
+ }
+ // copy rest
+ copiedPageDict[NameToken.Annots] = new ArrayToken(toAdd);
+ continue;
+ }
+
copiedPageDict[NameToken.Create(kvp.Key)] =
WriterUtil.CopyToken(context, kvp.Value, document.Structure.TokenScanner, refs);
}
@@ -508,10 +553,14 @@ namespace UglyToad.PdfPig.Writer
pageDictionary[NameToken.MediaBox] = RectangleToArray(page.Value.PageSize);
}
+ // Adobe Acrobat reports an error if a content stream is referenced by multiple pages,
+ // so temporarily disable deduplication while the content streams are written
+ var prev = context.AttemptDeduplication;
+ context.AttemptDeduplication = false;
+
var toWrite = page.Value.contentStreams.Where(x => x.HasContent).ToList();
if (toWrite.Count == 0)
{
- // write empty
pageDictionary[NameToken.Contents] = new PdfPageBuilder.DefaultContentStream().Write(context);
}
else if (toWrite.Count == 1)
@@ -529,7 +578,7 @@ namespace UglyToad.PdfPig.Writer
}
pageDictionary[NameToken.Contents] = new ArrayToken(streams);
}
-
+ context.AttemptDeduplication = prev; // restore the caller's deduplication setting
leafChildren[leafNum].Add(context.WriteToken(new DictionaryToken(pageDictionary)));
diff --git a/src/UglyToad.PdfPig/Writer/PdfStreamWriter.cs b/src/UglyToad.PdfPig/Writer/PdfStreamWriter.cs
index f444e3dc..65d7ec23 100644
--- a/src/UglyToad.PdfPig/Writer/PdfStreamWriter.cs
+++ b/src/UglyToad.PdfPig/Writer/PdfStreamWriter.cs
@@ -30,8 +30,9 @@
DisposeStream = disposeStream;
}
- public Stream Stream { get; protected set; }
-
+ public Stream Stream { get; protected set; }
+ public bool AttemptDeduplication { get; set; } = true;
+
public virtual IndirectReferenceToken WriteToken(IToken token)
{
if (!Initialized)
diff --git a/src/UglyToad.PdfPig/Writer/TokenWriter.cs b/src/UglyToad.PdfPig/Writer/TokenWriter.cs
index ee8ae638..2dacb681 100644
--- a/src/UglyToad.PdfPig/Writer/TokenWriter.cs
+++ b/src/UglyToad.PdfPig/Writer/TokenWriter.cs
@@ -80,7 +80,11 @@
/// The token to write to the stream.
/// The stream to write the token to.
public static void WriteToken(IToken token, Stream outputStream)
- {
+ {
+ if (token == null)
+ {
+ throw new ArgumentNullException(nameof(token));
+ }
switch (token)
{
case ArrayToken array:
@@ -119,7 +123,9 @@
break;
case StringToken stringToken:
WriteString(stringToken, outputStream);
- break;
+ break;
+ default:
+ throw new PdfDocumentFormatException($"Attempted to write token type of {token.GetType()} but was not known.");
}
}
@@ -294,8 +300,16 @@
foreach (var pair in dictionary.Data)
{
- WriteName(pair.Key, outputStream);
- WriteToken(pair.Value, outputStream);
+ WriteName(pair.Key, outputStream);
+
+ // handle scenario where PdfPig has a null value under some circumstances
+ if (pair.Value == null)
+ {
+ WriteToken(NullToken.Instance, outputStream);
+ } else
+ {
+ WriteToken(pair.Value, outputStream);
+ }
}
outputStream.Write(DictionaryEnd, 0, DictionaryEnd.Length);