make link copying more tolerant when adding page

In #1082 and other issues relating to annotations we're running into the constraints of the current model for building a PDF document. Currently we skip all link-type annotations; I think we can support copying links whose destination is outside the current document. However, the more I look at this code, the more I think we need a radical redesign of how document building is done, because it has been pushed far beyond its current capabilities. I'll lay out my thinking in more detail in the related PR.
2025-08-20 04:48:53 +08:00 · 2025-07-23 20:56:26 -05:00 · 2025-07-23 20:56:26 -05:00 · 2a10b6c285
commit 2a10b6c285
parent 85fc63d585
2 changed files with 148 additions and 64 deletions
--- a/src/UglyToad.PdfPig.Tokens/NameToken.Constants.cs
+++ b/src/UglyToad.PdfPig.Tokens/NameToken.Constants.cs
@ -310,6 +310,7 @@
        public static readonly NameToken Last = new NameToken("Last");
        public static readonly NameToken LastChar = new NameToken("LastChar");
        public static readonly NameToken LastModified = new NameToken("LastModified");
+        public static readonly NameToken Launch = new NameToken("Launch");
        public static readonly NameToken Lc = new NameToken("LC");
        public static readonly NameToken Le = new NameToken("LE");
        public static readonly NameToken Leading = new NameToken("Leading");
--- a/src/UglyToad.PdfPig/Writer/PdfDocumentBuilder.cs
+++ b/src/UglyToad.PdfPig/Writer/PdfDocumentBuilder.cs
@ -458,72 +458,14 @@ namespace UglyToad.PdfPig.Writer
                    {
                        continue;
                    }
-                    
-                    var val = kvp.Value;
-                    if (kvp.Value is IndirectReferenceToken ir)
-                    {
-                        ObjectToken tk = document.Structure.TokenScanner.Get(ir.Data);
-                        if (tk is null)
-                        {
-                            // malformed
-                            continue;
-                        }
-                        val = tk.Data;
-                    }

-                    if (!(val is ArrayToken arr))
-                    {
-                        // should be array... ignore and remove bad dict
-                        continue;
-                    }
+                    var copiedTokens = CopyAnnotationsFromPageSource(
+                        kvp.Value,
+                        document.Structure.TokenScanner,
+                        refs,
+                        options.CopyLinkFunc);

-                    // if copyLink is unset, ignore links to resolve issues with refencing non-existing pages
-                    var toAdd = new List<IToken>();
-                    foreach (var annot in arr.Data)
-                    {
-                        DictionaryToken? tk = GetRemoteDict(annot);
-                        if (tk is null)
-                        {
-                            // malformed
-                            continue;
-                        }
-
-                        if (tk.TryGet(NameToken.Subtype, out var st) && st is NameToken nm && nm == NameToken.Link)
-                        {
-                            if (options.CopyLinkFunc is null)
-                            {
-                                // ignore link if don't know how to copy
-                                continue;
-                            }
-
-                            var link = page.annotationProvider.GetAction(tk);
-                            if (link is null)
-                            {
-                                // ignore unknown link actions
-                                continue;
-                            }
-
-                            var copiedLink = options.CopyLinkFunc(link);
-                            if (copiedLink is null)
-                            {
-                                // ignore if caller wants to skip the link
-                                continue;
-                            }
-
-                            if (copiedLink != link)
-                            {
-                                // defer to write links when all pages are added
-                                var copiedToken = (DictionaryToken)WriterUtil.CopyToken(context, tk, document.Structure.TokenScanner, refs);
-                                links.Add((copiedToken, copiedLink));
-                                continue;
-                            }
-
-                            // copy as is if caller returns the same link
-                        }
-                        toAdd.Add(WriterUtil.CopyToken(context, tk, document.Structure.TokenScanner, refs));
-                    }
-                    // copy rest
-                    copiedPageDict[NameToken.Annots] = new ArrayToken(toAdd);
+                    copiedPageDict[NameToken.Annots] = new ArrayToken(copiedTokens);
                    continue;
                }

@ -625,6 +567,147 @@ namespace UglyToad.PdfPig.Writer
            }
        }

+        private IReadOnlyList<IToken> CopyAnnotationsFromPageSource(
+            IToken val,
+            IPdfTokenScanner sourceScanner,
+            IDictionary<IndirectReference, IndirectReferenceToken> refs,
+            Func<PdfAction, PdfAction?>? linkCopyFunc)
+        {
+            var permittedLinkActionTypes = new HashSet<NameToken>
+            {
+                // A web URI.
+                NameToken.Uri,
+                // A page in a different non-embedded document.
+                NameToken.GoToR,
+                // Launch an external application.
+                NameToken.Launch,
+            };
+            // Collects the annotation tokens to copy from val; returns an empty list when val cannot be obtained as an array.
+            if (!DirectObjectFinder.TryGet(val, sourceScanner, out ArrayToken? annotationsArray))
+            {
+                return [];
+            }
+
+            var copiedAnnotations = new List<IToken>();
+            foreach (var annotEntry in annotationsArray.Data)
+            {
+                if (!DirectObjectFinder.TryGet(annotEntry, sourceScanner, out DictionaryToken? annotDict))
+                {
+                    continue;
+                }
+
+                var removedKeys = new List<NameToken>();
+
+                /*
+                 * An indirect reference to the page object with which this annotation is associated.
+                 * Note: This entry is required for screen annotations associated with rendition actions.
+                 */
+                if (annotDict.TryGet(NameToken.P, out _))
+                {
+                    // Ideally the page reference would be updated when this page is written.
+                    // For now we remove it, which will corrupt screen annotations as noted above.
+                    removedKeys.Add(NameToken.P);
+                }
+
+                // We don't copy the struct tree, so drop the StructParent entry for now.
+                if (annotDict.TryGet(NameToken.StructParent, out _))
+                {
+                    removedKeys.Add(NameToken.StructParent);
+                }
+
+                // Annotations whose Subtype is missing or is not Link are copied (minus the removed keys); we should revisit this.
+                if (!annotDict.TryGet(NameToken.Subtype, sourceScanner, out NameToken? subtype)
+                    || subtype != NameToken.Link)
+                {
+                    var copiedRef = WriterUtil.CopyToken(
+                        context,
+                        CopyWithSkippedKeys(annotDict, removedKeys),
+                        sourceScanner,
+                        refs);
+
+                    copiedAnnotations.Add(copiedRef);
+
+                    continue;
+                }
+
+                // TODO: linkCopyFunc is not used anywhere in this method yet, so it has no effect
+                // on which link annotations are copied or how they are written.
+
+                // TODO: resolve the link's PdfAction (e.g. via page.annotationProvider.GetAction) and pass it to linkCopyFunc.
+
+                // A link with an action (/A) may point elsewhere in the source document, possibly to a page we did not copy.
+                if (annotDict.TryGet(NameToken.A, sourceScanner, out DictionaryToken? actionDict))
+                {
+                    // Only copy actions whose /S type is in permittedLinkActionTypes; a missing /S or any other type is skipped because we can't currently keep such links valid on copy.
+                    if (!actionDict.TryGet(NameToken.S, sourceScanner, out NameToken? actionType)
+                        || !permittedLinkActionTypes.Contains(actionType))
+                    {
+                        continue;
+                    }
+
+                    var copiedRef = WriterUtil.CopyToken(
+                        context,
+                        CopyWithSkippedKeys(annotDict, removedKeys),
+                        sourceScanner,
+                        refs);
+
+                    copiedAnnotations.Add(copiedRef);
+
+                    continue;
+                }
+
+                // A /Dest may point elsewhere in the source document, possibly to a page we did not copy.
+                if (annotDict.TryGet(NameToken.Dest, out _))
+                {
+                    // Skip for now.
+                    continue;
+                }
+
+                // No action dictionary was found under /A and no /Dest is present, so it is unclear what this link does; it is assumed to be safe to copy:
+                var finalCopiedRef = WriterUtil.CopyToken(
+                    context,
+                    CopyWithSkippedKeys(annotDict, removedKeys),
+                    sourceScanner,
+                    refs);
+
+                copiedAnnotations.Add(finalCopiedRef);
+            }
+
+            return copiedAnnotations;
+        }
+
+        private static DictionaryToken CopyWithSkippedKeys(
+            DictionaryToken source,
+            IReadOnlyList<NameToken> skipped)
+        {
+            var dict = new Dictionary<NameToken, IToken>();
+            // Build a new dictionary holding every entry of source whose key is not listed in skipped.
+            foreach (var kvp in source.Data)
+            {
+                var name = NameToken.Create(kvp.Key);
+                // Linear scan of skipped for a name equal to this entry's key.
+                var ignore = false;
+
+                foreach (var skippedName in skipped)
+                {
+                    if (skippedName == name)
+                    {
+                        ignore = true;
+                        break;
+                    }
+                }
+
+                if (ignore)
+                {
+                    continue;
+                }
+                // The value token is reused as-is; only the containing dictionary is new.
+                dict[name] = kvp.Value;
+            }
+
+            return new DictionaryToken(dict);
+        }
+
        private void CompleteDocument()
        {
            // write fonts to reserved object numbers