make link copying more tolerant when adding page

In #1082 and other issues relating to annotations, we're running into the
constraints of the current model for building a PDF document. Currently
we skip all link-type annotations; I think we can support copying links
whose destination is outside the current document. However, the
more I look at this code, the more I think we need a radical redesign of
how document building is done, because it has been pushed far beyond
its current capabilities. I'll explain my thinking in more detail in the
related PR.
This commit is contained in:
EliotJones 2025-07-23 20:56:26 -05:00 committed by BobLd
parent 85fc63d585
commit 2a10b6c285
2 changed files with 148 additions and 64 deletions

View File

@ -310,6 +310,7 @@
public static readonly NameToken Last = new NameToken("Last");
public static readonly NameToken LastChar = new NameToken("LastChar");
public static readonly NameToken LastModified = new NameToken("LastModified");
// Action type name for launching an external application; the document builder
// treats it as a link action that is safe to copy between documents.
public static readonly NameToken Launch = new NameToken("Launch");
public static readonly NameToken Lc = new NameToken("LC");
public static readonly NameToken Le = new NameToken("LE");
public static readonly NameToken Leading = new NameToken("Leading");

View File

@ -458,72 +458,14 @@ namespace UglyToad.PdfPig.Writer
{
continue;
}
var val = kvp.Value;
if (kvp.Value is IndirectReferenceToken ir)
{
ObjectToken tk = document.Structure.TokenScanner.Get(ir.Data);
if (tk is null)
{
// malformed
continue;
}
val = tk.Data;
}
if (!(val is ArrayToken arr))
{
// should be array... ignore and remove bad dict
continue;
}
var copiedTokens = CopyAnnotationsFromPageSource(
kvp.Value,
document.Structure.TokenScanner,
refs,
options.CopyLinkFunc);
// if copyLink is unset, ignore links to resolve issues with refencing non-existing pages
var toAdd = new List<IToken>();
foreach (var annot in arr.Data)
{
DictionaryToken? tk = GetRemoteDict(annot);
if (tk is null)
{
// malformed
continue;
}
if (tk.TryGet(NameToken.Subtype, out var st) && st is NameToken nm && nm == NameToken.Link)
{
if (options.CopyLinkFunc is null)
{
// ignore link if don't know how to copy
continue;
}
var link = page.annotationProvider.GetAction(tk);
if (link is null)
{
// ignore unknown link actions
continue;
}
var copiedLink = options.CopyLinkFunc(link);
if (copiedLink is null)
{
// ignore if caller wants to skip the link
continue;
}
if (copiedLink != link)
{
// defer to write links when all pages are added
var copiedToken = (DictionaryToken)WriterUtil.CopyToken(context, tk, document.Structure.TokenScanner, refs);
links.Add((copiedToken, copiedLink));
continue;
}
// copy as is if caller returns the same link
}
toAdd.Add(WriterUtil.CopyToken(context, tk, document.Structure.TokenScanner, refs));
}
// copy rest
copiedPageDict[NameToken.Annots] = new ArrayToken(toAdd);
copiedPageDict[NameToken.Annots] = new ArrayToken(copiedTokens);
continue;
}
@ -625,6 +567,147 @@ namespace UglyToad.PdfPig.Writer
}
}
/// <summary>
/// Copies the annotations of a source page into the document being built, dropping
/// link annotations whose target cannot be kept valid once the page is copied.
/// </summary>
/// <param name="val">The page's /Annots value: an array, or something that resolves to one.</param>
/// <param name="sourceScanner">Token scanner of the source document, used to resolve indirect objects.</param>
/// <param name="refs">Reference map handed through to <c>WriterUtil.CopyToken</c>.</param>
/// <param name="linkCopyFunc">
/// Caller-supplied link copy override. It is not consulted yet; link handling is
/// decided purely from the annotation's /A and /Dest entries.
/// </param>
/// <returns>
/// The tokens returned by <c>WriterUtil.CopyToken</c> for every retained annotation;
/// empty when <paramref name="val"/> does not resolve to an array.
/// </returns>
private IReadOnlyList<IToken> CopyAnnotationsFromPageSource(
    IToken val,
    IPdfTokenScanner sourceScanner,
    IDictionary<IndirectReference, IndirectReferenceToken> refs,
    Func<PdfAction, PdfAction?>? linkCopyFunc)
{
    // Link actions that point outside the source document and so stay valid after copying.
    var permittedLinkActionTypes = new HashSet<NameToken>
    {
        // A web URI.
        NameToken.Uri,
        // A page in a different non-embedded document.
        NameToken.GoToR,
        // Launch an external application.
        NameToken.Launch,
    };

    // Keys stripped from every copied annotation. Skipping a key that is absent is a no-op,
    // so there is no need to probe each annotation for them first.
    NameToken[] removedKeys =
    [
        // /P is an indirect reference to the page the annotation belongs to. It should be
        // re-pointed at the new page when that page is written; for now it is removed.
        // Note: /P is required for screen annotations associated with rendition actions,
        // so removing it will corrupt those.
        NameToken.P,
        // We don't copy the struct tree so skip this for now.
        NameToken.StructParent,
    ];

    if (!DirectObjectFinder.TryGet(val, sourceScanner, out ArrayToken? annotationsArray))
    {
        return [];
    }

    var copiedAnnotations = new List<IToken>();

    foreach (var annotEntry in annotationsArray.Data)
    {
        if (!DirectObjectFinder.TryGet(annotEntry, sourceScanner, out DictionaryToken? annotDict))
        {
            // Malformed entry, nothing to copy.
            continue;
        }

        if (!CanCopy(annotDict))
        {
            continue;
        }

        var copied = WriterUtil.CopyToken(
            context,
            CopyWithSkippedKeys(annotDict, removedKeys),
            sourceScanner,
            refs);

        copiedAnnotations.Add(copied);
    }

    return copiedAnnotations;

    // Decides whether an annotation can be carried over without leaving a dangling target.
    bool CanCopy(DictionaryToken annotation)
    {
        // We treat non-link annotations as ok for now, we should revisit this.
        if (!annotation.TryGet(NameToken.Subtype, sourceScanner, out NameToken? subtype)
            || subtype != NameToken.Link)
        {
            return true;
        }

        // A link with an action may point elsewhere in the source document, possibly at a page
        // that was not copied. Only actions that target something external are kept. An /A entry
        // that does not resolve to a dictionary, or has no /S type, is treated as not copyable.
        if (annotation.TryGet(NameToken.A, out _))
        {
            return annotation.TryGet(NameToken.A, sourceScanner, out DictionaryToken? actionDict)
                && actionDict.TryGet(NameToken.S, sourceScanner, out NameToken? actionType)
                && permittedLinkActionTypes.Contains(actionType);
        }

        // A /Dest can also point at a page we did not copy, so such links are skipped for now.
        // With neither /A nor /Dest the link has no target to invalidate and is copied as-is.
        return !annotation.TryGet(NameToken.Dest, out _);
    }
}
/// <summary>
/// Builds a new dictionary token holding every entry of <paramref name="source"/>
/// except those whose key appears in <paramref name="skipped"/>.
/// </summary>
/// <param name="source">The dictionary to copy entries from.</param>
/// <param name="skipped">Keys to leave out of the result; keys not present in the source have no effect.</param>
/// <returns>A new <see cref="DictionaryToken"/> with the retained entries.</returns>
private static DictionaryToken CopyWithSkippedKeys(
    DictionaryToken source,
    IReadOnlyList<NameToken> skipped)
{
    var retained = new Dictionary<NameToken, IToken>();

    foreach (var entry in source.Data)
    {
        var key = NameToken.Create(entry.Key);

        if (!IsSkipped(key))
        {
            retained[key] = entry.Value;
        }
    }

    return new DictionaryToken(retained);

    // Linear scan; the skip list is expected to be tiny.
    bool IsSkipped(NameToken candidate)
    {
        for (var i = 0; i < skipped.Count; i++)
        {
            if (skipped[i] == candidate)
            {
                return true;
            }
        }

        return false;
    }
}
private void CompleteDocument()
{
// write fonts to reserved object numbers