mirror of
https://github.com/UglyToad/PdfPig.git
synced 2026-03-10 00:23:29 +08:00
Introduce a new API for intercepting tokens that are being copied
This API allows us to track any type of token while it is being (or after it has been) copied to a stream, so that when a similar token comes up again we can decide whether to reuse the already written token or to write the new one. It lets us split the code by the specific kind of duplicate we are trying to avoid, without penalizing performance. Another plus is that, since each "deduplicator" lives behind its own class, if one of them causes a performance regression that the user does not want, the user can simply not add it and the resulting PDF will still be valid.
This commit is contained in:
24
src/UglyToad.PdfPig/Writer/Copier/IObjectCopier.cs
Normal file
24
src/UglyToad.PdfPig/Writer/Copier/IObjectCopier.cs
Normal file
@@ -0,0 +1,24 @@
|
||||
namespace UglyToad.PdfPig.Writer.Copier
|
||||
{
|
||||
using System;
|
||||
using Tokens;
|
||||
|
||||
/// <summary>
/// An interface for copying tokens.
/// </summary>
public interface IObjectCopier
{
    /// <summary>
    /// Copy the token to the destination stream.
    /// </summary>
    /// <param name="sourceToken">Token to copy.</param>
    /// <param name="tokenScanner">Function used to resolve any indirect reference found in the token to copy.</param>
    /// <returns>
    /// The token to use in place of <paramref name="sourceToken"/>. A copier that is consulted by a
    /// composite copier may return <c>null</c> to signal that it did not handle the token, so that the
    /// next copier (or a plain copy) is tried instead.
    /// </returns>
    IToken CopyObject(IToken sourceToken, Func<IndirectReferenceToken, IToken> tokenScanner);

    /// <summary>
    /// Clear the references of the previously copied objects.
    /// </summary>
    void ClearReference();
}
|
||||
}
|
||||
75
src/UglyToad.PdfPig/Writer/Copier/MultiCopier.cs
Normal file
75
src/UglyToad.PdfPig/Writer/Copier/MultiCopier.cs
Normal file
@@ -0,0 +1,75 @@
|
||||
namespace UglyToad.PdfPig.Writer.Copier
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using Tokens;
|
||||
using Writer;
|
||||
|
||||
/// <summary>
/// An <see cref="ObjectCopier"/> that first offers every token to a list of child copiers and only
/// falls back to the plain copy of the base class when none of them returns a token.
/// </summary>
internal class MultiCopier : ObjectCopier
{
    // Child copiers, consulted in the order in which they were added.
    private readonly List<IObjectCopier> copiers;

    /// <summary>
    /// Create a <see cref="MultiCopier"/> without any child copier.
    /// </summary>
    /// <param name="destinationStream">Stream writer handed over to the base <see cref="ObjectCopier"/>.</param>
    public MultiCopier(PdfStreamWriter destinationStream) : base(destinationStream)
    {
        copiers = new List<IObjectCopier>();
    }

    /// <summary>
    /// Append a child copier; it is consulted after every copier added before it.
    /// </summary>
    /// <param name="copier">The copier to add.</param>
    /// <exception cref="ArgumentNullException"><paramref name="copier"/> is <c>null</c>.</exception>
    public void AddCopier(IObjectCopier copier)
    {
        // Reject null here: otherwise the failure only shows up later, as a NullReferenceException
        // in the middle of CopyObject or ClearReference.
        if (copier == null)
        {
            throw new ArgumentNullException(nameof(copier));
        }

        copiers.Add(copier);
    }

    /// <summary>
    /// Remove a previously added child copier.
    /// </summary>
    /// <param name="copier">The copier to remove.</param>
    /// <returns><c>true</c> if the copier was found and removed, <c>false</c> otherwise.</returns>
    public bool RemoveCopier(IObjectCopier copier)
    {
        return copiers.Remove(copier);
    }

    /// <summary>
    /// Get the child copiers in the order in which they are consulted.
    /// </summary>
    /// <returns>A read-only view over the list of child copiers (not a snapshot).</returns>
    public IReadOnlyList<IObjectCopier> GetCopiers()
    {
        return copiers;
    }

    /// <summary>
    /// Copy the token, giving every child copier the chance to handle it first.
    /// </summary>
    /// <param name="sourceToken">Token to copy.</param>
    /// <param name="tokenScanner">Function used to resolve any indirect reference found in the token to copy.</param>
    /// <returns>
    /// The first non-null token returned by a child copier, or the result of the base copy
    /// when every child copier returned <c>null</c>.
    /// </returns>
    public override IToken CopyObject(IToken sourceToken, Func<IndirectReferenceToken, IToken> tokenScanner)
    {
        // We give the token to the child copiers, to see if they have a better way of copying the token.
        // A null result means "not handled by this copier".
        foreach (var copier in copiers)
        {
            var newToken = copier.CopyObject(sourceToken, tokenScanner);
            if (newToken != null)
            {
                return newToken;
            }
        }

        // No child copier handled the token, so just do a simple copy of it.
        return base.CopyObject(sourceToken, tokenScanner);
    }

    /// <summary>
    /// Clear the references kept by every child copier and then the ones kept by this copier.
    /// </summary>
    public override void ClearReference()
    {
        foreach (var copier in copiers)
        {
            copier.ClearReference();
        }

        base.ClearReference();
    }
}
|
||||
}
|
||||
170
src/UglyToad.PdfPig/Writer/Copier/ObjectCopier.cs
Normal file
170
src/UglyToad.PdfPig/Writer/Copier/ObjectCopier.cs
Normal file
@@ -0,0 +1,170 @@
|
||||
namespace UglyToad.PdfPig.Writer.Copier
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using PdfPig;
|
||||
using Tokenization.Scanner;
|
||||
using Tokens;
|
||||
using Writer;
|
||||
|
||||
/// <summary>
/// Default <see cref="IObjectCopier"/>: deep copies tokens, writes every referenced object to the
/// destination stream writer and replaces the source indirect references by the newly written ones.
/// </summary>
internal class ObjectCopier : IObjectCopier
{
    private readonly PdfStreamWriter pdfStream;

    // Maps an indirect reference of the source to the reference of its copy in the destination.
    private readonly Dictionary<IndirectReferenceToken, IndirectReferenceToken> newReferenceMap;

    /// <summary>
    /// Create a copier that writes the copied objects to <paramref name="destinationStream"/>.
    /// </summary>
    /// <param name="destinationStream">Stream writer that receives the copied objects.</param>
    /// <exception cref="ArgumentNullException"><paramref name="destinationStream"/> is <c>null</c>.</exception>
    public ObjectCopier(PdfStreamWriter destinationStream)
    {
        pdfStream = destinationStream ?? throw new ArgumentNullException(nameof(destinationStream));
        newReferenceMap = new Dictionary<IndirectReferenceToken, IndirectReferenceToken>();
    }

    /// <summary>
    /// Copy the token, resolving its indirect references through the structure of <paramref name="sourceDocument"/>.
    /// </summary>
    /// <param name="sourceToken">Token to copy.</param>
    /// <param name="sourceDocument">Document the token comes from.</param>
    /// <returns>The copied token.</returns>
    public IToken CopyObject(IToken sourceToken, PdfDocument sourceDocument)
    {
        IToken tokenScanner(IndirectReferenceToken referenceToken)
        {
            var objToken = sourceDocument.Structure.GetObject(referenceToken.Data);
            return objToken.Data;
        }

        return CopyObject(sourceToken, tokenScanner);
    }

    /// <summary>
    /// Copy the token, resolving its indirect references through <paramref name="tokenScanner"/>.
    /// </summary>
    /// <param name="sourceToken">Token to copy.</param>
    /// <param name="tokenScanner">Scanner used to get the object behind an indirect reference.</param>
    /// <returns>The copied token.</returns>
    public IToken CopyObject(IToken sourceToken, IPdfTokenScanner tokenScanner)
    {
        IToken tokenGetter(IndirectReferenceToken referenceToken)
        {
            var objToken = tokenScanner.Get(referenceToken.Data);
            return objToken.Data;
        }

        return CopyObject(sourceToken, tokenGetter);
    }

    /// <summary>
    /// Copy the token. Dictionaries, arrays and streams are rebuilt from copies of their content; an
    /// indirect reference has its target copied and written, and is replaced by the new reference.
    /// Any other token is returned as is.
    /// </summary>
    /// <param name="sourceToken">Token to copy.</param>
    /// <param name="tokenScanner">Function used to resolve any indirect reference found in the token to copy.</param>
    /// <returns>The copied token, or <paramref name="sourceToken"/> itself when it needs no deep copy.</returns>
    /// <exception cref="NotSupportedException"><paramref name="sourceToken"/> is an <see cref="ObjectToken"/>.</exception>
    /// <exception cref="InvalidOperationException">The copy of a stream dictionary is not a dictionary token.</exception>
    public virtual IToken CopyObject(IToken sourceToken, Func<IndirectReferenceToken, IToken> tokenScanner)
    {
        // These tokens need to be deep copied, because they could contain references that have to be updated.
        // The nested calls go through this virtual method, so a derived copier also sees every child token.
        switch (sourceToken)
        {
            case DictionaryToken dictionaryToken:
                {
                    var newContent = new Dictionary<NameToken, IToken>();
                    foreach (var setPair in dictionaryToken.Data)
                    {
                        newContent.Add(NameToken.Create(setPair.Key), CopyObject(setPair.Value, tokenScanner));
                    }

                    return new DictionaryToken(newContent);
                }
            case ArrayToken arrayToken:
                {
                    var newArray = new List<IToken>(arrayToken.Length);
                    foreach (var token in arrayToken.Data)
                    {
                        newArray.Add(CopyObject(token, tokenScanner));
                    }

                    return new ArrayToken(newArray);
                }
            case IndirectReferenceToken referenceToken:
                {
                    // Already copied: reuse the reference of the existing copy.
                    if (TryGetNewReference(referenceToken, out var newReferenceToken))
                    {
                        return newReferenceToken;
                    }

                    // NOTE(review): the new reference is only registered after the referenced object has been
                    // copied, so an object graph that leads back to this same reference would recurse without
                    // end. Confirm that callers never pass such a graph.
                    var referencedToken = tokenScanner(referenceToken);
                    var newReferencedToken = CopyObject(referencedToken, tokenScanner);

                    var newToken = WriteToken(newReferencedToken);
                    SetNewReference(referenceToken, newToken);
                    return newToken;
                }
            case StreamToken streamToken:
                {
                    // The dictionary is copied through the virtual method, so a derived copier may hand back
                    // something that is not a dictionary. Fail with a clear message instead of passing null on.
                    if (!(CopyObject(streamToken.StreamDictionary, tokenScanner) is DictionaryToken properties))
                    {
                        throw new InvalidOperationException("The copy of a stream dictionary must be a dictionary token");
                    }

                    return new StreamToken(properties, streamToken.Data);
                }
            case ObjectToken _:
                {
                    // Tokens are not written directly to the stream here, so the offset can't be known and
                    // the token would be invalid. The copy of an object token is not expected to ever happen.
                    throw new NotSupportedException("Copying a Object token is not supported");
                }
        }

        return sourceToken;
    }

    /// <summary>
    /// Look for the reference that replaces a reference of the source.
    /// </summary>
    /// <param name="sourceReferenceToken">Reference of the source.</param>
    /// <param name="newReferenceToken">The replacing reference when found, the default value otherwise.</param>
    /// <returns><c>true</c> if <paramref name="sourceReferenceToken"/> was already copied, <c>false</c> otherwise.</returns>
    public virtual bool TryGetNewReference(IndirectReferenceToken sourceReferenceToken, out IndirectReferenceToken newReferenceToken)
    {
        // NOTE(review): the keys are compared one by one with Equals instead of a hashed lookup; presumably
        // the hash code of the token does not follow its Equals. Confirm before switching to TryGetValue.
        foreach (var referenceSet in newReferenceMap)
        {
            if (referenceSet.Key.Equals(sourceReferenceToken))
            {
                newReferenceToken = referenceSet.Value;
                return true;
            }
        }

        newReferenceToken = default;
        return false;
    }

    /// <summary>
    /// Forget every source reference to new reference mapping recorded so far.
    /// </summary>
    public virtual void ClearReference()
    {
        newReferenceMap.Clear();
    }

    /// <summary>
    /// Record that a reference of the source is replaced by <paramref name="newToken"/>.
    /// </summary>
    /// <param name="oldToken">Reference of the source.</param>
    /// <param name="newToken">Reference that replaces it in the destination.</param>
    /// <exception cref="ArgumentException"><paramref name="oldToken"/> is already a key of the map.</exception>
    public void SetNewReference(IndirectReferenceToken oldToken, IndirectReferenceToken newToken)
    {
        newReferenceMap.Add(oldToken, newToken);
    }

    /// <summary>
    /// Ask the destination stream writer to reserve a number.
    /// </summary>
    /// <returns>The number reserved by the stream writer.</returns>
    public int ReserveTokenNumber()
    {
        return pdfStream.ReserveNumber();
    }

    /// <summary>
    /// Write the token through the destination stream writer.
    /// </summary>
    /// <param name="token">Token to write.</param>
    /// <param name="reservedNumber">Optional number passed on to the stream writer, <c>null</c> by default.</param>
    /// <returns>The indirect reference returned by the stream writer for the written token.</returns>
    public IndirectReferenceToken WriteToken(IToken token, int? reservedNumber = null)
    {
        return pdfStream.WriteToken(token, reservedNumber);
    }
}
|
||||
}
|
||||
34
src/UglyToad.PdfPig/Writer/Copier/TokenHelper.cs
Normal file
34
src/UglyToad.PdfPig/Writer/Copier/TokenHelper.cs
Normal file
@@ -0,0 +1,34 @@
|
||||
namespace UglyToad.PdfPig.Writer.Copier
|
||||
{
|
||||
using System;
|
||||
using Tokens;
|
||||
|
||||
/// <summary>
/// Helper to get at the token behind indirect references and object tokens.
/// </summary>
internal static class TokenHelper
{
    // Upper bound on the number of hops, to avoid an infinite loop in production. It should never be reached.
    private const int MaxIterations = 10;

    /// <summary>
    /// Follow indirect references and unwrap object tokens until a token of type <typeparamref name="T"/> is found.
    /// </summary>
    /// <typeparam name="T">Type of the token to extract.</typeparam>
    /// <param name="token">Token to start from.</param>
    /// <param name="lookupFunc">Function used to resolve an indirect reference.</param>
    /// <returns>The first token of type <typeparamref name="T"/> met along the way.</returns>
    /// <exception cref="InvalidOperationException">
    /// No token of type <typeparamref name="T"/> could be reached from <paramref name="token"/>.
    /// </exception>
    public static T GetTokenAs<T>(IToken token, Func<IndirectReferenceToken, IToken> lookupFunc) where T : IToken
    {
        var current = token;
        for (var iteration = 0; iteration < MaxIterations; iteration++)
        {
            // The wanted type is tested first, so asking for a reference or an object token returns it as is.
            if (current is T result)
            {
                return result;
            }

            if (current is IndirectReferenceToken tokenReference)
            {
                current = lookupFunc(tokenReference);
            }
            else if (current is ObjectToken tokenObject)
            {
                current = tokenObject.Data;
            }
            else
            {
                // Nothing left to follow: looking at the same token again can't change the outcome.
                break;
            }
        }

        throw new InvalidOperationException($"Unable to extract a {typeof(T)} token from {token}");
    }
}
|
||||
}
|
||||
Reference in New Issue
Block a user