mirror of
https://github.com/UglyToad/PdfPig.git
synced 2026-03-10 00:23:29 +08:00
Merge pull request #652 from mvantzet/PreventCorruptionWhenRemovingText-538
Prevent PDF corruption when removing text (#538)
This commit is contained in:
@@ -1279,6 +1279,7 @@
|
||||
public int Tokens { get; private set; }
|
||||
public int Objects { get; private set; }
|
||||
public bool WroteCrossReferenceTable { get; private set; }
|
||||
public bool WritingPageContents { get; set; }
|
||||
|
||||
public void WriteToken(IToken token, Stream outputStream)
|
||||
{
|
||||
|
||||
@@ -1,9 +1,7 @@
|
||||
namespace UglyToad.PdfPig.Writer
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Text;
|
||||
using Tokens;
|
||||
|
||||
internal interface IPdfStreamWriter : IDisposable
|
||||
@@ -20,6 +18,11 @@
|
||||
/// </summary>
|
||||
Stream Stream { get; }
|
||||
|
||||
/// <summary>
|
||||
/// Hints that the stream writer is used for writing page contents.
|
||||
/// </summary>
|
||||
bool WritingPageContents { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// Writes a single token to the stream.
|
||||
/// </summary>
|
||||
|
||||
@@ -37,5 +37,10 @@
|
||||
IReadOnlyDictionary<IndirectReference, long> objectOffsets,
|
||||
IndirectReference catalogToken, Stream outputStream,
|
||||
IndirectReference? documentInformationReference);
|
||||
|
||||
/// <summary>
|
||||
/// Hints to the token writer that we are currently writing page contents.
|
||||
/// </summary>
|
||||
bool WritingPageContents { get; set; }
|
||||
}
|
||||
}
|
||||
@@ -18,6 +18,11 @@ namespace UglyToad.PdfPig.Writer
|
||||
/// </summary>
|
||||
internal class NoTextTokenWriter : TokenWriter
|
||||
{
|
||||
/// <summary>
|
||||
/// Set this value prior to processing a page so that log messages report the correct page number.
|
||||
/// </summary>
|
||||
internal int Page { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// Write stream without <see cref="ShowText"/> or <see cref="ShowTextsWithPositioning"/> operations
|
||||
/// </summary>
|
||||
@@ -25,10 +30,16 @@ namespace UglyToad.PdfPig.Writer
|
||||
/// <param name="outputStream"></param>
|
||||
protected override void WriteStream(StreamToken streamToken, Stream outputStream)
|
||||
{
|
||||
if (!TryGetStreamWithoutText(streamToken, out var outputStreamToken))
|
||||
StreamToken outputStreamToken;
|
||||
if (!WritingPageContents && !IsFormStream(streamToken))
|
||||
{
|
||||
outputStreamToken = streamToken;
|
||||
}
|
||||
else if (!TryGetStreamWithoutText(streamToken, out outputStreamToken))
|
||||
{
|
||||
outputStreamToken = streamToken;
|
||||
}
|
||||
|
||||
WriteDictionary(outputStreamToken.StreamDictionary, outputStream);
|
||||
WriteLineBreak(outputStream);
|
||||
outputStream.Write(StreamStart, 0, StreamStart.Length);
|
||||
@@ -38,6 +49,12 @@ namespace UglyToad.PdfPig.Writer
|
||||
outputStream.Write(StreamEnd, 0, StreamEnd.Length);
|
||||
}
|
||||
|
||||
/// <summary>
/// Determines whether the dictionary of <paramref name="streamToken"/> contains an entry keyed by
/// <see cref="NameToken.Subtype"/> whose value equals <see cref="NameToken.Form"/>.
/// </summary>
/// <param name="streamToken">The stream whose dictionary is inspected.</param>
/// <returns>
/// <c>true</c> when the subtype entry exists, is a <see cref="NameToken"/> and equals
/// <see cref="NameToken.Form"/>; otherwise <c>false</c>.
/// </returns>
private bool IsFormStream(StreamToken streamToken)
{
    // A type pattern replaces the previous hard cast: a subtype entry that is not a
    // NameToken now yields false instead of throwing InvalidCastException while writing.
    return streamToken.StreamDictionary.Data.TryGetValue(NameToken.Subtype.Data, out var value)
        && value is NameToken subtype
        && subtype == NameToken.Form;
}
|
||||
|
||||
/// <summary>
|
||||
/// Try get a stream without <see cref="ShowText"/> or <see cref="ShowTextsWithPositioning"/> operations.
|
||||
/// </summary>
|
||||
@@ -63,7 +80,7 @@ namespace UglyToad.PdfPig.Writer
|
||||
IReadOnlyList<IGraphicsStateOperation> operations;
|
||||
try
|
||||
{
|
||||
operations = pageContentParser.Parse(1, new ByteArrayInputBytes(bytes), new NoOpLog());
|
||||
operations = pageContentParser.Parse(Page, new ByteArrayInputBytes(bytes), new NoOpLog());
|
||||
}
|
||||
catch (Exception)
|
||||
{
|
||||
@@ -76,7 +93,9 @@ namespace UglyToad.PdfPig.Writer
|
||||
var haveText = false;
|
||||
foreach (var op in operations)
|
||||
{
|
||||
if (op.Operator == ShowText.Symbol || op.Operator == ShowTextsWithPositioning.Symbol)
|
||||
if (op.Operator == ShowText.Symbol
|
||||
|| op.Operator == ShowTextsWithPositioning.Symbol
|
||||
|| op.Operator == MoveToNextLineShowText.Symbol)
|
||||
{
|
||||
haveText = true;
|
||||
continue;
|
||||
@@ -89,7 +108,22 @@ namespace UglyToad.PdfPig.Writer
|
||||
return false;
|
||||
}
|
||||
outputStreamT.Seek(0, SeekOrigin.Begin);
|
||||
outputStreamToken = DataCompresser.CompressToStream(outputStreamT.ToArray());
|
||||
|
||||
var compressedBytes = DataCompresser.CompressBytes(outputStreamT.ToArray());
|
||||
var outputStreamDictionary = new Dictionary<NameToken, IToken>()
|
||||
{
|
||||
{ NameToken.Length, new NumericToken(compressedBytes.Length) },
|
||||
{ NameToken.Filter, NameToken.FlateDecode }
|
||||
};
|
||||
foreach (var kv in streamToken.StreamDictionary.Data)
|
||||
{
|
||||
var key = NameToken.Create(kv.Key);
|
||||
if (!outputStreamDictionary.ContainsKey(key))
|
||||
{
|
||||
outputStreamDictionary[key] = kv.Value;
|
||||
}
|
||||
};
|
||||
outputStreamToken = new StreamToken(new DictionaryToken(outputStreamDictionary), compressedBytes);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -360,6 +360,7 @@ namespace UglyToad.PdfPig.Writer
|
||||
// dedup if on to avoid issues
|
||||
var prev = context.AttemptDeduplication;
|
||||
context.AttemptDeduplication = false;
|
||||
context.WritingPageContents = true;
|
||||
if (contentsToken is ArrayToken array)
|
||||
{
|
||||
foreach (var item in array.Data)
|
||||
@@ -378,6 +379,7 @@ namespace UglyToad.PdfPig.Writer
|
||||
WriterUtil.CopyToken(context, ir, document.Structure.TokenScanner, refs) as IndirectReferenceToken));
|
||||
}
|
||||
context.AttemptDeduplication = prev;
|
||||
context.WritingPageContents = false;
|
||||
}
|
||||
|
||||
// manually copy page dict / resources as we need to modify some
|
||||
@@ -406,7 +408,6 @@ namespace UglyToad.PdfPig.Writer
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
foreach (var kvp in pageInfo.Page.Data)
|
||||
{
|
||||
if (kvp.Key == NameToken.Contents || kvp.Key == NameToken.Parent || kvp.Key == NameToken.Type)
|
||||
|
||||
@@ -25,6 +25,12 @@
|
||||
|
||||
public bool AttemptDeduplication { get; set; } = true;
|
||||
|
||||
/// <summary>
/// Gets or sets the page-contents flag. The value is not stored here: reads and writes are
/// forwarded to the <c>WritingPageContents</c> property of <c>TokenWriter</c>.
/// </summary>
public bool WritingPageContents
{
    get
    {
        return TokenWriter.WritingPageContents;
    }
    set
    {
        TokenWriter.WritingPageContents = value;
    }
}
|
||||
|
||||
internal PdfStreamWriter(
|
||||
Stream baseStream,
|
||||
bool disposeStream = true,
|
||||
|
||||
@@ -83,19 +83,22 @@ namespace UglyToad.PdfPig.Writer
|
||||
/// </summary>
|
||||
public static void RemoveText(PdfDocument file, Stream output, IReadOnlyList<int> pagesBundle = null)
|
||||
{
|
||||
using (var document = new PdfDocumentBuilder(output, false, PdfWriterType.Default, file.Version, tokenWriter: new NoTextTokenWriter()))
|
||||
var tokenWriter = new NoTextTokenWriter();
|
||||
using (var document = new PdfDocumentBuilder(output, false, PdfWriterType.Default, file.Version, tokenWriter: tokenWriter))
|
||||
{
|
||||
if (pagesBundle == null)
|
||||
{
|
||||
for (var i = 1; i <= file.NumberOfPages; i++)
|
||||
{
|
||||
tokenWriter.Page = i;
|
||||
document.AddPage(file, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
foreach (var i in pagesBundle)
|
||||
{
|
||||
tokenWriter.Page = i;
|
||||
document.AddPage(file, i);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -276,6 +276,12 @@
|
||||
outputStream.Write(Eof, 0, Eof.Length);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Indicates that we are writing page contents.
|
||||
/// Can be used by a derived class.
|
||||
/// </summary>
|
||||
public bool WritingPageContents { get; set; }
|
||||
|
||||
/// <inheritdoc cref="ITokenWriter.WriteObject" />
|
||||
public void WriteObject(long objectNumber, int generation, byte[] data, Stream outputStream)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user