Merge pull request #525 from mvantzet/ITokenWriter

Make TokenWriter non-static, implement ITokenWriter, injection in PdfDocumentBuilder, add PdfTextRemover
This commit is contained in:
Eliot Jones
2023-01-08 11:49:56 -05:00
committed by GitHub
22 changed files with 551 additions and 137 deletions

View File

@@ -217,9 +217,11 @@
"UglyToad.PdfPig.Util.DefaultWordExtractor",
"UglyToad.PdfPig.Util.DateFormatHelper",
"UglyToad.PdfPig.Util.WhitespaceSizeStatistics",
"UglyToad.PdfPig.Writer.ITokenWriter",
"UglyToad.PdfPig.Writer.PdfAStandard",
"UglyToad.PdfPig.Writer.PdfDocumentBuilder",
"UglyToad.PdfPig.Writer.PdfMerger",
"UglyToad.PdfPig.Writer.PdfTextRemover",
"UglyToad.PdfPig.Writer.PdfWriterType",
"UglyToad.PdfPig.Writer.PdfPageBuilder",
"UglyToad.PdfPig.Writer.TokenWriter",

View File

@@ -1,73 +1,74 @@
namespace UglyToad.PdfPig.Tests.Writer
{
using System.IO;
using System.Linq;
using Content;
using Integration;
using PdfPig.Core;
using PdfPig.Fonts.Standard14Fonts;
using PdfPig.Tokens;
using PdfPig.Writer;
using System.Linq;
using Content;
using Integration;
using PdfPig.Core;
using PdfPig.Fonts.Standard14Fonts;
using PdfPig.Tokens;
using PdfPig.Writer;
using System.Collections.Generic;
using Tests.Fonts.TrueType;
using Xunit;
public class PdfDocumentBuilderTests
{
[Fact]
public void CanWriteSingleBlankPage()
{
var result = CreateSingleBlankPage();
WriteFile(nameof(CanWriteSinglePageHelloWorld), result);
Assert.NotEmpty(result);
var str = OtherEncodings.BytesAsLatin1String(result);
Assert.StartsWith("%PDF", str);
Assert.EndsWith("%%EOF", str);
}
[Fact]
public void CanCreateSingleCustomPageSize()
{
var builder = new PdfDocumentBuilder();
var page = builder.AddPage(120, 250);
var font = builder.AddStandard14Font(Standard14Font.Helvetica);
page.AddText("Small page.", 12, new PdfPoint(25, 200), font);
var bytes = builder.Build();
WriteFile(nameof(CanCreateSingleCustomPageSize), bytes);
using (var document = PdfDocument.Open(bytes, ParsingOptions.LenientParsingOff))
{
Assert.Equal(1, document.NumberOfPages);
var page1 = document.GetPage(1);
Assert.Equal(120, page1.Width);
Assert.Equal(250, page1.Height);
Assert.Equal("Small page.", page1.Text);
}
public class PdfDocumentBuilderTests
{
[Fact]
public void CanWriteSingleBlankPage()
{
var result = CreateSingleBlankPage();
WriteFile(nameof(CanWriteSinglePageHelloWorld), result);
Assert.NotEmpty(result);
var str = OtherEncodings.BytesAsLatin1String(result);
Assert.StartsWith("%PDF", str);
Assert.EndsWith("%%EOF", str);
}
[Fact]
public void CanFastAddPageAndInheritProps()
{
var first = IntegrationHelpers.GetDocumentPath("inherited_mediabox.pdf");
var contents = File.ReadAllBytes(first);
[Fact]
public void CanCreateSingleCustomPageSize()
{
var builder = new PdfDocumentBuilder();
byte[] results = null;
using (var existing = PdfDocument.Open(contents, ParsingOptions.LenientParsingOff))
using (var output = new PdfDocumentBuilder())
var page = builder.AddPage(120, 250);
var font = builder.AddStandard14Font(Standard14Font.Helvetica);
page.AddText("Small page.", 12, new PdfPoint(25, 200), font);
var bytes = builder.Build();
WriteFile(nameof(CanCreateSingleCustomPageSize), bytes);
using (var document = PdfDocument.Open(bytes, ParsingOptions.LenientParsingOff))
{
Assert.Equal(1, document.NumberOfPages);
var page1 = document.GetPage(1);
Assert.Equal(120, page1.Width);
Assert.Equal(250, page1.Height);
Assert.Equal("Small page.", page1.Text);
}
}
[Fact]
public void CanFastAddPageAndInheritProps()
{
var first = IntegrationHelpers.GetDocumentPath("inherited_mediabox.pdf");
var contents = File.ReadAllBytes(first);
byte[] results = null;
using (var existing = PdfDocument.Open(contents, ParsingOptions.LenientParsingOff))
using (var output = new PdfDocumentBuilder())
{
output.AddPage(existing, 1);
results = output.Build();
results = output.Build();
}
using (var rewritted = PdfDocument.Open(results, ParsingOptions.LenientParsingOff))
@@ -75,22 +76,22 @@
var pg = rewritted.GetPage(1);
Assert.Equal(200, pg.MediaBox.Bounds.Width);
Assert.Equal(100, pg.MediaBox.Bounds.Height);
}
}
}
[Fact]
public void CanFastAddPageWithStreamSubtype()
{
var first = IntegrationHelpers.GetDocumentPath("steam_in_page_dict.pdf");
var contents = File.ReadAllBytes(first);
[Fact]
public void CanFastAddPageWithStreamSubtype()
{
var first = IntegrationHelpers.GetDocumentPath("steam_in_page_dict.pdf");
var contents = File.ReadAllBytes(first);
byte[] results = null;
using (var existing = PdfDocument.Open(contents, ParsingOptions.LenientParsingOff))
using (var output = new PdfDocumentBuilder())
byte[] results = null;
using (var existing = PdfDocument.Open(contents, ParsingOptions.LenientParsingOff))
using (var output = new PdfDocumentBuilder())
{
output.AddPage(existing, 1);
results = output.Build();
results = output.Build();
}
using (var rewritted = PdfDocument.Open(results, ParsingOptions.LenientParsingOff))
@@ -98,26 +99,26 @@
// really just checking for no exception...
var pg = rewritted.GetPage(1);
Assert.NotNull(pg.Content);
}
}
}
[Fact]
public void CanFastAddPageAndStripLinkAnnots()
{
var first = IntegrationHelpers.GetDocumentPath("outline.pdf");
var contents = File.ReadAllBytes(first);
[Fact]
public void CanFastAddPageAndStripLinkAnnots()
{
var first = IntegrationHelpers.GetDocumentPath("outline.pdf");
var contents = File.ReadAllBytes(first);
var annotCount = 0;
byte[] results = null;
using (var existing = PdfDocument.Open(contents, ParsingOptions.LenientParsingOff))
using (var output = new PdfDocumentBuilder())
byte[] results = null;
using (var existing = PdfDocument.Open(contents, ParsingOptions.LenientParsingOff))
using (var output = new PdfDocumentBuilder())
{
output.AddPage(existing, 1);
results = output.Build();
var pg = existing.GetPage(1);
var annots = pg.ExperimentalAccess.GetAnnotations().ToList();
annotCount = annots.Count;
Assert.Contains(annots, x => x.Type == Annotations.AnnotationType.Link);
Assert.Contains(annots, x => x.Type == Annotations.AnnotationType.Link);
}
using (var rewritten = PdfDocument.Open(results, ParsingOptions.LenientParsingOff))
@@ -126,7 +127,7 @@
var annots = pg.ExperimentalAccess.GetAnnotations().ToList();
Assert.Equal(annotCount - 1, annots.Count);
Assert.DoesNotContain(annots, x => x.Type == Annotations.AnnotationType.Link);
}
}
}
[Fact]
@@ -1114,6 +1115,27 @@
}
}
/// <summary>
/// Copies every page of a sample document through a <see cref="PdfDocumentBuilder"/> that was given a
/// <see cref="TestTokenWriter"/>, then checks which writer methods the builder invoked.
/// </summary>
[Fact]
public void CanUseCustomTokenWriter()
{
    var docPath = IntegrationHelpers.GetDocumentPath("68-1990-01_A.pdf");
    var tw = new TestTokenWriter();

    using (var doc = PdfDocument.Open(docPath))
    using (var ms = new MemoryStream())
    using (var builder = new PdfDocumentBuilder(ms, tokenWriter: tw))
    {
        for (var i = 1; i <= doc.NumberOfPages; i++)
        {
            builder.AddPage(doc, i);
        }

        builder.Build();
    }

    // xUnit's Assert.Equal takes the expected value first, then the actual value;
    // the reverse order produces a misleading message when the assertion fails.
    Assert.Equal(0, tw.Objects); // No objects in sample file
    Assert.True(tw.Tokens > 1000); // Roughly 1065
    Assert.True(tw.WroteCrossReferenceTable);
}
private static void WriteFile(string name, byte[] bytes, string extension = "pdf")
{
try
@@ -1133,4 +1155,29 @@
}
}
}
/// <summary>
/// Test double for <see cref="ITokenWriter"/>: nothing is written to the output stream,
/// the class only records which writer methods were called and how often.
/// </summary>
public class TestTokenWriter : ITokenWriter
{
    /// <summary>
    /// How many times <see cref="WriteToken"/> has been called.
    /// </summary>
    public int Tokens { get; private set; }

    /// <summary>
    /// How many times <see cref="WriteObject"/> has been called.
    /// </summary>
    public int Objects { get; private set; }

    /// <summary>
    /// Set to true once <see cref="WriteCrossReferenceTable"/> has been called at least once.
    /// </summary>
    public bool WroteCrossReferenceTable { get; private set; }

    /// <summary>
    /// Counts the call; the token and the stream are ignored.
    /// </summary>
    public void WriteToken(IToken token, Stream outputStream) => Tokens += 1;

    /// <summary>
    /// Counts the call; the object data and the stream are ignored.
    /// </summary>
    public void WriteObject(long objectNumber, int generation, byte[] data, Stream outputStream) => Objects += 1;

    /// <summary>
    /// Records that the cross-reference table was requested; all arguments are ignored.
    /// </summary>
    public void WriteCrossReferenceTable(
        IReadOnlyDictionary<IndirectReference, long> objectOffsets,
        IndirectReference catalogToken,
        Stream outputStream,
        IndirectReference? documentInformationReference) => WroteCrossReferenceTable = true;
}
}

View File

@@ -0,0 +1,35 @@
using UglyToad.PdfPig.Tests.Integration;
using UglyToad.PdfPig.Writer;
using System.IO;
using Xunit;
namespace UglyToad.PdfPig.Tests.Writer
{
    /// <summary>
    /// Tests for <see cref="PdfTextRemover"/>.
    /// </summary>
    public class PdfTextRemoverTests
    {
        /// <summary>
        /// Runs <see cref="PdfTextRemover.RemoveText(string, System.Collections.Generic.IReadOnlyList{int})"/> on a sample
        /// document and checks that the page count is preserved, that every page of the input had text and
        /// that every page of the output has none.
        /// </summary>
        /// <param name="file">File name of the sample document, resolved through <see cref="IntegrationHelpers.GetDocumentPath"/>.</param>
        [Theory]
        [InlineData("Two Page Text Only - from libre office.pdf")]
        [InlineData("cat-genetics.pdf")]
        [InlineData("Motor Insurance claim form.pdf")]
        [InlineData("Single Page Images - from libre office.pdf")]
        public void TextRemoverRemovesText(string file)
        {
            var filePath = IntegrationHelpers.GetDocumentPath(file);

            using (var document = PdfDocument.Open(filePath))
            {
                // The result is deliberately kept in memory only: writing it to a fixed location such as
                // C:\temp makes the test fail on any machine where that directory does not exist.
                var withoutText = PdfTextRemover.RemoveText(filePath);

                using (var documentWithoutText = PdfDocument.Open(withoutText))
                {
                    Assert.Equal(document.NumberOfPages, documentWithoutText.NumberOfPages);

                    for (var i = 1; i <= documentWithoutText.NumberOfPages; i++)
                    {
                        // xUnit expects (expected, actual) argument order.
                        Assert.NotEqual(string.Empty, document.GetPage(i).Text);
                        Assert.Equal(string.Empty, documentWithoutText.GetPage(i).Text);
                    }
                }
            }
        }
    }
}

View File

@@ -12,10 +12,11 @@
[Fact]
public void EscapeSpecialCharacter()
{
var writer = new TokenWriter();
using (var memStream = new MemoryStream())
{
TokenWriter.WriteToken(new StringToken("\\"), memStream);
TokenWriter.WriteToken(new StringToken("(Hello)"), memStream);
writer.WriteToken(new StringToken("\\"), memStream);
writer.WriteToken(new StringToken("(Hello)"), memStream);
// Read Test
memStream.Position = 0;

View File

@@ -10,6 +10,8 @@
/// </summary>
public class BeginMarkedContent : IGraphicsStateOperation
{
private static readonly TokenWriter TokenWriter = new TokenWriter();
/// <summary>
/// The symbol for this operation in a stream.
/// </summary>

View File

@@ -11,6 +11,8 @@
/// </summary>
public class BeginMarkedContentWithProperties : IGraphicsStateOperation
{
private static readonly TokenWriter TokenWriter = new TokenWriter();
/// <summary>
/// The symbol for this operation in a stream.
/// </summary>

View File

@@ -10,6 +10,8 @@
/// </summary>
public class DesignateMarkedContentPoint : IGraphicsStateOperation
{
private static readonly TokenWriter TokenWriter = new TokenWriter();
/// <summary>
/// The symbol for this operation in a stream.
/// </summary>

View File

@@ -11,6 +11,8 @@
/// </summary>
public class DesignateMarkedContentPointWithProperties : IGraphicsStateOperation
{
private static readonly TokenWriter TokenWriter = new TokenWriter();
/// <summary>
/// The symbol for this operation in a stream.
/// </summary>

View File

@@ -13,6 +13,8 @@
/// </summary>
public class SetNonStrokeColorAdvanced : IGraphicsStateOperation
{
private static readonly TokenWriter TokenWriter = new TokenWriter();
/// <summary>
/// The symbol for this operation in a stream.
/// </summary>

View File

@@ -11,6 +11,8 @@
/// </summary>
public class SetNonStrokeColorSpace : IGraphicsStateOperation
{
private static readonly TokenWriter TokenWriter = new TokenWriter();
/// <summary>
/// The symbol for this operation in a stream.
/// </summary>

View File

@@ -13,6 +13,8 @@
/// </summary>
public class SetStrokeColorAdvanced : IGraphicsStateOperation
{
private static readonly TokenWriter TokenWriter = new TokenWriter();
/// <summary>
/// The symbol for this operation in a stream.
/// </summary>

View File

@@ -11,6 +11,8 @@
/// </summary>
public class SetStrokeColorSpace : IGraphicsStateOperation
{
private static readonly TokenWriter TokenWriter = new TokenWriter();
/// <summary>
/// The symbol for this operation in a stream.
/// </summary>

View File

@@ -15,6 +15,8 @@
/// </summary>
public class ShowTextsWithPositioning : IGraphicsStateOperation
{
private static readonly TokenWriter TokenWriter = new TokenWriter();
/// <summary>
/// The symbol for this operation in a stream.
/// </summary>

View File

@@ -96,8 +96,6 @@
}
return CharacterIdentifierSystemInfo;
throw new InvalidOperationException("The Character Identifier System Information was never set.");
}
public void UseCMap(CMap other)

View File

@@ -14,6 +14,8 @@
private const string DictToken = "dict";
private const string FindResourceToken = "findresource";
private static readonly TokenWriter TokenWriter = new TokenWriter();
public static IReadOnlyList<byte> ConvertToCMapStream(IReadOnlyDictionary<char, byte> unicodeToCharacterCode)
{
using (var memoryStream = new MemoryStream())

View File

@@ -0,0 +1,37 @@
namespace UglyToad.PdfPig.Writer;
using Core;
using System.Collections.Generic;
using System.IO;
using Tokens;
/// <summary>
/// Writes any type of <see cref="IToken"/> to the corresponding PDF document format output.
/// </summary>
/// <remarks>
/// A custom implementation can be supplied through the <c>tokenWriter</c> constructor parameter of
/// <see cref="PdfDocumentBuilder"/>. When no writer is supplied the stream writer falls back to a new
/// <see cref="TokenWriter"/>.
/// </remarks>
public interface ITokenWriter
{
/// <summary>
/// Writes the given input token to the output stream with the correct PDF format and encoding,
/// including whitespace and line breaks as applicable.
/// </summary>
/// <param name="token">The token to write to the stream.</param>
/// <param name="outputStream">The stream to write the token to.</param>
void WriteToken(IToken token, Stream outputStream);
/// <summary>
/// Writes a pre-serialized token as an object token to the output stream.
/// </summary>
/// <param name="objectNumber">Object number of the indirect object.</param>
/// <param name="generation">Generation of the indirect object.</param>
/// <param name="data">Pre-serialized object contents.</param>
/// <param name="outputStream">The stream to write the object to.</param>
void WriteObject(long objectNumber, int generation, byte[] data, Stream outputStream);
/// <summary>
/// Writes a valid single section cross-reference (xref) table plus trailer dictionary to the output for the set of object offsets.
/// </summary>
/// <param name="objectOffsets">The byte offset from the start of the document for each object in the document.</param>
/// <param name="catalogToken">The object representing the catalog dictionary which is referenced from the trailer dictionary.</param>
/// <param name="outputStream">The output stream to write to.</param>
/// <param name="documentInformationReference">The object reference for the document information dictionary if present; the parameter is nullable for documents without one.</param>
void WriteCrossReferenceTable(IReadOnlyDictionary<IndirectReference, long> objectOffsets, IndirectReference catalogToken, Stream outputStream, IndirectReference? documentInformationReference);
}

View File

@@ -0,0 +1,97 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using UglyToad.PdfPig.Core;
using UglyToad.PdfPig.Filters;
using UglyToad.PdfPig.Graphics.Operations.TextShowing;
using UglyToad.PdfPig.Graphics.Operations;
using UglyToad.PdfPig.Graphics;
using UglyToad.PdfPig.Logging;
using UglyToad.PdfPig.Parser;
using UglyToad.PdfPig.Tokens;
namespace UglyToad.PdfPig.Writer
{
    /// <summary>
    /// Derived class of <see cref="TokenWriter"/> that does not write text-showing operations in streams:
    /// <see cref="ShowText"/>, <see cref="ShowTextsWithPositioning"/> and the <c>'</c> and <c>"</c>
    /// "move to next line and show text" operators.
    /// </summary>
    internal class NoTextTokenWriter : TokenWriter
    {
        // Operator symbols of the two "move to next line and show text" operators listed in the PDF
        // specification's table of text-showing operators. They paint glyphs just like Tj/TJ, so text
        // shown through them has to be dropped as well for the output to be free of text.
        private const string MoveToNextLineShowTextOperator = "'";
        private const string MoveToNextLineShowTextWithSpacingOperator = "\"";

        /// <summary>
        /// Write stream without text-showing operations. Streams that cannot be decoded or parsed as
        /// content, or that contain no text-showing operation, are written unchanged.
        /// </summary>
        /// <param name="streamToken">The stream token to write.</param>
        /// <param name="outputStream">The stream to write the token to.</param>
        protected override void WriteStream(StreamToken streamToken, Stream outputStream)
        {
            if (!TryGetStreamWithoutText(streamToken, out var outputStreamToken))
            {
                outputStreamToken = streamToken;
            }

            WriteDictionary(outputStreamToken.StreamDictionary, outputStream);
            WriteLineBreak(outputStream);
            outputStream.Write(StreamStart, 0, StreamStart.Length);
            WriteLineBreak(outputStream);

            var data = outputStreamToken.Data.ToArray();
            outputStream.Write(data, 0, data.Length);

            WriteLineBreak(outputStream);
            outputStream.Write(StreamEnd, 0, StreamEnd.Length);
        }

        /// <summary>
        /// Try get a stream without text-showing operations.
        /// </summary>
        /// <param name="streamToken">The stream to inspect.</param>
        /// <param name="outputStreamToken">The rewritten stream when the method returns true, otherwise null.</param>
        /// <returns>true if any text operation found (and we have a valid <paramref name="outputStreamToken"/> without the text operations),
        /// false if no text operation found or the stream could not be decoded or parsed (in which case <paramref name="outputStreamToken"/> is null)</returns>
        private bool TryGetStreamWithoutText(StreamToken streamToken, out StreamToken outputStreamToken)
        {
            outputStreamToken = null;

            var filterProvider = new FilterProviderWithLookup(DefaultFilterProvider.Instance);
            IReadOnlyList<byte> bytes;
            try
            {
                bytes = streamToken.Decode(filterProvider);
            }
            catch (Exception)
            {
                // Deliberate best effort: a stream that cannot be decoded is passed through untouched.
                return false;
            }

            var pageContentParser = new PageContentParser(new ReflectionGraphicsStateOperationFactory());
            IReadOnlyList<IGraphicsStateOperation> operations;
            try
            {
                operations = pageContentParser.Parse(1, new ByteArrayInputBytes(bytes), new NoOpLog());
            }
            catch (Exception)
            {
                // Deliberate best effort: every stream is offered to the content parser, so data that is
                // not page content is expected to fail here and is passed through untouched.
                return false;
            }

            using (var outputStreamT = new MemoryStream())
            {
                var haveText = false;
                foreach (var op in operations)
                {
                    if (IsTextShowingOperator(op.Operator))
                    {
                        haveText = true;
                        continue;
                    }

                    op.Write(outputStreamT);
                }

                if (!haveText)
                {
                    return false;
                }

                // NOTE(review): the replacement token is built by DataCompresser.CompressToStream from the
                // content bytes alone. Whether entries of the original stream dictionary survive cannot be
                // seen from here - confirm for streams that carry extra entries.
                outputStreamToken = DataCompresser.CompressToStream(outputStreamT.ToArray());
                return true;
            }
        }

        /// <summary>
        /// Whether the operator symbol belongs to an operation that paints text.
        /// </summary>
        private static bool IsTextShowingOperator(string operatorSymbol)
        {
            return operatorSymbol == ShowText.Symbol
                || operatorSymbol == ShowTextsWithPositioning.Symbol
                || operatorSymbol == MoveToNextLineShowTextOperator
                || operatorSymbol == MoveToNextLineShowTextWithSpacingOperator;
        }
    }
}

View File

@@ -8,7 +8,7 @@
{
private readonly Dictionary<byte[], IndirectReferenceToken> hashes = new Dictionary<byte[], IndirectReferenceToken>(new FNVByteComparison());
public PdfDedupStreamWriter(Stream stream, bool dispose) : base(stream, dispose)
public PdfDedupStreamWriter(Stream stream, bool dispose, ITokenWriter tokenWriter = null) : base(stream, dispose, tokenWriter)
{
}

View File

@@ -26,7 +26,7 @@ namespace UglyToad.PdfPig.Writer
private readonly Dictionary<int, PdfPageBuilder> pages = new Dictionary<int, PdfPageBuilder>();
private readonly Dictionary<Guid, FontStored> fonts = new Dictionary<Guid, FontStored>();
private bool completed = false;
internal int fontId = 0;
private int fontId = 0;
private readonly static ArrayToken DefaultProcSet = new ArrayToken(new List<NameToken>
{
@@ -89,20 +89,21 @@ namespace UglyToad.PdfPig.Writer
/// <param name="disposeStream">If stream should be disposed when builder is.</param>
/// <param name="type">Type of pdf stream writer to use</param>
/// <param name="version">Pdf version to use in header.</param>
public PdfDocumentBuilder(Stream stream, bool disposeStream = false, PdfWriterType type = PdfWriterType.Default, decimal version = 1.7m)
/// <param name="tokenWriter">Token writer to use</param>
public PdfDocumentBuilder(Stream stream, bool disposeStream = false, PdfWriterType type = PdfWriterType.Default, decimal version = 1.7m, ITokenWriter tokenWriter = null)
{
switch (type)
{
case PdfWriterType.ObjectInMemoryDedup:
context = new PdfDedupStreamWriter(stream, disposeStream);
context = new PdfDedupStreamWriter(stream, disposeStream, tokenWriter);
break;
default:
context = new PdfStreamWriter(stream, disposeStream);
context = new PdfStreamWriter(stream, disposeStream, tokenWriter);
break;
}
context.InitializePdf(version);
}
/// <summary>
/// Determines whether the bytes of the TrueType font file provided can be used in a PDF document.
/// </summary>

View File

@@ -18,16 +18,17 @@
protected bool DisposeStream { get; set; }
protected bool Initialized { get; set; }
protected int CurrentNumber { get; set; } = 1;
protected readonly ITokenWriter TokenWriter;
internal PdfStreamWriter(Stream baseStream, bool disposeStream = true)
internal PdfStreamWriter(Stream baseStream, bool disposeStream = true, ITokenWriter tokenWriter = null)
{
Stream = baseStream ?? throw new ArgumentNullException(nameof(baseStream));
if (!baseStream.CanWrite)
{
throw new ArgumentException("Output stream must be writable");
}
DisposeStream = disposeStream;
TokenWriter = tokenWriter ?? new TokenWriter();
}
public Stream Stream { get; protected set; }

View File

@@ -0,0 +1,100 @@
using System;
using System.Collections.Generic;
using System.IO;
namespace UglyToad.PdfPig.Writer
{
    /// <summary>
    /// Class to remove text from PDFs, useful as a preprocessing step for Optical Character Recognition (OCR)
    /// </summary>
    public static class PdfTextRemover
    {
        /// <summary>
        /// Return PDF without text as bytes
        /// </summary>
        /// <param name="filePath">Path to PDF</param>
        /// <param name="pagesBundle">List of pages to emit; if null all pages are emitted</param>
        /// <returns>PDF without text (as a byte array)</returns>
        public static byte[] RemoveText(string filePath, IReadOnlyList<int> pagesBundle = null)
        {
            using (var output = new MemoryStream())
            {
                RemoveText(output, filePath, pagesBundle);
                return output.ToArray();
            }
        }

        /// <summary>
        /// Write PDF without text to the output stream. The caller must manage disposing the output stream.
        /// </summary>
        /// <param name="output">Must be writable</param>
        /// <param name="filePath">Path to PDF</param>
        /// <param name="pagesBundle">List of pages to emit; if null all pages are emitted</param>
        public static void RemoveText(Stream output, string filePath, IReadOnlyList<int> pagesBundle = null)
        {
            using (var stream = File.OpenRead(filePath))
            {
                RemoveText(stream, output, pagesBundle);
            }
        }

        /// <summary>
        /// Remove text from the PDF (passed in as a byte array) and return it as a new byte array
        /// </summary>
        /// <param name="file">PDF document (as byte array)</param>
        /// <param name="pagesBundle">List of pages to emit; if null all pages are emitted</param>
        /// <returns>PDF without text (as a byte array)</returns>
        /// <exception cref="ArgumentNullException"><paramref name="file"/> is null.</exception>
        public static byte[] RemoveText(byte[] file, IReadOnlyList<int> pagesBundle = null)
        {
            _ = file ?? throw new ArgumentNullException(nameof(file));

            using (var output = new MemoryStream())
            using (var document = PdfDocument.Open(file))
            {
                // The parsed document is owned by this method, so it is disposed here instead of leaked.
                RemoveText(document, output, pagesBundle);
                return output.ToArray();
            }
        }

        /// <summary>
        /// Remove text from the PDF in the input stream and write it to the output stream.
        /// The caller must manage disposing the stream. The created PdfDocument will not dispose the stream.
        /// </summary>
        /// <param name="stream">Streams for the file contents, this must support reading and seeking.</param>
        /// <param name="output">Must be writable</param>
        /// <param name="pagesBundle">List of pages to emit; if null all pages are emitted</param>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> or <paramref name="output"/> is null.</exception>
        public static void RemoveText(Stream stream, Stream output, IReadOnlyList<int> pagesBundle = null)
        {
            _ = stream ?? throw new ArgumentNullException(nameof(stream));
            _ = output ?? throw new ArgumentNullException(nameof(output));

            // The parsed document is created here, so it is released here once the output is written.
            // NOTE(review): this relies on the contract stated above that the PdfDocument does not
            // dispose the caller's stream - confirm against PdfDocument.Open(Stream).
            using (var document = PdfDocument.Open(stream))
            {
                RemoveText(document, output, pagesBundle);
            }
        }

        /// <summary>
        /// Remove text from the PDF and write it to the output stream.
        /// The caller must manage disposing the stream and the document; neither is disposed here.
        /// </summary>
        /// <param name="file">PDF document</param>
        /// <param name="output">Must be writable</param>
        /// <param name="pagesBundle">List of pages to emit; if null all pages are emitted</param>
        /// <exception cref="ArgumentNullException"><paramref name="file"/> or <paramref name="output"/> is null.</exception>
        public static void RemoveText(PdfDocument file, Stream output, IReadOnlyList<int> pagesBundle = null)
        {
            // Same argument validation as the sibling overloads, instead of a NullReferenceException on file.Version.
            _ = file ?? throw new ArgumentNullException(nameof(file));
            _ = output ?? throw new ArgumentNullException(nameof(output));

            // NOTE(review): Build() is never called; this appears to rely on disposing the builder to
            // finish writing the document - confirm against PdfDocumentBuilder.Dispose.
            using (var document = new PdfDocumentBuilder(output, false, PdfWriterType.Default, file.Version, tokenWriter: new NoTextTokenWriter()))
            {
                if (pagesBundle == null)
                {
                    for (var i = 1; i <= file.NumberOfPages; i++)
                    {
                        document.AddPage(file, i);
                    }
                }
                else
                {
                    foreach (var i in pagesBundle)
                    {
                        document.AddPage(file, i);
                    }
                }
            }
        }
    }
}

View File

@@ -14,10 +14,8 @@
/// <summary>
/// Writes any type of <see cref="IToken"/> to the corresponding PDF document format output.
/// </summary>
public class TokenWriter
public class TokenWriter : ITokenWriter
{
private static readonly byte Backslash = GetByte("\\");
private static readonly byte ArrayStart = GetByte("[");
private static readonly byte ArrayEnd = GetByte("]");
@@ -46,10 +44,18 @@
private static readonly byte[] StartXref = OtherEncodings.StringAsLatin1Bytes("startxref");
private static readonly byte[] StreamStart = OtherEncodings.StringAsLatin1Bytes("stream");
private static readonly byte[] StreamEnd = OtherEncodings.StringAsLatin1Bytes("endstream");
/// <summary>
/// Bytes that indicate start of stream
/// </summary>
protected static readonly byte[] StreamStart = OtherEncodings.StringAsLatin1Bytes("stream");
/// <summary>
/// Bytes that indicate end of stream
/// </summary>
protected static readonly byte[] StreamEnd = OtherEncodings.StringAsLatin1Bytes("endstream");
private static readonly byte StringStart = GetByte("(");
private static readonly byte StringEnd = GetByte(")");
private static readonly byte[] Trailer = OtherEncodings.StringAsLatin1Bytes("trailer");
@@ -79,7 +85,7 @@
/// </summary>
/// <param name="token">The token to write to the stream.</param>
/// <param name="outputStream">The stream to write the token to.</param>
public static void WriteToken(IToken token, Stream outputStream)
public void WriteToken(IToken token, Stream outputStream)
{
if (token == null)
{
@@ -129,14 +135,8 @@
}
}
/// <summary>
/// Writes a valid single section cross-reference (xref) table plus trailer dictionary to the output for the set of object offsets.
/// </summary>
/// <param name="objectOffsets">The byte offset from the start of the document for each object in the document.</param>
/// <param name="catalogToken">The object representing the catalog dictionary which is referenced from the trailer dictionary.</param>
/// <param name="outputStream">The output stream to write to.</param>
/// <param name="documentInformationReference">The object reference for the document information dictionary if present.</param>
internal static void WriteCrossReferenceTable(IReadOnlyDictionary<IndirectReference, long> objectOffsets,
/// <inheritdoc cref="ITokenWriter.WriteCrossReferenceTable" />
public void WriteCrossReferenceTable(IReadOnlyDictionary<IndirectReference, long> objectOffsets,
IndirectReference catalogToken,
Stream outputStream,
IndirectReference? documentInformationReference)
@@ -271,14 +271,8 @@
outputStream.Write(Eof, 0, Eof.Length);
}
/// <summary>
/// Writes pre-serialized token as an object token to the output stream.
/// </summary>
/// <param name="objectNumber">Object number of the indirect object.</param>
/// <param name="generation">Generation of the indirect object.</param>
/// <param name="data">Pre-serialized object contents.</param>
/// <param name="outputStream">The stream to write the token to.</param>
internal static void WriteObject(long objectNumber, int generation, byte[] data, Stream outputStream)
/// <inheritdoc cref="ITokenWriter.WriteObject" />
public void WriteObject(long objectNumber, int generation, byte[] data, Stream outputStream)
{
WriteLong(objectNumber, outputStream);
WriteWhitespace(outputStream);
@@ -297,14 +291,24 @@
WriteLineBreak(outputStream);
}
private static void WriteHex(HexToken hex, Stream stream)
/// <summary>
/// Write a hex value to the output stream
/// </summary>
/// <param name="hex"></param>
/// <param name="stream"></param>
protected void WriteHex(HexToken hex, Stream stream)
{
stream.WriteByte(HexStart);
stream.WriteText(hex.GetHexString());
stream.WriteByte(HexEnd);
}
private static void WriteArray(ArrayToken array, Stream outputStream)
/// <summary>
/// Write an array to the output stream, with whitespace at the end.
/// </summary>
/// <param name="array"></param>
/// <param name="outputStream"></param>
protected void WriteArray(ArrayToken array, Stream outputStream)
{
outputStream.WriteByte(ArrayStart);
WriteWhitespace(outputStream);
@@ -319,14 +323,24 @@
WriteWhitespace(outputStream);
}
private static void WriteBoolean(BooleanToken boolean, Stream outputStream)
/// <summary>
/// Write a boolean "true" or "false" to the output stream, with whitespace at the end.
/// </summary>
/// <param name="boolean"></param>
/// <param name="outputStream"></param>
protected void WriteBoolean(BooleanToken boolean, Stream outputStream)
{
var bytes = boolean.Data ? TrueBytes : FalseBytes;
outputStream.Write(bytes, 0, bytes.Length);
WriteWhitespace(outputStream);
}
private static void WriteComment(CommentToken comment, Stream outputStream)
/// <summary>
/// Write a "%comment" in the output stream, with a line break at the end.
/// </summary>
/// <param name="comment"></param>
/// <param name="outputStream"></param>
protected void WriteComment(CommentToken comment, Stream outputStream)
{
var bytes = OtherEncodings.StringAsLatin1Bytes(comment.Data);
outputStream.WriteByte(Comment);
@@ -334,7 +348,12 @@
WriteLineBreak(outputStream);
}
private static void WriteDictionary(DictionaryToken dictionary, Stream outputStream)
/// <summary>
/// Writes dictionary key/value pairs to output stream as Name/Token pairs.
/// </summary>
/// <param name="dictionary"></param>
/// <param name="outputStream"></param>
protected void WriteDictionary(DictionaryToken dictionary, Stream outputStream)
{
outputStream.Write(DictionaryStart, 0, DictionaryStart.Length);
@@ -356,7 +375,12 @@
outputStream.Write(DictionaryEnd, 0, DictionaryEnd.Length);
}
private static void WriteIndirectReference(IndirectReferenceToken reference, Stream outputStream)
/// <summary>
/// Write an indirect reference to the stream, with whitespace at the end.
/// </summary>
/// <param name="reference"></param>
/// <param name="outputStream"></param>
protected virtual void WriteIndirectReference(IndirectReferenceToken reference, Stream outputStream)
{
WriteLong(reference.Data.ObjectNumber, outputStream);
WriteWhitespace(outputStream);
@@ -368,12 +392,17 @@
WriteWhitespace(outputStream);
}
private static void WriteName(NameToken name, Stream outputStream)
/// <summary>
/// Write a name to the stream, with whitespace at the end.
/// </summary>
/// <param name="name"></param>
/// <param name="outputStream"></param>
protected virtual void WriteName(NameToken name, Stream outputStream)
{
WriteName(name.Data, outputStream);
}
private static void WriteName(string name, Stream outputStream)
private void WriteName(string name, Stream outputStream)
{
/*
* Beginning with PDF 1.2, any character except null (character code 0) may be
@@ -404,7 +433,12 @@
WriteWhitespace(outputStream);
}
private static void WriteNumber(NumericToken number, Stream outputStream)
/// <summary>
/// Write a number to the stream, with whitespace at the end.
/// </summary>
/// <param name="number"></param>
/// <param name="outputStream"></param>
protected virtual void WriteNumber(NumericToken number, Stream outputStream)
{
if (!number.HasDecimalPlaces)
{
@@ -419,7 +453,15 @@
WriteWhitespace(outputStream);
}
private static void WriteObject(ObjectToken objectToken, Stream outputStream)
/// <summary>
/// Write an object to the stream, with a line break at the end. It writes the following contents:
/// - "[ObjectNumber] [Generation] obj"
/// - Object data
/// - "endobj"
/// </summary>
/// <param name="objectToken"></param>
/// <param name="outputStream"></param>
protected virtual void WriteObject(ObjectToken objectToken, Stream outputStream)
{
WriteLong(objectToken.Number.ObjectNumber, outputStream);
WriteWhitespace(outputStream);
@@ -438,7 +480,16 @@
WriteLineBreak(outputStream);
}
private static void WriteStream(StreamToken streamToken, Stream outputStream)
/// <summary>
/// Write a stream token to the output stream, with the following contents:
/// - Dictionary specifying the length of the stream, any applied compression filters and additional information.
/// - Stream start indicator
/// - Bytes in the StreamToken data
/// - Stream end indicator
/// </summary>
/// <param name="streamToken"></param>
/// <param name="outputStream"></param>
protected virtual void WriteStream(StreamToken streamToken, Stream outputStream)
{
WriteDictionary(streamToken.StreamDictionary, outputStream);
WriteLineBreak(outputStream);
@@ -449,15 +500,22 @@
outputStream.Write(StreamEnd, 0, StreamEnd.Length);
}
private static int[] EscapeNeeded = new int[]
private static readonly int[] EscapeNeeded = new int[]
{
'\r', '\n', '\t', '\b', '\f', '\\'
};
private static int[] Escaped = new int[]
private static readonly int[] Escaped = new int[]
{
'r', 'n', 't', 'b', 'f', '\\'
};
private static void WriteString(StringToken stringToken, Stream outputStream)
/// <summary>
/// Write string to the stream, with whitespace at the end
/// </summary>
/// <param name="stringToken"></param>
/// <param name="outputStream"></param>
protected virtual void WriteString(StringToken stringToken, Stream outputStream)
{
outputStream.WriteByte(StringStart);
@@ -515,29 +573,47 @@
WriteWhitespace(outputStream);
}
private static void WriteInt(int value, Stream outputStream)
/// <summary>
/// Write an integer to the stream
/// </summary>
/// <param name="value"></param>
/// <param name="outputStream"></param>
protected virtual void WriteInt(int value, Stream outputStream)
{
var bytes = OtherEncodings.StringAsLatin1Bytes(value.ToString("G", CultureInfo.InvariantCulture));
outputStream.Write(bytes, 0, bytes.Length);
}
private static void WriteLineBreak(Stream outputStream)
/// <summary>
/// Write a line break to the output stream
/// </summary>
/// <param name="outputStream"></param>
protected virtual void WriteLineBreak(Stream outputStream)
{
outputStream.WriteNewLine();
}
private static void WriteLong(long value, Stream outputStream)
/// <summary>
/// Write a long to the stream
/// </summary>
/// <param name="value"></param>
/// <param name="outputStream"></param>
protected virtual void WriteLong(long value, Stream outputStream)
{
var bytes = OtherEncodings.StringAsLatin1Bytes(value.ToString("G", CultureInfo.InvariantCulture));
outputStream.Write(bytes, 0, bytes.Length);
}
private static void WriteWhitespace(Stream outputStream)
/// <summary>
/// Write a space to the output stream
/// </summary>
/// <param name="outputStream"></param>
protected virtual void WriteWhitespace(Stream outputStream)
{
outputStream.WriteByte(Whitespace);
}
private static void WriteFirstXrefEmptyEntry(Stream outputStream)
private void WriteFirstXrefEmptyEntry(Stream outputStream)
{
/*
* The first entry in the table (object number 0) is always free and has a generation number of 65,535;
@@ -591,5 +667,4 @@
}
}
}
}
}