review changes from Poltuu, test coverage for token replacement

This commit is contained in:
Plaisted 2021-02-10 12:27:12 -06:00
parent e7f96c0b16
commit 509164447b
5 changed files with 96 additions and 66 deletions

View File

@ -0,0 +1,52 @@
namespace UglyToad.PdfPig.Tests.Integration
{
    using PdfPig.Tokens;
    using System.Collections.Generic;
    using Xunit;

    /// <summary>
    /// Integration tests for replacing indirect objects through <c>document.Advanced</c>.
    /// Each test swaps the first page's content stream for an empty stream and then checks
    /// that the page yields no letters.
    /// </summary>
    public class AdvancedPdfDocumentAccessTests
    {
        private const string DocumentName = "Single Page Simple - from inkscape.pdf";

        /// <summary>
        /// Replaces the content stream using the overload that takes a replacement function.
        /// </summary>
        [Fact]
        public void ReplacesObjectsFunc()
        {
            var path = IntegrationHelpers.GetDocumentPath(DocumentName);
            using (var document = PdfDocument.Open(path))
            {
                var contents = GetFirstPageContentsReference(document);

                // The existing token is ignored; the replacement is always an empty stream.
                document.Advanced.ReplaceIndirectObject(contents.Data, tk => CreateEmptyStream());

                var page = document.GetPage(1);
                Assert.Empty(page.Letters);
            }
        }

        /// <summary>
        /// Replaces the content stream using the overload that takes a replacement token.
        /// </summary>
        [Fact]
        public void ReplacesObjects()
        {
            var path = IntegrationHelpers.GetDocumentPath(DocumentName);
            using (var document = PdfDocument.Open(path))
            {
                var replacement = CreateEmptyStream();
                var contents = GetFirstPageContentsReference(document);

                document.Advanced.ReplaceIndirectObject(contents.Data, replacement);

                var page = document.GetPage(1);
                Assert.Empty(page.Letters);
            }
        }

        /// <summary>
        /// Reads the Contents entry of page 1's dictionary and asserts it is an indirect reference.
        /// </summary>
        /// <param name="document">The opened document under test.</param>
        /// <returns>The indirect reference token stored under Contents.</returns>
        private static IndirectReferenceToken GetFirstPageContentsReference(PdfDocument document)
        {
            var pageDictionary = document.Structure.Catalog.GetPageNode(1).NodeDictionary;

            // Assert.IsType fails the test with a clear message when the entry is not an indirect
            // reference, instead of a NullReferenceException from an unchecked 'as' cast.
            return Assert.IsType<IndirectReferenceToken>(pageDictionary.Data[NameToken.Contents]);
        }

        /// <summary>
        /// Builds a stream token with a Length of 0 and no data bytes.
        /// </summary>
        /// <returns>A new empty stream token.</returns>
        private static StreamToken CreateEmptyStream()
        {
            var dict = new Dictionary<NameToken, IToken>
            {
                [NameToken.Length] = new NumericToken(0)
            };

            return new StreamToken(new DictionaryToken(dict), new List<byte>());
        }
    }
}

View File

@ -145,21 +145,29 @@
// Merges a single document with an explicit page selection and checks that every merged
// page carries the text of the source page it was copied from.
public void CanMergeWithSelection()
{
    var first = IntegrationHelpers.GetDocumentPath("Multiple Page - from Mortality Statistics.pdf");
    var contents = File.ReadAllBytes(first);

    // Source page numbers, listed in the order they should appear in the merged output.
    var toCopy = new[] { 2, 1, 4, 3, 6, 5 };

    var result = PdfMerger.Merge(new[] { contents }, new[] { toCopy });
    WriteFile(nameof(CanMergeWithSelection), result);

    using (var existing = PdfDocument.Open(contents, ParsingOptions.LenientParsingOff))
    using (var merged = PdfDocument.Open(result, ParsingOptions.LenientParsingOff))
    {
        Assert.Equal(toCopy.Length, merged.NumberOfPages);

        // Page numbers are 1-based, so the bound must be inclusive; a strict '<' would
        // leave the last merged page unchecked.
        for (var i = 1; i <= merged.NumberOfPages; i++)
        {
            Assert.Equal(
                existing.GetPage(toCopy[i - 1]).Text,
                merged.GetPage(i).Text);
        }
    }
}
[Fact]
public void CanMergeMultipleWithSelection()
{

View File

@ -10,8 +10,6 @@ namespace UglyToad.PdfPig.Writer
using Core;
using Fonts;
using PdfPig.Fonts.TrueType;
using Graphics.Operations;
using Parser.Parts;
using PdfPig.Fonts.Standard14Fonts;
using PdfPig.Fonts.TrueType.Parser;
using System.Runtime.CompilerServices;
@ -28,10 +26,17 @@ namespace UglyToad.PdfPig.Writer
private readonly IPdfStreamWriter context;
private readonly Dictionary<int, PdfPageBuilder> pages = new Dictionary<int, PdfPageBuilder>();
private readonly Dictionary<Guid, FontStored> fonts = new Dictionary<Guid, FontStored>();
private readonly Dictionary<Guid, ImageStored> images = new Dictionary<Guid, ImageStored>();
private readonly Dictionary<IndirectReferenceToken, IToken> unwrittenTokens = new Dictionary<IndirectReferenceToken, IToken>();
private bool completed = false;
internal int fontId = 0;
internal int fontId = 0;
private readonly static ArrayToken DefaultProcSet = new ArrayToken(new List<NameToken>
{
NameToken.Create("PDF"),
NameToken.Text,
NameToken.ImageB,
NameToken.ImageC,
NameToken.ImageI
});
/// <summary>
/// The standard of PDF/A compliance of the generated document. Defaults to <see cref="PdfAStandard.None"/>.
@ -59,12 +64,6 @@ namespace UglyToad.PdfPig.Writer
/// </summary>
internal IReadOnlyDictionary<Guid, FontStored> Fonts => fonts;
/// <summary>
/// The images currently available in the document builder added via <see cref="AddImage"/>. Keyed by id for internal purposes.
/// </summary>
internal IReadOnlyDictionary<Guid, ImageStored> Images => images;
/// <summary>
/// Creates a document builder keeping resources in memory.
/// </summary>
@ -74,9 +73,9 @@ namespace UglyToad.PdfPig.Writer
context.InitializePdf(1.7m);
}
/// <summary>
/// Creates a document builder keeping resources in memory.
/// </summary>
/// <summary>
/// Creates a document builder keeping resources in memory.
/// </summary>
/// <param name="version">Pdf version to use in header.</param>
public PdfDocumentBuilder(decimal version)
{
@ -298,7 +297,7 @@ namespace UglyToad.PdfPig.Writer
/// <param name="pageNumber">Page to copy.</param>
/// <returns>A builder for editing the page.</returns>
public PdfPageBuilder AddPage(PdfDocument document, int pageNumber)
{
{
if (!existingCopies.TryGetValue(document.Structure.TokenScanner, out var refs))
{
refs = new Dictionary<IndirectReference, IndirectReferenceToken>();
@ -444,8 +443,8 @@ namespace UglyToad.PdfPig.Writer
}
return dict;
}
}
}
private void CompleteDocument()
{
// write fonts to reserved object numbers
@ -454,16 +453,6 @@ namespace UglyToad.PdfPig.Writer
font.Value.FontProgram.WriteFont(context, font.Value.FontKey.Reference);
}
var procSet = new List<NameToken>
{
NameToken.Create("PDF"),
NameToken.Text,
NameToken.ImageB,
NameToken.ImageC,
NameToken.ImageI
};
int desiredLeafSize = 25;
var numLeafs = (int) Math.Ceiling(Decimal.Divide(Pages.Count, desiredLeafSize));
@ -487,7 +476,7 @@ namespace UglyToad.PdfPig.Writer
var pageDictionary = page.Value.pageDictionary;
pageDictionary[NameToken.Type] = NameToken.Page;
pageDictionary[NameToken.Parent] = leafRefs[leafNum];
pageDictionary[NameToken.ProcSet] = new ArrayToken(procSet);
pageDictionary[NameToken.ProcSet] = DefaultProcSet;
if (!pageDictionary.ContainsKey(NameToken.MediaBox))
{
pageDictionary[NameToken.MediaBox] = RectangleToArray(page.Value.PageSize);
@ -686,25 +675,6 @@ namespace UglyToad.PdfPig.Writer
}
}
internal class ImageStored
{
public Guid Id { get; }
public DictionaryToken StreamDictionary { get; }
public byte[] StreamData { get; }
public int ObjectNumber { get; }
public ImageStored(DictionaryToken streamDictionary, byte[] streamData, int objectNumber)
{
Id = Guid.NewGuid();
StreamDictionary = streamDictionary;
StreamData = streamData;
ObjectNumber = objectNumber;
}
}
/// <summary>
/// A key representing a font available to use on the current document builder. Create by adding a font to a document using either
/// <see cref="AddStandard14Font"/> or <see cref="AddTrueTypeFont"/>.
@ -813,7 +783,7 @@ namespace UglyToad.PdfPig.Writer
if (!completed)
{
CompleteDocument();
}
}
context.Dispose();
}

View File

@ -131,13 +131,13 @@
{
for (var i = 1; i <= existing.NumberOfPages; i++)
{
document.AddPage(existing, 1);
document.AddPage(existing, i);
}
} else
{
foreach (var i in pages)
{
document.AddPage(existing, 1);
document.AddPage(existing, i);
}
}
}

View File

@ -27,20 +27,20 @@
/// A builder used to construct a page in a PDF document.
/// </summary>
public class PdfPageBuilder
{
{
// parent
private readonly PdfDocumentBuilder documentBuilder;
private readonly PdfDocumentBuilder documentBuilder;
// all page data other than content streams
internal readonly Dictionary<NameToken, IToken> pageDictionary = new Dictionary<NameToken, IToken>();
// streams
internal readonly List<IPageContentStream> contentStreams;
private IPageContentStream currentStream;
private IPageContentStream currentStream;
// maps fonts added using PdfDocumentBuilder to page font names
private readonly Dictionary<Guid, NameToken> documentFonts = new Dictionary<Guid, NameToken>();
internal int nextFontId = 1;
internal int nextFontId = 1;
// A sequence number for ShowText operations, used to determine whether letters belong to the same operation or not (letters that belong to different operations are less likely to belong to the same word)
private int textSequence;
@ -400,7 +400,7 @@
};
var reference = documentBuilder.AddImage(new DictionaryToken(imgDictionary), data);
var resources = pageDictionary.GetOrCreateDict(NameToken.Resources);
var resources = pageDictionary.GetOrCreateDict(NameToken.Resources);
var xObjects = resources.GetOrCreateDict(NameToken.Xobject);
var key = NameToken.Create($"I{imageKey++}");
@ -584,7 +584,7 @@
// Special cases
// Since we don't directly add fonts to the page's resources, we have to go look at the document's fonts
if(srcResourceDictionary.TryGet(NameToken.Font, srcPage.pdfScanner, out DictionaryToken fontsDictionary))
{
{
var pageFontsDictionary = resources.GetOrCreateDict(NameToken.Font);
foreach (var fontSet in fontsDictionary.Data)