Merge pull request #351 from plaisted/master

AddPage fixes for streams and page dict inheritance
This commit is contained in:
Eliot Jones
2021-08-04 07:52:22 -04:00
committed by GitHub
5 changed files with 1094 additions and 964 deletions

View File

@@ -0,0 +1,61 @@
%PDF-1.1
1 0 obj
<</Type/Catalog/Pages 2 0 R>>
endobj
2 0 obj
<</Type/Pages/Count 1/Kids[3 0 R]/MediaBox [0 0 200 100]>>
endobj
3 0 obj
<<
/Type/Page
/Parent 2 0 R
/Resources <<
/XObject << /SomeImage 4 0 R >>
>>
/Contents 5 0 R
>>
endobj
4 0 obj
<<
/Type/XObject
/Subtype/Image
/Width 200 % The width or height directly affects the image's file size.
/Height 100
/ColorSpace/DeviceRGB
/DecodeParms [] % Forces NativeImageDecoder.isSupported to return false.
/BitsPerComponent 8
/Length 580
/Filter [ /ASCIIHexDecode /DCTDecode ]
>>
% convert -size 1x1 xc:red jpeg:- | xxd -p -c40
stream
ffd8ffe000104a46494600010100000100010000ffdb004300030202020202030202020303030304
060404040404080606050609080a0a090809090a0c0f0c0a0b0e0b09090d110d0e0f101011100a0c
12131210130f101010ffdb00430103030304030408040408100b090b101010101010101010101010
1010101010101010101010101010101010101010101010101010101010101010101010101010ffc0
0011080001000103011100021101031101ffc40014000100000000000000000000000000000008ff
c40014100100000000000000000000000000000000ffc40015010101000000000000000000000000
00000709ffc40014110100000000000000000000000000000000ffda000c03010002110311003f00
3a03154dffd9
endstream
endobj
5 0 obj
<</Length 14>>
stream
500 0 0 400 0 0 cm
/SomeImage Do
endstream
endobj
xref
0 6
0000000000 65535 f
0000000008 00000 n
0000000054 00000 n
0000000128 00000 n
0000000246 00000 n
0000001201 00000 n
trailer
<</Root 1 0 R/Size 6>>
startxref
1281
%%EOF

View File

@@ -55,6 +55,53 @@
}
}
[Fact]
public void CanFastAddPageAndInheritProps()
{
    // Copy page 1 of the fixture into a fresh builder and keep the serialized output.
    // NOTE(review): the fixture presumably declares its MediaBox on a parent node
    // rather than on the page itself - confirm against inherited_mediabox.pdf.
    var sourceBytes = File.ReadAllBytes(IntegrationHelpers.GetDocumentPath("inherited_mediabox.pdf"));

    byte[] rebuiltBytes;
    using (var sourceDocument = PdfDocument.Open(sourceBytes, ParsingOptions.LenientParsingOff))
    {
        using (var builder = new PdfDocumentBuilder())
        {
            builder.AddPage(sourceDocument, 1);
            rebuiltBytes = builder.Build();
        }
    }

    // Re-open the built document strictly and check the page still reports a 200 x 100 media box.
    using (var rebuiltDocument = PdfDocument.Open(rebuiltBytes, ParsingOptions.LenientParsingOff))
    {
        var copiedPage = rebuiltDocument.GetPage(1);
        var bounds = copiedPage.MediaBox.Bounds;

        Assert.Equal(200, bounds.Width);
        Assert.Equal(100, bounds.Height);
    }
}
[Fact]
public void CanFastAddPageWithStreamSubtype()
{
    // Copy page 1 of the fixture into a fresh builder and keep the serialized output.
    var sourceBytes = File.ReadAllBytes(IntegrationHelpers.GetDocumentPath("steam_in_page_dict.pdf"));

    byte[] rebuiltBytes;
    using (var sourceDocument = PdfDocument.Open(sourceBytes, ParsingOptions.LenientParsingOff))
    {
        using (var builder = new PdfDocumentBuilder())
        {
            builder.AddPage(sourceDocument, 1);
            rebuiltBytes = builder.Build();
        }
    }

    // The main expectation is that building and strict re-parsing do not throw;
    // the content assertion is only a minimal sanity check on the copied page.
    using (var rebuiltDocument = PdfDocument.Open(rebuiltBytes, ParsingOptions.LenientParsingOff))
    {
        var copiedPage = rebuiltDocument.GetPage(1);

        Assert.NotNull(copiedPage.Content);
    }
}
[Fact]
public void CanReadSingleBlankPage()
{
@@ -111,7 +158,7 @@
{
var page1 = document.GetPage(1);
Assert.Equal(new[] {"Hello", "World!"}, page1.GetWords().Select(x => x.Text));
Assert.Equal(new[] { "Hello", "World!" }, page1.GetWords().Select(x => x.Text));
}
}
@@ -680,7 +727,7 @@
using (var document = PdfDocument.Open(b))
{
Assert.Equal( 2, document.NumberOfPages);
Assert.Equal(2, document.NumberOfPages);
var page1 = document.GetPage(1);
@@ -740,7 +787,7 @@
for (int i = 0; i < letters.Count; i++)
{
var readerLetter = page1.Letters[i+18];
var readerLetter = page1.Letters[i + 18];
var writerLetter = letters[i];
Assert.Equal(readerLetter.Value, writerLetter.Value);
@@ -841,7 +888,7 @@
var count = 25 * 25 * 25 + 1;
using (var builder = new PdfDocumentBuilder())
{
for (var i = 0; i < count;i++)
for (var i = 0; i < count; i++)
{
builder.AddPage(PageSize.A4);
}
@@ -971,7 +1018,8 @@
{
unchecked { letters += 1; }
unchecked {
unchecked
{
location += letter.Location.X;
location += letter.Location.Y;
location += letter.Font.Name.Length;

View File

@@ -167,7 +167,6 @@
}
}
[Fact]
public void CanMergeMultipleWithSelection()
{

View File

@@ -90,7 +90,7 @@ namespace UglyToad.PdfPig.Writer
/// <param name="disposeStream">If stream should be disposed when builder is.</param>
/// <param name="type">Type of pdf stream writer to use</param>
/// <param name="version">Pdf version to use in header.</param>
public PdfDocumentBuilder(Stream stream, bool disposeStream=false, PdfWriterType type=PdfWriterType.Default, decimal version=1.7m)
public PdfDocumentBuilder(Stream stream, bool disposeStream = false, PdfWriterType type = PdfWriterType.Default, decimal version = 1.7m)
{
switch (type)
{
@@ -289,7 +289,7 @@ namespace UglyToad.PdfPig.Writer
private readonly ConditionalWeakTable<IPdfTokenScanner, Dictionary<IndirectReference, IndirectReferenceToken>> existingCopies =
new ConditionalWeakTable<IPdfTokenScanner, Dictionary<IndirectReference, IndirectReferenceToken>>();
private readonly ConditionalWeakTable<PdfDocument, Dictionary<int, PageInfo>> existingTrees =
new ConditionalWeakTable<PdfDocument, Dictionary<int, PageInfo>>();
new ConditionalWeakTable<PdfDocument, Dictionary<int, PageInfo>>();
/// <summary>
/// Add a new page with the specified size, this page will be included in the output when <see cref="Build"/> is called.
/// </summary>
@@ -312,7 +312,8 @@ namespace UglyToad.PdfPig.Writer
{
pagesInfos[i] = new PageInfo
{
Page = pageDict, Parents = parents
Page = pageDict,
Parents = parents
};
i++;
}
@@ -359,6 +360,18 @@ namespace UglyToad.PdfPig.Writer
{
CopyResourceDict(resourceToken, resources);
}
if (dict.TryGet(NameToken.MediaBox, out var mb))
{
copiedPageDict[NameToken.MediaBox] = WriterUtil.CopyToken(context, mb, document.Structure.TokenScanner, refs);
}
if (dict.TryGet(NameToken.CropBox, out var cb))
{
copiedPageDict[NameToken.CropBox] = WriterUtil.CopyToken(context, cb, document.Structure.TokenScanner, refs);
}
if (dict.TryGet(NameToken.Rotate, out var rt))
{
copiedPageDict[NameToken.Rotate] = WriterUtil.CopyToken(context, rt, document.Structure.TokenScanner, refs);
}
}
@@ -399,7 +412,16 @@ namespace UglyToad.PdfPig.Writer
if (item.Value is IndirectReferenceToken ir)
{
// convert indirect to direct as PdfPageBuilder needs to modify resource entries
destinationDict[NameToken.Create(item.Key)] = WriterUtil.CopyToken(context, document.Structure.TokenScanner.Get(ir.Data).Data, document.Structure.TokenScanner, refs);
var obj = document.Structure.TokenScanner.Get(ir.Data);
if (obj.Data is StreamToken)
{
// rare case, have seen /SubType as stream token, can't make direct
destinationDict[NameToken.Create(item.Key)] = WriterUtil.CopyToken(context, item.Value, document.Structure.TokenScanner, refs);
}
else
{
destinationDict[NameToken.Create(item.Key)] = WriterUtil.CopyToken(context, obj.Data, document.Structure.TokenScanner, refs);
}
}
else
{
@@ -458,7 +480,7 @@ namespace UglyToad.PdfPig.Writer
}
const int desiredLeafSize = 25; // allow customization at some point?
var numLeafs = (int) Math.Ceiling(Decimal.Divide(Pages.Count, desiredLeafSize));
var numLeafs = (int)Math.Ceiling(Decimal.Divide(Pages.Count, desiredLeafSize));
var leafRefs = new List<IndirectReferenceToken>();
var leafChildren = new List<List<IndirectReferenceToken>>();
@@ -589,12 +611,12 @@ namespace UglyToad.PdfPig.Writer
var children = new List<IndirectReferenceToken>();
if (pagesNodes.Count > desiredLeafSize)
{
var currentTreeDepth = (int) Math.Ceiling(Math.Log(pagesNodes.Count, desiredLeafSize));
var perBranch = (int) Math.Ceiling(Math.Pow(desiredLeafSize, currentTreeDepth - 1));
var currentTreeDepth = (int)Math.Ceiling(Math.Log(pagesNodes.Count, desiredLeafSize));
var perBranch = (int)Math.Ceiling(Math.Pow(desiredLeafSize, currentTreeDepth - 1));
var branches = (int)Math.Ceiling(decimal.Divide(pagesNodes.Count, (decimal)perBranch));
for (var i = 0; i < branches; i++)
{
var part = pagesNodes.Skip(i*perBranch).Take(perBranch).ToList();
var part = pagesNodes.Skip(i * perBranch).Take(perBranch).ToList();
var result = CreatePageTree(part, thisObj);
count += result.Count;
children.Add(result.Ref);