diff --git a/src/UglyToad.PdfPig.Fonts/UglyToad.PdfPig.Fonts.csproj b/src/UglyToad.PdfPig.Fonts/UglyToad.PdfPig.Fonts.csproj
index b410c51b..4b398863 100644
--- a/src/UglyToad.PdfPig.Fonts/UglyToad.PdfPig.Fonts.csproj
+++ b/src/UglyToad.PdfPig.Fonts/UglyToad.PdfPig.Fonts.csproj
@@ -7,6 +7,7 @@
true
true
..\pdfpig.snk
+ annotations
diff --git a/src/UglyToad.PdfPig.Tests/ContentStream/IndirectReferenceTests.cs b/src/UglyToad.PdfPig.Tests/ContentStream/IndirectReferenceTests.cs
index 6804c372..9a05631f 100644
--- a/src/UglyToad.PdfPig.Tests/ContentStream/IndirectReferenceTests.cs
+++ b/src/UglyToad.PdfPig.Tests/ContentStream/IndirectReferenceTests.cs
@@ -1,6 +1,7 @@
namespace UglyToad.PdfPig.Tests.ContentStream
{
using PdfPig.Core;
+ using System.Globalization;
public class IndirectReferenceTests
{
@@ -33,50 +34,59 @@
[Fact]
public void IndirectReferenceHashTest()
{
- var reference0 = new IndirectReference(1574, 690);
- Assert.Equal(1574, reference0.ObjectNumber);
- Assert.Equal(690, reference0.Generation);
+ CultureInfo lastCulture = CultureInfo.CurrentCulture;
+ CultureInfo.CurrentCulture = new CultureInfo("en-US");
+ try
+ {
+ var reference0 = new IndirectReference(1574, 690);
+ Assert.Equal(1574, reference0.ObjectNumber);
+ Assert.Equal(690, reference0.Generation);
- var reference1 = new IndirectReference(-1574, 690);
- Assert.Equal(-1574, reference1.ObjectNumber);
- Assert.Equal(690, reference1.Generation);
+ var reference1 = new IndirectReference(-1574, 690);
+ Assert.Equal(-1574, reference1.ObjectNumber);
+ Assert.Equal(690, reference1.Generation);
- var reference2 = new IndirectReference(58949797283757, 16);
- Assert.Equal(58949797283757, reference2.ObjectNumber);
- Assert.Equal(16, reference2.Generation);
+ var reference2 = new IndirectReference(58949797283757, 16);
+ Assert.Equal(58949797283757, reference2.ObjectNumber);
+ Assert.Equal(16, reference2.Generation);
- var reference3 = new IndirectReference(-58949797283757, ushort.MaxValue);
- Assert.Equal(-58949797283757, reference3.ObjectNumber);
- Assert.Equal(ushort.MaxValue, reference3.Generation);
+ var reference3 = new IndirectReference(-58949797283757, ushort.MaxValue);
+ Assert.Equal(-58949797283757, reference3.ObjectNumber);
+ Assert.Equal(ushort.MaxValue, reference3.Generation);
- var reference4 = new IndirectReference(140737488355327, ushort.MaxValue);
- Assert.Equal(140737488355327, reference4.ObjectNumber);
- Assert.Equal(ushort.MaxValue, reference4.Generation);
+ var reference4 = new IndirectReference(140737488355327, ushort.MaxValue);
+ Assert.Equal(140737488355327, reference4.ObjectNumber);
+ Assert.Equal(ushort.MaxValue, reference4.Generation);
- var reference5 = new IndirectReference(-140737488355327, ushort.MaxValue);
- Assert.Equal(-140737488355327, reference5.ObjectNumber);
- Assert.Equal(ushort.MaxValue, reference5.Generation);
+ var reference5 = new IndirectReference(-140737488355327, ushort.MaxValue);
+ Assert.Equal(-140737488355327, reference5.ObjectNumber);
+ Assert.Equal(ushort.MaxValue, reference5.Generation);
- var ex0 = Assert.Throws(() => new IndirectReference(140737488355328, 0));
- Assert.StartsWith("Object number must be between -140,737,488,355,327 and 140,737,488,355,327.", ex0.Message);
- var ex1 = Assert.Throws(() => new IndirectReference(-140737488355328, 0));
- Assert.StartsWith("Object number must be between -140,737,488,355,327 and 140,737,488,355,327.", ex1.Message);
-
- var ex2 = Assert.Throws(() => new IndirectReference(1574, -1));
- Assert.StartsWith("Generation number must not be a negative value.", ex2.Message);
-
- // We make sure object number is still correct even if generation is not
- var reference6 = new IndirectReference(1574, int.MaxValue);
- Assert.Equal(1574, reference6.ObjectNumber);
-
- var reference7 = new IndirectReference(-1574, ushort.MaxValue + 10);
- Assert.Equal(-1574, reference7.ObjectNumber);
+ var ex0 = Assert.Throws(() => new IndirectReference(140737488355328, 0));
+ Assert.StartsWith("Object number must be between -140,737,488,355,327 and 140,737,488,355,327.", ex0.Message);
+ var ex1 = Assert.Throws(() => new IndirectReference(-140737488355328, 0));
+ Assert.StartsWith("Object number must be between -140,737,488,355,327 and 140,737,488,355,327.", ex1.Message);
- var reference9 = new IndirectReference(-140737488355327, ushort.MaxValue + 10);
- Assert.Equal(-140737488355327, reference9.ObjectNumber);
+ var ex2 = Assert.Throws(() => new IndirectReference(1574, -1));
+ Assert.StartsWith("Generation number must not be a negative value.", ex2.Message);
- var reference10 = new IndirectReference(140737488355327, ushort.MaxValue * 10);
- Assert.Equal(140737488355327, reference10.ObjectNumber);
+ // We make sure object number is still correct even if generation is not
+ var reference6 = new IndirectReference(1574, int.MaxValue);
+ Assert.Equal(1574, reference6.ObjectNumber);
+
+ var reference7 = new IndirectReference(-1574, ushort.MaxValue + 10);
+ Assert.Equal(-1574, reference7.ObjectNumber);
+
+ var reference9 = new IndirectReference(-140737488355327, ushort.MaxValue + 10);
+ Assert.Equal(-140737488355327, reference9.ObjectNumber);
+
+ var reference10 = new IndirectReference(140737488355327, ushort.MaxValue * 10);
+ Assert.Equal(140737488355327, reference10.ObjectNumber);
+ }
+ finally
+ {
+ CultureInfo.CurrentCulture = lastCulture;
+ }
}
[Fact]
diff --git a/src/UglyToad.PdfPig.Tests/Integration/GithubIssuesTests.cs b/src/UglyToad.PdfPig.Tests/Integration/GithubIssuesTests.cs
index da1cf9cb..7328ba7f 100644
--- a/src/UglyToad.PdfPig.Tests/Integration/GithubIssuesTests.cs
+++ b/src/UglyToad.PdfPig.Tests/Integration/GithubIssuesTests.cs
@@ -132,7 +132,7 @@
var path = IntegrationHelpers.GetSpecificTestDocumentPath("StackOverflow_Issue_1122.pdf");
var ex = Assert.Throws(() => PdfDocument.Open(path, new ParsingOptions() { UseLenientParsing = true }));
- Assert.StartsWith("Reached maximum search depth while getting indirect reference.", ex.Message);
+ Assert.Equal("The root object in the trailer did not resolve to a readable dictionary.", ex.Message);
}
[Fact]
@@ -191,7 +191,7 @@
{
var path = IntegrationHelpers.GetSpecificTestDocumentPath("SpookyPass.pdf");
var ex = Assert.Throws(() => PdfDocument.Open(path, new ParsingOptions() { UseLenientParsing = true }));
- Assert.Equal("Avoiding infinite recursion in ObjectLocationProvider.TryGetOffset() as 'offset' and 'reference.ObjectNumber' have the same value and opposite signs.", ex.Message);
+ Assert.Equal("The root object in the trailer did not resolve to a readable dictionary.", ex.Message);
}
[Fact]
@@ -356,7 +356,8 @@
using (var document = PdfDocument.Open(path, new ParsingOptions() { UseLenientParsing = true, SkipMissingFonts = true }))
{
var page = document.GetPage(13);
- Assert.Throws(() => DocstrumBoundingBoxes.Instance.GetBlocks(page.GetWords()));
+ // This used to fail with an overflow exception when we failed to validate the zlib encoded data
+ Assert.NotNull(DocstrumBoundingBoxes.Instance.GetBlocks(page.GetWords()));
}
}
diff --git a/src/UglyToad.PdfPig.Tests/Parser/FileStructure/FirstPassParserTests.cs b/src/UglyToad.PdfPig.Tests/Parser/FileStructure/FirstPassParserTests.cs
index 0fef93e4..f4ab1c5e 100644
--- a/src/UglyToad.PdfPig.Tests/Parser/FileStructure/FirstPassParserTests.cs
+++ b/src/UglyToad.PdfPig.Tests/Parser/FileStructure/FirstPassParserTests.cs
@@ -109,10 +109,8 @@ public class FirstPassParserTests
%%EOF
""";
- if (Environment.NewLine == "\n")
- {
- content = content.Replace("\n", "\r\n");
- }
+ // Normalize every line ending to "\r\n", regardless of whether this source file was saved with "\r\n" or "\n"
+ content = content.Replace("\r\n", "\n").Replace("\n", "\r\n");
var ib = StringBytesTestConverter.Convert(content, false);
diff --git a/src/UglyToad.PdfPig.Tests/UglyToad.PdfPig.Tests.csproj b/src/UglyToad.PdfPig.Tests/UglyToad.PdfPig.Tests.csproj
index be423630..fb222e15 100644
--- a/src/UglyToad.PdfPig.Tests/UglyToad.PdfPig.Tests.csproj
+++ b/src/UglyToad.PdfPig.Tests/UglyToad.PdfPig.Tests.csproj
@@ -10,6 +10,7 @@
..\pdfpig.snk
2.1.30
enable
+ annotations
diff --git a/src/UglyToad.PdfPig/Filters/Adler32ChecksumStream.cs b/src/UglyToad.PdfPig/Filters/Adler32ChecksumStream.cs
new file mode 100644
index 00000000..fa1272dd
--- /dev/null
+++ b/src/UglyToad.PdfPig/Filters/Adler32ChecksumStream.cs
@@ -0,0 +1,130 @@
+namespace UglyToad.PdfPig.Filters
+{
+    using System;
+    using System.IO;
+
+    /// <summary>
+    /// A pass-through <see cref="Stream"/> that keeps a running Adler-32 checksum of every byte
+    /// read from or written to the wrapped stream.
+    /// </summary>
+    internal sealed class Adler32ChecksumStream : Stream
+    {
+        // Both Adler-32 sums are reduced modulo 65521.
+        private const uint AdlerModulus = 65521;
+
+        // Number of bytes that can be accumulated in 32-bit sums before a reduction is required;
+        // this is the NMAX bound used by the zlib reference implementation.
+        private const int MaxDeferredBytes = 5552;
+
+        private readonly Stream underlyingStream;
+
+        /// <summary>
+        /// Wraps <paramref name="writeStream"/>; despite the parameter name the stream may be used for reading or writing.
+        /// </summary>
+        /// <exception cref="ArgumentNullException">When <paramref name="writeStream"/> is null.</exception>
+        public Adler32ChecksumStream(Stream writeStream)
+        {
+            underlyingStream = writeStream ?? throw new ArgumentNullException(nameof(writeStream));
+        }
+
+        /// <summary>
+        /// The Adler-32 checksum of all bytes seen so far. Starts at 1 before any data passes through.
+        /// </summary>
+        public uint Checksum { get; private set; } = 1;
+
+        public override bool CanRead => underlyingStream.CanRead;
+
+        // Seeking would skip bytes and invalidate the running checksum.
+        public override bool CanSeek => false;
+
+        public override bool CanWrite => underlyingStream.CanWrite;
+
+        public override long Length => underlyingStream.Length;
+
+        public override long Position
+        {
+            get => underlyingStream.Position;
+            set => throw new NotSupportedException("Adler32ChecksumStream does not support seeking.");
+        }
+
+        public override void Flush()
+        {
+            underlyingStream.Flush();
+        }
+
+        /// <summary>
+        /// Reads from the wrapped stream and folds the bytes actually read into <see cref="Checksum"/>.
+        /// </summary>
+        public override int Read(byte[] buffer, int offset, int count)
+        {
+            int n = underlyingStream.Read(buffer, offset, count);
+
+            if (n > 0)
+            {
+                UpdateAdler(buffer.AsSpan(offset, n));
+            }
+
+            return n;
+        }
+
+        public override long Seek(long offset, SeekOrigin origin)
+        {
+            throw new NotSupportedException("Adler32ChecksumStream does not support seeking.");
+        }
+
+        public override void SetLength(long value)
+        {
+            throw new NotSupportedException("Adler32ChecksumStream does not support changing the length.");
+        }
+
+        /// <summary>
+        /// Writes to the wrapped stream and folds the written bytes into <see cref="Checksum"/>.
+        /// </summary>
+        public override void Write(byte[] buffer, int offset, int count)
+        {
+            underlyingStream.Write(buffer, offset, count);
+
+            if (count > 0)
+            {
+                UpdateAdler(buffer.AsSpan(offset, count));
+            }
+        }
+
+        /// <summary>
+        /// Disposes the wrapped stream. Close() and Dispose() on the base class both end up here.
+        /// </summary>
+        protected override void Dispose(bool disposing)
+        {
+            if (disposing)
+            {
+                underlyingStream.Dispose();
+            }
+
+            base.Dispose(disposing);
+        }
+
+        private void UpdateAdler(ReadOnlySpan<byte> span)
+        {
+            uint a = Checksum & 0xFFFF;
+            uint b = (Checksum >> 16) & 0xFFFF;
+
+            while (!span.IsEmpty)
+            {
+                // Defer the modulo: within MaxDeferredBytes bytes neither 32-bit sum can overflow.
+                var chunk = span.Slice(0, Math.Min(span.Length, MaxDeferredBytes));
+
+                foreach (byte value in chunk)
+                {
+                    a += value;
+                    b += a;
+                }
+
+                a %= AdlerModulus;
+                b %= AdlerModulus;
+                span = span.Slice(chunk.Length);
+            }
+
+            Checksum = (b << 16) | a;
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/UglyToad.PdfPig/Filters/FlateFilter.cs b/src/UglyToad.PdfPig/Filters/FlateFilter.cs
index dd33d754..a5f3c686 100644
--- a/src/UglyToad.PdfPig/Filters/FlateFilter.cs
+++ b/src/UglyToad.PdfPig/Filters/FlateFilter.cs
@@ -2,6 +2,7 @@
{
using Fonts;
using System;
+ using System.Buffers.Binary;
using System.IO;
using System.IO.Compression;
using Tokens;
@@ -43,6 +44,15 @@
var colors = Math.Min(parameters.GetIntOrDefault(NameToken.Colors, DefaultColors), 32);
var bitsPerComponent = parameters.GetIntOrDefault(NameToken.BitsPerComponent, DefaultBitsPerComponent);
var columns = parameters.GetIntOrDefault(NameToken.Columns, DefaultColumns);
+
+ var length = parameters.GetIntOrDefault(NameToken.Length, -1);
+
+ if (length > 0 && length < input.Length)
+ {
+ // Drop any bytes beyond the declared length (typically a trailing "\r\n" or "\n") so the Adler checksum sits at the very end of the data. (Zlib itself relies on framing to locate it.)
+ input = input.Slice(0, length);
+ }
+
return Decompress(input, predictor, colors, bitsPerComponent, columns);
}
catch
@@ -55,29 +65,83 @@
private static Memory Decompress(Memory input, int predictor, int colors, int bitsPerComponent, int columns)
{
- using (var memoryStream = MemoryHelper.AsReadOnlyMemoryStream(input))
+#if NET
+ using var memoryStream = MemoryHelper.AsReadOnlyMemoryStream(input);
+ try
{
- // The first 2 bytes are the header which DeflateStream does not support.
- memoryStream.ReadByte();
- memoryStream.ReadByte();
-
- try
+ using (var zlib = new ZLibStream(memoryStream, CompressionMode.Decompress))
+ using (var output = new MemoryStream((int)(input.Length * 1.5)))
+ using (var f = PngPredictor.WrapPredictor(output, predictor, colors, bitsPerComponent, columns))
{
- using (var deflate = new DeflateStream(memoryStream, CompressionMode.Decompress))
- using (var output = new MemoryStream((int)(input.Length * 1.5)))
- using (var f = PngPredictor.WrapPredictor(output, predictor, colors, bitsPerComponent, columns))
- {
- deflate.CopyTo(f);
- f.Flush();
+ zlib.CopyTo(f);
+ f.Flush();
- return output.AsMemory();
- }
- }
- catch (InvalidDataException ex)
- {
- throw new CorruptCompressedDataException("Invalid Flate compressed stream encountered", ex);
+ return output.AsMemory();
}
}
+ catch (InvalidDataException ex)
+ {
+ throw new CorruptCompressedDataException("Invalid Flate compressed stream encountered", ex);
+ }
+#else
+ // Ideally we would like to use the ZLibStream class but that is only available in .NET 5+.
+ // We look at the raw data now
+ // * First we have 2 bytes, specifying the type of compression
+ // * Then we have the deflated data
+ // * Then we have a 4 byte checksum (Adler32)
+
+ // Would be so nice to have zlib do the framing here... but the deflate stream already reads data from the stream that we need.
+
+ using var memoryStream = MemoryHelper.AsReadOnlyMemoryStream(input.Slice(2, input.Length - 2 /* Header */ - 4 /* Checksum */));
+ // The stream above skips the 2 byte header (which DeflateStream can't handle) and the final 4 bytes, which hold the Adler32 checksum read below.
+ var adlerBytes = input.Slice(input.Length - 4, 4).Span;
+ uint expected = BinaryPrimitives.ReadUInt32BigEndian(adlerBytes);
+ uint altExpected = expected;
+
+ // Sometimes the data ends with "\r\n", "\r" or "\n" and we don't know whether those bytes belong to the zlib checksum or are trailing padding.
+ // Ideally the caller would already have removed such padding by honouring the provided length...
+ if (adlerBytes[3] == '\n' || adlerBytes[3] == '\r')
+ {
+ if (adlerBytes[3] == '\n' && adlerBytes[2] == '\r')
+ {
+ // We can't tell which value is the real checksum: the trailing bytes could be genuine checksum bytes or padding.
+ // Let's allow both values for now. Allowing two out of 2^32 is much better than allowing everything.
+ adlerBytes = input.Slice(input.Length - 6, 4).Span;
+ }
+ else
+ {
+ // Same but now for just '\n' or '\r' instead of '\r\n'
+ adlerBytes = input.Slice(input.Length - 5, 4).Span;
+ }
+
+ altExpected = BinaryPrimitives.ReadUInt32BigEndian(adlerBytes);
+ }
+
+
+ try
+ {
+ using (var deflate = new DeflateStream(memoryStream, CompressionMode.Decompress))
+ using (var adlerStream = new Adler32ChecksumStream(deflate))
+ using (var output = new MemoryStream((int)(input.Length * 1.5)))
+ using (var f = PngPredictor.WrapPredictor(output, predictor, colors, bitsPerComponent, columns))
+ {
+ adlerStream.CopyTo(f);
+ f.Flush();
+
+ uint actual = adlerStream.Checksum;
+ if (expected != actual && altExpected != actual)
+ {
+ throw new CorruptCompressedDataException("Flate stream has invalid checksum");
+ }
+
+ return output.AsMemory();
+ }
+ }
+ catch (InvalidDataException ex)
+ {
+ throw new CorruptCompressedDataException("Invalid Flate compressed stream encountered", ex);
+ }
+#endif
}
///
@@ -95,9 +159,10 @@
using (var compressStream = new MemoryStream())
using (var compressor = new DeflateStream(compressStream, CompressionLevel.Fastest))
+ using (var adlerStream = new Adler32ChecksumStream(compressor))
{
- compressor.Write(data, 0, data.Length);
- compressor.Close();
+ adlerStream.Write(data, 0, data.Length);
+ adlerStream.Close();
var compressed = compressStream.ToArray();
@@ -111,7 +176,7 @@
Array.Copy(compressed, 0, result, headerLength, compressed.Length);
// Write Checksum of raw data.
- var checksum = Adler32Checksum.Calculate(data);
+ var checksum = adlerStream.Checksum;
var offset = headerLength + compressed.Length;
diff --git a/src/UglyToad.PdfPig/Images/Png/Adler32Checksum.cs b/src/UglyToad.PdfPig/Images/Png/Adler32Checksum.cs
deleted file mode 100644
index 6e6c6aaf..00000000
--- a/src/UglyToad.PdfPig/Images/Png/Adler32Checksum.cs
+++ /dev/null
@@ -1,35 +0,0 @@
-namespace UglyToad.PdfPig.Images.Png
-{
- using System;
-
- ///
- /// Used to calculate the Adler-32 checksum used for ZLIB data in accordance with
- /// RFC 1950: ZLIB Compressed Data Format Specification.
- ///
- internal static class Adler32Checksum
- {
- // Both sums (s1 and s2) are done modulo 65521.
- private const int AdlerModulus = 65521;
-
- ///
- /// Calculate the Adler-32 checksum for some data.
- ///
- public static int Calculate(ReadOnlySpan data)
- {
- // s1 is the sum of all bytes.
- var s1 = 1;
-
- // s2 is the sum of all s1 values.
- var s2 = 0;
-
- foreach (var b in data)
- {
- s1 = (s1 + b) % AdlerModulus;
- s2 = (s1 + s2) % AdlerModulus;
- }
-
- // The Adler-32 checksum is stored as s2*65536 + s1.
- return s2 * 65536 + s1;
- }
- }
-}
\ No newline at end of file
diff --git a/src/UglyToad.PdfPig/Images/Png/PngBuilder.cs b/src/UglyToad.PdfPig/Images/Png/PngBuilder.cs
index 14e6266c..f3225ffc 100644
--- a/src/UglyToad.PdfPig/Images/Png/PngBuilder.cs
+++ b/src/UglyToad.PdfPig/Images/Png/PngBuilder.cs
@@ -1,8 +1,10 @@
namespace UglyToad.PdfPig.Images.Png
{
+ using System.Buffers.Binary;
using System.IO;
using System.IO.Compression;
using System.Text;
+ using UglyToad.PdfPig.Filters;
///
/// Used to construct PNG images. Call to make a new builder.
@@ -121,9 +123,10 @@
const int checksumLength = 4;
using (var compressStream = new MemoryStream())
using (var compressor = new DeflateStream(compressStream, CompressionLevel.Fastest, true))
+ using (var adlerStream = new Adler32ChecksumStream(compressor))
{
- compressor.Write(data, 0, data.Length);
- compressor.Close();
+ adlerStream.Write(data, 0, data.Length);
+ adlerStream.Close();
compressStream.Seek(0, SeekOrigin.Begin);
@@ -143,15 +146,11 @@
}
// Write Checksum of raw data.
- var checksum = Adler32Checksum.Calculate(data);
+ var checksum = adlerStream.Checksum;
var offset = headerLength + compressStream.Length;
- result[offset++] = (byte)(checksum >> 24);
- result[offset++] = (byte)(checksum >> 16);
- result[offset++] = (byte)(checksum >> 8);
- result[offset] = (byte)(checksum >> 0);
-
+ BinaryPrimitives.WriteUInt32BigEndian(result.AsSpan((int)offset, 4), checksum);
return result;
}
}
diff --git a/src/UglyToad.PdfPig/Images/Png/PngFromPdfImageFactory.cs b/src/UglyToad.PdfPig/Images/Png/PngFromPdfImageFactory.cs
index 229b5039..4ac9f734 100644
--- a/src/UglyToad.PdfPig/Images/Png/PngFromPdfImageFactory.cs
+++ b/src/UglyToad.PdfPig/Images/Png/PngFromPdfImageFactory.cs
@@ -1,11 +1,11 @@
-namespace UglyToad.PdfPig.Images.Png
+namespace UglyToad.PdfPig.Images.Png
{
using System.Diagnostics.CodeAnalysis;
- using Content;
+ using Content;
using Graphics.Colors;
using UglyToad.PdfPig.Core;
- internal static class PngFromPdfImageFactory
+ internal static class PngFromPdfImageFactory
{
private static bool TryGenerateSoftMask(IPdfImage image, [NotNullWhen(true)] out ReadOnlySpan maskBytes)
{
@@ -26,9 +26,9 @@
return false;
}
- if (!mask.TryGetBytesAsMemory(out var maskMemory))
- {
- return false;
+ if (!mask.TryGetBytesAsMemory(out var maskMemory))
+ {
+ return false;
}
try
@@ -67,24 +67,24 @@
bytesPure[actualSize - 2] == ReadHelper.AsciiCarriageReturn &&
bytesPure[actualSize - 1] == ReadHelper.AsciiLineFeed);
}
-
- public static bool TryGenerate(IPdfImage image, [NotNullWhen(true)] out byte[]? bytes)
- {
- bytes = null;
-
- var hasValidDetails = image.ColorSpaceDetails != null && !(image.ColorSpaceDetails is UnsupportedColorSpaceDetails);
-
+
+ public static bool TryGenerate(IPdfImage image, [NotNullWhen(true)] out byte[]? bytes)
+ {
+ bytes = null;
+
+ var hasValidDetails = image.ColorSpaceDetails != null && !(image.ColorSpaceDetails is UnsupportedColorSpaceDetails);
+
var isColorSpaceSupported = hasValidDetails && image.ColorSpaceDetails!.BaseType != ColorSpace.Pattern;
-
- if (!isColorSpaceSupported || !image.TryGetBytesAsMemory(out var imageMemory))
- {
- return false;
+
+ if (!isColorSpaceSupported || !image.TryGetBytesAsMemory(out var imageMemory))
+ {
+ return false;
}
- var bytesPure = imageMemory.Span;
-
- try
- {
+ var bytesPure = imageMemory.Span;
+
+ try
+ {
bytesPure = ColorSpaceDetailsByteConverter.Convert(image.ColorSpaceDetails!, bytesPure,
image.BitsPerComponent, image.WidthInSamples, image.HeightInSamples);
@@ -108,7 +108,7 @@
}
}
- var builder = PngBuilder.Create(image.WidthInSamples, image.HeightInSamples, hasMask);
+ var builder = PngBuilder.Create(image.WidthInSamples, image.HeightInSamples, hasMask);
if (!IsCorrectlySized(image, bytesPure))
{
@@ -183,17 +183,17 @@
}
}
}
- }
-
- bytes = builder.Save();
- return true;
- }
- catch
+ }
+
+ bytes = builder.Save();
+ return true;
+ }
+ catch
{
- // ignored.
- }
-
- return false;
+ // ignored.
+ }
+
+ return false;
}
- }
-}
+ }
+}
diff --git a/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs b/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs
index 30fe2b6e..7d383a30 100644
--- a/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs
+++ b/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs
@@ -239,6 +239,11 @@
var rootDictionary = DirectObjectFinder.Get(trailer.Root, pdfTokenScanner)!;
+ if (rootDictionary is null)
+ {
+ throw new PdfDocumentFormatException($"The root object in the trailer did not resolve to a readable dictionary.");
+ }
+
if (!rootDictionary.ContainsKey(NameToken.Type) && isLenientParsing)
{
rootDictionary = rootDictionary.With(NameToken.Type, NameToken.Catalog);
diff --git a/src/UglyToad.PdfPig/PdfExtensions.cs b/src/UglyToad.PdfPig/PdfExtensions.cs
index 66d8d061..6c2d7c6c 100644
--- a/src/UglyToad.PdfPig/PdfExtensions.cs
+++ b/src/UglyToad.PdfPig/PdfExtensions.cs
@@ -7,6 +7,7 @@
using Parser.Parts;
using Tokenization.Scanner;
using Tokens;
+ using UglyToad.PdfPig.Util;
///
/// Extensions for PDF types.
@@ -62,6 +63,18 @@
double totalMaxEstSize = stream.Data.Length * 100;
var transform = stream.Data;
+
+ var length = stream.StreamDictionary.GetIntOrDefault(NameToken.Length, -1);
+
+ // If a length is available and it's smaller than the actual data length, use that. This trims whitespace (e.g. newlines) that might have been introduced during transport.
+ // And with that it handles some issues before individual filters have to deal with it.
+ //
+ // Do this before the first filter (to handle cases like multiple filters, etc).
+ if (length > 0 && length < transform.Length)
+ {
+ transform = transform.Slice(0, length);
+ }
+
for (var i = 0; i < filters.Count; i++)
{
var filter = filters[i];
@@ -89,6 +102,18 @@
double totalMaxEstSize = stream.Data.Length * 100;
var transform = stream.Data;
+
+ var length = stream.StreamDictionary.GetIntOrDefault(NameToken.Length, -1);
+
+ // If a length is available and it's smaller than the actual data length, use that. This trims whitespace (e.g. newlines) that might have been introduced during transport.
+ // And with that it handles some issues before individual filters have to deal with it.
+ //
+ // Do this before the first filter (to handle cases like multiple filters, etc).
+ if (length > 0 && length < transform.Length)
+ {
+ transform = transform.Slice(0, length);
+ }
+
for (var i = 0; i < filters.Count; i++)
{
var filter = filters[i];