From d85ea4f95d24fd0e55411f61fb85829458fab0bc Mon Sep 17 00:00:00 2001 From: Jason Nelson Date: Mon, 1 Apr 2024 22:26:25 -0700 Subject: [PATCH] Update Filters to operate over Spans --- src/UglyToad.PdfPig/Filters/Ascii85Filter.cs | 9 +- .../Filters/AsciiHexDecodeFilter.cs | 16 ++-- .../Filters/CcittFaxDecodeFilter.cs | 8 +- .../Filters/CcittFaxDecoderStream.cs | 2 +- .../Filters/DctDecodeFilter.cs | 2 +- src/UglyToad.PdfPig/Filters/FlateFilter.cs | 6 +- src/UglyToad.PdfPig/Filters/IFilter.cs | 3 +- .../Filters/Jbig2DecodeFilter.cs | 2 +- .../Filters/JpxDecodeFilter.cs | 2 +- src/UglyToad.PdfPig/Filters/LzwFilter.cs | 6 +- .../Filters/RunLengthFilter.cs | 96 +++++++++---------- src/UglyToad.PdfPig/PdfExtensions.cs | 4 +- 12 files changed, 71 insertions(+), 85 deletions(-) diff --git a/src/UglyToad.PdfPig/Filters/Ascii85Filter.cs b/src/UglyToad.PdfPig/Filters/Ascii85Filter.cs index d1d3710a..71f55c55 100644 --- a/src/UglyToad.PdfPig/Filters/Ascii85Filter.cs +++ b/src/UglyToad.PdfPig/Filters/Ascii85Filter.cs @@ -29,18 +29,17 @@ public bool IsSupported { get; } = true; /// - public byte[] Decode(ReadOnlyMemory input, DictionaryToken streamDictionary, int filterIndex) + public byte[] Decode(ReadOnlySpan input, DictionaryToken streamDictionary, int filterIndex) { - var inputSpan = input.Span; var asciiBuffer = new byte[5]; var index = 0; using var writer = new ArrayPoolBufferWriter(); - for (var i = 0; i < inputSpan.Length; i++) + for (var i = 0; i < input.Length; i++) { - var value = inputSpan[i]; + var value = input[i]; if (IsWhiteSpace(value)) { @@ -49,7 +48,7 @@ if (value == EndOfDataBytes[0]) { - if (i == inputSpan.Length - 1 || inputSpan[i + 1] == EndOfDataBytes[1]) + if (i == input.Length - 1 || input[i + 1] == EndOfDataBytes[1]) { if (index > 0) { diff --git a/src/UglyToad.PdfPig/Filters/AsciiHexDecodeFilter.cs b/src/UglyToad.PdfPig/Filters/AsciiHexDecodeFilter.cs index 97e941eb..fe3c2e8e 100644 --- a/src/UglyToad.PdfPig/Filters/AsciiHexDecodeFilter.cs +++ b/src/UglyToad.PdfPig/Filters/AsciiHexDecodeFilter.cs @@ -1,7 +1,6 @@ namespace UglyToad.PdfPig.Filters { using System; - using System.Collections.Generic; using System.IO; using Tokens; @@ -9,7 +8,7 @@ /// /// Encodes/decodes data using the ASCII hexadecimal encoding where each byte is represented by two ASCII characters. /// - internal class AsciiHexDecodeFilter : IFilter + internal sealed class AsciiHexDecodeFilter : IFilter { private static readonly short[] ReverseHex = [ @@ -30,28 +29,27 @@ public bool IsSupported { get; } = true; /// - public byte[] Decode(ReadOnlyMemory input, DictionaryToken streamDictionary, int filterIndex) + public byte[] Decode(ReadOnlySpan input, DictionaryToken streamDictionary, int filterIndex) { - var inputSpan = input.Span; - var pair = new byte[2]; + Span pair = stackalloc byte[2]; var index = 0; using (var memoryStream = new MemoryStream()) using (var binaryWriter = new BinaryWriter(memoryStream)) { - for (var i = 0; i < inputSpan.Length; i++) + for (var i = 0; i < input.Length; i++) { - if (inputSpan[i] == '>') + if (input[i] == '>') { break; } - if (IsWhitespace(inputSpan[i]) || inputSpan[i] == '<') + if (IsWhitespace(input[i]) || input[i] == '<') { continue; } - pair[index] = inputSpan[i]; + pair[index] = input[i]; index++; if (index == 2) diff --git a/src/UglyToad.PdfPig/Filters/CcittFaxDecodeFilter.cs b/src/UglyToad.PdfPig/Filters/CcittFaxDecodeFilter.cs index 2e6f057e..e8bae30e 100644 --- a/src/UglyToad.PdfPig/Filters/CcittFaxDecodeFilter.cs +++ b/src/UglyToad.PdfPig/Filters/CcittFaxDecodeFilter.cs @@ -1,9 +1,7 @@ namespace UglyToad.PdfPig.Filters { using System; - using System.Collections.Generic; using System.IO; - using System.Linq; using Tokens; using Util; @@ -12,13 +10,13 @@ /// /// Ported from https://github.com/apache/pdfbox/blob/714156a15ea6fcfe44ac09345b01e192cbd74450/pdfbox/src/main/java/org/apache/pdfbox/filter/CCITTFaxFilter.java /// - internal class CcittFaxDecodeFilter : IFilter + internal sealed class CcittFaxDecodeFilter : IFilter { /// public bool IsSupported { get; } = true; /// - public byte[] Decode(ReadOnlyMemory input, DictionaryToken streamDictionary, int filterIndex) + public byte[] Decode(ReadOnlySpan input, DictionaryToken streamDictionary, int filterIndex) { var decodeParms = DecodeParameterResolver.GetFilterParameters(streamDictionary, filterIndex); @@ -38,7 +36,7 @@ var k = decodeParms.GetIntOrDefault(NameToken.K, 0); var encodedByteAlign = decodeParms.GetBooleanOrDefault(NameToken.EncodedByteAlign, false); - var compressionType = DetermineCompressionType(input.Span, k); + var compressionType = DetermineCompressionType(input, k); using (var stream = new CcittFaxDecoderStream(new MemoryStream(input.ToArray()), cols, compressionType, encodedByteAlign)) { var arraySize = (cols + 7) / 8 * rows; diff --git a/src/UglyToad.PdfPig/Filters/CcittFaxDecoderStream.cs b/src/UglyToad.PdfPig/Filters/CcittFaxDecoderStream.cs index 956218c3..12aa8fa9 100644 --- a/src/UglyToad.PdfPig/Filters/CcittFaxDecoderStream.cs +++ b/src/UglyToad.PdfPig/Filters/CcittFaxDecoderStream.cs @@ -329,7 +329,7 @@ if (index != columns) { - throw new InvalidOperationException("Sum of run-lengths does not equal scan line width: " + index + " > " + columns); + throw new InvalidOperationException($"Sum of run-lengths does not equal scan line width: {index} > {columns}"); } decodedLength = (index + 7) / 8; diff --git a/src/UglyToad.PdfPig/Filters/DctDecodeFilter.cs b/src/UglyToad.PdfPig/Filters/DctDecodeFilter.cs index 028c3bbd..62449644 100644 --- a/src/UglyToad.PdfPig/Filters/DctDecodeFilter.cs +++ b/src/UglyToad.PdfPig/Filters/DctDecodeFilter.cs @@ -9,7 +9,7 @@ public bool IsSupported { get; } = false; /// - public byte[] Decode(ReadOnlyMemory input, DictionaryToken streamDictionary, int filterIndex) + public byte[] Decode(ReadOnlySpan input, DictionaryToken streamDictionary, int filterIndex) { throw new NotSupportedException("The DST (Discrete Cosine Transform) Filter indicates data is encoded in JPEG format. " + "This filter is not currently supported but the raw data can be supplied to JPEG supporting libraries."); diff --git a/src/UglyToad.PdfPig/Filters/FlateFilter.cs b/src/UglyToad.PdfPig/Filters/FlateFilter.cs index a35d0ee2..dc3de860 100644 --- a/src/UglyToad.PdfPig/Filters/FlateFilter.cs +++ b/src/UglyToad.PdfPig/Filters/FlateFilter.cs @@ -2,10 +2,8 @@ { using Fonts; using System; - using System.Collections.Generic; using System.IO; using System.IO.Compression; - using System.Linq; using Tokens; using Util; @@ -20,7 +18,7 @@ /// See section 3.3.3 of the spec (version 1.7) for details on the FlateDecode filter. /// The flate decode filter may have a predictor function to further compress the stream. /// - internal class FlateFilter : IFilter + internal sealed class FlateFilter : IFilter { // Defaults are from table 3.7 in the spec (version 1.7) private const int DefaultColors = 1; @@ -34,7 +32,7 @@ public bool IsSupported { get; } = true; /// - public byte[] Decode(ReadOnlyMemory input, DictionaryToken streamDictionary, int filterIndex) + public byte[] Decode(ReadOnlySpan input, DictionaryToken streamDictionary, int filterIndex) { var parameters = DecodeParameterResolver.GetFilterParameters(streamDictionary, filterIndex); diff --git a/src/UglyToad.PdfPig/Filters/IFilter.cs b/src/UglyToad.PdfPig/Filters/IFilter.cs index 1ff76142..6765ebec 100644 --- a/src/UglyToad.PdfPig/Filters/IFilter.cs +++ b/src/UglyToad.PdfPig/Filters/IFilter.cs @@ -1,7 +1,6 @@ namespace UglyToad.PdfPig.Filters { using System; - using System.Collections.Generic; using Tokens; /// @@ -21,6 +20,6 @@ /// The dictionary of the (or other dictionary types, e.g. inline images) containing these bytes. /// The position of this filter in the pipeline used to encode data. /// The decoded bytes. - byte[] Decode(ReadOnlyMemory input, DictionaryToken streamDictionary, int filterIndex); + byte[] Decode(ReadOnlySpan input, DictionaryToken streamDictionary, int filterIndex); } } diff --git a/src/UglyToad.PdfPig/Filters/Jbig2DecodeFilter.cs b/src/UglyToad.PdfPig/Filters/Jbig2DecodeFilter.cs index 18e68fa4..05c698a2 100644 --- a/src/UglyToad.PdfPig/Filters/Jbig2DecodeFilter.cs +++ b/src/UglyToad.PdfPig/Filters/Jbig2DecodeFilter.cs @@ -9,7 +9,7 @@ public bool IsSupported { get; } = false; /// - public byte[] Decode(ReadOnlyMemory input, DictionaryToken streamDictionary, int filterIndex) + public byte[] Decode(ReadOnlySpan input, DictionaryToken streamDictionary, int filterIndex) { throw new NotSupportedException("The JBIG2 Filter for monochrome image data is not currently supported. " + "Try accessing the raw compressed data directly."); diff --git a/src/UglyToad.PdfPig/Filters/JpxDecodeFilter.cs b/src/UglyToad.PdfPig/Filters/JpxDecodeFilter.cs index 23a6eb98..ef31a83e 100644 --- a/src/UglyToad.PdfPig/Filters/JpxDecodeFilter.cs +++ b/src/UglyToad.PdfPig/Filters/JpxDecodeFilter.cs @@ -9,7 +9,7 @@ public bool IsSupported { get; } = false; /// - public byte[] Decode(ReadOnlyMemory input, DictionaryToken streamDictionary, int filterIndex) + public byte[] Decode(ReadOnlySpan input, DictionaryToken streamDictionary, int filterIndex) { throw new NotSupportedException("The JPX Filter (JPEG2000) for image data is not currently supported. " + "Try accessing the raw compressed data directly."); diff --git a/src/UglyToad.PdfPig/Filters/LzwFilter.cs b/src/UglyToad.PdfPig/Filters/LzwFilter.cs index 35e2eb8d..11960778 100644 --- a/src/UglyToad.PdfPig/Filters/LzwFilter.cs +++ b/src/UglyToad.PdfPig/Filters/LzwFilter.cs @@ -29,7 +29,7 @@ namespace UglyToad.PdfPig.Filters public bool IsSupported { get; } = true; /// - public byte[] Decode(ReadOnlyMemory input, DictionaryToken streamDictionary, int filterIndex) + public byte[] Decode(ReadOnlySpan input, DictionaryToken streamDictionary, int filterIndex) { var parameters = DecodeParameterResolver.GetFilterParameters(streamDictionary, filterIndex); @@ -39,7 +39,7 @@ namespace UglyToad.PdfPig.Filters if (predictor > 1) { - var decompressed = Decode(input.Span, earlyChange == 1); + var decompressed = Decode(input, earlyChange == 1); var colors = Math.Min(parameters.GetIntOrDefault(NameToken.Colors, DefaultColors), 32); var bitsPerComponent = parameters.GetIntOrDefault(NameToken.BitsPerComponent, DefaultBitsPerComponent); @@ -50,7 +50,7 @@ namespace UglyToad.PdfPig.Filters return result; } - var data = Decode(input.Span, earlyChange == 1); + var data = Decode(input, earlyChange == 1); return data; } diff --git a/src/UglyToad.PdfPig/Filters/RunLengthFilter.cs b/src/UglyToad.PdfPig/Filters/RunLengthFilter.cs index 4e5a19b0..d2cede37 100644 --- a/src/UglyToad.PdfPig/Filters/RunLengthFilter.cs +++ b/src/UglyToad.PdfPig/Filters/RunLengthFilter.cs @@ -1,16 +1,15 @@ namespace UglyToad.PdfPig.Filters { using System; - using System.Collections.Generic; - using System.IO; using Tokens; + using UglyToad.PdfPig.Core; /// /// /// The Run Length filterencodes data in a simple byte-oriented format based on run length. /// The encoded data is a sequence of runs, where each run consists of a length byte followed by 1 to 128 bytes of data. /// - internal class RunLengthFilter : IFilter + internal sealed class RunLengthFilter : IFilter { private const byte EndOfDataLength = 128; @@ -18,60 +17,55 @@ public bool IsSupported { get; } = true; /// - public byte[] Decode(ReadOnlyMemory input, DictionaryToken streamDictionary, int filterIndex) + public byte[] Decode(ReadOnlySpan input, DictionaryToken streamDictionary, int filterIndex) { - var inputSpan = input.Span; - using (var memoryStream = new MemoryStream()) - using (var writer = new BinaryWriter(memoryStream)) + using var output = new ArrayPoolBufferWriter(input.Length); + + var i = 0; + while (i < input.Length) { - var i = 0; - while (i < inputSpan.Length) + var runLength = input[i]; + + if (runLength == EndOfDataLength) { - var runLength = inputSpan[i]; - - if (runLength == EndOfDataLength) - { - break; - } - - // if length byte in range 0 - 127 copy the following length + 1 bytes literally to the output. - if (runLength <= 127) - { - var rangeToWriteLiterally = runLength + 1; - - while (rangeToWriteLiterally > 0) - { - i++; - - writer.Write(inputSpan[i]); - - rangeToWriteLiterally--; - } - - // Move to the following byte. - i++; - } - // Otherwise copy the single following byte 257 - length times (between 2 - 128 times) - else - { - var numberOfTimesToCopy = 257 - runLength; - - var byteToCopy = inputSpan[i + 1]; - - for (int j = 0; j < numberOfTimesToCopy; j++) - { - writer.Write(byteToCopy); - } - - // Move to the single byte after the byte to copy. - i += 2; - } + break; } - writer.Flush(); + // if length byte in range 0 - 127 copy the following length + 1 bytes literally to the output. + if (runLength <= 127) + { + var rangeToWriteLiterally = runLength + 1; - return memoryStream.ToArray(); + while (rangeToWriteLiterally > 0) + { + i++; + + output.Write(input[i]); + + rangeToWriteLiterally--; + } + + // Move to the following byte. + i++; + } + // Otherwise copy the single following byte 257 - length times (between 2 - 128 times) + else + { + var numberOfTimesToCopy = 257 - runLength; + + var byteToCopy = input[i + 1]; + + for (int j = 0; j < numberOfTimesToCopy; j++) + { + output.Write(byteToCopy); + } + + // Move to the single byte after the byte to copy. + i += 2; + } } + + return output.WrittenSpan.ToArray(); } } -} +} \ No newline at end of file diff --git a/src/UglyToad.PdfPig/PdfExtensions.cs b/src/UglyToad.PdfPig/PdfExtensions.cs index 60411fd6..303c3990 100644 --- a/src/UglyToad.PdfPig/PdfExtensions.cs +++ b/src/UglyToad.PdfPig/PdfExtensions.cs @@ -63,7 +63,7 @@ var transform = stream.Data; for (var i = 0; i < filters.Count; i++) { - transform = filters[i].Decode(transform, stream.StreamDictionary, i); + transform = filters[i].Decode(transform.Span, stream.StreamDictionary, i); } return transform; @@ -79,7 +79,7 @@ var transform = stream.Data; for (var i = 0; i < filters.Count; i++) { - transform = filters[i].Decode(transform, stream.StreamDictionary, i); + transform = filters[i].Decode(transform.Span, stream.StreamDictionary, i); } return transform;