Update Filters to operate over Spans

This commit is contained in:
Jason Nelson 2024-04-01 22:26:25 -07:00 committed by BobLd
parent 0efa68a8e4
commit d85ea4f95d
12 changed files with 71 additions and 85 deletions

View File

@ -29,18 +29,17 @@
public bool IsSupported { get; } = true; public bool IsSupported { get; } = true;
/// <inheritdoc /> /// <inheritdoc />
public byte[] Decode(ReadOnlyMemory<byte> input, DictionaryToken streamDictionary, int filterIndex) public byte[] Decode(ReadOnlySpan<byte> input, DictionaryToken streamDictionary, int filterIndex)
{ {
var inputSpan = input.Span;
var asciiBuffer = new byte[5]; var asciiBuffer = new byte[5];
var index = 0; var index = 0;
using var writer = new ArrayPoolBufferWriter<byte>(); using var writer = new ArrayPoolBufferWriter<byte>();
for (var i = 0; i < inputSpan.Length; i++) for (var i = 0; i < input.Length; i++)
{ {
var value = inputSpan[i]; var value = input[i];
if (IsWhiteSpace(value)) if (IsWhiteSpace(value))
{ {
@ -49,7 +48,7 @@
if (value == EndOfDataBytes[0]) if (value == EndOfDataBytes[0])
{ {
if (i == inputSpan.Length - 1 || inputSpan[i + 1] == EndOfDataBytes[1]) if (i == input.Length - 1 || input[i + 1] == EndOfDataBytes[1])
{ {
if (index > 0) if (index > 0)
{ {

View File

@ -1,7 +1,6 @@
namespace UglyToad.PdfPig.Filters namespace UglyToad.PdfPig.Filters
{ {
using System; using System;
using System.Collections.Generic;
using System.IO; using System.IO;
using Tokens; using Tokens;
@ -9,7 +8,7 @@
/// <summary> /// <summary>
/// Encodes/decodes data using the ASCII hexadecimal encoding where each byte is represented by two ASCII characters. /// Encodes/decodes data using the ASCII hexadecimal encoding where each byte is represented by two ASCII characters.
/// </summary> /// </summary>
internal class AsciiHexDecodeFilter : IFilter internal sealed class AsciiHexDecodeFilter : IFilter
{ {
private static readonly short[] ReverseHex = private static readonly short[] ReverseHex =
[ [
@ -30,28 +29,27 @@
public bool IsSupported { get; } = true; public bool IsSupported { get; } = true;
/// <inheritdoc /> /// <inheritdoc />
public byte[] Decode(ReadOnlyMemory<byte> input, DictionaryToken streamDictionary, int filterIndex) public byte[] Decode(ReadOnlySpan<byte> input, DictionaryToken streamDictionary, int filterIndex)
{ {
var inputSpan = input.Span; Span<byte> pair = stackalloc byte[2];
var pair = new byte[2];
var index = 0; var index = 0;
using (var memoryStream = new MemoryStream()) using (var memoryStream = new MemoryStream())
using (var binaryWriter = new BinaryWriter(memoryStream)) using (var binaryWriter = new BinaryWriter(memoryStream))
{ {
for (var i = 0; i < inputSpan.Length; i++) for (var i = 0; i < input.Length; i++)
{ {
if (inputSpan[i] == '>') if (input[i] == '>')
{ {
break; break;
} }
if (IsWhitespace(inputSpan[i]) || inputSpan[i] == '<') if (IsWhitespace(input[i]) || input[i] == '<')
{ {
continue; continue;
} }
pair[index] = inputSpan[i]; pair[index] = input[i];
index++; index++;
if (index == 2) if (index == 2)

View File

@ -1,9 +1,7 @@
namespace UglyToad.PdfPig.Filters namespace UglyToad.PdfPig.Filters
{ {
using System; using System;
using System.Collections.Generic;
using System.IO; using System.IO;
using System.Linq;
using Tokens; using Tokens;
using Util; using Util;
@ -12,13 +10,13 @@
/// ///
/// Ported from https://github.com/apache/pdfbox/blob/714156a15ea6fcfe44ac09345b01e192cbd74450/pdfbox/src/main/java/org/apache/pdfbox/filter/CCITTFaxFilter.java /// Ported from https://github.com/apache/pdfbox/blob/714156a15ea6fcfe44ac09345b01e192cbd74450/pdfbox/src/main/java/org/apache/pdfbox/filter/CCITTFaxFilter.java
/// </summary> /// </summary>
internal class CcittFaxDecodeFilter : IFilter internal sealed class CcittFaxDecodeFilter : IFilter
{ {
/// <inheritdoc /> /// <inheritdoc />
public bool IsSupported { get; } = true; public bool IsSupported { get; } = true;
/// <inheritdoc /> /// <inheritdoc />
public byte[] Decode(ReadOnlyMemory<byte> input, DictionaryToken streamDictionary, int filterIndex) public byte[] Decode(ReadOnlySpan<byte> input, DictionaryToken streamDictionary, int filterIndex)
{ {
var decodeParms = DecodeParameterResolver.GetFilterParameters(streamDictionary, filterIndex); var decodeParms = DecodeParameterResolver.GetFilterParameters(streamDictionary, filterIndex);
@ -38,7 +36,7 @@
var k = decodeParms.GetIntOrDefault(NameToken.K, 0); var k = decodeParms.GetIntOrDefault(NameToken.K, 0);
var encodedByteAlign = decodeParms.GetBooleanOrDefault(NameToken.EncodedByteAlign, false); var encodedByteAlign = decodeParms.GetBooleanOrDefault(NameToken.EncodedByteAlign, false);
var compressionType = DetermineCompressionType(input.Span, k); var compressionType = DetermineCompressionType(input, k);
using (var stream = new CcittFaxDecoderStream(new MemoryStream(input.ToArray()), cols, compressionType, encodedByteAlign)) using (var stream = new CcittFaxDecoderStream(new MemoryStream(input.ToArray()), cols, compressionType, encodedByteAlign))
{ {
var arraySize = (cols + 7) / 8 * rows; var arraySize = (cols + 7) / 8 * rows;

View File

@ -329,7 +329,7 @@
if (index != columns) if (index != columns)
{ {
throw new InvalidOperationException("Sum of run-lengths does not equal scan line width: " + index + " > " + columns); throw new InvalidOperationException($"Sum of run-lengths does not equal scan line width: {index} > {columns}");
} }
decodedLength = (index + 7) / 8; decodedLength = (index + 7) / 8;

View File

@ -9,7 +9,7 @@
public bool IsSupported { get; } = false; public bool IsSupported { get; } = false;
/// <inheritdoc /> /// <inheritdoc />
public byte[] Decode(ReadOnlyMemory<byte> input, DictionaryToken streamDictionary, int filterIndex) public byte[] Decode(ReadOnlySpan<byte> input, DictionaryToken streamDictionary, int filterIndex)
{ {
throw new NotSupportedException("The DST (Discrete Cosine Transform) Filter indicates data is encoded in JPEG format. " + throw new NotSupportedException("The DST (Discrete Cosine Transform) Filter indicates data is encoded in JPEG format. " +
"This filter is not currently supported but the raw data can be supplied to JPEG supporting libraries."); "This filter is not currently supported but the raw data can be supplied to JPEG supporting libraries.");

View File

@ -2,10 +2,8 @@
{ {
using Fonts; using Fonts;
using System; using System;
using System.Collections.Generic;
using System.IO; using System.IO;
using System.IO.Compression; using System.IO.Compression;
using System.Linq;
using Tokens; using Tokens;
using Util; using Util;
@ -20,7 +18,7 @@
/// See section 3.3.3 of the spec (version 1.7) for details on the FlateDecode filter. /// See section 3.3.3 of the spec (version 1.7) for details on the FlateDecode filter.
/// The flate decode filter may have a predictor function to further compress the stream. /// The flate decode filter may have a predictor function to further compress the stream.
/// </remarks> /// </remarks>
internal class FlateFilter : IFilter internal sealed class FlateFilter : IFilter
{ {
// Defaults are from table 3.7 in the spec (version 1.7) // Defaults are from table 3.7 in the spec (version 1.7)
private const int DefaultColors = 1; private const int DefaultColors = 1;
@ -34,7 +32,7 @@
public bool IsSupported { get; } = true; public bool IsSupported { get; } = true;
/// <inheritdoc /> /// <inheritdoc />
public byte[] Decode(ReadOnlyMemory<byte> input, DictionaryToken streamDictionary, int filterIndex) public byte[] Decode(ReadOnlySpan<byte> input, DictionaryToken streamDictionary, int filterIndex)
{ {
var parameters = DecodeParameterResolver.GetFilterParameters(streamDictionary, filterIndex); var parameters = DecodeParameterResolver.GetFilterParameters(streamDictionary, filterIndex);

View File

@ -1,7 +1,6 @@
namespace UglyToad.PdfPig.Filters namespace UglyToad.PdfPig.Filters
{ {
using System; using System;
using System.Collections.Generic;
using Tokens; using Tokens;
/// <summary> /// <summary>
@ -21,6 +20,6 @@
/// <param name="streamDictionary">The dictionary of the <see cref="StreamToken"/> (or other dictionary types, e.g. inline images) containing these bytes.</param> /// <param name="streamDictionary">The dictionary of the <see cref="StreamToken"/> (or other dictionary types, e.g. inline images) containing these bytes.</param>
/// <param name="filterIndex">The position of this filter in the pipeline used to encode data.</param> /// <param name="filterIndex">The position of this filter in the pipeline used to encode data.</param>
/// <returns>The decoded bytes.</returns> /// <returns>The decoded bytes.</returns>
byte[] Decode(ReadOnlyMemory<byte> input, DictionaryToken streamDictionary, int filterIndex); byte[] Decode(ReadOnlySpan<byte> input, DictionaryToken streamDictionary, int filterIndex);
} }
} }

View File

@ -9,7 +9,7 @@
public bool IsSupported { get; } = false; public bool IsSupported { get; } = false;
/// <inheritdoc /> /// <inheritdoc />
public byte[] Decode(ReadOnlyMemory<byte> input, DictionaryToken streamDictionary, int filterIndex) public byte[] Decode(ReadOnlySpan<byte> input, DictionaryToken streamDictionary, int filterIndex)
{ {
throw new NotSupportedException("The JBIG2 Filter for monochrome image data is not currently supported. " + throw new NotSupportedException("The JBIG2 Filter for monochrome image data is not currently supported. " +
"Try accessing the raw compressed data directly."); "Try accessing the raw compressed data directly.");

View File

@ -9,7 +9,7 @@
public bool IsSupported { get; } = false; public bool IsSupported { get; } = false;
/// <inheritdoc /> /// <inheritdoc />
public byte[] Decode(ReadOnlyMemory<byte> input, DictionaryToken streamDictionary, int filterIndex) public byte[] Decode(ReadOnlySpan<byte> input, DictionaryToken streamDictionary, int filterIndex)
{ {
throw new NotSupportedException("The JPX Filter (JPEG2000) for image data is not currently supported. " + throw new NotSupportedException("The JPX Filter (JPEG2000) for image data is not currently supported. " +
"Try accessing the raw compressed data directly."); "Try accessing the raw compressed data directly.");

View File

@ -29,7 +29,7 @@ namespace UglyToad.PdfPig.Filters
public bool IsSupported { get; } = true; public bool IsSupported { get; } = true;
/// <inheritdoc /> /// <inheritdoc />
public byte[] Decode(ReadOnlyMemory<byte> input, DictionaryToken streamDictionary, int filterIndex) public byte[] Decode(ReadOnlySpan<byte> input, DictionaryToken streamDictionary, int filterIndex)
{ {
var parameters = DecodeParameterResolver.GetFilterParameters(streamDictionary, filterIndex); var parameters = DecodeParameterResolver.GetFilterParameters(streamDictionary, filterIndex);
@ -39,7 +39,7 @@ namespace UglyToad.PdfPig.Filters
if (predictor > 1) if (predictor > 1)
{ {
var decompressed = Decode(input.Span, earlyChange == 1); var decompressed = Decode(input, earlyChange == 1);
var colors = Math.Min(parameters.GetIntOrDefault(NameToken.Colors, DefaultColors), 32); var colors = Math.Min(parameters.GetIntOrDefault(NameToken.Colors, DefaultColors), 32);
var bitsPerComponent = parameters.GetIntOrDefault(NameToken.BitsPerComponent, DefaultBitsPerComponent); var bitsPerComponent = parameters.GetIntOrDefault(NameToken.BitsPerComponent, DefaultBitsPerComponent);
@ -50,7 +50,7 @@ namespace UglyToad.PdfPig.Filters
return result; return result;
} }
var data = Decode(input.Span, earlyChange == 1); var data = Decode(input, earlyChange == 1);
return data; return data;
} }

View File

@ -1,16 +1,15 @@
namespace UglyToad.PdfPig.Filters namespace UglyToad.PdfPig.Filters
{ {
using System; using System;
using System.Collections.Generic;
using System.IO;
using Tokens; using Tokens;
using UglyToad.PdfPig.Core;
/// <inheritdoc /> /// <inheritdoc />
/// <summary> /// <summary>
/// The Run Length filter encodes data in a simple byte-oriented format based on run length. /// The Run Length filter encodes data in a simple byte-oriented format based on run length.
/// The encoded data is a sequence of runs, where each run consists of a length byte followed by 1 to 128 bytes of data. /// The encoded data is a sequence of runs, where each run consists of a length byte followed by 1 to 128 bytes of data.
/// </summary> /// </summary>
internal class RunLengthFilter : IFilter internal sealed class RunLengthFilter : IFilter
{ {
private const byte EndOfDataLength = 128; private const byte EndOfDataLength = 128;
@ -18,60 +17,55 @@
public bool IsSupported { get; } = true; public bool IsSupported { get; } = true;
/// <inheritdoc /> /// <inheritdoc />
public byte[] Decode(ReadOnlyMemory<byte> input, DictionaryToken streamDictionary, int filterIndex) public byte[] Decode(ReadOnlySpan<byte> input, DictionaryToken streamDictionary, int filterIndex)
{ {
var inputSpan = input.Span; using var output = new ArrayPoolBufferWriter<byte>(input.Length);
using (var memoryStream = new MemoryStream())
using (var writer = new BinaryWriter(memoryStream)) var i = 0;
while (i < input.Length)
{ {
var i = 0; var runLength = input[i];
while (i < inputSpan.Length)
if (runLength == EndOfDataLength)
{ {
var runLength = inputSpan[i]; break;
if (runLength == EndOfDataLength)
{
break;
}
// if length byte in range 0 - 127 copy the following length + 1 bytes literally to the output.
if (runLength <= 127)
{
var rangeToWriteLiterally = runLength + 1;
while (rangeToWriteLiterally > 0)
{
i++;
writer.Write(inputSpan[i]);
rangeToWriteLiterally--;
}
// Move to the following byte.
i++;
}
// Otherwise copy the single following byte 257 - length times (between 2 - 128 times)
else
{
var numberOfTimesToCopy = 257 - runLength;
var byteToCopy = inputSpan[i + 1];
for (int j = 0; j < numberOfTimesToCopy; j++)
{
writer.Write(byteToCopy);
}
// Move to the single byte after the byte to copy.
i += 2;
}
} }
writer.Flush(); // if length byte in range 0 - 127 copy the following length + 1 bytes literally to the output.
if (runLength <= 127)
{
var rangeToWriteLiterally = runLength + 1;
return memoryStream.ToArray(); while (rangeToWriteLiterally > 0)
{
i++;
output.Write(input[i]);
rangeToWriteLiterally--;
}
// Move to the following byte.
i++;
}
// Otherwise copy the single following byte 257 - length times (between 2 - 128 times)
else
{
var numberOfTimesToCopy = 257 - runLength;
var byteToCopy = input[i + 1];
for (int j = 0; j < numberOfTimesToCopy; j++)
{
output.Write(byteToCopy);
}
// Move to the single byte after the byte to copy.
i += 2;
}
} }
return output.WrittenSpan.ToArray();
} }
} }
} }

View File

@ -63,7 +63,7 @@
var transform = stream.Data; var transform = stream.Data;
for (var i = 0; i < filters.Count; i++) for (var i = 0; i < filters.Count; i++)
{ {
transform = filters[i].Decode(transform, stream.StreamDictionary, i); transform = filters[i].Decode(transform.Span, stream.StreamDictionary, i);
} }
return transform; return transform;
@ -79,7 +79,7 @@
var transform = stream.Data; var transform = stream.Data;
for (var i = 0; i < filters.Count; i++) for (var i = 0; i < filters.Count; i++)
{ {
transform = filters[i].Decode(transform, stream.StreamDictionary, i); transform = filters[i].Decode(transform.Span, stream.StreamDictionary, i);
} }
return transform; return transform;