add lzw filter support

This commit is contained in:
Eliot Jones
2019-05-18 12:25:47 +01:00
parent 9a8becde3e
commit f3bc3a37b9
4 changed files with 310 additions and 1 deletions

View File

@@ -0,0 +1,84 @@
namespace UglyToad.PdfPig.Tests.Filters
{
using PdfPig.Filters;
using Xunit;
/// <summary>
/// Checks that <see cref="BitStream"/> returns the expected values when bit fields of
/// varying widths are read in sequence from a fixed 11-byte sample.
/// </summary>
public class BitStreamTests
{
    // 88 bits of sample input shared by every test; each test reads from the start.
    private readonly byte[] data = {
        0b00101001,
        0b11011100,
        0b01000110,
        0b11111011,
        0b00101010,
        0b11010111,
        0b10010001,
        0b11011011,
        0b11110000,
        0b00010111,
        0b10101011
    };

    /// <summary>
    /// Reads a mix of widths (9, 9, 11, 5, 17 bits) one after another.
    /// </summary>
    [Fact]
    public void GetNumbers()
    {
        var stream = new BitStream(data);

        Assert.Equal(0b001010011, stream.Get(9));
        Assert.Equal(0b101110001, stream.Get(9));
        Assert.Equal(0b00011011111, stream.Get(11));
        Assert.Equal(0b01100, stream.Get(5));
        Assert.Equal(0b10101011010111100, stream.Get(17));
    }

    /// <summary>
    /// Reads widths that each span more than one byte boundary (13, 15, 13 bits).
    /// </summary>
    [Fact]
    public void GetNumbersCrossingBoundaries()
    {
        var stream = new BitStream(data);

        Assert.Equal(0b0010100111011, stream.Get(13));
        Assert.Equal(0b100010001101111, stream.Get(15));
        Assert.Equal(0b1011001010101, stream.Get(13));
    }

    /// <summary>
    /// Reads nine 9-bit values followed by a 7-bit value, consuming all 88 bits so that
    /// the within-byte offset cycles through every position and returns to zero.
    /// </summary>
    [Fact]
    public void GetNumbersUntilOffsetResets()
    {
        var stream = new BitStream(data);

        Assert.Equal(0b001010011, stream.Get(9));
        Assert.Equal(0b101110001, stream.Get(9));
        Assert.Equal(0b000110111, stream.Get(9));
        Assert.Equal(0b110110010, stream.Get(9));
        Assert.Equal(0b101011010, stream.Get(9));
        Assert.Equal(0b111100100, stream.Get(9));
        Assert.Equal(0b011101101, stream.Get(9));
        Assert.Equal(0b111110000, stream.Get(9));
        Assert.Equal(0b000101111, stream.Get(9));
        Assert.Equal(0b0101011, stream.Get(7));
    }
}
}

View File

@@ -0,0 +1,65 @@
namespace UglyToad.PdfPig.Filters
{
using System;
using System.Collections.Generic;
/// <summary>
/// Reads unsigned bit fields of arbitrary width from a byte sequence, most significant bit first.
/// Successive calls to <see cref="Get"/> continue from where the previous call stopped.
/// </summary>
internal class BitStream
{
    private const int BitsPerByte = 8;
    private const int MaximumBitsPerRead = sizeof(int) * BitsPerByte;

    private readonly IReadOnlyList<byte> data;

    // Position of the next unread bit: the byte it lives in and its offset (0-7) from that byte's top bit.
    private int currentWithinByteBitOffset;
    private int currentByteIndex;

    /// <summary>
    /// Creates a reader positioned at the first bit of <paramref name="data"/>.
    /// </summary>
    /// <param name="data">The bytes to read bits from.</param>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
    public BitStream(IReadOnlyList<byte> data)
    {
        this.data = data ?? throw new ArgumentNullException(nameof(data));
    }

    /// <summary>
    /// Reads the next <paramref name="numberOfBits"/> bits and returns them right-aligned in an <see cref="int"/>.
    /// </summary>
    /// <param name="numberOfBits">How many bits to read, from 0 to 32. Reading 0 bits returns 0 and does not move the position.</param>
    /// <returns>The bits read, with the first bit read as the most significant bit of the value.</returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="numberOfBits"/> is negative or larger than 32.</exception>
    /// <exception cref="InvalidOperationException">
    /// Fewer than <paramref name="numberOfBits"/> bits remain. The position is left unchanged in that case.
    /// </exception>
    public int Get(int numberOfBits)
    {
        if (numberOfBits < 0 || numberOfBits > MaximumBitsPerRead)
        {
            throw new ArgumentOutOfRangeException(nameof(numberOfBits), numberOfBits,
                $"The number of bits to read must be between 0 and {MaximumBitsPerRead}.");
        }

        if (numberOfBits == 0)
        {
            return 0;
        }

        var totalBits = numberOfBits + currentWithinByteBitOffset;
        var endWithinByteBitOffset = totalBits % BitsPerByte;
        var numberOfBytesToRead = (totalBits + BitsPerByte - 1) / BitsPerByte;

        // Validate up front so a failed read does not leave the position half-advanced.
        if (numberOfBytesToRead > data.Count - currentByteIndex)
        {
            throw new InvalidOperationException($"Reached the end of the bit stream while trying to read {numberOfBits} bits.");
        }

        // A read can touch up to five bytes (e.g. 31 bits starting mid-byte), which does not fit in an int.
        long accumulator = 0;
        for (var i = 0; i < numberOfBytesToRead; i++)
        {
            accumulator = (accumulator << BitsPerByte) | data[currentByteIndex + i];
        }

        // Trim trailing bits that belong to the next read.
        if (endWithinByteBitOffset > 0)
        {
            accumulator >>= BitsPerByte - endWithinByteBitOffset;
        }

        // 'And' out the leading bits that were consumed by earlier reads.
        var mask = (1L << numberOfBits) - 1;

        // Only whole bytes that were fully consumed are skipped; a partially read byte stays current.
        currentByteIndex += totalBits / BitsPerByte;
        currentWithinByteBitOffset = endWithinByteBitOffset;

        return unchecked((int)(accumulator & mask));
    }
}
}

View File

@@ -0,0 +1,157 @@
namespace UglyToad.PdfPig.Filters
{
using System;
using System.Collections.Generic;
using Tokens;
using Util;
/// <summary>
/// Filter that decompresses LZW encoded data and, when the decode parameters specify a
/// predictor greater than 1, passes the decompressed bytes through the predictor decoder.
/// </summary>
internal class LzwFilter : IFilter
{
    private const int DefaultColors = 1;
    private const int DefaultBitsPerComponent = 8;
    private const int DefaultColumns = 1;

    // Reserved codes that follow the 256 single-byte entries.
    private const int ClearTable = 256;
    private const int EodMarker = 257;

    // Table sizes at which the code width grows to 10, 11 and 12 bits respectively.
    private const int NineBitBoundary = 511;
    private const int TenBitBoundary = 1023;
    private const int ElevenBitBoundary = 2047;

    private readonly IDecodeParameterResolver decodeParameterResolver;
    private readonly IPngPredictor pngPredictor;

    /// <summary>
    /// Creates the filter.
    /// </summary>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public LzwFilter(IDecodeParameterResolver decodeParameterResolver, IPngPredictor pngPredictor)
    {
        this.decodeParameterResolver = decodeParameterResolver ?? throw new ArgumentNullException(nameof(decodeParameterResolver));
        this.pngPredictor = pngPredictor ?? throw new ArgumentNullException(nameof(pngPredictor));
    }

    /// <summary>
    /// Decompresses <paramref name="input"/> using the parameters resolved for the filter at
    /// <paramref name="filterIndex"/> in <paramref name="streamDictionary"/>.
    /// </summary>
    /// <returns>The decompressed bytes, predictor-decoded when the predictor parameter is greater than 1.</returns>
    public byte[] Decode(IReadOnlyList<byte> input, DictionaryToken streamDictionary, int filterIndex)
    {
        var parameters = decodeParameterResolver.GetFilterParameters(streamDictionary, filterIndex);

        var predictor = parameters.GetIntOrDefault(NameToken.Predictor, -1);
        var earlyChange = parameters.GetIntOrDefault(NameToken.EarlyChange, 1);

        var decompressed = Decode(input, earlyChange == 1);

        if (predictor <= 1)
        {
            // No predictor post-processing requested.
            return decompressed;
        }

        var colors = Math.Min(parameters.GetIntOrDefault(NameToken.Colors, DefaultColors), 32);
        var bitsPerComponent = parameters.GetIntOrDefault(NameToken.BitsPerComponent, DefaultBitsPerComponent);
        var columns = parameters.GetIntOrDefault(NameToken.Columns, DefaultColumns);

        return pngPredictor.Decode(decompressed, predictor, colors, bitsPerComponent, columns);
    }

    /// <summary>
    /// Runs the LZW decode loop, reading codes until the end-of-data marker is seen.
    /// </summary>
    /// <param name="input">The compressed bytes.</param>
    /// <param name="isEarlyChange">When false, each code-width boundary is one entry later.</param>
    private static byte[] Decode(IReadOnlyList<byte> input, bool isEarlyChange)
    {
        var output = new List<byte>();
        var dictionary = GetDefaultTable();
        var reader = new BitStream(input);
        var boundaryShift = isEarlyChange ? 0 : 1;

        var codeLength = 9;
        var previousCode = -1;

        for (var code = reader.Get(codeLength); code != EodMarker; code = reader.Get(codeLength))
        {
            if (code == ClearTable)
            {
                // Start over with the initial table and the narrowest code width.
                dictionary = GetDefaultTable();
                previousCode = -1;
                codeLength = 9;
                continue;
            }

            if (dictionary.TryGetValue(code, out var known))
            {
                output.AddRange(known);

                // The first code after a reset has no predecessor, so it adds nothing to the table.
                if (previousCode >= 0)
                {
                    dictionary[dictionary.Count] = Extend(dictionary[previousCode], known[0]);
                }
            }
            else
            {
                // Code not in the table yet: it is the previous sequence plus that sequence's first byte.
                var prefix = dictionary[previousCode];
                var created = Extend(prefix, prefix[0]);
                output.AddRange(created);
                dictionary[dictionary.Count] = created;
            }

            previousCode = code;
            codeLength = GetCodeLength(dictionary.Count, boundaryShift);
        }

        return output.ToArray();
    }

    /// <summary>
    /// Returns a new array holding <paramref name="prefix"/> followed by <paramref name="suffix"/>.
    /// </summary>
    private static byte[] Extend(byte[] prefix, byte suffix)
    {
        var extended = new byte[prefix.Length + 1];
        Array.Copy(prefix, extended, prefix.Length);
        extended[prefix.Length] = suffix;
        return extended;
    }

    /// <summary>
    /// Picks the width in bits of the next code from the current table size.
    /// </summary>
    private static int GetCodeLength(int tableSize, int boundaryShift)
    {
        if (tableSize >= ElevenBitBoundary + boundaryShift)
        {
            return 12;
        }

        if (tableSize >= TenBitBoundary + boundaryShift)
        {
            return 11;
        }

        if (tableSize >= NineBitBoundary + boundaryShift)
        {
            return 10;
        }

        return 9;
    }

    /// <summary>
    /// Builds the initial table: one single-byte sequence per byte value, plus null
    /// placeholders for the clear-table and end-of-data codes.
    /// </summary>
    private static Dictionary<int, byte[]> GetDefaultTable()
    {
        var table = new Dictionary<int, byte[]>();

        for (var value = 0; value < 256; value++)
        {
            table[value] = new[] { (byte)value };
        }

        table[ClearTable] = null;
        table[EodMarker] = null;

        return table;
    }
}
}

View File

@@ -17,6 +17,7 @@
IFilter AsciiHexFunc() => new AsciiHexDecodeFilter();
IFilter FlateFunc() => new FlateFilter(decodeParameterResolver, pngPredictor, log);
IFilter RunLengthFunc() => new RunLengthFilter();
IFilter LzwFunc() => new LzwFilter(decodeParameterResolver, pngPredictor);
filterFactories = new Dictionary<string, Func<IFilter>>
{
@@ -27,7 +28,9 @@
{NameToken.FlateDecode.Data, FlateFunc},
{NameToken.FlateDecodeAbbreviation.Data, FlateFunc},
{NameToken.RunLengthDecode.Data, RunLengthFunc},
{NameToken.RunLengthDecodeAbbreviation.Data, RunLengthFunc}
{NameToken.RunLengthDecodeAbbreviation.Data, RunLengthFunc},
{NameToken.LzwDecode, LzwFunc},
{NameToken.LzwDecodeAbbreviation, LzwFunc}
};
}