diff --git a/src/UglyToad.PdfPig.Tests/Filters/BitStreamTests.cs b/src/UglyToad.PdfPig.Tests/Filters/BitStreamTests.cs
new file mode 100644
index 00000000..a0b152ba
--- /dev/null
+++ b/src/UglyToad.PdfPig.Tests/Filters/BitStreamTests.cs
@@ -0,0 +1,124 @@
+namespace UglyToad.PdfPig.Tests.Filters
+{
+    using System;
+    using PdfPig.Filters;
+    using Xunit;
+
+    /// <summary>
+    /// Tests reading bit fields of arbitrary width, most significant bit first, from a <see cref="BitStream"/>.
+    /// </summary>
+    public class BitStreamTests
+    {
+        // 11 bytes (88 bits) of fixed sample data shared by the tests.
+        private readonly byte[] data = {
+            0b00101001,
+            0b11011100,
+            0b01000110,
+            0b11111011,
+            0b00101010,
+            0b11010111,
+            0b10010001,
+            0b11011011,
+            0b11110000,
+            0b00010111,
+            0b10101011
+        };
+
+        [Fact]
+        public void GetNumbers()
+        {
+            var bitStream = new BitStream(data);
+
+            var first = bitStream.Get(9);
+            var second = bitStream.Get(9);
+            var third = bitStream.Get(11);
+            var fourth = bitStream.Get(5);
+            var fifth = bitStream.Get(17);
+
+            Assert.Equal(0b001010011, first);
+            Assert.Equal(0b101110001, second);
+            Assert.Equal(0b00011011111, third);
+            Assert.Equal(0b01100, fourth);
+            Assert.Equal(0b10101011010111100, fifth);
+        }
+
+        [Fact]
+        public void GetNumbersCrossingBoundaries()
+        {
+            var bitStream = new BitStream(data);
+
+            var first = bitStream.Get(13);
+            var second = bitStream.Get(15);
+            var third = bitStream.Get(13);
+
+            Assert.Equal(0b0010100111011, first);
+            Assert.Equal(0b100010001101111, second);
+            Assert.Equal(0b1011001010101, third);
+        }
+
+        [Fact]
+        public void GetNumbersUntilOffsetResets()
+        {
+            var bitStream = new BitStream(data);
+
+            var first = bitStream.Get(9);
+            var second = bitStream.Get(9);
+            var third = bitStream.Get(9);
+            var fourth = bitStream.Get(9);
+            var fifth = bitStream.Get(9);
+            var sixth = bitStream.Get(9);
+            var seventh = bitStream.Get(9);
+            var eighth = bitStream.Get(9);
+            var ninth = bitStream.Get(9);
+
+            var end = bitStream.Get(7);
+
+            Assert.Equal(0b001010011, first);
+            Assert.Equal(0b101110001, second);
+            Assert.Equal(0b000110111, third);
+            Assert.Equal(0b110110010, fourth);
+            Assert.Equal(0b101011010, fifth);
+            Assert.Equal(0b111100100, sixth);
+            Assert.Equal(0b011101101, seventh);
+            Assert.Equal(0b111110000, eighth);
+            Assert.Equal(0b000101111, ninth);
+
+            Assert.Equal(0b0101011, end);
+        }
+
+        [Fact]
+        public void GetNumberSpanningFiveBytes()
+        {
+            var bitStream = new BitStream(data);
+
+            var first = bitStream.Get(6);
+            var second = bitStream.Get(27);
+
+            Assert.Equal(0b001010, first);
+            Assert.Equal(0b011101110001000110111110110, second);
+        }
+
+        [Fact]
+        public void GetPastEndOfDataThrows()
+        {
+            var bitStream = new BitStream(new byte[] { 0b10101010 });
+
+            Assert.Equal(0b10101010, bitStream.Get(8));
+
+            var exception = Assert.Throws<InvalidOperationException>(() => bitStream.Get(3));
+
+            Assert.Contains("3 bits", exception.Message);
+        }
+
+        [Theory]
+        [InlineData(0)]
+        [InlineData(-1)]
+        [InlineData(33)]
+        public void GetInvalidNumberOfBitsThrows(int numberOfBits)
+        {
+            var bitStream = new BitStream(data);
+
+            Assert.Throws<ArgumentOutOfRangeException>(() => bitStream.Get(numberOfBits));
+        }
+    }
+}
diff --git a/src/UglyToad.PdfPig/Filters/BitStream.cs b/src/UglyToad.PdfPig/Filters/BitStream.cs
new file mode 100644
index 00000000..a5499a21
--- /dev/null
+++ b/src/UglyToad.PdfPig/Filters/BitStream.cs
@@ -0,0 +1,76 @@
+namespace UglyToad.PdfPig.Filters
+{
+    using System;
+    using System.Collections.Generic;
+
+    /// <summary>
+    /// Reads unsigned integers of arbitrary bit width from a sequence of bytes, most significant bit first.
+    /// </summary>
+    internal class BitStream
+    {
+        private const int MaximumBitsPerRead = sizeof(int) * 8;
+
+        private readonly IReadOnlyList<byte> data;
+
+        // Number of bits of the byte at currentByteIndex which have already been consumed (0 to 7).
+        private int currentWithinByteBitOffset;
+        private int currentByteIndex;
+
+        /// <summary>
+        /// Create a new <see cref="BitStream"/> positioned at the first bit of <paramref name="data"/>.
+        /// </summary>
+        /// <exception cref="ArgumentNullException"><paramref name="data"/> is <see langword="null"/>.</exception>
+        public BitStream(IReadOnlyList<byte> data)
+        {
+            this.data = data ?? throw new ArgumentNullException(nameof(data));
+        }
+
+        /// <summary>
+        /// Read the next <paramref name="numberOfBits"/> bits and advance the stream past them.
+        /// </summary>
+        /// <param name="numberOfBits">The number of bits to read, from 1 to 32 inclusive.</param>
+        /// <returns>The bits read, right-aligned in the result with the first bit read as the most significant.</returns>
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="numberOfBits"/> is less than 1 or greater than 32.</exception>
+        /// <exception cref="InvalidOperationException">Fewer than <paramref name="numberOfBits"/> bits remain; the position is left unchanged.</exception>
+        public int Get(int numberOfBits)
+        {
+            if (numberOfBits < 1 || numberOfBits > MaximumBitsPerRead)
+            {
+                throw new ArgumentOutOfRangeException(nameof(numberOfBits), numberOfBits,
+                    $"The number of bits to read must be between 1 and {MaximumBitsPerRead}.");
+            }
+
+            var totalBits = numberOfBits + currentWithinByteBitOffset;
+            var endWithinByteBitOffset = totalBits % 8;
+            var numberOfBytesToRead = (totalBits + 7) / 8;
+
+            if (currentByteIndex + numberOfBytesToRead > data.Count)
+            {
+                throw new InvalidOperationException($"Reached the end of the bit stream while trying to read {numberOfBits} bits.");
+            }
+
+            // A read which starts part way through a byte can span 5 bytes, use a long so the leading byte is not shifted out.
+            long result = 0;
+            for (var i = 0; i < numberOfBytesToRead; i++)
+            {
+                result = (result << 8) | data[currentByteIndex + i];
+            }
+
+            // Trim trailing bits.
+            if (endWithinByteBitOffset > 0)
+            {
+                result >>= 8 - endWithinByteBitOffset;
+            }
+
+            // 'And' out the leading bits.
+            result &= (1L << numberOfBits) - 1;
+
+            // Stay on the last byte if it still has unread bits, otherwise move to the byte after it.
+            currentByteIndex += endWithinByteBitOffset > 0 ? numberOfBytesToRead - 1 : numberOfBytesToRead;
+            currentWithinByteBitOffset = endWithinByteBitOffset;
+
+            // A full 32 bit read may set the sign bit, reinterpret rather than overflow in a checked context.
+            return unchecked((int)result);
+        }
+    }
+}
diff --git a/src/UglyToad.PdfPig/Filters/LzwFilter.cs b/src/UglyToad.PdfPig/Filters/LzwFilter.cs
new file mode 100644
index 00000000..85f78ef8
--- /dev/null
+++ b/src/UglyToad.PdfPig/Filters/LzwFilter.cs
@@ -0,0 +1,176 @@
+namespace UglyToad.PdfPig.Filters
+{
+    using System;
+    using System.Collections.Generic;
+    using Tokens;
+    using Util;
+
+    /// <summary>
+    /// Decodes data which was compressed using the LZW (Lempel-Ziv-Welch) algorithm with variable width codes of 9 to 12 bits.
+    /// </summary>
+    internal class LzwFilter : IFilter
+    {
+        private const int DefaultColors = 1;
+        private const int DefaultBitsPerComponent = 8;
+        private const int DefaultColumns = 1;
+
+        // Codes 0 to 255 are the literal byte values, these two codes are reserved for control.
+        private const int ClearTable = 256;
+        private const int EodMarker = 257;
+
+        // The largest code which can be represented using 9, 10 and 11 bits respectively.
+        private const int NineBitBoundary = 511;
+        private const int TenBitBoundary = 1023;
+        private const int ElevenBitBoundary = 2047;
+
+        private readonly IDecodeParameterResolver decodeParameterResolver;
+        private readonly IPngPredictor pngPredictor;
+
+        /// <summary>
+        /// Create a new <see cref="LzwFilter"/>.
+        /// </summary>
+        /// <exception cref="ArgumentNullException">Either argument is <see langword="null"/>.</exception>
+        public LzwFilter(IDecodeParameterResolver decodeParameterResolver, IPngPredictor pngPredictor)
+        {
+            this.decodeParameterResolver = decodeParameterResolver ?? throw new ArgumentNullException(nameof(decodeParameterResolver));
+            this.pngPredictor = pngPredictor ?? throw new ArgumentNullException(nameof(pngPredictor));
+        }
+
+        /// <summary>
+        /// Decompress <paramref name="input"/> then, when the filter parameters specify a predictor greater than 1,
+        /// pass the decompressed bytes through the predictor.
+        /// </summary>
+        public byte[] Decode(IReadOnlyList<byte> input, DictionaryToken streamDictionary, int filterIndex)
+        {
+            var parameters = decodeParameterResolver.GetFilterParameters(streamDictionary, filterIndex);
+
+            var predictor = parameters.GetIntOrDefault(NameToken.Predictor, -1);
+
+            var earlyChange = parameters.GetIntOrDefault(NameToken.EarlyChange, 1);
+
+            var decompressed = Decode(input, earlyChange == 1);
+
+            if (predictor <= 1)
+            {
+                return decompressed;
+            }
+
+            var colors = Math.Min(parameters.GetIntOrDefault(NameToken.Colors, DefaultColors), 32);
+            var bitsPerComponent = parameters.GetIntOrDefault(NameToken.BitsPerComponent, DefaultBitsPerComponent);
+            var columns = parameters.GetIntOrDefault(NameToken.Columns, DefaultColumns);
+
+            return pngPredictor.Decode(decompressed, predictor, colors, bitsPerComponent, columns);
+        }
+
+        private static byte[] Decode(IReadOnlyList<byte> input, bool isEarlyChange)
+        {
+            var result = new List<byte>();
+
+            var table = GetDefaultTable();
+
+            var codeBits = 9;
+
+            var data = new BitStream(input);
+
+            // Without early change the code width increases one code later.
+            var codeOffset = isEarlyChange ? 0 : 1;
+
+            // The code read on the previous iteration, or -1 directly after the start of data or a clear table code.
+            var previous = -1;
+
+            while (true)
+            {
+                var next = data.Get(codeBits);
+
+                if (next == EodMarker)
+                {
+                    break;
+                }
+
+                if (next == ClearTable)
+                {
+                    table = GetDefaultTable();
+                    previous = -1;
+                    codeBits = 9;
+                    continue;
+                }
+
+                if (table.TryGetValue(next, out var b))
+                {
+                    result.AddRange(b);
+
+                    if (previous >= 0)
+                    {
+                        table[table.Count] = Append(table[previous], b[0]);
+                    }
+                }
+                else
+                {
+                    if (previous < 0)
+                    {
+                        throw new InvalidOperationException($"Invalid LZW data, the code {next} is not in the table and there is no previous code to build it from.");
+                    }
+
+                    // The code is not in the table yet so it must be the previous sequence followed by its own first byte.
+                    var lastSequence = table[previous];
+
+                    var newSequence = Append(lastSequence, lastSequence[0]);
+
+                    result.AddRange(newSequence);
+
+                    table[table.Count] = newSequence;
+                }
+
+                previous = next;
+
+                if (table.Count >= ElevenBitBoundary + codeOffset)
+                {
+                    codeBits = 12;
+                }
+                else if (table.Count >= TenBitBoundary + codeOffset)
+                {
+                    codeBits = 11;
+                }
+                else if (table.Count >= NineBitBoundary + codeOffset)
+                {
+                    codeBits = 10;
+                }
+                else
+                {
+                    codeBits = 9;
+                }
+            }
+
+            return result.ToArray();
+        }
+
+        // Create a copy of the sequence with the extra byte added to the end.
+        private static byte[] Append(byte[] sequence, byte value)
+        {
+            var extended = new byte[sequence.Length + 1];
+
+            Array.Copy(sequence, extended, sequence.Length);
+
+            extended[sequence.Length] = value;
+
+            return extended;
+        }
+
+        // The table containing an entry for each of the 256 byte values and the two reserved codes.
+        private static Dictionary<int, byte[]> GetDefaultTable()
+        {
+            var table = new Dictionary<int, byte[]>();
+
+            for (var i = 0; i < 256; i++)
+            {
+                table[i] = new[] { (byte)i };
+            }
+
+            // Reserve the control codes so the first new sequence is stored at code 258.
+            table[ClearTable] = null;
+            table[EodMarker] = null;
+
+            return table;
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/UglyToad.PdfPig/Filters/MemoryFilterProvider.cs b/src/UglyToad.PdfPig/Filters/MemoryFilterProvider.cs
index 92a79ec9..cf344b22 100644
--- a/src/UglyToad.PdfPig/Filters/MemoryFilterProvider.cs
+++ b/src/UglyToad.PdfPig/Filters/MemoryFilterProvider.cs
@@ -17,6 +17,7 @@
             IFilter AsciiHexFunc() => new AsciiHexDecodeFilter();
             IFilter FlateFunc() => new FlateFilter(decodeParameterResolver, pngPredictor, log);
             IFilter RunLengthFunc() => new RunLengthFilter();
+            IFilter LzwFunc() => new LzwFilter(decodeParameterResolver, pngPredictor);
 
             filterFactories = new Dictionary<string, Func<IFilter>>
             {
@@ -27,7 +28,9 @@
                 {NameToken.FlateDecode.Data, FlateFunc},
                 {NameToken.FlateDecodeAbbreviation.Data, FlateFunc},
                 {NameToken.RunLengthDecode.Data, RunLengthFunc},
-                {NameToken.RunLengthDecodeAbbreviation.Data, RunLengthFunc}
+                {NameToken.RunLengthDecodeAbbreviation.Data, RunLengthFunc},
+                {NameToken.LzwDecode.Data, LzwFunc},
+                {NameToken.LzwDecodeAbbreviation.Data, LzwFunc}
             };
         }
 