mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-10-14 19:05:01 +08:00
add lzw filter support
This commit is contained in:
84
src/UglyToad.PdfPig.Tests/Filters/BitStreamTests.cs
Normal file
84
src/UglyToad.PdfPig.Tests/Filters/BitStreamTests.cs
Normal file
@@ -0,0 +1,84 @@
|
||||
namespace UglyToad.PdfPig.Tests.Filters
|
||||
{
|
||||
using PdfPig.Filters;
|
||||
using Xunit;
|
||||
|
||||
/// <summary>
/// Exercises <see cref="BitStream"/> by reading runs of differently sized values
/// from a fixed 11 byte (88 bit) buffer.
/// </summary>
public class BitStreamTests
{
    private readonly byte[] data =
    {
        0b00101001, 0b11011100, 0b01000110, 0b11111011,
        0b00101010, 0b11010111, 0b10010001, 0b11011011,
        0b11110000, 0b00010111, 0b10101011
    };

    [Fact]
    public void GetNumbers()
    {
        var widths = new[] { 9, 9, 11, 5, 17 };

        var expected = new[]
        {
            0b001010011,
            0b101110001,
            0b00011011111,
            0b01100,
            0b10101011010111100
        };

        AssertReads(widths, expected);
    }

    [Fact]
    public void GetNumbersCrossingBoundaries()
    {
        var widths = new[] { 13, 15, 13 };

        var expected = new[]
        {
            0b0010100111011,
            0b100010001101111,
            0b1011001010101
        };

        AssertReads(widths, expected);
    }

    [Fact]
    public void GetNumbersUntilOffsetResets()
    {
        // Nine 9-bit reads followed by one 7-bit read consume all 88 bits of the buffer.
        var widths = new[] { 9, 9, 9, 9, 9, 9, 9, 9, 9, 7 };

        var expected = new[]
        {
            0b001010011,
            0b101110001,
            0b000110111,
            0b110110010,
            0b101011010,
            0b111100100,
            0b011101101,
            0b111110000,
            0b000101111,
            0b0101011
        };

        AssertReads(widths, expected);
    }

    /// <summary>
    /// Reads one value per entry in <paramref name="widths"/> from a fresh stream over the
    /// shared buffer, then compares every value read with the matching expected entry.
    /// All reads complete before the first assertion runs.
    /// </summary>
    private void AssertReads(int[] widths, int[] expected)
    {
        var bitStream = new BitStream(data);

        var actual = new int[widths.Length];
        for (var i = 0; i < widths.Length; i++)
        {
            actual[i] = bitStream.Get(widths[i]);
        }

        for (var i = 0; i < expected.Length; i++)
        {
            Assert.Equal(expected[i], actual[i]);
        }
    }
}
|
||||
}
|
65
src/UglyToad.PdfPig/Filters/BitStream.cs
Normal file
65
src/UglyToad.PdfPig/Filters/BitStream.cs
Normal file
@@ -0,0 +1,65 @@
|
||||
namespace UglyToad.PdfPig.Filters
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
|
||||
/// <summary>
/// Reads integers of an arbitrary bit width from a sequence of bytes.
/// Bits are consumed from the most significant bit of each byte first and the
/// read position is remembered between calls.
/// </summary>
internal class BitStream
{
    /// <summary>
    /// The widest value a single call to <see cref="Get"/> can return.
    /// </summary>
    private const int MaximumBitsPerRead = sizeof(int) * 8;

    private readonly IReadOnlyList<byte> data;

    // Number of bits of data[currentByteIndex] which have already been consumed (0 to 7).
    private int currentWithinByteBitOffset;

    // Index of the byte containing the next unread bit.
    private int currentByteIndex;

    /// <summary>
    /// Create a new <see cref="BitStream"/> positioned at the first bit of <paramref name="data"/>.
    /// </summary>
    /// <param name="data">The bytes to read from.</param>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
    public BitStream(IReadOnlyList<byte> data)
    {
        this.data = data ?? throw new ArgumentNullException(nameof(data));
    }

    /// <summary>
    /// Read the next <paramref name="numberOfBits"/> bits and return them as the low order bits of an integer.
    /// </summary>
    /// <param name="numberOfBits">The number of bits to read, from 0 to 32 inclusive. Reading 0 bits returns 0 and consumes nothing.</param>
    /// <returns>The value of the bits read.</returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="numberOfBits"/> is negative or greater than 32.</exception>
    /// <exception cref="InvalidOperationException">
    /// Fewer than <paramref name="numberOfBits"/> bits remain. The read position is left unchanged in this case.
    /// </exception>
    public int Get(int numberOfBits)
    {
        if (numberOfBits < 0 || numberOfBits > MaximumBitsPerRead)
        {
            throw new ArgumentOutOfRangeException(nameof(numberOfBits), numberOfBits,
                $"The number of bits to read must be between 0 and {MaximumBitsPerRead}.");
        }

        if (numberOfBits == 0)
        {
            // Nothing to read; in particular do not step over a byte when sitting on a byte boundary.
            return 0;
        }

        var totalBits = numberOfBits + currentWithinByteBitOffset;
        var endWithinByteBitOffset = totalBits % 8;

        // Round up so that a partially consumed final byte is included.
        var numberOfBytesToRead = (totalBits + 7) / 8;

        // Check up front so that a failed read does not move the position.
        if (currentByteIndex + numberOfBytesToRead > data.Count)
        {
            throw new InvalidOperationException($"Reached the end of the bit stream while trying to read {numberOfBits} bits.");
        }

        // Up to 5 bytes (32 bits starting at offset 7) may be touched, which needs more than 32 bits of room.
        long accumulator = 0;
        for (var i = 0; i < numberOfBytesToRead; i++)
        {
            accumulator = (accumulator << 8) | data[currentByteIndex + i];
        }

        // Trim trailing bits which belong to the next read.
        if (endWithinByteBitOffset > 0)
        {
            accumulator >>= 8 - endWithinByteBitOffset;
        }

        // 'And' out the leading bits which were consumed by previous reads.
        var mask = (1L << numberOfBits) - 1;
        var result = unchecked((int)(accumulator & mask));

        // Stay on the last byte touched if it still has unread bits, otherwise move past it.
        currentByteIndex += endWithinByteBitOffset > 0 ? numberOfBytesToRead - 1 : numberOfBytesToRead;
        currentWithinByteBitOffset = endWithinByteBitOffset;

        return result;
    }
}
|
||||
}
|
157
src/UglyToad.PdfPig/Filters/LzwFilter.cs
Normal file
157
src/UglyToad.PdfPig/Filters/LzwFilter.cs
Normal file
@@ -0,0 +1,157 @@
|
||||
namespace UglyToad.PdfPig.Filters
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using Tokens;
|
||||
using Util;
|
||||
|
||||
/// <summary>
/// Filter which decompresses LZW encoded stream data and, when the decode parameters
/// request a predictor greater than 1, passes the decompressed bytes through the PNG predictor.
/// </summary>
internal class LzwFilter : IFilter
{
    private const int DefaultColors = 1;
    private const int DefaultBitsPerComponent = 8;
    private const int DefaultColumns = 1;

    // Reserved codes which follow the 256 single byte entries.
    private const int ClearTable = 256;
    private const int EodMarker = 257;

    // Table sizes at which the code width grows (before the early change adjustment).
    private const int NineBitBoundary = 511;
    private const int TenBitBoundary = 1023;
    private const int ElevenBitBoundary = 2047;

    private readonly IDecodeParameterResolver decodeParameterResolver;
    private readonly IPngPredictor pngPredictor;

    /// <summary>
    /// Create a new <see cref="LzwFilter"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public LzwFilter(IDecodeParameterResolver decodeParameterResolver, IPngPredictor pngPredictor)
    {
        this.decodeParameterResolver = decodeParameterResolver ?? throw new ArgumentNullException(nameof(decodeParameterResolver));
        this.pngPredictor = pngPredictor ?? throw new ArgumentNullException(nameof(pngPredictor));
    }

    /// <summary>
    /// Decompress <paramref name="input"/> using the parameters resolved for the filter at
    /// <paramref name="filterIndex"/> in <paramref name="streamDictionary"/>.
    /// </summary>
    /// <returns>
    /// The decompressed bytes, run through the predictor when the Predictor parameter is greater than 1.
    /// </returns>
    public byte[] Decode(IReadOnlyList<byte> input, DictionaryToken streamDictionary, int filterIndex)
    {
        var parameters = decodeParameterResolver.GetFilterParameters(streamDictionary, filterIndex);

        var predictor = parameters.GetIntOrDefault(NameToken.Predictor, -1);
        var earlyChange = parameters.GetIntOrDefault(NameToken.EarlyChange, 1);

        var decompressed = Decode(input, earlyChange == 1);

        if (predictor <= 1)
        {
            // No predictor requested: the decompressed bytes are the final result.
            return decompressed;
        }

        // The colors value is capped at 32 before being handed to the predictor.
        var colors = Math.Min(parameters.GetIntOrDefault(NameToken.Colors, DefaultColors), 32);
        var bitsPerComponent = parameters.GetIntOrDefault(NameToken.BitsPerComponent, DefaultBitsPerComponent);
        var columns = parameters.GetIntOrDefault(NameToken.Columns, DefaultColumns);

        return pngPredictor.Decode(decompressed, predictor, colors, bitsPerComponent, columns);
    }

    /// <summary>
    /// Read variable width codes from <paramref name="input"/> until the end of data code is read,
    /// expanding each code into its byte sequence and growing the code table as it goes.
    /// </summary>
    private static byte[] Decode(IReadOnlyList<byte> input, bool isEarlyChange)
    {
        var output = new List<byte>();
        var codeTable = GetDefaultTable();
        var reader = new BitStream(input);

        // Without early change each width boundary sits one entry later.
        var boundaryShift = isEarlyChange ? 0 : 1;

        var width = 9;
        var priorCode = -1;

        int code;
        while ((code = reader.Get(width)) != EodMarker)
        {
            if (code == ClearTable)
            {
                codeTable = GetDefaultTable();
                priorCode = -1;
                width = 9;
                continue;
            }

            if (codeTable.TryGetValue(code, out var known))
            {
                output.AddRange(known);

                if (priorCode >= 0)
                {
                    // New entry: the previous sequence plus the first byte of the current one.
                    codeTable[codeTable.Count] = Append(codeTable[priorCode], known[0]);
                }
            }
            else
            {
                // The code is not in the table yet: it must be the previous sequence plus its own first byte.
                var prior = codeTable[priorCode];
                var extended = Append(prior, prior[0]);

                output.AddRange(extended);
                codeTable[codeTable.Count] = extended;
            }

            priorCode = code;

            var entries = codeTable.Count;
            width = entries >= ElevenBitBoundary + boundaryShift ? 12
                : entries >= TenBitBoundary + boundaryShift ? 11
                : entries >= NineBitBoundary + boundaryShift ? 10
                : 9;
        }

        return output.ToArray();
    }

    /// <summary>
    /// Create a new array containing <paramref name="prefix"/> followed by <paramref name="suffix"/>.
    /// </summary>
    private static byte[] Append(byte[] prefix, byte suffix)
    {
        var combined = new byte[prefix.Length + 1];

        Array.Copy(prefix, combined, prefix.Length);
        combined[prefix.Length] = suffix;

        return combined;
    }

    /// <summary>
    /// Build the initial code table: one single byte sequence for each value from 0 to 255,
    /// plus null placeholders for the clear table and end of data codes.
    /// </summary>
    private static Dictionary<int, byte[]> GetDefaultTable()
    {
        var table = new Dictionary<int, byte[]>();

        for (var value = 0; value <= byte.MaxValue; value++)
        {
            table[value] = new[] { (byte)value };
        }

        // Reserve the two control codes so that new entries start at 258.
        table[ClearTable] = null;
        table[EodMarker] = null;

        return table;
    }
}
|
||||
}
|
@@ -17,6 +17,7 @@
|
||||
IFilter AsciiHexFunc() => new AsciiHexDecodeFilter();
|
||||
IFilter FlateFunc() => new FlateFilter(decodeParameterResolver, pngPredictor, log);
|
||||
IFilter RunLengthFunc() => new RunLengthFilter();
|
||||
IFilter LzwFunc() => new LzwFilter(decodeParameterResolver, pngPredictor);
|
||||
|
||||
filterFactories = new Dictionary<string, Func<IFilter>>
|
||||
{
|
||||
@@ -27,7 +28,9 @@
|
||||
{NameToken.FlateDecode.Data, FlateFunc},
|
||||
{NameToken.FlateDecodeAbbreviation.Data, FlateFunc},
|
||||
{NameToken.RunLengthDecode.Data, RunLengthFunc},
|
||||
{NameToken.RunLengthDecodeAbbreviation.Data, RunLengthFunc}
|
||||
{NameToken.RunLengthDecodeAbbreviation.Data, RunLengthFunc},
|
||||
{NameToken.LzwDecode, LzwFunc},
|
||||
{NameToken.LzwDecodeAbbreviation, LzwFunc}
|
||||
};
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user