From 9c2cd89de4f82ea1188d0eee77dbf6993b614bf6 Mon Sep 17 00:00:00 2001 From: Kasper Frank Date: Thu, 6 May 2021 18:03:04 +0200 Subject: [PATCH] Add support for the CCITTFaxDecode filter --- .../Filters/CcittFaxCompressionType.cs | 21 + .../Filters/CcittFaxDecodeFilter.cs | 126 ++- .../Filters/CcittFaxDecoderStream.cs | 788 ++++++++++++++++++ .../Filters/DecodeParameterResolver.cs | 19 +- .../Filters/DefaultFilterProvider.cs | 3 +- .../Filters/FilterProviderWithLookup.cs | 3 +- .../Graphics/Colors/ColorSpaceDetails.cs | 8 + .../Graphics/InlineImageBuilder.cs | 21 +- src/UglyToad.PdfPig/IO/StreamWrapper.cs | 74 ++ src/UglyToad.PdfPig/PdfExtensions.cs | 45 +- src/UglyToad.PdfPig/Util/ArrayHelper.cs | 30 + .../Util/ColorSpaceDetailsParser.cs | 8 +- 12 files changed, 1093 insertions(+), 53 deletions(-) create mode 100644 src/UglyToad.PdfPig/Filters/CcittFaxCompressionType.cs create mode 100644 src/UglyToad.PdfPig/Filters/CcittFaxDecoderStream.cs create mode 100644 src/UglyToad.PdfPig/IO/StreamWrapper.cs create mode 100644 src/UglyToad.PdfPig/Util/ArrayHelper.cs diff --git a/src/UglyToad.PdfPig/Filters/CcittFaxCompressionType.cs b/src/UglyToad.PdfPig/Filters/CcittFaxCompressionType.cs new file mode 100644 index 00000000..da388445 --- /dev/null +++ b/src/UglyToad.PdfPig/Filters/CcittFaxCompressionType.cs @@ -0,0 +1,21 @@ +namespace UglyToad.PdfPig.Filters +{ + /// + /// Specifies the compression type to use with . 
+ /// + internal enum CcittFaxCompressionType + { + /// + /// Modified Huffman - Group 3 (T4) + /// + ModifiedHuffman, + /// + /// Modified Read - Group 3 (optional T4) + /// + T4, + /// + /// Modified Modified Read - Group 4 (T6) + /// + T6 + } +} diff --git a/src/UglyToad.PdfPig/Filters/CcittFaxDecodeFilter.cs b/src/UglyToad.PdfPig/Filters/CcittFaxDecodeFilter.cs index 4c061441..bf677d9f 100644 --- a/src/UglyToad.PdfPig/Filters/CcittFaxDecodeFilter.cs +++ b/src/UglyToad.PdfPig/Filters/CcittFaxDecodeFilter.cs @@ -1,19 +1,121 @@ namespace UglyToad.PdfPig.Filters { using System; - using System.Collections.Generic; - using Tokens; - - internal class CcittFaxDecodeFilter : IFilter - { + using System.Collections.Generic; + using System.IO; + using System.Linq; + using Tokens; + + /// + /// Decodes image data that has been encoded using either Group 3 or Group 4. + /// + /// Ported from https://github.com/apache/pdfbox/blob/714156a15ea6fcfe44ac09345b01e192cbd74450/pdfbox/src/main/java/org/apache/pdfbox/filter/CCITTFaxFilter.java + /// + internal class CcittFaxDecodeFilter : IFilter + { /// - public bool IsSupported { get; } = false; - + public bool IsSupported { get; } = true; + /// public byte[] Decode(IReadOnlyList input, DictionaryToken streamDictionary, int filterIndex) - { - throw new NotSupportedException("The CCITT Fax Filter for image data is not currently supported. 
" + - "Try accessing the raw compressed data directly."); - } + { + var decodeParms = DecodeParameterResolver.GetFilterParameters(streamDictionary, filterIndex); + + var cols = decodeParms.GetInt(NameToken.Columns, 1728); + var rows = decodeParms.GetInt(NameToken.Rows, 0); + var height = streamDictionary.GetInt(NameToken.Height, NameToken.H, 0); + if (rows > 0 && height > 0) + { + // PDFBOX-771, PDFBOX-3727: rows in DecodeParms sometimes contains an incorrect value + rows = height; + } + else + { + // at least one of the values has to have a valid value + rows = Math.Max(rows, height); + } + + var k = decodeParms.GetInt(NameToken.K, 0); + var encodedByteAlign = decodeParms.GetBoolean(NameToken.EncodedByteAlign, false); + var compressionType = DetermineCompressionType(input, k); + using (var stream = new CcittFaxDecoderStream(new MemoryStream(input.ToArray()), cols, compressionType, encodedByteAlign)) + { + var arraySize = (cols + 7) / 8 * rows; + var decompressed = new byte[arraySize]; + ReadFromDecoderStream(stream, decompressed); + + // we expect black to be 1, if not invert the bitmap + var blackIsOne = decodeParms.GetBoolean(NameToken.BlackIs1, false); + if (!blackIsOne) + { + InvertBitmap(decompressed); + } + + return decompressed; + } + } + + private static CcittFaxCompressionType DetermineCompressionType(IReadOnlyList input, int k) + { + if (k == 0) + { + var compressionType = CcittFaxCompressionType.T4; // Group 3 1D + + if (input.Count < 20) + { + throw new InvalidOperationException("The format is invalid"); + } + + if (input[0] != 0 || (input[1] >> 4 != 1 && input[1] != 1)) + { + // leading EOL (0b000000000001) not found, search further and + // try RLE if not found + compressionType = CcittFaxCompressionType.ModifiedHuffman; + var b = (short)(((input[0] << 8) + (input[1] & 0xff)) >> 4); + for (var i = 12; i < 160; i++) + { + b = (short)((b << 1) + ((input[(i / 8)] >> (7 - (i % 8))) & 0x01)); + if ((b & 0xFFF) == 1) + { + return 
CcittFaxCompressionType.T4; + } + } + } + + return compressionType; + } + else if (k > 0) + { + // Group 3 2D + return CcittFaxCompressionType.T4; + } + else + { + return CcittFaxCompressionType.T6; + } + } + + private static void ReadFromDecoderStream(CcittFaxDecoderStream decoderStream, byte[] result) + { + int pos = 0; + int read; + while ((read = decoderStream.Read(result, pos, result.Length - pos)) > -1) + { + pos += read; + if (pos >= result.Length) + { + break; + } + } + decoderStream.Close(); + } + + private static void InvertBitmap(byte[] bufferData) + { + for (int i = 0, c = bufferData.Length; i < c; i++) + { + bufferData[i] = (byte)(~bufferData[i] & 0xFF); + } + } } -} \ No newline at end of file +} diff --git a/src/UglyToad.PdfPig/Filters/CcittFaxDecoderStream.cs b/src/UglyToad.PdfPig/Filters/CcittFaxDecoderStream.cs new file mode 100644 index 00000000..85272670 --- /dev/null +++ b/src/UglyToad.PdfPig/Filters/CcittFaxDecoderStream.cs @@ -0,0 +1,788 @@ +namespace UglyToad.PdfPig.Filters +{ + using System; + using System.IO; + using UglyToad.PdfPig.IO; + using UglyToad.PdfPig.Util; + + /// + /// CCITT Modified Huffman RLE, Group 3 (T4) and Group 4 (T6) fax compression. + /// + /// Ported from https://github.com/apache/pdfbox/blob/e644c29279e276bde14ce7a33bdeef0cb1001b3e/pdfbox/src/main/java/org/apache/pdfbox/filter/CCITTFaxDecoderStream.java + /// + internal class CcittFaxDecoderStream : StreamWrapper + { + // See TIFF 6.0 Specification, Section 10: "Modified Huffman Compression", page 43. 
+ + private readonly int columns; + private readonly byte[] decodedRow; + + private readonly bool optionG32D; + private readonly bool optionByteAligned; + + private readonly CcittFaxCompressionType type; + + private int decodedLength; + private int decodedPos; + + private int[] changesReferenceRow; + private int[] changesCurrentRow; + private int changesReferenceRowCount; + private int changesCurrentRowCount; + + private int lastChangingElement = 0; + + private int buffer = -1; + private int bufferPos = -1; + + /// + /// Creates a CCITTFaxDecoderStream. + /// This constructor may be used for CCITT streams embedded in PDF files, + /// which use EncodedByteAlign. + /// + public CcittFaxDecoderStream(Stream stream, int columns, CcittFaxCompressionType type, bool byteAligned) + : base(stream) + { + this.columns = columns; + this.type = type; + + // We know this is only used for b/w (1 bit) + decodedRow = new byte[(columns + 7) / 8]; + changesReferenceRow = new int[columns + 2]; + changesCurrentRow = new int[columns + 2]; + + optionByteAligned = byteAligned; + switch (type) + { + case CcittFaxCompressionType.ModifiedHuffman: + optionG32D = false; + break; + case CcittFaxCompressionType.T4: + optionG32D = true; + break; + case CcittFaxCompressionType.T6: + optionG32D = false; + break; + default: + throw new ArgumentOutOfRangeException(nameof(type), type, "Illegal parameter"); + } + } + + private void Fetch() + { + if (decodedPos >= decodedLength) + { + decodedLength = 0; + + try + { + DecodeRow(); + } + catch (IOException) + { + if (decodedLength != 0) + { + throw; + } + + // ..otherwise, just let client code try to read past the + // end of stream + decodedLength = -1; + } + + decodedPos = 0; + } + } + + private void Decode1D() + { + var index = 0; + var white = true; + changesCurrentRowCount = 0; + + do + { + var completeRun = white ? 
DecodeRun(WhiteRunTree) : DecodeRun(BlackRunTree); + index += completeRun; + changesCurrentRow[changesCurrentRowCount++] = index; + + // Flip color for next run + white = !white; + } while (index < columns); + } + + private void Decode2D() + { + changesReferenceRowCount = changesCurrentRowCount; + var tmp = changesCurrentRow; + changesCurrentRow = changesReferenceRow; + changesReferenceRow = tmp; + + var white = true; + var index = 0; + changesCurrentRowCount = 0; + + mode: while (index < columns) + { + var node = CodeTree.Root; + + while (true) + { + node = node.Walk(ReadBit()); + + if (node == null) + { + goto mode; + } + else if (node.IsLeaf) + { + switch (node.Value) + { + case VALUE_HMODE: + var runLength = DecodeRun(white ? WhiteRunTree : BlackRunTree); + index += runLength; + changesCurrentRow[changesCurrentRowCount++] = index; + + runLength = DecodeRun(white ? BlackRunTree : WhiteRunTree); + index += runLength; + changesCurrentRow[changesCurrentRowCount++] = index; + break; + + case VALUE_PASSMODE: + var pChangingElement = GetNextChangingElement(index, white) + 1; + + if (pChangingElement >= changesReferenceRowCount) + { + index = columns; + } + else + { + index = changesReferenceRow[pChangingElement]; + } + + break; + + default: + // Vertical mode (-3 to 3) + var vChangingElement = GetNextChangingElement(index, white); + + if (vChangingElement >= changesReferenceRowCount || vChangingElement == -1) + { + index = columns + node.Value; + } + else + { + index = changesReferenceRow[vChangingElement] + node.Value; + } + + changesCurrentRow[changesCurrentRowCount] = index; + changesCurrentRowCount++; + white = !white; + + break; + } + + goto mode; + } + } + } + } + + private int GetNextChangingElement(int a0, bool white) + { + var start = (int)(lastChangingElement & 0xFFFF_FFFE) + (white ? 
0 : 1); + if (start > 2) + { + start -= 2; + } + + if (a0 == 0) + { + return start; + } + + for (var i = start; i < changesReferenceRowCount; i += 2) + { + if (a0 < changesReferenceRow[i]) + { + lastChangingElement = i; + return i; + } + } + + return -1; + } + + private void DecodeRowType2() + { + if (optionByteAligned) + { + ResetBuffer(); + } + + Decode1D(); + } + + private void DecodeRowType4() + { + if (optionByteAligned) + { + ResetBuffer(); + } + + eof: while (true) + { + // read till next EOL code + var node = EolOnlyTree.Root; + + while (true) + { + node = node.Walk(ReadBit()); + + if (node == null) + { + goto eof; + } + + if (node.IsLeaf) + { + goto done; + } + } + } + + done: + if (!optionG32D || ReadBit()) + { + Decode1D(); + } + else + { + Decode2D(); + } + } + + private void DecodeRowType6() + { + if (optionByteAligned) + { + ResetBuffer(); + } + + Decode2D(); + } + + private void DecodeRow() + { + switch (type) + { + case CcittFaxCompressionType.ModifiedHuffman: + DecodeRowType2(); + break; + case CcittFaxCompressionType.T4: + DecodeRowType4(); + break; + case CcittFaxCompressionType.T6: + DecodeRowType6(); + break; + default: + throw new InvalidOperationException(type + " is not a supported compression type."); + } + + var index = 0; + var white = true; + + lastChangingElement = 0; + for (var i = 0; i <= changesCurrentRowCount; i++) + { + var nextChange = columns; + + if (i != changesCurrentRowCount) + { + nextChange = changesCurrentRow[i]; + } + + if (nextChange > columns) + { + nextChange = columns; + } + + var byteIndex = index / 8; + + while (index % 8 != 0 && (nextChange - index) > 0) + { + decodedRow[byteIndex] |= (byte)(white ? 0 : 1 << (7 - ((index) % 8))); + index++; + } + + if (index % 8 == 0) + { + byteIndex = index / 8; + var value = (byte)(white ? 
0x00 : 0xff); + + while ((nextChange - index) > 7) + { + decodedRow[byteIndex] = value; + index += 8; + ++byteIndex; + } + } + + while ((nextChange - index) > 0) + { + if (index % 8 == 0) + { + decodedRow[byteIndex] = 0; + } + + decodedRow[byteIndex] |= (byte)(white ? 0 : 1 << (7 - ((index) % 8))); + index++; + } + + white = !white; + } + + if (index != columns) + { + throw new IOException("Sum of run-lengths does not equal scan line width: " + index + " > " + columns); + } + + decodedLength = (index + 7) / 8; + } + + private int DecodeRun(Tree tree) + { + var total = 0; + + var node = tree.Root; + + while (true) + { + var bit = ReadBit(); + node = node.Walk(bit); + + if (node == null) + { + throw new IOException("Unknown code in Huffman RLE stream"); + } + + if (node.IsLeaf) + { + total += node.Value; + if (node.Value >= 64) + { + node = tree.Root; + } + else if (node.Value >= 0) + { + return total; + } + else + { + return columns; + } + } + } + } + + private void ResetBuffer() + { + bufferPos = -1; + } + + private bool ReadBit() + { + if (bufferPos < 0 || bufferPos > 7) + { + buffer = Stream.ReadByte(); + + if (buffer == -1) + { + throw new IOException("Unexpected end of Huffman RLE stream"); + } + + bufferPos = 0; + } + + var isSet = ((buffer >> (7 - bufferPos)) & 1) == 1; + + bufferPos++; + + if (bufferPos > 7) + { + bufferPos = -1; + } + + return isSet; + } + + public override int ReadByte() + { + if (decodedLength < 0) + { + return 0x0; + } + + if (decodedPos >= decodedLength) + { + Fetch(); + + if (decodedLength < 0) + { + return 0x0; + } + } + + return decodedRow[decodedPos++] & 0xff; + } + + public override int Read(byte[] b, int off, int len) + { + if (decodedLength < 0) + { + ArrayHelper.Fill(b, off, off + len, (byte)0x0); + return len; + } + + if (decodedPos >= decodedLength) + { + Fetch(); + + if (decodedLength < 0) + { + ArrayHelper.Fill(b, off, off + len, (byte)0x0); + return len; + } + } + + var read = Math.Min(decodedLength - decodedPos, len); + 
Array.Copy(decodedRow, decodedPos, b, off, read); + decodedPos += read; + + return read; + } + + private class Node + { + public Node Left { get; set; } + public Node Right { get; set; } + + public int Value { get; set; } + + public bool CanBeFill { get; set; } + public bool IsLeaf { get; set; } + + public void Set(bool next, Node node) + { + if (!next) + { + Left = node; + } + else + { + Right = node; + } + } + + public Node Walk(bool next) + { + return next ? Right : Left; + } + + public override string ToString() + { + return $"[{nameof(IsLeaf)}={IsLeaf}, {nameof(Value)}={Value}, {nameof(CanBeFill)}={CanBeFill}]"; + } + } + + private class Tree + { + public Node Root { get; } = new Node(); + + public void Fill(int depth, int path, int value) + { + var current = Root; + + for (var i = 0; i < depth; i++) + { + var bitPos = depth - 1 - i; + var isSet = ((path >> bitPos) & 1) == 1; + var next = current.Walk(isSet); + + if (next == null) + { + next = new Node(); + + if (i == depth - 1) + { + next.Value = value; + next.IsLeaf = true; + } + + if (path == 0) + { + next.CanBeFill = true; + } + + current.Set(isSet, next); + } + else if (next.IsLeaf) + { + throw new IOException("node is leaf, no other following"); + } + + current = next; + } + } + + public void Fill(int depth, int path, Node node) + { + var current = Root; + + for (var i = 0; i < depth; i++) + { + var bitPos = depth - 1 - i; + var isSet = ((path >> bitPos) & 1) == 1; + var next = current.Walk(isSet); + + if (next == null) + { + if (i == depth - 1) + { + next = node; + } + else + { + next = new Node(); + } + + if (path == 0) + { + next.CanBeFill = true; + } + + current.Set(isSet, next); + } + else if (next.IsLeaf) + { + throw new IOException("node is leaf, no other following"); + } + + current = next; + } + } + } + + private static readonly short[][] BLACK_CODES = new short[][] { + new short[]{ // 2 bits + 0x2, 0x3, + }, + new short[]{ // 3 bits + 0x2, 0x3, + }, + new short[]{ // 4 bits + 0x2, 0x3, + }, + 
new short[]{ // 5 bits + 0x3, + }, + new short[]{ // 6 bits + 0x4, 0x5, + }, + new short[]{ // 7 bits + 0x4, 0x5, 0x7, + }, + new short[]{ // 8 bits + 0x4, 0x7, + }, + new short[]{ // 9 bits + 0x18, + }, + new short[]{ // 10 bits + 0x17, 0x18, 0x37, 0x8, 0xf, + }, + new short[]{ // 11 bits + 0x17, 0x18, 0x28, 0x37, 0x67, 0x68, 0x6c, 0x8, 0xc, 0xd, + }, + new short[]{ // 12 bits + 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f, 0x24, 0x27, 0x28, 0x2b, 0x2c, 0x33, + 0x34, 0x35, 0x37, 0x38, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x64, 0x65, + 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xd2, 0xd3, + 0xd4, 0xd5, 0xd6, 0xd7, 0xda, 0xdb, + }, + new short[]{ // 13 bits + 0x4a, 0x4b, 0x4c, 0x4d, 0x52, 0x53, 0x54, 0x55, 0x5a, 0x5b, 0x64, 0x65, 0x6c, 0x6d, 0x72, 0x73, + 0x74, 0x75, 0x76, 0x77, + } + }; + + private static readonly short[][] BLACK_RUN_LENGTHS = new short[][]{ + new short[]{ // 2 bits + 3, 2, + }, + new short[]{ // 3 bits + 1, 4, + }, + new short[]{ // 4 bits + 6, 5, + }, + new short[]{ // 5 bits + 7, + }, + new short[]{ // 6 bits + 9, 8, + }, + new short[]{ // 7 bits + 10, 11, 12, + }, + new short[]{ // 8 bits + 13, 14, + }, + new short[]{ // 9 bits + 15, + }, + new short[]{ // 10 bits + 16, 17, 0, 18, 64, + }, + new short[]{ // 11 bits + 24, 25, 23, 22, 19, 20, 21, 1792, 1856, 1920, + }, + new short[]{ // 12 bits + 1984, 2048, 2112, 2176, 2240, 2304, 2368, 2432, 2496, 2560, 52, 55, 56, 59, 60, 320, 384, 448, 53, + 54, 50, 51, 44, 45, 46, 47, 57, 58, 61, 256, 48, 49, 62, 63, 30, 31, 32, 33, 40, 41, 128, 192, 26, + 27, 28, 29, 34, 35, 36, 37, 38, 39, 42, 43, + }, + new short[]{ // 13 bits + 640, 704, 768, 832, 1280, 1344, 1408, 1472, 1536, 1600, 1664, 1728, 512, 576, 896, 960, 1024, 1088, + 1152, 1216, + } + }; + + private static readonly short[][] WHITE_CODES = new short[][]{ + new short[]{ // 4 bits + 0x7, 0x8, 0xb, 0xc, 0xe, 0xf, + }, + new short[]{ // 5 bits + 0x12, 0x13, 0x14, 0x1b, 
0x7, 0x8, + }, + new short[]{ // 6 bits + 0x17, 0x18, 0x2a, 0x2b, 0x3, 0x34, 0x35, 0x7, 0x8, + }, + new short[]{ // 7 bits + 0x13, 0x17, 0x18, 0x24, 0x27, 0x28, 0x2b, 0x3, 0x37, 0x4, 0x8, 0xc, + }, + new short[]{ // 8 bits + 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x1a, 0x1b, 0x2, 0x24, 0x25, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, + 0x3, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x4, 0x4a, 0x4b, 0x5, 0x52, 0x53, 0x54, 0x55, 0x58, 0x59, + 0x5a, 0x5b, 0x64, 0x65, 0x67, 0x68, 0xa, 0xb, + }, + new short[]{ // 9 bits + 0x98, 0x99, 0x9a, 0x9b, 0xcc, 0xcd, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, + }, + new short[]{ // 10 bits + }, + new short[]{ // 11 bits + 0x8, 0xc, 0xd, + }, + new short[]{ // 12 bits + 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f, + } + }; + + private static readonly short[][] WHITE_RUN_LENGTHS = new short[][]{ + new short[]{ // 4 bits + 2, 3, 4, 5, 6, 7, + }, + new short[]{ // 5 bits + 128, 8, 9, 64, 10, 11, + }, + new short[]{ // 6 bits + 192, 1664, 16, 17, 13, 14, 15, 1, 12, + }, + new short[]{ // 7 bits + 26, 21, 28, 27, 18, 24, 25, 22, 256, 23, 20, 19, + }, + new short[]{ // 8 bits + 33, 34, 35, 36, 37, 38, 31, 32, 29, 53, 54, 39, 40, 41, 42, 43, 44, 30, 61, 62, 63, 0, 320, 384, 45, + 59, 60, 46, 49, 50, 51, 52, 55, 56, 57, 58, 448, 512, 640, 576, 47, 48, + }, + new short[]{ // 9 bits + 1472, 1536, 1600, 1728, 704, 768, 832, 896, 960, 1024, 1088, 1152, 1216, 1280, 1344, 1408, + }, + new short[]{ // 10 bits + }, + new short[]{ // 11 bits + 1792, 1856, 1920, + }, + new short[]{ // 12 bits + 1984, 2048, 2112, 2176, 2240, 2304, 2368, 2432, 2496, 2560, + } + }; + + private static readonly Node EOL; + private static readonly Node FILL; + private static readonly Tree BlackRunTree; + private static readonly Tree WhiteRunTree; + private static readonly Tree EolOnlyTree; + private static readonly Tree CodeTree; + + const int VALUE_EOL = -2000; + const int VALUE_FILL = -1000; + const int VALUE_PASSMODE = -3000; + const int VALUE_HMODE = 
-4000; + + static CcittFaxDecoderStream() + { + EOL = new Node + { + IsLeaf = true, + Value = VALUE_EOL + }; + FILL = new Node + { + Value = VALUE_FILL + }; + FILL.Left = FILL; + FILL.Right = EOL; + + EolOnlyTree = new Tree(); + EolOnlyTree.Fill(12, 0, FILL); + EolOnlyTree.Fill(12, 1, EOL); + + BlackRunTree = new Tree(); + for (var i = 0; i < BLACK_CODES.Length; i++) + { + for (var j = 0; j < BLACK_CODES[i].Length; j++) + { + BlackRunTree.Fill(i + 2, BLACK_CODES[i][j], BLACK_RUN_LENGTHS[i][j]); + } + } + BlackRunTree.Fill(12, 0, FILL); + BlackRunTree.Fill(12, 1, EOL); + + WhiteRunTree = new Tree(); + + for (var i = 0; i < WHITE_CODES.Length; i++) + { + for (var j = 0; j < WHITE_CODES[i].Length; j++) + { + WhiteRunTree.Fill(i + 4, WHITE_CODES[i][j], WHITE_RUN_LENGTHS[i][j]); + } + } + + WhiteRunTree.Fill(12, 0, FILL); + WhiteRunTree.Fill(12, 1, EOL); + + CodeTree = new Tree(); + CodeTree.Fill(4, 1, VALUE_PASSMODE); // pass mode + CodeTree.Fill(3, 1, VALUE_HMODE); // H mode + CodeTree.Fill(1, 1, 0); // V(0) + CodeTree.Fill(3, 3, 1); // V_R(1) + CodeTree.Fill(6, 3, 2); // V_R(2) + CodeTree.Fill(7, 3, 3); // V_R(3) + CodeTree.Fill(3, 2, -1); // V_L(1) + CodeTree.Fill(6, 2, -2); // V_L(2) + CodeTree.Fill(7, 2, -3); // V_L(3) + } + } +} \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Filters/DecodeParameterResolver.cs b/src/UglyToad.PdfPig/Filters/DecodeParameterResolver.cs index 7aa7ddf0..973b45a9 100644 --- a/src/UglyToad.PdfPig/Filters/DecodeParameterResolver.cs +++ b/src/UglyToad.PdfPig/Filters/DecodeParameterResolver.cs @@ -18,9 +18,9 @@ throw new ArgumentOutOfRangeException(nameof(index), "Index must be 0 or greater"); } - var filter = GetDictionaryObject(streamDictionary, NameToken.Filter, NameToken.F); + var filter = streamDictionary.GetDictionaryObject(NameToken.Filter, NameToken.F); - var parameters = GetDictionaryObject(streamDictionary, NameToken.DecodeParms, NameToken.Dp); + var parameters = 
streamDictionary.GetDictionaryObject(NameToken.DecodeParms, NameToken.Dp); switch (filter) { @@ -45,20 +45,5 @@ return new DictionaryToken(new Dictionary()); } - - private static IToken GetDictionaryObject(DictionaryToken dictionary, NameToken first, NameToken second) - { - if (dictionary.TryGet(first, out var token)) - { - return token; - } - - if (dictionary.TryGet(second, out token)) - { - return token; - } - - return null; - } } } \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Filters/DefaultFilterProvider.cs b/src/UglyToad.PdfPig/Filters/DefaultFilterProvider.cs index 7c198e96..ffd41ad1 100644 --- a/src/UglyToad.PdfPig/Filters/DefaultFilterProvider.cs +++ b/src/UglyToad.PdfPig/Filters/DefaultFilterProvider.cs @@ -60,7 +60,8 @@ throw new ArgumentNullException(nameof(dictionary)); } - if (!dictionary.TryGet(NameToken.Filter, out var token)) + var token = dictionary.GetDictionaryObject(NameToken.Filter, NameToken.F); + if (token == null) { return EmptyArray.Instance; } diff --git a/src/UglyToad.PdfPig/Filters/FilterProviderWithLookup.cs b/src/UglyToad.PdfPig/Filters/FilterProviderWithLookup.cs index 5405782f..efc95d85 100644 --- a/src/UglyToad.PdfPig/Filters/FilterProviderWithLookup.cs +++ b/src/UglyToad.PdfPig/Filters/FilterProviderWithLookup.cs @@ -33,7 +33,8 @@ throw new ArgumentNullException(nameof(dictionary)); } - if (!dictionary.TryGet(NameToken.Filter, out var token)) + var token = dictionary.GetDictionaryObject(NameToken.Filter, NameToken.F); + if (token == null) { return EmptyArray.Instance; } diff --git a/src/UglyToad.PdfPig/Graphics/Colors/ColorSpaceDetails.cs b/src/UglyToad.PdfPig/Graphics/Colors/ColorSpaceDetails.cs index 711144d9..7b613114 100644 --- a/src/UglyToad.PdfPig/Graphics/Colors/ColorSpaceDetails.cs +++ b/src/UglyToad.PdfPig/Graphics/Colors/ColorSpaceDetails.cs @@ -84,6 +84,14 @@ /// public class IndexedColorSpaceDetails : ColorSpaceDetails { + /// + /// The lossless CCITT compression schemes are used for bitonal black and 
white images. + /// This is equivalent to an IndexedColorSpaceDetails with a black and white palette. + /// Index 0 is white and index 1 is black. + /// + internal static readonly IndexedColorSpaceDetails CCITTFaxColorSpaceDetails + = new IndexedColorSpaceDetails(DeviceGrayColorSpaceDetails.Instance, 1, new byte[] { 255, 0 }); + /// /// The base color space in which the values in the color table are to be interpreted. /// It can be any device or CIE-based color space or(in PDF 1.3) a Separation or DeviceN space, diff --git a/src/UglyToad.PdfPig/Graphics/InlineImageBuilder.cs b/src/UglyToad.PdfPig/Graphics/InlineImageBuilder.cs index ec95c769..c7ddd123 100644 --- a/src/UglyToad.PdfPig/Graphics/InlineImageBuilder.cs +++ b/src/UglyToad.PdfPig/Graphics/InlineImageBuilder.cs @@ -114,30 +114,11 @@ var decode = decodeRaw.Data.OfType().Select(x => x.Data).ToArray(); - var filterDictionaryEntries = new Dictionary(); - var decodeParamsDict = GetByKeys(NameToken.DecodeParms, NameToken.Dp, false); - - if (decodeParamsDict == null) - { - var decodeParamsArray = GetByKeys(NameToken.DecodeParms, NameToken.Dp, false); - - if (decodeParamsArray != null) - { - filterDictionaryEntries[NameToken.DecodeParms] = decodeParamsArray; - } - } - else - { - filterDictionaryEntries[NameToken.DecodeParms] = decodeParamsDict; - } - - var streamDictionary = new DictionaryToken(filterDictionaryEntries); - var interpolate = GetByKeys(NameToken.Interpolate, NameToken.I, false)?.Data ?? 
false; return new InlineImage(bounds, width, height, bitsPerComponent, isMask, renderingIntent, interpolate, colorSpace, decode, Bytes, filters, - streamDictionary, + imgDic, details); } diff --git a/src/UglyToad.PdfPig/IO/StreamWrapper.cs b/src/UglyToad.PdfPig/IO/StreamWrapper.cs new file mode 100644 index 00000000..80a4668c --- /dev/null +++ b/src/UglyToad.PdfPig/IO/StreamWrapper.cs @@ -0,0 +1,74 @@ +namespace UglyToad.PdfPig.IO +{ + using System.IO; + + internal class StreamWrapper : Stream + { + protected readonly Stream Stream; + + public StreamWrapper(Stream stream) + { + Stream = stream; + } + + public override void Flush() + { + Stream.Flush(); + } + + public override long Seek(long offset, SeekOrigin origin) + { + return Stream.Seek(offset, origin); + } + + public override void SetLength(long value) + { + Stream.SetLength(value); + } + + public override int Read(byte[] buffer, int offset, int count) + { + return Stream.Read(buffer, offset, count); + } + + public override void Write(byte[] buffer, int offset, int count) + { + Stream.Write(buffer, offset, count); + } + + public override bool CanRead + { + get { return Stream.CanRead; } + } + + public override bool CanSeek + { + get { return Stream.CanSeek; } + } + + public override bool CanWrite + { + get { return Stream.CanWrite; } + } + + public override long Length + { + get { return Stream.Length; } + } + + public override long Position + { + get { return Stream.Position; } + set { Stream.Position = value; } + } + + protected override void Dispose(bool disposing) + { + base.Dispose(disposing); + // dispose stream + using (Stream) + { + } + } + } +} diff --git a/src/UglyToad.PdfPig/PdfExtensions.cs b/src/UglyToad.PdfPig/PdfExtensions.cs index 0d0809fb..cb9f527f 100644 --- a/src/UglyToad.PdfPig/PdfExtensions.cs +++ b/src/UglyToad.PdfPig/PdfExtensions.cs @@ -45,8 +45,51 @@ } return typedToken; + } + + internal static IToken GetDictionaryObject(this DictionaryToken dictionary, NameToken name) + { + if 
(dictionary.TryGet(name, out var token)) + { + return token; + } + + return null; + } + + internal static IToken GetDictionaryObject(this DictionaryToken dictionary, NameToken first, NameToken second) + { + if (dictionary.TryGet(first, out var token)) + { + return token; + } + + if (dictionary.TryGet(second, out token)) + { + return token; + } + + return null; } - + + internal static int GetInt(this DictionaryToken dictionary, NameToken name, int defaultValue) + { + var numericToken = dictionary.GetDictionaryObject(name) as NumericToken; + return numericToken?.Int ?? defaultValue; + } + + internal static int GetInt(this DictionaryToken dictionary, NameToken first, NameToken second, int defaultValue) + { + var numericToken = dictionary.GetDictionaryObject(first, second) as NumericToken; + return numericToken?.Int ?? defaultValue; + } + + internal static bool GetBoolean(this DictionaryToken dictionary, NameToken name, bool defaultValue) + { + var booleanToken = dictionary.GetDictionaryObject(name) as BooleanToken; + return booleanToken?.Data ?? defaultValue; + } + /// /// Get the decoded data from this stream. 
///
diff --git a/src/UglyToad.PdfPig/Util/ArrayHelper.cs b/src/UglyToad.PdfPig/Util/ArrayHelper.cs
new file mode 100644
index 00000000..fc72157a
--- /dev/null
+++ b/src/UglyToad.PdfPig/Util/ArrayHelper.cs
@@ -0,0 +1,44 @@
+namespace UglyToad.PdfPig.Util
+{
+    using System;
+
+    /// <summary>
+    /// Array utility methods.
+    /// </summary>
+    internal static class ArrayHelper
+    {
+        /// <summary>
+        /// Assigns <paramref name="value"/> to every element in the half-open index range
+        /// [<paramref name="start"/>, <paramref name="end"/>); an empty range leaves the array untouched.
+        /// </summary>
+        /// <param name="array">The array to write into.</param>
+        /// <param name="start">Index of the first element to set (inclusive).</param>
+        /// <param name="end">Index one past the last element to set (exclusive); may equal the array length.</param>
+        /// <param name="value">The value stored in each element of the range.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="array"/> is null.</exception>
+        /// <exception cref="ArgumentOutOfRangeException">The range is negative, inverted or runs past the end of the array.</exception>
+        public static void Fill<T>(T[] array, int start, int end, T value)
+        {
+            if (array == null)
+            {
+                throw new ArgumentNullException(nameof(array));
+            }
+
+            if (start < 0 || start > end)
+            {
+                throw new ArgumentOutOfRangeException(nameof(start));
+            }
+
+            // end is exclusive, so end == array.Length is the valid way to fill through the last element.
+            if (end > array.Length)
+            {
+                throw new ArgumentOutOfRangeException(nameof(end));
+            }
+
+            for (var i = start; i < end; i++)
+            {
+                array[i] = value;
+            }
+        }
+    }
+}
diff --git a/src/UglyToad.PdfPig/Util/ColorSpaceDetailsParser.cs b/src/UglyToad.PdfPig/Util/ColorSpaceDetailsParser.cs
index ec15c479..0e53c038 100644
--- a/src/UglyToad.PdfPig/Util/ColorSpaceDetailsParser.cs
+++ b/src/UglyToad.PdfPig/Util/ColorSpaceDetailsParser.cs
@@ -1,6 +1,7 @@
 namespace UglyToad.PdfPig.Util
 {
-    using System.Collections.Generic;
+    using System.Collections.Generic;
+    using System.Linq;
     using Content;
     using Core;
     using Filters;
@@ -74,6 +75,11 @@
             ILookupFilterProvider filterProvider,
             bool cannotRecurse = false)
         {
+            if (filterProvider.GetFilters(imageDictionary).OfType<CcittFaxDecodeFilter>().Any())
+            {
+                return IndexedColorSpaceDetails.CCITTFaxColorSpaceDetails;
+            }
+
             if (!colorSpace.HasValue)
             {
                 return UnsupportedColorSpaceDetails.Instance;