diff --git a/src/UglyToad.Pdf.Tests/Filters/AsciiHexDecodeFilterTests.cs b/src/UglyToad.Pdf.Tests/Filters/AsciiHexDecodeFilterTests.cs
--- a/src/UglyToad.Pdf.Tests/Filters/AsciiHexDecodeFilterTests.cs
+++ b/src/UglyToad.Pdf.Tests/Filters/AsciiHexDecodeFilterTests.cs
@@ -68,16 +68,29 @@
             Assert.Equal(text, decodedText);
         }
 
-        [Fact]
-        public void DecodeWithInvalidCharactersThrows()
+        [Theory]
+        [InlineData("ZA")]
+        [InlineData("AM")]
+        public void DecodeWithInvalidCharactersThrows(string inputString)
         {
-            var input = Encoding.ASCII.GetBytes("6f6eHappyHungryHippos6d6520696e20612067616c61787920466172204661722041776179");
+            var input = Encoding.ASCII.GetBytes(inputString);
 
             Action action = () => new AsciiHexDecodeFilter().Decode(input, new PdfDictionary(), 1);
 
             Assert.Throws<InvalidOperationException>(action);
         }
 
+        [Fact]
+        public void SubstitutesZeroForLastByte()
+        {
+            var input = Encoding.ASCII.GetBytes("AE5>");
+
+            var decoded = new AsciiHexDecodeFilter().Decode(input, new PdfDictionary(), 1);
+
+            // "AE" decodes to 0xAE; the unpaired trailing '5' is completed with a zero nibble, giving 0x50.
+            Assert.Equal(new byte[] { 0xAE, 0x50 }, decoded);
+        }
+
         [Fact]
         public void DecodesEncodedTextStoppingAtLastBrace()
         {
diff --git a/src/UglyToad.Pdf.Tests/Filters/RunLengthFilterTests.cs b/src/UglyToad.Pdf.Tests/Filters/RunLengthFilterTests.cs
new file mode 100644
--- /dev/null
+++ b/src/UglyToad.Pdf.Tests/Filters/RunLengthFilterTests.cs
@@ -0,0 +1,90 @@
+namespace UglyToad.Pdf.Tests.Filters
+{
+    using ContentStream;
+    using Pdf.Filters;
+    using Xunit;
+
+    public class RunLengthFilterTests
+    {
+        private readonly RunLengthFilter filter = new RunLengthFilter();
+
+        [Fact]
+        public void CanDecodeRunLengthEncodedData()
+        {
+            var data = new byte[]
+            {
+                // Write the following 6 bytes literally
+                5, 0, 1, 2, 69, 12, 9,
+                // Repeat 52 (257 - 254) 3 times
+                254, 52,
+                // Write the following 3 bytes literally
+                2, 60, 61, 16,
+                // Repeat 12 (257 - 250) 7 times
+                250, 12,
+                // Write the following 2 bytes literally
+                1, 10, 19
+            };
+
+            var decoded = filter.Decode(data, new PdfDictionary(), 1);
+
+            var expectedResult = new byte[]
+            {
+                0, 1, 2, 69, 12, 9,
+                52, 52, 52,
+                60, 61, 16,
+                12, 12, 12, 12, 12, 12, 12,
+                10, 19
+            };
+
+            Assert.Equal(expectedResult, decoded);
+        }
+
+        [Fact]
+        public void StopsAtEndOfDataByte()
+        {
+            var data = new byte[]
+            {
+                // Repeat 7 (257 - 254) 3 times
+                254, 7,
+                // Write the following 2 bytes literally
+                1, 128, 50,
+                // End of Data Byte
+                128,
+                // Ignore these
+                90, 6, 7
+            };
+
+            var decoded = filter.Decode(data, new PdfDictionary(), 0);
+
+            var expectedResult = new byte[]
+            {
+                7, 7, 7,
+                128, 50
+            };
+
+            Assert.Equal(expectedResult, decoded);
+        }
+
+        [Fact]
+        public void TruncatedLiteralRunCopiesAvailableBytes()
+        {
+            // The length byte promises 4 literal bytes but only 2 are present.
+            var data = new byte[] { 3, 10, 20 };
+
+            var decoded = filter.Decode(data, new PdfDictionary(), 0);
+
+            Assert.Equal(new byte[] { 10, 20 }, decoded);
+        }
+
+        [Fact]
+        public void TruncatedRepeatRunIsIgnored()
+        {
+            // The final length byte asks for a repeat but the byte to repeat is missing.
+            var data = new byte[] { 0, 5, 254 };
+
+            var decoded = filter.Decode(data, new PdfDictionary(), 0);
+
+            Assert.Equal(new byte[] { 5 }, decoded);
+        }
+    }
+}
diff --git a/src/UglyToad.Pdf/Filters/DecodeResult.cs b/src/UglyToad.Pdf/Filters/DecodeResult.cs
deleted file mode 100644
index 663ea211..00000000
--- a/src/UglyToad.Pdf/Filters/DecodeResult.cs
+++ /dev/null
@@ -1,51 +0,0 @@
-namespace UglyToad.Pdf.Filters
-{
-    using Cos;
-
-    public class DecodeResult
-    {
-        /** Default decode result. */
-        public static DecodeResult DEFAULT = new DecodeResult(new CosDictionary());
-
-        private readonly CosDictionary parameters;
-        private PDJPXColorSpace colorSpace;
-
-        public DecodeResult(CosDictionary parameters)
-        {
-            this.parameters = parameters;
-        }
-
-        public DecodeResult(CosDictionary parameters, PDJPXColorSpace colorSpace)
-        {
-            this.parameters = parameters;
-            this.colorSpace = colorSpace;
-        }
-
-        /**
-         * Returns the stream parameters, repaired using the embedded stream data.
-         * @return the repaired stream parameters, or an empty dictionary
-         */
-        public CosDictionary getParameters()
-        {
-            return parameters;
-        }
-
-        /**
-         * Returns the embedded JPX color space, if any.
-         * @return the the embedded JPX color space, or null if there is none.
-         */
-        public PDJPXColorSpace getJPXColorSpace()
-        {
-            return colorSpace;
-        }
-
-        // Sets the JPX color space
-        void setColorSpace(PDJPXColorSpace colorSpace)
-        {
-            this.colorSpace = colorSpace;
-        }
-    }
-
-    public class PDJPXColorSpace { }
-
-}
diff --git a/src/UglyToad.Pdf/Filters/IFilterProvider.cs b/src/UglyToad.Pdf/Filters/IFilterProvider.cs
index b2f42f77..9c35e40a 100644
--- a/src/UglyToad.Pdf/Filters/IFilterProvider.cs
+++ b/src/UglyToad.Pdf/Filters/IFilterProvider.cs
@@ -1,11 +1,7 @@
 namespace UglyToad.Pdf.Filters
 {
-    using System;
     using System.Collections.Generic;
-    using System.Linq;
     using ContentStream;
-    using Cos;
-    using Logging;
 
     internal interface IFilterProvider
     {
@@ -13,68 +9,4 @@
 
         IReadOnlyList<IFilter> GetAllFilters();
     }
-
-    internal class MemoryFilterProvider : IFilterProvider
-    {
-        private readonly IReadOnlyDictionary<CosName, Func<IFilter>> filterFactories;
-
-        public MemoryFilterProvider(IDecodeParameterResolver decodeParameterResolver, IPngPredictor pngPredictor, ILog log)
-        {
-            IFilter FlateFunc() => new FlateFilter(decodeParameterResolver, pngPredictor, log);
-            IFilter Ascii85Func() => new Ascii85Filter();
-            IFilter AsciiHexFunc() => new AsciiHexDecodeFilter();
-
-            filterFactories = new Dictionary<CosName, Func<IFilter>>
-            {
-                {CosName.FLATE_DECODE, FlateFunc},
-                {CosName.FLATE_DECODE_ABBREVIATION, FlateFunc},
-                {CosName.ASCII85_DECODE, Ascii85Func},
-                {CosName.ASCII85_DECODE_ABBREVIATION, Ascii85Func},
-                {CosName.ASCII_HEX_DECODE, AsciiHexFunc},
-                {CosName.ASCII_HEX_DECODE_ABBREVIATION, AsciiHexFunc}
-            };
-        }
-
-        public IReadOnlyList<IFilter> GetFilters(PdfDictionary streamDictionary)
-        {
-            if (streamDictionary == null)
-            {
-                throw new ArgumentNullException(nameof(streamDictionary));
-            }
-
-            var filterObject = streamDictionary.GetItemOrDefault(CosName.FILTER);
-
-            if (filterObject == null)
-            {
-                return new IFilter[0];
-            }
-
-            switch (filterObject)
-            {
-                case COSArray filters:
-                    // TODO: presumably this may be invalid...
-                    return filters.Select(x => GetFilterStrict((CosName) x)).ToList();
-                case CosName name:
-                    return new[] {GetFilterStrict(name)};
-                default:
-                    throw new InvalidOperationException("The filter for a stream may be either a string or an array, instead this Pdf has: "
-                                                        + filterObject.GetType());
-            }
-        }
-
-        private IFilter GetFilterStrict(CosName name)
-        {
-            if (!filterFactories.TryGetValue(name, out var factory))
-            {
-                throw new NotSupportedException($"The filter with the name {name} is not supported yet. Please raise an issue.");
-            }
-
-            return factory();
-        }
-
-        public IReadOnlyList<IFilter> GetAllFilters()
-        {
-            throw new System.NotImplementedException();
-        }
-    }
 }
\ No newline at end of file
diff --git a/src/UglyToad.Pdf/Filters/MemoryFilterProvider.cs b/src/UglyToad.Pdf/Filters/MemoryFilterProvider.cs
new file mode 100644
index 00000000..7155e785
--- /dev/null
+++ b/src/UglyToad.Pdf/Filters/MemoryFilterProvider.cs
@@ -0,0 +1,76 @@
+namespace UglyToad.Pdf.Filters
+{
+    using System;
+    using System.Collections.Generic;
+    using System.Linq;
+    using ContentStream;
+    using Cos;
+    using Logging;
+
+    internal class MemoryFilterProvider : IFilterProvider
+    {
+        private readonly IReadOnlyDictionary<CosName, Func<IFilter>> filterFactories;
+
+        public MemoryFilterProvider(IDecodeParameterResolver decodeParameterResolver, IPngPredictor pngPredictor, ILog log)
+        {
+            IFilter Ascii85Func() => new Ascii85Filter();
+            IFilter AsciiHexFunc() => new AsciiHexDecodeFilter();
+            IFilter FlateFunc() => new FlateFilter(decodeParameterResolver, pngPredictor, log);
+            IFilter RunLengthFunc() => new RunLengthFilter();
+
+            filterFactories = new Dictionary<CosName, Func<IFilter>>
+            {
+                {CosName.ASCII85_DECODE, Ascii85Func},
+                {CosName.ASCII85_DECODE_ABBREVIATION, Ascii85Func},
+                {CosName.ASCII_HEX_DECODE, AsciiHexFunc},
+                {CosName.ASCII_HEX_DECODE_ABBREVIATION, AsciiHexFunc},
+                {CosName.FLATE_DECODE, FlateFunc},
+                {CosName.FLATE_DECODE_ABBREVIATION, FlateFunc},
+                {CosName.RUN_LENGTH_DECODE, RunLengthFunc},
+                {CosName.RUN_LENGTH_DECODE_ABBREVIATION, RunLengthFunc}
+            };
+        }
+
+        public IReadOnlyList<IFilter> GetFilters(PdfDictionary streamDictionary)
+        {
+            if (streamDictionary == null)
+            {
+                throw new ArgumentNullException(nameof(streamDictionary));
+            }
+
+            var filterObject = streamDictionary.GetItemOrDefault(CosName.FILTER);
+
+            if (filterObject == null)
+            {
+                return new IFilter[0];
+            }
+
+            switch (filterObject)
+            {
+                case COSArray filters:
+                    // TODO: presumably this may be invalid...
+                    return filters.Select(x => GetFilterStrict((CosName) x)).ToList();
+                case CosName name:
+                    return new[] {GetFilterStrict(name)};
+                default:
+                    throw new InvalidOperationException("The filter for a stream may be either a string or an array, instead this Pdf has: "
+                                                        + filterObject.GetType());
+            }
+        }
+
+        private IFilter GetFilterStrict(CosName name)
+        {
+            if (!filterFactories.TryGetValue(name, out var factory))
+            {
+                throw new NotSupportedException($"The filter with the name {name} is not supported yet. Please raise an issue.");
+            }
+
+            return factory();
+        }
+
+        public IReadOnlyList<IFilter> GetAllFilters()
+        {
+            throw new System.NotImplementedException();
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/UglyToad.Pdf/Filters/RunLengthFilter.cs b/src/UglyToad.Pdf/Filters/RunLengthFilter.cs
new file mode 100644
--- /dev/null
+++ b/src/UglyToad.Pdf/Filters/RunLengthFilter.cs
@@ -0,0 +1,82 @@
+namespace UglyToad.Pdf.Filters
+{
+    using System;
+    using System.IO;
+    using ContentStream;
+
+    /// <summary>
+    /// Decodes data compressed with the byte-oriented run-length scheme: each run starts with a length byte.
+    /// </summary>
+    internal class RunLengthFilter : IFilter
+    {
+        // A length byte with this value marks the end of the encoded data.
+        private const byte EndOfDataLength = 128;
+
+        /// <summary>
+        /// Expands the run-length encoded <paramref name="input"/> into its raw bytes.
+        /// </summary>
+        /// <param name="input">The encoded bytes.</param>
+        /// <param name="streamDictionary">Not read by this filter.</param>
+        /// <param name="filterIndex">Not read by this filter.</param>
+        /// <returns>The decoded bytes. Truncated input yields everything decoded up to the truncation point.</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
+        public byte[] Decode(byte[] input, PdfDictionary streamDictionary, int filterIndex)
+        {
+            if (input == null)
+            {
+                throw new ArgumentNullException(nameof(input));
+            }
+
+            using (var memoryStream = new MemoryStream())
+            {
+                var i = 0;
+                while (i < input.Length)
+                {
+                    var runLength = input[i];
+
+                    if (runLength == EndOfDataLength)
+                    {
+                        break;
+                    }
+
+                    // The data this length byte refers to starts immediately after it.
+                    var dataStart = i + 1;
+
+                    // If the length byte is in the range 0 - 127 copy the following length + 1 bytes literally.
+                    if (runLength <= 127)
+                    {
+                        // Clamp to the bytes actually present so truncated input cannot be read past its end.
+                        var literalCount = Math.Min(runLength + 1, input.Length - dataStart);
+
+                        memoryStream.Write(input, dataStart, literalCount);
+
+                        // Move to the length byte following the literal run.
+                        i = dataStart + literalCount;
+                    }
+                    // Otherwise copy the single following byte 257 - length times (between 2 - 128 times).
+                    else
+                    {
+                        if (dataStart >= input.Length)
+                        {
+                            // Truncated input: the byte to repeat is missing, so there is nothing more to decode.
+                            break;
+                        }
+
+                        var numberOfTimesToCopy = 257 - runLength;
+                        var byteToCopy = input[dataStart];
+
+                        for (var j = 0; j < numberOfTimesToCopy; j++)
+                        {
+                            memoryStream.WriteByte(byteToCopy);
+                        }
+
+                        // Move to the length byte after the repeated byte.
+                        i = dataStart + 1;
+                    }
+                }
+
+                return memoryStream.ToArray();
+            }
+        }
+    }
+}