diff --git a/src/UglyToad.Pdf.Tests/Filters/AsciiHexDecodeFilterTests.cs b/src/UglyToad.Pdf.Tests/Filters/AsciiHexDecodeFilterTests.cs
new file mode 100644
index 00000000..01f0673b
--- /dev/null
+++ b/src/UglyToad.Pdf.Tests/Filters/AsciiHexDecodeFilterTests.cs
@@ -0,0 +1,117 @@
+namespace UglyToad.Pdf.Tests.Filters
+{
+    using System;
+    using System.Text;
+    using ContentStream;
+    using Pdf.Filters;
+    using Xunit;
+
+    public class AsciiHexDecodeFilterTests
+    {
+        [Fact]
+        public void DecodesEncodedTextProperly()
+        {
+            const string text = "she sells seashells on the sea shore";
+
+            var input = Encoding.ASCII.GetBytes(
+                "7368652073656C6C73207365617368656C6C73206F6E20746865207365612073686F7265");
+
+            var decoded = new AsciiHexDecodeFilter().Decode(input, new PdfDictionary(), 1);
+
+            var decodedText = Encoding.ASCII.GetString(decoded);
+
+            Assert.Equal(text, decodedText);
+        }
+
+        [Fact]
+        public void DecodesEncodedTextWithBracesProperly()
+        {
+            const string text = "she sells seashells on the sea shore";
+
+            var input = Encoding.ASCII.GetBytes(
+                "<7368652073656C6C73207365617368656C6C73206F6E20746865207365612073686F7265>");
+
+            var decoded = new AsciiHexDecodeFilter().Decode(input, new PdfDictionary(), 1);
+
+            var decodedText = Encoding.ASCII.GetString(decoded);
+
+            Assert.Equal(text, decodedText);
+        }
+
+        [Fact]
+        public void DecodesEncodedTextWithWhitespaceProperly()
+        {
+            const string text = "once upon a time in a galaxy Far Far Away";
+
+            var input = Encoding.ASCII.GetBytes(
+                @"6F6E6365207 5706F6E206120 74696D6520696E
+                20612067616C6178792046617220466172204177 6179");
+
+            var decoded = new AsciiHexDecodeFilter().Decode(input, new PdfDictionary(), 1);
+
+            var decodedText = Encoding.ASCII.GetString(decoded);
+
+            Assert.Equal(text, decodedText);
+        }
+
+        [Fact]
+        public void DecodesEncodedTextLowercaseProperly()
+        {
+            const string text = "once upon a time in a galaxy Far Far Away";
+
+            var input = Encoding.ASCII.GetBytes("6f6e63652075706f6e20612074696d6520696e20612067616c61787920466172204661722041776179");
+
+            var decoded = new AsciiHexDecodeFilter().Decode(input, new PdfDictionary(), 1);
+
+            var decodedText = Encoding.ASCII.GetString(decoded);
+
+            Assert.Equal(text, decodedText);
+        }
+
+        [Fact]
+        public void DecodeWithInvalidCharactersThrows()
+        {
+            var input = Encoding.ASCII.GetBytes("6f6eHappyHungryHippos6d6520696e20612067616c61787920466172204661722041776179");
+
+            Action action = () => new AsciiHexDecodeFilter().Decode(input, new PdfDictionary(), 1);
+
+            Assert.Throws<InvalidOperationException>(action);
+        }
+
+        [Fact]
+        public void DecodeWithCharacterOutsideLookupTableThrows()
+        {
+            // 'z' (122) has a higher code than 'f' (102), the last entry in the filter's lookup table.
+            var input = Encoding.ASCII.GetBytes("6f6ezz");
+
+            Action action = () => new AsciiHexDecodeFilter().Decode(input, new PdfDictionary(), 1);
+
+            Assert.Throws<InvalidOperationException>(action);
+        }
+
+        [Fact]
+        public void DecodeTreatsTrailingOddDigitAsFollowedByZero()
+        {
+            // The unpaired '7' must decode as 0x70 which is 'p'.
+            var input = Encoding.ASCII.GetBytes("68697>");
+
+            var decoded = new AsciiHexDecodeFilter().Decode(input, new PdfDictionary(), 1);
+
+            Assert.Equal("hip", Encoding.ASCII.GetString(decoded));
+        }
+
+        [Fact]
+        public void DecodesEncodedTextStoppingAtLastBrace()
+        {
+            const string text = "once upon a time in a galaxy Far Far Away";
+
+            var input = Encoding.ASCII.GetBytes("6f6e63652075706f6e20612074696d6520696e20612067616c61787920466172204661722041776179> There is stuff following the EOD.");
+
+            var decoded = new AsciiHexDecodeFilter().Decode(input, new PdfDictionary(), 1);
+
+            var decodedText = Encoding.ASCII.GetString(decoded);
+
+            Assert.Equal(text, decodedText);
+        }
+    }
+}
diff --git a/src/UglyToad.Pdf/Filters/AsciiHexDecodeFilter.cs b/src/UglyToad.Pdf/Filters/AsciiHexDecodeFilter.cs
new file mode 100644
index 00000000..4ed6f7ec
--- /dev/null
+++ b/src/UglyToad.Pdf/Filters/AsciiHexDecodeFilter.cs
@@ -0,0 +1,112 @@
+namespace UglyToad.Pdf.Filters
+{
+    using System;
+    using System.IO;
+    using ContentStream;
+
+    /// <summary>
+    /// Decodes data which has been encoded as pairs of ASCII hexadecimal digits.
+    /// </summary>
+    internal class AsciiHexDecodeFilter : IFilter
+    {
+        // Maps an ASCII code to the value of the hex digit it represents, -1 marks a character which is not a hex digit.
+        // The table ends at 'f' (102) so ToHexValue must range check a byte before indexing into it.
+        private static readonly short[] ReverseHex =
+        {
+            /*   0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+            /*  10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+            /*  20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+            /*  30 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+            /*  40 */ -1, -1, -1, -1, -1, -1, -1, -1,  0,  1,
+            /*  50 */  2,  3,  4,  5,  6,  7,  8,  9, -1, -1,
+            /*  60 */ -1, -1, -1, -1, -1, 10, 11, 12, 13, 14,
+            /*  70 */ 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+            /*  80 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+            /*  90 */ -1, -1, -1, -1, -1, -1, -1, 10, 11, 12,
+            /* 100 */ 13, 14, 15
+        };
+
+        /// <summary>
+        /// Converts each pair of hexadecimal digits in <paramref name="input"/> to a single byte.
+        /// </summary>
+        /// <remarks>
+        /// Whitespace and any opening angle bracket are skipped and decoding stops at the first closing angle bracket.
+        /// A final unpaired digit is decoded as though it were followed by a zero digit.
+        /// </remarks>
+        /// <param name="input">The hex encoded bytes.</param>
+        /// <param name="streamDictionary">Not used by this filter.</param>
+        /// <param name="filterIndex">Not used by this filter.</param>
+        /// <returns>The decoded bytes.</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
+        /// <exception cref="InvalidOperationException">A character which is not a hex digit was encountered.</exception>
+        public byte[] Decode(byte[] input, PdfDictionary streamDictionary, int filterIndex)
+        {
+            if (input == null)
+            {
+                throw new ArgumentNullException(nameof(input));
+            }
+
+            // Value of the first digit of the current pair, -1 while no digit is waiting for its partner.
+            var pending = -1;
+
+            using (var memoryStream = new MemoryStream())
+            {
+                foreach (var current in input)
+                {
+                    if (current == '>')
+                    {
+                        break;
+                    }
+
+                    if (IsWhitespace(current) || current == '<')
+                    {
+                        continue;
+                    }
+
+                    var digit = ToHexValue(current);
+
+                    if (pending < 0)
+                    {
+                        pending = digit;
+                    }
+                    else
+                    {
+                        memoryStream.WriteByte((byte) (pending * 16 + digit));
+                        pending = -1;
+                    }
+                }
+
+                if (pending >= 0)
+                {
+                    // Odd number of digits: behave as if a '0' followed the last one.
+                    memoryStream.WriteByte((byte) (pending * 16));
+                }
+
+                return memoryStream.ToArray();
+            }
+        }
+
+        /// <summary>
+        /// Gets the value (0 to 15) of the hex digit with the given ASCII code.
+        /// </summary>
+        /// <exception cref="InvalidOperationException">The character is not a hex digit.</exception>
+        private static int ToHexValue(byte character)
+        {
+            // Bytes past the end of the table would otherwise surface as an IndexOutOfRangeException.
+            if (character >= ReverseHex.Length || ReverseHex[character] < 0)
+            {
+                throw new InvalidOperationException("Invalid character encountered in hex encoded stream: " + (char) character);
+            }
+
+            return ReverseHex[character];
+        }
+
+        /// <summary>
+        /// Whether the byte is skipped between hex digits: NUL, tab, line feed, form feed, carriage return or space.
+        /// </summary>
+        private static bool IsWhitespace(byte c)
+        {
+            return c == 0 || c == '\t' || c == '\n' || c == '\f' || c == '\r' || c == ' ';
+        }
+    }
+}
diff --git a/src/UglyToad.Pdf/Filters/IFilterProvider.cs b/src/UglyToad.Pdf/Filters/IFilterProvider.cs
index 228cb656..b2f42f77 100644
--- a/src/UglyToad.Pdf/Filters/IFilterProvider.cs
+++ b/src/UglyToad.Pdf/Filters/IFilterProvider.cs
@@ -22,13 +22,16 @@
         {
             IFilter FlateFunc() => new FlateFilter(decodeParameterResolver, pngPredictor, log);
             IFilter Ascii85Func() => new Ascii85Filter();
+            IFilter AsciiHexFunc() => new AsciiHexDecodeFilter();
 
             filterFactories = new Dictionary<CosName, Func<IFilter>>
             {
                 {CosName.FLATE_DECODE, FlateFunc},
                 {CosName.FLATE_DECODE_ABBREVIATION, FlateFunc},
                 {CosName.ASCII85_DECODE, Ascii85Func},
-                {CosName.ASCII85_DECODE_ABBREVIATION, Ascii85Func}
+                {CosName.ASCII85_DECODE_ABBREVIATION, Ascii85Func},
+                {CosName.ASCII_HEX_DECODE, AsciiHexFunc},
+                {CosName.ASCII_HEX_DECODE_ABBREVIATION, AsciiHexFunc}
             };
         }
 