add support for ascii hex encoding in streams

This commit is contained in:
Eliot Jones
2017-12-28 18:10:18 +00:00
parent 1feaf878cb
commit 26e244371b
3 changed files with 194 additions and 1 deletions

View File

@@ -0,0 +1,95 @@
namespace UglyToad.Pdf.Tests.Filters
{
using System;
using System.Text;
using ContentStream;
using Pdf.Filters;
using Xunit;
/// <summary>
/// Exercises <see cref="AsciiHexDecodeFilter"/> against plain, bracketed, whitespace-separated,
/// lowercase, invalid and end-of-data-terminated ASCII hex input.
/// </summary>
public class AsciiHexDecodeFilterTests
{
    // Expected plain text for the "sea shore" inputs.
    private const string SeaShoreText = "she sells seashells on the sea shore";

    // Expected plain text for the "galaxy" inputs.
    private const string GalaxyText = "once upon a time in a galaxy Far Far Away";

    [Fact]
    public void DecodesEncodedTextProperly()
    {
        var actual = DecodeToText(
            "7368652073656C6C73207365617368656C6C73206F6E20746865207365612073686F7265");

        Assert.Equal(SeaShoreText, actual);
    }

    [Fact]
    public void DecodesEncodedTextWithBracesProperly()
    {
        // The surrounding '<' and '>' delimiters must not end up in the output.
        var actual = DecodeToText(
            "<7368652073656C6C73207365617368656C6C73206F6E20746865207365612073686F7265>");

        Assert.Equal(SeaShoreText, actual);
    }

    [Fact]
    public void DecodesEncodedTextWithWhitespaceProperly()
    {
        // Verbatim literal: the embedded spaces and the line break are part of the input,
        // including where they split a digit pair.
        var actual = DecodeToText(
            @"6F6E6365207 5706F6E206120 74696D6520696E
20612067616C6178792046617220466172204177 6179");

        Assert.Equal(GalaxyText, actual);
    }

    [Fact]
    public void DecodesEncodedTextLowercaseProperly()
    {
        var actual = DecodeToText("6f6e63652075706f6e20612074696d6520696e20612067616c61787920466172204661722041776179");

        Assert.Equal(GalaxyText, actual);
    }

    [Fact]
    public void DecodeWithInvalidCharactersThrows()
    {
        var encoded = Encoding.ASCII.GetBytes("6f6eHappyHungryHippos6d6520696e20612067616c61787920466172204661722041776179");

        Assert.Throws<InvalidOperationException>(() =>
        {
            new AsciiHexDecodeFilter().Decode(encoded, new PdfDictionary(), 1);
        });
    }

    [Fact]
    public void DecodesEncodedTextStoppingAtLastBrace()
    {
        // Everything after the '>' end-of-data marker must be ignored.
        var actual = DecodeToText("6f6e63652075706f6e20612074696d6520696e20612067616c61787920466172204661722041776179> There is stuff following the EOD.");

        Assert.Equal(GalaxyText, actual);
    }

    /// <summary>
    /// Runs <paramref name="encoded"/> through a fresh filter and returns the decoded bytes as ASCII text.
    /// </summary>
    private static string DecodeToText(string encoded)
    {
        var encodedBytes = Encoding.ASCII.GetBytes(encoded);
        var decodedBytes = new AsciiHexDecodeFilter().Decode(encodedBytes, new PdfDictionary(), 1);

        return Encoding.ASCII.GetString(decodedBytes);
    }
}
}

View File

@@ -0,0 +1,95 @@
namespace UglyToad.Pdf.Filters
{
using System;
using System.IO;
using ContentStream;
/// <summary>
/// Decodes data written as ASCII hexadecimal digits, two digits per output byte.
/// </summary>
/// <remarks>
/// Rules implemented by <see cref="Decode"/>:
/// <list type="bullet">
/// <item><description>Upper and lower case hex digits are both accepted.</description></item>
/// <item><description>Whitespace (NUL, tab, line feed, form feed, carriage return, space) and '&lt;' are skipped.</description></item>
/// <item><description>'&gt;' marks end-of-data; anything after it is ignored.</description></item>
/// <item><description>A trailing unpaired digit is treated as if it were followed by '0'.</description></item>
/// </list>
/// </remarks>
internal class AsciiHexDecodeFilter : IFilter
{
    /// <summary>
    /// Maps an ASCII code (used as the index) to the value of the hex digit it represents,
    /// or -1 when the character is not a hex digit. The table ends at 'f' (102);
    /// larger byte values are rejected by <see cref="HexValue"/> without indexing the table.
    /// </summary>
    private static readonly short[] ReverseHex =
    {
        /*   0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        /*  10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        /*  20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        /*  30 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        /*  40 */ -1, -1, -1, -1, -1, -1, -1, -1,  0,  1,
        /*  50 */  2,  3,  4,  5,  6,  7,  8,  9, -1, -1,
        /*  60 */ -1, -1, -1, -1, -1, 10, 11, 12, 13, 14,
        /*  70 */ 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        /*  80 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        /*  90 */ -1, -1, -1, -1, -1, -1, -1, 10, 11, 12,
        /* 100 */ 13, 14, 15
    };

    /// <summary>
    /// Decodes ASCII hex encoded bytes into the raw bytes they represent.
    /// </summary>
    /// <param name="input">The ASCII hex encoded data.</param>
    /// <param name="streamDictionary">Not used by this filter.</param>
    /// <param name="filterIndex">Not used by this filter.</param>
    /// <returns>The decoded bytes; empty when <paramref name="input"/> contains no hex digits before end-of-data.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    /// <exception cref="InvalidOperationException">
    /// A character that is neither a hex digit, whitespace, '&lt;' nor '&gt;' appears before end-of-data.
    /// </exception>
    public byte[] Decode(byte[] input, PdfDictionary streamDictionary, int filterIndex)
    {
        if (input == null)
        {
            throw new ArgumentNullException(nameof(input));
        }

        // Value of the first digit of the current pair, or -1 when no digit is waiting for its partner.
        var pendingHigh = -1;

        // Two input digits yield one output byte, so half the input length is an upper bound (+1 for an odd tail).
        using (var memoryStream = new MemoryStream(input.Length / 2 + 1))
        {
            foreach (var current in input)
            {
                if (current == '>')
                {
                    // End-of-data marker: ignore whatever follows.
                    break;
                }

                if (IsWhitespace(current) || current == '<')
                {
                    continue;
                }

                var nibble = HexValue(current);

                if (pendingHigh < 0)
                {
                    pendingHigh = nibble;
                }
                else
                {
                    memoryStream.WriteByte((byte)(pendingHigh * 16 + nibble));
                    pendingHigh = -1;
                }
            }

            if (pendingHigh >= 0)
            {
                // Odd number of digits: behave as if a '0' followed the last digit.
                memoryStream.WriteByte((byte)(pendingHigh * 16));
            }

            return memoryStream.ToArray();
        }
    }

    /// <summary>
    /// Returns the numeric value (0-15) of the hex digit <paramref name="c"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException"><paramref name="c"/> is not a hex digit.</exception>
    private static int HexValue(byte c)
    {
        // Bounds check first: bytes above 'f' fall outside the lookup table.
        if (c >= ReverseHex.Length || ReverseHex[c] < 0)
        {
            throw new InvalidOperationException("Invalid character encountered in hex encoded stream: " + (char)c);
        }

        return ReverseHex[c];
    }

    /// <summary>
    /// Whether <paramref name="c"/> is one of the whitespace bytes skipped while decoding.
    /// </summary>
    private static bool IsWhitespace(byte c)
    {
        return c == 0 || c == '\t' || c == '\n' || c == '\f' || c == '\r' || c == ' ';
    }
}
}

View File

@@ -22,13 +22,16 @@
{
IFilter FlateFunc() => new FlateFilter(decodeParameterResolver, pngPredictor, log);
IFilter Ascii85Func() => new Ascii85Filter();
IFilter AsciiHexFunc() => new AsciiHexDecodeFilter();
filterFactories = new Dictionary<CosName, Func<IFilter>>
{
{CosName.FLATE_DECODE, FlateFunc},
{CosName.FLATE_DECODE_ABBREVIATION, FlateFunc},
{CosName.ASCII85_DECODE, Ascii85Func},
{CosName.ASCII85_DECODE_ABBREVIATION, Ascii85Func}
{CosName.ASCII85_DECODE_ABBREVIATION, Ascii85Func},
{CosName.ASCII_HEX_DECODE, AsciiHexFunc},
{CosName.ASCII_HEX_DECODE_ABBREVIATION, AsciiHexFunc}
};
}