mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-10-15 19:54:52 +08:00
add support for ascii hex encoding in streams
This commit is contained in:
95
src/UglyToad.Pdf.Tests/Filters/AsciiHexDecodeFilterTests.cs
Normal file
95
src/UglyToad.Pdf.Tests/Filters/AsciiHexDecodeFilterTests.cs
Normal file
@@ -0,0 +1,95 @@
|
||||
namespace UglyToad.Pdf.Tests.Filters
|
||||
{
|
||||
using System;
|
||||
using System.Text;
|
||||
using ContentStream;
|
||||
using Pdf.Filters;
|
||||
using Xunit;
|
||||
|
||||
/// <summary>
/// Tests for <see cref="AsciiHexDecodeFilter"/>: plain hex, hex wrapped in angle brackets,
/// embedded whitespace, lowercase digits, invalid characters and data following the '>' marker.
/// </summary>
public class AsciiHexDecodeFilterTests
{
    [Fact]
    public void DecodesEncodedTextProperly()
    {
        var result = DecodeToText(
            "7368652073656C6C73207365617368656C6C73206F6E20746865207365612073686F7265");

        Assert.Equal("she sells seashells on the sea shore", result);
    }

    [Fact]
    public void DecodesEncodedTextWithBracesProperly()
    {
        var result = DecodeToText(
            "<7368652073656C6C73207365617368656C6C73206F6E20746865207365612073686F7265>");

        Assert.Equal("she sells seashells on the sea shore", result);
    }

    [Fact]
    public void DecodesEncodedTextWithWhitespaceProperly()
    {
        // The verbatim literal deliberately contains spaces and a line break between digits.
        var result = DecodeToText(
            @"6F6E6365207 5706F6E206120 74696D6520696E
20612067616C6178792046617220466172204177 6179");

        Assert.Equal("once upon a time in a galaxy Far Far Away", result);
    }

    [Fact]
    public void DecodesEncodedTextLowercaseProperly()
    {
        var result = DecodeToText("6f6e63652075706f6e20612074696d6520696e20612067616c61787920466172204661722041776179");

        Assert.Equal("once upon a time in a galaxy Far Far Away", result);
    }

    [Fact]
    public void DecodeWithInvalidCharactersThrows()
    {
        var encoded = Encoding.ASCII.GetBytes("6f6eHappyHungryHippos6d6520696e20612067616c61787920466172204661722041776179");

        Action attempt = () =>
        {
            new AsciiHexDecodeFilter().Decode(encoded, new PdfDictionary(), 1);
        };

        Assert.Throws<InvalidOperationException>(attempt);
    }

    [Fact]
    public void DecodesEncodedTextStoppingAtLastBrace()
    {
        var result = DecodeToText("6f6e63652075706f6e20612074696d6520696e20612067616c61787920466172204661722041776179> There is stuff following the EOD.");

        Assert.Equal("once upon a time in a galaxy Far Far Away", result);
    }

    /// <summary>
    /// Runs the ASCII bytes of <paramref name="encoded"/> through a fresh filter and
    /// returns the decoded bytes interpreted as ASCII text.
    /// </summary>
    private static string DecodeToText(string encoded)
    {
        var bytes = Encoding.ASCII.GetBytes(encoded);
        var filter = new AsciiHexDecodeFilter();

        var decodedBytes = filter.Decode(bytes, new PdfDictionary(), 1);

        return Encoding.ASCII.GetString(decodedBytes);
    }
}
|
||||
}
|
95
src/UglyToad.Pdf/Filters/AsciiHexDecodeFilter.cs
Normal file
95
src/UglyToad.Pdf/Filters/AsciiHexDecodeFilter.cs
Normal file
@@ -0,0 +1,95 @@
|
||||
namespace UglyToad.Pdf.Filters
|
||||
{
|
||||
using System;
|
||||
using System.IO;
|
||||
using ContentStream;
|
||||
|
||||
/// <summary>
/// Decodes stream data written as ASCII hexadecimal digits back into raw bytes.
/// </summary>
/// <remarks>
/// Every pair of hexadecimal digits (upper or lower case) produces one output byte.
/// Whitespace and '&lt;' characters are skipped wherever they appear, and decoding stops
/// at the first '&gt;' so anything after it is ignored.
/// </remarks>
internal class AsciiHexDecodeFilter : IFilter
{
    /// <summary>
    /// Maps a byte value to the hex digit it represents, or -1 when it is not a hex digit.
    /// The table only covers byte values 0 through 102 ('f'); larger values must be
    /// range-checked before indexing.
    /// </summary>
    private static readonly short[] ReverseHex =
    {
        /*   0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        /*  10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        /*  20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        /*  30 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        /*  40 */ -1, -1, -1, -1, -1, -1, -1, -1,  0,  1,
        /*  50 */  2,  3,  4,  5,  6,  7,  8,  9, -1, -1,
        /*  60 */ -1, -1, -1, -1, -1, 10, 11, 12, 13, 14,
        /*  70 */ 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        /*  80 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        /*  90 */ -1, -1, -1, -1, -1, -1, -1, 10, 11, 12,
        /* 100 */ 13, 14, 15
    };

    /// <summary>
    /// Decodes the hex encoded <paramref name="input"/> into the bytes it represents.
    /// </summary>
    /// <param name="input">The ASCII hex encoded bytes.</param>
    /// <param name="streamDictionary">Not used by this filter.</param>
    /// <param name="filterIndex">Not used by this filter.</param>
    /// <returns>The decoded bytes; empty when the input contains no hex digits.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    /// <exception cref="InvalidOperationException">
    /// A character which is not a hex digit, whitespace, '&lt;' or '&gt;' was found before the end of the data.
    /// </exception>
    public byte[] Decode(byte[] input, PdfDictionary streamDictionary, int filterIndex)
    {
        if (input == null)
        {
            throw new ArgumentNullException(nameof(input));
        }

        var pair = new byte[2];
        var index = 0;

        using (var memoryStream = new MemoryStream())
        {
            for (var i = 0; i < input.Length; i++)
            {
                var current = input[i];

                // '>' marks the end of the data, anything after it is ignored.
                if (current == '>')
                {
                    break;
                }

                if (IsWhitespace(current) || current == '<')
                {
                    continue;
                }

                pair[index] = current;
                index++;

                if (index == 2)
                {
                    memoryStream.WriteByte(HexPairToByte(pair));

                    index = 0;
                }
            }

            // An odd number of digits is decoded as if a trailing '0' followed the final digit.
            if (index == 1)
            {
                pair[1] = (byte) '0';

                memoryStream.WriteByte(HexPairToByte(pair));
            }

            return memoryStream.ToArray();
        }
    }

    /// <summary>
    /// Combines two hex digit characters (high nibble first) into a single byte.
    /// </summary>
    private static byte HexPairToByte(byte[] hexBytes)
    {
        var first = HexDigitValue(hexBytes[0]);
        var second = HexDigitValue(hexBytes[1]);

        return (byte) (first * 16 + second);
    }

    /// <summary>
    /// Returns the numeric value (0-15) of a hex digit character.
    /// </summary>
    /// <exception cref="InvalidOperationException">The character is not a hex digit.</exception>
    private static int HexDigitValue(byte character)
    {
        // Values past the end of the table (anything above 'f') can never be hex digits;
        // checking the range first avoids an IndexOutOfRangeException for such bytes.
        var value = character < ReverseHex.Length ? ReverseHex[character] : (short) -1;

        if (value == -1)
        {
            throw new InvalidOperationException("Invalid character encountered in hex encoded stream: " + (char)character);
        }

        return value;
    }

    /// <summary>
    /// Whether the byte is NUL, tab, line feed, form feed, carriage return or space.
    /// </summary>
    private static bool IsWhitespace(byte c)
    {
        return c == 0 || c == '\t' || c == '\n' || c == '\f' || c == '\r' || c == ' ';
    }
}
|
||||
}
|
@@ -22,13 +22,16 @@
|
||||
{
|
||||
IFilter FlateFunc() => new FlateFilter(decodeParameterResolver, pngPredictor, log);
|
||||
IFilter Ascii85Func() => new Ascii85Filter();
|
||||
IFilter AsciiHexFunc() => new AsciiHexDecodeFilter();
|
||||
|
||||
filterFactories = new Dictionary<CosName, Func<IFilter>>
|
||||
{
|
||||
{CosName.FLATE_DECODE, FlateFunc},
|
||||
{CosName.FLATE_DECODE_ABBREVIATION, FlateFunc},
|
||||
{CosName.ASCII85_DECODE, Ascii85Func},
|
||||
{CosName.ASCII85_DECODE_ABBREVIATION, Ascii85Func}
|
||||
{CosName.ASCII85_DECODE_ABBREVIATION, Ascii85Func},
|
||||
{CosName.ASCII_HEX_DECODE, AsciiHexFunc},
|
||||
{CosName.ASCII_HEX_DECODE_ABBREVIATION, AsciiHexFunc}
|
||||
};
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user