add run length filter and delete old code

This commit is contained in:
Eliot Jones
2017-12-29 11:08:59 +00:00
parent 26e244371b
commit f869bba72c
6 changed files with 227 additions and 122 deletions

View File

@@ -1,6 +1,7 @@
namespace UglyToad.Pdf.Tests.Filters
{
using System;
using System.Net.NetworkInformation;
using System.Text;
using ContentStream;
using Pdf.Filters;
@@ -68,16 +69,30 @@
Assert.Equal(text, decodedText);
}
// Verifies that AsciiHexDecodeFilter.Decode throws InvalidOperationException when the
// input contains a character that is not a hexadecimal digit.
// NOTE(review): this span interleaves two versions of the test as shown by the diff:
// the parameterless [Fact] with a long literal input, and the [Theory] that receives
// its input from [InlineData]. Only one signature and one 'input' declaration can
// exist in the compiled file - confirm which against the real source.
[Fact]
public void DecodeWithInvalidCharactersThrows()
[Theory]
// Each case pairs the valid hex digit 'A' with a letter outside the range A-F.
[InlineData("ZA")]
[InlineData("AM")]
public void DecodeWithInvalidCharactersThrows(string inputString)
{
var input = Encoding.ASCII.GetBytes("6f6eHappyHungryHippos6d6520696e20612067616c61787920466172204661722041776179");
var input = Encoding.ASCII.GetBytes(inputString);
// Decoding is wrapped in a delegate so the exception is raised inside Assert.Throws.
Action action = () => new AsciiHexDecodeFilter().Decode(input, new PdfDictionary(), 1);
Assert.Throws<InvalidOperationException>(action);
}
// Verifies that an odd number of hex digits before the '>' terminator is decoded as if
// the missing final digit were zero: "AE5>" must produce the two bytes 0xAE and 0x50.
[Fact]
public void SubstitutesZeroForLastByte()
{
    var input = Encoding.ASCII.GetBytes("AE5>");

    var decoded = new AsciiHexDecodeFilter().Decode(input, new PdfDictionary(), 1);

    // Assert on the raw bytes. The previous version round-tripped through Encoding.UTF7,
    // which only worked because the UTF-7 decoder passes non-ASCII bytes through
    // unchanged, and Encoding.UTF7 is obsolete in current .NET releases.
    var expected = new byte[] { 0xAE, 0x50 };

    Assert.Equal(expected, decoded);
}
[Fact]
public void DecodesEncodedTextStoppingAtLastBrace()
{

View File

@@ -0,0 +1,68 @@
namespace UglyToad.Pdf.Tests.Filters
{
using ContentStream;
using Pdf.Filters;
using Xunit;
/// <summary>
/// Exercises <see cref="RunLengthFilter"/> with hand-built run-length encoded input.
/// </summary>
public class RunLengthFilterTests
{
    private readonly RunLengthFilter runLengthFilter = new RunLengthFilter();

    /// <summary>
    /// Literal runs and repeated runs are expanded in the order they appear.
    /// </summary>
    [Fact]
    public void CanDecodeRunLengthEncodedData()
    {
        byte[] encoded =
        {
            // Length 5: the next 6 bytes are copied as-is.
            5, 0, 1, 2, 69, 12, 9,
            // Length 254: the next byte (52) is emitted 257 - 254 = 3 times.
            254, 52,
            // Length 2: the next 3 bytes are copied as-is.
            2, 60, 61, 16,
            // Length 250: the next byte (12) is emitted 257 - 250 = 7 times.
            250, 12,
            // Length 1: the next 2 bytes are copied as-is.
            1, 10, 19
        };

        byte[] expected =
        {
            0, 1, 2, 69, 12, 9,
            52, 52, 52,
            60, 61, 16,
            12, 12, 12, 12, 12, 12, 12,
            10, 19
        };

        var actual = runLengthFilter.Decode(encoded, new PdfDictionary(), 1);

        Assert.Equal(expected, actual);
    }

    /// <summary>
    /// A length byte of 128 ends decoding; a 128 inside a literal run is ordinary data.
    /// </summary>
    [Fact]
    public void StopsAtEndOfDataByte()
    {
        byte[] encoded =
        {
            // Length 254: the next byte (7) is emitted 257 - 254 = 3 times.
            254, 7,
            // Length 1: the next 2 bytes are copied as-is (the 128 here is data).
            1, 128, 50,
            // Length 128: end of data.
            128,
            // Everything after the end-of-data marker must be ignored.
            90, 6, 7
        };

        byte[] expected =
        {
            7, 7, 7,
            128, 50
        };

        var actual = runLengthFilter.Decode(encoded, new PdfDictionary(), 0);

        Assert.Equal(expected, actual);
    }
}
}

View File

@@ -1,51 +0,0 @@
namespace UglyToad.Pdf.Filters
{
using Cos;
/// <summary>
/// Pairs a stream parameter dictionary with an optional embedded JPX color space.
/// </summary>
public class DecodeResult
{
    /// <summary>
    /// Default decode result.
    /// </summary>
    public static DecodeResult DEFAULT = new DecodeResult(new CosDictionary());

    private readonly CosDictionary parameters;

    private PDJPXColorSpace colorSpace;

    /// <summary>
    /// Creates a result that carries only parameters; the color space is left unset.
    /// </summary>
    public DecodeResult(CosDictionary parameters) : this(parameters, null)
    {
    }

    /// <summary>
    /// Creates a result that carries both parameters and a JPX color space.
    /// </summary>
    public DecodeResult(CosDictionary parameters, PDJPXColorSpace colorSpace)
    {
        this.parameters = parameters;
        this.colorSpace = colorSpace;
    }

    /// <summary>
    /// Returns the stream parameters, repaired using the embedded stream data.
    /// </summary>
    /// <returns>The repaired stream parameters, or an empty dictionary.</returns>
    public CosDictionary getParameters() => parameters;

    /// <summary>
    /// Returns the embedded JPX color space, if any.
    /// </summary>
    /// <returns>The embedded JPX color space, or null if there is none.</returns>
    public PDJPXColorSpace getJPXColorSpace() => colorSpace;

    // Sets the JPX color space. No access modifier was declared originally, so this is private.
    private void setColorSpace(PDJPXColorSpace colorSpace)
    {
        this.colorSpace = colorSpace;
    }
}
// Empty placeholder type with no members; DecodeResult stores and returns it as the JPX color space.
public class PDJPXColorSpace { }
}

View File

@@ -1,11 +1,7 @@
namespace UglyToad.Pdf.Filters
{
using System;
using System.Collections.Generic;
using System.Linq;
using ContentStream;
using Cos;
using Logging;
internal interface IFilterProvider
{
@@ -13,68 +9,4 @@
IReadOnlyList<IFilter> GetAllFilters();
}
/// <summary>
/// An <see cref="IFilterProvider"/> backed by a fixed in-memory table that maps
/// filter names to factories creating the corresponding <see cref="IFilter"/>.
/// </summary>
internal class MemoryFilterProvider : IFilterProvider
{
    private readonly IReadOnlyDictionary<CosName, Func<IFilter>> filterFactories;

    /// <summary>
    /// Builds the name-to-factory table. The supplied services are only used when a flate filter is created.
    /// </summary>
    public MemoryFilterProvider(IDecodeParameterResolver decodeParameterResolver, IPngPredictor pngPredictor, ILog log)
    {
        Func<IFilter> createFlate = () => new FlateFilter(decodeParameterResolver, pngPredictor, log);
        Func<IFilter> createAscii85 = () => new Ascii85Filter();
        Func<IFilter> createAsciiHex = () => new AsciiHexDecodeFilter();

        // Each filter is registered under both its full name and its abbreviation.
        var factories = new Dictionary<CosName, Func<IFilter>>();
        factories.Add(CosName.FLATE_DECODE, createFlate);
        factories.Add(CosName.FLATE_DECODE_ABBREVIATION, createFlate);
        factories.Add(CosName.ASCII85_DECODE, createAscii85);
        factories.Add(CosName.ASCII85_DECODE_ABBREVIATION, createAscii85);
        factories.Add(CosName.ASCII_HEX_DECODE, createAsciiHex);
        factories.Add(CosName.ASCII_HEX_DECODE_ABBREVIATION, createAsciiHex);

        filterFactories = factories;
    }

    /// <summary>
    /// Resolves the filters named by the filter entry of the given stream dictionary.
    /// </summary>
    /// <returns>An empty list when no filter entry is present, otherwise one filter per name.</returns>
    /// <exception cref="ArgumentNullException">The dictionary is null.</exception>
    /// <exception cref="InvalidOperationException">The filter entry is neither a name nor an array.</exception>
    /// <exception cref="NotSupportedException">A named filter has no registered factory.</exception>
    public IReadOnlyList<IFilter> GetFilters(PdfDictionary streamDictionary)
    {
        if (streamDictionary == null)
        {
            throw new ArgumentNullException(nameof(streamDictionary));
        }

        var filterObject = streamDictionary.GetItemOrDefault(CosName.FILTER);

        if (filterObject == null)
        {
            return new IFilter[0];
        }

        if (filterObject is COSArray filters)
        {
            // TODO: presumably this may be invalid...
            return filters.Select(x => GetFilterStrict((CosName) x)).ToList();
        }

        if (filterObject is CosName name)
        {
            return new[] {GetFilterStrict(name)};
        }

        throw new InvalidOperationException("The filter for a stream may be either a string or an array, instead this Pdf has: "
                                            + filterObject.GetType());
    }

    // Creates the filter registered for the name, or fails when there is no registration.
    private IFilter GetFilterStrict(CosName name)
    {
        if (filterFactories.TryGetValue(name, out var factory))
        {
            return factory();
        }

        throw new NotSupportedException($"The filter with the name {name} is not supported yet. Please raise an issue.");
    }

    /// <summary>
    /// Not implemented; always throws <see cref="NotImplementedException"/>.
    /// </summary>
    public IReadOnlyList<IFilter> GetAllFilters()
    {
        throw new NotImplementedException();
    }
}
}

View File

@@ -0,0 +1,76 @@
namespace UglyToad.Pdf.Filters
{
using System;
using System.Collections.Generic;
using System.Linq;
using ContentStream;
using Cos;
using Logging;
/// <summary>
/// An <see cref="IFilterProvider"/> backed by a fixed in-memory table that maps
/// filter names (and their abbreviations) to factories creating the matching <see cref="IFilter"/>.
/// </summary>
internal class MemoryFilterProvider : IFilterProvider
{
    private readonly IReadOnlyDictionary<CosName, Func<IFilter>> filterFactories;

    /// <summary>
    /// Builds the name-to-factory table. The supplied services are only used when a flate filter is created.
    /// </summary>
    public MemoryFilterProvider(IDecodeParameterResolver decodeParameterResolver, IPngPredictor pngPredictor, ILog log)
    {
        IFilter Ascii85Func() => new Ascii85Filter();
        IFilter AsciiHexFunc() => new AsciiHexDecodeFilter();
        IFilter FlateFunc() => new FlateFilter(decodeParameterResolver, pngPredictor, log);
        IFilter RunLengthFunc() => new RunLengthFilter();

        filterFactories = new Dictionary<CosName, Func<IFilter>>
        {
            {CosName.ASCII85_DECODE, Ascii85Func},
            {CosName.ASCII85_DECODE_ABBREVIATION, Ascii85Func},
            {CosName.ASCII_HEX_DECODE, AsciiHexFunc},
            {CosName.ASCII_HEX_DECODE_ABBREVIATION, AsciiHexFunc},
            {CosName.FLATE_DECODE, FlateFunc},
            {CosName.FLATE_DECODE_ABBREVIATION, FlateFunc},
            {CosName.RUN_LENGTH_DECODE, RunLengthFunc},
            {CosName.RUN_LENGTH_DECODE_ABBREVIATION, RunLengthFunc}
        };
    }

    /// <summary>
    /// Resolves the filters named by the filter entry of the given stream dictionary.
    /// </summary>
    /// <param name="streamDictionary">The dictionary whose filter entry is inspected.</param>
    /// <returns>An empty list when no filter entry is present, otherwise one filter per name, in order.</returns>
    /// <exception cref="ArgumentNullException">The dictionary is null.</exception>
    /// <exception cref="InvalidOperationException">
    /// The filter entry is neither a name nor an array, or the array contains an entry that is not a name.
    /// </exception>
    /// <exception cref="NotSupportedException">A named filter has no registered factory.</exception>
    public IReadOnlyList<IFilter> GetFilters(PdfDictionary streamDictionary)
    {
        if (streamDictionary == null)
        {
            throw new ArgumentNullException(nameof(streamDictionary));
        }

        var filterObject = streamDictionary.GetItemOrDefault(CosName.FILTER);

        if (filterObject == null)
        {
            return new IFilter[0];
        }

        switch (filterObject)
        {
            case COSArray filters:
                // A malformed file may put something other than a name in the array. Report it
                // with the same exception type used for an invalid top-level filter object,
                // instead of leaking an InvalidCastException (or an ArgumentNullException
                // from the dictionary lookup when the entry is null).
                return filters.Select(x =>
                {
                    if (x is CosName filterName)
                    {
                        return GetFilterStrict(filterName);
                    }

                    throw new InvalidOperationException("The filter array for a stream may only contain names, instead this Pdf has: "
                                                        + (x?.GetType().ToString() ?? "null"));
                }).ToList();
            case CosName name:
                return new[] {GetFilterStrict(name)};
            default:
                throw new InvalidOperationException("The filter for a stream may be either a string or an array, instead this Pdf has: "
                                                    + filterObject.GetType());
        }
    }

    // Creates the filter registered for the name, or fails when there is no registration.
    private IFilter GetFilterStrict(CosName name)
    {
        if (!filterFactories.TryGetValue(name, out var factory))
        {
            throw new NotSupportedException($"The filter with the name {name} is not supported yet. Please raise an issue.");
        }

        return factory();
    }

    /// <summary>
    /// Not implemented; always throws <see cref="NotImplementedException"/>.
    /// </summary>
    public IReadOnlyList<IFilter> GetAllFilters()
    {
        throw new System.NotImplementedException();
    }
}
}

View File

@@ -0,0 +1,65 @@
namespace UglyToad.Pdf.Filters
{
using System.IO;
using ContentStream;
/// <summary>
/// Decodes run-length encoded data. The input is a sequence of runs, each starting with a length byte:
/// 0 - 127 means "copy the next length + 1 bytes literally", 129 - 255 means "repeat the next byte
/// 257 - length times", and 128 marks the end of the data.
/// </summary>
internal class RunLengthFilter : IFilter
{
    // A length byte with this value terminates decoding; anything after it is ignored.
    private const byte EndOfDataLength = 128;

    /// <summary>
    /// Expands the run-length encoded <paramref name="input"/>.
    /// </summary>
    /// <param name="input">The encoded bytes.</param>
    /// <param name="streamDictionary">Not used by this filter.</param>
    /// <param name="filterIndex">Not used by this filter.</param>
    /// <returns>
    /// The decoded bytes. If the input ends in the middle of a run, the bytes that are
    /// available are decoded and decoding stops, rather than reading past the end of the input.
    /// </returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="input"/> is null.</exception>
    public byte[] Decode(byte[] input, PdfDictionary streamDictionary, int filterIndex)
    {
        if (input == null)
        {
            throw new System.ArgumentNullException(nameof(input));
        }

        using (var memoryStream = new MemoryStream())
        {
            var i = 0;
            while (i < input.Length)
            {
                var runLength = input[i];

                if (runLength == EndOfDataLength)
                {
                    break;
                }

                // If the length byte is in the range 0 - 127 copy the following length + 1 bytes literally to the output.
                if (runLength <= 127)
                {
                    var requested = runLength + 1;

                    // Truncated data may hold fewer bytes than the run announces; copy only what exists.
                    var available = input.Length - i - 1;
                    var toCopy = requested < available ? requested : available;

                    if (toCopy > 0)
                    {
                        memoryStream.Write(input, i + 1, toCopy);
                    }

                    // Skip the length byte and the whole announced run. On truncated input this
                    // moves past the end of the array, which ends the loop.
                    i += requested + 1;
                }
                // Otherwise copy the single following byte 257 - length times (between 2 - 128 times).
                else
                {
                    // A repeat marker with no byte after it cannot be expanded; stop decoding.
                    if (i + 1 >= input.Length)
                    {
                        break;
                    }

                    var numberOfTimesToCopy = 257 - runLength;
                    var byteToCopy = input[i + 1];

                    for (var j = 0; j < numberOfTimesToCopy; j++)
                    {
                        memoryStream.WriteByte(byteToCopy);
                    }

                    // Move to the length byte that follows the repeated byte.
                    i += 2;
                }
            }

            return memoryStream.ToArray();
        }
    }
}
}