mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-10-15 11:44:51 +08:00
add run length filter and delete old code
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
namespace UglyToad.Pdf.Tests.Filters
|
||||
{
|
||||
using System;
|
||||
using System.Net.NetworkInformation;
|
||||
using System.Text;
|
||||
using ContentStream;
|
||||
using Pdf.Filters;
|
||||
@@ -68,16 +69,30 @@
|
||||
Assert.Equal(text, decodedText);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void DecodeWithInvalidCharactersThrows()
|
||||
[Theory]
|
||||
[InlineData("ZA")]
|
||||
[InlineData("AM")]
|
||||
public void DecodeWithInvalidCharactersThrows(string inputString)
|
||||
{
|
||||
var input = Encoding.ASCII.GetBytes("6f6eHappyHungryHippos6d6520696e20612067616c61787920466172204661722041776179");
|
||||
var input = Encoding.ASCII.GetBytes(inputString);
|
||||
|
||||
Action action = () => new AsciiHexDecodeFilter().Decode(input, new PdfDictionary(), 1);
|
||||
|
||||
Assert.Throws<InvalidOperationException>(action);
|
||||
}
|
||||
|
||||
[Fact]
public void SubstitutesZeroForLastByte()
{
    // "AE" is a full hex pair (0xAE). The lone "5" directly before the '>' end marker
    // is expected to be padded with a zero, i.e. read as "50" (0x50, 'P').
    var input = Encoding.ASCII.GetBytes("AE5>");

    var decoded = new AsciiHexDecodeFilter().Decode(input, new PdfDictionary(), 1);

    // ISO-8859-1 maps every byte to the code point with the same value, so 0xAE becomes '®'.
    // This replaces Encoding.UTF7, which is obsolete and only produced the same text because
    // its decoder passes invalid high bytes through unchanged.
    var decodedText = Encoding.GetEncoding("ISO-8859-1").GetString(decoded);

    Assert.Equal("®P", decodedText);
}
|
||||
|
||||
[Fact]
|
||||
public void DecodesEncodedTextStoppingAtLastBrace()
|
||||
{
|
||||
|
68
src/UglyToad.Pdf.Tests/Filters/RunLengthFilterTests.cs
Normal file
68
src/UglyToad.Pdf.Tests/Filters/RunLengthFilterTests.cs
Normal file
@@ -0,0 +1,68 @@
|
||||
namespace UglyToad.Pdf.Tests.Filters
|
||||
{
|
||||
using ContentStream;
|
||||
using Pdf.Filters;
|
||||
using Xunit;
|
||||
|
||||
/// <summary>
/// Tests for decoding run-length encoded data with <see cref="RunLengthFilter"/>.
/// </summary>
public class RunLengthFilterTests
{
    private readonly RunLengthFilter runLengthFilter = new RunLengthFilter();

    [Fact]
    public void CanDecodeRunLengthEncodedData()
    {
        byte[] encoded =
        {
            // Length 5: the next 5 + 1 = 6 bytes are literals.
            5, 0, 1, 2, 69, 12, 9,
            // Length 254: the next byte (52) is repeated 257 - 254 = 3 times.
            254, 52,
            // Length 2: the next 2 + 1 = 3 bytes are literals.
            2, 60, 61, 16,
            // Length 250: the next byte (12) is repeated 257 - 250 = 7 times.
            250, 12,
            // Length 1: the next 1 + 1 = 2 bytes are literals.
            1, 10, 19
        };

        byte[] expected =
        {
            0, 1, 2, 69, 12, 9,
            52, 52, 52,
            60, 61, 16,
            12, 12, 12, 12, 12, 12, 12,
            10, 19
        };

        var actual = runLengthFilter.Decode(encoded, new PdfDictionary(), 1);

        Assert.Equal(expected, actual);
    }

    [Fact]
    public void StopsAtEndOfDataByte()
    {
        byte[] encoded =
        {
            // Length 254: the next byte (7) is repeated 257 - 254 = 3 times.
            254, 7,
            // Length 1: the next 2 bytes are literals; the 128 here is data, not a marker.
            1, 128, 50,
            // A length byte of 128 marks the end of the data.
            128,
            // Everything after the end marker must be ignored.
            90, 6, 7
        };

        byte[] expected =
        {
            7, 7, 7,
            128, 50
        };

        var actual = runLengthFilter.Decode(encoded, new PdfDictionary(), 0);

        Assert.Equal(expected, actual);
    }
}
|
||||
}
|
@@ -1,51 +0,0 @@
|
||||
namespace UglyToad.Pdf.Filters
|
||||
{
|
||||
using Cos;
|
||||
|
||||
/// <summary>
/// Holds the outcome of a decode operation: the stream parameters and,
/// optionally, an embedded JPX color space.
/// </summary>
public class DecodeResult
{
    /// <summary>
    /// Default decode result, created with an empty parameter dictionary.
    /// Declared readonly so the shared instance cannot be swapped out by other code.
    /// </summary>
    public static readonly DecodeResult DEFAULT = new DecodeResult(new CosDictionary());

    private readonly CosDictionary parameters;
    private PDJPXColorSpace colorSpace;

    /// <summary>
    /// Creates a result carrying only the stream parameters; the color space is left unset (null).
    /// </summary>
    /// <param name="parameters">The stream parameters to expose via <see cref="getParameters"/>.</param>
    public DecodeResult(CosDictionary parameters)
    {
        this.parameters = parameters;
    }

    /// <summary>
    /// Creates a result carrying the stream parameters and a JPX color space.
    /// </summary>
    /// <param name="parameters">The stream parameters to expose via <see cref="getParameters"/>.</param>
    /// <param name="colorSpace">The JPX color space to expose via <see cref="getJPXColorSpace"/>.</param>
    public DecodeResult(CosDictionary parameters, PDJPXColorSpace colorSpace)
    {
        this.parameters = parameters;
        this.colorSpace = colorSpace;
    }

    /// <summary>
    /// Returns the stream parameters this result was constructed with.
    /// NOTE(review): the original comment described these as "repaired using the embedded
    /// stream data"; no repair happens in this class, so that must be done by the caller.
    /// </summary>
    /// <returns>The parameter dictionary passed to the constructor.</returns>
    public CosDictionary getParameters()
    {
        return parameters;
    }

    /// <summary>
    /// Returns the embedded JPX color space, if any.
    /// </summary>
    /// <returns>The embedded JPX color space, or null if none was set.</returns>
    public PDJPXColorSpace getJPXColorSpace()
    {
        return colorSpace;
    }

    // Sets the JPX color space. Private, matching the default accessibility of the original declaration.
    private void setColorSpace(PDJPXColorSpace colorSpace)
    {
        this.colorSpace = colorSpace;
    }
}
|
||||
|
||||
// Empty class with no members; it only serves as the type of the JPX color space held by DecodeResult.
public class PDJPXColorSpace { }
|
||||
|
||||
}
|
@@ -1,11 +1,7 @@
|
||||
namespace UglyToad.Pdf.Filters
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using ContentStream;
|
||||
using Cos;
|
||||
using Logging;
|
||||
|
||||
internal interface IFilterProvider
|
||||
{
|
||||
@@ -13,68 +9,4 @@
|
||||
|
||||
IReadOnlyList<IFilter> GetAllFilters();
|
||||
}
|
||||
|
||||
/// <summary>
/// Maps the filter names found in a stream dictionary to filter instances,
/// using a fixed in-memory table of factories.
/// </summary>
internal class MemoryFilterProvider : IFilterProvider
{
    private readonly IReadOnlyDictionary<CosName, Func<IFilter>> filterFactories;

    /// <summary>
    /// Builds the factory table. Each filter is registered under its full name and its abbreviation.
    /// </summary>
    public MemoryFilterProvider(IDecodeParameterResolver decodeParameterResolver, IPngPredictor pngPredictor, ILog log)
    {
        Func<IFilter> createFlate = () => new FlateFilter(decodeParameterResolver, pngPredictor, log);
        Func<IFilter> createAscii85 = () => new Ascii85Filter();
        Func<IFilter> createAsciiHex = () => new AsciiHexDecodeFilter();

        filterFactories = new Dictionary<CosName, Func<IFilter>>
        {
            {CosName.FLATE_DECODE, createFlate},
            {CosName.FLATE_DECODE_ABBREVIATION, createFlate},
            {CosName.ASCII85_DECODE, createAscii85},
            {CosName.ASCII85_DECODE_ABBREVIATION, createAscii85},
            {CosName.ASCII_HEX_DECODE, createAsciiHex},
            {CosName.ASCII_HEX_DECODE_ABBREVIATION, createAsciiHex}
        };
    }

    /// <summary>
    /// Gets the filters listed under the filter entry of the stream dictionary.
    /// Returns an empty array when the dictionary has no filter entry.
    /// </summary>
    /// <exception cref="ArgumentNullException">The dictionary is null.</exception>
    /// <exception cref="InvalidOperationException">The filter entry is neither an array nor a name.</exception>
    /// <exception cref="NotSupportedException">A named filter has no registered factory.</exception>
    public IReadOnlyList<IFilter> GetFilters(PdfDictionary streamDictionary)
    {
        if (streamDictionary == null)
        {
            throw new ArgumentNullException(nameof(streamDictionary));
        }

        var filterEntry = streamDictionary.GetItemOrDefault(CosName.FILTER);

        if (filterEntry == null)
        {
            return new IFilter[0];
        }

        if (filterEntry is COSArray array)
        {
            // TODO: elements are assumed to be names; anything else fails the cast.
            return array.Select(item => GetFilterStrict((CosName) item)).ToList();
        }

        if (filterEntry is CosName single)
        {
            return new[] {GetFilterStrict(single)};
        }

        throw new InvalidOperationException("The filter for a stream may be either a string or an array, instead this Pdf has: "
                                            + filterEntry.GetType());
    }

    // Looks up the factory for the name and creates the filter; unknown names are rejected.
    private IFilter GetFilterStrict(CosName name)
    {
        if (filterFactories.TryGetValue(name, out var create))
        {
            return create();
        }

        throw new NotSupportedException($"The filter with the name {name} is not supported yet. Please raise an issue.");
    }

    /// <summary>
    /// Not implemented; always throws.
    /// </summary>
    public IReadOnlyList<IFilter> GetAllFilters() => throw new System.NotImplementedException();
}
|
||||
}
|
76
src/UglyToad.Pdf/Filters/MemoryFilterProvider.cs
Normal file
76
src/UglyToad.Pdf/Filters/MemoryFilterProvider.cs
Normal file
@@ -0,0 +1,76 @@
|
||||
namespace UglyToad.Pdf.Filters
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using ContentStream;
|
||||
using Cos;
|
||||
using Logging;
|
||||
|
||||
/// <summary>
/// Maps the filter names found in a stream dictionary to filter instances,
/// using a fixed in-memory table of factories.
/// </summary>
internal class MemoryFilterProvider : IFilterProvider
{
    private readonly IReadOnlyDictionary<CosName, Func<IFilter>> filterFactories;

    /// <summary>
    /// Builds the factory table. Each filter is registered under its full name and its abbreviation.
    /// </summary>
    public MemoryFilterProvider(IDecodeParameterResolver decodeParameterResolver, IPngPredictor pngPredictor, ILog log)
    {
        Func<IFilter> createAscii85 = () => new Ascii85Filter();
        Func<IFilter> createAsciiHex = () => new AsciiHexDecodeFilter();
        Func<IFilter> createFlate = () => new FlateFilter(decodeParameterResolver, pngPredictor, log);
        Func<IFilter> createRunLength = () => new RunLengthFilter();

        filterFactories = new Dictionary<CosName, Func<IFilter>>
        {
            {CosName.ASCII85_DECODE, createAscii85},
            {CosName.ASCII85_DECODE_ABBREVIATION, createAscii85},
            {CosName.ASCII_HEX_DECODE, createAsciiHex},
            {CosName.ASCII_HEX_DECODE_ABBREVIATION, createAsciiHex},
            {CosName.FLATE_DECODE, createFlate},
            {CosName.FLATE_DECODE_ABBREVIATION, createFlate},
            {CosName.RUN_LENGTH_DECODE, createRunLength},
            {CosName.RUN_LENGTH_DECODE_ABBREVIATION, createRunLength}
        };
    }

    /// <summary>
    /// Gets the filters listed under the filter entry of the stream dictionary.
    /// Returns an empty array when the dictionary has no filter entry.
    /// </summary>
    /// <exception cref="ArgumentNullException">The dictionary is null.</exception>
    /// <exception cref="InvalidOperationException">The filter entry is neither an array nor a name.</exception>
    /// <exception cref="NotSupportedException">A named filter has no registered factory.</exception>
    public IReadOnlyList<IFilter> GetFilters(PdfDictionary streamDictionary)
    {
        if (streamDictionary == null)
        {
            throw new ArgumentNullException(nameof(streamDictionary));
        }

        var filterEntry = streamDictionary.GetItemOrDefault(CosName.FILTER);

        if (filterEntry == null)
        {
            return new IFilter[0];
        }

        if (filterEntry is COSArray array)
        {
            // TODO: elements are assumed to be names; anything else fails the cast.
            return array.Select(item => GetFilterStrict((CosName) item)).ToList();
        }

        if (filterEntry is CosName single)
        {
            return new[] {GetFilterStrict(single)};
        }

        throw new InvalidOperationException("The filter for a stream may be either a string or an array, instead this Pdf has: "
                                            + filterEntry.GetType());
    }

    // Looks up the factory for the name and creates the filter; unknown names are rejected.
    private IFilter GetFilterStrict(CosName name)
    {
        if (filterFactories.TryGetValue(name, out var create))
        {
            return create();
        }

        throw new NotSupportedException($"The filter with the name {name} is not supported yet. Please raise an issue.");
    }

    /// <summary>
    /// Not implemented; always throws.
    /// </summary>
    public IReadOnlyList<IFilter> GetAllFilters() => throw new System.NotImplementedException();
}
|
||||
}
|
65
src/UglyToad.Pdf/Filters/RunLengthFilter.cs
Normal file
65
src/UglyToad.Pdf/Filters/RunLengthFilter.cs
Normal file
@@ -0,0 +1,65 @@
|
||||
namespace UglyToad.Pdf.Filters
|
||||
{
|
||||
using System.IO;
|
||||
using ContentStream;
|
||||
|
||||
/// <summary>
/// Decodes run-length encoded data. The input is a sequence of runs, each starting with a length byte:
/// 0 - 127 means "copy the next length + 1 bytes literally",
/// 129 - 255 means "repeat the next single byte 257 - length times",
/// and 128 marks the end of the data.
/// </summary>
internal class RunLengthFilter : IFilter
{
    // A length byte with this value terminates decoding; anything after it is ignored.
    private const byte EndOfDataLength = 128;

    /// <summary>
    /// Decodes the run-length encoded <paramref name="input"/>.
    /// Truncated input is tolerated: a literal run that claims more bytes than remain copies
    /// only the bytes that are present, and a repeat run with no byte to repeat is dropped.
    /// </summary>
    /// <param name="input">The encoded bytes.</param>
    /// <param name="streamDictionary">Not used by this filter.</param>
    /// <param name="filterIndex">Not used by this filter.</param>
    /// <returns>The decoded bytes.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="input"/> is null.</exception>
    public byte[] Decode(byte[] input, PdfDictionary streamDictionary, int filterIndex)
    {
        if (input == null)
        {
            throw new System.ArgumentNullException(nameof(input));
        }

        using (var memoryStream = new MemoryStream())
        {
            var i = 0;
            while (i < input.Length)
            {
                var runLength = input[i];

                if (runLength == EndOfDataLength)
                {
                    break;
                }

                if (runLength < EndOfDataLength)
                {
                    // Literal run: copy the following runLength + 1 bytes, clamped to what is left
                    // so that truncated data cannot read past the end of the input.
                    var requested = runLength + 1;
                    var remaining = input.Length - (i + 1);
                    var count = requested < remaining ? requested : remaining;

                    if (count > 0)
                    {
                        memoryStream.Write(input, i + 1, count);
                    }

                    // Skip the length byte and the literal bytes consumed.
                    i += count + 1;
                }
                else
                {
                    // Repeat run: needs exactly one following byte. If the data ends here, stop.
                    if (i + 1 >= input.Length)
                    {
                        break;
                    }

                    // 257 - length gives between 2 and 128 copies.
                    var numberOfTimesToCopy = 257 - runLength;
                    var byteToCopy = input[i + 1];

                    for (var j = 0; j < numberOfTimesToCopy; j++)
                    {
                        memoryStream.WriteByte(byteToCopy);
                    }

                    // Skip the length byte and the repeated byte.
                    i += 2;
                }
            }

            return memoryStream.ToArray();
        }
    }
}
|
||||
}
|
Reference in New Issue
Block a user