mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-10-14 10:55:04 +08:00
add support for ascii 85 decoding
This commit is contained in:
79
src/UglyToad.Pdf.Tests/Filters/Ascii85FilterTests.cs
Normal file
79
src/UglyToad.Pdf.Tests/Filters/Ascii85FilterTests.cs
Normal file
@@ -0,0 +1,79 @@
|
||||
namespace UglyToad.Pdf.Tests.Filters
|
||||
{
|
||||
using System.Text;
|
||||
using ContentStream;
|
||||
using Pdf.Filters;
|
||||
using Xunit;
|
||||
|
||||
/// <summary>
/// Decoding tests for <see cref="Ascii85Filter"/> using known encoded/decoded pairs.
/// </summary>
public class Ascii85FilterTests
{
    private readonly Ascii85Filter filter = new Ascii85Filter();

    /// <summary>
    /// Decodes the well-known ASCII85 sample sentence; the encoded text contains line breaks
    /// and spaces which the filter is expected to skip.
    /// </summary>
    [Fact]
    public void DecodesWikipediaExample()
    {
        // Arrange
        const string encoded =
            @"9jqo^BlbD-BleB1DJ+*+F(f,q/0JhKF<GL>Cj@.4Gp$d7F!,L7@<6@)/0JDEF<G%<+EV:2F!,
O<DJ+*.@<*K0@<6L(Df-\0Ec5e;DffZ(EZee.Bl.9pF""AGXBPCsi + DGm >@3BB / F * &OCAfu2 / AKY
i(DIb: @FD, *) + C]U =@3BN#EcYf8ATD3s@q?d$AftVqCh[NqF<G:8+EV:.+Cf>-FD5W8ARlolDIa
l(DId<j@<? 3r@:F % a + D58'ATD4$Bl@l3De:,-DJs`8ARoFb/0JMK@qB4^F!,R<AKZ&-DfTqBG%G
> uD.RTpAKYo'+CT/5+Cei#DII?(E,9)oF*2M7/c~>";

        const string expected =
            "Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, " +
            "that by a perseverance of delight in the continued and indefatigable generation of knowledge, " +
            "exceeds the short vehemence of any carnal pleasure.";

        // Act
        var decoded = filter.Decode(Encoding.ASCII.GetBytes(encoded), new PdfDictionary(), 0);
        var actual = Encoding.ASCII.GetString(decoded);

        // Assert
        Assert.Equal(expected, actual);
    }

    /// <summary>
    /// Decodes an ASCII85 encoded PDF object and compares it with the plain text form of that object.
    /// </summary>
    [Fact]
    public void DecodesEncodedPdfContent()
    {
        // Arrange
        const string encoded =
            @"0d&.mDdmGg4?O`>9P&*SFD)dS2E2gC4pl@QEb/Zr$8N_r$:7]!01IZ=0eskNAdU47<+?7h+B3Ol2_m!C+?)#1+B1
`9>:<KhASu!rA7]9oF*)G6@;U'.@ps6t@V$[&ART*lARTXoCj@HP2DlU*/0HBI+B1r?0H_r%1a#ac$<nof.3LB""+=MAS+D58'ATD3qCj@.
F@;@;70ea^uAKYi.Eb-A7E+*6f+EV:*DBN1?0ek+_+B1r?<%9""=ASu!rA7]9oF*)G6@;U'<.3MT)$8<SS1,pCU6jd-H;e7
C#1,U1&Ft""Og2'=;YEa`c,ASu!rA8,po+Dk\3BQ%F&+CT;%+CQ]A1,'h!Ft""Oh2'=;UBl%3eCh4`'DBMbD7O]H>0H_br.:""&q8d[6p/M
T()<(%'A;f?Ma+CT;%+E_a:A0>K&EZek1D/aN,F)u&6DBNA*A0>f4BOu4*+EM76E,9eK+B3(_<%9""p.!0AMEb031ATMF#F<G%,DIIR2+Cno
&@3B9%+CT.1.3LK*+=KNS6V0ilAoD^,@<=+N>p**=$</Jt-rY&$AKYo'+EV:.+Cf>,E,oN2F(oQ1+D#G#De*R""B-;&&FD,T'F!+n3AKY4b
F*22=@:F%a+=SF4C'moi+=Li?EZeh0FD)e-@<>p#@;]TuBl.9kATKCFGA(],AKYo5BOu4*+CT;%+C#7pF_Pr+@VfTuDf0B:+=SF4C'moi+=
Li?EZek1DKKT1F`2DD/TboKAKY](@:s.m/h%oBC'mC/$>""*cF*)G6@;Q?_DIdZpC&~>";

        const string plain = @"1 0 obj
<< /Length 568 >>
stream
2 J
BT
/F1 12 Tf
0 Tc
0 Tw
72.5 712 TD
[(Unencoded streams can be read easily) 65 (, )] TJ
0 -14 TD
[(b) 20 (ut generally tak) 10 (e more space than \311)] TJ
T* (encoded streams.) Tj
0 -28 TD
[(Se) 25 (v) 15 (eral encoding methods are a) 20 (v) 25 (ailable in PDF) 80 (.)] TJ
0 -14 TD
(Some are used for compression and others simply) Tj
T* [(to represent binary data in an ) 55 (ASCII format.)] TJ
T* (Some of the compression encoding methods are \
suitable ) Tj
T* (for both data and images, while others are \
suitable only ) Tj
T* (for continuous-tone images.) Tj
ET
endstream
endobj";

        // The literal above picks up the line endings of this source file; the comparison uses '\n' only.
        var expected = plain.Replace("\r\n", "\n");

        // Act
        var decoded = filter.Decode(Encoding.ASCII.GetBytes(encoded), new PdfDictionary(), 0);
        var actual = Encoding.ASCII.GetString(decoded);

        // Assert
        Assert.Equal(expected, actual);
    }
}
|
||||
}
|
145
src/UglyToad.Pdf/Filters/Ascii85Filter.cs
Normal file
145
src/UglyToad.Pdf/Filters/Ascii85Filter.cs
Normal file
@@ -0,0 +1,145 @@
|
||||
namespace UglyToad.Pdf.Filters
|
||||
{
|
||||
using System;
|
||||
using System.IO;
|
||||
using ContentStream;
|
||||
|
||||
/// <summary>
/// ASCII 85 (Base85) is a binary to text encoding using 5 ASCII characters per 4 bytes of data.
/// </summary>
public class Ascii85Filter : IFilter
{
    private const byte EmptyBlock = (byte)'z';
    private const byte Offset = (byte)'!';
    private const byte EmptyCharacterPadding = (byte)'u';
    private const byte EndOfDataStart = (byte)'~';

    private const int CharactersPerGroup = 5;
    private const int BytesPerGroup = 4;
    private const int Radix = 85;

    /// <summary>
    /// Decodes ASCII85 encoded data back to the bytes it represents.
    /// </summary>
    /// <param name="input">The encoded bytes. White-space is skipped and decoding stops at the first '~'.</param>
    /// <param name="streamDictionary">Not used by this filter.</param>
    /// <param name="filterIndex">Not used by this filter.</param>
    /// <returns>The decoded bytes.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// The end-of-data marker is preceded by a final group containing only a single character.
    /// </exception>
    public byte[] Decode(byte[] input, PdfDictionary streamDictionary, int filterIndex)
    {
        if (input == null)
        {
            throw new ArgumentNullException(nameof(input));
        }

        var asciiBuffer = new byte[CharactersPerGroup];
        var index = 0;
        var endOfDataReached = false;

        using (var stream = new MemoryStream())
        {
            for (var i = 0; i < input.Length; i++)
            {
                var value = input[i];

                if (IsWhiteSpace(value))
                {
                    continue;
                }

                if (value == EndOfDataStart)
                {
                    // '~' is outside the digit range '!'..'u' so it can only be the start of the "~>"
                    // end-of-data marker; stop here even if the closing '>' is missing.
                    endOfDataReached = true;
                    break;
                }

                if (value == EmptyBlock)
                {
                    // 'z' is shorthand for a group of four zero bytes. It is only valid between groups;
                    // if it appears inside a group the partial group is discarded rather than failing.
                    for (var j = 0; j < BytesPerGroup; j++)
                    {
                        stream.WriteByte(0);
                    }

                    index = 0;
                    continue;
                }

                asciiBuffer[index] = unchecked((byte)(value - Offset));
                index++;

                if (index == CharactersPerGroup)
                {
                    WriteData(asciiBuffer, index, stream);
                    index = 0;
                }
            }

            // A final partial group of n characters encodes n - 1 bytes.
            // When the end-of-data marker was seen a single leftover character is reported as an error,
            // when the input simply ran out a single leftover character cannot hold any data and is ignored.
            if (index > 1 || (index == 1 && endOfDataReached))
            {
                WriteData(asciiBuffer, index, stream);
            }

            return stream.ToArray();
        }
    }

    /// <summary>
    /// Converts one group of base 85 digits to bytes and writes them to the output.
    /// </summary>
    /// <param name="ascii">Buffer of 5 digits (offset already removed); unused trailing slots are overwritten with padding.</param>
    /// <param name="index">The number of digits actually read into <paramref name="ascii"/>, between 2 and 5.</param>
    /// <param name="stream">The output to append the decoded bytes to.</param>
    private static void WriteData(byte[] ascii, int index, Stream stream)
    {
        if (index < 2)
        {
            throw new ArgumentOutOfRangeException(nameof(index), "Cannot convert a block padded by 4 'u' characters.");
        }

        // Write any empty padding if the block ended early.
        for (var i = index; i < CharactersPerGroup; i++)
        {
            ascii[i] = EmptyCharacterPadding - Offset;
        }

        // A full group can reach 2^32 - 1, which does not fit in a signed 32-bit integer, so accumulate in a long.
        long value = 0;
        for (var i = 0; i < CharactersPerGroup; i++)
        {
            value = (value * Radix) + ascii[i];
        }

        // n digits carry n - 1 bytes, most significant byte first.
        var byteCount = index - 1;
        for (var i = 0; i < byteCount; i++)
        {
            var shift = 8 * (BytesPerGroup - 1 - i);
            stream.WriteByte(unchecked((byte)(value >> shift)));
        }
    }

    /// <summary>
    /// Whether the byte is one of the PDF white-space characters (NUL, TAB, LF, FF, CR or SPACE).
    /// </summary>
    private static bool IsWhiteSpace(byte b)
    {
        switch (b)
        {
            case 0x00: // NUL
            case 0x09: // TAB
            case 0x0A: // LF
            case 0x0C: // FF
            case 0x0D: // CR
            case 0x20: // SPACE
                return true;
            default:
                return false;
        }
    }
}
|
||||
}
|
@@ -21,11 +21,14 @@
|
||||
/// <summary>
/// Builds the lookup from filter name to a factory creating the matching <see cref="IFilter"/>.
/// The Flate and ASCII85 filters are each registered under two names.
/// </summary>
/// <param name="decodeParameterResolver">Passed to every <see cref="FlateFilter"/> that is created.</param>
/// <param name="pngPredictor">Passed to every <see cref="FlateFilter"/> that is created.</param>
/// <param name="log">Passed to every <see cref="FlateFilter"/> that is created.</param>
public MemoryFilterProvider(IDecodeParameterResolver decodeParameterResolver, IPngPredictor pngPredictor, ILog log)
{
    // Each call of a factory produces a new filter instance.
    IFilter CreateFlateFilter()
    {
        return new FlateFilter(decodeParameterResolver, pngPredictor, log);
    }

    IFilter CreateAscii85Filter()
    {
        return new Ascii85Filter();
    }

    filterFactories = new Dictionary<CosName, Func<IFilter>>
    {
        { CosName.FLATE_DECODE, CreateFlateFilter },
        { CosName.FLATE_DECODE_ABBREVIATION, CreateFlateFilter },
        { CosName.ASCII85_DECODE, CreateAscii85Filter },
        { CosName.ASCII85_DECODE_ABBREVIATION, CreateAscii85Filter }
    };
}
|
||||
|
||||
|
Reference in New Issue
Block a user