mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-08-20 04:48:53 +08:00
Skip single-character final blocks
Align with the behavior of the PDFBox and C implementations, where single-character final blocks are ignored rather than being written. Also make the error more informative in case it is ever encountered again, and add more test cases. It is possible that this hides the underlying problem and will move the error elsewhere, but it matches the behavior of the two reference implementations. One other potential source of the error is PDF supporting '<~' as a start-of-data marker, which I can't find in the spec, although Wikipedia says it might be possible. Without documents that trigger the error, I think this is the best fix for now.
This commit is contained in:
parent
781991b6bf
commit
7fe60ff8c3
@ -34,10 +34,39 @@ O<DJ+*.@<*K0@<6L(Df-\0Ec5e;DffZ(EZee.Bl.9pF""AGXBPCsi + DGm >@3BB / F * &OCAfu2
|
||||
text);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ReplacesZWithEmptyBytes()
|
||||
[Theory]
|
||||
[InlineData("BE", "h")]
|
||||
[InlineData("BOq", "he")]
|
||||
[InlineData("BOtu", "hel")]
|
||||
[InlineData("BOu!r", "hell")]
|
||||
[InlineData("BOu!rDZ", "hello")]
|
||||
[InlineData("BOu!rD]f", "hello ")]
|
||||
[InlineData("BOu!rD]j6", "hello w")]
|
||||
[InlineData("BOu!rD]j7B", "hello wo")]
|
||||
[InlineData("BOu!rD]j7BEW", "hello wor")]
|
||||
[InlineData("BOu!rD]j7BEbk", "hello worl")]
|
||||
[InlineData("BOu!rD]j7BEbo7", "hello world")]
|
||||
[InlineData("BOu!rD]j7BEbo80", "hello world!")]
|
||||
public void DecodesHelloWorld(string encoded, string decoded)
|
||||
{
|
||||
var bytes = Encoding.ASCII.GetBytes("9jqo^zBlbD-");
|
||||
var result = filter.Decode(
|
||||
Encoding.ASCII.GetBytes(encoded),
|
||||
dictionary,
|
||||
TestFilterProvider.Instance,
|
||||
0);
|
||||
|
||||
Assert.Equal(decoded, Encoding.ASCII.GetString(result.ToArray()));
|
||||
}
|
||||
|
||||
[Theory]
|
||||
[InlineData("9jqo^zBlbD-", "Man \0\0\0\0is d")]
|
||||
[InlineData("", "")]
|
||||
[InlineData("z", "\0\0\0\0")]
|
||||
[InlineData("zz", "\0\0\0\0\0\0\0\0")]
|
||||
[InlineData("zzz", "\0\0\0\0\0\0\0\0\0\0\0\0")]
|
||||
public void ReplacesZWithEmptyBytes(string encoded, string decoded)
|
||||
{
|
||||
var bytes = Encoding.ASCII.GetBytes(encoded);
|
||||
|
||||
var result = filter.Decode(bytes, dictionary, TestFilterProvider.Instance, 1);
|
||||
|
||||
@ -47,7 +76,7 @@ O<DJ+*.@<*K0@<6L(Df-\0Ec5e;DffZ(EZee.Bl.9pF""AGXBPCsi + DGm >@3BB / F * &OCAfu2
|
||||
string text = Encoding.ASCII.GetString(result.Span);
|
||||
#endif
|
||||
|
||||
Assert.Equal("Man \0\0\0\0is d", text);
|
||||
Assert.Equal(decoded, text);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
@ -60,14 +89,17 @@ O<DJ+*.@<*K0@<6L(Df-\0Ec5e;DffZ(EZee.Bl.9pF""AGXBPCsi + DGm >@3BB / F * &OCAfu2
|
||||
Assert.Throws<InvalidOperationException>(action);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void SingleCharacterLastThrows()
|
||||
[Theory]
|
||||
[InlineData("@rH:%B", "cool")]
|
||||
[InlineData("A~>", "")]
|
||||
[InlineData("@rH:%A~>", "cool")]
|
||||
public void SingleCharacterLastIgnores(string encoded, string decoded)
|
||||
{
|
||||
var bytes = Encoding.ASCII.GetBytes("9jqo^B");
|
||||
var bytes = Encoding.ASCII.GetBytes(encoded);
|
||||
|
||||
Action action = () => filter.Decode(bytes, dictionary, TestFilterProvider.Instance, 1);
|
||||
var result = filter.Decode(bytes, dictionary, TestFilterProvider.Instance, 1);
|
||||
|
||||
Assert.Throws<ArgumentOutOfRangeException>(action);
|
||||
Assert.Equal(decoded, Encoding.ASCII.GetString(result.ToArray()));
|
||||
}
|
||||
|
||||
private const string PdfContent = @"1 0 obj
|
||||
|
@ -2,6 +2,7 @@
|
||||
{
|
||||
using System;
|
||||
using Core;
|
||||
using System.Text;
|
||||
using Tokens;
|
||||
|
||||
/// <summary>
|
||||
@ -13,7 +14,7 @@
|
||||
private const byte Offset = (byte)'!';
|
||||
private const byte EmptyCharacterPadding = (byte)'u';
|
||||
|
||||
private static ReadOnlySpan<byte> EndOfDataBytes => [(byte)'~', (byte)'>'];
|
||||
private static ReadOnlySpan<byte> EndOfDataBytes => "~>"u8;
|
||||
|
||||
private static readonly int[] PowerByIndex =
|
||||
[
|
||||
@ -52,7 +53,7 @@
|
||||
{
|
||||
if (index > 0)
|
||||
{
|
||||
WriteData(asciiBuffer, index, writer);
|
||||
WriteData(asciiBuffer, index, writer, true);
|
||||
}
|
||||
|
||||
index = 0;
|
||||
@ -88,24 +89,36 @@
|
||||
|
||||
if (index == 5)
|
||||
{
|
||||
WriteData(asciiBuffer, index, writer);
|
||||
WriteData(asciiBuffer, index, writer, false);
|
||||
index = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (index > 0)
|
||||
{
|
||||
WriteData(asciiBuffer, index, writer);
|
||||
WriteData(asciiBuffer, index, writer, true);
|
||||
}
|
||||
|
||||
return writer.WrittenMemory.ToArray();
|
||||
}
|
||||
|
||||
private static void WriteData(Span<byte> ascii, int index, ArrayPoolBufferWriter<byte> writer)
|
||||
private static void WriteData(
|
||||
Span<byte> ascii,
|
||||
int index,
|
||||
ArrayPoolBufferWriter<byte> writer,
|
||||
bool isAtEnd)
|
||||
{
|
||||
if (index < 2)
|
||||
{
|
||||
throw new ArgumentOutOfRangeException(nameof(index), "Cannot convert a block padded by 4 'u' characters.");
|
||||
if (isAtEnd)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
var bufferTxt = Encoding.ASCII.GetString(ascii);
|
||||
var soFar = Encoding.ASCII.GetString(writer.GetSpan());
|
||||
throw new ArgumentOutOfRangeException(nameof(index),
|
||||
$"Cannot convert a this block because we're not at the end of the stream. Chunk: '{bufferTxt}'. Content: '{soFar}'");
|
||||
}
|
||||
|
||||
// Write any empty padding if the block ended early.
|
||||
|
Loading…
Reference in New Issue
Block a user