Skip single-character final blocks

Align with the behavior of the PDFBox and C implementations, where a single-character final block is ignored rather than being written. Also make the error more informative in case it is ever encountered again, and add more test cases. It is possible that this hides the underlying problem and merely moves the error elsewhere, but it matches the behavior of the two reference implementations. One other potential source of the error is PDF supporting '<~' as a start-of-data marker; I can't find this in the spec, but Wikipedia says it might be possible. Without documents that trigger the error, I think this is the best fix for now.
2025-08-20 04:48:53 +08:00 · 2025-07-08 18:14:29 -05:00 · 2025-07-08 18:14:29 -05:00 · 7fe60ff8c3
commit 7fe60ff8c3
parent 781991b6bf
2 changed files with 60 additions and 15 deletions
--- a/src/UglyToad.PdfPig.Tests/Filters/Ascii85FilterTests.cs
+++ b/src/UglyToad.PdfPig.Tests/Filters/Ascii85FilterTests.cs
@ -34,10 +34,39 @@ O<DJ+*.@<*K0@<6L(Df-\0Ec5e;DffZ(EZee.Bl.9pF""AGXBPCsi + DGm >@3BB / F * &OCAfu2
                text);
        }

-        [Fact]
-        public void ReplacesZWithEmptyBytes()
+        [Theory]
+        [InlineData("BE", "h")]
+        [InlineData("BOq", "he")]
+        [InlineData("BOtu", "hel")]
+        [InlineData("BOu!r", "hell")]
+        [InlineData("BOu!rDZ", "hello")]
+        [InlineData("BOu!rD]f", "hello ")]
+        [InlineData("BOu!rD]j6", "hello w")]
+        [InlineData("BOu!rD]j7B", "hello wo")]
+        [InlineData("BOu!rD]j7BEW", "hello wor")]
+        [InlineData("BOu!rD]j7BEbk", "hello worl")]
+        [InlineData("BOu!rD]j7BEbo7", "hello world")]
+        [InlineData("BOu!rD]j7BEbo80", "hello world!")]
+        public void DecodesHelloWorld(string encoded, string decoded)
        {
-            var bytes = Encoding.ASCII.GetBytes("9jqo^zBlbD-");
+            var result = filter.Decode(
+                Encoding.ASCII.GetBytes(encoded),
+                dictionary,
+                TestFilterProvider.Instance,
+                0);
+
+            Assert.Equal(decoded, Encoding.ASCII.GetString(result.ToArray()));
+        }
+
+        [Theory]
+        [InlineData("9jqo^zBlbD-", "Man \0\0\0\0is d")]
+        [InlineData("", "")]
+        [InlineData("z", "\0\0\0\0")]
+        [InlineData("zz", "\0\0\0\0\0\0\0\0")]
+        [InlineData("zzz", "\0\0\0\0\0\0\0\0\0\0\0\0")]
+        public void ReplacesZWithEmptyBytes(string encoded, string decoded)
+        {
+            var bytes = Encoding.ASCII.GetBytes(encoded);

            var result = filter.Decode(bytes, dictionary, TestFilterProvider.Instance, 1);

@ -47,7 +76,7 @@ O<DJ+*.@<*K0@<6L(Df-\0Ec5e;DffZ(EZee.Bl.9pF""AGXBPCsi + DGm >@3BB / F * &OCAfu2
            string text = Encoding.ASCII.GetString(result.Span);
 #endif

-            Assert.Equal("Man \0\0\0\0is d", text);
+            Assert.Equal(decoded, text);
        }
        
        [Fact]
@ -60,14 +89,17 @@ O<DJ+*.@<*K0@<6L(Df-\0Ec5e;DffZ(EZee.Bl.9pF""AGXBPCsi + DGm >@3BB / F * &OCAfu2
            Assert.Throws<InvalidOperationException>(action);
        }

-        [Fact]
-        public void SingleCharacterLastThrows()
+        [Theory]
+        [InlineData("@rH:%B", "cool")]
+        [InlineData("A~>", "")]
+        [InlineData("@rH:%A~>", "cool")]
+        public void SingleCharacterLastIgnores(string encoded, string decoded)
        {
-            var bytes = Encoding.ASCII.GetBytes("9jqo^B");
+            var bytes = Encoding.ASCII.GetBytes(encoded);

-            Action action = () => filter.Decode(bytes, dictionary, TestFilterProvider.Instance, 1);
+            var result = filter.Decode(bytes, dictionary, TestFilterProvider.Instance, 1);

-            Assert.Throws<ArgumentOutOfRangeException>(action);
+            Assert.Equal(decoded, Encoding.ASCII.GetString(result.ToArray()));
        }

        private const string PdfContent = @"1 0 obj
--- a/src/UglyToad.PdfPig/Filters/Ascii85Filter.cs
+++ b/src/UglyToad.PdfPig/Filters/Ascii85Filter.cs
@ -2,6 +2,7 @@
 {
    using System;
    using Core;
+    using System.Text;
    using Tokens;

    /// <summary>
@ -13,7 +14,7 @@
        private const byte Offset = (byte)'!';
        private const byte EmptyCharacterPadding = (byte)'u';

-        private static ReadOnlySpan<byte> EndOfDataBytes => [(byte)'~', (byte)'>'];
+        private static ReadOnlySpan<byte> EndOfDataBytes => "~>"u8;

        private static readonly int[] PowerByIndex =
        [
@ -52,7 +53,7 @@
                    {
                        if (index > 0)
                        {
-                            WriteData(asciiBuffer, index, writer);
+                            WriteData(asciiBuffer, index, writer, true);
                        }

                        index = 0;
@ -88,24 +89,36 @@

                if (index == 5)
                {
-                    WriteData(asciiBuffer, index, writer);
+                    WriteData(asciiBuffer, index, writer, false);
                    index = 0;
                }
            }

            if (index > 0)
            {
-                WriteData(asciiBuffer, index, writer);
+                WriteData(asciiBuffer, index, writer, true);
            }

            return writer.WrittenMemory.ToArray();
        }

-        private static void WriteData(Span<byte> ascii, int index, ArrayPoolBufferWriter<byte> writer)
+        private static void WriteData(
+            Span<byte> ascii,
+            int index,
+            ArrayPoolBufferWriter<byte> writer,
+            bool isAtEnd)
        {
            if (index < 2)
            {
-                throw new ArgumentOutOfRangeException(nameof(index), "Cannot convert a block padded by 4 'u' characters.");
+                if (isAtEnd)
+                {
+                    return;
+                }
+
+                var bufferTxt = Encoding.ASCII.GetString(ascii);
+                var soFar = Encoding.ASCII.GetString(writer.GetSpan());
+                throw new ArgumentOutOfRangeException(nameof(index),
+                    $"Cannot convert a this block because we're not at the end of the stream. Chunk: '{bufferTxt}'. Content: '{soFar}'");
            }

            // Write any empty padding if the block ended early.