Bug fix for indirect page links in the page tree; bug fix for array destinations in the base font range (bfrange) of a CMap

2025-09-19 10:47:56 +08:00 · 2018-01-07 11:51:18 +00:00
parent c75b9d10bd
commit a6c3dba25a
4 changed files with 115 additions and 23 deletions
--- a/src/UglyToad.Pdf.Tests/Fonts/Parser/Parts/BaseFontRangeParserTests.cs
+++ b/src/UglyToad.Pdf.Tests/Fonts/Parser/Parts/BaseFontRangeParserTests.cs
@@ -0,0 +1,61 @@
+namespace UglyToad.Pdf.Tests.Fonts.Parser.Parts
+{
+    using Pdf.Fonts.Cmap;
+    using Pdf.Fonts.Parser.Parts;
+    using Pdf.Tokenization.Tokens;
+    using Xunit;
+
+    public class BaseFontRangeParserTests
+    {
+        private readonly BaseFontRangeParser parser = new BaseFontRangeParser();
+
+        [Fact]
+        public void CanParseWithArray()
+        {
+            var input = StringBytesTestConverter.Scanner("<0003> <0004> [<0020> <0041>]");
+
+            var builder = new CharacterMapBuilder();
+
+            parser.Parse(new NumericToken(1), input, builder, false);
+
+            Assert.Equal(2, builder.BaseFontCharacterMap.Count);
+
+            Assert.Equal(" ", builder.BaseFontCharacterMap[3]);
+            Assert.Equal("A", builder.BaseFontCharacterMap[4]);
+        }
+
+        [Fact]
+        public void CanParseWithHex()
+        {
+            var input = StringBytesTestConverter.Scanner("<8141> <8147> <8141>");
+
+            var builder = new CharacterMapBuilder();
+
+            parser.Parse(new NumericToken(1), input, builder, false);
+
+            Assert.Equal(7, builder.BaseFontCharacterMap.Count);
+
+            Assert.Equal("腁", builder.BaseFontCharacterMap[33089]);
+            Assert.Equal(char.ConvertFromUtf32(33090), builder.BaseFontCharacterMap[33090]);
+        }
+
+        [Fact]
+        public void CanParseTwoRowsWithDifferentFormat()
+        {
+            var input = StringBytesTestConverter.Scanner(@"<0019> <001B> <3C>
+<0001> <0003> [/happy /feet /penguin]");
+
+            var builder = new CharacterMapBuilder();
+
+            parser.Parse(new NumericToken(2), input, builder, false);
+
+            Assert.Equal(6, builder.BaseFontCharacterMap.Count);
+
+            Assert.Equal("happy", builder.BaseFontCharacterMap[1]);
+            Assert.Equal("feet", builder.BaseFontCharacterMap[2]);
+            Assert.Equal("penguin", builder.BaseFontCharacterMap[3]);
+
+            Assert.Equal("<", builder.BaseFontCharacterMap[25]);
+        }
+    }
+}
--- a/src/UglyToad.Pdf.Tests/Integration/SwedishTouringCarChampionshipTests.cs
+++ b/src/UglyToad.Pdf.Tests/Integration/SwedishTouringCarChampionshipTests.cs
@@ -61,5 +61,14 @@
                Assert.Contains("Söderberg", page.Text);
            }
        }
+
+        //[Fact]
+        //public void localFileTest()
+        //{
+        //    using (var document = PdfDocument.Open(@"C:\Users\eliot\Downloads\CV.pdf"))
+        //    {
+        //        var page = document.GetPage(1);
+        //    }
+        //}
    }
 }
--- a/src/UglyToad.Pdf/Content/Pages.cs
+++ b/src/UglyToad.Pdf/Content/Pages.cs
@@ -9,6 +9,7 @@
    using IO;
    using Logging;
    using Parser;
+    using Parser.Parts;

    internal class Pages
    {
@@ -111,7 +112,7 @@
            foreach (var kid in kids.OfType<CosObject>())
            {
                // todo: exit early
-                var child = pdfObjectParser.Parse(kid.ToIndirectReference(), reader, isLenientParsing) as PdfDictionary;
+                var child = DirectObjectFinder.Find<PdfDictionary>(kid, pdfObjectParser, reader, isLenientParsing);
                
                var thisPageMatches = FindPage(child, soughtPageNumber, pageNumbersObserved);

--- a/src/UglyToad.Pdf/Fonts/Parser/Parts/BaseFontRangeParser.cs
+++ b/src/UglyToad.Pdf/Fonts/Parser/Parts/BaseFontRangeParser.cs
@@ -4,35 +4,39 @@
    using System.Collections.Generic;
    using System.Linq;
    using Cmap;
+    using Exceptions;
    using Tokenization.Scanner;
    using Tokenization.Tokens;

+    /// <summary>
+    /// The beginbfrange and endbfrange operators map i ranges of input codes to the corresponding output code range, where i is the number of ranges (passed to Parse as numberOfOperations).
+    /// </summary>
    internal class BaseFontRangeParser : ICidFontPartParser<NumericToken>
    {
-        public void Parse(NumericToken numeric, ITokenScanner scanner, CharacterMapBuilder builder, bool isLenientParsing)
+        public void Parse(NumericToken numberOfOperations, ITokenScanner scanner, CharacterMapBuilder builder, bool isLenientParsing)
        {
-            for (var i = 0; i < numeric.Int; i++)
+            for (var i = 0; i < numberOfOperations.Int; i++)
            {
+                // The start of the input code range.
                if (!scanner.TryReadToken(out HexToken lowSourceCode))
                {
-                    // TODO: message
-                    throw new InvalidOperationException();
+                    throw new InvalidFontFormatException($"bfrange was missing the low source code: {scanner.CurrentToken}");
                }

+                // The inclusive end of the input code range.
                if (!scanner.TryReadToken(out HexToken highSourceCode))
                {
-                    // TODO: message
-                    throw new InvalidOperationException();
+                    throw new InvalidFontFormatException($"bfrange was missing the high source code: {scanner.CurrentToken}");
                }

                if (!scanner.MoveNext())
                {
-                    // TODO: message
-                    throw new InvalidOperationException();
+                    throw new InvalidFontFormatException("bfrange ended unexpectedly after the high source code.");
                }

                List<byte> destinationBytes = null;
                ArrayToken destinationArray = null;
+
                switch (scanner.CurrentToken)
                {
                    case ArrayToken arrayToken:
@@ -51,7 +55,35 @@
                var startCode = new List<byte>(lowSourceCode.Bytes);
                var endCode = highSourceCode.Bytes;

+                if (destinationArray != null)
+                {
                    int arrayIndex = 0;
+                    while (!done)
+                    {
+                        if (Compare(startCode, endCode) >= 0)
+                        {
+                            done = true;
+                        }
+
+                        var destination = destinationArray.Data[arrayIndex];
+
+                        if (destination is NameToken name)
+                        {
+                            builder.AddBaseFontCharacter(startCode, name.Data.Name);
+                        }
+                        else if (destination is HexToken hex)
+                        {
+                            builder.AddBaseFontCharacter(startCode, hex.Bytes);
+                        }
+                        
+                        Increment(startCode, startCode.Count - 1);
+
+                        arrayIndex++;
+                    }
+
+                    continue;
+                }
+
                while (!done)
                {
                    if (Compare(startCode, endCode) >= 0)
@@ -63,19 +95,8 @@

                    Increment(startCode, startCode.Count - 1);

-                    if (destinationArray == null)
-                    {
                    Increment(destinationBytes, destinationBytes.Count - 1);
                }
-                    else
-                    {
-                        arrayIndex++;
-                        if (arrayIndex < destinationArray.Data.Count)
-                        {
-                            destinationBytes = ((HexToken)destinationArray.Data[arrayIndex]).Bytes.ToList();
-                        }
-                    }
-                }
            }
        }