bug fix for indirect page link, bug fix for array in base font range in cmap

2025-09-21 04:17:57 +08:00 · 2018-01-07 11:51:18 +00:00
parent c75b9d10bd
commit a6c3dba25a
4 changed files with 115 additions and 23 deletions
--- a/src/UglyToad.Pdf.Tests/Fonts/Parser/Parts/BaseFontRangeParserTests.cs
+++ b/src/UglyToad.Pdf.Tests/Fonts/Parser/Parts/BaseFontRangeParserTests.cs
@@ -0,0 +1,61 @@
 namespace UglyToad.Pdf.Tests.Fonts.Parser.Parts
 {
    using Pdf.Fonts.Cmap;
    using Pdf.Fonts.Parser.Parts;
    using Pdf.Tokenization.Tokens;
    using Xunit;
    /// <summary>
    /// Tests for <see cref="BaseFontRangeParser"/> covering bfrange rows whose destination is
    /// a single hex string, an array of hex strings, or an array of names.
    /// Each row in the input has the shape: low source code, high source code, destination.
    /// </summary>
    public class BaseFontRangeParserTests
    {
        // A single parser instance is shared by every test in this class.
        private readonly BaseFontRangeParser parser = new BaseFontRangeParser();
        /// <summary>
        /// One row mapping source codes 0x0003..0x0004 to an array of two hex strings:
        /// each source code should receive the array element at the same position.
        /// </summary>
        [Fact]
        public void CanParseWithArray()
        {
            var input = StringBytesTestConverter.Scanner("<0003> <0004> [<0020> <0041>]");
            var builder = new CharacterMapBuilder();
            // NumericToken(1): the parser is told to read exactly one row; false = isLenientParsing.
            parser.Parse(new NumericToken(1), input, builder, false);
            // Two source codes in the range, so two entries: 3 -> 0x0020 (space), 4 -> 0x0041 ('A').
            Assert.Equal(2, builder.BaseFontCharacterMap.Count);
            Assert.Equal(" ", builder.BaseFontCharacterMap[3]);
            Assert.Equal("A", builder.BaseFontCharacterMap[4]);
        }
        /// <summary>
        /// One row mapping source codes 0x8141..0x8147 to a single hex destination starting at 0x8141:
        /// the destination is expected to advance by one for each successive source code.
        /// </summary>
        [Fact]
        public void CanParseWithHex()
        {
            var input = StringBytesTestConverter.Scanner("<8141> <8147> <8141>");
            var builder = new CharacterMapBuilder();
            parser.Parse(new NumericToken(1), input, builder, false);
            // The range is inclusive at both ends: 0x8141..0x8147 is 7 codes.
            Assert.Equal(7, builder.BaseFontCharacterMap.Count);
            // 33089 == 0x8141, which maps to the character with that same code point.
            Assert.Equal("腁", builder.BaseFontCharacterMap[33089]);
            // The next source code (0x8142) maps to the next destination code point.
            Assert.Equal(char.ConvertFromUtf32(33090), builder.BaseFontCharacterMap[33090]);
        }
        /// <summary>
        /// Two rows in one call, each using a different destination format:
        /// the first uses a single hex string, the second an array of names.
        /// </summary>
        [Fact]
        public void CanParseTwoRowsWithDifferentFormat()
        {
            // Row 1: source codes 0x0019..0x001B (25..27) with hex destination <3C> ('<').
            // Row 2: source codes 1..3 with one name per code.
            var input = StringBytesTestConverter.Scanner(@"<0019> <001B> <3C>
 <0001> <0003> [/happy /feet /penguin]");
            var builder = new CharacterMapBuilder();
            // NumericToken(2): the parser is told to read two rows.
            parser.Parse(new NumericToken(2), input, builder, false);
            // Three entries from each row.
            Assert.Equal(6, builder.BaseFontCharacterMap.Count);
            // Name destinations are stored as the name text itself.
            Assert.Equal("happy", builder.BaseFontCharacterMap[1]);
            Assert.Equal("feet", builder.BaseFontCharacterMap[2]);
            Assert.Equal("penguin", builder.BaseFontCharacterMap[3]);
            // 25 == 0x0019, the first source code of row 1, mapped to 0x3C ('<').
            Assert.Equal("<", builder.BaseFontCharacterMap[25]);
        }
    }
 }
--- a/src/UglyToad.Pdf.Tests/Integration/SwedishTouringCarChampionshipTests.cs
+++ b/src/UglyToad.Pdf.Tests/Integration/SwedishTouringCarChampionshipTests.cs
@@ -61,5 +61,14 @@
                Assert.Contains("Söderberg", page.Text);
            }
        }
        //[Fact]
        //public void localFileTest()
        //{
        //    using (var document = PdfDocument.Open(@"C:\Users\eliot\Downloads\CV.pdf"))
        //    {
        //        var page = document.GetPage(1);
        //    }
        //}
    }
 }
--- a/src/UglyToad.Pdf/Content/Pages.cs
+++ b/src/UglyToad.Pdf/Content/Pages.cs
@@ -9,6 +9,7 @@
    using IO;
    using Logging;
    using Parser;
    using Parser.Parts;
    internal class Pages
    {
@@ -111,7 +112,7 @@
            foreach (var kid in kids.OfType<CosObject>())
            {
                // todo: exit early
-                var child = pdfObjectParser.Parse(kid.ToIndirectReference(), reader, isLenientParsing) as PdfDictionary;
+                var child = DirectObjectFinder.Find<PdfDictionary>(kid, pdfObjectParser, reader, isLenientParsing);
                var thisPageMatches = FindPage(child, soughtPageNumber, pageNumbersObserved);
--- a/src/UglyToad.Pdf/Fonts/Parser/Parts/BaseFontRangeParser.cs
+++ b/src/UglyToad.Pdf/Fonts/Parser/Parts/BaseFontRangeParser.cs
@@ -4,35 +4,39 @@
    using System.Collections.Generic;
    using System.Linq;
    using Cmap;
    using Exceptions;
    using Tokenization.Scanner;
    using Tokenization.Tokens;
    /// <summary>
    /// The beginbfrange and endbfrange operators map i ranges of input codes to the corresponding output code range.
    /// </summary>
    internal class BaseFontRangeParser : ICidFontPartParser<NumericToken>
    {
-        public void Parse(NumericToken numeric, ITokenScanner scanner, CharacterMapBuilder builder, bool isLenientParsing)
+        public void Parse(NumericToken numberOfOperations, ITokenScanner scanner, CharacterMapBuilder builder, bool isLenientParsing)
        {
-            for (var i = 0; i < numeric.Int; i++)
+            for (var i = 0; i < numberOfOperations.Int; i++)
            {
                // The start of the input code range.
                if (!scanner.TryReadToken(out HexToken lowSourceCode))
                {
-                    // TODO: message
+                    throw new InvalidFontFormatException($"bfrange was missing the low source code: {scanner.CurrentToken}");
                    throw new InvalidOperationException();
                }
                // The inclusive end of the input code range.
                if (!scanner.TryReadToken(out HexToken highSourceCode))
                {
-                    // TODO: message
+                    throw new InvalidFontFormatException($"bfrange was missing the high source code: {scanner.CurrentToken}");
                    throw new InvalidOperationException();
                }
                if (!scanner.MoveNext())
                {
-                    // TODO: message
+                    throw new InvalidFontFormatException("bfrange ended unexpectedly after the high source code.");
                    throw new InvalidOperationException();
                }
                List<byte> destinationBytes = null;
                ArrayToken destinationArray = null;
                switch (scanner.CurrentToken)
                {
                    case ArrayToken arrayToken:
@@ -51,7 +55,35 @@
                var startCode = new List<byte>(lowSourceCode.Bytes);
                var endCode = highSourceCode.Bytes;
                if (destinationArray != null)
                {
                    int arrayIndex = 0;
                    while (!done)
                    {
                        if (Compare(startCode, endCode) >= 0)
                        {
                            done = true;
                        }
                        var destination = destinationArray.Data[arrayIndex];
                        if (destination is NameToken name)
                        {
                            builder.AddBaseFontCharacter(startCode, name.Data.Name);
                        }
                        else if (destination is HexToken hex)
                        {
                            builder.AddBaseFontCharacter(startCode, hex.Bytes);
                        }
                        Increment(startCode, startCode.Count - 1);
                        arrayIndex++;
                    }
                    continue;
                }
                while (!done)
                {
                    if (Compare(startCode, endCode) >= 0)
@@ -63,19 +95,8 @@
                    Increment(startCode, startCode.Count - 1);
                    if (destinationArray == null)
                    {
                    Increment(destinationBytes, destinationBytes.Count - 1);
                }
                    else
                    {
                        arrayIndex++;
                        if (arrayIndex < destinationArray.Data.Count)
                        {
                            destinationBytes = ((HexToken)destinationArray.Data[arrayIndex]).Bytes.ToList();
                        }
                    }
                }
            }
        }