mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-09-19 10:47:56 +08:00
bug fix for indirect page link, bug fix for array in base font range in cmap
This commit is contained in:
@@ -0,0 +1,61 @@
|
||||
namespace UglyToad.Pdf.Tests.Fonts.Parser.Parts
|
||||
{
|
||||
using Pdf.Fonts.Cmap;
|
||||
using Pdf.Fonts.Parser.Parts;
|
||||
using Pdf.Tokenization.Tokens;
|
||||
using Xunit;
|
||||
|
||||
/// <summary>
/// Tests for <see cref="BaseFontRangeParser"/>, exercising the destination forms used by the
/// inputs below: an array of hex strings, a single hex string and an array of names.
/// </summary>
public class BaseFontRangeParserTests
{
    private readonly BaseFontRangeParser parser = new BaseFontRangeParser();

    /// <summary>
    /// A single row whose destination is an array of hex strings: codes 3 and 4 are expected
    /// to map to the strings " " and "A" respectively.
    /// </summary>
    [Fact]
    public void CanParseWithArray()
    {
        var input = StringBytesTestConverter.Scanner("<0003> <0004> [<0020> <0041>]");

        var builder = new CharacterMapBuilder();

        parser.Parse(new NumericToken(1), input, builder, false);

        Assert.Equal(2, builder.BaseFontCharacterMap.Count);

        Assert.Equal(" ", builder.BaseFontCharacterMap[3]);
        Assert.Equal("A", builder.BaseFontCharacterMap[4]);
    }

    /// <summary>
    /// A single row whose destination is one hex string: the source range 0x8141-0x8147 is
    /// expected to produce 7 entries, with the destination advancing alongside the source code.
    /// </summary>
    [Fact]
    public void CanParseWithHex()
    {
        var input = StringBytesTestConverter.Scanner("<8141> <8147> <8141>");

        var builder = new CharacterMapBuilder();

        parser.Parse(new NumericToken(1), input, builder, false);

        Assert.Equal(7, builder.BaseFontCharacterMap.Count);

        // 33089 == 0x8141, the first code in the range.
        Assert.Equal("腁", builder.BaseFontCharacterMap[33089]);
        Assert.Equal(char.ConvertFromUtf32(33090), builder.BaseFontCharacterMap[33090]);
    }

    /// <summary>
    /// Two rows in one operation, the first with a hex destination and the second with an
    /// array of names: 6 entries are expected in total across both rows.
    /// </summary>
    [Fact]
    public void CanParseTwoRowsWithDifferentFormat()
    {
        // The line break inside the verbatim string separates the two rows.
        var input = StringBytesTestConverter.Scanner(@"<0019> <001B> <3C>
<0001> <0003> [/happy /feet /penguin]");

        var builder = new CharacterMapBuilder();

        parser.Parse(new NumericToken(2), input, builder, false);

        Assert.Equal(6, builder.BaseFontCharacterMap.Count);

        Assert.Equal("happy", builder.BaseFontCharacterMap[1]);
        Assert.Equal("feet", builder.BaseFontCharacterMap[2]);
        Assert.Equal("penguin", builder.BaseFontCharacterMap[3]);

        // 25 == 0x19, the first code of the hex row; its destination byte 0x3C is "<".
        Assert.Equal("<", builder.BaseFontCharacterMap[25]);
    }
}
|
||||
}
|
@@ -61,5 +61,14 @@
|
||||
Assert.Contains("Söderberg", page.Text);
|
||||
}
|
||||
}
|
||||
|
||||
//[Fact]
|
||||
//public void localFileTest()
|
||||
//{
|
||||
// using (var document = PdfDocument.Open(@"C:\Users\eliot\Downloads\CV.pdf"))
|
||||
// {
|
||||
// var page = document.GetPage(1);
|
||||
// }
|
||||
//}
|
||||
}
|
||||
}
|
@@ -9,6 +9,7 @@
|
||||
using IO;
|
||||
using Logging;
|
||||
using Parser;
|
||||
using Parser.Parts;
|
||||
|
||||
internal class Pages
|
||||
{
|
||||
@@ -111,7 +112,7 @@
|
||||
foreach (var kid in kids.OfType<CosObject>())
|
||||
{
|
||||
// todo: exit early
|
||||
var child = pdfObjectParser.Parse(kid.ToIndirectReference(), reader, isLenientParsing) as PdfDictionary;
|
||||
var child = DirectObjectFinder.Find<PdfDictionary>(kid, pdfObjectParser, reader, isLenientParsing);
|
||||
|
||||
var thisPageMatches = FindPage(child, soughtPageNumber, pageNumbersObserved);
|
||||
|
||||
|
@@ -4,35 +4,39 @@
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using Cmap;
|
||||
using Exceptions;
|
||||
using Tokenization.Scanner;
|
||||
using Tokenization.Tokens;
|
||||
|
||||
/// <summary>
|
||||
/// The beginbfrange and endbfrange operators map ranges of input codes to the corresponding output code ranges.
|
||||
/// </summary>
|
||||
internal class BaseFontRangeParser : ICidFontPartParser<NumericToken>
|
||||
{
|
||||
public void Parse(NumericToken numeric, ITokenScanner scanner, CharacterMapBuilder builder, bool isLenientParsing)
|
||||
public void Parse(NumericToken numberOfOperations, ITokenScanner scanner, CharacterMapBuilder builder, bool isLenientParsing)
|
||||
{
|
||||
for (var i = 0; i < numeric.Int; i++)
|
||||
for (var i = 0; i < numberOfOperations.Int; i++)
|
||||
{
|
||||
// The start of the input code range.
|
||||
if (!scanner.TryReadToken(out HexToken lowSourceCode))
|
||||
{
|
||||
// TODO: message
|
||||
throw new InvalidOperationException();
|
||||
throw new InvalidFontFormatException($"bfrange was missing the low source code: {scanner.CurrentToken}");
|
||||
}
|
||||
|
||||
// The inclusive end of the input code range.
|
||||
if (!scanner.TryReadToken(out HexToken highSourceCode))
|
||||
{
|
||||
// TODO: message
|
||||
throw new InvalidOperationException();
|
||||
throw new InvalidFontFormatException($"bfrange was missing the high source code: {scanner.CurrentToken}");
|
||||
}
|
||||
|
||||
if (!scanner.MoveNext())
|
||||
{
|
||||
// TODO: message
|
||||
throw new InvalidOperationException();
|
||||
throw new InvalidFontFormatException("bfrange ended unexpectedly after the high source code.");
|
||||
}
|
||||
|
||||
List<byte> destinationBytes = null;
|
||||
ArrayToken destinationArray = null;
|
||||
|
||||
switch (scanner.CurrentToken)
|
||||
{
|
||||
case ArrayToken arrayToken:
|
||||
@@ -51,7 +55,35 @@
|
||||
var startCode = new List<byte>(lowSourceCode.Bytes);
|
||||
var endCode = highSourceCode.Bytes;
|
||||
|
||||
if (destinationArray != null)
|
||||
{
|
||||
int arrayIndex = 0;
|
||||
while (!done)
|
||||
{
|
||||
if (Compare(startCode, endCode) >= 0)
|
||||
{
|
||||
done = true;
|
||||
}
|
||||
|
||||
var destination = destinationArray.Data[arrayIndex];
|
||||
|
||||
if (destination is NameToken name)
|
||||
{
|
||||
builder.AddBaseFontCharacter(startCode, name.Data.Name);
|
||||
}
|
||||
else if (destination is HexToken hex)
|
||||
{
|
||||
builder.AddBaseFontCharacter(startCode, hex.Bytes);
|
||||
}
|
||||
|
||||
Increment(startCode, startCode.Count - 1);
|
||||
|
||||
arrayIndex++;
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
while (!done)
|
||||
{
|
||||
if (Compare(startCode, endCode) >= 0)
|
||||
@@ -63,19 +95,8 @@
|
||||
|
||||
Increment(startCode, startCode.Count - 1);
|
||||
|
||||
if (destinationArray == null)
|
||||
{
|
||||
Increment(destinationBytes, destinationBytes.Count - 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
arrayIndex++;
|
||||
if (arrayIndex < destinationArray.Data.Count)
|
||||
{
|
||||
destinationBytes = ((HexToken)destinationArray.Data[arrayIndex]).Bytes.ToList();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user