mirror of
https://github.com/UglyToad/PdfPig.git
synced 2026-03-10 00:23:29 +08:00
Use pdfScanner in ReadVerticalDisplacements (fixes #693), and return 0 from CMap.ReadByte() instead of throwing at end of input when useLenientParsing is true (fixes #692)
This commit is contained in:
Binary file not shown.
Binary file not shown.
@@ -4,6 +4,36 @@
|
||||
|
||||
public class GithubIssuesTests
|
||||
{
|
||||
[Fact]
|
||||
public void Issue693()
|
||||
{
|
||||
var doc = IntegrationHelpers.GetDocumentPath("reference-2-numeric-error.pdf");
|
||||
|
||||
using (var document = PdfDocument.Open(doc, new ParsingOptions() { UseLenientParsing = true, SkipMissingFonts = true }))
|
||||
{
|
||||
var page1 = document.GetPage(1);
|
||||
Assert.Equal(1269, page1.Letters.Count);
|
||||
}
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Issue692()
|
||||
{
|
||||
var doc = IntegrationHelpers.GetDocumentPath("cmap-parsing-exception.pdf");
|
||||
|
||||
using (var document = PdfDocument.Open(doc, new ParsingOptions() { UseLenientParsing = true, SkipMissingFonts = true }))
|
||||
{
|
||||
var page1 = document.GetPage(1);
|
||||
Assert.Equal(796, page1.Letters.Count);
|
||||
}
|
||||
|
||||
using (var document = PdfDocument.Open(doc, new ParsingOptions() { UseLenientParsing = false, SkipMissingFonts = false }))
|
||||
{
|
||||
var ex = Assert.Throws<InvalidOperationException>(() => document.GetPage(1));
|
||||
Assert.StartsWith("Read byte called on input bytes which was at end of byte set.", ex.Message);
|
||||
}
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Issue874()
|
||||
{
|
||||
|
||||
@@ -7,7 +7,8 @@
|
||||
[
|
||||
"issue_671.pdf",
|
||||
"GHOSTSCRIPT-698363-0.pdf",
|
||||
"ErcotFacts.pdf"
|
||||
"ErcotFacts.pdf",
|
||||
"cmap-parsing-exception.pdf"
|
||||
];
|
||||
|
||||
[Theory]
|
||||
|
||||
@@ -166,7 +166,7 @@
|
||||
cidFontFactory,
|
||||
filterProvider,
|
||||
pdfScanner,
|
||||
parsingOptions.Logger);
|
||||
parsingOptions);
|
||||
|
||||
var type1Handler = new Type1FontHandler(pdfScanner, filterProvider, encodingReader);
|
||||
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
namespace UglyToad.PdfPig
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using Core;
|
||||
using Filters;
|
||||
using Parser.Parts;
|
||||
using Parser.Parts;
|
||||
using Tokenization.Scanner;
|
||||
using Tokens;
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
/// The CMap (character code map) maps character codes to character identifiers (CIDs).
|
||||
/// The set of characters which a CMap refers to is the "character set" (charset).
|
||||
/// </summary>
|
||||
internal class CMap
|
||||
internal sealed class CMap
|
||||
{
|
||||
public CharacterIdentifierSystemInfo Info { get; }
|
||||
|
||||
@@ -140,13 +140,12 @@
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
public override string ToString()
|
||||
{
|
||||
return Name;
|
||||
}
|
||||
|
||||
public int ReadCode(IInputBytes bytes)
|
||||
public int ReadCode(IInputBytes bytes, bool useLenientParsing)
|
||||
{
|
||||
if (hasEmptyCodespace)
|
||||
{
|
||||
@@ -166,7 +165,7 @@
|
||||
break;
|
||||
}
|
||||
|
||||
result[i] = ReadByte(bytes);
|
||||
result[i] = ReadByte(bytes, useLenientParsing);
|
||||
}
|
||||
|
||||
for (int i = minCodeLength - 1; i < maxCodeLength; i++)
|
||||
@@ -181,17 +180,23 @@
|
||||
}
|
||||
if (byteCount < maxCodeLength)
|
||||
{
|
||||
result[byteCount] = ReadByte(bytes);
|
||||
result[byteCount] = ReadByte(bytes, useLenientParsing);
|
||||
}
|
||||
}
|
||||
|
||||
throw new PdfDocumentFormatException($"CMap is invalid, min code length was {minCodeLength}, max was {maxCodeLength}.");
|
||||
}
|
||||
|
||||
private static byte ReadByte(IInputBytes bytes)
|
||||
private static byte ReadByte(IInputBytes bytes, bool useLenientParsing)
|
||||
{
|
||||
if (!bytes.MoveNext())
|
||||
{
|
||||
if (useLenientParsing)
|
||||
{
|
||||
// See issue #692
|
||||
return 0;
|
||||
}
|
||||
|
||||
throw new InvalidOperationException("Read byte called on input bytes which was at end of byte set. Current offset: " + bytes.CurrentOffset);
|
||||
}
|
||||
|
||||
@@ -208,6 +213,5 @@
|
||||
}
|
||||
return code;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
/// Defines the information content (actual text) of the font
|
||||
/// as opposed to the display format.
|
||||
/// </summary>
|
||||
internal class ToUnicodeCMap
|
||||
internal sealed class ToUnicodeCMap
|
||||
{
|
||||
private readonly CMap? cMap;
|
||||
|
||||
@@ -45,9 +45,9 @@
|
||||
return cMap.TryConvertToUnicode(code, out value);
|
||||
}
|
||||
|
||||
public int ReadCode(IInputBytes inputBytes)
|
||||
public int ReadCode(IInputBytes inputBytes, bool useLenientParsing)
|
||||
{
|
||||
return cMap!.ReadCode(inputBytes);
|
||||
return cMap!.ReadCode(inputBytes, useLenientParsing);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -21,6 +21,8 @@
|
||||
private readonly Dictionary<int, CharacterBoundingBox> boundingBoxCache
|
||||
= new Dictionary<int, CharacterBoundingBox>();
|
||||
|
||||
private readonly bool useLenientParsing;
|
||||
|
||||
public NameToken Name => BaseFont;
|
||||
|
||||
public NameToken BaseFont { get; }
|
||||
@@ -41,6 +43,7 @@
|
||||
CMap cmap,
|
||||
CMap? toUnicodeCMap,
|
||||
CMap? ucs2CMap,
|
||||
ParsingOptions parsingOptions,
|
||||
bool isChineseJapaneseOrKorean)
|
||||
{
|
||||
this.ucs2CMap = ucs2CMap;
|
||||
@@ -52,13 +55,15 @@
|
||||
ToUnicode = new ToUnicodeCMap(toUnicodeCMap);
|
||||
Details = cidFont.Details?.WithName(Name.Data)
|
||||
?? FontDetails.GetDefault(Name.Data);
|
||||
|
||||
useLenientParsing = parsingOptions.UseLenientParsing;
|
||||
}
|
||||
|
||||
public int ReadCharacterCode(IInputBytes bytes, out int codeLength)
|
||||
{
|
||||
var current = bytes.CurrentOffset;
|
||||
|
||||
var code = CMap.ReadCode(bytes);
|
||||
var code = CMap.ReadCode(bytes, useLenientParsing);
|
||||
|
||||
codeLength = (int)(bytes.CurrentOffset - current);
|
||||
|
||||
|
||||
@@ -15,23 +15,25 @@
|
||||
using Tokens;
|
||||
using Util;
|
||||
|
||||
internal class Type0FontHandler : IFontHandler
|
||||
internal sealed class Type0FontHandler : IFontHandler
|
||||
{
|
||||
private readonly CidFontFactory cidFontFactory;
|
||||
private readonly ILookupFilterProvider filterProvider;
|
||||
private readonly IPdfTokenScanner scanner;
|
||||
private readonly ILog logger;
|
||||
private readonly ParsingOptions parsingOptions;
|
||||
|
||||
public Type0FontHandler(
|
||||
CidFontFactory cidFontFactory,
|
||||
ILookupFilterProvider filterProvider,
|
||||
IPdfTokenScanner scanner,
|
||||
ILog logger)
|
||||
ParsingOptions parsingOptions)
|
||||
{
|
||||
this.cidFontFactory = cidFontFactory;
|
||||
this.filterProvider = filterProvider;
|
||||
this.scanner = scanner;
|
||||
this.logger = logger;
|
||||
logger = parsingOptions.Logger;
|
||||
this.parsingOptions = parsingOptions;
|
||||
}
|
||||
|
||||
public IFont Generate(DictionaryToken dictionary)
|
||||
@@ -91,7 +93,7 @@
|
||||
}
|
||||
}
|
||||
|
||||
var font = new Type0Font(baseFont!, cidFont, cMap, toUnicodeCMap, ucs2CMap, isChineseJapaneseOrKorean);
|
||||
var font = new Type0Font(baseFont!, cidFont, cMap, toUnicodeCMap, ucs2CMap, parsingOptions, isChineseJapaneseOrKorean);
|
||||
|
||||
return font;
|
||||
}
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
using UglyToad.PdfPig.Logging;
|
||||
using Util;
|
||||
|
||||
internal class CidFontFactory
|
||||
internal sealed class CidFontFactory
|
||||
{
|
||||
private readonly ILookupFilterProvider filterProvider;
|
||||
private readonly IPdfTokenScanner pdfScanner;
|
||||
@@ -46,7 +46,7 @@
|
||||
defaultWidth = defaultWidthToken.Double;
|
||||
}
|
||||
|
||||
var verticalWritingMetrics = ReadVerticalDisplacements(dictionary);
|
||||
var verticalWritingMetrics = ReadVerticalDisplacements(dictionary, pdfScanner);
|
||||
|
||||
FontDescriptor? descriptor = null;
|
||||
if (TryGetFontDescriptor(dictionary, out var descriptorDictionary))
|
||||
@@ -190,7 +190,7 @@
|
||||
return widths;
|
||||
}
|
||||
|
||||
private static VerticalWritingMetrics ReadVerticalDisplacements(DictionaryToken dict)
|
||||
private static VerticalWritingMetrics ReadVerticalDisplacements(DictionaryToken dict, IPdfTokenScanner pdfScanner)
|
||||
{
|
||||
var verticalDisplacements = new Dictionary<int, double>();
|
||||
var positionVectors = new Dictionary<int, PdfVector>();
|
||||
@@ -210,22 +210,21 @@
|
||||
}
|
||||
|
||||
// vertical metrics for individual CIDs.
|
||||
if (dict.TryGet(NameToken.W2, out var w2Token) && w2Token is ArrayToken w2)
|
||||
if (dict.TryGet(NameToken.W2, pdfScanner, out ArrayToken? w2))
|
||||
{
|
||||
for (var i = 0; i < w2.Data.Count; i++)
|
||||
{
|
||||
var c = (NumericToken)w2.Data[i];
|
||||
var c = DirectObjectFinder.Get<NumericToken>(w2.Data[i], pdfScanner);
|
||||
var next = w2.Data[++i];
|
||||
|
||||
if (next is ArrayToken array)
|
||||
if (DirectObjectFinder.TryGet(next, pdfScanner, out ArrayToken? array))
|
||||
{
|
||||
for (var j = 0; j < array.Data.Count; j++)
|
||||
{
|
||||
var cid = c.Int + j;
|
||||
// ReSharper disable InconsistentNaming
|
||||
var w1y = (NumericToken)array.Data[j];
|
||||
var v1x = (NumericToken)array.Data[++j];
|
||||
var v1y = (NumericToken)array.Data[++j];
|
||||
var w1y = DirectObjectFinder.Get<NumericToken>(array.Data[j], pdfScanner);
|
||||
var v1x = DirectObjectFinder.Get<NumericToken>(array.Data[++j], pdfScanner);
|
||||
var v1y = DirectObjectFinder.Get<NumericToken>(array.Data[++j], pdfScanner);
|
||||
|
||||
verticalDisplacements[cid] = w1y.Double;
|
||||
|
||||
@@ -236,9 +235,9 @@
|
||||
{
|
||||
var first = c.Int;
|
||||
var last = ((NumericToken)next).Int;
|
||||
var w1y = (NumericToken)w2.Data[++i];
|
||||
var v1x = (NumericToken)w2.Data[++i];
|
||||
var v1y = (NumericToken)w2.Data[++i];
|
||||
var w1y = DirectObjectFinder.Get<NumericToken>(w2.Data[++i], pdfScanner);
|
||||
var v1x = DirectObjectFinder.Get<NumericToken>(w2.Data[++i], pdfScanner);
|
||||
var v1y = DirectObjectFinder.Get<NumericToken>(w2.Data[++i], pdfScanner);
|
||||
// ReSharper restore InconsistentNaming
|
||||
|
||||
for (var cid = first; cid <= last; cid++)
|
||||
@@ -250,7 +249,7 @@
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return new VerticalWritingMetrics(dw2, verticalDisplacements, positionVectors);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user