Generate a synthetic encoding where one is not present, and use the direct object finder to look up the CropBox and MediaBox

This commit is contained in:
Eliot Jones
2019-05-18 15:20:07 +01:00
parent 8a74d5b2f3
commit ffa7b3bcc7
4 changed files with 47 additions and 6 deletions

View File

@@ -1,6 +1,7 @@
namespace UglyToad.PdfPig.Fonts.Parser.Handlers
{
using System;
using System.Collections.Generic;
using SystemFonts;
using Cmap;
using Encodings;
@@ -100,6 +101,34 @@
Encoding encoding = encodingReader.Read(dictionary, isLenientParsing, descriptor);
if (encoding == null && font?.TableRegister?.CMapTable != null
&& font.TableRegister.PostScriptTable?.GlyphNames != null)
{
var postscript = font.TableRegister.PostScriptTable;
// Synthesize an encoding
var fakeEncoding = new Dictionary<int, string>();
for (var i = 0; i < 256; i++)
{
if (font.TableRegister.CMapTable.TryGetGlyphIndex(i, out var index))
{
string glyphName;
if (index >= 0 && index < postscript.GlyphNames.Length)
{
glyphName = postscript.GlyphNames[index];
}
else
{
glyphName = index.ToString();
}
fakeEncoding[i] = glyphName;
}
}
encoding = new BuiltInEncoding(fakeEncoding);
}
return new TrueTypeSimpleFont(name, descriptor, toUnicodeCMap, encoding, font, firstCharacter, widths);
}

View File

@@ -155,6 +155,11 @@
return ToUnicode.TryGet(characterCode, out value);
}
if (encoding == null)
{
return false;
}
// If the font is a simple font that uses one of the predefined encodings MacRomanEncoding, MacExpertEncoding, or WinAnsiEncoding...
// Map the character code to a character name.

View File

@@ -43,6 +43,11 @@ namespace UglyToad.PdfPig.Fonts.TrueType.Tables.CMapSubTables
var offset = characterCode - FirstCharacterCode;
if (offset < 0 || offset >= glyphIndices.Length)
{
return 0;
}
return glyphIndices[offset];
}

View File

@@ -53,8 +53,8 @@
throw new InvalidOperationException($"Page {number} had its type specified as {type} rather than 'Page'.");
}
MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers, log, isLenientParsing);
CropBox cropBox = GetCropBox(dictionary, pageTreeMembers, mediaBox, log, isLenientParsing);
MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers, isLenientParsing);
CropBox cropBox = GetCropBox(dictionary, pageTreeMembers, mediaBox, isLenientParsing);
UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary);
@@ -135,10 +135,11 @@
return spaceUnits;
}
private static CropBox GetCropBox(DictionaryToken dictionary, PageTreeMembers pageTreeMembers, MediaBox mediaBox, ILog log, bool isLenientParsing)
private CropBox GetCropBox(DictionaryToken dictionary, PageTreeMembers pageTreeMembers, MediaBox mediaBox, bool isLenientParsing)
{
CropBox cropBox;
if (dictionary.TryGet(NameToken.CropBox, out var cropBoxObject) && cropBoxObject is ArrayToken cropBoxArray)
if (dictionary.TryGet(NameToken.CropBox, out var cropBoxObject) &&
DirectObjectFinder.TryGet(cropBoxObject, pdfScanner, out ArrayToken cropBoxArray))
{
if (cropBoxArray.Length != 4 && isLenientParsing)
{
@@ -159,10 +160,11 @@
return cropBox;
}
private static MediaBox GetMediaBox(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers, ILog log, bool isLenientParsing)
private MediaBox GetMediaBox(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers, bool isLenientParsing)
{
MediaBox mediaBox;
if (dictionary.TryGet(NameToken.MediaBox, out var mediaboxObject) && mediaboxObject is ArrayToken mediaboxArray)
if (dictionary.TryGet(NameToken.MediaBox, out var mediaboxObject)
&& DirectObjectFinder.TryGet(mediaboxObject, pdfScanner, out ArrayToken mediaboxArray))
{
if (mediaboxArray.Length != 4 && isLenientParsing)
{