mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-10-15 19:54:52 +08:00
add very hacky parsing for type 1 font files in order to read the encoding
This commit is contained in:
@@ -0,0 +1,95 @@
|
||||
namespace UglyToad.PdfPig.Tests.Fonts.Type1
|
||||
{
|
||||
using PdfPig.Fonts.Type1.Parser;
|
||||
using Xunit;
|
||||
|
||||
public class Type1FontParserTests
|
||||
{
|
||||
private readonly Type1FontParser parser = new Type1FontParser();
|
||||
|
||||
/// <summary>
/// Smoke test: parsing the sample CMBX12 Type 1 program text must complete without throwing.
/// </summary>
[Fact]
public void CanRead()
{
    // Convert the sample program text into the input the parser consumes.
    var input = StringBytesTestConverter.Convert(Cmbx12, false).Bytes;

    parser.Parse(input);
}
|
||||
|
||||
private const string Cmbx12 = @"%!PS-AdobeFont-1.1: CMBX12 1.0
|
||||
%%CreationDate: 1991 Aug 20 16:34:54
|
||||
% Copyright (C) 1997 American Mathematical Society. All Rights Reserved.
|
||||
11 dict begin
|
||||
/FontInfo 7 dict dup begin
|
||||
/version (1.0) readonly def
|
||||
/Notice (Copyright (C) 1997 American Mathematical Society. All Rights Reserved) readonly def
|
||||
/FullName (CMBX12) readonly def
|
||||
/FamilyName (Computer Modern) readonly def
|
||||
/Weight (Bold) readonly def
|
||||
/ItalicAngle 0 def
|
||||
/isFixedPitch false def
|
||||
end readonly def
|
||||
/FontName /WDKAAR+CMBX12 def
|
||||
/PaintType 0 def
|
||||
/FontType 1 def
|
||||
/FontMatrix [0.001 0 0 0.001 0 0] readonly def
|
||||
/Encoding 256 array
|
||||
0 1 255 {1 index exch /.notdef put} for
|
||||
dup 12 /fi put
|
||||
dup 46 /period put
|
||||
dup 49 /one put
|
||||
dup 50 /two put
|
||||
dup 51 /three put
|
||||
dup 52 /four put
|
||||
dup 53 /five put
|
||||
dup 65 /A put
|
||||
dup 66 /B put
|
||||
dup 67 /C put
|
||||
dup 69 /E put
|
||||
dup 73 /I put
|
||||
dup 77 /M put
|
||||
dup 78 /N put
|
||||
dup 80 /P put
|
||||
dup 82 /R put
|
||||
dup 83 /S put
|
||||
dup 84 /T put
|
||||
dup 97 /a put
|
||||
dup 98 /b put
|
||||
dup 99 /c put
|
||||
dup 100 /d put
|
||||
dup 101 /e put
|
||||
dup 102 /f put
|
||||
dup 103 /g put
|
||||
dup 104 /h put
|
||||
dup 105 /i put
|
||||
dup 107 /k put
|
||||
dup 108 /l put
|
||||
dup 109 /m put
|
||||
dup 110 /n put
|
||||
dup 111 /o put
|
||||
dup 112 /p put
|
||||
dup 114 /r put
|
||||
dup 115 /s put
|
||||
dup 116 /t put
|
||||
dup 117 /u put
|
||||
dup 118 /v put
|
||||
dup 120 /x put
|
||||
dup 121 /y put
|
||||
readonly def
|
||||
/FontBBox{-53 -251 1139 750}readonly def
|
||||
/UniqueID 5000769 def
|
||||
currentdict end
|
||||
currentfile eexec
|
||||
ÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†©~E£Ð7Ô×1¼Iu`“ÂõÎ>ä‘9Á?î\ºlüýÄ6Ag_Â_–²ÂGÄ´/³0¨;2j~þªÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†
|
||||
©~E£ÐªÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†©~E£Ðª7Ô×1¼Iu`“ÂõÎ>ä‘9Á?î\ºlüýÄ6Ag_Â_–²ÂGÄ´/³0¨;2j~þ
|
||||
ÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†©~E£ÐªÙÖoc;„j—¶†©~E£Ðª7Ô×1¼Iu`“ÂõÎ>ä‘9Á?î\ºlüýÄ6Ag_Â_–²ÂGÄ´/³0¨;2j~þ
|
||||
7Ô×1¼Iu`“ÂõÎ>ä‘9Á?î\ºlüýÄ6Ag_Â_–²ÂGÄ´/³0¨;2j~þv7Ô×1¼Iu`“ÂõÎ>ä‘9Á?î\ºlüýÄ6Ag_Â_–²ÂGÄ´/³0¨;2j~þ000000000000000000000000000000000000000000000000000000000000000
|
||||
0000000000000000000000000000000000000000000000000000000000000000
|
||||
0000000000000000000000000000000000000000000000000000000000000000
|
||||
0000000000000000000000000000000000000000000000000000000000000000
|
||||
0000000000000000000000000000000000000000000000000000000000000000
|
||||
0000000000000000000000000000000000000000000000000000000000000000
|
||||
0000000000000000000000000000000000000000000000000000000000000000
|
||||
0000000000000000000000000000000000000000000000000000000000000000
|
||||
cleartomark";
|
||||
}
|
||||
}
|
BIN
src/UglyToad.PdfPig.Tests/Integration/Documents/ICML03-081.pdf
Normal file
BIN
src/UglyToad.PdfPig.Tests/Integration/Documents/ICML03-081.pdf
Normal file
Binary file not shown.
40
src/UglyToad.PdfPig.Tests/Integration/LaTexTests.cs
Normal file
40
src/UglyToad.PdfPig.Tests/Integration/LaTexTests.cs
Normal file
@@ -0,0 +1,40 @@
|
||||
namespace UglyToad.PdfPig.Tests.Integration
|
||||
{
|
||||
using System;
|
||||
using System.IO;
|
||||
using Xunit;
|
||||
|
||||
/// <summary>
/// Integration tests which open the ICML03-081.pdf document from the Integration/Documents folder.
/// </summary>
public class LaTexTests
{
    /// <summary>
    /// Builds the full path to the test document, resolved relative to the application base directory.
    /// </summary>
    private static string GetFilename()
    {
        var relativeFolder = Path.Combine(
            AppDomain.CurrentDomain.BaseDirectory,
            "..",
            "..",
            "..",
            "Integration",
            "Documents");

        return Path.Combine(Path.GetFullPath(relativeFolder), "ICML03-081.pdf");
    }

    /// <summary>
    /// The text of pages 1 and 2 must contain the expected fragments.
    /// </summary>
    [Fact]
    public void CanReadContent()
    {
        using (var document = PdfDocument.Open(GetFilename()))
        {
            var firstPage = document.GetPage(1);
            Assert.Contains("TacklingthePoorAssumptionsofNaiveBayesTextClassifiers", firstPage.Text);

            var secondPage = document.GetPage(2);
            Assert.Contains("isθc={θc1,θc2,...,θcn},", secondPage.Text);
        }
    }

    /// <summary>
    /// The document must report 8 pages.
    /// </summary>
    [Fact]
    public void HasCorrectNumberOfPages()
    {
        using (var document = PdfDocument.Open(GetFilename()))
        {
            var pageCount = document.NumberOfPages;

            Assert.Equal(8, pageCount);
        }
    }
}
|
||||
}
|
@@ -10,10 +10,7 @@
|
||||
[Fact]
|
||||
public void Tests()
|
||||
{
|
||||
using (var document = PdfDocument.Open(@"C:\Users\eliot\Downloads\ICML03-081.pdf"))
|
||||
{
|
||||
var page = document.GetPage(1);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -13,6 +13,7 @@
|
||||
<None Remove="Integration\Documents\FarmerMac.pdf" />
|
||||
<None Remove="Integration\Documents\Font Size Test - from google chrome print pdf.pdf" />
|
||||
<None Remove="Integration\Documents\Font Size Test - from libre office.pdf" />
|
||||
<None Remove="Integration\Documents\ICML03-081.pdf" />
|
||||
<None Remove="Integration\Documents\Judgement Document.pdf" />
|
||||
<None Remove="Integration\Documents\Multiple Page - from Mortality Statistics.pdf" />
|
||||
<None Remove="Integration\Documents\Single Page Form Content - from itext 1_1.pdf" />
|
||||
@@ -39,6 +40,9 @@
|
||||
<Content Include="Integration\Documents\Font Size Test - from google chrome print pdf.pdf">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</Content>
|
||||
<Content Include="Integration\Documents\ICML03-081.pdf">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</Content>
|
||||
<Content Include="Integration\Documents\Judgement Document.pdf">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</Content>
|
||||
|
@@ -1,6 +1,5 @@
|
||||
namespace UglyToad.PdfPig.Fonts.Parser.Handlers
|
||||
{
|
||||
using System;
|
||||
using Cmap;
|
||||
using ContentStream;
|
||||
using Cos;
|
||||
@@ -13,6 +12,8 @@
|
||||
using Simple;
|
||||
using Tokenization.Scanner;
|
||||
using Tokenization.Tokens;
|
||||
using Type1;
|
||||
using Type1.Parser;
|
||||
|
||||
internal class Type1FontHandler : IFontHandler
|
||||
{
|
||||
@@ -22,10 +23,12 @@
|
||||
private readonly FontDescriptorFactory fontDescriptorFactory;
|
||||
private readonly IEncodingReader encodingReader;
|
||||
private readonly IPdfObjectScanner scanner;
|
||||
private readonly Type1FontParser type1FontParser;
|
||||
|
||||
public Type1FontHandler(IPdfObjectParser pdfObjectParser, CMapCache cMapCache, IFilterProvider filterProvider,
|
||||
FontDescriptorFactory fontDescriptorFactory, IEncodingReader encodingReader,
|
||||
IPdfObjectScanner scanner)
|
||||
IPdfObjectScanner scanner,
|
||||
Type1FontParser type1FontParser)
|
||||
{
|
||||
this.pdfObjectParser = pdfObjectParser;
|
||||
this.cMapCache = cMapCache;
|
||||
@@ -33,6 +36,7 @@
|
||||
this.fontDescriptorFactory = fontDescriptorFactory;
|
||||
this.encodingReader = encodingReader;
|
||||
this.scanner = scanner;
|
||||
this.type1FontParser = type1FontParser;
|
||||
}
|
||||
|
||||
public IFont Generate(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
|
||||
@@ -60,7 +64,7 @@
|
||||
|
||||
var descriptor = FontDictionaryAccessHelper.GetFontDescriptor(pdfObjectParser, fontDescriptorFactory, dictionary, reader, isLenientParsing);
|
||||
|
||||
ParseType1Font(descriptor, isLenientParsing);
|
||||
var font = ParseType1Font(descriptor, isLenientParsing);
|
||||
|
||||
var name = FontDictionaryAccessHelper.GetName(pdfObjectParser, dictionary, descriptor, reader, isLenientParsing);
|
||||
|
||||
@@ -79,19 +83,24 @@
|
||||
|
||||
Encoding encoding = encodingReader.Read(dictionary, reader, isLenientParsing, descriptor);
|
||||
|
||||
return new Type1Font(name, firstCharacter, lastCharacter, widths, descriptor, encoding, toUnicodeCMap);
|
||||
if (encoding == null && font?.Encoding.Count > 0)
|
||||
{
|
||||
encoding = new BuiltInEncoding(font.Encoding);
|
||||
}
|
||||
|
||||
return new Type1FontSimple(name, firstCharacter, lastCharacter, widths, descriptor, encoding, toUnicodeCMap);
|
||||
}
|
||||
|
||||
private void ParseType1Font(FontDescriptor descriptor, bool isLenientParsing)
|
||||
private Type1Font ParseType1Font(FontDescriptor descriptor, bool isLenientParsing)
|
||||
{
|
||||
if (descriptor?.FontFile == null)
|
||||
{
|
||||
return;
|
||||
return null;
|
||||
}
|
||||
|
||||
if (descriptor.FontFile.ObjectKey.ObjectNumber == 0)
|
||||
{
|
||||
return;
|
||||
return null;
|
||||
}
|
||||
|
||||
try
|
||||
@@ -100,14 +109,16 @@
|
||||
|
||||
if (stream == null)
|
||||
{
|
||||
return;
|
||||
return null;
|
||||
}
|
||||
|
||||
var raw = new PdfRawStream(stream);
|
||||
|
||||
var bytes = raw.Decode(filterProvider);
|
||||
|
||||
// TODO: parse
|
||||
var font = type1FontParser.Parse(new ByteArrayInputBytes(bytes));
|
||||
|
||||
return font;
|
||||
}
|
||||
catch
|
||||
{
|
||||
@@ -116,6 +127,8 @@
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -11,7 +11,7 @@
|
||||
/// <summary>
|
||||
/// TODO: implement this properly if you find a Type 1 font in the wild.
|
||||
/// </summary>
|
||||
internal class Type1Font : IFont
|
||||
internal class Type1FontSimple : IFont
|
||||
{
|
||||
private readonly int firstChar;
|
||||
private readonly int lastChar;
|
||||
@@ -25,7 +25,7 @@
|
||||
|
||||
public bool IsVertical { get; } = false;
|
||||
|
||||
public Type1Font(CosName name, int firstChar, int lastChar, decimal[] widths, FontDescriptor fontDescriptor, Encoding encoding, CMap toUnicodeCMap)
|
||||
public Type1FontSimple(CosName name, int firstChar, int lastChar, decimal[] widths, FontDescriptor fontDescriptor, Encoding encoding, CMap toUnicodeCMap)
|
||||
{
|
||||
this.firstChar = firstChar;
|
||||
this.lastChar = lastChar;
|
@@ -1,10 +1,299 @@
|
||||
namespace UglyToad.PdfPig.Fonts.Type1.Parser
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using Cos;
|
||||
using Exceptions;
|
||||
using Geometry;
|
||||
using IO;
|
||||
using Tokenization;
|
||||
using Tokenization.Scanner;
|
||||
using Tokenization.Tokens;
|
||||
|
||||
internal class Type1FontParser
|
||||
{
|
||||
public void Parse()
|
||||
/// <summary>
/// Reads the cleartext portion of a Type 1 font program and collects its name, encoding,
/// font matrix and bounding box.
/// </summary>
/// <param name="inputBytes">The bytes of the Type 1 font program.</param>
/// <returns>The <see cref="Type1Font"/> built from the dictionaries found in the program.</returns>
/// <exception cref="InvalidFontFormatException">
/// The first token is not a comment whose text starts with '!'.
/// </exception>
public Type1Font Parse(IInputBytes inputBytes)
{
    var scanner = new CoreTokenScanner(inputBytes);

    var hasHeader = scanner.TryReadToken(out CommentToken header);
    if (!hasHeader || !header.Data.StartsWith("!"))
    {
        throw new InvalidFontFormatException("The Type1 program did not start with '%!'.");
    }

    // The name is taken from the middle part of a three part header comment.
    var headerParts = header.Data.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries);
    var name = headerParts.Length == 3 ? headerParts[1] : "Unknown";

    // Consume the run of comments which follows the header.
    var comments = new List<string>();
    while (scanner.MoveNext())
    {
        if (!(scanner.CurrentToken is CommentToken commentToken))
        {
            break;
        }

        comments.Add(commentToken.Data);
    }

    var dictionaries = new List<DictionaryToken>();

    // Type 1 treats '{' and '/' differently so the scanner gets dedicated tokenizers for them.
    var arrayTokenizer = new Type1ArrayTokenizer();
    var nameTokenizer = new Type1NameTokenizer();
    scanner.RegisterCustomTokenizer((byte)'{', arrayTokenizer);
    scanner.RegisterCustomTokenizer((byte)'/', nameTokenizer);

    try
    {
        var history = new PreviousTokenSet();
        history.Add(scanner.CurrentToken);

        while (scanner.MoveNext())
        {
            if (scanner.CurrentToken is OperatorToken operatorToken)
            {
                HandleOperator(operatorToken, inputBytes, scanner, history, dictionaries);
            }

            // Re-read the current token: handling an operator may have advanced the scanner.
            history.Add(scanner.CurrentToken);
        }
    }
    finally
    {
        scanner.DeregisterCustomTokenizer(arrayTokenizer);
        scanner.DeregisterCustomTokenizer(nameTokenizer);
    }

    return new Type1Font(
        name,
        GetEncoding(dictionaries),
        GetFontMatrix(dictionaries),
        GetBoundingBox(dictionaries));
}
|
||||
|
||||
/// <summary>
/// Reacts to the operators the parser cares about: "dict" starts reading a dictionary and
/// "currentfile" followed by "eexec" skips the rest of the input. Other operators are ignored.
/// </summary>
/// <param name="token">The operator which was just read.</param>
/// <param name="bytes">The underlying input, used to skip the encrypted section.</param>
/// <param name="scanner">The scanner positioned on <paramref name="token"/>.</param>
/// <param name="set">The most recently read tokens; index 0 is the token before <paramref name="token"/>.</param>
/// <param name="dictionaries">Receives each dictionary that is read.</param>
private void HandleOperator(OperatorToken token, IInputBytes bytes, ISeekableTokenScanner scanner, PreviousTokenSet set, List<DictionaryToken> dictionaries)
{
    switch (token.Data)
    {
        case "dict":
            // "dict" is only understood when it directly follows its numeric size. Anything else
            // is ignored rather than failing with an invalid cast, matching the lenient handling
            // of nested dictionaries in ReadDictionary.
            if (!(set[0] is NumericToken size))
            {
                return;
            }

            dictionaries.Add(ReadDictionary(size.Int, scanner));
            break;
        case "currentfile":
            if (!scanner.MoveNext() || scanner.CurrentToken != OperatorToken.Eexec)
            {
                return;
            }

            // For now we will not read this stuff.
            SkipEncryptedContent(bytes);
            break;
        default:
            return;
    }
}
|
||||
|
||||
/// <summary>
/// Moves the input to its final byte and then reads until no more bytes are available,
/// so nothing after the "eexec" operator is tokenized.
/// </summary>
/// <param name="bytes">The input to exhaust.</param>
private void SkipEncryptedContent(IInputBytes bytes)
{
    var lastOffset = bytes.Length - 1;
    bytes.Seek(lastOffset);

    // Keep advancing until the input reports there is nothing left.
    bool advanced;
    do
    {
        advanced = bytes.MoveNext();
    }
    while (advanced);
}
|
||||
|
||||
/// <summary>
/// Reads up to <paramref name="keys"/> "/Name value def" entries following the next "begin" operator.
/// </summary>
/// <remarks>
/// The value stored for a key is the last token seen before "def", ignoring "readonly" and "end".
/// A nested "dict" is read recursively and becomes the value. The Encoding key is delegated to
/// <see cref="ReadEncoding"/>. Reading stops early, returning what was gathered so far, when a key
/// is not a name or when a nested "dict" is not preceded by a number.
/// </remarks>
/// <param name="keys">The number of entries to read.</param>
/// <param name="scanner">The scanner positioned on the "dict" operator.</param>
/// <returns>A dictionary token containing the entries which were read.</returns>
private static DictionaryToken ReadDictionary(int keys, ISeekableTokenScanner scanner)
{
    var entries = new Dictionary<IToken, IToken>();

    // Deliberately shared across keys: it is only overwritten when a value token is read.
    IToken lastValue = null;

    // Skip the operators "dup" etc to reach "begin".
    while (scanner.MoveNext())
    {
        if (scanner.CurrentToken is OperatorToken candidate && candidate.Data == "begin")
        {
            break;
        }
    }

    for (var read = 0; read < keys; read++)
    {
        if (!scanner.TryReadToken(out NameToken key))
        {
            return new DictionaryToken(entries);
        }

        if (key.Data.Equals(CosName.ENCODING))
        {
            entries[key] = ReadEncoding(scanner);
            continue;
        }

        while (scanner.MoveNext())
        {
            var current = scanner.CurrentToken;

            if (current == OperatorToken.Def)
            {
                entries[key] = lastValue;
                break;
            }

            if (current == OperatorToken.Dict)
            {
                if (!(lastValue is NumericToken size))
                {
                    return new DictionaryToken(entries);
                }

                lastValue = ReadDictionary(size.Int, scanner);
                continue;
            }

            // "readonly" and "end" never count as the value of an entry.
            var isIgnored = current == OperatorToken.Readonly
                            || (current is OperatorToken op && op.Data == "end");

            if (!isIgnored)
            {
                lastValue = current;
            }
        }
    }

    return new DictionaryToken(entries);
}
|
||||
|
||||
/// <summary>
/// Reads an encoding written as "N array ... for" followed by repeated "dup code /name put"
/// entries, stopping at "def" or "readonly".
/// </summary>
/// <param name="scanner">The scanner positioned on the Encoding name.</param>
/// <returns>
/// An array token holding alternating code and name tokens. The array is empty when the
/// encoding does not start with a number, the "array" operator and a "for" operator.
/// </returns>
/// <exception cref="InvalidFontFormatException">
/// An entry does not have the form "dup code /name put".
/// </exception>
private static ArrayToken ReadEncoding(ISeekableTokenScanner scanner)
{
    var result = new List<IToken>();

    // Treat encoding differently, it's what we came here for!
    if (!scanner.TryReadToken(out NumericToken _))
    {
        return new ArrayToken(result);
    }

    if (!scanner.TryReadToken(out OperatorToken arrayOperatorToken) || arrayOperatorToken.Data != "array")
    {
        return new ArrayToken(result);
    }

    while (scanner.MoveNext() && (!(scanner.CurrentToken is OperatorToken forOperator) || forOperator.Data != "for"))
    {
        // skip these operators for now, they're probably important...
    }

    if (scanner.CurrentToken != OperatorToken.For)
    {
        return new ArrayToken(result);
    }

    while (scanner.MoveNext() && scanner.CurrentToken != OperatorToken.Def && scanner.CurrentToken != OperatorToken.Readonly)
    {
        if (scanner.CurrentToken != OperatorToken.Dup)
        {
            throw new InvalidFontFormatException("Expected the array for encoding to begin with 'dup'.");
        }

        // Validate each part of the entry so a malformed or truncated entry is reported as a
        // font format problem instead of an invalid cast or a null entry in the result.
        if (!scanner.MoveNext() || !(scanner.CurrentToken is NumericToken number))
        {
            throw new InvalidFontFormatException("Expected a numeric character code after 'dup' in the encoding array.");
        }

        if (!scanner.MoveNext() || !(scanner.CurrentToken is NameToken name))
        {
            throw new InvalidFontFormatException("Expected a name after the character code in the encoding array.");
        }

        if (!scanner.TryReadToken(out OperatorToken put) || put != OperatorToken.Put)
        {
            throw new InvalidFontFormatException("Expected the array entry to end with 'put'.");
        }

        result.Add(number);
        result.Add(name);
    }

    // Consume everything up to and including the closing "def".
    while (scanner.CurrentToken != OperatorToken.Def && scanner.MoveNext())
    {
        // skip
    }

    return new ArrayToken(result);
}
|
||||
|
||||
/// <summary>
/// Builds the code to glyph name map from the first dictionary whose Encoding entry is an array.
/// </summary>
/// <param name="dictionaries">The dictionaries read from the font program.</param>
/// <returns>The map of character code to name; empty when no encoding array is present.</returns>
private static Dictionary<int, string> GetEncoding(IReadOnlyList<DictionaryToken> dictionaries)
{
    var map = new Dictionary<int, string>();

    foreach (var dictionary in dictionaries)
    {
        if (!dictionary.TryGetByName(CosName.ENCODING, out var token))
        {
            continue;
        }

        if (!(token is ArrayToken pairs))
        {
            continue;
        }

        // The array holds alternating code and name tokens.
        var index = 0;
        while (index < pairs.Data.Count)
        {
            var code = (NumericToken)pairs.Data[index++];
            var name = (NameToken)pairs.Data[index++];

            map[code.Int] = name.Data.Name;
        }

        // Only the first encoding array found is used.
        break;
    }

    return map;
}
|
||||
|
||||
/// <summary>
/// Finds the first FontMatrix entry whose value is an array.
/// </summary>
/// <param name="dictionaries">The dictionaries read from the font program.</param>
/// <returns>The font matrix array, or <c>null</c> when none of the dictionaries has one.</returns>
private static ArrayToken GetFontMatrix(IReadOnlyList<DictionaryToken> dictionaries)
{
    foreach (var candidate in dictionaries)
    {
        if (!candidate.TryGetByName(CosName.FONT_MATRIX, out var value))
        {
            continue;
        }

        if (value is ArrayToken matrix)
        {
            return matrix;
        }
    }

    return null;
}
|
||||
|
||||
/// <summary>
/// Finds the first FontBBox entry which is an array of exactly four items and converts it to a rectangle.
/// </summary>
/// <param name="dictionaries">The dictionaries read from the font program.</param>
/// <returns>The bounding box, or <c>null</c> when no suitable entry is present.</returns>
private static PdfRectangle GetBoundingBox(IReadOnlyList<DictionaryToken> dictionaries)
{
    foreach (var dictionary in dictionaries)
    {
        if (!dictionary.TryGetByName(CosName.FONT_BBOX, out var token))
        {
            continue;
        }

        if (!(token is ArrayToken array) || array.Data.Count != 4)
        {
            continue;
        }

        // Cast all four items up front, in order, before any value is read.
        var corners = new NumericToken[4];
        for (var i = 0; i < corners.Length; i++)
        {
            corners[i] = (NumericToken)array.Data[i];
        }

        return new PdfRectangle(corners[0].Data, corners[1].Data, corners[2].Data, corners[3].Data);
    }

    return null;
}
|
||||
|
||||
/// <summary>
/// Remembers the three most recently added tokens. Index 0 is the newest, index 2 the oldest.
/// </summary>
private class PreviousTokenSet
{
    private const int Capacity = 3;

    private readonly IToken[] tokens = new IToken[Capacity];

    /// <summary>
    /// Gets a remembered token, counting back from the most recently added one.
    /// </summary>
    public IToken this[int index]
    {
        get { return tokens[Capacity - 1 - index]; }
    }

    /// <summary>
    /// Stores <paramref name="token"/> as the newest entry, discarding the oldest.
    /// </summary>
    public void Add(IToken token)
    {
        // Shift every entry one slot towards the start to free the final slot.
        for (var slot = 0; slot < Capacity - 1; slot++)
        {
            tokens[slot] = tokens[slot + 1];
        }

        tokens[Capacity - 1] = token;
    }
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
32
src/UglyToad.PdfPig/Fonts/Type1/Type1Font.cs
Normal file
32
src/UglyToad.PdfPig/Fonts/Type1/Type1Font.cs
Normal file
@@ -0,0 +1,32 @@
|
||||
namespace UglyToad.PdfPig.Fonts.Type1
|
||||
{
|
||||
using System.Collections.Generic;
|
||||
using Cos;
|
||||
using Geometry;
|
||||
using Tokenization.Tokens;
|
||||
using Util.JetBrains.Annotations;
|
||||
|
||||
/// <summary>
/// The values read from a Type 1 font file: its name, encoding, font matrix and bounding box.
/// </summary>
internal class Type1Font
{
    /// <summary>
    /// Stores the supplied values unchanged.
    /// </summary>
    public Type1Font(string name, IReadOnlyDictionary<int, string> encoding, ArrayToken fontMatrix, PdfRectangle boundingBox)
    {
        BoundingBox = boundingBox;
        FontMatrix = fontMatrix;
        Encoding = encoding;
        Name = name;
    }

    /// <summary>
    /// The name of the font.
    /// </summary>
    public string Name { get; }

    /// <summary>
    /// The names of the font's encoding, keyed by integer code.
    /// </summary>
    public IReadOnlyDictionary<int, string> Encoding { get; }

    /// <summary>
    /// The font matrix array, if one was supplied.
    /// </summary>
    [CanBeNull]
    public ArrayToken FontMatrix { get; }

    /// <summary>
    /// The bounding box, if one was supplied.
    /// </summary>
    [CanBeNull]
    public PdfRectangle BoundingBox { get; }
}
|
||||
}
|
@@ -13,6 +13,7 @@
|
||||
using Fonts.Parser.Handlers;
|
||||
using Fonts.Parser.Parts;
|
||||
using Fonts.TrueType.Parser;
|
||||
using Fonts.Type1.Parser;
|
||||
using Graphics;
|
||||
using IO;
|
||||
using Logging;
|
||||
@@ -90,7 +91,7 @@
|
||||
filterProvider,
|
||||
pdfObjectParser),
|
||||
new TrueTypeFontHandler(pdfObjectParser, filterProvider, cMapCache, fontDescriptorFactory, trueTypeFontParser, encodingReader),
|
||||
new Type1FontHandler(pdfObjectParser, cMapCache, filterProvider, fontDescriptorFactory, encodingReader, pdfScanner),
|
||||
new Type1FontHandler(pdfObjectParser, cMapCache, filterProvider, fontDescriptorFactory, encodingReader, pdfScanner, new Type1FontParser()),
|
||||
new Type3FontHandler(pdfObjectParser, cMapCache, filterProvider, encodingReader));
|
||||
|
||||
var dynamicParser = container.Get<DynamicParser>();
|
||||
|
@@ -11,6 +11,13 @@
|
||||
public static readonly OperatorToken EndObject = new OperatorToken("endobj");
|
||||
public static readonly OperatorToken StartStream = new OperatorToken("stream");
|
||||
public static readonly OperatorToken EndStream = new OperatorToken("endstream");
|
||||
public static readonly OperatorToken Eexec = new OperatorToken("eexec");
|
||||
public static readonly OperatorToken Def = new OperatorToken("def");
|
||||
public static readonly OperatorToken Dict = new OperatorToken("dict");
|
||||
public static readonly OperatorToken Readonly = new OperatorToken("readonly");
|
||||
public static readonly OperatorToken Dup = new OperatorToken("dup");
|
||||
public static readonly OperatorToken For = new OperatorToken("for");
|
||||
public static readonly OperatorToken Put = new OperatorToken("put");
|
||||
|
||||
public string Data { get; }
|
||||
|
||||
@@ -39,6 +46,20 @@
|
||||
return StartStream;
|
||||
case "endstream":
|
||||
return EndStream;
|
||||
case "eexec":
|
||||
return Eexec;
|
||||
case "def":
|
||||
return Def;
|
||||
case "dict":
|
||||
return Dict;
|
||||
case "readonly":
|
||||
return Readonly;
|
||||
case "dup":
|
||||
return Dup;
|
||||
case "for":
|
||||
return For;
|
||||
case "put":
|
||||
return Put;
|
||||
default:
|
||||
return new OperatorToken(data);
|
||||
}
|
||||
|
76
src/UglyToad.PdfPig/Tokenization/Type1ArrayTokenizer.cs
Normal file
76
src/UglyToad.PdfPig/Tokenization/Type1ArrayTokenizer.cs
Normal file
@@ -0,0 +1,76 @@
|
||||
namespace UglyToad.PdfPig.Tokenization
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Globalization;
|
||||
using System.Text;
|
||||
using IO;
|
||||
using Tokens;
|
||||
|
||||
/// <summary>
/// Reads the content between '{' and the next '}' and returns it as a flat <see cref="ArrayToken"/>
/// of numbers, names, strings and operators.
/// </summary>
internal class Type1ArrayTokenizer : ITokenizer
{
    // Items may be separated by any whitespace, not just a single space character.
    private static readonly char[] Separators = { ' ', '\t', '\r', '\n', '\f', '\0' };

    // Numbers in a font program use '.' as the decimal separator whatever the current culture is.
    private const NumberStyles NumberFormat =
        NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent;

    public bool ReadsNextByte { get; } = false;

    /// <summary>
    /// Attempts to read a '{ ... }' group starting at <paramref name="currentByte"/>.
    /// </summary>
    /// <param name="currentByte">The byte the scanner is positioned on; must be '{'.</param>
    /// <param name="inputBytes">The input, read up to and including the closing '}'.</param>
    /// <param name="token">The resulting <see cref="ArrayToken"/>, or <c>null</c> when nothing was read.</param>
    /// <returns><c>true</c> when <paramref name="currentByte"/> was '{', otherwise <c>false</c>.</returns>
    public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
    {
        token = null;

        if (currentByte != '{')
        {
            return false;
        }

        var builder = new StringBuilder();

        while (inputBytes.MoveNext())
        {
            if (inputBytes.CurrentByte == '}')
            {
                break;
            }

            builder.Append((char)inputBytes.CurrentByte);
        }

        var parts = builder.ToString().Split(Separators, StringSplitOptions.RemoveEmptyEntries);

        var tokens = new List<IToken>(parts.Length);

        foreach (var part in parts)
        {
            tokens.Add(CreateToken(part));
        }

        token = new ArrayToken(tokens);

        return true;
    }

    /// <summary>
    /// Converts a single whitespace-delimited item into the matching token type.
    /// </summary>
    private static IToken CreateToken(string part)
    {
        var first = part[0];

        if (char.IsNumber(first) || first == '-' || first == '+' || first == '.')
        {
            if (decimal.TryParse(part, NumberFormat, CultureInfo.InvariantCulture, out var value))
            {
                return new NumericToken(value);
            }

            // Looks numeric but is not a plain decimal number, keep it as an operator.
            return OperatorToken.Create(part);
        }

        if (first == '/')
        {
            return new NameToken(part.Substring(1));
        }

        if (first == '(' && part[part.Length - 1] == ')')
        {
            return new StringToken(part);
        }

        return OperatorToken.Create(part);
    }
}
|
||||
}
|
42
src/UglyToad.PdfPig/Tokenization/Type1NameTokenizer.cs
Normal file
42
src/UglyToad.PdfPig/Tokenization/Type1NameTokenizer.cs
Normal file
@@ -0,0 +1,42 @@
|
||||
namespace UglyToad.PdfPig.Tokenization
|
||||
{
|
||||
using System.Text;
|
||||
using IO;
|
||||
using Parser.Parts;
|
||||
using Tokens;
|
||||
|
||||
/// <summary>
/// Reads a name introduced by '/' up to the next whitespace or one of '{', '&lt;', '/', '[' or '('.
/// </summary>
internal class Type1NameTokenizer : ITokenizer
{
    public bool ReadsNextByte { get; } = true;

    /// <summary>
    /// Attempts to read a name starting at <paramref name="currentByte"/>.
    /// </summary>
    /// <param name="currentByte">The byte the scanner is positioned on; must be '/'.</param>
    /// <param name="inputBytes">The input to read the name characters from.</param>
    /// <param name="token">The resulting <see cref="NameToken"/>, or <c>null</c> when nothing was read.</param>
    /// <returns><c>true</c> when <paramref name="currentByte"/> was '/', otherwise <c>false</c>.</returns>
    public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
    {
        if (currentByte != '/')
        {
            token = null;
            return false;
        }

        var name = new StringBuilder();

        while (inputBytes.MoveNext() && !EndsName(inputBytes.CurrentByte))
        {
            name.Append((char)inputBytes.CurrentByte);
        }

        token = new NameToken(name.ToString());

        return true;
    }

    /// <summary>
    /// Whether the byte terminates the name currently being read.
    /// </summary>
    private static bool EndsName(byte value)
    {
        switch (value)
        {
            case (byte)'{':
            case (byte)'<':
            case (byte)'/':
            case (byte)'[':
            case (byte)'(':
                return true;
            default:
                return ReadHelper.IsWhitespace(value);
        }
    }
}
|
||||
}
|
Reference in New Issue
Block a user