mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-10-15 03:34:52 +08:00
add support for parsing pfb files in type 1 fonts and an extra integration test
This commit is contained in:
BIN
src/UglyToad.PdfPig.Tests/Fonts/Type1/Raleway-Black.pfb
Normal file
BIN
src/UglyToad.PdfPig.Tests/Fonts/Type1/Raleway-Black.pfb
Normal file
Binary file not shown.
@@ -18,16 +18,16 @@
|
|||||||
{
|
{
|
||||||
var bytes = GetFileBytes("AdobeUtopia.pfa");
|
var bytes = GetFileBytes("AdobeUtopia.pfa");
|
||||||
|
|
||||||
parser.Parse(new ByteArrayInputBytes(bytes));
|
parser.Parse(new ByteArrayInputBytes(bytes),0, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
[Fact]
|
[Fact]
|
||||||
public void CanReadBinaryEncryptedPortion()
|
public void CanReadBinaryEncryptedPortionOfFullPfb()
|
||||||
{
|
{
|
||||||
// TODO: support reading in these pfb files
|
// A complete .pfb file, including its 0x80 segment headers, is passed straight to the parser.
|
||||||
//var bytes = GetFileBytes("cmbx8.pfb");
|
var bytes = GetFileBytes("Raleway-Black.pfb");
|
||||||
|
|
||||||
//parser.Parse(new ByteArrayInputBytes(bytes));
|
parser.Parse(new ByteArrayInputBytes(bytes), 0, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
[Fact]
|
[Fact]
|
||||||
@@ -35,7 +35,7 @@
|
|||||||
{
|
{
|
||||||
var bytes = StringBytesTestConverter.Convert(Cmbx12, false);
|
var bytes = StringBytesTestConverter.Convert(Cmbx12, false);
|
||||||
|
|
||||||
parser.Parse(bytes.Bytes);
|
parser.Parse(bytes.Bytes, 0, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
private const string Cmbx12 = @"%!PS-AdobeFont-1.1: CMBX12 1.0
|
private const string Cmbx12 = @"%!PS-AdobeFont-1.1: CMBX12 1.0
|
||||||
|
Binary file not shown.
@@ -0,0 +1,36 @@
|
|||||||
|
namespace UglyToad.PdfPig.Tests.Integration
{
    using System;
    using System.IO;
    using Xunit;

    /// <summary>
    /// Integration tests which open the "Pig Reproduction Powerpoint.pdf" sample document.
    /// </summary>
    public class PigReproductionPowerpointTests
    {
        /// <summary>
        /// Builds the full path of the sample document, resolved relative to the
        /// application base directory (three levels up, then Integration/Documents).
        /// </summary>
        private static string GetFilename()
        {
            var baseDirectory = AppDomain.CurrentDomain.BaseDirectory;
            var relativeFolder = Path.Combine(baseDirectory, "..", "..", "..", "Integration", "Documents");
            var absoluteFolder = Path.GetFullPath(relativeFolder);

            return Path.Combine(absoluteFolder, "Pig Reproduction Powerpoint.pdf");
        }

        /// <summary>
        /// The text of the first page must contain a known phrase.
        /// </summary>
        [Fact]
        public void CanReadContent()
        {
            using (var document = PdfDocument.Open(GetFilename()))
            {
                var firstPage = document.GetPage(1);
                var firstPageText = firstPage.Text;

                Assert.Contains("Pigs per sow per year: 18 to 27", firstPageText);
            }
        }

        /// <summary>
        /// The document must report 35 pages.
        /// </summary>
        [Fact]
        public void HasCorrectNumberOfPages()
        {
            using (var document = PdfDocument.Open(GetFilename()))
            {
                var pageCount = document.NumberOfPages;

                Assert.Equal(35, pageCount);
            }
        }
    }
}
|
@@ -11,12 +11,14 @@
|
|||||||
<None Remove="Fonts\TrueType\google-simple-doc.ttf" />
|
<None Remove="Fonts\TrueType\google-simple-doc.ttf" />
|
||||||
<None Remove="Fonts\TrueType\Roboto-Regular.ttf" />
|
<None Remove="Fonts\TrueType\Roboto-Regular.ttf" />
|
||||||
<None Remove="Fonts\Type1\AdobeUtopia.pfa" />
|
<None Remove="Fonts\Type1\AdobeUtopia.pfa" />
|
||||||
|
<None Remove="Fonts\Type1\Raleway-Black.pfb" />
|
||||||
<None Remove="Integration\Documents\FarmerMac.pdf" />
|
<None Remove="Integration\Documents\FarmerMac.pdf" />
|
||||||
<None Remove="Integration\Documents\Font Size Test - from google chrome print pdf.pdf" />
|
<None Remove="Integration\Documents\Font Size Test - from google chrome print pdf.pdf" />
|
||||||
<None Remove="Integration\Documents\Font Size Test - from libre office.pdf" />
|
<None Remove="Integration\Documents\Font Size Test - from libre office.pdf" />
|
||||||
<None Remove="Integration\Documents\ICML03-081.pdf" />
|
<None Remove="Integration\Documents\ICML03-081.pdf" />
|
||||||
<None Remove="Integration\Documents\Judgement Document.pdf" />
|
<None Remove="Integration\Documents\Judgement Document.pdf" />
|
||||||
<None Remove="Integration\Documents\Multiple Page - from Mortality Statistics.pdf" />
|
<None Remove="Integration\Documents\Multiple Page - from Mortality Statistics.pdf" />
|
||||||
|
<None Remove="Integration\Documents\Pig Reproduction Powerpoint.pdf" />
|
||||||
<None Remove="Integration\Documents\Single Page Form Content - from itext 1_1.pdf" />
|
<None Remove="Integration\Documents\Single Page Form Content - from itext 1_1.pdf" />
|
||||||
<None Remove="Integration\Documents\Single Page Non Latin - from acrobat distiller.pdf" />
|
<None Remove="Integration\Documents\Single Page Non Latin - from acrobat distiller.pdf" />
|
||||||
<None Remove="Integration\Documents\Single Page Simple - from google drive.pdf" />
|
<None Remove="Integration\Documents\Single Page Simple - from google drive.pdf" />
|
||||||
@@ -35,6 +37,9 @@
|
|||||||
<EmbeddedResource Include="Fonts\Type1\AdobeUtopia.pfa">
|
<EmbeddedResource Include="Fonts\Type1\AdobeUtopia.pfa">
|
||||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||||
</EmbeddedResource>
|
</EmbeddedResource>
|
||||||
|
<EmbeddedResource Include="Fonts\Type1\Raleway-Black.pfb">
|
||||||
|
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||||
|
</EmbeddedResource>
|
||||||
<Content Include="Integration\Documents\FarmerMac.pdf">
|
<Content Include="Integration\Documents\FarmerMac.pdf">
|
||||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||||
</Content>
|
</Content>
|
||||||
@@ -53,6 +58,9 @@
|
|||||||
<Content Include="Integration\Documents\Multiple Page - from Mortality Statistics.pdf">
|
<Content Include="Integration\Documents\Multiple Page - from Mortality Statistics.pdf">
|
||||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||||
</Content>
|
</Content>
|
||||||
|
<Content Include="Integration\Documents\Pig Reproduction Powerpoint.pdf">
|
||||||
|
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||||
|
</Content>
|
||||||
<Content Include="Integration\Documents\Single Page Non Latin - from acrobat distiller.pdf">
|
<Content Include="Integration\Documents\Single Page Non Latin - from acrobat distiller.pdf">
|
||||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||||
</Content>
|
</Content>
|
||||||
|
@@ -103,18 +103,17 @@
|
|||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
var stream = pdfScanner.Get(descriptor.FontFile.ObjectKey.Data).Data as StreamToken;
|
if (!(pdfScanner.Get(descriptor.FontFile.ObjectKey.Data).Data is StreamToken stream))
|
||||||
|
|
||||||
if (stream == null)
|
|
||||||
{
|
{
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var length1 = stream.StreamDictionary.Get<NumericToken>(NameToken.Length1, pdfScanner);
|
||||||
|
var length2 = stream.StreamDictionary.Get<NumericToken>(NameToken.Length2, pdfScanner);
|
||||||
|
|
||||||
var bytes = stream.Decode(filterProvider);
|
var bytes = stream.Decode(filterProvider);
|
||||||
|
|
||||||
var text = OtherEncodings.BytesAsLatin1String(bytes);
|
var font = type1FontParser.Parse(new ByteArrayInputBytes(bytes), length1.Int, length2.Int);
|
||||||
|
|
||||||
var font = type1FontParser.Parse(new ByteArrayInputBytes(bytes));
|
|
||||||
|
|
||||||
return font;
|
return font;
|
||||||
}
|
}
|
||||||
|
@@ -12,6 +12,9 @@
|
|||||||
internal class Type1FontParser
|
internal class Type1FontParser
|
||||||
{
|
{
|
||||||
private const string ClearToMark = "cleartomark";
|
private const string ClearToMark = "cleartomark";
|
||||||
|
|
||||||
|
private const int PfbFileIndicator = 0x80;
|
||||||
|
|
||||||
private readonly Type1EncryptedPortionParser encryptedPortionParser;
|
private readonly Type1EncryptedPortionParser encryptedPortionParser;
|
||||||
|
|
||||||
public Type1FontParser(Type1EncryptedPortionParser encryptedPortionParser)
|
public Type1FontParser(Type1EncryptedPortionParser encryptedPortionParser)
|
||||||
@@ -19,8 +22,27 @@
|
|||||||
this.encryptedPortionParser = encryptedPortionParser;
|
this.encryptedPortionParser = encryptedPortionParser;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Type1Font Parse(IInputBytes inputBytes)
|
/// <summary>
|
||||||
|
/// Parses an embedded Adobe Type 1 font file.
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="inputBytes">The bytes of the font program.</param>
|
||||||
|
/// <param name="length1">The length in bytes of the clear text portion of the font program.</param>
|
||||||
|
/// <param name="length2">The length in bytes of the encrypted portion of the font program.</param>
|
||||||
|
/// <returns>The parsed type 1 font.</returns>
|
||||||
|
public Type1Font Parse(IInputBytes inputBytes, int length1, int length2)
|
||||||
{
|
{
|
||||||
|
var isEntirePfbFile = inputBytes.Peek() == PfbFileIndicator;
|
||||||
|
|
||||||
|
IReadOnlyList<byte> eexecPortion = new byte[0];
|
||||||
|
|
||||||
|
if (isEntirePfbFile)
|
||||||
|
{
|
||||||
|
var (ascii, binary) = ReadPfbHeader(inputBytes);
|
||||||
|
|
||||||
|
eexecPortion = binary;
|
||||||
|
inputBytes = new ByteArrayInputBytes(ascii);
|
||||||
|
}
|
||||||
|
|
||||||
var scanner = new CoreTokenScanner(inputBytes);
|
var scanner = new CoreTokenScanner(inputBytes);
|
||||||
|
|
||||||
if (!scanner.TryReadToken(out CommentToken comment) || !comment.Data.StartsWith("!"))
|
if (!scanner.TryReadToken(out CommentToken comment) || !comment.Data.StartsWith("!"))
|
||||||
@@ -53,11 +75,10 @@
|
|||||||
var nameTokenizer = new Type1NameTokenizer();
|
var nameTokenizer = new Type1NameTokenizer();
|
||||||
scanner.RegisterCustomTokenizer((byte)'{', arrayTokenizer);
|
scanner.RegisterCustomTokenizer((byte)'{', arrayTokenizer);
|
||||||
scanner.RegisterCustomTokenizer((byte)'/', nameTokenizer);
|
scanner.RegisterCustomTokenizer((byte)'/', nameTokenizer);
|
||||||
|
|
||||||
var eexecPortion = new List<byte>();
|
|
||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
|
var tempEexecPortion = new List<byte>();
|
||||||
var tokenSet = new PreviousTokenSet();
|
var tokenSet = new PreviousTokenSet();
|
||||||
tokenSet.Add(scanner.CurrentToken);
|
tokenSet.Add(scanner.CurrentToken);
|
||||||
while (scanner.MoveNext())
|
while (scanner.MoveNext())
|
||||||
@@ -80,7 +101,7 @@
|
|||||||
{
|
{
|
||||||
for (int i = 0; i < offset; i++)
|
for (int i = 0; i < offset; i++)
|
||||||
{
|
{
|
||||||
eexecPortion.Add((byte)ClearToMark[i]);
|
tempEexecPortion.Add((byte)ClearToMark[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -97,7 +118,7 @@
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
eexecPortion.Add(inputBytes.CurrentByte);
|
tempEexecPortion.Add(inputBytes.CurrentByte);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@@ -108,6 +129,11 @@
|
|||||||
|
|
||||||
tokenSet.Add(scanner.CurrentToken);
|
tokenSet.Add(scanner.CurrentToken);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!isEntirePfbFile)
|
||||||
|
{
|
||||||
|
eexecPortion = tempEexecPortion;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
finally
|
finally
|
||||||
{
|
{
|
||||||
@@ -124,6 +150,65 @@
|
|||||||
return new Type1Font(name, encoding, matrix, boundingBox);
|
return new Type1Font(name, encoding, matrix, boundingBox);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Where an entire PFB file has been embedded in the PDF we read the header first.
/// </summary>
/// <remarks>
/// Two consecutive segments are read from the input. Each segment starts with the marker byte 0x80,
/// followed by a record type byte (0x01 for the ASCII segment, 0x02 for the binary segment) and a
/// 4 byte little-endian length, followed by that many bytes of segment data.
/// </remarks>
/// <param name="bytes">The input, positioned immediately before the first 0x80 marker byte.</param>
/// <returns>The contents of the ASCII segment and of the binary segment, in that order.</returns>
/// <exception cref="InvalidOperationException">
/// A segment header has an unexpected marker or record type, declares a negative length,
/// or the input ends before the declared data has been read.
/// </exception>
private static (byte[] ascii, byte[] binary) ReadPfbHeader(IInputBytes bytes)
{
    // Advances the input by one byte. The result of MoveNext is checked so that truncated
    // input is reported instead of carrying on with whatever CurrentByte happens to hold.
    byte ReadByte()
    {
        if (!bytes.MoveNext())
        {
            throw new InvalidOperationException("Unexpected end of data while reading the PFB file.");
        }

        return bytes.CurrentByte;
    }

    // Reads one 6 byte segment header and returns the length of the data which follows it.
    int ReadSize(byte recordType)
    {
        var marker = ReadByte();

        if (marker != PfbFileIndicator)
        {
            // This check runs for every segment, not only the first, so the message must not
            // claim the problem is at the start of the file.
            throw new InvalidOperationException($"Expected the PFB segment marker 0x80 at the start of a segment header. Instead got: {marker}");
        }

        var actualType = ReadByte();

        if (actualType != recordType)
        {
            throw new InvalidOperationException($"Encountered unexpected header type in the PFB file: {actualType}");
        }

        // The length is stored least significant byte first.
        int size = ReadByte();
        size |= ReadByte() << 8;
        size |= ReadByte() << 16;
        size |= ReadByte() << 24;

        // A set top bit makes the signed value negative; reject it here rather than letting
        // the array allocation fail with an unrelated exception.
        if (size < 0)
        {
            throw new InvalidOperationException($"Encountered invalid segment length in the PFB file: {size}");
        }

        return size;
    }

    // Reads a segment header of the expected type followed by all of its data bytes.
    byte[] ReadSegment(byte recordType)
    {
        var size = ReadSize(recordType);
        var data = new byte[size];

        for (var i = 0; i < size; i++)
        {
            data[i] = ReadByte();
        }

        return data;
    }

    var asciiPart = ReadSegment(0x01);
    var binaryPart = ReadSegment(0x02);

    return (asciiPart, binaryPart);
}
|
||||||
|
|
||||||
private static void HandleOperator(OperatorToken token, ISeekableTokenScanner scanner, PreviousTokenSet set, List<DictionaryToken> dictionaries)
|
private static void HandleOperator(OperatorToken token, ISeekableTokenScanner scanner, PreviousTokenSet set, List<DictionaryToken> dictionaries)
|
||||||
{
|
{
|
||||||
switch (token.Data)
|
switch (token.Data)
|
||||||
@@ -266,8 +351,8 @@
|
|||||||
{
|
{
|
||||||
for (var i = 0; i < encodingArray.Data.Count; i += 2)
|
for (var i = 0; i < encodingArray.Data.Count; i += 2)
|
||||||
{
|
{
|
||||||
var code = (NumericToken) encodingArray.Data[i];
|
var code = (NumericToken)encodingArray.Data[i];
|
||||||
var name = (NameToken) encodingArray.Data[i + 1];
|
var name = (NameToken)encodingArray.Data[i + 1];
|
||||||
|
|
||||||
result[code.Int] = name.Data;
|
result[code.Int] = name.Data;
|
||||||
}
|
}
|
||||||
@@ -298,10 +383,10 @@
|
|||||||
{
|
{
|
||||||
if (dictionary.TryGet(NameToken.FontBbox, out var token) && token is ArrayToken array && array.Data.Count == 4)
|
if (dictionary.TryGet(NameToken.FontBbox, out var token) && token is ArrayToken array && array.Data.Count == 4)
|
||||||
{
|
{
|
||||||
var x1 = (NumericToken) array.Data[0];
|
var x1 = (NumericToken)array.Data[0];
|
||||||
var y1 = (NumericToken) array.Data[1];
|
var y1 = (NumericToken)array.Data[1];
|
||||||
var x2 = (NumericToken) array.Data[2];
|
var x2 = (NumericToken)array.Data[2];
|
||||||
var y2 = (NumericToken) array.Data[3];
|
var y2 = (NumericToken)array.Data[3];
|
||||||
|
|
||||||
return new PdfRectangle(x1.Data, y1.Data, x2.Data, y2.Data);
|
return new PdfRectangle(x1.Data, y1.Data, x2.Data, y2.Data);
|
||||||
}
|
}
|
||||||
@@ -309,7 +394,7 @@
|
|||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
private class PreviousTokenSet
|
private class PreviousTokenSet
|
||||||
{
|
{
|
||||||
private readonly IToken[] tokens = new IToken[3];
|
private readonly IToken[] tokens = new IToken[3];
|
||||||
|
@@ -3,6 +3,8 @@
|
|||||||
using System;
|
using System;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using System.Linq;
|
using System.Linq;
|
||||||
|
using Parser.Parts;
|
||||||
|
using Scanner;
|
||||||
using Util.JetBrains.Annotations;
|
using Util.JetBrains.Annotations;
|
||||||
|
|
||||||
internal class DictionaryToken : IDataToken<IReadOnlyDictionary<string, IToken>>
|
internal class DictionaryToken : IDataToken<IReadOnlyDictionary<string, IToken>>
|
||||||
@@ -39,6 +41,21 @@
|
|||||||
{
|
{
|
||||||
Data = data;
|
Data = data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Gets the entry with the given name as a token of type <typeparamref name="T"/>.
/// If the entry is an indirect reference it is resolved through <c>DirectObjectFinder</c> using the scanner.
/// </summary>
/// <typeparam name="T">The expected token type.</typeparam>
/// <param name="name">The name of the dictionary entry.</param>
/// <param name="scanner">The scanner passed to <c>DirectObjectFinder</c> when the entry is an indirect reference.</param>
/// <returns>The entry itself when it already has the expected type, otherwise the resolved indirect object.</returns>
/// <exception cref="InvalidOperationException">
/// The entry is neither of type <typeparamref name="T"/> nor an indirect reference.
/// </exception>
public T Get<T>(NameToken name, IPdfTokenScanner scanner) where T : IToken
{
    // Entry is present and already has the requested type.
    if (TryGet(name, out var found) && found is T direct)
    {
        return direct;
    }

    // Otherwise the only acceptable alternative is an indirect reference.
    if (found is IndirectReferenceToken reference)
    {
        return DirectObjectFinder.Get<T>(reference, scanner);
    }

    throw new InvalidOperationException($"Dictionary does not contain token with name {name} of type {typeof(T).Name}.");
}
|
||||||
|
|
||||||
public bool TryGet(NameToken name, out IToken token)
|
public bool TryGet(NameToken name, out IToken token)
|
||||||
{
|
{
|
||||||
|
Reference in New Issue
Block a user