diff --git a/src/UglyToad.PdfPig.Tests/Fonts/Type1/Raleway-Black.pfb b/src/UglyToad.PdfPig.Tests/Fonts/Type1/Raleway-Black.pfb
new file mode 100644
index 00000000..95674f6d
Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Fonts/Type1/Raleway-Black.pfb differ
diff --git a/src/UglyToad.PdfPig.Tests/Fonts/Type1/Type1FontParserTests.cs b/src/UglyToad.PdfPig.Tests/Fonts/Type1/Type1FontParserTests.cs
index 82eebd27..88a86698 100644
--- a/src/UglyToad.PdfPig.Tests/Fonts/Type1/Type1FontParserTests.cs
+++ b/src/UglyToad.PdfPig.Tests/Fonts/Type1/Type1FontParserTests.cs
@@ -18,16 +18,16 @@
{
var bytes = GetFileBytes("AdobeUtopia.pfa");
- parser.Parse(new ByteArrayInputBytes(bytes));
+ parser.Parse(new ByteArrayInputBytes(bytes),0, 0);
}
[Fact]
- public void CanReadBinaryEncryptedPortion()
+ public void CanReadBinaryEncryptedPortionOfFullPfb()
{
// TODO: support reading in these pfb files
- //var bytes = GetFileBytes("cmbx8.pfb");
+ var bytes = GetFileBytes("Raleway-Black.pfb");
- //parser.Parse(new ByteArrayInputBytes(bytes));
+ parser.Parse(new ByteArrayInputBytes(bytes), 0, 0);
}
[Fact]
@@ -35,7 +35,7 @@
{
var bytes = StringBytesTestConverter.Convert(Cmbx12, false);
- parser.Parse(bytes.Bytes);
+ parser.Parse(bytes.Bytes, 0, 0);
}
private const string Cmbx12 = @"%!PS-AdobeFont-1.1: CMBX12 1.0
diff --git a/src/UglyToad.PdfPig.Tests/Integration/Documents/Pig Reproduction Powerpoint.pdf b/src/UglyToad.PdfPig.Tests/Integration/Documents/Pig Reproduction Powerpoint.pdf
new file mode 100644
index 00000000..417f1c07
Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Integration/Documents/Pig Reproduction Powerpoint.pdf differ
diff --git a/src/UglyToad.PdfPig.Tests/Integration/PigReproductionPowerpointTests.cs b/src/UglyToad.PdfPig.Tests/Integration/PigReproductionPowerpointTests.cs
new file mode 100644
index 00000000..a1fa9256
--- /dev/null
+++ b/src/UglyToad.PdfPig.Tests/Integration/PigReproductionPowerpointTests.cs
@@ -0,0 +1,36 @@
+namespace UglyToad.PdfPig.Tests.Integration
+{
+    using System;
+    using System.IO;
+    using Xunit;
+
+    /// <summary>Integration tests which open the "Pig Reproduction Powerpoint.pdf" sample document.</summary>
+    public class PigReproductionPowerpointTests
+    {
+        /// <summary>Builds the full path of the sample PDF in Integration/Documents, three folders above the base directory.</summary>
+        private static string GetFilename()
+        {
+            var relativeFolder = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "Integration", "Documents");
+            return Path.Combine(Path.GetFullPath(relativeFolder), "Pig Reproduction Powerpoint.pdf");
+        }
+
+        [Fact]
+        public void CanReadContent()
+        {
+            using (var document = PdfDocument.Open(GetFilename()))
+            {
+                var firstPageText = document.GetPage(1).Text;
+                Assert.Contains("Pigs per sow per year: 18 to 27", firstPageText);
+            }
+        }
+
+        [Fact]
+        public void HasCorrectNumberOfPages()
+        {
+            using (var document = PdfDocument.Open(GetFilename()))
+            {
+                Assert.Equal(35, document.NumberOfPages);
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/UglyToad.PdfPig.Tests/UglyToad.PdfPig.Tests.csproj b/src/UglyToad.PdfPig.Tests/UglyToad.PdfPig.Tests.csproj
index 5e2469c6..7d85295c 100644
--- a/src/UglyToad.PdfPig.Tests/UglyToad.PdfPig.Tests.csproj
+++ b/src/UglyToad.PdfPig.Tests/UglyToad.PdfPig.Tests.csproj
@@ -11,12 +11,14 @@
+
+
@@ -35,6 +37,9 @@
PreserveNewest
+
+ PreserveNewest
+
PreserveNewest
@@ -53,6 +58,9 @@
PreserveNewest
+
+ PreserveNewest
+
PreserveNewest
diff --git a/src/UglyToad.PdfPig/Fonts/Parser/Handlers/Type1FontHandler.cs b/src/UglyToad.PdfPig/Fonts/Parser/Handlers/Type1FontHandler.cs
index c519e557..659e97ae 100644
--- a/src/UglyToad.PdfPig/Fonts/Parser/Handlers/Type1FontHandler.cs
+++ b/src/UglyToad.PdfPig/Fonts/Parser/Handlers/Type1FontHandler.cs
@@ -103,18 +103,17 @@
try
{
- var stream = pdfScanner.Get(descriptor.FontFile.ObjectKey.Data).Data as StreamToken;
-
- if (stream == null)
+ if (!(pdfScanner.Get(descriptor.FontFile.ObjectKey.Data).Data is StreamToken stream))
{
return null;
}
+
+ var length1 = stream.StreamDictionary.Get(NameToken.Length1, pdfScanner);
+ var length2 = stream.StreamDictionary.Get(NameToken.Length2, pdfScanner);
var bytes = stream.Decode(filterProvider);
-
- var text = OtherEncodings.BytesAsLatin1String(bytes);
-
- var font = type1FontParser.Parse(new ByteArrayInputBytes(bytes));
+
+ var font = type1FontParser.Parse(new ByteArrayInputBytes(bytes), length1.Int, length2.Int);
return font;
}
diff --git a/src/UglyToad.PdfPig/Fonts/Type1/Parser/Type1FontParser.cs b/src/UglyToad.PdfPig/Fonts/Type1/Parser/Type1FontParser.cs
index b50cb874..c2490fbd 100644
--- a/src/UglyToad.PdfPig/Fonts/Type1/Parser/Type1FontParser.cs
+++ b/src/UglyToad.PdfPig/Fonts/Type1/Parser/Type1FontParser.cs
@@ -12,6 +12,9 @@
internal class Type1FontParser
{
private const string ClearToMark = "cleartomark";
+
+ private const int PfbFileIndicator = 0x80;
+
private readonly Type1EncryptedPortionParser encryptedPortionParser;
public Type1FontParser(Type1EncryptedPortionParser encryptedPortionParser)
@@ -19,8 +22,27 @@
this.encryptedPortionParser = encryptedPortionParser;
}
- public Type1Font Parse(IInputBytes inputBytes)
+ /// <summary>
+ /// Parses an embedded Adobe Type 1 font file.
+ /// </summary>
+ /// <param name="inputBytes">The bytes of the font program.</param>
+ /// <param name="length1">The length in bytes of the clear text portion of the font program.</param>
+ /// <param name="length2">The length in bytes of the encrypted portion of the font program.</param>
+ /// <returns>The parsed type 1 font.</returns>
+ public Type1Font Parse(IInputBytes inputBytes, int length1, int length2)
{
+ var isEntirePfbFile = inputBytes.Peek() == PfbFileIndicator;
+
+ IReadOnlyList eexecPortion = new byte[0];
+
+ if (isEntirePfbFile)
+ {
+ var (ascii, binary) = ReadPfbHeader(inputBytes);
+
+ eexecPortion = binary;
+ inputBytes = new ByteArrayInputBytes(ascii);
+ }
+
var scanner = new CoreTokenScanner(inputBytes);
if (!scanner.TryReadToken(out CommentToken comment) || !comment.Data.StartsWith("!"))
@@ -53,11 +75,10 @@
var nameTokenizer = new Type1NameTokenizer();
scanner.RegisterCustomTokenizer((byte)'{', arrayTokenizer);
scanner.RegisterCustomTokenizer((byte)'/', nameTokenizer);
-
- var eexecPortion = new List();
-
+
try
{
+ var tempEexecPortion = new List();
var tokenSet = new PreviousTokenSet();
tokenSet.Add(scanner.CurrentToken);
while (scanner.MoveNext())
@@ -80,7 +101,7 @@
{
for (int i = 0; i < offset; i++)
{
- eexecPortion.Add((byte)ClearToMark[i]);
+ tempEexecPortion.Add((byte)ClearToMark[i]);
}
}
@@ -97,7 +118,7 @@
continue;
}
- eexecPortion.Add(inputBytes.CurrentByte);
+ tempEexecPortion.Add(inputBytes.CurrentByte);
}
}
else
@@ -108,6 +129,11 @@
tokenSet.Add(scanner.CurrentToken);
}
+
+ if (!isEntirePfbFile)
+ {
+ eexecPortion = tempEexecPortion;
+ }
}
finally
{
@@ -124,6 +150,65 @@
return new Type1Font(name, encoding, matrix, boundingBox);
}
+        /// <summary>
+        /// Where an entire PFB file has been embedded in the PDF we read the segment headers first.
+        /// Two segments are read in order: the ASCII segment (record type 0x01) followed by the
+        /// binary segment (record type 0x02). Each segment is introduced by the marker byte 0x80,
+        /// the record type and a 4 byte little-endian length.
+        /// </summary>
+        /// <param name="bytes">The input positioned immediately before the first marker byte.</param>
+        /// <returns>The raw bytes of the ASCII segment and of the binary segment.</returns>
+        /// <exception cref="InvalidOperationException">
+        /// A marker or record type is not the expected value, a segment length is negative
+        /// or the input ends before a segment has been read completely.
+        /// </exception>
+        private static (byte[] ascii, byte[] binary) ReadPfbHeader(IInputBytes bytes)
+        {
+            // Advances the input by one byte, failing loudly instead of re-reading a stale byte at the end of the data.
+            byte ReadByte()
+            {
+                if (!bytes.MoveNext())
+                {
+                    throw new InvalidOperationException("Unexpected end of data while reading the PFB file.");
+                }
+
+                return bytes.CurrentByte;
+            }
+
+            // Validates the 2 byte segment header and returns the segment length which follows it.
+            int ReadSize(byte recordType)
+            {
+                var marker = ReadByte();
+
+                if (marker != PfbFileIndicator)
+                {
+                    throw new InvalidOperationException($"Expected the PFB segment marker 0x80 but instead got: {marker}");
+                }
+
+                var actualType = ReadByte();
+
+                if (actualType != recordType)
+                {
+                    throw new InvalidOperationException($"Encountered unexpected header type in the PFB file: {actualType}");
+                }
+
+                // The length is stored least significant byte first.
+                var size = 0;
+                for (var shift = 0; shift < 32; shift += 8)
+                {
+                    size |= ReadByte() << shift;
+                }
+
+                // A set high bit would otherwise surface as an OverflowException from the array allocation.
+                if (size < 0)
+                {
+                    throw new InvalidOperationException($"Encountered an invalid segment length in the PFB file: {size}");
+                }
+
+                return size;
+            }
+
+            // Reads the header for the given record type followed by exactly that many bytes of content.
+            byte[] ReadSegment(byte recordType)
+            {
+                var segment = new byte[ReadSize(recordType)];
+
+                for (var i = 0; i < segment.Length; i++)
+                {
+                    segment[i] = ReadByte();
+                }
+
+                return segment;
+            }
+
+            var asciiPart = ReadSegment(0x01);
+            var binaryPart = ReadSegment(0x02);
+
+            return (asciiPart, binaryPart);
+        }
+
private static void HandleOperator(OperatorToken token, ISeekableTokenScanner scanner, PreviousTokenSet set, List dictionaries)
{
switch (token.Data)
@@ -266,8 +351,8 @@
{
for (var i = 0; i < encodingArray.Data.Count; i += 2)
{
- var code = (NumericToken) encodingArray.Data[i];
- var name = (NameToken) encodingArray.Data[i + 1];
+ var code = (NumericToken)encodingArray.Data[i];
+ var name = (NameToken)encodingArray.Data[i + 1];
result[code.Int] = name.Data;
}
@@ -298,10 +383,10 @@
{
if (dictionary.TryGet(NameToken.FontBbox, out var token) && token is ArrayToken array && array.Data.Count == 4)
{
- var x1 = (NumericToken) array.Data[0];
- var y1 = (NumericToken) array.Data[1];
- var x2 = (NumericToken) array.Data[2];
- var y2 = (NumericToken) array.Data[3];
+ var x1 = (NumericToken)array.Data[0];
+ var y1 = (NumericToken)array.Data[1];
+ var x2 = (NumericToken)array.Data[2];
+ var y2 = (NumericToken)array.Data[3];
return new PdfRectangle(x1.Data, y1.Data, x2.Data, y2.Data);
}
@@ -309,7 +394,7 @@
return null;
}
-
+
private class PreviousTokenSet
{
private readonly IToken[] tokens = new IToken[3];
diff --git a/src/UglyToad.PdfPig/Tokenization/Tokens/DictionaryToken.cs b/src/UglyToad.PdfPig/Tokenization/Tokens/DictionaryToken.cs
index 42a5b12e..143ad6fe 100644
--- a/src/UglyToad.PdfPig/Tokenization/Tokens/DictionaryToken.cs
+++ b/src/UglyToad.PdfPig/Tokenization/Tokens/DictionaryToken.cs
@@ -3,6 +3,8 @@
using System;
using System.Collections.Generic;
using System.Linq;
+ using Parser.Parts;
+ using Scanner;
using Util.JetBrains.Annotations;
internal class DictionaryToken : IDataToken>
@@ -39,6 +41,21 @@
{
Data = data;
}
+
+        /// <summary>
+        /// Gets the entry stored under <paramref name="name"/> as a <typeparamref name="T"/>.
+        /// When the entry is an <see cref="IndirectReferenceToken"/> rather than a <typeparamref name="T"/>
+        /// it is looked up with <c>DirectObjectFinder.Get</c> using <paramref name="scanner"/>.
+        /// </summary>
+        /// <typeparam name="T">The token type the caller expects.</typeparam>
+        /// <param name="name">The key of the dictionary entry.</param>
+        /// <param name="scanner">The scanner handed to <c>DirectObjectFinder.Get</c> for indirect references.</param>
+        /// <returns>The entry itself when it already is a <typeparamref name="T"/>, otherwise the looked up object.</returns>
+        /// <exception cref="InvalidOperationException">
+        /// The entry is missing, or it is neither a <typeparamref name="T"/> nor an indirect reference.
+        /// </exception>
+        public T Get<T>(NameToken name, IPdfTokenScanner scanner) where T : IToken
+        {
+            if (TryGet(name, out var token))
+            {
+                if (token is T typedToken)
+                {
+                    return typedToken;
+                }
+
+                if (token is IndirectReferenceToken indirectReference)
+                {
+                    return DirectObjectFinder.Get<T>(indirectReference, scanner);
+                }
+            }
+
+            throw new InvalidOperationException($"Dictionary does not contain token with name {name} of type {typeof(T).Name}.");
+        }
public bool TryGet(NameToken name, out IToken token)
{