diff --git a/src/UglyToad.PdfPig.Tests/Fonts/Type1/Raleway-Black.pfb b/src/UglyToad.PdfPig.Tests/Fonts/Type1/Raleway-Black.pfb new file mode 100644 index 00000000..95674f6d Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Fonts/Type1/Raleway-Black.pfb differ diff --git a/src/UglyToad.PdfPig.Tests/Fonts/Type1/Type1FontParserTests.cs b/src/UglyToad.PdfPig.Tests/Fonts/Type1/Type1FontParserTests.cs index 82eebd27..88a86698 100644 --- a/src/UglyToad.PdfPig.Tests/Fonts/Type1/Type1FontParserTests.cs +++ b/src/UglyToad.PdfPig.Tests/Fonts/Type1/Type1FontParserTests.cs @@ -18,16 +18,16 @@ { var bytes = GetFileBytes("AdobeUtopia.pfa"); - parser.Parse(new ByteArrayInputBytes(bytes)); + parser.Parse(new ByteArrayInputBytes(bytes),0, 0); } [Fact] - public void CanReadBinaryEncryptedPortion() + public void CanReadBinaryEncryptedPortionOfFullPfb() { // TODO: support reading in these pfb files - //var bytes = GetFileBytes("cmbx8.pfb"); + var bytes = GetFileBytes("Raleway-Black.pfb"); - //parser.Parse(new ByteArrayInputBytes(bytes)); + parser.Parse(new ByteArrayInputBytes(bytes), 0, 0); } [Fact] @@ -35,7 +35,7 @@ { var bytes = StringBytesTestConverter.Convert(Cmbx12, false); - parser.Parse(bytes.Bytes); + parser.Parse(bytes.Bytes, 0, 0); } private const string Cmbx12 = @"%!PS-AdobeFont-1.1: CMBX12 1.0 diff --git a/src/UglyToad.PdfPig.Tests/Integration/Documents/Pig Reproduction Powerpoint.pdf b/src/UglyToad.PdfPig.Tests/Integration/Documents/Pig Reproduction Powerpoint.pdf new file mode 100644 index 00000000..417f1c07 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Integration/Documents/Pig Reproduction Powerpoint.pdf differ diff --git a/src/UglyToad.PdfPig.Tests/Integration/PigReproductionPowerpointTests.cs b/src/UglyToad.PdfPig.Tests/Integration/PigReproductionPowerpointTests.cs new file mode 100644 index 00000000..a1fa9256 --- /dev/null +++ b/src/UglyToad.PdfPig.Tests/Integration/PigReproductionPowerpointTests.cs @@ -0,0 +1,36 @@ +namespace UglyToad.PdfPig.Tests.Integration +{ + using System; + using System.IO; + using Xunit; + + public class PigReproductionPowerpointTests + { + private static string GetFilename() + { + var documentFolder = Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "Integration", "Documents")); + + return Path.Combine(documentFolder, "Pig Reproduction Powerpoint.pdf"); + } + + [Fact] + public void CanReadContent() + { + using (var document = PdfDocument.Open(GetFilename())) + { + var page = document.GetPage(1); + + Assert.Contains("Pigs per sow per year: 18 to 27", page.Text); + } + } + + [Fact] + public void HasCorrectNumberOfPages() + { + using (var document = PdfDocument.Open(GetFilename())) + { + Assert.Equal(35, document.NumberOfPages); + } + } + } +} \ No newline at end of file diff --git a/src/UglyToad.PdfPig.Tests/UglyToad.PdfPig.Tests.csproj b/src/UglyToad.PdfPig.Tests/UglyToad.PdfPig.Tests.csproj index 5e2469c6..7d85295c 100644 --- a/src/UglyToad.PdfPig.Tests/UglyToad.PdfPig.Tests.csproj +++ b/src/UglyToad.PdfPig.Tests/UglyToad.PdfPig.Tests.csproj @@ -11,12 +11,14 @@ + + @@ -35,6 +37,9 @@ PreserveNewest + + PreserveNewest + PreserveNewest @@ -53,6 +58,9 @@ PreserveNewest + + PreserveNewest + PreserveNewest diff --git a/src/UglyToad.PdfPig/Fonts/Parser/Handlers/Type1FontHandler.cs b/src/UglyToad.PdfPig/Fonts/Parser/Handlers/Type1FontHandler.cs index c519e557..659e97ae 100644 --- a/src/UglyToad.PdfPig/Fonts/Parser/Handlers/Type1FontHandler.cs +++ b/src/UglyToad.PdfPig/Fonts/Parser/Handlers/Type1FontHandler.cs @@ -103,18 +103,17 @@ try { - var stream = pdfScanner.Get(descriptor.FontFile.ObjectKey.Data).Data as StreamToken; - - if (stream == null) + if (!(pdfScanner.Get(descriptor.FontFile.ObjectKey.Data).Data is StreamToken stream)) { return null; } + + var length1 = stream.StreamDictionary.Get(NameToken.Length1, pdfScanner); + var length2 = stream.StreamDictionary.Get(NameToken.Length2, pdfScanner); var bytes = stream.Decode(filterProvider); - - var text = OtherEncodings.BytesAsLatin1String(bytes); - - var font = type1FontParser.Parse(new ByteArrayInputBytes(bytes)); + + var font = type1FontParser.Parse(new ByteArrayInputBytes(bytes), length1.Int, length2.Int); return font; } diff --git a/src/UglyToad.PdfPig/Fonts/Type1/Parser/Type1FontParser.cs b/src/UglyToad.PdfPig/Fonts/Type1/Parser/Type1FontParser.cs index b50cb874..c2490fbd 100644 --- a/src/UglyToad.PdfPig/Fonts/Type1/Parser/Type1FontParser.cs +++ b/src/UglyToad.PdfPig/Fonts/Type1/Parser/Type1FontParser.cs @@ -12,6 +12,9 @@ internal class Type1FontParser { private const string ClearToMark = "cleartomark"; + + private const int PfbFileIndicator = 0x80; + private readonly Type1EncryptedPortionParser encryptedPortionParser; public Type1FontParser(Type1EncryptedPortionParser encryptedPortionParser) @@ -19,8 +22,27 @@ this.encryptedPortionParser = encryptedPortionParser; } - public Type1Font Parse(IInputBytes inputBytes) + /// + /// Parses an embedded Adobe Type 1 font file. + /// + /// The bytes of the font program. + /// The length in bytes of the clear text portion of the font program. + /// The length in bytes of the encrypted portion of the font program. + /// The parsed type 1 font. + public Type1Font Parse(IInputBytes inputBytes, int length1, int length2) { + var isEntirePfbFile = inputBytes.Peek() == PfbFileIndicator; + + IReadOnlyList eexecPortion = new byte[0]; + + if (isEntirePfbFile) + { + var (ascii, binary) = ReadPfbHeader(inputBytes); + + eexecPortion = binary; + inputBytes = new ByteArrayInputBytes(ascii); + } + var scanner = new CoreTokenScanner(inputBytes); if (!scanner.TryReadToken(out CommentToken comment) || !comment.Data.StartsWith("!")) @@ -53,11 +75,10 @@ var nameTokenizer = new Type1NameTokenizer(); scanner.RegisterCustomTokenizer((byte)'{', arrayTokenizer); scanner.RegisterCustomTokenizer((byte)'/', nameTokenizer); - - var eexecPortion = new List(); - + try { + var tempEexecPortion = new List(); var tokenSet = new PreviousTokenSet(); tokenSet.Add(scanner.CurrentToken); while (scanner.MoveNext()) @@ -80,7 +101,7 @@ { for (int i = 0; i < offset; i++) { - eexecPortion.Add((byte)ClearToMark[i]); + tempEexecPortion.Add((byte)ClearToMark[i]); } } @@ -97,7 +118,7 @@ continue; } - eexecPortion.Add(inputBytes.CurrentByte); + tempEexecPortion.Add(inputBytes.CurrentByte); } } else @@ -108,6 +129,11 @@ tokenSet.Add(scanner.CurrentToken); } + + if (!isEntirePfbFile) + { + eexecPortion = tempEexecPortion; + } } finally { @@ -124,6 +150,65 @@ return new Type1Font(name, encoding, matrix, boundingBox); } + /// + /// Where an entire PFB file has been embedded in the PDF we read the header first. + /// + private static (byte[] ascii, byte[] binary) ReadPfbHeader(IInputBytes bytes) + { + int ReadSize(byte recordType) + { + bytes.MoveNext(); + + if (bytes.CurrentByte != PfbFileIndicator) + { + throw new InvalidOperationException($"File does not start with 0x80, which indicates a full PFB file. Instead got: {bytes.CurrentByte}"); + } + + bytes.MoveNext(); + + if (bytes.CurrentByte != recordType) + { + throw new InvalidOperationException($"Encountered unexpected header type in the PFB file: {bytes.CurrentByte}"); + } + + bytes.MoveNext(); + int size = bytes.CurrentByte; + bytes.MoveNext(); + size += bytes.CurrentByte << 8; + bytes.MoveNext(); + size += bytes.CurrentByte << 16; + bytes.MoveNext(); + size += bytes.CurrentByte << 24; + + return size; + } + + var asciiSize = ReadSize(0x01); + var asciiPart = new byte[asciiSize]; + + int i = 0; + while (i < asciiSize) + { + bytes.MoveNext(); + asciiPart[i] = bytes.CurrentByte; + i++; + } + + var binarySize = ReadSize(0x02); + + var binaryPart = new byte[binarySize]; + i = 0; + + while (i < binarySize) + { + bytes.MoveNext(); + binaryPart[i] = bytes.CurrentByte; + i++; + } + + return (asciiPart, binaryPart); + } + private static void HandleOperator(OperatorToken token, ISeekableTokenScanner scanner, PreviousTokenSet set, List dictionaries) { switch (token.Data) @@ -266,8 +351,8 @@ { for (var i = 0; i < encodingArray.Data.Count; i += 2) { - var code = (NumericToken) encodingArray.Data[i]; - var name = (NameToken) encodingArray.Data[i + 1]; + var code = (NumericToken)encodingArray.Data[i]; + var name = (NameToken)encodingArray.Data[i + 1]; result[code.Int] = name.Data; } @@ -298,10 +383,10 @@ { if (dictionary.TryGet(NameToken.FontBbox, out var token) && token is ArrayToken array && array.Data.Count == 4) { - var x1 = (NumericToken) array.Data[0]; - var y1 = (NumericToken) array.Data[1]; - var x2 = (NumericToken) array.Data[2]; - var y2 = (NumericToken) array.Data[3]; + var x1 = (NumericToken)array.Data[0]; + var y1 = (NumericToken)array.Data[1]; + var x2 = (NumericToken)array.Data[2]; + var y2 = (NumericToken)array.Data[3]; return new PdfRectangle(x1.Data, y1.Data, x2.Data, y2.Data); } @@ -309,7 +394,7 @@ return null; } - + private class PreviousTokenSet { private readonly IToken[] tokens = new IToken[3]; diff --git a/src/UglyToad.PdfPig/Tokenization/Tokens/DictionaryToken.cs b/src/UglyToad.PdfPig/Tokenization/Tokens/DictionaryToken.cs index 42a5b12e..143ad6fe 100644 --- a/src/UglyToad.PdfPig/Tokenization/Tokens/DictionaryToken.cs +++ b/src/UglyToad.PdfPig/Tokenization/Tokens/DictionaryToken.cs @@ -3,6 +3,8 @@ using System; using System.Collections.Generic; using System.Linq; + using Parser.Parts; + using Scanner; using Util.JetBrains.Annotations; internal class DictionaryToken : IDataToken> @@ -39,6 +41,21 @@ { Data = data; } + + public T Get(NameToken name, IPdfTokenScanner scanner) where T : IToken + { + if (!TryGet(name, out var token) || !(token is T typedToken)) + { + if (!(token is IndirectReferenceToken indirectReference)) + { + throw new InvalidOperationException($"Dictionary does not contain token with name {name} of type {typeof(T).Name}."); + } + + typedToken = DirectObjectFinder.Get(indirectReference, scanner); + } + + return typedToken; + } public bool TryGet(NameToken name, out IToken token) {