diff --git a/src/UglyToad.PdfPig.Tests/Fonts/Type1/Type1FontParserTests.cs b/src/UglyToad.PdfPig.Tests/Fonts/Type1/Type1FontParserTests.cs index 4674f884..5f500475 100644 --- a/src/UglyToad.PdfPig.Tests/Fonts/Type1/Type1FontParserTests.cs +++ b/src/UglyToad.PdfPig.Tests/Fonts/Type1/Type1FontParserTests.cs @@ -54,17 +54,17 @@ private static byte[] GetFileBytes(string name) { - var manifestFiles = typeof(Type1FontParserTests).Assembly.GetManifestResourceNames(); + var documentFolder = Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "Fonts", "Type1")); + var files = Directory.GetFiles(documentFolder); - var match = manifestFiles.Single(x => x.IndexOf(name, StringComparison.InvariantCultureIgnoreCase) >= 0); + var file = files.FirstOrDefault(x => x.IndexOf(name, StringComparison.OrdinalIgnoreCase) >= 0); - using (var memoryStream = new MemoryStream()) - using (var stream = typeof(Type1FontParserTests).Assembly.GetManifestResourceStream(match)) + if (file == null) { - stream.CopyTo(memoryStream); - - return memoryStream.ToArray(); + throw new InvalidOperationException($"Could not find test file {name} in folder {documentFolder}."); } + + return File.ReadAllBytes(file); } } } diff --git a/src/UglyToad.PdfPig.Tests/Integration/IntegrationDocumentTests.cs b/src/UglyToad.PdfPig.Tests/Integration/IntegrationDocumentTests.cs new file mode 100644 index 00000000..26bf8947 --- /dev/null +++ b/src/UglyToad.PdfPig.Tests/Integration/IntegrationDocumentTests.cs @@ -0,0 +1,36 @@ +namespace UglyToad.PdfPig.Tests.Integration +{ + using System; + using System.Collections.Generic; + using System.IO; + using System.Linq; + using Xunit; + + public class IntegrationDocumentTests + { + [Theory] + [MemberData(nameof(GetAllDocuments))] + public void CanReadAllPages(string documentName) + { + using (var document = PdfDocument.Open(documentName, new ParsingOptions{ UseLenientParsing = false})) + { + for (var i = 0; i < document.NumberOfPages; i++) + { + document.GetPage(i + 1); + } + } + } + + public static IEnumerable GetAllDocuments + { + get + { + var documentFolder = Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "Integration", "Documents")); + + var files = Directory.GetFiles(documentFolder, "*.pdf"); + + return files.Select(x => new object[] {x}); + } + } + } +} diff --git a/src/UglyToad.PdfPig.Tests/Integration/PigReproductionPowerpointTests.cs b/src/UglyToad.PdfPig.Tests/Integration/PigReproductionPowerpointTests.cs index a1fa9256..f8e1fbb9 100644 --- a/src/UglyToad.PdfPig.Tests/Integration/PigReproductionPowerpointTests.cs +++ b/src/UglyToad.PdfPig.Tests/Integration/PigReproductionPowerpointTests.cs @@ -32,5 +32,17 @@ Assert.Equal(35, document.NumberOfPages); } } + + [Fact] + public void CanReadAllPages() + { + using (var document = PdfDocument.Open(GetFilename())) + { + for (var i = 0; i < document.NumberOfPages; i++) + { + document.GetPage(i + 1); + } + } + } } } \ No newline at end of file diff --git a/src/UglyToad.PdfPig.Tests/UglyToad.PdfPig.Tests.csproj b/src/UglyToad.PdfPig.Tests/UglyToad.PdfPig.Tests.csproj index b43d4233..dbf3eb2c 100644 --- a/src/UglyToad.PdfPig.Tests/UglyToad.PdfPig.Tests.csproj +++ b/src/UglyToad.PdfPig.Tests/UglyToad.PdfPig.Tests.csproj @@ -29,6 +29,22 @@ + + + + + + + + + + + + + + + + diff --git a/src/UglyToad.PdfPig/Fonts/Parser/FontDictionaryAccessHelper.cs b/src/UglyToad.PdfPig/Fonts/Parser/FontDictionaryAccessHelper.cs index bbd9541b..23af489c 100644 --- a/src/UglyToad.PdfPig/Fonts/Parser/FontDictionaryAccessHelper.cs +++ b/src/UglyToad.PdfPig/Fonts/Parser/FontDictionaryAccessHelper.cs @@ -63,7 +63,7 @@ var parsed = DirectObjectFinder.Get(obj, pdfScanner); - var descriptor = fontDescriptorFactory.Generate(parsed, isLenientParsing); + var descriptor = fontDescriptorFactory.Generate(parsed, pdfScanner, isLenientParsing); return descriptor; } diff --git a/src/UglyToad.PdfPig/Fonts/Parser/Handlers/Type1FontHandler.cs b/src/UglyToad.PdfPig/Fonts/Parser/Handlers/Type1FontHandler.cs index d349e036..f95d4af5 100644 --- a/src/UglyToad.PdfPig/Fonts/Parser/Handlers/Type1FontHandler.cs +++ b/src/UglyToad.PdfPig/Fonts/Parser/Handlers/Type1FontHandler.cs @@ -1,5 +1,6 @@ namespace UglyToad.PdfPig.Fonts.Parser.Handlers { + using System; using Cmap; using CompactFontFormat; using Encodings; @@ -118,6 +119,7 @@ && NameToken.Type1C.Equals(subTypeName)) { compactFontFormatParser.Parse(new CompactFontFormatData(bytes)); + throw new NotSupportedException("TODO: support Compact Font Format..."); return null; } diff --git a/src/UglyToad.PdfPig/Fonts/Parser/Parts/CidFontFactory.cs b/src/UglyToad.PdfPig/Fonts/Parser/Parts/CidFontFactory.cs index b2876d8c..ca6eb15f 100644 --- a/src/UglyToad.PdfPig/Fonts/Parser/Parts/CidFontFactory.cs +++ b/src/UglyToad.PdfPig/Fonts/Parser/Parts/CidFontFactory.cs @@ -44,7 +44,7 @@ FontDescriptor descriptor = null; if (TryGetFontDescriptor(dictionary, out var descriptorDictionary)) { - descriptor = descriptorFactory.Generate(descriptorDictionary, isLenientParsing); + descriptor = descriptorFactory.Generate(descriptorDictionary, pdfScanner, isLenientParsing); } var fontProgram = ReadDescriptorFile(descriptor); @@ -248,7 +248,7 @@ if (entry is NameToken name) { - if (!name.Equals(NameToken.CidToGidMap) && !isLenientParsing) + if (!name.Equals(NameToken.Identity) && !isLenientParsing) { throw new InvalidOperationException($"The CIDToGIDMap in a Type 0 font should have the value /Identity, instead got: {name}."); } diff --git a/src/UglyToad.PdfPig/Fonts/Parser/Parts/FontDescriptorFactory.cs b/src/UglyToad.PdfPig/Fonts/Parser/Parts/FontDescriptorFactory.cs index e813b035..29085785 100644 --- a/src/UglyToad.PdfPig/Fonts/Parser/Parts/FontDescriptorFactory.cs +++ b/src/UglyToad.PdfPig/Fonts/Parser/Parts/FontDescriptorFactory.cs @@ -2,20 +2,22 @@ { using System; using Geometry; + using PdfPig.Parser.Parts; + using Tokenization.Scanner; using Tokenization.Tokens; using Util; using Util.JetBrains.Annotations; internal class FontDescriptorFactory { - public FontDescriptor Generate(DictionaryToken dictionary, bool isLenientParsing) + public FontDescriptor Generate(DictionaryToken dictionary, IPdfTokenScanner pdfScanner, bool isLenientParsing) { if (dictionary == null) { throw new ArgumentNullException(nameof(dictionary)); } - var name = GetFontName(dictionary, isLenientParsing); + var name = GetFontName(dictionary, pdfScanner, isLenientParsing); var family = GetFontFamily(dictionary); var stretch = GetFontStretch(dictionary); var flags = GetFlags(dictionary, isLenientParsing); @@ -55,10 +57,16 @@ return number.Data; } - private static NameToken GetFontName(DictionaryToken dictionary, bool isLenientParsing) + private static NameToken GetFontName(DictionaryToken dictionary, IPdfTokenScanner scanner, bool isLenientParsing) { if (!dictionary.TryGet(NameToken.FontName, out var name) || !(name is NameToken nameToken)) { + if (name is IndirectReferenceToken nameReference) + { + var indirectName = DirectObjectFinder.Get(nameReference, scanner); + return indirectName; + } + if (isLenientParsing) { nameToken = NameToken.Create(string.Empty); diff --git a/src/UglyToad.PdfPig/Fonts/Simple/Type1FontSimple.cs b/src/UglyToad.PdfPig/Fonts/Simple/Type1FontSimple.cs index 7849248c..8f1495c0 100644 --- a/src/UglyToad.PdfPig/Fonts/Simple/Type1FontSimple.cs +++ b/src/UglyToad.PdfPig/Fonts/Simple/Type1FontSimple.cs @@ -71,6 +71,11 @@ } catch { + if (fontProgram != null) + { + var result = fontProgram.Encoding.TryGetValue(characterCode, out value); + return result; + } // our quick hack has failed, we should decode the type 1 font! } @@ -78,7 +83,7 @@ } var name = encoding.GetName(characterCode); - + try { value = GlyphList.AdobeGlyphList.NameToUnicode(name); @@ -107,9 +112,19 @@ return new PdfRectangle(0, 0, 250, 0); } + if (fontProgram == null) + { + return new PdfRectangle(0, 0, widths[characterCode - firstChar], 0); + } + var rect = fontProgram.GetCharacterBoundingBox(characterCode); - return rect; + if (!rect.HasValue) + { + return new PdfRectangle(0, 0, widths[characterCode - firstChar], 0); + } + + return rect.Value; } public TransformationMatrix GetFontMatrix() diff --git a/src/UglyToad.PdfPig/Fonts/Type1/CharStrings/CharacterPath.cs b/src/UglyToad.PdfPig/Fonts/Type1/CharStrings/CharacterPath.cs index e2f14519..ab69c605 100644 --- a/src/UglyToad.PdfPig/Fonts/Type1/CharStrings/CharacterPath.cs +++ b/src/UglyToad.PdfPig/Fonts/Type1/CharStrings/CharacterPath.cs @@ -56,8 +56,13 @@ namespace UglyToad.PdfPig.Fonts.Type1.CharStrings commands.Add(new Close()); } - public PdfRectangle GetBoundingRectangle() + public PdfRectangle? GetBoundingRectangle() { + if (commands.Count == 0) + { + return null; + } + var minX = decimal.MaxValue; var maxX = decimal.MinValue; @@ -139,7 +144,7 @@ namespace UglyToad.PdfPig.Fonts.Type1.CharStrings } var path = $""; - var bboxRect = BboxToRect(bbox, "yellow"); + var bboxRect = bbox.HasValue ? BboxToRect(bbox.Value, "yellow") : string.Empty; var others = string.Join(" ", bboxes.Select(x => BboxToRect(x, "gray"))); var result = $"{path} {bboxRect} {others}"; diff --git a/src/UglyToad.PdfPig/Fonts/Type1/Type1FontProgram.cs b/src/UglyToad.PdfPig/Fonts/Type1/Type1FontProgram.cs index a5d6dab8..4badff7b 100644 --- a/src/UglyToad.PdfPig/Fonts/Type1/Type1FontProgram.cs +++ b/src/UglyToad.PdfPig/Fonts/Type1/Type1FontProgram.cs @@ -59,27 +59,29 @@ CharStrings = charStrings ?? throw new ArgumentNullException(nameof(charStrings)); } - public PdfRectangle GetCharacterBoundingBox(int characterCode) + public PdfRectangle? GetCharacterBoundingBox(int characterCode) { var b = Encoding[characterCode]; var glyph = CharStrings.Generate(b); var bbox = glyph.GetBoundingRectangle(); + if (!bbox.HasValue) + { + return null; + } + if (Debugger.IsAttached) { - if (bbox.Bottom < BoundingBox.Bottom - || bbox.Top > BoundingBox.Top - || bbox.Left < BoundingBox.Left - || bbox.Right > BoundingBox.Right) - { - // Debugger.Break(); - } - var full = glyph.ToFullSvg(); Console.WriteLine(full); } return bbox; } + + public bool ContainsNamedCharacter(string name) + { + return CharStrings.CharStrings.ContainsKey(name); + } } } \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs b/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs index 2262e2bd..4565c3c7 100644 --- a/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs +++ b/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs @@ -123,7 +123,8 @@ if (!foundUnicode && !isLenientParsing) { - throw new InvalidOperationException($"We could not find the corresponding character with code {code} in font {font.Name}."); + // TODO: record warning + // throw new InvalidOperationException($"We could not find the corresponding character with code {code} in font {font.Name}."); } var wordSpacing = 0m;