Bug fixes for reading Type 1 fonts. Test all pages in the integration documents. Use content files rather than embedded resources for #4

This commit is contained in:
Eliot Jones
2018-11-15 19:52:42 +00:00
parent 64a35e3217
commit 7e1bcf6f64
12 changed files with 124 additions and 27 deletions

View File

@@ -54,17 +54,17 @@
private static byte[] GetFileBytes(string name) private static byte[] GetFileBytes(string name)
{ {
var manifestFiles = typeof(Type1FontParserTests).Assembly.GetManifestResourceNames(); var documentFolder = Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "Fonts", "Type1"));
var files = Directory.GetFiles(documentFolder);
var match = manifestFiles.Single(x => x.IndexOf(name, StringComparison.InvariantCultureIgnoreCase) >= 0); var file = files.FirstOrDefault(x => x.IndexOf(name, StringComparison.OrdinalIgnoreCase) >= 0);
using (var memoryStream = new MemoryStream()) if (file == null)
using (var stream = typeof(Type1FontParserTests).Assembly.GetManifestResourceStream(match))
{ {
stream.CopyTo(memoryStream); throw new InvalidOperationException($"Could not find test file {name} in folder {documentFolder}.");
return memoryStream.ToArray();
} }
return File.ReadAllBytes(file);
} }
} }
} }

View File

@@ -0,0 +1,36 @@
namespace UglyToad.PdfPig.Tests.Integration
{
    using System;
    using System.Collections.Generic;
    using System.IO;
    using System.Linq;
    using Xunit;

    /// <summary>
    /// Runs the same page-reading check against every PDF file found in the
    /// Integration/Documents folder.
    /// </summary>
    public class IntegrationDocumentTests
    {
        /// <summary>
        /// Opens the document with lenient parsing switched off and requests every page,
        /// from page 1 up to and including <c>NumberOfPages</c>.
        /// The test has no explicit assertions; it fails only if opening or reading a page throws.
        /// </summary>
        /// <param name="documentName">Path of the PDF file to open, as supplied by <see cref="GetAllDocuments"/>.</param>
        [Theory]
        [MemberData(nameof(GetAllDocuments))]
        public void CanReadAllPages(string documentName)
        {
            var options = new ParsingOptions { UseLenientParsing = false };

            using (var document = PdfDocument.Open(documentName, options))
            {
                for (var pageNumber = 1; pageNumber <= document.NumberOfPages; pageNumber++)
                {
                    document.GetPage(pageNumber);
                }
            }
        }

        /// <summary>
        /// Theory data: one single-element argument array per "*.pdf" file in the
        /// Integration/Documents folder, located three levels above the application base directory.
        /// </summary>
        public static IEnumerable<object[]> GetAllDocuments
        {
            get
            {
                var relativeFolder = Path.Combine(
                    AppDomain.CurrentDomain.BaseDirectory,
                    "..",
                    "..",
                    "..",
                    "Integration",
                    "Documents");
                var absoluteFolder = Path.GetFullPath(relativeFolder);

                // The directory listing is taken eagerly here; only the projection to object[] is deferred.
                var pdfPaths = Directory.GetFiles(absoluteFolder, "*.pdf");

                return pdfPaths.Select(path => new object[] { path });
            }
        }
    }
}

View File

@@ -32,5 +32,17 @@
Assert.Equal(35, document.NumberOfPages); Assert.Equal(35, document.NumberOfPages);
} }
} }
/// <summary>
/// Opens the test document and requests each page in turn, from page 1 up to and
/// including <c>NumberOfPages</c>. The test fails only if opening or reading a page throws.
/// </summary>
[Fact]
public void CanReadAllPages()
{
    using (var document = PdfDocument.Open(GetFilename()))
    {
        var pageNumber = 1;
        while (pageNumber <= document.NumberOfPages)
        {
            document.GetPage(pageNumber);
            pageNumber++;
        }
    }
}
} }
} }

View File

@@ -29,6 +29,22 @@
</Content> </Content>
</ItemGroup> </ItemGroup>
<!-- Take the Type 1 test fonts out of the EmbeddedResource item list... -->
<ItemGroup>
<EmbeddedResource Remove="Fonts\Type1\AdobeUtopia.pfa" />
<EmbeddedResource Remove="Fonts\Type1\CMBX10.pfa" />
<EmbeddedResource Remove="Fonts\Type1\CMBX12.pfa" />
<EmbeddedResource Remove="Fonts\Type1\CMCSC10.pfa" />
<EmbeddedResource Remove="Fonts\Type1\Raleway-Black.pfb" />
</ItemGroup>
<!-- ...and list the same five files as Content items instead. -->
<!-- NOTE(review): no CopyToOutputDirectory is set here, so presumably tests read these from the project folder rather than the build output; confirm. -->
<ItemGroup>
<Content Include="Fonts\Type1\AdobeUtopia.pfa" />
<Content Include="Fonts\Type1\CMBX10.pfa" />
<Content Include="Fonts\Type1\CMBX12.pfa" />
<Content Include="Fonts\Type1\CMCSC10.pfa" />
<Content Include="Fonts\Type1\Raleway-Black.pfb" />
</ItemGroup>
<ItemGroup> <ItemGroup>
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="15.5.0" /> <PackageReference Include="Microsoft.NET.Test.Sdk" Version="15.5.0" />
<PackageReference Include="System.Drawing.Common" Version="4.5.0-preview2-26406-04" /> <PackageReference Include="System.Drawing.Common" Version="4.5.0-preview2-26406-04" />

View File

@@ -63,7 +63,7 @@
var parsed = DirectObjectFinder.Get<DictionaryToken>(obj, pdfScanner); var parsed = DirectObjectFinder.Get<DictionaryToken>(obj, pdfScanner);
var descriptor = fontDescriptorFactory.Generate(parsed, isLenientParsing); var descriptor = fontDescriptorFactory.Generate(parsed, pdfScanner, isLenientParsing);
return descriptor; return descriptor;
} }

View File

@@ -1,5 +1,6 @@
namespace UglyToad.PdfPig.Fonts.Parser.Handlers namespace UglyToad.PdfPig.Fonts.Parser.Handlers
{ {
using System;
using Cmap; using Cmap;
using CompactFontFormat; using CompactFontFormat;
using Encodings; using Encodings;
@@ -118,6 +119,7 @@
&& NameToken.Type1C.Equals(subTypeName)) && NameToken.Type1C.Equals(subTypeName))
{ {
compactFontFormatParser.Parse(new CompactFontFormatData(bytes)); compactFontFormatParser.Parse(new CompactFontFormatData(bytes));
throw new NotSupportedException("TODO: support Compact Font Format...");
return null; return null;
} }

View File

@@ -44,7 +44,7 @@
FontDescriptor descriptor = null; FontDescriptor descriptor = null;
if (TryGetFontDescriptor(dictionary, out var descriptorDictionary)) if (TryGetFontDescriptor(dictionary, out var descriptorDictionary))
{ {
descriptor = descriptorFactory.Generate(descriptorDictionary, isLenientParsing); descriptor = descriptorFactory.Generate(descriptorDictionary, pdfScanner, isLenientParsing);
} }
var fontProgram = ReadDescriptorFile(descriptor); var fontProgram = ReadDescriptorFile(descriptor);
@@ -248,7 +248,7 @@
if (entry is NameToken name) if (entry is NameToken name)
{ {
if (!name.Equals(NameToken.CidToGidMap) && !isLenientParsing) if (!name.Equals(NameToken.Identity) && !isLenientParsing)
{ {
throw new InvalidOperationException($"The CIDToGIDMap in a Type 0 font should have the value /Identity, instead got: {name}."); throw new InvalidOperationException($"The CIDToGIDMap in a Type 0 font should have the value /Identity, instead got: {name}.");
} }

View File

@@ -2,20 +2,22 @@
{ {
using System; using System;
using Geometry; using Geometry;
using PdfPig.Parser.Parts;
using Tokenization.Scanner;
using Tokenization.Tokens; using Tokenization.Tokens;
using Util; using Util;
using Util.JetBrains.Annotations; using Util.JetBrains.Annotations;
internal class FontDescriptorFactory internal class FontDescriptorFactory
{ {
public FontDescriptor Generate(DictionaryToken dictionary, bool isLenientParsing) public FontDescriptor Generate(DictionaryToken dictionary, IPdfTokenScanner pdfScanner, bool isLenientParsing)
{ {
if (dictionary == null) if (dictionary == null)
{ {
throw new ArgumentNullException(nameof(dictionary)); throw new ArgumentNullException(nameof(dictionary));
} }
var name = GetFontName(dictionary, isLenientParsing); var name = GetFontName(dictionary, pdfScanner, isLenientParsing);
var family = GetFontFamily(dictionary); var family = GetFontFamily(dictionary);
var stretch = GetFontStretch(dictionary); var stretch = GetFontStretch(dictionary);
var flags = GetFlags(dictionary, isLenientParsing); var flags = GetFlags(dictionary, isLenientParsing);
@@ -55,10 +57,16 @@
return number.Data; return number.Data;
} }
private static NameToken GetFontName(DictionaryToken dictionary, bool isLenientParsing) private static NameToken GetFontName(DictionaryToken dictionary, IPdfTokenScanner scanner, bool isLenientParsing)
{ {
if (!dictionary.TryGet(NameToken.FontName, out var name) || !(name is NameToken nameToken)) if (!dictionary.TryGet(NameToken.FontName, out var name) || !(name is NameToken nameToken))
{ {
if (name is IndirectReferenceToken nameReference)
{
var indirectName = DirectObjectFinder.Get<NameToken>(nameReference, scanner);
return indirectName;
}
if (isLenientParsing) if (isLenientParsing)
{ {
nameToken = NameToken.Create(string.Empty); nameToken = NameToken.Create(string.Empty);

View File

@@ -71,6 +71,11 @@
} }
catch catch
{ {
if (fontProgram != null)
{
var result = fontProgram.Encoding.TryGetValue(characterCode, out value);
return result;
}
// our quick hack has failed, we should decode the type 1 font! // our quick hack has failed, we should decode the type 1 font!
} }
@@ -78,7 +83,7 @@
} }
var name = encoding.GetName(characterCode); var name = encoding.GetName(characterCode);
try try
{ {
value = GlyphList.AdobeGlyphList.NameToUnicode(name); value = GlyphList.AdobeGlyphList.NameToUnicode(name);
@@ -107,9 +112,19 @@
return new PdfRectangle(0, 0, 250, 0); return new PdfRectangle(0, 0, 250, 0);
} }
if (fontProgram == null)
{
return new PdfRectangle(0, 0, widths[characterCode - firstChar], 0);
}
var rect = fontProgram.GetCharacterBoundingBox(characterCode); var rect = fontProgram.GetCharacterBoundingBox(characterCode);
return rect; if (!rect.HasValue)
{
return new PdfRectangle(0, 0, widths[characterCode - firstChar], 0);
}
return rect.Value;
} }
public TransformationMatrix GetFontMatrix() public TransformationMatrix GetFontMatrix()

View File

@@ -56,8 +56,13 @@ namespace UglyToad.PdfPig.Fonts.Type1.CharStrings
commands.Add(new Close()); commands.Add(new Close());
} }
public PdfRectangle GetBoundingRectangle() public PdfRectangle? GetBoundingRectangle()
{ {
if (commands.Count == 0)
{
return null;
}
var minX = decimal.MaxValue; var minX = decimal.MaxValue;
var maxX = decimal.MinValue; var maxX = decimal.MinValue;
@@ -139,7 +144,7 @@ namespace UglyToad.PdfPig.Fonts.Type1.CharStrings
} }
var path = $"<path d='{glyph}' stroke='cyan' stroke-width='3'></path>"; var path = $"<path d='{glyph}' stroke='cyan' stroke-width='3'></path>";
var bboxRect = BboxToRect(bbox, "yellow"); var bboxRect = bbox.HasValue ? BboxToRect(bbox.Value, "yellow") : string.Empty;
var others = string.Join(" ", bboxes.Select(x => BboxToRect(x, "gray"))); var others = string.Join(" ", bboxes.Select(x => BboxToRect(x, "gray")));
var result = $"<svg transform='scale(1, -1)' width='2000' height='2000'>{path} {bboxRect} {others}</svg>"; var result = $"<svg transform='scale(1, -1)' width='2000' height='2000'>{path} {bboxRect} {others}</svg>";

View File

@@ -59,27 +59,29 @@
CharStrings = charStrings ?? throw new ArgumentNullException(nameof(charStrings)); CharStrings = charStrings ?? throw new ArgumentNullException(nameof(charStrings));
} }
public PdfRectangle GetCharacterBoundingBox(int characterCode) public PdfRectangle? GetCharacterBoundingBox(int characterCode)
{ {
var b = Encoding[characterCode]; var b = Encoding[characterCode];
var glyph = CharStrings.Generate(b); var glyph = CharStrings.Generate(b);
var bbox = glyph.GetBoundingRectangle(); var bbox = glyph.GetBoundingRectangle();
if (!bbox.HasValue)
{
return null;
}
if (Debugger.IsAttached) if (Debugger.IsAttached)
{ {
if (bbox.Bottom < BoundingBox.Bottom
|| bbox.Top > BoundingBox.Top
|| bbox.Left < BoundingBox.Left
|| bbox.Right > BoundingBox.Right)
{
// Debugger.Break();
}
var full = glyph.ToFullSvg(); var full = glyph.ToFullSvg();
Console.WriteLine(full); Console.WriteLine(full);
} }
return bbox; return bbox;
} }
/// <summary>
/// Reports whether the char strings collection of this font has an entry keyed by the given name.
/// </summary>
/// <param name="name">The character name to look up.</param>
/// <returns><c>true</c> if <c>CharStrings.CharStrings</c> contains the key; otherwise <c>false</c>.</returns>
public bool ContainsNamedCharacter(string name) => CharStrings.CharStrings.ContainsKey(name);
} }
} }

View File

@@ -123,7 +123,8 @@
if (!foundUnicode && !isLenientParsing) if (!foundUnicode && !isLenientParsing)
{ {
throw new InvalidOperationException($"We could not find the corresponding character with code {code} in font {font.Name}."); // TODO: record warning
// throw new InvalidOperationException($"We could not find the corresponding character with code {code} in font {font.Name}.");
} }
var wordSpacing = 0m; var wordSpacing = 0m;