bug fixes for reading from type 1 fonts. test all pages in documents. use content rather than embedded resources for #4

This commit is contained in:
Eliot Jones
2018-11-15 19:52:42 +00:00
parent 64a35e3217
commit 7e1bcf6f64
12 changed files with 124 additions and 27 deletions

View File

@@ -54,17 +54,17 @@
/// <summary>
/// Reads a Type 1 font test file from the "Fonts/Type1" folder located three levels
/// above the application base directory.
/// </summary>
/// <param name="name">Fragment of the file name to search for (case-insensitive).</param>
/// <returns>The bytes of the first file whose file name contains <paramref name="name"/>.</returns>
/// <exception cref="InvalidOperationException">No file in the folder matches <paramref name="name"/>.</exception>
private static byte[] GetFileBytes(string name)
{
    var documentFolder = Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "Fonts", "Type1"));

    // The fonts are read from disk as content files; the embedded-resource lookup is gone
    // because it throws as soon as the resources are no longer embedded.
    // Compare against the file name only, so a folder in the path (e.g. "Type1")
    // can never produce a false match.
    var file = Directory.GetFiles(documentFolder)
        .FirstOrDefault(x => Path.GetFileName(x).IndexOf(name, StringComparison.OrdinalIgnoreCase) >= 0);

    if (file == null)
    {
        throw new InvalidOperationException($"Could not find test file {name} in folder {documentFolder}.");
    }

    return File.ReadAllBytes(file);
}
}
}

View File

@@ -0,0 +1,36 @@
namespace UglyToad.PdfPig.Tests.Integration
{
    using System;
    using System.Collections.Generic;
    using System.IO;
    using System.Linq;
    using Xunit;

    /// <summary>
    /// Opens every PDF found in the "Integration/Documents" folder and loads each of its pages.
    /// </summary>
    public class IntegrationDocumentTests
    {
        /// <summary>
        /// One theory row per "*.pdf" file in the "Integration/Documents" folder, which is
        /// resolved three levels above the application base directory. Each row holds the file path.
        /// </summary>
        public static IEnumerable<object[]> GetAllDocuments
        {
            get
            {
                var relativeFolder = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "Integration", "Documents");
                var absoluteFolder = Path.GetFullPath(relativeFolder);
                var pdfPaths = Directory.GetFiles(absoluteFolder, "*.pdf");

                return pdfPaths.Select(path => new object[] { path });
            }
        }

        /// <summary>
        /// Opens the document with lenient parsing switched off and requests every page.
        /// There are no assertions; the test passes when nothing throws.
        /// </summary>
        [Theory]
        [MemberData(nameof(GetAllDocuments))]
        public void CanReadAllPages(string documentName)
        {
            var options = new ParsingOptions { UseLenientParsing = false };

            using (var document = PdfDocument.Open(documentName, options))
            {
                // GetPage is called with numbers 1..NumberOfPages.
                for (var pageNumber = 1; pageNumber <= document.NumberOfPages; pageNumber++)
                {
                    document.GetPage(pageNumber);
                }
            }
        }
    }
}

View File

@@ -32,5 +32,17 @@
Assert.Equal(35, document.NumberOfPages);
}
}
/// <summary>
/// Opens the document returned by GetFilename and requests every page.
/// There are no assertions; the test passes when nothing throws.
/// </summary>
[Fact]
public void CanReadAllPages()
{
    using (var document = PdfDocument.Open(GetFilename()))
    {
        // GetPage is called with numbers 1..NumberOfPages.
        for (var pageNumber = 1; pageNumber <= document.NumberOfPages; pageNumber++)
        {
            document.GetPage(pageNumber);
        }
    }
}
}
}

View File

@@ -29,6 +29,22 @@
</Content>
</ItemGroup>
<ItemGroup>
<EmbeddedResource Remove="Fonts\Type1\AdobeUtopia.pfa" />
<EmbeddedResource Remove="Fonts\Type1\CMBX10.pfa" />
<EmbeddedResource Remove="Fonts\Type1\CMBX12.pfa" />
<EmbeddedResource Remove="Fonts\Type1\CMCSC10.pfa" />
<EmbeddedResource Remove="Fonts\Type1\Raleway-Black.pfb" />
</ItemGroup>
<ItemGroup>
<Content Include="Fonts\Type1\AdobeUtopia.pfa" />
<Content Include="Fonts\Type1\CMBX10.pfa" />
<Content Include="Fonts\Type1\CMBX12.pfa" />
<Content Include="Fonts\Type1\CMCSC10.pfa" />
<Content Include="Fonts\Type1\Raleway-Black.pfb" />
</ItemGroup>
<ItemGroup>
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="15.5.0" />
<PackageReference Include="System.Drawing.Common" Version="4.5.0-preview2-26406-04" />

View File

@@ -63,7 +63,7 @@
var parsed = DirectObjectFinder.Get<DictionaryToken>(obj, pdfScanner);
var descriptor = fontDescriptorFactory.Generate(parsed, isLenientParsing);
var descriptor = fontDescriptorFactory.Generate(parsed, pdfScanner, isLenientParsing);
return descriptor;
}

View File

@@ -1,5 +1,6 @@
namespace UglyToad.PdfPig.Fonts.Parser.Handlers
{
using System;
using Cmap;
using CompactFontFormat;
using Encodings;
@@ -118,6 +119,7 @@
&& NameToken.Type1C.Equals(subTypeName))
{
compactFontFormatParser.Parse(new CompactFontFormatData(bytes));
throw new NotSupportedException("TODO: support Compact Font Format...");
return null;
}

View File

@@ -44,7 +44,7 @@
FontDescriptor descriptor = null;
if (TryGetFontDescriptor(dictionary, out var descriptorDictionary))
{
descriptor = descriptorFactory.Generate(descriptorDictionary, isLenientParsing);
descriptor = descriptorFactory.Generate(descriptorDictionary, pdfScanner, isLenientParsing);
}
var fontProgram = ReadDescriptorFile(descriptor);
@@ -248,7 +248,7 @@
if (entry is NameToken name)
{
if (!name.Equals(NameToken.CidToGidMap) && !isLenientParsing)
if (!name.Equals(NameToken.Identity) && !isLenientParsing)
{
throw new InvalidOperationException($"The CIDToGIDMap in a Type 0 font should have the value /Identity, instead got: {name}.");
}

View File

@@ -2,20 +2,22 @@
{
using System;
using Geometry;
using PdfPig.Parser.Parts;
using Tokenization.Scanner;
using Tokenization.Tokens;
using Util;
using Util.JetBrains.Annotations;
internal class FontDescriptorFactory
{
public FontDescriptor Generate(DictionaryToken dictionary, bool isLenientParsing)
public FontDescriptor Generate(DictionaryToken dictionary, IPdfTokenScanner pdfScanner, bool isLenientParsing)
{
if (dictionary == null)
{
throw new ArgumentNullException(nameof(dictionary));
}
var name = GetFontName(dictionary, isLenientParsing);
var name = GetFontName(dictionary, pdfScanner, isLenientParsing);
var family = GetFontFamily(dictionary);
var stretch = GetFontStretch(dictionary);
var flags = GetFlags(dictionary, isLenientParsing);
@@ -55,10 +57,16 @@
return number.Data;
}
private static NameToken GetFontName(DictionaryToken dictionary, bool isLenientParsing)
private static NameToken GetFontName(DictionaryToken dictionary, IPdfTokenScanner scanner, bool isLenientParsing)
{
if (!dictionary.TryGet(NameToken.FontName, out var name) || !(name is NameToken nameToken))
{
if (name is IndirectReferenceToken nameReference)
{
var indirectName = DirectObjectFinder.Get<NameToken>(nameReference, scanner);
return indirectName;
}
if (isLenientParsing)
{
nameToken = NameToken.Create(string.Empty);

View File

@@ -71,6 +71,11 @@
}
catch
{
if (fontProgram != null)
{
var result = fontProgram.Encoding.TryGetValue(characterCode, out value);
return result;
}
// our quick hack has failed, we should decode the type 1 font!
}
@@ -107,9 +112,19 @@
return new PdfRectangle(0, 0, 250, 0);
}
if (fontProgram == null)
{
return new PdfRectangle(0, 0, widths[characterCode - firstChar], 0);
}
var rect = fontProgram.GetCharacterBoundingBox(characterCode);
return rect;
if (!rect.HasValue)
{
return new PdfRectangle(0, 0, widths[characterCode - firstChar], 0);
}
return rect.Value;
}
public TransformationMatrix GetFontMatrix()

View File

@@ -56,8 +56,13 @@ namespace UglyToad.PdfPig.Fonts.Type1.CharStrings
commands.Add(new Close());
}
public PdfRectangle GetBoundingRectangle()
public PdfRectangle? GetBoundingRectangle()
{
if (commands.Count == 0)
{
return null;
}
var minX = decimal.MaxValue;
var maxX = decimal.MinValue;
@@ -139,7 +144,7 @@ namespace UglyToad.PdfPig.Fonts.Type1.CharStrings
}
var path = $"<path d='{glyph}' stroke='cyan' stroke-width='3'></path>";
var bboxRect = BboxToRect(bbox, "yellow");
var bboxRect = bbox.HasValue ? BboxToRect(bbox.Value, "yellow") : string.Empty;
var others = string.Join(" ", bboxes.Select(x => BboxToRect(x, "gray")));
var result = $"<svg transform='scale(1, -1)' width='2000' height='2000'>{path} {bboxRect} {others}</svg>";

View File

@@ -59,27 +59,29 @@
CharStrings = charStrings ?? throw new ArgumentNullException(nameof(charStrings));
}
/// <summary>
/// Generates the glyph for a character code and returns its bounding rectangle.
/// </summary>
/// <param name="characterCode">The code used to index <c>Encoding</c>.</param>
/// <returns>
/// The glyph's bounding rectangle, or <c>null</c> when the generated glyph reports none.
/// </returns>
public PdfRectangle? GetCharacterBoundingBox(int characterCode)
{
    var glyphName = Encoding[characterCode];
    var glyph = CharStrings.Generate(glyphName);
    var bbox = glyph.GetBoundingRectangle();

    // No rectangle to report; callers are expected to handle the null.
    if (!bbox.HasValue)
    {
        return null;
    }

    // Debugging aid only: dump the glyph as SVG when a debugger is attached.
    if (Debugger.IsAttached)
    {
        var full = glyph.ToFullSvg();
        Console.WriteLine(full);
    }

    return bbox;
}
/// <summary>
/// Reports whether the charstrings collection has an entry keyed by <paramref name="name"/>.
/// </summary>
/// <param name="name">The key to look up.</param>
/// <returns><c>true</c> when the key is present; otherwise <c>false</c>.</returns>
public bool ContainsNamedCharacter(string name) => CharStrings.CharStrings.ContainsKey(name);
}
}

View File

@@ -123,7 +123,8 @@
if (!foundUnicode && !isLenientParsing)
{
throw new InvalidOperationException($"We could not find the corresponding character with code {code} in font {font.Name}.");
// TODO: record warning
// throw new InvalidOperationException($"We could not find the corresponding character with code {code} in font {font.Name}.");
}
var wordSpacing = 0m;