support reading text from type 1 fonts which use standard 14 fonts

This commit is contained in:
Eliot Jones
2018-01-06 20:51:20 +00:00
parent 96d787e498
commit d1aa390f01
6 changed files with 133 additions and 10 deletions

View File

@@ -2,6 +2,7 @@
{
using System;
using System.IO;
using Content;
using Xunit;
public class SinglePageType1FontTests
@@ -16,9 +17,7 @@
[Fact]
public void HasCorrectNumberOfPages()
{
var file = GetFilename();
using (var document = PdfDocument.Open(File.ReadAllBytes(file)))
using (var document = PdfDocument.Open(GetFilename()))
{
Assert.Equal(1, document.NumberOfPages);
}
@@ -29,9 +28,20 @@
{
using (var document = PdfDocument.Open(GetFilename()))
{
//var page = document.GetPage(1);
var page = document.GetPage(1);
//Assert.Equal(PageSize.Letter, page.Size);
Assert.Equal(PageSize.Letter, page.Size);
}
}
// Opens the sample document and checks that the trimmed text of page 1 equals the expected sentence.
[Fact]
public void HasCorrectPageText()
{
    const string expectedText = "PDF test that contains the word yadda";

    using (var document = PdfDocument.Open(GetFilename()))
    {
        // Leading and trailing whitespace is stripped before comparing.
        var actualText = document.GetPage(1).Text.Trim();

        Assert.Equal(expectedText, actualText);
    }
}
}

View File

@@ -3,6 +3,7 @@
using System;
using Geometry;
using System.Collections.Generic;
using System.Diagnostics.Contracts;
/// <summary>
/// Specifies the conversion from the transformed coordinate space to the original untransformed coordinate space.
@@ -92,6 +93,15 @@
return new PdfPoint(x, y);
}
/// <summary>
/// Applies this matrix to the supplied vector and returns the result as a new vector.
/// </summary>
/// <param name="original">The vector to transform; it is only read.</param>
/// <returns>
/// A vector whose X is <c>A * X + C * Y + E</c> and whose Y is <c>B * X + D * Y + F</c>.
/// </returns>
[Pure]
public PdfVector Transform(PdfVector original)
{
    // The E and F terms are added to the result, exactly as the computation is written here.
    return new PdfVector(
        A * original.X + C * original.Y + E,
        B * original.X + D * original.Y + F);
}
/// <summary>
/// Builds a matrix from six individual values by packing them, in the order given,
/// into an array and handing that array to <see cref="FromArray(decimal[])"/>.
/// </summary>
public static TransformationMatrix FromValues(decimal a, decimal b, decimal c, decimal d, decimal e, decimal f)
{
    var values = new decimal[] { a, b, c, d, e, f };

    return FromArray(values);
}
public static TransformationMatrix FromArray(decimal[] values)

View File

@@ -0,0 +1,22 @@
namespace UglyToad.Pdf.Fonts.Encodings
{
using System;
/// <summary>
/// An encoding populated from the character metrics of a <see cref="FontMetrics"/> instance:
/// every character metric contributes one (character code, name) pair.
/// </summary>
internal class AdobeFontMetricsEncoding : Encoding
{
    /// <summary>
    /// The fixed name of this encoding, "AFM".
    /// </summary>
    public override string EncodingName => "AFM";

    /// <summary>
    /// Creates the encoding by registering each entry of <paramref name="metrics"/>' character metrics.
    /// </summary>
    /// <param name="metrics">The font metrics supplying the character metrics to register.</param>
    /// <exception cref="ArgumentNullException"><paramref name="metrics"/> is null.</exception>
    public AdobeFontMetricsEncoding(FontMetrics metrics)
    {
        var characterMetrics = (metrics ?? throw new ArgumentNullException(nameof(metrics))).CharacterMetrics;

        foreach (var entry in characterMetrics)
        {
            // The entry's key is used as the name and its value carries the character code.
            var code = entry.Value.CharacterCode;
            var name = entry.Key;

            Add(code, name);
        }
    }
}
}

View File

@@ -15,8 +15,7 @@
{
if (!dictionary.TryGetItemOfType(CosName.FIRST_CHAR, out CosInt firstChar))
{
throw new InvalidFontFormatException(
$"No first character entry was found in the font dictionary for this TrueType font: {dictionary}.");
throw new InvalidFontFormatException($"No first character entry was found in the font dictionary for this TrueType font: {dictionary}.");
}
return firstChar.AsInt();
@@ -26,8 +25,7 @@
{
if (!dictionary.TryGetItemOfType(CosName.LAST_CHAR, out CosInt lastChar))
{
throw new InvalidFontFormatException(
$"No last character entry was found in the font dictionary for this TrueType font: {dictionary}.");
throw new InvalidFontFormatException($"No last character entry was found in the font dictionary for this TrueType font: {dictionary}.");
}
return lastChar.AsInt();

View File

@@ -29,6 +29,20 @@
public IFont Generate(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
{
var usingStandard14Only = !dictionary.ContainsKey(CosName.FIRST_CHAR);
if (usingStandard14Only)
{
if (!dictionary.TryGetName(CosName.BASE_FONT, out var standard14Name))
{
throw new InvalidFontFormatException($"The Type 1 font did not contain a first character entry but also did not reference a standard 14 font: {dictionary}");
}
var metrics = Standard14.GetAdobeFontMetrics(standard14Name.Name);
return new Type1Standard14Font(metrics);
}
var firstCharacter = FontDictionaryAccessHelper.GetFirstCharacter(dictionary);
var lastCharacter = FontDictionaryAccessHelper.GetLastCharacter(dictionary);
@@ -69,7 +83,7 @@
throw new InvalidFontFormatException($"Unrecognised encoding name: {encodingName}");
}
}
else if (encodingBase is CosDictionary encodingDictionary)
else if (encodingBase is CosDictionary)
{
throw new NotImplementedException("No support for reading encoding from dictionary yet.");
}

View File

@@ -0,0 +1,69 @@
namespace UglyToad.Pdf.Fonts.Simple
{
using System;
using Core;
using Cos;
using Encodings;
using Geometry;
using IO;
/// <summary>
/// An <see cref="IFont"/> implementation backed entirely by a <see cref="FontMetrics"/> instance:
/// the metrics provide the font name, the character code to name mapping (through
/// <see cref="AdobeFontMetricsEncoding"/>) and the character widths.
/// </summary>
internal class Type1Standard14Font: IFont
{
    /// <summary>
    /// Width returned by <see cref="GetWidth"/> when the metrics contain no entry for a character's name.
    /// </summary>
    private const decimal DefaultWidth = 250m;

    /// <summary>
    /// The font matrix shared by all instances: a = d = 0.001, every other entry is zero.
    /// </summary>
    private static readonly TransformationMatrix FontMatrix = TransformationMatrix.FromValues(0.001m, 0, 0, 0.001m, 0, 0);

    private readonly FontMetrics standardFontMetrics;
    private readonly Encoding encoding;

    /// <summary>
    /// The font name, created from the metrics' <c>FontName</c>.
    /// </summary>
    public CosName Name { get; }

    /// <summary>
    /// Always <c>false</c> for this font.
    /// </summary>
    public bool IsVertical { get; }

    /// <summary>
    /// Creates the font from the supplied metrics.
    /// </summary>
    /// <param name="standardFontMetrics">The metrics describing the font.</param>
    /// <exception cref="ArgumentNullException"><paramref name="standardFontMetrics"/> is null.</exception>
    public Type1Standard14Font(FontMetrics standardFontMetrics)
    {
        this.standardFontMetrics = standardFontMetrics ?? throw new ArgumentNullException(nameof(standardFontMetrics));
        encoding = new AdobeFontMetricsEncoding(standardFontMetrics);

        Name = CosName.Create(standardFontMetrics.FontName);

        IsVertical = false;
    }

    /// <summary>
    /// Returns the current byte of <paramref name="bytes"/> as the character code.
    /// </summary>
    /// <param name="bytes">The input positioned at the character code.</param>
    /// <param name="codeLength">Always set to 1.</param>
    /// <returns>The value of <c>bytes.CurrentByte</c>.</returns>
    public int ReadCharacterCode(IInputBytes bytes, out int codeLength)
    {
        codeLength = 1;
        return bytes.CurrentByte;
    }

    /// <summary>
    /// Looks up the name for <paramref name="characterCode"/> in the encoding and converts it to
    /// Unicode using the Adobe glyph list.
    /// </summary>
    /// <param name="characterCode">The character code to convert.</param>
    /// <param name="value">The Unicode value returned by the glyph list lookup.</param>
    /// <returns><c>true</c> if the lookup produced a non-null value; otherwise <c>false</c>.</returns>
    public bool TryGetUnicode(int characterCode, out string value)
    {
        var name = encoding.GetName(characterCode);

        value = GlyphList.AdobeGlyphList.NameToUnicode(name);

        // Never report success together with a null value, which would break the Try-pattern
        // contract callers rely on. NOTE(review): assumes the glyph list signals "no mapping"
        // by returning null - confirm against GlyphList.NameToUnicode.
        return value != null;
    }

    /// <summary>
    /// Returns the horizontal displacement for the character: the vector (width, 0) transformed by
    /// the font matrix.
    /// </summary>
    /// <param name="characterCode">The character code whose displacement is required.</param>
    public PdfVector GetDisplacement(int characterCode)
    {
        return FontMatrix.Transform(new PdfVector(GetWidth(characterCode), 0));
    }

    /// <summary>
    /// Returns the <c>WidthX</c> recorded in the metrics for the character's name.
    /// </summary>
    /// <param name="characterCode">The character code whose width is required.</param>
    /// <returns>The metric width, or 250 when the metrics have no entry for the name.</returns>
    public decimal GetWidth(int characterCode)
    {
        var name = encoding.GetName(characterCode);

        if (!standardFontMetrics.CharacterMetrics.TryGetValue(name, out var metrics))
        {
            return DefaultWidth;
        }

        return metrics.WidthX;
    }

    /// <summary>
    /// Returns the shared font matrix.
    /// </summary>
    public TransformationMatrix GetFontMatrix()
    {
        return FontMatrix;
    }
}
}