mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-10-15 19:54:52 +08:00
Support reading text from Type 1 fonts which use the Standard 14 fonts
This commit is contained in:
@@ -2,6 +2,7 @@
|
||||
{
|
||||
using System;
|
||||
using System.IO;
|
||||
using Content;
|
||||
using Xunit;
|
||||
|
||||
public class SinglePageType1FontTests
|
||||
@@ -16,9 +17,7 @@
|
||||
[Fact]
|
||||
public void HasCorrectNumberOfPages()
|
||||
{
|
||||
var file = GetFilename();
|
||||
|
||||
using (var document = PdfDocument.Open(File.ReadAllBytes(file)))
|
||||
using (var document = PdfDocument.Open(GetFilename()))
|
||||
{
|
||||
Assert.Equal(1, document.NumberOfPages);
|
||||
}
|
||||
@@ -29,9 +28,20 @@
|
||||
{
|
||||
using (var document = PdfDocument.Open(GetFilename()))
|
||||
{
|
||||
//var page = document.GetPage(1);
|
||||
var page = document.GetPage(1);
|
||||
|
||||
//Assert.Equal(PageSize.Letter, page.Size);
|
||||
Assert.Equal(PageSize.Letter, page.Size);
|
||||
}
|
||||
}
|
||||
|
||||
[Fact]
public void HasCorrectPageText()
{
    // The text the first page is expected to yield once surrounding whitespace is trimmed.
    const string expectedText = "PDF test that contains the word yadda";

    using (var pdf = PdfDocument.Open(GetFilename()))
    {
        var actualText = pdf.GetPage(1).Text.Trim();

        Assert.Equal(expectedText, actualText);
    }
}
|
||||
}
|
||||
|
@@ -3,6 +3,7 @@
|
||||
using System;
|
||||
using Geometry;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics.Contracts;
|
||||
|
||||
/// <summary>
|
||||
/// Specifies the conversion from the transformed coordinate space to the original untransformed coordinate space.
|
||||
@@ -92,6 +93,15 @@
|
||||
return new PdfPoint(x, y);
|
||||
}
|
||||
|
||||
/// <summary>
/// Applies this matrix to a vector and returns the result as a new vector.
/// The translation components (E and F) are included in the result.
/// </summary>
/// <param name="original">The vector to transform.</param>
/// <returns>A new <see cref="PdfVector"/> holding the transformed X and Y values.</returns>
[Pure]
public PdfVector Transform(PdfVector original)
    => new PdfVector(
        (A * original.X) + (C * original.Y) + E,
        (B * original.X) + (D * original.Y) + F);
|
||||
|
||||
/// <summary>
/// Builds a matrix from six individual values by packing them, in the order given,
/// into an array and passing it to <see cref="FromArray"/>.
/// </summary>
public static TransformationMatrix FromValues(decimal a, decimal b, decimal c, decimal d, decimal e, decimal f)
{
    var packed = new decimal[] { a, b, c, d, e, f };

    return FromArray(packed);
}
|
||||
public static TransformationMatrix FromArray(decimal[] values)
|
||||
|
22
src/UglyToad.Pdf/Fonts/Encodings/AdobeFontMetricsEncoding.cs
Normal file
22
src/UglyToad.Pdf/Fonts/Encodings/AdobeFontMetricsEncoding.cs
Normal file
@@ -0,0 +1,22 @@
|
||||
namespace UglyToad.Pdf.Fonts.Encodings
|
||||
{
|
||||
using System;
|
||||
|
||||
/// <summary>
/// An encoding populated from the character metrics of a font metrics object:
/// each entry's character code is registered against the entry's key.
/// </summary>
internal class AdobeFontMetricsEncoding : Encoding
{
    /// <summary>
    /// The name reported for this encoding.
    /// </summary>
    public override string EncodingName => "AFM";

    /// <summary>
    /// Registers every character metric entry in <paramref name="metrics"/> with this encoding.
    /// </summary>
    /// <param name="metrics">The font metrics supplying the code/name pairs.</param>
    /// <exception cref="ArgumentNullException"><paramref name="metrics"/> is null.</exception>
    public AdobeFontMetricsEncoding(FontMetrics metrics)
    {
        if (metrics == null)
        {
            throw new ArgumentNullException(nameof(metrics));
        }

        foreach (var entry in metrics.CharacterMetrics)
        {
            var code = entry.Value.CharacterCode;
            var glyphName = entry.Key;

            Add(code, glyphName);
        }
    }
}
|
||||
}
|
@@ -15,8 +15,7 @@
|
||||
{
|
||||
if (!dictionary.TryGetItemOfType(CosName.FIRST_CHAR, out CosInt firstChar))
|
||||
{
|
||||
throw new InvalidFontFormatException(
|
||||
$"No first character entry was found in the font dictionary for this TrueType font: {dictionary}.");
|
||||
throw new InvalidFontFormatException($"No first character entry was found in the font dictionary for this TrueType font: {dictionary}.");
|
||||
}
|
||||
|
||||
return firstChar.AsInt();
|
||||
@@ -26,8 +25,7 @@
|
||||
{
|
||||
if (!dictionary.TryGetItemOfType(CosName.LAST_CHAR, out CosInt lastChar))
|
||||
{
|
||||
throw new InvalidFontFormatException(
|
||||
$"No last character entry was found in the font dictionary for this TrueType font: {dictionary}.");
|
||||
throw new InvalidFontFormatException($"No last character entry was found in the font dictionary for this TrueType font: {dictionary}.");
|
||||
}
|
||||
|
||||
return lastChar.AsInt();
|
||||
|
@@ -29,6 +29,20 @@
|
||||
|
||||
public IFont Generate(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
|
||||
{
|
||||
var usingStandard14Only = !dictionary.ContainsKey(CosName.FIRST_CHAR);
|
||||
|
||||
if (usingStandard14Only)
|
||||
{
|
||||
if (!dictionary.TryGetName(CosName.BASE_FONT, out var standard14Name))
|
||||
{
|
||||
throw new InvalidFontFormatException($"The Type 1 font did not contain a first character entry but also did not reference a standard 14 font: {dictionary}");
|
||||
}
|
||||
|
||||
var metrics = Standard14.GetAdobeFontMetrics(standard14Name.Name);
|
||||
|
||||
return new Type1Standard14Font(metrics);
|
||||
}
|
||||
|
||||
var firstCharacter = FontDictionaryAccessHelper.GetFirstCharacter(dictionary);
|
||||
|
||||
var lastCharacter = FontDictionaryAccessHelper.GetLastCharacter(dictionary);
|
||||
@@ -69,7 +83,7 @@
|
||||
throw new InvalidFontFormatException($"Unrecognised encoding name: {encodingName}");
|
||||
}
|
||||
}
|
||||
else if (encodingBase is CosDictionary encodingDictionary)
|
||||
else if (encodingBase is CosDictionary)
|
||||
{
|
||||
throw new NotImplementedException("No support for reading encoding from dictionary yet.");
|
||||
}
|
||||
|
69
src/UglyToad.Pdf/Fonts/Simple/Type1Standard14Font.cs
Normal file
69
src/UglyToad.Pdf/Fonts/Simple/Type1Standard14Font.cs
Normal file
@@ -0,0 +1,69 @@
|
||||
namespace UglyToad.Pdf.Fonts.Simple
|
||||
{
|
||||
using System;
|
||||
using Core;
|
||||
using Cos;
|
||||
using Encodings;
|
||||
using Geometry;
|
||||
using IO;
|
||||
|
||||
/// <summary>
/// A Type 1 font backed only by a set of standard font metrics.
/// Character codes are mapped to glyph names through an <see cref="AdobeFontMetricsEncoding"/>
/// built from those metrics, and widths are read from the same metrics.
/// </summary>
internal class Type1Standard14Font: IFont
{
    /// <summary>
    /// Width returned by <see cref="GetWidth"/> when the metrics hold no entry for a glyph name.
    /// </summary>
    private const decimal DefaultWidth = 250;

    /// <summary>
    /// Scales values by 0.001 on both axes with no translation.
    /// </summary>
    private static readonly TransformationMatrix FontMatrix = TransformationMatrix.FromValues(0.001m, 0, 0, 0.001m, 0, 0);

    private readonly FontMetrics standardFontMetrics;
    private readonly Encoding encoding;

    /// <summary>
    /// The font name, created from the font name held in the metrics.
    /// </summary>
    public CosName Name { get; }

    /// <summary>
    /// Always false for this font.
    /// </summary>
    public bool IsVertical { get; }

    /// <summary>
    /// Creates the font from the given metrics.
    /// </summary>
    /// <param name="standardFontMetrics">The metrics providing the name, encoding and widths.</param>
    /// <exception cref="ArgumentNullException"><paramref name="standardFontMetrics"/> is null.</exception>
    public Type1Standard14Font(FontMetrics standardFontMetrics)
    {
        this.standardFontMetrics = standardFontMetrics ?? throw new ArgumentNullException(nameof(standardFontMetrics));
        encoding = new AdobeFontMetricsEncoding(standardFontMetrics);

        Name = CosName.Create(standardFontMetrics.FontName);

        IsVertical = false;
    }

    /// <summary>
    /// Returns the current byte of the input as the character code.
    /// This method only reads <c>CurrentByte</c>; it does not move the input itself.
    /// </summary>
    /// <param name="bytes">The input positioned at the character code.</param>
    /// <param name="codeLength">Always set to 1.</param>
    /// <returns>The value of the current byte.</returns>
    public int ReadCharacterCode(IInputBytes bytes, out int codeLength)
    {
        codeLength = 1;
        return bytes.CurrentByte;
    }

    /// <summary>
    /// Looks up the glyph name for the character code and converts it to text via the Adobe glyph list.
    /// </summary>
    /// <param name="characterCode">The character code to convert.</param>
    /// <param name="value">The text for the code, or null when the glyph list gave no result.</param>
    /// <returns>True only when a non-null value was produced.</returns>
    public bool TryGetUnicode(int characterCode, out string value)
    {
        var name = encoding.GetName(characterCode);

        value = GlyphList.AdobeGlyphList.NameToUnicode(name);

        // Previously this always returned true, so a failed lookup was reported as a success
        // with a null value. Report success only when there is a value to use.
        // NOTE(review): assumes NameToUnicode signals "not found" with null - confirm.
        return value != null;
    }

    /// <summary>
    /// Returns the width of the character as a horizontal vector transformed by the font matrix.
    /// </summary>
    /// <param name="characterCode">The character code to measure.</param>
    public PdfVector GetDisplacement(int characterCode)
    {
        return FontMatrix.Transform(new PdfVector(GetWidth(characterCode), 0));
    }

    /// <summary>
    /// Returns the X width recorded in the metrics for the glyph the code maps to,
    /// or <see cref="DefaultWidth"/> when the metrics contain no entry for that glyph name.
    /// </summary>
    /// <param name="characterCode">The character code to measure.</param>
    public decimal GetWidth(int characterCode)
    {
        var name = encoding.GetName(characterCode);

        if (!standardFontMetrics.CharacterMetrics.TryGetValue(name, out var metrics))
        {
            return DefaultWidth;
        }

        return metrics.WidthX;
    }

    /// <summary>
    /// Returns the fixed font matrix shared by all instances.
    /// </summary>
    public TransformationMatrix GetFontMatrix()
    {
        return FontMatrix;
    }
}
|
||||
}
|
Reference in New Issue
Block a user