add code for drawing type 1 glyphs and converting to svg

This commit is contained in:
Eliot Jones
2018-11-13 20:45:54 +00:00
parent ab9de799f9
commit 904f773525
23 changed files with 40247 additions and 65 deletions

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,27 @@
namespace UglyToad.PdfPig.Tests.Integration
{
using System;
using System.IO;
using Xunit;
/// <summary>
/// Integration tests which open the document "journal.pone.0196757.pdf" from the test documents folder.
/// </summary>
public class PlosOneTests
{
    // The single document exercised by every test in this class.
    private const string DocumentName = "journal.pone.0196757.pdf";

    /// <summary>
    /// Builds the absolute path of the test document, resolved relative to the
    /// base directory of the running test assembly.
    /// </summary>
    /// <returns>The full path to <see cref="DocumentName"/> inside "Integration/Documents".</returns>
    private static string GetFilename()
    {
        var documentFolder = Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "Integration", "Documents"));

        return Path.Combine(documentFolder, DocumentName);
    }

    /// <summary>
    /// Opens the document and checks that the first page yields more than 50 characters of text.
    /// </summary>
    [Fact]
    public void CanReadPageOneContent()
    {
        // GetFilename already returns the path of the wanted document, so no
        // file name substitution is needed before opening it.
        using (var document = PdfDocument.Open(GetFilename()))
        {
            var page = document.GetPage(1);

            var text = page.Text;

            Assert.True(text.Length > 50);
        }
    }
}
}

View File

@@ -89,10 +89,10 @@
encoding = new BuiltInEncoding(font.Encoding);
}
return new Type1FontSimple(name, firstCharacter, lastCharacter, widths, descriptor, encoding, toUnicodeCMap);
return new Type1FontSimple(name, firstCharacter, lastCharacter, widths, descriptor, encoding, toUnicodeCMap, font);
}
private Type1Font ParseType1Font(FontDescriptor descriptor, bool isLenientParsing)
private Type1FontProgram ParseType1Font(FontDescriptor descriptor, bool isLenientParsing)
{
if (descriptor?.FontFile == null)
{

View File

@@ -7,6 +7,7 @@
using Geometry;
using IO;
using Tokenization.Tokens;
using Type1;
/// <summary>
/// A font based on the Adobe Type 1 font format.
@@ -22,6 +23,7 @@
private readonly FontDescriptor fontDescriptor;
private readonly Encoding encoding;
private readonly Type1FontProgram fontProgram;
private readonly ToUnicodeCMap toUnicodeCMap;
@@ -31,13 +33,16 @@
public bool IsVertical { get; } = false;
public Type1FontSimple(NameToken name, int firstChar, int lastChar, decimal[] widths, FontDescriptor fontDescriptor, Encoding encoding, CMap toUnicodeCMap)
public Type1FontSimple(NameToken name, int firstChar, int lastChar, decimal[] widths, FontDescriptor fontDescriptor, Encoding encoding,
CMap toUnicodeCMap,
Type1FontProgram fontProgram)
{
this.firstChar = firstChar;
this.lastChar = lastChar;
this.widths = widths;
this.fontDescriptor = fontDescriptor;
this.encoding = encoding;
this.fontProgram = fontProgram;
this.toUnicodeCMap = new ToUnicodeCMap(toUnicodeCMap);
Name = name;
}
@@ -102,6 +107,8 @@
return new PdfRectangle(0, 0, 250, 0);
}
this.fontProgram.GetCharacterBoundingBox(characterCode);
return new PdfRectangle(0, 0, widths[characterCode - firstChar], 0);
}

View File

@@ -0,0 +1,143 @@
namespace UglyToad.PdfPig.Fonts.Type1.CharStrings
{
using System.Collections.Generic;
using System.Text;
using Geometry;
/// <summary>
/// Records the move, line, curve and close commands which make up a glyph outline
/// and can write them out as SVG path data.
/// </summary>
internal class CharacterPath
{
    // SVG path data requires '.' as the decimal separator, so numbers must never be
    // formatted using the culture of the current thread (e.g. "1,5" in de-DE).
    private static readonly System.Globalization.CultureInfo SvgCulture = System.Globalization.CultureInfo.InvariantCulture;

    private readonly List<IPathCommand> commands = new List<IPathCommand>();

    // Null until the first command which establishes a current point has been added.
    private PdfPoint? currentPosition;

    /// <summary>
    /// Adds a move command and makes (<paramref name="x"/>, <paramref name="y"/>) the current point.
    /// </summary>
    public void MoveTo(decimal x, decimal y)
    {
        currentPosition = new PdfPoint(x, y);
        commands.Add(new Move(currentPosition.Value));
    }

    /// <summary>
    /// Adds a straight line from the current point to (<paramref name="x"/>, <paramref name="y"/>).
    /// If there is no current point yet this behaves like <see cref="MoveTo"/>.
    /// </summary>
    public void LineTo(decimal x, decimal y)
    {
        if (currentPosition.HasValue)
        {
            var to = new PdfPoint(x, y);
            commands.Add(new Line(currentPosition.Value, to));
            currentPosition = to;
        }
        else
        {
            MoveTo(x, y);
        }
    }

    /// <summary>
    /// Not implemented: the arguments are currently ignored and no command is recorded.
    /// </summary>
    public void QuadraticCurveTo(decimal x1, decimal y1, decimal x2, decimal y2) { }

    /// <summary>
    /// Adds a cubic Bezier curve from the current point to (<paramref name="x3"/>, <paramref name="y3"/>)
    /// using (<paramref name="x1"/>, <paramref name="y1"/>) and (<paramref name="x2"/>, <paramref name="y2"/>) as control points.
    /// If there is no current point yet only a move to the end point is recorded.
    /// </summary>
    public void BezierCurveTo(decimal x1, decimal y1, decimal x2, decimal y2, decimal x3, decimal y3)
    {
        if (currentPosition.HasValue)
        {
            var to = new PdfPoint(x3, y3);
            commands.Add(new BezierCurve(currentPosition.Value,
                new PdfPoint(x1, y1), new PdfPoint(x2, y2), to));
            currentPosition = to;
        }
        else
        {
            MoveTo(x3, y3);
        }
    }

    /// <summary>
    /// Not implemented: the winding rule is currently ignored.
    /// </summary>
    public void SetWindingRuleMode(int windingRule) { }

    /// <summary>
    /// Adds a close-path command. The current point is left unchanged.
    /// </summary>
    public void ClosePath()
    {
        commands.Add(new Close());
    }

    /// <summary>
    /// Writes every recorded command as SVG path data, in the order the commands were added.
    /// </summary>
    /// <returns>The path data without a trailing space, or an empty string if no commands were recorded.</returns>
    public string ToSvg()
    {
        var builder = new StringBuilder();

        foreach (var pathCommand in commands)
        {
            pathCommand.WriteSvg(builder);
        }

        // A path without commands (for example a glyph with no outline) leaves the builder
        // empty, so the last character may only be inspected when there is one.
        if (builder.Length > 0 && builder[builder.Length - 1] == ' ')
        {
            builder.Remove(builder.Length - 1, 1);
        }

        return builder.ToString();
    }

    private interface IPathCommand
    {
        // Appends this command, followed by a single space, to the builder.
        void WriteSvg(StringBuilder builder);
    }

    private class Close : IPathCommand
    {
        public void WriteSvg(StringBuilder builder)
        {
            builder.Append("Z ");
        }
    }

    private class Move : IPathCommand
    {
        public PdfPoint Location { get; }

        public Move(PdfPoint location)
        {
            Location = location;
        }

        public void WriteSvg(StringBuilder builder)
        {
            builder.AppendFormat(SvgCulture, "M {0} {1} ", Location.X, Location.Y);
        }
    }

    private class Line : IPathCommand
    {
        public PdfPoint From { get; }

        public PdfPoint To { get; }

        public Line(PdfPoint from, PdfPoint to)
        {
            From = from;
            To = to;
        }

        public void WriteSvg(StringBuilder builder)
        {
            // Only the end point is written; SVG takes the start from the preceding command.
            builder.AppendFormat(SvgCulture, "L {0} {1} ", To.X, To.Y);
        }
    }

    private class BezierCurve : IPathCommand
    {
        public PdfPoint StartPoint { get; }

        public PdfPoint FirstControlPoint { get; }

        public PdfPoint SecondControlPoint { get; }

        public PdfPoint EndPoint { get; }

        public BezierCurve(PdfPoint startPoint, PdfPoint firstControlPoint, PdfPoint secondControlPoint, PdfPoint endPoint)
        {
            StartPoint = startPoint;
            FirstControlPoint = firstControlPoint;
            SecondControlPoint = secondControlPoint;
            EndPoint = endPoint;
        }

        public void WriteSvg(StringBuilder builder)
        {
            builder.AppendFormat(SvgCulture, "C {0} {1}, {2} {3}, {4} {5} ", FirstControlPoint.X, FirstControlPoint.Y, SecondControlPoint.X, SecondControlPoint.Y,
                EndPoint.X, EndPoint.Y);
        }
    }
}
}

View File

@@ -20,7 +20,7 @@
var first = context.Stack.PopTop();
var second = context.Stack.PopTop();
var result = first / second;
var result = second / first;
context.Stack.Push(result);
}

View File

@@ -2,6 +2,8 @@
namespace UglyToad.PdfPig.Fonts.Type1.CharStrings.Commands
{
using System.Collections.Generic;
/// <summary>
/// Represents the deferred execution of a Type 1 Build Char command.
/// </summary>
@@ -30,24 +32,30 @@ namespace UglyToad.PdfPig.Fonts.Type1.CharStrings.Commands
/// <summary>
/// A list-backed stack of numeric operands which can be popped from either end.
/// </summary>
internal class Type1Stack
{
    // Index 0 is the bottom (oldest value), the last index is the top (newest value).
    private readonly List<decimal> stack = new List<decimal>();

    /// <summary>
    /// Removes and returns the most recently pushed value.
    /// </summary>
    /// <exception cref="System.InvalidOperationException">The stack is empty.</exception>
    public decimal PopTop()
    {
        EnsureNotEmpty();

        var topIndex = stack.Count - 1;
        var result = stack[topIndex];
        stack.RemoveAt(topIndex);

        return result;
    }

    /// <summary>
    /// Removes and returns the oldest value still on the stack.
    /// </summary>
    /// <exception cref="System.InvalidOperationException">The stack is empty.</exception>
    public decimal PopBottom()
    {
        EnsureNotEmpty();

        var result = stack[0];
        stack.RemoveAt(0);

        return result;
    }

    /// <summary>
    /// Adds a value to the top of the stack.
    /// </summary>
    public void Push(decimal value)
    {
        stack.Add(value);
    }

    /// <summary>
    /// Removes every value from the stack.
    /// </summary>
    public void Clear()
    {
        stack.Clear();
    }

    // Fails with a descriptive error instead of the list's index-out-of-range error.
    private void EnsureNotEmpty()
    {
        if (stack.Count == 0)
        {
            throw new System.InvalidOperationException("Cannot pop from an empty Type 1 operand stack.");
        }
    }
}
}

View File

@@ -17,6 +17,7 @@
// Runs the close path command against the supplied build context.
public static void Run(Type1BuildCharContext context)
{
// Record the close command on the glyph path being built.
context.Path.ClosePath();
// Discard any operands left on the stack so they are not seen by the next command.
context.Stack.Clear();
}
}

View File

@@ -1,5 +1,7 @@
namespace UglyToad.PdfPig.Fonts.Type1.CharStrings.Commands.PathConstruction
{
using Geometry;
/// <summary>
/// Horizontal line-to command.
/// </summary>
@@ -18,6 +20,10 @@
// Draws a horizontal line: the bottom operand is the x offset from the current point,
// the y coordinate is kept. Afterwards the end of the line becomes the current point
// and the operand stack is emptied.
public static void Run(Type1BuildCharContext context)
{
    var horizontalOffset = context.Stack.PopBottom();

    var origin = context.CurrentPosition;
    var targetX = origin.X + horizontalOffset;

    context.Path.LineTo(targetX, origin.Y);
    context.CurrentPosition = new PdfPoint(targetX, origin.Y);

    context.Stack.Clear();
}

View File

@@ -1,5 +1,7 @@
namespace UglyToad.PdfPig.Fonts.Type1.CharStrings.Commands.PathConstruction
{
using Geometry;
/// <summary>
/// Relative line-to command. Creates a line moving a distance relative to the current point.
/// </summary>
@@ -20,6 +22,12 @@
var deltaX = context.Stack.PopBottom();
var deltaY = context.Stack.PopBottom();
var x = context.CurrentPosition.X + deltaX;
var y = context.CurrentPosition.Y + deltaY;
context.Path.LineTo(x, y);
context.CurrentPosition = new PdfPoint(x, y);
context.Stack.Clear();
}
}

View File

@@ -1,5 +1,7 @@
namespace UglyToad.PdfPig.Fonts.Type1.CharStrings.Commands.PathConstruction
{
using Geometry;
/// <summary>
/// Relative move to command. starts a new subpath of the current path in the same manner as moveto.
/// However, the number pair is interpreted as a displacement relative to the current point (x, y) rather than as an absolute coordinate.
@@ -26,6 +28,18 @@
var deltaX = context.Stack.PopBottom();
var deltaY = context.Stack.PopBottom();
if (context.IsFlexing)
{
}
else
{
var x = context.CurrentPosition.X + deltaX;
var y = context.CurrentPosition.Y + deltaY;
context.CurrentPosition = new PdfPoint(x, y);
context.Path.MoveTo(x, y);
}
context.Stack.Clear();
}
}

View File

@@ -1,5 +1,7 @@
namespace UglyToad.PdfPig.Fonts.Type1.CharStrings.Commands.PathConstruction
{
using Geometry;
/// <summary>
/// Relative rcurveto. Whereas the arguments to the rcurveto operator in the PostScript language are all relative to the current
/// point, the arguments to rrcurveto are relative to each other.
@@ -26,6 +28,19 @@
var dx3 = context.Stack.PopBottom();
var dy3 = context.Stack.PopBottom();
var x1 = context.CurrentPosition.X + dx1;
var y1 = context.CurrentPosition.Y + dy1;
var x2 = x1 + dx2;
var y2 = y1 + dy2;
var x3 = x2 + dx3;
var y3 = y2 + dy3;
context.Path.BezierCurveTo(x1, y1, x2, y2, x3, y3);
context.CurrentPosition = new PdfPoint(x3, y3);
context.Stack.Clear();
}
}

View File

@@ -1,5 +1,7 @@
namespace UglyToad.PdfPig.Fonts.Type1.CharStrings.Commands.PathConstruction
{
using Geometry;
/// <summary>
/// Vertical-line to command.
/// </summary>
@@ -18,6 +20,10 @@
// Draws a vertical line: the bottom operand is the y offset from the current point,
// the x coordinate is kept. Afterwards the end of the line becomes the current point
// and the operand stack is emptied.
public static void Run(Type1BuildCharContext context)
{
    var verticalOffset = context.Stack.PopBottom();

    var origin = context.CurrentPosition;
    var targetY = origin.Y + verticalOffset;

    context.Path.LineTo(origin.X, targetY);
    context.CurrentPosition = new PdfPoint(origin.X, targetY);

    context.Stack.Clear();
}

View File

@@ -1,5 +1,7 @@
namespace UglyToad.PdfPig.Fonts.Type1.CharStrings.Commands.StartFinishOutline
{
using Geometry;
/// <summary>
/// The name hsbw stands for horizontal sidebearing and width;
/// horizontal indicates that the y component of both the sidebearing and width is 0.
@@ -23,6 +25,11 @@
var leftSidebearingPointX = context.Stack.PopBottom();
var characterWidthVectorX = context.Stack.PopBottom();
context.LeftSideBearing = leftSidebearingPointX;
context.Width = characterWidthVectorX;
context.CurrentPosition = new PdfPoint(leftSidebearingPointX, 0);
context.Stack.Clear();
}
}

View File

@@ -1,9 +1,21 @@
namespace UglyToad.PdfPig.Fonts.Type1.CharStrings.Commands
{
using Geometry;
/// <summary>
/// The mutable state shared by the commands of a single Type 1 charstring while it is being run.
/// </summary>
internal class Type1BuildCharContext
{
    /// <summary>
    /// The x component of the character width vector.
    /// </summary>
    public decimal Width { get; set; }

    /// <summary>
    /// The x coordinate of the left sidebearing point.
    /// </summary>
    public decimal LeftSideBearing { get; set; }

    /// <summary>
    /// While set, relative move-to commands do not change the current point or the path.
    /// </summary>
    public bool IsFlexing { get; set; }

    /// <summary>
    /// The glyph outline built up by the path construction commands.
    /// </summary>
    public CharacterPath Path { get; } = new CharacterPath();

    /// <summary>
    /// The point which relative path construction commands are measured from.
    /// </summary>
    public PdfPoint CurrentPosition { get; set; }

    /// <summary>
    /// The operand stack which charstring numbers are pushed to and commands pop from.
    /// Declared once, with an initializer, so that it is never null.
    /// </summary>
    public Type1Stack Stack { get; } = new Type1Stack();

    /// <summary>
    /// A second, separate operand stack.
    /// NOTE(review): presumably holds values exchanged with PostScript "other subroutines" - confirm.
    /// </summary>
    public Type1Stack PostscriptStack { get; } = new Type1Stack();
}
}

View File

@@ -66,9 +66,9 @@
return new Type1CharStrings(charStringResults, subroutineResults);
}
private static IReadOnlyList<DiscriminatedUnion<decimal, LazyType1Command>> ParseSingle(IReadOnlyList<byte> charStringBytes)
private static IReadOnlyList<Union<decimal, LazyType1Command>> ParseSingle(IReadOnlyList<byte> charStringBytes)
{
var interpreted = new List<DiscriminatedUnion<decimal, LazyType1Command>>();
var interpreted = new List<Union<decimal, LazyType1Command>>();
for (var i = 0; i < charStringBytes.Count; i++)
{
@@ -83,13 +83,13 @@
throw new InvalidOperationException($"Could not find command with code {b}.");
}
interpreted.Add(new DiscriminatedUnion<decimal, LazyType1Command>.Case2(command));
interpreted.Add(new Union<decimal, LazyType1Command>.Case2(command));
}
else
{
var val = InterpretNumber(b, charStringBytes, ref i);
interpreted.Add(new DiscriminatedUnion<decimal, LazyType1Command>.Case1(val));
interpreted.Add(new Union<decimal, LazyType1Command>.Case1(val));
}
}

View File

@@ -46,7 +46,14 @@
// Runs every entry of a charstring in order against a fresh build context:
// numbers are pushed as operands, commands are executed and consume those operands.
private void Run(CommandSequence sequence)
{
    var context = new Type1BuildCharContext();

    foreach (var command in sequence.Commands)
    {
        command.Match(x => context.Stack.Push(x),
            x => x.Run(context));
    }

    // The outline is now available on context.Path. It is intentionally not converted to
    // SVG here: the result was never used, and a glyph without any path commands must not
    // cause a failure while running its charstring.
}
public class CommandSequence
@@ -54,9 +61,9 @@
/// <summary>
/// The ordered list of numbers and commands for a Type 1 charstring or subroutine.
/// </summary>
public IReadOnlyList<DiscriminatedUnion<decimal, LazyType1Command>> Commands { get; }
public IReadOnlyList<Union<decimal, LazyType1Command>> Commands { get; }
public CommandSequence(IReadOnlyList<DiscriminatedUnion<decimal, LazyType1Command>> commands)
public CommandSequence(IReadOnlyList<Union<decimal, LazyType1Command>> commands)
{
Commands = commands ?? throw new ArgumentNullException(nameof(commands));
}

View File

@@ -29,7 +29,7 @@
/// <param name="length1">The length in bytes of the clear text portion of the font program.</param>
/// <param name="length2">The length in bytes of the encrypted portion of the font program.</param>
/// <returns>The parsed type 1 font.</returns>
public Type1Font Parse(IInputBytes inputBytes, int length1, int length2)
public Type1FontProgram Parse(IInputBytes inputBytes, int length1, int length2)
{
// Sometimes the entire PFB file including the header bytes can be included which prevents parsing in the normal way.
var isEntirePfbFile = inputBytes.Peek() == PfbFileIndicator;
@@ -148,7 +148,7 @@
var (privateDictionary, charStrings) = encryptedPortionParser.Parse(eexecPortion, false);
return new Type1Font(name, encoding, matrix, boundingBox ?? new PdfRectangle(), privateDictionary);
return new Type1FontProgram(name, encoding, matrix, boundingBox ?? new PdfRectangle(), privateDictionary, charStrings);
}
/// <summary>
@@ -349,6 +349,7 @@
return new ArrayToken(result);
}
private static Dictionary<int, string> GetEncoding(IReadOnlyList<DictionaryToken> dictionaries)
{
var result = new Dictionary<int, string>();

View File

@@ -1,36 +0,0 @@
namespace UglyToad.PdfPig.Fonts.Type1
{
using System;
using System.Collections.Generic;
using Geometry;
using Tokenization.Tokens;
using Util.JetBrains.Annotations;
/// <summary>
/// The information from the Type 1 font file.
/// </summary>
internal class Type1Font
{
// The name of the font as passed to the constructor.
public string Name { get; }
// Maps an integer code to a string name; stored exactly as passed to the constructor.
public IReadOnlyDictionary<int, string> Encoding { get; }
// The font matrix token; annotated as possibly null.
[CanBeNull]
public ArrayToken FontMatrix { get; }
// The bounding box of the font; annotated as possibly null.
[CanBeNull]
public PdfRectangle BoundingBox { get; }
// The private dictionary of the font; never null because the constructor rejects null.
public Type1PrivateDictionary PrivateDictionary { get; }
// Stores the supplied values without copying them.
// Throws ArgumentNullException when privateDictionary is null; no other argument is validated.
public Type1Font(string name, IReadOnlyDictionary<int, string> encoding, ArrayToken fontMatrix, PdfRectangle boundingBox,
Type1PrivateDictionary privateDictionary)
{
Name = name;
Encoding = encoding;
FontMatrix = fontMatrix;
BoundingBox = boundingBox;
PrivateDictionary = privateDictionary ?? throw new ArgumentNullException(nameof(privateDictionary));
}
}
}

View File

@@ -0,0 +1,68 @@
namespace UglyToad.PdfPig.Fonts.Type1
{
using System;
using System.Collections.Generic;
using CharStrings;
using Geometry;
using Tokenization.Tokens;
using Util.JetBrains.Annotations;
/// <summary>
/// The information from the Type 1 font file.
/// </summary>
internal class Type1FontProgram
{
    /// <summary>
    /// The name of the font.
    /// </summary>
    public string Name { get; }

    /// <summary>
    /// The encoding dictionary defining a name for each character code.
    /// </summary>
    public IReadOnlyDictionary<int, string> Encoding { get; }

    /// <summary>
    /// The font matrix array from the font program, if one was present.
    /// </summary>
    [CanBeNull]
    public ArrayToken FontMatrix { get; }

    /// <summary>
    /// A rectangle in glyph coordinates specifying the font bounding box.
    /// This is the smallest rectangle enclosing the shape that would result if all of the glyphs were overlayed on each other.
    /// </summary>
    public PdfRectangle BoundingBox { get; }

    /// <summary>
    /// The private dictionary of the font program.
    /// </summary>
    [NotNull]
    public Type1PrivateDictionary PrivateDictionary { get; }

    /// <summary>
    /// The charstrings of the font program, used to generate glyphs by name.
    /// </summary>
    [NotNull]
    public Type1CharStrings CharStrings { get; }

    /// <summary>
    /// Create a new <see cref="Type1FontProgram"/> from the information retrieved from the PDF document.
    /// </summary>
    /// <param name="name">The name of the font.</param>
    /// <param name="encoding">The dictionary defining a name for each character code.</param>
    /// <param name="fontMatrix">The font matrix array, may be null.</param>
    /// <param name="boundingBox">The font bounding box in glyph coordinates.</param>
    /// <param name="privateDictionary">The private dictionary of the font program, must not be null.</param>
    /// <param name="charStrings">The charstrings of the font program, must not be null.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="privateDictionary"/> or <paramref name="charStrings"/> is null.
    /// </exception>
    public Type1FontProgram(string name, IReadOnlyDictionary<int, string> encoding, ArrayToken fontMatrix, PdfRectangle boundingBox,
        Type1PrivateDictionary privateDictionary,
        Type1CharStrings charStrings)
    {
        Name = name;
        Encoding = encoding;
        FontMatrix = fontMatrix;
        BoundingBox = boundingBox;
        PrivateDictionary = privateDictionary ?? throw new ArgumentNullException(nameof(privateDictionary));
        CharStrings = charStrings ?? throw new ArgumentNullException(nameof(charStrings));
    }

    /// <summary>
    /// Generates the glyph named by the encoding for <paramref name="characterCode"/>.
    /// The bounding box itself is not calculated yet: an empty rectangle is always returned.
    /// </summary>
    /// <param name="characterCode">The character code to look up in <see cref="Encoding"/>.</param>
    /// <returns>An empty <see cref="PdfRectangle"/>.</returns>
    public PdfRectangle GetCharacterBoundingBox(int characterCode)
    {
        // A code the encoding does not name has no glyph to generate; report the same
        // empty rectangle rather than failing the lookup with a KeyNotFoundException.
        if (Encoding == null || !Encoding.TryGetValue(characterCode, out var glyphName))
        {
            return new PdfRectangle();
        }

        CharStrings.Generate(glyphName);

        // TODO: derive the rectangle from the generated glyph outline.
        return new PdfRectangle();
    }
}
}

View File

@@ -10,7 +10,7 @@
/// These hints help ensure that the shape is as close as possible to the original design even where the character
/// must be represented in few pixels.
/// Note that subroutines are also defined in the private dictionary however for the purposes of this API they are
/// stored on the parent <see cref="Type1Font"/>.
/// stored on the parent <see cref="Type1FontProgram"/>.
/// </summary>
internal class Type1PrivateDictionary
{

View File

@@ -89,6 +89,7 @@
}
else
{
// TODO: this can be an array of stream objects... investigate
var contentStream = DirectObjectFinder.Get<StreamToken>(contents, pdfScanner);
if (contentStream == null)

View File

@@ -1,15 +1,25 @@
using System;
// ReSharper disable InconsistentNaming
namespace UglyToad.PdfPig.Util
{
// ReSharper disable once InconsistentNaming
internal abstract class DiscriminatedUnion<A, B>
internal abstract class Union<A, B>
{
public abstract T Match<T>(Func<A, T> first, Func<B, T> second);
public abstract void Match(Action<A> first, Action<B> second);
private DiscriminatedUnion() { }
private Union() { }
public sealed class Case1 : DiscriminatedUnion<A, B>
public static Case1 One(A item)
{
return new Case1(item);
}
public static Case2 Two(B item)
{
return new Case2(item);
}
public sealed class Case1 : Union<A, B>
{
public readonly A Item;
@@ -18,9 +28,9 @@ namespace UglyToad.PdfPig.Util
Item = item;
}
public override T Match<T>(Func<A, T> first, Func<B, T> second)
public override void Match(Action<A> first, Action<B> second)
{
return first(Item);
first(Item);
}
public override string ToString()
@@ -29,7 +39,7 @@ namespace UglyToad.PdfPig.Util
}
}
public sealed class Case2 : DiscriminatedUnion<A, B>
public sealed class Case2 : Union<A, B>
{
public readonly B Item;
@@ -38,9 +48,9 @@ namespace UglyToad.PdfPig.Util
Item = item;
}
public override T Match<T>(Func<A, T> first, Func<B, T> second)
public override void Match(Action<A> first, Action<B> second)
{
return second(Item);
second(Item);
}
public override string ToString()