support gs operator, fix systemfonts, apply rotation to glyphs

- begin adding support for extended graphics state (the 'gs' operator) including setting the font #39.
- apply page level rotation to the glyph bounding box and width to get correct glyph sizes #41.
- wrap page rotation in a value type to ensure the value is restricted to right angle rotations and provide convenience members #42.
- fix bug where system font finder never worked for TrueType fonts because it began reading the file from the wrong offset.
This commit is contained in:
Eliot Jones
2019-07-06 14:03:23 +01:00
parent 88e02cabab
commit c495065178
17 changed files with 375 additions and 150 deletions

View File

@@ -1,8 +1,6 @@
namespace UglyToad.PdfPig.Tests.Graphics
{
using System.Collections.Generic;
using Content;
using PdfPig.Fonts;
using PdfPig.Geometry;
using PdfPig.Graphics;
using PdfPig.IO;
@@ -66,22 +64,9 @@
public void ClosePath()
{
}
}
internal class TestResourceStore : IResourceStore
{
public void LoadResourceDictionary(DictionaryToken dictionary, bool isLenientParsing)
public void SetNamedGraphicsState(NameToken stateName)
{
}
public IFont GetFont(NameToken name)
{
return null;
}
public StreamToken GetXObject(NameToken name)
{
return null;
}
}
}

View File

@@ -0,0 +1,33 @@
namespace UglyToad.PdfPig.Tests.Graphics
{
using Content;
using PdfPig.Fonts;
using PdfPig.Tokens;
/// <summary>
/// A do-nothing <see cref="IResourceStore"/> for tests: loading is a no-op and every lookup returns <c>null</c>.
/// </summary>
internal class TestResourceStore : IResourceStore
{
    /// <summary>
    /// Ignores the supplied resource dictionary.
    /// </summary>
    public void LoadResourceDictionary(DictionaryToken dictionary, bool isLenientParsing)
    {
        // Intentionally empty: this stub keeps no resources.
    }

    /// <summary>
    /// Always returns <c>null</c>.
    /// </summary>
    public IFont GetFont(NameToken name) => null;

    /// <summary>
    /// Always returns <c>null</c>.
    /// </summary>
    public StreamToken GetXObject(NameToken name) => null;

    /// <summary>
    /// Always returns <c>null</c>.
    /// </summary>
    public DictionaryToken GetExtendedGraphicsStateDictionary(NameToken name) => null;

    /// <summary>
    /// Always returns <c>null</c>.
    /// </summary>
    public IFont GetFontDirectly(IndirectReferenceToken fontReferenceToken, bool isLenientParsing) => null;
}
}

View File

@@ -43,6 +43,7 @@
"UglyToad.PdfPig.Content.DocumentInformation",
"UglyToad.PdfPig.Content.Letter",
"UglyToad.PdfPig.Content.Page",
"UglyToad.PdfPig.Content.PageRotationDegrees",
"UglyToad.PdfPig.Content.PageSize",
"UglyToad.PdfPig.Content.Word",
"UglyToad.PdfPig.Content.TextLine",

View File

@@ -10,5 +10,9 @@
IFont GetFont(NameToken name);
StreamToken GetXObject(NameToken name);
DictionaryToken GetExtendedGraphicsStateDictionary(NameToken name);
IFont GetFontDirectly(IndirectReferenceToken fontReferenceToken, bool isLenientParsing);
}
}

View File

@@ -32,7 +32,7 @@
/// <summary>
/// The rotation of the page in degrees (clockwise). Valid values are 0, 90, 180 and 270.
/// </summary>
public int Rotation { get; }
public PageRotationDegrees Rotation { get; }
internal PageContent Content { get; }
@@ -72,7 +72,7 @@
[NotNull]
public Experimental ExperimentalAccess { get; }
internal Page(int number, DictionaryToken dictionary, MediaBox mediaBox, CropBox cropBox, int rotation, PageContent content,
internal Page(int number, DictionaryToken dictionary, MediaBox mediaBox, CropBox cropBox, PageRotationDegrees rotation, PageContent content,
AnnotationProvider annotationProvider)
{
if (number <= 0)
@@ -80,11 +80,6 @@
throw new ArgumentOutOfRangeException(nameof(number), "Page number cannot be 0 or negative.");
}
if (rotation != 0 && rotation != 90 && rotation != 180 && rotation != 270)
{
throw new ArgumentOutOfRangeException(nameof(rotation), $"Rotation must be 0, 90, 180 or 270. Got: {rotation}.");
}
Dictionary = dictionary ?? throw new ArgumentNullException(nameof(dictionary));
Number = number;

View File

@@ -0,0 +1,116 @@
namespace UglyToad.PdfPig.Content
{
using System;
using System.Diagnostics.Contracts;
using Core;
/// <summary>
/// The rotation of a page in a PDF document, as defined by the page dictionary, in degrees clockwise.
/// Only the values 0, 90, 180 and 270 can be passed to the constructor.
/// </summary>
public struct PageRotationDegrees : IEquatable<PageRotationDegrees>
{
    /// <summary>
    /// Create a <see cref="PageRotationDegrees"/>.
    /// </summary>
    /// <param name="rotation">Rotation in degrees clockwise. Must be 0, 90, 180 or 270.</param>
    /// <exception cref="ArgumentOutOfRangeException">The rotation is not one of the four permitted values.</exception>
    public PageRotationDegrees(int rotation)
    {
        var isRightAngle = rotation == 0 || rotation == 90 || rotation == 180 || rotation == 270;

        if (!isRightAngle)
        {
            throw new ArgumentOutOfRangeException(nameof(rotation), $"Rotation must be 0, 90, 180 or 270. Got: {rotation}.");
        }

        Value = rotation;
    }

    /// <summary>
    /// The rotation of the page in degrees clockwise.
    /// </summary>
    public int Value { get; }

    /// <summary>
    /// Get the rotation expressed in radians (anti-clockwise), so every non-zero rotation yields a negative value.
    /// </summary>
    /// <exception cref="InvalidOperationException">The stored value is not 0, 90, 180 or 270.</exception>
    public decimal Radians
    {
        get
        {
            if (Value == 0)
            {
                return 0;
            }

            if (Value == 90)
            {
                return -(decimal)(0.5 * Math.PI);
            }

            if (Value == 180)
            {
                return -(decimal)Math.PI;
            }

            if (Value == 270)
            {
                return -(decimal)(1.5 * Math.PI);
            }

            throw new InvalidOperationException($"Invalid value for rotation: {Value}.");
        }
    }

    /// <summary>
    /// Builds the matrix for this rotation and multiplies it by <paramref name="matrix"/>,
    /// with the rotation matrix as the left-hand operand.
    /// </summary>
    /// <param name="matrix">The matrix to combine with the rotation.</param>
    /// <returns>The product of the rotation matrix and <paramref name="matrix"/>.</returns>
    /// <exception cref="InvalidOperationException">The stored value is not 0, 90, 180 or 270.</exception>
    [Pure]
    internal TransformationMatrix Rotate(TransformationMatrix matrix)
    {
        // The four entries handed to FromArray for each right angle rotation.
        decimal[] entries;

        switch (Value)
        {
            case 0:
                entries = new decimal[] { 1, 0, 0, 1 };
                break;
            case 90:
                entries = new decimal[] { 0, -1, 1, 0 };
                break;
            case 180:
                entries = new decimal[] { -1, 0, 0, -1 };
                break;
            case 270:
                entries = new decimal[] { 0, 1, -1, 0 };
                break;
            default:
                throw new InvalidOperationException($"Invalid value for rotation: {Value}.");
        }

        var rotationMatrix = TransformationMatrix.FromArray(entries);

        return rotationMatrix.Multiply(matrix);
    }

    /// <inheritdoc />
    public override int GetHashCode() => Value.GetHashCode();

    /// <inheritdoc />
    public override string ToString() => Value.ToString();

    /// <inheritdoc />
    public override bool Equals(object obj) => obj is PageRotationDegrees other && Value == other.Value;

    /// <inheritdoc />
    public bool Equals(PageRotationDegrees other) => Value == other.Value;

    /// <summary>
    /// Equal.
    /// </summary>
    public static bool operator ==(PageRotationDegrees degrees1, PageRotationDegrees degrees2) => degrees1.Value == degrees2.Value;

    /// <summary>
    /// Not equal.
    /// </summary>
    public static bool operator !=(PageRotationDegrees degrees1, PageRotationDegrees degrees2) => degrees1.Value != degrees2.Value;
}
}

View File

@@ -2,6 +2,7 @@
{
using System;
using System.Collections.Generic;
using Exceptions;
using Fonts;
using Parser.Parts;
using Tokenization.Scanner;
@@ -15,6 +16,8 @@
private readonly Dictionary<IndirectReference, IFont> loadedFonts = new Dictionary<IndirectReference, IFont>();
private readonly Dictionary<NameToken, IndirectReference> currentResourceState = new Dictionary<NameToken, IndirectReference>();
private readonly Dictionary<NameToken, DictionaryToken> extendedGraphicsStates = new Dictionary<NameToken, DictionaryToken>();
public ResourceContainer(IPdfTokenScanner scanner, IFontFactory fontFactory)
{
this.scanner = scanner;
@@ -26,7 +29,7 @@
if (resourceDictionary.TryGet(NameToken.Font, out var fontBase))
{
var fontDictionary = DirectObjectFinder.Get<DictionaryToken>(fontBase, scanner);
LoadFontDictionary(fontDictionary, isLenientParsing);
}
@@ -44,6 +47,17 @@
currentResourceState[NameToken.Create(pair.Key)] = reference.Data;
}
}
if (resourceDictionary.TryGet(NameToken.ExtGState, scanner, out DictionaryToken extGStateDictionaryToken))
{
foreach (var pair in extGStateDictionaryToken.Data)
{
var name = NameToken.Create(pair.Key);
var state = DirectObjectFinder.Get<DictionaryToken>(pair.Value, scanner);
extendedGraphicsStates[name] = state;
}
}
}
private void LoadFontDictionary(DictionaryToken fontDictionary, bool isLenientParsing)
@@ -68,7 +82,7 @@
{
continue;
}
var fontObject = DirectObjectFinder.Get<DictionaryToken>(objectKey, scanner);
if (fontObject == null)
@@ -79,7 +93,7 @@
loadedFonts[reference] = fontFactory.Get(fontObject, isLenientParsing);
}
}
public IFont GetFont(NameToken name)
{
var reference = currentResourceState[name];
@@ -89,6 +103,18 @@
return font;
}
/// <summary>
/// Resolves the given indirect reference to a dictionary and asks the font factory to build a font from it.
/// This method does not read from or write to the loaded font cache.
/// </summary>
/// <param name="fontReferenceToken">The indirect reference expected to point at a font dictionary.</param>
/// <param name="isLenientParsing">Passed through to the font factory.</param>
/// <returns>The font produced by the font factory.</returns>
/// <exception cref="PdfDocumentFormatException">The reference could not be resolved to a dictionary.</exception>
public IFont GetFontDirectly(IndirectReferenceToken fontReferenceToken, bool isLenientParsing)
{
    var resolved = DirectObjectFinder.TryGet(fontReferenceToken, scanner, out DictionaryToken fontDictionaryToken);

    if (resolved)
    {
        return fontFactory.Get(fontDictionaryToken, isLenientParsing);
    }

    throw new PdfDocumentFormatException($"The requested font reference token {fontReferenceToken} wasn't a font.");
}
public StreamToken GetXObject(NameToken name)
{
var reference = currentResourceState[name];
@@ -97,6 +123,10 @@
return stream;
}
/// <summary>
/// Looks up a previously loaded extended graphics state dictionary by name.
/// </summary>
/// <param name="name">The name the dictionary was stored under.</param>
/// <returns>The stored dictionary.</returns>
/// <exception cref="System.Collections.Generic.KeyNotFoundException">No dictionary is stored under <paramref name="name"/>.</exception>
public DictionaryToken GetExtendedGraphicsStateDictionary(NameToken name) => extendedGraphicsStates[name];
}
}

View File

@@ -0,0 +1,17 @@
namespace UglyToad.PdfPig.Fonts
{
using Geometry;
/// <summary>
/// Pairs the bounding rectangle of a glyph with the glyph's width.
/// </summary>
internal class CharacterBoundingBox
{
    /// <summary>
    /// Create a <see cref="CharacterBoundingBox"/>.
    /// </summary>
    /// <param name="glyphBounds">The rectangle bounding the glyph.</param>
    /// <param name="width">The width of the glyph.</param>
    public CharacterBoundingBox(PdfRectangle glyphBounds, decimal width)
    {
        Width = width;
        GlyphBounds = glyphBounds;
    }

    /// <summary>
    /// The rectangle bounding the glyph.
    /// </summary>
    public PdfRectangle GlyphBounds { get; }

    /// <summary>
    /// The width of the glyph.
    /// </summary>
    public decimal Width { get; }
}
}

View File

@@ -1,7 +1,6 @@
namespace UglyToad.PdfPig.Fonts
{
using Core;
using Geometry;
using IO;
using Tokens;
@@ -19,17 +18,4 @@
TransformationMatrix GetFontMatrix();
}
/// <summary>
/// Pairs the bounding rectangle of a glyph with the glyph's width.
/// </summary>
internal class CharacterBoundingBox
{
    /// <summary>
    /// Create a <see cref="CharacterBoundingBox"/>.
    /// </summary>
    /// <param name="glyphBounds">The rectangle bounding the glyph.</param>
    /// <param name="width">The width of the glyph.</param>
    public CharacterBoundingBox(PdfRectangle glyphBounds, decimal width)
    {
        Width = width;
        GlyphBounds = glyphBounds;
    }

    /// <summary>
    /// The rectangle bounding the glyph.
    /// </summary>
    public PdfRectangle GlyphBounds { get; }

    /// <summary>
    /// The width of the glyph.
    /// </summary>
    public decimal Width { get; }
}
}

View File

@@ -11,89 +11,6 @@
using TrueType;
using Util.JetBrains.Annotations;
/// <summary>
/// Some TrueType fonts use both the Standard 14 descriptor and the TrueType font from disk.
/// This font prefers the TrueType font program where it can supply a value and otherwise
/// falls back to the supplied font metrics.
/// </summary>
internal class TrueTypeStandard14FallbackSimpleFont : IFont
{
    /// <summary>
    /// The font matrix used when the font program cannot supply a multiplier: scales by 1/1000.
    /// </summary>
    private static readonly TransformationMatrix DefaultTransformation =
        TransformationMatrix.FromValues(1m / 1000m, 0, 0, 1m / 1000m, 0, 0);

    private readonly FontMetrics fontMetrics;
    private readonly Encoding encoding;
    private readonly TrueTypeFontProgram font;

    public NameToken Name { get; }

    public bool IsVertical => false;

    /// <summary>
    /// Create a <see cref="TrueTypeStandard14FallbackSimpleFont"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="encoding"/> is <c>null</c>.</exception>
    public TrueTypeStandard14FallbackSimpleFont(NameToken name, FontMetrics fontMetrics, Encoding encoding, TrueTypeFontProgram font)
    {
        if (encoding == null)
        {
            throw new ArgumentNullException(nameof(encoding));
        }

        Name = name;
        this.fontMetrics = fontMetrics;
        this.encoding = encoding;
        this.font = font;
    }

    /// <summary>
    /// Character codes in this font are always a single byte: the current byte of the input.
    /// </summary>
    public int ReadCharacterCode(IInputBytes bytes, out int codeLength)
    {
        codeLength = 1;

        return bytes.CurrentByte;
    }

    /// <summary>
    /// Maps the character code to a character name through the encoding, then looks that name up in the Adobe Glyph List.
    /// </summary>
    /// <returns><c>true</c> if the lookup completed without throwing, otherwise <c>false</c>.</returns>
    public bool TryGetUnicode(int characterCode, out string value)
    {
        value = null;

        // The encoding lookup sits outside the try block, so a failure here propagates to the caller.
        var glyphName = encoding.GetName(characterCode);

        try
        {
            value = GlyphList.AdobeGlyphList.NameToUnicode(glyphName);

            return true;
        }
        catch
        {
            // Any failure in the glyph list lookup is reported as "no unicode value".
            return false;
        }
    }

    /// <summary>
    /// Gets the glyph bounds and width for the character code, transformed by the font matrix.
    /// The font program is asked first; the font metrics are used when it is absent or has no bounds for the code.
    /// </summary>
    public CharacterBoundingBox GetBoundingBox(int characterCode)
    {
        var matrix = GetFontMatrix();

        if (font != null && font.TryGetBoundingBox(characterCode, out var programBounds))
        {
            var scaledBounds = matrix.Transform(programBounds);

            // On this path the reported width is the width of the transformed bounds.
            return new CharacterBoundingBox(scaledBounds, scaledBounds.Width);
        }

        var glyphName = encoding.GetName(characterCode);
        var glyphMetrics = fontMetrics.CharacterMetrics[glyphName];

        var metricBounds = matrix.Transform(glyphMetrics.BoundingBox);
        var metricWidth = matrix.TransformX(glyphMetrics.WidthX);

        return new CharacterBoundingBox(metricBounds, metricWidth);
    }

    /// <summary>
    /// Gets the font matrix: a scale by the reciprocal of the font program's multiplier when the program has
    /// a header table, otherwise the default 1/1000 scale.
    /// </summary>
    public TransformationMatrix GetFontMatrix()
    {
        if (font?.TableRegister.HeaderTable == null)
        {
            return DefaultTransformation;
        }

        var multiplier = (decimal)font.GetFontMatrixMultiplier();

        return TransformationMatrix.FromValues(1 / multiplier, 0, 0, 1 / multiplier, 0, 0);
    }
}
internal class TrueTypeSimpleFont : IFont
{
private static readonly TransformationMatrix DefaultTransformation =
@@ -194,14 +111,14 @@
{
boundingBox = DefaultTransformation.Transform(boundingBox);
}
decimal width;
var index = characterCode - firstCharacter;
if (widths != null && index >= 0 && index < widths.Length)
{
fromFont = false;
width = widths[index];
width = widths[index];
}
else if (fontProgram != null)
{
@@ -214,7 +131,7 @@
{
throw new InvalidOperationException($"Could not retrieve width for character code: {characterCode} in font {Name}.");
}
if (fromFont)
{
width = fontMatrix.Transform(new PdfVector(width, 0)).X;
@@ -227,10 +144,22 @@
return new CharacterBoundingBox(boundingBox, width);
}
/// <summary>
/// Gets the font matrix: a uniform scale by the reciprocal of the font program's multiplier,
/// or by 1/1000 when there is no font program or it has no header table.
/// </summary>
public TransformationMatrix GetFontMatrix()
{
    decimal divisor;

    if (fontProgram?.TableRegister.HeaderTable == null)
    {
        divisor = 1000m;
    }
    else
    {
        divisor = fontProgram.GetFontMatrixMultiplier();
    }

    var factor = 1m / divisor;

    return TransformationMatrix.FromValues(factor, 0, 0, factor, 0, 0);
}
private PdfRectangle GetBoundingBoxInGlyphSpace(int characterCode, out bool fromFont)
{
fromFont = true;
if (fontProgram == null)
{
return descriptor.BoundingBox;
@@ -262,18 +191,6 @@
return widths[index];
}
/// <summary>
/// Gets the font matrix: a uniform scale by the reciprocal of the font program's multiplier,
/// or by 1/1000 when there is no font program or it has no header table.
/// </summary>
public TransformationMatrix GetFontMatrix()
{
    decimal divisor;

    if (fontProgram?.TableRegister.HeaderTable == null)
    {
        divisor = 1000m;
    }
    else
    {
        divisor = fontProgram.GetFontMatrixMultiplier();
    }

    var factor = 1m / divisor;

    return TransformationMatrix.FromValues(factor, 0, 0, factor, 0, 0);
}
}
}

View File

@@ -0,0 +1,92 @@
namespace UglyToad.PdfPig.Fonts.Simple
{
using System;
using Core;
using Encodings;
using IO;
using Tokens;
using TrueType;
/// <summary>
/// Some TrueType fonts use both the Standard 14 descriptor and the TrueType font from disk.
/// This font prefers the TrueType font program where it can supply a value and otherwise
/// falls back to the supplied font metrics.
/// </summary>
internal class TrueTypeStandard14FallbackSimpleFont : IFont
{
    /// <summary>
    /// The font matrix used when the font program cannot supply a multiplier: scales by 1/1000.
    /// </summary>
    private static readonly TransformationMatrix DefaultTransformation =
        TransformationMatrix.FromValues(1m / 1000m, 0, 0, 1m / 1000m, 0, 0);

    private readonly FontMetrics fontMetrics;
    private readonly Encoding encoding;
    private readonly TrueTypeFontProgram font;

    public NameToken Name { get; }

    public bool IsVertical => false;

    /// <summary>
    /// Create a <see cref="TrueTypeStandard14FallbackSimpleFont"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="encoding"/> is <c>null</c>.</exception>
    public TrueTypeStandard14FallbackSimpleFont(NameToken name, FontMetrics fontMetrics, Encoding encoding, TrueTypeFontProgram font)
    {
        if (encoding == null)
        {
            throw new ArgumentNullException(nameof(encoding));
        }

        Name = name;
        this.fontMetrics = fontMetrics;
        this.encoding = encoding;
        this.font = font;
    }

    /// <summary>
    /// Character codes in this font are always a single byte: the current byte of the input.
    /// </summary>
    public int ReadCharacterCode(IInputBytes bytes, out int codeLength)
    {
        codeLength = 1;

        return bytes.CurrentByte;
    }

    /// <summary>
    /// Maps the character code to a character name through the encoding, then looks that name up in the Adobe Glyph List.
    /// </summary>
    /// <returns><c>true</c> if the lookup completed without throwing, otherwise <c>false</c>.</returns>
    public bool TryGetUnicode(int characterCode, out string value)
    {
        value = null;

        // The encoding lookup sits outside the try block, so a failure here propagates to the caller.
        var glyphName = encoding.GetName(characterCode);

        try
        {
            value = GlyphList.AdobeGlyphList.NameToUnicode(glyphName);

            return true;
        }
        catch
        {
            // Any failure in the glyph list lookup is reported as "no unicode value".
            return false;
        }
    }

    /// <summary>
    /// Gets the glyph bounds and width for the character code, transformed by the font matrix.
    /// The font program is asked first; the font metrics are used when it is absent or has no bounds for the code.
    /// </summary>
    public CharacterBoundingBox GetBoundingBox(int characterCode)
    {
        var matrix = GetFontMatrix();

        if (font != null && font.TryGetBoundingBox(characterCode, out var programBounds))
        {
            var scaledBounds = matrix.Transform(programBounds);

            // On this path the reported width is the width of the transformed bounds.
            return new CharacterBoundingBox(scaledBounds, scaledBounds.Width);
        }

        var glyphName = encoding.GetName(characterCode);
        var glyphMetrics = fontMetrics.CharacterMetrics[glyphName];

        var metricBounds = matrix.Transform(glyphMetrics.BoundingBox);
        var metricWidth = matrix.TransformX(glyphMetrics.WidthX);

        return new CharacterBoundingBox(metricBounds, metricWidth);
    }

    /// <summary>
    /// Gets the font matrix: a scale by the reciprocal of the font program's multiplier when the program has
    /// a header table, otherwise the default 1/1000 scale.
    /// </summary>
    public TransformationMatrix GetFontMatrix()
    {
        if (font?.TableRegister.HeaderTable == null)
        {
            return DefaultTransformation;
        }

        var multiplier = (decimal)font.GetFontMatrixMultiplier();

        return TransformationMatrix.FromValues(1 / multiplier, 0, 0, 1 / multiplier, 0, 0);
    }
}
}

View File

@@ -240,6 +240,7 @@
}
}
data.Seek(0);
font = trueTypeFontParser.Parse(data);
var psName = font.TableRegister.NameTable?.GetPostscriptName() ?? font.Name;
if (!cache.ContainsKey(psName))

View File

@@ -4,6 +4,7 @@
using System.Collections.Generic;
using System.Diagnostics;
using Content;
using Core;
using Fonts;
using Geometry;
using IO;
@@ -20,12 +21,14 @@
private readonly List<PdfPath> paths = new List<PdfPath>();
private readonly IResourceStore resourceStore;
private readonly UserSpaceUnit userSpaceUnit;
private readonly PageRotationDegrees rotation;
private readonly bool isLenientParsing;
private readonly IPdfTokenScanner pdfScanner;
private readonly XObjectFactory xObjectFactory;
private readonly ILog log;
private Stack<CurrentGraphicsState> graphicsStack = new Stack<CurrentGraphicsState>();
private IFont activeExtendedGraphicsStateFont = null;
public TextMatrices TextMatrices { get; } = new TextMatrices();
@@ -43,13 +46,14 @@
public List<Letter> Letters = new List<Letter>();
public ContentStreamProcessor(PdfRectangle cropBox, IResourceStore resourceStore, UserSpaceUnit userSpaceUnit, bool isLenientParsing,
public ContentStreamProcessor(PdfRectangle cropBox, IResourceStore resourceStore, UserSpaceUnit userSpaceUnit, PageRotationDegrees rotation, bool isLenientParsing,
IPdfTokenScanner pdfScanner,
XObjectFactory xObjectFactory,
ILog log)
{
this.resourceStore = resourceStore;
this.userSpaceUnit = userSpaceUnit;
this.rotation = rotation;
this.isLenientParsing = isLenientParsing;
this.pdfScanner = pdfScanner;
this.xObjectFactory = xObjectFactory;
@@ -91,6 +95,7 @@
/// <summary>
/// Removes the top entry from the graphics state stack and clears the font
/// that was set through an extended graphics state dictionary.
/// </summary>
public void PopState()
{
graphicsStack.Pop();
// NOTE(review): the font is cleared unconditionally, even if the state now on top of the stack
// was itself using a font from an extended graphics state - confirm this is intended.
activeExtendedGraphicsStateFont = null;
}
public void PushState()
@@ -102,7 +107,7 @@
{
var currentState = GetCurrentState();
var font = resourceStore.GetFont(currentState.FontState.FontName);
var font = currentState.FontState.FromExtendedGraphicsState ? activeExtendedGraphicsStateFont : resourceStore.GetFont(currentState.FontState.FontName);
if (font == null)
{
@@ -147,11 +152,11 @@
var boundingBox = font.GetBoundingBox(code);
var transformedGlyphBounds = transformationMatrix
var transformedGlyphBounds = rotation.Rotate(transformationMatrix)
.Transform(TextMatrices.TextMatrix
.Transform(renderingMatrix
.Transform(boundingBox.GlyphBounds)));
var transformedPdfBounds = transformationMatrix
var transformedPdfBounds = rotation.Rotate(transformationMatrix)
.Transform(TextMatrices.TextMatrix
.Transform(renderingMatrix.Transform(new PdfRectangle(0, 0, boundingBox.Width, 0))));
@@ -274,6 +279,36 @@
CurrentPath = null;
}
/// <summary>
/// Applies entries from the named extended graphics state dictionary to the current graphics state.
/// Only the line width, line cap style, line join style and font entries are read; anything else in the
/// dictionary is left untouched.
/// </summary>
/// <param name="stateName">The name of the extended graphics state dictionary in the resource store.</param>
public void SetNamedGraphicsState(NameToken stateName)
{
    var graphicsState = GetCurrentState();

    var extendedState = resourceStore.GetExtendedGraphicsStateDictionary(stateName);

    if (extendedState.TryGet(NameToken.Lw, pdfScanner, out NumericToken lineWidth))
    {
        graphicsState.LineWidth = lineWidth.Data;
    }

    if (extendedState.TryGet(NameToken.Lc, pdfScanner, out NumericToken lineCap))
    {
        graphicsState.CapStyle = (LineCapStyle)lineCap.Int;
    }

    if (extendedState.TryGet(NameToken.Lj, pdfScanner, out NumericToken lineJoin))
    {
        graphicsState.JoinStyle = (LineJoinStyle)lineJoin.Int;
    }

    // The font entry is only honoured when it is a two element array.
    if (!extendedState.TryGet(NameToken.Font, pdfScanner, out ArrayToken fontEntry) || fontEntry.Length != 2)
    {
        return;
    }

    // The first element must be an indirect reference (the font) and the second a number (the size).
    if (fontEntry.Data[0] is IndirectReferenceToken fontReference && fontEntry.Data[1] is NumericToken fontSize)
    {
        graphicsState.FontState.FromExtendedGraphicsState = true;
        graphicsState.FontState.FontSize = fontSize.Data;

        activeExtendedGraphicsStateFont = resourceStore.GetFontDirectly(fontReference, isLenientParsing);
    }
}
private void AdjustTextMatrix(decimal tx, decimal ty)
{
var matrix = TransformationMatrix.GetTranslationMatrix(tx, ty);

View File

@@ -2,6 +2,7 @@
namespace UglyToad.PdfPig.Graphics
{
using Core;
using Operations.SpecialGraphicsState;
using PdfPig.Core;
using Tokens;
@@ -10,6 +11,11 @@ namespace UglyToad.PdfPig.Graphics
/// </summary>
public class CurrentFontState : IDeepCloneable<CurrentFontState>
{
/// <summary>
/// Whether the font comes from the extended graphics state via the <see cref="SetGraphicsStateParametersFromDictionary"/> operator.
/// </summary>
public bool FromExtendedGraphicsState { get; set; } = false;
/// <summary>
/// A value in unscaled text space units which is added to the horizontal (or vertical if in vertical writing mode)
/// glyph displacement.

View File

@@ -81,5 +81,11 @@
/// Close the current path.
/// </summary>
void ClosePath();
/// <summary>
/// Update the graphics state to apply the state from the named ExtGState dictionary.
/// </summary>
/// <param name="stateName">The name of the state to apply.</param>
void SetNamedGraphicsState(NameToken stateName);
}
}

View File

@@ -34,6 +34,7 @@
/// <inheritdoc />
// Hands this operation's state name to the context, which applies the named graphics state.
public void Run(IOperationContext operationContext) => operationContext.SetNamedGraphicsState(Name);
/// <inheritdoc />

View File

@@ -53,10 +53,10 @@
throw new InvalidOperationException($"Page {number} had its type specified as {type} rather than 'Page'.");
}
var rotation = pageTreeMembers.Rotation;
var rotation = new PageRotationDegrees(pageTreeMembers.Rotation);
if (dictionary.TryGet(NameToken.Rotate, pdfScanner, out NumericToken rotateToken))
{
rotation = rotateToken.Int;
rotation = new PageRotationDegrees(rotateToken.Int);
}
MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers, isLenientParsing);
@@ -100,7 +100,7 @@
}
}
content = GetContent(bytes, cropBox, userSpaceUnit, isLenientParsing);
content = GetContent(bytes, cropBox, userSpaceUnit, rotation, isLenientParsing);
}
else
{
@@ -113,7 +113,7 @@
var bytes = contentStream.Decode(filterProvider);
content = GetContent(bytes, cropBox, userSpaceUnit, isLenientParsing);
content = GetContent(bytes, cropBox, userSpaceUnit, rotation, isLenientParsing);
}
var page = new Page(number, dictionary, mediaBox, cropBox, rotation, content, new AnnotationProvider(pdfScanner, dictionary, isLenientParsing));
@@ -121,11 +121,11 @@
return page;
}
private PageContent GetContent(IReadOnlyList<byte> contentBytes, CropBox cropBox, UserSpaceUnit userSpaceUnit, bool isLenientParsing)
private PageContent GetContent(IReadOnlyList<byte> contentBytes, CropBox cropBox, UserSpaceUnit userSpaceUnit, PageRotationDegrees rotation, bool isLenientParsing)
{
var operations = pageContentParser.Parse(new ByteArrayInputBytes(contentBytes));
var context = new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit, isLenientParsing, pdfScanner, xObjectFactory, log);
var context = new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit, rotation, isLenientParsing, pdfScanner, xObjectFactory, log);
return context.Process(operations);
}