Changes to the CMap so that character codes map correctly for CID fonts. Adds tests for CodespaceRange.

This commit is contained in:
Eliot Jones
2017-12-27 17:49:08 +00:00
parent 889b6a346d
commit ae79108d3c
10 changed files with 213 additions and 30 deletions

View File

@@ -0,0 +1,110 @@
namespace UglyToad.Pdf.Tests.Fonts.Cmap
{
using System;
using System.Linq;
using Pdf.Fonts.Cmap;
using Pdf.Tokenization.Tokens;
using Xunit;
/// <summary>
/// Unit tests for <see cref="CodespaceRange"/>: construction from hex-encoded bounds,
/// argument validation and the range matching methods.
/// </summary>
public class CodespaceRangeTests
{
    /// <summary>
    /// The integer bounds and the code length must reflect the hex strings used to build the range.
    /// </summary>
    [Theory]
    [InlineData("00", "80", 0, 128, 1)]
    [InlineData("8140", "9ffc", 33088, 40956, 2)]
    public void CreatesCorrectly(string startHex, string endHex, int startInt, int endInt, int codeLength)
    {
        var lowerBound = GetHexBytes(startHex.ToCharArray());
        var upperBound = GetHexBytes(endHex.ToCharArray());

        var range = new CodespaceRange(lowerBound, upperBound);

        Assert.Equal(startInt, range.StartInt);
        Assert.Equal(endInt, range.EndInt);
        Assert.Equal(codeLength, range.CodeLength);
    }

    /// <summary>
    /// Passing a null code to <c>Matches</c> must raise <see cref="ArgumentNullException"/>.
    /// </summary>
    [Fact]
    public void MatchesCodeIsNullThrows()
    {
        var range = CreateSingleByteRange();

        Action matchNull = () => range.Matches(null);

        Assert.Throws<ArgumentNullException>(matchNull);
    }

    /// <summary>
    /// Passing a null code to <c>IsFullMatch</c> must raise <see cref="ArgumentNullException"/>.
    /// </summary>
    [Fact]
    public void IsFullMatchCodeIsNullThrows()
    {
        var range = CreateSingleByteRange();

        Action fullMatchNull = () => range.IsFullMatch(null, 2);

        Assert.Throws<ArgumentNullException>(fullMatchNull);
    }

    /// <summary>
    /// A code built from four hex digits must not match a range whose bounds are built from two.
    /// </summary>
    [Fact]
    public void MatchesCodeWrongLengthFalse()
    {
        var range = CreateSingleByteRange();
        var candidate = GetHexBytes('6', '9', '0', '1');

        Assert.False(range.Matches(candidate));
    }

    /// <summary>
    /// The code 01 is expected to be rejected by the range 0A to 80.
    /// </summary>
    [Fact]
    public void MatchesCodeLowerThanStartFalse()
    {
        var range = CreateSingleByteRange();
        var candidate = GetHexBytes('0', '1');

        Assert.False(range.Matches(candidate));
    }

    /// <summary>
    /// The code 96 is expected to be rejected by the range 0A to 80.
    /// </summary>
    [Fact]
    public void MatchesCodeHigherThanEndFalse()
    {
        var range = CreateSingleByteRange();
        var candidate = GetHexBytes('9', '6');

        Assert.False(range.Matches(candidate));
    }

    /// <summary>
    /// The code 5A is expected to be accepted by the range 0A to 80.
    /// </summary>
    [Fact]
    public void MatchesCodeInRangeTrue()
    {
        var range = CreateSingleByteRange();
        var candidate = GetHexBytes('5', 'A');

        Assert.True(range.Matches(candidate));
    }

    /// <summary>
    /// Builds the range shared by the matching tests, with bounds from the hex digits 0A and 80.
    /// </summary>
    private static CodespaceRange CreateSingleByteRange()
    {
        return new CodespaceRange(GetHexBytes('0', 'A'), GetHexBytes('8', '0'));
    }

    /// <summary>
    /// Converts hex digit characters to bytes by way of a <see cref="HexToken"/>.
    /// </summary>
    private static byte[] GetHexBytes(params char[] characters) => new HexToken(characters).Bytes.ToArray();
}
}

View File

@@ -3,7 +3,7 @@
using CidFonts; using CidFonts;
/// <summary> /// <summary>
/// Specifies the character collection associated with the <see cref="CharacterIdentifierFont"/> (CIDFont). /// Specifies the character collection associated with the <see cref="ICidFont"/> (CIDFont).
/// </summary> /// </summary>
public struct CharacterIdentifierSystemInfo public struct CharacterIdentifierSystemInfo
{ {

View File

@@ -1,6 +1,5 @@
namespace UglyToad.Pdf.Fonts.CidFonts namespace UglyToad.Pdf.Fonts.CidFonts
{ {
using Cmap;
using Cos; using Cos;
/// <summary> /// <summary>
@@ -36,5 +35,9 @@
CidFontType CidFontType { get; } CidFontType CidFontType { get; }
FontDescriptor Descriptor { get; } FontDescriptor Descriptor { get; }
decimal GetWidthFromFont(int characterCode);
decimal GetWidthFromDictionary(int cid);
} }
} }

View File

@@ -15,5 +15,15 @@
public CharacterIdentifierSystemInfo SystemInfo { get; } public CharacterIdentifierSystemInfo SystemInfo { get; }
public CidFontType CidFontType => CidFontType.Type0; public CidFontType CidFontType => CidFontType.Type0;
public FontDescriptor Descriptor { get; } public FontDescriptor Descriptor { get; }
/// <summary>
/// Not yet implemented for this CID font; every call throws.
/// </summary>
/// <param name="characterCode">The character code to look up (currently unused).</param>
/// <exception cref="System.NotImplementedException">Always thrown.</exception>
public decimal GetWidthFromFont(int characterCode) => throw new System.NotImplementedException();
/// <summary>
/// Not yet implemented for this CID font; every call throws.
/// </summary>
/// <param name="cid">The character identifier to look up (currently unused).</param>
/// <exception cref="System.NotImplementedException">Always thrown.</exception>
public decimal GetWidthFromDictionary(int cid) => throw new System.NotImplementedException();
} }
} }

View File

@@ -35,5 +35,20 @@
this.verticalWritingMetrics = verticalWritingMetrics; this.verticalWritingMetrics = verticalWritingMetrics;
this.widths = widths; this.widths = widths;
} }
/// <summary>
/// Not yet implemented for this CID font; every call throws.
/// </summary>
/// <param name="characterCode">The character code to look up (currently unused).</param>
/// <exception cref="System.NotImplementedException">Always thrown.</exception>
public decimal GetWidthFromFont(int characterCode) => throw new System.NotImplementedException();
/// <summary>
/// Looks up the width recorded for a CID in this font's <c>widths</c> collection.
/// </summary>
/// <param name="cid">The character identifier to look up.</param>
/// <returns>
/// The entry stored for <paramref name="cid"/>, or the descriptor's <c>MissingWidth</c>
/// when the collection holds no entry for it.
/// </returns>
public decimal GetWidthFromDictionary(int cid)
{
    // No explicit entry for this CID: fall back to the descriptor's default.
    if (!widths.TryGetValue(cid, out var knownWidth))
    {
        return Descriptor.MissingWidth;
    }

    return knownWidth;
}
} }
} }

View File

@@ -6,21 +6,36 @@
using IO; using IO;
using Util.JetBrains.Annotations; using Util.JetBrains.Annotations;
/// <summary>
/// The CMap (character code map) maps character codes to character identifiers (CIDs).
/// The set of characters which a CMap refers to is the "character set" (charset).
/// </summary>
internal class CMap internal class CMap
{ {
public CharacterIdentifierSystemInfo Info { get; } public CharacterIdentifierSystemInfo Info { get; }
/// <summary>
/// Defines the type of the internal organization of the CMap file.
/// </summary>
public int Type { get; } public int Type { get; }
public int WMode { get; } /// <summary>
/// Defines the name of the CMap file.
/// </summary>
public string Name { get; } public string Name { get; }
/// <summary>
/// The version number of the CMap file.
/// </summary>
[CanBeNull]
public string Version { get; } public string Version { get; }
[NotNull] [NotNull]
public IReadOnlyDictionary<int, string> BaseFontCharacterMap { get; } public IReadOnlyDictionary<int, string> BaseFontCharacterMap { get; }
/// <summary>
/// Describes the set of valid input character codes.
/// </summary>
[NotNull] [NotNull]
public IReadOnlyList<CodespaceRange> CodespaceRanges { get; } public IReadOnlyList<CodespaceRange> CodespaceRanges { get; }
@@ -30,6 +45,9 @@
[NotNull] [NotNull]
public IReadOnlyList<CidCharacterMapping> CidCharacterMappings { get; } public IReadOnlyList<CidCharacterMapping> CidCharacterMappings { get; }
/// <summary>
/// Controls whether the font associated with the CMap writes horizontally or vertically.
/// </summary>
public WritingMode WritingMode { get; } public WritingMode WritingMode { get; }
public bool HasCidMappings => CidCharacterMappings.Count > 0 || CidRanges.Count > 0; public bool HasCidMappings => CidCharacterMappings.Count > 0 || CidRanges.Count > 0;
@@ -54,15 +72,6 @@
minCodeLength = CodespaceRanges.Min(x => x.CodeLength); minCodeLength = CodespaceRanges.Min(x => x.CodeLength);
} }
private string cmapName = null;
private string cmapVersion = null;
private int cmapType = -1;
private string registry = null;
private string ordering = null;
private int supplement = 0;
// CID mappings // CID mappings
private readonly Dictionary<int, int> codeToCid = new Dictionary<int, int>(); private readonly Dictionary<int, int> codeToCid = new Dictionary<int, int>();
private readonly List<CidRange> codeToCidRanges = new List<CidRange>(); private readonly List<CidRange> codeToCidRanges = new List<CidRange>();
@@ -111,7 +120,7 @@
public override string ToString() public override string ToString()
{ {
return cmapName; return Name;
} }
public int ReadCode(IInputBytes bytes) public int ReadCode(IInputBytes bytes)
@@ -130,7 +139,7 @@
int byteCount = i + 1; int byteCount = i + 1;
foreach (CodespaceRange range in CodespaceRanges) foreach (CodespaceRange range in CodespaceRanges)
{ {
if (range.isFullMatch(result, byteCount)) if (range.IsFullMatch(result, byteCount))
{ {
return ByteArrayToInt(result, byteCount); return ByteArrayToInt(result, byteCount);
} }

View File

@@ -1,5 +1,6 @@
namespace UglyToad.Pdf.Fonts.Cmap namespace UglyToad.Pdf.Fonts.Cmap
{ {
using System;
using System.Collections.Generic; using System.Collections.Generic;
/// <summary> /// <summary>
@@ -7,16 +8,25 @@
/// </summary> /// </summary>
public class CodespaceRange public class CodespaceRange
{ {
/// <summary>
/// The lower-bound of this range.
/// </summary>
public IReadOnlyList<byte> Start { get; } public IReadOnlyList<byte> Start { get; }
/// <summary>
/// The upper-bound of this range.
/// </summary>
public IReadOnlyList<byte> End { get; } public IReadOnlyList<byte> End { get; }
public int StartInt { get; } public int StartInt { get; }
public int EndInt { get; } public int EndInt { get; }
/// <summary>
/// The number of bytes for numbers in this range.
/// </summary>
public int CodeLength { get; } public int CodeLength { get; }
/// <summary> /// <summary>
/// Creates a new instance of <see cref="CodespaceRange"/>. /// Creates a new instance of <see cref="CodespaceRange"/>.
/// </summary> /// </summary>
@@ -28,20 +38,30 @@
EndInt = end.ToInt(end.Count); EndInt = end.ToInt(end.Count);
CodeLength = start.Count; CodeLength = start.Count;
} }
/** /// <summary>
* Returns true if the given code bytes match this codespace range. /// Returns <see langword="true"/> if the given code bytes match this codespace range.
*/ /// </summary>
public bool matches(byte[] code) public bool Matches(byte[] code)
{ {
return isFullMatch(code, code.Length); if (code == null)
{
throw new ArgumentNullException(nameof(code));
}
return IsFullMatch(code, code.Length);
} }
/** /// <summary>
* Returns true if the given code bytes match this codespace range. /// Returns true if the given code bytes match this codespace range.
*/ /// </summary>
public bool isFullMatch(byte[] code, int codeLen) public bool IsFullMatch(byte[] code, int codeLen)
{ {
if (code == null)
{
throw new ArgumentNullException(nameof(code));
}
// code must be the same length as the bounding codes // code must be the same length as the bounding codes
if (codeLen == CodeLength) if (codeLen == CodeLength)
{ {

View File

@@ -41,7 +41,7 @@
{ {
var current = bytes.CurrentOffset; var current = bytes.CurrentOffset;
var code = ToUnicode.ReadCode(bytes); var code = CMap.ReadCode(bytes);
codeLength = bytes.CurrentOffset - current; codeLength = bytes.CurrentOffset - current;
@@ -70,7 +70,19 @@
public PdfVector GetDisplacement(int characterCode) public PdfVector GetDisplacement(int characterCode)
{ {
return new PdfVector(0.333m, 0); // This width is in units scaled up by 1000
var width = GetWidth(characterCode);
return new PdfVector(width / 1000, 0);
}
public decimal GetWidth(int characterCode)
{
var cid = CMap.ConvertToCid(characterCode);
var fromFont = CidFont.GetWidthFromDictionary(cid);
return fromFont;
} }
} }
} }

View File

@@ -15,5 +15,7 @@
bool TryGetUnicode(int characterCode, out string value); bool TryGetUnicode(int characterCode, out string value);
PdfVector GetDisplacement(int characterCode); PdfVector GetDisplacement(int characterCode);
decimal GetWidth(int characterCode);
} }
} }

View File

@@ -87,6 +87,8 @@
font.TryGetUnicode(code, out var unicode); font.TryGetUnicode(code, out var unicode);
var width = font.GetWidth(code);
var wordSpacing = 0m; var wordSpacing = 0m;
if (code == ' ' && codeLength == 1) if (code == ' ' && codeLength == 1)
{ {
@@ -97,7 +99,7 @@
if (font.IsVertical) if (font.IsVertical)
{ {
throw new NotImplementedException("Vertical fonts are currently unsupported, please submit a pull request."); throw new NotImplementedException("Vertical fonts are currently unsupported, please submit a pull request or issue with an example file.");
} }
var displacement = font.GetDisplacement(code); var displacement = font.GetDisplacement(code);