changes to cmap to correctly map for cid fonts. tests for codespace range

This commit is contained in:
Eliot Jones
2017-12-27 17:49:08 +00:00
parent 889b6a346d
commit ae79108d3c
10 changed files with 213 additions and 30 deletions

View File

@@ -0,0 +1,110 @@
namespace UglyToad.Pdf.Tests.Fonts.Cmap
{
using System;
using System.Linq;
using Pdf.Fonts.Cmap;
using Pdf.Tokenization.Tokens;
using Xunit;
/// <summary>
/// Tests for <see cref="CodespaceRange"/>: construction from hex bytes and code matching.
/// </summary>
public class CodespaceRangeTests
{
    [Theory]
    [InlineData("00", "80", 0, 128, 1)]
    [InlineData("8140", "9ffc", 33088, 40956, 2)]
    public void CreatesCorrectly(string startHex, string endHex, int startInt, int endInt, int codeLength)
    {
        var lowerBound = GetHexBytes(startHex.ToCharArray());
        var upperBound = GetHexBytes(endHex.ToCharArray());

        var range = new CodespaceRange(lowerBound, upperBound);

        Assert.Equal(startInt, range.StartInt);
        Assert.Equal(endInt, range.EndInt);
        Assert.Equal(codeLength, range.CodeLength);
    }

    [Fact]
    public void MatchesCodeIsNullThrows()
    {
        var codespace = CreateSingleByteRange();

        // Statement lambda so the delegate can only bind to Action.
        Assert.Throws<ArgumentNullException>(() => { codespace.Matches(null); });
    }

    [Fact]
    public void IsFullMatchCodeIsNullThrows()
    {
        var codespace = CreateSingleByteRange();

        // Statement lambda so the delegate can only bind to Action.
        Assert.Throws<ArgumentNullException>(() => { codespace.IsFullMatch(null, 2); });
    }

    [Fact]
    public void MatchesCodeWrongLengthFalse()
    {
        var codespace = CreateSingleByteRange();

        // Two bytes against a one byte range.
        var candidate = GetHexBytes('6', '9', '0', '1');

        Assert.False(codespace.Matches(candidate));
    }

    [Fact]
    public void MatchesCodeLowerThanStartFalse()
    {
        var codespace = CreateSingleByteRange();

        var candidate = GetHexBytes('0', '1');

        Assert.False(codespace.Matches(candidate));
    }

    [Fact]
    public void MatchesCodeHigherThanEndFalse()
    {
        var codespace = CreateSingleByteRange();

        var candidate = GetHexBytes('9', '6');

        Assert.False(codespace.Matches(candidate));
    }

    [Fact]
    public void MatchesCodeInRangeTrue()
    {
        var codespace = CreateSingleByteRange();

        var candidate = GetHexBytes('5', 'A');

        Assert.True(codespace.Matches(candidate));
    }

    /// <summary>
    /// Builds the single byte range from hex 0A to hex 80 shared by the matching tests.
    /// </summary>
    private static CodespaceRange CreateSingleByteRange()
    {
        var lowerBound = GetHexBytes('0', 'A');
        var upperBound = GetHexBytes('8', '0');

        return new CodespaceRange(lowerBound, upperBound);
    }

    /// <summary>
    /// Converts hex characters to bytes by way of a <see cref="HexToken"/>.
    /// </summary>
    private static byte[] GetHexBytes(params char[] characters)
        => new HexToken(characters).Bytes.ToArray();
}
}

View File

@@ -3,7 +3,7 @@
using CidFonts;
/// <summary>
/// Specifies the character collection associated with the <see cref="CharacterIdentifierFont"/> (CIDFont).
/// Specifies the character collection associated with the <see cref="ICidFont"/> (CIDFont).
/// </summary>
public struct CharacterIdentifierSystemInfo
{

View File

@@ -1,6 +1,5 @@
namespace UglyToad.Pdf.Fonts.CidFonts
{
using Cmap;
using Cos;
/// <summary>
@@ -36,5 +35,9 @@
CidFontType CidFontType { get; }
FontDescriptor Descriptor { get; }
decimal GetWidthFromFont(int characterCode);
decimal GetWidthFromDictionary(int cid);
}
}

View File

@@ -15,5 +15,15 @@
public CharacterIdentifierSystemInfo SystemInfo { get; }
public CidFontType CidFontType => CidFontType.Type0;
public FontDescriptor Descriptor { get; }
/// <summary>
/// Not implemented for this font type.
/// </summary>
/// <param name="characterCode">The character code; currently unused.</param>
/// <exception cref="System.NotImplementedException">Always thrown.</exception>
public decimal GetWidthFromFont(int characterCode)
    => throw new System.NotImplementedException();
/// <summary>
/// Not implemented for this font type.
/// </summary>
/// <param name="cid">The character identifier; currently unused.</param>
/// <exception cref="System.NotImplementedException">Always thrown.</exception>
public decimal GetWidthFromDictionary(int cid)
    => throw new System.NotImplementedException();
}
}

View File

@@ -35,5 +35,20 @@
this.verticalWritingMetrics = verticalWritingMetrics;
this.widths = widths;
}
/// <summary>
/// Not implemented yet: reading a width from the font program itself.
/// </summary>
/// <param name="characterCode">The character code; currently unused.</param>
/// <exception cref="System.NotImplementedException">Always thrown.</exception>
public decimal GetWidthFromFont(int characterCode)
    => throw new System.NotImplementedException();
/// <summary>
/// Looks up the width recorded for a CID in this font's widths dictionary.
/// </summary>
/// <param name="cid">The character identifier used as the dictionary key.</param>
/// <returns>
/// The width stored for <paramref name="cid"/>, or the descriptor's missing width
/// when the dictionary has no entry for it.
/// </returns>
public decimal GetWidthFromDictionary(int cid)
{
    // No explicit entry: fall back to the descriptor's missing width.
    if (!widths.TryGetValue(cid, out var knownWidth))
    {
        return Descriptor.MissingWidth;
    }

    return knownWidth;
}
}
}

View File

@@ -6,21 +6,36 @@
using IO;
using Util.JetBrains.Annotations;
/// <summary>
/// The CMap (character code map) maps character codes to character identifiers (CIDs).
/// The set of characters which a CMap refers to is the "character set" (charset).
/// </summary>
internal class CMap
{
public CharacterIdentifierSystemInfo Info { get; }
/// <summary>
/// Defines the type of the internal organization of the CMap file.
/// </summary>
public int Type { get; }
public int WMode { get; }
/// <summary>
/// Defines the name of the CMap file.
/// </summary>
public string Name { get; }
/// <summary>
/// The version number of the CIDFont file.
/// </summary>
[CanBeNull]
public string Version { get; }
[NotNull]
public IReadOnlyDictionary<int, string> BaseFontCharacterMap { get; }
/// <summary>
/// Describes the set of valid input character codes.
/// </summary>
[NotNull]
public IReadOnlyList<CodespaceRange> CodespaceRanges { get; }
@@ -30,6 +45,9 @@
[NotNull]
public IReadOnlyList<CidCharacterMapping> CidCharacterMappings { get; }
/// <summary>
/// Controls whether the font associated with the CMap writes horizontally or vertically.
/// </summary>
public WritingMode WritingMode { get; }
public bool HasCidMappings => CidCharacterMappings.Count > 0 || CidRanges.Count > 0;
@@ -54,15 +72,6 @@
minCodeLength = CodespaceRanges.Min(x => x.CodeLength);
}
private string cmapName = null;
private string cmapVersion = null;
private int cmapType = -1;
private string registry = null;
private string ordering = null;
private int supplement = 0;
// CID mappings
private readonly Dictionary<int, int> codeToCid = new Dictionary<int, int>();
private readonly List<CidRange> codeToCidRanges = new List<CidRange>();
@@ -111,7 +120,7 @@
/// <summary>
/// Returns the <see cref="Name"/> of this CMap.
/// </summary>
/// <returns>The value of <see cref="Name"/>.</returns>
public override string ToString()
{
    // The stale return of the legacy cmapName field made this line unreachable.
    return Name;
}
public int ReadCode(IInputBytes bytes)
@@ -130,7 +139,7 @@
int byteCount = i + 1;
foreach (CodespaceRange range in CodespaceRanges)
{
if (range.isFullMatch(result, byteCount))
if (range.IsFullMatch(result, byteCount))
{
return ByteArrayToInt(result, byteCount);
}

View File

@@ -1,5 +1,6 @@
namespace UglyToad.Pdf.Fonts.Cmap
{
using System;
using System.Collections.Generic;
/// <summary>
@@ -7,14 +8,23 @@
/// </summary>
public class CodespaceRange
{
/// <summary>
/// The lower-bound of this range.
/// </summary>
public IReadOnlyList<byte> Start { get; }
/// <summary>
/// The upper-bound of this range.
/// </summary>
public IReadOnlyList<byte> End { get; }
public int StartInt { get; }
public int EndInt { get; }
/// <summary>
/// The number of bytes for numbers in this range.
/// </summary>
public int CodeLength { get; }
/// <summary>
@@ -29,19 +39,29 @@
CodeLength = start.Count;
}
/// <summary>
/// Returns <see langword="true"/> if the given code bytes match this codespace range.
/// </summary>
/// <param name="code">The bytes of the code to test; the whole array is used.</param>
/// <returns>The result of <see cref="IsFullMatch"/> called with the full length of <paramref name="code"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="code"/> is <see langword="null"/>.</exception>
public bool Matches(byte[] code)
{
    // Guard first: reading code.Length on null would throw NullReferenceException instead.
    if (code == null)
    {
        throw new ArgumentNullException(nameof(code));
    }

    return IsFullMatch(code, code.Length);
}
/// <summary>
/// Returns true if the given code bytes match this codespace range.
/// </summary>
public bool IsFullMatch(byte[] code, int codeLen)
{
if (code == null)
{
throw new ArgumentNullException(nameof(code));
}
// code must be the same length as the bounding codes
if (codeLen == CodeLength)
{

View File

@@ -41,7 +41,7 @@
{
var current = bytes.CurrentOffset;
var code = ToUnicode.ReadCode(bytes);
var code = CMap.ReadCode(bytes);
codeLength = bytes.CurrentOffset - current;
@@ -70,7 +70,19 @@
/// <summary>
/// Gets the displacement vector for a character code, derived from its width.
/// </summary>
/// <param name="characterCode">The character code passed on to <see cref="GetWidth"/>.</param>
/// <returns>
/// A <see cref="PdfVector"/> whose first component is the width divided by 1000
/// and whose second component is 0.
/// </returns>
public PdfVector GetDisplacement(int characterCode)
{
    // This width is in units scaled up by 1000
    var width = GetWidth(characterCode);

    // The stale hard-coded 0.333 return is removed so the real width is used.
    return new PdfVector(width / 1000, 0);
}
/// <summary>
/// Gets the width for a character code by converting it to a CID with the CMap
/// and asking the CID font for that CID's dictionary width.
/// </summary>
/// <param name="characterCode">The character code to convert to a CID.</param>
/// <returns>The value returned by the CID font's <c>GetWidthFromDictionary</c> for the CID.</returns>
public decimal GetWidth(int characterCode)
{
    var characterIdentifier = CMap.ConvertToCid(characterCode);

    return CidFont.GetWidthFromDictionary(characterIdentifier);
}
}
}

View File

@@ -15,5 +15,7 @@
bool TryGetUnicode(int characterCode, out string value);
PdfVector GetDisplacement(int characterCode);
decimal GetWidth(int characterCode);
}
}

View File

@@ -87,6 +87,8 @@
font.TryGetUnicode(code, out var unicode);
var width = font.GetWidth(code);
var wordSpacing = 0m;
if (code == ' ' && codeLength == 1)
{
@@ -97,7 +99,7 @@
if (font.IsVertical)
{
throw new NotImplementedException("Vertical fonts are currently unsupported, please submit a pull request.");
throw new NotImplementedException("Vertical fonts are currently unsupported, please submit a pull request or issue with an example file.");
}
var displacement = font.GetDisplacement(code);