mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-10-14 10:55:04 +08:00
Changes to CMap to correctly map character codes for CID fonts. Adds tests for CodespaceRange.
This commit is contained in:
110
src/UglyToad.Pdf.Tests/Fonts/Cmap/CodespaceRangeTests.cs
Normal file
110
src/UglyToad.Pdf.Tests/Fonts/Cmap/CodespaceRangeTests.cs
Normal file
@@ -0,0 +1,110 @@
|
||||
namespace UglyToad.Pdf.Tests.Fonts.Cmap
{
    using System;
    using System.Linq;
    using Pdf.Fonts.Cmap;
    using Pdf.Tokenization.Tokens;
    using Xunit;

    /// <summary>
    /// Tests for <see cref="CodespaceRange"/>: construction from hex bytes,
    /// null-argument handling and matching of codes against the range.
    /// </summary>
    public class CodespaceRangeTests
    {
        [Theory]
        [InlineData("00", "80", 0, 128, 1)]
        [InlineData("8140", "9ffc", 33088, 40956, 2)]
        public void CreatesCorrectly(string startHex, string endHex, int startInt, int endInt, int codeLength)
        {
            var lowerBound = GetHexBytes(startHex.ToCharArray());
            var upperBound = GetHexBytes(endHex.ToCharArray());

            var range = new CodespaceRange(lowerBound, upperBound);

            Assert.Equal(startInt, range.StartInt);
            Assert.Equal(endInt, range.EndInt);

            Assert.Equal(codeLength, range.CodeLength);
        }

        [Fact]
        public void MatchesCodeIsNullThrows()
        {
            var range = CreateSingleByteRange();

            Action matchNull = () => range.Matches(null);

            Assert.Throws<ArgumentNullException>(matchNull);
        }

        [Fact]
        public void IsFullMatchCodeIsNullThrows()
        {
            var range = CreateSingleByteRange();

            Action fullMatchNull = () => range.IsFullMatch(null, 2);

            Assert.Throws<ArgumentNullException>(fullMatchNull);
        }

        [Fact]
        public void MatchesCodeWrongLengthFalse()
        {
            var range = CreateSingleByteRange();

            // Two bytes checked against a one byte range.
            var twoByteCode = GetHexBytes('6', '9', '0', '1');

            Assert.False(range.Matches(twoByteCode));
        }

        [Fact]
        public void MatchesCodeLowerThanStartFalse()
        {
            var range = CreateSingleByteRange();

            var belowStart = GetHexBytes('0', '1');

            Assert.False(range.Matches(belowStart));
        }

        [Fact]
        public void MatchesCodeHigherThanEndFalse()
        {
            var range = CreateSingleByteRange();

            var aboveEnd = GetHexBytes('9', '6');

            Assert.False(range.Matches(aboveEnd));
        }

        [Fact]
        public void MatchesCodeInRangeTrue()
        {
            var range = CreateSingleByteRange();

            var insideRange = GetHexBytes('5', 'A');

            Assert.True(range.Matches(insideRange));
        }

        /// <summary>
        /// Builds the range shared by the matching tests, from hex 0A to hex 80.
        /// </summary>
        private static CodespaceRange CreateSingleByteRange()
        {
            var lowerBound = GetHexBytes('0', 'A');
            var upperBound = GetHexBytes('8', '0');

            return new CodespaceRange(lowerBound, upperBound);
        }

        /// <summary>
        /// Converts hex characters to bytes by way of a <see cref="HexToken"/>.
        /// </summary>
        private static byte[] GetHexBytes(params char[] characters)
        {
            return new HexToken(characters).Bytes.ToArray();
        }
    }
}
|
@@ -3,7 +3,7 @@
|
||||
using CidFonts;
|
||||
|
||||
/// <summary>
|
||||
/// Specifies the character collection associated with the <see cref="CharacterIdentifierFont"/> (CIDFont).
|
||||
/// Specifies the character collection associated with the <see cref="ICidFont"/> (CIDFont).
|
||||
/// </summary>
|
||||
public struct CharacterIdentifierSystemInfo
|
||||
{
|
||||
|
@@ -1,6 +1,5 @@
|
||||
namespace UglyToad.Pdf.Fonts.CidFonts
|
||||
{
|
||||
using Cmap;
|
||||
using Cos;
|
||||
|
||||
/// <summary>
|
||||
@@ -36,5 +35,9 @@
|
||||
CidFontType CidFontType { get; }
|
||||
|
||||
FontDescriptor Descriptor { get; }
|
||||
|
||||
decimal GetWidthFromFont(int characterCode);
|
||||
|
||||
decimal GetWidthFromDictionary(int cid);
|
||||
}
|
||||
}
|
@@ -15,5 +15,15 @@
|
||||
public CharacterIdentifierSystemInfo SystemInfo { get; }
|
||||
public CidFontType CidFontType => CidFontType.Type0;
|
||||
public FontDescriptor Descriptor { get; }
|
||||
|
||||
/// <summary>
/// Not implemented yet; calling this always throws.
/// </summary>
/// <param name="characterCode">The character code whose width is requested (currently unused).</param>
/// <exception cref="System.NotImplementedException">Always thrown.</exception>
public decimal GetWidthFromFont(int characterCode) => throw new System.NotImplementedException();
|
||||
|
||||
/// <summary>
/// Not implemented yet; calling this always throws.
/// </summary>
/// <param name="cid">The character identifier whose width is requested (currently unused).</param>
/// <exception cref="System.NotImplementedException">Always thrown.</exception>
public decimal GetWidthFromDictionary(int cid) => throw new System.NotImplementedException();
|
||||
}
|
||||
}
|
||||
|
@@ -35,5 +35,20 @@
|
||||
this.verticalWritingMetrics = verticalWritingMetrics;
|
||||
this.widths = widths;
|
||||
}
|
||||
|
||||
/// <summary>
/// Not implemented yet; calling this always throws.
/// </summary>
/// <param name="characterCode">The character code whose width is requested (currently unused).</param>
/// <exception cref="System.NotImplementedException">Always thrown.</exception>
public decimal GetWidthFromFont(int characterCode) => throw new System.NotImplementedException();
|
||||
|
||||
/// <summary>
/// Looks up the width recorded for a CID in this font's <c>widths</c> table.
/// </summary>
/// <param name="cid">The character identifier to look up.</param>
/// <returns>
/// The stored width when the table has an entry for <paramref name="cid"/>;
/// otherwise the <c>MissingWidth</c> of the font's descriptor.
/// </returns>
public decimal GetWidthFromDictionary(int cid)
{
    if (!widths.TryGetValue(cid, out var storedWidth))
    {
        // No explicit entry for this CID: fall back to the descriptor value.
        return Descriptor.MissingWidth;
    }

    return storedWidth;
}
|
||||
}
|
||||
}
|
@@ -6,21 +6,36 @@
|
||||
using IO;
|
||||
using Util.JetBrains.Annotations;
|
||||
|
||||
/// <summary>
|
||||
/// The CMap (character code map) maps character codes to character identifiers (CIDs).
|
||||
/// The set of characters which a CMap refers to is the "character set" (charset).
|
||||
/// </summary>
|
||||
internal class CMap
|
||||
{
|
||||
public CharacterIdentifierSystemInfo Info { get; }
|
||||
|
||||
/// <summary>
|
||||
/// Defines the type of the internal organization of the CMap file.
|
||||
/// </summary>
|
||||
public int Type { get; }
|
||||
|
||||
public int WMode { get; }
|
||||
|
||||
/// <summary>
|
||||
/// Defines the name of the CMap file.
|
||||
/// </summary>
|
||||
public string Name { get; }
|
||||
|
||||
/// <summary>
|
||||
/// The version number of the CIDFont file.
|
||||
/// </summary>
|
||||
[CanBeNull]
|
||||
public string Version { get; }
|
||||
|
||||
[NotNull]
|
||||
public IReadOnlyDictionary<int, string> BaseFontCharacterMap { get; }
|
||||
|
||||
/// <summary>
|
||||
/// Describes the set of valid input character codes.
|
||||
/// </summary>
|
||||
[NotNull]
|
||||
public IReadOnlyList<CodespaceRange> CodespaceRanges { get; }
|
||||
|
||||
@@ -30,6 +45,9 @@
|
||||
[NotNull]
|
||||
public IReadOnlyList<CidCharacterMapping> CidCharacterMappings { get; }
|
||||
|
||||
/// <summary>
|
||||
/// Controls whether the font associated with the CMap writes horizontally or vertically.
|
||||
/// </summary>
|
||||
public WritingMode WritingMode { get; }
|
||||
|
||||
public bool HasCidMappings => CidCharacterMappings.Count > 0 || CidRanges.Count > 0;
|
||||
@@ -54,15 +72,6 @@
|
||||
minCodeLength = CodespaceRanges.Min(x => x.CodeLength);
|
||||
}
|
||||
|
||||
private string cmapName = null;
|
||||
private string cmapVersion = null;
|
||||
private int cmapType = -1;
|
||||
|
||||
private string registry = null;
|
||||
private string ordering = null;
|
||||
private int supplement = 0;
|
||||
|
||||
|
||||
// CID mappings
|
||||
private readonly Dictionary<int, int> codeToCid = new Dictionary<int, int>();
|
||||
private readonly List<CidRange> codeToCidRanges = new List<CidRange>();
|
||||
@@ -111,7 +120,7 @@
|
||||
|
||||
/// <summary>
/// Returns the name of this CMap, taken from <see cref="Name"/>.
/// </summary>
public override string ToString()
{
    // The stale 'return cmapName;' is dropped: the backing field was replaced by the
    // Name property and the old statement made this return unreachable.
    return Name;
}
|
||||
|
||||
public int ReadCode(IInputBytes bytes)
|
||||
@@ -130,7 +139,7 @@
|
||||
int byteCount = i + 1;
|
||||
foreach (CodespaceRange range in CodespaceRanges)
|
||||
{
|
||||
if (range.isFullMatch(result, byteCount))
|
||||
if (range.IsFullMatch(result, byteCount))
|
||||
{
|
||||
return ByteArrayToInt(result, byteCount);
|
||||
}
|
@@ -1,5 +1,6 @@
|
||||
namespace UglyToad.Pdf.Fonts.Cmap
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
|
||||
/// <summary>
|
||||
@@ -7,16 +8,25 @@
|
||||
/// </summary>
|
||||
public class CodespaceRange
|
||||
{
|
||||
/// <summary>
|
||||
/// The lower-bound of this range.
|
||||
/// </summary>
|
||||
public IReadOnlyList<byte> Start { get; }
|
||||
|
||||
/// <summary>
|
||||
/// The upper-bound of this range.
|
||||
/// </summary>
|
||||
public IReadOnlyList<byte> End { get; }
|
||||
|
||||
public int StartInt { get; }
|
||||
|
||||
public int EndInt { get; }
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// The number of bytes for numbers in this range.
|
||||
/// </summary>
|
||||
public int CodeLength { get; }
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// Creates a new instance of <see cref="CodespaceRange"/>.
|
||||
/// </summary>
|
||||
@@ -28,20 +38,30 @@
|
||||
EndInt = end.ToInt(end.Count);
|
||||
CodeLength = start.Count;
|
||||
}
|
||||
|
||||
/// <summary>
/// Returns <see langword="true"/> if the given code bytes match this codespace range.
/// </summary>
/// <param name="code">The bytes of the code to test; the full array length is used as the code length.</param>
/// <returns>The result of <see cref="IsFullMatch"/> for <paramref name="code"/> and its length.</returns>
/// <exception cref="ArgumentNullException"><paramref name="code"/> is <see langword="null"/>.</exception>
public bool Matches(byte[] code)
{
    // Guard first so callers get an ArgumentNullException rather than a
    // NullReferenceException from reading code.Length.
    if (code == null)
    {
        throw new ArgumentNullException(nameof(code));
    }

    return IsFullMatch(code, code.Length);
}
|
||||
|
||||
/**
|
||||
* Returns true if the given code bytes match this codespace range.
|
||||
*/
|
||||
public bool isFullMatch(byte[] code, int codeLen)
|
||||
/// <summary>
|
||||
/// Returns true if the given code bytes match this codespace range.
|
||||
/// </summary>
|
||||
public bool IsFullMatch(byte[] code, int codeLen)
|
||||
{
|
||||
if (code == null)
|
||||
{
|
||||
throw new ArgumentNullException(nameof(code));
|
||||
}
|
||||
|
||||
// code must be the same length as the bounding codes
|
||||
if (codeLen == CodeLength)
|
||||
{
|
||||
|
@@ -41,7 +41,7 @@
|
||||
{
|
||||
var current = bytes.CurrentOffset;
|
||||
|
||||
var code = ToUnicode.ReadCode(bytes);
|
||||
var code = CMap.ReadCode(bytes);
|
||||
|
||||
codeLength = bytes.CurrentOffset - current;
|
||||
|
||||
@@ -70,7 +70,19 @@
|
||||
|
||||
/// <summary>
/// Gets the horizontal displacement for a character code, derived from its width.
/// </summary>
/// <param name="characterCode">The character code to get the displacement for.</param>
/// <returns>A vector whose first component is the width divided by 1000 and whose second component is 0.</returns>
public PdfVector GetDisplacement(int characterCode)
{
    // The previous hard-coded 'return new PdfVector(0.333m, 0);' is removed: it returned
    // before the width lookup below and made it unreachable.

    // This width is in units scaled up by 1000
    var width = GetWidth(characterCode);

    return new PdfVector(width / 1000, 0);
}
|
||||
|
||||
/// <summary>
/// Gets the width for a character code by converting it to a CID with the CMap
/// and asking the CID font for that CID's dictionary width.
/// </summary>
/// <param name="characterCode">The character code to get the width for.</param>
/// <returns>The value returned by the CID font's <c>GetWidthFromDictionary</c>.</returns>
public decimal GetWidth(int characterCode)
{
    return CidFont.GetWidthFromDictionary(CMap.ConvertToCid(characterCode));
}
|
||||
}
|
||||
}
|
||||
|
@@ -15,5 +15,7 @@
|
||||
bool TryGetUnicode(int characterCode, out string value);
|
||||
|
||||
PdfVector GetDisplacement(int characterCode);
|
||||
|
||||
decimal GetWidth(int characterCode);
|
||||
}
|
||||
}
|
||||
|
@@ -87,6 +87,8 @@
|
||||
|
||||
font.TryGetUnicode(code, out var unicode);
|
||||
|
||||
var width = font.GetWidth(code);
|
||||
|
||||
var wordSpacing = 0m;
|
||||
if (code == ' ' && codeLength == 1)
|
||||
{
|
||||
@@ -97,7 +99,7 @@
|
||||
|
||||
if (font.IsVertical)
|
||||
{
|
||||
throw new NotImplementedException("Vertical fonts are currently unsupported, please submit a pull request.");
|
||||
throw new NotImplementedException("Vertical fonts are currently unsupported, please submit a pull request or issue with an example file.");
|
||||
}
|
||||
|
||||
var displacement = font.GetDisplacement(code);
|
||||
|
Reference in New Issue
Block a user