diff --git a/src/UglyToad.Pdf.Tests/Fonts/Cmap/CodespaceRangeTests.cs b/src/UglyToad.Pdf.Tests/Fonts/Cmap/CodespaceRangeTests.cs new file mode 100644 index 00000000..84b5b8c7 --- /dev/null +++ b/src/UglyToad.Pdf.Tests/Fonts/Cmap/CodespaceRangeTests.cs @@ -0,0 +1,110 @@ +namespace UglyToad.Pdf.Tests.Fonts.Cmap +{ + using System; + using System.Linq; + using Pdf.Fonts.Cmap; + using Pdf.Tokenization.Tokens; + using Xunit; + + public class CodespaceRangeTests + { + [Theory] + [InlineData("00", "80", 0, 128, 1)] + [InlineData("8140", "9ffc", 33088, 40956, 2)] + public void CreatesCorrectly(string startHex, string endHex, int startInt, int endInt, int codeLength) + { + var range = new CodespaceRange(GetHexBytes(startHex.ToCharArray()), + GetHexBytes(endHex.ToCharArray())); + + Assert.Equal(startInt, range.StartInt); + Assert.Equal(endInt, range.EndInt); + + Assert.Equal(codeLength, range.CodeLength); + } + + [Fact] + public void MatchesCodeIsNullThrows() + { + var start = GetHexBytes('0', 'A'); + var end = GetHexBytes('8', '0' ); + + var codespace = new CodespaceRange(start, end); + + Action action = () => codespace.Matches(null); + + Assert.Throws<ArgumentNullException>(action); + } + + [Fact] + public void IsFullMatchCodeIsNullThrows() + { + var start = GetHexBytes('0', 'A'); + var end = GetHexBytes('8', '0'); + + var codespace = new CodespaceRange(start, end); + + Action action = () => codespace.IsFullMatch(null, 2); + + Assert.Throws<ArgumentNullException>(action); + } + + [Fact] + public void MatchesCodeWrongLengthFalse() + { + var start = GetHexBytes('0', 'A'); + var end = GetHexBytes('8', '0'); + + var codespace = new CodespaceRange(start, end); + + var matches = codespace.Matches(GetHexBytes('6', '9', '0', '1')); + + Assert.False(matches); + } + + [Fact] + public void MatchesCodeLowerThanStartFalse() + { + var start = GetHexBytes('0', 'A'); + var end = GetHexBytes('8', '0'); + + var codespace = new CodespaceRange(start, end); + + var matches = codespace.Matches(GetHexBytes('0', '1')); + + 
Assert.False(matches); + } + + [Fact] + public void MatchesCodeHigherThanEndFalse() + { + var start = GetHexBytes('0', 'A'); + var end = GetHexBytes('8', '0'); + + var codespace = new CodespaceRange(start, end); + + var matches = codespace.Matches(GetHexBytes('9', '6')); + + Assert.False(matches); + } + + [Fact] + public void MatchesCodeInRangeTrue() + { + var start = GetHexBytes('0', 'A'); + var end = GetHexBytes('8', '0'); + + var codespace = new CodespaceRange(start, end); + + var matches = codespace.Matches(GetHexBytes('5', 'A')); + + Assert.True(matches); + } + + private static byte[] GetHexBytes(params char[] characters) + { + var token = new HexToken(characters); + + return token.Bytes.ToArray(); + } + } +} diff --git a/src/UglyToad.Pdf/Fonts/CharacterIdentifierSystemInfo.cs b/src/UglyToad.Pdf/Fonts/CharacterIdentifierSystemInfo.cs index 327cec3e..fc6a3114 100644 --- a/src/UglyToad.Pdf/Fonts/CharacterIdentifierSystemInfo.cs +++ b/src/UglyToad.Pdf/Fonts/CharacterIdentifierSystemInfo.cs @@ -3,7 +3,7 @@ using CidFonts; /// - /// Specifies the character collection associated with the (CIDFont). + /// Specifies the character collection associated with the (CIDFont). 
/// public struct CharacterIdentifierSystemInfo { diff --git a/src/UglyToad.Pdf/Fonts/CidFonts/ICidFont.cs b/src/UglyToad.Pdf/Fonts/CidFonts/ICidFont.cs index 1cecca83..19939dc0 100644 --- a/src/UglyToad.Pdf/Fonts/CidFonts/ICidFont.cs +++ b/src/UglyToad.Pdf/Fonts/CidFonts/ICidFont.cs @@ -1,6 +1,5 @@ namespace UglyToad.Pdf.Fonts.CidFonts { - using Cmap; using Cos; /// @@ -36,5 +35,9 @@ CidFontType CidFontType { get; } FontDescriptor Descriptor { get; } + + decimal GetWidthFromFont(int characterCode); + + decimal GetWidthFromDictionary(int cid); } } \ No newline at end of file diff --git a/src/UglyToad.Pdf/Fonts/CidFonts/Type0CidFont.cs b/src/UglyToad.Pdf/Fonts/CidFonts/Type0CidFont.cs index 519ccb5e..87f1e784 100644 --- a/src/UglyToad.Pdf/Fonts/CidFonts/Type0CidFont.cs +++ b/src/UglyToad.Pdf/Fonts/CidFonts/Type0CidFont.cs @@ -15,5 +15,15 @@ public CharacterIdentifierSystemInfo SystemInfo { get; } public CidFontType CidFontType => CidFontType.Type0; public FontDescriptor Descriptor { get; } + + public decimal GetWidthFromFont(int characterCode) + { + throw new System.NotImplementedException(); + } + + public decimal GetWidthFromDictionary(int cid) + { + throw new System.NotImplementedException(); + } } } diff --git a/src/UglyToad.Pdf/Fonts/CidFonts/Type2CidFont.cs b/src/UglyToad.Pdf/Fonts/CidFonts/Type2CidFont.cs index 59dde2f7..bd2c501f 100644 --- a/src/UglyToad.Pdf/Fonts/CidFonts/Type2CidFont.cs +++ b/src/UglyToad.Pdf/Fonts/CidFonts/Type2CidFont.cs @@ -35,5 +35,20 @@ this.verticalWritingMetrics = verticalWritingMetrics; this.widths = widths; } + + public decimal GetWidthFromFont(int characterCode) + { + throw new System.NotImplementedException(); + } + + public decimal GetWidthFromDictionary(int cid) + { + if (widths.TryGetValue(cid, out var width)) + { + return width; + } + + return Descriptor.MissingWidth; + } } } \ No newline at end of file diff --git a/src/UglyToad.Pdf/Fonts/Cmap/Cmap.cs b/src/UglyToad.Pdf/Fonts/Cmap/CMapChangeCase.cs similarity index 85% 
rename from src/UglyToad.Pdf/Fonts/Cmap/Cmap.cs rename to src/UglyToad.Pdf/Fonts/Cmap/CMapChangeCase.cs index b4f10929..5d7de4f6 100644 --- a/src/UglyToad.Pdf/Fonts/Cmap/Cmap.cs +++ b/src/UglyToad.Pdf/Fonts/Cmap/CMapChangeCase.cs @@ -6,21 +6,36 @@ using IO; using Util.JetBrains.Annotations; + /// + /// The CMap (character code map) maps character codes to character identifiers (CIDs). + /// The set of characters which a CMap refers to is the "character set" (charset). + /// internal class CMap { public CharacterIdentifierSystemInfo Info { get; } + /// + /// Defines the type of the internal organization of the CMap file. + /// public int Type { get; } - public int WMode { get; } - + /// + /// Defines the name of the CMap file. + /// public string Name { get; } + /// + /// The version number of the CIDFont file. + /// + [CanBeNull] public string Version { get; } [NotNull] public IReadOnlyDictionary BaseFontCharacterMap { get; } + /// + /// Describes the set of valid input character codes. + /// [NotNull] public IReadOnlyList CodespaceRanges { get; } @@ -30,6 +45,9 @@ [NotNull] public IReadOnlyList CidCharacterMappings { get; } + /// + /// Controls whether the font associated with the CMap writes horizontally or vertically. 
+ /// public WritingMode WritingMode { get; } public bool HasCidMappings => CidCharacterMappings.Count > 0 || CidRanges.Count > 0; @@ -54,15 +72,6 @@ minCodeLength = CodespaceRanges.Min(x => x.CodeLength); } - private string cmapName = null; - private string cmapVersion = null; - private int cmapType = -1; - - private string registry = null; - private string ordering = null; - private int supplement = 0; - - // CID mappings private readonly Dictionary codeToCid = new Dictionary(); private readonly List codeToCidRanges = new List(); @@ -111,7 +120,7 @@ public override string ToString() { - return cmapName; + return Name; } public int ReadCode(IInputBytes bytes) @@ -130,7 +139,7 @@ int byteCount = i + 1; foreach (CodespaceRange range in CodespaceRanges) { - if (range.isFullMatch(result, byteCount)) + if (range.IsFullMatch(result, byteCount)) { return ByteArrayToInt(result, byteCount); } diff --git a/src/UglyToad.Pdf/Fonts/Cmap/CodespaceRange.cs b/src/UglyToad.Pdf/Fonts/Cmap/CodespaceRange.cs index f5aa086e..3b3247c4 100644 --- a/src/UglyToad.Pdf/Fonts/Cmap/CodespaceRange.cs +++ b/src/UglyToad.Pdf/Fonts/Cmap/CodespaceRange.cs @@ -1,5 +1,6 @@ namespace UglyToad.Pdf.Fonts.Cmap { + using System; using System.Collections.Generic; /// @@ -7,16 +8,25 @@ /// public class CodespaceRange { + /// + /// The lower-bound of this range. + /// public IReadOnlyList Start { get; } + /// + /// The upper-bound of this range. + /// public IReadOnlyList End { get; } public int StartInt { get; } public int EndInt { get; } - + + /// + /// The number of bytes for numbers in this range. + /// public int CodeLength { get; } - + /// /// Creates a new instance of . /// @@ -28,20 +38,30 @@ EndInt = end.ToInt(end.Count); CodeLength = start.Count; } - - /** - * Returns true if the given code bytes match this codespace range. - */ - public bool matches(byte[] code) + + /// + /// Returns if the given code bytes match this codespace range. 
+ /// + public bool Matches(byte[] code) { - return isFullMatch(code, code.Length); + if (code == null) + { + throw new ArgumentNullException(nameof(code)); + } + + return IsFullMatch(code, code.Length); } - /** - * Returns true if the given code bytes match this codespace range. - */ - public bool isFullMatch(byte[] code, int codeLen) + /// + /// Returns true if the given code bytes match this codespace range. + /// + public bool IsFullMatch(byte[] code, int codeLen) { + if (code == null) + { + throw new ArgumentNullException(nameof(code)); + } + // code must be the same length as the bounding codes if (codeLen == CodeLength) { diff --git a/src/UglyToad.Pdf/Fonts/Composite/Type0Font.cs b/src/UglyToad.Pdf/Fonts/Composite/Type0Font.cs index 2aab9bb2..bebefcaf 100644 --- a/src/UglyToad.Pdf/Fonts/Composite/Type0Font.cs +++ b/src/UglyToad.Pdf/Fonts/Composite/Type0Font.cs @@ -41,7 +41,7 @@ { var current = bytes.CurrentOffset; - var code = ToUnicode.ReadCode(bytes); + var code = CMap.ReadCode(bytes); codeLength = bytes.CurrentOffset - current; @@ -70,7 +70,19 @@ public PdfVector GetDisplacement(int characterCode) { - return new PdfVector(0.333m, 0); + // This width is in units scaled up by 1000 + var width = GetWidth(characterCode); + + return new PdfVector(width / 1000, 0); + } + + public decimal GetWidth(int characterCode) + { + var cid = CMap.ConvertToCid(characterCode); + + var fromFont = CidFont.GetWidthFromDictionary(cid); + + return fromFont; } } } diff --git a/src/UglyToad.Pdf/Fonts/IFont.cs b/src/UglyToad.Pdf/Fonts/IFont.cs index 818061b5..0eea2891 100644 --- a/src/UglyToad.Pdf/Fonts/IFont.cs +++ b/src/UglyToad.Pdf/Fonts/IFont.cs @@ -15,5 +15,7 @@ bool TryGetUnicode(int characterCode, out string value); PdfVector GetDisplacement(int characterCode); + + decimal GetWidth(int characterCode); } } diff --git a/src/UglyToad.Pdf/Graphics/ContentStreamProcessor.cs b/src/UglyToad.Pdf/Graphics/ContentStreamProcessor.cs index 1475aa07..6a18c124 100644 --- 
a/src/UglyToad.Pdf/Graphics/ContentStreamProcessor.cs +++ b/src/UglyToad.Pdf/Graphics/ContentStreamProcessor.cs @@ -87,6 +87,8 @@ font.TryGetUnicode(code, out var unicode); + var width = font.GetWidth(code); + var wordSpacing = 0m; if (code == ' ' && codeLength == 1) { @@ -97,7 +99,7 @@ if (font.IsVertical) { - throw new NotImplementedException("Vertical fonts are currently unsupported, please submit a pull request."); + throw new NotImplementedException("Vertical fonts are currently unsupported, please submit a pull request or issue with an example file."); } var displacement = font.GetDisplacement(code);