mirror of
https://github.com/UglyToad/PdfPig.git
synced 2026-03-10 00:23:29 +08:00
add test for non latin characters and use normal ints rather than octal in the encoding classes.
This commit is contained in:
Binary file not shown.
@@ -0,0 +1,55 @@
|
|||||||
|
namespace UglyToad.Pdf.Tests.Integration
|
||||||
|
{
|
||||||
|
using System;
|
||||||
|
using System.IO;
|
||||||
|
using System.Linq;
|
||||||
|
using Content;
|
||||||
|
using Xunit;
|
||||||
|
|
||||||
|
/// <summary>
/// Integration tests which open the "Single Page Non Latin - from acrobat distiller.pdf"
/// sample document and check its page count, page size and extracted text.
/// </summary>
public class SinglePageNonLatinAcrobatDistillerTests
{
    /// <summary>
    /// Builds the absolute path of the sample document, resolved relative to the
    /// test assembly's base directory (three levels up, then Integration/Documents).
    /// </summary>
    private static string GetFilename()
    {
        var relativeFolder = Path.Combine(
            AppDomain.CurrentDomain.BaseDirectory,
            "..",
            "..",
            "..",
            "Integration",
            "Documents");

        var absoluteFolder = Path.GetFullPath(relativeFolder);

        return Path.Combine(absoluteFolder, "Single Page Non Latin - from acrobat distiller.pdf");
    }

    /// <summary>
    /// The document, opened from its raw bytes, reports exactly one page.
    /// </summary>
    [Fact]
    public void HasCorrectNumberOfPages()
    {
        var bytes = File.ReadAllBytes(GetFilename());

        using (var pdf = PdfDocument.Open(bytes))
        {
            Assert.Equal(1, pdf.NumberOfPages);
        }
    }

    /// <summary>
    /// The first page, opened by file path, has the Letter page size.
    /// </summary>
    [Fact]
    public void HasCorrectPageSize()
    {
        using (var pdf = PdfDocument.Open(GetFilename()))
        {
            var firstPage = pdf.GetPage(1);

            Assert.Equal(PageSize.Letter, firstPage.Size);
        }
    }

    /// <summary>
    /// Concatenating the value of every letter on the first page yields the expected text.
    /// </summary>
    [Fact]
    public void GetsCorrectPageTextIgnoringHiddenCharacters()
    {
        using (var pdf = PdfDocument.Open(GetFilename()))
        {
            var firstPage = pdf.GetPage(1);

            var extracted = string.Concat(firstPage.Letters.Select(letter => letter.Value));

            // NOTE(review): the non-Latin characters in the literal below may look reversed
            // when viewed in an editor; the original author reports they are extracted correctly.
            // TODO: Need someone who can read these characters to confirm the expected value.
            Assert.Equal("Hello ﺪﻤﺤﻣ World. ", extracted);
        }
    }
}
|
||||||
|
}
|
||||||
@@ -2,6 +2,7 @@
|
|||||||
{
|
{
|
||||||
using System;
|
using System;
|
||||||
using System.IO;
|
using System.IO;
|
||||||
|
using System.Linq;
|
||||||
using Content;
|
using Content;
|
||||||
using Xunit;
|
using Xunit;
|
||||||
|
|
||||||
@@ -35,5 +36,18 @@
|
|||||||
Assert.Equal(PageSize.Letter, page.Size);
|
Assert.Equal(PageSize.Letter, page.Size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Concatenating the value of every letter on the first page yields the expected sentence.
/// </summary>
[Fact]
public void GetsCorrectPageTextIgnoringHiddenCharacters()
{
    using (var pdf = PdfDocument.Open(GetFilename()))
    {
        var firstPage = pdf.GetPage(1);

        // Join each extracted letter's value, in order, with no separator.
        var extracted = string.Concat(firstPage.Letters.Select(letter => letter.Value));

        Assert.Equal("I am a simple pdf.", extracted);
    }
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -12,6 +12,7 @@
|
|||||||
<None Remove="Fonts\TrueType\Roboto-Regular.ttf" />
|
<None Remove="Fonts\TrueType\Roboto-Regular.ttf" />
|
||||||
<None Remove="Integration\Documents\Font Size Test - from google chrome print pdf.pdf" />
|
<None Remove="Integration\Documents\Font Size Test - from google chrome print pdf.pdf" />
|
||||||
<None Remove="Integration\Documents\Font Size Test - from libre office.pdf" />
|
<None Remove="Integration\Documents\Font Size Test - from libre office.pdf" />
|
||||||
|
<None Remove="Integration\Documents\Single Page Non Latin - from acrobat distiller.pdf" />
|
||||||
<None Remove="Integration\Documents\Single Page Simple - from google drive.pdf" />
|
<None Remove="Integration\Documents\Single Page Simple - from google drive.pdf" />
|
||||||
<None Remove="Integration\Documents\Single Page Simple - from open office.pdf" />
|
<None Remove="Integration\Documents\Single Page Simple - from open office.pdf" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
@@ -29,6 +30,9 @@
|
|||||||
<Content Include="Integration\Documents\Font Size Test - from google chrome print pdf.pdf">
|
<Content Include="Integration\Documents\Font Size Test - from google chrome print pdf.pdf">
|
||||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||||
</Content>
|
</Content>
|
||||||
|
<Content Include="Integration\Documents\Single Page Non Latin - from acrobat distiller.pdf">
|
||||||
|
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||||
|
</Content>
|
||||||
<Content Include="Integration\Documents\Single Page Simple - from google drive.pdf">
|
<Content Include="Integration\Documents\Single Page Simple - from google drive.pdf">
|
||||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||||
</Content>
|
</Content>
|
||||||
|
|||||||
@@ -37,63 +37,63 @@ namespace UglyToad.Pdf.Cos
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
set(i, (char)i, temporaryMap);
|
Set(i, (char)i, temporaryMap);
|
||||||
}
|
}
|
||||||
|
|
||||||
// then do all deviations (based on the table in ISO 32000-1:2008)
|
// then do all deviations (based on the table in ISO 32000-1:2008)
|
||||||
// block 1
|
// block 1
|
||||||
set(0x18, '\u02D8', temporaryMap); // BREVE
|
Set(0x18, '\u02D8', temporaryMap); // BREVE
|
||||||
set(0x19, '\u02C7', temporaryMap); // CARON
|
Set(0x19, '\u02C7', temporaryMap); // CARON
|
||||||
set(0x1A, '\u02C6', temporaryMap); // MODIFIER LETTER CIRCUMFLEX ACCENT
|
Set(0x1A, '\u02C6', temporaryMap); // MODIFIER LETTER CIRCUMFLEX ACCENT
|
||||||
set(0x1B, '\u02D9', temporaryMap); // DOT ABOVE
|
Set(0x1B, '\u02D9', temporaryMap); // DOT ABOVE
|
||||||
set(0x1C, '\u02DD', temporaryMap); // DOUBLE ACUTE ACCENT
|
Set(0x1C, '\u02DD', temporaryMap); // DOUBLE ACUTE ACCENT
|
||||||
set(0x1D, '\u02DB', temporaryMap); // OGONEK
|
Set(0x1D, '\u02DB', temporaryMap); // OGONEK
|
||||||
set(0x1E, '\u02DA', temporaryMap); // RING ABOVE
|
Set(0x1E, '\u02DA', temporaryMap); // RING ABOVE
|
||||||
set(0x1F, '\u02DC', temporaryMap); // SMALL TILDE
|
Set(0x1F, '\u02DC', temporaryMap); // SMALL TILDE
|
||||||
// block 2
|
// block 2
|
||||||
set(0x7F, ReplacementCharacter, temporaryMap); // undefined
|
Set(0x7F, ReplacementCharacter, temporaryMap); // undefined
|
||||||
set(0x80, '\u2022', temporaryMap); // BULLET
|
Set(0x80, '\u2022', temporaryMap); // BULLET
|
||||||
set(0x81, '\u2020', temporaryMap); // DAGGER
|
Set(0x81, '\u2020', temporaryMap); // DAGGER
|
||||||
set(0x82, '\u2021', temporaryMap); // DOUBLE DAGGER
|
Set(0x82, '\u2021', temporaryMap); // DOUBLE DAGGER
|
||||||
set(0x83, '\u2026', temporaryMap); // HORIZONTAL ELLIPSIS
|
Set(0x83, '\u2026', temporaryMap); // HORIZONTAL ELLIPSIS
|
||||||
set(0x84, '\u2014', temporaryMap); // EM DASH
|
Set(0x84, '\u2014', temporaryMap); // EM DASH
|
||||||
set(0x85, '\u2013', temporaryMap); // EN DASH
|
Set(0x85, '\u2013', temporaryMap); // EN DASH
|
||||||
set(0x86, '\u0192', temporaryMap); // LATIN SMALL LETTER SCRIPT F
|
Set(0x86, '\u0192', temporaryMap); // LATIN SMALL LETTER SCRIPT F
|
||||||
set(0x87, '\u2044', temporaryMap); // FRACTION SLASH (solidus)
|
Set(0x87, '\u2044', temporaryMap); // FRACTION SLASH (solidus)
|
||||||
set(0x88, '\u2039', temporaryMap); // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
Set(0x88, '\u2039', temporaryMap); // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||||
set(0x89, '\u203A', temporaryMap); // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
Set(0x89, '\u203A', temporaryMap); // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||||
set(0x8A, '\u2212', temporaryMap); // MINUS SIGN
|
Set(0x8A, '\u2212', temporaryMap); // MINUS SIGN
|
||||||
set(0x8B, '\u2030', temporaryMap); // PER MILLE SIGN
|
Set(0x8B, '\u2030', temporaryMap); // PER MILLE SIGN
|
||||||
set(0x8C, '\u201E', temporaryMap); // DOUBLE LOW-9 QUOTATION MARK (quotedblbase)
|
Set(0x8C, '\u201E', temporaryMap); // DOUBLE LOW-9 QUOTATION MARK (quotedblbase)
|
||||||
set(0x8D, '\u201C', temporaryMap); // LEFT DOUBLE QUOTATION MARK (quotedblleft)
|
Set(0x8D, '\u201C', temporaryMap); // LEFT DOUBLE QUOTATION MARK (quotedblleft)
|
||||||
set(0x8E, '\u201D', temporaryMap); // RIGHT DOUBLE QUOTATION MARK (quotedblright)
|
Set(0x8E, '\u201D', temporaryMap); // RIGHT DOUBLE QUOTATION MARK (quotedblright)
|
||||||
set(0x8F, '\u2018', temporaryMap); // LEFT SINGLE QUOTATION MARK (quoteleft)
|
Set(0x8F, '\u2018', temporaryMap); // LEFT SINGLE QUOTATION MARK (quoteleft)
|
||||||
set(0x90, '\u2019', temporaryMap); // RIGHT SINGLE QUOTATION MARK (quoteright)
|
Set(0x90, '\u2019', temporaryMap); // RIGHT SINGLE QUOTATION MARK (quoteright)
|
||||||
set(0x91, '\u201A', temporaryMap); // SINGLE LOW-9 QUOTATION MARK (quotesinglbase)
|
Set(0x91, '\u201A', temporaryMap); // SINGLE LOW-9 QUOTATION MARK (quotesinglbase)
|
||||||
set(0x92, '\u2122', temporaryMap); // TRADE MARK SIGN
|
Set(0x92, '\u2122', temporaryMap); // TRADE MARK SIGN
|
||||||
set(0x93, '\uFB01', temporaryMap); // LATIN SMALL LIGATURE FI
|
Set(0x93, '\uFB01', temporaryMap); // LATIN SMALL LIGATURE FI
|
||||||
set(0x94, '\uFB02', temporaryMap); // LATIN SMALL LIGATURE FL
|
Set(0x94, '\uFB02', temporaryMap); // LATIN SMALL LIGATURE FL
|
||||||
set(0x95, '\u0141', temporaryMap); // LATIN CAPITAL LETTER L WITH STROKE
|
Set(0x95, '\u0141', temporaryMap); // LATIN CAPITAL LETTER L WITH STROKE
|
||||||
set(0x96, '\u0152', temporaryMap); // LATIN CAPITAL LIGATURE OE
|
Set(0x96, '\u0152', temporaryMap); // LATIN CAPITAL LIGATURE OE
|
||||||
set(0x97, '\u0160', temporaryMap); // LATIN CAPITAL LETTER S WITH CARON
|
Set(0x97, '\u0160', temporaryMap); // LATIN CAPITAL LETTER S WITH CARON
|
||||||
set(0x98, '\u0178', temporaryMap); // LATIN CAPITAL LETTER Y WITH DIAERESIS
|
Set(0x98, '\u0178', temporaryMap); // LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||||
set(0x99, '\u017D', temporaryMap); // LATIN CAPITAL LETTER Z WITH CARON
|
Set(0x99, '\u017D', temporaryMap); // LATIN CAPITAL LETTER Z WITH CARON
|
||||||
set(0x9A, '\u0131', temporaryMap); // LATIN SMALL LETTER DOTLESS I
|
Set(0x9A, '\u0131', temporaryMap); // LATIN SMALL LETTER DOTLESS I
|
||||||
set(0x9B, '\u0142', temporaryMap); // LATIN SMALL LETTER L WITH STROKE
|
Set(0x9B, '\u0142', temporaryMap); // LATIN SMALL LETTER L WITH STROKE
|
||||||
set(0x9C, '\u0153', temporaryMap); // LATIN SMALL LIGATURE OE
|
Set(0x9C, '\u0153', temporaryMap); // LATIN SMALL LIGATURE OE
|
||||||
set(0x9D, '\u0161', temporaryMap); // LATIN SMALL LETTER S WITH CARON
|
Set(0x9D, '\u0161', temporaryMap); // LATIN SMALL LETTER S WITH CARON
|
||||||
set(0x9E, '\u017E', temporaryMap); // LATIN SMALL LETTER Z WITH CARON
|
Set(0x9E, '\u017E', temporaryMap); // LATIN SMALL LETTER Z WITH CARON
|
||||||
set(0x9F, ReplacementCharacter, temporaryMap); // undefined
|
Set(0x9F, ReplacementCharacter, temporaryMap); // undefined
|
||||||
set(0xA0, '\u20AC', temporaryMap); // EURO SIGN
|
Set(0xA0, '\u20AC', temporaryMap); // EURO SIGN
|
||||||
// end of deviations
|
// end of deviations
|
||||||
|
|
||||||
UnicodeToCode = temporaryMap;
|
UnicodeToCode = temporaryMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void set(int code, char unicode, Dictionary<char, int> unicodeToCode)
|
private static void Set(int code, char unicode, Dictionary<char, int> unicodeToCode)
|
||||||
{
|
{
|
||||||
CodeToUni[code] = unicode;
|
CodeToUni[code] = unicode;
|
||||||
unicodeToCode.Add(unicode, code);
|
unicodeToCode[unicode] = code;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
{
|
{
|
||||||
using System;
|
using System;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
|
using Cos;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Maps character codes to glyph names from a PostScript encoding.
|
/// Maps character codes to glyph names from a PostScript encoding.
|
||||||
@@ -55,5 +56,41 @@
|
|||||||
NameToCode[name] = code;
|
NameToCode[name] = code;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Looks up one of the predefined encodings by its name.
/// </summary>
/// <param name="name">The encoding name to look up; may be null.</param>
/// <param name="encoding">
/// Set to the shared instance of the matching encoding when the name is recognised,
/// otherwise null.
/// </param>
/// <returns>
/// True if <paramref name="name"/> equals the standard, WinAnsi, MacExpert or MacRoman
/// encoding name; false if it is null or matches none of them.
/// </returns>
public static bool TryGetNamedEncoding(CosName name, out Encoding encoding)
{
    encoding = null;

    if (name == null)
    {
        return false;
    }

    // Names are checked in the same order as before: Standard, WinAnsi, MacExpert, MacRoman.
    if (name.Equals(CosName.STANDARD_ENCODING))
    {
        encoding = StandardEncoding.Instance;
    }
    else if (name.Equals(CosName.WIN_ANSI_ENCODING))
    {
        encoding = WinAnsiEncoding.Instance;
    }
    else if (name.Equals(CosName.MAC_EXPERT_ENCODING))
    {
        encoding = MacExpertEncoding.Instance;
    }
    else if (name.Equals(CosName.MAC_ROMAN_ENCODING))
    {
        encoding = MacRomanEncoding.Instance;
    }
    else
    {
        return false;
    }

    return true;
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
namespace UglyToad.Pdf.Fonts.Encodings
|
namespace UglyToad.Pdf.Fonts.Encodings
|
||||||
{
|
{
|
||||||
|
using Util;
|
||||||
|
|
||||||
internal class MacExpertEncoding : Encoding
|
internal class MacExpertEncoding : Encoding
|
||||||
{
|
{
|
||||||
/// <summary>
|
/// <summary>
|
||||||
@@ -182,7 +184,7 @@
|
|||||||
{
|
{
|
||||||
foreach (var valueTuple in EncodingTable)
|
foreach (var valueTuple in EncodingTable)
|
||||||
{
|
{
|
||||||
Add(valueTuple.Item1, valueTuple.Item2);
|
Add(OctalHelpers.FromOctalInt(valueTuple.Item1), valueTuple.Item2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
namespace UglyToad.Pdf.Fonts.Encodings
|
namespace UglyToad.Pdf.Fonts.Encodings
|
||||||
{
|
{
|
||||||
|
using Util;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Similar to the <see cref="MacRomanEncoding"/> with 15 additional entries.
|
/// Similar to the <see cref="MacRomanEncoding"/> with 15 additional entries.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
@@ -31,7 +33,7 @@
|
|||||||
{
|
{
|
||||||
foreach (var valueTuple in EncodingTable)
|
foreach (var valueTuple in EncodingTable)
|
||||||
{
|
{
|
||||||
Add(valueTuple.Item1, valueTuple.Item2);
|
Add(OctalHelpers.FromOctalInt(valueTuple.Item1), valueTuple.Item2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
namespace UglyToad.Pdf.Fonts.Encodings
|
namespace UglyToad.Pdf.Fonts.Encodings
|
||||||
{
|
{
|
||||||
|
using Util;
|
||||||
|
|
||||||
internal class MacRomanEncoding : Encoding
|
internal class MacRomanEncoding : Encoding
|
||||||
{
|
{
|
||||||
/// <summary>
|
/// <summary>
|
||||||
@@ -226,7 +228,7 @@
|
|||||||
{
|
{
|
||||||
foreach (var valueTuple in EncodingTable)
|
foreach (var valueTuple in EncodingTable)
|
||||||
{
|
{
|
||||||
Add(valueTuple.Item1, valueTuple.Item2);
|
Add(OctalHelpers.FromOctalInt(valueTuple.Item1), valueTuple.Item2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
namespace UglyToad.Pdf.Fonts.Encodings
|
namespace UglyToad.Pdf.Fonts.Encodings
|
||||||
{
|
{
|
||||||
|
using Util;
|
||||||
|
|
||||||
internal class StandardEncoding : Encoding
|
internal class StandardEncoding : Encoding
|
||||||
{
|
{
|
||||||
private static readonly (int, string)[] EncodingTable =
|
private static readonly (int, string)[] EncodingTable =
|
||||||
@@ -163,7 +165,7 @@
|
|||||||
{
|
{
|
||||||
foreach (var valueTuple in EncodingTable)
|
foreach (var valueTuple in EncodingTable)
|
||||||
{
|
{
|
||||||
Add(valueTuple.Item1, valueTuple.Item2);
|
Add(OctalHelpers.FromOctalInt(valueTuple.Item1), valueTuple.Item2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,7 +1,13 @@
|
|||||||
namespace UglyToad.Pdf.Fonts.Encodings
|
namespace UglyToad.Pdf.Fonts.Encodings
|
||||||
{
|
{
|
||||||
|
using Util;
|
||||||
|
|
||||||
internal class WinAnsiEncoding : Encoding
|
internal class WinAnsiEncoding : Encoding
|
||||||
{
|
{
|
||||||
|
/// <summary>
|
||||||
|
/// The encoding table is taken from the Appendix of the specification.
|
||||||
|
/// These codes are in octal.
|
||||||
|
/// </summary>
|
||||||
private static readonly (int, string)[] EncodingTable =
|
private static readonly (int, string)[] EncodingTable =
|
||||||
{
|
{
|
||||||
(0101, "A"),
|
(0101, "A"),
|
||||||
@@ -233,7 +239,10 @@
|
|||||||
{
|
{
|
||||||
foreach (var valueTuple in EncodingTable)
|
foreach (var valueTuple in EncodingTable)
|
||||||
{
|
{
|
||||||
Add(valueTuple.Item1, valueTuple.Item2);
|
// Convert out of octal before creating
|
||||||
|
var code = OctalHelpers.FromOctalInt(valueTuple.Item1);
|
||||||
|
|
||||||
|
Add(code, valueTuple.Item2);
|
||||||
}
|
}
|
||||||
|
|
||||||
// In WinAnsiEncoding, all unused codes greater than 40 map to the bullet character.
|
// In WinAnsiEncoding, all unused codes greater than 40 map to the bullet character.
|
||||||
|
|||||||
@@ -1,9 +1,11 @@
|
|||||||
namespace UglyToad.Pdf.Fonts.Parser.Handlers
|
namespace UglyToad.Pdf.Fonts.Parser.Handlers
|
||||||
{
|
{
|
||||||
|
using System;
|
||||||
using System.Linq;
|
using System.Linq;
|
||||||
using Cmap;
|
using Cmap;
|
||||||
using ContentStream;
|
using ContentStream;
|
||||||
using Cos;
|
using Cos;
|
||||||
|
using Encodings;
|
||||||
using Exceptions;
|
using Exceptions;
|
||||||
using Filters;
|
using Filters;
|
||||||
using IO;
|
using IO;
|
||||||
@@ -60,6 +62,33 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Encoding encoding = null;
|
||||||
|
if (dictionary.TryGetValue(CosName.ENCODING, out var encodingBase))
|
||||||
|
{
|
||||||
|
// Symbolic fonts default to standard encoding.
|
||||||
|
if (descriptor.Flags.HasFlag(FontFlags.Symbolic))
|
||||||
|
{
|
||||||
|
encoding = StandardEncoding.Instance;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (encodingBase is CosName encodingName)
|
||||||
|
{
|
||||||
|
if (!Encoding.TryGetNamedEncoding(encodingName, out encoding))
|
||||||
|
{
|
||||||
|
// TODO: PDFBox would not throw here.
|
||||||
|
throw new InvalidFontFormatException($"Unrecognised encoding name: {encodingName}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (encodingBase is CosDictionary encodingDictionary)
|
||||||
|
{
|
||||||
|
throw new NotImplementedException("No support for reading encoding from dictionary yet.");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
throw new NotImplementedException("No support for reading encoding from font yet.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return new TrueTypeSimpleFont(name, firstCharacter, lastCharacter, widths, descriptor, toUnicodeCMap);
|
return new TrueTypeSimpleFont(name, firstCharacter, lastCharacter, widths, descriptor, toUnicodeCMap);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
namespace UglyToad.Pdf.Fonts.Simple
|
namespace UglyToad.Pdf.Fonts.Simple
|
||||||
{
|
{
|
||||||
|
using System;
|
||||||
using Cmap;
|
using Cmap;
|
||||||
using Composite;
|
using Composite;
|
||||||
using Cos;
|
using Cos;
|
||||||
@@ -46,9 +47,20 @@
|
|||||||
value = null;
|
value = null;
|
||||||
|
|
||||||
if (!ToUnicode.CanMapToUnicode)
|
if (!ToUnicode.CanMapToUnicode)
|
||||||
|
{
|
||||||
|
// For now just cast to character
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
value = ((char) characterCode).ToString();
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
catch (Exception)
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return ToUnicode.TryGet(characterCode, out value);
|
return ToUnicode.TryGet(characterCode, out value);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,6 +4,7 @@
|
|||||||
using IO;
|
using IO;
|
||||||
using Parser.Parts;
|
using Parser.Parts;
|
||||||
using Tokens;
|
using Tokens;
|
||||||
|
using Util;
|
||||||
|
|
||||||
public class StringTokenizer : ITokenizer
|
public class StringTokenizer : ITokenizer
|
||||||
{
|
{
|
||||||
@@ -51,7 +52,7 @@
|
|||||||
|
|
||||||
if (octalsRead == 3 || !nextCharacterOctal)
|
if (octalsRead == 3 || !nextCharacterOctal)
|
||||||
{
|
{
|
||||||
var characterCode = FromOctal(octal);
|
var characterCode = OctalHelpers.FromOctalDigits(octal);
|
||||||
|
|
||||||
// For now :(
|
// For now :(
|
||||||
// TODO: I have a sneaking suspicion this is wrong, not sure what behaviour is for large octal numbers
|
// TODO: I have a sneaking suspicion this is wrong, not sure what behaviour is for large octal numbers
|
||||||
@@ -159,7 +160,7 @@
|
|||||||
octals[i] = octals[i - 1];
|
octals[i] = octals[i - 1];
|
||||||
}
|
}
|
||||||
|
|
||||||
var value = OctalCharacterToShort(nextOctalChar);
|
var value = nextOctalChar.CharacterToShort();
|
||||||
|
|
||||||
octals[0] = value;
|
octals[0] = value;
|
||||||
}
|
}
|
||||||
@@ -220,7 +221,7 @@
|
|||||||
case '5':
|
case '5':
|
||||||
case '6':
|
case '6':
|
||||||
case '7':
|
case '7':
|
||||||
octal[0] = OctalCharacterToShort(c);
|
octal[0] = c.CharacterToShort();
|
||||||
isOctalActive = true;
|
isOctalActive = true;
|
||||||
octalsRead = 1;
|
octalsRead = 1;
|
||||||
break;
|
break;
|
||||||
@@ -242,60 +243,5 @@
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static short OctalCharacterToShort(char c)
|
|
||||||
{
|
|
||||||
switch (c)
|
|
||||||
{
|
|
||||||
case '0':
|
|
||||||
return 0;
|
|
||||||
case '1':
|
|
||||||
return 1;
|
|
||||||
case '2':
|
|
||||||
return 2;
|
|
||||||
case '3':
|
|
||||||
return 3;
|
|
||||||
case '4':
|
|
||||||
return 4;
|
|
||||||
case '5':
|
|
||||||
return 5;
|
|
||||||
case '6':
|
|
||||||
return 6;
|
|
||||||
case '7':
|
|
||||||
return 7;
|
|
||||||
case '8':
|
|
||||||
return 8;
|
|
||||||
case '9':
|
|
||||||
return 9;
|
|
||||||
default:
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static int FromOctal(short[] octal)
|
|
||||||
{
|
|
||||||
int Power(int x, int pow)
|
|
||||||
{
|
|
||||||
int ret = 1;
|
|
||||||
while (pow != 0)
|
|
||||||
{
|
|
||||||
if ((pow & 1) == 1)
|
|
||||||
ret *= x;
|
|
||||||
x *= x;
|
|
||||||
pow >>= 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
int sum = 0;
|
|
||||||
for (int i = octal.Length - 1; i >= 0; i--)
|
|
||||||
{
|
|
||||||
var power = i;
|
|
||||||
sum += octal[i] * Power(8, power);
|
|
||||||
}
|
|
||||||
|
|
||||||
return sum;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
79
src/UglyToad.Pdf/Util/OctalHelpers.cs
Normal file
79
src/UglyToad.Pdf/Util/OctalHelpers.cs
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
namespace UglyToad.Pdf.Util
|
||||||
|
{
|
||||||
|
using System;
|
||||||
|
|
||||||
|
/// <summary>
/// Helpers for converting octal digit representations into their integer values.
/// </summary>
internal static class OctalHelpers
{
    /// <summary>
    /// Converts a digit character ('0' to '9') to its numeric value.
    /// </summary>
    /// <remarks>
    /// '8' and '9' are accepted even though they are not octal digits; this method does
    /// not validate that the character is a legal octal digit.
    /// </remarks>
    /// <param name="c">The character to convert.</param>
    /// <returns>The value of the digit, between 0 and 9 inclusive.</returns>
    /// <exception cref="InvalidOperationException">The character is not a digit from '0' to '9'.</exception>
    public static short CharacterToShort(this char c)
    {
        if (c >= '0' && c <= '9')
        {
            return (short)(c - '0');
        }

        throw new InvalidOperationException($"Could not convert the character {c} to a short.");
    }

    /// <summary>
    /// Converts an array of octal digits to the integer they represent.
    /// </summary>
    /// <param name="octal">
    /// The digits, least significant first: the element at index <c>i</c> is weighted by 8^i.
    /// </param>
    /// <returns>The sum of each digit multiplied by its power of 8.</returns>
    public static int FromOctalDigits(short[] octal)
    {
        int sum = 0;
        for (int i = octal.Length - 1; i >= 0; i--)
        {
            var power = i;
            sum += octal[i] * QuickPower(8, power);
        }

        return sum;
    }

    /// <summary>
    /// Interprets the decimal digits of <paramref name="input"/> as an octal number and
    /// returns its value, e.g. 102 is read as octal 102 and gives 66.
    /// </summary>
    /// <remarks>
    /// C# has no octal literal form, so a table entry written as <c>0102</c> is the decimal
    /// value 102; this method recovers the intended octal value from those decimal digits.
    /// </remarks>
    /// <param name="input">A non-negative integer whose decimal digits spell an octal number.</param>
    /// <returns>The value of the digits read in base 8.</returns>
    /// <exception cref="InvalidOperationException">
    /// The textual form of <paramref name="input"/> contains a non-digit character
    /// (for example the sign of a negative number).
    /// </exception>
    public static int FromOctalInt(int input)
    {
        var str = input.ToString();

        int sum = 0;
        for (var i = 0; i < str.Length; i++)
        {
            var part = str[i].CharacterToShort();

            // The left-most character is the most significant digit, so the exponent is the
            // distance from the right-hand end of the string, not the index from the left.
            sum += part * QuickPower(8, str.Length - 1 - i);
        }

        return sum;
    }

    /// <summary>
    /// Raises <paramref name="x"/> to the power <paramref name="pow"/> by repeated squaring.
    /// </summary>
    private static int QuickPower(int x, int pow)
    {
        int ret = 1;
        while (pow != 0)
        {
            // Multiply in the current square whenever the lowest remaining exponent bit is set.
            if ((pow & 1) == 1)
            {
                ret *= x;
            }

            x *= x;
            pow >>= 1;
        }

        return ret;
    }
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user