mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-08-20 09:21:57 +08:00
Handle alternate Unicode name representation cXXX and fix #943
This commit is contained in:
parent
bcc8ccecbe
commit
d12afb0b8d
@ -152,6 +152,13 @@
|
||||
|
||||
unicode = char.ConvertFromUtf32(codePoint);
|
||||
}
|
||||
else if (name.StartsWith("c", StringComparison.OrdinalIgnoreCase) && name.Length >= 3 && name.Length <= 4)
|
||||
{
|
||||
// name representation cXXX
|
||||
var codePoint = int.Parse(name.AsSpanOrSubstring(1), NumberStyles.Integer, CultureInfo.InvariantCulture);
|
||||
System.Diagnostics.Debug.Assert(codePoint > 0);
|
||||
unicode = char.ConvertFromUtf32(codePoint);
|
||||
}
|
||||
else
|
||||
{
|
||||
return null;
|
||||
|
||||
@ -1,9 +1,32 @@
|
||||
namespace UglyToad.PdfPig.Tests.Integration
|
||||
{
|
||||
using Content;
|
||||
using DocumentLayoutAnalysis.PageSegmenter;
|
||||
using DocumentLayoutAnalysis.WordExtractor;
|
||||
|
||||
public class GithubIssuesTests
|
||||
{
|
||||
/// <summary>
/// Regression test for GitHub issue #943: opens the sample document, runs word
/// extraction and page segmentation on the first page, and checks the text of
/// the first two lines of the first block.
/// </summary>
[Fact]
public void Issue943()
{
    var pdfPath = IntegrationHelpers.GetDocumentPath("MOZILLA-10225-0.pdf");

    using (var pdf = PdfDocument.Open(pdfPath))
    {
        // The page and its letters must both be available before layout analysis.
        var firstPage = pdf.GetPage(1);
        Assert.NotNull(firstPage);

        var pageLetters = firstPage.Letters;
        Assert.NotNull(pageLetters);

        // Letters -> words -> blocks, using the shared extractor/segmenter instances.
        var extractedWords = NearestNeighbourWordExtractor.Instance.GetWords(firstPage.Letters);
        var textBlocks = DocstrumBoundingBoxes.Instance.GetBlocks(extractedWords);

        // Only the leading block is inspected: its first two lines are expected
        // to match the strings below exactly.
        var leadingBlock = textBlocks[0];
        var leadingLines = leadingBlock.TextLines;

        Assert.Equal("Rocket and Spacecraft Propulsion", leadingLines[0].Text);
        Assert.Equal("Principles, Practice and New Developments (Second Edition)", leadingLines[1].Text);
    }
}
|
||||
|
||||
[Fact]
|
||||
public void Issue736()
|
||||
{
|
||||
|
||||
Loading…
Reference in New Issue
Block a user