mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-10-14 19:05:01 +08:00
bug fixes for reading from type 1 fonts. test all pages in documents. use content rather than embedded resources for #4
This commit is contained in:
@@ -54,17 +54,17 @@
|
||||
|
||||
private static byte[] GetFileBytes(string name)
|
||||
{
|
||||
var manifestFiles = typeof(Type1FontParserTests).Assembly.GetManifestResourceNames();
|
||||
var documentFolder = Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "Fonts", "Type1"));
|
||||
var files = Directory.GetFiles(documentFolder);
|
||||
|
||||
var match = manifestFiles.Single(x => x.IndexOf(name, StringComparison.InvariantCultureIgnoreCase) >= 0);
|
||||
var file = files.FirstOrDefault(x => x.IndexOf(name, StringComparison.OrdinalIgnoreCase) >= 0);
|
||||
|
||||
using (var memoryStream = new MemoryStream())
|
||||
using (var stream = typeof(Type1FontParserTests).Assembly.GetManifestResourceStream(match))
|
||||
if (file == null)
|
||||
{
|
||||
stream.CopyTo(memoryStream);
|
||||
|
||||
return memoryStream.ToArray();
|
||||
throw new InvalidOperationException($"Could not find test file {name} in folder {documentFolder}.");
|
||||
}
|
||||
|
||||
return File.ReadAllBytes(file);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -0,0 +1,36 @@
|
||||
namespace UglyToad.PdfPig.Tests.Integration
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using Xunit;
|
||||
|
||||
public class IntegrationDocumentTests
|
||||
{
|
||||
[Theory]
|
||||
[MemberData(nameof(GetAllDocuments))]
|
||||
public void CanReadAllPages(string documentName)
|
||||
{
|
||||
using (var document = PdfDocument.Open(documentName, new ParsingOptions{ UseLenientParsing = false}))
|
||||
{
|
||||
for (var i = 0; i < document.NumberOfPages; i++)
|
||||
{
|
||||
document.GetPage(i + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static IEnumerable<object[]> GetAllDocuments
|
||||
{
|
||||
get
|
||||
{
|
||||
var documentFolder = Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "Integration", "Documents"));
|
||||
|
||||
var files = Directory.GetFiles(documentFolder, "*.pdf");
|
||||
|
||||
return files.Select(x => new object[] {x});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@@ -32,5 +32,17 @@
|
||||
Assert.Equal(35, document.NumberOfPages);
|
||||
}
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void CanReadAllPages()
|
||||
{
|
||||
using (var document = PdfDocument.Open(GetFilename()))
|
||||
{
|
||||
for (var i = 0; i < document.NumberOfPages; i++)
|
||||
{
|
||||
document.GetPage(i + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@@ -29,6 +29,22 @@
|
||||
</Content>
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<EmbeddedResource Remove="Fonts\Type1\AdobeUtopia.pfa" />
|
||||
<EmbeddedResource Remove="Fonts\Type1\CMBX10.pfa" />
|
||||
<EmbeddedResource Remove="Fonts\Type1\CMBX12.pfa" />
|
||||
<EmbeddedResource Remove="Fonts\Type1\CMCSC10.pfa" />
|
||||
<EmbeddedResource Remove="Fonts\Type1\Raleway-Black.pfb" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<Content Include="Fonts\Type1\AdobeUtopia.pfa" />
|
||||
<Content Include="Fonts\Type1\CMBX10.pfa" />
|
||||
<Content Include="Fonts\Type1\CMBX12.pfa" />
|
||||
<Content Include="Fonts\Type1\CMCSC10.pfa" />
|
||||
<Content Include="Fonts\Type1\Raleway-Black.pfb" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="15.5.0" />
|
||||
<PackageReference Include="System.Drawing.Common" Version="4.5.0-preview2-26406-04" />
|
||||
|
@@ -63,7 +63,7 @@
|
||||
|
||||
var parsed = DirectObjectFinder.Get<DictionaryToken>(obj, pdfScanner);
|
||||
|
||||
var descriptor = fontDescriptorFactory.Generate(parsed, isLenientParsing);
|
||||
var descriptor = fontDescriptorFactory.Generate(parsed, pdfScanner, isLenientParsing);
|
||||
|
||||
return descriptor;
|
||||
}
|
||||
|
@@ -1,5 +1,6 @@
|
||||
namespace UglyToad.PdfPig.Fonts.Parser.Handlers
|
||||
{
|
||||
using System;
|
||||
using Cmap;
|
||||
using CompactFontFormat;
|
||||
using Encodings;
|
||||
@@ -118,6 +119,7 @@
|
||||
&& NameToken.Type1C.Equals(subTypeName))
|
||||
{
|
||||
compactFontFormatParser.Parse(new CompactFontFormatData(bytes));
|
||||
throw new NotSupportedException("TODO: support Compact Font Format...");
|
||||
return null;
|
||||
}
|
||||
|
||||
|
@@ -44,7 +44,7 @@
|
||||
FontDescriptor descriptor = null;
|
||||
if (TryGetFontDescriptor(dictionary, out var descriptorDictionary))
|
||||
{
|
||||
descriptor = descriptorFactory.Generate(descriptorDictionary, isLenientParsing);
|
||||
descriptor = descriptorFactory.Generate(descriptorDictionary, pdfScanner, isLenientParsing);
|
||||
}
|
||||
|
||||
var fontProgram = ReadDescriptorFile(descriptor);
|
||||
@@ -248,7 +248,7 @@
|
||||
|
||||
if (entry is NameToken name)
|
||||
{
|
||||
if (!name.Equals(NameToken.CidToGidMap) && !isLenientParsing)
|
||||
if (!name.Equals(NameToken.Identity) && !isLenientParsing)
|
||||
{
|
||||
throw new InvalidOperationException($"The CIDToGIDMap in a Type 0 font should have the value /Identity, instead got: {name}.");
|
||||
}
|
||||
|
@@ -2,20 +2,22 @@
|
||||
{
|
||||
using System;
|
||||
using Geometry;
|
||||
using PdfPig.Parser.Parts;
|
||||
using Tokenization.Scanner;
|
||||
using Tokenization.Tokens;
|
||||
using Util;
|
||||
using Util.JetBrains.Annotations;
|
||||
|
||||
internal class FontDescriptorFactory
|
||||
{
|
||||
public FontDescriptor Generate(DictionaryToken dictionary, bool isLenientParsing)
|
||||
public FontDescriptor Generate(DictionaryToken dictionary, IPdfTokenScanner pdfScanner, bool isLenientParsing)
|
||||
{
|
||||
if (dictionary == null)
|
||||
{
|
||||
throw new ArgumentNullException(nameof(dictionary));
|
||||
}
|
||||
|
||||
var name = GetFontName(dictionary, isLenientParsing);
|
||||
var name = GetFontName(dictionary, pdfScanner, isLenientParsing);
|
||||
var family = GetFontFamily(dictionary);
|
||||
var stretch = GetFontStretch(dictionary);
|
||||
var flags = GetFlags(dictionary, isLenientParsing);
|
||||
@@ -55,10 +57,16 @@
|
||||
return number.Data;
|
||||
}
|
||||
|
||||
private static NameToken GetFontName(DictionaryToken dictionary, bool isLenientParsing)
|
||||
private static NameToken GetFontName(DictionaryToken dictionary, IPdfTokenScanner scanner, bool isLenientParsing)
|
||||
{
|
||||
if (!dictionary.TryGet(NameToken.FontName, out var name) || !(name is NameToken nameToken))
|
||||
{
|
||||
if (name is IndirectReferenceToken nameReference)
|
||||
{
|
||||
var indirectName = DirectObjectFinder.Get<NameToken>(nameReference, scanner);
|
||||
return indirectName;
|
||||
}
|
||||
|
||||
if (isLenientParsing)
|
||||
{
|
||||
nameToken = NameToken.Create(string.Empty);
|
||||
|
@@ -71,6 +71,11 @@
|
||||
}
|
||||
catch
|
||||
{
|
||||
if (fontProgram != null)
|
||||
{
|
||||
var result = fontProgram.Encoding.TryGetValue(characterCode, out value);
|
||||
return result;
|
||||
}
|
||||
// our quick hack has failed, we should decode the type 1 font!
|
||||
}
|
||||
|
||||
@@ -107,9 +112,19 @@
|
||||
return new PdfRectangle(0, 0, 250, 0);
|
||||
}
|
||||
|
||||
if (fontProgram == null)
|
||||
{
|
||||
return new PdfRectangle(0, 0, widths[characterCode - firstChar], 0);
|
||||
}
|
||||
|
||||
var rect = fontProgram.GetCharacterBoundingBox(characterCode);
|
||||
|
||||
return rect;
|
||||
if (!rect.HasValue)
|
||||
{
|
||||
return new PdfRectangle(0, 0, widths[characterCode - firstChar], 0);
|
||||
}
|
||||
|
||||
return rect.Value;
|
||||
}
|
||||
|
||||
public TransformationMatrix GetFontMatrix()
|
||||
|
@@ -56,8 +56,13 @@ namespace UglyToad.PdfPig.Fonts.Type1.CharStrings
|
||||
commands.Add(new Close());
|
||||
}
|
||||
|
||||
public PdfRectangle GetBoundingRectangle()
|
||||
public PdfRectangle? GetBoundingRectangle()
|
||||
{
|
||||
if (commands.Count == 0)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
var minX = decimal.MaxValue;
|
||||
var maxX = decimal.MinValue;
|
||||
|
||||
@@ -139,7 +144,7 @@ namespace UglyToad.PdfPig.Fonts.Type1.CharStrings
|
||||
}
|
||||
|
||||
var path = $"<path d='{glyph}' stroke='cyan' stroke-width='3'></path>";
|
||||
var bboxRect = BboxToRect(bbox, "yellow");
|
||||
var bboxRect = bbox.HasValue ? BboxToRect(bbox.Value, "yellow") : string.Empty;
|
||||
var others = string.Join(" ", bboxes.Select(x => BboxToRect(x, "gray")));
|
||||
var result = $"<svg transform='scale(1, -1)' width='2000' height='2000'>{path} {bboxRect} {others}</svg>";
|
||||
|
||||
|
@@ -59,27 +59,29 @@
|
||||
CharStrings = charStrings ?? throw new ArgumentNullException(nameof(charStrings));
|
||||
}
|
||||
|
||||
public PdfRectangle GetCharacterBoundingBox(int characterCode)
|
||||
public PdfRectangle? GetCharacterBoundingBox(int characterCode)
|
||||
{
|
||||
var b = Encoding[characterCode];
|
||||
var glyph = CharStrings.Generate(b);
|
||||
var bbox = glyph.GetBoundingRectangle();
|
||||
|
||||
if (Debugger.IsAttached)
|
||||
if (!bbox.HasValue)
|
||||
{
|
||||
if (bbox.Bottom < BoundingBox.Bottom
|
||||
|| bbox.Top > BoundingBox.Top
|
||||
|| bbox.Left < BoundingBox.Left
|
||||
|| bbox.Right > BoundingBox.Right)
|
||||
{
|
||||
// Debugger.Break();
|
||||
return null;
|
||||
}
|
||||
|
||||
if (Debugger.IsAttached)
|
||||
{
|
||||
var full = glyph.ToFullSvg();
|
||||
Console.WriteLine(full);
|
||||
}
|
||||
|
||||
return bbox;
|
||||
}
|
||||
|
||||
public bool ContainsNamedCharacter(string name)
|
||||
{
|
||||
return CharStrings.CharStrings.ContainsKey(name);
|
||||
}
|
||||
}
|
||||
}
|
@@ -123,7 +123,8 @@
|
||||
|
||||
if (!foundUnicode && !isLenientParsing)
|
||||
{
|
||||
throw new InvalidOperationException($"We could not find the corresponding character with code {code} in font {font.Name}.");
|
||||
// TODO: record warning
|
||||
// throw new InvalidOperationException($"We could not find the corresponding character with code {code} in font {font.Name}.");
|
||||
}
|
||||
|
||||
var wordSpacing = 0m;
|
||||
|
Reference in New Issue
Block a user