mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-06-28 06:15:58 +08:00
create Type0 font, notes about font format, heavy duty refactoring to inject dependencies rather than god object
This commit is contained in:
parent
206eb91ff1
commit
f4d58e8aa9
118
font-notes.md
Normal file
118
font-notes.md
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
# Fonts #
|
||||||
|
|
||||||
|
## Types of Font ##
|
||||||
|
|
||||||
|
<pre><code>
|
||||||
|
|
||||||
|
------ Composite Fonts -------
|
||||||
|
|
||||||
|
Type0 (Composed of glyphs from a CIDFont)
|
||||||
|
|
||||||
|
Children:
|
||||||
|
|
||||||
|
CIDFont CIDFontType0 (Type 1 font glyph descriptions)
|
||||||
|
CIDFontType2 (TrueType font glyph descriptions)
|
||||||
|
|
||||||
|
------ Simple Fonts Below -------
|
||||||
|
|
||||||
|
Type 1 Type 1 (defines glyphs using Type 1 font technology)
|
||||||
|
MMType1 (multiple master font - extends type 1 fonts to support many typefaces for a single font)
|
||||||
|
|
||||||
|
Type 3 (defines glyphs with streams of PDF graphics operations)
|
||||||
|
|
||||||
|
TrueType (from the TrueType font format)
|
||||||
|
|
||||||
|
</code></pre>
|
||||||
|
|
||||||
|
|
||||||
|
## Terminology ##
|
||||||
|
|
||||||
|
+ Font dictionary: PDF dictionary with information about the font
|
||||||
|
+ Font program: Glyph information in specialized font format
|
||||||
|
|
||||||
|
## Composite Fonts ##
|
||||||
|
|
||||||
|
+ Glyphs are selected from a font-like CIDFont.
|
||||||
|
+ Has a single CIDFont descendant.
|
||||||
|
+ Multiple-byte sequences select a single glyph.
|
||||||
|
|
||||||
|
Used for multiple-byte character encodings and large numbers of glyphs.
|
||||||
|
|
||||||
|
Well suited to Chinese, Japanese and Korean (CJK).
|
||||||
|
|
||||||
|
CID stands for character identifier. This is a number used to access glyph descriptions.
|
||||||
|
|
||||||
|
The CMap maps between character codes and CID numbers for the glyphs.
|
||||||
|
|
||||||
|
A CIDFont file provides the glyph descriptions for a character collection. The glyph descriptions are
|
||||||
|
identified by CIDs.
|
||||||
|
|
||||||
|
A CID-keyed font combines a CMap with a CIDFont.
|
||||||
|
|
||||||
|
The **Encoding** contains the CMap.
|
||||||
|
The **DescendantFonts** contains the CIDFont to use with the CMap.
|
||||||
|
|
||||||
|
### CIDFont ###
|
||||||
|
|
||||||
|
A Type0 font descendant (CIDFont) must be either a CIDFontType0 (Adobe Type 1) or CIDFontType2 (TrueType).
|
||||||
|
|
||||||
|
For Type 2 CIDFonts (TrueType) the glyphs are identified by a glyph index (GID).
|
||||||
|
|
||||||
|
+ If the font program is embedded as a stream the CIDFont dictionary must contain a CIDToGIDMap which maps
|
||||||
|
from CIDs to Glyph Indexes.
|
||||||
|
|
||||||
|
+ If the font program is a predefined external font the CIDFont must not contain a CIDToGIDMap. It
|
||||||
|
may only use a predefined CMap.
|
||||||
|
|
||||||
|
Though a CID may not be used to select the glyph, as in the predefined case, it is always used to select glyph
|
||||||
|
metrics. Every CIDFont must describe CID 0 which is the ```.notdef``` character for missing characters.
|
||||||
|
|
||||||
|
### Glyph Metrics in CIDFonts ###
|
||||||
|
|
||||||
|
Widths for CIDFonts are defined in the DW and W entries in the CIDFont dictionary.
|
||||||
|
|
||||||
|
+ DW provides the default width for glyphs which are not specified individually.
|
||||||
|
+ W defines widths for individual CIDs.
|
||||||
|
|
||||||
|
Vertical writing mode uses additional metrics entries (DW2 and W2) in the CIDFont dictionary; see the PDF specification.
|
||||||
|
|
||||||
|
### CMap ###
|
||||||
|
|
||||||
|
The CMap maps from character codes to character selectors (CIDs).
|
||||||
|
|
||||||
|
The CMap defines the writing mode: horizontal or vertical.
|
||||||
|
|
||||||
|
### Type 0 Fonts ###
|
||||||
|
|
||||||
|
The **Font dictionary** has the following entries:
|
||||||
|
|
||||||
|
+ Type (name): /Font
|
||||||
|
+ Subtype (name): /Type0
|
||||||
|
+ BaseFont (name): The PostScript name of the font.
|
||||||
|
+ Encoding (name/stream R): Name of a predefined CMap or a stream for an embedded CMap.
|
||||||
|
+ DescendantFonts (array): Single element pointing to the CIDFont.
|
||||||
|
+ ToUnicode (stream R)?: Stream containing a CMap file to map codes to Unicode.
|
||||||
|
|
||||||
|
## Simple Fonts ##
|
||||||
|
|
||||||
|
+ Glyphs are selected by single-byte character codes. Index into a 256 entry glyph table.
|
||||||
|
+ Only supports horizontal writing mode.
|
||||||
|
|
||||||
|
## Further Description ##
|
||||||
|
|
||||||
|
### Type 1 Fonts ###
|
||||||
|
|
||||||
|
The **Font program** is a PostScript program describing glyph shape. See the Adobe Type 1 Font Format specification.
|
||||||
|
|
||||||
|
The **Font dictionary** has the following entries:
|
||||||
|
|
||||||
|
+ Type (name): /Font
|
||||||
|
+ Subtype (name): /Type1
|
||||||
|
+ Name (name?): Font name
|
||||||
|
+ BaseFont (name): The PostScript name of the font. Equivalent to the FontName value in the **Font program**.
|
||||||
|
+ FirstChar (int): The first character code in the Widths array.
|
||||||
|
+ LastChar (int): The last character code in the Widths array.
|
||||||
|
+ Widths (numeric[] R): An array defining the glyph widths, in units where 1000 == 1 text space unit.
|
||||||
|
+ FontDescriptor (Dict<> R): Describes font metrics other than widths.
|
||||||
|
+ Encoding (name/Dict<> R): Specifies the character encoding if different from default.
|
||||||
|
+ ToUnicode (stream R): CMap mapping character code to Unicode.
|
@ -2,6 +2,7 @@
|
|||||||
{
|
{
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using Content;
|
using Content;
|
||||||
|
using ContentStream;
|
||||||
using IO;
|
using IO;
|
||||||
using Pdf.Cos;
|
using Pdf.Cos;
|
||||||
using Pdf.Fonts;
|
using Pdf.Fonts;
|
||||||
@ -44,6 +45,10 @@
|
|||||||
|
|
||||||
internal class TestResourceStore : IResourceStore
|
internal class TestResourceStore : IResourceStore
|
||||||
{
|
{
|
||||||
|
public void LoadResourceDictionary(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
public IFont GetFont(CosName name)
|
public IFont GetFont(CosName name)
|
||||||
{
|
{
|
||||||
return null;
|
return null;
|
||||||
|
@ -13,15 +13,14 @@
|
|||||||
|
|
||||||
public class PageContentParserTests
|
public class PageContentParserTests
|
||||||
{
|
{
|
||||||
private readonly PageContentParser parser = new PageContentParser();
|
private readonly PageContentParser parser = new PageContentParser(new ReflectionGraphicsStateOperationFactory());
|
||||||
private readonly IGraphicsStateOperationFactory operationFactory = new ReflectionGraphicsStateOperationFactory();
|
|
||||||
|
|
||||||
[Fact]
|
[Fact]
|
||||||
public void CorrectlyExtractsOperations()
|
public void CorrectlyExtractsOperations()
|
||||||
{
|
{
|
||||||
var input = StringBytesTestConverter.Convert(SimpleGoogleDocPageContent, false);
|
var input = StringBytesTestConverter.Convert(SimpleGoogleDocPageContent, false);
|
||||||
|
|
||||||
var result = parser.Parse(new ReflectionGraphicsStateOperationFactory(), input.Bytes);
|
var result = parser.Parse(input.Bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
[Fact]
|
[Fact]
|
||||||
@ -36,7 +35,7 @@
|
|||||||
ET";
|
ET";
|
||||||
var input = StringBytesTestConverter.Convert(s, false);
|
var input = StringBytesTestConverter.Convert(s, false);
|
||||||
|
|
||||||
var result = parser.Parse(operationFactory, input.Bytes);
|
var result = parser.Parse(input.Bytes);
|
||||||
|
|
||||||
Assert.Equal(7, result.Count);
|
Assert.Equal(7, result.Count);
|
||||||
|
|
||||||
@ -72,7 +71,7 @@ ET";
|
|||||||
|
|
||||||
var input = StringBytesTestConverter.Convert(s, false);
|
var input = StringBytesTestConverter.Convert(s, false);
|
||||||
|
|
||||||
var result = parser.Parse(operationFactory, input.Bytes);
|
var result = parser.Parse(input.Bytes);
|
||||||
|
|
||||||
Assert.Equal(4, result.Count);
|
Assert.Equal(4, result.Count);
|
||||||
|
|
||||||
|
@ -3,6 +3,7 @@ namespace UglyToad.Pdf.Tests.Tokenization
|
|||||||
{
|
{
|
||||||
using System;
|
using System;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
|
using ContentStream;
|
||||||
using Pdf.Cos;
|
using Pdf.Cos;
|
||||||
using Pdf.Tokenization;
|
using Pdf.Tokenization;
|
||||||
using Pdf.Tokenization.Tokens;
|
using Pdf.Tokenization.Tokens;
|
||||||
|
11
src/UglyToad.Pdf/Content/IPageFactory.cs
Normal file
11
src/UglyToad.Pdf/Content/IPageFactory.cs
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
namespace UglyToad.Pdf.Content
|
||||||
|
{
|
||||||
|
using ContentStream;
|
||||||
|
using IO;
|
||||||
|
|
||||||
|
/// <summary>
/// Creates <see cref="Page"/> instances from the dictionary describing a page.
/// </summary>
internal interface IPageFactory
|
||||||
|
{
|
||||||
|
/// <summary>
/// Create the page with the given number from its dictionary.
/// </summary>
/// <param name="number">The page number to assign to the created page.</param>
/// <param name="dictionary">The dictionary describing the page.</param>
/// <param name="pageTreeMembers">Values from the page tree; the PageFactory implementation consults it for a media box when the dictionary has none.</param>
/// <param name="reader">The reader passed through to object and resource parsing.</param>
/// <param name="isLenientParsing">Whether recoverable problems should be tolerated rather than thrown.</param>
/// <returns>The created page.</returns>
Page Create(int number, PdfDictionary dictionary, PageTreeMembers pageTreeMembers, IRandomAccessRead reader,
|
||||||
|
bool isLenientParsing);
|
||||||
|
}
|
||||||
|
}
|
14
src/UglyToad.Pdf/Content/IResourceStore.cs
Normal file
14
src/UglyToad.Pdf/Content/IResourceStore.cs
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
namespace UglyToad.Pdf.Content
|
||||||
|
{
|
||||||
|
using ContentStream;
|
||||||
|
using Cos;
|
||||||
|
using Fonts;
|
||||||
|
using IO;
|
||||||
|
|
||||||
|
/// <summary>
/// Holds resources (currently fonts) loaded from resource dictionaries so they can be looked up by name.
/// </summary>
internal interface IResourceStore
|
||||||
|
{
|
||||||
|
/// <summary>
/// Load the resources described by the given resource dictionary into the store.
/// </summary>
/// <param name="dictionary">The resource dictionary to load.</param>
/// <param name="reader">The reader used to resolve referenced objects.</param>
/// <param name="isLenientParsing">Whether recoverable problems should be tolerated rather than thrown.</param>
void LoadResourceDictionary(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing);
|
||||||
|
|
||||||
|
/// <summary>
/// Get the font stored under the given name.
/// </summary>
/// <param name="name">The resource name of the font.</param>
/// <returns>The font; NOTE(review): behavior for an unknown name is implementation-defined, confirm against the implementation.</returns>
IFont GetFont(CosName name);
|
||||||
|
}
|
||||||
|
}
|
@ -2,21 +2,9 @@
|
|||||||
{
|
{
|
||||||
using System;
|
using System;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using System.Diagnostics;
|
|
||||||
using ContentStream;
|
|
||||||
using Cos;
|
|
||||||
using Filters;
|
|
||||||
using Geometry;
|
|
||||||
using Graphics;
|
|
||||||
using IO;
|
|
||||||
using Parser;
|
|
||||||
using Util;
|
|
||||||
|
|
||||||
public class Page
|
public class Page
|
||||||
{
|
{
|
||||||
private readonly ParsingArguments parsingArguments;
|
|
||||||
private readonly PdfDictionary dictionary;
|
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// The 1 indexed page number.
|
/// The 1 indexed page number.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
@ -28,78 +16,16 @@
|
|||||||
|
|
||||||
public IReadOnlyList<string> Text => Content?.Text ?? new string[0];
|
public IReadOnlyList<string> Text => Content?.Text ?? new string[0];
|
||||||
|
|
||||||
internal Page(int number, PdfDictionary dictionary, PageTreeMembers pageTreeMembers, ParsingArguments parsingArguments)
|
internal Page(int number, MediaBox mediaBox, PageContent content)
|
||||||
{
|
{
|
||||||
if (number <= 0)
|
if (number <= 0)
|
||||||
{
|
{
|
||||||
throw new ArgumentOutOfRangeException(nameof(number), "Page number cannot be 0 or negative.");
|
throw new ArgumentOutOfRangeException(nameof(number), "Page number cannot be 0 or negative.");
|
||||||
}
|
}
|
||||||
|
|
||||||
this.dictionary = dictionary ?? throw new ArgumentNullException(nameof(dictionary));
|
|
||||||
this.parsingArguments = parsingArguments ?? throw new ArgumentNullException(nameof(parsingArguments));
|
|
||||||
|
|
||||||
Number = number;
|
Number = number;
|
||||||
|
MediaBox = mediaBox;
|
||||||
var type = dictionary.GetName(CosName.TYPE);
|
Content = content;
|
||||||
|
|
||||||
if (type != null && !type.Equals(CosName.PAGE) && !parsingArguments.IsLenientParsing)
|
|
||||||
{
|
|
||||||
throw new InvalidOperationException($"Created page number {number} but its type was specified as {type} rather than 'Page'.");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (dictionary.TryGetItemOfType(CosName.MEDIA_BOX, out COSArray mediaboxArray))
|
|
||||||
{
|
|
||||||
var x1 = mediaboxArray.getInt(0);
|
|
||||||
var y1 = mediaboxArray.getInt(1);
|
|
||||||
var x2 = mediaboxArray.getInt(2);
|
|
||||||
var y2 = mediaboxArray.getInt(3);
|
|
||||||
|
|
||||||
MediaBox = new MediaBox(new PdfRectangle(x1, y1, x2, y2));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
MediaBox = pageTreeMembers.GetMediaBox();
|
|
||||||
|
|
||||||
if (MediaBox == null)
|
|
||||||
{
|
|
||||||
if (parsingArguments.IsLenientParsing)
|
|
||||||
{
|
|
||||||
MediaBox = MediaBox.A4;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
throw new InvalidOperationException("No mediabox was present for page: " + number);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (dictionary.GetItemOrDefault(CosName.RESOURCES) is PdfDictionary resource)
|
|
||||||
{
|
|
||||||
parsingArguments.CachingProviders.ResourceContainer.LoadResourceDictionary(resource, parsingArguments);
|
|
||||||
}
|
|
||||||
|
|
||||||
var contentObject = dictionary.GetItemOrDefault(CosName.CONTENTS) as CosObject;
|
|
||||||
if (contentObject != null)
|
|
||||||
{
|
|
||||||
var contentStream = parsingArguments.Container.Get<DynamicParser>()
|
|
||||||
.Parse(parsingArguments, contentObject, false) as RawCosStream;
|
|
||||||
|
|
||||||
if (contentStream == null)
|
|
||||||
{
|
|
||||||
throw new InvalidOperationException("Failed to parse the content for the page: " + number);
|
|
||||||
}
|
|
||||||
|
|
||||||
var contents = contentStream.Decode(parsingArguments.Container.Get<IFilterProvider>());
|
|
||||||
|
|
||||||
var operations = parsingArguments.Container.Get<PageContentParser>()
|
|
||||||
.Parse(parsingArguments.Container.Get<IGraphicsStateOperationFactory>(), new ByteArrayInputBytes(contents));
|
|
||||||
|
|
||||||
var context = new ContentStreamProcessor(MediaBox.Bounds, parsingArguments.CachingProviders.ResourceContainer);
|
|
||||||
|
|
||||||
var content = context.Process(operations);
|
|
||||||
|
|
||||||
Content = content;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
99
src/UglyToad.Pdf/Content/PageFactory.cs
Normal file
99
src/UglyToad.Pdf/Content/PageFactory.cs
Normal file
@ -0,0 +1,99 @@
|
|||||||
|
namespace UglyToad.Pdf.Content
|
||||||
|
{
|
||||||
|
using System;
|
||||||
|
using ContentStream;
|
||||||
|
using Cos;
|
||||||
|
using Filters;
|
||||||
|
using Geometry;
|
||||||
|
using Graphics;
|
||||||
|
using IO;
|
||||||
|
using Parser;
|
||||||
|
|
||||||
|
/// <summary>
/// Creates <see cref="Page"/> instances from page dictionaries using injected collaborators:
/// validates the page type, resolves the media box, loads the page resources and, when the
/// page has a Contents entry, decodes and processes the content stream.
/// </summary>
internal class PageFactory : IPageFactory
{
    private readonly IResourceStore resourceStore;
    private readonly IPdfObjectParser pdfObjectParser;
    private readonly IFilterProvider filterProvider;
    private readonly IPageContentParser pageContentParser;

    /// <summary>
    /// Create a new <see cref="PageFactory"/>.
    /// </summary>
    /// <param name="resourceStore">Receives the page's resource dictionary and is handed to the content stream processor.</param>
    /// <param name="pdfObjectParser">Resolves the indirect reference to the content stream.</param>
    /// <param name="filterProvider">Supplies the filters used to decode the content stream.</param>
    /// <param name="pageContentParser">Parses the decoded content bytes into operations.</param>
    public PageFactory(IResourceStore resourceStore, IPdfObjectParser pdfObjectParser, IFilterProvider filterProvider,
        IPageContentParser pageContentParser)
    {
        this.resourceStore = resourceStore;
        this.pdfObjectParser = pdfObjectParser;
        this.filterProvider = filterProvider;
        this.pageContentParser = pageContentParser;
    }

    /// <summary>
    /// Create the page with the given number from its dictionary.
    /// </summary>
    /// <param name="number">The page number, passed to the <see cref="Page"/> constructor and used in error messages.</param>
    /// <param name="dictionary">The page dictionary.</param>
    /// <param name="pageTreeMembers">Consulted for a media box when the page dictionary does not define one.</param>
    /// <param name="reader">The reader used when resolving resources and the content stream.</param>
    /// <param name="isLenientParsing">When true a wrong page type or a missing media box is tolerated.</param>
    /// <returns>The page; its content is left at the default value when the dictionary has no Contents object.</returns>
    /// <exception cref="ArgumentNullException">The <paramref name="dictionary"/> is null.</exception>
    /// <exception cref="InvalidOperationException">
    /// In strict mode the type is not Page or no media box is available; in any mode the content
    /// object does not parse to a stream.
    /// </exception>
    public Page Create(int number, PdfDictionary dictionary, PageTreeMembers pageTreeMembers, IRandomAccessRead reader,
        bool isLenientParsing)
    {
        if (dictionary == null)
        {
            throw new ArgumentNullException(nameof(dictionary));
        }

        var type = dictionary.GetName(CosName.TYPE);

        if (type != null && !type.Equals(CosName.PAGE) && !isLenientParsing)
        {
            throw new InvalidOperationException($"Page {number} had its type specified as {type} rather than 'Page'.");
        }

        var mediaBox = GetMediaBox(number, dictionary, pageTreeMembers, isLenientParsing);

        // Resources are loaded before the content is processed because the processor reads from the same store.
        if (dictionary.GetItemOrDefault(CosName.RESOURCES) is PdfDictionary resource)
        {
            resourceStore.LoadResourceDictionary(resource, reader, isLenientParsing);
        }

        PageContent content = default(PageContent);

        var contentObject = dictionary.GetItemOrDefault(CosName.CONTENTS) as CosObject;
        if (contentObject != null)
        {
            // NOTE(review): a literal false is passed here whereas Pages passes isLenientParsing
            // to the same parser - confirm which is intended.
            var contentStream = pdfObjectParser.Parse(contentObject.ToIndirectReference(), reader, false) as RawCosStream;

            if (contentStream == null)
            {
                throw new InvalidOperationException("Failed to parse the content for the page: " + number);
            }

            var contents = contentStream.Decode(filterProvider);

            var operations = pageContentParser.Parse(new ByteArrayInputBytes(contents));

            var context = new ContentStreamProcessor(mediaBox.Bounds, resourceStore);

            content = context.Process(operations);
        }

        return new Page(number, mediaBox, content);
    }

    /// <summary>
    /// Resolve the media box: the page's own MediaBox array wins, then the page tree value,
    /// then (lenient mode only) A4.
    /// </summary>
    private static MediaBox GetMediaBox(int number, PdfDictionary dictionary, PageTreeMembers pageTreeMembers,
        bool isLenientParsing)
    {
        if (dictionary.TryGetItemOfType(CosName.MEDIA_BOX, out COSArray mediaboxArray))
        {
            var x1 = mediaboxArray.getInt(0);
            var y1 = mediaboxArray.getInt(1);
            var x2 = mediaboxArray.getInt(2);
            var y2 = mediaboxArray.getInt(3);

            return new MediaBox(new PdfRectangle(x1, y1, x2, y2));
        }

        var inherited = pageTreeMembers.GetMediaBox();

        if (inherited != null)
        {
            return inherited;
        }

        if (isLenientParsing)
        {
            return MediaBox.A4;
        }

        throw new InvalidOperationException("No mediabox was present for page: " + number);
    }
}
|
||||||
|
}
|
@ -6,26 +6,26 @@
|
|||||||
using ContentStream;
|
using ContentStream;
|
||||||
using ContentStream.TypedAccessors;
|
using ContentStream.TypedAccessors;
|
||||||
using Cos;
|
using Cos;
|
||||||
|
using IO;
|
||||||
using Logging;
|
using Logging;
|
||||||
using Parser;
|
using Parser;
|
||||||
using Parser.PageTree;
|
|
||||||
|
|
||||||
public class Pages
|
public class Pages
|
||||||
{
|
{
|
||||||
|
private readonly ILog log;
|
||||||
private readonly Catalog catalog;
|
private readonly Catalog catalog;
|
||||||
private readonly ParsingArguments arguments;
|
private readonly IPdfObjectParser pdfObjectParser;
|
||||||
|
private readonly IPageFactory pageFactory;
|
||||||
|
private readonly IRandomAccessRead reader;
|
||||||
|
private readonly bool isLenientParsing;
|
||||||
private readonly PdfDictionary rootPageDictionary;
|
private readonly PdfDictionary rootPageDictionary;
|
||||||
private readonly Dictionary<int, PdfDictionary> locatedPages = new Dictionary<int, PdfDictionary>();
|
private readonly Dictionary<int, PdfDictionary> locatedPages = new Dictionary<int, PdfDictionary>();
|
||||||
|
|
||||||
public int Count { get; }
|
public int Count { get; }
|
||||||
|
|
||||||
internal Pages(Catalog catalog, ParsingArguments arguments)
|
internal Pages(ILog log, Catalog catalog, IPdfObjectParser pdfObjectParser, IPageFactory pageFactory,
|
||||||
|
IRandomAccessRead reader, bool isLenientParsing)
|
||||||
{
|
{
|
||||||
if (arguments == null)
|
|
||||||
{
|
|
||||||
throw new ArgumentNullException(nameof(arguments));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (catalog == null)
|
if (catalog == null)
|
||||||
{
|
{
|
||||||
throw new ArgumentNullException(nameof(catalog));
|
throw new ArgumentNullException(nameof(catalog));
|
||||||
@ -38,9 +38,9 @@
|
|||||||
throw new InvalidOperationException("No pages were present in the catalog for this PDF document");
|
throw new InvalidOperationException("No pages were present in the catalog for this PDF document");
|
||||||
}
|
}
|
||||||
|
|
||||||
var pageObject = arguments.Container.Get<DynamicParser>().Parse(arguments, pages, false);
|
var pagesObject = pdfObjectParser.Parse(pages.ToIndirectReference(), reader, isLenientParsing);
|
||||||
|
|
||||||
if (!(pageObject is PdfDictionary catalogPageDictionary))
|
if (!(pagesObject is PdfDictionary catalogPageDictionary))
|
||||||
{
|
{
|
||||||
throw new InvalidOperationException("Could not find the root pages object: " + pages);
|
throw new InvalidOperationException("Could not find the root pages object: " + pages);
|
||||||
}
|
}
|
||||||
@ -51,8 +51,12 @@
|
|||||||
|
|
||||||
Count = count;
|
Count = count;
|
||||||
|
|
||||||
|
this.log = log;
|
||||||
this.catalog = catalog;
|
this.catalog = catalog;
|
||||||
this.arguments = arguments;
|
this.pdfObjectParser = pdfObjectParser;
|
||||||
|
this.pageFactory = pageFactory;
|
||||||
|
this.reader = reader;
|
||||||
|
this.isLenientParsing = isLenientParsing;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -60,7 +64,8 @@
|
|||||||
{
|
{
|
||||||
if (locatedPages.TryGetValue(pageNumber, out PdfDictionary targetPageDictionary))
|
if (locatedPages.TryGetValue(pageNumber, out PdfDictionary targetPageDictionary))
|
||||||
{
|
{
|
||||||
return new Page(pageNumber, targetPageDictionary, new PageTreeMembers(), arguments);
|
return pageFactory.Create(pageNumber, targetPageDictionary, new PageTreeMembers(), reader,
|
||||||
|
isLenientParsing);
|
||||||
}
|
}
|
||||||
|
|
||||||
var observed = new List<int>();
|
var observed = new List<int>();
|
||||||
@ -73,8 +78,7 @@
|
|||||||
throw new InvalidOperationException("Could not find the page with number: " + pageNumber);
|
throw new InvalidOperationException("Could not find the page with number: " + pageNumber);
|
||||||
}
|
}
|
||||||
|
|
||||||
var page = arguments.Container.Get<PageParser>()
|
var page = pageFactory.Create(pageNumber, targetPageDictionary, new PageTreeMembers(), reader, isLenientParsing);
|
||||||
.Parse(pageNumber, targetPageDictionary, arguments);
|
|
||||||
|
|
||||||
locatedPages[pageNumber] = targetPageDictionary;
|
locatedPages[pageNumber] = targetPageDictionary;
|
||||||
|
|
||||||
@ -108,8 +112,7 @@
|
|||||||
|
|
||||||
if (!type.Equals(CosName.PAGES))
|
if (!type.Equals(CosName.PAGES))
|
||||||
{
|
{
|
||||||
arguments.Container.Get<ILog>()
|
log.Warn("Did not find the expected type (Page or Pages) in dictionary: " + currentPageDictionary);
|
||||||
.Warn("Did not find the expected type (Page or Pages) in dictionary: " + currentPageDictionary);
|
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -120,7 +123,7 @@
|
|||||||
foreach (var kid in kids.OfType<CosObject>())
|
foreach (var kid in kids.OfType<CosObject>())
|
||||||
{
|
{
|
||||||
// todo: exit early
|
// todo: exit early
|
||||||
var child = arguments.Container.Get<DynamicParser>().Parse(arguments, kid, false) as PdfDictionary;
|
var child = pdfObjectParser.Parse(kid.ToIndirectReference(), reader, isLenientParsing) as PdfDictionary;
|
||||||
|
|
||||||
var thisPageMatches = FindPage(child, soughtPageNumber, pageNumbersObserved);
|
var thisPageMatches = FindPage(child, soughtPageNumber, pageNumbersObserved);
|
||||||
|
|
||||||
|
@ -4,31 +4,32 @@
|
|||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using ContentStream;
|
using ContentStream;
|
||||||
using Cos;
|
using Cos;
|
||||||
using Filters;
|
|
||||||
using Fonts;
|
using Fonts;
|
||||||
using Fonts.Cmap;
|
|
||||||
using Fonts.Parser;
|
|
||||||
using IO;
|
using IO;
|
||||||
using Parser;
|
using Parser;
|
||||||
|
|
||||||
internal interface IResourceStore
|
|
||||||
{
|
|
||||||
IFont GetFont(CosName name);
|
|
||||||
}
|
|
||||||
|
|
||||||
internal class ResourceContainer : IResourceStore
|
internal class ResourceContainer : IResourceStore
|
||||||
{
|
{
|
||||||
|
private readonly IPdfObjectParser pdfObjectParser;
|
||||||
|
private readonly IFontFactory fontFactory;
|
||||||
|
|
||||||
private readonly Dictionary<CosName, IFont> loadedFonts = new Dictionary<CosName, IFont>();
|
private readonly Dictionary<CosName, IFont> loadedFonts = new Dictionary<CosName, IFont>();
|
||||||
|
|
||||||
internal void LoadResourceDictionary(PdfDictionary dictionary, ParsingArguments arguments)
|
public ResourceContainer(IPdfObjectParser pdfObjectParser, IFontFactory fontFactory)
|
||||||
|
{
|
||||||
|
this.pdfObjectParser = pdfObjectParser;
|
||||||
|
this.fontFactory = fontFactory;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void LoadResourceDictionary(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
|
||||||
{
|
{
|
||||||
if (dictionary.TryGetValue(CosName.FONT, out var fontBase) && fontBase is PdfDictionary fontDictionary)
|
if (dictionary.TryGetValue(CosName.FONT, out var fontBase) && fontBase is PdfDictionary fontDictionary)
|
||||||
{
|
{
|
||||||
LoadFontDictionary(fontDictionary, arguments);
|
LoadFontDictionary(fontDictionary, reader, isLenientParsing);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void LoadFontDictionary(PdfDictionary fontDictionary, ParsingArguments arguments)
|
private void LoadFontDictionary(PdfDictionary fontDictionary, IRandomAccessRead reader, bool isLenientParsing)
|
||||||
{
|
{
|
||||||
foreach (var pair in fontDictionary)
|
foreach (var pair in fontDictionary)
|
||||||
{
|
{
|
||||||
@ -39,24 +40,22 @@
|
|||||||
|
|
||||||
if (!(pair.Value is CosObject objectKey))
|
if (!(pair.Value is CosObject objectKey))
|
||||||
{
|
{
|
||||||
if (arguments.IsLenientParsing)
|
if (isLenientParsing)
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
throw new InvalidOperationException($"The font with name {pair.Key} did not link to an object key. Value was: {pair.Value}.");
|
throw new InvalidOperationException($"The font with name {pair.Key} did not link to an object key. Value was: {pair.Value}.");
|
||||||
}
|
}
|
||||||
|
|
||||||
var dynamicParser = arguments.Get<DynamicParser>();
|
var fontObject = pdfObjectParser.Parse(objectKey.ToIndirectReference(), reader, false) as PdfDictionary;
|
||||||
|
|
||||||
var fontObject = dynamicParser.Parse(arguments, objectKey, false) as PdfDictionary;
|
|
||||||
|
|
||||||
if (fontObject == null)
|
if (fontObject == null)
|
||||||
{
|
{
|
||||||
throw new InvalidOperationException($"Could not retrieve the font with name: {pair.Key} which should have been object {objectKey.GetObjectNumber()}");
|
throw new InvalidOperationException($"Could not retrieve the font with name: {pair.Key} which should have been object {objectKey.GetObjectNumber()}");
|
||||||
}
|
}
|
||||||
|
|
||||||
loadedFonts[pair.Key] = arguments.Get<FontFactory>().GetFont(fontObject, arguments);
|
loadedFonts[pair.Key] = fontFactory.Get(fontObject, reader, isLenientParsing);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
15
src/UglyToad.Pdf/ContentStream/IndirectReference.cs
Normal file
15
src/UglyToad.Pdf/ContentStream/IndirectReference.cs
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
namespace UglyToad.Pdf.ContentStream
|
||||||
|
{
|
||||||
|
/// <summary>
/// Identifies an indirect object by its object number and generation.
/// Two references are equal when both values match.
/// </summary>
public struct IndirectReference : System.IEquatable<IndirectReference>
{
    /// <summary>
    /// The object number of the referenced object.
    /// </summary>
    public long ObjectNumber { get; }

    /// <summary>
    /// The generation number of the referenced object.
    /// </summary>
    public int Generation { get; }

    /// <summary>
    /// Create a new <see cref="IndirectReference"/>.
    /// </summary>
    /// <param name="objectNumber">The object number.</param>
    /// <param name="generation">The generation number.</param>
    public IndirectReference(long objectNumber, int generation)
    {
        ObjectNumber = objectNumber;
        Generation = generation;
    }

    /// <summary>
    /// Typed equality which avoids the boxing performed by the default struct comparison.
    /// </summary>
    public bool Equals(IndirectReference other)
    {
        return ObjectNumber == other.ObjectNumber && Generation == other.Generation;
    }

    /// <inheritdoc />
    public override bool Equals(object obj)
    {
        return obj is IndirectReference other && Equals(other);
    }

    /// <inheritdoc />
    public override int GetHashCode()
    {
        // Overflow is expected and harmless when mixing hash values.
        unchecked
        {
            return (ObjectNumber.GetHashCode() * 397) ^ Generation;
        }
    }

    /// <summary>
    /// Formats the reference as "objectNumber generation R", for example "12 0 R".
    /// </summary>
    public override string ToString()
    {
        return $"{ObjectNumber} {Generation} R";
    }
}
|
||||||
|
}
|
@ -1,5 +1,7 @@
|
|||||||
namespace UglyToad.Pdf.Cos
|
namespace UglyToad.Pdf.Cos
|
||||||
{
|
{
|
||||||
|
using ContentStream;
|
||||||
|
|
||||||
public class CosObject : CosBase, ICosUpdateInfo
|
public class CosObject : CosBase, ICosUpdateInfo
|
||||||
{
|
{
|
||||||
private CosBase baseObject;
|
private CosBase baseObject;
|
||||||
@ -124,5 +126,10 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
public bool NeedsToBeUpdated { get; set; }
|
public bool NeedsToBeUpdated { get; set; }
|
||||||
|
|
||||||
|
/// <summary>
/// Create an <see cref="IndirectReference"/> from this object's object number and generation number.
/// </summary>
/// <returns>A new reference built from the current objectNumber and generationNumber values.</returns>
public IndirectReference ToIndirectReference()
|
||||||
|
{
|
||||||
|
return new IndirectReference(objectNumber, generationNumber);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -32,5 +32,7 @@
|
|||||||
/// The definition of the character collection for the font.
|
/// The definition of the character collection for the font.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
CharacterIdentifierSystemInfo SystemInfo { get; }
|
CharacterIdentifierSystemInfo SystemInfo { get; }
|
||||||
|
|
||||||
|
CidFontType CidFontType { get; }
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -1,6 +1,5 @@
|
|||||||
namespace UglyToad.Pdf.Fonts.CidFonts
|
namespace UglyToad.Pdf.Fonts.CidFonts
|
||||||
{
|
{
|
||||||
using Cmap;
|
|
||||||
using Cos;
|
using Cos;
|
||||||
|
|
||||||
/// <inheritdoc/>
|
/// <inheritdoc/>
|
||||||
@ -14,5 +13,6 @@
|
|||||||
public CosName SubType { get; }
|
public CosName SubType { get; }
|
||||||
public CosName BaseFont { get; }
|
public CosName BaseFont { get; }
|
||||||
public CharacterIdentifierSystemInfo SystemInfo { get; }
|
public CharacterIdentifierSystemInfo SystemInfo { get; }
|
||||||
|
public CidFontType CidFontType => CidFontType.Type0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
namespace UglyToad.Pdf.Fonts.CidFonts
|
namespace UglyToad.Pdf.Fonts.CidFonts
|
||||||
{
|
{
|
||||||
using Cmap;
|
|
||||||
using Cos;
|
using Cos;
|
||||||
|
|
||||||
/// <inheritdoc />
|
/// <inheritdoc />
|
||||||
@ -14,5 +13,6 @@
|
|||||||
public CosName SubType { get; }
|
public CosName SubType { get; }
|
||||||
public CosName BaseFont { get; }
|
public CosName BaseFont { get; }
|
||||||
public CharacterIdentifierSystemInfo SystemInfo { get; }
|
public CharacterIdentifierSystemInfo SystemInfo { get; }
|
||||||
|
public CidFontType CidFontType => CidFontType.Type2;
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -6,7 +6,7 @@
|
|||||||
using IO;
|
using IO;
|
||||||
using Util.JetBrains.Annotations;
|
using Util.JetBrains.Annotations;
|
||||||
|
|
||||||
public class CMap
|
internal class CMap
|
||||||
{
|
{
|
||||||
public CharacterIdentifierSystemInfo Info { get; }
|
public CharacterIdentifierSystemInfo Info { get; }
|
||||||
|
|
||||||
@ -30,6 +30,8 @@
|
|||||||
[NotNull]
|
[NotNull]
|
||||||
public IReadOnlyList<CidCharacterMapping> CidCharacterMappings { get; }
|
public IReadOnlyList<CidCharacterMapping> CidCharacterMappings { get; }
|
||||||
|
|
||||||
|
public WritingMode WritingMode { get; }
|
||||||
|
|
||||||
public bool HasCidMappings => CidCharacterMappings.Count > 0 || CidRanges.Count > 0;
|
public bool HasCidMappings => CidCharacterMappings.Count > 0 || CidRanges.Count > 0;
|
||||||
|
|
||||||
public bool HasUnicodeMappings => BaseFontCharacterMap.Count > 0;
|
public bool HasUnicodeMappings => BaseFontCharacterMap.Count > 0;
|
||||||
@ -41,7 +43,7 @@
|
|||||||
{
|
{
|
||||||
Info = info;
|
Info = info;
|
||||||
Type = type;
|
Type = type;
|
||||||
WMode = wMode;
|
WritingMode = (WritingMode)wMode;
|
||||||
Name = name;
|
Name = name;
|
||||||
Version = version;
|
Version = version;
|
||||||
BaseFontCharacterMap = baseFontCharacterMap ?? throw new ArgumentNullException(nameof(baseFontCharacterMap));
|
BaseFontCharacterMap = baseFontCharacterMap ?? throw new ArgumentNullException(nameof(baseFontCharacterMap));
|
||||||
@ -51,8 +53,7 @@
|
|||||||
maxCodeLength = CodespaceRanges.Max(x => x.CodeLength);
|
maxCodeLength = CodespaceRanges.Max(x => x.CodeLength);
|
||||||
minCodeLength = CodespaceRanges.Min(x => x.CodeLength);
|
minCodeLength = CodespaceRanges.Min(x => x.CodeLength);
|
||||||
}
|
}
|
||||||
|
|
||||||
private int wmode = 0;
|
|
||||||
private string cmapName = null;
|
private string cmapName = null;
|
||||||
private string cmapVersion = null;
|
private string cmapVersion = null;
|
||||||
private int cmapType = -1;
|
private int cmapType = -1;
|
||||||
|
8
src/UglyToad.Pdf/Fonts/Cmap/WritingMode.cs
Normal file
8
src/UglyToad.Pdf/Fonts/Cmap/WritingMode.cs
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
namespace UglyToad.Pdf.Fonts.Cmap
|
||||||
|
{
|
||||||
|
/// <summary>
/// The writing mode of a CMap. The CMap constructor casts its integer wMode value directly
/// to this enum, so the numeric values must not be changed.
/// </summary>
internal enum WritingMode
|
||||||
|
{
|
||||||
|
/// <summary>
/// Horizontal writing (wMode value 0).
/// </summary>
Horizontal = 0,
|
||||||
|
/// <summary>
/// Vertical writing (wMode value 1).
/// </summary>
Vertical = 1
|
||||||
|
}
|
||||||
|
}
|
55
src/UglyToad.Pdf/Fonts/Composite/ToUnicodeCMap.cs
Normal file
55
src/UglyToad.Pdf/Fonts/Composite/ToUnicodeCMap.cs
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
namespace UglyToad.Pdf.Fonts.Composite
|
||||||
|
{
|
||||||
|
using System;
|
||||||
|
using Cmap;
|
||||||
|
using IO;
|
||||||
|
using Util.JetBrains.Annotations;
|
||||||
|
|
||||||
|
/// <summary>
/// Defines the information content (actual text) of the font
/// as opposed to the display format.
/// Wraps an optional <see cref="CMap"/>; when none is supplied lookups report failure.
/// </summary>
internal class ToUnicodeCMap
{
    [CanBeNull]
    private readonly CMap cMap;

    /// <summary>
    /// Does the font provide a CMap to map CIDs to Unicode values?
    /// </summary>
    public bool CanMapToUnicode => cMap != null;

    /// <summary>
    /// Is this document (unexpectedly) using a predefined Identity-H/V CMap as its ToUnicode CMap?
    /// Always false when no CMap was supplied or the CMap has no name.
    /// </summary>
    public bool IsUsingIdentityAsUnicodeMap { get; }

    /// <summary>
    /// Create a new <see cref="ToUnicodeCMap"/>.
    /// </summary>
    /// <param name="cMap">The CMap to wrap, or null when the font has none.</param>
    public ToUnicodeCMap([CanBeNull]CMap cMap)
    {
        this.cMap = cMap;

        if (CanMapToUnicode)
        {
            // The name is an identifier rather than linguistic text so compare ordinally;
            // it may also be absent, in which case this is not an identity map.
            IsUsingIdentityAsUnicodeMap = cMap.Name != null
                && cMap.Name.StartsWith("Identity-", StringComparison.OrdinalIgnoreCase);
        }
    }

    /// <summary>
    /// Try to convert the code to its Unicode value using the wrapped CMap.
    /// </summary>
    /// <param name="code">The code to convert.</param>
    /// <param name="value">The converted text; null when there is no CMap.</param>
    /// <returns>False when there is no CMap, otherwise the result of the CMap's own conversion.</returns>
    public bool TryGet(int code, out string value)
    {
        value = null;

        if (!CanMapToUnicode)
        {
            return false;
        }

        return cMap.TryConvertToUnicode(code, out value);
    }

    /// <summary>
    /// Read the next code from the input using the wrapped CMap.
    /// </summary>
    /// <param name="inputBytes">The input to read the code from.</param>
    /// <returns>The code returned by the CMap.</returns>
    /// <exception cref="InvalidOperationException">No CMap was supplied; check <see cref="CanMapToUnicode"/> first.</exception>
    public int ReadCode(IInputBytes inputBytes)
    {
        if (!CanMapToUnicode)
        {
            throw new InvalidOperationException("Cannot read a character code because no ToUnicode CMap is available for this font.");
        }

        return cMap.ReadCode(inputBytes);
    }
}
|
||||||
|
}
|
76
src/UglyToad.Pdf/Fonts/Composite/Type0Font.cs
Normal file
76
src/UglyToad.Pdf/Fonts/Composite/Type0Font.cs
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
namespace UglyToad.Pdf.Fonts.Composite
{
    using System;
    using CidFonts;
    using Cmap;
    using Cos;
    using Geometry;
    using IO;
    using Util.JetBrains.Annotations;

    /// <summary>
    /// Defines glyphs using a CIDFont
    /// </summary>
    internal class Type0Font : IFont
    {
        /// <summary>
        /// The name of the font; this is the <see cref="BaseFont"/>.
        /// </summary>
        public CosName Name => BaseFont;

        /// <summary>
        /// The base font name this font was created with.
        /// </summary>
        [NotNull]
        public CosName BaseFont { get; }

        /// <summary>
        /// The CIDFont this font was created with.
        /// </summary>
        [NotNull]
        public ICidFont CidFont { get; }

        /// <summary>
        /// The encoding CMap of the font, used to read character codes from content bytes.
        /// </summary>
        [NotNull]
        public CMap CMap { get; }

        /// <summary>
        /// Wrapper around the optional ToUnicode CMap of the font.
        /// </summary>
        [NotNull]
        public ToUnicodeCMap ToUnicode { get; }

        /// <summary>
        /// Whether the encoding <see cref="CMap"/> declares vertical writing.
        /// </summary>
        public bool IsVertical => CMap.WritingMode == WritingMode.Vertical;

        /// <summary>
        /// Create a new <see cref="Type0Font"/>.
        /// </summary>
        /// <param name="baseFont">The base font name. Required.</param>
        /// <param name="cidFont">The descendant CIDFont. Required.</param>
        /// <param name="cmap">The encoding CMap. Required.</param>
        /// <param name="toUnicodeCMap">The ToUnicode CMap, may be <see langword="null"/>.</param>
        /// <exception cref="ArgumentNullException">Any required argument is <see langword="null"/>.</exception>
        public Type0Font(CosName baseFont, ICidFont cidFont, CMap cmap, CMap toUnicodeCMap)
        {
            BaseFont = baseFont ?? throw new ArgumentNullException(nameof(baseFont));
            CidFont = cidFont ?? throw new ArgumentNullException(nameof(cidFont));
            CMap = cmap ?? throw new ArgumentNullException(nameof(cmap));
            ToUnicode = new ToUnicodeCMap(toUnicodeCMap);
        }

        /// <summary>
        /// Read the next character code from the input.
        /// </summary>
        /// <param name="bytes">The bytes to read the code from.</param>
        /// <param name="codeLength">The number of bytes consumed while reading the code.</param>
        /// <returns>The character code.</returns>
        public int ReadCharacterCode(IInputBytes bytes, out int codeLength)
        {
            var current = bytes.CurrentOffset;

            // Character codes are read with the font's encoding CMap, which is always present.
            // The ToUnicode CMap is optional and only translates codes to text, so reading
            // through it failed for every font that did not declare one.
            var code = CMap.ReadCode(bytes);

            codeLength = bytes.CurrentOffset - current;

            return code;
        }

        /// <summary>
        /// Try to get the Unicode text for a character code.
        /// </summary>
        /// <param name="characterCode">The character code to convert.</param>
        /// <param name="value">The Unicode text, or <see langword="null"/> if no mapping was found.</param>
        /// <returns><see langword="true"/> if the code could be converted.</returns>
        public bool TryGetUnicode(int characterCode, out string value)
        {
            value = null;

            if (!ToUnicode.CanMapToUnicode)
            {
                return false;
            }

            // According to PdfBox certain providers incorrectly using Identity CMaps as ToUnicode.
            if (ToUnicode.IsUsingIdentityAsUnicodeMap)
            {
                // For an identity map the code is used directly as the UTF-16 code unit.
                value = new string((char)characterCode, 1);

                return true;
            }

            return ToUnicode.TryGet(characterCode, out value);
        }

        /// <summary>
        /// Get the displacement for a character code.
        /// </summary>
        /// <param name="characterCode">The character code (currently unused).</param>
        /// <returns>A fixed vector of (0.333, 0) regardless of the character code.</returns>
        public PdfVector GetDisplacement(int characterCode)
        {
            // NOTE(review): hard-coded value, presumably a placeholder until glyph widths
            // are read from the CIDFont - confirm.
            return new PdfVector(0.333m, 0);
        }
    }
}
|
@ -1,5 +1,6 @@
|
|||||||
namespace UglyToad.Pdf.Fonts
|
namespace UglyToad.Pdf.Fonts
|
||||||
{
|
{
|
||||||
|
using ContentStream;
|
||||||
using Cos;
|
using Cos;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
@ -13,13 +14,13 @@
|
|||||||
/// </remarks>
|
/// </remarks>
|
||||||
internal class DescriptorFontFile
|
internal class DescriptorFontFile
|
||||||
{
|
{
|
||||||
public CosObjectKey ObjectKey { get; }
|
public IndirectReference ObjectKey { get; }
|
||||||
|
|
||||||
public byte[] FileBytes { get; }
|
public byte[] FileBytes { get; }
|
||||||
|
|
||||||
public FontFileType FileType { get; }
|
public FontFileType FileType { get; }
|
||||||
|
|
||||||
public DescriptorFontFile(CosObjectKey key, FontFileType fileType)
|
public DescriptorFontFile(IndirectReference key, FontFileType fileType)
|
||||||
{
|
{
|
||||||
ObjectKey = key;
|
ObjectKey = key;
|
||||||
FileBytes = new byte[0];
|
FileBytes = new byte[0];
|
||||||
|
@ -0,0 +1,32 @@
|
|||||||
|
namespace UglyToad.Pdf.Fonts.Exceptions
{
    using System;
    using System.Runtime.Serialization;

    /// <summary>
    /// The exception thrown when an error is encountered parsing a font from the PDF document.
    /// This occurs where the format of the font program or dictionary does not meet the specification.
    /// </summary>
    /// <inheritdoc cref="Exception"/>
    [Serializable]
    public class InvalidFontFormatException : Exception
    {
        /// <summary>
        /// Create the exception without a message.
        /// </summary>
        public InvalidFontFormatException()
        {
        }

        /// <summary>
        /// Create the exception with a message describing the format problem.
        /// </summary>
        /// <param name="message">The error message.</param>
        public InvalidFontFormatException(string message) : base(message)
        {
        }

        /// <summary>
        /// Create the exception with a message and the exception which caused it.
        /// </summary>
        /// <param name="message">The error message.</param>
        /// <param name="inner">The underlying cause.</param>
        public InvalidFontFormatException(string message, Exception inner) : base(message, inner)
        {
        }

        /// <summary>
        /// Create the exception from serialized data.
        /// </summary>
        /// <param name="info">The serialized object data.</param>
        /// <param name="context">The streaming context.</param>
        protected InvalidFontFormatException(
            SerializationInfo info,
            StreamingContext context) : base(info, context)
        {
        }
    }
}
|
@ -4,22 +4,26 @@
|
|||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using ContentStream;
|
using ContentStream;
|
||||||
using Cos;
|
using Cos;
|
||||||
|
using Exceptions;
|
||||||
|
using IO;
|
||||||
|
using Logging;
|
||||||
using Parser.Handlers;
|
using Parser.Handlers;
|
||||||
using Pdf.Parser;
|
|
||||||
|
|
||||||
internal class FontFactory
|
internal class FontFactory : IFontFactory
|
||||||
{
|
{
|
||||||
|
private readonly ILog log;
|
||||||
private readonly IReadOnlyDictionary<CosName, IFontHandler> handlers;
|
private readonly IReadOnlyDictionary<CosName, IFontHandler> handlers;
|
||||||
|
|
||||||
public FontFactory(Type0FontHandler type0FontHandler)
|
public FontFactory(ILog log, Type0FontHandler type0FontHandler)
|
||||||
{
|
{
|
||||||
|
this.log = log;
|
||||||
handlers = new Dictionary<CosName, IFontHandler>
|
handlers = new Dictionary<CosName, IFontHandler>
|
||||||
{
|
{
|
||||||
{CosName.TYPE0, type0FontHandler}
|
{CosName.TYPE0, type0FontHandler}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
public IFont GetFont(PdfDictionary dictionary, ParsingArguments arguments)
|
public IFont Get(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
|
||||||
{
|
{
|
||||||
var type = dictionary.GetName(CosName.TYPE);
|
var type = dictionary.GetName(CosName.TYPE);
|
||||||
|
|
||||||
@ -27,13 +31,13 @@
|
|||||||
{
|
{
|
||||||
var message = "The font dictionary did not have type 'Font'. " + dictionary;
|
var message = "The font dictionary did not have type 'Font'. " + dictionary;
|
||||||
|
|
||||||
if (arguments.IsLenientParsing)
|
if (isLenientParsing)
|
||||||
{
|
{
|
||||||
arguments.Log.Error(message);
|
log?.Error(message);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
throw new InvalidOperationException(message);
|
throw new InvalidFontFormatException(message);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -41,7 +45,7 @@
|
|||||||
|
|
||||||
if (handlers.TryGetValue(subtype, out var handler))
|
if (handlers.TryGetValue(subtype, out var handler))
|
||||||
{
|
{
|
||||||
return handler.Generate(dictionary, arguments);
|
return handler.Generate(dictionary, reader, isLenientParsing);
|
||||||
}
|
}
|
||||||
|
|
||||||
throw new NotImplementedException($"Parsing not implemented for fonts of type: {subtype}, please submit a pull request or an issue.");
|
throw new NotImplementedException($"Parsing not implemented for fonts of type: {subtype}, please submit a pull request or an issue.");
|
||||||
@ -49,3 +53,4 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -10,16 +10,12 @@
|
|||||||
internal interface IFont
|
internal interface IFont
|
||||||
{
|
{
|
||||||
CosName Name { get; }
|
CosName Name { get; }
|
||||||
|
|
||||||
CosName SubType { get; }
|
|
||||||
|
|
||||||
string BaseFontType { get; }
|
|
||||||
|
|
||||||
bool IsVertical { get; }
|
bool IsVertical { get; }
|
||||||
|
|
||||||
int ReadCharacterCode(IInputBytes bytes, out int codeLength);
|
int ReadCharacterCode(IInputBytes bytes, out int codeLength);
|
||||||
|
|
||||||
string GetUnicode(int characterCode);
|
bool TryGetUnicode(int characterCode, out string value);
|
||||||
|
|
||||||
PdfVector GetDisplacement(int characterCode);
|
PdfVector GetDisplacement(int characterCode);
|
||||||
}
|
}
|
||||||
@ -51,6 +47,11 @@
|
|||||||
return code;
|
return code;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public bool TryGetUnicode(int characterCode, out string value)
|
||||||
|
{
|
||||||
|
throw new NotImplementedException();
|
||||||
|
}
|
||||||
|
|
||||||
public string GetUnicode(int characterCode)
|
public string GetUnicode(int characterCode)
|
||||||
{
|
{
|
||||||
if (ToUnicode != null)
|
if (ToUnicode != null)
|
||||||
|
10
src/UglyToad.Pdf/Fonts/IFontFactory.cs
Normal file
10
src/UglyToad.Pdf/Fonts/IFontFactory.cs
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
namespace UglyToad.Pdf.Fonts
{
    using ContentStream;
    using IO;

    /// <summary>
    /// Creates <see cref="IFont"/> instances from font dictionaries.
    /// </summary>
    internal interface IFontFactory
    {
        /// <summary>
        /// Get the font described by the font dictionary.
        /// </summary>
        /// <param name="dictionary">The font dictionary.</param>
        /// <param name="reader">The reader handed on to the font handlers.</param>
        /// <param name="isLenientParsing">Whether format errors should be tolerated where possible instead of thrown.</param>
        /// <returns>The font for the dictionary.</returns>
        IFont Get(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing);
    }
}
|
@ -9,7 +9,7 @@
|
|||||||
using Tokenization.Scanner;
|
using Tokenization.Scanner;
|
||||||
using Tokenization.Tokens;
|
using Tokenization.Tokens;
|
||||||
|
|
||||||
public class CMapParser
|
internal class CMapParser
|
||||||
{
|
{
|
||||||
private static readonly BaseFontRangeParser BaseFontRangeParser = new BaseFontRangeParser();
|
private static readonly BaseFontRangeParser BaseFontRangeParser = new BaseFontRangeParser();
|
||||||
private static readonly BaseFontCharacterParser BaseFontCharacterParser = new BaseFontCharacterParser();
|
private static readonly BaseFontCharacterParser BaseFontCharacterParser = new BaseFontCharacterParser();
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
namespace UglyToad.Pdf.Fonts.Parser.Handlers
|
namespace UglyToad.Pdf.Fonts.Parser.Handlers
|
||||||
{
|
{
|
||||||
using ContentStream;
|
using ContentStream;
|
||||||
using Pdf.Parser;
|
using IO;
|
||||||
|
|
||||||
internal interface IFontHandler
|
internal interface IFontHandler
|
||||||
{
|
{
|
||||||
IFont Generate(PdfDictionary dictionary, ParsingArguments parsingArguments);
|
IFont Generate(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing);
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -1,9 +1,12 @@
|
|||||||
namespace UglyToad.Pdf.Fonts.Parser.Handlers
|
namespace UglyToad.Pdf.Fonts.Parser.Handlers
|
||||||
{
|
{
|
||||||
using System;
|
using System;
|
||||||
|
using CidFonts;
|
||||||
using Cmap;
|
using Cmap;
|
||||||
|
using Composite;
|
||||||
using ContentStream;
|
using ContentStream;
|
||||||
using Cos;
|
using Cos;
|
||||||
|
using Exceptions;
|
||||||
using Filters;
|
using Filters;
|
||||||
using IO;
|
using IO;
|
||||||
using Parts;
|
using Parts;
|
||||||
@ -14,31 +17,35 @@
|
|||||||
private readonly CidFontFactory cidFontFactory;
|
private readonly CidFontFactory cidFontFactory;
|
||||||
private readonly CMapCache cMapCache;
|
private readonly CMapCache cMapCache;
|
||||||
private readonly IFilterProvider filterProvider;
|
private readonly IFilterProvider filterProvider;
|
||||||
|
private readonly IPdfObjectParser pdfObjectParser;
|
||||||
|
|
||||||
public Type0FontHandler(CidFontFactory cidFontFactory, CMapCache cMapCache, IFilterProvider filterProvider)
|
public Type0FontHandler(CidFontFactory cidFontFactory, CMapCache cMapCache, IFilterProvider filterProvider, IPdfObjectParser pdfObjectParser)
|
||||||
{
|
{
|
||||||
this.cidFontFactory = cidFontFactory;
|
this.cidFontFactory = cidFontFactory;
|
||||||
this.cMapCache = cMapCache;
|
this.cMapCache = cMapCache;
|
||||||
this.filterProvider = filterProvider;
|
this.filterProvider = filterProvider;
|
||||||
|
this.pdfObjectParser = pdfObjectParser;
|
||||||
}
|
}
|
||||||
|
|
||||||
public IFont Generate(PdfDictionary dictionary, ParsingArguments arguments)
|
public IFont Generate(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
|
||||||
{
|
{
|
||||||
var dynamicParser = arguments.Get<DynamicParser>();
|
|
||||||
|
|
||||||
var baseFont = dictionary.GetName(CosName.BASE_FONT);
|
var baseFont = dictionary.GetName(CosName.BASE_FONT);
|
||||||
|
|
||||||
var cMap = ReadEncoding(dictionary, out var isCMapPredefined);
|
var cMap = ReadEncoding(dictionary, out var isCMapPredefined);
|
||||||
|
|
||||||
if (TryGetFirstDescendant(dictionary, out var descendantObject))
|
if (TryGetFirstDescendant(dictionary, out var descendantObject))
|
||||||
{
|
{
|
||||||
var parsed = dynamicParser.Parse(arguments, descendantObject, false);
|
var parsed = pdfObjectParser.Parse(descendantObject.ToIndirectReference(), reader, isLenientParsing);
|
||||||
|
|
||||||
if (parsed is PdfDictionary descendantFontDictionary)
|
if (parsed is PdfDictionary descendantFontDictionary)
|
||||||
{
|
{
|
||||||
ParseDescendant(descendantFontDictionary, arguments);
|
ParseDescendant(descendantFontDictionary, reader, isLenientParsing);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
throw new InvalidFontFormatException("No descendant font dictionary was declared for this Type 0 font. This dictionary should contain the CIDFont for the Type 0 font. " + dictionary);
|
||||||
|
}
|
||||||
|
|
||||||
var ucs2CMap = GetUcs2CMap(dictionary, isCMapPredefined, false);
|
var ucs2CMap = GetUcs2CMap(dictionary, isCMapPredefined, false);
|
||||||
|
|
||||||
@ -47,22 +54,17 @@
|
|||||||
{
|
{
|
||||||
var toUnicodeValue = dictionary[CosName.TO_UNICODE];
|
var toUnicodeValue = dictionary[CosName.TO_UNICODE];
|
||||||
|
|
||||||
var toUnicode = dynamicParser.Parse(arguments, toUnicodeValue as CosObject, false) as RawCosStream;
|
var toUnicode = pdfObjectParser.Parse(((CosObject)toUnicodeValue).ToIndirectReference(), reader, isLenientParsing) as RawCosStream;
|
||||||
|
|
||||||
var decodedUnicodeCMap = toUnicode?.Decode(filterProvider);
|
var decodedUnicodeCMap = toUnicode?.Decode(filterProvider);
|
||||||
|
|
||||||
if (decodedUnicodeCMap != null)
|
if (decodedUnicodeCMap != null)
|
||||||
{
|
{
|
||||||
toUnicodeCMap = cMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), arguments.IsLenientParsing);
|
toUnicodeCMap = cMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var font = new CompositeFont
|
var font = new Type0Font(baseFont, new Type0CidFont(), cMap, toUnicodeCMap);
|
||||||
{
|
|
||||||
SubType = CosName.TYPE0,
|
|
||||||
ToUnicode = toUnicodeCMap,
|
|
||||||
BaseFont = baseFont
|
|
||||||
};
|
|
||||||
|
|
||||||
return font;
|
return font;
|
||||||
}
|
}
|
||||||
@ -91,7 +93,7 @@
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void ParseDescendant(PdfDictionary dictionary, ParsingArguments arguments)
|
private void ParseDescendant(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
|
||||||
{
|
{
|
||||||
var type = dictionary.GetName(CosName.TYPE);
|
var type = dictionary.GetName(CosName.TYPE);
|
||||||
if (!CosName.FONT.Equals(type))
|
if (!CosName.FONT.Equals(type))
|
||||||
@ -99,7 +101,7 @@
|
|||||||
throw new InvalidOperationException($"Expected \'Font\' dictionary but found \'{type.Name}\'");
|
throw new InvalidOperationException($"Expected \'Font\' dictionary but found \'{type.Name}\'");
|
||||||
}
|
}
|
||||||
|
|
||||||
cidFontFactory.Generate(dictionary, arguments, arguments.IsLenientParsing);
|
cidFontFactory.Generate(dictionary, reader, isLenientParsing);
|
||||||
}
|
}
|
||||||
|
|
||||||
private CMap ReadEncoding(PdfDictionary dictionary, out bool isCMapPredefined)
|
private CMap ReadEncoding(PdfDictionary dictionary, out bool isCMapPredefined)
|
||||||
|
@ -2,7 +2,6 @@
|
|||||||
{
|
{
|
||||||
using System;
|
using System;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using System.IO;
|
|
||||||
using CidFonts;
|
using CidFonts;
|
||||||
using ContentStream;
|
using ContentStream;
|
||||||
using Cos;
|
using Cos;
|
||||||
@ -12,20 +11,25 @@
|
|||||||
using Pdf.Parser;
|
using Pdf.Parser;
|
||||||
using TrueType;
|
using TrueType;
|
||||||
using TrueType.Parser;
|
using TrueType.Parser;
|
||||||
using Util;
|
|
||||||
|
|
||||||
internal class CidFontFactory
|
internal class CidFontFactory
|
||||||
{
|
{
|
||||||
private readonly FontDescriptorFactory descriptorFactory;
|
private readonly FontDescriptorFactory descriptorFactory;
|
||||||
private readonly TrueTypeFontParser trueTypeFontParser;
|
private readonly TrueTypeFontParser trueTypeFontParser;
|
||||||
|
private readonly IPdfObjectParser pdfObjectParser;
|
||||||
|
private readonly IFilterProvider filterProvider;
|
||||||
|
|
||||||
public CidFontFactory(FontDescriptorFactory descriptorFactory, TrueTypeFontParser trueTypeFontParser)
|
public CidFontFactory(FontDescriptorFactory descriptorFactory, TrueTypeFontParser trueTypeFontParser,
|
||||||
|
IPdfObjectParser pdfObjectParser,
|
||||||
|
IFilterProvider filterProvider)
|
||||||
{
|
{
|
||||||
this.descriptorFactory = descriptorFactory;
|
this.descriptorFactory = descriptorFactory;
|
||||||
this.trueTypeFontParser = trueTypeFontParser;
|
this.trueTypeFontParser = trueTypeFontParser;
|
||||||
|
this.pdfObjectParser = pdfObjectParser;
|
||||||
|
this.filterProvider = filterProvider;
|
||||||
}
|
}
|
||||||
|
|
||||||
public ICidFont Generate(PdfDictionary dictionary, ParsingArguments arguments, bool isLenientParsing)
|
public ICidFont Generate(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
|
||||||
{
|
{
|
||||||
var type = dictionary.GetName(CosName.TYPE);
|
var type = dictionary.GetName(CosName.TYPE);
|
||||||
if (!CosName.FONT.Equals(type))
|
if (!CosName.FONT.Equals(type))
|
||||||
@ -37,12 +41,12 @@
|
|||||||
var verticalWritingMetrics = ReadVerticalDisplacements(dictionary);
|
var verticalWritingMetrics = ReadVerticalDisplacements(dictionary);
|
||||||
|
|
||||||
FontDescriptor descriptor = null;
|
FontDescriptor descriptor = null;
|
||||||
if (TryGetFontDescriptor(dictionary, arguments, out var descriptorDictionary))
|
if (TryGetFontDescriptor(dictionary, reader, out var descriptorDictionary))
|
||||||
{
|
{
|
||||||
descriptor = descriptorFactory.Generate(descriptorDictionary, arguments.IsLenientParsing);
|
descriptor = descriptorFactory.Generate(descriptorDictionary, isLenientParsing);
|
||||||
}
|
}
|
||||||
|
|
||||||
ReadDescriptorFile(descriptor, arguments);
|
ReadDescriptorFile(descriptor, reader, isLenientParsing);
|
||||||
|
|
||||||
var subType = dictionary.GetName(CosName.SUBTYPE);
|
var subType = dictionary.GetName(CosName.SUBTYPE);
|
||||||
if (CosName.CID_FONT_TYPE0.Equals(subType))
|
if (CosName.CID_FONT_TYPE0.Equals(subType))
|
||||||
@ -58,8 +62,7 @@
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static bool TryGetFontDescriptor(PdfDictionary dictionary, ParsingArguments arguments,
|
private bool TryGetFontDescriptor(PdfDictionary dictionary, IRandomAccessRead reader, out PdfDictionary descriptorDictionary)
|
||||||
out PdfDictionary descriptorDictionary)
|
|
||||||
{
|
{
|
||||||
descriptorDictionary = null;
|
descriptorDictionary = null;
|
||||||
|
|
||||||
@ -68,7 +71,7 @@
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
var descriptorObj = arguments.Get<DynamicParser>().Parse(arguments, obj, false);
|
var descriptorObj = pdfObjectParser.Parse(obj.ToIndirectReference(), reader, false);
|
||||||
|
|
||||||
if (!(descriptorObj is PdfDictionary descriptor))
|
if (!(descriptorObj is PdfDictionary descriptor))
|
||||||
{
|
{
|
||||||
@ -80,21 +83,21 @@
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void ReadDescriptorFile(FontDescriptor descriptor, ParsingArguments arguments)
|
private void ReadDescriptorFile(FontDescriptor descriptor, IRandomAccessRead reader, bool isLenientParsing)
|
||||||
{
|
{
|
||||||
if (descriptor?.FontFile == null)
|
if (descriptor?.FontFile == null)
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
var fontFileStream = arguments.Get<DynamicParser>().Parse(arguments, descriptor.FontFile.ObjectKey, false) as RawCosStream;
|
var fontFileStream = pdfObjectParser.Parse(descriptor.FontFile.ObjectKey, reader, isLenientParsing) as RawCosStream;
|
||||||
|
|
||||||
if (fontFileStream == null)
|
if (fontFileStream == null)
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
var fontFile = fontFileStream.Decode(arguments.Get<IFilterProvider>());
|
var fontFile = fontFileStream.Decode(filterProvider);
|
||||||
|
|
||||||
switch (descriptor.FontFile.FileType)
|
switch (descriptor.FontFile.FileType)
|
||||||
{
|
{
|
||||||
|
@ -141,7 +141,7 @@
|
|||||||
throw new NotSupportedException("We currently expect the FontFile to be an object reference.");
|
throw new NotSupportedException("We currently expect the FontFile to be an object reference.");
|
||||||
}
|
}
|
||||||
|
|
||||||
return new DescriptorFontFile(obj.GetObjectKey(), DescriptorFontFile.FontFileType.Type1);
|
return new DescriptorFontFile(obj.ToIndirectReference(), DescriptorFontFile.FontFileType.Type1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dictionary.TryGetValue(CosName.FONT_FILE2, out value))
|
if (dictionary.TryGetValue(CosName.FONT_FILE2, out value))
|
||||||
@ -151,7 +151,7 @@
|
|||||||
throw new NotSupportedException("We currently expect the FontFile2 to be an object reference.");
|
throw new NotSupportedException("We currently expect the FontFile2 to be an object reference.");
|
||||||
}
|
}
|
||||||
|
|
||||||
return new DescriptorFontFile(obj.GetObjectKey(), DescriptorFontFile.FontFileType.TrueType);
|
return new DescriptorFontFile(obj.ToIndirectReference(), DescriptorFontFile.FontFileType.TrueType);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dictionary.TryGetValue(CosName.FONT_FILE3, out value))
|
if (dictionary.TryGetValue(CosName.FONT_FILE3, out value))
|
||||||
@ -161,7 +161,7 @@
|
|||||||
throw new NotSupportedException("We currently expect the FontFile3 to be an object reference.");
|
throw new NotSupportedException("We currently expect the FontFile3 to be an object reference.");
|
||||||
}
|
}
|
||||||
|
|
||||||
return new DescriptorFontFile(obj.GetObjectKey(), DescriptorFontFile.FontFileType.FromSubtype);
|
return new DescriptorFontFile(obj.ToIndirectReference(), DescriptorFontFile.FontFileType.FromSubtype);
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
|
7
src/UglyToad.Pdf/Geometry/Paths/GeneralPath.cs
Normal file
7
src/UglyToad.Pdf/Geometry/Paths/GeneralPath.cs
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
namespace UglyToad.Pdf.Geometry.Paths
{
    /// <summary>
    /// Placeholder type for a general path. It currently has no members.
    /// </summary>
    internal class GeneralPath
    {
        // TODO: provide an implementation
    }
}
|
@ -85,7 +85,7 @@
|
|||||||
{
|
{
|
||||||
var code = font.ReadCharacterCode(bytes, out int codeLength);
|
var code = font.ReadCharacterCode(bytes, out int codeLength);
|
||||||
|
|
||||||
var unicode = font.GetUnicode(code);
|
font.TryGetUnicode(code, out var unicode);
|
||||||
|
|
||||||
var wordSpacing = 0m;
|
var wordSpacing = 0m;
|
||||||
if (code == ' ' && codeLength == 1)
|
if (code == ' ' && codeLength == 1)
|
||||||
|
@ -1,12 +1,11 @@
|
|||||||
namespace UglyToad.Pdf.Parser
|
namespace UglyToad.Pdf.Parser
|
||||||
{
|
{
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using Graphics;
|
|
||||||
using Graphics.Operations;
|
using Graphics.Operations;
|
||||||
using IO;
|
using IO;
|
||||||
|
|
||||||
internal interface IPageContentParser
|
internal interface IPageContentParser
|
||||||
{
|
{
|
||||||
IReadOnlyList<IGraphicsStateOperation> Parse(IGraphicsStateOperationFactory operationFactory, IInputBytes inputBytes);
|
IReadOnlyList<IGraphicsStateOperation> Parse(IInputBytes inputBytes);
|
||||||
}
|
}
|
||||||
}
|
}
|
218
src/UglyToad.Pdf/Parser/IPdfObjectParser.cs
Normal file
218
src/UglyToad.Pdf/Parser/IPdfObjectParser.cs
Normal file
@ -0,0 +1,218 @@
|
|||||||
|
namespace UglyToad.Pdf.Parser
|
||||||
|
{
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using ContentStream;
|
||||||
|
using Cos;
|
||||||
|
using IO;
|
||||||
|
using Logging;
|
||||||
|
using Parts;
|
||||||
|
using Util;
|
||||||
|
|
||||||
|
/// <summary>
/// Locates and parses the object an indirect reference points to.
/// </summary>
internal interface IPdfObjectParser
{
    /// <summary>
    /// Parse the object identified by the indirect reference.
    /// </summary>
    /// <param name="indirectReference">The object number and generation of the object to parse.</param>
    /// <param name="reader">The reader the object is read from.</param>
    /// <param name="isLenientParsing">Whether format errors should be tolerated where possible instead of thrown.</param>
    /// <param name="requireExistingObject">Whether the object must already be defined and must not be stored in an object stream.</param>
    /// <returns>The parsed object.</returns>
    CosBase Parse(IndirectReference indirectReference, IRandomAccessRead reader, bool isLenientParsing = true, bool requireExistingObject = false);
}
|
||||||
|
|
||||||
|
internal class PdfObjectParser : IPdfObjectParser
|
||||||
|
{
|
||||||
|
private readonly ILog log;
|
||||||
|
private readonly CosBaseParser baseParser;
|
||||||
|
private readonly CosStreamParser streamParser;
|
||||||
|
private readonly CrossReferenceTable crossReferenceTable;
|
||||||
|
private readonly BruteForceSearcher bruteForceSearcher;
|
||||||
|
private readonly CosObjectPool objectPool;
|
||||||
|
private readonly ObjectStreamParser objectStreamParser;
|
||||||
|
|
||||||
|
/// <summary>
/// Create a new <see cref="PdfObjectParser"/>.
/// </summary>
/// <param name="log">The log to write to; a <see cref="NoOpLog"/> is used when this is <see langword="null"/>.</param>
/// <param name="baseParser">Parses the object body. Required.</param>
/// <param name="streamParser">Parses stream objects. Required.</param>
/// <param name="crossReferenceTable">Provides the object offsets. Required.</param>
/// <param name="bruteForceSearcher">Provides object locations when the cross reference table has none. Required.</param>
/// <param name="objectPool">The pool of objects parsed so far. Required.</param>
/// <param name="objectStreamParser">Parses the contents of object streams. Required.</param>
/// <exception cref="ArgumentNullException">Any required argument is <see langword="null"/>.</exception>
public PdfObjectParser(ILog log, CosBaseParser baseParser, CosStreamParser streamParser, CrossReferenceTable crossReferenceTable,
    BruteForceSearcher bruteForceSearcher,
    CosObjectPool objectPool,
    ObjectStreamParser objectStreamParser)
{
    if (baseParser == null)
    {
        throw new ArgumentNullException(nameof(baseParser));
    }

    if (streamParser == null)
    {
        throw new ArgumentNullException(nameof(streamParser));
    }

    if (crossReferenceTable == null)
    {
        throw new ArgumentNullException(nameof(crossReferenceTable));
    }

    if (bruteForceSearcher == null)
    {
        throw new ArgumentNullException(nameof(bruteForceSearcher));
    }

    if (objectPool == null)
    {
        throw new ArgumentNullException(nameof(objectPool));
    }

    if (objectStreamParser == null)
    {
        throw new ArgumentNullException(nameof(objectStreamParser));
    }

    // Logging is optional, fall back to a log which discards everything.
    this.log = log ?? new NoOpLog();
    this.baseParser = baseParser;
    this.streamParser = streamParser;
    this.crossReferenceTable = crossReferenceTable;
    this.bruteForceSearcher = bruteForceSearcher;
    this.objectPool = objectPool;
    this.objectStreamParser = objectStreamParser;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Parse the object identified by the indirect reference, reusing the pooled value if it was parsed before.
/// </summary>
/// <param name="indirectReference">The object number and generation of the object to parse.</param>
/// <param name="reader">The reader the object is read from.</param>
/// <param name="isLenientParsing">
/// When <see langword="true"/> an object missing from the cross reference table is looked up
/// with the brute force searcher, and the table is updated with any location found.
/// </param>
/// <param name="requireExistingObject">Whether the object must have a positive offset in the cross reference table.</param>
/// <returns>The parsed object, or <see cref="CosNull.Null"/> if no location for the object is known.</returns>
/// <exception cref="InvalidOperationException">
/// <paramref name="requireExistingObject"/> is set and the object has no offset or a non-positive one.
/// </exception>
public CosBase Parse(IndirectReference indirectReference, IRandomAccessRead reader, bool isLenientParsing = true, bool requireExistingObject = false)
{
    var key = new CosObjectKey(indirectReference.ObjectNumber, indirectReference.Generation);

    // An object which was parsed before is returned straight from the pool.
    var pooled = objectPool.GetOrCreateDefault(key).GetObject();

    if (pooled != null)
    {
        return pooled;
    }

    // Positive values are file offsets, non-positive values are negated object stream numbers.
    var location = TryGet(key, crossReferenceTable.ObjectOffsets);

    if (requireExistingObject && (!location.HasValue || location.Value <= 0))
    {
        throw new InvalidOperationException("Object must be defined and not compressed: " + key);
    }

    if (!location.HasValue && isLenientParsing)
    {
        location = TryGet(key, bruteForceSearcher.GetObjectLocations());

        if (location.HasValue)
        {
            crossReferenceTable.UpdateOffset(key, location.Value);
        }
    }

    if (!location.HasValue)
    {
        return CosNull.Null;
    }

    if (location.Value > 0)
    {
        return ParseObjectFromFile(location.Value, reader, key, objectPool, isLenientParsing);
    }

    return ParseCompressedStreamObject(reader, -location.Value, indirectReference.ObjectNumber, isLenientParsing);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Parse an object stored directly in the file at the given offset.
/// </summary>
/// <param name="offset">The offset to seek to before reading the object header.</param>
/// <param name="reader">The reader the object is read from.</param>
/// <param name="key">The expected object number and generation.</param>
/// <param name="pool">The object pool handed to the base parser.</param>
/// <param name="isLenientParsing">Whether a missing 'endobj' is logged as a warning instead of thrown.</param>
/// <returns>The parsed object; a stream object if the body is followed by the 'stream' keyword.</returns>
/// <exception cref="InvalidOperationException">
/// The object found at the offset does not match the key, or the object
/// does not end with 'endobj' and parsing is not lenient.
/// </exception>
private CosBase ParseObjectFromFile(long offset, IRandomAccessRead reader,
    CosObjectKey key,
    CosObjectPool pool,
    bool isLenientParsing)
{
    reader.Seek(offset);

    // Header: "<number> <generation> obj"
    var objectNumber = ObjectHelper.ReadObjectNumber(reader);
    var objectGeneration = ObjectHelper.ReadGenerationNumber(reader);
    ReadHelper.ReadExpectedString(reader, "obj", true);

    var isExpectedObject = objectNumber == key.Number && objectGeneration == key.Generation;

    if (!isExpectedObject)
    {
        throw new InvalidOperationException($"Xref for {key} points to object {objectNumber} {objectGeneration} at {offset}");
    }

    ReadHelper.SkipSpaces(reader);

    var result = baseParser.Parse(reader, pool);

    var endObjectKey = ReadHelper.ReadString(reader);

    if (string.Equals(endObjectKey, "stream"))
    {
        // Step back over the keyword so the stream parser starts at 'stream'.
        var keywordBytes = OtherEncodings.StringAsLatin1Bytes(endObjectKey);
        reader.Rewind(keywordBytes.Length);

        result = ReadNormalObjectStream(reader, result, offset, isLenientParsing, out endObjectKey);
    }

    if (string.Equals(endObjectKey, "endobj"))
    {
        return result;
    }

    var message =
        $"Object ({objectNumber}:{objectGeneration}) at offset {offset} does not end with \'endobj\' but with \'{endObjectKey}\'";

    if (!isLenientParsing)
    {
        throw new InvalidOperationException(message);
    }

    log.Warn(message);

    return result;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Read the stream following a dictionary and the keyword which comes after it.
/// </summary>
/// <param name="reader">The reader positioned at the start of the stream.</param>
/// <param name="currentBase">The object parsed before the stream, this must be a dictionary.</param>
/// <param name="offset">The offset of the object, only used for the error message.</param>
/// <param name="isLenientParsing">Passed on to the stream parser.</param>
/// <param name="endObjectKey">The text read after the stream, expected to be 'endobj'.</param>
/// <returns>The parsed stream object.</returns>
/// <exception cref="InvalidOperationException">The stream was not preceded by a dictionary.</exception>
private CosBase ReadNormalObjectStream(IRandomAccessRead reader, CosBase currentBase, long offset,
    bool isLenientParsing,
    out string endObjectKey)
{
    const string endObject = "endobj";
    const string endStream = "endstream";

    if (!(currentBase is PdfDictionary dictionary))
    {
        // this is not legal
        // the combination of a dict and the stream/endstream
        // forms a complete stream object
        throw new InvalidOperationException($"Stream not preceded by dictionary (offset: {offset}).");
    }

    RawCosStream stream = streamParser.Parse(reader, dictionary, isLenientParsing);

    ReadHelper.SkipSpaces(reader);
    endObjectKey = ReadHelper.ReadLine(reader);

    // we have case with a second 'endstream' before endobj
    // PDF keywords are compared ordinally; a culture-sensitive comparison ignores certain
    // characters, which would also misalign the fixed-length Substring below.
    if (!endObjectKey.StartsWith(endObject, StringComparison.Ordinal)
        && endObjectKey.StartsWith(endStream, StringComparison.Ordinal))
    {
        endObjectKey = endObjectKey.Substring(endStream.Length).Trim();

        if (endObjectKey.Length == 0)
        {
            // no other characters in extra endstream line
            // read next line
            endObjectKey = ReadHelper.ReadLine(reader);
        }
    }

    return stream;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Parses the object stream numbered <paramref name="streamObjectNumber"/> (generation 0), copies the objects it
/// contains into the matching pooled objects and returns the one numbered <paramref name="requestedNumber"/>.
/// </summary>
/// <param name="reader">The source passed to <c>Parse</c> when loading the containing stream.</param>
/// <param name="streamObjectNumber">The object number of the stream which holds the requested object.</param>
/// <param name="requestedNumber">The object number to look for among the objects in the stream.</param>
/// <param name="isLenientParsing">Passed through to <c>Parse</c>.</param>
/// <returns>
/// The matching object, or <see cref="CosNull.Null"/> when the container is not a stream or no object in it
/// has the requested number.
/// </returns>
private CosBase ParseCompressedStreamObject(IRandomAccessRead reader, long streamObjectNumber, long requestedNumber, bool isLenientParsing)
{
    var containerReference = new IndirectReference(streamObjectNumber, 0);

    if (!(Parse(containerReference, reader, isLenientParsing, true) is RawCosStream stream))
    {
        log.Warn($"Could not find a stream for the object number, defaults to returning CosNull: {streamObjectNumber}");

        return CosNull.Null;
    }

    var objects = objectStreamParser.Parse(stream, objectPool);

    // Only objects whose cross reference entry equals the negated stream object number are registered.
    // NOTE(review): presumably that is how the table marks objects stored in this stream - confirm.
    var expectedOffset = -streamObjectNumber;

    foreach (var contained in objects)
    {
        var key = new CosObjectKey(contained);
        var recordedOffset = TryGet(key, crossReferenceTable.ObjectOffsets);

        if (!recordedOffset.HasValue || recordedOffset.Value != expectedOffset)
        {
            continue;
        }

        objectPool.Get(key).SetObject(contained.GetObject());
    }

    var requested = objects.FirstOrDefault(candidate => candidate.GetObjectNumber() == requestedNumber);

    if (requested == null)
    {
        log.Error($"Could not find the object {requestedNumber} in the stream for object {streamObjectNumber}. Returning CosNull.");

        return CosNull.Null;
    }

    return requested;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Looks up <paramref name="key"/> and returns its value, or <c>null</c> when the key is absent.
/// </summary>
/// <typeparam name="T">The value type stored in the dictionary.</typeparam>
/// <typeparam name="TKey">The key type of the dictionary.</typeparam>
private static T? TryGet<T, TKey>(TKey key, IReadOnlyDictionary<TKey, T> dictionary) where T : struct
{
    if (dictionary.TryGetValue(key, out T found))
    {
        return found;
    }

    return null;
}
|
||||||
|
}
|
||||||
|
}
|
@ -9,7 +9,14 @@
|
|||||||
|
|
||||||
internal class PageContentParser : IPageContentParser
|
internal class PageContentParser : IPageContentParser
|
||||||
{
|
{
|
||||||
public IReadOnlyList<IGraphicsStateOperation> Parse(IGraphicsStateOperationFactory operationFactory, IInputBytes inputBytes)
|
private readonly IGraphicsStateOperationFactory operationFactory;
|
||||||
|
|
||||||
|
public PageContentParser(IGraphicsStateOperationFactory operationFactory)
|
||||||
|
{
|
||||||
|
this.operationFactory = operationFactory;
|
||||||
|
}
|
||||||
|
|
||||||
|
public IReadOnlyList<IGraphicsStateOperation> Parse(IInputBytes inputBytes)
|
||||||
{
|
{
|
||||||
var scanner = new CoreTokenScanner(inputBytes);
|
var scanner = new CoreTokenScanner(inputBytes);
|
||||||
|
|
||||||
|
@ -1,57 +1,12 @@
|
|||||||
namespace UglyToad.Pdf.Parser.PageTree
|
namespace UglyToad.Pdf.Parser.PageTree
|
||||||
{
|
{
|
||||||
using System;
|
using System;
|
||||||
using Content;
|
|
||||||
using ContentStream;
|
using ContentStream;
|
||||||
using ContentStream.TypedAccessors;
|
using ContentStream.TypedAccessors;
|
||||||
using Cos;
|
using Cos;
|
||||||
using Filters;
|
using Filters;
|
||||||
using Fonts;
|
using Fonts;
|
||||||
|
|
||||||
/// <summary>
/// Creates a <see cref="Page"/> from a page dictionary.
/// </summary>
internal class PageParser
{
    /// <summary>
    /// Validates the inputs and wraps the dictionary in a new <see cref="Page"/>.
    /// </summary>
    /// <param name="number">The page number given to the created page.</param>
    /// <param name="dictionary">The dictionary describing the page; it must be of type Page.</param>
    /// <param name="arguments">The parsing arguments handed on to the created page.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="dictionary"/> or <paramref name="arguments"/> is null.
    /// </exception>
    /// <exception cref="InvalidOperationException">The dictionary is not of type Page.</exception>
    public Page Parse(int number, PdfDictionary dictionary, ParsingArguments arguments)
    {
        if (dictionary == null)
        {
            throw new ArgumentNullException(nameof(dictionary));
        }

        if (arguments == null)
        {
            throw new ArgumentNullException(nameof(arguments));
        }

        var isPage = dictionary.IsType(CosName.PAGE);

        if (isPage)
        {
            var members = new PageTreeMembers();

            return new Page(number, dictionary, members, arguments);
        }

        throw new InvalidOperationException($"Expected a Dictionary of Type Page, instead got this: {dictionary}");
    }
}
|
|
||||||
|
|
||||||
/// <summary>
/// Dispatches a font dictionary to the composite or simple font parser based on its subtype.
/// </summary>
internal class FontParser
{
    /// <summary>
    /// Runs the parser selected by the dictionary's subtype and returns a new <see cref="Font"/>.
    /// </summary>
    /// <param name="dictionary">The font dictionary whose subtype selects the parser.</param>
    /// <param name="arguments">Supplies the container the parsers are resolved from.</param>
    /// <returns>A newly constructed, empty <see cref="Font"/>.</returns>
    public Font Parse(PdfDictionary dictionary, ParsingArguments arguments)
    {
        var subtype = dictionary.GetName(CosName.SUBTYPE);
        var isComposite = CosName.Equals(subtype, CosName.TYPE0);

        // NOTE(review): the result of the selected parser is not used; an empty Font is always returned.
        // This looks unfinished - confirm the intended return value before relying on it.
        if (isComposite)
        {
            arguments.Container.Get<CompositeFontParser>()
                .Parse(dictionary, arguments);
        }
        else
        {
            arguments.Container.Get<SimpleFontParser>()
                .Parse(dictionary, arguments);
        }

        return new Font();
    }
}
|
|
||||||
|
|
||||||
internal class CompositeFontParser
|
internal class CompositeFontParser
|
||||||
{
|
{
|
||||||
public CompositeFont Parse(PdfDictionary dictionary, ParsingArguments arguments)
|
public CompositeFont Parse(PdfDictionary dictionary, ParsingArguments arguments)
|
||||||
|
@ -2,7 +2,6 @@
|
|||||||
{
|
{
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using Content;
|
using Content;
|
||||||
using ContentStream;
|
|
||||||
using Cos;
|
using Cos;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
@ -37,34 +36,5 @@
|
|||||||
{
|
{
|
||||||
return fonts.ContainsKey(name);
|
return fonts.ContainsKey(name);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
/// Gets the font registered under <paramref name="name"/>, parsing it and caching the result on first use.
/// </summary>
/// <param name="name">The resource name of the font.</param>
/// <param name="arguments">Supplies the container the parsers are resolved from.</param>
/// <param name="value">The cached or newly parsed font when the method returns true.</param>
/// <returns>
/// True when the font was found in the cache or parsed successfully; false when the name is unknown
/// or its object is not a dictionary.
/// </returns>
internal bool GetFont(CosName name, ParsingArguments arguments, out Font value)
{
    if (fontObjects.TryGetValue(name, out value))
    {
        return true;
    }

    if (!fonts.TryGetValue(name, out var key))
    {
        return false;
    }

    var dictionary = arguments.Container.Get<DynamicParser>()
        .Parse(arguments, key, false) as PdfDictionary;

    if (dictionary == null)
    {
        return false;
    }

    // Retrieve and cache. The parsed font must also be handed back through the out parameter with a
    // success result; previously the first lookup of every font reported failure.
    value = arguments.Container.Get<FontParser>()
        .Parse(dictionary, arguments);

    fontObjects[name] = value;

    return true;
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -14,9 +14,9 @@
|
|||||||
|
|
||||||
public BruteForceSearcher BruteForceSearcher { get; }
|
public BruteForceSearcher BruteForceSearcher { get; }
|
||||||
|
|
||||||
public ResourceContainer ResourceContainer { get; }
|
public IResourceStore ResourceContainer { get; }
|
||||||
|
|
||||||
public ParsingCachingProviders(CosObjectPool objectPool, BruteForceSearcher bruteForceSearcher, ResourceContainer resourceContainer)
|
public ParsingCachingProviders(CosObjectPool objectPool, BruteForceSearcher bruteForceSearcher, IResourceStore resourceContainer)
|
||||||
{
|
{
|
||||||
ObjectPool = objectPool ?? throw new ArgumentNullException(nameof(objectPool));
|
ObjectPool = objectPool ?? throw new ArgumentNullException(nameof(objectPool));
|
||||||
BruteForceSearcher = bruteForceSearcher ?? throw new ArgumentNullException(nameof(bruteForceSearcher));
|
BruteForceSearcher = bruteForceSearcher ?? throw new ArgumentNullException(nameof(bruteForceSearcher));
|
||||||
|
@ -5,7 +5,15 @@
|
|||||||
using Content;
|
using Content;
|
||||||
using ContentStream;
|
using ContentStream;
|
||||||
using Cos;
|
using Cos;
|
||||||
|
using Filters;
|
||||||
|
using Fonts;
|
||||||
|
using Fonts.Parser;
|
||||||
|
using Fonts.Parser.Handlers;
|
||||||
|
using Fonts.Parser.Parts;
|
||||||
|
using Fonts.TrueType.Parser;
|
||||||
|
using Graphics;
|
||||||
using IO;
|
using IO;
|
||||||
|
using Logging;
|
||||||
using Parts;
|
using Parts;
|
||||||
using Parts.CrossReference;
|
using Parts.CrossReference;
|
||||||
using Util;
|
using Util;
|
||||||
@ -37,6 +45,8 @@
|
|||||||
|
|
||||||
private static PdfDocument OpenDocument(IRandomAccessRead reader, IContainer container, bool isLenientParsing)
|
private static PdfDocument OpenDocument(IRandomAccessRead reader, IContainer container, bool isLenientParsing)
|
||||||
{
|
{
|
||||||
|
var log = container.Get<ILog>();
|
||||||
|
|
||||||
var version = container.Get<FileHeaderParser>().ReadHeader(reader, isLenientParsing);
|
var version = container.Get<FileHeaderParser>().ReadHeader(reader, isLenientParsing);
|
||||||
|
|
||||||
var crossReferenceOffset = container.Get<FileTrailerParser>().GetXrefOffset(reader, isLenientParsing);
|
var crossReferenceOffset = container.Get<FileTrailerParser>().GetXrefOffset(reader, isLenientParsing);
|
||||||
@ -46,9 +56,27 @@
|
|||||||
var crossReferenceTable = container.Get<FileCrossReferenceTableParser>()
|
var crossReferenceTable = container.Get<FileCrossReferenceTableParser>()
|
||||||
.Parse(reader, isLenientParsing, crossReferenceOffset, pool);
|
.Parse(reader, isLenientParsing, crossReferenceOffset, pool);
|
||||||
|
|
||||||
var dynamicParser = container.Get<DynamicParser>();
|
var filterProvider = container.Get<IFilterProvider>();
|
||||||
var bruteForceSearcher = new BruteForceSearcher(reader);
|
var bruteForceSearcher = new BruteForceSearcher(reader);
|
||||||
var resourceContainer = new ResourceContainer();
|
var pdfObjectParser = new PdfObjectParser(container.Get<ILog>(), container.Get<CosBaseParser>(),
|
||||||
|
container.Get<CosStreamParser>(), crossReferenceTable, bruteForceSearcher, pool, container.Get<ObjectStreamParser>());
|
||||||
|
|
||||||
|
var trueTypeFontParser = new TrueTypeFontParser();
|
||||||
|
var fontDescriptorFactory = new FontDescriptorFactory();
|
||||||
|
|
||||||
|
var cidFontFactory = new CidFontFactory(fontDescriptorFactory, trueTypeFontParser, pdfObjectParser, filterProvider);
|
||||||
|
|
||||||
|
var cMapCache = new CMapCache(new CMapParser());
|
||||||
|
|
||||||
|
var fontFactory = new FontFactory(container.Get<ILog>(), new Type0FontHandler(cidFontFactory,
|
||||||
|
cMapCache,
|
||||||
|
filterProvider,
|
||||||
|
pdfObjectParser));
|
||||||
|
|
||||||
|
var dynamicParser = container.Get<DynamicParser>();
|
||||||
|
var resourceContainer = new ResourceContainer(pdfObjectParser, fontFactory);
|
||||||
|
|
||||||
|
var pageFactory = new PageFactory(resourceContainer, pdfObjectParser, filterProvider, new PageContentParser(new ReflectionGraphicsStateOperationFactory()));
|
||||||
|
|
||||||
var root = ParseTrailer(reader, crossReferenceTable, dynamicParser, bruteForceSearcher, pool,
|
var root = ParseTrailer(reader, crossReferenceTable, dynamicParser, bruteForceSearcher, pool,
|
||||||
isLenientParsing);
|
isLenientParsing);
|
||||||
@ -66,7 +94,7 @@
|
|||||||
|
|
||||||
var caching = new ParsingCachingProviders(pool, bruteForceSearcher, resourceContainer);
|
var caching = new ParsingCachingProviders(pool, bruteForceSearcher, resourceContainer);
|
||||||
|
|
||||||
return new PdfDocument(reader, version, crossReferenceTable, container, isLenientParsing, caching, new Catalog(rootDictionary));
|
return new PdfDocument(log, reader, version, crossReferenceTable, isLenientParsing, caching, pageFactory, pdfObjectParser, new Catalog(rootDictionary));
|
||||||
}
|
}
|
||||||
|
|
||||||
private static CosBase ParseTrailer(IRandomAccessRead reader, CrossReferenceTable crossReferenceTable,
|
private static CosBase ParseTrailer(IRandomAccessRead reader, CrossReferenceTable crossReferenceTable,
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
using Content;
|
using Content;
|
||||||
using Cos;
|
using Cos;
|
||||||
using IO;
|
using IO;
|
||||||
|
using Logging;
|
||||||
using Parser;
|
using Parser;
|
||||||
using Parser.Parts;
|
using Parser.Parts;
|
||||||
using Util;
|
using Util;
|
||||||
@ -17,8 +18,8 @@
|
|||||||
private readonly HeaderVersion version;
|
private readonly HeaderVersion version;
|
||||||
[NotNull]
|
[NotNull]
|
||||||
private readonly CrossReferenceTable crossReferenceTable;
|
private readonly CrossReferenceTable crossReferenceTable;
|
||||||
[NotNull]
|
|
||||||
private readonly IContainer container;
|
private readonly ILog log;
|
||||||
private readonly bool isLenientParsing;
|
private readonly bool isLenientParsing;
|
||||||
[NotNull]
|
[NotNull]
|
||||||
private readonly ParsingCachingProviders cachingProviders;
|
private readonly ParsingCachingProviders cachingProviders;
|
||||||
@ -29,21 +30,21 @@
|
|||||||
[NotNull]
|
[NotNull]
|
||||||
public Pages Pages { get; }
|
public Pages Pages { get; }
|
||||||
|
|
||||||
internal PdfDocument(IRandomAccessRead reader, HeaderVersion version, CrossReferenceTable crossReferenceTable,
|
internal PdfDocument(ILog log, IRandomAccessRead reader, HeaderVersion version, CrossReferenceTable crossReferenceTable,
|
||||||
IContainer container,
|
|
||||||
bool isLenientParsing,
|
bool isLenientParsing,
|
||||||
ParsingCachingProviders cachingProviders,
|
ParsingCachingProviders cachingProviders,
|
||||||
|
IPageFactory pageFactory,
|
||||||
|
IPdfObjectParser pdfObjectParser,
|
||||||
Catalog catalog)
|
Catalog catalog)
|
||||||
{
|
{
|
||||||
|
this.log = log;
|
||||||
this.reader = reader ?? throw new ArgumentNullException(nameof(reader));
|
this.reader = reader ?? throw new ArgumentNullException(nameof(reader));
|
||||||
this.version = version ?? throw new ArgumentNullException(nameof(version));
|
this.version = version ?? throw new ArgumentNullException(nameof(version));
|
||||||
this.crossReferenceTable = crossReferenceTable ?? throw new ArgumentNullException(nameof(crossReferenceTable));
|
this.crossReferenceTable = crossReferenceTable ?? throw new ArgumentNullException(nameof(crossReferenceTable));
|
||||||
this.container = container ?? throw new ArgumentNullException(nameof(container));
|
|
||||||
this.isLenientParsing = isLenientParsing;
|
this.isLenientParsing = isLenientParsing;
|
||||||
this.cachingProviders = cachingProviders ?? throw new ArgumentNullException(nameof(cachingProviders));
|
this.cachingProviders = cachingProviders ?? throw new ArgumentNullException(nameof(cachingProviders));
|
||||||
Catalog = catalog ?? throw new ArgumentNullException(nameof(catalog));
|
Catalog = catalog ?? throw new ArgumentNullException(nameof(catalog));
|
||||||
var arguments = new ParsingArguments(reader, crossReferenceTable, cachingProviders, container, isLenientParsing);
|
Pages = new Pages(log, Catalog, pdfObjectParser, pageFactory, reader, isLenientParsing);
|
||||||
Pages = new Pages(Catalog, arguments);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static PdfDocument Open(byte[] fileBytes, ParsingOptions options = null) => PdfDocumentFactory.Open(fileBytes, options);
|
public static PdfDocument Open(byte[] fileBytes, ParsingOptions options = null) => PdfDocumentFactory.Open(fileBytes, options);
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
namespace UglyToad.Pdf.Tokenization
|
namespace UglyToad.Pdf.Tokenization
|
||||||
{
|
{
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
|
using ContentStream;
|
||||||
using IO;
|
using IO;
|
||||||
using Parser.Parts;
|
using Parser.Parts;
|
||||||
using Scanner;
|
using Scanner;
|
||||||
@ -79,7 +80,7 @@
|
|||||||
|
|
||||||
if (r == OperatorToken.R)
|
if (r == OperatorToken.R)
|
||||||
{
|
{
|
||||||
result[key] = new IndirectReferenceToken(new IndirectReference(num.Long, gen.Long));
|
result[key] = new IndirectReferenceToken(new IndirectReference(num.Long, gen.Int));
|
||||||
i = i + 2;
|
i = i + 2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
namespace UglyToad.Pdf.Tokenization.Tokens
|
namespace UglyToad.Pdf.Tokenization.Tokens
|
||||||
{
|
{
|
||||||
|
using ContentStream;
|
||||||
|
|
||||||
public class IndirectReferenceToken : IDataToken<IndirectReference>
|
public class IndirectReferenceToken : IDataToken<IndirectReference>
|
||||||
{
|
{
|
||||||
public IndirectReference Data { get; }
|
public IndirectReference Data { get; }
|
||||||
@ -10,16 +12,4 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
/// Identifies an object by its object number and generation.
/// </summary>
public struct IndirectReference
{
    /// <summary>
    /// Creates a reference from the given object number and generation.
    /// </summary>
    /// <param name="objectNumber">The number of the referenced object.</param>
    /// <param name="generation">The generation of the referenced object.</param>
    public IndirectReference(long objectNumber, long generation)
    {
        Generation = generation;
        ObjectNumber = objectNumber;
    }

    /// <summary>
    /// The number of the referenced object.
    /// </summary>
    public long ObjectNumber { get; }

    /// <summary>
    /// The generation of the referenced object.
    /// </summary>
    public long Generation { get; }
}
|
|
||||||
}
|
}
|
||||||
|
@ -227,5 +227,9 @@
|
|||||||
<EmbeddedResource Include="Resources\CMap\UniKS-UTF16-V" />
|
<EmbeddedResource Include="Resources\CMap\UniKS-UTF16-V" />
|
||||||
<EmbeddedResource Include="Resources\CMap\V" />
|
<EmbeddedResource Include="Resources\CMap\V" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<Folder Include="Fonts\Simple\" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
</Project>
|
</Project>
|
||||||
|
@ -1,12 +1,7 @@
|
|||||||
namespace UglyToad.Pdf.Util
|
namespace UglyToad.Pdf.Util
|
||||||
{
|
{
|
||||||
using Filters;
|
using Filters;
|
||||||
using Fonts;
|
|
||||||
using Fonts.Parser;
|
using Fonts.Parser;
|
||||||
using Fonts.Parser.Handlers;
|
|
||||||
using Fonts.Parser.Parts;
|
|
||||||
using Fonts.TrueType.Parser;
|
|
||||||
using Graphics;
|
|
||||||
using Logging;
|
using Logging;
|
||||||
using Parser;
|
using Parser;
|
||||||
using Parser.PageTree;
|
using Parser.PageTree;
|
||||||
@ -49,19 +44,12 @@
|
|||||||
new CrossReferenceTableParser(logger, dictionaryParser, baseParser));
|
new CrossReferenceTableParser(logger, dictionaryParser, baseParser));
|
||||||
|
|
||||||
var resourceDictionaryParser = new ResourceDictionaryParser();
|
var resourceDictionaryParser = new ResourceDictionaryParser();
|
||||||
var pageParser = new PageParser();
|
|
||||||
var simpleFontParser = new SimpleFontParser();
|
var simpleFontParser = new SimpleFontParser();
|
||||||
var compositeFontParser = new CompositeFontParser();
|
var compositeFontParser = new CompositeFontParser();
|
||||||
var fontParser = new FontParser();
|
|
||||||
var pageContentParser = new PageContentParser();
|
|
||||||
var operationFactory = new ReflectionGraphicsStateOperationFactory();
|
|
||||||
|
|
||||||
var cmapParser = new CMapParser();
|
var cmapParser = new CMapParser();
|
||||||
var afmParser = new AdobeFontMetricsParser();
|
var afmParser = new AdobeFontMetricsParser();
|
||||||
|
|
||||||
var type0FontFactory = new Type0FontHandler(new CidFontFactory(new FontDescriptorFactory(), new TrueTypeFontParser()), new CMapCache(cmapParser), filterProvider);
|
|
||||||
var fontFactory = new FontFactory(type0FontFactory);
|
|
||||||
|
|
||||||
var container = new Container();
|
var container = new Container();
|
||||||
container.Register(headerParser);
|
container.Register(headerParser);
|
||||||
container.Register(trailerParser);
|
container.Register(trailerParser);
|
||||||
@ -75,15 +63,11 @@
|
|||||||
container.Register(objectStreamParser);
|
container.Register(objectStreamParser);
|
||||||
container.Register(filterProvider);
|
container.Register(filterProvider);
|
||||||
container.Register(resourceDictionaryParser);
|
container.Register(resourceDictionaryParser);
|
||||||
container.Register(pageParser);
|
|
||||||
container.Register(simpleFontParser);
|
container.Register(simpleFontParser);
|
||||||
container.Register(compositeFontParser);
|
container.Register(compositeFontParser);
|
||||||
container.Register(fontParser);
|
|
||||||
container.Register(pageContentParser);
|
|
||||||
container.Register(operationFactory);
|
|
||||||
container.Register(cmapParser);
|
container.Register(cmapParser);
|
||||||
container.Register(afmParser);
|
container.Register(afmParser);
|
||||||
container.Register(fontFactory);
|
container.Register(logger);
|
||||||
|
|
||||||
return container;
|
return container;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user