mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-06-28 06:15:58 +08:00
create Type0 font, notes about font format, heavy duty refactoring to inject dependencies rather than god object
This commit is contained in:
parent
206eb91ff1
commit
f4d58e8aa9
118
font-notes.md
Normal file
118
font-notes.md
Normal file
@ -0,0 +1,118 @@
|
||||
# Fonts #
|
||||
|
||||
## Types of Font ##
|
||||
|
||||
<pre><code>
|
||||
|
||||
------ Composite Fonts -------
|
||||
|
||||
Type0 (Composed of glyphs from a CIDFont)
|
||||
|
||||
Children:
|
||||
|
||||
CIDFont CIDFontType0 (Type 1 font glyph descriptions)
|
||||
CIDFontType2 (TrueType font glyph descriptions)
|
||||
|
||||
------ Simple Fonts Below -------
|
||||
|
||||
Type 1 Type 1 (defines glyphs using type 1 font technology)
|
||||
MMType1 (multiple master font - extends type 1 fonts to support many typefaces for a single font)
|
||||
|
||||
Type 3 (defines glyphs with streams of PDF graphics operations)
|
||||
|
||||
TrueType (from the TrueType font format)
|
||||
|
||||
</code></pre>
|
||||
|
||||
|
||||
## Terminology ##
|
||||
|
||||
+ Font dictionary: PDF dictionary with information about the font
|
||||
+ Font program: Glyph information in specialized font format
|
||||
|
||||
## Composite Fonts ##
|
||||
|
||||
+ Glyphs are selected from a font-like CIDFont.
|
||||
+ Has a single CIDFont descendant.
|
||||
+ Multiple-byte sequences select a single glyph.
|
||||
|
||||
Used for multiple-byte character encodings and large numbers of glyphs.
|
||||
|
||||
Well suited to Chinese, Japanese and Korean (CJK).
|
||||
|
||||
CID stands for character identifier. This is a number used to access glyph descriptions.
|
||||
|
||||
The CMap maps between character codes and CID numbers for the glyphs.
|
||||
|
||||
A CIDFont file provides the glyph descriptions for a character collection. The glyph descriptions are
|
||||
identified by CIDs.
|
||||
|
||||
A CID-keyed font combines a CMap with a CIDFont.
|
||||
|
||||
The **Encoding** contains the CMap.
|
||||
The **DescendantFonts** contains the CIDFont to use with the CMap.
|
||||
|
||||
### CIDFont ###
|
||||
|
||||
A Type0 font descendant (CIDFont) must be either a CIDFontType0 (Adobe Type 1) or CIDFontType2 (TrueType).
|
||||
|
||||
For Type 2 CIDFonts (TrueType) the glyphs are identified by a glyph index (GID).
|
||||
|
||||
+ If the font program is embedded as a stream the CIDFont dictionary must contain a CIDToGIDMap which maps
|
||||
from CIDs to Glyph Indexes.
|
||||
|
||||
+ If the font program is a predefined external font the CIDFont must not contain a CIDToGIDMap. It
|
||||
may only use a predefined CMap.
|
||||
|
||||
Though a CID may not be used to select the glyph as in the predefined case, it is always used to select glyph
|
||||
metrics. Every CIDFont must describe CID 0 which is the ```.notdef``` character for missing characters.
|
||||
|
||||
### Glyph Metrics in CIDFonts ###
|
||||
|
||||
Widths for CIDFonts are defined in the DW and W entries in the CIDFont dictionary.
|
||||
|
||||
+ DW provides the default width for glyphs which are not specified individually.
|
||||
+ W defines widths for individual CIDs.
|
||||
|
||||
Vertical writing uses additional metrics entries (DW2 and W2) in the CIDFont dictionary; see the PDF specification for details.
|
||||
|
||||
### CMap ###
|
||||
|
||||
The CMap maps from character codes to character selectors (CIDs).
|
||||
|
||||
The CMap also defines the writing mode, which is either horizontal or vertical.
|
||||
|
||||
### Type 0 Fonts ###
|
||||
|
||||
The **Font dictionary** has the following entries:
|
||||
|
||||
+ Type (name): /Font
|
||||
+ Subtype (name): /Type0
|
||||
+ BaseFont (name): The PostScript name of the font.
|
||||
+ Encoding (name/stream R): Name of a predefined CMap or a stream for an embedded CMap.
|
||||
+ DescendantFonts (array): Single element pointing to the CIDFont.
|
||||
+ ToUnicode (stream R)?: Stream containing a CMap file to map codes to Unicode.
|
||||
|
||||
## Simple Fonts ##
|
||||
|
||||
+ Glyphs are selected by single-byte character codes. Index into a 256 entry glyph table.
|
||||
+ Only supports horizontal writing mode.
|
||||
|
||||
## Further Description ##
|
||||
|
||||
### Type 1 Fonts ###
|
||||
|
||||
The **Font program** is a PostScript program describing glyph shape. See the Adobe Type 1 Font Format specification.
|
||||
|
||||
The **Font dictionary** has the following entries:
|
||||
|
||||
+ Type (name): /Font
|
||||
+ Subtype (name): /Type1
|
||||
+ Name (name?): Font name
|
||||
+ BaseFont (name): The PostScript name of the font. Equivalent to the FontName value in the **Font program**.
|
||||
+ FirstChar (int): The first character code in the Widths array.
|
||||
+ LastChar (int): The last character code in the Widths array.
|
||||
+ Widths (numeric[] R): An array defining the glyph width in units of 1000 == 1 text space unit.
|
||||
+ FontDescriptor (Dict<> R): Describes font metrics other than widths.
|
||||
+ Encoding (name/Dict<> R): Specifies the character encoding if different from default.
|
||||
+ ToUnicode (stream R): CMap mapping character code to Unicode.
|
@ -2,6 +2,7 @@
|
||||
{
|
||||
using System.Collections.Generic;
|
||||
using Content;
|
||||
using ContentStream;
|
||||
using IO;
|
||||
using Pdf.Cos;
|
||||
using Pdf.Fonts;
|
||||
@ -44,6 +45,10 @@
|
||||
|
||||
internal class TestResourceStore : IResourceStore
|
||||
{
|
||||
public void LoadResourceDictionary(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
|
||||
{
|
||||
}
|
||||
|
||||
public IFont GetFont(CosName name)
|
||||
{
|
||||
return null;
|
||||
|
@ -13,15 +13,14 @@
|
||||
|
||||
public class PageContentParserTests
|
||||
{
|
||||
private readonly PageContentParser parser = new PageContentParser();
|
||||
private readonly IGraphicsStateOperationFactory operationFactory = new ReflectionGraphicsStateOperationFactory();
|
||||
private readonly PageContentParser parser = new PageContentParser(new ReflectionGraphicsStateOperationFactory());
|
||||
|
||||
[Fact]
|
||||
public void CorrectlyExtractsOperations()
|
||||
{
|
||||
var input = StringBytesTestConverter.Convert(SimpleGoogleDocPageContent, false);
|
||||
|
||||
var result = parser.Parse(new ReflectionGraphicsStateOperationFactory(), input.Bytes);
|
||||
var result = parser.Parse(input.Bytes);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
@ -36,7 +35,7 @@
|
||||
ET";
|
||||
var input = StringBytesTestConverter.Convert(s, false);
|
||||
|
||||
var result = parser.Parse(operationFactory, input.Bytes);
|
||||
var result = parser.Parse(input.Bytes);
|
||||
|
||||
Assert.Equal(7, result.Count);
|
||||
|
||||
@ -72,7 +71,7 @@ ET";
|
||||
|
||||
var input = StringBytesTestConverter.Convert(s, false);
|
||||
|
||||
var result = parser.Parse(operationFactory, input.Bytes);
|
||||
var result = parser.Parse(input.Bytes);
|
||||
|
||||
Assert.Equal(4, result.Count);
|
||||
|
||||
|
@ -3,6 +3,7 @@ namespace UglyToad.Pdf.Tests.Tokenization
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using ContentStream;
|
||||
using Pdf.Cos;
|
||||
using Pdf.Tokenization;
|
||||
using Pdf.Tokenization.Tokens;
|
||||
|
11
src/UglyToad.Pdf/Content/IPageFactory.cs
Normal file
11
src/UglyToad.Pdf/Content/IPageFactory.cs
Normal file
@ -0,0 +1,11 @@
|
||||
namespace UglyToad.Pdf.Content
|
||||
{
|
||||
using ContentStream;
|
||||
using IO;
|
||||
|
||||
/// <summary>
/// Builds <see cref="Page"/> instances from the dictionary describing a page.
/// </summary>
internal interface IPageFactory
|
||||
{
|
||||
/// <summary>
/// Creates the <see cref="Page"/> for the given page number from its dictionary.
/// </summary>
/// <param name="number">The page number (<see cref="Page"/> documents its number as 1 indexed).</param>
/// <param name="dictionary">The dictionary describing the page.</param>
/// <param name="pageTreeMembers">
/// Values gathered from the page tree; the PageFactory implementation uses them as a fallback
/// source for the media box when the page dictionary does not contain one.
/// </param>
/// <param name="reader">The reader handed on to the object parser and resource store to resolve referenced objects.</param>
/// <param name="isLenientParsing">
/// When <c>true</c> the PageFactory implementation tolerates an unexpected page type or a
/// missing media box rather than throwing.
/// </param>
/// <returns>The created page.</returns>
Page Create(int number, PdfDictionary dictionary, PageTreeMembers pageTreeMembers, IRandomAccessRead reader,
|
||||
bool isLenientParsing);
|
||||
}
|
||||
}
|
14
src/UglyToad.Pdf/Content/IResourceStore.cs
Normal file
14
src/UglyToad.Pdf/Content/IResourceStore.cs
Normal file
@ -0,0 +1,14 @@
|
||||
namespace UglyToad.Pdf.Content
|
||||
{
|
||||
using ContentStream;
|
||||
using Cos;
|
||||
using Fonts;
|
||||
using IO;
|
||||
|
||||
/// <summary>
/// Holds the resources (currently fonts) made available to page content by resource dictionaries.
/// </summary>
internal interface IResourceStore
|
||||
{
|
||||
/// <summary>
/// Loads the resources described by a resource dictionary into the store.
/// </summary>
/// <param name="dictionary">The resource dictionary to load from.</param>
/// <param name="reader">The reader used to resolve objects referenced by the dictionary.</param>
/// <param name="isLenientParsing">Whether malformed entries may be tolerated rather than causing an exception.</param>
void LoadResourceDictionary(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing);
|
||||
|
||||
/// <summary>
/// Gets the font registered under the given resource name.
/// </summary>
/// <param name="name">The name the font was registered with.</param>
/// <returns>
/// The matching font. NOTE(review): behaviour for an unknown name is implementation defined
/// (the test double returns <c>null</c>) - confirm against the real implementation.
/// </returns>
IFont GetFont(CosName name);
|
||||
}
|
||||
}
|
@ -2,21 +2,9 @@
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using ContentStream;
|
||||
using Cos;
|
||||
using Filters;
|
||||
using Geometry;
|
||||
using Graphics;
|
||||
using IO;
|
||||
using Parser;
|
||||
using Util;
|
||||
|
||||
public class Page
|
||||
{
|
||||
private readonly ParsingArguments parsingArguments;
|
||||
private readonly PdfDictionary dictionary;
|
||||
|
||||
/// <summary>
|
||||
/// The 1 indexed page number.
|
||||
/// </summary>
|
||||
@ -28,78 +16,16 @@
|
||||
|
||||
public IReadOnlyList<string> Text => Content?.Text ?? new string[0];
|
||||
|
||||
internal Page(int number, PdfDictionary dictionary, PageTreeMembers pageTreeMembers, ParsingArguments parsingArguments)
|
||||
internal Page(int number, MediaBox mediaBox, PageContent content)
|
||||
{
|
||||
if (number <= 0)
|
||||
{
|
||||
throw new ArgumentOutOfRangeException(nameof(number), "Page number cannot be 0 or negative.");
|
||||
}
|
||||
|
||||
this.dictionary = dictionary ?? throw new ArgumentNullException(nameof(dictionary));
|
||||
this.parsingArguments = parsingArguments ?? throw new ArgumentNullException(nameof(parsingArguments));
|
||||
|
||||
Number = number;
|
||||
|
||||
var type = dictionary.GetName(CosName.TYPE);
|
||||
|
||||
if (type != null && !type.Equals(CosName.PAGE) && !parsingArguments.IsLenientParsing)
|
||||
{
|
||||
throw new InvalidOperationException($"Created page number {number} but its type was specified as {type} rather than 'Page'.");
|
||||
}
|
||||
|
||||
if (dictionary.TryGetItemOfType(CosName.MEDIA_BOX, out COSArray mediaboxArray))
|
||||
{
|
||||
var x1 = mediaboxArray.getInt(0);
|
||||
var y1 = mediaboxArray.getInt(1);
|
||||
var x2 = mediaboxArray.getInt(2);
|
||||
var y2 = mediaboxArray.getInt(3);
|
||||
|
||||
MediaBox = new MediaBox(new PdfRectangle(x1, y1, x2, y2));
|
||||
}
|
||||
else
|
||||
{
|
||||
MediaBox = pageTreeMembers.GetMediaBox();
|
||||
|
||||
if (MediaBox == null)
|
||||
{
|
||||
if (parsingArguments.IsLenientParsing)
|
||||
{
|
||||
MediaBox = MediaBox.A4;
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new InvalidOperationException("No mediabox was present for page: " + number);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (dictionary.GetItemOrDefault(CosName.RESOURCES) is PdfDictionary resource)
|
||||
{
|
||||
parsingArguments.CachingProviders.ResourceContainer.LoadResourceDictionary(resource, parsingArguments);
|
||||
}
|
||||
|
||||
var contentObject = dictionary.GetItemOrDefault(CosName.CONTENTS) as CosObject;
|
||||
if (contentObject != null)
|
||||
{
|
||||
var contentStream = parsingArguments.Container.Get<DynamicParser>()
|
||||
.Parse(parsingArguments, contentObject, false) as RawCosStream;
|
||||
|
||||
if (contentStream == null)
|
||||
{
|
||||
throw new InvalidOperationException("Failed to parse the content for the page: " + number);
|
||||
}
|
||||
|
||||
var contents = contentStream.Decode(parsingArguments.Container.Get<IFilterProvider>());
|
||||
|
||||
var operations = parsingArguments.Container.Get<PageContentParser>()
|
||||
.Parse(parsingArguments.Container.Get<IGraphicsStateOperationFactory>(), new ByteArrayInputBytes(contents));
|
||||
|
||||
var context = new ContentStreamProcessor(MediaBox.Bounds, parsingArguments.CachingProviders.ResourceContainer);
|
||||
|
||||
var content = context.Process(operations);
|
||||
|
||||
Content = content;
|
||||
}
|
||||
MediaBox = mediaBox;
|
||||
Content = content;
|
||||
}
|
||||
}
|
||||
}
|
99
src/UglyToad.Pdf/Content/PageFactory.cs
Normal file
99
src/UglyToad.Pdf/Content/PageFactory.cs
Normal file
@ -0,0 +1,99 @@
|
||||
namespace UglyToad.Pdf.Content
|
||||
{
|
||||
using System;
|
||||
using ContentStream;
|
||||
using Cos;
|
||||
using Filters;
|
||||
using Geometry;
|
||||
using Graphics;
|
||||
using IO;
|
||||
using Parser;
|
||||
|
||||
/// <summary>
/// Default <see cref="IPageFactory"/>: validates the page dictionary, resolves the media box,
/// loads the page resources and processes the content stream into a <see cref="Page"/>.
/// </summary>
internal class PageFactory : IPageFactory
{
    private readonly IResourceStore resourceStore;
    private readonly IPdfObjectParser pdfObjectParser;
    private readonly IFilterProvider filterProvider;
    private readonly IPageContentParser pageContentParser;

    /// <summary>
    /// Creates a new <see cref="PageFactory"/> using the supplied collaborators.
    /// </summary>
    /// <param name="resourceStore">Receives the page's resource dictionary and supplies resources to content processing.</param>
    /// <param name="pdfObjectParser">Resolves the indirect reference to the page's content stream.</param>
    /// <param name="filterProvider">Supplies the filters used to decode the content stream.</param>
    /// <param name="pageContentParser">Parses the decoded content bytes into operations.</param>
    public PageFactory(IResourceStore resourceStore, IPdfObjectParser pdfObjectParser, IFilterProvider filterProvider,
        IPageContentParser pageContentParser)
    {
        this.resourceStore = resourceStore;
        this.pdfObjectParser = pdfObjectParser;
        this.filterProvider = filterProvider;
        this.pageContentParser = pageContentParser;
    }

    /// <summary>
    /// Creates the <see cref="Page"/> with the given number from its dictionary.
    /// </summary>
    /// <param name="number">The page number, used for the created page and in error messages.</param>
    /// <param name="dictionary">The page dictionary. Must not be <c>null</c>.</param>
    /// <param name="pageTreeMembers">Fallback source for the media box when the dictionary has none.</param>
    /// <param name="reader">The reader passed to the object parser and resource store.</param>
    /// <param name="isLenientParsing">
    /// When <c>true</c> a wrong page type is ignored and a missing media box defaults to A4.
    /// </param>
    /// <returns>The created page; its content is the default value when the dictionary has no content object.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="dictionary"/> is <c>null</c>.</exception>
    /// <exception cref="InvalidOperationException">
    /// In strict mode the type is not 'Page' or no media box exists; in either mode the content
    /// object could not be parsed to a stream.
    /// </exception>
    public Page Create(int number, PdfDictionary dictionary, PageTreeMembers pageTreeMembers, IRandomAccessRead reader,
        bool isLenientParsing)
    {
        if (dictionary == null)
        {
            throw new ArgumentNullException(nameof(dictionary));
        }

        var type = dictionary.GetName(CosName.TYPE);

        if (type != null && !type.Equals(CosName.PAGE) && !isLenientParsing)
        {
            throw new InvalidOperationException($"Page {number} had its type specified as {type} rather than 'Page'.");
        }

        var mediaBox = GetMediaBox(number, dictionary, pageTreeMembers, isLenientParsing);

        // Resources must be loaded before the content is processed since processing reads from the store.
        if (dictionary.GetItemOrDefault(CosName.RESOURCES) is PdfDictionary resource)
        {
            resourceStore.LoadResourceDictionary(resource, reader, isLenientParsing);
        }

        var content = GetContent(number, dictionary, mediaBox, reader);

        return new Page(number, mediaBox, content);
    }

    // Resolves the media box from the page dictionary, then the page tree, then (lenient only) A4.
    private static MediaBox GetMediaBox(int number, PdfDictionary dictionary, PageTreeMembers pageTreeMembers,
        bool isLenientParsing)
    {
        if (dictionary.TryGetItemOfType(CosName.MEDIA_BOX, out COSArray mediaboxArray))
        {
            var x1 = mediaboxArray.getInt(0);
            var y1 = mediaboxArray.getInt(1);
            var x2 = mediaboxArray.getInt(2);
            var y2 = mediaboxArray.getInt(3);

            return new MediaBox(new PdfRectangle(x1, y1, x2, y2));
        }

        var inherited = pageTreeMembers.GetMediaBox();

        if (inherited != null)
        {
            return inherited;
        }

        if (isLenientParsing)
        {
            return MediaBox.A4;
        }

        throw new InvalidOperationException("No mediabox was present for page: " + number);
    }

    // Parses, decodes and processes the page's content stream; yields the default content when there is none.
    private PageContent GetContent(int number, PdfDictionary dictionary, MediaBox mediaBox, IRandomAccessRead reader)
    {
        var contentObject = dictionary.GetItemOrDefault(CosName.CONTENTS) as CosObject;

        if (contentObject == null)
        {
            return default(PageContent);
        }

        var contentStream = pdfObjectParser.Parse(contentObject.ToIndirectReference(), reader, false) as RawCosStream;

        if (contentStream == null)
        {
            throw new InvalidOperationException("Failed to parse the content for the page: " + number);
        }

        var contents = contentStream.Decode(filterProvider);

        var operations = pageContentParser.Parse(new ByteArrayInputBytes(contents));

        var context = new ContentStreamProcessor(mediaBox.Bounds, resourceStore);

        return context.Process(operations);
    }
}
|
||||
}
|
@ -6,26 +6,26 @@
|
||||
using ContentStream;
|
||||
using ContentStream.TypedAccessors;
|
||||
using Cos;
|
||||
using IO;
|
||||
using Logging;
|
||||
using Parser;
|
||||
using Parser.PageTree;
|
||||
|
||||
public class Pages
|
||||
{
|
||||
private readonly ILog log;
|
||||
private readonly Catalog catalog;
|
||||
private readonly ParsingArguments arguments;
|
||||
private readonly IPdfObjectParser pdfObjectParser;
|
||||
private readonly IPageFactory pageFactory;
|
||||
private readonly IRandomAccessRead reader;
|
||||
private readonly bool isLenientParsing;
|
||||
private readonly PdfDictionary rootPageDictionary;
|
||||
private readonly Dictionary<int, PdfDictionary> locatedPages = new Dictionary<int, PdfDictionary>();
|
||||
|
||||
public int Count { get; }
|
||||
|
||||
internal Pages(Catalog catalog, ParsingArguments arguments)
|
||||
internal Pages(ILog log, Catalog catalog, IPdfObjectParser pdfObjectParser, IPageFactory pageFactory,
|
||||
IRandomAccessRead reader, bool isLenientParsing)
|
||||
{
|
||||
if (arguments == null)
|
||||
{
|
||||
throw new ArgumentNullException(nameof(arguments));
|
||||
}
|
||||
|
||||
if (catalog == null)
|
||||
{
|
||||
throw new ArgumentNullException(nameof(catalog));
|
||||
@ -38,9 +38,9 @@
|
||||
throw new InvalidOperationException("No pages were present in the catalog for this PDF document");
|
||||
}
|
||||
|
||||
var pageObject = arguments.Container.Get<DynamicParser>().Parse(arguments, pages, false);
|
||||
var pagesObject = pdfObjectParser.Parse(pages.ToIndirectReference(), reader, isLenientParsing);
|
||||
|
||||
if (!(pageObject is PdfDictionary catalogPageDictionary))
|
||||
if (!(pagesObject is PdfDictionary catalogPageDictionary))
|
||||
{
|
||||
throw new InvalidOperationException("Could not find the root pages object: " + pages);
|
||||
}
|
||||
@ -51,8 +51,12 @@
|
||||
|
||||
Count = count;
|
||||
|
||||
this.log = log;
|
||||
this.catalog = catalog;
|
||||
this.arguments = arguments;
|
||||
this.pdfObjectParser = pdfObjectParser;
|
||||
this.pageFactory = pageFactory;
|
||||
this.reader = reader;
|
||||
this.isLenientParsing = isLenientParsing;
|
||||
}
|
||||
|
||||
|
||||
@ -60,7 +64,8 @@
|
||||
{
|
||||
if (locatedPages.TryGetValue(pageNumber, out PdfDictionary targetPageDictionary))
|
||||
{
|
||||
return new Page(pageNumber, targetPageDictionary, new PageTreeMembers(), arguments);
|
||||
return pageFactory.Create(pageNumber, targetPageDictionary, new PageTreeMembers(), reader,
|
||||
isLenientParsing);
|
||||
}
|
||||
|
||||
var observed = new List<int>();
|
||||
@ -73,8 +78,7 @@
|
||||
throw new InvalidOperationException("Could not find the page with number: " + pageNumber);
|
||||
}
|
||||
|
||||
var page = arguments.Container.Get<PageParser>()
|
||||
.Parse(pageNumber, targetPageDictionary, arguments);
|
||||
var page = pageFactory.Create(pageNumber, targetPageDictionary, new PageTreeMembers(), reader, isLenientParsing);
|
||||
|
||||
locatedPages[pageNumber] = targetPageDictionary;
|
||||
|
||||
@ -108,8 +112,7 @@
|
||||
|
||||
if (!type.Equals(CosName.PAGES))
|
||||
{
|
||||
arguments.Container.Get<ILog>()
|
||||
.Warn("Did not find the expected type (Page or Pages) in dictionary: " + currentPageDictionary);
|
||||
log.Warn("Did not find the expected type (Page or Pages) in dictionary: " + currentPageDictionary);
|
||||
|
||||
return false;
|
||||
}
|
||||
@ -120,7 +123,7 @@
|
||||
foreach (var kid in kids.OfType<CosObject>())
|
||||
{
|
||||
// todo: exit early
|
||||
var child = arguments.Container.Get<DynamicParser>().Parse(arguments, kid, false) as PdfDictionary;
|
||||
var child = pdfObjectParser.Parse(kid.ToIndirectReference(), reader, isLenientParsing) as PdfDictionary;
|
||||
|
||||
var thisPageMatches = FindPage(child, soughtPageNumber, pageNumbersObserved);
|
||||
|
||||
|
@ -4,31 +4,32 @@
|
||||
using System.Collections.Generic;
|
||||
using ContentStream;
|
||||
using Cos;
|
||||
using Filters;
|
||||
using Fonts;
|
||||
using Fonts.Cmap;
|
||||
using Fonts.Parser;
|
||||
using IO;
|
||||
using Parser;
|
||||
|
||||
internal interface IResourceStore
|
||||
{
|
||||
IFont GetFont(CosName name);
|
||||
}
|
||||
|
||||
internal class ResourceContainer : IResourceStore
|
||||
{
|
||||
private readonly IPdfObjectParser pdfObjectParser;
|
||||
private readonly IFontFactory fontFactory;
|
||||
|
||||
private readonly Dictionary<CosName, IFont> loadedFonts = new Dictionary<CosName, IFont>();
|
||||
|
||||
internal void LoadResourceDictionary(PdfDictionary dictionary, ParsingArguments arguments)
|
||||
public ResourceContainer(IPdfObjectParser pdfObjectParser, IFontFactory fontFactory)
|
||||
{
|
||||
this.pdfObjectParser = pdfObjectParser;
|
||||
this.fontFactory = fontFactory;
|
||||
}
|
||||
|
||||
public void LoadResourceDictionary(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
|
||||
{
|
||||
if (dictionary.TryGetValue(CosName.FONT, out var fontBase) && fontBase is PdfDictionary fontDictionary)
|
||||
{
|
||||
LoadFontDictionary(fontDictionary, arguments);
|
||||
LoadFontDictionary(fontDictionary, reader, isLenientParsing);
|
||||
}
|
||||
}
|
||||
|
||||
private void LoadFontDictionary(PdfDictionary fontDictionary, ParsingArguments arguments)
|
||||
private void LoadFontDictionary(PdfDictionary fontDictionary, IRandomAccessRead reader, bool isLenientParsing)
|
||||
{
|
||||
foreach (var pair in fontDictionary)
|
||||
{
|
||||
@ -39,24 +40,22 @@
|
||||
|
||||
if (!(pair.Value is CosObject objectKey))
|
||||
{
|
||||
if (arguments.IsLenientParsing)
|
||||
if (isLenientParsing)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
throw new InvalidOperationException($"The font with name {pair.Key} did not link to an object key. Value was: {pair.Value}.");
|
||||
}
|
||||
|
||||
var dynamicParser = arguments.Get<DynamicParser>();
|
||||
|
||||
var fontObject = dynamicParser.Parse(arguments, objectKey, false) as PdfDictionary;
|
||||
|
||||
var fontObject = pdfObjectParser.Parse(objectKey.ToIndirectReference(), reader, false) as PdfDictionary;
|
||||
|
||||
if (fontObject == null)
|
||||
{
|
||||
throw new InvalidOperationException($"Could not retrieve the font with name: {pair.Key} which should have been object {objectKey.GetObjectNumber()}");
|
||||
}
|
||||
|
||||
loadedFonts[pair.Key] = arguments.Get<FontFactory>().GetFont(fontObject, arguments);
|
||||
loadedFonts[pair.Key] = fontFactory.Get(fontObject, reader, isLenientParsing);
|
||||
}
|
||||
}
|
||||
|
||||
|
15
src/UglyToad.Pdf/ContentStream/IndirectReference.cs
Normal file
15
src/UglyToad.Pdf/ContentStream/IndirectReference.cs
Normal file
@ -0,0 +1,15 @@
|
||||
namespace UglyToad.Pdf.ContentStream
|
||||
{
|
||||
/// <summary>
/// Identifies an indirect object by its object number and generation number.
/// </summary>
public struct IndirectReference : System.IEquatable<IndirectReference>
{
    /// <summary>
    /// The object number of the referenced object.
    /// </summary>
    public long ObjectNumber { get; }

    /// <summary>
    /// The generation number of the referenced object.
    /// </summary>
    public int Generation { get; }

    /// <summary>
    /// Creates a reference to the object with the given number and generation.
    /// </summary>
    /// <param name="objectNumber">The object number.</param>
    /// <param name="generation">The generation number.</param>
    public IndirectReference(long objectNumber, int generation)
    {
        ObjectNumber = objectNumber;
        Generation = generation;
    }

    /// <summary>
    /// Two references are equal when both object number and generation match.
    /// </summary>
    public bool Equals(IndirectReference other)
    {
        return ObjectNumber == other.ObjectNumber && Generation == other.Generation;
    }

    /// <inheritdoc />
    public override bool Equals(object obj)
    {
        return obj is IndirectReference && Equals((IndirectReference)obj);
    }

    /// <inheritdoc />
    public override int GetHashCode()
    {
        // Explicit hash so the struct can be used as a dictionary key without the
        // reflection-based default implementation of ValueType.GetHashCode.
        unchecked
        {
            return (ObjectNumber.GetHashCode() * 397) ^ Generation;
        }
    }

    /// <summary>
    /// Compares two references for equality of object number and generation.
    /// </summary>
    public static bool operator ==(IndirectReference left, IndirectReference right)
    {
        return left.Equals(right);
    }

    /// <summary>
    /// Compares two references for inequality of object number or generation.
    /// </summary>
    public static bool operator !=(IndirectReference left, IndirectReference right)
    {
        return !left.Equals(right);
    }

    /// <summary>
    /// Formats the reference the way it is written in a PDF file, e.g. <c>12 0 R</c>.
    /// </summary>
    public override string ToString()
    {
        var culture = System.Globalization.CultureInfo.InvariantCulture;

        return ObjectNumber.ToString(culture) + " " + Generation.ToString(culture) + " R";
    }
}
|
||||
}
|
@ -1,5 +1,7 @@
|
||||
namespace UglyToad.Pdf.Cos
|
||||
{
|
||||
using ContentStream;
|
||||
|
||||
public class CosObject : CosBase, ICosUpdateInfo
|
||||
{
|
||||
private CosBase baseObject;
|
||||
@ -124,5 +126,10 @@
|
||||
}
|
||||
|
||||
public bool NeedsToBeUpdated { get; set; }
|
||||
|
||||
public IndirectReference ToIndirectReference()
|
||||
{
|
||||
return new IndirectReference(objectNumber, generationNumber);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -32,5 +32,7 @@
|
||||
/// The definition of the character collection for the font.
|
||||
/// </summary>
|
||||
CharacterIdentifierSystemInfo SystemInfo { get; }
|
||||
|
||||
CidFontType CidFontType { get; }
|
||||
}
|
||||
}
|
@ -1,6 +1,5 @@
|
||||
namespace UglyToad.Pdf.Fonts.CidFonts
|
||||
{
|
||||
using Cmap;
|
||||
using Cos;
|
||||
|
||||
/// <inheritdoc/>
|
||||
@ -14,5 +13,6 @@
|
||||
public CosName SubType { get; }
|
||||
public CosName BaseFont { get; }
|
||||
public CharacterIdentifierSystemInfo SystemInfo { get; }
|
||||
public CidFontType CidFontType => CidFontType.Type0;
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,5 @@
|
||||
namespace UglyToad.Pdf.Fonts.CidFonts
|
||||
{
|
||||
using Cmap;
|
||||
using Cos;
|
||||
|
||||
/// <inheritdoc />
|
||||
@ -14,5 +13,6 @@
|
||||
public CosName SubType { get; }
|
||||
public CosName BaseFont { get; }
|
||||
public CharacterIdentifierSystemInfo SystemInfo { get; }
|
||||
public CidFontType CidFontType => CidFontType.Type2;
|
||||
}
|
||||
}
|
@ -6,7 +6,7 @@
|
||||
using IO;
|
||||
using Util.JetBrains.Annotations;
|
||||
|
||||
public class CMap
|
||||
internal class CMap
|
||||
{
|
||||
public CharacterIdentifierSystemInfo Info { get; }
|
||||
|
||||
@ -30,6 +30,8 @@
|
||||
[NotNull]
|
||||
public IReadOnlyList<CidCharacterMapping> CidCharacterMappings { get; }
|
||||
|
||||
public WritingMode WritingMode { get; }
|
||||
|
||||
public bool HasCidMappings => CidCharacterMappings.Count > 0 || CidRanges.Count > 0;
|
||||
|
||||
public bool HasUnicodeMappings => BaseFontCharacterMap.Count > 0;
|
||||
@ -41,7 +43,7 @@
|
||||
{
|
||||
Info = info;
|
||||
Type = type;
|
||||
WMode = wMode;
|
||||
WritingMode = (WritingMode)wMode;
|
||||
Name = name;
|
||||
Version = version;
|
||||
BaseFontCharacterMap = baseFontCharacterMap ?? throw new ArgumentNullException(nameof(baseFontCharacterMap));
|
||||
@ -51,8 +53,7 @@
|
||||
maxCodeLength = CodespaceRanges.Max(x => x.CodeLength);
|
||||
minCodeLength = CodespaceRanges.Min(x => x.CodeLength);
|
||||
}
|
||||
|
||||
private int wmode = 0;
|
||||
|
||||
private string cmapName = null;
|
||||
private string cmapVersion = null;
|
||||
private int cmapType = -1;
|
||||
|
8
src/UglyToad.Pdf/Fonts/Cmap/WritingMode.cs
Normal file
8
src/UglyToad.Pdf/Fonts/Cmap/WritingMode.cs
Normal file
@ -0,0 +1,8 @@
|
||||
namespace UglyToad.Pdf.Fonts.Cmap
|
||||
{
|
||||
/// <summary>
/// The writing mode declared by a CMap. The numeric values matter: the CMap converts its
/// integer writing mode value to this enum with a direct cast.
/// </summary>
internal enum WritingMode
|
||||
{
|
||||
/// <summary>
/// Horizontal writing (value 0).
/// </summary>
Horizontal = 0,
|
||||
/// <summary>
/// Vertical writing (value 1).
/// </summary>
Vertical = 1
|
||||
}
|
||||
}
|
55
src/UglyToad.Pdf/Fonts/Composite/ToUnicodeCMap.cs
Normal file
55
src/UglyToad.Pdf/Fonts/Composite/ToUnicodeCMap.cs
Normal file
@ -0,0 +1,55 @@
|
||||
namespace UglyToad.Pdf.Fonts.Composite
|
||||
{
|
||||
using System;
|
||||
using Cmap;
|
||||
using IO;
|
||||
using Util.JetBrains.Annotations;
|
||||
|
||||
/// <summary>
/// Wraps the optional ToUnicode CMap of a font, which defines the information content
/// (actual text) of the font as opposed to the display format.
/// </summary>
internal class ToUnicodeCMap
{
    [CanBeNull]
    private readonly CMap cMap;

    /// <summary>
    /// Does the font provide a CMap to map CIDs to Unicode values?
    /// </summary>
    public bool CanMapToUnicode => cMap != null;

    /// <summary>
    /// Is this document (unexpectedly) using a predefined Identity-H/V CMap as its ToUnicode CMap?
    /// Always <c>false</c> when no CMap was provided or the CMap has no name.
    /// </summary>
    public bool IsUsingIdentityAsUnicodeMap { get; }

    /// <summary>
    /// Creates the wrapper around the font's ToUnicode CMap.
    /// </summary>
    /// <param name="cMap">The ToUnicode CMap, or <c>null</c> if the font does not define one.</param>
    public ToUnicodeCMap([CanBeNull]CMap cMap)
    {
        this.cMap = cMap;

        // The name is an identifier rather than linguistic text so it is compared ordinally;
        // a CMap without a name cannot be an Identity CMap.
        var name = cMap?.Name;

        IsUsingIdentityAsUnicodeMap = name != null
            && name.StartsWith("Identity-", StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Tries to convert the code to its Unicode text using the CMap.
    /// </summary>
    /// <param name="code">The code to convert.</param>
    /// <param name="value">The Unicode text when found; <c>null</c> when there is no CMap.</param>
    /// <returns><c>false</c> when there is no CMap, otherwise the result of the CMap lookup.</returns>
    public bool TryGet(int code, out string value)
    {
        value = null;

        if (!CanMapToUnicode)
        {
            return false;
        }

        return cMap.TryConvertToUnicode(code, out value);
    }

    /// <summary>
    /// Reads the next code from the input using the CMap.
    /// </summary>
    /// <param name="inputBytes">The bytes to read the code from.</param>
    /// <returns>The code read by the CMap.</returns>
    /// <exception cref="InvalidOperationException">The font did not provide a ToUnicode CMap.</exception>
    public int ReadCode(IInputBytes inputBytes)
    {
        if (!CanMapToUnicode)
        {
            // Previously this surfaced as an unexplained NullReferenceException.
            throw new InvalidOperationException("Cannot read a code because the font does not define a ToUnicode CMap.");
        }

        return cMap.ReadCode(inputBytes);
    }
}
|
||||
}
|
76
src/UglyToad.Pdf/Fonts/Composite/Type0Font.cs
Normal file
76
src/UglyToad.Pdf/Fonts/Composite/Type0Font.cs
Normal file
@ -0,0 +1,76 @@
|
||||
namespace UglyToad.Pdf.Fonts.Composite
|
||||
{
|
||||
using System;
|
||||
using CidFonts;
|
||||
using Cmap;
|
||||
using Cos;
|
||||
using Geometry;
|
||||
using IO;
|
||||
using Util.JetBrains.Annotations;
|
||||
|
||||
/// <summary>
/// A composite (Type 0) font which defines its glyphs using a CIDFont.
/// </summary>
internal class Type0Font : IFont
{
    /// <summary>
    /// The name of the font; this is the <see cref="BaseFont"/>.
    /// </summary>
    public CosName Name => BaseFont;

    /// <summary>
    /// The base font name this font was created with.
    /// </summary>
    [NotNull]
    public CosName BaseFont { get; }

    /// <summary>
    /// The CIDFont this font was created with.
    /// </summary>
    [NotNull]
    public ICidFont CidFont { get; }

    /// <summary>
    /// The encoding CMap, used to read character codes and to determine the writing mode.
    /// </summary>
    [NotNull]
    public CMap CMap { get; }

    /// <summary>
    /// The wrapper for the optional ToUnicode CMap; never <c>null</c> but it may be unable to map.
    /// </summary>
    [NotNull]
    public ToUnicodeCMap ToUnicode { get; }

    /// <summary>
    /// Whether the encoding CMap declares the vertical writing mode.
    /// </summary>
    public bool IsVertical => CMap.WritingMode == WritingMode.Vertical;

    /// <summary>
    /// Creates a new <see cref="Type0Font"/>.
    /// </summary>
    /// <param name="baseFont">The base font name. Must not be <c>null</c>.</param>
    /// <param name="cidFont">The CIDFont. Must not be <c>null</c>.</param>
    /// <param name="cmap">The encoding CMap. Must not be <c>null</c>.</param>
    /// <param name="toUnicodeCMap">The ToUnicode CMap, may be <c>null</c> when the font does not define one.</param>
    /// <exception cref="ArgumentNullException">Any of the required arguments is <c>null</c>.</exception>
    public Type0Font(CosName baseFont, ICidFont cidFont, CMap cmap, CMap toUnicodeCMap)
    {
        BaseFont = baseFont ?? throw new ArgumentNullException(nameof(baseFont));
        CidFont = cidFont ?? throw new ArgumentNullException(nameof(cidFont));
        CMap = cmap ?? throw new ArgumentNullException(nameof(cmap));
        ToUnicode = new ToUnicodeCMap(toUnicodeCMap);
    }

    /// <summary>
    /// Reads the next character code from the input.
    /// </summary>
    /// <param name="bytes">The input positioned at the start of the code.</param>
    /// <param name="codeLength">The number of bytes consumed while reading the code.</param>
    /// <returns>The character code.</returns>
    public int ReadCharacterCode(IInputBytes bytes, out int codeLength)
    {
        var current = bytes.CurrentOffset;

        // Codes are read with the encoding CMap, which is always present. The ToUnicode CMap
        // is optional, so reading through it failed for fonts without one.
        var code = CMap.ReadCode(bytes);

        codeLength = bytes.CurrentOffset - current;

        return code;
    }

    /// <summary>
    /// Tries to get the Unicode text for the character code.
    /// </summary>
    /// <param name="characterCode">The character code, as returned by <see cref="ReadCharacterCode"/>.</param>
    /// <param name="value">The Unicode text if one could be found, otherwise <c>null</c>.</param>
    /// <returns><c>true</c> if a Unicode value was found.</returns>
    public bool TryGetUnicode(int characterCode, out string value)
    {
        value = null;

        if (!ToUnicode.CanMapToUnicode)
        {
            return false;
        }

        // According to PdfBox certain providers incorrectly use Identity CMaps as ToUnicode,
        // in which case the code itself is treated as the character.
        if (ToUnicode.IsUsingIdentityAsUnicodeMap)
        {
            value = new string((char)characterCode, 1);

            return true;
        }

        return ToUnicode.TryGet(characterCode, out value);
    }

    /// <summary>
    /// Gets the displacement for the character code.
    /// </summary>
    /// <param name="characterCode">The character code (currently unused).</param>
    /// <returns>A fixed placeholder vector; the same value is returned for every code.</returns>
    public PdfVector GetDisplacement(int characterCode)
    {
        // TODO: placeholder value, the CIDFont's width information is not consulted yet.
        return new PdfVector(0.333m, 0);
    }
}
|
||||
}
|
@ -1,5 +1,6 @@
|
||||
namespace UglyToad.Pdf.Fonts
|
||||
{
|
||||
using ContentStream;
|
||||
using Cos;
|
||||
|
||||
/// <summary>
|
||||
@ -13,13 +14,13 @@
|
||||
/// </remarks>
|
||||
internal class DescriptorFontFile
|
||||
{
|
||||
public CosObjectKey ObjectKey { get; }
|
||||
public IndirectReference ObjectKey { get; }
|
||||
|
||||
public byte[] FileBytes { get; }
|
||||
|
||||
public FontFileType FileType { get; }
|
||||
|
||||
public DescriptorFontFile(CosObjectKey key, FontFileType fileType)
|
||||
public DescriptorFontFile(IndirectReference key, FontFileType fileType)
|
||||
{
|
||||
ObjectKey = key;
|
||||
FileBytes = new byte[0];
|
||||
|
@ -0,0 +1,32 @@
|
||||
namespace UglyToad.Pdf.Fonts.Exceptions
|
||||
{
|
||||
using System;
|
||||
using System.Runtime.Serialization;
|
||||
|
||||
/// <summary>
/// Thrown when a font in the PDF document cannot be parsed because its font program
/// or font dictionary does not meet the specification.
/// </summary>
/// <inheritdoc cref="Exception"/>
[Serializable]
public class InvalidFontFormatException : Exception
{
    /// <summary>
    /// Creates the exception with the default message.
    /// </summary>
    public InvalidFontFormatException()
        : base()
    {
    }

    /// <summary>
    /// Creates the exception with a message describing the problem.
    /// </summary>
    /// <param name="message">The description of the format error.</param>
    public InvalidFontFormatException(string message)
        : base(message)
    {
    }

    /// <summary>
    /// Creates the exception with a message and the exception which caused it.
    /// </summary>
    /// <param name="message">The description of the format error.</param>
    /// <param name="inner">The underlying cause.</param>
    public InvalidFontFormatException(string message, Exception inner)
        : base(message, inner)
    {
    }

    /// <summary>
    /// Creates the exception from serialized data.
    /// </summary>
    /// <param name="info">The serialized object data.</param>
    /// <param name="context">The source or destination of the serialized data.</param>
    protected InvalidFontFormatException(SerializationInfo info, StreamingContext context)
        : base(info, context)
    {
    }
}
|
||||
}
|
@ -4,22 +4,26 @@
|
||||
using System.Collections.Generic;
|
||||
using ContentStream;
|
||||
using Cos;
|
||||
using Exceptions;
|
||||
using IO;
|
||||
using Logging;
|
||||
using Parser.Handlers;
|
||||
using Pdf.Parser;
|
||||
|
||||
internal class FontFactory
|
||||
internal class FontFactory : IFontFactory
|
||||
{
|
||||
private readonly ILog log;
|
||||
private readonly IReadOnlyDictionary<CosName, IFontHandler> handlers;
|
||||
|
||||
public FontFactory(Type0FontHandler type0FontHandler)
|
||||
public FontFactory(ILog log, Type0FontHandler type0FontHandler)
|
||||
{
|
||||
this.log = log;
|
||||
handlers = new Dictionary<CosName, IFontHandler>
|
||||
{
|
||||
{CosName.TYPE0, type0FontHandler}
|
||||
};
|
||||
}
|
||||
|
||||
public IFont GetFont(PdfDictionary dictionary, ParsingArguments arguments)
|
||||
public IFont Get(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
|
||||
{
|
||||
var type = dictionary.GetName(CosName.TYPE);
|
||||
|
||||
@ -27,13 +31,13 @@
|
||||
{
|
||||
var message = "The font dictionary did not have type 'Font'. " + dictionary;
|
||||
|
||||
if (arguments.IsLenientParsing)
|
||||
if (isLenientParsing)
|
||||
{
|
||||
arguments.Log.Error(message);
|
||||
log?.Error(message);
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new InvalidOperationException(message);
|
||||
throw new InvalidFontFormatException(message);
|
||||
}
|
||||
}
|
||||
|
||||
@ -41,7 +45,7 @@
|
||||
|
||||
if (handlers.TryGetValue(subtype, out var handler))
|
||||
{
|
||||
return handler.Generate(dictionary, arguments);
|
||||
return handler.Generate(dictionary, reader, isLenientParsing);
|
||||
}
|
||||
|
||||
throw new NotImplementedException($"Parsing not implemented for fonts of type: {subtype}, please submit a pull request or an issue.");
|
||||
@ -49,3 +53,4 @@
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -10,16 +10,12 @@
|
||||
internal interface IFont
|
||||
{
|
||||
CosName Name { get; }
|
||||
|
||||
CosName SubType { get; }
|
||||
|
||||
string BaseFontType { get; }
|
||||
|
||||
|
||||
bool IsVertical { get; }
|
||||
|
||||
int ReadCharacterCode(IInputBytes bytes, out int codeLength);
|
||||
|
||||
string GetUnicode(int characterCode);
|
||||
bool TryGetUnicode(int characterCode, out string value);
|
||||
|
||||
PdfVector GetDisplacement(int characterCode);
|
||||
}
|
||||
@ -51,6 +47,11 @@
|
||||
return code;
|
||||
}
|
||||
|
||||
// Stub: not implemented yet. The body unconditionally throws NotImplementedException,
// so 'value' is never assigned and no result is ever returned.
// NOTE(review): a call of the form font.TryGetUnicode(code, out var unicode) appears elsewhere
// in this change set - confirm that this stub cannot be reached from it.
public bool TryGetUnicode(int characterCode, out string value)
|
||||
{
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
|
||||
public string GetUnicode(int characterCode)
|
||||
{
|
||||
if (ToUnicode != null)
|
||||
|
10
src/UglyToad.Pdf/Fonts/IFontFactory.cs
Normal file
10
src/UglyToad.Pdf/Fonts/IFontFactory.cs
Normal file
@ -0,0 +1,10 @@
|
||||
namespace UglyToad.Pdf.Fonts
|
||||
{
|
||||
using ContentStream;
|
||||
using IO;
|
||||
|
||||
/// <summary>
/// Creates an <see cref="IFont"/> from a PDF font dictionary.
/// </summary>
internal interface IFontFactory
{
    /// <summary>
    /// Gets the font described by the supplied font dictionary.
    /// </summary>
    /// <param name="dictionary">The font dictionary to build the font from.</param>
    /// <param name="reader">
    /// Random access reader for the document; presumably used to load any indirect objects
    /// the font dictionary refers to (confirm against the implementation).
    /// </param>
    /// <param name="isLenientParsing">Whether lenient parsing is enabled for the document.</param>
    /// <returns>The font created for <paramref name="dictionary"/>.</returns>
    IFont Get(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing);
}
|
||||
}
|
@ -9,7 +9,7 @@
|
||||
using Tokenization.Scanner;
|
||||
using Tokenization.Tokens;
|
||||
|
||||
public class CMapParser
|
||||
internal class CMapParser
|
||||
{
|
||||
private static readonly BaseFontRangeParser BaseFontRangeParser = new BaseFontRangeParser();
|
||||
private static readonly BaseFontCharacterParser BaseFontCharacterParser = new BaseFontCharacterParser();
|
||||
|
@ -1,10 +1,10 @@
|
||||
namespace UglyToad.Pdf.Fonts.Parser.Handlers
|
||||
{
|
||||
using ContentStream;
|
||||
using Pdf.Parser;
|
||||
using IO;
|
||||
|
||||
internal interface IFontHandler
|
||||
{
|
||||
IFont Generate(PdfDictionary dictionary, ParsingArguments parsingArguments);
|
||||
IFont Generate(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing);
|
||||
}
|
||||
}
|
@ -1,9 +1,12 @@
|
||||
namespace UglyToad.Pdf.Fonts.Parser.Handlers
|
||||
{
|
||||
using System;
|
||||
using CidFonts;
|
||||
using Cmap;
|
||||
using Composite;
|
||||
using ContentStream;
|
||||
using Cos;
|
||||
using Exceptions;
|
||||
using Filters;
|
||||
using IO;
|
||||
using Parts;
|
||||
@ -14,31 +17,35 @@
|
||||
private readonly CidFontFactory cidFontFactory;
|
||||
private readonly CMapCache cMapCache;
|
||||
private readonly IFilterProvider filterProvider;
|
||||
private readonly IPdfObjectParser pdfObjectParser;
|
||||
|
||||
public Type0FontHandler(CidFontFactory cidFontFactory, CMapCache cMapCache, IFilterProvider filterProvider)
|
||||
public Type0FontHandler(CidFontFactory cidFontFactory, CMapCache cMapCache, IFilterProvider filterProvider, IPdfObjectParser pdfObjectParser)
|
||||
{
|
||||
this.cidFontFactory = cidFontFactory;
|
||||
this.cMapCache = cMapCache;
|
||||
this.filterProvider = filterProvider;
|
||||
this.pdfObjectParser = pdfObjectParser;
|
||||
}
|
||||
|
||||
public IFont Generate(PdfDictionary dictionary, ParsingArguments arguments)
|
||||
public IFont Generate(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
|
||||
{
|
||||
var dynamicParser = arguments.Get<DynamicParser>();
|
||||
|
||||
var baseFont = dictionary.GetName(CosName.BASE_FONT);
|
||||
|
||||
var cMap = ReadEncoding(dictionary, out var isCMapPredefined);
|
||||
|
||||
if (TryGetFirstDescendant(dictionary, out var descendantObject))
|
||||
{
|
||||
var parsed = dynamicParser.Parse(arguments, descendantObject, false);
|
||||
var parsed = pdfObjectParser.Parse(descendantObject.ToIndirectReference(), reader, isLenientParsing);
|
||||
|
||||
if (parsed is PdfDictionary descendantFontDictionary)
|
||||
{
|
||||
ParseDescendant(descendantFontDictionary, arguments);
|
||||
ParseDescendant(descendantFontDictionary, reader, isLenientParsing);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new InvalidFontFormatException("No descendant font dictionary was declared for this Type 0 font. This dictionary should contain the CIDFont for the Type 0 font. " + dictionary);
|
||||
}
|
||||
|
||||
var ucs2CMap = GetUcs2CMap(dictionary, isCMapPredefined, false);
|
||||
|
||||
@ -47,22 +54,17 @@
|
||||
{
|
||||
var toUnicodeValue = dictionary[CosName.TO_UNICODE];
|
||||
|
||||
var toUnicode = dynamicParser.Parse(arguments, toUnicodeValue as CosObject, false) as RawCosStream;
|
||||
var toUnicode = pdfObjectParser.Parse(((CosObject)toUnicodeValue).ToIndirectReference(), reader, isLenientParsing) as RawCosStream;
|
||||
|
||||
var decodedUnicodeCMap = toUnicode?.Decode(filterProvider);
|
||||
|
||||
if (decodedUnicodeCMap != null)
|
||||
{
|
||||
toUnicodeCMap = cMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), arguments.IsLenientParsing);
|
||||
toUnicodeCMap = cMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing);
|
||||
}
|
||||
}
|
||||
|
||||
var font = new CompositeFont
|
||||
{
|
||||
SubType = CosName.TYPE0,
|
||||
ToUnicode = toUnicodeCMap,
|
||||
BaseFont = baseFont
|
||||
};
|
||||
var font = new Type0Font(baseFont, new Type0CidFont(), cMap, toUnicodeCMap);
|
||||
|
||||
return font;
|
||||
}
|
||||
@ -91,7 +93,7 @@
|
||||
return false;
|
||||
}
|
||||
|
||||
private void ParseDescendant(PdfDictionary dictionary, ParsingArguments arguments)
|
||||
private void ParseDescendant(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
|
||||
{
|
||||
var type = dictionary.GetName(CosName.TYPE);
|
||||
if (!CosName.FONT.Equals(type))
|
||||
@ -99,7 +101,7 @@
|
||||
throw new InvalidOperationException($"Expected \'Font\' dictionary but found \'{type.Name}\'");
|
||||
}
|
||||
|
||||
cidFontFactory.Generate(dictionary, arguments, arguments.IsLenientParsing);
|
||||
cidFontFactory.Generate(dictionary, reader, isLenientParsing);
|
||||
}
|
||||
|
||||
private CMap ReadEncoding(PdfDictionary dictionary, out bool isCMapPredefined)
|
||||
|
@ -2,7 +2,6 @@
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using CidFonts;
|
||||
using ContentStream;
|
||||
using Cos;
|
||||
@ -12,20 +11,25 @@
|
||||
using Pdf.Parser;
|
||||
using TrueType;
|
||||
using TrueType.Parser;
|
||||
using Util;
|
||||
|
||||
internal class CidFontFactory
|
||||
{
|
||||
private readonly FontDescriptorFactory descriptorFactory;
|
||||
private readonly TrueTypeFontParser trueTypeFontParser;
|
||||
private readonly IPdfObjectParser pdfObjectParser;
|
||||
private readonly IFilterProvider filterProvider;
|
||||
|
||||
public CidFontFactory(FontDescriptorFactory descriptorFactory, TrueTypeFontParser trueTypeFontParser)
|
||||
public CidFontFactory(FontDescriptorFactory descriptorFactory, TrueTypeFontParser trueTypeFontParser,
|
||||
IPdfObjectParser pdfObjectParser,
|
||||
IFilterProvider filterProvider)
|
||||
{
|
||||
this.descriptorFactory = descriptorFactory;
|
||||
this.trueTypeFontParser = trueTypeFontParser;
|
||||
this.pdfObjectParser = pdfObjectParser;
|
||||
this.filterProvider = filterProvider;
|
||||
}
|
||||
|
||||
public ICidFont Generate(PdfDictionary dictionary, ParsingArguments arguments, bool isLenientParsing)
|
||||
public ICidFont Generate(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
|
||||
{
|
||||
var type = dictionary.GetName(CosName.TYPE);
|
||||
if (!CosName.FONT.Equals(type))
|
||||
@ -37,12 +41,12 @@
|
||||
var verticalWritingMetrics = ReadVerticalDisplacements(dictionary);
|
||||
|
||||
FontDescriptor descriptor = null;
|
||||
if (TryGetFontDescriptor(dictionary, arguments, out var descriptorDictionary))
|
||||
if (TryGetFontDescriptor(dictionary, reader, out var descriptorDictionary))
|
||||
{
|
||||
descriptor = descriptorFactory.Generate(descriptorDictionary, arguments.IsLenientParsing);
|
||||
descriptor = descriptorFactory.Generate(descriptorDictionary, isLenientParsing);
|
||||
}
|
||||
|
||||
ReadDescriptorFile(descriptor, arguments);
|
||||
ReadDescriptorFile(descriptor, reader, isLenientParsing);
|
||||
|
||||
var subType = dictionary.GetName(CosName.SUBTYPE);
|
||||
if (CosName.CID_FONT_TYPE0.Equals(subType))
|
||||
@ -58,8 +62,7 @@
|
||||
return null;
|
||||
}
|
||||
|
||||
private static bool TryGetFontDescriptor(PdfDictionary dictionary, ParsingArguments arguments,
|
||||
out PdfDictionary descriptorDictionary)
|
||||
private bool TryGetFontDescriptor(PdfDictionary dictionary, IRandomAccessRead reader, out PdfDictionary descriptorDictionary)
|
||||
{
|
||||
descriptorDictionary = null;
|
||||
|
||||
@ -68,7 +71,7 @@
|
||||
return false;
|
||||
}
|
||||
|
||||
var descriptorObj = arguments.Get<DynamicParser>().Parse(arguments, obj, false);
|
||||
var descriptorObj = pdfObjectParser.Parse(obj.ToIndirectReference(), reader, false);
|
||||
|
||||
if (!(descriptorObj is PdfDictionary descriptor))
|
||||
{
|
||||
@ -80,21 +83,21 @@
|
||||
return true;
|
||||
}
|
||||
|
||||
private void ReadDescriptorFile(FontDescriptor descriptor, ParsingArguments arguments)
|
||||
private void ReadDescriptorFile(FontDescriptor descriptor, IRandomAccessRead reader, bool isLenientParsing)
|
||||
{
|
||||
if (descriptor?.FontFile == null)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
var fontFileStream = arguments.Get<DynamicParser>().Parse(arguments, descriptor.FontFile.ObjectKey, false) as RawCosStream;
|
||||
var fontFileStream = pdfObjectParser.Parse(descriptor.FontFile.ObjectKey, reader, isLenientParsing) as RawCosStream;
|
||||
|
||||
if (fontFileStream == null)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
var fontFile = fontFileStream.Decode(arguments.Get<IFilterProvider>());
|
||||
var fontFile = fontFileStream.Decode(filterProvider);
|
||||
|
||||
switch (descriptor.FontFile.FileType)
|
||||
{
|
||||
|
@ -141,7 +141,7 @@
|
||||
throw new NotSupportedException("We currently expect the FontFile to be an object reference.");
|
||||
}
|
||||
|
||||
return new DescriptorFontFile(obj.GetObjectKey(), DescriptorFontFile.FontFileType.Type1);
|
||||
return new DescriptorFontFile(obj.ToIndirectReference(), DescriptorFontFile.FontFileType.Type1);
|
||||
}
|
||||
|
||||
if (dictionary.TryGetValue(CosName.FONT_FILE2, out value))
|
||||
@ -151,7 +151,7 @@
|
||||
throw new NotSupportedException("We currently expect the FontFile2 to be an object reference.");
|
||||
}
|
||||
|
||||
return new DescriptorFontFile(obj.GetObjectKey(), DescriptorFontFile.FontFileType.TrueType);
|
||||
return new DescriptorFontFile(obj.ToIndirectReference(), DescriptorFontFile.FontFileType.TrueType);
|
||||
}
|
||||
|
||||
if (dictionary.TryGetValue(CosName.FONT_FILE3, out value))
|
||||
@ -161,7 +161,7 @@
|
||||
throw new NotSupportedException("We currently expect the FontFile3 to be an object reference.");
|
||||
}
|
||||
|
||||
return new DescriptorFontFile(obj.GetObjectKey(), DescriptorFontFile.FontFileType.FromSubtype);
|
||||
return new DescriptorFontFile(obj.ToIndirectReference(), DescriptorFontFile.FontFileType.FromSubtype);
|
||||
}
|
||||
|
||||
return null;
|
||||
|
7
src/UglyToad.Pdf/Geometry/Paths/GeneralPath.cs
Normal file
7
src/UglyToad.Pdf/Geometry/Paths/GeneralPath.cs
Normal file
@ -0,0 +1,7 @@
|
||||
namespace UglyToad.Pdf.Geometry.Paths
|
||||
{
|
||||
/// <summary>
/// Placeholder type for a general path. It currently declares no members.
/// </summary>
internal class GeneralPath
{
    // TODO: provide an implementation
}
|
||||
}
|
@ -85,7 +85,7 @@
|
||||
{
|
||||
var code = font.ReadCharacterCode(bytes, out int codeLength);
|
||||
|
||||
var unicode = font.GetUnicode(code);
|
||||
font.TryGetUnicode(code, out var unicode);
|
||||
|
||||
var wordSpacing = 0m;
|
||||
if (code == ' ' && codeLength == 1)
|
||||
|
@ -1,12 +1,11 @@
|
||||
namespace UglyToad.Pdf.Parser
|
||||
{
|
||||
using System.Collections.Generic;
|
||||
using Graphics;
|
||||
using Graphics.Operations;
|
||||
using IO;
|
||||
|
||||
internal interface IPageContentParser
|
||||
{
|
||||
IReadOnlyList<IGraphicsStateOperation> Parse(IGraphicsStateOperationFactory operationFactory, IInputBytes inputBytes);
|
||||
IReadOnlyList<IGraphicsStateOperation> Parse(IInputBytes inputBytes);
|
||||
}
|
||||
}
|
218
src/UglyToad.Pdf/Parser/IPdfObjectParser.cs
Normal file
218
src/UglyToad.Pdf/Parser/IPdfObjectParser.cs
Normal file
@ -0,0 +1,218 @@
|
||||
namespace UglyToad.Pdf.Parser
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using ContentStream;
|
||||
using Cos;
|
||||
using IO;
|
||||
using Logging;
|
||||
using Parts;
|
||||
using Util;
|
||||
|
||||
/// <summary>
/// Loads the object which an indirect reference points to.
/// </summary>
internal interface IPdfObjectParser
{
    /// <summary>
    /// Parses the object identified by <paramref name="indirectReference"/>.
    /// </summary>
    /// <param name="indirectReference">The object number and generation of the object to load.</param>
    /// <param name="reader">The random access reader to read the object from.</param>
    /// <param name="isLenientParsing">Whether lenient parsing is enabled. Defaults to <c>true</c>.</param>
    /// <param name="requireExistingObject">
    /// Whether the object must already have a known, uncompressed location. Defaults to <c>false</c>.
    /// </param>
    /// <returns>The parsed object.</returns>
    CosBase Parse(IndirectReference indirectReference, IRandomAccessRead reader, bool isLenientParsing = true, bool requireExistingObject = false);
}
|
||||
|
||||
/// <summary>
/// Loads the object an indirect reference points to, either directly from the file at the offset
/// recorded for it or from inside a compressed object stream.
/// </summary>
/// <remarks>
/// Locations come from the cross reference table. A positive value is treated as a byte offset in the file;
/// a value of zero or less is treated as the negated object number of the object stream containing the object.
/// </remarks>
internal class PdfObjectParser : IPdfObjectParser
{
    // PDF keywords are plain ASCII tokens, so they are compared ordinally rather than with the current culture.
    private const string EndStreamKeyword = "endstream";

    private readonly ILog log;
    private readonly CosBaseParser baseParser;
    private readonly CosStreamParser streamParser;
    private readonly CrossReferenceTable crossReferenceTable;
    private readonly BruteForceSearcher bruteForceSearcher;
    private readonly CosObjectPool objectPool;
    private readonly ObjectStreamParser objectStreamParser;

    /// <summary>
    /// Creates a new <see cref="PdfObjectParser"/>.
    /// </summary>
    /// <param name="log">Receives warnings and errors; a <see cref="NoOpLog"/> is used when this is null.</param>
    /// <param name="baseParser">Parses the object which follows the "obj" keyword.</param>
    /// <param name="streamParser">Parses the stream which follows a stream dictionary.</param>
    /// <param name="crossReferenceTable">Provides (and is updated with) the locations of objects.</param>
    /// <param name="bruteForceSearcher">Provides fallback object locations when lenient parsing is enabled.</param>
    /// <param name="objectPool">The pool of objects for the document.</param>
    /// <param name="objectStreamParser">Parses the objects contained in an object stream.</param>
    /// <exception cref="ArgumentNullException">Any argument other than <paramref name="log"/> is null.</exception>
    public PdfObjectParser(ILog log, CosBaseParser baseParser, CosStreamParser streamParser, CrossReferenceTable crossReferenceTable,
        BruteForceSearcher bruteForceSearcher,
        CosObjectPool objectPool,
        ObjectStreamParser objectStreamParser)
    {
        this.log = log ?? new NoOpLog();
        this.baseParser = baseParser ?? throw new ArgumentNullException(nameof(baseParser));
        this.streamParser = streamParser ?? throw new ArgumentNullException(nameof(streamParser));
        this.crossReferenceTable = crossReferenceTable ?? throw new ArgumentNullException(nameof(crossReferenceTable));
        this.bruteForceSearcher = bruteForceSearcher ?? throw new ArgumentNullException(nameof(bruteForceSearcher));
        this.objectPool = objectPool ?? throw new ArgumentNullException(nameof(objectPool));
        this.objectStreamParser = objectStreamParser ?? throw new ArgumentNullException(nameof(objectStreamParser));
    }

    /// <summary>
    /// Parses the object identified by <paramref name="indirectReference"/>.
    /// </summary>
    /// <param name="indirectReference">The object number and generation of the object to load.</param>
    /// <param name="reader">The random access reader to read the object from.</param>
    /// <param name="isLenientParsing">
    /// When true, an object missing from the cross reference table is looked up in the brute force
    /// search results, and a missing 'endobj' is logged rather than thrown.
    /// </param>
    /// <param name="requireExistingObject">
    /// When true, the object must have a positive offset in the cross reference table.
    /// </param>
    /// <returns>
    /// The object already held by the pool if there is one, otherwise the newly parsed object,
    /// or <see cref="CosNull.Null"/> if no location could be found for it.
    /// </returns>
    /// <exception cref="InvalidOperationException">
    /// <paramref name="requireExistingObject"/> is true and the object is undefined or compressed,
    /// or the data at the object's offset is not the expected object.
    /// </exception>
    public CosBase Parse(IndirectReference indirectReference, IRandomAccessRead reader, bool isLenientParsing = true, bool requireExistingObject = false)
    {
        var key = new CosObjectKey(indirectReference.ObjectNumber, indirectReference.Generation);

        var pdfObject = objectPool.GetOrCreateDefault(key);

        // Already loaded, nothing to parse.
        if (pdfObject.GetObject() != null)
        {
            return pdfObject.GetObject();
        }

        var offsetOrStreamNumber = TryGet(key, crossReferenceTable.ObjectOffsets);

        if (requireExistingObject && (offsetOrStreamNumber == null || offsetOrStreamNumber <= 0))
        {
            throw new InvalidOperationException("Object must be defined and not compressed: " + key);
        }

        if (isLenientParsing && offsetOrStreamNumber == null)
        {
            // The cross reference table does not know this object, fall back to the brute force search results.
            var locations = bruteForceSearcher.GetObjectLocations();

            offsetOrStreamNumber = TryGet(key, locations);

            if (offsetOrStreamNumber != null)
            {
                // Record the recovered location in the cross reference table.
                crossReferenceTable.UpdateOffset(key, offsetOrStreamNumber.Value);
            }
        }

        if (offsetOrStreamNumber == null)
        {
            return CosNull.Null;
        }

        // Zero or less means the value is the negated number of the containing object stream.
        var isCompressedStreamObject = offsetOrStreamNumber <= 0;

        if (!isCompressedStreamObject)
        {
            return ParseObjectFromFile(offsetOrStreamNumber.Value, reader, key, objectPool, isLenientParsing);
        }

        return ParseCompressedStreamObject(reader, -offsetOrStreamNumber.Value, indirectReference.ObjectNumber, isLenientParsing);
    }

    /// <summary>
    /// Reads the object located at <paramref name="offset"/> in the file and checks it is the object expected for <paramref name="key"/>.
    /// </summary>
    private CosBase ParseObjectFromFile(long offset, IRandomAccessRead reader,
        CosObjectKey key,
        CosObjectPool pool,
        bool isLenientParsing)
    {
        reader.Seek(offset);

        var objectNumber = ObjectHelper.ReadObjectNumber(reader);
        var objectGeneration = ObjectHelper.ReadGenerationNumber(reader);

        ReadHelper.ReadExpectedString(reader, "obj", true);

        if (objectNumber != key.Number || objectGeneration != key.Generation)
        {
            throw new InvalidOperationException($"Xref for {key} points to object {objectNumber} {objectGeneration} at {offset}");
        }

        ReadHelper.SkipSpaces(reader);

        var baseObject = baseParser.Parse(reader, pool);

        var endObjectKey = ReadHelper.ReadString(reader);

        var atStreamStart = string.Equals(endObjectKey, "stream");

        if (atStreamStart)
        {
            // Step back over the keyword which was just read before handing over to the stream parsing.
            var streamStartBytes = OtherEncodings.StringAsLatin1Bytes(endObjectKey);

            reader.Rewind(streamStartBytes.Length);

            baseObject = ReadNormalObjectStream(reader, baseObject, offset, isLenientParsing, out endObjectKey);
        }

        if (!string.Equals(endObjectKey, "endobj"))
        {
            var message =
                $"Object ({objectNumber}:{objectGeneration}) at offset {offset} does not end with \'endobj\' but with \'{endObjectKey}\'";

            if (isLenientParsing)
            {
                log.Warn(message);
            }
            else
            {
                throw new InvalidOperationException(message);
            }
        }

        return baseObject;
    }

    /// <summary>
    /// Reads the stream following a stream dictionary and the text which follows the stream.
    /// </summary>
    /// <param name="reader">The reader positioned at the start of the stream.</param>
    /// <param name="currentBase">The object read before the stream, this must be a dictionary.</param>
    /// <param name="offset">The offset of the containing object, used for error messages only.</param>
    /// <param name="isLenientParsing">Passed through to the stream parser.</param>
    /// <param name="endObjectKey">The text found after the stream, expected by the caller to be 'endobj'.</param>
    /// <returns>The parsed stream.</returns>
    /// <exception cref="InvalidOperationException"><paramref name="currentBase"/> is not a dictionary.</exception>
    private CosBase ReadNormalObjectStream(IRandomAccessRead reader, CosBase currentBase, long offset,
        bool isLenientParsing,
        out string endObjectKey)
    {
        if (currentBase is PdfDictionary dictionary)
        {
            RawCosStream stream = streamParser.Parse(reader, dictionary, isLenientParsing);

            currentBase = stream;
        }
        else
        {
            // this is not legal
            // the combination of a dict and the stream/endstream
            // forms a complete stream object
            throw new InvalidOperationException($"Stream not preceded by dictionary (offset: {offset}).");
        }

        ReadHelper.SkipSpaces(reader);
        endObjectKey = ReadHelper.ReadLine(reader);

        // We have a case with a second 'endstream' before endobj.
        // A line starting with 'endstream' can never also start with 'endobj', so a single ordinal check is enough.
        if (endObjectKey.StartsWith(EndStreamKeyword, StringComparison.Ordinal))
        {
            // Keep whatever follows the extra 'endstream' on the same line.
            endObjectKey = endObjectKey.Substring(EndStreamKeyword.Length).Trim();

            if (endObjectKey.Length == 0)
            {
                // no other characters in extra endstream line
                // read next line
                endObjectKey = ReadHelper.ReadLine(reader);
            }
        }

        return currentBase;
    }

    /// <summary>
    /// Loads the object stream with number <paramref name="streamObjectNumber"/> and returns the object
    /// numbered <paramref name="requestedNumber"/> from it.
    /// </summary>
    /// <returns>
    /// The matching object from the stream, or <see cref="CosNull.Null"/> if the stream could not be loaded
    /// or does not contain the requested object.
    /// </returns>
    private CosBase ParseCompressedStreamObject(IRandomAccessRead reader, long streamObjectNumber, long requestedNumber, bool isLenientParsing)
    {
        // The object stream itself must be defined and uncompressed.
        var baseStream = Parse(new IndirectReference(streamObjectNumber, 0), reader, isLenientParsing, true);

        if (!(baseStream is RawCosStream stream))
        {
            log.Warn($"Could not find a stream for the object number, defaults to returning CosNull: {streamObjectNumber}");

            return CosNull.Null;
        }

        var objects = objectStreamParser.Parse(stream, objectPool);

        // register all objects which are referenced to be contained in object stream
        foreach (var next in objects)
        {
            var streamKey = new CosObjectKey(next);
            var offset = TryGet(streamKey, crossReferenceTable.ObjectOffsets);

            // Only objects the cross reference table places in this particular stream are registered.
            if (offset != null && offset == -streamObjectNumber)
            {
                var streamObject = objectPool.Get(streamKey);
                streamObject.SetObject(next.GetObject());
            }
        }

        var matchingStreamObject = objects.FirstOrDefault(x => x.GetObjectNumber() == requestedNumber);

        if (matchingStreamObject != null)
        {
            return matchingStreamObject;
        }

        log.Error($"Could not find the object {requestedNumber} in the stream for object {streamObjectNumber}. Returning CosNull.");

        return CosNull.Null;
    }

    /// <summary>
    /// Gets the value stored for <paramref name="key"/>, or null if the dictionary does not contain the key.
    /// </summary>
    private static T? TryGet<T, TKey>(TKey key, IReadOnlyDictionary<TKey, T> dictionary) where T : struct
    {
        return dictionary.TryGetValue(key, out var value) ? value : default(T?);
    }
}
|
||||
}
|
@ -9,7 +9,14 @@
|
||||
|
||||
internal class PageContentParser : IPageContentParser
|
||||
{
|
||||
public IReadOnlyList<IGraphicsStateOperation> Parse(IGraphicsStateOperationFactory operationFactory, IInputBytes inputBytes)
|
||||
private readonly IGraphicsStateOperationFactory operationFactory;
|
||||
|
||||
/// <summary>
/// Creates a new <see cref="PageContentParser"/> which keeps the supplied operation factory for later use.
/// </summary>
/// <param name="operationFactory">The factory stored by this parser; must not be null.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="operationFactory"/> is null.</exception>
public PageContentParser(IGraphicsStateOperationFactory operationFactory)
{
    // Fail at construction rather than with a null reference the first time the factory is needed.
    this.operationFactory = operationFactory ?? throw new System.ArgumentNullException(nameof(operationFactory));
}
|
||||
|
||||
public IReadOnlyList<IGraphicsStateOperation> Parse(IInputBytes inputBytes)
|
||||
{
|
||||
var scanner = new CoreTokenScanner(inputBytes);
|
||||
|
||||
|
@ -1,57 +1,12 @@
|
||||
namespace UglyToad.Pdf.Parser.PageTree
|
||||
{
|
||||
using System;
|
||||
using Content;
|
||||
using ContentStream;
|
||||
using ContentStream.TypedAccessors;
|
||||
using Cos;
|
||||
using Filters;
|
||||
using Fonts;
|
||||
|
||||
internal class PageParser
|
||||
{
|
||||
public Page Parse(int number, PdfDictionary dictionary, ParsingArguments arguments)
|
||||
{
|
||||
if (dictionary == null)
|
||||
{
|
||||
throw new ArgumentNullException(nameof(dictionary));
|
||||
}
|
||||
|
||||
if (arguments == null)
|
||||
{
|
||||
throw new ArgumentNullException(nameof(arguments));
|
||||
}
|
||||
|
||||
if (!dictionary.IsType(CosName.PAGE))
|
||||
{
|
||||
throw new InvalidOperationException("Expected a Dictionary of Type Page, instead got this: " + dictionary);
|
||||
}
|
||||
|
||||
return new Page(number, dictionary, new PageTreeMembers(), arguments);
|
||||
}
|
||||
}
|
||||
|
||||
internal class FontParser
|
||||
{
|
||||
public Font Parse(PdfDictionary dictionary, ParsingArguments arguments)
|
||||
{
|
||||
var type = dictionary.GetName(CosName.SUBTYPE);
|
||||
|
||||
if (CosName.Equals(type, CosName.TYPE0))
|
||||
{
|
||||
var compositeFont = arguments.Container.Get<CompositeFontParser>()
|
||||
.Parse(dictionary, arguments);
|
||||
}
|
||||
else
|
||||
{
|
||||
var simpleFont = arguments.Container.Get<SimpleFontParser>()
|
||||
.Parse(dictionary, arguments);
|
||||
}
|
||||
|
||||
return new Font();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
internal class CompositeFontParser
|
||||
{
|
||||
public CompositeFont Parse(PdfDictionary dictionary, ParsingArguments arguments)
|
||||
|
@ -2,7 +2,6 @@
|
||||
{
|
||||
using System.Collections.Generic;
|
||||
using Content;
|
||||
using ContentStream;
|
||||
using Cos;
|
||||
|
||||
/// <summary>
|
||||
@ -37,34 +36,5 @@
|
||||
{
|
||||
return fonts.ContainsKey(name);
|
||||
}
|
||||
|
||||
internal bool GetFont(CosName name, ParsingArguments arguments, out Font value)
|
||||
{
|
||||
if (fontObjects.TryGetValue(name, out value))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!fonts.TryGetValue(name, out var key))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
var dictionary = arguments.Container.Get<DynamicParser>()
|
||||
.Parse(arguments, key, false) as PdfDictionary;
|
||||
|
||||
if (dictionary == null)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
var font = arguments.Container.Get<FontParser>()
|
||||
.Parse(dictionary, arguments);
|
||||
|
||||
fontObjects[name] = font;
|
||||
|
||||
// retrieve and cache
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
@ -14,9 +14,9 @@
|
||||
|
||||
public BruteForceSearcher BruteForceSearcher { get; }
|
||||
|
||||
public ResourceContainer ResourceContainer { get; }
|
||||
public IResourceStore ResourceContainer { get; }
|
||||
|
||||
public ParsingCachingProviders(CosObjectPool objectPool, BruteForceSearcher bruteForceSearcher, ResourceContainer resourceContainer)
|
||||
public ParsingCachingProviders(CosObjectPool objectPool, BruteForceSearcher bruteForceSearcher, IResourceStore resourceContainer)
|
||||
{
|
||||
ObjectPool = objectPool ?? throw new ArgumentNullException(nameof(objectPool));
|
||||
BruteForceSearcher = bruteForceSearcher ?? throw new ArgumentNullException(nameof(bruteForceSearcher));
|
||||
|
@ -5,7 +5,15 @@
|
||||
using Content;
|
||||
using ContentStream;
|
||||
using Cos;
|
||||
using Filters;
|
||||
using Fonts;
|
||||
using Fonts.Parser;
|
||||
using Fonts.Parser.Handlers;
|
||||
using Fonts.Parser.Parts;
|
||||
using Fonts.TrueType.Parser;
|
||||
using Graphics;
|
||||
using IO;
|
||||
using Logging;
|
||||
using Parts;
|
||||
using Parts.CrossReference;
|
||||
using Util;
|
||||
@ -37,6 +45,8 @@
|
||||
|
||||
private static PdfDocument OpenDocument(IRandomAccessRead reader, IContainer container, bool isLenientParsing)
|
||||
{
|
||||
var log = container.Get<ILog>();
|
||||
|
||||
var version = container.Get<FileHeaderParser>().ReadHeader(reader, isLenientParsing);
|
||||
|
||||
var crossReferenceOffset = container.Get<FileTrailerParser>().GetXrefOffset(reader, isLenientParsing);
|
||||
@ -46,9 +56,27 @@
|
||||
var crossReferenceTable = container.Get<FileCrossReferenceTableParser>()
|
||||
.Parse(reader, isLenientParsing, crossReferenceOffset, pool);
|
||||
|
||||
var dynamicParser = container.Get<DynamicParser>();
|
||||
var filterProvider = container.Get<IFilterProvider>();
|
||||
var bruteForceSearcher = new BruteForceSearcher(reader);
|
||||
var resourceContainer = new ResourceContainer();
|
||||
var pdfObjectParser = new PdfObjectParser(container.Get<ILog>(), container.Get<CosBaseParser>(),
|
||||
container.Get<CosStreamParser>(), crossReferenceTable, bruteForceSearcher, pool, container.Get<ObjectStreamParser>());
|
||||
|
||||
var trueTypeFontParser = new TrueTypeFontParser();
|
||||
var fontDescriptorFactory = new FontDescriptorFactory();
|
||||
|
||||
var cidFontFactory = new CidFontFactory(fontDescriptorFactory, trueTypeFontParser, pdfObjectParser, filterProvider);
|
||||
|
||||
var cMapCache = new CMapCache(new CMapParser());
|
||||
|
||||
var fontFactory = new FontFactory(container.Get<ILog>(), new Type0FontHandler(cidFontFactory,
|
||||
cMapCache,
|
||||
filterProvider,
|
||||
pdfObjectParser));
|
||||
|
||||
var dynamicParser = container.Get<DynamicParser>();
|
||||
var resourceContainer = new ResourceContainer(pdfObjectParser, fontFactory);
|
||||
|
||||
var pageFactory = new PageFactory(resourceContainer, pdfObjectParser, filterProvider, new PageContentParser(new ReflectionGraphicsStateOperationFactory()));
|
||||
|
||||
var root = ParseTrailer(reader, crossReferenceTable, dynamicParser, bruteForceSearcher, pool,
|
||||
isLenientParsing);
|
||||
@ -66,7 +94,7 @@
|
||||
|
||||
var caching = new ParsingCachingProviders(pool, bruteForceSearcher, resourceContainer);
|
||||
|
||||
return new PdfDocument(reader, version, crossReferenceTable, container, isLenientParsing, caching, new Catalog(rootDictionary));
|
||||
return new PdfDocument(log, reader, version, crossReferenceTable, isLenientParsing, caching, pageFactory, pdfObjectParser, new Catalog(rootDictionary));
|
||||
}
|
||||
|
||||
private static CosBase ParseTrailer(IRandomAccessRead reader, CrossReferenceTable crossReferenceTable,
|
||||
|
@ -4,6 +4,7 @@
|
||||
using Content;
|
||||
using Cos;
|
||||
using IO;
|
||||
using Logging;
|
||||
using Parser;
|
||||
using Parser.Parts;
|
||||
using Util;
|
||||
@ -17,8 +18,8 @@
|
||||
private readonly HeaderVersion version;
|
||||
[NotNull]
|
||||
private readonly CrossReferenceTable crossReferenceTable;
|
||||
[NotNull]
|
||||
private readonly IContainer container;
|
||||
|
||||
private readonly ILog log;
|
||||
private readonly bool isLenientParsing;
|
||||
[NotNull]
|
||||
private readonly ParsingCachingProviders cachingProviders;
|
||||
@ -29,21 +30,21 @@
|
||||
[NotNull]
|
||||
public Pages Pages { get; }
|
||||
|
||||
internal PdfDocument(IRandomAccessRead reader, HeaderVersion version, CrossReferenceTable crossReferenceTable,
|
||||
IContainer container,
|
||||
internal PdfDocument(ILog log, IRandomAccessRead reader, HeaderVersion version, CrossReferenceTable crossReferenceTable,
|
||||
bool isLenientParsing,
|
||||
ParsingCachingProviders cachingProviders,
|
||||
IPageFactory pageFactory,
|
||||
IPdfObjectParser pdfObjectParser,
|
||||
Catalog catalog)
|
||||
{
|
||||
this.log = log;
|
||||
this.reader = reader ?? throw new ArgumentNullException(nameof(reader));
|
||||
this.version = version ?? throw new ArgumentNullException(nameof(version));
|
||||
this.crossReferenceTable = crossReferenceTable ?? throw new ArgumentNullException(nameof(crossReferenceTable));
|
||||
this.container = container ?? throw new ArgumentNullException(nameof(container));
|
||||
this.isLenientParsing = isLenientParsing;
|
||||
this.cachingProviders = cachingProviders ?? throw new ArgumentNullException(nameof(cachingProviders));
|
||||
Catalog = catalog ?? throw new ArgumentNullException(nameof(catalog));
|
||||
var arguments = new ParsingArguments(reader, crossReferenceTable, cachingProviders, container, isLenientParsing);
|
||||
Pages = new Pages(Catalog, arguments);
|
||||
Pages = new Pages(log, Catalog, pdfObjectParser, pageFactory, reader, isLenientParsing);
|
||||
}
|
||||
|
||||
public static PdfDocument Open(byte[] fileBytes, ParsingOptions options = null) => PdfDocumentFactory.Open(fileBytes, options);
|
||||
|
@ -1,6 +1,7 @@
|
||||
namespace UglyToad.Pdf.Tokenization
|
||||
{
|
||||
using System.Collections.Generic;
|
||||
using ContentStream;
|
||||
using IO;
|
||||
using Parser.Parts;
|
||||
using Scanner;
|
||||
@ -79,7 +80,7 @@
|
||||
|
||||
if (r == OperatorToken.R)
|
||||
{
|
||||
result[key] = new IndirectReferenceToken(new IndirectReference(num.Long, gen.Long));
|
||||
result[key] = new IndirectReferenceToken(new IndirectReference(num.Long, gen.Int));
|
||||
i = i + 2;
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,7 @@
|
||||
namespace UglyToad.Pdf.Tokenization.Tokens
|
||||
{
|
||||
using ContentStream;
|
||||
|
||||
public class IndirectReferenceToken : IDataToken<IndirectReference>
|
||||
{
|
||||
public IndirectReference Data { get; }
|
||||
@ -10,16 +12,4 @@
|
||||
}
|
||||
}
|
||||
|
||||
public struct IndirectReference
|
||||
{
|
||||
public long ObjectNumber { get; }
|
||||
|
||||
public long Generation { get; }
|
||||
|
||||
public IndirectReference(long objectNumber, long generation)
|
||||
{
|
||||
ObjectNumber = objectNumber;
|
||||
Generation = generation;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -227,5 +227,9 @@
|
||||
<EmbeddedResource Include="Resources\CMap\UniKS-UTF16-V" />
|
||||
<EmbeddedResource Include="Resources\CMap\V" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<Folder Include="Fonts\Simple\" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
@ -1,12 +1,7 @@
|
||||
namespace UglyToad.Pdf.Util
|
||||
{
|
||||
using Filters;
|
||||
using Fonts;
|
||||
using Fonts.Parser;
|
||||
using Fonts.Parser.Handlers;
|
||||
using Fonts.Parser.Parts;
|
||||
using Fonts.TrueType.Parser;
|
||||
using Graphics;
|
||||
using Logging;
|
||||
using Parser;
|
||||
using Parser.PageTree;
|
||||
@ -49,19 +44,12 @@
|
||||
new CrossReferenceTableParser(logger, dictionaryParser, baseParser));
|
||||
|
||||
var resourceDictionaryParser = new ResourceDictionaryParser();
|
||||
var pageParser = new PageParser();
|
||||
var simpleFontParser = new SimpleFontParser();
|
||||
var compositeFontParser = new CompositeFontParser();
|
||||
var fontParser = new FontParser();
|
||||
var pageContentParser = new PageContentParser();
|
||||
var operationFactory = new ReflectionGraphicsStateOperationFactory();
|
||||
|
||||
var cmapParser = new CMapParser();
|
||||
var afmParser = new AdobeFontMetricsParser();
|
||||
|
||||
var type0FontFactory = new Type0FontHandler(new CidFontFactory(new FontDescriptorFactory(), new TrueTypeFontParser()), new CMapCache(cmapParser), filterProvider);
|
||||
var fontFactory = new FontFactory(type0FontFactory);
|
||||
|
||||
|
||||
var container = new Container();
|
||||
container.Register(headerParser);
|
||||
container.Register(trailerParser);
|
||||
@ -75,15 +63,11 @@
|
||||
container.Register(objectStreamParser);
|
||||
container.Register(filterProvider);
|
||||
container.Register(resourceDictionaryParser);
|
||||
container.Register(pageParser);
|
||||
container.Register(simpleFontParser);
|
||||
container.Register(compositeFontParser);
|
||||
container.Register(fontParser);
|
||||
container.Register(pageContentParser);
|
||||
container.Register(operationFactory);
|
||||
container.Register(cmapParser);
|
||||
container.Register(afmParser);
|
||||
container.Register(fontFactory);
|
||||
container.Register(logger);
|
||||
|
||||
return container;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user