create Type0 font, notes about font format, heavy duty refactoring to inject dependencies rather than god object

This commit is contained in:
Eliot Jones 2017-12-22 23:54:54 +00:00
parent 206eb91ff1
commit f4d58e8aa9
43 changed files with 853 additions and 296 deletions

118
font-notes.md Normal file
View File

@ -0,0 +1,118 @@
# Fonts #
## Types of Font ##
<pre><code>
------ Composite Fonts -------
Type0 (Composed of glyphs from a CIDFont)
Children:
CIDFont CIDFontType0 (Type 1 font glyph descriptions)
CIDFontType2 (TrueType font glyph descriptions)
------ Simple Fonts Below -------
Type 1 Type 1 (defines glyphs using Type 1 font technology)
MMType1 (multiple master font - extends type 1 fonts to support many typefaces for a single font)
Type 3 (defines glyphs with streams of PDF graphics operations)
TrueType (from the TrueType font format)
</code></pre>
## Terminology ##
+ Font dictionary: PDF dictionary with information about the font
+ Font program: Glyph information in specialized font format
## Composite Fonts ##
+ Glyphs are selected from a font-like CIDFont.
+ Has a single CIDFont descendant.
+ Multiple-byte sequences select a single glyph.
Used for multiple-byte character encodings and large numbers of glyphs.
Well suited to Chinese, Japanese and Korean (CJK).
CID stands for character identifier. This is a number used to access glyph descriptions.
The CMap maps between character codes and CID numbers for the glyphs.
A CIDFont file provides the glyph descriptions for a character collection. The glyph descriptions are
identified by CIDs.
A CID-keyed font combines a CMap with a CIDFont.
The **Encoding** contains the CMap.
The **DescendantFonts** contains the CIDFont to use with the CMap.
### CIDFont ###
A Type0 font descendant (CIDFont) must be either a CIDFontType0 (Adobe Type 1) or CIDFontType2 (TrueType).
For Type 2 CIDFonts (TrueType) the glyphs are identified by a glyph index (GID).
+ If the font program is embedded as a stream the CIDFont dictionary must contain a CIDToGIDMap which maps
from CIDs to Glyph Indexes.
+ If the font program is a predefined external font the CIDFont must not contain a CIDToGIDMap. It
may only use a predefined CMap.
Although the CID may not be used to select the glyph (as in the predefined case), it is always used to select the
glyph metrics. Every CIDFont must describe CID 0, which is the ```.notdef``` glyph used for missing characters.
### Glyph Metrics in CIDFonts ###
Widths for CIDFonts are defined in the DW and W entries in the CIDFont dictionary.
+ DW provides the default width for glyphs which are not specified individually.
+ W defines widths for individual CIDs.
Vertical writing uses additional entries (DW2 and W2) for vertical metrics; see the PDF specification.
### CMap ###
The CMap maps from character codes to character selectors (CIDs).
The CMap also defines the writing mode: horizontal or vertical.
### Type 0 Fonts ###
The **Font dictionary** has the following entries:
+ Type (name): /Font
+ Subtype (name): /Type0
+ BaseFont (name): The PostScript name of the font.
+ Encoding (name/stream R): Name of a predefined CMap or a stream for an embedded CMap.
+ DescendantFonts (array): Single element pointing to the CIDFont.
+ ToUnicode (stream R)?: Stream containing a CMap file to map codes to Unicode.
## Simple Fonts ##
+ Glyphs are selected by single-byte character codes. Index into a 256 entry glyph table.
+ Only supports horizontal writing mode.
## Further Description ##
### Type 1 Fonts ###
The **Font program** is a PostScript program describing glyph shape. See the Adobe Type 1 Font Format specification.
The **Font dictionary** has the following entries:
+ Type (name): /Font
+ Subtype (name): /Type1
+ Name (name?): Font name
+ BaseFont (name): The PostScript name of the font. Equivalent to the FontName value in the **Font program**.
+ FirstChar (int): The first character code in the Widths array.
+ LastChar (int): The last character code in the Widths array.
+ Widths (numeric[] R): An array defining the glyph width in units of 1000 == 1 text space unit.
+ FontDescriptor (Dict<> R): Describes font metrics other than widths.
+ Encoding (name/Dict<> R): Specifies the character encoding if different from default.
+ ToUnicode (stream R): CMap mapping character code to Unicode.

View File

@ -2,6 +2,7 @@
{
using System.Collections.Generic;
using Content;
using ContentStream;
using IO;
using Pdf.Cos;
using Pdf.Fonts;
@ -44,6 +45,10 @@
internal class TestResourceStore : IResourceStore
{
public void LoadResourceDictionary(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
{
}
public IFont GetFont(CosName name)
{
return null;

View File

@ -13,15 +13,14 @@
public class PageContentParserTests
{
private readonly PageContentParser parser = new PageContentParser();
private readonly IGraphicsStateOperationFactory operationFactory = new ReflectionGraphicsStateOperationFactory();
private readonly PageContentParser parser = new PageContentParser(new ReflectionGraphicsStateOperationFactory());
[Fact]
public void CorrectlyExtractsOperations()
{
var input = StringBytesTestConverter.Convert(SimpleGoogleDocPageContent, false);
var result = parser.Parse(new ReflectionGraphicsStateOperationFactory(), input.Bytes);
var result = parser.Parse(input.Bytes);
}
[Fact]
@ -36,7 +35,7 @@
ET";
var input = StringBytesTestConverter.Convert(s, false);
var result = parser.Parse(operationFactory, input.Bytes);
var result = parser.Parse(input.Bytes);
Assert.Equal(7, result.Count);
@ -72,7 +71,7 @@ ET";
var input = StringBytesTestConverter.Convert(s, false);
var result = parser.Parse(operationFactory, input.Bytes);
var result = parser.Parse(input.Bytes);
Assert.Equal(4, result.Count);

View File

@ -3,6 +3,7 @@ namespace UglyToad.Pdf.Tests.Tokenization
{
using System;
using System.Collections.Generic;
using ContentStream;
using Pdf.Cos;
using Pdf.Tokenization;
using Pdf.Tokenization.Tokens;

View File

@ -0,0 +1,11 @@
namespace UglyToad.Pdf.Content
{
    using ContentStream;
    using IO;

    /// <summary>
    /// Builds <see cref="Page"/> instances from page dictionaries.
    /// </summary>
    internal interface IPageFactory
    {
        /// <summary>
        /// Creates the page with the given number from its dictionary.
        /// </summary>
        /// <param name="number">The number of the page to create.</param>
        /// <param name="dictionary">The dictionary describing the page.</param>
        /// <param name="pageTreeMembers">Page tree values supplied alongside the page dictionary.</param>
        /// <param name="reader">The reader passed on to whatever needs to load further objects for the page.</param>
        /// <param name="isLenientParsing">Whether lenient parsing was requested.</param>
        /// <returns>The created <see cref="Page"/>.</returns>
        Page Create(
            int number,
            PdfDictionary dictionary,
            PageTreeMembers pageTreeMembers,
            IRandomAccessRead reader,
            bool isLenientParsing);
    }
}

View File

@ -0,0 +1,14 @@
namespace UglyToad.Pdf.Content
{
    using ContentStream;
    using Cos;
    using Fonts;
    using IO;

    /// <summary>
    /// Holds the resources made available to a page and allows them to be retrieved by name.
    /// </summary>
    internal interface IResourceStore
    {
        /// <summary>
        /// Loads the resources declared by the given resource dictionary into the store.
        /// </summary>
        /// <param name="dictionary">The resource dictionary to load from.</param>
        /// <param name="reader">The reader available for loading any further objects.</param>
        /// <param name="isLenientParsing">Whether lenient parsing was requested.</param>
        void LoadResourceDictionary(
            PdfDictionary dictionary,
            IRandomAccessRead reader,
            bool isLenientParsing);

        /// <summary>
        /// Gets the font stored under the given name.
        /// </summary>
        /// <param name="name">The name of the font to retrieve.</param>
        /// <returns>The matching <see cref="IFont"/>.</returns>
        IFont GetFont(CosName name);
    }
}

View File

@ -2,21 +2,9 @@
{
using System;
using System.Collections.Generic;
using System.Diagnostics;
using ContentStream;
using Cos;
using Filters;
using Geometry;
using Graphics;
using IO;
using Parser;
using Util;
public class Page
{
private readonly ParsingArguments parsingArguments;
private readonly PdfDictionary dictionary;
/// <summary>
/// The 1 indexed page number.
/// </summary>
@ -28,78 +16,16 @@
public IReadOnlyList<string> Text => Content?.Text ?? new string[0];
internal Page(int number, PdfDictionary dictionary, PageTreeMembers pageTreeMembers, ParsingArguments parsingArguments)
internal Page(int number, MediaBox mediaBox, PageContent content)
{
if (number <= 0)
{
throw new ArgumentOutOfRangeException(nameof(number), "Page number cannot be 0 or negative.");
}
this.dictionary = dictionary ?? throw new ArgumentNullException(nameof(dictionary));
this.parsingArguments = parsingArguments ?? throw new ArgumentNullException(nameof(parsingArguments));
Number = number;
var type = dictionary.GetName(CosName.TYPE);
if (type != null && !type.Equals(CosName.PAGE) && !parsingArguments.IsLenientParsing)
{
throw new InvalidOperationException($"Created page number {number} but its type was specified as {type} rather than 'Page'.");
}
if (dictionary.TryGetItemOfType(CosName.MEDIA_BOX, out COSArray mediaboxArray))
{
var x1 = mediaboxArray.getInt(0);
var y1 = mediaboxArray.getInt(1);
var x2 = mediaboxArray.getInt(2);
var y2 = mediaboxArray.getInt(3);
MediaBox = new MediaBox(new PdfRectangle(x1, y1, x2, y2));
}
else
{
MediaBox = pageTreeMembers.GetMediaBox();
if (MediaBox == null)
{
if (parsingArguments.IsLenientParsing)
{
MediaBox = MediaBox.A4;
}
else
{
throw new InvalidOperationException("No mediabox was present for page: " + number);
}
}
}
if (dictionary.GetItemOrDefault(CosName.RESOURCES) is PdfDictionary resource)
{
parsingArguments.CachingProviders.ResourceContainer.LoadResourceDictionary(resource, parsingArguments);
}
var contentObject = dictionary.GetItemOrDefault(CosName.CONTENTS) as CosObject;
if (contentObject != null)
{
var contentStream = parsingArguments.Container.Get<DynamicParser>()
.Parse(parsingArguments, contentObject, false) as RawCosStream;
if (contentStream == null)
{
throw new InvalidOperationException("Failed to parse the content for the page: " + number);
}
var contents = contentStream.Decode(parsingArguments.Container.Get<IFilterProvider>());
var operations = parsingArguments.Container.Get<PageContentParser>()
.Parse(parsingArguments.Container.Get<IGraphicsStateOperationFactory>(), new ByteArrayInputBytes(contents));
var context = new ContentStreamProcessor(MediaBox.Bounds, parsingArguments.CachingProviders.ResourceContainer);
var content = context.Process(operations);
Content = content;
}
MediaBox = mediaBox;
Content = content;
}
}
}

View File

@ -0,0 +1,99 @@
namespace UglyToad.Pdf.Content
{
    using System;
    using ContentStream;
    using Cos;
    using Filters;
    using Geometry;
    using Graphics;
    using IO;
    using Parser;

    /// <summary>
    /// Creates <see cref="Page"/> instances from page dictionaries: validates the declared type,
    /// resolves the media box, loads the page resources and processes the content stream.
    /// </summary>
    internal class PageFactory : IPageFactory
    {
        private readonly IResourceStore resourceStore;
        private readonly IPdfObjectParser pdfObjectParser;
        private readonly IFilterProvider filterProvider;
        private readonly IPageContentParser pageContentParser;

        /// <summary>
        /// Creates a new <see cref="PageFactory"/>.
        /// </summary>
        /// <param name="resourceStore">Receives the page's resource dictionary and is handed to the content stream processor.</param>
        /// <param name="pdfObjectParser">Used to load the object referenced by the page's Contents entry.</param>
        /// <param name="filterProvider">Used to decode the raw content stream.</param>
        /// <param name="pageContentParser">Parses the decoded content bytes into operations.</param>
        public PageFactory(IResourceStore resourceStore, IPdfObjectParser pdfObjectParser, IFilterProvider filterProvider,
            IPageContentParser pageContentParser)
        {
            this.resourceStore = resourceStore;
            this.pdfObjectParser = pdfObjectParser;
            this.filterProvider = filterProvider;
            this.pageContentParser = pageContentParser;
        }

        /// <summary>
        /// Creates the page with the given number from its dictionary.
        /// </summary>
        /// <param name="number">The number of the page.</param>
        /// <param name="dictionary">The page dictionary.</param>
        /// <param name="pageTreeMembers">Page tree values; its media box is used when the page does not declare one.</param>
        /// <param name="reader">The reader used to load the resources and content of the page.</param>
        /// <param name="isLenientParsing">When true, a wrong page type is tolerated and a missing media box defaults to A4.</param>
        /// <returns>The page with its media box and, where a Contents object reference is present, its processed content.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="dictionary"/> is null.</exception>
        /// <exception cref="InvalidOperationException">
        /// The type is not 'Page' or no media box is available (strict parsing only), or the content could not be parsed as a stream.
        /// </exception>
        public Page Create(int number, PdfDictionary dictionary, PageTreeMembers pageTreeMembers, IRandomAccessRead reader,
            bool isLenientParsing)
        {
            if (dictionary == null)
            {
                throw new ArgumentNullException(nameof(dictionary));
            }

            var type = dictionary.GetName(CosName.TYPE);

            if (type != null && !type.Equals(CosName.PAGE) && !isLenientParsing)
            {
                throw new InvalidOperationException($"Page {number} had its type specified as {type} rather than 'Page'.");
            }

            var mediaBox = ReadMediaBox(number, dictionary, pageTreeMembers, isLenientParsing);

            // Resources must be loaded before the content is processed since the processor reads from the same store.
            if (dictionary.GetItemOrDefault(CosName.RESOURCES) is PdfDictionary resource)
            {
                resourceStore.LoadResourceDictionary(resource, reader, isLenientParsing);
            }

            var content = ReadContent(number, dictionary, mediaBox, reader);

            return new Page(number, mediaBox, content);
        }

        /// <summary>
        /// Resolves the media box from the page dictionary, then the page tree, then (lenient parsing only) the A4 default.
        /// </summary>
        private static MediaBox ReadMediaBox(int number, PdfDictionary dictionary, PageTreeMembers pageTreeMembers,
            bool isLenientParsing)
        {
            if (dictionary.TryGetItemOfType(CosName.MEDIA_BOX, out COSArray mediaboxArray))
            {
                var x1 = mediaboxArray.getInt(0);
                var y1 = mediaboxArray.getInt(1);
                var x2 = mediaboxArray.getInt(2);
                var y2 = mediaboxArray.getInt(3);

                return new MediaBox(new PdfRectangle(x1, y1, x2, y2));
            }

            // A missing page tree context is treated the same as a page tree without a media box
            // rather than failing with a NullReferenceException.
            var inherited = pageTreeMembers?.GetMediaBox();

            if (inherited != null)
            {
                return inherited;
            }

            if (isLenientParsing)
            {
                return MediaBox.A4;
            }

            throw new InvalidOperationException("No mediabox was present for page: " + number);
        }

        /// <summary>
        /// Loads, decodes and processes the content stream referenced by the page's Contents entry, if there is one.
        /// </summary>
        private PageContent ReadContent(int number, PdfDictionary dictionary, MediaBox mediaBox, IRandomAccessRead reader)
        {
            // Only a Contents entry which is an object reference is handled here; anything else yields no content.
            var contentObject = dictionary.GetItemOrDefault(CosName.CONTENTS) as CosObject;

            if (contentObject == null)
            {
                return default(PageContent);
            }

            // NOTE(review): 'false' is passed as the last argument regardless of isLenientParsing - confirm this is intended.
            var contentStream = pdfObjectParser.Parse(contentObject.ToIndirectReference(), reader, false) as RawCosStream;

            if (contentStream == null)
            {
                throw new InvalidOperationException("Failed to parse the content for the page: " + number);
            }

            var contents = contentStream.Decode(filterProvider);

            var operations = pageContentParser.Parse(new ByteArrayInputBytes(contents));

            var context = new ContentStreamProcessor(mediaBox.Bounds, resourceStore);

            return context.Process(operations);
        }
    }
}

View File

@ -6,26 +6,26 @@
using ContentStream;
using ContentStream.TypedAccessors;
using Cos;
using IO;
using Logging;
using Parser;
using Parser.PageTree;
public class Pages
{
private readonly ILog log;
private readonly Catalog catalog;
private readonly ParsingArguments arguments;
private readonly IPdfObjectParser pdfObjectParser;
private readonly IPageFactory pageFactory;
private readonly IRandomAccessRead reader;
private readonly bool isLenientParsing;
private readonly PdfDictionary rootPageDictionary;
private readonly Dictionary<int, PdfDictionary> locatedPages = new Dictionary<int, PdfDictionary>();
public int Count { get; }
internal Pages(Catalog catalog, ParsingArguments arguments)
internal Pages(ILog log, Catalog catalog, IPdfObjectParser pdfObjectParser, IPageFactory pageFactory,
IRandomAccessRead reader, bool isLenientParsing)
{
if (arguments == null)
{
throw new ArgumentNullException(nameof(arguments));
}
if (catalog == null)
{
throw new ArgumentNullException(nameof(catalog));
@ -38,9 +38,9 @@
throw new InvalidOperationException("No pages were present in the catalog for this PDF document");
}
var pageObject = arguments.Container.Get<DynamicParser>().Parse(arguments, pages, false);
var pagesObject = pdfObjectParser.Parse(pages.ToIndirectReference(), reader, isLenientParsing);
if (!(pageObject is PdfDictionary catalogPageDictionary))
if (!(pagesObject is PdfDictionary catalogPageDictionary))
{
throw new InvalidOperationException("Could not find the root pages object: " + pages);
}
@ -51,8 +51,12 @@
Count = count;
this.log = log;
this.catalog = catalog;
this.arguments = arguments;
this.pdfObjectParser = pdfObjectParser;
this.pageFactory = pageFactory;
this.reader = reader;
this.isLenientParsing = isLenientParsing;
}
@ -60,7 +64,8 @@
{
if (locatedPages.TryGetValue(pageNumber, out PdfDictionary targetPageDictionary))
{
return new Page(pageNumber, targetPageDictionary, new PageTreeMembers(), arguments);
return pageFactory.Create(pageNumber, targetPageDictionary, new PageTreeMembers(), reader,
isLenientParsing);
}
var observed = new List<int>();
@ -73,8 +78,7 @@
throw new InvalidOperationException("Could not find the page with number: " + pageNumber);
}
var page = arguments.Container.Get<PageParser>()
.Parse(pageNumber, targetPageDictionary, arguments);
var page = pageFactory.Create(pageNumber, targetPageDictionary, new PageTreeMembers(), reader, isLenientParsing);
locatedPages[pageNumber] = targetPageDictionary;
@ -108,8 +112,7 @@
if (!type.Equals(CosName.PAGES))
{
arguments.Container.Get<ILog>()
.Warn("Did not find the expected type (Page or Pages) in dictionary: " + currentPageDictionary);
log.Warn("Did not find the expected type (Page or Pages) in dictionary: " + currentPageDictionary);
return false;
}
@ -120,7 +123,7 @@
foreach (var kid in kids.OfType<CosObject>())
{
// todo: exit early
var child = arguments.Container.Get<DynamicParser>().Parse(arguments, kid, false) as PdfDictionary;
var child = pdfObjectParser.Parse(kid.ToIndirectReference(), reader, isLenientParsing) as PdfDictionary;
var thisPageMatches = FindPage(child, soughtPageNumber, pageNumbersObserved);

View File

@ -4,31 +4,32 @@
using System.Collections.Generic;
using ContentStream;
using Cos;
using Filters;
using Fonts;
using Fonts.Cmap;
using Fonts.Parser;
using IO;
using Parser;
internal interface IResourceStore
{
IFont GetFont(CosName name);
}
internal class ResourceContainer : IResourceStore
{
private readonly IPdfObjectParser pdfObjectParser;
private readonly IFontFactory fontFactory;
private readonly Dictionary<CosName, IFont> loadedFonts = new Dictionary<CosName, IFont>();
internal void LoadResourceDictionary(PdfDictionary dictionary, ParsingArguments arguments)
public ResourceContainer(IPdfObjectParser pdfObjectParser, IFontFactory fontFactory)
{
this.pdfObjectParser = pdfObjectParser;
this.fontFactory = fontFactory;
}
public void LoadResourceDictionary(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
{
if (dictionary.TryGetValue(CosName.FONT, out var fontBase) && fontBase is PdfDictionary fontDictionary)
{
LoadFontDictionary(fontDictionary, arguments);
LoadFontDictionary(fontDictionary, reader, isLenientParsing);
}
}
private void LoadFontDictionary(PdfDictionary fontDictionary, ParsingArguments arguments)
private void LoadFontDictionary(PdfDictionary fontDictionary, IRandomAccessRead reader, bool isLenientParsing)
{
foreach (var pair in fontDictionary)
{
@ -39,24 +40,22 @@
if (!(pair.Value is CosObject objectKey))
{
if (arguments.IsLenientParsing)
if (isLenientParsing)
{
continue;
}
throw new InvalidOperationException($"The font with name {pair.Key} did not link to an object key. Value was: {pair.Value}.");
}
var dynamicParser = arguments.Get<DynamicParser>();
var fontObject = dynamicParser.Parse(arguments, objectKey, false) as PdfDictionary;
var fontObject = pdfObjectParser.Parse(objectKey.ToIndirectReference(), reader, false) as PdfDictionary;
if (fontObject == null)
{
throw new InvalidOperationException($"Could not retrieve the font with name: {pair.Key} which should have been object {objectKey.GetObjectNumber()}");
}
loadedFonts[pair.Key] = arguments.Get<FontFactory>().GetFont(fontObject, arguments);
loadedFonts[pair.Key] = fontFactory.Get(fontObject, reader, isLenientParsing);
}
}

View File

@ -0,0 +1,15 @@
namespace UglyToad.Pdf.ContentStream
{
    using System;

    /// <summary>
    /// Identifies an indirect object by its object number and generation number.
    /// </summary>
    public struct IndirectReference : IEquatable<IndirectReference>
    {
        /// <summary>
        /// The object number of the referenced object.
        /// </summary>
        public long ObjectNumber { get; }

        /// <summary>
        /// The generation number of the referenced object.
        /// </summary>
        public int Generation { get; }

        /// <summary>
        /// Creates a new <see cref="IndirectReference"/>.
        /// </summary>
        /// <param name="objectNumber">The object number.</param>
        /// <param name="generation">The generation number.</param>
        public IndirectReference(long objectNumber, int generation)
        {
            ObjectNumber = objectNumber;
            Generation = generation;
        }

        /// <summary>
        /// Two references are equal when both the object number and generation match.
        /// </summary>
        public bool Equals(IndirectReference other)
        {
            return ObjectNumber == other.ObjectNumber && Generation == other.Generation;
        }

        /// <inheritdoc />
        public override bool Equals(object obj)
        {
            return obj is IndirectReference other && Equals(other);
        }

        /// <inheritdoc />
        public override int GetHashCode()
        {
            // Explicit implementation avoids the reflection based default used for structs.
            unchecked
            {
                return (ObjectNumber.GetHashCode() * 397) ^ Generation;
            }
        }

        /// <summary>
        /// Whether the two references point to the same object number and generation.
        /// </summary>
        public static bool operator ==(IndirectReference left, IndirectReference right)
        {
            return left.Equals(right);
        }

        /// <summary>
        /// Whether the two references differ in object number or generation.
        /// </summary>
        public static bool operator !=(IndirectReference left, IndirectReference right)
        {
            return !left.Equals(right);
        }

        /// <summary>
        /// Formats the reference in the PDF indirect reference notation, e.g. "12 0 R".
        /// </summary>
        public override string ToString()
        {
            return $"{ObjectNumber} {Generation} R";
        }
    }
}

View File

@ -1,5 +1,7 @@
namespace UglyToad.Pdf.Cos
{
using ContentStream;
public class CosObject : CosBase, ICosUpdateInfo
{
private CosBase baseObject;
@ -124,5 +126,10 @@
}
public bool NeedsToBeUpdated { get; set; }
/// <summary>
/// Creates an <see cref="IndirectReference"/> from this object's object number and generation number.
/// </summary>
public IndirectReference ToIndirectReference() => new IndirectReference(objectNumber, generationNumber);
}
}

View File

@ -32,5 +32,7 @@
/// The definition of the character collection for the font.
/// </summary>
CharacterIdentifierSystemInfo SystemInfo { get; }
CidFontType CidFontType { get; }
}
}

View File

@ -1,6 +1,5 @@
namespace UglyToad.Pdf.Fonts.CidFonts
{
using Cmap;
using Cos;
/// <inheritdoc/>
@ -14,5 +13,6 @@
public CosName SubType { get; }
public CosName BaseFont { get; }
public CharacterIdentifierSystemInfo SystemInfo { get; }
public CidFontType CidFontType => CidFontType.Type0;
}
}

View File

@ -1,6 +1,5 @@
namespace UglyToad.Pdf.Fonts.CidFonts
{
using Cmap;
using Cos;
/// <inheritdoc />
@ -14,5 +13,6 @@
public CosName SubType { get; }
public CosName BaseFont { get; }
public CharacterIdentifierSystemInfo SystemInfo { get; }
public CidFontType CidFontType => CidFontType.Type2;
}
}

View File

@ -6,7 +6,7 @@
using IO;
using Util.JetBrains.Annotations;
public class CMap
internal class CMap
{
public CharacterIdentifierSystemInfo Info { get; }
@ -30,6 +30,8 @@
[NotNull]
public IReadOnlyList<CidCharacterMapping> CidCharacterMappings { get; }
public WritingMode WritingMode { get; }
public bool HasCidMappings => CidCharacterMappings.Count > 0 || CidRanges.Count > 0;
public bool HasUnicodeMappings => BaseFontCharacterMap.Count > 0;
@ -41,7 +43,7 @@
{
Info = info;
Type = type;
WMode = wMode;
WritingMode = (WritingMode)wMode;
Name = name;
Version = version;
BaseFontCharacterMap = baseFontCharacterMap ?? throw new ArgumentNullException(nameof(baseFontCharacterMap));
@ -51,8 +53,7 @@
maxCodeLength = CodespaceRanges.Max(x => x.CodeLength);
minCodeLength = CodespaceRanges.Min(x => x.CodeLength);
}
private int wmode = 0;
private string cmapName = null;
private string cmapVersion = null;
private int cmapType = -1;

View File

@ -0,0 +1,8 @@
namespace UglyToad.Pdf.Fonts.Cmap
{
    /// <summary>
    /// The writing mode of a CMap. The numeric values are significant and must not be changed.
    /// </summary>
    internal enum WritingMode
    {
        /// <summary>
        /// Horizontal writing (value 0).
        /// </summary>
        Horizontal = 0,

        /// <summary>
        /// Vertical writing (value 1).
        /// </summary>
        Vertical = 1
    }
}

View File

@ -0,0 +1,55 @@
namespace UglyToad.Pdf.Fonts.Composite
{
    using System;
    using Cmap;
    using IO;
    using Util.JetBrains.Annotations;

    /// <summary>
    /// Defines the information content (actual text) of the font
    /// as opposed to the display format.
    /// </summary>
    internal class ToUnicodeCMap
    {
        [CanBeNull]
        private readonly CMap cMap;

        /// <summary>
        /// Does the font provide a CMap to map CIDs to Unicode values?
        /// </summary>
        public bool CanMapToUnicode => cMap != null;

        /// <summary>
        /// Is this document (unexpectedly) using a predefined Identity-H/V CMap as its ToUnicode CMap?
        /// </summary>
        public bool IsUsingIdentityAsUnicodeMap { get; }

        /// <summary>
        /// Wraps the optional ToUnicode CMap of a font.
        /// </summary>
        /// <param name="cMap">The ToUnicode CMap, or null where the font does not provide one.</param>
        public ToUnicodeCMap([CanBeNull]CMap cMap)
        {
            this.cMap = cMap;

            // A CMap without a name cannot be an Identity CMap; guard against the missing name rather than throwing.
            // The name is an identifier so it is compared ordinally rather than with culture rules.
            IsUsingIdentityAsUnicodeMap = cMap?.Name != null
                && cMap.Name.StartsWith("Identity-", StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>
        /// Tries to convert the code to its Unicode value.
        /// </summary>
        /// <param name="code">The code to convert.</param>
        /// <param name="value">The Unicode value if the conversion succeeded, otherwise null.</param>
        /// <returns>False when there is no CMap or the CMap could not convert the code.</returns>
        public bool TryGet(int code, out string value)
        {
            value = null;

            if (!CanMapToUnicode)
            {
                return false;
            }

            return cMap.TryConvertToUnicode(code, out value);
        }

        /// <summary>
        /// Reads the next code from the input using the wrapped CMap.
        /// </summary>
        /// <param name="inputBytes">The bytes to read the code from.</param>
        /// <returns>The code read by the CMap.</returns>
        /// <exception cref="InvalidOperationException">There is no CMap, see <see cref="CanMapToUnicode"/>.</exception>
        public int ReadCode(IInputBytes inputBytes)
        {
            if (cMap == null)
            {
                throw new InvalidOperationException("Cannot read a code because no ToUnicode CMap is available.");
            }

            return cMap.ReadCode(inputBytes);
        }
    }
}

View File

@ -0,0 +1,76 @@
namespace UglyToad.Pdf.Fonts.Composite
{
    using System;
    using CidFonts;
    using Cmap;
    using Cos;
    using Geometry;
    using IO;
    using Util.JetBrains.Annotations;

    /// <summary>
    /// Defines glyphs using a CIDFont
    /// </summary>
    internal class Type0Font : IFont
    {
        /// <summary>
        /// The name of the font, this is the same value as <see cref="BaseFont"/>.
        /// </summary>
        public CosName Name => BaseFont;

        /// <summary>
        /// The base font name this font was created with.
        /// </summary>
        [NotNull]
        public CosName BaseFont { get; }

        /// <summary>
        /// The CIDFont this font was created with.
        /// </summary>
        [NotNull]
        public ICidFont CidFont { get; }

        /// <summary>
        /// The CMap of the font, used to read character codes and to determine the writing mode.
        /// </summary>
        [NotNull]
        public CMap CMap { get; }

        /// <summary>
        /// Wrapper for the optional ToUnicode CMap.
        /// </summary>
        [NotNull]
        public ToUnicodeCMap ToUnicode { get; }

        /// <summary>
        /// Whether the <see cref="CMap"/> declares the vertical writing mode.
        /// </summary>
        public bool IsVertical => CMap.WritingMode == WritingMode.Vertical;

        /// <summary>
        /// Creates a new <see cref="Type0Font"/>.
        /// </summary>
        /// <param name="baseFont">The base font name, required.</param>
        /// <param name="cidFont">The CIDFont, required.</param>
        /// <param name="cmap">The CMap of the font, required.</param>
        /// <param name="toUnicodeCMap">The ToUnicode CMap, may be null.</param>
        /// <exception cref="ArgumentNullException">Any of the required arguments is null.</exception>
        public Type0Font(CosName baseFont, ICidFont cidFont, CMap cmap, CMap toUnicodeCMap)
        {
            BaseFont = baseFont ?? throw new ArgumentNullException(nameof(baseFont));
            CidFont = cidFont ?? throw new ArgumentNullException(nameof(cidFont));
            CMap = cmap ?? throw new ArgumentNullException(nameof(cmap));
            ToUnicode = new ToUnicodeCMap(toUnicodeCMap);
        }

        /// <summary>
        /// Reads the next character code from the input.
        /// </summary>
        /// <param name="bytes">The input positioned at the start of the code.</param>
        /// <param name="codeLength">The distance the input offset advanced while reading the code.</param>
        /// <returns>The character code.</returns>
        public int ReadCharacterCode(IInputBytes bytes, out int codeLength)
        {
            var current = bytes.CurrentOffset;

            // Codes are read with the font's own CMap. The ToUnicode CMap is optional (so may be absent)
            // and is only used for mapping codes to text.
            var code = CMap.ReadCode(bytes);

            codeLength = bytes.CurrentOffset - current;

            return code;
        }

        /// <summary>
        /// Tries to get the Unicode value for the character code.
        /// </summary>
        /// <param name="characterCode">The character code.</param>
        /// <param name="value">The Unicode value if found, otherwise null.</param>
        /// <returns>False when there is no ToUnicode CMap or it has no value for the code.</returns>
        public bool TryGetUnicode(int characterCode, out string value)
        {
            value = null;

            if (!ToUnicode.CanMapToUnicode)
            {
                return false;
            }

            // According to PdfBox certain providers incorrectly using Identity CMaps as ToUnicode.
            if (ToUnicode.IsUsingIdentityAsUnicodeMap)
            {
                // The code is used directly as a single UTF-16 code unit.
                value = new string((char)characterCode, 1);

                return true;
            }

            return ToUnicode.TryGet(characterCode, out value);
        }

        /// <summary>
        /// Gets the displacement for the character code.
        /// </summary>
        /// <param name="characterCode">The character code, currently unused.</param>
        /// <returns>A fixed vector of (0.333, 0) for every code.</returns>
        public PdfVector GetDisplacement(int characterCode)
        {
            // NOTE(review): constant value which ignores the character code - looks like a placeholder, confirm.
            return new PdfVector(0.333m, 0);
        }
    }
}

View File

@ -1,5 +1,6 @@
namespace UglyToad.Pdf.Fonts
{
using ContentStream;
using Cos;
/// <summary>
@ -13,13 +14,13 @@
/// </remarks>
internal class DescriptorFontFile
{
public CosObjectKey ObjectKey { get; }
public IndirectReference ObjectKey { get; }
public byte[] FileBytes { get; }
public FontFileType FileType { get; }
public DescriptorFontFile(CosObjectKey key, FontFileType fileType)
public DescriptorFontFile(IndirectReference key, FontFileType fileType)
{
ObjectKey = key;
FileBytes = new byte[0];

View File

@ -0,0 +1,32 @@
namespace UglyToad.Pdf.Fonts.Exceptions
{
    using System;
    using System.Runtime.Serialization;

    /// <summary>
    /// Thrown when a font in the PDF document cannot be parsed because the font program
    /// or font dictionary does not conform to the specification.
    /// </summary>
    /// <inheritdoc cref="Exception"/>
    [Serializable]
    public class InvalidFontFormatException : Exception
    {
        /// <summary>
        /// Creates the exception without a message.
        /// </summary>
        public InvalidFontFormatException() { }

        /// <summary>
        /// Creates the exception with the given message.
        /// </summary>
        /// <param name="message">Describes the problem with the font.</param>
        public InvalidFontFormatException(string message)
            : base(message) { }

        /// <summary>
        /// Creates the exception with the given message and the exception which caused it.
        /// </summary>
        /// <param name="message">Describes the problem with the font.</param>
        /// <param name="inner">The underlying cause.</param>
        public InvalidFontFormatException(string message, Exception inner)
            : base(message, inner) { }

        /// <summary>
        /// Creates the exception from serialized data.
        /// </summary>
        /// <param name="info">The serialized object data.</param>
        /// <param name="context">The source or destination of the serialized data.</param>
        protected InvalidFontFormatException(SerializationInfo info, StreamingContext context)
            : base(info, context) { }
    }
}

View File

@ -4,22 +4,26 @@
using System.Collections.Generic;
using ContentStream;
using Cos;
using Exceptions;
using IO;
using Logging;
using Parser.Handlers;
using Pdf.Parser;
internal class FontFactory
internal class FontFactory : IFontFactory
{
private readonly ILog log;
private readonly IReadOnlyDictionary<CosName, IFontHandler> handlers;
public FontFactory(Type0FontHandler type0FontHandler)
public FontFactory(ILog log, Type0FontHandler type0FontHandler)
{
this.log = log;
handlers = new Dictionary<CosName, IFontHandler>
{
{CosName.TYPE0, type0FontHandler}
};
}
public IFont GetFont(PdfDictionary dictionary, ParsingArguments arguments)
public IFont Get(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
{
var type = dictionary.GetName(CosName.TYPE);
@ -27,13 +31,13 @@
{
var message = "The font dictionary did not have type 'Font'. " + dictionary;
if (arguments.IsLenientParsing)
if (isLenientParsing)
{
arguments.Log.Error(message);
log?.Error(message);
}
else
{
throw new InvalidOperationException(message);
throw new InvalidFontFormatException(message);
}
}
@ -41,7 +45,7 @@
if (handlers.TryGetValue(subtype, out var handler))
{
return handler.Generate(dictionary, arguments);
return handler.Generate(dictionary, reader, isLenientParsing);
}
throw new NotImplementedException($"Parsing not implemented for fonts of type: {subtype}, please submit a pull request or an issue.");
@ -49,3 +53,4 @@
}
}

View File

@ -10,16 +10,12 @@
internal interface IFont
{
CosName Name { get; }
CosName SubType { get; }
string BaseFontType { get; }
bool IsVertical { get; }
int ReadCharacterCode(IInputBytes bytes, out int codeLength);
string GetUnicode(int characterCode);
bool TryGetUnicode(int characterCode, out string value);
PdfVector GetDisplacement(int characterCode);
}
@ -51,6 +47,11 @@
return code;
}
public bool TryGetUnicode(int characterCode, out string value)
{
throw new NotImplementedException();
}
public string GetUnicode(int characterCode)
{
if (ToUnicode != null)

View File

@ -0,0 +1,10 @@
namespace UglyToad.Pdf.Fonts
{
    using ContentStream;
    using IO;

    /// <summary>
    /// Creates <see cref="IFont"/> instances from font dictionaries.
    /// </summary>
    internal interface IFontFactory
    {
        /// <summary>
        /// Gets the font described by the given font dictionary.
        /// </summary>
        /// <param name="dictionary">The font dictionary.</param>
        /// <param name="reader">The reader available for loading any further objects the font needs.</param>
        /// <param name="isLenientParsing">Whether lenient parsing was requested.</param>
        /// <returns>The <see cref="IFont"/> for the dictionary.</returns>
        IFont Get(
            PdfDictionary dictionary,
            IRandomAccessRead reader,
            bool isLenientParsing);
    }
}

View File

@ -9,7 +9,7 @@
using Tokenization.Scanner;
using Tokenization.Tokens;
public class CMapParser
internal class CMapParser
{
private static readonly BaseFontRangeParser BaseFontRangeParser = new BaseFontRangeParser();
private static readonly BaseFontCharacterParser BaseFontCharacterParser = new BaseFontCharacterParser();

View File

@ -1,10 +1,10 @@
namespace UglyToad.Pdf.Fonts.Parser.Handlers
{
using ContentStream;
using Pdf.Parser;
using IO;
internal interface IFontHandler
{
IFont Generate(PdfDictionary dictionary, ParsingArguments parsingArguments);
IFont Generate(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing);
}
}

View File

@ -1,9 +1,12 @@
namespace UglyToad.Pdf.Fonts.Parser.Handlers
{
using System;
using CidFonts;
using Cmap;
using Composite;
using ContentStream;
using Cos;
using Exceptions;
using Filters;
using IO;
using Parts;
@ -14,31 +17,35 @@
private readonly CidFontFactory cidFontFactory;
private readonly CMapCache cMapCache;
private readonly IFilterProvider filterProvider;
private readonly IPdfObjectParser pdfObjectParser;
public Type0FontHandler(CidFontFactory cidFontFactory, CMapCache cMapCache, IFilterProvider filterProvider)
public Type0FontHandler(CidFontFactory cidFontFactory, CMapCache cMapCache, IFilterProvider filterProvider, IPdfObjectParser pdfObjectParser)
{
this.cidFontFactory = cidFontFactory;
this.cMapCache = cMapCache;
this.filterProvider = filterProvider;
this.pdfObjectParser = pdfObjectParser;
}
public IFont Generate(PdfDictionary dictionary, ParsingArguments arguments)
public IFont Generate(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
{
var dynamicParser = arguments.Get<DynamicParser>();
var baseFont = dictionary.GetName(CosName.BASE_FONT);
var cMap = ReadEncoding(dictionary, out var isCMapPredefined);
if (TryGetFirstDescendant(dictionary, out var descendantObject))
{
var parsed = dynamicParser.Parse(arguments, descendantObject, false);
var parsed = pdfObjectParser.Parse(descendantObject.ToIndirectReference(), reader, isLenientParsing);
if (parsed is PdfDictionary descendantFontDictionary)
{
ParseDescendant(descendantFontDictionary, arguments);
ParseDescendant(descendantFontDictionary, reader, isLenientParsing);
}
}
else
{
throw new InvalidFontFormatException("No descendant font dictionary was declared for this Type 0 font. This dictionary should contain the CIDFont for the Type 0 font. " + dictionary);
}
var ucs2CMap = GetUcs2CMap(dictionary, isCMapPredefined, false);
@ -47,22 +54,17 @@
{
var toUnicodeValue = dictionary[CosName.TO_UNICODE];
var toUnicode = dynamicParser.Parse(arguments, toUnicodeValue as CosObject, false) as RawCosStream;
var toUnicode = pdfObjectParser.Parse(((CosObject)toUnicodeValue).ToIndirectReference(), reader, isLenientParsing) as RawCosStream;
var decodedUnicodeCMap = toUnicode?.Decode(filterProvider);
if (decodedUnicodeCMap != null)
{
toUnicodeCMap = cMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), arguments.IsLenientParsing);
toUnicodeCMap = cMapCache.Parse(new ByteArrayInputBytes(decodedUnicodeCMap), isLenientParsing);
}
}
var font = new CompositeFont
{
SubType = CosName.TYPE0,
ToUnicode = toUnicodeCMap,
BaseFont = baseFont
};
var font = new Type0Font(baseFont, new Type0CidFont(), cMap, toUnicodeCMap);
return font;
}
@ -91,7 +93,7 @@
return false;
}
private void ParseDescendant(PdfDictionary dictionary, ParsingArguments arguments)
private void ParseDescendant(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
{
var type = dictionary.GetName(CosName.TYPE);
if (!CosName.FONT.Equals(type))
@ -99,7 +101,7 @@
throw new InvalidOperationException($"Expected \'Font\' dictionary but found \'{type.Name}\'");
}
cidFontFactory.Generate(dictionary, arguments, arguments.IsLenientParsing);
cidFontFactory.Generate(dictionary, reader, isLenientParsing);
}
private CMap ReadEncoding(PdfDictionary dictionary, out bool isCMapPredefined)

View File

@ -2,7 +2,6 @@
{
using System;
using System.Collections.Generic;
using System.IO;
using CidFonts;
using ContentStream;
using Cos;
@ -12,20 +11,25 @@
using Pdf.Parser;
using TrueType;
using TrueType.Parser;
using Util;
internal class CidFontFactory
{
private readonly FontDescriptorFactory descriptorFactory;
private readonly TrueTypeFontParser trueTypeFontParser;
private readonly IPdfObjectParser pdfObjectParser;
private readonly IFilterProvider filterProvider;
public CidFontFactory(FontDescriptorFactory descriptorFactory, TrueTypeFontParser trueTypeFontParser)
public CidFontFactory(FontDescriptorFactory descriptorFactory, TrueTypeFontParser trueTypeFontParser,
IPdfObjectParser pdfObjectParser,
IFilterProvider filterProvider)
{
this.descriptorFactory = descriptorFactory;
this.trueTypeFontParser = trueTypeFontParser;
this.pdfObjectParser = pdfObjectParser;
this.filterProvider = filterProvider;
}
public ICidFont Generate(PdfDictionary dictionary, ParsingArguments arguments, bool isLenientParsing)
public ICidFont Generate(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
{
var type = dictionary.GetName(CosName.TYPE);
if (!CosName.FONT.Equals(type))
@ -37,12 +41,12 @@
var verticalWritingMetrics = ReadVerticalDisplacements(dictionary);
FontDescriptor descriptor = null;
if (TryGetFontDescriptor(dictionary, arguments, out var descriptorDictionary))
if (TryGetFontDescriptor(dictionary, reader, out var descriptorDictionary))
{
descriptor = descriptorFactory.Generate(descriptorDictionary, arguments.IsLenientParsing);
descriptor = descriptorFactory.Generate(descriptorDictionary, isLenientParsing);
}
ReadDescriptorFile(descriptor, arguments);
ReadDescriptorFile(descriptor, reader, isLenientParsing);
var subType = dictionary.GetName(CosName.SUBTYPE);
if (CosName.CID_FONT_TYPE0.Equals(subType))
@ -58,8 +62,7 @@
return null;
}
private static bool TryGetFontDescriptor(PdfDictionary dictionary, ParsingArguments arguments,
out PdfDictionary descriptorDictionary)
private bool TryGetFontDescriptor(PdfDictionary dictionary, IRandomAccessRead reader, out PdfDictionary descriptorDictionary)
{
descriptorDictionary = null;
@ -68,7 +71,7 @@
return false;
}
var descriptorObj = arguments.Get<DynamicParser>().Parse(arguments, obj, false);
var descriptorObj = pdfObjectParser.Parse(obj.ToIndirectReference(), reader, false);
if (!(descriptorObj is PdfDictionary descriptor))
{
@ -80,21 +83,21 @@
return true;
}
private void ReadDescriptorFile(FontDescriptor descriptor, ParsingArguments arguments)
private void ReadDescriptorFile(FontDescriptor descriptor, IRandomAccessRead reader, bool isLenientParsing)
{
if (descriptor?.FontFile == null)
{
return;
}
var fontFileStream = arguments.Get<DynamicParser>().Parse(arguments, descriptor.FontFile.ObjectKey, false) as RawCosStream;
var fontFileStream = pdfObjectParser.Parse(descriptor.FontFile.ObjectKey, reader, isLenientParsing) as RawCosStream;
if (fontFileStream == null)
{
return;
}
var fontFile = fontFileStream.Decode(arguments.Get<IFilterProvider>());
var fontFile = fontFileStream.Decode(filterProvider);
switch (descriptor.FontFile.FileType)
{

View File

@ -141,7 +141,7 @@
throw new NotSupportedException("We currently expect the FontFile to be an object reference.");
}
return new DescriptorFontFile(obj.GetObjectKey(), DescriptorFontFile.FontFileType.Type1);
return new DescriptorFontFile(obj.ToIndirectReference(), DescriptorFontFile.FontFileType.Type1);
}
if (dictionary.TryGetValue(CosName.FONT_FILE2, out value))
@ -151,7 +151,7 @@
throw new NotSupportedException("We currently expect the FontFile2 to be an object reference.");
}
return new DescriptorFontFile(obj.GetObjectKey(), DescriptorFontFile.FontFileType.TrueType);
return new DescriptorFontFile(obj.ToIndirectReference(), DescriptorFontFile.FontFileType.TrueType);
}
if (dictionary.TryGetValue(CosName.FONT_FILE3, out value))
@ -161,7 +161,7 @@
throw new NotSupportedException("We currently expect the FontFile3 to be an object reference.");
}
return new DescriptorFontFile(obj.GetObjectKey(), DescriptorFontFile.FontFileType.FromSubtype);
return new DescriptorFontFile(obj.ToIndirectReference(), DescriptorFontFile.FontFileType.FromSubtype);
}
return null;

View File

@ -0,0 +1,7 @@
namespace UglyToad.Pdf.Geometry.Paths
{
/// <summary>
/// Placeholder type for a general path in the geometry namespace.
/// It currently declares no members; see the TODO below.
/// </summary>
internal class GeneralPath
{
// TODO: provide an implementation
}
}

View File

@ -85,7 +85,7 @@
{
var code = font.ReadCharacterCode(bytes, out int codeLength);
var unicode = font.GetUnicode(code);
font.TryGetUnicode(code, out var unicode);
var wordSpacing = 0m;
if (code == ' ' && codeLength == 1)

View File

@ -1,12 +1,11 @@
namespace UglyToad.Pdf.Parser
{
using System.Collections.Generic;
using Graphics;
using Graphics.Operations;
using IO;
internal interface IPageContentParser
{
IReadOnlyList<IGraphicsStateOperation> Parse(IGraphicsStateOperationFactory operationFactory, IInputBytes inputBytes);
IReadOnlyList<IGraphicsStateOperation> Parse(IInputBytes inputBytes);
}
}

View File

@ -0,0 +1,218 @@
namespace UglyToad.Pdf.Parser
{
using System;
using System.Collections.Generic;
using System.Linq;
using ContentStream;
using Cos;
using IO;
using Logging;
using Parts;
using Util;
/// <summary>
/// Resolves an indirect reference (object number and generation) to the object it identifies.
/// </summary>
internal interface IPdfObjectParser
{
/// <summary>
/// Parses the object identified by <paramref name="indirectReference"/>, reading from <paramref name="reader"/>.
/// </summary>
/// <param name="indirectReference">The object number and generation of the object to load.</param>
/// <param name="reader">The source the object is read from.</param>
/// <param name="isLenientParsing">
/// Whether recoverable format problems are tolerated. In <see cref="PdfObjectParser"/> this enables a
/// brute-force search when the object has no cross-reference entry and downgrades a missing 'endobj'
/// from an exception to a logged warning.
/// </param>
/// <param name="requireExistingObject">
/// In <see cref="PdfObjectParser"/>, when <c>true</c> an <see cref="InvalidOperationException"/> is thrown
/// if the object has no cross-reference entry or is stored inside an object stream.
/// </param>
/// <returns>
/// The parsed object. <see cref="PdfObjectParser"/> returns <see cref="CosNull.Null"/> when the object
/// cannot be located.
/// </returns>
CosBase Parse(IndirectReference indirectReference, IRandomAccessRead reader, bool isLenientParsing = true, bool requireExistingObject = false);
}
/// <summary>
/// Loads the object an indirect reference points to, using the cross-reference table to find it
/// either at a byte offset in the file or inside an object stream.
/// </summary>
internal class PdfObjectParser : IPdfObjectParser
{
    // Keywords which delimit objects and streams in the file.
    private const string StreamKeyword = "stream";
    private const string EndStreamKeyword = "endstream";
    private const string EndObjectKeyword = "endobj";

    private readonly ILog log;
    private readonly CosBaseParser baseParser;
    private readonly CosStreamParser streamParser;
    private readonly CrossReferenceTable crossReferenceTable;
    private readonly BruteForceSearcher bruteForceSearcher;
    private readonly CosObjectPool objectPool;
    private readonly ObjectStreamParser objectStreamParser;

    /// <summary>
    /// Creates a new <see cref="PdfObjectParser"/>.
    /// </summary>
    /// <param name="log">Receives warnings and errors; a <see cref="NoOpLog"/> is used when <c>null</c>.</param>
    /// <param name="baseParser">Parses the body of an object read directly from the file.</param>
    /// <param name="streamParser">Parses the stream following a stream dictionary.</param>
    /// <param name="crossReferenceTable">Maps object keys to offsets or (negated) object stream numbers.</param>
    /// <param name="bruteForceSearcher">Supplies object locations when the cross-reference table has no entry.</param>
    /// <param name="objectPool">The pool of objects already known for the document.</param>
    /// <param name="objectStreamParser">Parses the objects contained in an object stream.</param>
    /// <exception cref="ArgumentNullException">Any argument other than <paramref name="log"/> is <c>null</c>.</exception>
    public PdfObjectParser(ILog log, CosBaseParser baseParser, CosStreamParser streamParser, CrossReferenceTable crossReferenceTable,
        BruteForceSearcher bruteForceSearcher,
        CosObjectPool objectPool,
        ObjectStreamParser objectStreamParser)
    {
        this.log = log ?? new NoOpLog();
        this.baseParser = baseParser ?? throw new ArgumentNullException(nameof(baseParser));
        this.streamParser = streamParser ?? throw new ArgumentNullException(nameof(streamParser));
        this.crossReferenceTable = crossReferenceTable ?? throw new ArgumentNullException(nameof(crossReferenceTable));
        this.bruteForceSearcher = bruteForceSearcher ?? throw new ArgumentNullException(nameof(bruteForceSearcher));
        this.objectPool = objectPool ?? throw new ArgumentNullException(nameof(objectPool));
        this.objectStreamParser = objectStreamParser ?? throw new ArgumentNullException(nameof(objectStreamParser));
    }

    /// <summary>
    /// Parses the object identified by <paramref name="indirectReference"/>.
    /// </summary>
    /// <param name="indirectReference">The object number and generation of the object to load.</param>
    /// <param name="reader">The source the object is read from.</param>
    /// <param name="isLenientParsing">
    /// When <c>true</c> a brute-force search is used if the cross-reference table has no entry for the
    /// object, and a missing 'endobj' is logged as a warning instead of throwing.
    /// </param>
    /// <param name="requireExistingObject">
    /// When <c>true</c> the object must have a cross-reference entry and must not be stored in an object stream.
    /// </param>
    /// <returns>The parsed object, or <see cref="CosNull.Null"/> if it could not be located.</returns>
    /// <exception cref="InvalidOperationException">
    /// <paramref name="requireExistingObject"/> is <c>true</c> and the object is missing or compressed,
    /// or the data at the recorded offset is not the expected object.
    /// </exception>
    public CosBase Parse(IndirectReference indirectReference, IRandomAccessRead reader, bool isLenientParsing = true, bool requireExistingObject = false)
    {
        var key = new CosObjectKey(indirectReference.ObjectNumber, indirectReference.Generation);

        // Return the pooled value if this object has already been loaded.
        var pooledValue = objectPool.GetOrCreateDefault(key).GetObject();
        if (pooledValue != null)
        {
            return pooledValue;
        }

        // A positive value is a byte offset; a value <= 0 is the negated number of the containing object stream.
        var offsetOrStreamNumber = TryGet(key, crossReferenceTable.ObjectOffsets);

        if (requireExistingObject && (offsetOrStreamNumber == null || offsetOrStreamNumber <= 0))
        {
            throw new InvalidOperationException("Object must be defined and not compressed: " + key);
        }

        if (isLenientParsing && offsetOrStreamNumber == null)
        {
            // The table has no entry: fall back to the brute-force locations and remember the result.
            offsetOrStreamNumber = TryGet(key, bruteForceSearcher.GetObjectLocations());

            if (offsetOrStreamNumber != null)
            {
                crossReferenceTable.UpdateOffset(key, offsetOrStreamNumber.Value);
            }
        }

        if (offsetOrStreamNumber == null)
        {
            return CosNull.Null;
        }

        var location = offsetOrStreamNumber.Value;

        if (location > 0)
        {
            return ParseObjectFromFile(location, reader, key, objectPool, isLenientParsing);
        }

        return ParseCompressedStreamObject(reader, -location, indirectReference.ObjectNumber, isLenientParsing);
    }

    /// <summary>
    /// Reads the object stored at <paramref name="offset"/> and checks that it is the object named by <paramref name="key"/>.
    /// </summary>
    private CosBase ParseObjectFromFile(long offset, IRandomAccessRead reader,
        CosObjectKey key,
        CosObjectPool pool,
        bool isLenientParsing)
    {
        reader.Seek(offset);

        var objectNumber = ObjectHelper.ReadObjectNumber(reader);
        var objectGeneration = ObjectHelper.ReadGenerationNumber(reader);

        ReadHelper.ReadExpectedString(reader, "obj", true);

        if (objectNumber != key.Number || objectGeneration != key.Generation)
        {
            throw new InvalidOperationException($"Xref for {key} points to object {objectNumber} {objectGeneration} at {offset}");
        }

        ReadHelper.SkipSpaces(reader);

        var baseObject = baseParser.Parse(reader, pool);

        var endObjectKey = ReadHelper.ReadString(reader);

        if (string.Equals(endObjectKey, StreamKeyword))
        {
            // Step back over the keyword so the stream parser sees the 'stream' token itself.
            var streamStartBytes = OtherEncodings.StringAsLatin1Bytes(endObjectKey);
            reader.Rewind(streamStartBytes.Length);

            baseObject = ReadNormalObjectStream(reader, baseObject, offset, isLenientParsing, out endObjectKey);
        }

        if (!string.Equals(endObjectKey, EndObjectKeyword))
        {
            var message =
                $"Object ({objectNumber}:{objectGeneration}) at offset {offset} does not end with 'endobj' but with '{endObjectKey}'";

            if (!isLenientParsing)
            {
                throw new InvalidOperationException(message);
            }

            log.Warn(message);
        }

        return baseObject;
    }

    /// <summary>
    /// Parses the stream following a stream dictionary and reads the token which follows the stream.
    /// </summary>
    /// <exception cref="InvalidOperationException"><paramref name="currentBase"/> is not a dictionary.</exception>
    private CosBase ReadNormalObjectStream(IRandomAccessRead reader, CosBase currentBase, long offset,
        bool isLenientParsing,
        out string endObjectKey)
    {
        if (!(currentBase is PdfDictionary dictionary))
        {
            // this is not legal
            // the combination of a dict and the stream/endstream
            // forms a complete stream object
            throw new InvalidOperationException($"Stream not preceded by dictionary (offset: {offset}).");
        }

        RawCosStream stream = streamParser.Parse(reader, dictionary, isLenientParsing);

        ReadHelper.SkipSpaces(reader);
        endObjectKey = ReadHelper.ReadLine(reader);

        // we have case with a second 'endstream' before endobj
        // Ordinal comparison: these are raw file keywords, so culture rules must not apply.
        if (!endObjectKey.StartsWith(EndObjectKeyword, StringComparison.Ordinal)
            && endObjectKey.StartsWith(EndStreamKeyword, StringComparison.Ordinal))
        {
            endObjectKey = endObjectKey.Substring(EndStreamKeyword.Length).Trim();

            if (endObjectKey.Length == 0)
            {
                // no other characters in extra endstream line
                // read next line
                endObjectKey = ReadHelper.ReadLine(reader);
            }
        }

        return stream;
    }

    /// <summary>
    /// Loads the object stream numbered <paramref name="streamObjectNumber"/> and returns the entry
    /// whose object number is <paramref name="requestedNumber"/>.
    /// </summary>
    private CosBase ParseCompressedStreamObject(IRandomAccessRead reader, long streamObjectNumber, long requestedNumber, bool isLenientParsing)
    {
        // The object stream itself must be a normal (uncompressed) object, hence requireExistingObject = true.
        var baseStream = Parse(new IndirectReference(streamObjectNumber, 0), reader, isLenientParsing, true);

        if (!(baseStream is RawCosStream stream))
        {
            log.Warn($"Could not find a stream for the object number, defaults to returning CosNull: {streamObjectNumber}");

            return CosNull.Null;
        }

        var objects = objectStreamParser.Parse(stream, objectPool);

        // register all objects which are referenced to be contained in object stream
        foreach (var next in objects)
        {
            var streamKey = new CosObjectKey(next);
            var offset = TryGet(streamKey, crossReferenceTable.ObjectOffsets);

            if (offset != null && offset == -streamObjectNumber)
            {
                var streamObject = objectPool.Get(streamKey);
                streamObject.SetObject(next.GetObject());
            }
        }

        // NOTE(review): this returns the entry from the object stream itself rather than its GetObject()
        // value, unlike the pooled path in Parse - confirm callers expect that.
        var matchingStreamObject = objects.FirstOrDefault(x => x.GetObjectNumber() == requestedNumber);

        if (matchingStreamObject != null)
        {
            return matchingStreamObject;
        }

        log.Error($"Could not find the object {requestedNumber} in the stream for object {streamObjectNumber}. Returning CosNull.");

        return CosNull.Null;
    }

    /// <summary>
    /// Looks up <paramref name="key"/> and returns its value, or <c>null</c> when the key is absent.
    /// </summary>
    private static T? TryGet<T, TKey>(TKey key, IReadOnlyDictionary<TKey, T> dictionary) where T : struct
    {
        return dictionary.TryGetValue(key, out var value) ? value : default(T?);
    }
}
}

View File

@ -9,7 +9,14 @@
internal class PageContentParser : IPageContentParser
{
public IReadOnlyList<IGraphicsStateOperation> Parse(IGraphicsStateOperationFactory operationFactory, IInputBytes inputBytes)
private readonly IGraphicsStateOperationFactory operationFactory;
public PageContentParser(IGraphicsStateOperationFactory operationFactory)
{
this.operationFactory = operationFactory;
}
public IReadOnlyList<IGraphicsStateOperation> Parse(IInputBytes inputBytes)
{
var scanner = new CoreTokenScanner(inputBytes);

View File

@ -1,57 +1,12 @@
namespace UglyToad.Pdf.Parser.PageTree
{
using System;
using Content;
using ContentStream;
using ContentStream.TypedAccessors;
using Cos;
using Filters;
using Fonts;
internal class PageParser
{
public Page Parse(int number, PdfDictionary dictionary, ParsingArguments arguments)
{
if (dictionary == null)
{
throw new ArgumentNullException(nameof(dictionary));
}
if (arguments == null)
{
throw new ArgumentNullException(nameof(arguments));
}
if (!dictionary.IsType(CosName.PAGE))
{
throw new InvalidOperationException("Expected a Dictionary of Type Page, instead got this: " + dictionary);
}
return new Page(number, dictionary, new PageTreeMembers(), arguments);
}
}
internal class FontParser
{
public Font Parse(PdfDictionary dictionary, ParsingArguments arguments)
{
var type = dictionary.GetName(CosName.SUBTYPE);
if (CosName.Equals(type, CosName.TYPE0))
{
var compositeFont = arguments.Container.Get<CompositeFontParser>()
.Parse(dictionary, arguments);
}
else
{
var simpleFont = arguments.Container.Get<SimpleFontParser>()
.Parse(dictionary, arguments);
}
return new Font();
}
}
internal class CompositeFontParser
{
public CompositeFont Parse(PdfDictionary dictionary, ParsingArguments arguments)

View File

@ -2,7 +2,6 @@
{
using System.Collections.Generic;
using Content;
using ContentStream;
using Cos;
/// <summary>
@ -37,34 +36,5 @@
{
return fonts.ContainsKey(name);
}
internal bool GetFont(CosName name, ParsingArguments arguments, out Font value)
{
if (fontObjects.TryGetValue(name, out value))
{
return true;
}
if (!fonts.TryGetValue(name, out var key))
{
return false;
}
var dictionary = arguments.Container.Get<DynamicParser>()
.Parse(arguments, key, false) as PdfDictionary;
if (dictionary == null)
{
return false;
}
var font = arguments.Container.Get<FontParser>()
.Parse(dictionary, arguments);
fontObjects[name] = font;
// retrieve and cache
return false;
}
}
}

View File

@ -14,9 +14,9 @@
public BruteForceSearcher BruteForceSearcher { get; }
public ResourceContainer ResourceContainer { get; }
public IResourceStore ResourceContainer { get; }
public ParsingCachingProviders(CosObjectPool objectPool, BruteForceSearcher bruteForceSearcher, ResourceContainer resourceContainer)
public ParsingCachingProviders(CosObjectPool objectPool, BruteForceSearcher bruteForceSearcher, IResourceStore resourceContainer)
{
ObjectPool = objectPool ?? throw new ArgumentNullException(nameof(objectPool));
BruteForceSearcher = bruteForceSearcher ?? throw new ArgumentNullException(nameof(bruteForceSearcher));

View File

@ -5,7 +5,15 @@
using Content;
using ContentStream;
using Cos;
using Filters;
using Fonts;
using Fonts.Parser;
using Fonts.Parser.Handlers;
using Fonts.Parser.Parts;
using Fonts.TrueType.Parser;
using Graphics;
using IO;
using Logging;
using Parts;
using Parts.CrossReference;
using Util;
@ -37,6 +45,8 @@
private static PdfDocument OpenDocument(IRandomAccessRead reader, IContainer container, bool isLenientParsing)
{
var log = container.Get<ILog>();
var version = container.Get<FileHeaderParser>().ReadHeader(reader, isLenientParsing);
var crossReferenceOffset = container.Get<FileTrailerParser>().GetXrefOffset(reader, isLenientParsing);
@ -46,9 +56,27 @@
var crossReferenceTable = container.Get<FileCrossReferenceTableParser>()
.Parse(reader, isLenientParsing, crossReferenceOffset, pool);
var dynamicParser = container.Get<DynamicParser>();
var filterProvider = container.Get<IFilterProvider>();
var bruteForceSearcher = new BruteForceSearcher(reader);
var resourceContainer = new ResourceContainer();
var pdfObjectParser = new PdfObjectParser(container.Get<ILog>(), container.Get<CosBaseParser>(),
container.Get<CosStreamParser>(), crossReferenceTable, bruteForceSearcher, pool, container.Get<ObjectStreamParser>());
var trueTypeFontParser = new TrueTypeFontParser();
var fontDescriptorFactory = new FontDescriptorFactory();
var cidFontFactory = new CidFontFactory(fontDescriptorFactory, trueTypeFontParser, pdfObjectParser, filterProvider);
var cMapCache = new CMapCache(new CMapParser());
var fontFactory = new FontFactory(container.Get<ILog>(), new Type0FontHandler(cidFontFactory,
cMapCache,
filterProvider,
pdfObjectParser));
var dynamicParser = container.Get<DynamicParser>();
var resourceContainer = new ResourceContainer(pdfObjectParser, fontFactory);
var pageFactory = new PageFactory(resourceContainer, pdfObjectParser, filterProvider, new PageContentParser(new ReflectionGraphicsStateOperationFactory()));
var root = ParseTrailer(reader, crossReferenceTable, dynamicParser, bruteForceSearcher, pool,
isLenientParsing);
@ -66,7 +94,7 @@
var caching = new ParsingCachingProviders(pool, bruteForceSearcher, resourceContainer);
return new PdfDocument(reader, version, crossReferenceTable, container, isLenientParsing, caching, new Catalog(rootDictionary));
return new PdfDocument(log, reader, version, crossReferenceTable, isLenientParsing, caching, pageFactory, pdfObjectParser, new Catalog(rootDictionary));
}
private static CosBase ParseTrailer(IRandomAccessRead reader, CrossReferenceTable crossReferenceTable,

View File

@ -4,6 +4,7 @@
using Content;
using Cos;
using IO;
using Logging;
using Parser;
using Parser.Parts;
using Util;
@ -17,8 +18,8 @@
private readonly HeaderVersion version;
[NotNull]
private readonly CrossReferenceTable crossReferenceTable;
[NotNull]
private readonly IContainer container;
private readonly ILog log;
private readonly bool isLenientParsing;
[NotNull]
private readonly ParsingCachingProviders cachingProviders;
@ -29,21 +30,21 @@
[NotNull]
public Pages Pages { get; }
internal PdfDocument(IRandomAccessRead reader, HeaderVersion version, CrossReferenceTable crossReferenceTable,
IContainer container,
internal PdfDocument(ILog log, IRandomAccessRead reader, HeaderVersion version, CrossReferenceTable crossReferenceTable,
bool isLenientParsing,
ParsingCachingProviders cachingProviders,
IPageFactory pageFactory,
IPdfObjectParser pdfObjectParser,
Catalog catalog)
{
this.log = log;
this.reader = reader ?? throw new ArgumentNullException(nameof(reader));
this.version = version ?? throw new ArgumentNullException(nameof(version));
this.crossReferenceTable = crossReferenceTable ?? throw new ArgumentNullException(nameof(crossReferenceTable));
this.container = container ?? throw new ArgumentNullException(nameof(container));
this.isLenientParsing = isLenientParsing;
this.cachingProviders = cachingProviders ?? throw new ArgumentNullException(nameof(cachingProviders));
Catalog = catalog ?? throw new ArgumentNullException(nameof(catalog));
var arguments = new ParsingArguments(reader, crossReferenceTable, cachingProviders, container, isLenientParsing);
Pages = new Pages(Catalog, arguments);
Pages = new Pages(log, Catalog, pdfObjectParser, pageFactory, reader, isLenientParsing);
}
public static PdfDocument Open(byte[] fileBytes, ParsingOptions options = null) => PdfDocumentFactory.Open(fileBytes, options);

View File

@ -1,6 +1,7 @@
namespace UglyToad.Pdf.Tokenization
{
using System.Collections.Generic;
using ContentStream;
using IO;
using Parser.Parts;
using Scanner;
@ -79,7 +80,7 @@
if (r == OperatorToken.R)
{
result[key] = new IndirectReferenceToken(new IndirectReference(num.Long, gen.Long));
result[key] = new IndirectReferenceToken(new IndirectReference(num.Long, gen.Int));
i = i + 2;
}
}

View File

@ -1,5 +1,7 @@
namespace UglyToad.Pdf.Tokenization.Tokens
{
using ContentStream;
public class IndirectReferenceToken : IDataToken<IndirectReference>
{
public IndirectReference Data { get; }
@ -10,16 +12,4 @@
}
}
public struct IndirectReference
{
public long ObjectNumber { get; }
public long Generation { get; }
public IndirectReference(long objectNumber, long generation)
{
ObjectNumber = objectNumber;
Generation = generation;
}
}
}

View File

@ -227,5 +227,9 @@
<EmbeddedResource Include="Resources\CMap\UniKS-UTF16-V" />
<EmbeddedResource Include="Resources\CMap\V" />
</ItemGroup>
<ItemGroup>
<Folder Include="Fonts\Simple\" />
</ItemGroup>
</Project>

View File

@ -1,12 +1,7 @@
namespace UglyToad.Pdf.Util
{
using Filters;
using Fonts;
using Fonts.Parser;
using Fonts.Parser.Handlers;
using Fonts.Parser.Parts;
using Fonts.TrueType.Parser;
using Graphics;
using Logging;
using Parser;
using Parser.PageTree;
@ -49,19 +44,12 @@
new CrossReferenceTableParser(logger, dictionaryParser, baseParser));
var resourceDictionaryParser = new ResourceDictionaryParser();
var pageParser = new PageParser();
var simpleFontParser = new SimpleFontParser();
var compositeFontParser = new CompositeFontParser();
var fontParser = new FontParser();
var pageContentParser = new PageContentParser();
var operationFactory = new ReflectionGraphicsStateOperationFactory();
var cmapParser = new CMapParser();
var afmParser = new AdobeFontMetricsParser();
var type0FontFactory = new Type0FontHandler(new CidFontFactory(new FontDescriptorFactory(), new TrueTypeFontParser()), new CMapCache(cmapParser), filterProvider);
var fontFactory = new FontFactory(type0FontFactory);
var container = new Container();
container.Register(headerParser);
container.Register(trailerParser);
@ -75,15 +63,11 @@
container.Register(objectStreamParser);
container.Register(filterProvider);
container.Register(resourceDictionaryParser);
container.Register(pageParser);
container.Register(simpleFontParser);
container.Register(compositeFontParser);
container.Register(fontParser);
container.Register(pageContentParser);
container.Register(operationFactory);
container.Register(cmapParser);
container.Register(afmParser);
container.Register(fontFactory);
container.Register(logger);
return container;
}