#10 make all token classes public and expose via a public structure member on pdf document

This commit is contained in:
Eliot Jones
2018-11-24 19:02:06 +00:00
parent 8c0e8d5f26
commit 2fa781b8e9
49 changed files with 824 additions and 187 deletions

View File

@@ -4,6 +4,7 @@
using System.Linq;
using Exceptions;
using PdfPig.Cos;
using PdfPig.CrossReference;
using PdfPig.Parser.FileStructure;
using PdfPig.Tokenization.Scanner;
using Xunit;

View File

@@ -1,7 +1,6 @@
namespace UglyToad.PdfPig.Tests
{
using System.Collections.Generic;
using System.Linq;
using Xunit;
public class PublicApiScannerTests
@@ -25,20 +24,43 @@
var expected = new List<string>
{
"UglyToad.PdfPig.IndirectReference",
"UglyToad.PdfPig.PdfDocument",
"UglyToad.PdfPig.ParsingOptions",
"UglyToad.PdfPig.Structure",
"UglyToad.PdfPig.Logging.ILog",
"UglyToad.PdfPig.Geometry.PdfPoint",
"UglyToad.PdfPig.Geometry.PdfRectangle",
"UglyToad.PdfPig.Fonts.Exceptions.InvalidFontFormatException",
"UglyToad.PdfPig.Exceptions.PdfDocumentFormatException",
"UglyToad.PdfPig.Content.Catalog",
"UglyToad.PdfPig.Content.DocumentInformation",
"UglyToad.PdfPig.Content.Letter",
"UglyToad.PdfPig.Content.Page",
"UglyToad.PdfPig.Content.PageSize",
"UglyToad.PdfPig.Content.DocumentInformation"
"UglyToad.PdfPig.CrossReference.CrossReferenceTable",
"UglyToad.PdfPig.CrossReference.CrossReferenceType",
"UglyToad.PdfPig.CrossReference.TrailerDictionary",
"UglyToad.PdfPig.Tokens.ArrayToken",
"UglyToad.PdfPig.Tokens.BooleanToken",
"UglyToad.PdfPig.Tokens.CommentToken",
"UglyToad.PdfPig.Tokens.DictionaryToken",
"UglyToad.PdfPig.Tokens.HexToken",
"UglyToad.PdfPig.Tokens.IDataToken`1",
"UglyToad.PdfPig.Tokens.IndirectReferenceToken",
"UglyToad.PdfPig.Tokens.IToken",
"UglyToad.PdfPig.Tokens.NameToken",
"UglyToad.PdfPig.Tokens.NullToken",
"UglyToad.PdfPig.Tokens.NumericToken",
"UglyToad.PdfPig.Tokens.ObjectToken",
"UglyToad.PdfPig.Tokens.StreamToken",
"UglyToad.PdfPig.Tokens.StringToken"
};
Assert.Equal(expected.OrderBy(x => x), publicTypeNames.OrderBy(x => x));
foreach (var publicTypeName in publicTypeNames)
{
Assert.True(expected.Contains(publicTypeName), $"Type should not be public: {publicTypeName}.");
}
}
}
}

View File

@@ -2,6 +2,7 @@
{
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using PdfPig.ContentStream;
using PdfPig.IO;
@@ -187,7 +188,7 @@ H‰œUkLSgþÚh¹IÝÅlK(%[ÈÅ©+ ƒåꩊèæÇtnZ)Z¹¨Oå~9Š
Assert.StartsWith("58949797283757 0 obj", s.Substring((int)token.Position));
}
[Fact]
public void ReadsStreamObject()
{
@@ -212,9 +213,10 @@ endobj";
endobj";
var locationProvider = new TestObjectLocationProvider();
// Mark location of "353 0 obj"
// Mark location of "353 0 obj"
locationProvider.Offsets[new IndirectReference(353, 0)] = 1643;
var scanner = GetScanner(s, locationProvider);
var tokens = ReadToEnd(scanner);
@@ -244,9 +246,10 @@ endobj";
var scanner = GetScanner(s);
var token = ReadToEnd(scanner)[0];
var token = ReadToEnd(scanner)[0];
var stream = Assert.IsType<StreamToken>(token.Data);
var stream = Assert.IsType<StreamToken>(token.Data);
var bytes = stream.Data.ToArray();
Assert.Equal(45, bytes.Length);
var outputString = Encoding.UTF8.GetString(bytes);
@@ -274,7 +277,7 @@ endobj";
locationProvider.Offsets[new IndirectReference(5, 0)] = 0;
var scanner = GetScanner(s, locationProvider);
var scanner = GetScanner(s, locationProvider);
var token = ReadToEnd(scanner)[1];
var stream = Assert.IsType<StreamToken>(token.Data);

View File

@@ -2,16 +2,32 @@
{
using System;
using Tokens;
using Util.JetBrains.Annotations;
internal class Catalog
/// <summary>
/// The root of the document's object hierarchy. Contains references to objects defining the contents,
/// outline, named destinations and more.
/// </summary>
public class Catalog
{
private readonly DictionaryToken catalogDictionary;
/// <summary>
/// The catalog dictionary containing assorted information.
/// </summary>
[NotNull]
public DictionaryToken CatalogDictionary { get; }
/// <summary>
/// Defines the page tree node which is the root of the pages tree for the document.
/// </summary>
[NotNull]
public DictionaryToken PagesDictionary { get; }
public Catalog(DictionaryToken catalogDictionary, DictionaryToken pagesDictionary)
/// <summary>
/// Create a new <see cref="Catalog"/>.
/// </summary>
internal Catalog(DictionaryToken catalogDictionary, DictionaryToken pagesDictionary)
{
this.catalogDictionary = catalogDictionary ?? throw new ArgumentNullException(nameof(catalogDictionary));
CatalogDictionary = catalogDictionary ?? throw new ArgumentNullException(nameof(catalogDictionary));
PagesDictionary = pagesDictionary ?? throw new ArgumentNullException(nameof(pagesDictionary));
}

View File

@@ -1,45 +0,0 @@
namespace UglyToad.PdfPig.Cos
{
using System;
using System.Collections.Generic;
using ContentStream;
using Tokens;
using Util.JetBrains.Annotations;
/// <summary>
/// Stores the byte offset recorded for each indirect object along with the dictionary supplied at construction.
/// </summary>
internal class CrossReferenceTable
{
    // Private, mutable copy of the offsets passed to the constructor; see UpdateOffset.
    private readonly Dictionary<IndirectReference, long> objectOffsets;

    /// <summary>
    /// The cross-reference type supplied when this table was created.
    /// </summary>
    public CrossReferenceType Type { get; }

    /// <summary>
    /// Read-only view of the offset stored for each object reference.
    /// </summary>
    [NotNull]
    public IReadOnlyDictionary<IndirectReference, long> ObjectOffsets => objectOffsets;

    /// <summary>
    /// The dictionary supplied when this table was created.
    /// </summary>
    [NotNull]
    public DictionaryToken Dictionary { get; }

    /// <summary>
    /// Create a new <see cref="CrossReferenceTable"/>, copying the provided offsets.
    /// </summary>
    /// <param name="type">The cross-reference type.</param>
    /// <param name="objectOffsets">The offset for each object reference; copied, never stored directly.</param>
    /// <param name="dictionary">The associated dictionary.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="objectOffsets"/> or <paramref name="dictionary"/> is <see langword="null"/>.
    /// </exception>
    public CrossReferenceTable(CrossReferenceType type, IReadOnlyDictionary<IndirectReference, long> objectOffsets, DictionaryToken dictionary)
    {
        // The offsets are validated before the dictionary so the first failing argument is reported.
        if (objectOffsets == null)
        {
            throw new ArgumentNullException(nameof(objectOffsets));
        }

        if (dictionary == null)
        {
            throw new ArgumentNullException(nameof(dictionary));
        }

        Type = type;
        Dictionary = dictionary;

        var copy = new Dictionary<IndirectReference, long>();

        foreach (var pair in objectOffsets)
        {
            copy[pair.Key] = pair.Value;
        }

        this.objectOffsets = copy;
    }

    /// <summary>
    /// Set (or add) the offset stored for the given object reference.
    /// </summary>
    /// <param name="key">The object reference to update.</param>
    /// <param name="offset">The new offset value.</param>
    public void UpdateOffset(IndirectReference key, long offset) => objectOffsets[key] = offset;
}
}

View File

@@ -1,8 +0,0 @@
namespace UglyToad.PdfPig.Cos
{
/// <summary>
/// The type of a cross-reference section: either a table or a stream.
/// </summary>
internal enum CrossReferenceType
{
/// <summary>
/// A cross-reference table.
/// </summary>
Table,
/// <summary>
/// A cross-reference stream.
/// </summary>
Stream
}
}

View File

@@ -0,0 +1,100 @@
namespace UglyToad.PdfPig.CrossReference
{
using System;
using System.Collections.Generic;
using CrossReference;
using Util.JetBrains.Annotations;
/// <summary>
/// Enables random access to the PDF objects within a file by object number, so that a specific object can be
/// located directly without scanning the whole document.
/// A PDF document may contain multiple cross-reference tables; this class provides access to the merged result
/// holding the latest offset for each object. The offsets of the original cross-reference tables or streams that
/// were merged into this result are available in the <see cref="CrossReferenceOffsets"/> list.
/// </summary>
public class CrossReferenceTable
{
    // Private copy of the offsets passed to the constructor, exposed read-only through ObjectOffsets.
    private readonly Dictionary<IndirectReference, long> objectOffsets;

    /// <summary>
    /// The byte offset corresponding to each keyed object in this document.
    /// </summary>
    [NotNull]
    public IReadOnlyDictionary<IndirectReference, long> ObjectOffsets => objectOffsets;

    /// <summary>
    /// The type of the first cross-reference table located in this document.
    /// </summary>
    public CrossReferenceType Type { get; }

    /// <summary>
    /// The trailer dictionary.
    /// </summary>
    [NotNull]
    public TrailerDictionary Trailer { get; }

    /// <summary>
    /// The byte offset of every cross-reference table or stream in this document, together with the offset of
    /// the previous table or stream each one links to, where applicable.
    /// </summary>
    [NotNull]
    public IReadOnlyList<CrossReferenceOffset> CrossReferenceOffsets { get; }

    /// <summary>
    /// Create a new <see cref="CrossReferenceTable"/>, copying the provided object offsets.
    /// </summary>
    /// <param name="type">The cross-reference type.</param>
    /// <param name="objectOffsets">The offset for each object reference; copied into a private dictionary.</param>
    /// <param name="trailer">The trailer dictionary.</param>
    /// <param name="crossReferenceOffsets">The offsets of the individual cross-reference sections.</param>
    /// <exception cref="ArgumentNullException">Any reference-typed argument is <see langword="null"/>.</exception>
    internal CrossReferenceTable(CrossReferenceType type, IReadOnlyDictionary<IndirectReference, long> objectOffsets,
        TrailerDictionary trailer,
        IReadOnlyList<CrossReferenceOffset> crossReferenceOffsets)
    {
        // Arguments are validated in declaration order so the first missing one is the one reported.
        if (objectOffsets == null)
        {
            throw new ArgumentNullException(nameof(objectOffsets));
        }

        if (trailer == null)
        {
            throw new ArgumentNullException(nameof(trailer));
        }

        if (crossReferenceOffsets == null)
        {
            throw new ArgumentNullException(nameof(crossReferenceOffsets));
        }

        Type = type;
        Trailer = trailer;
        CrossReferenceOffsets = crossReferenceOffsets;

        var copy = new Dictionary<IndirectReference, long>();

        foreach (var pair in objectOffsets)
        {
            copy[pair.Key] = pair.Value;
        }

        this.objectOffsets = copy;
    }

    /// <summary>
    /// The offset of a cross-reference table or stream in the document.
    /// </summary>
    public struct CrossReferenceOffset
    {
        /// <summary>
        /// The offset in bytes from the start of the document where the stream or table starts.
        /// </summary>
        public long Current { get; }

        /// <summary>
        /// The offset in bytes from the start of the document where the previous stream or table in a document
        /// containing incremental updates can be found, if applicable.
        /// </summary>
        public long? Previous { get; }

        /// <summary>
        /// Create a new <see cref="CrossReferenceOffset"/>.
        /// </summary>
        /// <param name="current">The offset in bytes from the start of the document where the stream or table starts.</param>
        /// <param name="previous">The offset in bytes from the start of the document where the previous stream or table in a document starts.</param>
        public CrossReferenceOffset(long current, long? previous)
        {
            Current = current;
            Previous = previous;
        }

        /// <summary>
        /// Formats the offset as the current value, followed by a space and the previous value when one is set.
        /// </summary>
        public override string ToString()
        {
            return Previous.HasValue
                ? $"{Current} {Previous}"
                : $"{Current}";
        }
    }
}
}

View File

@@ -1,9 +1,10 @@
namespace UglyToad.PdfPig.Cos
namespace UglyToad.PdfPig.CrossReference
{
using System;
using System.Collections.Generic;
using System.Linq;
using ContentStream;
using Cos;
using Logging;
using Tokens;
@@ -17,7 +18,7 @@
{
private readonly List<CrossReferenceTablePart> parts = new List<CrossReferenceTablePart>();
public IReadOnlyList<CrossReferenceTablePart> Parts => parts;
public void Add(CrossReferenceTablePart part)
{
if (part == null)
@@ -28,7 +29,7 @@
parts.Add(part);
}
public CrossReferenceTable Build(long startXrefOffset, ILog log)
public CrossReferenceTable Build(long firstCrossReferenceOffset, ILog log)
{
CrossReferenceType type = CrossReferenceType.Table;
DictionaryToken trailerDictionary = new DictionaryToken(new Dictionary<IToken, IToken>());
@@ -36,12 +37,12 @@
List<long> xrefSeqBytePos = new List<long>();
var currentPart = parts.FirstOrDefault(x => x.Offset == startXrefOffset);
var currentPart = parts.FirstOrDefault(x => x.Offset == firstCrossReferenceOffset);
if (currentPart == null)
{
// no XRef at given position
log.Warn("Did not found XRef object at specified startxref position " + startXrefOffset);
log.Warn("Did not found XRef object at specified startxref position " + firstCrossReferenceOffset);
// use all objects in byte position order (last entries overwrite previous ones)
xrefSeqBytePos.AddRange(parts.Select(x => x.Offset));
@@ -55,7 +56,7 @@
// found starting Xref object
// add this and follow chain defined by 'Prev' keys
xrefSeqBytePos.Add(startXrefOffset);
xrefSeqBytePos.Add(firstCrossReferenceOffset);
while (currentPart.Dictionary != null)
{
@@ -111,7 +112,13 @@
}
}
return new CrossReferenceTable(type, objectOffsets, trailerDictionary);
return new CrossReferenceTable(type, objectOffsets, new TrailerDictionary(trailerDictionary),
parts.Select(x =>
{
var prev = x.GetPreviousOffset();
return new CrossReferenceTable.CrossReferenceOffset(x.Offset, prev >= 0 ? prev : default(long?));
}).ToList());
}
}
}

View File

@@ -1,7 +1,6 @@
namespace UglyToad.PdfPig.Cos
namespace UglyToad.PdfPig.CrossReference
{
using System.Collections.Generic;
using ContentStream;
using Tokens;
/// <summary>

View File

@@ -1,7 +1,6 @@
namespace UglyToad.PdfPig.Cos
namespace UglyToad.PdfPig.CrossReference
{
using System.Collections.Generic;
using ContentStream;
using Tokens;
internal class CrossReferenceTablePartBuilder

View File

@@ -0,0 +1,17 @@
namespace UglyToad.PdfPig.CrossReference
{
/// <summary>
/// The type of a cross-reference section in a PDF document: either a table or a stream.
/// </summary>
public enum CrossReferenceType
{
/// <summary>
/// The section is a cross-reference table.
/// </summary>
Table,
/// <summary>
/// The section is a cross-reference stream.
/// </summary>
Stream
}
}

View File

@@ -0,0 +1,109 @@
namespace UglyToad.PdfPig.CrossReference
{
using System;
using System.Collections.Generic;
using Exceptions;
using Tokens;
using Util;
using Util.JetBrains.Annotations;
/// <summary>
/// Contains information for interpreting the cross-reference table.
/// </summary>
public class TrailerDictionary
{
    /// <summary>
    /// The total number of object entries across both the original cross-reference table
    /// and in any incremental updates.
    /// </summary>
    /// <remarks>
    /// Any object in a cross-reference section whose number is greater than this value is
    /// ignored and considered missing.
    /// </remarks>
    public int Size { get; }

    /// <summary>
    /// The offset in bytes to the previous cross-reference table or stream
    /// if the document has more than one cross-reference section.
    /// </summary>
    public long? PreviousCrossReferenceOffset { get; }

    /// <summary>
    /// The object reference for the document's catalog dictionary.
    /// </summary>
    public IndirectReference Root { get; }

    /// <summary>
    /// The object reference for the document's information dictionary if it contains one.
    /// </summary>
    public IndirectReference? Info { get; }

    /// <summary>
    /// The string entries of the trailer's Id array, which act as file identifiers.
    /// Empty when the trailer has no Id array.
    /// </summary>
    public IReadOnlyList<string> Identifier { get; }

    /// <summary>
    /// The document's encryption dictionary, or <see langword="null"/> when the trailer has no Encrypt entry.
    /// </summary>
    [CanBeNull]
    public IToken EncryptionToken { get; }

    /// <summary>
    /// Create a new <see cref="TrailerDictionary"/>.
    /// </summary>
    /// <param name="dictionary">The parsed dictionary from the document.</param>
    /// <exception cref="ArgumentNullException"><paramref name="dictionary"/> is <see langword="null"/>.</exception>
    /// <exception cref="PdfDocumentFormatException">The dictionary has no Root entry holding an indirect reference.</exception>
    internal TrailerDictionary(DictionaryToken dictionary)
    {
        if (dictionary == null)
        {
            throw new ArgumentNullException(nameof(dictionary));
        }

        Size = dictionary.GetInt(NameToken.Size);
        PreviousCrossReferenceOffset = dictionary.GetLongOrDefault(NameToken.Prev);

        // The root (catalog) reference is mandatory; every entry read after it is optional.
        var hasRoot = dictionary.TryGet(NameToken.Root, out IndirectReferenceToken rootReference);
        if (!hasRoot)
        {
            throw new PdfDocumentFormatException($"No root token was found in the trailer dictionary: {dictionary}.");
        }

        Root = rootReference.Data;

        Info = dictionary.TryGet(NameToken.Info, out IndirectReferenceToken infoReference)
            ? infoReference.Data
            : default(IndirectReference?);

        Identifier = ReadIdentifiers(dictionary);

        EncryptionToken = dictionary.TryGet(NameToken.Encrypt, out var encryptionToken)
            ? encryptionToken
            : null;
    }

    /// <summary>
    /// Collects the string tokens found in the dictionary's Id array, or an empty list when there is no such array.
    /// </summary>
    private static IReadOnlyList<string> ReadIdentifiers(DictionaryToken dictionary)
    {
        if (!dictionary.TryGet(NameToken.Id, out ArrayToken identifierArray))
        {
            return Array.Empty<string>();
        }

        var identifiers = new List<string>(identifierArray.Data.Count);

        // NOTE(review): only StringToken elements are collected; any other token kind in the Id array
        // (for example a hex-encoded string) is skipped - confirm this is intended.
        foreach (var element in identifierArray.Data)
        {
            var text = element as StringToken;
            if (text != null)
            {
                identifiers.Add(text.Data);
            }
        }

        return identifiers;
    }

    /// <summary>
    /// Summarizes the trailer by its size and root reference.
    /// </summary>
    public override string ToString() => $"Size: {Size}, Root: {Root}";
}
}

View File

@@ -1,6 +1,7 @@
namespace UglyToad.PdfPig.Filters
{
using System;
using System.Collections.Generic;
using System.IO;
using Tokens;
@@ -23,7 +24,7 @@
85 * 85 * 85 *85
};
public byte[] Decode(byte[] input, DictionaryToken streamDictionary, int filterIndex)
public byte[] Decode(IReadOnlyList<byte> input, DictionaryToken streamDictionary, int filterIndex)
{
var asciiBuffer = new byte[5];
@@ -34,7 +35,7 @@
{
for (var i = 0; i < input.Length; i++)
for (var i = 0; i < input.Count; i++)
{
var value = input[i];
@@ -45,7 +46,7 @@
if (value == EndOfDataBytes[0])
{
if (i == input.Length - 1 || input[i + 1] == EndOfDataBytes[1])
if (i == input.Count - 1 || input[i + 1] == EndOfDataBytes[1])
{
if (index > 0)
{

View File

@@ -1,6 +1,7 @@
namespace UglyToad.PdfPig.Filters
{
using System;
using System.Collections.Generic;
using System.IO;
using Tokens;
@@ -21,7 +22,7 @@
/* 100 */ 13, 14, 15
};
public byte[] Decode(byte[] input, DictionaryToken streamDictionary, int filterIndex)
public byte[] Decode(IReadOnlyList<byte> input, DictionaryToken streamDictionary, int filterIndex)
{
var pair = new byte[2];
var index = 0;
@@ -29,7 +30,7 @@
using (var memoryStream = new MemoryStream())
using (var binaryWriter = new BinaryWriter(memoryStream))
{
for (var i = 0; i < input.Length; i++)
for (var i = 0; i < input.Count; i++)
{
if (input[i] == '>')
{

View File

@@ -1,11 +1,12 @@
namespace UglyToad.PdfPig.Filters
{
using System;
using System.Collections.Generic;
using Tokens;
internal class DctDecodeFilter : IFilter
{
public byte[] Decode(byte[] input, DictionaryToken streamDictionary, int filterIndex)
public byte[] Decode(IReadOnlyList<byte> input, DictionaryToken streamDictionary, int filterIndex)
{
throw new NotImplementedException();
}

View File

@@ -4,6 +4,7 @@
using System.Collections.Generic;
using System.IO;
using System.IO.Compression;
using System.Linq;
using Logging;
using Tokens;
using Util;
@@ -33,7 +34,7 @@
this.log = log;
}
public byte[] Decode(byte[] input, DictionaryToken streamDictionary, int filterIndex)
public byte[] Decode(IReadOnlyList<byte> input, DictionaryToken streamDictionary, int filterIndex)
{
if (input == null)
{
@@ -44,9 +45,10 @@
var predictor = parameters.GetIntOrDefault(NameToken.Predictor, -1);
var bytes = input.ToArray();
try
{
var decompressed = Decompress(input);
var decompressed = Decompress(bytes);
if (predictor == -1)
{
@@ -66,7 +68,7 @@
log.Error("Could not decode a flate stream due to an error.", ex);
}
return input;
return bytes;
}
private byte[] Decompress(byte[] input)

View File

@@ -1,9 +1,10 @@
namespace UglyToad.PdfPig.Filters
{
using System.Collections.Generic;
using Tokens;
internal interface IFilter
{
byte[] Decode(byte[] input, DictionaryToken streamDictionary, int filterIndex);
byte[] Decode(IReadOnlyList<byte> input, DictionaryToken streamDictionary, int filterIndex);
}
}

View File

@@ -1,5 +1,6 @@
namespace UglyToad.PdfPig.Filters
{
using System.Collections.Generic;
using System.IO;
using Tokens;
@@ -7,13 +8,13 @@
{
private const byte EndOfDataLength = 128;
public byte[] Decode(byte[] input, DictionaryToken streamDictionary, int filterIndex)
public byte[] Decode(IReadOnlyList<byte> input, DictionaryToken streamDictionary, int filterIndex)
{
using (var memoryStream = new MemoryStream())
using (var writer = new BinaryWriter(memoryStream))
{
var i = 0;
while (i < input.Length)
while (i < input.Count)
{
var runLength = input[i];

View File

@@ -1,5 +1,7 @@
namespace UglyToad.PdfPig.Fonts.CidFonts
{
using System.Collections.Generic;
/// <summary>
/// Specifies mapping from character identifiers to glyph indices.
/// Can either be defined as a name in which case it must be Identity or a stream which defines the mapping.
@@ -15,9 +17,9 @@
map = null;
}
public CharacterIdentifierToGlyphIndexMap(byte[] streamBytes)
public CharacterIdentifierToGlyphIndexMap(IReadOnlyList<byte> streamBytes)
{
var numberOfEntries = streamBytes.Length / 2;
var numberOfEntries = streamBytes.Count / 2;
map = new int[numberOfEntries];
var offset = 0;

View File

@@ -1,6 +1,7 @@
namespace UglyToad.PdfPig.Fonts.CompactFontFormat
{
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Text;
@@ -9,14 +10,14 @@
/// </summary>
internal class CompactFontFormatData
{
private readonly byte[] dataBytes;
private readonly IReadOnlyList<byte> dataBytes;
public int Position { get; private set; } = -1;
public int Length => dataBytes.Length;
public int Length => dataBytes.Count;
[DebuggerStepThrough]
public CompactFontFormatData(byte[] dataBytes)
public CompactFontFormatData(IReadOnlyList<byte> dataBytes)
{
this.dataBytes = dataBytes;
}
@@ -64,9 +65,9 @@
{
Position++;
if (Position >= dataBytes.Length)
if (Position >= dataBytes.Count)
{
throw new IndexOutOfRangeException($"Cannot read byte at position {Position} of an array which is {dataBytes.Length} bytes long.");
throw new IndexOutOfRangeException($"Cannot read byte at position {Position} of an array which is {dataBytes.Count} bytes long.");
}
return dataBytes[Position];
@@ -79,7 +80,7 @@
public bool CanRead()
{
return Position < dataBytes.Length - 1;
return Position < dataBytes.Count - 1;
}
public void Seek(int offset)
@@ -111,13 +112,19 @@
public CompactFontFormatData SnapshotPortion(int startLocation, int length)
{
if (startLocation > dataBytes.Length - 1 || startLocation + length > dataBytes.Length)
if (startLocation > dataBytes.Count - 1 || startLocation + length > dataBytes.Count)
{
throw new ArgumentException($"Attempted to create a snapshot of an invalid portion of the data. Length was {dataBytes.Length}, requested start: {startLocation} and requested length: {length}.");
throw new ArgumentException($"Attempted to create a snapshot of an invalid portion of the data. Length was {dataBytes.Count}, requested start: {startLocation} and requested length: {length}.");
}
var newData = new byte[length];
Array.Copy(dataBytes, startLocation, newData, 0, length);
var newI = 0;
for (var i = startLocation; i < startLocation + length; i++)
{
newData[newI] = dataBytes[i];
newI++;
}
return new CompactFontFormatData(newData);
}
}

View File

@@ -1,11 +1,11 @@
namespace UglyToad.PdfPig.ContentStream
namespace UglyToad.PdfPig
{
using System.Diagnostics;
/// <summary>
/// Used to uniquely identify and refer to objects in the PDF file.
/// </summary>
internal struct IndirectReference
public struct IndirectReference
{
/// <summary>
/// A positive integer object number.
@@ -29,6 +29,7 @@
Generation = generation;
}
/// <inheritdoc />
public override bool Equals(object obj)
{
if (obj is IndirectReference reference)
@@ -40,6 +41,7 @@
return false;
}
/// <inheritdoc />
public override int GetHashCode()
{
unchecked
@@ -52,6 +54,7 @@
}
}
/// <inheritdoc />
public override string ToString()
{
return $"{ObjectNumber} {Generation}";

View File

@@ -1,21 +1,21 @@
namespace UglyToad.PdfPig.Parser
{
using Content;
using IO;
using CrossReference;
using Parts;
using Tokenization.Scanner;
using Tokens;
internal class DocumentInformationFactory
{
public DocumentInformation Create(IPdfTokenScanner pdfTokenScanner, DictionaryToken rootDictionary)
public DocumentInformation Create(IPdfTokenScanner pdfTokenScanner, TrailerDictionary trailer)
{
if (!rootDictionary.TryGet(NameToken.Info, out var infoBase))
if (!trailer.Info.HasValue)
{
return DocumentInformation.Default;
}
var infoParsed = DirectObjectFinder.Get<DictionaryToken>(infoBase, pdfTokenScanner);
var infoParsed = DirectObjectFinder.Get<DictionaryToken>(trailer.Info.Value, pdfTokenScanner);
var title = GetEntryOrDefault(infoParsed, NameToken.Title);
var author = GetEntryOrDefault(infoParsed, NameToken.Author);

View File

@@ -3,6 +3,7 @@
using System;
using System.Collections.Generic;
using Cos;
using CrossReference;
using Exceptions;
using IO;
using Logging;
@@ -30,12 +31,12 @@
this.xrefCosChecker = xrefCosChecker;
}
public CrossReferenceTable Parse(IInputBytes bytes, bool isLenientParsing, long xrefLocation, IPdfTokenScanner pdfScanner, ISeekableTokenScanner tokenScanner)
public CrossReferenceTable Parse(IInputBytes bytes, bool isLenientParsing, long crossReferenceLocation, IPdfTokenScanner pdfScanner, ISeekableTokenScanner tokenScanner)
{
long fixedOffset = offsetValidator.CheckXRefOffset(xrefLocation, tokenScanner, bytes, isLenientParsing);
long fixedOffset = offsetValidator.CheckXRefOffset(crossReferenceLocation, tokenScanner, bytes, isLenientParsing);
if (fixedOffset > -1)
{
xrefLocation = fixedOffset;
crossReferenceLocation = fixedOffset;
log.Debug($"Found the first cross reference table or stream at {fixedOffset}.");
}
@@ -43,7 +44,7 @@
var table = new CrossReferenceTableBuilder();
var prevSet = new HashSet<long>();
long previousCrossReferenceLocation = xrefLocation;
long previousCrossReferenceLocation = crossReferenceLocation;
// Parse all cross reference tables and streams.
while (previousCrossReferenceLocation > 0)
@@ -167,7 +168,7 @@
prevSet.Add(previousCrossReferenceLocation);
}
var resolved = table.Build(xrefLocation, log);
var resolved = table.Build(crossReferenceLocation, log);
// check the offsets of all referenced objects
xrefCosChecker.CheckCrossReferenceOffsets(bytes, resolved, isLenientParsing);

View File

@@ -3,6 +3,7 @@
using System.Collections.Generic;
using System.Linq;
using Cos;
using CrossReference;
using Exceptions;
using Parts.CrossReference;
using Tokenization;

View File

@@ -3,8 +3,7 @@
using System;
using System.Collections.Generic;
using System.Linq;
using ContentStream;
using Cos;
using CrossReference;
using IO;
using Logging;
using Parts;

View File

@@ -4,6 +4,7 @@
using Cos;
using Exceptions;
using Filters;
using PdfPig.CrossReference;
using Tokens;
using Util;
@@ -21,11 +22,11 @@
/// </summary>
public CrossReferenceTablePart Parse(long streamOffset, StreamToken stream)
{
byte[] decoded = stream.Decode(filterProvider);
var decoded = stream.Decode(filterProvider);
var fieldSizes = new CrossReferenceStreamFieldSize(stream.StreamDictionary);
var lineCount = decoded.Length / fieldSizes.LineLength;
var lineCount = decoded.Count / fieldSizes.LineLength;
long previousOffset = -1;
if (stream.StreamDictionary.TryGet(NameToken.Prev, out var prevToken) && prevToken is NumericToken prevNumeric)

View File

@@ -1,5 +1,6 @@
namespace UglyToad.PdfPig.Parser.Parts
{
using ContentStream;
using Exceptions;
using Tokenization.Scanner;
using Tokens;
@@ -36,6 +37,38 @@
return false;
}
/// <summary>
/// Reads the object with the given reference from the scanner and returns its data as <typeparamref name="T"/>.
/// The data is accepted when it is already a <typeparamref name="T"/>, when it is an indirect reference which
/// resolves to one, or when it is a single-element array whose only element is either of those.
/// </summary>
/// <typeparam name="T">The token type expected.</typeparam>
/// <param name="reference">The reference of the object to read.</param>
/// <param name="scanner">The scanner used to retrieve the object.</param>
/// <returns>The located token.</returns>
/// <exception cref="PdfDocumentFormatException">None of the accepted shapes matched the object's data.</exception>
public static T Get<T>(IndirectReference reference, IPdfTokenScanner scanner) where T : IToken
{
    var data = scanner.Get(reference).Data;

    switch (data)
    {
        case T direct:
            return direct;

        // NOTE(review): the argument here is a token rather than an IndirectReference, so this presumably
        // binds to the IToken overload of Get declared alongside this method - confirm.
        case IndirectReferenceToken indirect:
            return Get<T>(indirect, scanner);

        case ArrayToken wrapper when wrapper.Data.Count == 1:
        {
            var onlyElement = wrapper.Data[0];

            // A reference inside the array is followed before the element itself is tested against T.
            if (onlyElement is IndirectReferenceToken elementReference)
            {
                return Get<T>(elementReference, scanner);
            }

            if (onlyElement is T elementToken)
            {
                return elementToken;
            }

            break;
        }
    }

    throw new PdfDocumentFormatException($"Could not find the object number {reference} with type {typeof(T).Name}.");
}
public static T Get<T>(IToken token, IPdfTokenScanner scanner) where T : IToken
{
if (token is T result)

View File

@@ -3,8 +3,7 @@
using System;
using System.IO;
using Content;
using Cos;
using Exceptions;
using CrossReference;
using FileStructure;
using Filters;
using Fonts;
@@ -123,7 +122,7 @@
var rootDictionary = ParseTrailer(crossReferenceTable, isLenientParsing, pdfScanner);
var information = informationFactory.Create(pdfScanner, crossReferenceTable.Dictionary);
var information = informationFactory.Create(pdfScanner, crossReferenceTable.Trailer);
var catalog = catalogFactory.Create(pdfScanner, rootDictionary);
@@ -135,17 +134,12 @@
private static DictionaryToken ParseTrailer(CrossReferenceTable crossReferenceTable, bool isLenientParsing, IPdfTokenScanner pdfTokenScanner)
{
if (crossReferenceTable.Dictionary.ContainsKey(NameToken.Encrypt))
if (crossReferenceTable.Trailer.EncryptionToken != null)
{
throw new NotSupportedException("Cannot currently parse a document using encryption: " + crossReferenceTable.Dictionary);
throw new NotSupportedException("Cannot currently parse a document using encryption: " + crossReferenceTable.Trailer.EncryptionToken);
}
if (!crossReferenceTable.Dictionary.TryGet(NameToken.Root, out var rootToken))
{
throw new PdfDocumentFormatException($"Missing root object specification in trailer: {crossReferenceTable.Dictionary}.");
}
var rootDictionary = DirectObjectFinder.Get<DictionaryToken>(rootToken, pdfTokenScanner);
var rootDictionary = DirectObjectFinder.Get<DictionaryToken>(crossReferenceTable.Trailer.Root, pdfTokenScanner);
if (!rootDictionary.ContainsKey(NameToken.Type) && isLenientParsing)
{

View File

@@ -3,7 +3,7 @@
using System;
using System.IO;
using Content;
using Cos;
using CrossReference;
using IO;
using Logging;
using Parser;
@@ -22,10 +22,7 @@
[NotNull]
private readonly HeaderVersion version;
[NotNull]
private readonly CrossReferenceTable crossReferenceTable;
private readonly ILog log;
private readonly IInputBytes inputBytes;
@@ -34,10 +31,7 @@
private readonly ParsingCachingProviders cachingProviders;
private readonly IPdfTokenScanner pdfScanner;
[NotNull]
private readonly Catalog catalog;
[NotNull]
private readonly Pages pages;
@@ -47,6 +41,12 @@
[NotNull]
public DocumentInformation Information { get; }
/// <summary>
/// Access to the underlying raw structure of the document.
/// </summary>
[NotNull]
internal Structure Structure { get; }
/// <summary>
/// The version number of the PDF specification which this file conforms to, for example 1.4.
/// </summary>
@@ -70,13 +70,12 @@
this.log = log;
this.inputBytes = inputBytes;
this.version = version ?? throw new ArgumentNullException(nameof(version));
this.crossReferenceTable = crossReferenceTable ?? throw new ArgumentNullException(nameof(crossReferenceTable));
this.isLenientParsing = isLenientParsing;
this.cachingProviders = cachingProviders ?? throw new ArgumentNullException(nameof(cachingProviders));
this.pdfScanner = pdfScanner;
this.pdfScanner = pdfScanner ?? throw new ArgumentNullException(nameof(pdfScanner));
Information = information ?? throw new ArgumentNullException(nameof(information));
catalog = catalog ?? throw new ArgumentNullException(nameof(catalog));
pages = new Pages(log, catalog, pageFactory, isLenientParsing, pdfScanner);
Structure = new Structure(catalog, crossReferenceTable, pdfScanner);
}
/// <summary>
@@ -123,7 +122,7 @@
return pages.GetPage(pageNumber);
}
/// <inheritdoc />
/// <summary>
/// Dispose the <see cref="T:UglyToad.PdfPig.PdfDocument" /> and close any unmanaged resources.

View File

@@ -0,0 +1,50 @@
namespace UglyToad.PdfPig
{
using System;
using Content;
using CrossReference;
using Tokenization.Scanner;
using Tokens;
using Util.JetBrains.Annotations;
/// <summary>
/// Provides access to explore and retrieve the underlying PDF objects from the document.
/// </summary>
public class Structure
{
    /// <summary>
    /// The root of the document's hierarchy providing access to the page tree as well as other information.
    /// </summary>
    [NotNull]
    public Catalog Catalog { get; }

    /// <summary>
    /// The cross-reference table enables direct access to objects by number.
    /// </summary>
    [NotNull]
    public CrossReferenceTable CrossReferenceTable { get; }

    /// <summary>
    /// Provides access to tokenization capabilities for objects by object number.
    /// </summary>
    internal IPdfTokenScanner TokenScanner { get; }

    /// <summary>
    /// Create a new <see cref="Structure"/>.
    /// </summary>
    /// <param name="catalog">The document catalog.</param>
    /// <param name="crossReferenceTable">The document's cross-reference table.</param>
    /// <param name="scanner">The scanner used by <see cref="GetObject"/> to retrieve objects.</param>
    /// <exception cref="ArgumentNullException">Any argument is <see langword="null"/>.</exception>
    internal Structure(Catalog catalog, CrossReferenceTable crossReferenceTable,
        IPdfTokenScanner scanner)
    {
        // Arguments are validated in declaration order so the first missing one is the one reported.
        if (catalog == null)
        {
            throw new ArgumentNullException(nameof(catalog));
        }

        if (crossReferenceTable == null)
        {
            throw new ArgumentNullException(nameof(crossReferenceTable));
        }

        if (scanner == null)
        {
            throw new ArgumentNullException(nameof(scanner));
        }

        Catalog = catalog;
        CrossReferenceTable = crossReferenceTable;
        TokenScanner = scanner;
    }

    /// <summary>
    /// Retrieve the tokenized object with the specified object reference number.
    /// </summary>
    /// <param name="reference">The object reference number.</param>
    /// <returns>The tokenized PDF object from the file.</returns>
    public ObjectToken GetObject(IndirectReference reference) => TokenScanner.Get(reference);
}
}

View File

@@ -2,8 +2,7 @@
{
using System;
using System.Collections.Generic;
using ContentStream;
using Cos;
using CrossReference;
using Parser.Parts;
using Tokens;

View File

@@ -4,16 +4,22 @@
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using ContentStream;
using Exceptions;
using Filters;
using IO;
using Parser.Parts;
using Tokens;
using Util;
/// <summary>
/// Tokenizes objects from bytes in a PDF file. Extends <see cref="ISeekableTokenScanner"/> with the
/// ability to retrieve an object by its indirect reference.
/// </summary>
internal interface IPdfTokenScanner : ISeekableTokenScanner
{
/// <summary>
/// Tokenize the object identified by the given indirect reference.
/// </summary>
/// <param name="reference">The indirect reference of the object to tokenize.</param>
/// <returns>The tokenized object.</returns>
ObjectToken Get(IndirectReference reference);
}

View File

@@ -3,13 +3,26 @@
using System;
using System.Collections.Generic;
using System.Text;
using ContentStream;
using Util.JetBrains.Annotations;
internal class ArrayToken : IDataToken<IReadOnlyList<IToken>>
/// <summary>
/// An array object is a one-dimensional collection of objects arranged sequentially.
/// PDF arrays may be heterogeneous; that is, an array's elements may be any combination of numbers, strings,
/// dictionaries, or any other objects, including other arrays.
/// </summary>
public class ArrayToken : IDataToken<IReadOnlyList<IToken>>
{
/// <summary>
/// The tokens contained in this array.
/// </summary>
[NotNull]
public IReadOnlyList<IToken> Data { get; }
public ArrayToken(IReadOnlyList<IToken> data)
/// <summary>
/// Create a new <see cref="ArrayToken"/>.
/// </summary>
/// <param name="data">The tokens contained by this array.</param>
public ArrayToken([NotNull] IReadOnlyList<IToken> data)
{
if (data == null)
{
@@ -42,6 +55,7 @@
Data = result;
}
/// <inheritdoc />
public override string ToString()
{
var builder = new StringBuilder("[ ");

View File

@@ -5,7 +5,7 @@
/// <summary>
/// The boolean object either <see cref="True"/> (<see langword="true"/>) or <see cref="False"/> (<see langword="false"/>).
/// </summary>
internal class BooleanToken : IDataToken<bool>
public class BooleanToken : IDataToken<bool>
{
/// <summary>
/// The boolean token corresponding to <see langword="true"/>.
@@ -33,6 +33,7 @@
Data = data;
}
/// <inheritdoc />
public override bool Equals(object obj)
{
if (!(obj is BooleanToken other))
@@ -43,16 +44,21 @@
return other.Data == Data;
}
/// <summary>
/// Check if two boolean tokens are equal in value.
/// </summary>
/// <param name="other">The token to compare against; may be <see langword="null"/>.</param>
/// <returns>
/// <see langword="true"/> if <paramref name="other"/> is not <see langword="null"/> and holds the same
/// boolean value, <see langword="false"/> otherwise.
/// </returns>
protected bool Equals(BooleanToken other)
{
    // A null token is never equal to this instance; guard before reading its data
    // rather than failing with a NullReferenceException.
    return !ReferenceEquals(other, null) && Data == other.Data;
}
/// <inheritdoc />
public override int GetHashCode() => Data.GetHashCode(); // Hash is taken from the wrapped value only.
/// <inheritdoc />
public override string ToString()
{
return Data.ToString();

View File

@@ -6,12 +6,12 @@
/// A comment from a PDF document. Any occurrence of the percent sign character (%) outside a string or stream
/// introduces a comment. The comment consists of all characters between the percent sign and the end of the line.
/// </summary>
internal class CommentToken : IDataToken<string>
public class CommentToken : IDataToken<string>
{
/// <summary>
/// The text of the comment (excluding the initial percent '%' sign).
/// </summary>
[CanBeNull]
[NotNull]
public string Data { get; }
/// <summary>
@@ -20,12 +20,13 @@
/// <param name="data">The text of the comment.</param>
public CommentToken([CanBeNull]string data)
{
Data = data;
Data = data ?? string.Empty;
}
/// <inheritdoc />
public override string ToString()
{
return Data ?? "NULL";
return Data;
}
}
}

View File

@@ -3,15 +3,27 @@
using System;
using System.Collections.Generic;
using System.Linq;
using Exceptions;
using Parser.Parts;
using Tokenization.Scanner;
using Util.JetBrains.Annotations;
internal class DictionaryToken : IDataToken<IReadOnlyDictionary<string, IToken>>
/// <summary>
/// A dictionary object is an associative table containing pairs of objects, known as the dictionary's entries.
/// The key must be a <see cref="NameToken"/> and the value may be any kind of <see cref="IToken"/>.
/// </summary>
public class DictionaryToken : IDataToken<IReadOnlyDictionary<string, IToken>>
{
/// <summary>
/// The key value pairs in this dictionary.
/// </summary>
[NotNull]
public IReadOnlyDictionary<string, IToken> Data { get; }
/// <summary>
/// Create a new <see cref="DictionaryToken"/>.
/// </summary>
/// <param name="data">The data this dictionary will contain.</param>
public DictionaryToken([NotNull]IReadOnlyDictionary<IToken, IToken> data)
{
if (data == null)
@@ -30,7 +42,7 @@
else
{
// For now:
throw new InvalidOperationException("Key for dictionary token was not a string! " + keyValuePair.Key);
throw new PdfDocumentFormatException("Key for dictionary token was not a string! " + keyValuePair.Key);
}
}
@@ -42,7 +54,7 @@
Data = data;
}
public T Get<T>(NameToken name, IPdfTokenScanner scanner) where T : IToken
internal T Get<T>(NameToken name, IPdfTokenScanner scanner) where T : IToken
{
if (!TryGet(name, out var token) || !(token is T typedToken))
{
@@ -57,6 +69,12 @@
return typedToken;
}
/// <summary>
/// Try and get the entry with a given name.
/// </summary>
/// <param name="name">The name of the entry to retrieve.</param>
/// <param name="token">The token, if it is found.</param>
/// <returns><see langword="true"/> if the token is found, <see langword="false"/> otherwise.</returns>
public bool TryGet(NameToken name, out IToken token)
{
if (name == null)
@@ -67,6 +85,13 @@
return Data.TryGetValue(name.Data, out token);
}
/// <summary>
/// Try and get the entry with a given name and a specific data type.
/// </summary>
/// <typeparam name="T">The expected data type of the dictionary value.</typeparam>
/// <param name="name">The name of the entry to retrieve.</param>
/// <param name="token">The token, if it is found.</param>
/// <returns><see langword="true"/> if the token is found with this type, <see langword="false"/> otherwise.</returns>
public bool TryGet<T>(NameToken name, out T token) where T : IToken
{
token = default(T);
@@ -79,12 +104,30 @@
return true;
}
/// <summary>
/// Whether the dictionary contains an entry with this name.
/// </summary>
/// <param name="name">The name to check; must not be <see langword="null"/>.</param>
/// <returns><see langword="true"/> if an entry with this name exists, <see langword="false"/> otherwise.</returns>
/// <exception cref="ArgumentNullException"><paramref name="name"/> is <see langword="null"/>.</exception>
public bool ContainsKey(NameToken name)
{
    // Fail with a descriptive argument exception instead of a NullReferenceException on name.Data.
    if (name == null)
    {
        throw new ArgumentNullException(nameof(name));
    }

    return Data.ContainsKey(name.Data);
}
/// <summary>
/// Produce a copy of this dictionary in which the entry for <paramref name="key"/> is added, or replaced if it already exists.
/// </summary>
/// <param name="key">The name of the entry to add or replace.</param>
/// <param name="value">The value to store for the entry.</param>
/// <returns>A new <see cref="DictionaryToken"/> containing the added or replaced entry.</returns>
public DictionaryToken With(NameToken key, IToken value)
{
    // Delegate to the string-keyed overload using the name's text.
    var keyText = key.Data;

    return With(keyText, value);
}
/// <summary>
/// Create a copy of this dictionary with the additional entry (or override the value of the existing entry).
/// </summary>
/// <param name="key">The key of the entry to create or override.</param>
/// <param name="value">The value of the entry to create or override.</param>
/// <returns>A new <see cref="DictionaryToken"/> with the entry created or modified.</returns>
public DictionaryToken With(string key, IToken value)
{
var result = new Dictionary<string, IToken>(Data.Count + 1);
@@ -98,7 +141,8 @@
return new DictionaryToken(result);
}
/// <inheritdoc />
public override string ToString()
{
return string.Join(", ", Data.Select(x => $"<{x.Key}, {x.Value}>"));

View File

@@ -1,7 +1,13 @@
namespace UglyToad.PdfPig.Tokens
{
/// <summary>
/// Represents an End Of Line marker found in Type 1 font files. This is not used by the main PDF tokenizer.
/// </summary>
internal class EndOfLineToken : IToken
{
/// <summary>
/// The instance of the end of line token.
/// </summary>
public static EndOfLineToken Token { get; } = new EndOfLineToken();
private EndOfLineToken()

View File

@@ -1,9 +1,14 @@
namespace UglyToad.PdfPig.Tokens
{
using System;
using System.Collections.Generic;
using System.Text;
using Util.JetBrains.Annotations;
internal class HexToken : IDataToken<string>
/// <summary>
/// A token containing string data where the string is encoded as hexadecimal.
/// </summary>
public class HexToken : IDataToken<string>
{
private static readonly Dictionary<char, byte> HexMap = new Dictionary<char, byte>
{
@@ -32,16 +37,33 @@ namespace UglyToad.PdfPig.Tokens
{'f', 0x0F }
};
/// <summary>
/// The string contained in the hex data.
/// </summary>
[NotNull]
public string Data { get; }
/// <summary>
/// The bytes of the hex data.
/// </summary>
[NotNull]
public IReadOnlyList<byte> Bytes { get; }
public HexToken(IReadOnlyList<char> characters)
/// <summary>
/// Create a new <see cref="HexToken"/> from the provided hex characters.
/// </summary>
/// <param name="characters">A set of hex characters 0-9, A - F, a - f representing a string.</param>
public HexToken([NotNull] IReadOnlyList<char> characters)
{
if (characters == null)
{
throw new ArgumentNullException(nameof(characters));
}
var bytes = new List<byte>();
var builder = new StringBuilder();
for (int i = 0; i < characters.Count; i += 2)
for (var i = 0; i < characters.Count; i += 2)
{
char high = characters[i];
char low;
@@ -67,6 +89,12 @@ namespace UglyToad.PdfPig.Tokens
Data = builder.ToString();
}
/// <summary>
/// Convert two hex characters to a byte.
/// </summary>
/// <param name="high">The high nibble.</param>
/// <param name="low">The low nibble.</param>
/// <returns>The byte.</returns>
public static byte Convert(char high, char low)
{
var highByte = HexMap[high];
@@ -75,8 +103,18 @@ namespace UglyToad.PdfPig.Tokens
return (byte)(highByte << 4 | lowByte);
}
public static int ConvertHexBytesToInt(HexToken token)
/// <summary>
/// Convert the bytes in this hex token to an integer.
/// </summary>
/// <param name="token">The token containing the data to convert.</param>
/// <returns>The integer corresponding to the bytes.</returns>
public static int ConvertHexBytesToInt([NotNull] HexToken token)
{
if (token == null)
{
throw new ArgumentNullException(nameof(token));
}
var bytes = token.Bytes;
var value = bytes[0] & 0xFF;

View File

@@ -1,7 +1,15 @@
namespace UglyToad.PdfPig.Tokens
{
internal interface IDataToken<out T> : IToken
/// <inheritdoc />
/// <summary>
/// A token from a PDF document which contains data in some format.
/// </summary>
/// <typeparam name="T">The type of the data this token contains.</typeparam>
public interface IDataToken<out T> : IToken
{
/// <summary>
/// The data this token contains.
/// </summary>
T Data { get; }
}
}

View File

@@ -1,9 +1,9 @@
namespace UglyToad.PdfPig.Tokens
{
/// <summary>
/// A marker interface for tokens from the content.
/// A marker interface for tokens from the PDF file contents.
/// </summary>
internal interface IToken
public interface IToken
{
}
}

View File

@@ -1,16 +1,28 @@
namespace UglyToad.PdfPig.Tokens
{
using ContentStream;
using CrossReference;
internal class IndirectReferenceToken : IDataToken<IndirectReference>
/// <summary>
/// A reference to an indirect object (see <see cref="ObjectToken"/>). This object may be located by using the
/// <see cref="CrossReferenceTable"/>.
/// </summary>
public class IndirectReferenceToken : IDataToken<IndirectReference>
{
/// <summary>
/// The identifier for an object in the PDF file.
/// </summary>
public IndirectReference Data { get; }
/// <summary>
/// Create a new <see cref="IndirectReferenceToken"/> wrapping the given reference.
/// </summary>
/// <param name="data">The identifier of the object this token refers to.</param>
public IndirectReferenceToken(IndirectReference data) => Data = data;
/// <inheritdoc />
public override string ToString()
{
return $"{Data}";

View File

@@ -2,10 +2,11 @@
{
using System.Collections.Concurrent;
internal partial class NameToken
public partial class NameToken
{
private static readonly ConcurrentDictionary<string, NameToken> NameMap = new ConcurrentDictionary<string, NameToken>();
#pragma warning disable CS1591 // Missing XML comment for publicly visible type or member
#region A
public static readonly NameToken A = new NameToken("A");
public static readonly NameToken Aa = new NameToken("AA");
@@ -530,5 +531,6 @@
// Y
public static readonly NameToken YStep = new NameToken("YStep");
public static readonly NameToken Yes = new NameToken("Yes");
#pragma warning restore CS1591 // Missing XML comment for publicly visible type or member
}
}

View File

@@ -1,7 +1,19 @@
namespace UglyToad.PdfPig.Tokens
{
internal partial class NameToken : IDataToken<string>
using System;
using Util.JetBrains.Annotations;
/// <summary>
/// A name object is an atomic symbol uniquely defined by a sequence of characters.
/// Each name is considered identical if it has the same sequence of characters. Names are used in
/// PDF documents to identify dictionary keys and other elements of a PDF document.
/// </summary>
public partial class NameToken : IDataToken<string>
{
/// <summary>
/// The string representation of the name.
/// </summary>
[NotNull]
public string Data { get; }
private NameToken(string text)
@@ -11,8 +23,19 @@
Data = text;
}
public static NameToken Create(string name)
/// <summary>
/// Creates a new <see cref="NameToken"/> with the given name, ensuring only one instance of each
/// <see cref="NameToken"/> can exist.
/// </summary>
/// <param name="name">The string representation of the name for the token to create.</param>
/// <returns>The created or existing token.</returns>
public static NameToken Create([NotNull] string name)
{
if (name == null)
{
throw new ArgumentNullException(nameof(name));
}
if (!NameMap.TryGetValue(name, out var value))
{
return new NameToken(name);
@@ -20,27 +43,37 @@
return value;
}
/// <inheritdoc />
public override bool Equals(object obj) => Equals(obj as NameToken); // Non-NameToken objects become null here.
/// <summary>
/// Whether this name and <paramref name="other"/> have the same string value.
/// </summary>
/// <param name="other">The name to compare against; may be <see langword="null"/>.</param>
public bool Equals(NameToken other) => string.Equals(Data, other?.Data);
/// <inheritdoc />
public override int GetHashCode() => Data.GetHashCode(); // Hash on the name text, matching Equals.
/// <summary>
/// Implicitly convert a name token to its string value.
/// </summary>
/// <param name="name">The name token to convert; a <see langword="null"/> token converts to <see langword="null"/>.</param>
public static implicit operator string(NameToken name) => name?.Data;
/// <inheritdoc />
public override string ToString()
{
return $"/{Data}";

View File

@@ -1,11 +1,49 @@
namespace UglyToad.PdfPig.Tokens
{
internal class NullToken : IDataToken<object>
/// <summary>
/// The null object has a type and value that are unequal to those of any other object.
/// There is only one object of type null, denoted by the keyword null.
/// An indirect object reference to a nonexistent object is treated the same as the null object.
/// Specifying the null object as the value of a dictionary entry is equivalent to omitting the entry entirely.
/// </summary>
public class NullToken : IDataToken<object>
{
/// <summary>
/// The single instance of the <see cref="NullToken"/>.
/// </summary>
public static NullToken Instance { get; } = new NullToken();
/// <summary>
/// <see langword="null"/>.
/// </summary>
public object Data { get; } = null;
private NullToken() { }
/// <inheritdoc />
public override bool Equals(object obj) => obj is NullToken; // Any NullToken equals any other.
/// <summary>
/// Whether two null tokens are equal.
/// </summary>
/// <param name="other">The token to compare against; may be <see langword="null"/>.</param>
/// <returns>
/// <see langword="true"/> if <paramref name="other"/> is a <see cref="NullToken"/> instance,
/// <see langword="false"/> if it is <see langword="null"/>.
/// </returns>
protected bool Equals(NullToken other)
{
    // Data is always null for a NullToken, so every non-null instance compares equal.
    // Checking the reference avoids a NullReferenceException on other.Data.
    return !ReferenceEquals(other, null);
}
/// <inheritdoc />
public override int GetHashCode() => 0; // Constant hash: all null tokens compare equal.
/// <inheritdoc />
public override string ToString() => "null";
}
}

View File

@@ -3,22 +3,44 @@
using System;
using System.Globalization;
internal class NumericToken : IDataToken<decimal>
/// <summary>
/// PDF supports integer and real numbers. Integer objects represent mathematical integers within a certain interval centered at 0.
/// Real objects approximate mathematical real numbers, but with limited range and precision.
/// This token represents both types and they are used interchangeably in the specification.
/// </summary>
public class NumericToken : IDataToken<decimal>
{
/// <inheritdoc />
public decimal Data { get; }
public bool IsWhole { get; }
/// <summary>
/// Whether the number represented has a non-zero decimal part.
/// </summary>
public bool HasDecimalPlaces { get; }
/// <summary>
/// The value of this number as an <see langword="int"/>.
/// </summary>
public int Int { get; }
/// <summary>
/// Whether the number overflows an integer.
/// </summary>
public bool IsBiggerThanInt { get; }
/// <summary>
/// The value of this number as a <see langword="long"/>.
/// </summary>
public long Long { get; }
/// <summary>
/// Create a <see cref="NumericToken"/>.
/// </summary>
/// <param name="value">The number to represent.</param>
public NumericToken(decimal value)
{
Data = value;
IsWhole = decimal.Floor(value) == value;
HasDecimalPlaces = decimal.Floor(value) != value;
Long = (long) value;
try
@@ -31,6 +53,7 @@
}
}
/// <inheritdoc />
public override string ToString()
{
return Data.ToString(NumberFormatInfo.InvariantInfo);

View File

@@ -1,13 +1,19 @@
namespace UglyToad.PdfPig.Tokens
{
using System;
using ContentStream;
using Util.JetBrains.Annotations;
internal class ObjectToken : IDataToken<IToken>
/// <summary>
/// Any object in a PDF file may be labeled as an indirect object. This gives the object a unique object identifier by which other objects can refer to it.
/// These objects contain inner data of any type.
/// </summary>
public class ObjectToken : IDataToken<IToken>
{
/// <summary>
/// The offset of the start of the object number in the file bytes.
/// The offset to the start of the object number from the start of the file in bytes.
/// </summary>
public long Position { get; set; }
public long Position { get; }
/// <summary>
/// The object and generation number of the object.
@@ -17,13 +23,26 @@
/// <summary>
/// The inner data of the object.
/// </summary>
[NotNull]
public IToken Data { get; }
/// <summary>
/// Create a new <see cref="ObjectToken"/> from the PDF document at the given offset with the identifier and inner data.
/// </summary>
/// <param name="position">The offset in bytes from the start of the file for this object.</param>
/// <param name="number">The identifier for this object.</param>
/// <param name="data">The data contained in this object.</param>
public ObjectToken(long position, IndirectReference number, IToken data)
{
Position = position;
Number = number;
Data = data;
Data = data ?? throw new ArgumentNullException(nameof(data));
}
/// <inheritdoc />
public override string ToString()
{
    // Report the runtime type of the inner token rather than its full contents.
    var innerTypeName = Data.GetType().Name;

    return $"Number: {Number}, Position: {Position}, Type: {innerTypeName}";
}
}
}

View File

@@ -1,24 +1,44 @@
namespace UglyToad.PdfPig.Tokens
{
using System;
using System.Collections.Generic;
using Filters;
using Util.JetBrains.Annotations;
internal class StreamToken : IDataToken<byte[]>
/// <summary>
/// A stream consists of a dictionary followed by zero or more bytes bracketed between the keywords stream and endstream.
/// The bytes may be compressed by application of zero or more filters which are run in the order specified in the <see cref="StreamDictionary"/>.
/// </summary>
public class StreamToken : IDataToken<IReadOnlyList<byte>>
{
private readonly object lockObject = new object();
private byte[] decodedBytes;
private IReadOnlyList<byte> decodedBytes;
/// <summary>
/// The dictionary specifying the length of the stream, any applied compression filters and additional information.
/// </summary>
[NotNull]
public DictionaryToken StreamDictionary { get; }
public byte[] Data { get; }
/// <summary>
/// The compressed byte data of the stream.
/// </summary>
[NotNull]
public IReadOnlyList<byte> Data { get; }
public StreamToken(DictionaryToken streamDictionary, byte[] data)
/// <summary>
/// Create a new <see cref="StreamToken"/>.
/// </summary>
/// <param name="streamDictionary">The stream dictionary.</param>
/// <param name="data">The stream data.</param>
public StreamToken([NotNull] DictionaryToken streamDictionary, [NotNull] IReadOnlyList<byte> data)
{
StreamDictionary = streamDictionary;
Data = data;
StreamDictionary = streamDictionary ?? throw new ArgumentNullException(nameof(streamDictionary));
Data = data ?? throw new ArgumentNullException(nameof(data));
}
public byte[] Decode(IFilterProvider filterProvider)
internal IReadOnlyList<byte> Decode(IFilterProvider filterProvider)
{
lock (lockObject)
{
@@ -40,5 +60,11 @@
return transform;
}
}
/// <inheritdoc />
public override string ToString() => $"Length: {Data.Count}, Dictionary: {StreamDictionary}";
}
}

View File

@@ -1,14 +1,29 @@
namespace UglyToad.PdfPig.Tokens
{
internal class StringToken : IDataToken<string>
using System;
using Util.JetBrains.Annotations;
/// <summary>
/// Represents a string of text contained in a PDF document.
/// </summary>
public class StringToken : IDataToken<string>
{
/// <summary>
/// The string in the token.
/// </summary>
[NotNull]
public string Data { get; }
public StringToken(string data)
/// <summary>
/// Create a new <see cref="StringToken"/>.
/// </summary>
/// <param name="data">The string data for the token to contain.</param>
public StringToken([NotNull] string data)
{
Data = data;
Data = data ?? throw new ArgumentNullException(nameof(data));
}
/// <inheritdoc />
public override string ToString()
{
return $"({Data})";

View File

@@ -7,6 +7,21 @@
internal static class DictionaryTokenExtensions
{
/// <summary>
/// Get the integer value of the numeric entry with the given name.
/// </summary>
/// <param name="token">The dictionary to read from; must not be <see langword="null"/>.</param>
/// <param name="name">The name of the entry to read.</param>
/// <returns>The <see cref="NumericToken.Int"/> value of the entry.</returns>
/// <exception cref="ArgumentNullException"><paramref name="token"/> is <see langword="null"/>.</exception>
/// <exception cref="PdfDocumentFormatException">The entry is missing or is not a <see cref="NumericToken"/>.</exception>
public static int GetInt(this DictionaryToken token, NameToken name)
{
    if (token == null)
    {
        throw new ArgumentNullException(nameof(token));
    }

    // Unlike the *OrDefault variants, a missing or non-numeric entry is treated as a malformed document.
    if (!token.TryGet(name, out var keyedToken) || !(keyedToken is NumericToken numeric))
    {
        throw new PdfDocumentFormatException($"The dictionary did not contain a number with the key {name}. Dictionary was: {token}.");
    }

    return numeric.Int;
}
public static int GetIntOrDefault(this DictionaryToken token, NameToken name, int defaultValue = 0)
{
if (token == null)
@@ -22,6 +37,21 @@
return numeric.Int;
}
/// <summary>
/// Get the long value of the numeric entry with the given name, if there is one.
/// </summary>
/// <param name="token">The dictionary to read from; must not be <see langword="null"/>.</param>
/// <param name="name">The name of the entry to read.</param>
/// <returns>
/// The <see cref="NumericToken.Long"/> value of the entry, or <see langword="null"/> when the entry
/// is missing or is not a <see cref="NumericToken"/>.
/// </returns>
public static long? GetLongOrDefault(this DictionaryToken token, NameToken name)
{
    if (token == null)
    {
        throw new ArgumentNullException(nameof(token));
    }

    // Only a present, numeric entry yields a value.
    if (token.TryGet(name, out var entry) && entry is NumericToken number)
    {
        return number.Long;
    }

    return null;
}
[CanBeNull]
public static NameToken GetNameOrDefault(this DictionaryToken token, NameToken name)
{