Move usages of CosObjectKey over to IndirectReference

This commit is contained in:
Eliot Jones
2018-01-14 14:48:54 +00:00
parent b19b96604d
commit 36c0eedd7c
15 changed files with 102 additions and 73 deletions

View File

@@ -87,7 +87,7 @@ startxref
foreach (var keyValuePair in locations)
{
Assert.Contains(newLocations.Keys, x => ReferenceEquals(x, keyValuePair.Key));
Assert.Contains(newLocations.Keys, x => x.Equals(keyValuePair.Key));
}
}
}

View File

@@ -122,14 +122,14 @@ trailer
Assert.Equal(2, result.ObjectOffsets.Count);
var results = result.ObjectOffsets.Select(x => new {x.Key.Number, x.Key.Generation, x.Value}).ToList();
var results = result.ObjectOffsets.Select(x => new {x.Key.ObjectNumber, x.Key.Generation, x.Value}).ToList();
Assert.Equal(100, results[0].Value);
Assert.Equal(1, results[0].Number);
Assert.Equal(1, results[0].ObjectNumber);
Assert.Equal(0, results[0].Generation);
Assert.Equal(200, results[1].Value);
Assert.Equal(2, results[1].Number);
Assert.Equal(2, results[1].ObjectNumber);
Assert.Equal(5, results[1].Generation);
}
@@ -147,14 +147,14 @@ trailer
Assert.Equal(2, result.ObjectOffsets.Count);
var results = result.ObjectOffsets.Select(x => new { x.Key.Number, x.Key.Generation, x.Value }).ToList();
var results = result.ObjectOffsets.Select(x => new { x.Key.ObjectNumber, x.Key.Generation, x.Value }).ToList();
Assert.Equal(190, results[0].Value);
Assert.Equal(15, results[0].Number);
Assert.Equal(15, results[0].ObjectNumber);
Assert.Equal(0, results[0].Generation);
Assert.Equal(250, results[1].Value);
Assert.Equal(16, results[1].Number);
Assert.Equal(16, results[1].ObjectNumber);
Assert.Equal(32, results[1].Generation);
}
@@ -173,14 +173,14 @@ trailer
Assert.Equal(2, result.ObjectOffsets.Count);
var results = result.ObjectOffsets.Select(x => new { x.Key.Number, x.Key.Generation, x.Value }).ToList();
var results = result.ObjectOffsets.Select(x => new { x.Key.ObjectNumber, x.Key.Generation, x.Value }).ToList();
Assert.Equal(190, results[0].Value);
Assert.Equal(15, results[0].Number);
Assert.Equal(15, results[0].ObjectNumber);
Assert.Equal(0, results[0].Generation);
Assert.Equal(250, results[1].Value);
Assert.Equal(16, results[1].Number);
Assert.Equal(16, results[1].ObjectNumber);
Assert.Equal(32, results[1].Generation);
}
@@ -203,26 +203,26 @@ trailer
Assert.Equal(5, result.ObjectOffsets.Count);
var results = result.ObjectOffsets.Select(x => new { x.Key.Number, x.Key.Generation, x.Value }).ToList();
var results = result.ObjectOffsets.Select(x => new { x.Key.ObjectNumber, x.Key.Generation, x.Value }).ToList();
Assert.Equal(100, results[0].Value);
Assert.Equal(1, results[0].Number);
Assert.Equal(1, results[0].ObjectNumber);
Assert.Equal(0, results[0].Generation);
Assert.Equal(200, results[1].Value);
Assert.Equal(2, results[1].Number);
Assert.Equal(2, results[1].ObjectNumber);
Assert.Equal(5, results[1].Generation);
Assert.Equal(230, results[2].Value);
Assert.Equal(3, results[2].Number);
Assert.Equal(3, results[2].ObjectNumber);
Assert.Equal(5, results[2].Generation);
Assert.Equal(190, results[3].Value);
Assert.Equal(15, results[3].Number);
Assert.Equal(15, results[3].ObjectNumber);
Assert.Equal(7, results[3].Generation);
Assert.Equal(250, results[4].Value);
Assert.Equal(16, results[4].Number);
Assert.Equal(16, results[4].ObjectNumber);
Assert.Equal(32, results[4].Generation);
}

View File

@@ -1,34 +1,29 @@
namespace UglyToad.PdfPig.Cos
{
using System.Collections.Generic;
using ContentStream;
internal class CosObjectPool
{
private readonly Dictionary<CosObjectKey, CosObject> objects = new Dictionary<CosObjectKey, CosObject>();
private readonly Dictionary<IndirectReference, CosObject> objects = new Dictionary<IndirectReference, CosObject>();
public CosObject Get(CosObjectKey key)
public CosObject Get(IndirectReference key)
{
if (key != null)
if (objects.TryGetValue(key, out var value))
{
if (objects.TryGetValue(key, out var value))
{
return value;
}
return value;
}
// this was a forward reference, make "proxy" object
var obj = new CosObject(null);
if (key != null)
{
obj.SetObjectNumber(key.Number);
obj.SetGenerationNumber((int)key.Generation);
objects[key] = obj;
}
obj.SetObjectNumber(key.ObjectNumber);
obj.SetGenerationNumber(key.Generation);
objects[key] = obj;
return obj;
}
public CosObject GetOrCreateDefault(CosObjectKey key)
public CosObject GetOrCreateDefault(IndirectReference key)
{
if (!objects.TryGetValue(key, out CosObject obj))
{

View File

@@ -9,15 +9,15 @@
{
public CrossReferenceType Type { get; }
private readonly Dictionary<CosObjectKey, long> objectOffsets;
private readonly Dictionary<IndirectReference, long> objectOffsets;
[NotNull]
public IReadOnlyDictionary<CosObjectKey, long> ObjectOffsets => objectOffsets;
public IReadOnlyDictionary<IndirectReference, long> ObjectOffsets => objectOffsets;
[NotNull]
public PdfDictionary Dictionary { get; }
public CrossReferenceTable(CrossReferenceType type, IReadOnlyDictionary<CosObjectKey, long> objectOffsets, PdfDictionary dictionary)
public CrossReferenceTable(CrossReferenceType type, IReadOnlyDictionary<IndirectReference, long> objectOffsets, PdfDictionary dictionary)
{
if (objectOffsets == null)
{
@@ -27,7 +27,7 @@
Type = type;
Dictionary = dictionary ?? throw new ArgumentNullException(nameof(dictionary));
var result = new Dictionary<CosObjectKey, long>();
var result = new Dictionary<IndirectReference, long>();
foreach (var objectOffset in objectOffsets)
{
result[objectOffset.Key] = objectOffset.Value;
@@ -36,7 +36,7 @@
this.objectOffsets = result;
}
public void UpdateOffset(CosObjectKey key, long offset)
public void UpdateOffset(IndirectReference key, long offset)
{
objectOffsets[key] = offset;
}

View File

@@ -32,7 +32,7 @@
{
CrossReferenceType type = CrossReferenceType.Table;
PdfDictionary trailerDictionary = new PdfDictionary();
Dictionary<CosObjectKey, long> objectOffsets = new Dictionary<CosObjectKey, long>();
Dictionary<IndirectReference, long> objectOffsets = new Dictionary<IndirectReference, long>();
List<long> xrefSeqBytePos = new List<long>();

View File

@@ -23,7 +23,7 @@
/// </remarks>
internal class CrossReferenceTablePart
{
public IReadOnlyDictionary<CosObjectKey, long> ObjectOffsets { get; }
public IReadOnlyDictionary<IndirectReference, long> ObjectOffsets { get; }
public long Offset { get; private set; }
@@ -33,7 +33,7 @@
public CrossReferenceType Type { get; }
public CrossReferenceTablePart(IReadOnlyDictionary<CosObjectKey, long> objectOffsets, long offset, long previous, PdfDictionary dictionary, CrossReferenceType type)
public CrossReferenceTablePart(IReadOnlyDictionary<IndirectReference, long> objectOffsets, long offset, long previous, PdfDictionary dictionary, CrossReferenceType type)
{
ObjectOffsets = objectOffsets;
Offset = offset;

View File

@@ -5,7 +5,7 @@
internal class CrossReferenceTablePartBuilder
{
private readonly Dictionary<CosObjectKey, long> objects = new Dictionary<CosObjectKey, long>();
private readonly Dictionary<IndirectReference, long> objects = new Dictionary<IndirectReference, long>();
public long Offset { get; set; }
@@ -17,7 +17,7 @@
public void Add(long objectId, int generationNumber, long offset)
{
CosObjectKey objKey = new CosObjectKey(objectId, generationNumber);
IndirectReference objKey = new IndirectReference(objectId, generationNumber);
if (!objects.ContainsKey(objKey))
{

View File

@@ -63,7 +63,7 @@
throw new ArgumentNullException(nameof(pool));
}
var key = new CosObjectKey(objectNumber, objectGeneration);
var key = new IndirectReference(objectNumber, objectGeneration);
var pdfObject = pool.GetOrCreateDefault(key);
@@ -112,7 +112,7 @@
}
private CosBase ParseObjectFromFile(long offset, IRandomAccessRead reader,
CosObjectKey key,
IndirectReference key,
CosObjectPool pool,
bool isLenientParsing)
{
@@ -123,7 +123,7 @@
ReadHelper.ReadExpectedString(reader, "obj", true);
if (objectNumber != key.Number || objectGeneration != key.Generation)
if (objectNumber != key.ObjectNumber || objectGeneration != key.Generation)
{
throw new InvalidOperationException($"Xref for {key} points to object {objectNumber} {objectGeneration} at {offset}");
}
@@ -213,7 +213,7 @@
// register all objects which are referenced to be contained in object stream
foreach (var next in objects)
{
var streamKey = new CosObjectKey(next);
var streamKey = new IndirectReference(next.GetObjectNumber(), next.GetGenerationNumber());
var offset = TryGet(streamKey, crossReferenceTable.ObjectOffsets);
if (offset != null && offset == -streamObjectNumber)

View File

@@ -3,6 +3,7 @@
using System;
using System.Collections.Generic;
using System.Linq;
using ContentStream;
using Cos;
using IO;
using Parts;
@@ -12,9 +13,9 @@
{
private static readonly long MINIMUM_SEARCH_OFFSET = 6;
private Dictionary<CosObjectKey, long> bfSearchCOSObjectKeyOffsets;
private Dictionary<IndirectReference, long> bfSearchCOSObjectKeyOffsets;
private bool validateXrefOffsets(IRandomAccessRead reader, Dictionary<CosObjectKey, long> xrefOffset)
private bool validateXrefOffsets(IRandomAccessRead reader, Dictionary<IndirectReference, long> xrefOffset)
{
if (xrefOffset == null)
{
@@ -22,7 +23,7 @@
}
foreach (var objectEntry in xrefOffset)
{
CosObjectKey objectKey = objectEntry.Key;
IndirectReference objectKey = objectEntry.Key;
long objectOffset = objectEntry.Value;
// a negative offset number represents a object number itself
// see type 2 entry in xref stream
@@ -37,14 +38,14 @@
return true;
}
private bool checkObjectKeys(IRandomAccessRead source, CosObjectKey objectKey, long offset)
private bool checkObjectKeys(IRandomAccessRead source, IndirectReference objectKey, long offset)
{
// there can't be any object at the very beginning of a pdf
if (offset < MINIMUM_SEARCH_OFFSET)
{
return false;
}
long objectNr = objectKey.Number;
long objectNr = objectKey.ObjectNumber;
long objectGen = objectKey.Generation;
long originOffset = source.GetPosition();
string objectString = ObjectHelper.createObjectString(objectNr, objectGen);
@@ -71,7 +72,7 @@
}
private Dictionary<CosObjectKey, long> getBFCosObjectOffsets(IRandomAccessRead reader)
private Dictionary<IndirectReference, long> getBFCosObjectOffsets(IRandomAccessRead reader)
{
if (bfSearchCOSObjectKeyOffsets == null)
{
@@ -83,7 +84,7 @@
private void bfSearchForObjects(IRandomAccessRead source)
{
bfSearchForLastEOFMarker(source);
bfSearchCOSObjectKeyOffsets = new Dictionary<CosObjectKey, long>();
bfSearchCOSObjectKeyOffsets = new Dictionary<IndirectReference, long>();
long originOffset = source.GetPosition();
long currentOffset = MINIMUM_SEARCH_OFFSET;
long lastObjectId = long.MinValue;
@@ -125,7 +126,7 @@
if (lastObjOffset > 0)
{
// add the former object ID only if there was a subsequent object ID
bfSearchCOSObjectKeyOffsets[new CosObjectKey(lastObjectId, lastGenID)] = lastObjOffset;
bfSearchCOSObjectKeyOffsets[new IndirectReference(lastObjectId, lastGenID)] = lastObjOffset;
}
lastObjectId = objectId;
lastGenID = genID;
@@ -147,7 +148,7 @@
{
// if the pdf wasn't cut off in the middle or if the last object ends with a "endobj" marker
// the last object id has to be added here so that it can't get lost as there isn't any subsequent object id
bfSearchCOSObjectKeyOffsets[new CosObjectKey(lastObjectId, lastGenID)] = lastObjOffset;
bfSearchCOSObjectKeyOffsets[new IndirectReference(lastObjectId, lastGenID)] = lastObjOffset;
}
// reestablish origin position
@@ -166,33 +167,33 @@
{
return;
}
Dictionary<CosObjectKey, long> xrefOffset = xrefTrailerResolver.ObjectOffsets.ToDictionary(x => x.Key, x => x.Value);
Dictionary<IndirectReference, long> xrefOffset = xrefTrailerResolver.ObjectOffsets.ToDictionary(x => x.Key, x => x.Value);
if (validateXrefOffsets(reader, xrefOffset))
{
return;
}
Dictionary<CosObjectKey, long> bfCOSObjectKeyOffsets = getBFCosObjectOffsets(reader);
Dictionary<IndirectReference, long> bfCOSObjectKeyOffsets = getBFCosObjectOffsets(reader);
if (bfCOSObjectKeyOffsets.Count > 0)
{
List<CosObjectKey> objStreams = new List<CosObjectKey>();
List<IndirectReference> objStreams = new List<IndirectReference>();
// find all object streams
foreach (var entry in xrefOffset)
{
long offset = entry.Value;
if (offset < 0)
{
CosObjectKey objStream = new CosObjectKey(-offset, 0);
IndirectReference objStream = new IndirectReference(-offset, 0);
if (!objStreams.Contains(objStream))
{
objStreams.Add(new CosObjectKey(-offset, 0));
objStreams.Add(new IndirectReference(-offset, 0));
}
}
}
// remove all found object streams
if (objStreams.Count > 0)
{
foreach (CosObjectKey key in objStreams)
foreach (IndirectReference key in objStreams)
{
if (bfCOSObjectKeyOffsets.ContainsKey(key))
{

View File

@@ -41,7 +41,7 @@
public CosBase Parse(IndirectReference indirectReference, IRandomAccessRead reader, bool isLenientParsing = true, bool requireExistingObject = false)
{
var key = new CosObjectKey(indirectReference.ObjectNumber, indirectReference.Generation);
var key = new IndirectReference(indirectReference.ObjectNumber, indirectReference.Generation);
var pdfObject = objectPool.GetOrCreateDefault(key);
@@ -76,7 +76,7 @@
return CosNull.Null;
}
throw new InvalidOperationException($"Could not locate the object {key.Number} which was not found in the cross reference table.");
throw new InvalidOperationException($"Could not locate the object {key.ObjectNumber} which was not found in the cross reference table.");
}
var isCompressedStreamObject = offsetOrStreamNumber <= 0;
@@ -90,7 +90,7 @@
}
private CosBase ParseObjectFromFile(long offset, IRandomAccessRead reader,
CosObjectKey key,
IndirectReference key,
CosObjectPool pool,
bool isLenientParsing)
{
@@ -101,7 +101,7 @@
ReadHelper.ReadExpectedString(reader, "obj", true);
if (objectNumber != key.Number || objectGeneration != key.Generation)
if (objectNumber != key.ObjectNumber || objectGeneration != key.Generation)
{
throw new InvalidOperationException($"Xref for {key} points to object {objectNumber} {objectGeneration} at {offset}");
}
@@ -193,7 +193,7 @@
// register all objects which are referenced to be contained in object stream
foreach (var next in objects)
{
var streamKey = new CosObjectKey(next);
var streamKey = new IndirectReference(next.GetObjectNumber(), next.GetGenerationNumber());
var offset = TryGet(streamKey, crossReferenceTable.ObjectOffsets);
if (offset != null && offset == -streamObjectNumber)

View File

@@ -2,7 +2,7 @@
{
using System;
using System.Collections.Generic;
using Cos;
using ContentStream;
using IO;
using Util;
using Util.JetBrains.Annotations;
@@ -16,7 +16,7 @@
private readonly IRandomAccessRead reader;
private Dictionary<CosObjectKey, long> objectLocations;
private Dictionary<IndirectReference, long> objectLocations;
public BruteForceSearcher([NotNull] IRandomAccessRead reader)
{
@@ -24,7 +24,7 @@
}
[NotNull]
public IReadOnlyDictionary<CosObjectKey, long> GetObjectLocations()
public IReadOnlyDictionary<IndirectReference, long> GetObjectLocations()
{
if (objectLocations != null)
{
@@ -33,7 +33,7 @@
var lastEndOfFile = GetLastEndOfFileMarker();
var results = new Dictionary<CosObjectKey, long>();
var results = new Dictionary<IndirectReference, long>();
var originPosition = reader.GetPosition();
@@ -81,7 +81,7 @@
if (lastObjOffset > 0)
{
// add the former object ID only if there was a subsequent object ID
results[new CosObjectKey(lastObjectId, lastGenerationId)] = lastObjOffset;
results[new IndirectReference(lastObjectId, lastGenerationId)] = lastObjOffset;
}
lastObjectId = objectId;
lastGenerationId = generationId;
@@ -103,7 +103,7 @@
{
// if the pdf wasn't cut off in the middle or if the last object ends with a "endobj" marker
// the last object id has to be added here so that it can't get lost as there isn't any subsequent object id
results[new CosObjectKey(lastObjectId, lastGenerationId)] = lastObjOffset;
results[new IndirectReference(lastObjectId, lastGenerationId)] = lastObjOffset;
}
// reestablish origin position

View File

@@ -1,5 +1,6 @@
namespace UglyToad.PdfPig.Parser.Parts
{
using ContentStream;
using Cos;
using IO;
using Util;
@@ -25,7 +26,7 @@
if (po.get(po.size() - 1) is CosInt)
{
var number = (CosInt)po.remove(po.size() - 1);
CosObjectKey key = new CosObjectKey(number.AsLong(), genNumber.AsInt());
IndirectReference key = new IndirectReference(number.AsLong(), genNumber.AsInt());
pbo = pool.Get(key);
}
else

View File

@@ -156,7 +156,7 @@
throw new InvalidOperationException("expected number, actual=" + value + " at offset " + genOffset);
}
var key = new CosObjectKey(((CosInt)value).AsLong(), ((CosInt)generationNumber).AsInt());
var key = new IndirectReference(((CosInt)value).AsLong(), ((CosInt)generationNumber).AsInt());
// dereference the object
return pool.Get(key);

View File

@@ -5,6 +5,7 @@
using Content;
using ContentStream;
using Cos;
using Exceptions;
using FileStructure;
using Filters;
using Fonts;
@@ -102,7 +103,7 @@
if (!(root is PdfDictionary rootDictionary))
{
throw new InvalidOperationException("Expected root dictionary, but got this: " + root);
throw new PdfDocumentFormatException("Expected root dictionary, but got this: " + root);
}
// in some pdfs the type value "Catalog" is missing in the root object
@@ -117,6 +118,8 @@
var caching = new ParsingCachingProviders(pool, bruteForceSearcher, resourceContainer);
var pdfScanner = new PdfTokenScanner(inputBytes, null);
return new PdfDocument(log, reader, version, crossReferenceTable, isLenientParsing, caching, pageFactory, pdfObjectParser, catalog, information);
}

View File

@@ -1,6 +1,9 @@
namespace UglyToad.PdfPig.Tokenization.Scanner
{
using System.Collections.Generic;
using ContentStream;
using Cos;
using Parser.Parts;
internal interface IObjectLocationProvider
{
@@ -8,4 +11,30 @@
void UpdateOffset(IndirectReference reference, long offset);
}
/// <summary>
/// Placeholder <see cref="IObjectLocationProvider"/> implementation.
/// It captures the collaborators handed to its constructor and owns an
/// (initially empty) offset map, but neither lookup nor update is implemented
/// yet: both members throw <see cref="System.NotImplementedException"/>.
/// </summary>
internal class ObjectLocationProvider : IObjectLocationProvider
{
    // Per-instance offset map keyed by indirect reference; created empty and
    // not yet read or written by any member of this class.
    private readonly Dictionary<IndirectReference, long> offsets = new Dictionary<IndirectReference, long>();

    // Collaborators supplied by the caller; stored as-is (no null validation).
    private readonly CrossReferenceTable crossReferenceTable;
    private readonly CosObjectPool pool;
    private readonly BruteForceSearcher searcher;

    /// <summary>
    /// Stores the supplied collaborators for later use.
    /// </summary>
    /// <param name="crossReferenceTable">The cross reference table to keep a reference to.</param>
    /// <param name="pool">The object pool to keep a reference to.</param>
    /// <param name="searcher">The brute force searcher to keep a reference to.</param>
    public ObjectLocationProvider(CrossReferenceTable crossReferenceTable, CosObjectPool pool, BruteForceSearcher searcher)
    {
        this.searcher = searcher;
        this.pool = pool;
        this.crossReferenceTable = crossReferenceTable;
    }

    /// <summary>
    /// Not implemented.
    /// </summary>
    /// <exception cref="System.NotImplementedException">Always thrown.</exception>
    public bool TryGetOffset(IndirectReference reference, out long offset)
        => throw new System.NotImplementedException();

    /// <summary>
    /// Not implemented.
    /// </summary>
    /// <exception cref="System.NotImplementedException">Always thrown.</exception>
    public void UpdateOffset(IndirectReference reference, long offset)
        => throw new System.NotImplementedException();
}
}