namespace UglyToad.PdfPig.Parser.Parts.CrossReference
{
using System.Collections.Generic;
using Cos;
using Exceptions;
using Filters;
using Tokenization.Tokens;
using Util;
internal class CrossReferenceStreamParser
{
private readonly IFilterProvider filterProvider;
public CrossReferenceStreamParser(IFilterProvider filterProvider)
{
this.filterProvider = filterProvider;
}
///
/// Parses through the unfiltered stream and populates the xrefTable HashMap.
///
public CrossReferenceTablePart Parse(long streamOffset, StreamToken stream)
{
byte[] decoded = stream.Decode(filterProvider);
var fieldSizes = new CrossReferenceStreamFieldSize(stream.StreamDictionary);
var lineCount = decoded.Length / fieldSizes.LineLength;
long previousOffset = -1;
if (stream.StreamDictionary.TryGet(NameToken.Prev, out var prevToken) && prevToken is NumericToken prevNumeric)
{
previousOffset = prevNumeric.Long;
}
var builder = new CrossReferenceTablePartBuilder
{
Offset = streamOffset,
Previous = previousOffset,
Dictionary = stream.StreamDictionary,
XRefType = CrossReferenceType.Stream
};
var objectNumbers = GetObjectNumbers(stream.StreamDictionary);
var lineNumber = 0;
var lineBuffer = new byte[fieldSizes.LineLength];
foreach (var objectNumber in objectNumbers)
{
if (lineNumber >= lineCount)
{
break;
}
var byteOffset = lineNumber * fieldSizes.LineLength;
for (var i = 0; i < fieldSizes.LineLength; i++)
{
lineBuffer[i] = decoded[byteOffset + i];
}
int type;
if (fieldSizes.Field1Size == 0)
{
type = 1;
}
else
{
type = 0;
for (var i = 0; i < fieldSizes.Field1Size; i++)
{
type += (lineBuffer[i] & 0x00ff) << ((fieldSizes.Field1Size - i - 1) * 8);
}
}
ReadNextStreamObject(type, objectNumber, fieldSizes, builder, lineBuffer);
lineNumber++;
}
return builder.Build();
}
private static void ReadNextStreamObject(int type, long objectNumber, CrossReferenceStreamFieldSize fieldSizes,
CrossReferenceTablePartBuilder builder, byte[] lineBuffer)
{
switch (type)
{
case 0:
// Ignore free objects.
break;
case 1:
// Non object stream entries.
int offset = 0;
for (int i = 0; i < fieldSizes.Field2Size; i++)
{
offset += (lineBuffer[i + fieldSizes.Field1Size] & 0x00ff) << ((fieldSizes.Field2Size - i - 1) * 8);
}
int genNum = 0;
for (int i = 0; i < fieldSizes.Field3Size; i++)
{
genNum += (lineBuffer[i + fieldSizes.Field1Size + fieldSizes.Field2Size] & 0x00ff) << ((fieldSizes.Field3Size - i - 1) * 8);
}
builder.Add(objectNumber, genNum, offset);
break;
case 2:
/*
* object stored in object stream:
* 2nd argument is object number of object stream
* 3rd argument is index of object within object stream
*
* For sequential PDFParser we do not need this information
* because
* These objects are handled by the dereferenceObjects() method
* since they're only pointing to object numbers
*
* However for XRef aware parsers we have to know which objects contain
* object streams. We will store this information in normal xref mapping
* table but add object stream number with minus sign in order to
* distinguish from file offsets
*/
int objstmObjNr = 0;
for (int i = 0; i < fieldSizes.Field2Size; i++)
{
objstmObjNr += (lineBuffer[i + fieldSizes.Field1Size] & 0x00ff) << ((fieldSizes.Field2Size - i - 1) * 8);
}
builder.Add(objectNumber, 0, -objstmObjNr);
break;
}
}
private static List GetObjectNumbers(DictionaryToken dictionary)
{
if (!dictionary.TryGet(NameToken.Size, out var sizeToken) || !(sizeToken is NumericToken sizeNumeric))
{
throw new PdfDocumentFormatException($"The stream dictionary must contain a numeric size value: {dictionary}.");
}
var indexArray = new[] { 0, sizeNumeric.Int };
if (dictionary.TryGet(NameToken.Index, out var indexToken) && indexToken is ArrayToken indexArrayToken)
{
indexArray = new[]
{
indexArrayToken.GetNumeric(0).Int,
indexArrayToken.GetNumeric(1).Int
};
}
List objNums = new List();
var firstObjectNumber = indexArray[0];
var size = indexArray[1];
for (var i = 0; i < size; i++)
{
objNums.Add(firstObjectNumber + i);
}
return objNums;
}
}
}