Optimize internal representation of IndirectReference

This commit is contained in:
BobLd
2025-06-01 09:44:46 +01:00
parent 8f9194c9a4
commit 24431b1f9f
3 changed files with 78 additions and 9 deletions

View File

@@ -3,20 +3,33 @@
using System;
using System.Diagnostics;
// https://github.com/apache/pdfbox/blob/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSObjectKey.java#L25
/// <summary>
/// Used to uniquely identify and refer to objects in the PDF file.
/// </summary>
public readonly struct IndirectReference : IEquatable<IndirectReference>
{
// Number of low-order bits reserved for the generation (16).
private const int NUMBER_OFFSET = sizeof(ushort) * 8;
// Mask selecting the generation bits (0xFFFF). Built with an integer shift instead of the
// floating-point Math.Pow so the value is an exact compile-time constant.
private const long GENERATION_MASK = (1L << NUMBER_OFFSET) - 1;
// Largest accepted magnitude of the object number: 2^47 - 1 (140,737,488,355,327).
// The object number occupies the 48 bits above the generation, one of which carries the sign.
private const long MAX_OBJECT_NUMBER = (1L << (sizeof(long) * 8 - NUMBER_OFFSET - 1)) - 1;
// Combined number and generation:
// the lowest 16 bits hold the generation (0-65535),
// the rest is used for the number (even though 34 bits are sufficient for 10 digits).
private readonly long numberAndGeneration;
/// <summary>
/// A positive integer object number.
/// <para>The value is read back from the bits above the lowest 16 of the packed field. The constructor also accepts negative numbers, and their sign is preserved.</para>
/// </summary>
public long ObjectNumber { get; }
// Below is different from PdfBox as we keep the sign of the offset number (use >> instead of >>> (unsigned right shift))
// NOTE(review): ObjectNumber is declared twice here (auto-property above, computed property below);
// presumably the auto-property is the superseded declaration - confirm that only one remains.
public long ObjectNumber => numberAndGeneration >> NUMBER_OFFSET;
/// <summary>
/// A non-negative integer generation number which starts as 0 and increases if the file is updated incrementally.
/// <para>The maximum generation number is 65,535.</para>
/// </summary>
public int Generation { get; }
// The generation is read back by masking the packed field with GENERATION_MASK.
// NOTE(review): Generation is declared twice here (auto-property above, computed property below);
// presumably the auto-property is the superseded declaration - confirm that only one remains.
public int Generation => (int)(numberAndGeneration & GENERATION_MASK);
/// <summary>
/// Create a new <see cref="IndirectReference"/>
@@ -26,14 +39,34 @@
[DebuggerStepThrough]
public IndirectReference(long objectNumber, int generation)
{
    // The generation must fit in an unsigned 16-bit value.
    if (generation < 0 || generation > ushort.MaxValue)
    {
        throw new ArgumentOutOfRangeException(nameof(generation), "Generation number must not be a negative value, and less or equal to 65,535.");
    }

    // The object number is limited to the symmetric range [-MAX_OBJECT_NUMBER, MAX_OBJECT_NUMBER].
    if (objectNumber < -MAX_OBJECT_NUMBER || objectNumber > MAX_OBJECT_NUMBER)
    {
        // Format with the invariant culture so the message (and its group separators) does not
        // change with the culture of the calling thread.
        var limit = MAX_OBJECT_NUMBER.ToString("##,###", System.Globalization.CultureInfo.InvariantCulture);
        throw new ArgumentOutOfRangeException(nameof(objectNumber), $"Object number must be between -{limit} and {limit}.");
    }

    // ObjectNumber and Generation are both derived from this single packed value.
    numberAndGeneration = ComputeInternalHash(objectNumber, generation);
}
/// <summary>
/// Pack an object number and a generation number into a single 64-bit value.
/// </summary>
/// <param name="num">The object number, shifted left by <c>NUMBER_OFFSET</c> bits.</param>
/// <param name="gen">The generation number, masked with <c>GENERATION_MASK</c>.</param>
/// <returns>The bitwise OR of the shifted object number and the masked generation.</returns>
private static long ComputeInternalHash(long num, int gen)
{
    long numberBits = num << NUMBER_OFFSET;
    long generationBits = gen & GENERATION_MASK;
    return numberBits | generationBits;
}
/// <inheritdoc />
public bool Equals(IndirectReference other)
{
// NOTE(review): two return statements appear here; as written the first one returns and the
// second is unreachable. Presumably the first is the superseded line - confirm and keep one.
return other.ObjectNumber == ObjectNumber && other.Generation == Generation;
// Compares the packed field that both ObjectNumber and Generation are derived from.
return other.numberAndGeneration == numberAndGeneration;
}
/// <inheritdoc />
@@ -45,7 +78,7 @@
/// <inheritdoc />
public override int GetHashCode()
{
// NOTE(review): two return statements appear here; as written the first one returns and the
// second is unreachable. Presumably the first is the superseded line - confirm and keep one.
return HashCode.Combine(ObjectNumber, Generation);
// Hashes the packed field that Equals(IndirectReference) also compares.
return numberAndGeneration.GetHashCode();
}
/// <inheritdoc />

View File

@@ -30,6 +30,44 @@
Assert.True(reference1.Equals(reference2));
}
[Fact]
public void IndirectReferenceHashTest()
{
    const string objectNumberMessage = "Object number must be between -140,737,488,355,327 and 140,737,488,355,327.";
    const string generationMessage = "Generation number must not be a negative value, and less or equal to 65,535.";

    // Builds a reference and checks that both components are read back unchanged.
    void AssertRoundTrips(long number, int generation)
    {
        var reference = new IndirectReference(number, generation);
        Assert.Equal(number, reference.ObjectNumber);
        Assert.Equal(generation, reference.Generation);
    }

    // Checks that construction is rejected and that the message starts with the expected text.
    void AssertRejected(long number, int generation, string expectedMessageStart)
    {
        var exception = Assert.Throws<ArgumentOutOfRangeException>(() => new IndirectReference(number, generation));
        Assert.StartsWith(expectedMessageStart, exception.Message);
    }

    // Positive and negative object numbers, up to and including both range limits.
    AssertRoundTrips(1574, 690);
    AssertRoundTrips(-1574, 690);
    AssertRoundTrips(58949797283757, 16);
    AssertRoundTrips(-58949797283757, ushort.MaxValue);
    AssertRoundTrips(140737488355327, ushort.MaxValue);
    AssertRoundTrips(-140737488355327, ushort.MaxValue);

    // One step outside the accepted object number range, on either side.
    AssertRejected(140737488355328, 0, objectNumberMessage);
    AssertRejected(-140737488355328, 0, objectNumberMessage);

    // Generation below zero and above the 16-bit maximum.
    AssertRejected(1574, -1, generationMessage);
    AssertRejected(1574, ushort.MaxValue + 1, generationMessage);
}
[Fact]
public void TwoIndirectReferenceNotEqual()
{

View File

@@ -94,8 +94,6 @@
long objectGen = objectKey.Generation;
var originOffset = bytes.CurrentOffset;
var objectString = ObjectHelper.CreateObjectString(objectNr, objectGen);
try
{
if (offset >= bytes.Length)
@@ -111,7 +109,7 @@
bytes.MoveNext();
}
if (ReadHelper.IsString(bytes, objectString))
if (ReadHelper.IsString(bytes, ObjectHelper.CreateObjectString(objectNr, objectGen)))
{
// everything is ok, return origin object key
bytes.Seek(originOffset);