From 24431b1f9feb4bcb948fe88c052c3161f3b12d4d Mon Sep 17 00:00:00 2001
From: BobLd <38405645+BobLd@users.noreply.github.com>
Date: Sun, 1 Jun 2025 09:44:46 +0100
Subject: [PATCH] Optimize internal representation of IndirectReference
---
src/UglyToad.PdfPig.Core/IndirectReference.cs | 45 ++++++++++++++++---
.../ContentStream/IndirectReferenceTests.cs | 38 ++++++++++++++++
.../CrossReferenceObjectOffsetValidator.cs | 4 +-
3 files changed, 78 insertions(+), 9 deletions(-)
diff --git a/src/UglyToad.PdfPig.Core/IndirectReference.cs b/src/UglyToad.PdfPig.Core/IndirectReference.cs
index e53aef0f..eceee1ed 100644
--- a/src/UglyToad.PdfPig.Core/IndirectReference.cs
+++ b/src/UglyToad.PdfPig.Core/IndirectReference.cs
@@ -3,20 +3,33 @@
using System;
using System.Diagnostics;
+ // https://github.com/apache/pdfbox/blob/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSObjectKey.java#L25
+
/// <summary>
/// Used to uniquely identify and refer to objects in the PDF file.
/// </summary>
public readonly struct IndirectReference : IEquatable
{
+ private const int NUMBER_OFFSET = sizeof(ushort) * 8; // Bit width of the generation field (16).
+ private const long GENERATION_MASK = (1L << NUMBER_OFFSET) - 1; // 0xFFFF: selects the generation bits.
+ private const long MAX_OBJECT_NUMBER = (1L << (sizeof(long) * 8 - NUMBER_OFFSET - 1)) - 1; // 2^47 - 1: largest magnitude that keeps its sign after the 16-bit left shift.
+
+ // Combined object number and generation, packed into one 64-bit value.
+ // The lowest 16 bits hold the generation (0-65535).
+ // The remaining bits hold the signed object number (even though 34 bits are sufficient for 10 digits).
+ private readonly long numberAndGeneration;
+
/// <summary>
/// A positive integer object number.
/// </summary>
- public long ObjectNumber { get; }
+ // Unlike PdfBox, the sign of the object number is kept: '>>' is an arithmetic (sign-extending) shift, whereas PdfBox uses the unsigned '>>>'.
+ public long ObjectNumber => numberAndGeneration >> NUMBER_OFFSET;
/// <summary>
/// A non-negative integer generation number which starts as 0 and increases if the file is updated incrementally.
+ /// The maximum generation number is 65,535.
/// </summary>
- public int Generation { get; }
+ public int Generation => (int)(numberAndGeneration & GENERATION_MASK);
///
/// Create a new
@@ -26,14 +39,34 @@
[DebuggerStepThrough]
public IndirectReference(long objectNumber, int generation)
{
- ObjectNumber = objectNumber;
- Generation = generation;
+ if (generation < 0 || generation > ushort.MaxValue)
+ {
+ throw new ArgumentOutOfRangeException(nameof(generation), "Generation number must not be a negative value, and less or equal to 65,535.");
+ }
+ // The bounds below are formatted with the invariant culture so the message text does not change with the current thread's locale.
+ if (objectNumber < -MAX_OBJECT_NUMBER || objectNumber > MAX_OBJECT_NUMBER)
+ {
+ throw new ArgumentOutOfRangeException(nameof(objectNumber), FormattableString.Invariant($"Object number must be between -{MAX_OBJECT_NUMBER:##,###} and {MAX_OBJECT_NUMBER:##,###}."));
+ }
+
+ numberAndGeneration = ComputeInternalHash(objectNumber, generation);
+ }
+
+ /// <summary>
+ /// Packs an object number and a generation number into the single 64-bit value stored by this struct.
+ /// </summary>
+ /// <param name="num">The object number, placed in the bits above the generation.</param>
+ /// <param name="gen">The generation number, placed in the lowest 16 bits.</param>
+ /// <returns>The packed value for the given numbers.</returns>
+ private static long ComputeInternalHash(long num, int gen)
+ {
+ return ((long)gen & GENERATION_MASK) | (num << NUMBER_OFFSET);
}
/// <inheritdoc />
public bool Equals(IndirectReference other)
{
- return other.ObjectNumber == ObjectNumber && other.Generation == Generation;
+ return numberAndGeneration == other.numberAndGeneration;
}
///
@@ -45,7 +78,7 @@
/// <inheritdoc />
public override int GetHashCode()
{
- return HashCode.Combine(ObjectNumber, Generation);
+ return numberAndGeneration.GetHashCode(); // Hashes the same packed value that Equals compares.
}
///
diff --git a/src/UglyToad.PdfPig.Tests/ContentStream/IndirectReferenceTests.cs b/src/UglyToad.PdfPig.Tests/ContentStream/IndirectReferenceTests.cs
index 74d49840..676dc596 100644
--- a/src/UglyToad.PdfPig.Tests/ContentStream/IndirectReferenceTests.cs
+++ b/src/UglyToad.PdfPig.Tests/ContentStream/IndirectReferenceTests.cs
@@ -30,6 +30,44 @@
Assert.True(reference1.Equals(reference2));
}
+ [Fact]
+ public void IndirectReferenceHashTest()
+ {
+ var reference0 = new IndirectReference(1574, 690);
+ Assert.Equal(1574, reference0.ObjectNumber);
+ Assert.Equal(690, reference0.Generation);
+ // A negative object number keeps its sign.
+ var reference1 = new IndirectReference(-1574, 690);
+ Assert.Equal(-1574, reference1.ObjectNumber);
+ Assert.Equal(690, reference1.Generation);
+ // A large object number does not disturb the generation bits.
+ var reference2 = new IndirectReference(58949797283757, 16);
+ Assert.Equal(58949797283757, reference2.ObjectNumber);
+ Assert.Equal(16, reference2.Generation);
+ // A large negative object number combined with the maximum generation.
+ var reference3 = new IndirectReference(-58949797283757, ushort.MaxValue);
+ Assert.Equal(-58949797283757, reference3.ObjectNumber);
+ Assert.Equal(ushort.MaxValue, reference3.Generation);
+ // Largest supported object number (2^47 - 1).
+ var reference4 = new IndirectReference(140737488355327, ushort.MaxValue);
+ Assert.Equal(140737488355327, reference4.ObjectNumber);
+ Assert.Equal(ushort.MaxValue, reference4.Generation);
+ // Smallest supported object number (-(2^47 - 1)).
+ var reference5 = new IndirectReference(-140737488355327, ushort.MaxValue);
+ Assert.Equal(-140737488355327, reference5.ObjectNumber);
+ Assert.Equal(ushort.MaxValue, reference5.Generation);
+ // One past either object number bound is rejected.
+ var ex0 = Assert.Throws<System.ArgumentOutOfRangeException>(() => new IndirectReference(140737488355328, 0));
+ Assert.StartsWith("Object number must be between -140,737,488,355,327 and 140,737,488,355,327.", ex0.Message);
+ var ex1 = Assert.Throws<System.ArgumentOutOfRangeException>(() => new IndirectReference(-140737488355328, 0));
+ Assert.StartsWith("Object number must be between -140,737,488,355,327 and 140,737,488,355,327.", ex1.Message);
+ // Generations outside 0..65535 are rejected.
+ var ex2 = Assert.Throws<System.ArgumentOutOfRangeException>(() => new IndirectReference(1574, -1));
+ Assert.StartsWith("Generation number must not be a negative value, and less or equal to 65,535.", ex2.Message);
+ var ex3 = Assert.Throws<System.ArgumentOutOfRangeException>(() => new IndirectReference(1574, ushort.MaxValue + 1));
+ Assert.StartsWith("Generation number must not be a negative value, and less or equal to 65,535.", ex3.Message);
+ }
+
[Fact]
public void TwoIndirectReferenceNotEqual()
{
diff --git a/src/UglyToad.PdfPig/Parser/FileStructure/CrossReferenceObjectOffsetValidator.cs b/src/UglyToad.PdfPig/Parser/FileStructure/CrossReferenceObjectOffsetValidator.cs
index f2449b39..8aaa19d3 100644
--- a/src/UglyToad.PdfPig/Parser/FileStructure/CrossReferenceObjectOffsetValidator.cs
+++ b/src/UglyToad.PdfPig/Parser/FileStructure/CrossReferenceObjectOffsetValidator.cs
@@ -94,8 +94,6 @@
long objectGen = objectKey.Generation;
var originOffset = bytes.CurrentOffset;
- var objectString = ObjectHelper.CreateObjectString(objectNr, objectGen);
-
try
{
if (offset >= bytes.Length)
@@ -111,7 +109,7 @@
bytes.MoveNext();
}
- if (ReadHelper.IsString(bytes, objectString))
+ if (ReadHelper.IsString(bytes, ObjectHelper.CreateObjectString(objectNr, objectGen)))
{
// everything is ok, return origin object key
bytes.Seek(originOffset);