From 24431b1f9feb4bcb948fe88c052c3161f3b12d4d Mon Sep 17 00:00:00 2001 From: BobLd <38405645+BobLd@users.noreply.github.com> Date: Sun, 1 Jun 2025 09:44:46 +0100 Subject: [PATCH] Optimize internal representation of IndirectReference --- src/UglyToad.PdfPig.Core/IndirectReference.cs | 45 ++++++++++++++++--- .../ContentStream/IndirectReferenceTests.cs | 38 ++++++++++++++++ .../CrossReferenceObjectOffsetValidator.cs | 4 +- 3 files changed, 78 insertions(+), 9 deletions(-) diff --git a/src/UglyToad.PdfPig.Core/IndirectReference.cs b/src/UglyToad.PdfPig.Core/IndirectReference.cs index e53aef0f..eceee1ed 100644 --- a/src/UglyToad.PdfPig.Core/IndirectReference.cs +++ b/src/UglyToad.PdfPig.Core/IndirectReference.cs @@ -3,20 +3,33 @@ using System; using System.Diagnostics; + // https://github.com/apache/pdfbox/blob/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSObjectKey.java#L25 + /// /// Used to uniquely identify and refer to objects in the PDF file. /// public readonly struct IndirectReference : IEquatable { + private const int NUMBER_OFFSET = sizeof(ushort) * 8; + private static readonly long GENERATION_MASK = (long)Math.Pow(2, NUMBER_OFFSET) - 1; + private static readonly long MAX_OBJECT_NUMBER = (long)(Math.Pow(2, sizeof(long) * 8 - NUMBER_OFFSET) - 1) / 2; + + // combined number and generation + // The lowest 16 bits hold the generation 0-65535 + // The rest is used for the number (even though 34 bit are sufficient for 10 digits) + private readonly long numberAndGeneration; + /// /// A positive integer object number. /// - public long ObjectNumber { get; } + // Below is different from PdfBox as we keep the sign of the offset number (use >> instead of >>> (unsigned right shift)) + public long ObjectNumber => numberAndGeneration >> NUMBER_OFFSET; /// /// A non-negative integer generation number which starts as 0 and increases if the file is updated incrementally. + /// The maximum generation number is 65,535. /// - public int Generation { get; } + public int Generation => (int)(numberAndGeneration & GENERATION_MASK); /// /// Create a new @@ -26,14 +39,34 @@ [DebuggerStepThrough] public IndirectReference(long objectNumber, int generation) { - ObjectNumber = objectNumber; - Generation = generation; + if (generation < 0 || generation > ushort.MaxValue) + { + throw new ArgumentOutOfRangeException(nameof(generation), "Generation number must not be a negative value, and less or equal to 65,535."); + } + + if (objectNumber < -MAX_OBJECT_NUMBER || objectNumber > MAX_OBJECT_NUMBER) + { + throw new ArgumentOutOfRangeException(nameof(objectNumber), $"Object number must be between -{MAX_OBJECT_NUMBER:##,###} and {MAX_OBJECT_NUMBER:##,###}."); + } + + numberAndGeneration = ComputeInternalHash(objectNumber, generation); + } + + /// + /// Calculate the internal hash value for the given object number and generation number. + /// + /// The object number. + /// The generation number. + /// The internal hash for the given values. + private static long ComputeInternalHash(long num, int gen) + { + return num << NUMBER_OFFSET | (gen & GENERATION_MASK); } /// public bool Equals(IndirectReference other) { - return other.ObjectNumber == ObjectNumber && other.Generation == Generation; + return other.numberAndGeneration == numberAndGeneration; } /// @@ -45,7 +78,7 @@ /// public override int GetHashCode() { - return HashCode.Combine(ObjectNumber, Generation); + return numberAndGeneration.GetHashCode(); } /// diff --git a/src/UglyToad.PdfPig.Tests/ContentStream/IndirectReferenceTests.cs b/src/UglyToad.PdfPig.Tests/ContentStream/IndirectReferenceTests.cs index 74d49840..676dc596 100644 --- a/src/UglyToad.PdfPig.Tests/ContentStream/IndirectReferenceTests.cs +++ b/src/UglyToad.PdfPig.Tests/ContentStream/IndirectReferenceTests.cs @@ -30,6 +30,44 @@ Assert.True(reference1.Equals(reference2)); } + [Fact] + public void IndirectReferenceHashTest() + { + var reference0 = new IndirectReference(1574, 690); + Assert.Equal(1574, reference0.ObjectNumber); + Assert.Equal(690, reference0.Generation); + + var reference1 = new IndirectReference(-1574, 690); + Assert.Equal(-1574, reference1.ObjectNumber); + Assert.Equal(690, reference1.Generation); + + var reference2 = new IndirectReference(58949797283757, 16); + Assert.Equal(58949797283757, reference2.ObjectNumber); + Assert.Equal(16, reference2.Generation); + + var reference3 = new IndirectReference(-58949797283757, ushort.MaxValue); + Assert.Equal(-58949797283757, reference3.ObjectNumber); + Assert.Equal(ushort.MaxValue, reference3.Generation); + + var reference4 = new IndirectReference(140737488355327, ushort.MaxValue); + Assert.Equal(140737488355327, reference4.ObjectNumber); + Assert.Equal(ushort.MaxValue, reference4.Generation); + + var reference5 = new IndirectReference(-140737488355327, ushort.MaxValue); + Assert.Equal(-140737488355327, reference5.ObjectNumber); + Assert.Equal(ushort.MaxValue, reference5.Generation); + + var ex0 = Assert.Throws(() => new IndirectReference(140737488355328, 0)); + Assert.StartsWith("Object number must be between -140,737,488,355,327 and 140,737,488,355,327.", ex0.Message); + var ex1 = Assert.Throws(() => new IndirectReference(-140737488355328, 0)); + Assert.StartsWith("Object number must be between -140,737,488,355,327 and 140,737,488,355,327.", ex1.Message); + + var ex2 = Assert.Throws(() => new IndirectReference(1574, -1)); + Assert.StartsWith("Generation number must not be a negative value, and less or equal to 65,535.", ex2.Message); + var ex3 = Assert.Throws(() => new IndirectReference(1574, ushort.MaxValue + 1)); + Assert.StartsWith("Generation number must not be a negative value, and less or equal to 65,535.", ex3.Message); + } + [Fact] public void TwoIndirectReferenceNotEqual() { diff --git a/src/UglyToad.PdfPig/Parser/FileStructure/CrossReferenceObjectOffsetValidator.cs b/src/UglyToad.PdfPig/Parser/FileStructure/CrossReferenceObjectOffsetValidator.cs index f2449b39..8aaa19d3 100644 --- a/src/UglyToad.PdfPig/Parser/FileStructure/CrossReferenceObjectOffsetValidator.cs +++ b/src/UglyToad.PdfPig/Parser/FileStructure/CrossReferenceObjectOffsetValidator.cs @@ -94,8 +94,6 @@ long objectGen = objectKey.Generation; var originOffset = bytes.CurrentOffset; - var objectString = ObjectHelper.CreateObjectString(objectNr, objectGen); - try { if (offset >= bytes.Length) @@ -111,7 +109,7 @@ bytes.MoveNext(); } - if (ReadHelper.IsString(bytes, objectString)) + if (ReadHelper.IsString(bytes, ObjectHelper.CreateObjectString(objectNr, objectGen))) { // everything is ok, return origin object key bytes.Seek(originOffset);