diff --git a/src/UglyToad.PdfPig.Core/IndirectReference.cs b/src/UglyToad.PdfPig.Core/IndirectReference.cs index eceee1ed..39243dc2 100644 --- a/src/UglyToad.PdfPig.Core/IndirectReference.cs +++ b/src/UglyToad.PdfPig.Core/IndirectReference.cs @@ -39,9 +39,10 @@ [DebuggerStepThrough] public IndirectReference(long objectNumber, int generation) { - if (generation < 0 || generation > ushort.MaxValue) + if (generation < 0) { - throw new ArgumentOutOfRangeException(nameof(generation), "Generation number must not be a negative value, and less or equal to 65,535."); + // Note: We do not check generation for max value and let it overflow + throw new ArgumentOutOfRangeException(nameof(generation), "Generation number must not be a negative value."); } if (objectNumber < -MAX_OBJECT_NUMBER || objectNumber > MAX_OBJECT_NUMBER) diff --git a/src/UglyToad.PdfPig.Tests/ContentStream/IndirectReferenceTests.cs b/src/UglyToad.PdfPig.Tests/ContentStream/IndirectReferenceTests.cs index 676dc596..6804c372 100644 --- a/src/UglyToad.PdfPig.Tests/ContentStream/IndirectReferenceTests.cs +++ b/src/UglyToad.PdfPig.Tests/ContentStream/IndirectReferenceTests.cs @@ -63,9 +63,20 @@ Assert.StartsWith("Object number must be between -140,737,488,355,327 and 140,737,488,355,327.", ex1.Message); var ex2 = Assert.Throws(() => new IndirectReference(1574, -1)); - Assert.StartsWith("Generation number must not be a negative value, and less or equal to 65,535.", ex2.Message); - var ex3 = Assert.Throws(() => new IndirectReference(1574, ushort.MaxValue + 1)); - Assert.StartsWith("Generation number must not be a negative value, and less or equal to 65,535.", ex3.Message); + Assert.StartsWith("Generation number must not be a negative value.", ex2.Message); + + // We make sure object number is still correct even if generation is not + var reference6 = new IndirectReference(1574, int.MaxValue); + Assert.Equal(1574, reference6.ObjectNumber); + + var reference7 = new IndirectReference(-1574, ushort.MaxValue + 10); + 
Assert.Equal(-1574, reference7.ObjectNumber); + + var reference9 = new IndirectReference(-140737488355327, ushort.MaxValue + 10); + Assert.Equal(-140737488355327, reference9.ObjectNumber); + + var reference10 = new IndirectReference(140737488355327, ushort.MaxValue * 10); + Assert.Equal(140737488355327, reference10.ObjectNumber); } [Fact] diff --git a/src/UglyToad.PdfPig.Tests/Integration/IndirectReferenceTests.cs b/src/UglyToad.PdfPig.Tests/Integration/IndirectReferenceTests.cs new file mode 100644 index 00000000..d6bbb828 --- /dev/null +++ b/src/UglyToad.PdfPig.Tests/Integration/IndirectReferenceTests.cs @@ -0,0 +1,27 @@ +namespace UglyToad.PdfPig.Tests.Integration +{ + using DocumentLayoutAnalysis.WordExtractor; + using System.Linq; + + public class IndirectReferenceTests + { + [Fact] + public void Generation65536() + { + using (var document = PdfDocument.Open(IntegrationHelpers.GetSpecificTestDocumentPath("93101_1.pdf"))) + { + var page = document.GetPage(1); + var words = NearestNeighbourWordExtractor.Instance.GetWords(page.Letters).ToArray(); + Assert.NotEmpty(words); + + Assert.Equal("Railway", words[0].Text); + + for (int p = 2; p <= document.NumberOfPages; ++p) + { + page = document.GetPage(p); + Assert.NotEmpty(page.Letters); + } + } + } + } +} diff --git a/src/UglyToad.PdfPig.Tests/Integration/SpecificTestDocuments/93101_1.pdf b/src/UglyToad.PdfPig.Tests/Integration/SpecificTestDocuments/93101_1.pdf new file mode 100644 index 00000000..fd7aedc4 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Integration/SpecificTestDocuments/93101_1.pdf differ diff --git a/src/UglyToad.PdfPig/CrossReference/CrossReferenceTablePartBuilder.cs b/src/UglyToad.PdfPig/CrossReference/CrossReferenceTablePartBuilder.cs index 1cda8ca6..9ff6039c 100644 --- a/src/UglyToad.PdfPig/CrossReference/CrossReferenceTablePartBuilder.cs +++ b/src/UglyToad.PdfPig/CrossReference/CrossReferenceTablePartBuilder.cs @@ -20,6 +20,12 @@ public void Add(long objectId, int generationNumber, long 
offset) { + if (generationNumber > ushort.MaxValue) + { + // Skip entries whose generation number is invalid (greater than ushort.MaxValue) + return; + } + IndirectReference objKey = new IndirectReference(objectId, generationNumber); if (!objects.ContainsKey(objKey)) diff --git a/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs b/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs index 72004575..16f05707 100644 --- a/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs +++ b/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs @@ -745,6 +745,7 @@ if (offset == 0 && reference.Generation > ushort.MaxValue) { + // TODO: Remove this check, as this case should not happen anymore return new ObjectToken(offset, reference, NullToken.Instance); }