diff --git a/src/UglyToad.Pdf.Tests/Parser/Parts/CrossReference/CrossReferenceTableParserTests.cs b/src/UglyToad.Pdf.Tests/Parser/Parts/CrossReference/CrossReferenceTableParserTests.cs new file mode 100644 index 00000000..5f3e8188 --- /dev/null +++ b/src/UglyToad.Pdf.Tests/Parser/Parts/CrossReference/CrossReferenceTableParserTests.cs @@ -0,0 +1,236 @@ +namespace UglyToad.Pdf.Tests.Parser.Parts.CrossReference +{ + using System.Linq; + using IO; + using Pdf.Cos; + using Pdf.Parser.Parts.CrossReference; + using Pdf.Util; + using Xunit; + + public class CrossReferenceTableParserTests + { + private readonly CosObjectPool objectPool = new CosObjectPool(); + + private readonly CrossReferenceTableParser parser = new CrossReferenceTableParser(new TestingLog(), + new TestDictionaryParser(), + new TestBaseParser()); + + [Fact] + public void OffsetNotXrefFalse() + { + var input = GetReader("12 0 obj <<>> endobj xref"); + + var result = parser.TryParse(input, 4, false, objectPool, out var _); + + Assert.False(result); + } + + [Fact] + public void OffsetXButNotXrefFalse() + { + var input = GetReader(@"xtable +trailer"); + + var result = parser.TryParse(input, 0, false, objectPool, out var _); + + Assert.False(result); + } + + [Fact] + public void EmptyTableFalse() + { + var input = GetReader(@"xref +trailer"); + + var result = parser.TryParse(input, 0, false, objectPool, out var _); + + Assert.False(result); + } + + [Fact] + public void InvalidSubsectionDefinitionLenientTrue() + { + var input = GetReader(@"xref +ab 12 +trailer +<<>>"); + + var result = parser.TryParse(input, 0, true, objectPool, out var _); + + Assert.True(result); + } + + [Fact] + public void InvalidSubsectionDefinitionNotLenientFalse() + { + var input = GetReader(@"xref +ab 12 +trailer +<<>>"); + + var result = parser.TryParse(input, 0, false, objectPool, out var _); + + Assert.False(result); + } + + [Fact] + public void SkipsFirstFreeLine() + { + var input = GetReader(@"xref +0 1 +0000000000 65535 f +trailer 
+<<>>"); + + var result = parser.TryParse(input, 0, false, objectPool, out var table); + + Assert.True(result); + + var built = table.AsCrossReferenceTablePart(); + + Assert.Empty(built.ObjectOffsets); + Assert.Equal(0, built.Offset); + Assert.Equal(CrossReferenceType.Table, built.Type); + } + + [Fact] + public void ReadsEntries() + { + var input = GetReader(@"xref +0 3 +0000000000 65535 f +0000000100 00000 n +0000000200 00005 n +trailer +<<>>"); + + var result = parser.TryParse(input, 0, false, objectPool, out var table); + + Assert.True(result); + + var built = table.AsCrossReferenceTablePart(); + + Assert.Equal(2, built.ObjectOffsets.Count); + + var results = built.ObjectOffsets.Select(x => new {x.Key.Number, x.Key.Generation, x.Value}).ToList(); + + Assert.Equal(100, results[0].Value); + Assert.Equal(1, results[0].Number); + Assert.Equal(0, results[0].Generation); + + Assert.Equal(200, results[1].Value); + Assert.Equal(2, results[1].Number); + Assert.Equal(5, results[1].Generation); + } + + [Fact] + public void ReadsEntriesOffsetFirstNumber() + { + var input = GetReader(@"xref +15 2 +0000000190 00000 n +0000000250 00032 n +trailer +<<>>"); + + var result = parser.TryParse(input, 0, false, objectPool, out var table); + + Assert.True(result); + + var built = table.AsCrossReferenceTablePart(); + + Assert.Equal(2, built.ObjectOffsets.Count); + + var results = built.ObjectOffsets.Select(x => new { x.Key.Number, x.Key.Generation, x.Value }).ToList(); + + Assert.Equal(190, results[0].Value); + Assert.Equal(15, results[0].Number); + Assert.Equal(0, results[0].Generation); + + Assert.Equal(250, results[1].Value); + Assert.Equal(16, results[1].Number); + Assert.Equal(32, results[1].Generation); + } + + [Fact] + public void ReadsEntriesSkippingBlankLine() + { + var input = GetReader(@"xref +15 2 +0000000190 00000 n + +0000000250 00032 n +trailer +<<>>"); + + var result = parser.TryParse(input, 0, false, objectPool, out var table); + + Assert.True(result); + + var built = 
table.AsCrossReferenceTablePart(); + + Assert.Equal(2, built.ObjectOffsets.Count); + + var results = built.ObjectOffsets.Select(x => new { x.Key.Number, x.Key.Generation, x.Value }).ToList(); + + Assert.Equal(190, results[0].Value); + Assert.Equal(15, results[0].Number); + Assert.Equal(0, results[0].Generation); + + Assert.Equal(250, results[1].Value); + Assert.Equal(16, results[1].Number); + Assert.Equal(32, results[1].Generation); + } + + [Fact] + public void ReadsEntriesFromMultipleSubsections() + { + var input = GetReader(@"xref +0 4 +0000000000 65535 f +0000000100 00000 n +0000000200 00005 n +0000000230 00005 n +15 2 +0000000190 00007 n +0000000250 00032 n +trailer +<<>>"); + + var result = parser.TryParse(input, 0, false, objectPool, out var table); + + Assert.True(result); + + var built = table.AsCrossReferenceTablePart(); + + Assert.Equal(5, built.ObjectOffsets.Count); + + var results = built.ObjectOffsets.Select(x => new { x.Key.Number, x.Key.Generation, x.Value }).ToList(); + + Assert.Equal(100, results[0].Value); + Assert.Equal(1, results[0].Number); + Assert.Equal(0, results[0].Generation); + + Assert.Equal(200, results[1].Value); + Assert.Equal(2, results[1].Number); + Assert.Equal(5, results[1].Generation); + + Assert.Equal(230, results[2].Value); + Assert.Equal(3, results[2].Number); + Assert.Equal(5, results[2].Generation); + + Assert.Equal(190, results[3].Value); + Assert.Equal(15, results[3].Number); + Assert.Equal(7, results[3].Generation); + + Assert.Equal(250, results[4].Value); + Assert.Equal(16, results[4].Number); + Assert.Equal(32, results[4].Generation); + } + + private static IRandomAccessRead GetReader(string input) + { + return new RandomAccessBuffer(OtherEncodings.StringAsLatin1Bytes(input)); + } + } +} diff --git a/src/UglyToad.Pdf.Tests/Parser/Parts/CrossReference/TableSubsectionDefinitionTests.cs b/src/UglyToad.Pdf.Tests/Parser/Parts/CrossReference/TableSubsectionDefinitionTests.cs new file mode 100644 index 00000000..4147c18e --- 
/dev/null +++ b/src/UglyToad.Pdf.Tests/Parser/Parts/CrossReference/TableSubsectionDefinitionTests.cs @@ -0,0 +1,117 @@ +namespace UglyToad.Pdf.Tests.Parser.Parts.CrossReference +{ + using System; + using IO; + using Pdf.Parser.Parts.CrossReference; + using Pdf.Util; + using Xunit; + + public class TableSubsectionDefinitionTests + { + private readonly TestingLog log = new TestingLog(); + + [Fact] + public void SetsPropertiesCorrectly() + { + var definition = new TableSubsectionDefinition(5, 12); + + Assert.Equal(5, definition.FirstNumber); + Assert.Equal(12, definition.Count); + } + + [Fact] + public void CountCannotBeNegative() + { + // ReSharper disable once ObjectCreationAsStatement + Action action = () => new TableSubsectionDefinition(1, -12); + + Assert.Throws<ArgumentOutOfRangeException>(action); + } + + [Fact] + public void ToStringRepresentsPdfForm() + { + var definition = new TableSubsectionDefinition(420, 69); + + Assert.Equal("420 69", definition.ToString()); + } + + [Fact] + public void TryReadIncorrectFormatSinglePartFalse() + { + var bytes = OtherEncodings.StringAsLatin1Bytes(@"76362"); + + var input = new RandomAccessBuffer(bytes); + + var result = TableSubsectionDefinition.TryRead(log, input, out var _); + + Assert.False(result); + } + + [Fact] + public void TryReadIncorrectFormatMultiplePartsFalse() + { + var bytes = OtherEncodings.StringAsLatin1Bytes(@"76362 100 1000"); + + var input = new RandomAccessBuffer(bytes); + + var result = TableSubsectionDefinition.TryRead(log, input, out var _); + + Assert.False(result); + } + + [Fact] + public void FirstPartInvalidFormatFalse() + { + var bytes = OtherEncodings.StringAsLatin1Bytes("00adb85 97"); + + var input = new RandomAccessBuffer(bytes); + + var result = TableSubsectionDefinition.TryRead(log, input, out var _); + + Assert.False(result); + } + + [Fact] + public void SecondPartInvalidFormatFalse() + { + var bytes = OtherEncodings.StringAsLatin1Bytes("85 9t"); + + var input = new RandomAccessBuffer(bytes); + + var result = 
TableSubsectionDefinition.TryRead(log, input, out var _); + + Assert.False(result); + } + + [Fact] + public void ValidTrue() + { + var bytes = OtherEncodings.StringAsLatin1Bytes("12 32"); + + var input = new RandomAccessBuffer(bytes); + + var result = TableSubsectionDefinition.TryRead(log, input, out var definition); + + Assert.True(result); + + Assert.Equal(12, definition.FirstNumber); + Assert.Equal(32, definition.Count); + } + + [Fact] + public void ValidWithLongTrue() + { + var bytes = OtherEncodings.StringAsLatin1Bytes("214748364700 6"); + + var input = new RandomAccessBuffer(bytes); + + var result = TableSubsectionDefinition.TryRead(log, input, out var definition); + + Assert.True(result); + + Assert.Equal(214748364700L, definition.FirstNumber); + Assert.Equal(6, definition.Count); + } + } +} diff --git a/src/UglyToad.Pdf.Tests/TestDictionaryParser.cs b/src/UglyToad.Pdf.Tests/TestDictionaryParser.cs new file mode 100644 index 00000000..8baab883 --- /dev/null +++ b/src/UglyToad.Pdf.Tests/TestDictionaryParser.cs @@ -0,0 +1,23 @@ +namespace UglyToad.Pdf.Tests +{ + using ContentStream; + using IO; + using Pdf.Cos; + using Pdf.Parser.Parts; + + internal class TestDictionaryParser : IDictionaryParser + { + public PdfDictionary Parse(IRandomAccessRead reader, IBaseParser baseParser, CosObjectPool pool) + { + return new PdfDictionary(); + } + } + + internal class TestBaseParser : IBaseParser + { + public CosBase Parse(IRandomAccessRead reader, CosObjectPool pool) + { + return CosNull.Null; + } + } +} diff --git a/src/UglyToad.Pdf/Parser/Parts/CosBaseParser.cs b/src/UglyToad.Pdf/Parser/Parts/CosBaseParser.cs index 0ffd6f8a..8b44b465 100644 --- a/src/UglyToad.Pdf/Parser/Parts/CosBaseParser.cs +++ b/src/UglyToad.Pdf/Parser/Parts/CosBaseParser.cs @@ -6,7 +6,12 @@ using IO; using Util; - internal class CosBaseParser + internal interface IBaseParser + { + CosBase Parse(IRandomAccessRead reader, CosObjectPool pool); + } + + internal class CosBaseParser : IBaseParser { 
private readonly CosNameParser nameParser; private readonly CosStringParser stringParser; diff --git a/src/UglyToad.Pdf/Parser/Parts/CosDictionaryParser.cs b/src/UglyToad.Pdf/Parser/Parts/CosDictionaryParser.cs index 1702f3a1..491884dd 100644 --- a/src/UglyToad.Pdf/Parser/Parts/CosDictionaryParser.cs +++ b/src/UglyToad.Pdf/Parser/Parts/CosDictionaryParser.cs @@ -1,7 +1,6 @@ namespace UglyToad.Pdf.Parser.Parts { using System; - using System.Collections.Generic; using ContentStream; using Cos; using IO; @@ -9,7 +8,12 @@ using Util; using Util.JetBrains.Annotations; - internal class CosDictionaryParser + internal interface IDictionaryParser + { + PdfDictionary Parse(IRandomAccessRead reader, IBaseParser baseParser, CosObjectPool pool); + } + + internal class CosDictionaryParser : IDictionaryParser { private readonly ILog log; private readonly CosNameParser nameParser; @@ -34,7 +38,7 @@ this.nameParser = nameParser ?? throw new ArgumentNullException(); } - public PdfDictionary Parse(IRandomAccessRead reader, CosBaseParser baseParser, CosObjectPool pool) + public PdfDictionary Parse(IRandomAccessRead reader, IBaseParser baseParser, CosObjectPool pool) { if (reader == null) { @@ -93,7 +97,7 @@ } [ItemCanBeNull] - private (CosName key, CosBase value) ParseCosDictionaryNameValuePair(IRandomAccessRead reader, CosBaseParser baseParser, CosObjectPool pool) + private (CosName key, CosBase value) ParseCosDictionaryNameValuePair(IRandomAccessRead reader, IBaseParser baseParser, CosObjectPool pool) { var key = nameParser.Parse(reader); var value = ParseValue(reader, baseParser, pool); @@ -126,7 +130,7 @@ return (key, value); } - private static CosBase ParseValue(IRandomAccessRead reader, CosBaseParser baseParser, CosObjectPool pool) + private static CosBase ParseValue(IRandomAccessRead reader, IBaseParser baseParser, CosObjectPool pool) { var numOffset = reader.GetPosition(); var value = baseParser.Parse(reader, pool); diff --git 
a/src/UglyToad.Pdf/Parser/Parts/CrossReference/CrossReferenceResolver.cs b/src/UglyToad.Pdf/Parser/Parts/CrossReference/CrossReferenceResolver.cs deleted file mode 100644 index 90ce9957..00000000 --- a/src/UglyToad.Pdf/Parser/Parts/CrossReference/CrossReferenceResolver.cs +++ /dev/null @@ -1,7 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Text; - -namespace UglyToad.Pdf.Parser.Parts.CrossReference -{ -} diff --git a/src/UglyToad.Pdf/Parser/Parts/CrossReference/CrossReferenceTableParser.cs b/src/UglyToad.Pdf/Parser/Parts/CrossReference/CrossReferenceTableParser.cs index 645e44b5..f7b1c0f3 100644 --- a/src/UglyToad.Pdf/Parser/Parts/CrossReference/CrossReferenceTableParser.cs +++ b/src/UglyToad.Pdf/Parser/Parts/CrossReference/CrossReferenceTableParser.cs @@ -14,10 +14,10 @@ private const string FreeEntry = "f"; private readonly ILog log; - private readonly CosDictionaryParser dictionaryParser; - private readonly CosBaseParser baseParser; + private readonly IDictionaryParser dictionaryParser; + private readonly IBaseParser baseParser; - public CrossReferenceTableParser(ILog log, CosDictionaryParser dictionaryParser, CosBaseParser baseParser) + public CrossReferenceTableParser(ILog log, IDictionaryParser dictionaryParser, IBaseParser baseParser) { this.log = log; this.dictionaryParser = dictionaryParser; @@ -64,14 +64,15 @@ { if (!TableSubsectionDefinition.TryRead(log, source, out var subsectionDefinition)) { + log.Warn($"Unexpected subsection definition in the cross-reference table at offset {offset}"); + if (isLenientParsing) { - log.Warn($"Unexpected subsection definition in the cross-reference table at offset {offset}"); - } - else - { - throw new InvalidOperationException($"Unexpected subsection definition in the cross-reference table at offset {offset}"); + + break; } + + return false; } var currentObjectId = subsectionDefinition.FirstNumber; @@ -83,10 +84,12 @@ { break; } + if (source.Peek() == 't') { break; } + //Ignore table contents 
var currentLine = ReadHelper.ReadLine(source); var splitString = currentLine.Split(new[] {' '}, StringSplitOptions.RemoveEmptyEntries); diff --git a/src/UglyToad.Pdf/Parser/Parts/CrossReference/TableSubsectionDefinition.cs b/src/UglyToad.Pdf/Parser/Parts/CrossReference/TableSubsectionDefinition.cs index 96a43266..97119110 100644 --- a/src/UglyToad.Pdf/Parser/Parts/CrossReference/TableSubsectionDefinition.cs +++ b/src/UglyToad.Pdf/Parser/Parts/CrossReference/TableSubsectionDefinition.cs @@ -34,6 +34,11 @@ /// </summary> public TableSubsectionDefinition(long firstNumber, int count) { + if (count < 0) + { + throw new ArgumentOutOfRangeException(nameof(count), $"Count must be 0 or positive, instead it was {count}."); + } + FirstNumber = firstNumber; Count = count; }