diff --git a/src/UglyToad.Pdf.Tests/Parser/PageContentParserTests.cs b/src/UglyToad.Pdf.Tests/Parser/PageContentParserTests.cs new file mode 100644 index 00000000..c23ed086 --- /dev/null +++ b/src/UglyToad.Pdf.Tests/Parser/PageContentParserTests.cs @@ -0,0 +1,425 @@ +namespace UglyToad.Pdf.Tests.Parser +{ + using Graphics; + using Pdf.Parser; + using Xunit; + + public class PageContentParserTests + { + private readonly PageContentParser parser = new PageContentParser(); + + [Fact] + public void CorrectlyExtractsOperations() + { + var input = StringBytesTestConverter.Convert(SimpleGoogleDocPageContent, false); + + var result = parser.Parse(new ReflectionGraphicsStateOperationFactory(), input.Bytes); + } + + private const string SimpleGoogleDocPageContent = @" +1 0 0 -1 0 792 cm +q +0 0 612 792 re +W* n +q +.75 0 0 .75 0 0 cm +1 1 1 RG 1 1 1 rg +/G0 gs +0 0 816 1056 re +f +0 0 816 1056 re +f +0 0 816 1056 re +f +Q +Q +q +0 0 612 791.25 re +W* n +q +.75 0 0 .75 0 0 cm +1 1 1 RG 1 1 1 rg +/G0 gs +0 0 816 1055 re +f +0 96 816 960 re +f +0 0 0 RG 0 0 0 rg +BT +/F0 21.33 Tf +1 0 0 -1 0 140 Tm +96 0 Td <0037> Tj +13.0280762 0 Td <004B> Tj +11.8616943 0 Td <004C> Tj +4.7384338 0 Td <0056> Tj +ET +BT +/F1 21.33 Tf +1 0 0 -1 0 140 Tm +136.292267 0 Td <0001> Tj +ET +BT +/F0 21.33 Tf +1 0 0 -1 0 140 Tm +136.292267 0 Td <0003> Tj +ET +BT +/F1 21.33 Tf +1 0 0 -1 0 140 Tm +142.217911 0 Td <0001> Tj +ET +BT +/F0 21.33 Tf +1 0 0 -1 0 140 Tm +142.217911 0 Td <004C> Tj +4.7384338 0 Td <0056> Tj +ET +BT +/F1 21.33 Tf +1 0 0 -1 0 140 Tm +157.620407 0 Td <0001> Tj +ET +BT +/F0 21.33 Tf +1 0 0 -1 0 140 Tm +157.620407 0 Td <0003> Tj +ET +BT +/F1 21.33 Tf +1 0 0 -1 0 140 Tm +163.546051 0 Td <0001> Tj +ET +BT +/F0 21.33 Tf +1 0 0 -1 0 140 Tm +163.546051 0 Td <0057> Tj +5.9256439 0 Td <004B> Tj +11.8616943 0 Td <0048> Tj +ET +BT +/F1 21.33 Tf +1 0 0 -1 0 140 Tm +193.19508 0 Td <0001> Tj +ET +BT +/F0 21.33 Tf +1 0 0 -1 0 140 Tm +193.19508 0 Td <0003> Tj +ET +BT +/F1 21.33 Tf +1 0 0 -1 0 140 Tm 
+199.12073 0 Td <0001> Tj +ET +BT +/F0 21.33 Tf +1 0 0 -1 0 140 Tm +199.12073 0 Td <0047> Tj +11.8616943 0 Td <0052> Tj +11.8616943 0 Td <0046> Tj +10.6640625 0 Td <0058> Tj +11.8616943 0 Td <0050> Tj +17.766479 0 Td <0048> Tj +11.8616943 0 Td <0051> Tj +11.8616943 0 Td <0057> Tj +ET +BT +/F1 21.33 Tf +1 0 0 -1 0 140 Tm +292.7854 0 Td <0001> Tj +ET +BT +/F0 21.33 Tf +1 0 0 -1 0 140 Tm +292.7854 0 Td <0003> Tj +ET +BT +/F1 21.33 Tf +1 0 0 -1 0 140 Tm +298.71106 0 Td <0001> Tj +ET +BT +/F0 21.33 Tf +1 0 0 -1 0 140 Tm +298.71106 0 Td <0057> Tj +5.9256287 0 Td <004C> Tj +4.7384338 0 Td <0057> Tj +5.9256592 0 Td <004F> Tj +4.7384033 0 Td <0048> Tj +ET +BT +/F0 21.33 Tf +1 0 0 -1 0 140 Tm +331.89063 0 Td <0003> Tj +ET +BT +/F0 14.6599998 Tf +1 0 0 -1 0 171 Tm +96 0 Td <0003> Tj +ET +BT +/F0 14.6599998 Tf +1 0 0 -1 0 191 Tm +96 0 Td <0037> Tj +8.9526215 0 Td <004B> Tj +8.1511078 0 Td <0048> Tj +8.1511078 0 Td <0055> Tj +4.8806458 0 Td <0048> Tj +ET +BT +/F1 14.6599998 Tf +1 0 0 -1 0 191 Tm +134.286591 0 Td <0001> Tj +ET +BT +/F0 14.6599998 Tf +1 0 0 -1 0 191 Tm +134.286591 0 Td <0003> Tj +ET +BT +/F1 14.6599998 Tf +1 0 0 -1 0 191 Tm +138.358566 0 Td <0001> Tj +ET +BT +/F0 14.6599998 Tf +1 0 0 -1 0 191 Tm +138.358566 0 Td <004C> Tj +3.2561493 0 Td <0056> Tj +ET +BT +/F1 14.6599998 Tf +1 0 0 -1 0 191 Tm +148.942841 0 Td <0001> Tj +ET +BT +/F0 14.6599998 Tf +1 0 0 -1 0 191 Tm +148.942841 0 Td <0003> Tj +ET +BT +/F1 14.6599998 Tf +1 0 0 -1 0 191 Tm +153.014816 0 Td <0001> Tj +ET +BT +/F0 14.6599998 Tf +1 0 0 -1 0 191 Tm +153.014816 0 Td <0056> Tj +7.328125 0 Td <0052> Tj +8.1511078 0 Td <0050> Tj +12.2087708 0 Td <0048> Tj +ET +BT +/F1 14.6599998 Tf +1 0 0 -1 0 191 Tm +188.85393 0 Td <0001> Tj +ET +BT +/F0 14.6599998 Tf +1 0 0 -1 0 191 Tm +188.85393 0 Td <0003> Tj +ET +BT +/F1 14.6599998 Tf +1 0 0 -1 0 191 Tm +192.9259 0 Td <0001> Tj +ET +BT +/F0 14.6599998 Tf +1 0 0 -1 0 191 Tm +192.9259 0 Td <004F> Tj +3.2561493 0 Td <0048> Tj +8.1511078 0 Td <0047> Tj +8.1511078 0 Td 
<0048> Tj +ET +BT +/F1 14.6599998 Tf +1 0 0 -1 0 191 Tm +220.63538 0 Td <0001> Tj +ET +BT +/F0 14.6599998 Tf +1 0 0 -1 0 191 Tm +220.63538 0 Td <0003> Tj +ET +BT +/F1 14.6599998 Tf +1 0 0 -1 0 191 Tm +224.70735 0 Td <0001> Tj +ET +BT +/F0 14.6599998 Tf +1 0 0 -1 0 191 Tm +224.70735 0 Td <0057> Tj +4.0719757 0 Td <0048> Tj +8.1511078 0 Td <005B> Tj +7.328125 0 Td <0057> Tj +ET +BT +/F1 14.6599998 Tf +1 0 0 -1 0 191 Tm +248.33054 0 Td <0001> Tj +ET +BT +/F0 14.6599998 Tf +1 0 0 -1 0 191 Tm +248.33054 0 Td <0003> Tj +ET +BT +/F1 14.6599998 Tf +1 0 0 -1 0 191 Tm +252.40251 0 Td <0001> Tj +ET +BT +/F0 14.6599998 Tf +1 0 0 -1 0 191 Tm +252.40251 0 Td <004B> Tj +8.1511078 0 Td <0048> Tj +8.1510925 0 Td <0055> Tj +4.8806763 0 Td <0048> Tj +ET +BT +/F0 14.6599998 Tf +1 0 0 -1 0 191 Tm +281.73438 0 Td <0003> Tj +ET +BT +/F0 14.6599998 Tf +1 0 0 -1 0 211 Tm +96 0 Td <0003> Tj +ET +BT +/F0 14.6599998 Tf +1 0 0 -1 0 231 Tm +96 0 Td <0024> Tj +9.7756042 0 Td <0051> Tj +8.1511078 0 Td <0047> Tj +ET +BT +/F1 14.6599998 Tf +1 0 0 -1 0 231 Tm +122.07782 0 Td <0001> Tj +ET +BT +/F0 14.6599998 Tf +1 0 0 -1 0 231 Tm +122.07782 0 Td <0003> Tj +ET +BT +/F1 14.6599998 Tf +1 0 0 -1 0 231 Tm +126.149796 0 Td <0001> Tj +ET +BT +/F0 14.6599998 Tf +1 0 0 -1 0 231 Tm +126.149796 0 Td <0057> Tj +4.0719757 0 Td <004B> Tj +8.1511078 0 Td <0048> Tj +8.1511078 0 Td <0051> Tj +ET +BT +/F1 14.6599998 Tf +1 0 0 -1 0 231 Tm +154.675095 0 Td <0001> Tj +ET +BT +/F0 14.6599998 Tf +1 0 0 -1 0 231 Tm +154.675095 0 Td <0003> Tj +ET +BT +/F1 14.6599998 Tf +1 0 0 -1 0 231 Tm +158.74707 0 Td <0001> Tj +ET +BT +/F0 14.6599998 Tf +1 0 0 -1 0 231 Tm +158.74707 0 Td <0044> Tj +8.1511078 0 Td <0051> Tj +8.1511078 0 Td <0052> Tj +8.1511078 0 Td <0057> Tj +4.0719757 0 Td <004B> Tj +8.1511078 0 Td <0048> Tj +8.1511078 0 Td <0055> Tj +ET +BT +/F1 14.6599998 Tf +1 0 0 -1 0 231 Tm +208.45523 0 Td <0001> Tj +ET +BT +/F0 14.6599998 Tf +1 0 0 -1 0 231 Tm +208.45523 0 Td <0003> Tj +ET +BT +/F1 14.6599998 Tf +1 0 0 -1 0 231 Tm 
+212.52721 0 Td <0001> Tj +ET +BT +/F0 14.6599998 Tf +1 0 0 -1 0 231 Tm +212.52721 0 Td <004F> Tj +3.2561493 0 Td <004C> Tj +3.2561493 0 Td <0051> Tj +8.1511078 0 Td <0048> Tj +ET +BT +/F1 14.6599998 Tf +1 0 0 -1 0 231 Tm +235.34172 0 Td <0001> Tj +ET +BT +/F0 14.6599998 Tf +1 0 0 -1 0 231 Tm +235.34172 0 Td <0003> Tj +ET +BT +/F1 14.6599998 Tf +1 0 0 -1 0 231 Tm +239.4137 0 Td <0001> Tj +ET +BT +/F0 14.6599998 Tf +1 0 0 -1 0 231 Tm +239.4137 0 Td <0052> Tj +8.1511078 0 Td <0049> Tj +ET +BT +/F1 14.6599998 Tf +1 0 0 -1 0 231 Tm +251.63678 0 Td <0001> Tj +ET +BT +/F0 14.6599998 Tf +1 0 0 -1 0 231 Tm +251.63678 0 Td <0003> Tj +ET +BT +/F1 14.6599998 Tf +1 0 0 -1 0 231 Tm +255.70876 0 Td <0001> Tj +ET +BT +/F0 14.6599998 Tf +1 0 0 -1 0 231 Tm +255.70876 0 Td <0057> Tj +4.0719757 0 Td <0048> Tj +8.1510925 0 Td <005B> Tj +7.328125 0 Td <0057> Tj +4.071991 0 Td <0011> Tj +ET +BT +/F0 14.6599998 Tf +1 0 0 -1 0 231 Tm +283.39063 0 Td <0003> Tj +ET +Q +Q +"; + } +} diff --git a/src/UglyToad.Pdf.Tests/Text/OperatorScannerTests.cs b/src/UglyToad.Pdf.Tests/Text/OperatorScannerTests.cs deleted file mode 100644 index 48f97f3e..00000000 --- a/src/UglyToad.Pdf.Tests/Text/OperatorScannerTests.cs +++ /dev/null @@ -1,266 +0,0 @@ -namespace UglyToad.Pdf.Tests.Text -{ - using System.Collections.Generic; - using System.Linq; - using Pdf.Text; - using Pdf.Util; - using Xunit; - using ComponentType = Pdf.Text.TextObjectComponentType; - - public class ByteTextScannerTests - { - [Fact] - public void ParseSimpleTest() - { - const string text = @" -BT - /F13 12 Tf - 288 720 Td - (ABC) Tj -ET"; - - var scanner = new ByteTextScanner(OtherEncodings.StringAsLatin1Bytes(text)); - - var components = new List(); - - while (scanner.Read()) - { - components.Add(scanner.CurrentComponent); - } - - var expected = new[] - { - ComponentType.BeginText, - ComponentType.Font, - ComponentType.Numeric, - ComponentType.TextFont, - ComponentType.Numeric, - ComponentType.Numeric, - ComponentType.MoveTextPosition, 
- ComponentType.String, - ComponentType.ShowText, - ComponentType.EndText - }; - - Assert.Equal(expected, components.Select(x => x.Type)); - } - - [Fact] - public void ParseStyledText() - { - const string text = @"BT -/F13 48 Tf -0 40 Td -0 Tr -0.5 g -(Some Text) Tj -ET"; - - var scanner = new ByteTextScanner(OtherEncodings.StringAsLatin1Bytes(text)); - - var components = new List(); - - while (scanner.Read()) - { - components.Add(scanner.CurrentComponent); - } - - var expected = new[] - { - ComponentType.BeginText, - ComponentType.Font, - ComponentType.Numeric, - ComponentType.TextFont, - ComponentType.Numeric, - ComponentType.Numeric, - ComponentType.MoveTextPosition, - ComponentType.Numeric, - ComponentType.SetTextRenderingMode, - ComponentType.Numeric, - ComponentType.SetGrayNonStroking, - ComponentType.String, - ComponentType.ShowText, - ComponentType.EndText - }; - - Assert.Equal(expected, components.Select(x => x.Type)); - } - - [Fact] - public void ParseTextAsPath() - { - const string text = @"BT -/F13 48 Tf 20 38 Td 1 Tr 2 w <0053> Tj ET"; - - var scanner = new ByteTextScanner(OtherEncodings.StringAsLatin1Bytes(text)); - - var components = new List(); - - while (scanner.Read()) - { - components.Add(scanner.CurrentComponent); - } - - var expected = new[] - { - ComponentType.BeginText, - ComponentType.Font, - ComponentType.Numeric, - ComponentType.TextFont, - ComponentType.Numeric, - ComponentType.Numeric, - ComponentType.MoveTextPosition, - ComponentType.Numeric, - ComponentType.SetTextRenderingMode, - ComponentType.Numeric, - ComponentType.SetLineWidth, - ComponentType.String, - ComponentType.ShowText, - ComponentType.EndText - }; - - Assert.Equal(expected, components.Select(x => x.Type)); - } - - [Fact] - public void ParseTextMissingFont() - { - const string text = @" -BT - 40 50 Td -(Some more text which -includes a line break, if valid?) 
Tj -ET"; - - var scanner = new ByteTextScanner(OtherEncodings.StringAsLatin1Bytes(text)); - - var components = new List(); - - while (scanner.Read()) - { - components.Add(scanner.CurrentComponent); - } - - var expected = new[] - { - ComponentType.BeginText, - ComponentType.Numeric, - ComponentType.Numeric, - ComponentType.MoveTextPosition, - ComponentType.String, - ComponentType.ShowText, - ComponentType.EndText - }; - - Assert.Equal(expected, components.Select(x => x.Type)); - } - - [Fact] - public void ParseTextMatrix() - { - const string text = @"BT -1 0 67473.567 -1 0 140 Tm -ET"; - - var scanner = new ByteTextScanner(OtherEncodings.StringAsLatin1Bytes(text)); - - var components = new List(); - - while (scanner.Read()) - { - components.Add(scanner.CurrentComponent); - } - - var expected = new[] - { - ComponentType.BeginText, - ComponentType.Numeric, ComponentType.Numeric, ComponentType.Numeric, - ComponentType.Numeric, ComponentType.Numeric, ComponentType.Numeric, - ComponentType.SetTextMatrix, - ComponentType.EndText - }; - - Assert.Equal(expected, components.Select(x => x.Type)); - } - - [Fact] - public void ParseSimpleGoogleDocsCase() - { - const string text = @"BT -/F0 21.33 Tf -1 0 0 -1 0 140 Tm -96 0 Td <0037> Tj -13.0280762 0 Td <004B> Tj -11.8616943 0 Td <004C> Tj -4.7384338 0 Td <0056> Tj -ET"; - - var scanner = new ByteTextScanner(OtherEncodings.StringAsLatin1Bytes(text)); - - var components = new List(); - - while (scanner.Read()) - { - components.Add(scanner.CurrentComponent); - } - - var expected = new[] - { - ComponentType.BeginText, - ComponentType.Font, ComponentType.Numeric, ComponentType.TextFont, - ComponentType.Numeric, ComponentType.Numeric, ComponentType.Numeric, ComponentType.Numeric, ComponentType.Numeric, ComponentType.Numeric, ComponentType.SetTextMatrix, - ComponentType.Numeric, ComponentType.Numeric, ComponentType.MoveTextPosition, ComponentType.String, ComponentType.ShowText, - ComponentType.Numeric, ComponentType.Numeric, 
ComponentType.MoveTextPosition, ComponentType.String, ComponentType.ShowText, - ComponentType.Numeric, ComponentType.Numeric, ComponentType.MoveTextPosition, ComponentType.String, ComponentType.ShowText, - ComponentType.Numeric, ComponentType.Numeric, ComponentType.MoveTextPosition, ComponentType.String, ComponentType.ShowText, - ComponentType.EndText - }; - - Assert.Equal(expected, components.Select(x => x.Type)); - } - - [Theory] - [InlineData("BT", ComponentType.BeginText)] - [InlineData("ET", ComponentType.EndText)] - [InlineData("Tf", ComponentType.TextFont)] - [InlineData("Tj", ComponentType.ShowText)] - [InlineData("Td", ComponentType.MoveTextPosition)] - [InlineData(" Tm", ComponentType.SetTextMatrix)] - [InlineData(" T* ", ComponentType.MoveToNextLineStart)] - [InlineData("\r\n \nTs ", ComponentType.SetTextRise)] - public void RecognisesSingleOperatorAsOnlyStringItem(string text, ComponentType textObjectComponentType) - { - var scanner = new ByteTextScanner(OtherEncodings.StringAsLatin1Bytes(text)); - - var result = new List(); - while (scanner.Read()) - { - result.Add(scanner.CurrentComponent); - } - - Assert.Single(result); - Assert.Equal(textObjectComponentType, result[0].Type); - } - - [Theory] - [InlineData("ETe")] - [InlineData("Tff")] - [InlineData("T j")] - [InlineData(" Ta ")] - [InlineData(" t*")] - [InlineData("\rT\ns")] - [InlineData("no")] - public void SkipsSimilarOperator(string text) - { - var scanner = new ByteTextScanner(OtherEncodings.StringAsLatin1Bytes(text)); - - var result = new List(); - while (scanner.Read()) - { - result.Add(scanner.CurrentComponent); - } - - Assert.Empty(result); - } - } -} diff --git a/src/UglyToad.Pdf.Tests/Text/Operators/NumericTextComponentApproachTests.cs b/src/UglyToad.Pdf.Tests/Text/Operators/NumericTextComponentApproachTests.cs deleted file mode 100644 index b4b847c9..00000000 --- a/src/UglyToad.Pdf.Tests/Text/Operators/NumericTextComponentApproachTests.cs +++ /dev/null @@ -1,52 +0,0 @@ -namespace 
UglyToad.Pdf.Tests.Text.Operators -{ - using System.Collections.Generic; - using System.Linq; - using Pdf.Text; - using Pdf.Text.Operators; - using Xunit; - - public class NumericTextComponentApproachTests - { - private readonly NumericTextComponentApproach approach = new NumericTextComponentApproach(); - - public static IEnumerable TestData = new [] - { - new object[] { "123" }, - new object[] { "43445" }, - new object[] { "+17" }, - new object[] { "-98" }, - new object[] { "0" }, - new object[] { "34.5" }, - new object[] { "-3.62" }, - new object[] { "+123.6" }, - new object[] { "4." }, - new object[] { "-.002" }, - new object[] { "0.0" }, - }; - - [Theory] - [MemberData(nameof(TestData))] - public void CanReadNumbers(string number) - { - var bytes = number.Select(x => (byte) x).ToArray(); - - var canRead = approach.CanRead(bytes[0], 0); - - Assert.True(canRead); - } - - [Theory] - [MemberData(nameof(TestData))] - public void ReadsNumbers(string number) - { - var bytes = number.Select(x => (byte)x); - - var result = approach.Read(new byte[0], bytes, out var offset); - - Assert.NotNull(result); - - Assert.Equal(TextObjectComponentType.Numeric, result.Type); - } - } -} diff --git a/src/UglyToad.Pdf.Tests/Text/Operators/StringTextComponentApproachTests.cs b/src/UglyToad.Pdf.Tests/Text/Operators/StringTextComponentApproachTests.cs deleted file mode 100644 index 056c163e..00000000 --- a/src/UglyToad.Pdf.Tests/Text/Operators/StringTextComponentApproachTests.cs +++ /dev/null @@ -1,37 +0,0 @@ -namespace UglyToad.Pdf.Tests.Text.Operators -{ - using System; - using System.Collections.Generic; - using System.Linq; - using Pdf.Text.Operators; - using Xunit; - - public class StringTextComponentApproachTests - { - private readonly StringTextComponentApproach approach = new StringTextComponentApproach(); - - [Theory] - [InlineData("<03)")] - [InlineData("<03AR>")] - [InlineData("<9-3>")] - public void InvalidHexThrows(string s) - { - Action action = () => approach.Read(new 
List(), s.Select(x => (byte)x), out var _); - - Assert.Throws(action); - } - - [Theory] - [InlineData("<03>")] - [InlineData("<03BA>")] - [InlineData("<9a37eF>")] - public void CanReadValidHex(string s) - { - var result = approach.Read(new List(), s.Select(x => (byte)x), out var _); - - Assert.NotNull(result); - Assert.Equal(s.Select(x => (byte)x).ToArray(), result.AsOperand.RawBytes); - - } - } -} diff --git a/src/UglyToad.Pdf.Tests/Tokenization/HexTokenizerTests.cs b/src/UglyToad.Pdf.Tests/Tokenization/HexTokenizerTests.cs index 7b8ff0a2..8c27acc1 100644 --- a/src/UglyToad.Pdf.Tests/Tokenization/HexTokenizerTests.cs +++ b/src/UglyToad.Pdf.Tests/Tokenization/HexTokenizerTests.cs @@ -24,7 +24,7 @@ } [Theory] - [InlineData("<00>", "\0")] + [InlineData("<00>", "")] [InlineData("", "¡")] public void TokenizesHexStringsCorrectly(string s, string expected) { diff --git a/src/UglyToad.Pdf.Tests/Tokenization/Tokens/HexTokenTests.cs b/src/UglyToad.Pdf.Tests/Tokenization/Tokens/HexTokenTests.cs index 0f6693d6..d3878748 100644 --- a/src/UglyToad.Pdf.Tests/Tokenization/Tokens/HexTokenTests.cs +++ b/src/UglyToad.Pdf.Tests/Tokenization/Tokens/HexTokenTests.cs @@ -8,7 +8,7 @@ [Theory] [InlineData("AE", "®")] [InlineData("61", "a")] - [InlineData("0061", "\0a")] + [InlineData("0061", "a")] [InlineData("7465787420736f", "text so")] public void MapsCorrectlyToString(string input, string expected) { diff --git a/src/UglyToad.Pdf/Content/MediaBox.cs b/src/UglyToad.Pdf/Content/MediaBox.cs new file mode 100644 index 00000000..6be5212b --- /dev/null +++ b/src/UglyToad.Pdf/Content/MediaBox.cs @@ -0,0 +1,76 @@ +namespace UglyToad.Pdf.Content +{ + using System; + using Geometry; + + /// + /// The boundary of the physical medium to display or print on. + /// + /// + /// See table 3.27 from the PDF specification version 1.7. + /// + public class MediaBox + { + /// + /// User space units per inch. + /// + private const decimal PointsPerInch = 72; + + /// + /// User space units per millimeter. 
+ /// + private const decimal PointsPerMm = 1 / (10 * 2.54m) * PointsPerInch; + + /// + /// A the size of U.S. Letter, 8.5" x 11" Paper. + /// + public static readonly MediaBox Letter = new MediaBox(new PdfRectangle(0, 0, 8.5m * PointsPerInch, 11m * PointsPerInch)); + + /// + /// A the size of U.S. Legal, 8.5" x 14" Paper. + /// + public static readonly MediaBox Legal = new MediaBox(new PdfRectangle(0, 0, 8.5m * PointsPerInch, 14m * PointsPerInch)); + + /// + /// A the size of A0 Paper. + /// + public static readonly MediaBox A0 = new MediaBox(new PdfRectangle(0, 0, 841 * PointsPerMm, 1189 * PointsPerMm)); + + /// + /// A the size of A1 Paper. + /// + public static readonly MediaBox A1 = new MediaBox(new PdfRectangle(0, 0, 594 * PointsPerMm, 841 * PointsPerMm)); + + /// + /// A the size of A2 Paper. + /// + public static readonly MediaBox A2 = new MediaBox(new PdfRectangle(0, 0, 420 * PointsPerMm, 594 * PointsPerMm)); + + /// + /// A the size of A3 Paper. + /// + public static readonly MediaBox A3 = new MediaBox(new PdfRectangle(0, 0, 297 * PointsPerMm, 420 * PointsPerMm)); + + /// + /// A the size of A4 Paper. + /// + public static readonly MediaBox A4 = new MediaBox(new PdfRectangle(0, 0, 210 * PointsPerMm, 297 * PointsPerMm)); + + /// + /// A the size of A5 Paper. + /// + public static readonly MediaBox A5 = new MediaBox(new PdfRectangle(0, 0, 148 * PointsPerMm, 210 * PointsPerMm)); + + /// + /// A the size of A6 Paper. + /// + public static readonly MediaBox A6 = new MediaBox(new PdfRectangle(0, 0, 105 * PointsPerMm, 148 * PointsPerMm)); + + public PdfRectangle Bounds { get; } + + public MediaBox(PdfRectangle bounds) + { + Bounds = bounds ?? 
throw new ArgumentNullException(nameof(bounds)); + } + } +} diff --git a/src/UglyToad.Pdf/Content/Page.cs b/src/UglyToad.Pdf/Content/Page.cs index 714ed503..d587ea28 100644 --- a/src/UglyToad.Pdf/Content/Page.cs +++ b/src/UglyToad.Pdf/Content/Page.cs @@ -1,89 +1,101 @@ namespace UglyToad.Pdf.Content { using System; + using System.Diagnostics; using ContentStream; - using ContentStream.TypedAccessors; using Cos; using Filters; - using Logging; + using Geometry; + using Graphics; + using IO; using Parser; - using Text; using Util; public class Page { private readonly ParsingArguments parsingArguments; private readonly ContentStreamDictionary dictionary; + + /// + /// The 1 indexed page number. + /// public int Number { get; } - public bool Loaded { get; private set; } + public MediaBox MediaBox { get; } - internal Page(int number, ContentStreamDictionary dictionary, ParsingArguments parsingArguments) + public PageContent Content { get; } + + internal Page(int number, ContentStreamDictionary dictionary, PageTreeMembers pageTreeMembers, ParsingArguments parsingArguments) { if (number <= 0) { throw new ArgumentOutOfRangeException(nameof(number), "Page number cannot be 0 or negative."); } - - Number = number; - Loaded = false; + this.dictionary = dictionary ?? throw new ArgumentNullException(nameof(dictionary)); this.parsingArguments = parsingArguments ?? 
throw new ArgumentNullException(nameof(parsingArguments)); - var mediabox = dictionary.GetDictionaryObject(CosName.MEDIA_BOX) as COSArray; - var contents = dictionary.GetItemOrDefault(CosName.CONTENTS); - var raw = contents as RawCosStream; - var obj = parsingArguments.CachingProviders.ObjectPool.Get(new CosObjectKey(7, 0)); - var parser = parsingArguments.Container.Get() - .Parse(parsingArguments, obj, false) as RawCosStream; - var rw = parser.Decode(parsingArguments.Container.Get()); - var format = OtherEncodings.BytesAsLatin1String(rw); - var pee = new TextSectionParser(new NoOpLog()).ReadTextObjects(new ByteTextScanner(rw)); - var font0 = parsingArguments.CachingProviders.ObjectPool.Get(new CosObjectKey(16, 0)); - var cmpa = parsingArguments.CachingProviders.ObjectPool.Get(new CosObjectKey(9, 0)); - var toad = parsingArguments.Container.Get() - .Parse(parsingArguments, new CosObjectKey(9, 0), false); - var bigsby = (toad as RawCosStream).Decode(parsingArguments.Container.Get()); + Number = number; - var ssss = OtherEncodings.BytesAsLatin1String(bigsby); + var type = dictionary.GetName(CosName.TYPE); + + if (type != null && !type.Equals(CosName.PAGE) && !parsingArguments.IsLenientParsing) + { + throw new InvalidOperationException($"Created page number {number} but its type was specified as {type} rather than 'Page'."); + } + + if (dictionary.TryGetItemOfType(CosName.MEDIA_BOX, out COSArray mediaboxArray)) + { + var x1 = mediaboxArray.getInt(0); + var y1 = mediaboxArray.getInt(1); + var x2 = mediaboxArray.getInt(2); + var y2 = mediaboxArray.getInt(3); + + MediaBox = new MediaBox(new PdfRectangle(x1, y1, x2, y2)); + } + else + { + MediaBox = pageTreeMembers.GetMediaBox(); + + if (MediaBox == null) + { + if (parsingArguments.IsLenientParsing) + { + MediaBox = MediaBox.A4; + } + else + { + throw new InvalidOperationException("No mediabox was present for page: " + number); + } + } + } + + if (dictionary.GetItemOrDefault(CosName.RESOURCES) is ContentStreamDictionary 
resource) + { + parsingArguments.CachingProviders.ResourceContainer.LoadResourceDictionary(resource, parsingArguments); + } + + var contentObject = dictionary.GetItemOrDefault(CosName.CONTENTS) as CosObject; + if (contentObject != null) + { + var contentStream = parsingArguments.Container.Get() + .Parse(parsingArguments, contentObject, false) as RawCosStream; + + if (contentStream == null) + { + throw new InvalidOperationException("Failed to parse the content for the page: " + number); + } + + var contents = contentStream.Decode(parsingArguments.Container.Get()); + + if (Debugger.IsAttached) + { + var textContents = OtherEncodings.BytesAsLatin1String(contents); + } + + Content = parsingArguments.Container.Get() + .Parse(parsingArguments.Container.Get(), new ByteArrayInputBytes(contents)); + } } } - - /// - /// - /// - /// - /// The positive x axis extends horizontally to the right and the positive y axis vertically upward, as in standard mathematical practice - /// - public struct Rectangle - { - public decimal Width { get; } - - public decimal Height { get; } - - public decimal Left { get; } - - public decimal Top { get; } - - public decimal Right { get; } - - public decimal Bottom { get; } - - public Rectangle(decimal x1, decimal y1, decimal x2, decimal y2) - { - Width = 0; - Height = 0; - Top = 0; - Left = 0; - Right = 0; - Bottom = 0; - } - } - - public struct Coordinate - { - public decimal X { get; set; } - - public decimal Y { get; set; } - } } \ No newline at end of file diff --git a/src/UglyToad.Pdf/Content/PageContent.cs b/src/UglyToad.Pdf/Content/PageContent.cs new file mode 100644 index 00000000..ca199d68 --- /dev/null +++ b/src/UglyToad.Pdf/Content/PageContent.cs @@ -0,0 +1,17 @@ +namespace UglyToad.Pdf.Content +{ + using System.Collections.Generic; + using Graphics.Operations; + + /// + /// + /// + /// + /// This should contain a replayable stack of drawing instructions for page content + /// from a content stream in addition to lazily evaluated state 
such as text on the page or images. + /// + public class PageContent + { + internal IReadOnlyList GraphicsStateOperations { get; set; } + } +} diff --git a/src/UglyToad.Pdf/Content/PageTreeMembers.cs b/src/UglyToad.Pdf/Content/PageTreeMembers.cs new file mode 100644 index 00000000..5ce5c2f6 --- /dev/null +++ b/src/UglyToad.Pdf/Content/PageTreeMembers.cs @@ -0,0 +1,16 @@ +namespace UglyToad.Pdf.Content +{ + using System; + + /// + /// Contains the values inherited from the Page Tree for this page. + /// + public class PageTreeMembers + { + public MediaBox GetMediaBox() + { + // TODO: tree inheritance + throw new NotImplementedException("Track inherited members"); + } + } +} \ No newline at end of file diff --git a/src/UglyToad.Pdf/Content/Pages.cs b/src/UglyToad.Pdf/Content/Pages.cs index 407cdf14..aa78a74c 100644 --- a/src/UglyToad.Pdf/Content/Pages.cs +++ b/src/UglyToad.Pdf/Content/Pages.cs @@ -60,7 +60,7 @@ { if (locatedPages.TryGetValue(pageNumber, out ContentStreamDictionary targetPageDictionary)) { - return new Page(pageNumber, targetPageDictionary, arguments); + return new Page(pageNumber, targetPageDictionary, new PageTreeMembers(), arguments); } var observed = new List(); diff --git a/src/UglyToad.Pdf/Content/ResourceContainer.cs b/src/UglyToad.Pdf/Content/ResourceContainer.cs new file mode 100644 index 00000000..33f8db36 --- /dev/null +++ b/src/UglyToad.Pdf/Content/ResourceContainer.cs @@ -0,0 +1,52 @@ +namespace UglyToad.Pdf.Content +{ + using System; + using System.Collections.Generic; + using ContentStream; + using Cos; + using Fonts; + using Parser; + + public class ResourceContainer + { + private readonly Dictionary loadedFonts = new Dictionary(); + + internal void LoadResourceDictionary(ContentStreamDictionary dictionary, ParsingArguments arguments) + { + if (dictionary.TryGetValue(CosName.FONT, out var fontBase) && fontBase is ContentStreamDictionary fontDictionary) + { + LoadFontDictionary(fontDictionary, arguments); + } + } + + private void 
LoadFontDictionary(ContentStreamDictionary fontDictionary, ParsingArguments arguments) + { + foreach (var pair in fontDictionary) + { + if (loadedFonts.ContainsKey(pair.Key)) + { + continue; + } + + if (!(pair.Value is CosObject objectKey)) + { + if (arguments.IsLenientParsing) + { + continue; + } + + throw new InvalidOperationException($"The font with name {pair.Key} did not link to an object key. Value was: {pair.Value}."); + } + + var dynamicParser = arguments.Container.Get(); + + var fontObject = dynamicParser.Parse(arguments, objectKey, false) as ContentStreamDictionary; + + var font = new CompositeFont(); + + loadedFonts[pair.Key] = font; + } + } + } +} + diff --git a/src/UglyToad.Pdf/ContentStream/ContentStreamDictionary.cs b/src/UglyToad.Pdf/ContentStream/ContentStreamDictionary.cs index 09676f4d..e3e6be7d 100644 --- a/src/UglyToad.Pdf/ContentStream/ContentStreamDictionary.cs +++ b/src/UglyToad.Pdf/ContentStream/ContentStreamDictionary.cs @@ -50,6 +50,18 @@ return null; } + public bool TryGetItemOfType(CosName key, out T item) where T : CosBase + { + item = null; + if (inner.TryGetValue(key, out var value) && value is T t) + { + item = t; + return true; + } + + return false; + } + public void Set(CosName key, CosBase value) { if (key == null) @@ -74,6 +86,5 @@ throw new NotImplementedException(); } #endregion - } } diff --git a/src/UglyToad.Pdf/Core/TransformationMatrix.cs b/src/UglyToad.Pdf/Core/TransformationMatrix.cs new file mode 100644 index 00000000..7260250f --- /dev/null +++ b/src/UglyToad.Pdf/Core/TransformationMatrix.cs @@ -0,0 +1,75 @@ +namespace UglyToad.Pdf.Core +{ + using System; + using Geometry; + + /// + /// Specifies the conversion from the transformed coordinate space to the original untransformed coordinate space. 
+ /// + internal struct TransformationMatrix + { + public static TransformationMatrix Default = new TransformationMatrix(new decimal[] + { + 1,0,0, + 0,1,0, + 0,0,1 + }); + + private readonly decimal[] value; + + public decimal A => value[0]; + public decimal B => value[1]; + public decimal C => value[3]; + public decimal D => value[4]; + public decimal E => value[6]; + public decimal F => value[7]; + + public TransformationMatrix(decimal[] value) + { + if (value == null) + { + throw new ArgumentNullException(nameof(value)); + } + + if (value.Length != 9) + { + throw new ArgumentException("The constructor for the PDF transformation matrix must contain 9 elements. Instead got: " + value); + } + + this.value = value; + } + + public PdfPoint Transform(PdfPoint original) + { + var x = A * original.X + C * original.Y + E; + var y = B * original.X + D * original.Y + F; + + return new PdfPoint(x, y); + } + + public static TransformationMatrix FromArray(decimal[] values) + { + if (values.Length == 9) + { + return new TransformationMatrix(values); + } + + if (values.Length == 6) + { + return new TransformationMatrix(new [] + { + values[0], values[1], 0, + values[2], values[3], 0, + values[4], values[5], 1 + }); + } + + throw new ArgumentException("The array must either define all 9 elements of the matrix or all 6 key elements. Instead array was: " + values); + } + + public override string ToString() + { + return $"{A}, {B}, 0\r\n{C}, {D}, 0\r\n{E}, {F}, 1"; + } + } +} diff --git a/src/UglyToad.Pdf/Cos/COSDocument.cs b/src/UglyToad.Pdf/Cos/COSDocument.cs index 8884d205..09b6b1e8 100644 --- a/src/UglyToad.Pdf/Cos/COSDocument.cs +++ b/src/UglyToad.Pdf/Cos/COSDocument.cs @@ -64,24 +64,6 @@ private bool closed = false; - private readonly ScratchFile scratchFile; - - public COSDocument() : this(ScratchFile.getMainMemoryOnlyInstance()) - { - } - - /** - * Constructor that will use the provide memory handler for storage of the - * PDF streams. 
- * - * @param scratchFile memory handler for buffering of PDF streams - * - */ - public COSDocument(ScratchFile scratchFile) - { - this.scratchFile = scratchFile; - } - /** * Creates a new COSStream using the current configuration for scratch files. * @@ -380,7 +362,6 @@ { stream.Dispose(); } - scratchFile?.Dispose(); closed = true; } } diff --git a/src/UglyToad.Pdf/Fonts/CompositeFont.cs b/src/UglyToad.Pdf/Fonts/CompositeFont.cs index 4d544bf5..37a2fbd9 100644 --- a/src/UglyToad.Pdf/Fonts/CompositeFont.cs +++ b/src/UglyToad.Pdf/Fonts/CompositeFont.cs @@ -1,20 +1,18 @@ -using System; -using System.Collections.Generic; -using System.Text; - -namespace UglyToad.Pdf.Fonts +namespace UglyToad.Pdf.Fonts { + using System; + using System.Collections.Generic; using Cmap; using Cos; - public class CompositeFont - { - public bool IsSimple { get; } = false; + //public class CompositeFont + //{ + // public bool IsSimple { get; } = false; - public CosName SubType { get; } = CosName.TYPE0; + // public CosName SubType { get; } = CosName.TYPE0; - public CharacterIdentifierFont Descendant { get; } - } + // public CharacterIdentifierFont Descendant { get; } + //} /// /// Equivalent to the DW2 array in the font dictionary for vertical fonts. 
namespace UglyToad.Pdf.Fonts
{
    using Cmap;
    using Cos;

    /// <summary>
    /// Read-only view of a font: its subtype name, base font type and ToUnicode CMap.
    /// </summary>
    public interface IFont
    {
        /// <summary>
        /// The font subtype, as a <see cref="CosName"/>.
        /// </summary>
        CosName SubType { get; }

        /// <summary>
        /// The base font type, as a string.
        /// </summary>
        string BaseFontType { get; }

        /// <summary>
        /// The <see cref="CMap"/> exposed as ToUnicode.
        /// NOTE(review): presumably maps character codes to Unicode and may be absent (null) - confirm against the font parser.
        /// </summary>
        CMap ToUnicode { get; }
    }

    /// <summary>
    /// An <see cref="IFont"/> implementation made of get-only auto-properties.
    /// </summary>
    // NOTE(review): no constructor or initializer assigns these properties here, so every
    // instance reports null for all three - this looks like a placeholder; confirm.
    public class CompositeFont : IFont
    {
        /// <inheritdoc />
        public CosName SubType { get; }

        /// <inheritdoc />
        public string BaseFontType { get; }

        /// <inheritdoc />
        public CMap ToUnicode { get; }
    }
}
namespace UglyToad.Pdf.Geometry
{
    /// <summary>
    /// An immutable point with decimal X and Y coordinates.
    /// </summary>
    public struct PdfPoint
    {
        /// <summary>
        /// The point (0, 0). Read-only so that no caller can reassign the shared origin.
        /// </summary>
        public static readonly PdfPoint Origin = new PdfPoint(0m, 0m);

        /// <summary>
        /// The X coordinate.
        /// </summary>
        public decimal X { get; }

        /// <summary>
        /// The Y coordinate.
        /// </summary>
        public decimal Y { get; }

        /// <summary>
        /// Creates a point from decimal coordinates.
        /// </summary>
        public PdfPoint(decimal x, decimal y)
        {
            X = x;
            Y = y;
        }

        /// <summary>
        /// Creates a point from integer coordinates (widened to decimal without loss).
        /// </summary>
        public PdfPoint(int x, int y)
        {
            X = x;
            Y = y;
        }

        /// <summary>
        /// Creates a point from double coordinates using an explicit conversion to decimal.
        /// The conversion throws <see cref="System.OverflowException"/> for NaN, infinities
        /// and values outside the range of decimal.
        /// </summary>
        public PdfPoint(double x, double y)
        {
            X = (decimal)x;
            Y = (decimal)y;
        }

        /// <summary>
        /// Formats the point as <c>(x:X, y:Y)</c>.
        /// </summary>
        public override string ToString()
        {
            return $"(x:{X}, y:{Y})";
        }
    }
}
namespace UglyToad.Pdf.Graphics.Operations
{
    using Geometry;

    /// <summary>
    /// Operation for the "c" operator: a curve described by two control points and an end point.
    /// </summary>
    internal class AppendDualControlPointBezierCurve : IGraphicsStateOperation
    {
        /// <summary>
        /// The operator text for this operation.
        /// </summary>
        public const string Symbol = "c";

        /// <inheritdoc />
        public string Operator
        {
            get { return Symbol; }
        }

        /// <summary>
        /// Point built from the first operand pair (x1, y1).
        /// </summary>
        public PdfPoint ControlPoint1 { get; }

        /// <summary>
        /// Point built from the second operand pair (x2, y2).
        /// </summary>
        public PdfPoint ControlPoint2 { get; }

        /// <summary>
        /// Point built from the third operand pair (x3, y3).
        /// </summary>
        public PdfPoint End { get; }

        /// <summary>
        /// Creates the operation from its six numeric operands.
        /// </summary>
        public AppendDualControlPointBezierCurve(decimal x1, decimal y1, decimal x2, decimal y2, decimal x3, decimal y3)
        {
            var first = new PdfPoint(x1, y1);
            var second = new PdfPoint(x2, y2);
            var last = new PdfPoint(x3, y3);

            ControlPoint1 = first;
            ControlPoint2 = second;
            End = last;
        }

        /// <summary>
        /// Writes the six operands followed by the operator, separated by single spaces.
        /// </summary>
        public override string ToString()
        {
            var first = ControlPoint1;
            var second = ControlPoint2;
            var last = End;

            return string.Join(" ", first.X, first.Y, second.X, second.Y, last.X, last.Y, Symbol);
        }
    }
}
namespace UglyToad.Pdf.Graphics.Operations
{
    using Geometry;

    /// <summary>
    /// Operation for the "re" operator: a rectangle given by a corner point, a width and a height.
    /// </summary>
    internal class AppendRectangle : IGraphicsStateOperation
    {
        /// <summary>
        /// The operator text for this operation.
        /// </summary>
        public const string Symbol = "re";

        /// <inheritdoc />
        public string Operator
        {
            get { return Symbol; }
        }

        /// <summary>
        /// Point built from the x and y operands.
        /// </summary>
        public PdfPoint LowerLeft { get; }

        /// <summary>
        /// The width operand, stored unchanged.
        /// </summary>
        public decimal Width { get; }

        /// <summary>
        /// The height operand, stored unchanged.
        /// </summary>
        public decimal Height { get; }

        /// <summary>
        /// Creates the operation from its four numeric operands.
        /// </summary>
        public AppendRectangle(decimal x, decimal y, decimal width, decimal height)
        {
            var corner = new PdfPoint(x, y);

            LowerLeft = corner;
            Height = height;
            Width = width;
        }

        /// <summary>
        /// Writes "x y width height re".
        /// </summary>
        public override string ToString()
        {
            var corner = LowerLeft;

            return string.Join(" ", corner.X, corner.Y, Width, Height, Symbol);
        }
    }
}
namespace UglyToad.Pdf.Graphics.Operations
{
    using Geometry;

    /// <summary>
    /// Operation for the "v" operator: a curve described by one explicit control point (x2, y2)
    /// and an end point (x3, y3).
    /// </summary>
    internal class AppendStartControlPointBezierCurve : IGraphicsStateOperation
    {
        /// <summary>
        /// The operator text for this operation.
        /// </summary>
        public const string Symbol = "v";

        /// <inheritdoc />
        public string Operator
        {
            get { return Symbol; }
        }

        /// <summary>
        /// Point built from the first operand pair (x2, y2).
        /// </summary>
        public PdfPoint ControlPoint2 { get; }

        /// <summary>
        /// Point built from the second operand pair (x3, y3).
        /// </summary>
        public PdfPoint End { get; }

        /// <summary>
        /// Creates the operation from its four numeric operands.
        /// </summary>
        public AppendStartControlPointBezierCurve(decimal x2, decimal y2, decimal x3, decimal y3)
        {
            var control = new PdfPoint(x2, y2);
            var last = new PdfPoint(x3, y3);

            ControlPoint2 = control;
            End = last;
        }

        /// <summary>
        /// Writes "x2 y2 x3 y3 v".
        /// </summary>
        public override string ToString()
        {
            var control = ControlPoint2;
            var last = End;

            return string.Join(" ", control.X, control.Y, last.X, last.Y, Symbol);
        }
    }
}
namespace UglyToad.Pdf.Graphics.Operations
{
    /// <summary>
    /// Operand-free operation for the "BT" operator. Only one instance exists; use <see cref="Value"/>.
    /// </summary>
    internal class BeginText : IGraphicsStateOperation
    {
        /// <summary>
        /// The operator text for this operation.
        /// </summary>
        public const string Symbol = "BT";

        /// <summary>
        /// The single shared instance.
        /// </summary>
        public static readonly BeginText Value = new BeginText();

        /// <inheritdoc />
        public string Operator
        {
            get { return Symbol; }
        }

        // Private so that instances can only be obtained through Value.
        private BeginText()
        {
        }

        /// <summary>
        /// Returns the operator text.
        /// </summary>
        public override string ToString() => Symbol;
    }
}
namespace UglyToad.Pdf.Graphics.Operations
{
    /// <summary>
    /// Operand-free operation for the "h" operator. Only one instance exists; use <see cref="Value"/>.
    /// </summary>
    internal class CloseSubpath : IGraphicsStateOperation
    {
        /// <summary>
        /// The operator text for this operation.
        /// </summary>
        public const string Symbol = "h";

        /// <summary>
        /// The single shared instance.
        /// </summary>
        public static readonly CloseSubpath Value = new CloseSubpath();

        /// <inheritdoc />
        public string Operator
        {
            get { return Symbol; }
        }

        // Private so that instances can only be obtained through Value.
        private CloseSubpath()
        {
        }

        /// <summary>
        /// Returns the operator text.
        /// </summary>
        public override string ToString() => Symbol;
    }
}
namespace UglyToad.Pdf.Graphics.Operations
{
    /// <summary>
    /// Operand-free operation for the "ET" operator, the counterpart of <c>BT</c> (begin text).
    /// Only one instance exists; use <see cref="Value"/>.
    /// </summary>
    internal class EndText : IGraphicsStateOperation
    {
        /// <summary>
        /// The operator text for this operation.
        /// </summary>
        // Was "q", which is the symbol of the Push operation; an end-text operation keyed
        // on "q" collides with Push and never matches the "ET" tokens in a content stream.
        public const string Symbol = "ET";

        /// <summary>
        /// The single shared instance.
        /// </summary>
        public static readonly EndText Value = new EndText();

        /// <inheritdoc />
        public string Operator => Symbol;

        // Private so that instances can only be obtained through Value.
        private EndText()
        {
        }

        /// <summary>
        /// Returns the operator text.
        /// </summary>
        public override string ToString()
        {
            return Symbol;
        }
    }
}
namespace UglyToad.Pdf.Graphics.Operations
{
    /// <summary>
    /// Operand-free operation for the "f" operator. Only one instance exists; use <see cref="Value"/>.
    /// </summary>
    internal class FillPathNonZeroWinding : IGraphicsStateOperation
    {
        /// <summary>
        /// The operator text for this operation.
        /// </summary>
        public const string Symbol = "f";

        /// <summary>
        /// The single shared instance.
        /// </summary>
        public static readonly FillPathNonZeroWinding Value = new FillPathNonZeroWinding();

        /// <inheritdoc />
        public string Operator
        {
            get { return Symbol; }
        }

        // Private so that instances can only be obtained through Value.
        private FillPathNonZeroWinding()
        {
        }

        /// <summary>
        /// Returns the operator text.
        /// </summary>
        public override string ToString() => Symbol;
    }
}
namespace UglyToad.Pdf.Graphics.Operations
{
    /// <summary>
    /// Operand-free operation for the "W*" operator. Only one instance exists; use <see cref="Value"/>.
    /// </summary>
    internal class ModifyClippingByEvenOddIntersect : IGraphicsStateOperation
    {
        /// <summary>
        /// The operator text for this operation.
        /// </summary>
        public const string Symbol = "W*";

        /// <summary>
        /// The single shared instance.
        /// </summary>
        public static readonly ModifyClippingByEvenOddIntersect Value = new ModifyClippingByEvenOddIntersect();

        /// <inheritdoc />
        public string Operator
        {
            get { return Symbol; }
        }

        // Private so that instances can only be obtained through Value.
        private ModifyClippingByEvenOddIntersect()
        {
        }

        /// <summary>
        /// Returns the operator text.
        /// </summary>
        public override string ToString() => Symbol;
    }
}
namespace UglyToad.Pdf.Graphics.Operations
{
    using System;

    /// <summary>
    /// Operation for the "cm" operator, carrying its six numeric operands.
    /// </summary>
    internal class ModifyTransformationMatrix : IGraphicsStateOperation
    {
        /// <summary>
        /// The operator text for this operation.
        /// </summary>
        public const string Symbol = "cm";

        /// <inheritdoc />
        public string Operator => Symbol;

        /// <summary>
        /// The six operands in the order they were supplied. The array is stored by reference, not copied.
        /// </summary>
        public decimal[] Value { get; }

        /// <summary>
        /// Creates the operation from its six operands.
        /// </summary>
        /// <param name="value">Exactly six numbers.</param>
        /// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="value"/> does not contain exactly 6 elements.</exception>
        public ModifyTransformationMatrix(decimal[] value)
        {
            if (value == null)
            {
                throw new ArgumentNullException(nameof(value));
            }

            if (value.Length != 6)
            {
                // Report the count: concatenating the array itself only prints "System.Decimal[]".
                throw new ArgumentException(
                    "The cm operator must pass 6 numbers. Instead got: " + value.Length,
                    nameof(value));
            }

            Value = value;
        }

        /// <summary>
        /// Writes the six operands followed by the operator, separated by single spaces.
        /// </summary>
        public override string ToString()
        {
            return $"{Value[0]} {Value[1]} {Value[2]} {Value[3]} {Value[4]} {Value[5]} {Symbol}";
        }
    }
}
namespace UglyToad.Pdf.Graphics.Operations
{
    /// <summary>
    /// Operation for the "Td" operator, carrying its two numeric operands.
    /// </summary>
    internal class MoveToNextLineWithOffset : IGraphicsStateOperation
    {
        /// <summary>
        /// The operator text for this operation.
        /// </summary>
        public const string Symbol = "Td";

        /// <inheritdoc />
        public string Operator
        {
            get { return Symbol; }
        }

        /// <summary>
        /// The first operand (tx).
        /// </summary>
        public decimal Tx { get; }

        /// <summary>
        /// The second operand (ty).
        /// </summary>
        public decimal Ty { get; }

        /// <summary>
        /// Creates the operation from its two operands.
        /// </summary>
        public MoveToNextLineWithOffset(decimal tx, decimal ty)
        {
            Ty = ty;
            Tx = tx;
        }

        /// <summary>
        /// Writes "tx ty Td".
        /// </summary>
        public override string ToString() => string.Join(" ", Tx, Ty, Symbol);
    }
}
namespace UglyToad.Pdf.Graphics.Operations
{
    /// <summary>
    /// Operation for the "Tc" operator, carrying a single numeric operand.
    /// </summary>
    internal class SetCharacterSpacing : IGraphicsStateOperation
    {
        /// <summary>
        /// The operator text for this operation.
        /// </summary>
        public const string Symbol = "Tc";

        /// <inheritdoc />
        public string Operator
        {
            get { return Symbol; }
        }

        /// <summary>
        /// The spacing operand, stored unchanged.
        /// </summary>
        public decimal Spacing { get; }

        /// <summary>
        /// Creates the operation from its operand.
        /// </summary>
        public SetCharacterSpacing(decimal spacing)
        {
            Spacing = spacing;
        }

        /// <summary>
        /// Writes "spacing Tc".
        /// </summary>
        public override string ToString() => string.Join(" ", Spacing, Symbol);
    }
}
namespace UglyToad.Pdf.Graphics.Operations
{
    using Cos;

    /// <summary>
    /// Operation for the "Tf" operator, carrying a font name and a size.
    /// </summary>
    internal class SetFontSize : IGraphicsStateOperation
    {
        /// <summary>
        /// The operator text for this operation.
        /// </summary>
        public const string Symbol = "Tf";

        /// <inheritdoc />
        public string Operator
        {
            get { return Symbol; }
        }

        /// <summary>
        /// The font name operand, stored unchanged.
        /// </summary>
        public CosName Font { get; }

        /// <summary>
        /// The size operand, stored unchanged.
        /// </summary>
        public decimal Size { get; }

        /// <summary>
        /// Creates the operation from its two operands.
        /// </summary>
        public SetFontSize(CosName font, decimal size)
        {
            Size = size;
            Font = font;
        }

        /// <summary>
        /// Writes "font size Tf". A null font is written as an empty string.
        /// </summary>
        public override string ToString()
        {
            // string.Format is used rather than string.Join so a null Font formats as "".
            return string.Format("{0} {1} {2}", Font, Size, Symbol);
        }
    }
}
namespace UglyToad.Pdf.Graphics.Operations
{
    /// <summary>
    /// Operation for the "Tz" operator, carrying a single numeric operand.
    /// </summary>
    internal class SetHorizontalScaling : IGraphicsStateOperation
    {
        /// <summary>
        /// The operator text for this operation.
        /// </summary>
        public const string Symbol = "Tz";

        /// <inheritdoc />
        public string Operator
        {
            get { return Symbol; }
        }

        /// <summary>
        /// The scale operand, stored unchanged.
        /// </summary>
        public decimal Scale { get; }

        /// <summary>
        /// Creates the operation from its operand.
        /// </summary>
        public SetHorizontalScaling(decimal scale)
        {
            Scale = scale;
        }

        /// <summary>
        /// Writes "scale Tz".
        /// </summary>
        public override string ToString() => string.Join(" ", Scale, Symbol);
    }
}
namespace UglyToad.Pdf.Graphics.Operations
{
    /// <summary>
    /// Operation for the "d" operator, carrying a dash array and a phase.
    /// </summary>
    internal class SetLineDashPattern : IGraphicsStateOperation
    {
        /// <summary>
        /// The operator text for this operation.
        /// </summary>
        public const string Symbol = "d";

        /// <inheritdoc />
        public string Operator => Symbol;

        /// <summary>
        /// The dash array operand. Stored by reference, not copied; may be null if null was supplied.
        /// </summary>
        public decimal[] Array { get; }

        /// <summary>
        /// The phase operand, stored unchanged.
        /// </summary>
        public decimal Phase { get; }

        /// <summary>
        /// Creates the operation from its two operands.
        /// </summary>
        /// <param name="array">The dash array.</param>
        /// <param name="phase">The dash phase.</param>
        public SetLineDashPattern(decimal[] array, decimal phase)
        {
            Array = array;
            Phase = phase;
        }

        /// <summary>
        /// Writes "[a b ...] phase d", listing the dash values separated by spaces.
        /// A null dash array is written as "[]".
        /// </summary>
        public override string ToString()
        {
            // Interpolating the array directly would print its type name ("System.Decimal[]"),
            // so the elements are joined explicitly.
            var dashes = Array == null ? string.Empty : string.Join(" ", Array);

            return $"[{dashes}] {Phase} {Symbol}";
        }
    }
}
Should be 0, 1 or 2; instead got: " + join); + } + + Join = join; + } + + public override string ToString() + { + return $"{(int)Join} {Symbol}"; + } + + public enum Style + { + Miter = 0, + Round = 1, + Bevel = 2 + } + } +} \ No newline at end of file diff --git a/src/UglyToad.Pdf/Graphics/Operations/SetLineWidth.cs b/src/UglyToad.Pdf/Graphics/Operations/SetLineWidth.cs new file mode 100644 index 00000000..8a366892 --- /dev/null +++ b/src/UglyToad.Pdf/Graphics/Operations/SetLineWidth.cs @@ -0,0 +1,21 @@ +namespace UglyToad.Pdf.Graphics.Operations +{ + internal class SetLineWidth : IGraphicsStateOperation + { + public const string Symbol = "w"; + + public string Operator => Symbol; + + public decimal Width { get; } + + public SetLineWidth(decimal width) + { + Width = width; + } + + public override string ToString() + { + return $"{Width} {Symbol}"; + } + } +} \ No newline at end of file diff --git a/src/UglyToad.Pdf/Graphics/Operations/SetMiterLimit.cs b/src/UglyToad.Pdf/Graphics/Operations/SetMiterLimit.cs new file mode 100644 index 00000000..605d53ca --- /dev/null +++ b/src/UglyToad.Pdf/Graphics/Operations/SetMiterLimit.cs @@ -0,0 +1,21 @@ +namespace UglyToad.Pdf.Graphics.Operations +{ + internal class SetMiterLimit : IGraphicsStateOperation + { + public const string Symbol = "M"; + + public string Operator => Symbol; + + public decimal Limit { get; } + + public SetMiterLimit(decimal limit) + { + Limit = limit; + } + + public override string ToString() + { + return $"{Limit} {Symbol}"; + } + } +} \ No newline at end of file diff --git a/src/UglyToad.Pdf/Graphics/Operations/SetNonStrokeColorDeviceCmyk.cs b/src/UglyToad.Pdf/Graphics/Operations/SetNonStrokeColorDeviceCmyk.cs new file mode 100644 index 00000000..7e1973f1 --- /dev/null +++ b/src/UglyToad.Pdf/Graphics/Operations/SetNonStrokeColorDeviceCmyk.cs @@ -0,0 +1,30 @@ +namespace UglyToad.Pdf.Graphics.Operations +{ + internal class SetNonStrokeColorDeviceCmyk : IGraphicsStateOperation + { + public const string 
Symbol = "k"; /* lowercase: the non-stroking CMYK operator. Uppercase "K" is the stroking operator declared by SetStrokeColorDeviceCmyk; sharing it would leave only one of the two types in a symbol-keyed lookup. */ + + public string Operator => Symbol; + + public decimal C { get; } + + public decimal M { get; } + + public decimal Y { get; } + + public decimal K { get; } + + public SetNonStrokeColorDeviceCmyk(decimal c, decimal m, decimal y, decimal k) + { + C = c; + M = m; + Y = y; + K = k; + } + + public override string ToString() + { + return $"{C} {M} {Y} {K} {Symbol}"; + } + } +} \ No newline at end of file diff --git a/src/UglyToad.Pdf/Graphics/Operations/SetNonStrokeColorDeviceGray.cs b/src/UglyToad.Pdf/Graphics/Operations/SetNonStrokeColorDeviceGray.cs new file mode 100644 index 00000000..3ab06ae6 --- /dev/null +++ b/src/UglyToad.Pdf/Graphics/Operations/SetNonStrokeColorDeviceGray.cs @@ -0,0 +1,21 @@ +namespace UglyToad.Pdf.Graphics.Operations +{ + internal class SetNonStrokeColorDeviceGray : IGraphicsStateOperation + { + public const string Symbol = "g"; + + public string Operator => Symbol; + + public decimal Gray { get; } + + public SetNonStrokeColorDeviceGray(decimal gray) + { + Gray = gray; + } + + public override string ToString() + { + return $"{Gray} {Symbol}"; + } + } +} \ No newline at end of file diff --git a/src/UglyToad.Pdf/Graphics/Operations/SetNonStrokeColorDeviceRgb.cs b/src/UglyToad.Pdf/Graphics/Operations/SetNonStrokeColorDeviceRgb.cs new file mode 100644 index 00000000..7958c870 --- /dev/null +++ b/src/UglyToad.Pdf/Graphics/Operations/SetNonStrokeColorDeviceRgb.cs @@ -0,0 +1,27 @@ +namespace UglyToad.Pdf.Graphics.Operations +{ + internal class SetNonStrokeColorDeviceRgb : IGraphicsStateOperation + { + public const string Symbol = "rg"; + + public string Operator => Symbol; + + public decimal R { get; } + + public decimal G { get; } + + public decimal B { get; } + + public SetNonStrokeColorDeviceRgb(decimal r, decimal g, decimal b) + { + R = r; + G = g; + B = b; + } + + public override string ToString() + { + return $"{R} {G} {B} {Symbol}"; + } + } +} \ No newline at end of file diff --git 
a/src/UglyToad.Pdf/Graphics/Operations/SetStrokeColorDeviceCmyk.cs b/src/UglyToad.Pdf/Graphics/Operations/SetStrokeColorDeviceCmyk.cs new file mode 100644 index 00000000..562a338b --- /dev/null +++ b/src/UglyToad.Pdf/Graphics/Operations/SetStrokeColorDeviceCmyk.cs @@ -0,0 +1,30 @@ +namespace UglyToad.Pdf.Graphics.Operations +{ + internal class SetStrokeColorDeviceCmyk : IGraphicsStateOperation + { + public const string Symbol = "K"; + + public string Operator => Symbol; + + public decimal C { get; } + + public decimal M { get; } + + public decimal Y { get; } + + public decimal K { get; } + + public SetStrokeColorDeviceCmyk(decimal c, decimal m, decimal y, decimal k) + { + C = c; + M = m; + Y = y; + K = k; + } + + public override string ToString() + { + return $"{C} {M} {Y} {K} {Symbol}"; + } + } +} \ No newline at end of file diff --git a/src/UglyToad.Pdf/Graphics/Operations/SetStrokeColorDeviceGray.cs b/src/UglyToad.Pdf/Graphics/Operations/SetStrokeColorDeviceGray.cs new file mode 100644 index 00000000..82cf4315 --- /dev/null +++ b/src/UglyToad.Pdf/Graphics/Operations/SetStrokeColorDeviceGray.cs @@ -0,0 +1,21 @@ +namespace UglyToad.Pdf.Graphics.Operations +{ + internal class SetStrokeColorDeviceGray : IGraphicsStateOperation + { + public const string Symbol = "G"; + + public string Operator => Symbol; + + public decimal Gray { get; } + + public SetStrokeColorDeviceGray(decimal gray) + { + Gray = gray; + } + + public override string ToString() + { + return $"{Gray} {Symbol}"; + } + } +} \ No newline at end of file diff --git a/src/UglyToad.Pdf/Graphics/Operations/SetStrokeColorDeviceRgb.cs b/src/UglyToad.Pdf/Graphics/Operations/SetStrokeColorDeviceRgb.cs new file mode 100644 index 00000000..1fcd3329 --- /dev/null +++ b/src/UglyToad.Pdf/Graphics/Operations/SetStrokeColorDeviceRgb.cs @@ -0,0 +1,27 @@ +namespace UglyToad.Pdf.Graphics.Operations +{ + internal class SetStrokeColorDeviceRgb : IGraphicsStateOperation + { + public const string Symbol = "RG"; + + public 
string Operator => Symbol; + + public decimal R { get; } + + public decimal G { get; } + + public decimal B { get; } + + public SetStrokeColorDeviceRgb(decimal r, decimal g, decimal b) + { + R = r; + G = g; + B = b; + } + + public override string ToString() + { + return $"{R} {G} {B} {Symbol}"; + } + } +} \ No newline at end of file diff --git a/src/UglyToad.Pdf/Graphics/Operations/SetTextLeading.cs b/src/UglyToad.Pdf/Graphics/Operations/SetTextLeading.cs new file mode 100644 index 00000000..4e125154 --- /dev/null +++ b/src/UglyToad.Pdf/Graphics/Operations/SetTextLeading.cs @@ -0,0 +1,21 @@ +namespace UglyToad.Pdf.Graphics.Operations +{ + internal class SetTextLeading : IGraphicsStateOperation + { + public const string Symbol = "TL"; + + public string Operator => Symbol; + + public decimal Leading { get; } + + public SetTextLeading(decimal leading) + { + Leading = leading; + } + + public override string ToString() + { + return $"{Leading} {Symbol}"; + } + } +} \ No newline at end of file diff --git a/src/UglyToad.Pdf/Graphics/Operations/SetTextMatrix.cs b/src/UglyToad.Pdf/Graphics/Operations/SetTextMatrix.cs new file mode 100644 index 00000000..bd88333d --- /dev/null +++ b/src/UglyToad.Pdf/Graphics/Operations/SetTextMatrix.cs @@ -0,0 +1,28 @@ +namespace UglyToad.Pdf.Graphics.Operations +{ + using System; + + internal class SetTextMatrix : IGraphicsStateOperation + { + public const string Symbol = "Tm"; + + public string Operator => Symbol; + + public decimal[] Value { get; } + + public SetTextMatrix(decimal[] value) + { + if (value.Length != 6) + { + throw new ArgumentException("Text matrix must provide 6 values. 
Instead got: " + value.Length); /* report the element count; concatenating the array itself would print only its type name */ + } + + Value = value; + } + + public override string ToString() + { + return $"{Value[0]} {Value[1]} {Value[2]} {Value[3]} {Value[4]} {Value[5]} {Symbol}"; + } + } +} \ No newline at end of file diff --git a/src/UglyToad.Pdf/Graphics/Operations/SetTextRenderingMode.cs b/src/UglyToad.Pdf/Graphics/Operations/SetTextRenderingMode.cs new file mode 100644 index 00000000..cf14ca31 --- /dev/null +++ b/src/UglyToad.Pdf/Graphics/Operations/SetTextRenderingMode.cs @@ -0,0 +1,21 @@ +namespace UglyToad.Pdf.Graphics.Operations +{ + internal class SetTextRenderingMode : IGraphicsStateOperation + { + public const string Symbol = "Tr"; + + public string Operator => Symbol; + + public int Mode { get; } + + public SetTextRenderingMode(int mode) + { + Mode = mode; + } + + public override string ToString() + { + return $"{Mode} {Symbol}"; + } + } +} \ No newline at end of file diff --git a/src/UglyToad.Pdf/Graphics/Operations/SetTextRise.cs b/src/UglyToad.Pdf/Graphics/Operations/SetTextRise.cs new file mode 100644 index 00000000..c0ff24d0 --- /dev/null +++ b/src/UglyToad.Pdf/Graphics/Operations/SetTextRise.cs @@ -0,0 +1,21 @@ +namespace UglyToad.Pdf.Graphics.Operations +{ + internal class SetTextRise : IGraphicsStateOperation + { + public const string Symbol = "Ts"; + + public string Operator => Symbol; + + public decimal Rise { get; } + + public SetTextRise(decimal rise) + { + Rise = rise; + } + + public override string ToString() + { + return $"{Rise} {Symbol}"; + } + } +} \ No newline at end of file diff --git a/src/UglyToad.Pdf/Graphics/Operations/SetWordSpacing.cs b/src/UglyToad.Pdf/Graphics/Operations/SetWordSpacing.cs new file mode 100644 index 00000000..08482c77 --- /dev/null +++ b/src/UglyToad.Pdf/Graphics/Operations/SetWordSpacing.cs @@ -0,0 +1,21 @@ +namespace UglyToad.Pdf.Graphics.Operations +{ + internal class SetWordSpacing : IGraphicsStateOperation + { + public const string Symbol = "Tw"; + + public string Operator => Symbol; + + public decimal 
Spacing { get; } + + public SetWordSpacing(decimal spacing) + { + Spacing = spacing; + } + + public override string ToString() + { + return $"{Spacing} {Symbol}"; + } + } +} \ No newline at end of file diff --git a/src/UglyToad.Pdf/Graphics/Operations/ShowString.cs b/src/UglyToad.Pdf/Graphics/Operations/ShowString.cs new file mode 100644 index 00000000..152ff1bb --- /dev/null +++ b/src/UglyToad.Pdf/Graphics/Operations/ShowString.cs @@ -0,0 +1,21 @@ +namespace UglyToad.Pdf.Graphics.Operations +{ + internal class ShowString : IGraphicsStateOperation + { + public const string Symbol = "Tj"; + + public string Operator => Symbol; + + public string Text { get; } + + public ShowString(string text) + { + Text = text; + } + + public override string ToString() + { + return $"{Text} {Symbol}"; + } + } +} \ No newline at end of file diff --git a/src/UglyToad.Pdf/Graphics/Operations/ShowStringsWithPositioning.cs b/src/UglyToad.Pdf/Graphics/Operations/ShowStringsWithPositioning.cs new file mode 100644 index 00000000..ea84bc52 --- /dev/null +++ b/src/UglyToad.Pdf/Graphics/Operations/ShowStringsWithPositioning.cs @@ -0,0 +1,16 @@ +namespace UglyToad.Pdf.Graphics.Operations +{ + internal class ShowStringsWithPositioning : IGraphicsStateOperation + { + public const string Symbol = "TJ"; + + public string Operator => Symbol; + + public object[] Array { get; } + + public ShowStringsWithPositioning(object[] array) + { + Array = array; + } + } +} \ No newline at end of file diff --git a/src/UglyToad.Pdf/Graphics/Operations/StrokePath.cs b/src/UglyToad.Pdf/Graphics/Operations/StrokePath.cs new file mode 100644 index 00000000..70218384 --- /dev/null +++ b/src/UglyToad.Pdf/Graphics/Operations/StrokePath.cs @@ -0,0 +1,20 @@ +namespace UglyToad.Pdf.Graphics.Operations +{ + internal class StrokePath : IGraphicsStateOperation + { + public const string Symbol = "S"; + + public static readonly StrokePath Value = new StrokePath(); + + public string Operator => Symbol; + + private StrokePath() + { 
+ } + + public override string ToString() + { + return Symbol; + } + } +} \ No newline at end of file diff --git a/src/UglyToad.Pdf/Graphics/ReflectionGraphicsStateOperationFactory.cs b/src/UglyToad.Pdf/Graphics/ReflectionGraphicsStateOperationFactory.cs new file mode 100644 index 00000000..c5b55d3f --- /dev/null +++ b/src/UglyToad.Pdf/Graphics/ReflectionGraphicsStateOperationFactory.cs @@ -0,0 +1,148 @@ +namespace UglyToad.Pdf.Graphics +{ + using System; + using System.Collections.Generic; + using System.Reflection; + using Cos; + using Operations; + using Tokenization.Tokens; + + internal class ReflectionGraphicsStateOperationFactory : IGraphicsStateOperationFactory + { + private readonly IReadOnlyDictionary<string, Type> operations; /* operator symbol -> operation type, filled once by the constructor */ + + public ReflectionGraphicsStateOperationFactory() + { + var assemblyTypes = Assembly.GetAssembly(typeof(ReflectionGraphicsStateOperationFactory)).GetTypes(); + + var result = new Dictionary<string, Type>(); + + foreach (var assemblyType in assemblyTypes) + { + if (!assemblyType.IsInterface && typeof(IGraphicsStateOperation).IsAssignableFrom(assemblyType)) + { + var symbol = assemblyType.GetField("Symbol"); + + if (symbol == null) + { + throw new InvalidOperationException("An operation type was defined without the public const Symbol being declared. Type was: " + assemblyType.FullName); + } + + var value = symbol.GetValue(null).ToString(); + + result[value] = assemblyType; /* NOTE: two types declaring the same Symbol silently overwrite each other here */ + } + } + + operations = result; + } + + public IGraphicsStateOperation Create(OperatorToken op, IReadOnlyList<IToken> operands) /* NOTE(review): the element type was lost in extraction; IToken is presumed - confirm against IGraphicsStateOperationFactory. Returns null for an unknown operator symbol. */ + { + if (!operations.TryGetValue(op.Data, out Type operationType)) + { + return null; + } + + var constructors = operationType.GetConstructors(BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance); + + if (constructors.Length == 0) + { + throw new InvalidOperationException("No constructors to invoke were found for operation type: " + operationType.FullName); + } + + // This only works by luck... 
+ var constructor = constructors[0]; + + if (constructor.IsPrivate) + { + return (IGraphicsStateOperation)operationType.GetField("Value").GetValue(null); + } + + var parameters = constructor.GetParameters(); + + var offset = 0; + + var arguments = new List<object>(); /* boxed constructor arguments, in parameter order */ + + foreach (var parameter in parameters) + { + if (parameter.ParameterType == typeof(decimal)) + { + if (operands[offset] is NumericToken numeric) + { + arguments.Add(numeric.Data); + } + else + { + throw new InvalidOperationException($"Expected a decimal parameter for operation type {operationType.FullName}. Instead got: {operands[offset]}"); + } + + offset++; + } + else if (parameter.ParameterType == typeof(int)) + { + if (operands[offset] is NumericToken numeric) + { + arguments.Add(numeric.Int); + } + else + { + throw new InvalidOperationException($"Expected an integer parameter for operation type {operationType.FullName}. Instead got: {operands[offset]}"); + } + + offset++; + } + else if (parameter.ParameterType == typeof(decimal[])) + { + var array = new List<decimal>(); /* consumes every consecutive numeric operand from the current offset */ + while (offset < operands.Count && operands[offset] is NumericToken numeric) + { + array.Add(numeric.Data); + offset++; + } + + arguments.Add(array.ToArray()); + } + else if (parameter.ParameterType == typeof(CosName)) + { + if (operands[offset] is NameToken name) + { + arguments.Add(name.Data); + } + else + { + throw new InvalidOperationException($"Expected a name parameter for operation type {operationType.FullName}. Instead got: {operands[offset]}"); + } + + offset++; + } + else if (parameter.ParameterType == typeof(string)) + { + if (operands[offset] is StringToken stringToken) + { + arguments.Add(stringToken.Data); + } + else if (operands[offset] is HexToken hexToken) + { + arguments.Add(hexToken.Data); + } + else + { + throw new InvalidOperationException($"Expected a string parameter for operation type {operationType.FullName}. 
Instead got: {operands[offset]}"); + } + + offset++; + } + else + { + throw new NotImplementedException($"Unsupported parameter type {parameter.ParameterType.FullName} for operation type {operationType.FullName}."); + } + } + + var result = constructor.Invoke(arguments.ToArray()); + + return (IGraphicsStateOperation)result; + } + } +} \ No newline at end of file diff --git a/src/UglyToad.Pdf/IO/NumericTokenizer.cs b/src/UglyToad.Pdf/IO/NumericTokenizer.cs deleted file mode 100644 index 9197185c..00000000 --- a/src/UglyToad.Pdf/IO/NumericTokenizer.cs +++ /dev/null @@ -1,56 +0,0 @@ -namespace UglyToad.Pdf.IO -{ - using System; - using System.Collections.Generic; - using Text; - using Text.Operators; - using Tokenization.Tokens; - - public class NumericTokenizer - { - private static readonly HashSet SupportedCharacterSet = new HashSet - { - (byte)'0', - (byte)'1', - (byte)'2', - (byte)'3', - (byte)'4', - (byte)'5', - (byte)'6', - (byte)'7', - (byte)'8', - (byte)'9', - (byte)'+', - (byte)'-', - (byte)'.' 
- }; - - public bool CanRead(byte b, int offset) - { - return SupportedCharacterSet.Contains(b); - } - - public ITextObjectComponent Read(IReadOnlyList readBytes, IEnumerable furtherBytes, out int offset) - { - offset = readBytes.Count; - var bytes = new List(readBytes); - - using (var reader = furtherBytes.GetEnumerator()) - { - while (reader.MoveNext() && !BaseTextComponentApproach.IsEmpty(reader.Current)) - { - if (!SupportedCharacterSet.Contains(reader.Current)) - { - throw new InvalidOperationException("Unsupported byte in numeric operator: " + (char)reader.Current); - } - - bytes.Add(reader.Current); - offset++; - } - } - - return new OperandComponent(new NumericOperand(bytes), TextObjectComponentType.Numeric); - } - - } -} \ No newline at end of file diff --git a/src/UglyToad.Pdf/IO/ScratchFile.cs b/src/UglyToad.Pdf/IO/ScratchFile.cs deleted file mode 100644 index 732d027b..00000000 --- a/src/UglyToad.Pdf/IO/ScratchFile.cs +++ /dev/null @@ -1,493 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Text; - -namespace UglyToad.Pdf.IO -{ - using System.IO; - - /** - * Implements a memory page handling mechanism as base for creating (multiple) - * {@link RandomAccess} buffers each having its set of pages (implemented by - * {@link ScratchFileBuffer}). A buffer is created calling {@link #createBuffer()}. - * - *

Pages can be stored in main memory or in a temporary file. A mixed mode - * is supported storing a certain amount of pages in memory and only the - * additional ones in temporary file (defined by maximum main memory to - * be used).

- * - *

Pages can be marked as 'free' in order to re-use them. For in-memory pages - * this will release the used memory while for pages in temporary file this - * simply marks the area as free to re-use.

- * - *

If a temporary file was created (done with the first page to be stored - * in temporary file) it is deleted when {@link ScratchFile#close()} is called.

- * - *

Using this class for {@link RandomAccess} buffers allows for a direct control - * on the maximum memory usage and allows processing large files for which we - * otherwise would get an {@link OutOfMemoryError} in case of using {@link RandomAccessBuffer}.

- * - *

This base class for providing pages is thread safe (the buffer implementations are not).

- */ - public class ScratchFile : IDisposable - { - /** number of pages by which we enlarge the scratch file (reduce I/O-operations) */ - private static readonly int ENLARGE_PAGE_COUNT = 16; - /** in case of unrestricted main memory usage this is the initial number of pages - * {@link #inMemoryPages} is setup for */ - private static readonly int INIT_UNRESTRICTED_MAINMEM_PAGECOUNT = 100000; - private static readonly int PAGE_SIZE = 4096; - - private readonly Object ioLock = new Object(); - private readonly string scratchFileDirectory; - /** scratch file; only to be accessed under synchronization of {@link #ioLock} */ - private string file; - /** random access to scratch file; only to be accessed under synchronization of {@link #ioLock} */ - //private java.io.RandomAccessFile raf; - private volatile int pageCount = 0; - //private readonly BitSet freePages = new BitSet(); - /** holds pointers to in-memory page content; will be initialized once in case of restricted - * main memory, otherwise it is enlarged as needed and first initialized to a size of - * {@link #INIT_UNRESTRICTED_MAINMEM_PAGECOUNT} */ - private volatile byte[][] inMemoryPages; - private readonly int inMemoryMaxPageCount; - private readonly int maxPageCount; - private readonly bool useScratchFile; - private readonly bool maxMainMemoryIsRestricted; - - private volatile bool isClosed = false; - - /** - * Initializes page handler. If a scratchFileDirectory is supplied, - * then the scratch file will be created in that directory. - * - *

All pages will be stored in the scratch file.

- * - * @param scratchFileDirectory The directory in which to create the scratch file - * or null to created it in the default temporary directory. - * - * @throws IOException If scratch file directory was given but don't exist. - */ - public ScratchFile(string scratchFileDirectory) : this(MemoryUsageSetting.setupTempFileOnly().setTempDir(scratchFileDirectory)) - { - } - - /** - * Initializes page handler. If a scratchFileDirectory is supplied, - * then the scratch file will be created in that directory. - * - *

Depending on the size of allowed memory usage a number of pages (memorySize/{@link #PAGE_SIZE}) - * will be stored in-memory and only additional pages will be written to/read from scratch file.

- * - * @param memUsageSetting set how memory/temporary files are used for buffering streams etc. - * - * @throws IOException If scratch file directory was given but don't exist. - */ - public ScratchFile(MemoryUsageSetting memUsageSetting) - { - maxMainMemoryIsRestricted = (!memUsageSetting.getUseMainMemory()) || memUsageSetting.isMainMemoryRestricted(); - useScratchFile = maxMainMemoryIsRestricted ? memUsageSetting.getUseTempFile() : false; - scratchFileDirectory = useScratchFile ? memUsageSetting.getTempDir() : null; - - //if ((scratchFileDirectory != null) && (!scratchFileDirectory.isDirectory())) - //{ - // throw new IOException("Scratch file directory does not exist: " + this.scratchFileDirectory); - //} - - maxPageCount = memUsageSetting.isStorageRestricted() ? - (int)Math.Min(int.MaxValue, memUsageSetting.getMaxStorageBytes() / PAGE_SIZE) : - int.MaxValue; - - inMemoryMaxPageCount = memUsageSetting.getUseMainMemory() ? - (memUsageSetting.isMainMemoryRestricted() ? - (int)Math.Min(int.MaxValue, memUsageSetting.getMaxMainMemoryBytes() / PAGE_SIZE) : - int.MaxValue) : - 0; - inMemoryPages = new byte[maxMainMemoryIsRestricted ? inMemoryMaxPageCount : INIT_UNRESTRICTED_MAINMEM_PAGECOUNT][]; - - //freePages.set(0, inMemoryPages.Length); - } - - /** - * Getter for an instance using only unrestricted main memory for buffering - * (same as new ScratchFile(MemoryUsageSetting.setupMainMemoryOnly())). - * - * @return instance configured to only use main memory with no size restriction - */ - public static ScratchFile getMainMemoryOnlyInstance() - { - try - { - return new ScratchFile(MemoryUsageSetting.setupMainMemoryOnly()); - } - catch (IOException ioe) - { - // cannot happen for main memory setup - return null; - } - } - - /** - * Returns a new free page, either from free page pool - * or by enlarging scratch file (may be created). 
- * - * @return index of new page - */ - int getNewPage() - { - //lock (freePages) - //{ - // int idx = freePages.nextSetBit(0); - - // if (idx < 0) - // { - // enlarge(); - - // idx = freePages.nextSetBit(0); - // if (idx < 0) - // { - // throw new IOException("Maximum allowed scratch file memory exceeded."); - // } - // } - - // freePages.clear(idx); - - // if (idx >= pageCount) - // { - // pageCount = idx + 1; - // } - - // return idx; - //} - - throw new NotImplementedException(); - } - - /** - * This will provide new free pages by either enlarging the scratch file - * by a number of pages defined by {@link #ENLARGE_PAGE_COUNT} - in case - * scratch file usage is allowed - or increase the {@link #inMemoryPages} - * array in case main memory was not restricted. If neither of both is - * allowed/the case than free pages count won't be changed. The same is true - * if no new pages could be added because we reached the maximum of - * {@link Integer#MAX_VALUE} pages. - * - *

If scratch file uage is allowed and scratch file does not exist already - * it will be created.

- * - *

Only to be called under synchronization on {@link #freePages}.

- */ - private void enlarge() - { - lock (ioLock) - { - checkClosed(); - - if (pageCount >= maxPageCount) - { - return; - } - - if (useScratchFile) - { - // create scratch file is needed - //if (raf == null) - //{ - // var location = Path.Combine(scratchFileDirectory, "PDFBox.tmp"); - // File.Create(location); - // try - // { - // raf = new java.io.RandomAccessFile(file, "rw"); - // } - // catch (IOException e) - // { - // File.Delete(file.ToString()); - // throw e; - // } - //} - - //long fileLen = raf.length(); - long expectedFileLen = ((long)pageCount - inMemoryMaxPageCount) * PAGE_SIZE; - - //if (expectedFileLen != fileLen) - { - // throw new IOException("Expected scratch file size of " + expectedFileLen + " but found " + fileLen); - } - - // enlarge if we do not overflow - if (pageCount + ENLARGE_PAGE_COUNT > pageCount) - { - // fileLen += ENLARGE_PAGE_COUNT * PAGE_SIZE; - - // raf.setLength(fileLen); - - // freePages.set(pageCount, pageCount + ENLARGE_PAGE_COUNT); - } - } - else if (!maxMainMemoryIsRestricted) - { - // increase number of in-memory pages - int oldSize = inMemoryPages.Length; - int newSize = (int)Math.Min(((long)oldSize) * 2, int.MaxValue); // this handles integer overflow - if (newSize > oldSize) - { - byte[][] newInMemoryPages = new byte[newSize][]; - System.Array.Copy(inMemoryPages, 0, newInMemoryPages, 0, oldSize); - inMemoryPages = newInMemoryPages; - - // freePages.set(oldSize, newSize); - } - } - } - } - - /** - * Returns byte size of a page. - * - * @return byte size of a page - */ - int getPageSize() - { - return PAGE_SIZE; - } - - /** - * Reads the page with specified index. - * - * @param pageIdx index of page to read - * - * @return byte array of size {@link #PAGE_SIZE} filled with page data read from file - * - * @throws IOException - */ - byte[] readPage(int pageIdx) - { - if ((pageIdx < 0) || (pageIdx >= pageCount)) - { - checkClosed(); - throw new IOException("Page index out of range: " + pageIdx + ". 
Max value: " + (pageCount - 1)); - } - - // check if we have the page in memory - if (pageIdx < inMemoryMaxPageCount) - { - byte[] page = inMemoryPages[pageIdx]; - - // handle case that we are closed - if (page == null) - { - checkClosed(); - throw new IOException("Requested page with index " + pageIdx + " was not written before."); - } - - return page; - } - - lock (ioLock) - { - // if (raf == null) - { - checkClosed(); - throw new IOException("Missing scratch file to read page with index " + pageIdx + " from."); - } - - byte[] page = new byte[PAGE_SIZE]; - // raf.seek(((long)pageIdx - inMemoryMaxPageCount) * PAGE_SIZE); - // raf.readFully(page); - - return page; - } - } - - /** - * Writes updated page. Page is either kept in-memory if pageIdx < {@link #inMemoryMaxPageCount} - * or is written to scratch file. - * - *

Provided page byte array must not be re-used for other pages since we - * store it as is in case of in-memory handling.

- * - * @param pageIdx index of page to write - * @param page page to write (length has to be {@value #PAGE_SIZE}) - * - * @throws IOException in case page index is out of range or page has wrong length - * or writing to file failed - */ - void writePage(int pageIdx, byte[] page) - { - if ((pageIdx < 0) || (pageIdx >= pageCount)) - { - checkClosed(); - throw new IOException("Page index out of range: " + pageIdx + ". Max value: " + (pageCount - 1)); - } - - if (page.Length != PAGE_SIZE) - { - throw new IOException("Wrong page size to write: " + page.Length + ". Expected: " + PAGE_SIZE); - } - - if (pageIdx < inMemoryMaxPageCount) - { - if (maxMainMemoryIsRestricted) - { - inMemoryPages[pageIdx] = page; - } - else - { - // need synchronization since inMemoryPages may change - lock (ioLock) - { - inMemoryPages[pageIdx] = page; - } - } - - // in case we were closed in between throw exception - checkClosed(); - } - else - { - lock (ioLock) - { - checkClosed(); - // raf.seek(((long)pageIdx - inMemoryMaxPageCount) * PAGE_SIZE); - // raf.write(page); - } - } - } - - /** - * Checks if this page handler has already been closed. If so, - * an {@link IOException} is thrown. - * - * @throws IOException If {@link #close()} has already been called. - */ - void checkClosed() - { - if (isClosed) - { - throw new IOException("Scratch file already closed"); - } - } - - /** - * Creates a new buffer using this page handler. - * - * @return A new buffer. - * - * @throws IOException If an error occurred. - */ - // public RandomAccess createBuffer() - // { - // return new ScratchFileBuffer(this); - // } - - /** - * Creates a new buffer using this page handler and initializes it with the - * data read from provided input stream (input stream is copied to buffer). - * The buffer data pointer is reset to point to first byte. - * - * @return A new buffer containing data read from input stream. - * - * @throws IOException If an error occurred. 
- */ - // public RandomAccess createBuffer(MemoryStream input) - // { - // ScratchFileBuffer buf = new ScratchFileBuffer(this); - - // byte[] byteBuffer = new byte[8192]; - // int bytesRead = 0; - // while ((bytesRead = input.Read(byteBuffer, 0, 8192)) > -1) - // { - // buf.write(byteBuffer, 0, bytesRead); - // } - // buf.seek(0); - - // return buf; - //} - - /** - * Allows a buffer which is cleared/closed to release its pages to be re-used. - * - * @param pageIndexes pages indexes of pages to release - * @param count number of page indexes contained in provided array - */ - void markPagesAsFree(int[] pageIndexes, int off, int count) - { - - // lock (freePages) - { - for (int aIdx = off; aIdx < count; aIdx++) - { - int pageIdx = pageIndexes[aIdx]; - // if ((pageIdx >= 0) && (pageIdx < pageCount) && (!freePages.get(pageIdx))) - { - // freePages.set(pageIdx); - if (pageIdx < inMemoryMaxPageCount) - { - inMemoryPages[pageIdx] = null; // remark: not in ioLock synchronization since behavior won't - // change even in case of parallel called 'enlarge' method - } - } - - } - } - } - - /** - * Closes and deletes the temporary file. No further interaction with - * the scratch file or associated buffers can happen after this method is called. - * It also releases in-memory pages. - * - * @throws IOException If there was a problem closing or deleting the temporary file. 
- */ - public void Dispose() - { - - IOException ioexc = null; - - lock (ioLock) - { - if (isClosed) - { - return; - } - - isClosed = true; - - // if (raf != null) - { - try - { - // raf.close(); - } - catch (IOException ioe) - { - ioexc = ioe; - } - } - - if (file != null) - { - // if (!file.delete()) - { - if (File.Exists(file.ToString()) && (ioexc == null)) - { - // ioexc = new IOException("Error deleting scratch file: " + file.getAbsolutePath()); - } - } - } - } - - // lock (freePages) - { - // freePages.clear(); - pageCount = 0; - } - - if (ioexc != null) - { - throw ioexc; - } - } - } - -} diff --git a/src/UglyToad.Pdf/Parser/IPageContentParser.cs b/src/UglyToad.Pdf/Parser/IPageContentParser.cs new file mode 100644 index 00000000..8cbc38b8 --- /dev/null +++ b/src/UglyToad.Pdf/Parser/IPageContentParser.cs @@ -0,0 +1,11 @@ +namespace UglyToad.Pdf.Parser +{ + using Content; + using Graphics; + using IO; + + internal interface IPageContentParser + { + PageContent Parse(IGraphicsStateOperationFactory operationFactory, IInputBytes inputBytes); + } +} \ No newline at end of file diff --git a/src/UglyToad.Pdf/Parser/PageContentParser.cs b/src/UglyToad.Pdf/Parser/PageContentParser.cs new file mode 100644 index 00000000..d5350efe --- /dev/null +++ b/src/UglyToad.Pdf/Parser/PageContentParser.cs @@ -0,0 +1,50 @@ +namespace UglyToad.Pdf.Parser +{ + using System.Collections.Generic; + using Content; + using Graphics; + using Graphics.Operations; + using IO; + using Tokenization.Scanner; + using Tokenization.Tokens; + + internal class PageContentParser : IPageContentParser + { + public PageContent Parse(IGraphicsStateOperationFactory operationFactory, IInputBytes inputBytes) + { + var scanner = new CoreTokenScanner(inputBytes); + + var precedingTokens = new List<IToken>(); /* operands collected since the last operator. NOTE(review): element type was lost in extraction; IToken is presumed - confirm against CoreTokenScanner.CurrentToken. */ + var graphicsStateOperations = new List<IGraphicsStateOperation>(); /* operations in the order their operators appear */ + + while (scanner.MoveNext()) + { + var token = scanner.CurrentToken; + + if (token is OperatorToken op) + { + var operation = operationFactory.Create(op, 
precedingTokens); + + if (operation != null) + { + graphicsStateOperations.Add(operation); + } + + precedingTokens.Clear(); + } + else if (token is CommentToken) + { + } + else + { + precedingTokens.Add(token); + } + } + + return new PageContent + { + GraphicsStateOperations = graphicsStateOperations + }; + } + } +} diff --git a/src/UglyToad.Pdf/Parser/PageTree/PageParser.cs b/src/UglyToad.Pdf/Parser/PageTree/PageParser.cs index 1ed7dbe0..c78334ee 100644 --- a/src/UglyToad.Pdf/Parser/PageTree/PageParser.cs +++ b/src/UglyToad.Pdf/Parser/PageTree/PageParser.cs @@ -1,8 +1,6 @@ -using System; -using System.Text; - -namespace UglyToad.Pdf.Parser.PageTree +namespace UglyToad.Pdf.Parser.PageTree { + using System; using Content; using ContentStream; using ContentStream.TypedAccessors; @@ -29,14 +27,7 @@ namespace UglyToad.Pdf.Parser.PageTree throw new InvalidOperationException("Expected a Dictionary of Type Page, instead got this: " + dictionary); } - var resources = dictionary.GetDictionaryOrDefault(CosName.RESOURCES); - - var resourceDictionary = arguments.Container.Get() - .Parse(resources, arguments); - - var font = resourceDictionary.GetFont(CosName.Create("F0"), arguments, out var fontValue); - - return new Page(number, dictionary, arguments); + return new Page(number, dictionary, new PageTreeMembers(), arguments); } } @@ -56,7 +47,7 @@ namespace UglyToad.Pdf.Parser.PageTree var simpleFont = arguments.Container.Get() .Parse(dictionary, arguments); } - + return new Font(); } } @@ -112,11 +103,11 @@ namespace UglyToad.Pdf.Parser.PageTree public class SimpleFont { - + } public class Font { - + } } diff --git a/src/UglyToad.Pdf/Parser/ParsingCachingProviders.cs b/src/UglyToad.Pdf/Parser/ParsingCachingProviders.cs index c3565840..5d2aaab5 100644 --- a/src/UglyToad.Pdf/Parser/ParsingCachingProviders.cs +++ b/src/UglyToad.Pdf/Parser/ParsingCachingProviders.cs @@ -1,6 +1,7 @@ namespace UglyToad.Pdf.Parser { using System; + using Content; using Cos; using Parts; @@ -13,10 
+14,13 @@ public BruteForceSearcher BruteForceSearcher { get; } - public ParsingCachingProviders(CosObjectPool objectPool, BruteForceSearcher bruteForceSearcher) + public ResourceContainer ResourceContainer { get; } + + public ParsingCachingProviders(CosObjectPool objectPool, BruteForceSearcher bruteForceSearcher, ResourceContainer resourceContainer) { ObjectPool = objectPool ?? throw new ArgumentNullException(nameof(objectPool)); BruteForceSearcher = bruteForceSearcher ?? throw new ArgumentNullException(nameof(bruteForceSearcher)); + ResourceContainer = resourceContainer ?? throw new ArgumentNullException(nameof(resourceContainer)); } } } diff --git a/src/UglyToad.Pdf/Parser/PdfDocumentFactory.cs b/src/UglyToad.Pdf/Parser/PdfDocumentFactory.cs index b1b2c334..86a06a75 100644 --- a/src/UglyToad.Pdf/Parser/PdfDocumentFactory.cs +++ b/src/UglyToad.Pdf/Parser/PdfDocumentFactory.cs @@ -48,6 +48,7 @@ var dynamicParser = container.Get(); var bruteForceSearcher = new BruteForceSearcher(reader); + var resourceContainer = new ResourceContainer(); var root = ParseTrailer(reader, crossReferenceTable, dynamicParser, bruteForceSearcher, pool, isLenientParsing); @@ -63,7 +64,7 @@ rootDictionary.Set(CosName.TYPE, CosName.CATALOG); } - var caching = new ParsingCachingProviders(pool, bruteForceSearcher); + var caching = new ParsingCachingProviders(pool, bruteForceSearcher, resourceContainer); return new PdfDocument(reader, version, crossReferenceTable, container, isLenientParsing, caching, new Catalog(rootDictionary)); } diff --git a/src/UglyToad.Pdf/Parser/PdfParser.cs b/src/UglyToad.Pdf/Parser/PdfParser.cs deleted file mode 100644 index 421828db..00000000 --- a/src/UglyToad.Pdf/Parser/PdfParser.cs +++ /dev/null @@ -1,135 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Text; - -namespace UglyToad.Pdf.Parser -{ - using Cos; - using Filters; - using IO; - using Logging; - using Parts; - using Parts.CrossReference; - - internal class PDFParser : COSParser - { 
- private String password = ""; - private IInputStream keyStoreInputStream = null; - private String keyAlias = null; - private FileHeaderParser headerParser = new FileHeaderParser(null); - private FileTrailerParser trailerParser = new FileTrailerParser(); - - public PDFParser(IRandomAccessRead source, string decryptionPassword, IInputStream keyStore, - String alias) : base(source) - { - fileLen = source.Length(); - password = decryptionPassword; - keyStoreInputStream = keyStore; - keyAlias = alias; - init(); - - } - - private void init() - { - document = new COSDocument(); - } - - /** - * The initial parse will first parse only the trailer, the xrefstart and all xref tables to have a pointer (offset) - * to all the pdf's objects. It can handle linearized pdfs, which will have an xref at the end pointing to an xref - * at the beginning of the file. Last the root object is parsed. - * - * @throws InvalidPasswordException If the password is incorrect. - * @throws IOException If something went wrong. 
- */ - protected void initialParse(bool isLenient) - { - // Find the cross reference table at the offset given at the end of the document - var xrefOffset = trailerParser.GetXrefOffset(source, isLenient); - - ILog log = null; - var bruteForceSearcher = new BruteForceSearcher(source); - var nameParser = new CosNameParser(); - var dictionaryParser = new CosDictionaryParser(nameParser, log); - var baseParser = new CosBaseParser(nameParser, new CosStringParser(), dictionaryParser, new CosArrayParser()); - var streamParser = new CosStreamParser(log); - var filterProvider = new MemoryFilterProvider(new DecodeParameterResolver(log), new PngPredictor(), log); - var crossReferenceParser = new CrossReferenceStreamParser(filterProvider); - - var crossReferenceTableParser = new FileCrossReferenceTableParser(log, dictionaryParser, baseParser, streamParser, crossReferenceParser, - new CrossReferenceTableParser(log, dictionaryParser, baseParser)); - - var pool = new CosObjectPool(); - - var table = crossReferenceTableParser.Parse(source, isLenient, xrefOffset, pool); - - CosBase baseObj = parseTrailerValuesDynamically(document.trailer, bruteForceSearcher, baseParser, source, isLenient, document, streamParser, pool); - if (!(baseObj is CosDictionary)) - { - throw new InvalidOperationException("Expected root dictionary, but got this: " + baseObj); - } - - CosDictionary root = (CosDictionary)baseObj; - // in some pdfs the type value "Catalog" is missing in the root object - if (isLenient && !root.containsKey(CosName.TYPE)) - { - root.setItem(CosName.TYPE, CosName.CATALOG); - } - - CosObject catalogObj = document.getCatalog(); - if (catalogObj.GetObject() is CosDictionary) - { - parseDictObjects((CosDictionary)catalogObj.GetObject(), (CosName[])null, bruteForceSearcher, baseParser, streamParser, source, document, isLenient, pool); - - CosBase infoBase = document.trailer.getDictionaryObject(CosName.INFO); - if (infoBase is CosDictionary) - { - parseDictObjects((CosDictionary)infoBase, 
(CosName[])null, bruteForceSearcher, baseParser, streamParser, source, document, isLenient, pool); - } - - document.IsDecrypted = true; - } - initialParseDone = true; - } - - /** - * This will parse the stream and populate the COSDocument object. This will close - * the keystore stream when it is done parsing. - * - * @throws InvalidPasswordException If the password is incorrect. - * @throws IOException If there is an error reading from the stream or corrupt data - * is found. - */ - public void Parse(bool isLenientParsing) - { - // set to false if all is processed - bool exceptionOccurred = true; - - try - { - // Read the version from the top of the file - var version = headerParser.ReadHeader(source, getIsLenient()); - document.Version = version.Version; - - if (!initialParseDone) - { - initialParse(isLenientParsing); - } - - exceptionOccurred = false; - } - finally - { - IOUtils.closeQuietly(keyStoreInputStream); - - if (exceptionOccurred && document != null) - { - IOUtils.closeQuietly(document); - document = null; - } - } - } - } - -} diff --git a/src/UglyToad.Pdf/Text/ByteTextScanner.cs b/src/UglyToad.Pdf/Text/ByteTextScanner.cs deleted file mode 100644 index ecfd35c3..00000000 --- a/src/UglyToad.Pdf/Text/ByteTextScanner.cs +++ /dev/null @@ -1,157 +0,0 @@ -namespace UglyToad.Pdf.Text -{ - using System.Collections.Generic; - using System.Linq; - using Operators; - - public class ByteTextScanner : ITextScanner - { - private static readonly ITextComponentApproach[] Approaches = - { - new BaseTextComponentApproach(new[] {(byte) 'B', (byte) 'T'}, TextObjectComponentType.BeginText, new TextObjectComponentType[0]), - new BaseTextComponentApproach(new[] {(byte) 'E', (byte) 'T'}, TextObjectComponentType.EndText, new TextObjectComponentType[0]), - new BaseTextComponentApproach(new[] {(byte) 'T', (byte) 'f'}, TextObjectComponentType.TextFont, new []{ TextObjectComponentType.Font, TextObjectComponentType.Numeric }), - new BaseTextComponentApproach(new[] {(byte) 'T', 
(byte) 'm'}, TextObjectComponentType.SetTextMatrix, new [] - { - TextObjectComponentType.Numeric, TextObjectComponentType.Numeric, TextObjectComponentType.Numeric, - TextObjectComponentType.Numeric, TextObjectComponentType.Numeric, TextObjectComponentType.Numeric - }), - new BaseTextComponentApproach(new[] {(byte) 'T', (byte) 'd'}, TextObjectComponentType.MoveTextPosition, new[]{ TextObjectComponentType.Numeric, TextObjectComponentType.Numeric }), - new BaseTextComponentApproach(new[] {(byte) 'T', (byte) 'D'}, TextObjectComponentType.MoveTextPositionAndSetLeading, new[]{ TextObjectComponentType.Numeric, TextObjectComponentType.Numeric }), - new BaseTextComponentApproach(new[] {(byte) 'T', (byte) 'j'}, TextObjectComponentType.ShowText, new[] { TextObjectComponentType.String }), - new BaseTextComponentApproach(new[] {(byte) 'T', (byte) 'J'}, TextObjectComponentType.ShowTextWithIndividualGlyphPositioning, new[]{ TextObjectComponentType.Array }), - new BaseTextComponentApproach(new[] {(byte) 'T', (byte) 'L'}, TextObjectComponentType.SetTextLeading, new []{ TextObjectComponentType.Numeric }), - new BaseTextComponentApproach(new[] {(byte) 'T', (byte) 'r'}, TextObjectComponentType.SetTextRenderingMode, new[] { TextObjectComponentType.Numeric }), - new BaseTextComponentApproach(new[] {(byte) 'T', (byte) 's'}, TextObjectComponentType.SetTextRise, new[] { TextObjectComponentType.Numeric }), - new BaseTextComponentApproach(new[] {(byte) 'T', (byte) 'w'}, TextObjectComponentType.SetWordSpacing, new[] { TextObjectComponentType.Numeric }), - new BaseTextComponentApproach(new[] {(byte) 'T', (byte) 'z'}, TextObjectComponentType.SetHorizontalTextScaling, new[] { TextObjectComponentType.Numeric }), - new BaseTextComponentApproach(new[] {(byte) 'T', (byte) '*'}, TextObjectComponentType.MoveToNextLineStart, new TextObjectComponentType[0]), - new BaseTextComponentApproach(new[] {(byte) 'T', (byte) 'c'}, TextObjectComponentType.SetCharacterSpacing, new[] { 
TextObjectComponentType.Numeric }), - new BaseTextComponentApproach(new[] {(byte) 'g'}, TextObjectComponentType.SetGrayNonStroking, new [] { TextObjectComponentType.Numeric }), - new BaseTextComponentApproach(new[] {(byte) 'G'}, TextObjectComponentType.SetGrayStroking, new [] { TextObjectComponentType.Numeric }), - new BaseTextComponentApproach(new[] {(byte) 'w'}, TextObjectComponentType.SetLineWidth, new [] { TextObjectComponentType.Numeric }), - new BaseTextComponentApproach(new[] {(byte) 'W'}, TextObjectComponentType.SetClippingPathNonZeroWinding, new [] { TextObjectComponentType.Numeric }), - new BaseTextComponentApproach(new[] {(byte) 'W', (byte) '*'}, TextObjectComponentType.SetClippingPathEvenOdd, new [] { TextObjectComponentType.Numeric }), - new BaseTextComponentApproach(new[] {(byte) '\''}, TextObjectComponentType.MoveNextLineAndShowText, new [] { TextObjectComponentType.String }), - new FontTextComponentApproach(), - new NumericTextComponentApproach(), - new StringTextComponentApproach() - }; - - private readonly byte[] bytes; - - private int offset; - - public ByteTextScanner(byte[] bytes) - { - this.bytes = bytes; - } - - public ITextObjectComponent CurrentComponent { get; private set; } - - public bool Read() - { - if (offset == bytes.Length - 1) - { - return false; - } - - bool isReadingCandidate = false; - int startOffset = -1; - var validApproaches = new List(); - var buffer = new List(); - - while (offset < bytes.Length) - { - var current = bytes[offset]; - - // Whitespace clears the current operator search. - if (BaseTextComponentApproach.IsEmpty(current)) - { - // TODO: consider the case of two valid operators, one of which is a single character, 'Q' and 'Qe'. For example "BT 10 Q 13 Qe ET" - - isReadingCandidate = false; - - validApproaches.Clear(); - buffer.Clear(); - - offset++; - continue; - } - - buffer.Add(current); - - // If we previously started reading a byte which matched some possible approaches. 
- if (isReadingCandidate) - { - // Remove any approaches which are no longer valid for the next byte. - foreach (var validApproach in new List(validApproaches)) - { - if (!validApproach.CanRead(current, offset - startOffset)) - { - validApproaches.Remove(validApproach); - } - } - - // There is a single valid approach which is indicative of a specific operator. - if (validApproaches.Count == 1) - { - CurrentComponent = validApproaches[0].Read(buffer, bytes.Skip(offset + 1), out var localOffset); - - if (CurrentComponent != null) - { - offset += localOffset; - return true; - } - - isReadingCandidate = false; - } - // This was a false start. - else if (validApproaches.Count == 0) - { - buffer.Clear(); - isReadingCandidate = false; - } - } - // If we haven't looked at the first byte after some whitespace. - else if (buffer.Count == 1) - { - // Find any operator approaches which are valid for this first byte. - foreach (var approach in Approaches) - { - if (approach.CanRead(current, 0)) - { - validApproaches.Add(approach); - } - } - - switch (validApproaches.Count) - { - case 0: - // No valid approaches, this cannot be a operator, continue until we hit a whitespace. - break; - case 1: - // A single valid approach, this immediately matches an operator. - CurrentComponent = validApproaches[0].Read(buffer, bytes.Skip(offset + 1), out var localOffset); - - if (CurrentComponent != null) - { - offset += localOffset; - return true; - } - break; - default: - // Multiple valid approaches, use the next character to refine the possible approaches. 
- startOffset = offset; - isReadingCandidate = true; - break; - } - } - - offset++; - } - - return false; - } - } -} \ No newline at end of file diff --git a/src/UglyToad.Pdf/Text/ITextComponentApproach.cs b/src/UglyToad.Pdf/Text/ITextComponentApproach.cs deleted file mode 100644 index 17aa3624..00000000 --- a/src/UglyToad.Pdf/Text/ITextComponentApproach.cs +++ /dev/null @@ -1,11 +0,0 @@ -namespace UglyToad.Pdf.Text -{ - using System.Collections.Generic; - - public interface ITextComponentApproach - { - bool CanRead(byte b, int offset); - - ITextObjectComponent Read(IReadOnlyList readBytes, IEnumerable furtherBytes, out int offset); - } -} \ No newline at end of file diff --git a/src/UglyToad.Pdf/Text/ITextScanner.cs b/src/UglyToad.Pdf/Text/ITextScanner.cs deleted file mode 100644 index 059c0fbe..00000000 --- a/src/UglyToad.Pdf/Text/ITextScanner.cs +++ /dev/null @@ -1,9 +0,0 @@ -namespace UglyToad.Pdf.Text -{ - public interface ITextScanner - { - ITextObjectComponent CurrentComponent { get; } - - bool Read(); - } -} \ No newline at end of file diff --git a/src/UglyToad.Pdf/Text/Operator.cs b/src/UglyToad.Pdf/Text/Operator.cs deleted file mode 100644 index 645b40d1..00000000 --- a/src/UglyToad.Pdf/Text/Operator.cs +++ /dev/null @@ -1,41 +0,0 @@ -namespace UglyToad.Pdf.Text -{ - using System.Collections.Generic; - - /// - /// Represents an operator which operates on operands containing the data. - /// - public class Operator : ITextObjectComponent - { - /// - /// Always - /// - public bool IsOperator { get; } = true; - - /// - /// The ordered operand types required prior to this operator. - /// - public IReadOnlyList OperandTypes { get; } - - /// - /// The type of this operator. - /// - public TextObjectComponentType Type { get; } - - /// - /// Always . 
- /// - public IOperand AsOperand { get; } = null; - - public Operator(TextObjectComponentType type, IReadOnlyList operandTypes) - { - OperandTypes = operandTypes; - Type = type; - } - - public override string ToString() - { - return $"Operator: {Type}"; - } - } -} \ No newline at end of file diff --git a/src/UglyToad.Pdf/Text/Operators/BaseTextComponentApproach.cs b/src/UglyToad.Pdf/Text/Operators/BaseTextComponentApproach.cs deleted file mode 100644 index 06743989..00000000 --- a/src/UglyToad.Pdf/Text/Operators/BaseTextComponentApproach.cs +++ /dev/null @@ -1,85 +0,0 @@ -namespace UglyToad.Pdf.Text.Operators -{ - using System.Collections.Generic; - - public class BaseTextComponentApproach : ITextComponentApproach - { - private readonly byte[] bytes; - private readonly TextObjectComponentType textObjectComponentType; - private readonly IReadOnlyList operandTypes; - - public BaseTextComponentApproach(byte[] bytes, TextObjectComponentType textObjectComponentType, - IReadOnlyList operandTypes) - { - this.bytes = bytes; - this.textObjectComponentType = textObjectComponentType; - this.operandTypes = operandTypes; - } - - public bool CanRead(byte b, int offset) - { - if (offset >= bytes.Length) - { - return false; - } - - return bytes[offset] == b; - } - - public ITextObjectComponent Read(IReadOnlyList readBytes, IEnumerable furtherBytes, out int offset) - { - bool hasOpenedEnumerator = false; - offset = bytes.Length; - using (var enumerator = furtherBytes.GetEnumerator()) - { - for (var i = 0; i < bytes.Length; i++) - { - if (i < readBytes.Count) - { - if (readBytes[i] != bytes[i]) - { - return null; - } - - // Look beyond the end - if (i == bytes.Length - 1) - { - if (!hasOpenedEnumerator && enumerator.MoveNext() && !IsEmpty(enumerator.Current)) - { - return null; - } - } - } - else - { - hasOpenedEnumerator = true; - - if (!enumerator.MoveNext()) - { - return null; - } - - var curr = enumerator.Current; - - if (curr != bytes[i]) - { - return null; - } - - if (i == 
bytes.Length - 1 && enumerator.MoveNext() && !IsEmpty(enumerator.Current)) - { - return null; - } - } - } - } - - return new Operator(textObjectComponentType, operandTypes); - } - - public static bool IsEmpty(byte b) - { - return b == ' ' || b == '\r' || b == '\n' || b == 0; - } - } -} \ No newline at end of file diff --git a/src/UglyToad.Pdf/Text/Operators/FontTextComponentApproach.cs b/src/UglyToad.Pdf/Text/Operators/FontTextComponentApproach.cs deleted file mode 100644 index 7ce33b11..00000000 --- a/src/UglyToad.Pdf/Text/Operators/FontTextComponentApproach.cs +++ /dev/null @@ -1,77 +0,0 @@ -namespace UglyToad.Pdf.Text.Operators -{ - using System.Collections.Generic; - - public class FontTextComponentApproach : ITextComponentApproach - { - public bool CanRead(byte b, int offset) - { - if (offset == 0 && b == '/') - { - return true; - } - - if (offset == 0) - { - return false; - } - - return !BaseTextComponentApproach.IsEmpty(b); - } - - public ITextObjectComponent Read(IReadOnlyList readBytes, IEnumerable furtherBytes, out int offset) - { - offset = readBytes.Count; - - using (var reader = furtherBytes.GetEnumerator()) - { - var values = new List(readBytes); - - while (reader.MoveNext() && !BaseTextComponentApproach.IsEmpty(reader.Current)) - { - values.Add(reader.Current); - offset++; - } - - return new OperandComponent(new FontOperand(values), TextObjectComponentType.Font); - } - } - } - - public class OperandComponent : ITextObjectComponent - { - public bool IsOperator { get; } = false; - - public IReadOnlyList OperandTypes { get; } = new TextObjectComponentType[0]; - - public TextObjectComponentType Type { get; } - - public IOperand AsOperand { get; } - - public OperandComponent(IOperand operand, TextObjectComponentType type) - { - Type = type; - AsOperand = operand; - } - } - - public class FontOperand : IOperand - { - public IReadOnlyList RawBytes { get; } - - public FontOperand(IReadOnlyList bytes) - { - RawBytes = bytes; - } - } - - public class 
StringOperand : IOperand - { - public IReadOnlyList RawBytes { get; } - - public StringOperand(IReadOnlyList bytes) - { - RawBytes = bytes; - } - } -} \ No newline at end of file diff --git a/src/UglyToad.Pdf/Text/Operators/NumericTextComponentApproach.cs b/src/UglyToad.Pdf/Text/Operators/NumericTextComponentApproach.cs deleted file mode 100644 index e592d5d6..00000000 --- a/src/UglyToad.Pdf/Text/Operators/NumericTextComponentApproach.cs +++ /dev/null @@ -1,62 +0,0 @@ -namespace UglyToad.Pdf.Text.Operators -{ - using System; - using System.Collections.Generic; - - public class NumericTextComponentApproach : ITextComponentApproach - { - private static readonly HashSet SupportedCharacterSet = new HashSet - { - (byte)'0', - (byte)'1', - (byte)'2', - (byte)'3', - (byte)'4', - (byte)'5', - (byte)'6', - (byte)'7', - (byte)'8', - (byte)'9', - (byte)'+', - (byte)'-', - (byte)'.' - }; - - public bool CanRead(byte b, int offset) - { - return SupportedCharacterSet.Contains(b); - } - - public ITextObjectComponent Read(IReadOnlyList readBytes, IEnumerable furtherBytes, out int offset) - { - offset = readBytes.Count; - var bytes = new List(readBytes); - - using (var reader = furtherBytes.GetEnumerator()) - { - while (reader.MoveNext() && !BaseTextComponentApproach.IsEmpty(reader.Current)) - { - if (!SupportedCharacterSet.Contains(reader.Current)) - { - throw new InvalidOperationException("Unsupported byte in numeric operator: " + (char)reader.Current); - } - - bytes.Add(reader.Current); - offset++; - } - } - - return new OperandComponent(new NumericOperand(bytes), TextObjectComponentType.Numeric); - } - } - - public class NumericOperand : IOperand - { - public NumericOperand(IReadOnlyList bytes) - { - - } - - public IReadOnlyList RawBytes { get; set; } - } -} \ No newline at end of file diff --git a/src/UglyToad.Pdf/Text/Operators/StringTextComponentApproach.cs b/src/UglyToad.Pdf/Text/Operators/StringTextComponentApproach.cs deleted file mode 100644 index 8ecfec83..00000000 --- 
a/src/UglyToad.Pdf/Text/Operators/StringTextComponentApproach.cs +++ /dev/null @@ -1,136 +0,0 @@ -namespace UglyToad.Pdf.Text.Operators -{ - using System; - using System.Collections.Generic; - - public class StringTextComponentApproach : ITextComponentApproach - { - public bool CanRead(byte b, int offset) - { - if (offset == 0) - { - if (b == '<' || b == '(') - { - return true; - } - - return false; - } - - return true; - } - - public ITextObjectComponent Read(IReadOnlyList readBytes, IEnumerable furtherBytes, out int offset) - { - var bytes = new List(readBytes); - - bool isHexString = false; - bool isKnownType = false; - if (readBytes.Count > 0) - { - isHexString = readBytes[0] == '<'; - - if (!isHexString && readBytes[0] != '(') - { - throw new InvalidOperationException("String started with an unexpected character: " + bytes[0]); - } - - isKnownType = true; - } - - bool isEscapeActive = false; - int bracketDepth = 0; - using (var reader = furtherBytes.GetEnumerator()) - { - while (reader.MoveNext()) - { - if (!isKnownType) - { - isHexString = reader.Current == '<'; - - if (!isHexString && reader.Current != '(') - { - throw new InvalidOperationException("String started with an unexpected character: " + bytes[0]); - } - - isKnownType = true; - bytes.Add(reader.Current); - continue; - } - - bytes.Add(reader.Current); - - if (isHexString) - { - if (reader.Current == '>') - { - break; - } - - var isValid = IsValidHexCharacter(reader.Current); - - if (!isValid) - { - throw new InvalidOperationException("Found an unexpected character in a hex string: " + reader.Current); - } - } - else - { - bool exit = false; - switch (reader.Current) - { - case (byte)'\\': - isEscapeActive = true; - break; - case (byte)'(': - if (!isEscapeActive) - { - bracketDepth++; - } - - break; - case (byte)')': - if (isEscapeActive) - { - continue; - } - else if (bracketDepth > 0) - { - bracketDepth--; - } - else - { - exit = true; - } - break; - default: - isEscapeActive = false; - break; - } 
- - if (exit) - { - break; - } - } - } - - if (reader.MoveNext() && !BaseTextComponentApproach.IsEmpty(reader.Current)) - { - throw new InvalidOperationException("Unexpected byte following string operator, expected whitespace: " + (char)reader.Current); - } - } - - offset = bytes.Count; - - return new OperandComponent(new StringOperand(bytes), TextObjectComponentType.String); - } - - private static bool IsValidHexCharacter(byte b) - { - return (b >= '0' && b <= '9') - || (b >= 'a' && b <= 'f') - || (b >= 'A' && b <= 'F'); - } - } -} \ No newline at end of file diff --git a/src/UglyToad.Pdf/Text/TextSectionParser.cs b/src/UglyToad.Pdf/Text/TextSectionParser.cs deleted file mode 100644 index cbfb3659..00000000 --- a/src/UglyToad.Pdf/Text/TextSectionParser.cs +++ /dev/null @@ -1,155 +0,0 @@ -namespace UglyToad.Pdf.Text -{ - using System; - using System.Collections.Generic; - using System.Linq; - using Logging; - using Util.JetBrains.Annotations; - - public class TextSectionParser - { - private readonly ILog log; - - public TextSectionParser(ILog log) - { - this.log = log; - } - - public IReadOnlyList ReadTextObjects(ITextScanner textScanner) - { - bool textSectionActive = false; - var result = new List>(); - var sections = new List(); - while (textScanner.Read()) - { - sections.Add(textScanner.CurrentComponent); - - switch (textScanner.CurrentComponent.Type) - { - case TextObjectComponentType.BeginText: - if (textSectionActive) - { - throw new InvalidOperationException("Found a begin text (BT) nested in another."); - } - - textSectionActive = true; - break; - case TextObjectComponentType.EndText: - textSectionActive = false; - - result.Add(sections); - sections = new List(); - break; - } - } - - foreach (var section in result) - { - if (section[0].Type == TextObjectComponentType.BeginText) - { - ProcessTextSection(section, true); - } - - } - - return result; - } - - private object ProcessTextSection(IReadOnlyList components, bool isLenientParsing) - { - if 
(components[0].Type != TextObjectComponentType.BeginText) - { - throw new InvalidOperationException("The set of components did not start with Begin Text (BT)"); - } - - if (components[components.Count - 1].Type != TextObjectComponentType.EndText) - { - throw new InvalidOperationException("The set of components did not end with End Text (ET)"); - } - - var builder = new TextObjectBuilder(); - - for (var i = 1; i < components.Count - 1; i++) - { - if (components[i].IsOperator) - { - ApplyOperator(builder, components, i, isLenientParsing); - } - else - { - continue; - } - } - - return null; - } - - private void ApplyOperator(TextObjectBuilder builder, IReadOnlyList components, int index, bool isLenientParsing) - { - var current = components[index]; - - if (!current.IsOperator) - { - throw new InvalidOperationException("Cannot apply operator for component type: " + current); - } - - var operands = new IOperand[current.OperandTypes.Count]; - - var start = index - operands.Length; - - // begin text or start - if (start <= 0) - { - log.Error("Did not find the required number of operands for the current operator."); - - if (isLenientParsing) - { - return; - } - - throw new InvalidOperationException(); - } - - for (int i = start; i < index; i++) - { - var expectedOperand = current.OperandTypes[i - start]; - - if (components[i].Type != expectedOperand) - { - if (isLenientParsing) - { - return; - } - - throw new InvalidOperationException($"Unexpected operand type at index {i - start} for operator: {current}\r\nExpected {expectedOperand} Found {components[i].Type}"); - } - - operands[i - start] = components[i].AsOperand; - } - } - } - - public class TextObjectBuilder - { - public string FontKey { get; set; } - - public decimal FontSize { get; set; } - } - - public interface ITextObjectComponent - { - bool IsOperator { get; } - - IReadOnlyList OperandTypes { get; } - - TextObjectComponentType Type { get; } - - [CanBeNull] - IOperand AsOperand { get; } - } - - public interface 
IOperand - { - IReadOnlyList RawBytes { get; } - } -} diff --git a/src/UglyToad.Pdf/Tokenization/Scanner/CoreTokenScanner.cs b/src/UglyToad.Pdf/Tokenization/Scanner/CoreTokenScanner.cs index 811ba03a..885753b4 100644 --- a/src/UglyToad.Pdf/Tokenization/Scanner/CoreTokenScanner.cs +++ b/src/UglyToad.Pdf/Tokenization/Scanner/CoreTokenScanner.cs @@ -4,7 +4,6 @@ using System.Collections.Generic; using IO; using Parser.Parts; - using Text.Operators; using Tokenization; using Tokens; @@ -19,7 +18,7 @@ { private static readonly HexTokenizer HexTokenizer = new HexTokenizer(); private static readonly StringTokenizer StringTokenizer = new StringTokenizer(); - private static readonly Tokenization.NumericTokenizer NumericTokenizer = new Tokenization.NumericTokenizer(); + private static readonly NumericTokenizer NumericTokenizer = new NumericTokenizer(); private static readonly NameTokenizer NameTokenizer = new NameTokenizer(); private static readonly PlainTokenizer PlainTokenizer = new PlainTokenizer(); private static readonly ArrayTokenizer ArrayTokenizer = new ArrayTokenizer(); @@ -70,8 +69,7 @@ var currentByte = inputBytes.CurrentByte; var c = (char) currentByte; - if (BaseTextComponentApproach.IsEmpty(currentByte) - || ReadHelper.IsWhitespace(currentByte)) + if (IsEmpty(currentByte) || ReadHelper.IsWhitespace(currentByte)) { isSkippingSymbol = false; continue; @@ -159,5 +157,10 @@ return false; } + + private static bool IsEmpty(byte b) + { + return b == ' ' || b == '\r' || b == '\n' || b == 0; + } } } \ No newline at end of file diff --git a/src/UglyToad.Pdf/Tokenization/Tokens/HexToken.cs b/src/UglyToad.Pdf/Tokenization/Tokens/HexToken.cs index 9de379c5..a8664586 100644 --- a/src/UglyToad.Pdf/Tokenization/Tokens/HexToken.cs +++ b/src/UglyToad.Pdf/Tokenization/Tokens/HexToken.cs @@ -64,7 +64,11 @@ namespace UglyToad.Pdf.Tokenization.Tokens var b = Convert(high, low); bytes.Add(b); - builder.Append((char)b); + + if (b != '\0') + { + builder.Append((char)b); + } } Bytes = 
bytes; diff --git a/src/UglyToad.Pdf/UglyToad.Pdf.csproj b/src/UglyToad.Pdf/UglyToad.Pdf.csproj index 39eeada7..9f5c4f4a 100644 --- a/src/UglyToad.Pdf/UglyToad.Pdf.csproj +++ b/src/UglyToad.Pdf/UglyToad.Pdf.csproj @@ -4,8 +4,4 @@ netstandard2.0 - - - - diff --git a/src/UglyToad.Pdf/Util/Bootstrapper.cs b/src/UglyToad.Pdf/Util/Bootstrapper.cs index c90396c8..686fcb1a 100644 --- a/src/UglyToad.Pdf/Util/Bootstrapper.cs +++ b/src/UglyToad.Pdf/Util/Bootstrapper.cs @@ -1,6 +1,7 @@ namespace UglyToad.Pdf.Util { using Filters; + using Graphics; using Logging; using Parser; using Parser.PageTree; @@ -47,6 +48,8 @@ var simpleFontParser = new SimpleFontParser(); var compositeFontParser = new CompositeFontParser(); var fontParser = new FontParser(); + var pageContentParser = new PageContentParser(); + var operationFactory = new ReflectionGraphicsStateOperationFactory(); var container = new Container(); container.Register(headerParser); @@ -65,6 +68,8 @@ container.Register(simpleFontParser); container.Register(compositeFontParser); container.Register(fontParser); + container.Register(pageContentParser); + container.Register(operationFactory); return container; }