delete old code and start an approach for parsing fonts and page content streams

This commit is contained in:
Eliot Jones
2017-11-22 18:41:34 +00:00
parent 4b91300466
commit b0e53efbfe
96 changed files with 2307 additions and 1904 deletions

View File

@@ -0,0 +1,425 @@
namespace UglyToad.Pdf.Tests.Parser
{
using Graphics;
using Pdf.Parser;
using Xunit;
public class PageContentParserTests
{
private readonly PageContentParser parser = new PageContentParser();
/// <summary>
/// Runs the parser over the content stream captured from a simple Google Docs page
/// and verifies that a result is produced.
/// </summary>
[Fact]
public void CorrectlyExtractsOperations()
{
    var input = StringBytesTestConverter.Convert(SimpleGoogleDocPageContent, false);

    var result = parser.Parse(new ReflectionGraphicsStateOperationFactory(), input.Bytes);

    // Without an assertion this test could only ever fail by throwing.
    Assert.NotNull(result);
}
private const string SimpleGoogleDocPageContent = @"
1 0 0 -1 0 792 cm
q
0 0 612 792 re
W* n
q
.75 0 0 .75 0 0 cm
1 1 1 RG 1 1 1 rg
/G0 gs
0 0 816 1056 re
f
0 0 816 1056 re
f
0 0 816 1056 re
f
Q
Q
q
0 0 612 791.25 re
W* n
q
.75 0 0 .75 0 0 cm
1 1 1 RG 1 1 1 rg
/G0 gs
0 0 816 1055 re
f
0 96 816 960 re
f
0 0 0 RG 0 0 0 rg
BT
/F0 21.33 Tf
1 0 0 -1 0 140 Tm
96 0 Td <0037> Tj
13.0280762 0 Td <004B> Tj
11.8616943 0 Td <004C> Tj
4.7384338 0 Td <0056> Tj
ET
BT
/F1 21.33 Tf
1 0 0 -1 0 140 Tm
136.292267 0 Td <0001> Tj
ET
BT
/F0 21.33 Tf
1 0 0 -1 0 140 Tm
136.292267 0 Td <0003> Tj
ET
BT
/F1 21.33 Tf
1 0 0 -1 0 140 Tm
142.217911 0 Td <0001> Tj
ET
BT
/F0 21.33 Tf
1 0 0 -1 0 140 Tm
142.217911 0 Td <004C> Tj
4.7384338 0 Td <0056> Tj
ET
BT
/F1 21.33 Tf
1 0 0 -1 0 140 Tm
157.620407 0 Td <0001> Tj
ET
BT
/F0 21.33 Tf
1 0 0 -1 0 140 Tm
157.620407 0 Td <0003> Tj
ET
BT
/F1 21.33 Tf
1 0 0 -1 0 140 Tm
163.546051 0 Td <0001> Tj
ET
BT
/F0 21.33 Tf
1 0 0 -1 0 140 Tm
163.546051 0 Td <0057> Tj
5.9256439 0 Td <004B> Tj
11.8616943 0 Td <0048> Tj
ET
BT
/F1 21.33 Tf
1 0 0 -1 0 140 Tm
193.19508 0 Td <0001> Tj
ET
BT
/F0 21.33 Tf
1 0 0 -1 0 140 Tm
193.19508 0 Td <0003> Tj
ET
BT
/F1 21.33 Tf
1 0 0 -1 0 140 Tm
199.12073 0 Td <0001> Tj
ET
BT
/F0 21.33 Tf
1 0 0 -1 0 140 Tm
199.12073 0 Td <0047> Tj
11.8616943 0 Td <0052> Tj
11.8616943 0 Td <0046> Tj
10.6640625 0 Td <0058> Tj
11.8616943 0 Td <0050> Tj
17.766479 0 Td <0048> Tj
11.8616943 0 Td <0051> Tj
11.8616943 0 Td <0057> Tj
ET
BT
/F1 21.33 Tf
1 0 0 -1 0 140 Tm
292.7854 0 Td <0001> Tj
ET
BT
/F0 21.33 Tf
1 0 0 -1 0 140 Tm
292.7854 0 Td <0003> Tj
ET
BT
/F1 21.33 Tf
1 0 0 -1 0 140 Tm
298.71106 0 Td <0001> Tj
ET
BT
/F0 21.33 Tf
1 0 0 -1 0 140 Tm
298.71106 0 Td <0057> Tj
5.9256287 0 Td <004C> Tj
4.7384338 0 Td <0057> Tj
5.9256592 0 Td <004F> Tj
4.7384033 0 Td <0048> Tj
ET
BT
/F0 21.33 Tf
1 0 0 -1 0 140 Tm
331.89063 0 Td <0003> Tj
ET
BT
/F0 14.6599998 Tf
1 0 0 -1 0 171 Tm
96 0 Td <0003> Tj
ET
BT
/F0 14.6599998 Tf
1 0 0 -1 0 191 Tm
96 0 Td <0037> Tj
8.9526215 0 Td <004B> Tj
8.1511078 0 Td <0048> Tj
8.1511078 0 Td <0055> Tj
4.8806458 0 Td <0048> Tj
ET
BT
/F1 14.6599998 Tf
1 0 0 -1 0 191 Tm
134.286591 0 Td <0001> Tj
ET
BT
/F0 14.6599998 Tf
1 0 0 -1 0 191 Tm
134.286591 0 Td <0003> Tj
ET
BT
/F1 14.6599998 Tf
1 0 0 -1 0 191 Tm
138.358566 0 Td <0001> Tj
ET
BT
/F0 14.6599998 Tf
1 0 0 -1 0 191 Tm
138.358566 0 Td <004C> Tj
3.2561493 0 Td <0056> Tj
ET
BT
/F1 14.6599998 Tf
1 0 0 -1 0 191 Tm
148.942841 0 Td <0001> Tj
ET
BT
/F0 14.6599998 Tf
1 0 0 -1 0 191 Tm
148.942841 0 Td <0003> Tj
ET
BT
/F1 14.6599998 Tf
1 0 0 -1 0 191 Tm
153.014816 0 Td <0001> Tj
ET
BT
/F0 14.6599998 Tf
1 0 0 -1 0 191 Tm
153.014816 0 Td <0056> Tj
7.328125 0 Td <0052> Tj
8.1511078 0 Td <0050> Tj
12.2087708 0 Td <0048> Tj
ET
BT
/F1 14.6599998 Tf
1 0 0 -1 0 191 Tm
188.85393 0 Td <0001> Tj
ET
BT
/F0 14.6599998 Tf
1 0 0 -1 0 191 Tm
188.85393 0 Td <0003> Tj
ET
BT
/F1 14.6599998 Tf
1 0 0 -1 0 191 Tm
192.9259 0 Td <0001> Tj
ET
BT
/F0 14.6599998 Tf
1 0 0 -1 0 191 Tm
192.9259 0 Td <004F> Tj
3.2561493 0 Td <0048> Tj
8.1511078 0 Td <0047> Tj
8.1511078 0 Td <0048> Tj
ET
BT
/F1 14.6599998 Tf
1 0 0 -1 0 191 Tm
220.63538 0 Td <0001> Tj
ET
BT
/F0 14.6599998 Tf
1 0 0 -1 0 191 Tm
220.63538 0 Td <0003> Tj
ET
BT
/F1 14.6599998 Tf
1 0 0 -1 0 191 Tm
224.70735 0 Td <0001> Tj
ET
BT
/F0 14.6599998 Tf
1 0 0 -1 0 191 Tm
224.70735 0 Td <0057> Tj
4.0719757 0 Td <0048> Tj
8.1511078 0 Td <005B> Tj
7.328125 0 Td <0057> Tj
ET
BT
/F1 14.6599998 Tf
1 0 0 -1 0 191 Tm
248.33054 0 Td <0001> Tj
ET
BT
/F0 14.6599998 Tf
1 0 0 -1 0 191 Tm
248.33054 0 Td <0003> Tj
ET
BT
/F1 14.6599998 Tf
1 0 0 -1 0 191 Tm
252.40251 0 Td <0001> Tj
ET
BT
/F0 14.6599998 Tf
1 0 0 -1 0 191 Tm
252.40251 0 Td <004B> Tj
8.1511078 0 Td <0048> Tj
8.1510925 0 Td <0055> Tj
4.8806763 0 Td <0048> Tj
ET
BT
/F0 14.6599998 Tf
1 0 0 -1 0 191 Tm
281.73438 0 Td <0003> Tj
ET
BT
/F0 14.6599998 Tf
1 0 0 -1 0 211 Tm
96 0 Td <0003> Tj
ET
BT
/F0 14.6599998 Tf
1 0 0 -1 0 231 Tm
96 0 Td <0024> Tj
9.7756042 0 Td <0051> Tj
8.1511078 0 Td <0047> Tj
ET
BT
/F1 14.6599998 Tf
1 0 0 -1 0 231 Tm
122.07782 0 Td <0001> Tj
ET
BT
/F0 14.6599998 Tf
1 0 0 -1 0 231 Tm
122.07782 0 Td <0003> Tj
ET
BT
/F1 14.6599998 Tf
1 0 0 -1 0 231 Tm
126.149796 0 Td <0001> Tj
ET
BT
/F0 14.6599998 Tf
1 0 0 -1 0 231 Tm
126.149796 0 Td <0057> Tj
4.0719757 0 Td <004B> Tj
8.1511078 0 Td <0048> Tj
8.1511078 0 Td <0051> Tj
ET
BT
/F1 14.6599998 Tf
1 0 0 -1 0 231 Tm
154.675095 0 Td <0001> Tj
ET
BT
/F0 14.6599998 Tf
1 0 0 -1 0 231 Tm
154.675095 0 Td <0003> Tj
ET
BT
/F1 14.6599998 Tf
1 0 0 -1 0 231 Tm
158.74707 0 Td <0001> Tj
ET
BT
/F0 14.6599998 Tf
1 0 0 -1 0 231 Tm
158.74707 0 Td <0044> Tj
8.1511078 0 Td <0051> Tj
8.1511078 0 Td <0052> Tj
8.1511078 0 Td <0057> Tj
4.0719757 0 Td <004B> Tj
8.1511078 0 Td <0048> Tj
8.1511078 0 Td <0055> Tj
ET
BT
/F1 14.6599998 Tf
1 0 0 -1 0 231 Tm
208.45523 0 Td <0001> Tj
ET
BT
/F0 14.6599998 Tf
1 0 0 -1 0 231 Tm
208.45523 0 Td <0003> Tj
ET
BT
/F1 14.6599998 Tf
1 0 0 -1 0 231 Tm
212.52721 0 Td <0001> Tj
ET
BT
/F0 14.6599998 Tf
1 0 0 -1 0 231 Tm
212.52721 0 Td <004F> Tj
3.2561493 0 Td <004C> Tj
3.2561493 0 Td <0051> Tj
8.1511078 0 Td <0048> Tj
ET
BT
/F1 14.6599998 Tf
1 0 0 -1 0 231 Tm
235.34172 0 Td <0001> Tj
ET
BT
/F0 14.6599998 Tf
1 0 0 -1 0 231 Tm
235.34172 0 Td <0003> Tj
ET
BT
/F1 14.6599998 Tf
1 0 0 -1 0 231 Tm
239.4137 0 Td <0001> Tj
ET
BT
/F0 14.6599998 Tf
1 0 0 -1 0 231 Tm
239.4137 0 Td <0052> Tj
8.1511078 0 Td <0049> Tj
ET
BT
/F1 14.6599998 Tf
1 0 0 -1 0 231 Tm
251.63678 0 Td <0001> Tj
ET
BT
/F0 14.6599998 Tf
1 0 0 -1 0 231 Tm
251.63678 0 Td <0003> Tj
ET
BT
/F1 14.6599998 Tf
1 0 0 -1 0 231 Tm
255.70876 0 Td <0001> Tj
ET
BT
/F0 14.6599998 Tf
1 0 0 -1 0 231 Tm
255.70876 0 Td <0057> Tj
4.0719757 0 Td <0048> Tj
8.1510925 0 Td <005B> Tj
7.328125 0 Td <0057> Tj
4.071991 0 Td <0011> Tj
ET
BT
/F0 14.6599998 Tf
1 0 0 -1 0 231 Tm
283.39063 0 Td <0003> Tj
ET
Q
Q
";
}
}

View File

@@ -1,266 +0,0 @@
namespace UglyToad.Pdf.Tests.Text
{
using System.Collections.Generic;
using System.Linq;
using Pdf.Text;
using Pdf.Util;
using Xunit;
using ComponentType = Pdf.Text.TextObjectComponentType;
/// <summary>
/// Tests which feed text object content to <see cref="ByteTextScanner"/> and check
/// the sequence of component types it reports.
/// </summary>
public class ByteTextScannerTests
{
    /// <summary>
    /// Font selection, positioning and a single literal string.
    /// </summary>
    [Fact]
    public void ParseSimpleTest()
    {
        const string content = @"
BT
/F13 12 Tf
288 720 Td
(ABC) Tj
ET";

        var expectedTypes = new[]
        {
            ComponentType.BeginText,
            ComponentType.Font,
            ComponentType.Numeric,
            ComponentType.TextFont,
            ComponentType.Numeric,
            ComponentType.Numeric,
            ComponentType.MoveTextPosition,
            ComponentType.String,
            ComponentType.ShowText,
            ComponentType.EndText
        };

        Assert.Equal(expectedTypes, ScanAll(content).Select(component => component.Type));
    }

    /// <summary>
    /// Adds a rendering mode and a non-stroking gray level to the simple case.
    /// </summary>
    [Fact]
    public void ParseStyledText()
    {
        const string content = @"BT
/F13 48 Tf
0 40 Td
0 Tr
0.5 g
(Some Text) Tj
ET";

        var expectedTypes = new[]
        {
            ComponentType.BeginText,
            ComponentType.Font,
            ComponentType.Numeric,
            ComponentType.TextFont,
            ComponentType.Numeric,
            ComponentType.Numeric,
            ComponentType.MoveTextPosition,
            ComponentType.Numeric,
            ComponentType.SetTextRenderingMode,
            ComponentType.Numeric,
            ComponentType.SetGrayNonStroking,
            ComponentType.String,
            ComponentType.ShowText,
            ComponentType.EndText
        };

        Assert.Equal(expectedTypes, ScanAll(content).Select(component => component.Type));
    }

    /// <summary>
    /// All operators on one line, with a line width and a hex string.
    /// </summary>
    [Fact]
    public void ParseTextAsPath()
    {
        const string content = @"BT
/F13 48 Tf 20 38 Td 1 Tr 2 w <0053> Tj ET";

        var expectedTypes = new[]
        {
            ComponentType.BeginText,
            ComponentType.Font,
            ComponentType.Numeric,
            ComponentType.TextFont,
            ComponentType.Numeric,
            ComponentType.Numeric,
            ComponentType.MoveTextPosition,
            ComponentType.Numeric,
            ComponentType.SetTextRenderingMode,
            ComponentType.Numeric,
            ComponentType.SetLineWidth,
            ComponentType.String,
            ComponentType.ShowText,
            ComponentType.EndText
        };

        Assert.Equal(expectedTypes, ScanAll(content).Select(component => component.Type));
    }

    /// <summary>
    /// No font operator and a literal string which spans a line break.
    /// </summary>
    [Fact]
    public void ParseTextMissingFont()
    {
        const string content = @"
BT
40 50 Td
(Some more text which
includes a line break, if valid?) Tj
ET";

        var expectedTypes = new[]
        {
            ComponentType.BeginText,
            ComponentType.Numeric,
            ComponentType.Numeric,
            ComponentType.MoveTextPosition,
            ComponentType.String,
            ComponentType.ShowText,
            ComponentType.EndText
        };

        Assert.Equal(expectedTypes, ScanAll(content).Select(component => component.Type));
    }

    /// <summary>
    /// Six numeric operands followed by the set text matrix operator.
    /// </summary>
    [Fact]
    public void ParseTextMatrix()
    {
        const string content = @"BT
1 0 67473.567 -1 0 140 Tm
ET";

        var expectedTypes = new[]
        {
            ComponentType.BeginText,
            ComponentType.Numeric, ComponentType.Numeric, ComponentType.Numeric,
            ComponentType.Numeric, ComponentType.Numeric, ComponentType.Numeric,
            ComponentType.SetTextMatrix,
            ComponentType.EndText
        };

        Assert.Equal(expectedTypes, ScanAll(content).Select(component => component.Type));
    }

    /// <summary>
    /// A text object as produced by Google Docs: one glyph shown per move.
    /// </summary>
    [Fact]
    public void ParseSimpleGoogleDocsCase()
    {
        const string content = @"BT
/F0 21.33 Tf
1 0 0 -1 0 140 Tm
96 0 Td <0037> Tj
13.0280762 0 Td <004B> Tj
11.8616943 0 Td <004C> Tj
4.7384338 0 Td <0056> Tj
ET";

        var expectedTypes = new[]
        {
            ComponentType.BeginText,
            ComponentType.Font, ComponentType.Numeric, ComponentType.TextFont,
            ComponentType.Numeric, ComponentType.Numeric, ComponentType.Numeric, ComponentType.Numeric, ComponentType.Numeric, ComponentType.Numeric, ComponentType.SetTextMatrix,
            ComponentType.Numeric, ComponentType.Numeric, ComponentType.MoveTextPosition, ComponentType.String, ComponentType.ShowText,
            ComponentType.Numeric, ComponentType.Numeric, ComponentType.MoveTextPosition, ComponentType.String, ComponentType.ShowText,
            ComponentType.Numeric, ComponentType.Numeric, ComponentType.MoveTextPosition, ComponentType.String, ComponentType.ShowText,
            ComponentType.Numeric, ComponentType.Numeric, ComponentType.MoveTextPosition, ComponentType.String, ComponentType.ShowText,
            ComponentType.EndText
        };

        Assert.Equal(expectedTypes, ScanAll(content).Select(component => component.Type));
    }

    /// <summary>
    /// A lone operator, with or without surrounding whitespace, yields exactly one component of the matching type.
    /// </summary>
    [Theory]
    [InlineData("BT", ComponentType.BeginText)]
    [InlineData("ET", ComponentType.EndText)]
    [InlineData("Tf", ComponentType.TextFont)]
    [InlineData("Tj", ComponentType.ShowText)]
    [InlineData("Td", ComponentType.MoveTextPosition)]
    [InlineData(" Tm", ComponentType.SetTextMatrix)]
    [InlineData(" T* ", ComponentType.MoveToNextLineStart)]
    [InlineData("\r\n \nTs ", ComponentType.SetTextRise)]
    public void RecognisesSingleOperatorAsOnlyStringItem(string text, ComponentType textObjectComponentType)
    {
        var scanned = ScanAll(text);

        Assert.Single(scanned);
        Assert.Equal(textObjectComponentType, scanned[0].Type);
    }

    /// <summary>
    /// Input which only resembles an operator yields no components.
    /// </summary>
    [Theory]
    [InlineData("ETe")]
    [InlineData("Tff")]
    [InlineData("T j")]
    [InlineData(" Ta ")]
    [InlineData(" t*")]
    [InlineData("\rT\ns")]
    [InlineData("no")]
    public void SkipsSimilarOperator(string text)
    {
        Assert.Empty(ScanAll(text));
    }

    /// <summary>
    /// Converts <paramref name="text"/> to Latin-1 bytes and drains a new scanner over them,
    /// returning every component in the order it was read.
    /// </summary>
    private static List<ITextObjectComponent> ScanAll(string text)
    {
        var scanner = new ByteTextScanner(OtherEncodings.StringAsLatin1Bytes(text));
        var scanned = new List<ITextObjectComponent>();

        while (scanner.Read())
        {
            scanned.Add(scanner.CurrentComponent);
        }

        return scanned;
    }
}
}

View File

@@ -1,52 +0,0 @@
namespace UglyToad.Pdf.Tests.Text.Operators
{
using System.Collections.Generic;
using System.Linq;
using Pdf.Text;
using Pdf.Text.Operators;
using Xunit;
/// <summary>
/// Tests that <see cref="NumericTextComponentApproach"/> accepts and reads a range of numeric literals.
/// </summary>
public class NumericTextComponentApproachTests
{
    private readonly NumericTextComponentApproach approach = new NumericTextComponentApproach();

    /// <summary>
    /// Numeric literals shared by both theories: signed and unsigned integers and reals,
    /// including forms with a leading or trailing decimal point.
    /// </summary>
    public static IEnumerable<object[]> TestData = new[]
    {
        "123",
        "43445",
        "+17",
        "-98",
        "0",
        "34.5",
        "-3.62",
        "+123.6",
        "4.",
        "-.002",
        "0.0"
    }.Select(number => new object[] { number }).ToArray();

    /// <summary>
    /// The first byte of every literal is accepted at offset 0.
    /// </summary>
    [Theory]
    [MemberData(nameof(TestData))]
    public void CanReadNumbers(string number)
    {
        var firstByte = (byte)number[0];

        Assert.True(approach.CanRead(firstByte, 0));
    }

    /// <summary>
    /// Every literal is read as a component of numeric type.
    /// </summary>
    [Theory]
    [MemberData(nameof(TestData))]
    public void ReadsNumbers(string number)
    {
        var component = approach.Read(new byte[0], number.Select(character => (byte)character), out var _);

        Assert.NotNull(component);
        Assert.Equal(TextObjectComponentType.Numeric, component.Type);
    }
}
}

View File

@@ -1,37 +0,0 @@
namespace UglyToad.Pdf.Tests.Text.Operators
{
using System;
using System.Collections.Generic;
using System.Linq;
using Pdf.Text.Operators;
using Xunit;
/// <summary>
/// Tests for reading hex strings with <see cref="StringTextComponentApproach"/>.
/// </summary>
public class StringTextComponentApproachTests
{
    private readonly StringTextComponentApproach approach = new StringTextComponentApproach();

    /// <summary>
    /// A hex string with a wrong terminator or a non-hex character is rejected.
    /// </summary>
    [Theory]
    [InlineData("<03)")]
    [InlineData("<03AR>")]
    [InlineData("<9-3>")]
    public void InvalidHexThrows(string s)
    {
        Assert.Throws<InvalidOperationException>(() =>
        {
            approach.Read(new List<byte>(), AsBytes(s), out var _);
        });
    }

    /// <summary>
    /// A well-formed hex string is read and its raw bytes match the input, delimiters included.
    /// </summary>
    [Theory]
    [InlineData("<03>")]
    [InlineData("<03BA>")]
    [InlineData("<9a37eF>")]
    public void CanReadValidHex(string s)
    {
        var component = approach.Read(new List<byte>(), AsBytes(s), out var _);

        Assert.NotNull(component);
        Assert.Equal(AsBytes(s).ToArray(), component.AsOperand.RawBytes);
    }

    /// <summary>
    /// Lazily converts each character of <paramref name="s"/> to a byte.
    /// </summary>
    private static IEnumerable<byte> AsBytes(string s)
    {
        return s.Select(character => (byte)character);
    }
}
}

View File

@@ -24,7 +24,7 @@
}
[Theory]
[InlineData("<00>", "\0")]
[InlineData("<00>", "")]
[InlineData("<A1>", "¡")]
public void TokenizesHexStringsCorrectly(string s, string expected)
{

View File

@@ -8,7 +8,7 @@
[Theory]
[InlineData("AE", "®")]
[InlineData("61", "a")]
[InlineData("0061", "\0a")]
[InlineData("0061", "a")]
[InlineData("7465787420736f", "text so")]
public void MapsCorrectlyToString(string input, string expected)
{

View File

@@ -0,0 +1,76 @@
namespace UglyToad.Pdf.Content
{
using System;
using Geometry;
/// <summary>
/// The boundary of the physical medium to display or print on.
/// </summary>
/// <remarks>
/// See table 3.27 from the PDF specification version 1.7.
/// </remarks>
public class MediaBox
{
    /// <summary>
    /// User space units per inch.
    /// </summary>
    private const decimal PointsPerInch = 72;

    /// <summary>
    /// User space units per millimeter.
    /// </summary>
    private const decimal PointsPerMm = 1 / (10 * 2.54m) * PointsPerInch;

    /// <summary>
    /// A <see cref="MediaBox"/> the size of U.S. Letter, 8.5" x 11" Paper.
    /// </summary>
    public static readonly MediaBox Letter = FromInches(8.5m, 11m);

    /// <summary>
    /// A <see cref="MediaBox"/> the size of U.S. Legal, 8.5" x 14" Paper.
    /// </summary>
    public static readonly MediaBox Legal = FromInches(8.5m, 14m);

    /// <summary>
    /// A <see cref="MediaBox"/> the size of A0 Paper.
    /// </summary>
    public static readonly MediaBox A0 = FromMillimeters(841, 1189);

    /// <summary>
    /// A <see cref="MediaBox"/> the size of A1 Paper.
    /// </summary>
    public static readonly MediaBox A1 = FromMillimeters(594, 841);

    /// <summary>
    /// A <see cref="MediaBox"/> the size of A2 Paper.
    /// </summary>
    public static readonly MediaBox A2 = FromMillimeters(420, 594);

    /// <summary>
    /// A <see cref="MediaBox"/> the size of A3 Paper.
    /// </summary>
    public static readonly MediaBox A3 = FromMillimeters(297, 420);

    /// <summary>
    /// A <see cref="MediaBox"/> the size of A4 Paper.
    /// </summary>
    public static readonly MediaBox A4 = FromMillimeters(210, 297);

    /// <summary>
    /// A <see cref="MediaBox"/> the size of A5 Paper.
    /// </summary>
    public static readonly MediaBox A5 = FromMillimeters(148, 210);

    /// <summary>
    /// A <see cref="MediaBox"/> the size of A6 Paper.
    /// </summary>
    public static readonly MediaBox A6 = FromMillimeters(105, 148);

    /// <summary>
    /// The rectangle describing this media box.
    /// </summary>
    public PdfRectangle Bounds { get; }

    /// <summary>
    /// Creates a media box with the given bounds.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="bounds"/> is null.</exception>
    public MediaBox(PdfRectangle bounds)
    {
        if (bounds == null)
        {
            throw new ArgumentNullException(nameof(bounds));
        }

        Bounds = bounds;
    }

    /// <summary>
    /// Builds a media box anchored at (0, 0) from a size given in inches.
    /// </summary>
    private static MediaBox FromInches(decimal width, decimal height)
    {
        return new MediaBox(new PdfRectangle(0, 0, width * PointsPerInch, height * PointsPerInch));
    }

    /// <summary>
    /// Builds a media box anchored at (0, 0) from a size given in millimeters.
    /// </summary>
    private static MediaBox FromMillimeters(decimal width, decimal height)
    {
        return new MediaBox(new PdfRectangle(0, 0, width * PointsPerMm, height * PointsPerMm));
    }
}
}

View File

@@ -1,89 +1,101 @@
namespace UglyToad.Pdf.Content
{
using System;
using System.Diagnostics;
using ContentStream;
using ContentStream.TypedAccessors;
using Cos;
using Filters;
using Logging;
using Geometry;
using Graphics;
using IO;
using Parser;
using Text;
using Util;
public class Page
{
private readonly ParsingArguments parsingArguments;
private readonly ContentStreamDictionary dictionary;
/// <summary>
/// The 1 indexed page number.
/// </summary>
public int Number { get; }
public bool Loaded { get; private set; }
public MediaBox MediaBox { get; }
internal Page(int number, ContentStreamDictionary dictionary, ParsingArguments parsingArguments)
public PageContent Content { get; }
internal Page(int number, ContentStreamDictionary dictionary, PageTreeMembers pageTreeMembers, ParsingArguments parsingArguments)
{
if (number <= 0)
{
throw new ArgumentOutOfRangeException(nameof(number), "Page number cannot be 0 or negative.");
}
Number = number;
Loaded = false;
this.dictionary = dictionary ?? throw new ArgumentNullException(nameof(dictionary));
this.parsingArguments = parsingArguments ?? throw new ArgumentNullException(nameof(parsingArguments));
var mediabox = dictionary.GetDictionaryObject(CosName.MEDIA_BOX) as COSArray;
var contents = dictionary.GetItemOrDefault(CosName.CONTENTS);
var raw = contents as RawCosStream;
var obj = parsingArguments.CachingProviders.ObjectPool.Get(new CosObjectKey(7, 0));
var parser = parsingArguments.Container.Get<DynamicParser>()
.Parse(parsingArguments, obj, false) as RawCosStream;
var rw = parser.Decode(parsingArguments.Container.Get<IFilterProvider>());
var format = OtherEncodings.BytesAsLatin1String(rw);
var pee = new TextSectionParser(new NoOpLog()).ReadTextObjects(new ByteTextScanner(rw));
var font0 = parsingArguments.CachingProviders.ObjectPool.Get(new CosObjectKey(16, 0));
var cmpa = parsingArguments.CachingProviders.ObjectPool.Get(new CosObjectKey(9, 0));
var toad = parsingArguments.Container.Get<DynamicParser>()
.Parse(parsingArguments, new CosObjectKey(9, 0), false);
var bigsby = (toad as RawCosStream).Decode(parsingArguments.Container.Get<IFilterProvider>());
Number = number;
var ssss = OtherEncodings.BytesAsLatin1String(bigsby);
var type = dictionary.GetName(CosName.TYPE);
if (type != null && !type.Equals(CosName.PAGE) && !parsingArguments.IsLenientParsing)
{
throw new InvalidOperationException($"Created page number {number} but its type was specified as {type} rather than 'Page'.");
}
if (dictionary.TryGetItemOfType(CosName.MEDIA_BOX, out COSArray mediaboxArray))
{
var x1 = mediaboxArray.getInt(0);
var y1 = mediaboxArray.getInt(1);
var x2 = mediaboxArray.getInt(2);
var y2 = mediaboxArray.getInt(3);
MediaBox = new MediaBox(new PdfRectangle(x1, y1, x2, y2));
}
else
{
MediaBox = pageTreeMembers.GetMediaBox();
if (MediaBox == null)
{
if (parsingArguments.IsLenientParsing)
{
MediaBox = MediaBox.A4;
}
else
{
throw new InvalidOperationException("No mediabox was present for page: " + number);
}
}
}
if (dictionary.GetItemOrDefault(CosName.RESOURCES) is ContentStreamDictionary resource)
{
parsingArguments.CachingProviders.ResourceContainer.LoadResourceDictionary(resource, parsingArguments);
}
var contentObject = dictionary.GetItemOrDefault(CosName.CONTENTS) as CosObject;
if (contentObject != null)
{
var contentStream = parsingArguments.Container.Get<DynamicParser>()
.Parse(parsingArguments, contentObject, false) as RawCosStream;
if (contentStream == null)
{
throw new InvalidOperationException("Failed to parse the content for the page: " + number);
}
var contents = contentStream.Decode(parsingArguments.Container.Get<IFilterProvider>());
if (Debugger.IsAttached)
{
var textContents = OtherEncodings.BytesAsLatin1String(contents);
}
Content = parsingArguments.Container.Get<PageContentParser>()
.Parse(parsingArguments.Container.Get<IGraphicsStateOperationFactory>(), new ByteArrayInputBytes(contents));
}
}
}
/// <summary>
/// An axis-aligned rectangle defined by two opposite corners.
/// </summary>
/// <remarks>
/// The positive x axis extends horizontally to the right and the positive y axis vertically upward, as in standard mathematical practice
/// </remarks>
public struct Rectangle
{
    /// <summary>
    /// The horizontal extent: <see cref="Right"/> minus <see cref="Left"/>.
    /// </summary>
    public decimal Width { get; }

    /// <summary>
    /// The vertical extent: <see cref="Top"/> minus <see cref="Bottom"/>.
    /// </summary>
    public decimal Height { get; }

    /// <summary>
    /// The smaller of the two x values.
    /// </summary>
    public decimal Left { get; }

    /// <summary>
    /// The larger of the two y values (y increases upward).
    /// </summary>
    public decimal Top { get; }

    /// <summary>
    /// The larger of the two x values.
    /// </summary>
    public decimal Right { get; }

    /// <summary>
    /// The smaller of the two y values (y increases upward).
    /// </summary>
    public decimal Bottom { get; }

    /// <summary>
    /// Creates a rectangle from the corners (<paramref name="x1"/>, <paramref name="y1"/>) and
    /// (<paramref name="x2"/>, <paramref name="y2"/>). The corners may be supplied in any order.
    /// </summary>
    public Rectangle(decimal x1, decimal y1, decimal x2, decimal y2)
    {
        // Previously every member was set to 0 and the arguments were ignored.
        // Locals are used so no property is read before the struct is fully assigned.
        var left = Math.Min(x1, x2);
        var right = Math.Max(x1, x2);
        var bottom = Math.Min(y1, y2);
        var top = Math.Max(y1, y2);

        Left = left;
        Right = right;
        Bottom = bottom;
        Top = top;
        Width = right - left;
        Height = top - bottom;
    }
}
/// <summary>
/// A pair of decimal x and y values; both can be read and written.
/// </summary>
public struct Coordinate
{
/// <summary>
/// The x value.
/// </summary>
public decimal X { get; set; }
/// <summary>
/// The y value.
/// </summary>
public decimal Y { get; set; }
}
}

View File

@@ -0,0 +1,17 @@
namespace UglyToad.Pdf.Content
{
using System.Collections.Generic;
using Graphics.Operations;
/// <summary>
/// Holds the graphics state operations for the content of a page.
/// </summary>
/// <remarks>
/// This should contain a replayable stack of drawing instructions for page content
/// from a content stream in addition to lazily evaluated state such as text on the page or images.
/// </remarks>
public class PageContent
{
/// <summary>
/// The graphics state operations which make up this page content.
/// </summary>
internal IReadOnlyList<IGraphicsStateOperation> GraphicsStateOperations { get; set; }
}
}

View File

@@ -0,0 +1,16 @@
namespace UglyToad.Pdf.Content
{
using System;
/// <summary>
/// Contains the values inherited from the Page Tree for this page.
/// </summary>
public class PageTreeMembers
{
    /// <summary>
    /// Gets the media box inherited from the page tree.
    /// </summary>
    /// <returns>
    /// The inherited <see cref="MediaBox"/>, or <see langword="null"/> when none is available.
    /// Inherited members are not tracked yet, so this currently always returns <see langword="null"/>.
    /// </returns>
    public MediaBox GetMediaBox()
    {
        // TODO: tree inheritance.
        // Returning null (rather than throwing NotImplementedException) lets the caller's
        // null check decide between a lenient default and a strict-mode error.
        return null;
    }
}
}

View File

@@ -60,7 +60,7 @@
{
if (locatedPages.TryGetValue(pageNumber, out ContentStreamDictionary targetPageDictionary))
{
return new Page(pageNumber, targetPageDictionary, arguments);
return new Page(pageNumber, targetPageDictionary, new PageTreeMembers(), arguments);
}
var observed = new List<int>();

View File

@@ -0,0 +1,52 @@
namespace UglyToad.Pdf.Content
{
using System;
using System.Collections.Generic;
using ContentStream;
using Cos;
using Fonts;
using Parser;
/// <summary>
/// Keeps the fonts loaded from resource dictionaries, keyed by the name they are registered under.
/// </summary>
public class ResourceContainer
{
    private readonly Dictionary<CosName, IFont> loadedFonts = new Dictionary<CosName, IFont>();

    /// <summary>
    /// Loads the fonts from <paramref name="dictionary"/> when it has a font entry which is itself a dictionary.
    /// </summary>
    internal void LoadResourceDictionary(ContentStreamDictionary dictionary, ParsingArguments arguments)
    {
        if (!dictionary.TryGetValue(CosName.FONT, out var fontBase))
        {
            return;
        }

        if (fontBase is ContentStreamDictionary fontDictionary)
        {
            LoadFontDictionary(fontDictionary, arguments);
        }
    }

    /// <summary>
    /// Registers a font for every entry of <paramref name="fontDictionary"/> which is not already loaded.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// An entry's value is not a <see cref="CosObject"/> and lenient parsing is off.
    /// </exception>
    private void LoadFontDictionary(ContentStreamDictionary fontDictionary, ParsingArguments arguments)
    {
        foreach (var entry in fontDictionary)
        {
            var fontName = entry.Key;

            if (loadedFonts.ContainsKey(fontName))
            {
                continue;
            }

            if (entry.Value is CosObject reference)
            {
                // The referenced object is parsed but the result is not used yet:
                // every font is currently registered as an empty CompositeFont.
                arguments.Container.Get<DynamicParser>().Parse(arguments, reference, false);

                loadedFonts[fontName] = new CompositeFont();

                continue;
            }

            // Lenient parsing skips entries which do not point at an object.
            if (!arguments.IsLenientParsing)
            {
                throw new InvalidOperationException($"The font with name {fontName} did not link to an object key. Value was: {entry.Value}.");
            }
        }
    }
}
}

View File

@@ -50,6 +50,18 @@
return null;
}
/// <summary>
/// Looks up <paramref name="key"/> and returns its value when that value is an instance of <typeparamref name="T"/>.
/// </summary>
/// <param name="key">The name to look up.</param>
/// <param name="item">The value cast to <typeparamref name="T"/>, or null when the key is absent or the value has another type.</param>
/// <returns><see langword="true"/> when <paramref name="item"/> was set to a non-null value.</returns>
public bool TryGetItemOfType<T>(CosName key, out T item) where T : CosBase
{
    item = inner.TryGetValue(key, out var found) ? found as T : null;

    return item != null;
}
public void Set(CosName key, CosBase value)
{
if (key == null)
@@ -74,6 +86,5 @@
throw new NotImplementedException();
}
#endregion
}
}

View File

@@ -0,0 +1,75 @@
namespace UglyToad.Pdf.Core
{
using System;
using Geometry;
/// <summary>
/// Specifies the conversion from the transformed coordinate space to the original untransformed coordinate space.
/// </summary>
/// <remarks>
/// The matrix is held as 9 values in row order; A-F are read from the first two columns of each row.
/// </remarks>
internal struct TransformationMatrix
{
    /// <summary>
    /// The identity matrix. Read-only so the shared instance cannot be replaced.
    /// </summary>
    public static readonly TransformationMatrix Default = new TransformationMatrix(new decimal[]
    {
        1,0,0,
        0,1,0,
        0,0,1
    });

    private readonly decimal[] value;

    /// <summary>Row 1, column 1.</summary>
    public decimal A => value[0];

    /// <summary>Row 1, column 2.</summary>
    public decimal B => value[1];

    /// <summary>Row 2, column 1.</summary>
    public decimal C => value[3];

    /// <summary>Row 2, column 2.</summary>
    public decimal D => value[4];

    /// <summary>Row 3, column 1.</summary>
    public decimal E => value[6];

    /// <summary>Row 3, column 2.</summary>
    public decimal F => value[7];

    /// <summary>
    /// Creates a matrix from all 9 elements given in row order.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="value"/> does not have exactly 9 elements.</exception>
    public TransformationMatrix(decimal[] value)
    {
        if (value == null)
        {
            throw new ArgumentNullException(nameof(value));
        }

        if (value.Length != 9)
        {
            // Report the length: concatenating the array itself only prints its type name.
            throw new ArgumentException("The constructor for the PDF transformation matrix must contain 9 elements. Instead got: " + value.Length);
        }

        this.value = value;
    }

    /// <summary>
    /// Applies the matrix to <paramref name="original"/>: x' = A*x + C*y + E and y' = B*x + D*y + F.
    /// </summary>
    public PdfPoint Transform(PdfPoint original)
    {
        var x = A * original.X + C * original.Y + E;
        var y = B * original.X + D * original.Y + F;

        return new PdfPoint(x, y);
    }

    /// <summary>
    /// Creates a matrix from either all 9 elements, or the 6 elements [A, B, C, D, E, F]
    /// with the third column filled in as 0, 0, 1.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="values"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="values"/> has neither 9 nor 6 elements.</exception>
    public static TransformationMatrix FromArray(decimal[] values)
    {
        if (values == null)
        {
            throw new ArgumentNullException(nameof(values));
        }

        if (values.Length == 9)
        {
            return new TransformationMatrix(values);
        }

        if (values.Length == 6)
        {
            return new TransformationMatrix(new []
            {
                values[0], values[1], 0,
                values[2], values[3], 0,
                values[4], values[5], 1
            });
        }

        throw new ArgumentException("The array must either define all 9 elements of the matrix or all 6 key elements. Instead array was of length: " + values.Length);
    }

    /// <summary>
    /// Formats the matrix as three rows separated by CRLF, with the third column written as 0, 0, 1.
    /// </summary>
    public override string ToString()
    {
        return $"{A}, {B}, 0\r\n{C}, {D}, 0\r\n{E}, {F}, 1";
    }
}
}

View File

@@ -64,24 +64,6 @@
private bool closed = false;
private readonly ScratchFile scratchFile;
public COSDocument() : this(ScratchFile.getMainMemoryOnlyInstance())
{
}
/**
* Constructor that will use the provide memory handler for storage of the
* PDF streams.
*
* @param scratchFile memory handler for buffering of PDF streams
*
*/
public COSDocument(ScratchFile scratchFile)
{
this.scratchFile = scratchFile;
}
/**
* Creates a new COSStream using the current configuration for scratch files.
*
@@ -380,7 +362,6 @@
{
stream.Dispose();
}
scratchFile?.Dispose();
closed = true;
}
}

View File

@@ -1,20 +1,18 @@
using System;
using System.Collections.Generic;
using System.Text;
namespace UglyToad.Pdf.Fonts
namespace UglyToad.Pdf.Fonts
{
using System;
using System.Collections.Generic;
using Cmap;
using Cos;
public class CompositeFont
{
public bool IsSimple { get; } = false;
//public class CompositeFont
//{
// public bool IsSimple { get; } = false;
public CosName SubType { get; } = CosName.TYPE0;
// public CosName SubType { get; } = CosName.TYPE0;
public CharacterIdentifierFont Descendant { get; }
}
// public CharacterIdentifierFont Descendant { get; }
//}
/// <summary>
/// Equivalent to the DW2 array in the font dictionary for vertical fonts.
@@ -49,7 +47,7 @@ namespace UglyToad.Pdf.Fonts
public class CharacterIdentifierFont
{
public const int DefaultWidthWhenUndeclared = 1000;
public CidFontType Subtype { get; }
public CosName BaseFont { get; }
@@ -66,10 +64,10 @@ namespace UglyToad.Pdf.Fonts
public CharacterIdentifierToGlyphIdentifierMap CidToGidMap { get; }
public CharacterIdentifierFont(CidFontType subtype, CosName baseFont, CharacterIdentifierSystemInfo systemInfo,
CosObjectKey fontDescriptor,
public CharacterIdentifierFont(CidFontType subtype, CosName baseFont, CharacterIdentifierSystemInfo systemInfo,
CosObjectKey fontDescriptor,
int defaultWidth,
COSArray widths,
COSArray widths,
CharacterIdentifierToGlyphIdentifierMap cidToGidMap)
{
Subtype = subtype;
@@ -81,7 +79,7 @@ namespace UglyToad.Pdf.Fonts
CidToGidMap = cidToGidMap;
}
}
public class CharacterIdentifierFontBuilder
@@ -98,7 +96,7 @@ namespace UglyToad.Pdf.Fonts
private readonly CharacterIdentifierSystemInfo systemInfo;
private readonly CosObjectKey fontDescriptorKey;
public CharacterIdentifierFontBuilder(CosName subType, CosName baseFont,
public CharacterIdentifierFontBuilder(CosName subType, CosName baseFont,
CharacterIdentifierSystemInfo systemInfo,
CosObjectKey fontDescriptorKey)
{
@@ -126,11 +124,6 @@ namespace UglyToad.Pdf.Fonts
public class CharacterIdentifierToGlyphIdentifierMap
{
}
public class CharacterMap
{
}
}

View File

@@ -0,0 +1,23 @@
namespace UglyToad.Pdf.Fonts
{
using Cmap;
using Cos;
/// <summary>
/// The members exposed by every font.
/// </summary>
public interface IFont
{
/// <summary>
/// The name identifying the subtype of this font.
/// </summary>
CosName SubType { get; }
/// <summary>
/// The base font type as a string.
/// NOTE(review): the expected values are not visible here - confirm against the implementations.
/// </summary>
string BaseFontType { get; }
/// <summary>
/// The character map for this font; presumably maps character codes to Unicode - TODO confirm.
/// </summary>
CMap ToUnicode { get; }
}
/// <summary>
/// An <see cref="IFont"/> implementation with get-only properties.
/// No constructor assigns them, so they keep their default (null) values.
/// </summary>
public class CompositeFont : IFont
{
/// <summary>
/// The name identifying the subtype of this font.
/// </summary>
public CosName SubType { get; }
/// <summary>
/// The base font type as a string.
/// </summary>
public string BaseFontType { get; }
/// <summary>
/// The character map for this font; presumably maps character codes to Unicode - TODO confirm.
/// </summary>
public CMap ToUnicode { get; }
}
}

View File

@@ -0,0 +1,34 @@
namespace UglyToad.Pdf.Geometry
{
/// <summary>
/// A point with decimal x and y values.
/// </summary>
public struct PdfPoint
{
    /// <summary>
    /// The point (0, 0). Read-only so the shared value cannot be reassigned by callers.
    /// </summary>
    public static readonly PdfPoint Origin = new PdfPoint(0m, 0m);

    /// <summary>
    /// The x value.
    /// </summary>
    public decimal X { get; }

    /// <summary>
    /// The y value.
    /// </summary>
    public decimal Y { get; }

    /// <summary>
    /// Creates a point from decimal values.
    /// </summary>
    public PdfPoint(decimal x, decimal y)
    {
        X = x;
        Y = y;
    }

    /// <summary>
    /// Creates a point from integer values.
    /// </summary>
    /// <remarks>
    /// Kept as its own overload: without it an integer argument would convert equally well
    /// to the decimal and double overloads, making the call ambiguous.
    /// </remarks>
    public PdfPoint(int x, int y)
    {
        X = x;
        Y = y;
    }

    /// <summary>
    /// Creates a point from double values, converting each to decimal.
    /// </summary>
    public PdfPoint(double x, double y)
    {
        X = (decimal)x;
        Y = (decimal)y;
    }

    /// <summary>
    /// Formats the point as <c>(x:X, y:Y)</c>.
    /// </summary>
    public override string ToString()
    {
        return $"(x:{X}, y:{Y})";
    }
}
}

View File

@@ -0,0 +1,46 @@
namespace UglyToad.Pdf.Geometry
{
using System;
/// <summary>
/// An axis-aligned rectangle built from two opposite corners, with y increasing toward the top.
/// </summary>
public class PdfRectangle
{
    /// <summary>The corner with the smallest x and largest y.</summary>
    public PdfPoint TopLeft { get; }

    /// <summary>The corner with the largest x and smallest y.</summary>
    public PdfPoint BottomRight { get; }

    /// <summary>The corner with the largest x and largest y.</summary>
    public PdfPoint TopRight { get; }

    /// <summary>The corner with the smallest x and smallest y.</summary>
    public PdfPoint BottomLeft { get; }

    /// <summary>The difference between the largest and smallest x.</summary>
    public decimal Width { get; }

    /// <summary>The difference between the largest and smallest y.</summary>
    public decimal Height { get; }

    /// <summary>The product of <see cref="Width"/> and <see cref="Height"/>.</summary>
    public decimal Area { get; }

    /// <summary>
    /// Creates a rectangle from two opposite corner points, given in any order.
    /// </summary>
    public PdfRectangle(PdfPoint point1, PdfPoint point2) : this(point1.X, point1.Y, point2.X, point2.Y) { }

    /// <summary>
    /// Creates a rectangle from the coordinates of two opposite corners, given in any order.
    /// </summary>
    public PdfRectangle(decimal x1, decimal y1, decimal x2, decimal y2)
    {
        var minX = Math.Min(x1, x2);
        var maxX = Math.Max(x1, x2);
        var minY = Math.Min(y1, y2);
        var maxY = Math.Max(y1, y2);

        Width = maxX - minX;
        Height = maxY - minY;
        Area = Width * Height;

        BottomLeft = new PdfPoint(minX, minY);
        BottomRight = new PdfPoint(maxX, minY);
        TopLeft = new PdfPoint(minX, maxY);
        TopRight = new PdfPoint(maxX, maxY);
    }

    /// <summary>
    /// Formats the rectangle as its top-left and bottom-right corners.
    /// </summary>
    public override string ToString() => $"[{TopLeft}, {BottomRight}]";
}
}

View File

@@ -0,0 +1,13 @@
namespace UglyToad.Pdf.Graphics
{
using System.Collections.Generic;
using Operations;
using Tokenization.Tokens;
using Util.JetBrains.Annotations;
/// <summary>
/// Creates an <see cref="IGraphicsStateOperation"/> from an operator token and its operands.
/// </summary>
internal interface IGraphicsStateOperationFactory
{
/// <summary>
/// Creates the operation for <paramref name="op"/> using <paramref name="operands"/>.
/// </summary>
/// <param name="op">The operator token.</param>
/// <param name="operands">The operand tokens for the operator.</param>
/// <returns>The operation; the result may be null, as marked by <c>[CanBeNull]</c>.</returns>
[CanBeNull]
IGraphicsStateOperation Create(OperatorToken op, IReadOnlyList<IToken> operands);
}
}

View File

@@ -0,0 +1,29 @@
namespace UglyToad.Pdf.Graphics.Operations
{
using Geometry;
/// <summary>
/// The operation for the "c" operator: a curve described by two control points and an end point.
/// </summary>
internal class AppendDualControlPointBezierCurve : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "c";

    /// <summary>The point (x1, y1).</summary>
    public PdfPoint ControlPoint1 { get; }

    /// <summary>The point (x2, y2).</summary>
    public PdfPoint ControlPoint2 { get; }

    /// <summary>The point (x3, y3).</summary>
    public PdfPoint End { get; }

    /// <summary>The operator symbol, <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>
    /// Creates the operation from its six operands, in the order they are written before the operator.
    /// </summary>
    public AppendDualControlPointBezierCurve(decimal x1, decimal y1, decimal x2, decimal y2, decimal x3, decimal y3)
    {
        End = new PdfPoint(x3, y3);
        ControlPoint2 = new PdfPoint(x2, y2);
        ControlPoint1 = new PdfPoint(x1, y1);
    }

    /// <summary>
    /// Writes the operands followed by the operator symbol.
    /// </summary>
    public override string ToString() =>
        $"{ControlPoint1.X} {ControlPoint1.Y} {ControlPoint2.X} {ControlPoint2.Y} {End.X} {End.Y} {Symbol}";
}
}

View File

@@ -0,0 +1,26 @@
namespace UglyToad.Pdf.Graphics.Operations
{
using Geometry;
/// <summary>
/// The operation for the "y" operator: a curve described by a first control point and an end point.
/// </summary>
internal class AppendEndControlPointBezierCurve : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "y";

    /// <summary>The point (x1, y1).</summary>
    public PdfPoint ControlPoint1 { get; }

    /// <summary>The point (x3, y3).</summary>
    public PdfPoint End { get; }

    /// <summary>The operator symbol, <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>
    /// Creates the operation from its four operands, in the order they are written before the operator.
    /// </summary>
    public AppendEndControlPointBezierCurve(decimal x1, decimal y1, decimal x3, decimal y3)
    {
        End = new PdfPoint(x3, y3);
        ControlPoint1 = new PdfPoint(x1, y1);
    }

    /// <summary>
    /// Writes the operands followed by the operator symbol.
    /// </summary>
    public override string ToString() =>
        $"{ControlPoint1.X} {ControlPoint1.Y} {End.X} {End.Y} {Symbol}";
}
}

View File

@@ -0,0 +1,30 @@
namespace UglyToad.Pdf.Graphics.Operations
{
using Geometry;
/// <summary>
/// Operation for the <c>re</c> operator: a rectangle given by its lower-left
/// corner, width and height.
/// </summary>
internal class AppendRectangle : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "re";

    /// <summary>
    /// Creates the operation from the four operands.
    /// </summary>
    /// <param name="x">X of the lower-left corner.</param>
    /// <param name="y">Y of the lower-left corner.</param>
    /// <param name="width">The rectangle width, stored as given.</param>
    /// <param name="height">The rectangle height, stored as given.</param>
    public AppendRectangle(decimal x, decimal y, decimal width, decimal height)
    {
        Height = height;
        Width = width;
        LowerLeft = new PdfPoint(x, y);
    }

    /// <summary>The lower-left corner.</summary>
    public PdfPoint LowerLeft { get; }

    /// <summary>The width operand.</summary>
    public decimal Width { get; }

    /// <summary>The height operand.</summary>
    public decimal Height { get; }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Formats the operation as its operands followed by the operator symbol.</summary>
    public override string ToString()
        => $"{LowerLeft.X} {LowerLeft.Y} {Width} {Height} {Symbol}";
}
}

View File

@@ -0,0 +1,26 @@
namespace UglyToad.Pdf.Graphics.Operations
{
using Geometry;
/// <summary>
/// Operation for the <c>v</c> operator: a Bezier curve described by a single
/// explicit (second) control point and an end point.
/// </summary>
internal class AppendStartControlPointBezierCurve : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "v";

    /// <summary>
    /// Creates the operation from the four coordinate operands.
    /// </summary>
    /// <param name="x2">X of the second control point.</param>
    /// <param name="y2">Y of the second control point.</param>
    /// <param name="x3">X of the end point.</param>
    /// <param name="y3">Y of the end point.</param>
    public AppendStartControlPointBezierCurve(decimal x2, decimal y2, decimal x3, decimal y3)
    {
        End = new PdfPoint(x3, y3);
        ControlPoint2 = new PdfPoint(x2, y2);
    }

    /// <summary>The explicit control point.</summary>
    public PdfPoint ControlPoint2 { get; }

    /// <summary>The end point of the curve.</summary>
    public PdfPoint End { get; }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Formats the operation as its operands followed by the operator symbol.</summary>
    public override string ToString()
        => $"{ControlPoint2.X} {ControlPoint2.Y} {End.X} {End.Y} {Symbol}";
}
}

View File

@@ -0,0 +1,23 @@
namespace UglyToad.Pdf.Graphics.Operations
{
using Geometry;
/// <summary>
/// Operation for the <c>l</c> operator: a straight line segment to <see cref="End"/>.
/// </summary>
internal class AppendStraightLineSegment : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "l";

    /// <summary>
    /// Creates the operation from the end point coordinates.
    /// </summary>
    /// <param name="x">X of the end point.</param>
    /// <param name="y">Y of the end point.</param>
    public AppendStraightLineSegment(decimal x, decimal y) => End = new PdfPoint(x, y);

    /// <summary>The end point of the segment.</summary>
    public PdfPoint End { get; }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Formats the operation as its operands followed by the operator symbol.</summary>
    public override string ToString() => $"{End.X} {End.Y} {Symbol}";
}
}

View File

@@ -0,0 +1,23 @@
namespace UglyToad.Pdf.Graphics.Operations
{
using Geometry;
/// <summary>
/// Operation for the <c>m</c> operator: begins a new subpath at <see cref="Point"/>.
/// </summary>
internal class BeginNewSubpath : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "m";

    /// <summary>
    /// Creates the operation from the point coordinates.
    /// </summary>
    /// <param name="x">X of the point.</param>
    /// <param name="y">Y of the point.</param>
    public BeginNewSubpath(decimal x, decimal y) => Point = new PdfPoint(x, y);

    /// <summary>The point supplied with the operator.</summary>
    public PdfPoint Point { get; }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Formats the operation as its operands followed by the operator symbol.</summary>
    public override string ToString() => $"{Point.X} {Point.Y} {Symbol}";
}
}

View File

@@ -0,0 +1,19 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>BT</c> (begin text) operator. It carries no operands, so a
/// single shared instance is exposed through <see cref="Value"/>.
/// </summary>
internal class BeginText : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "BT";

    /// <summary>The shared instance of this operation.</summary>
    public static readonly BeginText Value = new BeginText();

    // Private so that Value is the only instance.
    private BeginText()
    {
    }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Returns the operator symbol.</summary>
    public override string ToString() => Symbol;
}
}

View File

@@ -0,0 +1,20 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>s</c> (close and stroke path) operator. It carries no
/// operands, so a single shared instance is exposed through <see cref="Value"/>.
/// </summary>
internal class CloseAndStrokePath : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "s";

    /// <summary>The shared instance of this operation.</summary>
    public static readonly CloseAndStrokePath Value = new CloseAndStrokePath();

    // Private so that Value is the only instance.
    private CloseAndStrokePath()
    {
    }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Returns the operator symbol.</summary>
    public override string ToString() => Symbol;
}
}

View File

@@ -0,0 +1,20 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>b*</c> (close, fill with the even-odd rule, and stroke)
/// operator. It carries no operands, so a single shared instance is exposed
/// through <see cref="Value"/>.
/// </summary>
internal class CloseFillPathEvenOddRuleAndStroke : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "b*";

    /// <summary>The shared instance of this operation.</summary>
    public static readonly CloseFillPathEvenOddRuleAndStroke Value = new CloseFillPathEvenOddRuleAndStroke();

    // Private so that Value is the only instance.
    private CloseFillPathEvenOddRuleAndStroke()
    {
    }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Returns the operator symbol.</summary>
    public override string ToString() => Symbol;
}
}

View File

@@ -0,0 +1,20 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>b</c> (close, fill with the non-zero winding rule, and
/// stroke) operator. It carries no operands, so a single shared instance is
/// exposed through <see cref="Value"/>.
/// </summary>
internal class CloseFillPathNonZeroWindingAndStroke : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "b";

    /// <summary>The shared instance of this operation.</summary>
    public static readonly CloseFillPathNonZeroWindingAndStroke Value = new CloseFillPathNonZeroWindingAndStroke();

    // Private so that Value is the only instance.
    private CloseFillPathNonZeroWindingAndStroke()
    {
    }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Returns the operator symbol.</summary>
    public override string ToString() => Symbol;
}
}

View File

@@ -0,0 +1,20 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>h</c> (close subpath) operator. It carries no operands,
/// so a single shared instance is exposed through <see cref="Value"/>.
/// </summary>
internal class CloseSubpath : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "h";

    /// <summary>The shared instance of this operation.</summary>
    public static readonly CloseSubpath Value = new CloseSubpath();

    // Private so that Value is the only instance.
    private CloseSubpath()
    {
    }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Returns the operator symbol.</summary>
    public override string ToString() => Symbol;
}
}

View File

@@ -0,0 +1,20 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>n</c> (end path) operator. It carries no operands, so a
/// single shared instance is exposed through <see cref="Value"/>.
/// </summary>
internal class EndPath : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "n";

    /// <summary>The shared instance of this operation.</summary>
    public static readonly EndPath Value = new EndPath();

    // Private so that Value is the only instance.
    private EndPath()
    {
    }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Returns the operator symbol.</summary>
    public override string ToString() => Symbol;
}
}

View File

@@ -0,0 +1,19 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>ET</c> (end text) operator, the counterpart of <c>BT</c>.
/// It carries no operands, so a single shared instance is exposed through
/// <see cref="Value"/>.
/// </summary>
internal class EndText : IGraphicsStateOperation
{
    /// <summary>
    /// The operator symbol for this operation. This must be "ET"; the previous
    /// value "q" is the symbol of the Push operation, so the two types collided
    /// when looked up by symbol and "ET" was never recognised.
    /// </summary>
    public const string Symbol = "ET";

    /// <summary>The shared instance of this operation.</summary>
    public static readonly EndText Value = new EndText();

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    // Private so that Value is the only instance.
    private EndText()
    {
    }

    /// <summary>Returns the operator symbol.</summary>
    public override string ToString()
    {
        return Symbol;
    }
}
}

View File

@@ -0,0 +1,20 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>f*</c> (fill path using the even-odd rule) operator. It
/// carries no operands, so a single shared instance is exposed through
/// <see cref="Value"/>.
/// </summary>
internal class FillPathEvenOddRule : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "f*";

    /// <summary>The shared instance of this operation.</summary>
    public static readonly FillPathEvenOddRule Value = new FillPathEvenOddRule();

    // Private so that Value is the only instance.
    private FillPathEvenOddRule()
    {
    }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Returns the operator symbol.</summary>
    public override string ToString() => Symbol;
}
}

View File

@@ -0,0 +1,20 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>B*</c> (fill using the even-odd rule, then stroke)
/// operator. It carries no operands, so a single shared instance is exposed
/// through <see cref="Value"/>.
/// </summary>
internal class FillPathEvenOddRuleAndStroke : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "B*";

    /// <summary>The shared instance of this operation.</summary>
    public static readonly FillPathEvenOddRuleAndStroke Value = new FillPathEvenOddRuleAndStroke();

    // Private so that Value is the only instance.
    private FillPathEvenOddRuleAndStroke()
    {
    }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Returns the operator symbol.</summary>
    public override string ToString() => Symbol;
}
}

View File

@@ -0,0 +1,20 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>f</c> (fill path using the non-zero winding rule)
/// operator. It carries no operands, so a single shared instance is exposed
/// through <see cref="Value"/>.
/// </summary>
internal class FillPathNonZeroWinding : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "f";

    /// <summary>The shared instance of this operation.</summary>
    public static readonly FillPathNonZeroWinding Value = new FillPathNonZeroWinding();

    // Private so that Value is the only instance.
    private FillPathNonZeroWinding()
    {
    }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Returns the operator symbol.</summary>
    public override string ToString() => Symbol;
}
}

View File

@@ -0,0 +1,20 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>B</c> (fill using the non-zero winding rule, then stroke)
/// operator. It carries no operands, so a single shared instance is exposed
/// through <see cref="Value"/>.
/// </summary>
internal class FillPathNonZeroWindingAndStroke : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "B";

    /// <summary>The shared instance of this operation.</summary>
    public static readonly FillPathNonZeroWindingAndStroke Value = new FillPathNonZeroWindingAndStroke();

    // Private so that Value is the only instance.
    private FillPathNonZeroWindingAndStroke()
    {
    }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Returns the operator symbol.</summary>
    public override string ToString() => Symbol;
}
}

View File

@@ -0,0 +1,20 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>F</c> operator, the compatibility spelling of the
/// non-zero winding fill (going by the class name). It carries no operands, so a
/// single shared instance is exposed through <see cref="Value"/>.
/// </summary>
internal class FillPathNonZeroWindingCompatibility : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "F";

    /// <summary>The shared instance of this operation.</summary>
    public static readonly FillPathNonZeroWindingCompatibility Value = new FillPathNonZeroWindingCompatibility();

    // Private so that Value is the only instance.
    private FillPathNonZeroWindingCompatibility()
    {
    }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Returns the operator symbol.</summary>
    public override string ToString() => Symbol;
}
}

View File

@@ -0,0 +1,7 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// A single parsed operation; each implementation corresponds to one operator symbol.
/// </summary>
internal interface IGraphicsStateOperation
{
/// <summary>
/// The operator symbol of this operation (implementations return their <c>Symbol</c> constant).
/// </summary>
string Operator { get; }
}
}

View File

@@ -0,0 +1,20 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>W*</c> (modify clipping path using the even-odd rule)
/// operator. It carries no operands, so a single shared instance is exposed
/// through <see cref="Value"/>.
/// </summary>
internal class ModifyClippingByEvenOddIntersect : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "W*";

    /// <summary>The shared instance of this operation.</summary>
    public static readonly ModifyClippingByEvenOddIntersect Value = new ModifyClippingByEvenOddIntersect();

    // Private so that Value is the only instance.
    private ModifyClippingByEvenOddIntersect()
    {
    }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Returns the operator symbol.</summary>
    public override string ToString() => Symbol;
}
}

View File

@@ -0,0 +1,20 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>W</c> (modify clipping path using the non-zero winding
/// rule) operator. It carries no operands, so a single shared instance is
/// exposed through <see cref="Value"/>.
/// </summary>
internal class ModifyClippingByNonZeroWindingIntersect : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "W";

    /// <summary>The shared instance of this operation.</summary>
    public static readonly ModifyClippingByNonZeroWindingIntersect Value = new ModifyClippingByNonZeroWindingIntersect();

    // Private so that Value is the only instance.
    private ModifyClippingByNonZeroWindingIntersect()
    {
    }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Returns the operator symbol.</summary>
    public override string ToString() => Symbol;
}
}

View File

@@ -0,0 +1,32 @@
namespace UglyToad.Pdf.Graphics.Operations
{
using System;
/// <summary>
/// Operation for the <c>cm</c> operator: carries the six numbers supplied to
/// modify the transformation matrix.
/// </summary>
internal class ModifyTransformationMatrix : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "cm";

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>The six matrix values, in the order they were supplied.</summary>
    public decimal[] Value { get; }

    /// <summary>
    /// Creates the operation from its six numeric operands.
    /// </summary>
    /// <param name="value">Exactly six numbers.</param>
    /// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="value"/> does not contain exactly six numbers.</exception>
    public ModifyTransformationMatrix(decimal[] value)
    {
        if (value == null)
        {
            throw new ArgumentNullException(nameof(value));
        }

        if (value.Length != 6)
        {
            // Concatenating the array itself would only print "System.Decimal[]",
            // so list the actual values to make the message useful.
            throw new ArgumentException(
                $"The cm operator must pass 6 numbers. Instead got {value.Length}: [{string.Join(", ", value)}]",
                nameof(value));
        }

        Value = value;
    }

    /// <summary>Formats the operation as its six operands followed by the operator symbol.</summary>
    public override string ToString()
    {
        return $"{Value[0]} {Value[1]} {Value[2]} {Value[3]} {Value[4]} {Value[5]} {Symbol}";
    }
}
}

View File

@@ -0,0 +1,19 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>T*</c> (move to next line) operator. It carries no
/// operands, so a single shared instance is exposed through <see cref="Value"/>.
/// </summary>
internal class MoveToNextLine : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "T*";

    /// <summary>The shared instance of this operation.</summary>
    public static readonly MoveToNextLine Value = new MoveToNextLine();

    // Private so that Value is the only instance.
    private MoveToNextLine()
    {
    }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Returns the operator symbol.</summary>
    public override string ToString() => Symbol;
}
}

View File

@@ -0,0 +1,21 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>'</c> operator: move to the next line and show
/// <see cref="Text"/>.
/// </summary>
internal class MoveToNextLineShowString : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "'";

    /// <summary>
    /// Creates the operation for the given text.
    /// </summary>
    /// <param name="text">The string operand; stored as given.</param>
    public MoveToNextLineShowString(string text) => Text = text;

    /// <summary>The string operand.</summary>
    public string Text { get; }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Formats the operation as its operand followed by the operator symbol.</summary>
    public override string ToString() => $"{Text} {Symbol}";
}
}

View File

@@ -0,0 +1,27 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>"</c> operator: move to the next line and show
/// <see cref="Text"/>, carrying word and character spacing values.
/// </summary>
internal class MoveToNextLineShowStringWithSpacing : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "\"";

    /// <summary>
    /// Creates the operation from its three operands.
    /// </summary>
    /// <param name="wordSpacing">The word spacing operand.</param>
    /// <param name="characterSpacing">The character spacing operand.</param>
    /// <param name="text">The string operand; stored as given.</param>
    public MoveToNextLineShowStringWithSpacing(decimal wordSpacing, decimal characterSpacing, string text)
    {
        Text = text;
        CharacterSpacing = characterSpacing;
        WordSpacing = wordSpacing;
    }

    /// <summary>The word spacing operand.</summary>
    public decimal WordSpacing { get; }

    /// <summary>The character spacing operand.</summary>
    public decimal CharacterSpacing { get; }

    /// <summary>The string operand.</summary>
    public string Text { get; }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Formats the operation as its operands followed by the operator symbol.</summary>
    public override string ToString() => $"{WordSpacing} {CharacterSpacing} {Text} {Symbol}";
}
}

View File

@@ -0,0 +1,24 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>Td</c> operator: move to the next line using the offsets
/// <see cref="Tx"/> and <see cref="Ty"/>.
/// </summary>
internal class MoveToNextLineWithOffset : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "Td";

    /// <summary>
    /// Creates the operation from its two offset operands.
    /// </summary>
    /// <param name="tx">The first (x) offset operand.</param>
    /// <param name="ty">The second (y) offset operand.</param>
    public MoveToNextLineWithOffset(decimal tx, decimal ty)
    {
        Ty = ty;
        Tx = tx;
    }

    /// <summary>The first (x) offset operand.</summary>
    public decimal Tx { get; }

    /// <summary>The second (y) offset operand.</summary>
    public decimal Ty { get; }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Formats the operation as its operands followed by the operator symbol.</summary>
    public override string ToString() => $"{Tx} {Ty} {Symbol}";
}
}

View File

@@ -0,0 +1,24 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>TD</c> operator: move to the next line using the offsets
/// <see cref="Tx"/> and <see cref="Ty"/>, also setting the leading (going by the
/// class name).
/// </summary>
internal class MoveToNextLineWithOffsetSetLeading : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "TD";

    /// <summary>
    /// Creates the operation from its two offset operands.
    /// </summary>
    /// <param name="tx">The first (x) offset operand.</param>
    /// <param name="ty">The second (y) offset operand.</param>
    public MoveToNextLineWithOffsetSetLeading(decimal tx, decimal ty)
    {
        Ty = ty;
        Tx = tx;
    }

    /// <summary>The first (x) offset operand.</summary>
    public decimal Tx { get; }

    /// <summary>The second (y) offset operand.</summary>
    public decimal Ty { get; }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Formats the operation as its operands followed by the operator symbol.</summary>
    public override string ToString() => $"{Tx} {Ty} {Symbol}";
}
}

View File

@@ -0,0 +1,19 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>Q</c> operator, the counterpart of <c>q</c> (Push). It
/// carries no operands, so a single shared instance is exposed through
/// <see cref="Value"/>.
/// </summary>
internal class Pop : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "Q";

    /// <summary>The shared instance of this operation.</summary>
    public static readonly Pop Value = new Pop();

    // Private so that Value is the only instance.
    private Pop()
    {
    }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Returns the operator symbol.</summary>
    public override string ToString() => Symbol;
}
}

View File

@@ -0,0 +1,19 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>q</c> operator, the counterpart of <c>Q</c> (Pop). It
/// carries no operands, so a single shared instance is exposed through
/// <see cref="Value"/>.
/// </summary>
internal class Push : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "q";

    /// <summary>The shared instance of this operation.</summary>
    public static readonly Push Value = new Push();

    // Private so that Value is the only instance.
    private Push()
    {
    }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Returns the operator symbol.</summary>
    public override string ToString() => Symbol;
}
}

View File

@@ -0,0 +1,21 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>Tc</c> operator: sets the character spacing to
/// <see cref="Spacing"/>.
/// </summary>
internal class SetCharacterSpacing : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "Tc";

    /// <summary>
    /// Creates the operation for the given spacing.
    /// </summary>
    /// <param name="spacing">The spacing operand; stored as given.</param>
    public SetCharacterSpacing(decimal spacing) => Spacing = spacing;

    /// <summary>The spacing operand.</summary>
    public decimal Spacing { get; }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Formats the operation as its operand followed by the operator symbol.</summary>
    public override string ToString() => $"{Spacing} {Symbol}";
}
}

View File

@@ -0,0 +1,9 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>ri</c> operator (colour rendering intent, going by the class name).
/// </summary>
/// <remarks>
/// NOTE(review): unlike the other operations in this namespace, this type stores no operand,
/// declares no constructor and does not override ToString - it looks unfinished; confirm
/// whether the operand of <c>ri</c> should be captured here.
/// </remarks>
internal class SetColorRenderingIntent : IGraphicsStateOperation
{
/// <summary>The operator symbol for this operation.</summary>
public const string Symbol = "ri";
/// <summary>Always <see cref="Symbol"/>.</summary>
public string Operator => Symbol;
}
}

View File

@@ -0,0 +1,21 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>i</c> operator: sets the flatness tolerance to
/// <see cref="Tolerance"/>.
/// </summary>
internal class SetFlatnessTolerance : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "i";

    /// <summary>
    /// Creates the operation for the given tolerance.
    /// </summary>
    /// <param name="tolerance">The tolerance operand; stored as given.</param>
    public SetFlatnessTolerance(decimal tolerance) => Tolerance = tolerance;

    /// <summary>The tolerance operand.</summary>
    public decimal Tolerance { get; }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Formats the operation as its operand followed by the operator symbol.</summary>
    public override string ToString() => $"{Tolerance} {Symbol}";
}
}

View File

@@ -0,0 +1,26 @@
namespace UglyToad.Pdf.Graphics.Operations
{
using Cos;
/// <summary>
/// Operation for the <c>Tf</c> operator: carries a font name and a size.
/// </summary>
internal class SetFontSize : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "Tf";

    /// <summary>
    /// Creates the operation from its two operands.
    /// </summary>
    /// <param name="font">The font name operand; stored as given.</param>
    /// <param name="size">The size operand.</param>
    public SetFontSize(CosName font, decimal size)
    {
        Size = size;
        Font = font;
    }

    /// <summary>The font name operand.</summary>
    public CosName Font { get; }

    /// <summary>The size operand.</summary>
    public decimal Size { get; }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Formats the operation as its operands followed by the operator symbol.</summary>
    public override string ToString() => $"{Font} {Size} {Symbol}";
}
}

View File

@@ -0,0 +1,21 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>Tz</c> operator: sets the horizontal scaling to
/// <see cref="Scale"/>.
/// </summary>
internal class SetHorizontalScaling : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "Tz";

    /// <summary>
    /// Creates the operation for the given scale.
    /// </summary>
    /// <param name="scale">The scale operand; stored as given.</param>
    public SetHorizontalScaling(decimal scale) => Scale = scale;

    /// <summary>The scale operand.</summary>
    public decimal Scale { get; }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Formats the operation as its operand followed by the operator symbol.</summary>
    public override string ToString() => $"{Scale} {Symbol}";
}
}

View File

@@ -0,0 +1,36 @@
namespace UglyToad.Pdf.Graphics.Operations
{
using System;
/// <summary>
/// Operation for the <c>J</c> operator: sets the line cap style.
/// </summary>
internal class SetLineCap : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "J";

    /// <summary>The line cap styles and their numeric operand values.</summary>
    public enum Style
    {
        Butt = 0,
        Round = 1,
        ProjectingSquare = 2
    }

    /// <summary>
    /// Creates the operation from the numeric operand.
    /// </summary>
    /// <param name="cap">0, 1 or 2.</param>
    /// <exception cref="ArgumentException">The value is outside 0..2.</exception>
    public SetLineCap(int cap)
        : this((Style)cap)
    {
    }

    /// <summary>
    /// Creates the operation from a cap style.
    /// </summary>
    /// <param name="cap">The cap style; its numeric value must be 0, 1 or 2.</param>
    /// <exception cref="ArgumentException">The numeric value is outside 0..2.</exception>
    public SetLineCap(Style cap)
    {
        var numeric = (int)cap;
        var isKnownStyle = numeric >= 0 && numeric <= 2;

        if (!isKnownStyle)
        {
            throw new ArgumentException("Invalid argument passed for line cap style. Should be 0, 1 or 2; instead got: " + cap);
        }

        Cap = cap;
    }

    /// <summary>The cap style. Note that the setter performs no validation.</summary>
    public Style Cap { get; set; }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Formats the operation as the numeric cap value followed by the operator symbol.</summary>
    public override string ToString() => $"{(int) Cap} {Symbol}";
}
}

View File

@@ -0,0 +1,24 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>d</c> operator: carries the dash array and the dash phase.
/// </summary>
internal class SetLineDashPattern : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "d";

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>The dash array operand; stored as given (may be null or empty).</summary>
    public decimal[] Array { get; }

    /// <summary>The dash phase operand.</summary>
    public decimal Phase { get; }

    /// <summary>
    /// Creates the operation from its two operands.
    /// </summary>
    /// <param name="array">The dash array.</param>
    /// <param name="phase">The dash phase.</param>
    public SetLineDashPattern(decimal[] array, decimal phase)
    {
        Array = array;
        Phase = phase;
    }

    /// <summary>
    /// Formats the operation as <c>[a b c] phase d</c>.
    /// </summary>
    public override string ToString()
    {
        // Interpolating the array directly would print "System.Decimal[]",
        // so join the elements explicitly. A null array is shown as empty.
        var elements = Array == null ? string.Empty : string.Join(" ", Array);

        return $"[{elements}] {Phase} {Symbol}";
    }
}
}

View File

@@ -0,0 +1,36 @@
namespace UglyToad.Pdf.Graphics.Operations
{
using System;
/// <summary>
/// Operation for the <c>j</c> operator: sets the line join style.
/// </summary>
internal class SetLineJoin : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "j";

    /// <summary>The line join styles and their numeric operand values.</summary>
    public enum Style
    {
        Miter = 0,
        Round = 1,
        Bevel = 2
    }

    /// <summary>
    /// Creates the operation from the numeric operand.
    /// </summary>
    /// <param name="join">0, 1 or 2.</param>
    /// <exception cref="ArgumentException">The value is outside 0..2.</exception>
    public SetLineJoin(int join)
        : this((Style)join)
    {
    }

    /// <summary>
    /// Creates the operation from a join style.
    /// </summary>
    /// <param name="join">The join style; its numeric value must be 0, 1 or 2.</param>
    /// <exception cref="ArgumentException">The numeric value is outside 0..2.</exception>
    public SetLineJoin(Style join)
    {
        var numeric = (int)join;
        var isKnownStyle = numeric >= 0 && numeric <= 2;

        if (!isKnownStyle)
        {
            throw new ArgumentException("Invalid argument passed for line join style. Should be 0, 1 or 2; instead got: " + join);
        }

        Join = join;
    }

    /// <summary>The join style. Note that the setter performs no validation.</summary>
    public Style Join { get; set; }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Formats the operation as the numeric join value followed by the operator symbol.</summary>
    public override string ToString() => $"{(int)Join} {Symbol}";
}
}

View File

@@ -0,0 +1,21 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>w</c> operator: sets the line width to <see cref="Width"/>.
/// </summary>
internal class SetLineWidth : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "w";

    /// <summary>
    /// Creates the operation for the given width.
    /// </summary>
    /// <param name="width">The width operand; stored as given.</param>
    public SetLineWidth(decimal width) => Width = width;

    /// <summary>The width operand.</summary>
    public decimal Width { get; }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Formats the operation as its operand followed by the operator symbol.</summary>
    public override string ToString() => $"{Width} {Symbol}";
}
}

View File

@@ -0,0 +1,21 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>M</c> operator: sets the miter limit to <see cref="Limit"/>.
/// </summary>
internal class SetMiterLimit : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "M";

    /// <summary>
    /// Creates the operation for the given limit.
    /// </summary>
    /// <param name="limit">The limit operand; stored as given.</param>
    public SetMiterLimit(decimal limit) => Limit = limit;

    /// <summary>The limit operand.</summary>
    public decimal Limit { get; }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Formats the operation as its operand followed by the operator symbol.</summary>
    public override string ToString() => $"{Limit} {Symbol}";
}
}

View File

@@ -0,0 +1,30 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>k</c> operator: sets the non-stroking colour from four
/// CMYK components.
/// </summary>
internal class SetNonStrokeColorDeviceCmyk : IGraphicsStateOperation
{
    /// <summary>
    /// The operator symbol for this operation. This must be the lower-case "k":
    /// the upper-case "K" belongs to the stroking variant
    /// (SetStrokeColorDeviceCmyk), matching the g/G and rg/RG pairs. Sharing "K"
    /// made the two types collide when looked up by symbol.
    /// </summary>
    public const string Symbol = "k";

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>The cyan component.</summary>
    public decimal C { get; }

    /// <summary>The magenta component.</summary>
    public decimal M { get; }

    /// <summary>The yellow component.</summary>
    public decimal Y { get; }

    /// <summary>The black (key) component.</summary>
    public decimal K { get; }

    /// <summary>
    /// Creates the operation from its four component operands; values are stored as given.
    /// </summary>
    /// <param name="c">The cyan component.</param>
    /// <param name="m">The magenta component.</param>
    /// <param name="y">The yellow component.</param>
    /// <param name="k">The black (key) component.</param>
    public SetNonStrokeColorDeviceCmyk(decimal c, decimal m, decimal y, decimal k)
    {
        C = c;
        M = m;
        Y = y;
        K = k;
    }

    /// <summary>Formats the operation as its operands followed by the operator symbol.</summary>
    public override string ToString()
    {
        return $"{C} {M} {Y} {K} {Symbol}";
    }
}
}

View File

@@ -0,0 +1,21 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>g</c> operator: sets the non-stroking colour from a
/// single gray level.
/// </summary>
internal class SetNonStrokeColorDeviceGray : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "g";

    /// <summary>
    /// Creates the operation for the given gray level.
    /// </summary>
    /// <param name="gray">The gray operand; stored as given.</param>
    public SetNonStrokeColorDeviceGray(decimal gray) => Gray = gray;

    /// <summary>The gray operand.</summary>
    public decimal Gray { get; }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Formats the operation as its operand followed by the operator symbol.</summary>
    public override string ToString() => $"{Gray} {Symbol}";
}
}

View File

@@ -0,0 +1,27 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>rg</c> operator: sets the non-stroking colour from three
/// RGB components.
/// </summary>
internal class SetNonStrokeColorDeviceRgb : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "rg";

    /// <summary>
    /// Creates the operation from its three component operands; values are stored as given.
    /// </summary>
    /// <param name="r">The red component.</param>
    /// <param name="g">The green component.</param>
    /// <param name="b">The blue component.</param>
    public SetNonStrokeColorDeviceRgb(decimal r, decimal g, decimal b)
    {
        B = b;
        G = g;
        R = r;
    }

    /// <summary>The red component.</summary>
    public decimal R { get; }

    /// <summary>The green component.</summary>
    public decimal G { get; }

    /// <summary>The blue component.</summary>
    public decimal B { get; }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Formats the operation as its operands followed by the operator symbol.</summary>
    public override string ToString() => $"{R} {G} {B} {Symbol}";
}
}

View File

@@ -0,0 +1,30 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>K</c> operator: sets the stroking colour from four CMYK
/// components.
/// </summary>
internal class SetStrokeColorDeviceCmyk : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "K";

    /// <summary>
    /// Creates the operation from its four component operands; values are stored as given.
    /// </summary>
    /// <param name="c">The cyan component.</param>
    /// <param name="m">The magenta component.</param>
    /// <param name="y">The yellow component.</param>
    /// <param name="k">The black (key) component.</param>
    public SetStrokeColorDeviceCmyk(decimal c, decimal m, decimal y, decimal k)
    {
        K = k;
        Y = y;
        M = m;
        C = c;
    }

    /// <summary>The cyan component.</summary>
    public decimal C { get; }

    /// <summary>The magenta component.</summary>
    public decimal M { get; }

    /// <summary>The yellow component.</summary>
    public decimal Y { get; }

    /// <summary>The black (key) component.</summary>
    public decimal K { get; }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Formats the operation as its operands followed by the operator symbol.</summary>
    public override string ToString() => $"{C} {M} {Y} {K} {Symbol}";
}
}

View File

@@ -0,0 +1,21 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>G</c> operator: sets the stroking colour from a single
/// gray level.
/// </summary>
internal class SetStrokeColorDeviceGray : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "G";

    /// <summary>
    /// Creates the operation for the given gray level.
    /// </summary>
    /// <param name="gray">The gray operand; stored as given.</param>
    public SetStrokeColorDeviceGray(decimal gray) => Gray = gray;

    /// <summary>The gray operand.</summary>
    public decimal Gray { get; }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Formats the operation as its operand followed by the operator symbol.</summary>
    public override string ToString() => $"{Gray} {Symbol}";
}
}

View File

@@ -0,0 +1,27 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>RG</c> operator: sets the stroking colour from three RGB
/// components.
/// </summary>
internal class SetStrokeColorDeviceRgb : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "RG";

    /// <summary>
    /// Creates the operation from its three component operands; values are stored as given.
    /// </summary>
    /// <param name="r">The red component.</param>
    /// <param name="g">The green component.</param>
    /// <param name="b">The blue component.</param>
    public SetStrokeColorDeviceRgb(decimal r, decimal g, decimal b)
    {
        B = b;
        G = g;
        R = r;
    }

    /// <summary>The red component.</summary>
    public decimal R { get; }

    /// <summary>The green component.</summary>
    public decimal G { get; }

    /// <summary>The blue component.</summary>
    public decimal B { get; }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Formats the operation as its operands followed by the operator symbol.</summary>
    public override string ToString() => $"{R} {G} {B} {Symbol}";
}
}

View File

@@ -0,0 +1,21 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>TL</c> operator: sets the text leading to
/// <see cref="Leading"/>.
/// </summary>
internal class SetTextLeading : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "TL";

    /// <summary>
    /// Creates the operation for the given leading.
    /// </summary>
    /// <param name="leading">The leading operand; stored as given.</param>
    public SetTextLeading(decimal leading) => Leading = leading;

    /// <summary>The leading operand.</summary>
    public decimal Leading { get; }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Formats the operation as its operand followed by the operator symbol.</summary>
    public override string ToString() => $"{Leading} {Symbol}";
}
}

View File

@@ -0,0 +1,28 @@
namespace UglyToad.Pdf.Graphics.Operations
{
using System;
/// <summary>
/// Operation for the <c>Tm</c> operator: carries the six numbers supplied to set
/// the text matrix.
/// </summary>
internal class SetTextMatrix : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "Tm";

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>The six matrix values, in the order they were supplied.</summary>
    public decimal[] Value { get; }

    /// <summary>
    /// Creates the operation from its six numeric operands.
    /// </summary>
    /// <param name="value">Exactly six numbers.</param>
    /// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="value"/> does not contain exactly six numbers.</exception>
    public SetTextMatrix(decimal[] value)
    {
        // Fail with a descriptive exception rather than a NullReferenceException
        // on value.Length (mirrors ModifyTransformationMatrix).
        if (value == null)
        {
            throw new ArgumentNullException(nameof(value));
        }

        if (value.Length != 6)
        {
            // Concatenating the array itself would only print "System.Decimal[]",
            // so list the actual values to make the message useful.
            throw new ArgumentException(
                $"Text matrix must provide 6 values. Instead got {value.Length}: [{string.Join(", ", value)}]",
                nameof(value));
        }

        Value = value;
    }

    /// <summary>Formats the operation as its six operands followed by the operator symbol.</summary>
    public override string ToString()
    {
        return $"{Value[0]} {Value[1]} {Value[2]} {Value[3]} {Value[4]} {Value[5]} {Symbol}";
    }
}
}

View File

@@ -0,0 +1,21 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>Tr</c> operator: sets the text rendering mode to
/// <see cref="Mode"/>.
/// </summary>
internal class SetTextRenderingMode : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "Tr";

    /// <summary>
    /// Creates the operation for the given mode.
    /// </summary>
    /// <param name="mode">The integer mode operand; stored as given, without validation.</param>
    public SetTextRenderingMode(int mode) => Mode = mode;

    /// <summary>The mode operand.</summary>
    public int Mode { get; }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Formats the operation as its operand followed by the operator symbol.</summary>
    public override string ToString() => $"{Mode} {Symbol}";
}
}

View File

@@ -0,0 +1,21 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>Ts</c> operator: sets the text rise to <see cref="Rise"/>.
/// </summary>
internal class SetTextRise : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "Ts";

    /// <summary>
    /// Creates the operation for the given rise.
    /// </summary>
    /// <param name="rise">The rise operand; stored as given.</param>
    public SetTextRise(decimal rise) => Rise = rise;

    /// <summary>The rise operand.</summary>
    public decimal Rise { get; }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Formats the operation as its operand followed by the operator symbol.</summary>
    public override string ToString() => $"{Rise} {Symbol}";
}
}

View File

@@ -0,0 +1,21 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>Tw</c> operator: sets the word spacing to
/// <see cref="Spacing"/>.
/// </summary>
internal class SetWordSpacing : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "Tw";

    /// <summary>
    /// Creates the operation for the given spacing.
    /// </summary>
    /// <param name="spacing">The spacing operand; stored as given.</param>
    public SetWordSpacing(decimal spacing) => Spacing = spacing;

    /// <summary>The spacing operand.</summary>
    public decimal Spacing { get; }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Formats the operation as its operand followed by the operator symbol.</summary>
    public override string ToString() => $"{Spacing} {Symbol}";
}
}

View File

@@ -0,0 +1,21 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>Tj</c> operator: shows <see cref="Text"/>.
/// </summary>
internal class ShowString : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "Tj";

    /// <summary>
    /// Creates the operation for the given text.
    /// </summary>
    /// <param name="text">The string operand; stored as given.</param>
    public ShowString(string text) => Text = text;

    /// <summary>The string operand.</summary>
    public string Text { get; }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Formats the operation as its operand followed by the operator symbol.</summary>
    public override string ToString() => $"{Text} {Symbol}";
}
}

View File

@@ -0,0 +1,16 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>TJ</c> operator: carries the array operand supplied with
/// the operator.
/// </summary>
internal class ShowStringsWithPositioning : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "TJ";

    /// <summary>
    /// Creates the operation for the given array.
    /// </summary>
    /// <param name="array">The array operand; stored as given.</param>
    public ShowStringsWithPositioning(object[] array) => Array = array;

    /// <summary>The array operand.</summary>
    public object[] Array { get; }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;
}
}

View File

@@ -0,0 +1,20 @@
namespace UglyToad.Pdf.Graphics.Operations
{
/// <summary>
/// Operation for the <c>S</c> (stroke path) operator. It carries no operands, so
/// a single shared instance is exposed through <see cref="Value"/>.
/// </summary>
internal class StrokePath : IGraphicsStateOperation
{
    /// <summary>The operator symbol for this operation.</summary>
    public const string Symbol = "S";

    /// <summary>The shared instance of this operation.</summary>
    public static readonly StrokePath Value = new StrokePath();

    // Private so that Value is the only instance.
    private StrokePath()
    {
    }

    /// <summary>Always <see cref="Symbol"/>.</summary>
    public string Operator => Symbol;

    /// <summary>Returns the operator symbol.</summary>
    public override string ToString() => Symbol;
}
}

View File

@@ -0,0 +1,148 @@
namespace UglyToad.Pdf.Graphics
{
using System;
using System.Collections.Generic;
using System.Reflection;
using Cos;
using Operations;
using Tokenization.Tokens;
/// <summary>
/// Creates <see cref="IGraphicsStateOperation"/> instances by reflection. On
/// construction it scans this assembly for operation types and indexes them by
/// their public <c>Symbol</c> field; <see cref="Create"/> then maps operand tokens
/// onto the constructor parameters of the matching type.
/// </summary>
internal class ReflectionGraphicsStateOperationFactory : IGraphicsStateOperationFactory
{
    // Operation type keyed by operator symbol (the value of each type's Symbol field).
    private readonly IReadOnlyDictionary<string, Type> operations;

    /// <summary>
    /// Builds the symbol-to-type lookup from every non-interface type in this
    /// assembly that implements <see cref="IGraphicsStateOperation"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">An operation type has no public <c>Symbol</c> field.</exception>
    public ReflectionGraphicsStateOperationFactory()
    {
        var assemblyTypes = Assembly.GetAssembly(typeof(ReflectionGraphicsStateOperationFactory)).GetTypes();

        var result = new Dictionary<string, Type>();

        foreach (var assemblyType in assemblyTypes)
        {
            if (!assemblyType.IsInterface && typeof(IGraphicsStateOperation).IsAssignableFrom(assemblyType))
            {
                var symbol = assemblyType.GetField("Symbol");

                if (symbol == null)
                {
                    throw new InvalidOperationException("An operation type was defined without the public const Symbol being declared. Type was: " + assemblyType.FullName);
                }

                var value = symbol.GetValue(null).ToString();

                // Note: if two types declare the same symbol the later one wins.
                result[value] = assemblyType;
            }
        }

        operations = result;
    }

    /// <summary>
    /// Creates the operation registered for <paramref name="op"/>.
    /// </summary>
    /// <param name="op">The operator token; its data is used as the lookup symbol.</param>
    /// <param name="operands">The operand tokens, consumed in order by the constructor parameters.</param>
    /// <returns>The operation, or null when no operation type is registered for the symbol.</returns>
    /// <exception cref="InvalidOperationException">
    /// The operation type has no usable constructor, an operand is missing, or an
    /// operand has the wrong token type for its parameter.
    /// </exception>
    /// <exception cref="NotImplementedException">A constructor parameter has a type this factory cannot populate.</exception>
    public IGraphicsStateOperation Create(OperatorToken op, IReadOnlyList<IToken> operands)
    {
        if (!operations.TryGetValue(op.Data, out Type operationType))
        {
            return null;
        }

        var constructors = operationType.GetConstructors(BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance);

        if (constructors.Length == 0)
        {
            throw new InvalidOperationException("No constructors to invoke were found for operation type: " + operationType.FullName);
        }

        // Reflection does not guarantee constructor order, so do not rely on
        // constructors[0] when a type offers several overloads.
        var constructor = SelectConstructor(constructors);

        // Operand-less operations hide their constructor and expose a shared instance.
        if (constructor.IsPrivate)
        {
            var shared = operationType.GetField("Value");

            if (shared == null)
            {
                throw new InvalidOperationException("An operation type with a private constructor must declare a public static Value field. Type was: " + operationType.FullName);
            }

            return (IGraphicsStateOperation)shared.GetValue(null);
        }

        var parameters = constructor.GetParameters();

        var offset = 0;

        var arguments = new List<object>();

        foreach (var parameter in parameters)
        {
            if (parameter.ParameterType == typeof(decimal))
            {
                var operand = GetOperand(operands, offset, "a decimal", operationType);

                if (operand is NumericToken numeric)
                {
                    arguments.Add(numeric.Data);
                }
                else
                {
                    throw new InvalidOperationException($"Expected a decimal parameter for operation type {operationType.FullName}. Instead got: {operand}");
                }

                offset++;
            }
            else if (parameter.ParameterType == typeof(int))
            {
                var operand = GetOperand(operands, offset, "an integer", operationType);

                if (operand is NumericToken numeric)
                {
                    arguments.Add(numeric.Int);
                }
                else
                {
                    throw new InvalidOperationException($"Expected an integer parameter for operation type {operationType.FullName}. Instead got: {operand}");
                }

                offset++;
            }
            else if (parameter.ParameterType == typeof(decimal[]))
            {
                // Greedily consume consecutive numeric operands; may yield an empty array.
                var array = new List<decimal>();

                while (offset < operands.Count && operands[offset] is NumericToken numeric)
                {
                    array.Add(numeric.Data);
                    offset++;
                }

                arguments.Add(array.ToArray());
            }
            else if (parameter.ParameterType == typeof(CosName))
            {
                var operand = GetOperand(operands, offset, "a name", operationType);

                if (operand is NameToken name)
                {
                    arguments.Add(name.Data);
                }
                else
                {
                    throw new InvalidOperationException($"Expected a name parameter for operation type {operationType.FullName}. Instead got: {operand}");
                }

                offset++;
            }
            else if (parameter.ParameterType == typeof(string))
            {
                var operand = GetOperand(operands, offset, "a string", operationType);

                if (operand is StringToken stringToken)
                {
                    arguments.Add(stringToken.Data);
                }
                else if (operand is HexToken hexToken)
                {
                    arguments.Add(hexToken.Data);
                }
                else
                {
                    throw new InvalidOperationException($"Expected a string parameter for operation type {operationType.FullName}. Instead got: {operand}");
                }

                offset++;
            }
            else
            {
                throw new NotImplementedException($"Unsupported parameter type {parameter.ParameterType.FullName} for operation type {operationType.FullName}.");
            }
        }

        var result = constructor.Invoke(arguments.ToArray());

        return (IGraphicsStateOperation)result;
    }

    // Picks the first constructor whose parameters can all be populated from tokens;
    // falls back to the first constructor so unsupported types still report the
    // NotImplementedException from Create.
    private static ConstructorInfo SelectConstructor(ConstructorInfo[] constructors)
    {
        foreach (var candidate in constructors)
        {
            var supported = true;

            foreach (var parameter in candidate.GetParameters())
            {
                if (!IsSupportedParameterType(parameter.ParameterType))
                {
                    supported = false;
                    break;
                }
            }

            if (supported)
            {
                return candidate;
            }
        }

        return constructors[0];
    }

    // True for the parameter types Create knows how to fill from operand tokens.
    private static bool IsSupportedParameterType(Type type)
    {
        return type == typeof(decimal)
               || type == typeof(int)
               || type == typeof(decimal[])
               || type == typeof(CosName)
               || type == typeof(string);
    }

    // Returns the operand at the offset, or throws a descriptive error when the
    // operator was supplied with too few operands.
    private static IToken GetOperand(IReadOnlyList<IToken> operands, int offset, string expected, Type operationType)
    {
        if (offset >= operands.Count)
        {
            throw new InvalidOperationException($"Expected {expected} parameter for operation type {operationType.FullName}. Instead only {operands.Count} operand(s) were provided.");
        }

        return operands[offset];
    }
}
}

View File

@@ -1,56 +0,0 @@
namespace UglyToad.Pdf.IO
{
using System;
using System.Collections.Generic;
using Text;
using Text.Operators;
using Tokenization.Tokens;
/// <summary>
/// Reads a run of numeric characters (digits, sign and decimal point) into a
/// numeric operand component.
/// </summary>
public class NumericTokenizer
{
    // The bytes accepted as part of a number: '0'-'9', '+', '-' and '.'.
    private static readonly HashSet<byte> SupportedCharacterSet = new HashSet<byte>
    {
        (byte)'0', (byte)'1', (byte)'2', (byte)'3', (byte)'4',
        (byte)'5', (byte)'6', (byte)'7', (byte)'8', (byte)'9',
        (byte)'+', (byte)'-', (byte)'.'
    };

    /// <summary>
    /// Whether the byte can be part of a number.
    /// </summary>
    /// <param name="b">The byte to test.</param>
    /// <param name="offset">Not used by this tokenizer.</param>
    public bool CanRead(byte b, int offset) => SupportedCharacterSet.Contains(b);

    /// <summary>
    /// Builds a numeric operand from the bytes already read plus the bytes that
    /// follow, stopping at the first byte <c>BaseTextComponentApproach.IsEmpty</c> accepts
    /// or at the end of the input.
    /// </summary>
    /// <param name="readBytes">The bytes already consumed for this token.</param>
    /// <param name="furtherBytes">The bytes following those already read.</param>
    /// <param name="offset">Set to the count of <paramref name="readBytes"/> plus the number of further bytes consumed.</param>
    /// <returns>An operand component of type Numeric wrapping the collected bytes.</returns>
    /// <exception cref="InvalidOperationException">A following byte is neither empty nor a supported numeric character.</exception>
    public ITextObjectComponent Read(IReadOnlyList<byte> readBytes, IEnumerable<byte> furtherBytes, out int offset)
    {
        offset = readBytes.Count;

        var collected = new List<byte>(readBytes);

        foreach (var current in furtherBytes)
        {
            if (BaseTextComponentApproach.IsEmpty(current))
            {
                break;
            }

            if (!SupportedCharacterSet.Contains(current))
            {
                throw new InvalidOperationException("Unsupported byte in numeric operator: " + (char)current);
            }

            collected.Add(current);
            offset++;
        }

        var operand = new NumericOperand(collected);

        return new OperandComponent(operand, TextObjectComponentType.Numeric);
    }
}
}

View File

@@ -1,493 +0,0 @@
using System;
using System.Collections.Generic;
using System.Text;
namespace UglyToad.Pdf.IO
{
using System.IO;
/**
* Implements a memory page handling mechanism as base for creating (multiple)
* {@link RandomAccess} buffers each having its set of pages (implemented by
* {@link ScratchFileBuffer}). A buffer is created calling {@link #createBuffer()}.
*
* <p>Pages can be stored in main memory or in a temporary file. A mixed mode
* is supported storing a certain amount of pages in memory and only the
* additional ones in temporary file (defined by maximum main memory to
* be used).</p>
*
* <p>Pages can be marked as 'free' in order to re-use them. For in-memory pages
* this will release the used memory while for pages in temporary file this
* simply marks the area as free to re-use.</p>
*
* <p>If a temporary file was created (done with the first page to be stored
* in temporary file) it is deleted when {@link ScratchFile#close()} is called.</p>
*
* <p>Using this class for {@link RandomAccess} buffers allows for a direct control
* on the maximum memory usage and allows processing large files for which we
* otherwise would get an {@link OutOfMemoryError} in case of using {@link RandomAccessBuffer}.</p>
*
* <p>This base class for providing pages is thread safe (the buffer implementations are not).</p>
*/
public class ScratchFile : IDisposable
{
/** number of pages by which we enlarge the scratch file (reduce I/O-operations) */
private static readonly int ENLARGE_PAGE_COUNT = 16;
/** in case of unrestricted main memory usage this is the initial number of pages
* {@link #inMemoryPages} is setup for */
private static readonly int INIT_UNRESTRICTED_MAINMEM_PAGECOUNT = 100000;
private static readonly int PAGE_SIZE = 4096;
private readonly Object ioLock = new Object();
private readonly string scratchFileDirectory;
/** scratch file; only to be accessed under synchronization of {@link #ioLock} */
private string file;
/** random access to scratch file; only to be accessed under synchronization of {@link #ioLock} */
//private java.io.RandomAccessFile raf;
private volatile int pageCount = 0;
//private readonly BitSet freePages = new BitSet();
/** holds pointers to in-memory page content; will be initialized once in case of restricted
* main memory, otherwise it is enlarged as needed and first initialized to a size of
* {@link #INIT_UNRESTRICTED_MAINMEM_PAGECOUNT} */
private volatile byte[][] inMemoryPages;
private readonly int inMemoryMaxPageCount;
private readonly int maxPageCount;
private readonly bool useScratchFile;
private readonly bool maxMainMemoryIsRestricted;
private volatile bool isClosed = false;
/**
* Initializes the page handler. If a <code>scratchFileDirectory</code> is supplied,
* then the scratch file will be created in that directory.
*
* <p>All pages will be stored in the scratch file.</p>
*
* @param scratchFileDirectory The directory in which to create the scratch file
* or <code>null</code> to create it in the default temporary directory.
*
* @throws IOException If a scratch file directory was given but does not exist.
* NOTE(review): the directory-existence check in the delegated constructor is
* currently commented out, so that constructor does not throw this at present.
*/
public ScratchFile(string scratchFileDirectory) : this(MemoryUsageSetting.setupTempFileOnly().setTempDir(scratchFileDirectory))
{
}
/**
* Initializes the page handler. If a <code>scratchFileDirectory</code> is supplied,
* then the scratch file will be created in that directory.
*
* <p>Depending on the size of allowed memory usage a number of pages (memorySize/{@link #PAGE_SIZE})
* will be stored in-memory and only additional pages will be written to/read from the scratch file.</p>
*
* @param memUsageSetting sets how memory/temporary files are used for buffering streams etc.
*
* @throws IOException If a scratch file directory was given but does not exist.
* NOTE(review): the check that would throw this is commented out below.
*/
public ScratchFile(MemoryUsageSetting memUsageSetting)
{
// Main memory counts as restricted when it is not used at all or has an explicit limit.
maxMainMemoryIsRestricted = (!memUsageSetting.getUseMainMemory()) || memUsageSetting.isMainMemoryRestricted();
// A scratch file is only considered when main memory is restricted.
useScratchFile = maxMainMemoryIsRestricted ? memUsageSetting.getUseTempFile() : false;
scratchFileDirectory = useScratchFile ? memUsageSetting.getTempDir() : null;
//if ((scratchFileDirectory != null) && (!scratchFileDirectory.isDirectory()))
//{
// throw new IOException("Scratch file directory does not exist: " + this.scratchFileDirectory);
//}
// Total page budget: storage limit divided by the page size, capped at int.MaxValue.
maxPageCount = memUsageSetting.isStorageRestricted() ?
(int)Math.Min(int.MaxValue, memUsageSetting.getMaxStorageBytes() / PAGE_SIZE) :
int.MaxValue;
// In-memory page budget: 0 when main memory is not used, otherwise the main
// memory limit divided by the page size (or int.MaxValue when unrestricted).
inMemoryMaxPageCount = memUsageSetting.getUseMainMemory() ?
(memUsageSetting.isMainMemoryRestricted() ?
(int)Math.Min(int.MaxValue, memUsageSetting.getMaxMainMemoryBytes() / PAGE_SIZE) :
int.MaxValue) :
0;
// Restricted: allocate the full in-memory page table up front; unrestricted:
// start with the initial size (enlarge() grows it later).
inMemoryPages = new byte[maxMainMemoryIsRestricted ? inMemoryMaxPageCount : INIT_UNRESTRICTED_MAINMEM_PAGECOUNT][];
//freePages.set(0, inMemoryPages.Length);
}
/**
* Returns an instance that uses only unrestricted main memory for buffering
* (equivalent to <code>new ScratchFile(MemoryUsageSetting.setupMainMemoryOnly())</code>).
*
* @return the instance configured to use only main memory with no size restriction,
* or <code>null</code> if construction throws an IOException
*/
public static ScratchFile getMainMemoryOnlyInstance()
{
    ScratchFile instance;

    try
    {
        instance = new ScratchFile(MemoryUsageSetting.setupMainMemoryOnly());
    }
    catch (IOException)
    {
        // cannot happen for main memory setup
        instance = null;
    }

    return instance;
}
/**
* Intended to return a new free page, either from the free page pool
* or by enlarging the scratch file (which may be created).
*
* <p>The original implementation is commented out below (it depends on the
* commented-out <code>freePages</code> field), so this method currently always
* throws NotImplementedException.</p>
*
* @return index of new page
*/
int getNewPage()
{
//lock (freePages)
//{
// int idx = freePages.nextSetBit(0);
// if (idx < 0)
// {
// enlarge();
// idx = freePages.nextSetBit(0);
// if (idx < 0)
// {
// throw new IOException("Maximum allowed scratch file memory exceeded.");
// }
// }
// freePages.clear(idx);
// if (idx >= pageCount)
// {
// pageCount = idx + 1;
// }
// return idx;
//}
throw new NotImplementedException();
}
/**
* This is meant to provide new free pages by either enlarging the scratch file
* by a number of pages defined by {@link #ENLARGE_PAGE_COUNT} - in case
* scratch file usage is allowed - or by increasing the {@link #inMemoryPages}
* array in case main memory was not restricted. If neither is allowed/the case
* then the free pages count won't be changed. The same is true
* if no new pages could be added because we reached the maximum of
* int.MaxValue pages.
*
* <p>If scratch file usage is allowed and the scratch file does not exist already
* it will be created.</p>
*
* <p>Only to be called under synchronization on {@link #freePages}.</p>
*
* <p>NOTE(review): in this port the scratch-file branch is entirely commented out,
* so only the in-memory array growth has any effect.</p>
*/
private void enlarge()
{
lock (ioLock)
{
checkClosed();
// Nothing to do once the total page budget is used up.
if (pageCount >= maxPageCount)
{
return;
}
if (useScratchFile)
{
// create scratch file if needed
//if (raf == null)
//{
// var location = Path.Combine(scratchFileDirectory, "PDFBox.tmp");
// File.Create(location);
// try
// {
// raf = new java.io.RandomAccessFile(file, "rw");
// }
// catch (IOException e)
// {
// File.Delete(file.ToString());
// throw e;
// }
//}
//long fileLen = raf.length();
// Computed but currently unused: the length check that consumed it is commented out.
long expectedFileLen = ((long)pageCount - inMemoryMaxPageCount) * PAGE_SIZE;
//if (expectedFileLen != fileLen)
{
// throw new IOException("Expected scratch file size of " + expectedFileLen + " but found " + fileLen);
}
// enlarge if we do not overflow
if (pageCount + ENLARGE_PAGE_COUNT > pageCount)
{
// fileLen += ENLARGE_PAGE_COUNT * PAGE_SIZE;
// raf.setLength(fileLen);
// freePages.set(pageCount, pageCount + ENLARGE_PAGE_COUNT);
}
}
else if (!maxMainMemoryIsRestricted)
{
// increase number of in-memory pages
int oldSize = inMemoryPages.Length;
int newSize = (int)Math.Min(((long)oldSize) * 2, int.MaxValue); // this handles integer overflow
if (newSize > oldSize)
{
// Copy the existing page references into the doubled array.
byte[][] newInMemoryPages = new byte[newSize][];
System.Array.Copy(inMemoryPages, 0, newInMemoryPages, 0, oldSize);
inMemoryPages = newInMemoryPages;
// freePages.set(oldSize, newSize);
}
}
}
}
/**
 * Size of a single page in bytes.
 *
 * @return the value of PAGE_SIZE
 */
int getPageSize() => PAGE_SIZE;
/**
 * Reads the page with specified index.
 *
 * NOTE: only pages with an index below inMemoryMaxPageCount can currently be read.
 * The scratch-file access is commented out, and because the "raf == null" guard is
 * commented out as well, the lock block below always throws.
 *
 * @param pageIdx index of page to read
 *
 * @return the stored page array for an in-memory page
 *
 * @throws IOException if the index is out of range, the in-memory page was never
 *                     written, the page is not held in memory, or this instance is closed
 */
byte[] readPage(int pageIdx)
{
if ((pageIdx < 0) || (pageIdx >= pageCount))
{
// Report "closed" in preference to "out of range" when both apply.
checkClosed();
throw new IOException("Page index out of range: " + pageIdx + ". Max value: " + (pageCount - 1));
}
// check if we have the page in memory
if (pageIdx < inMemoryMaxPageCount)
{
byte[] page = inMemoryPages[pageIdx];
// handle case that we are closed
if (page == null)
{
checkClosed();
throw new IOException("Requested page with index " + pageIdx + " was not written before.");
}
return page;
}
lock (ioLock)
{
// if (raf == null)
// With the guard above disabled this block runs unconditionally, so one of the two throws below always fires.
{
checkClosed();
throw new IOException("Missing scratch file to read page with index " + pageIdx + " from.");
}
// Unreachable until the scratch file access is ported.
byte[] page = new byte[PAGE_SIZE];
// raf.seek(((long)pageIdx - inMemoryMaxPageCount) * PAGE_SIZE);
// raf.readFully(page);
return page;
}
}
/**
 * Stores an updated page. Pages with an index below {@link #inMemoryMaxPageCount}
 * are kept in the in-memory array; the remaining pages belong to the scratch file
 * (the file write itself is still commented out in this port).
 *
 * <p>The supplied array is stored as is for in-memory pages, so the caller must
 * not re-use it for another page.</p>
 *
 * @param pageIdx index of page to write
 * @param page page to write (length has to be {@value #PAGE_SIZE})
 *
 * @throws IOException in case page index is out of range, page has wrong length
 *                     or this instance has been closed
 */
void writePage(int pageIdx, byte[] page)
{
    bool indexInRange = (pageIdx >= 0) && (pageIdx < pageCount);
    if (!indexInRange)
    {
        checkClosed();
        throw new IOException("Page index out of range: " + pageIdx + ". Max value: " + (pageCount - 1));
    }

    if (page.Length != PAGE_SIZE)
    {
        throw new IOException("Wrong page size to write: " + page.Length + ". Expected: " + PAGE_SIZE);
    }

    if (pageIdx >= inMemoryMaxPageCount)
    {
        // Page lives in the scratch file.
        lock (ioLock)
        {
            checkClosed();
            // raf.seek(((long)pageIdx - inMemoryMaxPageCount) * PAGE_SIZE);
            // raf.write(page);
        }

        return;
    }

    if (!maxMainMemoryIsRestricted)
    {
        // need synchronization since inMemoryPages may change
        lock (ioLock)
        {
            inMemoryPages[pageIdx] = page;
        }
    }
    else
    {
        inMemoryPages[pageIdx] = page;
    }

    // in case we were closed in between throw exception
    checkClosed();
}
/**
 * Guard used before touching page storage: throws if this page handler
 * has already been closed, otherwise does nothing.
 *
 * @throws IOException If {@link #close()} has already been called.
 */
void checkClosed()
{
    if (!isClosed)
    {
        return;
    }

    throw new IOException("Scratch file already closed");
}
/**
* Creates a new buffer using this page handler.
*
* @return A new buffer.
*
* @throws IOException If an error occurred.
*/
// public RandomAccess createBuffer()
// {
// return new ScratchFileBuffer(this);
// }
/**
* Creates a new buffer using this page handler and initializes it with the
* data read from provided input stream (input stream is copied to buffer).
* The buffer data pointer is reset to point to first byte.
*
* @return A new buffer containing data read from input stream.
*
* @throws IOException If an error occurred.
*/
// public RandomAccess createBuffer(MemoryStream input)
// {
// ScratchFileBuffer buf = new ScratchFileBuffer(this);
// byte[] byteBuffer = new byte[8192];
// int bytesRead = 0;
// while ((bytesRead = input.Read(byteBuffer, 0, 8192)) > -1)
// {
// buf.write(byteBuffer, 0, bytesRead);
// }
// buf.seek(0);
// return buf;
//}
/**
 * Allows a buffer which is cleared/closed to release its pages to be re-used.
 *
 * <p>For every listed page that is held in memory the stored page array is dropped.
 * Indexes that are negative or lie outside the current in-memory array are ignored;
 * this restores the range part of the guard that was disabled together with the
 * free-page bit set. Marking pages as free is not ported yet.</p>
 *
 * @param pageIndexes pages indexes of pages to release
 * @param off index of the first entry of pageIndexes to process
 * @param count number of page indexes contained in provided array
 *              NOTE(review): the loop uses this value as an exclusive end index
 *              (not off + count); the two only agree when off is 0. Confirm callers.
 */
void markPagesAsFree(int[] pageIndexes, int off, int count)
{
    // lock (freePages)
    {
        for (int aIdx = off; aIdx < count; aIdx++)
        {
            int pageIdx = pageIndexes[aIdx];
            // if ((pageIdx >= 0) && (pageIdx < pageCount) && (!freePages.get(pageIdx)))
            {
                // freePages.set(pageIdx);

                // inMemoryMaxPageCount can exceed the current array length (the array grows on
                // demand), so the array bounds are checked explicitly instead of relying on it.
                if ((pageIdx >= 0) && (pageIdx < inMemoryMaxPageCount) && (pageIdx < inMemoryPages.Length))
                {
                    inMemoryPages[pageIdx] = null; // remark: not in ioLock synchronization since behavior won't
                                                   // change even in case of parallel called 'enlarge' method
                }
            }
        }
    }
}
/**
 * Marks this scratch file as closed and resets the page count. No further interaction
 * with the scratch file or associated buffers can happen after this method is called.
 * Calling it again once closed is a no-op.
 *
 * NOTE: closing and deleting the temporary file is commented out in this port.
 * The try block is empty, so ioexc stays null and nothing is thrown from here.
 * The in-memory page array is not cleared by this method either.
 *
 * @throws IOException If there was a problem closing or deleting the temporary file
 *                     (cannot currently happen, see note above).
 */
public void Dispose()
{
// Collects a failure so that the remaining cleanup still runs before it is rethrown.
IOException ioexc = null;
lock (ioLock)
{
if (isClosed)
{
return;
}
isClosed = true;
// if (raf != null)
{
try
{
// raf.close();
}
catch (IOException ioe)
{
ioexc = ioe;
}
}
if (file != null)
{
// if (!file.delete())
{
// The existence check currently has no consequence because the assignment below is disabled.
if (File.Exists(file.ToString()) && (ioexc == null))
{
// ioexc = new IOException("Error deleting scratch file: " + file.getAbsolutePath());
}
}
}
}
// lock (freePages)
{
// freePages.clear();
pageCount = 0;
}
if (ioexc != null)
{
throw ioexc;
}
}
}
}

View File

@@ -0,0 +1,11 @@
namespace UglyToad.Pdf.Parser
{
using Content;
using Graphics;
using IO;
/// <summary>
/// Produces the <see cref="PageContent"/> of a page from the bytes of its content.
/// </summary>
internal interface IPageContentParser
{
/// <summary>
/// Parses the supplied bytes into a <see cref="PageContent"/>.
/// </summary>
/// <param name="operationFactory">The factory made available to the parser for creating graphics state operations.</param>
/// <param name="inputBytes">The bytes to parse.</param>
/// <returns>The parsed page content.</returns>
PageContent Parse(IGraphicsStateOperationFactory operationFactory, IInputBytes inputBytes);
}
}

View File

@@ -0,0 +1,50 @@
namespace UglyToad.Pdf.Parser
{
using System.Collections.Generic;
using Content;
using Graphics;
using Graphics.Operations;
using IO;
using Tokenization.Scanner;
using Tokenization.Tokens;
/// <summary>
/// Reads the tokens of a page content stream and converts each operator,
/// together with the tokens collected before it, into a graphics state operation.
/// </summary>
internal class PageContentParser : IPageContentParser
{
    /// <summary>
    /// Scans <paramref name="inputBytes"/> token by token. Tokens which are neither operators
    /// nor comments are collected; when an operator is met it is handed to
    /// <paramref name="operationFactory"/> along with the collected tokens, and the collection is emptied.
    /// </summary>
    /// <param name="operationFactory">Creates the operation for an operator; a null result is skipped.</param>
    /// <param name="inputBytes">The bytes of the content to tokenize.</param>
    /// <returns>A <see cref="PageContent"/> holding the created operations in the order they were read.</returns>
    public PageContent Parse(IGraphicsStateOperationFactory operationFactory, IInputBytes inputBytes)
    {
        var tokenScanner = new CoreTokenScanner(inputBytes);
        var operands = new List<IToken>();
        var operations = new List<IGraphicsStateOperation>();

        while (tokenScanner.MoveNext())
        {
            var current = tokenScanner.CurrentToken;

            if (current is OperatorToken operatorToken)
            {
                var created = operationFactory.Create(operatorToken, operands);
                if (created != null)
                {
                    operations.Add(created);
                }

                // The same list instance is re-used for the operands of the next operator.
                operands.Clear();
                continue;
            }

            // Comments carry no operand data and are dropped.
            if (!(current is CommentToken))
            {
                operands.Add(current);
            }
        }

        var content = new PageContent
        {
            GraphicsStateOperations = operations
        };

        return content;
    }
}
}

View File

@@ -1,8 +1,6 @@
using System;
using System.Text;
namespace UglyToad.Pdf.Parser.PageTree
namespace UglyToad.Pdf.Parser.PageTree
{
using System;
using Content;
using ContentStream;
using ContentStream.TypedAccessors;
@@ -29,14 +27,7 @@ namespace UglyToad.Pdf.Parser.PageTree
throw new InvalidOperationException("Expected a Dictionary of Type Page, instead got this: " + dictionary);
}
var resources = dictionary.GetDictionaryOrDefault(CosName.RESOURCES);
var resourceDictionary = arguments.Container.Get<ResourceDictionaryParser>()
.Parse(resources, arguments);
var font = resourceDictionary.GetFont(CosName.Create("F0"), arguments, out var fontValue);
return new Page(number, dictionary, arguments);
return new Page(number, dictionary, new PageTreeMembers(), arguments);
}
}
@@ -56,7 +47,7 @@ namespace UglyToad.Pdf.Parser.PageTree
var simpleFont = arguments.Container.Get<SimpleFontParser>()
.Parse(dictionary, arguments);
}
return new Font();
}
}
@@ -112,11 +103,11 @@ namespace UglyToad.Pdf.Parser.PageTree
public class SimpleFont
{
}
public class Font
{
}
}

View File

@@ -1,6 +1,7 @@
namespace UglyToad.Pdf.Parser
{
using System;
using Content;
using Cos;
using Parts;
@@ -13,10 +14,13 @@
public BruteForceSearcher BruteForceSearcher { get; }
public ParsingCachingProviders(CosObjectPool objectPool, BruteForceSearcher bruteForceSearcher)
public ResourceContainer ResourceContainer { get; }
public ParsingCachingProviders(CosObjectPool objectPool, BruteForceSearcher bruteForceSearcher, ResourceContainer resourceContainer)
{
ObjectPool = objectPool ?? throw new ArgumentNullException(nameof(objectPool));
BruteForceSearcher = bruteForceSearcher ?? throw new ArgumentNullException(nameof(bruteForceSearcher));
ResourceContainer = resourceContainer ?? throw new ArgumentNullException(nameof(resourceContainer));
}
}
}

View File

@@ -48,6 +48,7 @@
var dynamicParser = container.Get<DynamicParser>();
var bruteForceSearcher = new BruteForceSearcher(reader);
var resourceContainer = new ResourceContainer();
var root = ParseTrailer(reader, crossReferenceTable, dynamicParser, bruteForceSearcher, pool,
isLenientParsing);
@@ -63,7 +64,7 @@
rootDictionary.Set(CosName.TYPE, CosName.CATALOG);
}
var caching = new ParsingCachingProviders(pool, bruteForceSearcher);
var caching = new ParsingCachingProviders(pool, bruteForceSearcher, resourceContainer);
return new PdfDocument(reader, version, crossReferenceTable, container, isLenientParsing, caching, new Catalog(rootDictionary));
}

View File

@@ -1,135 +0,0 @@
using System;
using System.Collections.Generic;
using System.Text;
namespace UglyToad.Pdf.Parser
{
using Cos;
using Filters;
using IO;
using Logging;
using Parts;
using Parts.CrossReference;
/// <summary>
/// Parser which reads the header version, the cross reference data and the trailer/root
/// objects of a PDF source into the document held by the base class.
/// </summary>
internal class PDFParser : COSParser
{
// NOTE(review): password and keyAlias are stored by the constructor but not read anywhere in this class.
private String password = "";
private IInputStream keyStoreInputStream = null;
private String keyAlias = null;
private FileHeaderParser headerParser = new FileHeaderParser(null);
private FileTrailerParser trailerParser = new FileTrailerParser();
/// <summary>
/// Creates the parser for the given source, stores the decryption settings and creates an empty document.
/// </summary>
/// <param name="source">The PDF data to read; its length is recorded in fileLen.</param>
/// <param name="decryptionPassword">Password to store.</param>
/// <param name="keyStore">Key store stream; closed quietly at the end of <see cref="Parse"/>.</param>
/// <param name="alias">Key alias to store.</param>
public PDFParser(IRandomAccessRead source, string decryptionPassword, IInputStream keyStore,
String alias) : base(source)
{
fileLen = source.Length();
password = decryptionPassword;
keyStoreInputStream = keyStore;
keyAlias = alias;
init();
}
// Creates the document which the parse methods populate.
private void init()
{
document = new COSDocument();
}
/**
* The initial parse will first parse only the trailer, the xrefstart and all xref tables to have a pointer (offset)
* to all the pdf's objects. It can handle linearized pdfs, which will have an xref at the end pointing to an xref
* at the beginning of the file. Last the root object is parsed.
*
* @throws InvalidPasswordException If the password is incorrect.
* @throws IOException If something went wrong.
*/
protected void initialParse(bool isLenient)
{
// Find the cross reference table at the offset given at the end of the document
var xrefOffset = trailerParser.GetXrefOffset(source, isLenient);
// NOTE(review): every collaborator below is constructed with a null log.
ILog log = null;
var bruteForceSearcher = new BruteForceSearcher(source);
var nameParser = new CosNameParser();
var dictionaryParser = new CosDictionaryParser(nameParser, log);
var baseParser = new CosBaseParser(nameParser, new CosStringParser(), dictionaryParser, new CosArrayParser());
var streamParser = new CosStreamParser(log);
var filterProvider = new MemoryFilterProvider(new DecodeParameterResolver(log), new PngPredictor(), log);
var crossReferenceParser = new CrossReferenceStreamParser(filterProvider);
var crossReferenceTableParser = new FileCrossReferenceTableParser(log, dictionaryParser, baseParser, streamParser, crossReferenceParser,
new CrossReferenceTableParser(log, dictionaryParser, baseParser));
var pool = new CosObjectPool();
// NOTE(review): the returned table is not used afterwards in this method.
var table = crossReferenceTableParser.Parse(source, isLenient, xrefOffset, pool);
CosBase baseObj = parseTrailerValuesDynamically(document.trailer, bruteForceSearcher, baseParser, source, isLenient, document, streamParser, pool);
if (!(baseObj is CosDictionary))
{
throw new InvalidOperationException("Expected root dictionary, but got this: " + baseObj);
}
CosDictionary root = (CosDictionary)baseObj;
// in some pdfs the type value "Catalog" is missing in the root object
if (isLenient && !root.containsKey(CosName.TYPE))
{
root.setItem(CosName.TYPE, CosName.CATALOG);
}
CosObject catalogObj = document.getCatalog();
if (catalogObj.GetObject() is CosDictionary)
{
// Parse the objects of the catalog and, when present, of the trailer's Info dictionary.
parseDictObjects((CosDictionary)catalogObj.GetObject(), (CosName[])null, bruteForceSearcher, baseParser, streamParser, source, document, isLenient, pool);
CosBase infoBase = document.trailer.getDictionaryObject(CosName.INFO);
if (infoBase is CosDictionary)
{
parseDictObjects((CosDictionary)infoBase, (CosName[])null, bruteForceSearcher, baseParser, streamParser, source, document, isLenient, pool);
}
document.IsDecrypted = true;
}
initialParseDone = true;
}
/**
* This will parse the stream and populate the COSDocument object. This will close
* the keystore stream when it is done parsing.
*
* @throws InvalidPasswordException If the password is incorrect.
* @throws IOException If there is an error reading from the stream or corrupt data
* is found.
*/
public void Parse(bool isLenientParsing)
{
// set to false if all is processed
bool exceptionOccurred = true;
try
{
// Read the version from the top of the file
// NOTE(review): the header is read with getIsLenient() while the initial parse uses the
// isLenientParsing argument; confirm that the two are meant to differ.
var version = headerParser.ReadHeader(source, getIsLenient());
document.Version = version.Version;
if (!initialParseDone)
{
initialParse(isLenientParsing);
}
exceptionOccurred = false;
}
finally
{
IOUtils.closeQuietly(keyStoreInputStream);
// On failure the partially populated document is closed and discarded.
if (exceptionOccurred && document != null)
{
IOUtils.closeQuietly(document);
document = null;
}
}
}
}
}

View File

@@ -1,157 +0,0 @@
namespace UglyToad.Pdf.Text
{
using System.Collections.Generic;
using System.Linq;
using Operators;
/// <summary>
/// Scans a byte array for text object components (operators and operands), one per call to <see cref="Read"/>.
/// Candidates are recognised by offering each byte to a fixed list of <see cref="ITextComponentApproach"/> instances.
/// </summary>
public class ByteTextScanner : ITextScanner
{
// The operator approaches pair the operator's bytes with the operand types expected before it;
// the last three approaches recognise operands (names starting with '/', numbers, strings).
// NOTE(review): 'W' and 'W*' are declared with one Numeric operand; check this against the PDF operator table.
private static readonly ITextComponentApproach[] Approaches =
{
new BaseTextComponentApproach(new[] {(byte) 'B', (byte) 'T'}, TextObjectComponentType.BeginText, new TextObjectComponentType[0]),
new BaseTextComponentApproach(new[] {(byte) 'E', (byte) 'T'}, TextObjectComponentType.EndText, new TextObjectComponentType[0]),
new BaseTextComponentApproach(new[] {(byte) 'T', (byte) 'f'}, TextObjectComponentType.TextFont, new []{ TextObjectComponentType.Font, TextObjectComponentType.Numeric }),
new BaseTextComponentApproach(new[] {(byte) 'T', (byte) 'm'}, TextObjectComponentType.SetTextMatrix, new []
{
TextObjectComponentType.Numeric, TextObjectComponentType.Numeric, TextObjectComponentType.Numeric,
TextObjectComponentType.Numeric, TextObjectComponentType.Numeric, TextObjectComponentType.Numeric
}),
new BaseTextComponentApproach(new[] {(byte) 'T', (byte) 'd'}, TextObjectComponentType.MoveTextPosition, new[]{ TextObjectComponentType.Numeric, TextObjectComponentType.Numeric }),
new BaseTextComponentApproach(new[] {(byte) 'T', (byte) 'D'}, TextObjectComponentType.MoveTextPositionAndSetLeading, new[]{ TextObjectComponentType.Numeric, TextObjectComponentType.Numeric }),
new BaseTextComponentApproach(new[] {(byte) 'T', (byte) 'j'}, TextObjectComponentType.ShowText, new[] { TextObjectComponentType.String }),
new BaseTextComponentApproach(new[] {(byte) 'T', (byte) 'J'}, TextObjectComponentType.ShowTextWithIndividualGlyphPositioning, new[]{ TextObjectComponentType.Array }),
new BaseTextComponentApproach(new[] {(byte) 'T', (byte) 'L'}, TextObjectComponentType.SetTextLeading, new []{ TextObjectComponentType.Numeric }),
new BaseTextComponentApproach(new[] {(byte) 'T', (byte) 'r'}, TextObjectComponentType.SetTextRenderingMode, new[] { TextObjectComponentType.Numeric }),
new BaseTextComponentApproach(new[] {(byte) 'T', (byte) 's'}, TextObjectComponentType.SetTextRise, new[] { TextObjectComponentType.Numeric }),
new BaseTextComponentApproach(new[] {(byte) 'T', (byte) 'w'}, TextObjectComponentType.SetWordSpacing, new[] { TextObjectComponentType.Numeric }),
new BaseTextComponentApproach(new[] {(byte) 'T', (byte) 'z'}, TextObjectComponentType.SetHorizontalTextScaling, new[] { TextObjectComponentType.Numeric }),
new BaseTextComponentApproach(new[] {(byte) 'T', (byte) '*'}, TextObjectComponentType.MoveToNextLineStart, new TextObjectComponentType[0]),
new BaseTextComponentApproach(new[] {(byte) 'T', (byte) 'c'}, TextObjectComponentType.SetCharacterSpacing, new[] { TextObjectComponentType.Numeric }),
new BaseTextComponentApproach(new[] {(byte) 'g'}, TextObjectComponentType.SetGrayNonStroking, new [] { TextObjectComponentType.Numeric }),
new BaseTextComponentApproach(new[] {(byte) 'G'}, TextObjectComponentType.SetGrayStroking, new [] { TextObjectComponentType.Numeric }),
new BaseTextComponentApproach(new[] {(byte) 'w'}, TextObjectComponentType.SetLineWidth, new [] { TextObjectComponentType.Numeric }),
new BaseTextComponentApproach(new[] {(byte) 'W'}, TextObjectComponentType.SetClippingPathNonZeroWinding, new [] { TextObjectComponentType.Numeric }),
new BaseTextComponentApproach(new[] {(byte) 'W', (byte) '*'}, TextObjectComponentType.SetClippingPathEvenOdd, new [] { TextObjectComponentType.Numeric }),
new BaseTextComponentApproach(new[] {(byte) '\''}, TextObjectComponentType.MoveNextLineAndShowText, new [] { TextObjectComponentType.String }),
new FontTextComponentApproach(),
new NumericTextComponentApproach(),
new StringTextComponentApproach()
};
// The data being scanned and the index of the next byte to inspect.
private readonly byte[] bytes;
private int offset;
/// <summary>
/// Creates a scanner positioned at the first byte of <paramref name="bytes"/>.
/// </summary>
public ByteTextScanner(byte[] bytes)
{
this.bytes = bytes;
}
/// <summary>
/// The component found by the most recent call to <see cref="Read"/>.
/// </summary>
public ITextObjectComponent CurrentComponent { get; private set; }
/// <summary>
/// Advances to the next component.
/// </summary>
/// <returns><see langword="true"/> if a component was found and stored in <see cref="CurrentComponent"/>; <see langword="false"/> once the input is exhausted.</returns>
public bool Read()
{
// NOTE(review): this also returns false for a one byte input without inspecting that byte - confirm intended.
if (offset == bytes.Length - 1)
{
return false;
}
// State of the current candidate: whether more than one approach matched its first byte,
// where it started, which approaches are still possible and the bytes read since the last whitespace.
bool isReadingCandidate = false;
int startOffset = -1;
var validApproaches = new List<ITextComponentApproach>();
var buffer = new List<byte>();
while (offset < bytes.Length)
{
var current = bytes[offset];
// Whitespace clears the current operator search.
if (BaseTextComponentApproach.IsEmpty(current))
{
// TODO: consider the case of two valid operators, one of which is a single character, 'Q' and 'Qe'. For example "BT 10 Q 13 Qe ET"
isReadingCandidate = false;
validApproaches.Clear();
buffer.Clear();
offset++;
continue;
}
buffer.Add(current);
// If we previously started reading a byte which matched some possible approaches.
if (isReadingCandidate)
{
// Remove any approaches which are no longer valid for the next byte.
// (Iterates over a copy so that the list can be modified inside the loop.)
foreach (var validApproach in new List<ITextComponentApproach>(validApproaches))
{
if (!validApproach.CanRead(current, offset - startOffset))
{
validApproaches.Remove(validApproach);
}
}
// There is a single valid approach which is indicative of a specific operator.
if (validApproaches.Count == 1)
{
// The approach receives the bytes buffered so far plus a lazy view of everything after the current byte.
CurrentComponent = validApproaches[0].Read(buffer, bytes.Skip(offset + 1), out var localOffset);
if (CurrentComponent != null)
{
offset += localOffset;
return true;
}
isReadingCandidate = false;
}
// This was a false start.
else if (validApproaches.Count == 0)
{
buffer.Clear();
isReadingCandidate = false;
}
}
// If we haven't looked at the first byte after some whitespace.
else if (buffer.Count == 1)
{
// Find any operator approaches which are valid for this first byte.
foreach (var approach in Approaches)
{
if (approach.CanRead(current, 0))
{
validApproaches.Add(approach);
}
}
switch (validApproaches.Count)
{
case 0:
// No valid approaches, this cannot be a operator, continue until we hit a whitespace.
break;
case 1:
// A single valid approach, this immediately matches an operator.
CurrentComponent = validApproaches[0].Read(buffer, bytes.Skip(offset + 1), out var localOffset);
if (CurrentComponent != null)
{
offset += localOffset;
return true;
}
break;
default:
// Multiple valid approaches, use the next character to refine the possible approaches.
startOffset = offset;
isReadingCandidate = true;
break;
}
}
offset++;
}
return false;
}
}
}

View File

@@ -1,11 +0,0 @@
namespace UglyToad.Pdf.Text
{
using System.Collections.Generic;
/// <summary>
/// A strategy for recognising and reading one kind of text object component (an operator or an operand) from bytes.
/// </summary>
public interface ITextComponentApproach
{
/// <summary>
/// Whether byte <paramref name="b"/> is acceptable at position <paramref name="offset"/> of a candidate component,
/// where 0 is the candidate's first byte.
/// </summary>
bool CanRead(byte b, int offset);
/// <summary>
/// Reads the component from the bytes already consumed followed by the bytes which come after them.
/// </summary>
/// <param name="readBytes">The bytes of the candidate read so far.</param>
/// <param name="furtherBytes">The bytes following <paramref name="readBytes"/>.</param>
/// <param name="offset">The amount by which the caller should advance its position.</param>
/// <returns>The component read; implementations in this project may return <see langword="null"/> when the bytes do not match.</returns>
ITextObjectComponent Read(IReadOnlyList<byte> readBytes, IEnumerable<byte> furtherBytes, out int offset);
}
}

View File

@@ -1,9 +0,0 @@
namespace UglyToad.Pdf.Text
{
/// <summary>
/// Forward-only reader over text object components.
/// </summary>
public interface ITextScanner
{
/// <summary>
/// The component made current by the last call to <see cref="Read"/>.
/// </summary>
ITextObjectComponent CurrentComponent { get; }
/// <summary>
/// Moves to the next component.
/// </summary>
/// <returns><see langword="true"/> if a component is available in <see cref="CurrentComponent"/>, otherwise <see langword="false"/>.</returns>
bool Read();
}
}

View File

@@ -1,41 +0,0 @@
namespace UglyToad.Pdf.Text
{
using System.Collections.Generic;
/// <summary>
/// An operator component: it acts on the operands, which hold the data.
/// </summary>
public class Operator : ITextObjectComponent
{
    /// <summary>
    /// Creates an operator of the given type which expects the given operand types.
    /// </summary>
    public Operator(TextObjectComponentType type, IReadOnlyList<TextObjectComponentType> operandTypes)
    {
        Type = type;
        OperandTypes = operandTypes;
    }

    /// <summary>
    /// Always <see langword="true"/>
    /// </summary>
    public bool IsOperator => true;

    /// <summary>
    /// The operand types, in order, which must come before this operator.
    /// </summary>
    public IReadOnlyList<TextObjectComponentType> OperandTypes { get; }

    /// <summary>
    /// Which operator this is.
    /// </summary>
    public TextObjectComponentType Type { get; }

    /// <summary>
    /// Always <see langword="null"/>.
    /// </summary>
    public IOperand AsOperand => null;

    /// <summary>
    /// Text of the form "Operator: " followed by the operator type.
    /// </summary>
    public override string ToString() => "Operator: " + Type;
}
}

View File

@@ -1,85 +0,0 @@
namespace UglyToad.Pdf.Text.Operators
{
using System.Collections.Generic;
/// <summary>
/// Recognises a single operator identified by a fixed sequence of bytes, for example 'B','T'.
/// </summary>
public class BaseTextComponentApproach : ITextComponentApproach
{
    // The exact bytes of the operator, the component type it produces and the operand types it expects.
    private readonly byte[] bytes;
    private readonly TextObjectComponentType textObjectComponentType;
    private readonly IReadOnlyList<TextObjectComponentType> operandTypes;

    /// <summary>
    /// Creates the approach for one operator.
    /// </summary>
    /// <param name="bytes">The bytes which spell the operator.</param>
    /// <param name="textObjectComponentType">The type given to the <see cref="Operator"/> produced by <see cref="Read"/>.</param>
    /// <param name="operandTypes">The operand types passed on to the produced <see cref="Operator"/>.</param>
    public BaseTextComponentApproach(byte[] bytes, TextObjectComponentType textObjectComponentType,
        IReadOnlyList<TextObjectComponentType> operandTypes)
    {
        this.bytes = bytes;
        this.textObjectComponentType = textObjectComponentType;
        this.operandTypes = operandTypes;
    }

    /// <summary>
    /// Whether <paramref name="b"/> equals the operator byte at position <paramref name="offset"/>.
    /// Positions beyond the end of the operator never match.
    /// </summary>
    public bool CanRead(byte b, int offset)
    {
        if (offset >= bytes.Length)
        {
            return false;
        }

        return bytes[offset] == b;
    }

    /// <summary>
    /// Verifies that <paramref name="readBytes"/>, continued by <paramref name="furtherBytes"/> where necessary,
    /// spell this operator and that the operator is followed by whitespace or the end of the input.
    /// </summary>
    /// <param name="readBytes">Bytes already consumed by the caller.</param>
    /// <param name="furtherBytes">Bytes following <paramref name="readBytes"/>.</param>
    /// <param name="offset">Always set to the length of the operator.</param>
    /// <returns>The operator, or <see langword="null"/> if the bytes do not match.</returns>
    public ITextObjectComponent Read(IReadOnlyList<byte> readBytes, IEnumerable<byte> furtherBytes, out int offset)
    {
        bool hasOpenedEnumerator = false;
        offset = bytes.Length;

        using (var enumerator = furtherBytes.GetEnumerator())
        {
            for (var i = 0; i < bytes.Length; i++)
            {
                if (i < readBytes.Count)
                {
                    // This byte was already consumed by the caller.
                    if (readBytes[i] != bytes[i])
                    {
                        return null;
                    }

                    // Look beyond the end
                    if (i == bytes.Length - 1)
                    {
                        if (!hasOpenedEnumerator && enumerator.MoveNext() && !IsEmpty(enumerator.Current))
                        {
                            return null;
                        }
                    }
                }
                else
                {
                    // This byte has to come from the bytes which follow.
                    hasOpenedEnumerator = true;
                    if (!enumerator.MoveNext())
                    {
                        return null;
                    }

                    var curr = enumerator.Current;
                    if (curr != bytes[i])
                    {
                        return null;
                    }

                    // The operator must be delimited: anything other than whitespace directly after it is a mismatch.
                    if (i == bytes.Length - 1 && enumerator.MoveNext() && !IsEmpty(enumerator.Current))
                    {
                        return null;
                    }
                }
            }
        }

        return new Operator(textObjectComponentType, operandTypes);
    }

    /// <summary>
    /// Whether the byte is one of the PDF white-space characters:
    /// null (0), horizontal tab (9), line feed (10), form feed (12), carriage return (13) or space (32).
    /// </summary>
    /// <remarks>
    /// Tab and form feed were previously not treated as whitespace, so operators delimited by them were not recognised.
    /// </remarks>
    public static bool IsEmpty(byte b)
    {
        switch (b)
        {
            case 0:
            case (byte)'\t':
            case (byte)'\n':
            case (byte)'\f':
            case (byte)'\r':
            case (byte)' ':
                return true;
            default:
                return false;
        }
    }
}
}

View File

@@ -1,77 +0,0 @@
namespace UglyToad.Pdf.Text.Operators
{
using System.Collections.Generic;
/// <summary>
/// Recognises operands which start with '/' and run until the next empty byte; they are produced as font operands.
/// </summary>
public class FontTextComponentApproach : ITextComponentApproach
{
    /// <summary>
    /// The first byte must be '/'; every later byte is accepted unless it is empty
    /// according to <see cref="BaseTextComponentApproach.IsEmpty"/>.
    /// </summary>
    public bool CanRead(byte b, int offset)
    {
        return offset == 0
            ? b == '/'
            : !BaseTextComponentApproach.IsEmpty(b);
    }

    /// <summary>
    /// Appends bytes from <paramref name="furtherBytes"/> to <paramref name="readBytes"/> until an empty byte
    /// or the end of the input is reached.
    /// </summary>
    /// <param name="readBytes">Bytes already consumed by the caller.</param>
    /// <param name="furtherBytes">Bytes following <paramref name="readBytes"/>.</param>
    /// <param name="offset">The number of bytes in the produced operand.</param>
    /// <returns>A component of type <see cref="TextObjectComponentType.Font"/> wrapping the collected bytes.</returns>
    public ITextObjectComponent Read(IReadOnlyList<byte> readBytes, IEnumerable<byte> furtherBytes, out int offset)
    {
        offset = readBytes.Count;

        using (var remaining = furtherBytes.GetEnumerator())
        {
            var nameBytes = new List<byte>(readBytes);

            while (remaining.MoveNext())
            {
                var next = remaining.Current;
                if (BaseTextComponentApproach.IsEmpty(next))
                {
                    break;
                }

                nameBytes.Add(next);
                offset++;
            }

            var operand = new FontOperand(nameBytes);
            return new OperandComponent(operand, TextObjectComponentType.Font);
        }
    }
}
/// <summary>
/// A component which carries an operand rather than an operator.
/// </summary>
public class OperandComponent : ITextObjectComponent
{
    /// <summary>
    /// Wraps <paramref name="operand"/> as a component of the given <paramref name="type"/>.
    /// </summary>
    public OperandComponent(IOperand operand, TextObjectComponentType type)
    {
        AsOperand = operand;
        Type = type;
    }

    /// <summary>
    /// Always <see langword="false"/>.
    /// </summary>
    public bool IsOperator => false;

    /// <summary>
    /// Always an empty list.
    /// </summary>
    public IReadOnlyList<TextObjectComponentType> OperandTypes { get; } = new TextObjectComponentType[0];

    /// <summary>
    /// The kind of operand held.
    /// </summary>
    public TextObjectComponentType Type { get; }

    /// <summary>
    /// The operand supplied at construction.
    /// </summary>
    public IOperand AsOperand { get; }
}
/// <summary>
/// Operand holding the bytes read by <see cref="FontTextComponentApproach"/>.
/// </summary>
public class FontOperand : IOperand
{
    /// <summary>
    /// Stores <paramref name="bytes"/> without copying them.
    /// </summary>
    public FontOperand(IReadOnlyList<byte> bytes) => RawBytes = bytes;

    /// <summary>
    /// The bytes supplied at construction.
    /// </summary>
    public IReadOnlyList<byte> RawBytes { get; }
}
/// <summary>
/// Operand holding the bytes of a string.
/// </summary>
public class StringOperand : IOperand
{
    /// <summary>
    /// Stores <paramref name="bytes"/> without copying them.
    /// </summary>
    public StringOperand(IReadOnlyList<byte> bytes) => RawBytes = bytes;

    /// <summary>
    /// The bytes supplied at construction.
    /// </summary>
    public IReadOnlyList<byte> RawBytes { get; }
}
}

View File

@@ -1,62 +0,0 @@
namespace UglyToad.Pdf.Text.Operators
{
using System;
using System.Collections.Generic;
/// <summary>
/// Recognises numeric operands made up of digits, '+', '-' and '.'.
/// </summary>
public class NumericTextComponentApproach : ITextComponentApproach
{
    // Every byte which may appear in a numeric operand.
    private static readonly HashSet<byte> SupportedCharacterSet = new HashSet<byte>
    {
        (byte)'0', (byte)'1', (byte)'2', (byte)'3', (byte)'4',
        (byte)'5', (byte)'6', (byte)'7', (byte)'8', (byte)'9',
        (byte)'+', (byte)'-', (byte)'.'
    };

    /// <summary>
    /// Whether <paramref name="b"/> is a supported numeric character; the position is not taken into account.
    /// </summary>
    public bool CanRead(byte b, int offset) => SupportedCharacterSet.Contains(b);

    /// <summary>
    /// Appends bytes from <paramref name="furtherBytes"/> to <paramref name="readBytes"/> until an empty byte
    /// or the end of the input is reached.
    /// </summary>
    /// <param name="readBytes">Bytes already consumed by the caller.</param>
    /// <param name="furtherBytes">Bytes following <paramref name="readBytes"/>.</param>
    /// <param name="offset">The number of bytes collected for the operand.</param>
    /// <returns>A component of type <see cref="TextObjectComponentType.Numeric"/>.</returns>
    /// <exception cref="InvalidOperationException">A non-empty byte outside the supported set was met.</exception>
    public ITextObjectComponent Read(IReadOnlyList<byte> readBytes, IEnumerable<byte> furtherBytes, out int offset)
    {
        offset = readBytes.Count;
        var numberBytes = new List<byte>(readBytes);

        foreach (var next in furtherBytes)
        {
            if (BaseTextComponentApproach.IsEmpty(next))
            {
                break;
            }

            if (!SupportedCharacterSet.Contains(next))
            {
                throw new InvalidOperationException("Unsupported byte in numeric operator: " + (char)next);
            }

            numberBytes.Add(next);
            offset++;
        }

        var operand = new NumericOperand(numberBytes);
        return new OperandComponent(operand, TextObjectComponentType.Numeric);
    }
}
/// <summary>
/// Operand holding the bytes of a number.
/// </summary>
public class NumericOperand : IOperand
{
    /// <summary>
    /// Stores <paramref name="bytes"/> in <see cref="RawBytes"/>.
    /// </summary>
    /// <remarks>
    /// The argument used to be discarded, which left <see cref="RawBytes"/> null for every operand read.
    /// </remarks>
    public NumericOperand(IReadOnlyList<byte> bytes)
    {
        RawBytes = bytes;
    }

    /// <summary>
    /// The bytes of the number. The setter is kept so existing callers which assign it continue to compile.
    /// </summary>
    public IReadOnlyList<byte> RawBytes { get; set; }
}
}

View File

@@ -1,136 +0,0 @@
namespace UglyToad.Pdf.Text.Operators
{
using System;
using System.Collections.Generic;
/// <summary>
/// Recognises string operands: hexadecimal strings delimited by '&lt;' and '&gt;' and
/// literal strings delimited by '(' and ')'.
/// </summary>
public class StringTextComponentApproach : ITextComponentApproach
{
    /// <summary>
    /// The first byte must be '&lt;' or '('; any byte is accepted at later positions.
    /// </summary>
    public bool CanRead(byte b, int offset)
    {
        if (offset == 0)
        {
            return b == '<' || b == '(';
        }

        return true;
    }

    /// <summary>
    /// Reads a string up to and including its closing delimiter.
    /// </summary>
    /// <param name="readBytes">Bytes already consumed by the caller; when not empty the first one decides the string type.</param>
    /// <param name="furtherBytes">Bytes following <paramref name="readBytes"/>.</param>
    /// <param name="offset">The number of bytes in the produced operand, delimiters included.</param>
    /// <returns>A component of type <see cref="TextObjectComponentType.String"/> wrapping the raw bytes.</returns>
    /// <exception cref="InvalidOperationException">
    /// The string starts with an unexpected byte, a hex string contains a non-hex byte,
    /// or the closing delimiter is followed by a byte which is not whitespace.
    /// </exception>
    public ITextObjectComponent Read(IReadOnlyList<byte> readBytes, IEnumerable<byte> furtherBytes, out int offset)
    {
        var bytes = new List<byte>(readBytes);
        bool isHexString = false;
        bool isKnownType = false;

        if (readBytes.Count > 0)
        {
            isHexString = readBytes[0] == '<';
            if (!isHexString && readBytes[0] != '(')
            {
                throw new InvalidOperationException("String started with an unexpected character: " + bytes[0]);
            }

            isKnownType = true;
        }

        bool isEscapeActive = false;
        int bracketDepth = 0;

        using (var reader = furtherBytes.GetEnumerator())
        {
            while (reader.MoveNext())
            {
                var current = reader.Current;

                if (!isKnownType)
                {
                    isHexString = current == '<';
                    if (!isHexString && current != '(')
                    {
                        // Report the offending byte itself: nothing has been collected yet at this point,
                        // so indexing the collected bytes would fail with an out-of-range error instead.
                        throw new InvalidOperationException("String started with an unexpected character: " + current);
                    }

                    isKnownType = true;
                    bytes.Add(current);
                    continue;
                }

                bytes.Add(current);

                if (isHexString)
                {
                    if (current == '>')
                    {
                        break;
                    }

                    if (!IsValidHexCharacter(current))
                    {
                        throw new InvalidOperationException("Found an unexpected character in a hex string: " + current);
                    }

                    continue;
                }

                // Literal string. A backslash escapes exactly the one byte which follows it, so the flag is
                // cleared as soon as that byte has been consumed. Previously it stayed set after an escaped
                // parenthesis or a doubled backslash, which made the next ')' look escaped too and kept the
                // string from terminating.
                if (isEscapeActive)
                {
                    isEscapeActive = false;
                    continue;
                }

                if (current == '\\')
                {
                    isEscapeActive = true;
                }
                else if (current == '(')
                {
                    // Unescaped nested parentheses must be balanced before the string can end.
                    bracketDepth++;
                }
                else if (current == ')')
                {
                    if (bracketDepth == 0)
                    {
                        break;
                    }

                    bracketDepth--;
                }
            }

            // The string must be delimited by whitespace or the end of the input.
            if (reader.MoveNext() && !BaseTextComponentApproach.IsEmpty(reader.Current))
            {
                throw new InvalidOperationException("Unexpected byte following string operator, expected whitespace: " + (char)reader.Current);
            }
        }

        offset = bytes.Count;
        return new OperandComponent(new StringOperand(bytes), TextObjectComponentType.String);
    }

    // Whether the byte is one of 0-9, a-f or A-F.
    private static bool IsValidHexCharacter(byte b)
    {
        return (b >= '0' && b <= '9')
               || (b >= 'a' && b <= 'f')
               || (b >= 'A' && b <= 'F');
    }
}
}

View File

@@ -1,155 +0,0 @@
namespace UglyToad.Pdf.Text
{
using System;
using System.Collections.Generic;
using System.Linq;
using Logging;
using Util.JetBrains.Annotations;
/// <summary>
/// Groups the components produced by an <see cref="ITextScanner"/> into sections which end at each
/// End Text (ET) component and checks the operands of the operators inside sections starting with Begin Text (BT).
/// </summary>
public class TextSectionParser
{
    private readonly ILog log;

    /// <summary>
    /// Creates the parser with the log used to report missing operands.
    /// </summary>
    public TextSectionParser(ILog log)
    {
        this.log = log;
    }

    /// <summary>
    /// Reads every component from <paramref name="textScanner"/> and splits them into sections.
    /// </summary>
    /// <param name="textScanner">The source of components.</param>
    /// <returns>The sections closed by an End Text component, each as a list of its components.</returns>
    /// <exception cref="InvalidOperationException">A Begin Text was found while another was still open.</exception>
    public IReadOnlyList<object> ReadTextObjects(ITextScanner textScanner)
    {
        var completedSections = new List<List<ITextObjectComponent>>();
        var currentSection = new List<ITextObjectComponent>();
        var insideTextObject = false;

        while (textScanner.Read())
        {
            currentSection.Add(textScanner.CurrentComponent);

            var componentType = textScanner.CurrentComponent.Type;
            if (componentType == TextObjectComponentType.BeginText)
            {
                if (insideTextObject)
                {
                    throw new InvalidOperationException("Found a begin text (BT) nested in another.");
                }

                insideTextObject = true;
            }
            else if (componentType == TextObjectComponentType.EndText)
            {
                // Close the section and start collecting the next one.
                insideTextObject = false;
                completedSections.Add(currentSection);
                currentSection = new List<ITextObjectComponent>();
            }
        }

        foreach (var section in completedSections)
        {
            // Only sections whose first component is Begin Text are processed.
            if (section[0].Type != TextObjectComponentType.BeginText)
            {
                continue;
            }

            ProcessTextSection(section, true);
        }

        return completedSections;
    }

    // Validates that the components are wrapped in BT ... ET and applies each operator in between.
    // Currently always returns null.
    private object ProcessTextSection(IReadOnlyList<ITextObjectComponent> components, bool isLenientParsing)
    {
        if (components[0].Type != TextObjectComponentType.BeginText)
        {
            throw new InvalidOperationException("The set of components did not start with Begin Text (BT)");
        }

        var lastIndex = components.Count - 1;
        if (components[lastIndex].Type != TextObjectComponentType.EndText)
        {
            throw new InvalidOperationException("The set of components did not end with End Text (ET)");
        }

        var builder = new TextObjectBuilder();

        // Skip the opening BT and the closing ET.
        for (var position = 1; position < lastIndex; position++)
        {
            if (!components[position].IsOperator)
            {
                continue;
            }

            ApplyOperator(builder, components, position, isLenientParsing);
        }

        return null;
    }

    // Checks that the components directly before the operator at 'index' match its expected operand types.
    // In lenient mode a mismatch ends the check silently, otherwise it throws.
    private void ApplyOperator(TextObjectBuilder builder, IReadOnlyList<ITextObjectComponent> components, int index, bool isLenientParsing)
    {
        var operatorComponent = components[index];
        if (!operatorComponent.IsOperator)
        {
            throw new InvalidOperationException("Cannot apply operator for component type: " + operatorComponent);
        }

        var operands = new IOperand[operatorComponent.OperandTypes.Count];
        var firstOperandIndex = index - operands.Length;

        // begin text or start
        if (firstOperandIndex <= 0)
        {
            log.Error("Did not find the required number of operands for the current operator.");
            if (!isLenientParsing)
            {
                throw new InvalidOperationException();
            }

            return;
        }

        for (var slot = 0; slot < operands.Length; slot++)
        {
            var expectedType = operatorComponent.OperandTypes[slot];
            var candidate = components[firstOperandIndex + slot];

            if (candidate.Type != expectedType)
            {
                if (isLenientParsing)
                {
                    return;
                }

                throw new InvalidOperationException($"Unexpected operand type at index {slot} for operator: {operatorComponent}\r\nExpected {expectedType} Found {candidate.Type}");
            }

            operands[slot] = candidate.AsOperand;
        }
    }
}
/// <summary>
/// Mutable holder for the font key and font size of a text object under construction.
/// </summary>
public class TextObjectBuilder
{
/// <summary>
/// The key of the font.
/// </summary>
public string FontKey { get; set; }
/// <summary>
/// The size of the font.
/// </summary>
public decimal FontSize { get; set; }
}
/// <summary>
/// A single element read from a text object: either an operator or an operand.
/// </summary>
public interface ITextObjectComponent
{
/// <summary>
/// Whether this component is an operator.
/// </summary>
bool IsOperator { get; }
/// <summary>
/// The ordered operand types expected before this component when it is an operator.
/// </summary>
IReadOnlyList<TextObjectComponentType> OperandTypes { get; }
/// <summary>
/// The type of this component.
/// </summary>
TextObjectComponentType Type { get; }
/// <summary>
/// The operand carried by this component; may be <see langword="null"/>.
/// </summary>
[CanBeNull]
IOperand AsOperand { get; }
}
/// <summary>
/// The data of an operand.
/// </summary>
public interface IOperand
{
/// <summary>
/// The bytes of the operand as they were read.
/// </summary>
IReadOnlyList<byte> RawBytes { get; }
}
}

View File

@@ -4,7 +4,6 @@
using System.Collections.Generic;
using IO;
using Parser.Parts;
using Text.Operators;
using Tokenization;
using Tokens;
@@ -19,7 +18,7 @@
{
private static readonly HexTokenizer HexTokenizer = new HexTokenizer();
private static readonly StringTokenizer StringTokenizer = new StringTokenizer();
private static readonly Tokenization.NumericTokenizer NumericTokenizer = new Tokenization.NumericTokenizer();
private static readonly NumericTokenizer NumericTokenizer = new NumericTokenizer();
private static readonly NameTokenizer NameTokenizer = new NameTokenizer();
private static readonly PlainTokenizer PlainTokenizer = new PlainTokenizer();
private static readonly ArrayTokenizer ArrayTokenizer = new ArrayTokenizer();
@@ -70,8 +69,7 @@
var currentByte = inputBytes.CurrentByte;
var c = (char) currentByte;
if (BaseTextComponentApproach.IsEmpty(currentByte)
|| ReadHelper.IsWhitespace(currentByte))
if (IsEmpty(currentByte) || ReadHelper.IsWhitespace(currentByte))
{
isSkippingSymbol = false;
continue;
@@ -159,5 +157,10 @@
return false;
}
// Whether the byte is a space, carriage return, line feed or null byte.
private static bool IsEmpty(byte b)
{
    switch (b)
    {
        case 0:
        case (byte)'\n':
        case (byte)'\r':
        case (byte)' ':
            return true;
        default:
            return false;
    }
}
}
}

View File

@@ -64,7 +64,11 @@ namespace UglyToad.Pdf.Tokenization.Tokens
var b = Convert(high, low);
bytes.Add(b);
builder.Append((char)b);
if (b != '\0')
{
builder.Append((char)b);
}
}
Bytes = bytes;

View File

@@ -4,8 +4,4 @@
<TargetFramework>netstandard2.0</TargetFramework>
</PropertyGroup>
<ItemGroup>
<Folder Include="Text\ComponentHandlers\" />
</ItemGroup>
</Project>

View File

@@ -1,6 +1,7 @@
namespace UglyToad.Pdf.Util
{
using Filters;
using Graphics;
using Logging;
using Parser;
using Parser.PageTree;
@@ -47,6 +48,8 @@
var simpleFontParser = new SimpleFontParser();
var compositeFontParser = new CompositeFontParser();
var fontParser = new FontParser();
var pageContentParser = new PageContentParser();
var operationFactory = new ReflectionGraphicsStateOperationFactory();
var container = new Container();
container.Register(headerParser);
@@ -65,6 +68,8 @@
container.Register(simpleFontParser);
container.Register(compositeFontParser);
container.Register(fontParser);
container.Register(pageContentParser);
container.Register(operationFactory);
return container;
}