move tokenizers to their own project

Since both PDFs and Adobe Type 1 fonts use PostScript-style objects, tokenization is needed by both the main project and the fonts project.
This commit is contained in:
Eliot Jones
2020-01-05 10:40:44 +00:00
parent d09b33af4d
commit bbde38f656
34 changed files with 205 additions and 86 deletions

View File

@@ -1,9 +1,15 @@
namespace UglyToad.PdfPig.Util namespace UglyToad.PdfPig.Core
{ {
using System; using System;
internal static class OctalHelpers /// <summary>
/// Interprets numbers in octal format.
/// </summary>
public static class OctalHelpers
{ {
/// <summary>
/// Read a short.
/// </summary>
public static short CharacterToShort(this char c) public static short CharacterToShort(this char c)
{ {
switch (c) switch (c)
@@ -33,10 +39,11 @@
} }
} }
/// <summary>
/// Read an integer from octal digits.
/// </summary>
public static int FromOctalDigits(short[] octal) public static int FromOctalDigits(short[] octal)
{ {
int sum = 0; int sum = 0;
for (int i = octal.Length - 1; i >= 0; i--) for (int i = octal.Length - 1; i >= 0; i--)
{ {
@@ -47,6 +54,9 @@
return sum; return sum;
} }
/// <summary>
/// Interpret an int as octal.
/// </summary>
public static int FromOctalInt(int input) public static int FromOctalInt(int input)
{ {
var str = input.ToString(); var str = input.ToString();

View File

@@ -18,6 +18,8 @@
<ItemGroup> <ItemGroup>
<ProjectReference Include="..\UglyToad.PdfPig.Core\UglyToad.PdfPig.Core.csproj" /> <ProjectReference Include="..\UglyToad.PdfPig.Core\UglyToad.PdfPig.Core.csproj" />
<ProjectReference Include="..\UglyToad.PdfPig.Fonts\UglyToad.PdfPig.Fonts.csproj" /> <ProjectReference Include="..\UglyToad.PdfPig.Fonts\UglyToad.PdfPig.Fonts.csproj" />
<ProjectReference Include="..\UglyToad.PdfPig.Tokenization\UglyToad.PdfPig.Tokenization.csproj" />
<ProjectReference Include="..\UglyToad.PdfPig.Tokens\UglyToad.PdfPig.Tokens.csproj" />
<ProjectReference Include="..\UglyToad.PdfPig\UglyToad.PdfPig.csproj" /> <ProjectReference Include="..\UglyToad.PdfPig\UglyToad.PdfPig.csproj" />
</ItemGroup> </ItemGroup>
</Project> </Project>

View File

@@ -1,4 +1,4 @@
namespace UglyToad.PdfPig.Tokenization namespace UglyToad.PdfPig.Fonts.Type1
{ {
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
@@ -6,11 +6,15 @@
using System.Text; using System.Text;
using Core; using Core;
using Tokens; using Tokens;
using Tokenization;
internal class Type1ArrayTokenizer : ITokenizer /// <inheritdoc />
public class Type1ArrayTokenizer : ITokenizer
{ {
/// <inheritdoc />
public bool ReadsNextByte { get; } = false; public bool ReadsNextByte { get; } = false;
/// <inheritdoc />
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token) public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
{ {
token = null; token = null;

View File

@@ -1,14 +1,17 @@
namespace UglyToad.PdfPig.Tokenization namespace UglyToad.PdfPig.Fonts.Type1
{ {
using System.Text; using System.Text;
using Core; using Core;
using Parser.Parts;
using Tokens; using Tokens;
using Tokenization;
internal class Type1NameTokenizer : ITokenizer /// <inheritdoc />
public class Type1NameTokenizer : ITokenizer
{ {
/// <inheritdoc />
public bool ReadsNextByte { get; } = true; public bool ReadsNextByte { get; } = true;
/// <inheritdoc />
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token) public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
{ {
token = null; token = null;

View File

@@ -27,6 +27,7 @@
<ItemGroup> <ItemGroup>
<ProjectReference Include="..\UglyToad.PdfPig.Core\UglyToad.PdfPig.Core.csproj" /> <ProjectReference Include="..\UglyToad.PdfPig.Core\UglyToad.PdfPig.Core.csproj" />
<ProjectReference Include="..\UglyToad.PdfPig.Tokenization\UglyToad.PdfPig.Tokenization.csproj" />
<ProjectReference Include="..\UglyToad.PdfPig.Tokens\UglyToad.PdfPig.Tokens.csproj" /> <ProjectReference Include="..\UglyToad.PdfPig.Tokens\UglyToad.PdfPig.Tokens.csproj" />
</ItemGroup> </ItemGroup>

View File

@@ -100,6 +100,7 @@
<ItemGroup> <ItemGroup>
<ProjectReference Include="..\UglyToad.PdfPig.Core\UglyToad.PdfPig.Core.csproj" /> <ProjectReference Include="..\UglyToad.PdfPig.Core\UglyToad.PdfPig.Core.csproj" />
<ProjectReference Include="..\UglyToad.PdfPig.Fonts\UglyToad.PdfPig.Fonts.csproj" /> <ProjectReference Include="..\UglyToad.PdfPig.Fonts\UglyToad.PdfPig.Fonts.csproj" />
<ProjectReference Include="..\UglyToad.PdfPig.Tokenization\UglyToad.PdfPig.Tokenization.csproj" />
<ProjectReference Include="..\UglyToad.PdfPig.Tokens\UglyToad.PdfPig.Tokens.csproj" /> <ProjectReference Include="..\UglyToad.PdfPig.Tokens\UglyToad.PdfPig.Tokens.csproj" />
<ProjectReference Include="..\UglyToad.PdfPig\UglyToad.PdfPig.csproj" /> <ProjectReference Include="..\UglyToad.PdfPig\UglyToad.PdfPig.csproj" />
</ItemGroup> </ItemGroup>

View File

@@ -1,6 +1,6 @@
namespace UglyToad.PdfPig.Tests.Util namespace UglyToad.PdfPig.Tests.Util
{ {
using PdfPig.Util; using PdfPig.Core;
using Xunit; using Xunit;
public class OctalHelpersTests public class OctalHelpersTests

View File

@@ -0,0 +1,3 @@
using System.Runtime.CompilerServices;
[assembly: InternalsVisibleTo("UglyToad.PdfPig.Tests")]

View File

@@ -2,7 +2,6 @@
{ {
using System.Text; using System.Text;
using Core; using Core;
using Parser.Parts;
using Tokens; using Tokens;
internal class CommentTokenizer : ITokenizer internal class CommentTokenizer : ITokenizer

View File

@@ -1,12 +1,9 @@
namespace UglyToad.PdfPig.Tokenization namespace UglyToad.PdfPig.Tokenization
{ {
using System.Collections.Generic; using System.Collections.Generic;
using Exceptions;
using Core; using Core;
using Parser.Parts;
using Scanner; using Scanner;
using Tokens; using Tokens;
using Util.JetBrains.Annotations;
internal class DictionaryTokenizer : ITokenizer internal class DictionaryTokenizer : ITokenizer
{ {
@@ -111,7 +108,6 @@
return result; return result;
} }
[CanBeNull]
private static IToken PeekNext(IReadOnlyList<IToken> tokens, int currentIndex) private static IToken PeekNext(IReadOnlyList<IToken> tokens, int currentIndex)
{ {
if (tokens.Count - 1 < currentIndex + 1) if (tokens.Count - 1 < currentIndex + 1)

View File

@@ -3,10 +3,15 @@
using Core; using Core;
using Tokens; using Tokens;
internal class EndOfLineTokenizer : ITokenizer /// <summary>
/// Read an <see cref="EndOfLineToken"/>.
/// </summary>
public class EndOfLineTokenizer : ITokenizer
{ {
/// <inheritdoc />
public bool ReadsNextByte { get; } = false; public bool ReadsNextByte { get; } = false;
/// <inheritdoc />
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token) public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
{ {
token = null; token = null;

View File

@@ -2,7 +2,6 @@
{ {
using System.Collections.Generic; using System.Collections.Generic;
using Core; using Core;
using Parser.Parts;
using Tokens; using Tokens;
internal class HexTokenizer : ITokenizer internal class HexTokenizer : ITokenizer

View File

@@ -0,0 +1,24 @@
namespace UglyToad.PdfPig.Tokenization
{
using Core;
using Tokens;
/// <summary>
/// Reads tokens from input data.
/// </summary>
public interface ITokenizer
{
/// <summary>
/// Whether this tokenizer type reads the byte following the token itself to detect if the token has ended.
/// </summary>
bool ReadsNextByte { get; }
/// <summary>
/// Try to read the token of the corresponding type from the input.
/// </summary>
/// <param name="currentByte">The byte read to detect this is the correct tokenizer to use.</param>
/// <param name="inputBytes">The input data.</param>
/// <param name="token">The token of the corresponding type if read.</param>
/// <returns>
/// Whether a token was read into <paramref name="token"/>.
/// NOTE(review): assumed from the Try-pattern signature; confirm against the implementations.
/// </returns>
bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token);
}
}

View File

@@ -4,7 +4,6 @@
using System.Collections.Generic; using System.Collections.Generic;
using System.Text; using System.Text;
using Core; using Core;
using Parser.Parts;
using Tokens; using Tokens;
internal class NameTokenizer : ITokenizer internal class NameTokenizer : ITokenizer

View File

@@ -2,7 +2,6 @@
{ {
using System.Text; using System.Text;
using Core; using Core;
using Parser.Parts;
using Tokens; using Tokens;
internal class PlainTokenizer : ITokenizer internal class PlainTokenizer : ITokenizer

View File

@@ -3,11 +3,12 @@
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using Core; using Core;
using Exceptions;
using Parser.Parts;
using Tokens; using Tokens;
internal class CoreTokenScanner : ISeekableTokenScanner /// <summary>
/// The default <see cref="ITokenScanner"/> for reading PostScript/PDF style data.
/// </summary>
public class CoreTokenScanner : ISeekableTokenScanner
{ {
private static readonly ArrayTokenizer ArrayTokenizer = new ArrayTokenizer(); private static readonly ArrayTokenizer ArrayTokenizer = new ArrayTokenizer();
private static readonly CommentTokenizer CommentTokenizer = new CommentTokenizer(); private static readonly CommentTokenizer CommentTokenizer = new CommentTokenizer();
@@ -22,10 +23,30 @@
private readonly IInputBytes inputBytes; private readonly IInputBytes inputBytes;
private readonly List<(byte firstByte, ITokenizer tokenizer)> customTokenizers = new List<(byte, ITokenizer)>(); private readonly List<(byte firstByte, ITokenizer tokenizer)> customTokenizers = new List<(byte, ITokenizer)>();
internal long CurrentTokenStart { get; private set; } /// <summary>
/// The offset in the input data at which the <see cref="CurrentToken"/> starts.
/// </summary>
public long CurrentTokenStart { get; private set; }
/// <inheritdoc />
public IToken CurrentToken { get; private set; } public IToken CurrentToken { get; private set; }
/// <inheritdoc />
public long CurrentPosition => inputBytes.CurrentOffset;
private bool hasBytePreRead;
private bool isInInlineImage;
/// <summary>
/// Create a new <see cref="CoreTokenScanner"/> which reads from the provided input.
/// </summary>
/// <param name="inputBytes">The input data to scan. Must not be <see langword="null"/>.</param>
/// <param name="scope">The scope the scanner starts in, defaults to <see cref="ScannerScope.None"/>.</param>
/// <exception cref="ArgumentNullException"><paramref name="inputBytes"/> is <see langword="null"/>.</exception>
public CoreTokenScanner(IInputBytes inputBytes, ScannerScope scope = ScannerScope.None)
{
    // Validate up front so a failed construction never touches instance state.
    if (inputBytes == null)
    {
        throw new ArgumentNullException(nameof(inputBytes));
    }

    this.inputBytes = inputBytes;
    this.scope = scope;
}
/// <inheritdoc />
public bool TryReadToken<T>(out T token) where T : class, IToken public bool TryReadToken<T>(out T token) where T : class, IToken
{ {
token = default(T); token = default(T);
@@ -44,22 +65,13 @@
return false; return false;
} }
/// <inheritdoc />
public void Seek(long position) public void Seek(long position)
{ {
inputBytes.Seek(position); inputBytes.Seek(position);
} }
public long CurrentPosition => inputBytes.CurrentOffset; /// <inheritdoc />
private bool hasBytePreRead;
private bool isInInlineImage;
internal CoreTokenScanner(IInputBytes inputBytes, ScannerScope scope = ScannerScope.None)
{
this.scope = scope;
this.inputBytes = inputBytes ?? throw new ArgumentNullException(nameof(inputBytes));
}
public bool MoveNext() public bool MoveNext()
{ {
var endAngleBracesRead = 0; var endAngleBracesRead = 0;
@@ -191,6 +203,7 @@
return false; return false;
} }
/// <inheritdoc />
public void RegisterCustomTokenizer(byte firstByte, ITokenizer tokenizer) public void RegisterCustomTokenizer(byte firstByte, ITokenizer tokenizer)
{ {
if (tokenizer == null) if (tokenizer == null)
@@ -201,6 +214,7 @@
customTokenizers.Add((firstByte, tokenizer)); customTokenizers.Add((firstByte, tokenizer));
} }
/// <inheritdoc />
public void DeregisterCustomTokenizer(ITokenizer tokenizer) public void DeregisterCustomTokenizer(ITokenizer tokenizer)
{ {
customTokenizers.RemoveAll(x => ReferenceEquals(x.tokenizer, tokenizer)); customTokenizers.RemoveAll(x => ReferenceEquals(x.tokenizer, tokenizer));

View File

@@ -0,0 +1,29 @@
namespace UglyToad.PdfPig.Tokenization.Scanner
{
/// <summary>
/// A <see cref="ITokenScanner" /> that supports seeking in the underlying input data.
/// </summary>
public interface ISeekableTokenScanner : ITokenScanner
{
/// <summary>
/// Move to the specified position.
/// </summary>
/// <param name="position">The position in the input to move to.</param>
void Seek(long position);
/// <summary>
/// The current position in the input.
/// </summary>
long CurrentPosition { get; }
/// <summary>
/// Add support for a custom type of tokenizer.
/// </summary>
/// <param name="firstByte">
/// The byte associated with the tokenizer.
/// NOTE(review): presumably the first byte of a token which selects this tokenizer; confirm against the scanner implementation.
/// </param>
/// <param name="tokenizer">The custom tokenizer to add.</param>
void RegisterCustomTokenizer(byte firstByte, ITokenizer tokenizer);
/// <summary>
/// Remove support for a custom type of tokenizer added with <see cref="RegisterCustomTokenizer"/>.
/// </summary>
/// <param name="tokenizer">The custom tokenizer to remove.</param>
void DeregisterCustomTokenizer(ITokenizer tokenizer);
}
}

View File

@@ -0,0 +1,26 @@
namespace UglyToad.PdfPig.Tokenization.Scanner
{
using Tokens;
/// <summary>
/// Scan input for PostScript/PDF tokens.
/// </summary>
public interface ITokenScanner
{
/// <summary>
/// Read the next token in the input.
/// </summary>
/// <returns>
/// Whether a token was read.
/// NOTE(review): presumably <see langword="false"/> once the input is exhausted; confirm against the implementations.
/// </returns>
bool MoveNext();
/// <summary>
/// The currently read token.
/// </summary>
IToken CurrentToken { get; }
/// <summary>
/// Try reading a token of the specific type.
/// </summary>
/// <typeparam name="T">The type of token to read.</typeparam>
/// <param name="token">The token of type <typeparamref name="T"/> if read.</param>
/// <returns>
/// Whether a token of type <typeparamref name="T"/> was read.
/// NOTE(review): assumed from the Try-pattern signature; confirm against the implementations.
/// </returns>
bool TryReadToken<T>(out T token) where T : class, IToken;
}
}

View File

@@ -0,0 +1,21 @@
namespace UglyToad.PdfPig.Tokenization.Scanner
{
/// <summary>
/// The current scope of the <see cref="ITokenScanner"/>.
/// </summary>
/// <remarks>
/// The numeric values are assigned explicitly, with <see cref="None"/> as the zero/default value.
/// </remarks>
public enum ScannerScope
{
/// <summary>
/// Reading normally.
/// </summary>
None = 0,
/// <summary>
/// Reading inside an array.
/// </summary>
Array = 1,
/// <summary>
/// Reading inside a dictionary.
/// </summary>
Dictionary = 2
}
}

View File

@@ -2,9 +2,7 @@
{ {
using System.Text; using System.Text;
using Core; using Core;
using Parser.Parts;
using Tokens; using Tokens;
using Util;
internal class StringTokenizer : ITokenizer internal class StringTokenizer : ITokenizer
{ {

View File

@@ -0,0 +1,23 @@
<Project Sdk="Microsoft.NET.Sdk">
  <!-- Tokenization project: depends on the Core and Tokens projects referenced below. -->
  <PropertyGroup>
    <TargetFrameworks>netstandard2.0;net45;net451;net452;net46;net461;net462;net47</TargetFrameworks>
    <LangVersion>latest</LangVersion>
  </PropertyGroup>
  <!-- Debug builds for netstandard2.0 treat warnings as errors and emit the XML documentation file. -->
  <PropertyGroup Condition="'$(Configuration)|$(TargetFramework)|$(Platform)'=='Debug|netstandard2.0|AnyCPU'">
    <TreatWarningsAsErrors>true</TreatWarningsAsErrors>
    <WarningsAsErrors />
    <!-- The documentation file is named after this project's assembly (previously copy-pasted as the Core project's file name). -->
    <DocumentationFile>obj\Debug\netstandard2.0\UglyToad.PdfPig.Tokenization.xml</DocumentationFile>
  </PropertyGroup>
  <ItemGroup>
    <ProjectReference Include="..\UglyToad.PdfPig.Core\UglyToad.PdfPig.Core.csproj" />
    <ProjectReference Include="..\UglyToad.PdfPig.Tokens\UglyToad.PdfPig.Tokens.csproj" />
  </ItemGroup>
  <!-- The .NET Framework targets take value tuple support from the System.ValueTuple package. -->
  <ItemGroup Condition="'$(TargetFramework)'=='net45' OR '$(TargetFramework)'=='net451' OR '$(TargetFramework)'=='net452' OR '$(TargetFramework)'=='net46' OR '$(TargetFramework)'=='net461' OR '$(TargetFramework)'=='net462' OR '$(TargetFramework)'=='net47'">
    <PackageReference Include="System.ValueTuple" Version="4.5.0" />
  </ItemGroup>
</Project>

View File

@@ -20,6 +20,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "UglyToad.PdfPig.DocumentLay
EndProject EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "UglyToad.PdfPig.Tokens", "UglyToad.PdfPig.Tokens\UglyToad.PdfPig.Tokens.csproj", "{D840FF69-4250-4B05-9829-5ABEC43EC82C}" Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "UglyToad.PdfPig.Tokens", "UglyToad.PdfPig.Tokens\UglyToad.PdfPig.Tokens.csproj", "{D840FF69-4250-4B05-9829-5ABEC43EC82C}"
EndProject EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "UglyToad.PdfPig.Tokenization", "UglyToad.PdfPig.Tokenization\UglyToad.PdfPig.Tokenization.csproj", "{FD005C50-CD2C-497E-8F7E-6D791091E9B0}"
EndProject
Global Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU Debug|Any CPU = Debug|Any CPU
@@ -50,6 +52,10 @@ Global
{D840FF69-4250-4B05-9829-5ABEC43EC82C}.Debug|Any CPU.Build.0 = Debug|Any CPU {D840FF69-4250-4B05-9829-5ABEC43EC82C}.Debug|Any CPU.Build.0 = Debug|Any CPU
{D840FF69-4250-4B05-9829-5ABEC43EC82C}.Release|Any CPU.ActiveCfg = Release|Any CPU {D840FF69-4250-4B05-9829-5ABEC43EC82C}.Release|Any CPU.ActiveCfg = Release|Any CPU
{D840FF69-4250-4B05-9829-5ABEC43EC82C}.Release|Any CPU.Build.0 = Release|Any CPU {D840FF69-4250-4B05-9829-5ABEC43EC82C}.Release|Any CPU.Build.0 = Release|Any CPU
{FD005C50-CD2C-497E-8F7E-6D791091E9B0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{FD005C50-CD2C-497E-8F7E-6D791091E9B0}.Debug|Any CPU.Build.0 = Debug|Any CPU
{FD005C50-CD2C-497E-8F7E-6D791091E9B0}.Release|Any CPU.ActiveCfg = Release|Any CPU
{FD005C50-CD2C-497E-8F7E-6D791091E9B0}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection EndGlobalSection
GlobalSection(SolutionProperties) = preSolution GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE HideSolutionNode = FALSE

View File

@@ -1,6 +1,6 @@
namespace UglyToad.PdfPig.PdfFonts.Encodings namespace UglyToad.PdfPig.PdfFonts.Encodings
{ {
using Util; using Core;
internal class MacExpertEncoding : Encoding internal class MacExpertEncoding : Encoding
{ {

View File

@@ -1,6 +1,6 @@
namespace UglyToad.PdfPig.PdfFonts.Encodings namespace UglyToad.PdfPig.PdfFonts.Encodings
{ {
using Util; using Core;
/// <summary> /// <summary>
/// Similar to the <see cref="MacRomanEncoding"/> with 15 additional entries. /// Similar to the <see cref="MacRomanEncoding"/> with 15 additional entries.

View File

@@ -1,6 +1,6 @@
namespace UglyToad.PdfPig.PdfFonts.Encodings namespace UglyToad.PdfPig.PdfFonts.Encodings
{ {
using Util; using Core;
internal class MacRomanEncoding : Encoding internal class MacRomanEncoding : Encoding
{ {

View File

@@ -1,6 +1,6 @@
namespace UglyToad.PdfPig.PdfFonts.Encodings namespace UglyToad.PdfPig.PdfFonts.Encodings
{ {
using Util; using Core;
internal class StandardEncoding : Encoding internal class StandardEncoding : Encoding
{ {

View File

@@ -1,6 +1,6 @@
namespace UglyToad.PdfPig.PdfFonts.Encodings namespace UglyToad.PdfPig.PdfFonts.Encodings
{ {
using Util; using Core;
internal class WinAnsiEncoding : Encoding internal class WinAnsiEncoding : Encoding
{ {

View File

@@ -5,6 +5,7 @@
using Core; using Core;
using Encodings; using Encodings;
using Fonts; using Fonts;
using Fonts.Type1;
using Tokenization; using Tokenization;
using Tokenization.Scanner; using Tokenization.Scanner;
using Tokens; using Tokens;

View File

@@ -1,12 +0,0 @@
namespace UglyToad.PdfPig.Tokenization
{
using Core;
using Tokens;
internal interface ITokenizer
{
bool ReadsNextByte { get; }
bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token);
}
}

View File

@@ -1,24 +0,0 @@
namespace UglyToad.PdfPig.Tokenization.Scanner
{
using Tokens;
internal interface ITokenScanner
{
bool MoveNext();
IToken CurrentToken { get; }
bool TryReadToken<T>(out T token) where T : class, IToken;
}
internal interface ISeekableTokenScanner : ITokenScanner
{
void Seek(long position);
long CurrentPosition { get; }
void RegisterCustomTokenizer(byte firstByte, ITokenizer tokenizer);
void DeregisterCustomTokenizer(ITokenizer tokenizer);
}
}

View File

@@ -1,9 +0,0 @@
namespace UglyToad.PdfPig.Tokenization.Scanner
{
internal enum ScannerScope
{
None,
Array,
Dictionary
}
}

View File

@@ -52,6 +52,7 @@
<ItemGroup> <ItemGroup>
<ProjectReference Include="..\UglyToad.PdfPig.Core\UglyToad.PdfPig.Core.csproj" /> <ProjectReference Include="..\UglyToad.PdfPig.Core\UglyToad.PdfPig.Core.csproj" />
<ProjectReference Include="..\UglyToad.PdfPig.Fonts\UglyToad.PdfPig.Fonts.csproj" /> <ProjectReference Include="..\UglyToad.PdfPig.Fonts\UglyToad.PdfPig.Fonts.csproj" />
<ProjectReference Include="..\UglyToad.PdfPig.Tokenization\UglyToad.PdfPig.Tokenization.csproj" />
<ProjectReference Include="..\UglyToad.PdfPig.Tokens\UglyToad.PdfPig.Tokens.csproj" /> <ProjectReference Include="..\UglyToad.PdfPig.Tokens\UglyToad.PdfPig.Tokens.csproj" />
</ItemGroup> </ItemGroup>