handle eof further back in file

An %%EOF marker for a PDF file may appear further back than the last 1024 bytes. This change doubles the search range to 2048 bytes. It also handles an empty differences array being defined for a font encoding.

We also remove the old approach to dependency injection from the code, since we now favour static classes where possible.
This commit is contained in:
Eliot Jones
2020-01-07 11:12:37 +00:00
parent 3c19b988e2
commit 0b048fde57
8 changed files with 26 additions and 115 deletions

View File

@@ -6,10 +6,8 @@
using PdfPig.Tokenization.Scanner;
using Xunit;
public class FileTrailerParserTests
public class FileTrailerFileTrailerParserTests
{
private readonly FileTrailerParser parser = new FileTrailerParser();
[Fact]
public void FindsCompliantStartXref()
{
@@ -25,7 +23,7 @@ startxref
%%EOF", false);
var result = parser.GetFirstCrossReferenceOffset(input.Bytes, new CoreTokenScanner(input.Bytes), false);
var result = FileTrailerParser.GetFirstCrossReferenceOffset(input.Bytes, new CoreTokenScanner(input.Bytes), false);
Assert.Equal(456, result);
}
@@ -51,7 +49,7 @@ startxref
startxref
17", false);
var result = parser.GetFirstCrossReferenceOffset(input.Bytes, new CoreTokenScanner(input.Bytes), false);
var result = FileTrailerParser.GetFirstCrossReferenceOffset(input.Bytes, new CoreTokenScanner(input.Bytes), false);
Assert.Equal(17, result);
}
@@ -77,7 +75,7 @@ startref
start_rexf
17", false);
Action action = () => parser.GetFirstCrossReferenceOffset(input.Bytes, new CoreTokenScanner(input.Bytes), false);
Action action = () => FileTrailerParser.GetFirstCrossReferenceOffset(input.Bytes, new CoreTokenScanner(input.Bytes), false);
Assert.Throws<PdfDocumentFormatException>(action);
}
@@ -87,7 +85,7 @@ start_rexf
{
var input = StringBytesTestConverter.Convert("11 0 obj", false);
Action action = () => parser.GetFirstCrossReferenceOffset(null, new CoreTokenScanner(input.Bytes), false);
Action action = () => FileTrailerParser.GetFirstCrossReferenceOffset(null, new CoreTokenScanner(input.Bytes), false);
Assert.Throws<ArgumentNullException>(action);
}
@@ -97,7 +95,7 @@ start_rexf
{
var input = StringBytesTestConverter.Convert("11 0 obj", false);
Action action = () => parser.GetFirstCrossReferenceOffset(input.Bytes, null, false);
Action action = () => FileTrailerParser.GetFirstCrossReferenceOffset(input.Bytes, null, false);
Assert.Throws<ArgumentNullException>(action);
}
@@ -113,7 +111,7 @@ startxref
<< /Why (am i here?) >> 69
%EOF", false);
Action action = () => parser.GetFirstCrossReferenceOffset(input.Bytes, new CoreTokenScanner(input.Bytes), false);
Action action = () => FileTrailerParser.GetFirstCrossReferenceOffset(input.Bytes, new CoreTokenScanner(input.Bytes), false);
Assert.Throws<PdfDocumentFormatException>(action);
}
@@ -128,7 +126,7 @@ endobj
startxref
", false);
Action action = () => parser.GetFirstCrossReferenceOffset(input.Bytes, new CoreTokenScanner(input.Bytes), false);
Action action = () => FileTrailerParser.GetFirstCrossReferenceOffset(input.Bytes, new CoreTokenScanner(input.Bytes), false);
Assert.Throws<PdfDocumentFormatException>(action);
}
@@ -154,7 +152,7 @@ startxref
%%EOF", false);
var result = parser.GetFirstCrossReferenceOffset(input.Bytes, new CoreTokenScanner(input.Bytes), false);
var result = FileTrailerParser.GetFirstCrossReferenceOffset(input.Bytes, new CoreTokenScanner(input.Bytes), false);
Assert.Equal(1274665676543, result);
}
@@ -168,7 +166,7 @@ startxref %Commented here
%%EOF", false);
var result = parser.GetFirstCrossReferenceOffset(input.Bytes, new CoreTokenScanner(input.Bytes), false);
var result = FileTrailerParser.GetFirstCrossReferenceOffset(input.Bytes, new CoreTokenScanner(input.Bytes), false);
Assert.Equal(57695, result);
}

View File

@@ -1,7 +1,7 @@
namespace UglyToad.PdfPig.Tokens
{
/// <summary>
/// Represents an End Of Line marker found in Adobe Type 1 font files. This is not used by the main PDF tokenizer.
/// Represents an End Of Line marker found in Adobe Type 1 font files and the cross-reference table.
/// </summary>
public class EndOfLineToken : IToken
{

View File

@@ -3,7 +3,6 @@
using System;
using System.Collections.Generic;
using Core;
using Exceptions;
using Tokenization.Scanner;
using Tokens;
@@ -20,12 +19,12 @@
* %%EOF
*/
internal class FileTrailerParser
internal static class FileTrailerParser
{
/// <summary>
/// Acrobat viewers require the EOF to be in the last 1024 bytes instead of at the end.
/// The %%EOF may be further back in the file.
/// </summary>
private const int EndOfFileSearchRange = 1024;
private const int EndOfFileSearchRange = 2048;
private static readonly byte[] StartXRefBytes =
{
@@ -40,7 +39,7 @@
(byte) 'f'
};
public long GetFirstCrossReferenceOffset(IInputBytes bytes, ISeekableTokenScanner scanner, bool isLenientParsing)
public static long GetFirstCrossReferenceOffset(IInputBytes bytes, ISeekableTokenScanner scanner, bool isLenientParsing)
{
if (bytes == null)
{

View File

@@ -23,7 +23,6 @@
using PdfFonts.Parser.Parts;
using Tokenization.Scanner;
using Tokens;
using Util;
internal static class PdfDocumentFactory
{
@@ -53,8 +52,6 @@
private static PdfDocument Open(IInputBytes inputBytes, ParsingOptions options = null)
{
var container = Bootstrapper.GenerateContainer(options?.Logger);
var isLenientParsing = options?.UseLenientParsing ?? true;
var tokenScanner = new CoreTokenScanner(inputBytes);
@@ -76,15 +73,14 @@
passwords.Add(string.Empty);
}
var document = OpenDocument(inputBytes, tokenScanner, container, isLenientParsing, passwords);
var document = OpenDocument(inputBytes, tokenScanner, options?.Logger ?? new NoOpLog(), isLenientParsing, passwords);
return document;
}
private static PdfDocument OpenDocument(IInputBytes inputBytes, ISeekableTokenScanner scanner, IContainer container, bool isLenientParsing, IReadOnlyList<string> passwords)
private static PdfDocument OpenDocument(IInputBytes inputBytes, ISeekableTokenScanner scanner, ILog log, bool isLenientParsing, IReadOnlyList<string> passwords)
{
var log = container.Get<ILog>();
var filterProvider = container.Get<IFilterProvider>();
var filterProvider = new MemoryFilterProvider(new DecodeParameterResolver(log), new PngPredictor(), log);
CrossReferenceTable crossReferenceTable = null;
@@ -102,7 +98,7 @@
var version = FileHeaderParser.Parse(scanner, isLenientParsing, log);
var crossReferenceOffset = container.Get<FileTrailerParser>().GetFirstCrossReferenceOffset(inputBytes, scanner, isLenientParsing);
var crossReferenceOffset = FileTrailerParser.GetFirstCrossReferenceOffset(inputBytes, scanner, isLenientParsing);
// TODO: make this use the scanner.
var validator = new CrossReferenceOffsetValidator(xrefValidator);

View File

@@ -69,9 +69,15 @@
private static IReadOnlyList<(int, string)> ProcessDifferences(ArrayToken differenceArray)
{
var activeCode = differenceArray.GetNumeric(0).Int;
var differences = new List<(int, string)>();
if (differenceArray.Length == 0)
{
return differences;
}
var activeCode = differenceArray.GetNumeric(0).Int;
for (int i = 1; i < differenceArray.Data.Count; i++)
{
var entry = differenceArray.Data[i];

View File

@@ -1,43 +0,0 @@
namespace UglyToad.PdfPig.Util
{
using Filters;
using Logging;
using Parser.FileStructure;
using PdfFonts.Parser;
/// <summary>
/// Builds the <see cref="IContainer"/> holding the shared parser, filter provider and logger instances.
/// A container supplied through <see cref="SetTestContainer"/> takes precedence over a freshly built one.
/// </summary>
internal static class Bootstrapper
{
    private static IContainer _testContainer;

    /// <summary>
    /// Stores a container which <see cref="GenerateContainer"/> will return instead of building a new one.
    /// </summary>
    /// <param name="container">The container to return from subsequent calls; null restores normal behaviour.</param>
    internal static void SetTestContainer(IContainer container) => _testContainer = container;

    /// <summary>
    /// Returns the override container if one is set, otherwise creates and populates a new container.
    /// </summary>
    /// <param name="logger">The logger to register; a <see cref="NoOpLog"/> is substituted when null.</param>
    /// <returns>The container to resolve dependencies from.</returns>
    public static IContainer GenerateContainer(ILog logger)
    {
        var overrideContainer = _testContainer;
        if (overrideContainer != null)
        {
            return overrideContainer;
        }

        // Declared as ILog so the registration below is keyed on the interface type.
        ILog log = logger ?? new NoOpLog();

        var fileTrailerParser = new FileTrailerParser();
        var memoryFilterProvider = new MemoryFilterProvider(new DecodeParameterResolver(log), new PngPredictor(), log);
        var characterMapParser = new CMapParser();

        var result = new Container();
        result.Register(fileTrailerParser);
        result.Register(memoryFilterProvider);
        result.Register(characterMapParser);
        result.Register(log);

        return result;
    }
}
}

View File

@@ -1,38 +0,0 @@
namespace UglyToad.PdfPig.Util
{
using System;
using System.Collections.Generic;
using System.Diagnostics;
/// <summary>
/// A simple type-keyed instance store. Each registered object is retrievable by the type it was
/// registered as and by every interface that type implements.
/// </summary>
internal class Container : IContainer
{
    private readonly Dictionary<Type, object> registrations = new Dictionary<Type, object>();

    /// <summary>
    /// Registers <paramref name="obj"/> under <typeparamref name="T"/> and each interface of <typeparamref name="T"/>.
    /// An existing entry for the same key is overwritten.
    /// </summary>
    /// <typeparam name="T">The type to register the instance as.</typeparam>
    /// <param name="obj">The instance to store.</param>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="obj"/> is null.</exception>
    public void Register<T>(T obj)
    {
        if (obj == null)
        {
            throw new ArgumentNullException(nameof(obj), "Cannot register a null object with the container. Type was: " + typeof(T));
        }

        var registeredType = typeof(T);
        registrations[registeredType] = obj;

        // Also expose the instance through every interface of the registered type.
        foreach (var implemented in registeredType.GetInterfaces())
        {
            registrations[implemented] = obj;
        }
    }

    /// <summary>
    /// Retrieves the instance stored under <typeparamref name="T"/>.
    /// </summary>
    /// <typeparam name="T">The type key to look up.</typeparam>
    /// <returns>The stored instance cast to <typeparamref name="T"/>.</returns>
    /// <exception cref="InvalidOperationException">Thrown when nothing is stored under <typeparamref name="T"/>.</exception>
    [DebuggerStepThrough]
    public T Get<T>()
    {
        if (registrations.TryGetValue(typeof(T), out var found))
        {
            return (T) found;
        }

        throw new InvalidOperationException($"The type {typeof(T)} was not registered with the container.");
    }
}
}

View File

@@ -1,7 +0,0 @@
namespace UglyToad.PdfPig.Util
{
/// <summary>
/// Resolves object instances by their type.
/// </summary>
internal interface IContainer
{
/// <summary>
/// Gets the instance associated with <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">The type of the instance to retrieve.</typeparam>
/// <returns>An instance of <typeparamref name="T"/>.</returns>
/// <remarks>
/// NOTE(review): the behaviour for a type with no associated instance is not specified by this
/// interface; confirm against the implementing class.
/// </remarks>
T Get<T>();
}
}