mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-09-19 02:37:56 +08:00
handle %%EOF further back in file
An %%EOF marker for a PDF file may appear further back than the last 1024 bytes. This change doubles the search range to 2048 bytes. It also handles an empty differences array being defined for a font encoding. We also remove the old approach to dependency injection from the code, since we are now favouring static classes where possible.
This commit is contained in:
@@ -6,10 +6,8 @@
|
||||
using PdfPig.Tokenization.Scanner;
|
||||
using Xunit;
|
||||
|
||||
public class FileTrailerParserTests
|
||||
public class FileTrailerFileTrailerParserTests
|
||||
{
|
||||
private readonly FileTrailerParser parser = new FileTrailerParser();
|
||||
|
||||
[Fact]
|
||||
public void FindsCompliantStartXref()
|
||||
{
|
||||
@@ -25,7 +23,7 @@ startxref
|
||||
|
||||
%%EOF", false);
|
||||
|
||||
var result = parser.GetFirstCrossReferenceOffset(input.Bytes, new CoreTokenScanner(input.Bytes), false);
|
||||
var result = FileTrailerParser.GetFirstCrossReferenceOffset(input.Bytes, new CoreTokenScanner(input.Bytes), false);
|
||||
|
||||
Assert.Equal(456, result);
|
||||
}
|
||||
@@ -51,7 +49,7 @@ startxref
|
||||
startxref
|
||||
17", false);
|
||||
|
||||
var result = parser.GetFirstCrossReferenceOffset(input.Bytes, new CoreTokenScanner(input.Bytes), false);
|
||||
var result = FileTrailerParser.GetFirstCrossReferenceOffset(input.Bytes, new CoreTokenScanner(input.Bytes), false);
|
||||
|
||||
Assert.Equal(17, result);
|
||||
}
|
||||
@@ -77,7 +75,7 @@ startref
|
||||
start_rexf
|
||||
17", false);
|
||||
|
||||
Action action = () => parser.GetFirstCrossReferenceOffset(input.Bytes, new CoreTokenScanner(input.Bytes), false);
|
||||
Action action = () => FileTrailerParser.GetFirstCrossReferenceOffset(input.Bytes, new CoreTokenScanner(input.Bytes), false);
|
||||
|
||||
Assert.Throws<PdfDocumentFormatException>(action);
|
||||
}
|
||||
@@ -87,7 +85,7 @@ start_rexf
|
||||
{
|
||||
var input = StringBytesTestConverter.Convert("11 0 obj", false);
|
||||
|
||||
Action action = () => parser.GetFirstCrossReferenceOffset(null, new CoreTokenScanner(input.Bytes), false);
|
||||
Action action = () => FileTrailerParser.GetFirstCrossReferenceOffset(null, new CoreTokenScanner(input.Bytes), false);
|
||||
|
||||
Assert.Throws<ArgumentNullException>(action);
|
||||
}
|
||||
@@ -97,7 +95,7 @@ start_rexf
|
||||
{
|
||||
var input = StringBytesTestConverter.Convert("11 0 obj", false);
|
||||
|
||||
Action action = () => parser.GetFirstCrossReferenceOffset(input.Bytes, null, false);
|
||||
Action action = () => FileTrailerParser.GetFirstCrossReferenceOffset(input.Bytes, null, false);
|
||||
|
||||
Assert.Throws<ArgumentNullException>(action);
|
||||
}
|
||||
@@ -113,7 +111,7 @@ startxref
|
||||
<< /Why (am i here?) >> 69
|
||||
%EOF", false);
|
||||
|
||||
Action action = () => parser.GetFirstCrossReferenceOffset(input.Bytes, new CoreTokenScanner(input.Bytes), false);
|
||||
Action action = () => FileTrailerParser.GetFirstCrossReferenceOffset(input.Bytes, new CoreTokenScanner(input.Bytes), false);
|
||||
|
||||
Assert.Throws<PdfDocumentFormatException>(action);
|
||||
}
|
||||
@@ -128,7 +126,7 @@ endobj
|
||||
startxref
|
||||
", false);
|
||||
|
||||
Action action = () => parser.GetFirstCrossReferenceOffset(input.Bytes, new CoreTokenScanner(input.Bytes), false);
|
||||
Action action = () => FileTrailerParser.GetFirstCrossReferenceOffset(input.Bytes, new CoreTokenScanner(input.Bytes), false);
|
||||
|
||||
Assert.Throws<PdfDocumentFormatException>(action);
|
||||
}
|
||||
@@ -154,7 +152,7 @@ startxref
|
||||
|
||||
%%EOF", false);
|
||||
|
||||
var result = parser.GetFirstCrossReferenceOffset(input.Bytes, new CoreTokenScanner(input.Bytes), false);
|
||||
var result = FileTrailerParser.GetFirstCrossReferenceOffset(input.Bytes, new CoreTokenScanner(input.Bytes), false);
|
||||
|
||||
Assert.Equal(1274665676543, result);
|
||||
}
|
||||
@@ -168,7 +166,7 @@ startxref %Commented here
|
||||
|
||||
%%EOF", false);
|
||||
|
||||
var result = parser.GetFirstCrossReferenceOffset(input.Bytes, new CoreTokenScanner(input.Bytes), false);
|
||||
var result = FileTrailerParser.GetFirstCrossReferenceOffset(input.Bytes, new CoreTokenScanner(input.Bytes), false);
|
||||
|
||||
Assert.Equal(57695, result);
|
||||
}
|
||||
|
@@ -1,7 +1,7 @@
|
||||
namespace UglyToad.PdfPig.Tokens
|
||||
{
|
||||
/// <summary>
|
||||
/// Represents an End Of Line marker found in Adobe Type 1 font files. This is not used by the main PDF tokenizer.
|
||||
/// Represents an End Of Line marker found in Adobe Type 1 font files and the cross-reference table.
|
||||
/// </summary>
|
||||
public class EndOfLineToken : IToken
|
||||
{
|
||||
|
@@ -3,7 +3,6 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using Core;
|
||||
using Exceptions;
|
||||
using Tokenization.Scanner;
|
||||
using Tokens;
|
||||
|
||||
@@ -20,12 +19,12 @@
|
||||
* %%EOF
|
||||
*/
|
||||
|
||||
internal class FileTrailerParser
|
||||
internal static class FileTrailerParser
|
||||
{
|
||||
/// <summary>
|
||||
/// Acrobat viewers require the EOF to be in the last 1024 bytes instead of at the end.
|
||||
/// The %%EOF may be further back in the file.
|
||||
/// </summary>
|
||||
private const int EndOfFileSearchRange = 1024;
|
||||
private const int EndOfFileSearchRange = 2048;
|
||||
|
||||
private static readonly byte[] StartXRefBytes =
|
||||
{
|
||||
@@ -40,7 +39,7 @@
|
||||
(byte) 'f'
|
||||
};
|
||||
|
||||
public long GetFirstCrossReferenceOffset(IInputBytes bytes, ISeekableTokenScanner scanner, bool isLenientParsing)
|
||||
public static long GetFirstCrossReferenceOffset(IInputBytes bytes, ISeekableTokenScanner scanner, bool isLenientParsing)
|
||||
{
|
||||
if (bytes == null)
|
||||
{
|
||||
|
@@ -23,7 +23,6 @@
|
||||
using PdfFonts.Parser.Parts;
|
||||
using Tokenization.Scanner;
|
||||
using Tokens;
|
||||
using Util;
|
||||
|
||||
internal static class PdfDocumentFactory
|
||||
{
|
||||
@@ -53,8 +52,6 @@
|
||||
|
||||
private static PdfDocument Open(IInputBytes inputBytes, ParsingOptions options = null)
|
||||
{
|
||||
var container = Bootstrapper.GenerateContainer(options?.Logger);
|
||||
|
||||
var isLenientParsing = options?.UseLenientParsing ?? true;
|
||||
|
||||
var tokenScanner = new CoreTokenScanner(inputBytes);
|
||||
@@ -76,15 +73,14 @@
|
||||
passwords.Add(string.Empty);
|
||||
}
|
||||
|
||||
var document = OpenDocument(inputBytes, tokenScanner, container, isLenientParsing, passwords);
|
||||
var document = OpenDocument(inputBytes, tokenScanner, options?.Logger ?? new NoOpLog(), isLenientParsing, passwords);
|
||||
|
||||
return document;
|
||||
}
|
||||
|
||||
private static PdfDocument OpenDocument(IInputBytes inputBytes, ISeekableTokenScanner scanner, IContainer container, bool isLenientParsing, IReadOnlyList<string> passwords)
|
||||
private static PdfDocument OpenDocument(IInputBytes inputBytes, ISeekableTokenScanner scanner, ILog log, bool isLenientParsing, IReadOnlyList<string> passwords)
|
||||
{
|
||||
var log = container.Get<ILog>();
|
||||
var filterProvider = container.Get<IFilterProvider>();
|
||||
var filterProvider = new MemoryFilterProvider(new DecodeParameterResolver(log), new PngPredictor(), log);
|
||||
|
||||
CrossReferenceTable crossReferenceTable = null;
|
||||
|
||||
@@ -102,7 +98,7 @@
|
||||
|
||||
var version = FileHeaderParser.Parse(scanner, isLenientParsing, log);
|
||||
|
||||
var crossReferenceOffset = container.Get<FileTrailerParser>().GetFirstCrossReferenceOffset(inputBytes, scanner, isLenientParsing);
|
||||
var crossReferenceOffset = FileTrailerParser.GetFirstCrossReferenceOffset(inputBytes, scanner, isLenientParsing);
|
||||
|
||||
// TODO: make this use the scanner.
|
||||
var validator = new CrossReferenceOffsetValidator(xrefValidator);
|
||||
|
@@ -69,9 +69,15 @@
|
||||
|
||||
private static IReadOnlyList<(int, string)> ProcessDifferences(ArrayToken differenceArray)
|
||||
{
|
||||
var activeCode = differenceArray.GetNumeric(0).Int;
|
||||
var differences = new List<(int, string)>();
|
||||
|
||||
if (differenceArray.Length == 0)
|
||||
{
|
||||
return differences;
|
||||
}
|
||||
|
||||
var activeCode = differenceArray.GetNumeric(0).Int;
|
||||
|
||||
for (int i = 1; i < differenceArray.Data.Count; i++)
|
||||
{
|
||||
var entry = differenceArray.Data[i];
|
||||
|
@@ -1,43 +0,0 @@
|
||||
namespace UglyToad.PdfPig.Util
|
||||
{
|
||||
using Filters;
|
||||
using Logging;
|
||||
using Parser.FileStructure;
|
||||
using PdfFonts.Parser;
|
||||
|
||||
internal static class Bootstrapper
|
||||
{
|
||||
private static IContainer _testContainer;
|
||||
|
||||
internal static void SetTestContainer(IContainer container)
|
||||
{
|
||||
_testContainer = container;
|
||||
}
|
||||
|
||||
public static IContainer GenerateContainer(ILog logger)
|
||||
{
|
||||
if (_testContainer != null)
|
||||
{
|
||||
return _testContainer;
|
||||
}
|
||||
|
||||
if (logger == null)
|
||||
{
|
||||
logger = new NoOpLog();
|
||||
}
|
||||
|
||||
var trailerParser = new FileTrailerParser();
|
||||
var filterProvider = new MemoryFilterProvider(new DecodeParameterResolver(logger), new PngPredictor(), logger);
|
||||
|
||||
var cmapParser = new CMapParser();
|
||||
|
||||
var container = new Container();
|
||||
container.Register(trailerParser);
|
||||
container.Register(filterProvider);
|
||||
container.Register(cmapParser);
|
||||
container.Register(logger);
|
||||
|
||||
return container;
|
||||
}
|
||||
}
|
||||
}
|
@@ -1,38 +0,0 @@
|
||||
namespace UglyToad.PdfPig.Util
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
|
||||
internal class Container : IContainer
|
||||
{
|
||||
private readonly Dictionary<Type, object> objects = new Dictionary<Type, object>();
|
||||
|
||||
public void Register<T>(T obj)
|
||||
{
|
||||
if (obj == null)
|
||||
{
|
||||
throw new ArgumentNullException(nameof(obj), "Cannot register a null object with the container. Type was: " + typeof(T));
|
||||
}
|
||||
|
||||
objects[typeof(T)] = obj;
|
||||
var interfaces = typeof(T).GetInterfaces();
|
||||
|
||||
foreach (var @interface in interfaces)
|
||||
{
|
||||
objects[@interface] = obj;
|
||||
}
|
||||
}
|
||||
|
||||
[DebuggerStepThrough]
|
||||
public T Get<T>()
|
||||
{
|
||||
if (!objects.TryGetValue(typeof(T), out var obj))
|
||||
{
|
||||
throw new InvalidOperationException($"The type {typeof(T)} was not registered with the container.");
|
||||
}
|
||||
|
||||
return (T) obj;
|
||||
}
|
||||
}
|
||||
}
|
@@ -1,7 +0,0 @@
|
||||
namespace UglyToad.PdfPig.Util
|
||||
{
|
||||
internal interface IContainer
|
||||
{
|
||||
T Get<T>();
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user