mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-10-15 19:54:52 +08:00
remove all old parsing logic
This commit is contained in:
@@ -1,57 +0,0 @@
|
||||
// ReSharper disable ObjectCreationAsStatement
|
||||
|
||||
namespace UglyToad.PdfPig.Tests.Parser.Parts
|
||||
{
|
||||
using System;
|
||||
using IO;
|
||||
using PdfPig.Cos;
|
||||
using PdfPig.Parser.Parts;
|
||||
using Xunit;
|
||||
|
||||
/// <summary>
/// Argument-validation tests for <see cref="CosDictionaryParser"/>: the constructor and
/// <c>Parse</c> are each expected to reject a null argument with <see cref="ArgumentNullException"/>.
/// </summary>
public class CosDictionaryParserTests
{
    private readonly CosNameParser names = new CosNameParser();
    private readonly CosDictionaryParser sut;

    public CosDictionaryParserTests()
    {
        sut = new CosDictionaryParser(names, new TestingLog());
    }

    [Fact]
    public void NameParserIsNull_Throws()
    {
        ExpectArgumentNull(() => new CosDictionaryParser(null, new TestingLog()));
    }

    [Fact]
    public void RandomAccessReadIsNull_Throws()
    {
        var baseParser = CreateBaseParser();

        ExpectArgumentNull(() => sut.Parse(null, baseParser, new CosObjectPool()));
    }

    [Fact]
    public void BaseParserIsNull_Throws()
    {
        ExpectArgumentNull(() => sut.Parse(new RandomAccessBuffer(), null, new CosObjectPool()));
    }

    [Fact]
    public void DocumentIsNull_Throws()
    {
        var baseParser = CreateBaseParser();

        ExpectArgumentNull(() => sut.Parse(new RandomAccessBuffer(), baseParser, null));
    }

    // Builds a base parser wired to the dictionary parser under test.
    private CosBaseParser CreateBaseParser()
    {
        return new CosBaseParser(names, new CosStringParser(), sut, new CosArrayParser());
    }

    // Taking an Action (rather than passing the lambda straight to xUnit) keeps the
    // object-creation lambda bound to the Action overload of Assert.Throws.
    private static void ExpectArgumentNull(Action action)
    {
        Assert.Throws<ArgumentNullException>(action);
    }
}
|
||||
}
|
@@ -1,23 +0,0 @@
|
||||
namespace UglyToad.PdfPig.Tests
|
||||
{
|
||||
using IO;
|
||||
using PdfPig.ContentStream;
|
||||
using PdfPig.Cos;
|
||||
using PdfPig.Parser.Parts;
|
||||
|
||||
/// <summary>
/// Test double for <see cref="IDictionaryParser"/>: ignores all of its inputs and
/// hands back a new, empty <see cref="PdfDictionary"/> on every call.
/// </summary>
internal class TestDictionaryParser : IDictionaryParser
{
    /// <summary>Returns a fresh empty dictionary; none of the arguments are read.</summary>
    public PdfDictionary Parse(IRandomAccessRead reader, IBaseParser baseParser, CosObjectPool pool)
        => new PdfDictionary();
}
|
||||
|
||||
/// <summary>
/// Test double for <see cref="IBaseParser"/>: ignores all of its inputs and always
/// yields <see cref="CosNull.Null"/>.
/// </summary>
internal class TestBaseParser : IBaseParser
{
    /// <summary>Returns <see cref="CosNull.Null"/>; neither argument is read.</summary>
    public CosBase Parse(IRandomAccessRead reader, CosObjectPool pool)
        => CosNull.Null;
}
|
||||
}
|
@@ -1,5 +1,8 @@
|
||||
namespace UglyToad.PdfPig.Parser.FileStructure
|
||||
{
|
||||
using IO;
|
||||
using Tokenization.Scanner;
|
||||
|
||||
internal class CrossReferenceOffsetValidator
|
||||
{
|
||||
private readonly XrefOffsetValidator offsetValidator;
|
||||
@@ -9,9 +12,9 @@
|
||||
this.offsetValidator = offsetValidator;
|
||||
}
|
||||
|
||||
public long Validate(long crossReferenceOffset, bool isLenientParsing)
|
||||
public long Validate(long crossReferenceOffset, ISeekableTokenScanner scanner, IRandomAccessRead reader, bool isLenientParsing)
|
||||
{
|
||||
long fixedOffset = offsetValidator.CheckXRefOffset(crossReferenceOffset, isLenientParsing);
|
||||
long fixedOffset = offsetValidator.CheckXRefOffset(crossReferenceOffset, scanner, reader, isLenientParsing);
|
||||
if (fixedOffset > -1)
|
||||
{
|
||||
crossReferenceOffset = fixedOffset;
|
||||
|
@@ -6,7 +6,6 @@
|
||||
using Exceptions;
|
||||
using IO;
|
||||
using Logging;
|
||||
using Parts;
|
||||
using Parts.CrossReference;
|
||||
using Tokenization.Scanner;
|
||||
using Tokenization.Tokens;
|
||||
@@ -14,19 +13,17 @@
|
||||
internal class CrossReferenceParser
|
||||
{
|
||||
private readonly ILog log;
|
||||
private readonly CosDictionaryParser dictionaryParser;
|
||||
private readonly CosBaseParser baseParser;
|
||||
private readonly XrefOffsetValidator offsetValidator;
|
||||
private readonly CrossReferenceStreamParser crossReferenceStreamParser;
|
||||
private readonly CrossReferenceTableParser crossReferenceTableParser;
|
||||
private readonly XrefCosOffsetChecker xrefCosChecker;
|
||||
|
||||
public CrossReferenceParser(ILog log, CosDictionaryParser dictionaryParser, CosBaseParser baseParser,
|
||||
public CrossReferenceParser(ILog log, XrefOffsetValidator offsetValidator,
|
||||
CrossReferenceStreamParser crossReferenceStreamParser,
|
||||
CrossReferenceTableParser crossReferenceTableParser)
|
||||
{
|
||||
this.log = log;
|
||||
this.dictionaryParser = dictionaryParser;
|
||||
this.baseParser = baseParser;
|
||||
this.offsetValidator = offsetValidator;
|
||||
this.crossReferenceStreamParser = crossReferenceStreamParser;
|
||||
this.crossReferenceTableParser = crossReferenceTableParser;
|
||||
|
||||
@@ -36,8 +33,7 @@
|
||||
public CrossReferenceTable Parse(IRandomAccessRead reader, bool isLenientParsing, long xrefLocation,
|
||||
CosObjectPool pool, IPdfTokenScanner pdfScanner, ISeekableTokenScanner tokenScanner)
|
||||
{
|
||||
var xrefOffsetValidator = new XrefOffsetValidator(log, reader, dictionaryParser, baseParser, pool);
|
||||
long fixedOffset = xrefOffsetValidator.CheckXRefOffset(xrefLocation, isLenientParsing);
|
||||
long fixedOffset = offsetValidator.CheckXRefOffset(xrefLocation, tokenScanner, reader, isLenientParsing);
|
||||
if (fixedOffset > -1)
|
||||
{
|
||||
xrefLocation = fixedOffset;
|
||||
@@ -81,7 +77,7 @@
|
||||
int streamOffset = ((NumericToken)tableDictionary.Data[NameToken.XrefStm]).Int;
|
||||
|
||||
// check the xref stream reference
|
||||
fixedOffset = xrefOffsetValidator.CheckXRefOffset(streamOffset, isLenientParsing);
|
||||
fixedOffset = offsetValidator.CheckXRefOffset(streamOffset, tokenScanner, reader, isLenientParsing);
|
||||
if (fixedOffset > -1 && fixedOffset != streamOffset)
|
||||
{
|
||||
log.Warn($"/XRefStm offset {streamOffset} is incorrect, corrected to {fixedOffset}");
|
||||
@@ -148,7 +144,7 @@
|
||||
if (previousCrossReferenceLocation > 0)
|
||||
{
|
||||
// check the xref table reference
|
||||
fixedOffset = xrefOffsetValidator.CheckXRefOffset(previousCrossReferenceLocation, isLenientParsing);
|
||||
fixedOffset = offsetValidator.CheckXRefOffset(previousCrossReferenceLocation, tokenScanner, reader, isLenientParsing);
|
||||
if (fixedOffset > -1 && fixedOffset != previousCrossReferenceLocation)
|
||||
{
|
||||
previousCrossReferenceLocation = fixedOffset;
|
||||
|
@@ -2,37 +2,27 @@
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using ContentStream;
|
||||
using Cos;
|
||||
using IO;
|
||||
using Logging;
|
||||
using Parts;
|
||||
using Tokenization.Scanner;
|
||||
using Tokenization.Tokens;
|
||||
|
||||
internal class XrefOffsetValidator
|
||||
{
|
||||
private static readonly long MinimumSearchOffset = 6;
|
||||
|
||||
private readonly ILog log;
|
||||
private readonly IRandomAccessRead source;
|
||||
private readonly CosDictionaryParser dictionaryParser;
|
||||
private readonly CosBaseParser baseParser;
|
||||
private readonly CosObjectPool pool;
|
||||
|
||||
private List<long> bfSearchXRefTablesOffsets = null;
|
||||
private List<long> bfSearchXRefStreamsOffsets = null;
|
||||
private List<long> bfSearchXRefTablesOffsets;
|
||||
private List<long> bfSearchXRefStreamsOffsets;
|
||||
|
||||
public XrefOffsetValidator(ILog log, IRandomAccessRead source, CosDictionaryParser dictionaryParser,
|
||||
CosBaseParser baseParser,
|
||||
CosObjectPool pool)
|
||||
public XrefOffsetValidator(ILog log)
|
||||
{
|
||||
this.log = log;
|
||||
this.source = source;
|
||||
this.dictionaryParser = dictionaryParser;
|
||||
this.baseParser = baseParser;
|
||||
this.pool = pool;
|
||||
}
|
||||
|
||||
public long CheckXRefOffset(long startXRefOffset, bool isLenientParsing)
|
||||
public long CheckXRefOffset(long startXRefOffset, ISeekableTokenScanner scanner, IRandomAccessRead reader, bool isLenientParsing)
|
||||
{
|
||||
// repair mode isn't available in non-lenient mode
|
||||
if (!isLenientParsing)
|
||||
@@ -40,127 +30,133 @@
|
||||
return startXRefOffset;
|
||||
}
|
||||
|
||||
source.Seek(startXRefOffset);
|
||||
reader.Seek(startXRefOffset);
|
||||
|
||||
ReadHelper.SkipSpaces(source);
|
||||
ReadHelper.SkipSpaces(reader);
|
||||
|
||||
if (source.Peek() == 'x' && ReadHelper.IsString(source, "xref"))
|
||||
if (reader.Peek() == 'x' && ReadHelper.IsString(reader, "xref"))
|
||||
{
|
||||
return startXRefOffset;
|
||||
}
|
||||
if (startXRefOffset > 0)
|
||||
{
|
||||
if (CheckXRefStreamOffset(source, startXRefOffset, true, pool))
|
||||
if (CheckXRefStreamOffset(startXRefOffset, scanner, true))
|
||||
{
|
||||
return startXRefOffset;
|
||||
}
|
||||
|
||||
return CalculateXRefFixedOffset(startXRefOffset);
|
||||
return CalculateXRefFixedOffset(startXRefOffset, scanner, reader);
|
||||
}
|
||||
|
||||
// can't find a valid offset
|
||||
return -1;
|
||||
}
|
||||
|
||||
private long CalculateXRefFixedOffset(long objectOffset)
|
||||
|
||||
private long CalculateXRefFixedOffset(long objectOffset, ISeekableTokenScanner scanner, IRandomAccessRead reader)
|
||||
{
|
||||
if (objectOffset < 0)
|
||||
{
|
||||
// LOG.error("Invalid object offset " + objectOffset + " when searching for a xref table/stream");
|
||||
log.Error($"Invalid object offset {objectOffset} when searching for a xref table/stream");
|
||||
return 0;
|
||||
}
|
||||
|
||||
// start a brute force search for all xref tables and try to find the offset we are looking for
|
||||
long newOffset = BfSearchForXRef(objectOffset);
|
||||
long newOffset = BfSearchForXRef(objectOffset, scanner, reader);
|
||||
if (newOffset > -1)
|
||||
{
|
||||
// LOG.debug("Fixed reference for xref table/stream " + objectOffset + " -> " + newOffset);
|
||||
log.Debug($"Fixed reference for xref table/stream {objectOffset} -> {newOffset}");
|
||||
return newOffset;
|
||||
}
|
||||
// LOG.error("Can't find the object xref table/stream at offset " + objectOffset);
|
||||
|
||||
log.Error($"Can\'t find the object xref table/stream at offset {objectOffset}");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
private void BfSearchForXRefStreams()
|
||||
private void BfSearchForXRefStreams(IRandomAccessRead reader)
|
||||
{
|
||||
if (bfSearchXRefStreamsOffsets == null)
|
||||
if (bfSearchXRefStreamsOffsets != null)
|
||||
{
|
||||
// a pdf may contain more than one /XRef entry
|
||||
bfSearchXRefStreamsOffsets = new List<long>();
|
||||
long originOffset = source.GetPosition();
|
||||
source.Seek(MinimumSearchOffset);
|
||||
// search for XRef streams
|
||||
var objString = " obj";
|
||||
while (!source.IsEof())
|
||||
return;
|
||||
}
|
||||
|
||||
// a pdf may contain more than one /XRef entry
|
||||
bfSearchXRefStreamsOffsets = new List<long>();
|
||||
long originOffset = reader.GetPosition();
|
||||
reader.Seek(MinimumSearchOffset);
|
||||
// search for XRef streams
|
||||
var objString = " obj";
|
||||
while (!reader.IsEof())
|
||||
{
|
||||
if (ReadHelper.IsString(reader, "xref"))
|
||||
{
|
||||
if (ReadHelper.IsString(source, "xref"))
|
||||
// search backwards for the beginning of the stream
|
||||
long newOffset = -1;
|
||||
long xrefOffset = reader.GetPosition();
|
||||
bool objFound = false;
|
||||
for (int i = 1; i < 40 && !objFound; i++)
|
||||
{
|
||||
// search backwards for the beginning of the stream
|
||||
long newOffset = -1;
|
||||
long xrefOffset = source.GetPosition();
|
||||
bool objFound = false;
|
||||
for (int i = 1; i < 40 && !objFound; i++)
|
||||
long currentOffset = xrefOffset - (i * 10);
|
||||
if (currentOffset > 0)
|
||||
{
|
||||
long currentOffset = xrefOffset - (i * 10);
|
||||
if (currentOffset > 0)
|
||||
reader.Seek(currentOffset);
|
||||
for (int j = 0; j < 10; j++)
|
||||
{
|
||||
source.Seek(currentOffset);
|
||||
for (int j = 0; j < 10; j++)
|
||||
if (ReadHelper.IsString(reader, objString))
|
||||
{
|
||||
if (ReadHelper.IsString(source, objString))
|
||||
long tempOffset = currentOffset - 1;
|
||||
reader.Seek(tempOffset);
|
||||
int genId = reader.Peek();
|
||||
// is the next char a digit?
|
||||
if (ReadHelper.IsDigit(genId))
|
||||
{
|
||||
long tempOffset = currentOffset - 1;
|
||||
source.Seek(tempOffset);
|
||||
int genId = source.Peek();
|
||||
// is the next char a digit?
|
||||
if (ReadHelper.IsDigit(genId))
|
||||
tempOffset--;
|
||||
reader.Seek(tempOffset);
|
||||
if (ReadHelper.IsSpace(reader))
|
||||
{
|
||||
tempOffset--;
|
||||
source.Seek(tempOffset);
|
||||
if (ReadHelper.IsSpace(source))
|
||||
int length = 0;
|
||||
reader.Seek(--tempOffset);
|
||||
while (tempOffset > MinimumSearchOffset && ReadHelper.IsDigit(reader))
|
||||
{
|
||||
int length = 0;
|
||||
source.Seek(--tempOffset);
|
||||
while (tempOffset > MinimumSearchOffset && ReadHelper.IsDigit(source))
|
||||
{
|
||||
source.Seek(--tempOffset);
|
||||
length++;
|
||||
}
|
||||
if (length > 0)
|
||||
{
|
||||
source.Read();
|
||||
newOffset = source.GetPosition();
|
||||
}
|
||||
reader.Seek(--tempOffset);
|
||||
length++;
|
||||
}
|
||||
if (length > 0)
|
||||
{
|
||||
reader.Read();
|
||||
newOffset = reader.GetPosition();
|
||||
}
|
||||
}
|
||||
objFound = true;
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
currentOffset++;
|
||||
source.Read();
|
||||
}
|
||||
objFound = true;
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
currentOffset++;
|
||||
reader.Read();
|
||||
}
|
||||
}
|
||||
}
|
||||
if (newOffset > -1)
|
||||
{
|
||||
bfSearchXRefStreamsOffsets.Add(newOffset);
|
||||
}
|
||||
source.Seek(xrefOffset + 5);
|
||||
}
|
||||
source.Read();
|
||||
if (newOffset > -1)
|
||||
{
|
||||
bfSearchXRefStreamsOffsets.Add(newOffset);
|
||||
}
|
||||
reader.Seek(xrefOffset + 5);
|
||||
}
|
||||
source.Seek(originOffset);
|
||||
reader.Read();
|
||||
}
|
||||
reader.Seek(originOffset);
|
||||
}
|
||||
|
||||
private long BfSearchForXRef(long xrefOffset)
|
||||
private long BfSearchForXRef(long xrefOffset, ISeekableTokenScanner scanner, IRandomAccessRead reader)
|
||||
{
|
||||
long newOffset = -1;
|
||||
long newOffsetTable = -1;
|
||||
long newOffsetStream = -1;
|
||||
BfSearchForXRefTables();
|
||||
BfSearchForXRefStreams();
|
||||
BfSearchForXRefTables(reader);
|
||||
BfSearchForXRefStreams(reader);
|
||||
if (bfSearchXRefTablesOffsets != null)
|
||||
{
|
||||
// TODO to be optimized, this won't work in every case
|
||||
@@ -200,31 +196,31 @@
|
||||
return newOffset;
|
||||
}
|
||||
|
||||
private void BfSearchForXRefTables()
|
||||
private void BfSearchForXRefTables(IRandomAccessRead reader)
|
||||
{
|
||||
if (bfSearchXRefTablesOffsets == null)
|
||||
{
|
||||
// a pdf may contain more than one xref entry
|
||||
bfSearchXRefTablesOffsets = new List<long>();
|
||||
long originOffset = source.GetPosition();
|
||||
source.Seek(MinimumSearchOffset);
|
||||
long originOffset = reader.GetPosition();
|
||||
reader.Seek(MinimumSearchOffset);
|
||||
// search for xref tables
|
||||
while (!source.IsEof())
|
||||
while (!reader.IsEof())
|
||||
{
|
||||
if (ReadHelper.IsString(source, "xref"))
|
||||
if (ReadHelper.IsString(reader, "xref"))
|
||||
{
|
||||
long newOffset = source.GetPosition();
|
||||
source.Seek(newOffset - 1);
|
||||
long newOffset = reader.GetPosition();
|
||||
reader.Seek(newOffset - 1);
|
||||
// ensure that we don't read "startxref" instead of "xref"
|
||||
if (ReadHelper.IsWhitespace(source))
|
||||
if (ReadHelper.IsWhitespace(reader))
|
||||
{
|
||||
bfSearchXRefTablesOffsets.Add(newOffset);
|
||||
}
|
||||
source.Seek(newOffset + 4);
|
||||
reader.Seek(newOffset + 4);
|
||||
}
|
||||
source.Read();
|
||||
reader.Read();
|
||||
}
|
||||
source.Seek(originOffset);
|
||||
reader.Seek(originOffset);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -252,7 +248,7 @@
|
||||
return newValue;
|
||||
}
|
||||
|
||||
private bool CheckXRefStreamOffset(IRandomAccessRead source, long startXRefOffset, bool isLenient, CosObjectPool pool)
|
||||
private bool CheckXRefStreamOffset(long startXRefOffset, ISeekableTokenScanner scanner, bool isLenient)
|
||||
{
|
||||
// repair mode isn't available in non-lenient mode
|
||||
if (!isLenient || startXRefOffset == 0)
|
||||
@@ -260,37 +256,41 @@
|
||||
return true;
|
||||
}
|
||||
// seek to offset-1
|
||||
source.Seek(startXRefOffset - 1);
|
||||
int nextValue = source.Read();
|
||||
// the first character has to be a whitespace, and then a digit
|
||||
if (ReadHelper.IsWhitespace(nextValue))
|
||||
scanner.Seek(startXRefOffset - 1);
|
||||
if (scanner.TryReadToken(out NumericToken objectNumber))
|
||||
{
|
||||
ReadHelper.SkipSpaces(source);
|
||||
if (ReadHelper.IsDigit(source))
|
||||
try
|
||||
{
|
||||
try
|
||||
if (!scanner.TryReadToken(out NumericToken generation))
|
||||
{
|
||||
// it's a XRef stream
|
||||
ObjectHelper.ReadObjectNumber(source);
|
||||
ObjectHelper.ReadGenerationNumber(source);
|
||||
|
||||
ReadHelper.ReadExpectedString(source, "obj", true);
|
||||
|
||||
// check the dictionary to avoid false positives
|
||||
PdfDictionary dict = dictionaryParser.Parse(source, baseParser, pool);
|
||||
source.Seek(startXRefOffset);
|
||||
|
||||
if (dict.IsType(CosName.XREF))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
log.Debug($"When checking offset at {startXRefOffset} did not find the generation number. Got: {objectNumber} {generation}.");
|
||||
}
|
||||
catch (Exception ex)
|
||||
|
||||
scanner.MoveNext();
|
||||
|
||||
var obj = scanner.CurrentToken;
|
||||
|
||||
if (!ReferenceEquals(obj, OperatorToken.StartObject))
|
||||
{
|
||||
log.Error("Couldn't read the xref stream object.", ex);
|
||||
// there wasn't an object of a xref stream
|
||||
source.Seek(startXRefOffset);
|
||||
scanner.Seek(startXRefOffset);
|
||||
return false;
|
||||
}
|
||||
|
||||
// check the dictionary to avoid false positives
|
||||
if (!scanner.TryReadToken(out DictionaryToken dictionary))
|
||||
{
|
||||
scanner.Seek(startXRefOffset);
|
||||
|
||||
}
|
||||
|
||||
if (dictionary.TryGet(NameToken.Type, out var type) && NameToken.Xref.Equals(type))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
log.Error("Couldn't read the xref stream object.", ex);
|
||||
}
|
||||
}
|
||||
return false;
|
||||
|
@@ -1,71 +0,0 @@
|
||||
namespace UglyToad.PdfPig.Parser.Parts
|
||||
{
|
||||
using ContentStream;
|
||||
using Cos;
|
||||
using IO;
|
||||
using Util;
|
||||
|
||||
/// <summary>
/// Parses a PDF array (<c>[ ... ]</c>) from a reader into a <see cref="COSArray"/>.
/// </summary>
internal class CosArrayParser
{
    /// <summary>
    /// Reads an array starting at the current reader position, which must be on <c>'['</c>.
    /// </summary>
    /// <param name="reader">Source positioned at the opening bracket.</param>
    /// <param name="baseParser">Parser used for each element of the array.</param>
    /// <param name="pool">Object pool used to resolve <c>n g R</c> indirect references.</param>
    /// <returns>
    /// The parsed array. If an <c>endobj</c> or <c>endstream</c> token is met before the closing
    /// bracket, the elements read so far are returned and that token is left unread on the reader.
    /// </returns>
    public COSArray Parse(IRandomAccessRead reader, CosBaseParser baseParser, CosObjectPool pool)
    {
        ReadHelper.ReadExpectedChar(reader, '[');
        var array = new COSArray();
        ReadHelper.SkipSpaces(reader);

        int next;
        while ((next = reader.Peek()) > 0 && (char)next != ']')
        {
            var item = baseParser.Parse(reader, pool);

            if (item is CosObject)
            {
                // The base parser returns a placeholder CosObject for a bare 'R'; the object and
                // generation numbers were already added to the array as the two previous items.
                item = ResolveIndirectReference(array, pool);
            }

            if (item != null)
            {
                array.add(item);
            }
            else
            {
                // It could be a bad object in the array which is just skipped.
                // This could also be an "endobj" or "endstream" which means we can assume that
                // the array has ended. The token is pushed back either way.
                string token = ReadHelper.ReadString(reader);
                reader.Unread(OtherEncodings.StringAsLatin1Bytes(token));
                if (string.Equals(token, "endobj") || string.Equals(token, "endstream"))
                {
                    return array;
                }
            }

            ReadHelper.SkipSpaces(reader);
        }

        // read ']'
        reader.Read();
        ReadHelper.SkipSpaces(reader);
        return array;
    }

    /// <summary>
    /// Pops the trailing generation and object numbers off <paramref name="array"/> and looks the
    /// reference up in <paramref name="pool"/>. Returns null when the expected integers are not
    /// there (PDFBOX-385), including when the array is empty or holds only one integer; the
    /// original code indexed element -1 in those cases.
    /// </summary>
    private static CosBase ResolveIndirectReference(COSArray array, CosObjectPool pool)
    {
        if (array.size() == 0 || !(array.get(array.size() - 1) is CosInt))
        {
            return null;
        }

        var generation = (CosInt)array.remove(array.size() - 1);

        if (array.size() == 0 || !(array.get(array.size() - 1) is CosInt))
        {
            // the object reference is somehow wrong
            return null;
        }

        var number = (CosInt)array.remove(array.size() - 1);
        var key = new IndirectReference(number.AsLong(), generation.AsInt());

        return pool.Get(key);
    }
}
|
||||
}
|
||||
|
@@ -1,166 +0,0 @@
|
||||
namespace UglyToad.PdfPig.Parser.Parts
|
||||
{
|
||||
using System.IO;
|
||||
using System.Text;
|
||||
using ContentStream;
|
||||
using Cos;
|
||||
using IO;
|
||||
using Util;
|
||||
|
||||
/// <summary>
/// Parses a single COS value from a reader.
/// </summary>
internal interface IBaseParser
|
||||
{
|
||||
/// <summary>
/// Reads the next value from <paramref name="reader"/>. <paramref name="pool"/> is handed
/// to nested parsers; presumably it is used to resolve indirect references (confirm against callers).
/// </summary>
CosBase Parse(IRandomAccessRead reader, CosObjectPool pool);
|
||||
}
|
||||
|
||||
/// <summary>
/// Reads one COS value from a reader by inspecting its first non-space character and
/// delegating to the matching specialised parser.
/// </summary>
internal class CosBaseParser : IBaseParser
{
    private readonly CosNameParser nameParser;
    private readonly CosStringParser stringParser;
    private readonly CosDictionaryParser dictionaryParser;
    private readonly CosArrayParser arrayParser;

    public CosBaseParser(CosNameParser nameParser, CosStringParser stringParser,
        CosDictionaryParser dictionaryParser, CosArrayParser arrayParser)
    {
        this.nameParser = nameParser;
        this.stringParser = stringParser;
        this.dictionaryParser = dictionaryParser;
        this.arrayParser = arrayParser;
    }

    /// <summary>
    /// Skips leading spaces and parses the value that follows.
    /// </summary>
    /// <returns>
    /// The parsed value; null at end of input or when an unrecognised token was skipped.
    /// A bare <c>R</c> yields a placeholder <see cref="CosObject"/>.
    /// </returns>
    /// <exception cref="IOException">
    /// A token starting with 't'/'f' is not "true"/"false", or an unknown character is met
    /// from which no token can be read.
    /// </exception>
    public CosBase Parse(IRandomAccessRead reader, CosObjectPool pool)
    {
        ReadHelper.SkipSpaces(reader);

        int peeked = reader.Peek();
        if (peeked == -1)
        {
            return null;
        }

        char first = (char)peeked;

        if (first == '<')
        {
            return ParseDictionaryOrHexString(reader, pool);
        }

        if (first == '[')
        {
            // array
            return arrayParser.Parse(reader, this, pool);
        }

        if (first == '(')
        {
            return stringParser.Parse(reader);
        }

        if (first == '/')
        {
            // name
            return nameParser.Parse(reader);
        }

        if (first == 'n')
        {
            // null
            ReadHelper.ReadExpectedString(reader, "null");
            return CosNull.Null;
        }

        if (first == 't')
        {
            return ParseTrue(reader);
        }

        if (first == 'f')
        {
            return ParseFalse(reader);
        }

        if (first == 'R')
        {
            reader.Read();
            return new CosObject(null);
        }

        if (char.IsDigit(first) || first == '-' || first == '+' || first == '.')
        {
            return ParseNumber(reader);
        }

        SkipUnknownToken(reader, first);
        return null;
    }

    // '<<' opens a dictionary, a single '<' opens a hex string; look one character ahead to tell.
    private CosBase ParseDictionaryOrHexString(IRandomAccessRead reader, CosObjectPool pool)
    {
        // pull off first left bracket, check for a second one, then put the first back
        int leftBracket = reader.Read();
        char second = (char)reader.Peek();
        reader.Unread(leftBracket);

        if (second != '<')
        {
            return stringParser.Parse(reader);
        }

        CosBase dictionary = dictionaryParser.Parse(reader, this, pool);
        ReadHelper.SkipSpaces(reader);
        return dictionary;
    }

    // Consumes exactly four bytes and requires them to spell "true".
    private static CosBase ParseTrue(IRandomAccessRead reader)
    {
        string truestring = OtherEncodings.BytesAsLatin1String(reader.ReadFully(4));
        if (!truestring.Equals("true"))
        {
            throw new IOException("expected true actual='" + truestring + "' " + reader +
                                  "' at offset " + reader.GetPosition());
        }

        return PdfBoolean.True;
    }

    // Consumes exactly five bytes and requires them to spell "false".
    private static CosBase ParseFalse(IRandomAccessRead reader)
    {
        string falsestring = OtherEncodings.BytesAsLatin1String(reader.ReadFully(5));
        if (!falsestring.Equals("false"))
        {
            throw new IOException("expected false actual='" + falsestring + "' " + reader +
                                  "' at offset " + reader.GetPosition());
        }

        return PdfBoolean.False;
    }

    // Collects the run of number characters and hands the text to the number factory.
    private static CosBase ParseNumber(IRandomAccessRead reader)
    {
        var text = new StringBuilder();

        int current = reader.Read();
        while (IsNumberCharacter((char)current))
        {
            text.Append((char)current);
            current = reader.Read();
        }

        // put back the terminating character unless the input ran out
        if (current != -1)
        {
            reader.Unread(current);
        }

        return CosNumberFactory.get(text.ToString()) as CosBase;
    }

    private static bool IsNumberCharacter(char c)
    {
        return char.IsDigit(c) || c == '-' || c == '+' || c == '.' || c == 'E' || c == 'e';
    }

    // This is not supposed to happen, but we allow for it so we are more compatible
    // with writers that don't follow the spec: the offending token is read and dropped.
    private static void SkipUnknownToken(IRandomAccessRead reader, char c)
    {
        string badstring = ReadHelper.ReadString(reader);

        if (badstring == string.Empty)
        {
            int peek = reader.Peek();
            // we can end up in an infinite loop otherwise
            throw new IOException("Unknown dir object c='" + c +
                                  "' cInt=" + (int)c + " peek='" + (char)peek
                                  + "' peekInt=" + peek + " at offset " + reader.GetPosition());
        }

        // if it's an endstream/endobj, we want to put it back so the caller will see it
        if (string.Equals("endobj", badstring) || string.Equals("endstream", badstring))
        {
            reader.Unread(OtherEncodings.StringAsLatin1Bytes(badstring));
        }
    }
}
|
||||
}
|
@@ -1,205 +0,0 @@
|
||||
namespace UglyToad.PdfPig.Parser.Parts
|
||||
{
|
||||
using System;
|
||||
using ContentStream;
|
||||
using Cos;
|
||||
using IO;
|
||||
using Logging;
|
||||
using Util;
|
||||
using Util.JetBrains.Annotations;
|
||||
|
||||
/// <summary>
/// Parses a PDF dictionary from a reader.
/// </summary>
internal interface IDictionaryParser
|
||||
{
|
||||
/// <summary>
/// Reads a dictionary from <paramref name="reader"/>, using <paramref name="baseParser"/>
/// for the entry values and <paramref name="pool"/> for indirect-reference lookups.
/// </summary>
PdfDictionary Parse(IRandomAccessRead reader, IBaseParser baseParser, CosObjectPool pool);
|
||||
}
|
||||
|
||||
/// <summary>
/// Parses a PDF dictionary (<c>&lt;&lt; /Key value ... &gt;&gt;</c>) into a <see cref="PdfDictionary"/>.
/// </summary>
internal class CosDictionaryParser : IDictionaryParser
{
    private readonly ILog log;
    private readonly CosNameParser nameParser;

    // Character codes used by ReadUntilEnd to spot "endstream" / "endobj".
    protected static readonly int E = 'e';
    protected static readonly int N = 'n';
    protected static readonly int D = 'd';

    protected static readonly int S = 's';
    protected static readonly int T = 't';
    protected static readonly int R = 'r';
    protected static readonly int A = 'a';
    protected static readonly int M = 'm';

    protected static readonly int O = 'o';
    protected static readonly int B = 'b';
    protected static readonly int J = 'j';

    /// <summary>
    /// Creates the parser.
    /// </summary>
    /// <param name="nameParser">Parser for dictionary keys; required.</param>
    /// <param name="log">Log for warnings; may be null, in which case warnings are skipped.</param>
    /// <exception cref="ArgumentNullException"><paramref name="nameParser"/> is null.</exception>
    public CosDictionaryParser(CosNameParser nameParser, ILog log)
    {
        this.log = log;
        this.nameParser = nameParser ?? throw new ArgumentNullException(nameof(nameParser));
    }

    /// <summary>
    /// Reads a dictionary starting at the current reader position, which must be on <c>&lt;&lt;</c>.
    /// </summary>
    /// <returns>
    /// The parsed dictionary. If unexpected content runs into <c>endstream</c>, <c>endobj</c> or
    /// end of input, a new empty dictionary is returned and entries read so far are discarded.
    /// </returns>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public PdfDictionary Parse(IRandomAccessRead reader, IBaseParser baseParser, CosObjectPool pool)
    {
        if (reader == null)
        {
            throw new ArgumentNullException(nameof(reader));
        }

        if (baseParser == null)
        {
            throw new ArgumentNullException(nameof(baseParser));
        }

        if (pool == null)
        {
            throw new ArgumentNullException(nameof(pool));
        }

        ReadHelper.ReadExpectedChar(reader, '<');
        ReadHelper.ReadExpectedChar(reader, '<');
        ReadHelper.SkipSpaces(reader);

        var dictionary = new PdfDictionary();

        var done = false;
        while (!done)
        {
            ReadHelper.SkipSpaces(reader);

            var c = (char)reader.Peek();

            switch (c)
            {
                case '>':
                    done = true;
                    break;
                case '/':
                    var nameValue = ParseCosDictionaryNameValuePair(reader, baseParser, pool);

                    if (nameValue.key != null && nameValue.value != null)
                    {
                        dictionary.Set(nameValue.key, nameValue.value);
                    }

                    break;
                default:
                    // Not a key and not the end: skip ahead to the next '/' or '>'.
                    if (ReadUntilEnd(reader))
                    {
                        return new PdfDictionary();
                    }
                    break;
            }
        }

        ReadHelper.ReadExpectedString(reader, ">>");

        return dictionary;
    }

    /// <summary>
    /// Reads one <c>/Key value</c> entry. Returns <c>(null, null)</c> when no value could be parsed.
    /// </summary>
    [ItemCanBeNull]
    private (CosName key, CosBase value) ParseCosDictionaryNameValuePair(IRandomAccessRead reader, IBaseParser baseParser, CosObjectPool pool)
    {
        var key = nameParser.Parse(reader);
        var value = ParseValue(reader, baseParser, pool);
        ReadHelper.SkipSpaces(reader);

        if ((char)reader.Peek() == 'd')
        {
            // if the next string is 'def' then we are parsing a cmap stream
            // and want to ignore it, otherwise put the token back.
            var potentialDef = ReadHelper.ReadString(reader);
            if (!potentialDef.Equals("def"))
            {
                reader.Unread(OtherEncodings.StringAsLatin1Bytes(potentialDef));
            }
            else
            {
                ReadHelper.SkipSpaces(reader);
            }
        }

        if (value == null)
        {
            // NOTE(review): the ReadString call is only evaluated (and the token only consumed)
            // when a log is present, because it sits inside the null-conditional call.
            log?.Warn("Bad Dictionary Declaration " + ReadHelper.ReadString(reader));
            return (null, null);
        }

        // label this item as direct, to avoid signature problems.
        value.Direct = true;

        return (key, value);
    }

    /// <summary>
    /// Reads an entry value. A number followed by another digit is treated as the start of an
    /// indirect reference (<c>n g R</c>) and is resolved through <paramref name="pool"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The object number or generation number of a reference is not an integer.
    /// </exception>
    private static CosBase ParseValue(IRandomAccessRead reader, IBaseParser baseParser, CosObjectPool pool)
    {
        var numOffset = reader.GetPosition();
        var value = baseParser.Parse(reader, pool);

        ReadHelper.SkipSpaces(reader);

        // proceed if the given object is a number and the following is a number as well
        if (!(value is ICosNumber) || !ReadHelper.IsDigit(reader))
        {
            return value;
        }

        // read the remaining information of the object number
        var genOffset = reader.GetPosition();
        var generationNumber = baseParser.Parse(reader, pool);
        ReadHelper.SkipSpaces(reader);
        ReadHelper.ReadExpectedChar(reader, 'R');

        if (!(value is CosInt objectNumber))
        {
            throw new InvalidOperationException("expected number, actual=" + value + " at offset " + numOffset);
        }

        if (!(generationNumber is CosInt generation))
        {
            // Report the generation token itself; the original repeated the object number here.
            throw new InvalidOperationException("expected number, actual=" + generationNumber + " at offset " + genOffset);
        }

        var key = new IndirectReference(objectNumber.AsLong(), generation.AsInt());

        // dereference the object
        return pool.Get(key);
    }

    /// <summary>
    /// Skips forward until a <c>'/'</c> or <c>'&gt;'</c> is found and pushes that character back.
    /// </summary>
    /// <returns>
    /// True when the scan hit <c>endstream</c>, <c>endobj</c> or end of input first (nothing more
    /// to read for this dictionary); false when a <c>'/'</c> or <c>'&gt;'</c> was found.
    /// </returns>
    private static bool ReadUntilEnd(IRandomAccessRead reader)
    {
        var c = reader.Read();
        while (c != -1 && c != '/' && c != '>')
        {
            // in addition to stopping when we find / or >, we also want
            // to stop when we find endstream or endobj.
            if (c == E)
            {
                c = reader.Read();
                if (c == N)
                {
                    c = reader.Read();
                    if (c == D)
                    {
                        c = reader.Read();
                        // The short-circuit chain consumes one character per successful match.
                        var isStream = c == S && reader.Read() == T && reader.Read() == R
                                       && reader.Read() == E && reader.Read() == A && reader.Read() == M;
                        var isObj = !isStream && c == O && reader.Read() == B && reader.Read() == J;
                        if (isStream || isObj)
                        {
                            // we're done reading this object!
                            return true;
                        }
                    }
                }
            }
            c = reader.Read();
        }

        if (c == -1)
        {
            return true;
        }

        reader.Unread(c);
        return false;
    }
}
|
||||
}
|
||||
|
||||
|
||||
|
@@ -1,88 +0,0 @@
|
||||
namespace UglyToad.PdfPig.Parser.Parts
|
||||
{
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Text;
|
||||
using Cos;
|
||||
using IO;
|
||||
using Util.JetBrains.Annotations;
|
||||
|
||||
/// <summary>
/// Parses a PDF name object (<c>/Name</c>) into a <see cref="CosName"/>.
/// </summary>
internal class CosNameParser
{
    /// <summary>
    /// Reads a name starting at the current reader position, which must be on <c>'/'</c>.
    /// <c>#xx</c> is decoded as a byte only when both following characters are hex digits.
    /// The collected bytes are decoded as UTF-8 when valid, otherwise as windows-1252.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
    [NotNull]
    public CosName Parse([NotNull]IRandomAccessRead reader)
    {
        if (reader == null)
        {
            throw new ArgumentNullException(nameof(reader));
        }

        ReadHelper.ReadExpectedChar(reader, '/');

        using (var memoryStream = new MemoryStream())
        using (var writer = new BinaryWriter(memoryStream))
        {
            int c = reader.Read();
            while (c != -1)
            {
                byte ch = (byte)c;
                if (ch == '#')
                {
                    int ch1 = reader.Read();
                    int ch2 = reader.Read();
                    // Prior to PDF v1.2, the # was not a special character. Also,
                    // it has been observed that various PDF tools do not follow the
                    // spec with respect to the # escape, even though they report
                    // PDF versions of 1.2 or later. The solution here is that we
                    // interpret the # as an escape only when it is followed by two
                    // valid hex digits.
                    if (ReadHelper.IsHexDigit((char)ch1) && ReadHelper.IsHexDigit((char)ch2))
                    {
                        string hex = "" + (char)ch1 + (char)ch2;
                        try
                        {
                            var byteToWrite = (byte)Convert.ToInt32(hex, 16);
                            writer.Write(byteToWrite);
                        }
                        catch (FormatException e)
                        {
                            throw new IOException("Error: expected hex digit, actual='" + hex + "'", e);
                        }
                        c = reader.Read();
                    }
                    else
                    {
                        // check for premature EOF
                        if (ch2 == -1 || ch1 == -1)
                        {
                            c = -1;
                            break;
                        }

                        // Not an escape: keep the '#' literally, reprocess ch1 next and push ch2 back.
                        reader.Unread(ch2);
                        c = ch1;
                        writer.Write(ch);
                    }
                }
                else if (ReadHelper.IsEndOfName(ch))
                {
                    break;
                }
                else
                {
                    writer.Write(ch);
                    c = reader.Read();
                }
            }

            // Leave the terminating character for the caller.
            if (c != -1)
            {
                reader.Unread(c);
            }

            // Snapshot the buffer once; the original copied it again for each decode call.
            byte[] bytes = memoryStream.ToArray();
            var encoding = ReadHelper.IsValidUtf8(bytes) ? Encoding.UTF8 : Encoding.GetEncoding("windows-1252");
            return CosName.Create(encoding.GetString(bytes));
        }
    }
}
|
||||
}
|
@@ -1,260 +0,0 @@
|
||||
namespace UglyToad.PdfPig.Parser.Parts
|
||||
{
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Text;
|
||||
using Cos;
|
||||
using IO;
|
||||
|
||||
/// <summary>
/// Parses PDF string objects, both literal strings <c>(...)</c> and hexadecimal strings <c>&lt;...&gt;</c>,
/// from a reader and produces a <see cref="CosString"/>.
/// </summary>
internal class CosStringParser
{
    /// <summary>
    /// Marker meaning "the next input value has not been read ahead yet".
    /// It must differ from every value the reader can return, including the -1 end-of-input value.
    /// </summary>
    private const int NotYetRead = -2;

    /// <summary>
    /// Reads a string starting at the current position of <paramref name="seqSource"/>.
    /// </summary>
    /// <remarks>
    /// The string content is collected as raw bytes. Escape sequences are resolved as described in
    /// section 7.3.4.2 (Literal strings) of the PDF specification: <c>\n \r \t \b \f \( \) \\</c>,
    /// one to three digit octal codes, and a backslash followed by an end-of-line marker, which is
    /// dropped together with the line break. For any other escaped character the backslash is dropped.
    /// </remarks>
    /// <param name="seqSource">The source positioned at the opening '(' or '&lt;'.</param>
    /// <returns>The parsed string.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="seqSource"/> is null.</exception>
    /// <exception cref="IOException">
    /// The input does not start with '(' or '&lt;', or a hex string is not terminated.
    /// </exception>
    public CosString Parse(IRandomAccessRead seqSource)
    {
        if (seqSource == null)
        {
            throw new ArgumentNullException(nameof(seqSource));
        }

        char nextChar = (char)seqSource.Read();
        if (nextChar == '<')
        {
            return ParseHexString(seqSource);
        }

        if (nextChar != '(')
        {
            throw new IOException("parseCOSstring string should start with '(' or '<' and not '" +
                                  nextChar + "' " + seqSource);
        }

        // Bytes are written straight to the stream. A text writer would encode values of 0x80 and
        // above as multi-byte UTF-8 and would write integers as their decimal text.
        using (var memoryStream = new MemoryStream())
        {
            // Number of currently open, unbalanced parentheses.
            int braces = 1;
            int c = seqSource.Read();
            while (braces > 0 && c != -1)
            {
                // Set when a branch had to read one value ahead; that value is processed next.
                int nextc = NotYetRead;

                if (c == ')')
                {
                    braces--;
                    braces = CheckForEndOfString(seqSource, braces);
                    if (braces != 0)
                    {
                        memoryStream.WriteByte((byte)c);
                    }
                }
                else if (c == '(')
                {
                    braces++;
                    memoryStream.WriteByte((byte)c);
                }
                else if (c == '\\')
                {
                    int next = seqSource.Read();
                    switch (next)
                    {
                        case -1:
                            // Input ended directly after the backslash: nothing left to escape.
                            nextc = -1;
                            break;
                        case 'n':
                            memoryStream.WriteByte((byte)'\n');
                            break;
                        case 'r':
                            memoryStream.WriteByte((byte)'\r');
                            break;
                        case 't':
                            memoryStream.WriteByte((byte)'\t');
                            break;
                        case 'b':
                            memoryStream.WriteByte((byte)'\b');
                            break;
                        case 'f':
                            memoryStream.WriteByte((byte)'\f');
                            break;
                        case ')':
                            // PDFBox 276 /Title (c:\)
                            // If the input after "\)" looks like the end of the string, the backslash
                            // was meant literally and the ')' closed the string.
                            braces = CheckForEndOfString(seqSource, braces);
                            memoryStream.WriteByte(braces != 0 ? (byte)')' : (byte)'\\');
                            break;
                        case '(':
                        case '\\':
                            memoryStream.WriteByte((byte)next);
                            break;
                        case '0':
                        case '1':
                        case '2':
                        case '3':
                        case '4':
                        case '5':
                        case '6':
                        case '7':
                        {
                            // Up to three octal digits; the first value which is not an octal digit
                            // is kept in nextc so that it is processed as regular input.
                            int character = next - '0';

                            c = seqSource.Read();
                            if (IsOctalDigit(c))
                            {
                                character = (character * 8) + (c - '0');

                                c = seqSource.Read();
                                if (IsOctalDigit(c))
                                {
                                    character = (character * 8) + (c - '0');
                                }
                                else
                                {
                                    nextc = c;
                                }
                            }
                            else
                            {
                                nextc = c;
                            }

                            // Three digits can exceed 255; only the low-order byte is kept.
                            memoryStream.WriteByte((byte)(character & 0xFF));
                            break;
                        }
                        default:
                            if (next == ReadHelper.AsciiCarriageReturn || next == ReadHelper.AsciiLineFeed)
                            {
                                // this is a break in the line so ignore it and the newline and continue
                                c = seqSource.Read();
                                while (ReadHelper.IsEndOfLine(c) && c != -1)
                                {
                                    c = seqSource.Read();
                                }

                                nextc = c;
                                break;
                            }

                            // dropping the backslash
                            // see 7.3.4.2 Literal strings for further information
                            memoryStream.WriteByte((byte)next);
                            break;
                    }
                }
                else
                {
                    memoryStream.WriteByte((byte)c);
                }

                c = nextc != NotYetRead ? nextc : seqSource.Read();
            }

            // The value read after the end of the string belongs to whatever follows it.
            if (c != -1)
            {
                seqSource.Unread(c);
            }

            return new CosString(memoryStream.ToArray());
        }
    }

    /// <summary>
    /// Returns true when <paramref name="value"/> is one of the characters '0' to '7'.
    /// </summary>
    private static bool IsOctalDigit(int value)
    {
        return value >= '0' && value <= '7';
    }

    /// <summary>
    /// Looks at the next three bytes, without consuming them, to decide whether a ')' which was just
    /// read ends the string even though the parentheses are not balanced.
    /// </summary>
    /// <param name="reader">The source; every byte read here is pushed back before returning.</param>
    /// <param name="bracesParameter">The current count of open parentheses.</param>
    /// <returns>0 if the following bytes indicate the end of the string, otherwise the count unchanged.</returns>
    private static int CheckForEndOfString(IRandomAccessRead reader, int bracesParameter)
    {
        int braces = bracesParameter;
        byte[] nextThreeBytes = new byte[3];
        int amountRead = reader.Read(nextThreeBytes);

        // Check the next 3 bytes if available
        // The following cases are valid indicators for the end of the string
        // 1. Next line contains another COSObject: CR + LF + '/'
        // 2. CosDictionary ends in the next line: CR + LF + '>'
        // 3. Next line contains another COSObject: CR + '/'
        // 4. CosDictionary ends in the next line: CR + '>'
        if (amountRead == 3 && nextThreeBytes[0] == ReadHelper.AsciiCarriageReturn)
        {
            // Cases 1 and 2: the third byte only counts when the second one is a line feed.
            bool afterCrLf = nextThreeBytes[1] == ReadHelper.AsciiLineFeed
                             && (nextThreeBytes[2] == '/' || nextThreeBytes[2] == '>');

            // Cases 3 and 4.
            bool afterCr = nextThreeBytes[1] == '/' || nextThreeBytes[1] == '>';

            if (afterCrLf || afterCr)
            {
                braces = 0;
            }
        }

        if (amountRead > 0)
        {
            reader.Unread(nextThreeBytes, 0, amountRead);
        }

        return braces;
    }

    /// <summary>
    /// This will parse a PDF HEX string with fail fast semantic meaning that we stop if a not allowed character is found.
    /// This is necessary in order to detect malformed input and be able to skip to next object start.
    /// We assume starting '&lt;' was already read.
    /// </summary>
    /// <param name="reader">The source positioned directly after the opening '&lt;'.</param>
    /// <returns>The string created from the collected hex digits via <c>CosString.ParseHex</c>.</returns>
    /// <exception cref="IOException">The end of the input was reached before the closing '&gt;'.</exception>
    private static CosString ParseHexString(IRandomAccessRead reader)
    {
        var sBuf = new StringBuilder();
        while (true)
        {
            int c = reader.Read();
            if (ReadHelper.IsHexDigit((char)c))
            {
                sBuf.Append((char)c);
            }
            else if (c == '>')
            {
                break;
            }
            else if (c < 0)
            {
                throw new IOException("Missing closing bracket for hex string. Reached EOS.");
            }
            else if (c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\b' || c == '\f')
            {
                // These characters are skipped between hex digits.
            }
            else
            {
                // if invalid chars was found: discard last
                // hex character if it is not part of a pair
                if (sBuf.Length % 2 != 0)
                {
                    sBuf.Remove(sBuf.Length - 1, 1);
                }

                // read till the closing bracket was found
                do
                {
                    c = reader.Read();
                }
                while (c != '>' && c >= 0);

                // might have reached EOF while looking for the closing bracket
                // this can happen for malformed PDFs only. Make sure that there is
                // no endless loop.
                if (c < 0)
                {
                    throw new IOException("Missing closing bracket for hex string. Reached EOS.");
                }

                // exit loop
                break;
            }
        }

        return CosString.ParseHex(sBuf.ToString());
    }
}
|
||||
}
|
@@ -17,6 +17,7 @@
|
||||
using IO;
|
||||
using Logging;
|
||||
using Parts;
|
||||
using Parts.CrossReference;
|
||||
using Tokenization.Scanner;
|
||||
using Tokenization.Tokens;
|
||||
using Util;
|
||||
@@ -64,18 +65,21 @@
|
||||
var locationProvider = new ObjectLocationProvider(() => crossReferenceTable, pool, bruteForceSearcher);
|
||||
var pdfScanner = new PdfTokenScanner(inputBytes, locationProvider, filterProvider);
|
||||
|
||||
var xrefValidator = new XrefOffsetValidator(log);
|
||||
|
||||
var crossReferenceStreamParser = new CrossReferenceStreamParser(filterProvider);
|
||||
var crossReferenceParser = new CrossReferenceParser(log, xrefValidator, crossReferenceStreamParser, new CrossReferenceTableParser());
|
||||
|
||||
var version = container.Get<FileHeaderParser>().Parse(scanner, isLenientParsing);
|
||||
|
||||
var crossReferenceOffset = container.Get<FileTrailerParser>().GetFirstCrossReferenceOffset(inputBytes, scanner, isLenientParsing);
|
||||
|
||||
// TODO: make this use the scanner.
|
||||
var validator = new CrossReferenceOffsetValidator(new XrefOffsetValidator(log, reader, container.Get<CosDictionaryParser>(),
|
||||
container.Get<CosBaseParser>(), pool));
|
||||
var validator = new CrossReferenceOffsetValidator(xrefValidator);
|
||||
|
||||
crossReferenceOffset = validator.Validate(crossReferenceOffset, isLenientParsing);
|
||||
crossReferenceOffset = validator.Validate(crossReferenceOffset, scanner, reader, isLenientParsing);
|
||||
|
||||
crossReferenceTable = container.Get<CrossReferenceParser>()
|
||||
.Parse(reader, isLenientParsing, crossReferenceOffset, pool, pdfScanner, scanner);
|
||||
crossReferenceTable = crossReferenceParser.Parse(reader, isLenientParsing, crossReferenceOffset, pool, pdfScanner, scanner);
|
||||
|
||||
var trueTypeFontParser = new TrueTypeFontParser();
|
||||
var fontDescriptorFactory = new FontDescriptorFactory();
|
||||
|
@@ -4,8 +4,6 @@
|
||||
using Fonts.Parser;
|
||||
using Logging;
|
||||
using Parser.FileStructure;
|
||||
using Parser.Parts;
|
||||
using Parser.Parts.CrossReference;
|
||||
|
||||
internal static class Bootstrapper
|
||||
{
|
||||
@@ -30,13 +28,7 @@
|
||||
|
||||
var headerParser = new FileHeaderParser(logger);
|
||||
var trailerParser = new FileTrailerParser();
|
||||
var nameParser = new CosNameParser();
|
||||
var dictionaryParser = new CosDictionaryParser(nameParser, logger);
|
||||
var baseParser = new CosBaseParser(nameParser, new CosStringParser(), dictionaryParser, new CosArrayParser());
|
||||
var filterProvider = new MemoryFilterProvider(new DecodeParameterResolver(logger), new PngPredictor(), logger);
|
||||
var crossReferenceParser = new CrossReferenceStreamParser(filterProvider);
|
||||
|
||||
var crossReferenceTableParser = new CrossReferenceParser(logger, dictionaryParser, baseParser, crossReferenceParser, new CrossReferenceTableParser());
|
||||
|
||||
var cmapParser = new CMapParser();
|
||||
var afmParser = new AdobeFontMetricsParser();
|
||||
@@ -44,11 +36,6 @@
|
||||
var container = new Container();
|
||||
container.Register(headerParser);
|
||||
container.Register(trailerParser);
|
||||
container.Register(nameParser);
|
||||
container.Register(dictionaryParser);
|
||||
container.Register(baseParser);
|
||||
container.Register(crossReferenceParser);
|
||||
container.Register(crossReferenceTableParser);
|
||||
container.Register(filterProvider);
|
||||
container.Register(cmapParser);
|
||||
container.Register(afmParser);
|
||||
|
Reference in New Issue
Block a user