use offset to file header to correct cross references

If the %PDF version header comment is offset from the start of the file, the cross-reference offsets will also be wrong by that amount. This change updates the cross-reference location logic to use the offset of the located version header.
This commit is contained in:
Eliot Jones
2020-01-26 15:30:20 +00:00
parent a561c8954e
commit 693a3d5958
9 changed files with 66 additions and 9 deletions

View File

@@ -33,6 +33,7 @@
var result = FileHeaderParser.Parse(scanner, false, log);
Assert.Equal(format, result.VersionString);
Assert.Equal(0, result.OffsetInFile);
}
[Fact]
@@ -47,6 +48,7 @@
var result = FileHeaderParser.Parse(scanner, false, log);
Assert.Equal(1.2m, result.Version);
Assert.Equal(9, result.OffsetInFile);
}
[Fact]
@@ -68,6 +70,7 @@
var result = FileHeaderParser.Parse(scanner, false, log);
Assert.Equal(1.2m, result.Version);
Assert.Equal(13, result.OffsetInFile);
}
[Fact]
@@ -79,6 +82,7 @@
var result = FileHeaderParser.Parse(scanner, true, log);
Assert.Equal(1.7m, result.Version);
Assert.Equal(13, result.OffsetInFile);
}
[Fact]
@@ -90,6 +94,7 @@ three %PDF-1.6");
var result = FileHeaderParser.Parse(scanner, true, log);
Assert.Equal(1.6m, result.Version);
Assert.Equal(15, result.OffsetInFile);
}
[Fact]
@@ -127,9 +132,10 @@ three %PDF-1.6");
{
var scanner = StringBytesTestConverter.Scanner(@"%FDF-1.6");
FileHeaderParser.Parse(scanner, false, log);
var result = FileHeaderParser.Parse(scanner, false, log);
Assert.Equal(0, scanner.CurrentPosition);
Assert.Equal(0, result.OffsetInFile);
}
}
}

View File

@@ -28,6 +28,8 @@ namespace UglyToad.PdfPig.Tests.Tokens
}
public long CurrentPosition { get; set; }
public long Length { get; } = 10;
public void RegisterCustomTokenizer(byte firstByte, ITokenizer tokenizer)
{
throw new NotImplementedException();

View File

@@ -33,7 +33,10 @@
/// <inheritdoc />
public long CurrentPosition => inputBytes.CurrentOffset;
/// <inheritdoc />
public long Length => inputBytes.Length;
private bool hasBytePreRead;
private bool isInInlineImage;

View File

@@ -16,6 +16,11 @@
/// </summary>
long CurrentPosition { get; }
/// <summary>
/// The length of the data represented by this scanner.
/// </summary>
long Length { get; }
/// <summary>
/// Add support for a custom type of tokenizer.
/// </summary>

View File

@@ -1,15 +1,28 @@
namespace UglyToad.PdfPig.Content
{
using System;
internal class HeaderVersion
{
public decimal Version { get; }
public string VersionString { get; }
public HeaderVersion(decimal version, string versionString)
/// <summary>
/// The offset in bytes from the start of the file to the start of the version comment.
/// </summary>
public long OffsetInFile { get; }
/// <summary>
/// Create a new <see cref="HeaderVersion"/>.
/// </summary>
/// <param name="version">The numeric version read from the header comment.</param>
/// <param name="versionString">The raw text of the version comment.</param>
/// <param name="offsetInFile">The offset in bytes from the start of the file to the start of the version comment; zero or greater.</param>
/// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="offsetInFile"/> is negative.</exception>
public HeaderVersion(decimal version, string versionString, long offsetInFile)
{
    // Validate before assigning anything. The three-argument constructor is used because the
    // single-string overload of ArgumentOutOfRangeException treats its argument as the parameter
    // name rather than the message.
    if (offsetInFile < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(offsetInFile), offsetInFile,
            $"Invalid offset for header version, must not be negative. Got: {offsetInFile}.");
    }

    Version = version;
    VersionString = versionString;
    OffsetInFile = offsetInFile;
}
public override string ToString()

View File

@@ -30,7 +30,10 @@
this.xrefCosChecker = xrefCosChecker;
}
public CrossReferenceTable Parse(IInputBytes bytes, bool isLenientParsing, long crossReferenceLocation, IPdfTokenScanner pdfScanner, ISeekableTokenScanner tokenScanner)
public CrossReferenceTable Parse(IInputBytes bytes, bool isLenientParsing, long crossReferenceLocation,
long offsetCorrection,
IPdfTokenScanner pdfScanner,
ISeekableTokenScanner tokenScanner)
{
long fixedOffset = offsetValidator.CheckXRefOffset(crossReferenceLocation, tokenScanner, bytes, isLenientParsing);
if (fixedOffset > -1)
@@ -70,7 +73,14 @@
CrossReferenceTablePart tablePart = crossReferenceTableParser.Parse(tokenScanner,
previousCrossReferenceLocation, isLenientParsing);
previousCrossReferenceLocation = tablePart.GetPreviousOffset();
var nextOffset = tablePart.GetPreviousOffset();
if (nextOffset >= 0)
{
nextOffset += offsetCorrection;
}
previousCrossReferenceLocation = nextOffset;
DictionaryToken tableDictionary = tablePart.Dictionary;
@@ -150,6 +160,12 @@
table.Add(tablePart);
previousCrossReferenceLocation = tablePart.Previous;
if (previousCrossReferenceLocation >= 0)
{
previousCrossReferenceLocation += offsetCorrection;
}
if (previousCrossReferenceLocation > 0)
{
// check the xref table reference

View File

@@ -78,9 +78,14 @@
return HandleMissingVersion(comment, isLenientParsing, log);
}
var atEnd = scanner.CurrentPosition == scanner.Length;
var rewind = atEnd ? 1 : 2;
var commentOffset = scanner.CurrentPosition - comment.Data.Length - rewind;
scanner.Seek(0);
var result = new HeaderVersion(version, comment.Data);
var result = new HeaderVersion(version, comment.Data, commentOffset);
return result;
}
@@ -91,7 +96,7 @@
{
log.Warn($"Did not find a version header of the correct format, defaulting to 1.4 since lenient. Header was: {comment.Data}.");
return new HeaderVersion(1.4m, "PDF-1.4");
return new HeaderVersion(1.4m, "PDF-1.4", 0);
}
throw new PdfDocumentFormatException($"The comment which should have provided the version was in the wrong format: {comment.Data}.");

View File

@@ -98,14 +98,19 @@
var version = FileHeaderParser.Parse(scanner, isLenientParsing, log);
var crossReferenceOffset = FileTrailerParser.GetFirstCrossReferenceOffset(inputBytes, scanner, isLenientParsing);
var crossReferenceOffset = FileTrailerParser.GetFirstCrossReferenceOffset(inputBytes, scanner,
isLenientParsing) + version.OffsetInFile;
// TODO: make this use the scanner.
var validator = new CrossReferenceOffsetValidator(xrefValidator);
crossReferenceOffset = validator.Validate(crossReferenceOffset, scanner, inputBytes, isLenientParsing);
crossReferenceTable = crossReferenceParser.Parse(inputBytes, isLenientParsing, crossReferenceOffset, pdfScanner, scanner);
crossReferenceTable = crossReferenceParser.Parse(inputBytes, isLenientParsing,
crossReferenceOffset,
version.OffsetInFile,
pdfScanner,
scanner);
var fontDescriptorFactory = new FontDescriptorFactory();

View File

@@ -46,6 +46,8 @@
public long CurrentPosition => coreTokenScanner.CurrentPosition;
public long Length => coreTokenScanner.Length;
public PdfTokenScanner(IInputBytes inputBytes, IObjectLocationProvider objectLocationProvider, IFilterProvider filterProvider,
IEncryptionHandler encryptionHandler)
{