mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-09-18 18:27:55 +08:00
Use the offset of the file header to correct cross-reference offsets
If the %PDF version header comment is offset from the start of the file, the cross-reference offsets will also be wrong by that amount. This change updates the cross-reference location logic to use the offset from the located version header.
This commit is contained in:
@@ -33,6 +33,7 @@
|
||||
var result = FileHeaderParser.Parse(scanner, false, log);
|
||||
|
||||
Assert.Equal(format, result.VersionString);
|
||||
Assert.Equal(0, result.OffsetInFile);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
@@ -47,6 +48,7 @@
|
||||
var result = FileHeaderParser.Parse(scanner, false, log);
|
||||
|
||||
Assert.Equal(1.2m, result.Version);
|
||||
Assert.Equal(9, result.OffsetInFile);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
@@ -68,6 +70,7 @@
|
||||
var result = FileHeaderParser.Parse(scanner, false, log);
|
||||
|
||||
Assert.Equal(1.2m, result.Version);
|
||||
Assert.Equal(13, result.OffsetInFile);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
@@ -79,6 +82,7 @@
|
||||
var result = FileHeaderParser.Parse(scanner, true, log);
|
||||
|
||||
Assert.Equal(1.7m, result.Version);
|
||||
Assert.Equal(13, result.OffsetInFile);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
@@ -90,6 +94,7 @@ three %PDF-1.6");
|
||||
var result = FileHeaderParser.Parse(scanner, true, log);
|
||||
|
||||
Assert.Equal(1.6m, result.Version);
|
||||
Assert.Equal(15, result.OffsetInFile);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
@@ -127,9 +132,10 @@ three %PDF-1.6");
|
||||
{
|
||||
var scanner = StringBytesTestConverter.Scanner(@"%FDF-1.6");
|
||||
|
||||
FileHeaderParser.Parse(scanner, false, log);
|
||||
var result = FileHeaderParser.Parse(scanner, false, log);
|
||||
|
||||
Assert.Equal(0, scanner.CurrentPosition);
|
||||
Assert.Equal(0, result.OffsetInFile);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -28,6 +28,8 @@ namespace UglyToad.PdfPig.Tests.Tokens
|
||||
}
|
||||
|
||||
public long CurrentPosition { get; set; }
|
||||
public long Length { get; } = 10;
|
||||
|
||||
public void RegisterCustomTokenizer(byte firstByte, ITokenizer tokenizer)
|
||||
{
|
||||
throw new NotImplementedException();
|
||||
|
@@ -33,7 +33,10 @@
|
||||
|
||||
/// <inheritdoc />
|
||||
public long CurrentPosition => inputBytes.CurrentOffset;
|
||||
|
||||
|
||||
/// <inheritdoc />
|
||||
public long Length => inputBytes.Length;
|
||||
|
||||
private bool hasBytePreRead;
|
||||
private bool isInInlineImage;
|
||||
|
||||
|
@@ -16,6 +16,11 @@
|
||||
/// </summary>
|
||||
long CurrentPosition { get; }
|
||||
|
||||
/// <summary>
|
||||
/// The length of the data represented by this scanner.
|
||||
/// </summary>
|
||||
long Length { get; }
|
||||
|
||||
/// <summary>
|
||||
/// Add support for a custom type of tokenizer.
|
||||
/// </summary>
|
||||
|
@@ -1,15 +1,28 @@
|
||||
namespace UglyToad.PdfPig.Content
|
||||
{
|
||||
using System;
|
||||
|
||||
internal class HeaderVersion
|
||||
{
|
||||
public decimal Version { get; }
|
||||
|
||||
public string VersionString { get; }
|
||||
|
||||
public HeaderVersion(decimal version, string versionString)
|
||||
/// <summary>
|
||||
/// The offset in bytes from the start of the file to the start of the version comment.
|
||||
/// </summary>
|
||||
public long OffsetInFile { get; }
|
||||
|
||||
/// <summary>
/// Create a new <see cref="HeaderVersion"/>.
/// </summary>
/// <param name="version">The value stored in <see cref="Version"/>.</param>
/// <param name="versionString">The value stored in <see cref="VersionString"/>.</param>
/// <param name="offsetInFile">
/// The offset in bytes from the start of the file to the start of the version comment.
/// Zero is valid; negative values are rejected.
/// </param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="offsetInFile"/> is negative.</exception>
public HeaderVersion(decimal version, string versionString, long offsetInFile)
{
    if (offsetInFile < 0)
    {
        // The single-string ArgumentOutOfRangeException constructor interprets its argument as the
        // parameter name, so the parameter name and the message are passed separately here.
        throw new ArgumentOutOfRangeException(nameof(offsetInFile),
            $"Invalid offset for header version, must not be negative. Got: {offsetInFile}.");
    }

    Version = version;
    VersionString = versionString;
    OffsetInFile = offsetInFile;
}
|
||||
|
||||
public override string ToString()
|
||||
|
@@ -30,7 +30,10 @@
|
||||
this.xrefCosChecker = xrefCosChecker;
|
||||
}
|
||||
|
||||
public CrossReferenceTable Parse(IInputBytes bytes, bool isLenientParsing, long crossReferenceLocation, IPdfTokenScanner pdfScanner, ISeekableTokenScanner tokenScanner)
|
||||
public CrossReferenceTable Parse(IInputBytes bytes, bool isLenientParsing, long crossReferenceLocation,
|
||||
long offsetCorrection,
|
||||
IPdfTokenScanner pdfScanner,
|
||||
ISeekableTokenScanner tokenScanner)
|
||||
{
|
||||
long fixedOffset = offsetValidator.CheckXRefOffset(crossReferenceLocation, tokenScanner, bytes, isLenientParsing);
|
||||
if (fixedOffset > -1)
|
||||
@@ -70,7 +73,14 @@
|
||||
CrossReferenceTablePart tablePart = crossReferenceTableParser.Parse(tokenScanner,
|
||||
previousCrossReferenceLocation, isLenientParsing);
|
||||
|
||||
previousCrossReferenceLocation = tablePart.GetPreviousOffset();
|
||||
var nextOffset = tablePart.GetPreviousOffset();
|
||||
|
||||
if (nextOffset >= 0)
|
||||
{
|
||||
nextOffset += offsetCorrection;
|
||||
}
|
||||
|
||||
previousCrossReferenceLocation = nextOffset;
|
||||
|
||||
DictionaryToken tableDictionary = tablePart.Dictionary;
|
||||
|
||||
@@ -150,6 +160,12 @@
|
||||
table.Add(tablePart);
|
||||
|
||||
previousCrossReferenceLocation = tablePart.Previous;
|
||||
|
||||
if (previousCrossReferenceLocation >= 0)
|
||||
{
|
||||
previousCrossReferenceLocation += offsetCorrection;
|
||||
}
|
||||
|
||||
if (previousCrossReferenceLocation > 0)
|
||||
{
|
||||
// check the xref table reference
|
||||
|
@@ -78,9 +78,14 @@
|
||||
return HandleMissingVersion(comment, isLenientParsing, log);
|
||||
}
|
||||
|
||||
var atEnd = scanner.CurrentPosition == scanner.Length;
|
||||
var rewind = atEnd ? 1 : 2;
|
||||
|
||||
var commentOffset = scanner.CurrentPosition - comment.Data.Length - rewind;
|
||||
|
||||
scanner.Seek(0);
|
||||
|
||||
var result = new HeaderVersion(version, comment.Data);
|
||||
var result = new HeaderVersion(version, comment.Data, commentOffset);
|
||||
|
||||
return result;
|
||||
}
|
||||
@@ -91,7 +96,7 @@
|
||||
{
|
||||
log.Warn($"Did not find a version header of the correct format, defaulting to 1.4 since lenient. Header was: {comment.Data}.");
|
||||
|
||||
return new HeaderVersion(1.4m, "PDF-1.4");
|
||||
return new HeaderVersion(1.4m, "PDF-1.4", 0);
|
||||
}
|
||||
|
||||
throw new PdfDocumentFormatException($"The comment which should have provided the version was in the wrong format: {comment.Data}.");
|
||||
|
@@ -98,14 +98,19 @@
|
||||
|
||||
var version = FileHeaderParser.Parse(scanner, isLenientParsing, log);
|
||||
|
||||
var crossReferenceOffset = FileTrailerParser.GetFirstCrossReferenceOffset(inputBytes, scanner, isLenientParsing);
|
||||
var crossReferenceOffset = FileTrailerParser.GetFirstCrossReferenceOffset(inputBytes, scanner,
|
||||
isLenientParsing) + version.OffsetInFile;
|
||||
|
||||
// TODO: make this use the scanner.
|
||||
var validator = new CrossReferenceOffsetValidator(xrefValidator);
|
||||
|
||||
crossReferenceOffset = validator.Validate(crossReferenceOffset, scanner, inputBytes, isLenientParsing);
|
||||
|
||||
crossReferenceTable = crossReferenceParser.Parse(inputBytes, isLenientParsing, crossReferenceOffset, pdfScanner, scanner);
|
||||
crossReferenceTable = crossReferenceParser.Parse(inputBytes, isLenientParsing,
|
||||
crossReferenceOffset,
|
||||
version.OffsetInFile,
|
||||
pdfScanner,
|
||||
scanner);
|
||||
|
||||
var fontDescriptorFactory = new FontDescriptorFactory();
|
||||
|
||||
|
@@ -46,6 +46,8 @@
|
||||
|
||||
public long CurrentPosition => coreTokenScanner.CurrentPosition;
|
||||
|
||||
public long Length => coreTokenScanner.Length;
|
||||
|
||||
public PdfTokenScanner(IInputBytes inputBytes, IObjectLocationProvider objectLocationProvider, IFilterProvider filterProvider,
|
||||
IEncryptionHandler encryptionHandler)
|
||||
{
|
||||
|
Reference in New Issue
Block a user