mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-10-15 19:54:52 +08:00
fix bugs with reading cross reference tables.
This commit is contained in:
@@ -0,0 +1,39 @@
|
|||||||
|
namespace UglyToad.Pdf.Tests.Integration
|
||||||
|
{
|
||||||
|
using System;
|
||||||
|
using System.IO;
|
||||||
|
using Xunit;
|
||||||
|
|
||||||
|
/// <summary>
/// Integration tests which open the "Font Size Test" document produced by Google Chrome's
/// print-to-PDF feature and check the basic page information read from it.
/// </summary>
public class FontSizeTestFromGoogleChromeTests
{
    /// <summary>
    /// Builds the full path of the test document, resolved relative to the
    /// application base directory (three levels up, then Integration/Documents).
    /// </summary>
    private static string GetFilename()
    {
        var baseDirectory = AppDomain.CurrentDomain.BaseDirectory;

        var relativeFolder = Path.Combine(baseDirectory, "..", "..", "..", "Integration", "Documents");

        var absoluteFolder = Path.GetFullPath(relativeFolder);

        return Path.Combine(absoluteFolder, "Font Size Test - from google chrome print pdf.pdf");
    }

    /// <summary>
    /// The document is expected to contain exactly one page.
    /// </summary>
    [Fact]
    public void GetsCorrectNumberOfPages()
    {
        var path = GetFilename();

        using (var pdf = PdfDocument.Open(path))
        {
            Assert.Equal(1, pdf.NumberOfPages);
        }
    }

    /// <summary>
    /// The first page is expected to report a width of 595 and a height of 842.
    /// </summary>
    [Fact]
    public void GetsCorrectPageWidthAndHeight()
    {
        var path = GetFilename();

        using (var pdf = PdfDocument.Open(path))
        {
            var firstPage = pdf.GetPage(1);

            Assert.Equal(595, firstPage.Width);
            Assert.Equal(842, firstPage.Height);
        }
    }
}
|
||||||
|
}
|
@@ -10,8 +10,8 @@
|
|||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<None Remove="Fonts\TrueType\google-simple-doc.ttf" />
|
<None Remove="Fonts\TrueType\google-simple-doc.ttf" />
|
||||||
<None Remove="Fonts\TrueType\Roboto-Regular.ttf" />
|
<None Remove="Fonts\TrueType\Roboto-Regular.ttf" />
|
||||||
|
<None Remove="Integration\Documents\Font Size Test - from google chrome print pdf.pdf" />
|
||||||
<None Remove="Integration\Documents\Font Size Test - from libre office.pdf" />
|
<None Remove="Integration\Documents\Font Size Test - from libre office.pdf" />
|
||||||
<None Remove="Integration\Documents\Font Size Text - from google chrome print pdf.pdf" />
|
|
||||||
<None Remove="Integration\Documents\Single Page Simple - from google drive.pdf" />
|
<None Remove="Integration\Documents\Single Page Simple - from google drive.pdf" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
@@ -25,7 +25,7 @@
|
|||||||
<Content Include="Integration\Documents\Font Size Test - from libre office.pdf">
|
<Content Include="Integration\Documents\Font Size Test - from libre office.pdf">
|
||||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||||
</Content>
|
</Content>
|
||||||
<Content Include="Integration\Documents\Font Size Text - from google chrome print pdf.pdf">
|
<Content Include="Integration\Documents\Font Size Test - from google chrome print pdf.pdf">
|
||||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||||
</Content>
|
</Content>
|
||||||
<Content Include="Integration\Documents\Single Page Simple - from google drive.pdf">
|
<Content Include="Integration\Documents\Single Page Simple - from google drive.pdf">
|
||||||
|
@@ -71,7 +71,12 @@
|
|||||||
|
|
||||||
if (offsetOrStreamNumber == null)
|
if (offsetOrStreamNumber == null)
|
||||||
{
|
{
|
||||||
return CosNull.Null;
|
if (isLenientParsing)
|
||||||
|
{
|
||||||
|
return CosNull.Null;
|
||||||
|
}
|
||||||
|
|
||||||
|
throw new InvalidOperationException($"Could not locate the object {key.Number} which was not found in the cross reference table.");
|
||||||
}
|
}
|
||||||
|
|
||||||
var isCompressedStreamObject = offsetOrStreamNumber <= 0;
|
var isCompressedStreamObject = offsetOrStreamNumber <= 0;
|
||||||
|
@@ -10,6 +10,9 @@
|
|||||||
|
|
||||||
internal class CrossReferenceTableParser
|
internal class CrossReferenceTableParser
|
||||||
{
|
{
|
||||||
|
private const string InUseEntry = "n";
|
||||||
|
private const string FreeEntry = "f";
|
||||||
|
|
||||||
private readonly ILog log;
|
private readonly ILog log;
|
||||||
private readonly CosDictionaryParser dictionaryParser;
|
private readonly CosDictionaryParser dictionaryParser;
|
||||||
private readonly CosBaseParser baseParser;
|
private readonly CosBaseParser baseParser;
|
||||||
@@ -25,7 +28,8 @@
|
|||||||
{
|
{
|
||||||
builder = null;
|
builder = null;
|
||||||
|
|
||||||
long xrefTableStartOffset = source.GetPosition();
|
var tableStartOffset = source.GetPosition();
|
||||||
|
|
||||||
if (source.Peek() != 'x')
|
if (source.Peek() != 'x')
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
@@ -40,6 +44,7 @@
|
|||||||
// check for trailer after xref
|
// check for trailer after xref
|
||||||
var str = ReadHelper.ReadString(source);
|
var str = ReadHelper.ReadString(source);
|
||||||
byte[] b = OtherEncodings.StringAsLatin1Bytes(str);
|
byte[] b = OtherEncodings.StringAsLatin1Bytes(str);
|
||||||
|
|
||||||
source.Rewind(b.Length);
|
source.Rewind(b.Length);
|
||||||
|
|
||||||
if (str.StartsWith("trailer"))
|
if (str.StartsWith("trailer"))
|
||||||
@@ -54,23 +59,25 @@
|
|||||||
XRefType = CrossReferenceType.Table
|
XRefType = CrossReferenceType.Table
|
||||||
};
|
};
|
||||||
|
|
||||||
// Xref tables can have multiple sections. Each starts with a starting object id and a count.
|
// Tables can have multiple sections. Each starts with a starting object id and a count.
|
||||||
while (true)
|
while (true)
|
||||||
{
|
{
|
||||||
var currentLine = ReadHelper.ReadLine(source);
|
if (!TableSubsectionDefinition.TryRead(log, source, out var subsectionDefinition))
|
||||||
String[] splitString = currentLine.Split(new[] { "\\s" }, StringSplitOptions.RemoveEmptyEntries);
|
|
||||||
if (splitString.Length != 2)
|
|
||||||
{
|
{
|
||||||
log.Warn("Unexpected XRefTable Entry: " + currentLine);
|
if (isLenientParsing)
|
||||||
break;
|
{
|
||||||
|
log.Warn($"Unexpected subsection definition in the cross-reference table at offset {offset}");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
throw new InvalidOperationException($"Unexpected subsection definition in the cross-reference table at offset {offset}");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// first obj id
|
|
||||||
long currObjID = long.Parse(splitString[0]);
|
var currentObjectId = subsectionDefinition.FirstNumber;
|
||||||
// the number of objects in the xref table
|
|
||||||
int count = int.Parse(splitString[1]);
|
|
||||||
|
|
||||||
ReadHelper.SkipSpaces(source);
|
ReadHelper.SkipSpaces(source);
|
||||||
for (int i = 0; i < count; i++)
|
for (var i = 0; i < subsectionDefinition.Count; i++)
|
||||||
{
|
{
|
||||||
if (source.IsEof() || ReadHelper.IsEndOfName((char)source.Peek()))
|
if (source.IsEof() || ReadHelper.IsEndOfName((char)source.Peek()))
|
||||||
{
|
{
|
||||||
@@ -81,42 +88,47 @@
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
//Ignore table contents
|
//Ignore table contents
|
||||||
currentLine = ReadHelper.ReadLine(source);
|
var currentLine = ReadHelper.ReadLine(source);
|
||||||
splitString = currentLine.Split(new[] { "\\s" }, StringSplitOptions.RemoveEmptyEntries);
|
var splitString = currentLine.Split(new[] {' '}, StringSplitOptions.RemoveEmptyEntries);
|
||||||
if (splitString.Length < 3)
|
if (splitString.Length < 3)
|
||||||
{
|
{
|
||||||
log.Warn("invalid xref line: " + currentLine);
|
log.Warn("invalid xref line: " + currentLine);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* This supports the corrupt table as reported in
|
// This supports the corrupt table as reported in PDFBOX-474 (XXXX XXX XX n)
|
||||||
* PDFBOX-474 (XXXX XXX XX n) */
|
if (splitString[splitString.Length - 1].Equals(InUseEntry))
|
||||||
if (splitString[splitString.Length - 1].Equals("n"))
|
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
long currOffset = long.Parse(splitString[0]);
|
var objectOffset = long.Parse(splitString[0]);
|
||||||
if (currOffset >= xrefTableStartOffset && currOffset <= source.GetPosition())
|
|
||||||
|
if (objectOffset >= tableStartOffset && objectOffset <= source.GetPosition())
|
||||||
{
|
{
|
||||||
// PDFBOX-3923: offset points inside this table - that can't be good
|
// PDFBOX-3923: offset points inside this table - that can't be good
|
||||||
throw new InvalidOperationException("XRefTable offset " + currOffset +
|
throw new InvalidOperationException(
|
||||||
" is within xref table for " + currObjID);
|
$"Object offset {objectOffset} is within its own cross-reference table for object {currentObjectId}");
|
||||||
}
|
}
|
||||||
int currGenID = int.Parse(splitString[1]);
|
|
||||||
builder.Add(currObjID, currGenID, currOffset);
|
var generation = int.Parse(splitString[1]);
|
||||||
|
builder.Add(currentObjectId, generation, objectOffset);
|
||||||
}
|
}
|
||||||
catch (FormatException e)
|
catch (FormatException e)
|
||||||
{
|
{
|
||||||
throw new InvalidOperationException("Bad", e);
|
throw new InvalidOperationException("Bad", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (!splitString[2].Equals("f"))
|
else if (!splitString[2].Equals(FreeEntry))
|
||||||
{
|
{
|
||||||
throw new InvalidOperationException("Corrupt XRefTable Entry - ObjID:" + currObjID);
|
throw new InvalidOperationException(
|
||||||
|
$"Corrupt cross-reference table entry for object {currentObjectId}. The indicator was not 'n' or 'f' but {splitString[2]}.");
|
||||||
}
|
}
|
||||||
currObjID++;
|
|
||||||
|
currentObjectId++;
|
||||||
|
|
||||||
ReadHelper.SkipSpaces(source);
|
ReadHelper.SkipSpaces(source);
|
||||||
}
|
}
|
||||||
|
|
||||||
ReadHelper.SkipSpaces(source);
|
ReadHelper.SkipSpaces(source);
|
||||||
if (!ReadHelper.IsDigit(source))
|
if (!ReadHelper.IsDigit(source))
|
||||||
{
|
{
|
||||||
|
@@ -45,15 +45,14 @@
|
|||||||
|
|
||||||
var table = new CrossReferenceTableBuilder();
|
var table = new CrossReferenceTableBuilder();
|
||||||
|
|
||||||
long prev = xrefLocation;
|
long previousCrossReferenceLocation = xrefLocation;
|
||||||
// ---- parse whole chain of xref tables/object streams using PREV reference
|
// ---- parse whole chain of xref tables/object streams using PREV reference
|
||||||
HashSet<long> prevSet = new HashSet<long>();
|
HashSet<long> prevSet = new HashSet<long>();
|
||||||
while (prev > 0)
|
while (previousCrossReferenceLocation > 0)
|
||||||
{
|
{
|
||||||
// seek to xref table
|
// seek to xref table
|
||||||
reader.Seek(prev);
|
reader.Seek(previousCrossReferenceLocation);
|
||||||
|
|
||||||
// skip white spaces
|
|
||||||
ReadHelper.SkipSpaces(reader);
|
ReadHelper.SkipSpaces(reader);
|
||||||
|
|
||||||
var isTable = reader.Peek() == X;
|
var isTable = reader.Peek() == X;
|
||||||
@@ -63,7 +62,7 @@
|
|||||||
{
|
{
|
||||||
// xref table and trailer
|
// xref table and trailer
|
||||||
// use existing parser to parse xref table
|
// use existing parser to parse xref table
|
||||||
if (!crossReferenceTableParser.TryParse(reader, prev, isLenientParsing, pool, out var tableBuilder))
|
if (!crossReferenceTableParser.TryParse(reader, previousCrossReferenceLocation, isLenientParsing, pool, out var tableBuilder))
|
||||||
{
|
{
|
||||||
throw new InvalidOperationException($"Expected trailer object at position: {reader.GetPosition()}");
|
throw new InvalidOperationException($"Expected trailer object at position: {reader.GetPosition()}");
|
||||||
}
|
}
|
||||||
@@ -89,7 +88,7 @@
|
|||||||
ReadHelper.SkipSpaces(reader);
|
ReadHelper.SkipSpaces(reader);
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
streamPart = ParseCrossReferenceStream(reader, prev, pool, isLenientParsing);
|
streamPart = ParseCrossReferenceStream(reader, previousCrossReferenceLocation, pool, isLenientParsing);
|
||||||
}
|
}
|
||||||
catch (InvalidOperationException ex)
|
catch (InvalidOperationException ex)
|
||||||
{
|
{
|
||||||
@@ -115,46 +114,50 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
prev = trailer.GetLongOrDefault(CosName.PREV);
|
previousCrossReferenceLocation = trailer.GetLongOrDefault(CosName.PREV);
|
||||||
if (prev > 0)
|
if (previousCrossReferenceLocation > 0)
|
||||||
{
|
{
|
||||||
// check the xref table reference
|
// check the xref table reference
|
||||||
fixedOffset = xrefOffsetValidator.CheckXRefOffset(prev, isLenientParsing);
|
fixedOffset = xrefOffsetValidator.CheckXRefOffset(previousCrossReferenceLocation, isLenientParsing);
|
||||||
if (fixedOffset > -1 && fixedOffset != prev)
|
if (fixedOffset > -1 && fixedOffset != previousCrossReferenceLocation)
|
||||||
{
|
{
|
||||||
prev = fixedOffset;
|
previousCrossReferenceLocation = fixedOffset;
|
||||||
trailer.SetLong(CosName.PREV, prev);
|
trailer.SetLong(CosName.PREV, previousCrossReferenceLocation);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
tableBuilder.Previous = tableBuilder.Dictionary.GetLongOrDefault(CosName.PREV);
|
tableBuilder.Previous = tableBuilder.Dictionary.GetLongOrDefault(CosName.PREV);
|
||||||
|
|
||||||
table.Add(tableBuilder.AsCrossReferenceTablePart());
|
table.Add(tableBuilder.AsCrossReferenceTablePart());
|
||||||
table.Add(streamPart);
|
|
||||||
|
if (streamPart != null)
|
||||||
|
{
|
||||||
|
table.Add(streamPart);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// parse xref stream
|
// parse xref stream
|
||||||
var tablePart = ParseCrossReferenceStream(reader, prev, pool, isLenientParsing);
|
var tablePart = ParseCrossReferenceStream(reader, previousCrossReferenceLocation, pool, isLenientParsing);
|
||||||
table.Add(tablePart);
|
table.Add(tablePart);
|
||||||
|
|
||||||
prev = tablePart.Previous;
|
previousCrossReferenceLocation = tablePart.Previous;
|
||||||
if (prev > 0)
|
if (previousCrossReferenceLocation > 0)
|
||||||
{
|
{
|
||||||
// check the xref table reference
|
// check the xref table reference
|
||||||
fixedOffset = xrefOffsetValidator.CheckXRefOffset(prev, isLenientParsing);
|
fixedOffset = xrefOffsetValidator.CheckXRefOffset(previousCrossReferenceLocation, isLenientParsing);
|
||||||
if (fixedOffset > -1 && fixedOffset != prev)
|
if (fixedOffset > -1 && fixedOffset != previousCrossReferenceLocation)
|
||||||
{
|
{
|
||||||
prev = fixedOffset;
|
previousCrossReferenceLocation = fixedOffset;
|
||||||
tablePart.FixOffset(prev);
|
tablePart.FixOffset(previousCrossReferenceLocation);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (prevSet.Contains(prev))
|
if (prevSet.Contains(previousCrossReferenceLocation))
|
||||||
{
|
{
|
||||||
throw new InvalidOperationException("/Prev loop at offset " + prev);
|
throw new InvalidOperationException("/Prev loop at offset " + previousCrossReferenceLocation);
|
||||||
}
|
}
|
||||||
prevSet.Add(prev);
|
prevSet.Add(previousCrossReferenceLocation);
|
||||||
}
|
}
|
||||||
|
|
||||||
var resolved = table.Build(xrefLocation, log);
|
var resolved = table.Build(xrefLocation, log);
|
||||||
|
@@ -0,0 +1,82 @@
|
|||||||
|
namespace UglyToad.Pdf.Parser.Parts.CrossReference
|
||||||
|
{
|
||||||
|
using System;
|
||||||
|
using IO;
|
||||||
|
using Logging;
|
||||||
|
|
||||||
|
/// <summary>
/// Each subsection of the cross-reference table starts with a line defining the starting object number
/// and the count of objects in the subsection.
/// </summary>
/// <example>
/// xref
/// 12 16
/// ...
///
/// Defines a table subsection that starts with object 12 and has 16 entries (12-27).
/// </example>
internal struct TableSubsectionDefinition
{
    // The two numbers on a subsection definition line are split on the space character only.
    private static readonly char[] Splitters = { ' ' };

    /// <summary>
    /// The first object number in the table.
    /// </summary>
    public long FirstNumber { get; }

    /// <summary>
    /// The number of consecutive objects declared in the table.
    /// </summary>
    public int Count { get; }

    /// <summary>
    /// Create a new <see cref="TableSubsectionDefinition"/> to define a range of consecutive objects in the cross-reference table.
    /// </summary>
    /// <param name="firstNumber">The object number of the first entry in the subsection.</param>
    /// <param name="count">The number of consecutive entries in the subsection.</param>
    public TableSubsectionDefinition(long firstNumber, int count)
    {
        FirstNumber = firstNumber;
        Count = count;
    }

    /// <summary>
    /// Attempts to read the <see cref="TableSubsectionDefinition"/> from the current line of the source.
    /// </summary>
    /// <param name="log">Receives an error entry when the line has two parts which cannot be parsed as numbers.</param>
    /// <param name="source">The input to read one line from. The line is consumed whether or not parsing succeeds.</param>
    /// <param name="definition">
    /// The parsed definition when this method returns <see langword="true"/>,
    /// otherwise the default value (first number 0, count 0).
    /// </param>
    /// <returns>
    /// <see langword="true"/> if the line consisted of exactly two space-separated integers,
    /// <see langword="false"/> otherwise.
    /// </returns>
    public static bool TryRead(ILog log, IRandomAccessRead source, out TableSubsectionDefinition definition)
    {
        definition = default(TableSubsectionDefinition);

        var line = ReadHelper.ReadLine(source);

        var parts = line.Split(Splitters, StringSplitOptions.RemoveEmptyEntries);

        if (parts.Length != 2)
        {
            return false;
        }

        try
        {
            // The numbers in a PDF file are machine-readable data, so they must not be
            // interpreted using the culture of the machine running the parser.
            var invariant = global::System.Globalization.CultureInfo.InvariantCulture;

            var firstObjectId = long.Parse(parts[0], invariant);
            var objectCount = int.Parse(parts[1], invariant);

            definition = new TableSubsectionDefinition(firstObjectId, objectCount);

            return true;
        }
        catch (Exception ex) when (ex is FormatException || ex is OverflowException)
        {
            // Only malformed or out-of-range numbers are treated as an invalid definition;
            // any other exception type indicates a genuine fault and is allowed to propagate.
            // The logger is accessed null-safely so a missing logger cannot mask the parse failure.
            log?.Error(
                $"The format for the subsection definition was invalid, expected [long] [int], instead got '{line}'", ex);

            return false;
        }
    }

    /// <summary>
    /// Formats the definition the way it appears in the file: the first object number, a space, then the count.
    /// </summary>
    public override string ToString()
    {
        return $"{FirstNumber} {Count}";
    }
}
|
||||||
|
}
|
Reference in New Issue
Block a user