mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-11-28 17:47:12 +08:00
#434 ensure companion stream is added to cross reference on building
This commit is contained in:
@@ -1,39 +1,39 @@
|
|||||||
namespace UglyToad.PdfPig.Tests.Integration
|
namespace UglyToad.PdfPig.Tests.Integration
|
||||||
{
|
{
|
||||||
//using System;
|
using System;
|
||||||
//using System.Diagnostics;
|
using System.Diagnostics;
|
||||||
//using System.IO;
|
using System.IO;
|
||||||
//using Xunit;
|
using Xunit;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// A class for testing files which are not checked in to source control.
|
/// A class for testing files which are not checked in to source control.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public class LocalTests
|
public class LocalTests
|
||||||
{
|
{
|
||||||
//[Fact]
|
[Fact]
|
||||||
//public void Tests()
|
public void Tests()
|
||||||
//{
|
{
|
||||||
// var files = Directory.GetFiles(@"C:\temp\pdfs", "*.pdf");
|
var files = Directory.GetFiles(@"C:\temp\pdfs", "*.pdf");
|
||||||
|
|
||||||
// foreach (var file in files)
|
foreach (var file in files)
|
||||||
// {
|
{
|
||||||
// try
|
try
|
||||||
// {
|
{
|
||||||
// using (var document = PdfDocument.Open(file, new ParsingOptions { UseLenientParsing = false }))
|
using (var document = PdfDocument.Open(file, new ParsingOptions { UseLenientParsing = false }))
|
||||||
// {
|
{
|
||||||
// for (var i = 1; i <= document.NumberOfPages; i++)
|
for (var i = 1; i <= document.NumberOfPages; i++)
|
||||||
// {
|
{
|
||||||
// var page = document.GetPage(i);
|
var page = document.GetPage(i);
|
||||||
// var text = page.Text;
|
var text = page.Text;
|
||||||
// Trace.WriteLine(text);
|
Trace.WriteLine(text);
|
||||||
// }
|
}
|
||||||
// }
|
}
|
||||||
// }
|
}
|
||||||
// catch (Exception ex)
|
catch (Exception ex)
|
||||||
// {
|
{
|
||||||
// throw new InvalidOperationException($"Error parsing: {Path.GetFileName(file)}.", ex);
|
throw new InvalidOperationException($"Error parsing: {Path.GetFileName(file)}.", ex);
|
||||||
// }
|
}
|
||||||
// }
|
}
|
||||||
//}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -34,28 +34,35 @@
|
|||||||
DictionaryToken trailerDictionary = new DictionaryToken(new Dictionary<NameToken, IToken>());
|
DictionaryToken trailerDictionary = new DictionaryToken(new Dictionary<NameToken, IToken>());
|
||||||
Dictionary<IndirectReference, long> objectOffsets = new Dictionary<IndirectReference, long>();
|
Dictionary<IndirectReference, long> objectOffsets = new Dictionary<IndirectReference, long>();
|
||||||
|
|
||||||
List<long> xrefSeqBytePos = new List<long>();
|
var xrefPartToBytePositionOrder = new List<long>();
|
||||||
|
|
||||||
var currentPart = parts.FirstOrDefault(x => x.Offset == firstCrossReferenceOffset);
|
var currentPart = parts.FirstOrDefault(x => x.Offset == firstCrossReferenceOffset);
|
||||||
|
|
||||||
if (currentPart == null)
|
if (currentPart == null)
|
||||||
{
|
{
|
||||||
// no XRef at given position
|
// no XRef at given position
|
||||||
log.Warn("Did not found XRef object at specified startxref position " + firstCrossReferenceOffset);
|
log.Warn($"Did not find an XRef object at the specified startxref position {firstCrossReferenceOffset}");
|
||||||
|
|
||||||
// use all objects in byte position order (last entries overwrite previous ones)
|
// use all objects in byte position order (last entries overwrite previous ones)
|
||||||
xrefSeqBytePos.AddRange(parts.Select(x => x.Offset));
|
xrefPartToBytePositionOrder.AddRange(parts.Select(x => x.Offset));
|
||||||
xrefSeqBytePos.Sort();
|
xrefPartToBytePositionOrder.Sort();
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// copy xref type
|
// copy xref type
|
||||||
type = currentPart.Type;
|
type = currentPart.Type;
|
||||||
|
|
||||||
|
|
||||||
// found starting Xref object
|
// found starting Xref object
|
||||||
// add this and follow chain defined by 'Prev' keys
|
// add this and follow chain defined by 'Prev' keys
|
||||||
xrefSeqBytePos.Add(firstCrossReferenceOffset);
|
xrefPartToBytePositionOrder.Add(firstCrossReferenceOffset);
|
||||||
|
|
||||||
|
// Get any streams that are tied to this table.
|
||||||
|
var activePart = currentPart;
|
||||||
|
var dependents = parts.Where(x => x.TiedToXrefAtOffset == activePart.Offset);
|
||||||
|
foreach (var dependent in dependents)
|
||||||
|
{
|
||||||
|
xrefPartToBytePositionOrder.Add(dependent.Offset);
|
||||||
|
}
|
||||||
|
|
||||||
while (currentPart.Dictionary != null)
|
while (currentPart.Dictionary != null)
|
||||||
{
|
{
|
||||||
@@ -72,21 +79,21 @@
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
xrefSeqBytePos.Add(prevBytePos);
|
xrefPartToBytePositionOrder.Add(prevBytePos);
|
||||||
|
|
||||||
// sanity check to prevent infinite loops
|
// sanity check to prevent infinite loops
|
||||||
if (xrefSeqBytePos.Count >= parts.Count)
|
if (xrefPartToBytePositionOrder.Count >= parts.Count)
|
||||||
{
|
{
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// have to reverse order so that later XRefs will overwrite previous ones
|
// have to reverse order so that later XRefs will overwrite previous ones
|
||||||
xrefSeqBytePos.Reverse();
|
xrefPartToBytePositionOrder.Reverse();
|
||||||
}
|
}
|
||||||
|
|
||||||
// merge used and sorted XRef/trailer
|
// merge used and sorted XRef/trailer
|
||||||
foreach (long bPos in xrefSeqBytePos)
|
foreach (long bPos in xrefPartToBytePositionOrder)
|
||||||
{
|
{
|
||||||
var currentObject = parts.First(x => x.Offset == bPos || x.Offset == bPos + offsetCorrection);
|
var currentObject = parts.First(x => x.Offset == bPos || x.Offset == bPos + offsetCorrection);
|
||||||
if (currentObject.Dictionary != null)
|
if (currentObject.Dictionary != null)
|
||||||
|
|||||||
@@ -33,13 +33,24 @@
|
|||||||
|
|
||||||
public CrossReferenceType Type { get; }
|
public CrossReferenceType Type { get; }
|
||||||
|
|
||||||
public CrossReferenceTablePart(IReadOnlyDictionary<IndirectReference, long> objectOffsets, long offset, long previous, DictionaryToken dictionary, CrossReferenceType type)
|
/// <summary>
|
||||||
|
/// For Xref streams indicated by tables they should be used together when constructing the final table.
|
||||||
|
/// </summary>
|
||||||
|
public long? TiedToXrefAtOffset { get; }
|
||||||
|
|
||||||
|
public CrossReferenceTablePart(
|
||||||
|
IReadOnlyDictionary<IndirectReference, long> objectOffsets,
|
||||||
|
long offset, long previous,
|
||||||
|
DictionaryToken dictionary,
|
||||||
|
CrossReferenceType type,
|
||||||
|
long? tiedToXrefAtOffset)
|
||||||
{
|
{
|
||||||
ObjectOffsets = objectOffsets;
|
ObjectOffsets = objectOffsets;
|
||||||
Offset = offset;
|
Offset = offset;
|
||||||
Previous = previous;
|
Previous = previous;
|
||||||
Dictionary = dictionary;
|
Dictionary = dictionary;
|
||||||
Type = type;
|
Type = type;
|
||||||
|
TiedToXrefAtOffset = tiedToXrefAtOffset;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void FixOffset(long offset)
|
public void FixOffset(long offset)
|
||||||
|
|||||||
@@ -15,7 +15,9 @@
|
|||||||
public DictionaryToken Dictionary { get; set; }
|
public DictionaryToken Dictionary { get; set; }
|
||||||
|
|
||||||
public CrossReferenceType XRefType { get; set; }
|
public CrossReferenceType XRefType { get; set; }
|
||||||
|
|
||||||
|
public long? TiedToPreviousAtOffset { get; set; }
|
||||||
|
|
||||||
public void Add(long objectId, int generationNumber, long offset)
|
public void Add(long objectId, int generationNumber, long offset)
|
||||||
{
|
{
|
||||||
IndirectReference objKey = new IndirectReference(objectId, generationNumber);
|
IndirectReference objKey = new IndirectReference(objectId, generationNumber);
|
||||||
@@ -28,7 +30,7 @@
|
|||||||
|
|
||||||
public CrossReferenceTablePart Build()
|
public CrossReferenceTablePart Build()
|
||||||
{
|
{
|
||||||
return new CrossReferenceTablePart(objects, Offset, Previous, Dictionary, XRefType);
|
return new CrossReferenceTablePart(objects, Offset, Previous, Dictionary, XRefType, TiedToPreviousAtOffset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -82,9 +82,11 @@
|
|||||||
// check for a XRef stream, it may contain some object ids of compressed objects
|
// check for a XRef stream, it may contain some object ids of compressed objects
|
||||||
if (tableDictionary.ContainsKey(NameToken.XrefStm))
|
if (tableDictionary.ContainsKey(NameToken.XrefStm))
|
||||||
{
|
{
|
||||||
log.Debug("Cross reference table contained referenced to stream. Reading the stream.");
|
log.Debug("Cross reference table contained reference to stream. Reading the stream.");
|
||||||
|
|
||||||
int streamOffset = ((NumericToken)tableDictionary.Data[NameToken.XrefStm]).Int;
|
var tiedToTableAtOffset = tablePart.Offset;
|
||||||
|
|
||||||
|
int streamOffset = ((NumericToken) tableDictionary.Data[NameToken.XrefStm]).Int;
|
||||||
|
|
||||||
// check the xref stream reference
|
// check the xref stream reference
|
||||||
fixedOffset = offsetValidator.CheckXRefOffset(streamOffset, tokenScanner, bytes, isLenientParsing);
|
fixedOffset = offsetValidator.CheckXRefOffset(streamOffset, tokenScanner, bytes, isLenientParsing);
|
||||||
@@ -96,8 +98,13 @@
|
|||||||
|
|
||||||
// Update the cross reference table to be a stream instead.
|
// Update the cross reference table to be a stream instead.
|
||||||
tableDictionary = tableDictionary.With(NameToken.XrefStm, new NumericToken(streamOffset));
|
tableDictionary = tableDictionary.With(NameToken.XrefStm, new NumericToken(streamOffset));
|
||||||
tablePart = new CrossReferenceTablePart(tablePart.ObjectOffsets, streamOffset,
|
tablePart = new CrossReferenceTablePart(
|
||||||
tablePart.Previous, tableDictionary, tablePart.Type);
|
tablePart.ObjectOffsets,
|
||||||
|
streamOffset,
|
||||||
|
tablePart.Previous,
|
||||||
|
tableDictionary,
|
||||||
|
tablePart.Type,
|
||||||
|
tiedToTableAtOffset);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read the stream from the table.
|
// Read the stream from the table.
|
||||||
@@ -105,7 +112,7 @@
|
|||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
TryParseCrossReferenceStream(streamOffset, pdfScanner, out streamPart);
|
TryParseCrossReferenceStream(streamOffset, pdfScanner, tiedToTableAtOffset, out streamPart);
|
||||||
}
|
}
|
||||||
catch (InvalidOperationException ex)
|
catch (InvalidOperationException ex)
|
||||||
{
|
{
|
||||||
@@ -149,7 +156,7 @@
|
|||||||
tokenScanner.Seek(previousCrossReferenceLocation);
|
tokenScanner.Seek(previousCrossReferenceLocation);
|
||||||
|
|
||||||
// parse xref stream
|
// parse xref stream
|
||||||
if (!TryParseCrossReferenceStream(previousCrossReferenceLocation, pdfScanner, out var tablePart))
|
if (!TryParseCrossReferenceStream(previousCrossReferenceLocation, pdfScanner, null, out var tablePart))
|
||||||
{
|
{
|
||||||
if (!TryBruteForceXrefTableLocate(bytes, previousCrossReferenceLocation, out var actualOffset))
|
if (!TryBruteForceXrefTableLocate(bytes, previousCrossReferenceLocation, out var actualOffset))
|
||||||
{
|
{
|
||||||
@@ -218,7 +225,10 @@
|
|||||||
return resolved;
|
return resolved;
|
||||||
}
|
}
|
||||||
|
|
||||||
private bool TryParseCrossReferenceStream(long objByteOffset, IPdfTokenScanner pdfScanner,
|
private bool TryParseCrossReferenceStream(
|
||||||
|
long objByteOffset,
|
||||||
|
IPdfTokenScanner pdfScanner,
|
||||||
|
long? fromTableAtOffset,
|
||||||
out CrossReferenceTablePart xrefTablePart)
|
out CrossReferenceTablePart xrefTablePart)
|
||||||
{
|
{
|
||||||
xrefTablePart = null;
|
xrefTablePart = null;
|
||||||
@@ -236,7 +246,7 @@
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
xrefTablePart = crossReferenceStreamParser.Parse(objByteOffset, objectStream);
|
xrefTablePart = crossReferenceStreamParser.Parse(objByteOffset, fromTableAtOffset, objectStream);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -19,7 +19,7 @@
|
|||||||
/// <summary>
|
/// <summary>
|
||||||
/// Parses through the unfiltered stream and populates the xrefTable HashMap.
|
/// Parses through the unfiltered stream and populates the xrefTable HashMap.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public CrossReferenceTablePart Parse(long streamOffset, StreamToken stream)
|
public CrossReferenceTablePart Parse(long streamOffset, long? fromTableAtOffset, StreamToken stream)
|
||||||
{
|
{
|
||||||
var decoded = stream.Decode(filterProvider);
|
var decoded = stream.Decode(filterProvider);
|
||||||
|
|
||||||
@@ -38,7 +38,8 @@
|
|||||||
Offset = streamOffset,
|
Offset = streamOffset,
|
||||||
Previous = previousOffset,
|
Previous = previousOffset,
|
||||||
Dictionary = stream.StreamDictionary,
|
Dictionary = stream.StreamDictionary,
|
||||||
XRefType = CrossReferenceType.Stream
|
XRefType = CrossReferenceType.Stream,
|
||||||
|
TiedToPreviousAtOffset = fromTableAtOffset
|
||||||
};
|
};
|
||||||
|
|
||||||
var objectNumbers = GetObjectNumbers(stream.StreamDictionary);
|
var objectNumbers = GetObjectNumbers(stream.StreamDictionary);
|
||||||
|
|||||||
Reference in New Issue
Block a user