mirror of
https://github.com/UglyToad/PdfPig.git
synced 2026-03-10 00:23:29 +08:00
revert flate decode handling to more lenient processing (#1254)
* revert flate decode handling to more lenient processing: the change to use the zlib/Adler checksum verification flow meant that invalid flate streams would not be decoded correctly. this caused issues for files that included invalid/missing checksums. this reverts the processing to the old approach for files like #1235
* fix object stream offset handling and track circular refs
* update tests
* normalize line endings for mac runner
* fixes for mac clownery
* add next pair to common crawl action
* add a test case for the root cause of the int overflow
This commit is contained in:
2
.github/workflows/run_common_crawl_tests.yml
vendored
2
.github/workflows/run_common_crawl_tests.yml
vendored
@@ -12,7 +12,7 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
pair: ["0000-0001", "0002-0003", "0004-0005", "0006-0007"]
|
||||
pair: ["0000-0001", "0002-0003", "0004-0005", "0006-0007", "0008-0009"]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
|
||||
20
src/UglyToad.PdfPig.Core/XrefEntryType.cs
Normal file
20
src/UglyToad.PdfPig.Core/XrefEntryType.cs
Normal file
@@ -0,0 +1,20 @@
|
||||
namespace UglyToad.PdfPig.Core;
|
||||
|
||||
/// <summary>
|
||||
/// Indicates where an object is located in the Xref.
|
||||
/// </summary>
|
||||
public enum XrefEntryType : byte
|
||||
{
|
||||
/// <summary>
|
||||
/// Free object.
|
||||
/// </summary>
|
||||
Free = 0,
|
||||
/// <summary>
|
||||
/// Located as an object in the file.
|
||||
/// </summary>
|
||||
File = 1,
|
||||
/// <summary>
|
||||
/// Located in a compressed object stream.
|
||||
/// </summary>
|
||||
ObjectStream = 2
|
||||
}
|
||||
42
src/UglyToad.PdfPig.Core/XrefLocation.cs
Normal file
42
src/UglyToad.PdfPig.Core/XrefLocation.cs
Normal file
@@ -0,0 +1,42 @@
|
||||
namespace UglyToad.PdfPig.Core;
|
||||
|
||||
/// <summary>
|
||||
/// Information about where an object is located in the file according to the Xref (or brute force parsing).
|
||||
/// </summary>
|
||||
public readonly struct XrefLocation
|
||||
{
|
||||
/// <summary>
|
||||
/// Which type of location is indicated.
|
||||
/// </summary>
|
||||
public readonly XrefEntryType Type;
|
||||
|
||||
/// <summary>
|
||||
/// If <see cref="Type"/> is <see cref="XrefEntryType.File"/> then this is the byte offset; otherwise, for <see cref="XrefEntryType.ObjectStream"/>, this is the stream number.
|
||||
/// </summary>
|
||||
public readonly long Value1;
|
||||
|
||||
/// <summary>
|
||||
/// If <see cref="Type"/> is <see cref="XrefEntryType.ObjectStream"/> then the index of the object in the stream.
|
||||
/// </summary>
|
||||
public readonly int Value2; // only used for ObjectStream
|
||||
|
||||
private XrefLocation(XrefEntryType type, long value1, int value2)
|
||||
{
|
||||
Type = type;
|
||||
Value1 = value1;
|
||||
Value2 = value2;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Create a location mapped to a byte offset in the file.
|
||||
/// </summary>
|
||||
public static XrefLocation File(long offset)
|
||||
=> new XrefLocation(XrefEntryType.File, offset, 0);
|
||||
|
||||
/// <summary>
|
||||
/// Create a location mapped to an index inside an object stream.
|
||||
/// </summary>
|
||||
public static XrefLocation Stream(long objStream, int index)
|
||||
=> new XrefLocation(XrefEntryType.ObjectStream, objStream, index);
|
||||
|
||||
}
|
||||
@@ -1,5 +1,6 @@
|
||||
namespace UglyToad.PdfPig.Tests.Filters
|
||||
{
|
||||
using PdfPig.Core;
|
||||
using PdfPig.Filters;
|
||||
using PdfPig.Tokens;
|
||||
|
||||
@@ -11,15 +12,32 @@
|
||||
public void EncodeAndDecodePreservesInput()
|
||||
{
|
||||
var parameters = new DictionaryToken(new Dictionary<NameToken, IToken>());
|
||||
var input = new byte[] {67, 69, 69, 10, 4, 20, 6, 19, 120, 64, 64, 64, 32};
|
||||
var input = new byte[] { 67, 69, 69, 10, 4, 20, 6, 19, 120, 64, 64, 64, 32 };
|
||||
|
||||
using (var inputStream = new MemoryStream(input))
|
||||
{
|
||||
inputStream.Seek(0, SeekOrigin.Begin);
|
||||
var result = filter.Encode(inputStream, parameters, 0);
|
||||
var result = filter.Encode(inputStream, parameters);
|
||||
var decoded = filter.Decode(result, parameters, TestFilterProvider.Instance, 0);
|
||||
Assert.Equal(input, decoded.ToArray());
|
||||
}
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void CanDecodeCorruptedInputIssue1235()
|
||||
{
|
||||
const string hexStr =
|
||||
"789C958D5D0AC2400C844FB077980B74BB7FD9D982F820B43E8B7B03C542C187EAFDC1F84B7D1164200999E49BD9044C6653D10E1E443DA1AF6636ED76EF315E7572968E1ECDAB7FB7506C4C59C0AEB3912EE270366AAAF4E36D364BF7911450DC274A5112B1AC9751D77A58680B51A4D8AE433D62953C037396E0F290FBE098B267A43051725AA34E77E44EF50B1B52B42C90E4ADF83FB94FDD0000000000";
|
||||
|
||||
var hex = new HexToken(hexStr.AsSpan());
|
||||
|
||||
var parameters = new DictionaryToken(new Dictionary<NameToken, IToken>());
|
||||
|
||||
var result = filter.Decode(hex.Bytes.ToArray(), parameters, TestFilterProvider.Instance, 0);
|
||||
|
||||
var text = OtherEncodings.BytesAsLatin1String(result.ToArray());
|
||||
|
||||
Assert.StartsWith("q", text);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -327,7 +327,7 @@
|
||||
var path = IntegrationHelpers.GetSpecificTestDocumentPath("StackOverflow_Issue_1122.pdf");
|
||||
|
||||
var ex = Assert.Throws<PdfDocumentFormatException>(() => PdfDocument.Open(path, new ParsingOptions() { UseLenientParsing = true }));
|
||||
Assert.Equal("The root object in the trailer did not resolve to a readable dictionary.", ex.Message);
|
||||
Assert.StartsWith("Circular reference encountered when looking", ex.Message);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
@@ -386,7 +386,7 @@
|
||||
{
|
||||
var path = IntegrationHelpers.GetSpecificTestDocumentPath("SpookyPass.pdf");
|
||||
var ex = Assert.Throws<PdfDocumentFormatException>(() => PdfDocument.Open(path, new ParsingOptions() { UseLenientParsing = true }));
|
||||
Assert.Equal("The root object in the trailer did not resolve to a readable dictionary.", ex.Message);
|
||||
Assert.StartsWith("Object stream cannot contain itself", ex.Message);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
@@ -552,7 +552,7 @@
|
||||
{
|
||||
var page = document.GetPage(13);
|
||||
// This used to fail with an overflow exception when we failed to validate the zlib encoded data
|
||||
Assert.NotNull(DocstrumBoundingBoxes.Instance.GetBlocks(page.GetWords()));
|
||||
Assert.Throws<OverflowException>(() => DocstrumBoundingBoxes.Instance.GetBlocks(page.GetWords()));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -47,7 +47,7 @@ public class FirstPassParserTests
|
||||
Assert.Equal(2, results.Parts.Count);
|
||||
Assert.NotNull(results.Trailer);
|
||||
|
||||
Assert.Equal(results.XrefOffsets[new IndirectReference(8, 0)], 500);
|
||||
Assert.Equal(results.XrefOffsets[new IndirectReference(8, 0)].Value1, 500);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
|
||||
@@ -589,7 +589,7 @@ public class XrefTableParserTests
|
||||
{
|
||||
Assert.True(table.ObjectOffsets.TryGetValue(offset.Key, out var actual));
|
||||
|
||||
Assert.Equal(offset.Value, actual);
|
||||
Assert.Equal(offset.Value, actual.Value1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -216,6 +216,68 @@ l";
|
||||
Assert.NotEmpty(result);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void HandlesIssue953_IntOverflowContent()
|
||||
{
|
||||
// After the ( + ) Tj operator the content stream becomes corrupt; our current parser therefore reads wrong
|
||||
// values for operations, which results in a problem when applying the show text operations. We should safely discard or recover on BT/ET boundaries.
|
||||
const string s =
|
||||
"""
|
||||
BT
|
||||
/TT6 1 Tf
|
||||
12.007 0 0 12.007 163.2j
|
||||
-0.19950 Tc
|
||||
0 Tw
|
||||
(x)Tj
|
||||
-0.1949 1.4142 TD
|
||||
(H)Tj
|
||||
/TT7 1 Tf
|
||||
12.031 0 0 12.031 157.38 85.2 Tm
|
||||
<0077>Tj
|
||||
-0.1945 1.4114 TD
|
||||
<0077>Tj
|
||||
/TT4 1 Tf
|
||||
12.007 0 0 12.007 174.42 94.5601 Tm
|
||||
0.0004 Tc
|
||||
-0.0005 Tw
|
||||
( + )Tj
|
||||
E9 478l)]T862.68E9 478E9 484.54 9 155l)]T862.6av9 478E9 15.2(
|
||||
ET
|
||||
154.386( i92 m
|
||||
171.6 97.62 l
|
||||
S
|
||||
BT
|
||||
/TT6 28 Tf
|
||||
12.03128 T2002.0307 163.2j
|
||||
-0.19950 DAc
|
||||
0 Tw853Tj
|
||||
0.1945 1.4142 om)873j
|
||||
-0.574142 om)68.80
|
||||
-0.5797 0 TD
|
||||
(f)Tj
|
||||
/TT( )7Tf
|
||||
0.31945 1.5341 TD371.4j
|
||||
2.82
|
||||
8.2652 0 5.724 TD
|
||||
0 Tc
|
||||
-0.0001 2748.3( = 091ity )-27483
|
||||
[(te27483
|
||||
[(te27483
|
||||
[(te27483
|
||||
[(te27483
|
||||
[(te27483
|
||||
[(Eq.)52 \(2.1
|
||||
(
|
||||
""";
|
||||
|
||||
var input = StringBytesTestConverter.Convert(s, false);
|
||||
|
||||
var lenientParser = new PageContentParser(ReflectionGraphicsStateOperationFactory.Instance, new StackDepthGuard(256), true);
|
||||
var result = lenientParser.Parse(1, input.Bytes, log);
|
||||
|
||||
Assert.NotEmpty(result);
|
||||
}
|
||||
|
||||
private static string LineEndingsToWhiteSpace(string str)
|
||||
{
|
||||
return str.Replace("\r\n", " ").Replace('\n', ' ').Replace('\r', ' ');
|
||||
|
||||
@@ -59,7 +59,7 @@ startxref
|
||||
|
||||
Assert.Equal(4, locations.Count);
|
||||
|
||||
Assert.Equal(TestDataOffsets, locations.Values);
|
||||
Assert.Equal(TestDataOffsets, locations.Values.Select(x => x.Value1));
|
||||
}
|
||||
|
||||
[Fact]
|
||||
@@ -111,7 +111,7 @@ endobj
|
||||
s.IndexOf("11 0 obj", StringComparison.OrdinalIgnoreCase)
|
||||
};
|
||||
|
||||
Assert.Equal(expectedLocations, locations.Values);
|
||||
Assert.Equal(expectedLocations, locations.Values.Select(x => x.Value1));
|
||||
}
|
||||
|
||||
[Fact]
|
||||
@@ -142,7 +142,7 @@ endobj";
|
||||
s.IndexOf("5 0 obj", StringComparison.OrdinalIgnoreCase)
|
||||
};
|
||||
|
||||
Assert.Equal(expectedLocations, locations.Values);
|
||||
Assert.Equal(expectedLocations, locations.Values.Select(x => x.Value1));
|
||||
}
|
||||
|
||||
[Fact]
|
||||
@@ -156,17 +156,17 @@ endobj";
|
||||
|
||||
Assert.Equal(13, locations.Count);
|
||||
|
||||
Assert.Equal(6183, locations[new IndirectReference(1, 0)]);
|
||||
Assert.Equal(244, locations[new IndirectReference(2, 0)]);
|
||||
Assert.Equal(15, locations[new IndirectReference(3, 0)]);
|
||||
Assert.Equal(222, locations[new IndirectReference(4, 0)]);
|
||||
Assert.Equal(5766, locations[new IndirectReference(5, 0)]);
|
||||
Assert.Equal(353, locations[new IndirectReference(6, 0)]);
|
||||
Assert.Equal(581, locations[new IndirectReference(7, 0)]);
|
||||
Assert.Equal(5068, locations[new IndirectReference(8, 0)]);
|
||||
Assert.Equal(5091, locations[new IndirectReference(9, 0)]);
|
||||
Assert.Equal(6183, locations[new IndirectReference(1, 0)].Value1);
|
||||
Assert.Equal(244, locations[new IndirectReference(2, 0)].Value1);
|
||||
Assert.Equal(15, locations[new IndirectReference(3, 0)].Value1);
|
||||
Assert.Equal(222, locations[new IndirectReference(4, 0)].Value1);
|
||||
Assert.Equal(5766, locations[new IndirectReference(5, 0)].Value1);
|
||||
Assert.Equal(353, locations[new IndirectReference(6, 0)].Value1);
|
||||
Assert.Equal(581, locations[new IndirectReference(7, 0)].Value1);
|
||||
Assert.Equal(5068, locations[new IndirectReference(8, 0)].Value1);
|
||||
Assert.Equal(5091, locations[new IndirectReference(9, 0)].Value1);
|
||||
|
||||
var s = GetStringAt(bytes, locations[new IndirectReference(3, 0)]);
|
||||
var s = GetStringAt(bytes, locations[new IndirectReference(3, 0)].Value1);
|
||||
Assert.StartsWith("3 0 obj", s);
|
||||
}
|
||||
}
|
||||
@@ -180,17 +180,17 @@ endobj";
|
||||
|
||||
Assert.Equal(13, locations.Count);
|
||||
|
||||
Assert.Equal(6183, locations[new IndirectReference(1, 0)]);
|
||||
Assert.Equal(244, locations[new IndirectReference(2, 0)]);
|
||||
Assert.Equal(15, locations[new IndirectReference(3, 0)]);
|
||||
Assert.Equal(222, locations[new IndirectReference(4, 0)]);
|
||||
Assert.Equal(5766, locations[new IndirectReference(5, 0)]);
|
||||
Assert.Equal(353, locations[new IndirectReference(6, 0)]);
|
||||
Assert.Equal(581, locations[new IndirectReference(7, 0)]);
|
||||
Assert.Equal(5068, locations[new IndirectReference(8, 0)]);
|
||||
Assert.Equal(5091, locations[new IndirectReference(9, 0)]);
|
||||
Assert.Equal(6183, locations[new IndirectReference(1, 0)].Value1);
|
||||
Assert.Equal(244, locations[new IndirectReference(2, 0)].Value1);
|
||||
Assert.Equal(15, locations[new IndirectReference(3, 0)].Value1);
|
||||
Assert.Equal(222, locations[new IndirectReference(4, 0)].Value1);
|
||||
Assert.Equal(5766, locations[new IndirectReference(5, 0)].Value1);
|
||||
Assert.Equal(353, locations[new IndirectReference(6, 0)].Value1);
|
||||
Assert.Equal(581, locations[new IndirectReference(7, 0)].Value1);
|
||||
Assert.Equal(5068, locations[new IndirectReference(8, 0)].Value1);
|
||||
Assert.Equal(5091, locations[new IndirectReference(9, 0)].Value1);
|
||||
|
||||
var s = GetStringAt(bytes, locations[new IndirectReference(3, 0)]);
|
||||
var s = GetStringAt(bytes, locations[new IndirectReference(3, 0)].Value1);
|
||||
Assert.StartsWith("3 0 obj", s);
|
||||
}
|
||||
|
||||
@@ -203,21 +203,21 @@ endobj";
|
||||
|
||||
Assert.Equal(13, locations.Count);
|
||||
|
||||
Assert.Equal(17, locations[new IndirectReference(1, 0)]);
|
||||
Assert.Equal(249, locations[new IndirectReference(2, 0)]);
|
||||
Assert.Equal(14291, locations[new IndirectReference(3, 0)]);
|
||||
Assert.Equal(275, locations[new IndirectReference(4, 0)]);
|
||||
Assert.Equal(382, locations[new IndirectReference(5, 0)]);
|
||||
Assert.Equal(13283, locations[new IndirectReference(6, 0)]);
|
||||
Assert.Equal(13309, locations[new IndirectReference(7, 0)]);
|
||||
Assert.Equal(13556, locations[new IndirectReference(8, 0)]);
|
||||
Assert.Equal(13926, locations[new IndirectReference(9, 0)]);
|
||||
Assert.Equal(14183, locations[new IndirectReference(10, 0)]);
|
||||
Assert.Equal(14224, locations[new IndirectReference(11, 0)]);
|
||||
Assert.Equal(14428, locations[new IndirectReference(12, 0)]);
|
||||
Assert.Equal(14488, locations[new IndirectReference(13, 0)]);
|
||||
Assert.Equal(17, locations[new IndirectReference(1, 0)].Value1);
|
||||
Assert.Equal(249, locations[new IndirectReference(2, 0)].Value1);
|
||||
Assert.Equal(14291, locations[new IndirectReference(3, 0)].Value1);
|
||||
Assert.Equal(275, locations[new IndirectReference(4, 0)].Value1);
|
||||
Assert.Equal(382, locations[new IndirectReference(5, 0)].Value1);
|
||||
Assert.Equal(13283, locations[new IndirectReference(6, 0)].Value1);
|
||||
Assert.Equal(13309, locations[new IndirectReference(7, 0)].Value1);
|
||||
Assert.Equal(13556, locations[new IndirectReference(8, 0)].Value1);
|
||||
Assert.Equal(13926, locations[new IndirectReference(9, 0)].Value1);
|
||||
Assert.Equal(14183, locations[new IndirectReference(10, 0)].Value1);
|
||||
Assert.Equal(14224, locations[new IndirectReference(11, 0)].Value1);
|
||||
Assert.Equal(14428, locations[new IndirectReference(12, 0)].Value1);
|
||||
Assert.Equal(14488, locations[new IndirectReference(13, 0)].Value1);
|
||||
|
||||
var s = GetStringAt(bytes, locations[new IndirectReference(12, 0)]);
|
||||
var s = GetStringAt(bytes, locations[new IndirectReference(12, 0)].Value1);
|
||||
Assert.StartsWith("12 0 obj", s);
|
||||
}
|
||||
|
||||
@@ -230,7 +230,7 @@ endobj";
|
||||
|
||||
var locations = BruteForceSearcher.GetObjectLocations(input);
|
||||
|
||||
Assert.Equal(TestDataOffsets, locations.Values);
|
||||
Assert.Equal(TestDataOffsets, locations.Values.Select(x => x.Value1));
|
||||
}
|
||||
|
||||
[Fact]
|
||||
@@ -265,7 +265,7 @@ endobj
|
||||
s.IndexOf("11 0 obj", StringComparison.OrdinalIgnoreCase)
|
||||
};
|
||||
|
||||
Assert.Equal(expectedLocations, locations.Values);
|
||||
Assert.Equal(expectedLocations, locations.Values.Select(x => x.Value1));
|
||||
}
|
||||
|
||||
private static string GetStringAt(IInputBytes bytes, long location)
|
||||
|
||||
@@ -15,8 +15,8 @@
|
||||
var reference1 = new IndirectReference(7, 0);
|
||||
var reference2 = new IndirectReference(9, 0);
|
||||
|
||||
scanner.Objects[reference1] = new ObjectToken(10, reference1, new IndirectReferenceToken(reference2));
|
||||
scanner.Objects[reference2] = new ObjectToken(12, reference2, new NumericToken(69));
|
||||
scanner.Objects[reference1] = new ObjectToken(XrefLocation.File(10), reference1, new IndirectReferenceToken(reference2));
|
||||
scanner.Objects[reference2] = new ObjectToken(XrefLocation.File(12), reference2, new NumericToken(69));
|
||||
|
||||
Assert.True(DirectObjectFinder.TryGet(new IndirectReferenceToken(reference1), scanner, out NumericToken result));
|
||||
|
||||
@@ -29,8 +29,8 @@
|
||||
var reference1 = new IndirectReference(7, 0);
|
||||
var reference2 = new IndirectReference(9, 0);
|
||||
|
||||
scanner.Objects[reference1] = new ObjectToken(10, reference1, new IndirectReferenceToken(reference2));
|
||||
scanner.Objects[reference2] = new ObjectToken(12, reference2, new NumericToken(69));
|
||||
scanner.Objects[reference1] = new ObjectToken(XrefLocation.File(10), reference1, new IndirectReferenceToken(reference2));
|
||||
scanner.Objects[reference2] = new ObjectToken(XrefLocation.File(12), reference2, new NumericToken(69));
|
||||
|
||||
var result = DirectObjectFinder.Get<NumericToken>(reference1, scanner);
|
||||
|
||||
@@ -43,8 +43,8 @@
|
||||
var reference1 = new IndirectReference(7, 0);
|
||||
var reference2 = new IndirectReference(9, 0);
|
||||
|
||||
scanner.Objects[reference1] = new ObjectToken(10, reference1, new IndirectReferenceToken(reference2));
|
||||
scanner.Objects[reference2] = new ObjectToken(12, reference2, new NumericToken(69));
|
||||
scanner.Objects[reference1] = new ObjectToken(XrefLocation.File(10), reference1, new IndirectReferenceToken(reference2));
|
||||
scanner.Objects[reference2] = new ObjectToken(XrefLocation.File(12), reference2, new NumericToken(69));
|
||||
|
||||
var result = DirectObjectFinder.Get<NumericToken>(new IndirectReferenceToken(reference1), scanner);
|
||||
|
||||
@@ -57,7 +57,7 @@
|
||||
var reference = new IndirectReference(10, 0);
|
||||
|
||||
const string expected = "Goopy";
|
||||
scanner.Objects[reference] = new ObjectToken(10, reference, new ArrayToken(new []
|
||||
scanner.Objects[reference] = new ObjectToken(XrefLocation.File(10), reference, new ArrayToken(new []
|
||||
{
|
||||
new StringToken(expected)
|
||||
}));
|
||||
@@ -74,12 +74,12 @@
|
||||
var reference2 = new IndirectReference(69, 0);
|
||||
|
||||
const string expected = "Goopy";
|
||||
scanner.Objects[reference] = new ObjectToken(10, reference, new ArrayToken(new[]
|
||||
scanner.Objects[reference] = new ObjectToken(XrefLocation.File(10), reference, new ArrayToken(new[]
|
||||
{
|
||||
new IndirectReferenceToken(reference2)
|
||||
}));
|
||||
|
||||
scanner.Objects[reference2] = new ObjectToken(69, reference2, new StringToken(expected));
|
||||
scanner.Objects[reference2] = new ObjectToken(XrefLocation.File(69), reference2, new StringToken(expected));
|
||||
|
||||
var result = DirectObjectFinder.Get<StringToken>(reference, scanner);
|
||||
|
||||
@@ -91,7 +91,7 @@
|
||||
{
|
||||
var reference = new IndirectReference(10, 0);
|
||||
|
||||
scanner.Objects[reference] = new ObjectToken(10, reference, new ArrayToken(new[]
|
||||
scanner.Objects[reference] = new ObjectToken(XrefLocation.File(10), reference, new ArrayToken(new[]
|
||||
{
|
||||
new NumericToken(5), new NumericToken(6), new NumericToken(0)
|
||||
}));
|
||||
|
||||
@@ -6,14 +6,14 @@
|
||||
|
||||
internal class TestObjectLocationProvider : IObjectLocationProvider
|
||||
{
|
||||
public Dictionary<IndirectReference, long> Offsets { get; } = new Dictionary<IndirectReference, long>();
|
||||
public Dictionary<IndirectReference, XrefLocation> Offsets { get; } = new Dictionary<IndirectReference, XrefLocation>();
|
||||
|
||||
public bool TryGetOffset(IndirectReference reference, out long offset)
|
||||
public bool TryGetOffset(IndirectReference reference, out XrefLocation offset)
|
||||
{
|
||||
return Offsets.TryGetValue(reference, out offset);
|
||||
}
|
||||
|
||||
public void UpdateOffset(IndirectReference reference, long offset)
|
||||
public void UpdateOffset(IndirectReference reference, XrefLocation offset)
|
||||
{
|
||||
Offsets[reference] = offset;
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -12,7 +12,7 @@
|
||||
/// <summary>
|
||||
/// The location of the object: either a byte offset from the start of the file or a position inside an object stream.
|
||||
/// </summary>
|
||||
public long Position { get; }
|
||||
public XrefLocation Position { get; }
|
||||
|
||||
/// <summary>
|
||||
/// The object and generation number of the object.
|
||||
@@ -30,7 +30,7 @@
|
||||
/// <param name="position">The location of this object: either a byte offset from the start of the file or a position inside an object stream.</param>
|
||||
/// <param name="number">The identifier for this object.</param>
|
||||
/// <param name="data">The data contained in this object.</param>
|
||||
public ObjectToken(long position, IndirectReference number, IToken data)
|
||||
public ObjectToken(XrefLocation position, IndirectReference number, IToken data)
|
||||
{
|
||||
Position = position;
|
||||
Number = number;
|
||||
|
||||
@@ -28,12 +28,12 @@
|
||||
|
||||
private readonly IPdfTokenScanner tokenScanner;
|
||||
private readonly ILookupFilterProvider filterProvider;
|
||||
private readonly IReadOnlyDictionary<IndirectReference, long> objectOffsets;
|
||||
private readonly IReadOnlyDictionary<IndirectReference, XrefLocation> objectOffsets;
|
||||
|
||||
public AcroFormFactory(
|
||||
IPdfTokenScanner tokenScanner,
|
||||
ILookupFilterProvider filterProvider,
|
||||
IReadOnlyDictionary<IndirectReference, long> objectOffsets)
|
||||
IReadOnlyDictionary<IndirectReference, XrefLocation> objectOffsets)
|
||||
{
|
||||
this.tokenScanner = tokenScanner ?? throw new ArgumentNullException(nameof(tokenScanner));
|
||||
this.filterProvider = filterProvider ?? throw new ArgumentNullException(nameof(filterProvider));
|
||||
|
||||
@@ -2,11 +2,10 @@
|
||||
{
|
||||
using Fonts;
|
||||
using System;
|
||||
using System.Buffers.Binary;
|
||||
using System.IO;
|
||||
using System.IO.Compression;
|
||||
using Tokens;
|
||||
using UglyToad.PdfPig.Core;
|
||||
using Core;
|
||||
using Util;
|
||||
|
||||
/// <summary>
|
||||
@@ -55,89 +54,41 @@
|
||||
return input;
|
||||
}
|
||||
|
||||
private static Memory<byte> Decompress(Memory<byte> input, int predictor, int colors, int bitsPerComponent, int columns)
|
||||
private static Memory<byte> Decompress(Memory<byte> input,
|
||||
int predictor,
|
||||
int colors,
|
||||
int bitsPerComponent,
|
||||
int columns)
|
||||
{
|
||||
#if NET
|
||||
using var memoryStream = MemoryHelper.AsReadOnlyMemoryStream(input);
|
||||
// The first 2 bytes are the header which DeflateStream does not support.
|
||||
memoryStream.ReadByte();
|
||||
memoryStream.ReadByte();
|
||||
|
||||
try
|
||||
{
|
||||
using (var zlib = new ZLibStream(memoryStream, CompressionMode.Decompress))
|
||||
using (var output = new MemoryStream((int)(input.Length * 1.5)))
|
||||
using (var f = PngPredictor.WrapPredictor(output, predictor, colors, bitsPerComponent, columns))
|
||||
{
|
||||
zlib.CopyTo(f);
|
||||
f.Flush();
|
||||
using var deflate = new DeflateStream(memoryStream, CompressionMode.Decompress);
|
||||
using var output = new MemoryStream((int)(input.Length * 1.5));
|
||||
using var f = PngPredictor.WrapPredictor(output, predictor, colors, bitsPerComponent, columns);
|
||||
|
||||
deflate.CopyTo(f);
|
||||
f.Flush();
|
||||
|
||||
return output.AsMemory();
|
||||
}
|
||||
return output.AsMemory();
|
||||
}
|
||||
catch (InvalidDataException ex)
|
||||
{
|
||||
throw new CorruptCompressedDataException("Invalid Flate compressed stream encountered", ex);
|
||||
}
|
||||
#else
|
||||
// Ideally we would like to use the ZLibStream class but that is only available in .NET 5+.
|
||||
// We look at the raw data now
|
||||
// * First we have 2 bytes, specifying the type of compression
|
||||
// * Then we have the deflated data
|
||||
// * Then we have a 4 byte checksum (Adler32)
|
||||
|
||||
// Would be so nice to have zlib do the framing here... but the deflate stream already reads data from the stream that we need.
|
||||
|
||||
using var memoryStream = MemoryHelper.AsReadOnlyMemoryStream(input.Slice(2, input.Length - 2 /* Header */ - 4 /* Checksum */));
|
||||
// The first 2 bytes are the header which DeflateStream can't handle. After the deflated data comes the 4 byte Adler32 checksum.
|
||||
var adlerBytes = input.Slice(input.Length - 4, 4).Span;
|
||||
uint expected = BinaryPrimitives.ReadUInt32BigEndian(adlerBytes);
|
||||
uint altExpected = expected;
|
||||
|
||||
// Sometimes the data ends with "\r\n", "\r" or "\n" and we don't know if it is part of the zlib checksum or just trailing padding.
|
||||
// Ideally this would have been removed by the caller from the provided length...
|
||||
if (adlerBytes[3] == '\n' || adlerBytes[3] == '\r')
|
||||
{
|
||||
if (adlerBytes[3] == '\n' && adlerBytes[2] == '\r')
|
||||
{
|
||||
// Now we don't know which value is the good one. The value could be ok, or padding.
|
||||
// Let's allow both values for now. Allowing two out of 2^32 is much better than allowing everything
|
||||
adlerBytes = input.Slice(input.Length - 6, 4).Span;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Same but now for just '\n' or '\r' instead of '\r\n'
|
||||
adlerBytes = input.Slice(input.Length - 5, 4).Span;
|
||||
}
|
||||
|
||||
altExpected = BinaryPrimitives.ReadUInt32BigEndian(adlerBytes);
|
||||
}
|
||||
|
||||
|
||||
try
|
||||
{
|
||||
using (var deflate = new DeflateStream(memoryStream, CompressionMode.Decompress))
|
||||
using (var adlerStream = new Adler32ChecksumStream(deflate))
|
||||
using (var output = new MemoryStream((int)(input.Length * 1.5)))
|
||||
using (var f = PngPredictor.WrapPredictor(output, predictor, colors, bitsPerComponent, columns))
|
||||
{
|
||||
adlerStream.CopyTo(f);
|
||||
f.Flush();
|
||||
|
||||
uint actual = adlerStream.Checksum;
|
||||
if (expected != actual && altExpected != actual)
|
||||
{
|
||||
throw new CorruptCompressedDataException("Flate stream has invalid checksum");
|
||||
}
|
||||
|
||||
return output.AsMemory();
|
||||
}
|
||||
}
|
||||
catch (InvalidDataException ex)
|
||||
{
|
||||
throw new CorruptCompressedDataException("Invalid Flate compressed stream encountered", ex);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public byte[] Encode(Stream input, DictionaryToken streamDictionary, int index)
|
||||
/// <summary>
|
||||
/// Convert a decoded data stream back to the encoded version.
|
||||
/// </summary>
|
||||
/// <param name="input">The decoded data.</param>
|
||||
/// <param name="streamDictionary">The stream dictionary with the parameters to use.</param>
|
||||
/// <returns>The Flate encoded data.</returns>
|
||||
public byte[] Encode(Stream input, DictionaryToken streamDictionary)
|
||||
{
|
||||
const int headerLength = 2;
|
||||
const int checksumLength = 4;
|
||||
|
||||
@@ -16,7 +16,7 @@ internal static partial class FirstPassParser
|
||||
{
|
||||
log ??= new NoOpLog();
|
||||
|
||||
IReadOnlyDictionary<IndirectReference, long>? bruteForceOffsets = null;
|
||||
IReadOnlyDictionary<IndirectReference, XrefLocation>? bruteForceOffsets = null;
|
||||
var didBruteForce = false;
|
||||
DictionaryToken? bruteForceTrailer = null;
|
||||
|
||||
@@ -92,7 +92,7 @@ internal static partial class FirstPassParser
|
||||
}
|
||||
|
||||
DictionaryToken? lastTrailer = null;
|
||||
var flattenedOffsets = new Dictionary<IndirectReference, long>();
|
||||
var flattenedOffsets = new Dictionary<IndirectReference, XrefLocation>();
|
||||
foreach (var xrefPart in orderedXrefs)
|
||||
{
|
||||
if (xrefPart.Dictionary != null)
|
||||
@@ -230,12 +230,12 @@ internal class FirstPassResults
|
||||
/// <summary>
|
||||
/// All offsets found if a brute-force search was applied.
|
||||
/// </summary>
|
||||
public IReadOnlyDictionary<IndirectReference, long>? BruteForceOffsets { get; }
|
||||
public IReadOnlyDictionary<IndirectReference, XrefLocation>? BruteForceOffsets { get; }
|
||||
|
||||
/// <summary>
|
||||
/// All offsets found from the leaf xref.
|
||||
/// </summary>
|
||||
public IReadOnlyDictionary<IndirectReference, long> XrefOffsets { get; }
|
||||
public IReadOnlyDictionary<IndirectReference, XrefLocation> XrefOffsets { get; }
|
||||
|
||||
/// <summary>
|
||||
/// The trailer dictionary of the leaf xref if we found any.
|
||||
@@ -244,8 +244,8 @@ internal class FirstPassResults
|
||||
|
||||
public FirstPassResults(
|
||||
IReadOnlyList<IXrefSection> parts,
|
||||
IReadOnlyDictionary<IndirectReference, long>? bruteForceOffsets,
|
||||
IReadOnlyDictionary<IndirectReference, long> xrefOffsets,
|
||||
IReadOnlyDictionary<IndirectReference, XrefLocation>? bruteForceOffsets,
|
||||
IReadOnlyDictionary<IndirectReference, XrefLocation> xrefOffsets,
|
||||
DictionaryToken? trailer)
|
||||
{
|
||||
Parts = parts;
|
||||
|
||||
@@ -14,7 +14,7 @@ internal interface IXrefSection
|
||||
/// <summary>
|
||||
/// The byte offsets of the objects in this xref.
|
||||
/// </summary>
|
||||
public IReadOnlyDictionary<IndirectReference, long> ObjectOffsets { get; }
|
||||
public IReadOnlyDictionary<IndirectReference, XrefLocation> ObjectOffsets { get; }
|
||||
|
||||
/// <summary>
|
||||
/// The dictionary for this xref, for the trailer xref this is the trailer dictionary, for streams the stream dictionary.
|
||||
|
||||
@@ -19,7 +19,7 @@ internal static class XrefBruteForcer
|
||||
// Guard against circular references; only read xref at each offset once
|
||||
var xrefOffsetSeen = new HashSet<long>();
|
||||
|
||||
var bruteForceObjPositions = new Dictionary<IndirectReference, long>();
|
||||
var bruteForceObjPositions = new Dictionary<IndirectReference, XrefLocation>();
|
||||
|
||||
DictionaryToken? trailer = null;
|
||||
|
||||
@@ -123,7 +123,7 @@ internal static class XrefBruteForcer
|
||||
|
||||
if (buffer.EndsWith(" obj") && numericsQueue[0] > 0)
|
||||
{
|
||||
bruteForceObjPositions[new IndirectReference(numericsQueue[0], (int)numericsQueue[1])] = positionsQueue[0];
|
||||
bruteForceObjPositions[new IndirectReference(numericsQueue[0], (int)numericsQueue[1])] = XrefLocation.File(positionsQueue[0]);
|
||||
|
||||
lastObjPosition = positionsQueue[0];
|
||||
|
||||
@@ -208,12 +208,12 @@ internal static class XrefBruteForcer
|
||||
|
||||
public class Result(
|
||||
IReadOnlyList<IXrefSection> xRefParts,
|
||||
IReadOnlyDictionary<IndirectReference, long> objectOffsets,
|
||||
IReadOnlyDictionary<IndirectReference, XrefLocation> objectOffsets,
|
||||
DictionaryToken? lastTrailer)
|
||||
{
|
||||
public IReadOnlyList<IXrefSection> XRefParts { get; } = xRefParts;
|
||||
|
||||
public IReadOnlyDictionary<IndirectReference, long> ObjectOffsets { get; } = objectOffsets;
|
||||
public IReadOnlyDictionary<IndirectReference, XrefLocation> ObjectOffsets { get; } = objectOffsets;
|
||||
|
||||
public DictionaryToken? LastTrailer { get; } = lastTrailer;
|
||||
}
|
||||
|
||||
@@ -10,7 +10,7 @@ internal sealed class XrefStream : IXrefSection
|
||||
/// <summary>
|
||||
/// The corresponding byte offset for each keyed object in this document.
|
||||
/// </summary>
|
||||
public IReadOnlyDictionary<IndirectReference, long> ObjectOffsets { get; }
|
||||
public IReadOnlyDictionary<IndirectReference, XrefLocation> ObjectOffsets { get; }
|
||||
|
||||
public DictionaryToken Dictionary { get; }
|
||||
|
||||
@@ -20,7 +20,7 @@ internal sealed class XrefStream : IXrefSection
|
||||
|
||||
public XrefStream(
|
||||
long offset,
|
||||
IReadOnlyDictionary<IndirectReference, long> objectOffsets,
|
||||
IReadOnlyDictionary<IndirectReference, XrefLocation> objectOffsets,
|
||||
DictionaryToken streamDictionary,
|
||||
XrefOffsetCorrection correctionType,
|
||||
long offsetCorrection)
|
||||
|
||||
@@ -98,7 +98,7 @@ internal static class XrefStreamParser
|
||||
? stackalloc byte[fieldSizes.LineLength]
|
||||
: new byte[fieldSizes.LineLength];
|
||||
|
||||
var numbers = new List<(long obj, int gen, int off)>();
|
||||
var numbers = new List<(long obj, int gen, XrefLocation location)>();
|
||||
|
||||
foreach (var objectNumber in objectNumbers)
|
||||
{
|
||||
@@ -136,7 +136,7 @@ internal static class XrefStreamParser
|
||||
|
||||
return new XrefStream(
|
||||
xrefOffset,
|
||||
numbers.ToDictionary(x => new IndirectReference(x.obj, x.gen), x => (long)x.off),
|
||||
numbers.ToDictionary(x => new IndirectReference(x.obj, x.gen), x => x.location),
|
||||
dictToken,
|
||||
offsetCorrectionType,
|
||||
offsetCorrection);
|
||||
@@ -175,7 +175,7 @@ internal static class XrefStreamParser
|
||||
int type,
|
||||
long objectNumber,
|
||||
XrefFieldSize fieldSizes,
|
||||
List<(long, int, int)> results,
|
||||
List<(long, int, XrefLocation)> results,
|
||||
ReadOnlySpan<byte> lineBuffer)
|
||||
{
|
||||
switch (type)
|
||||
@@ -184,19 +184,23 @@ internal static class XrefStreamParser
|
||||
// Ignore free objects.
|
||||
break;
|
||||
case 1:
|
||||
// Non object stream entries.
|
||||
var offset = 0;
|
||||
for (var i = 0; i < fieldSizes.Field2Size; i++)
|
||||
{
|
||||
offset += (lineBuffer[i + fieldSizes.Field1Size] & 0x00ff) << ((fieldSizes.Field2Size - i - 1) * 8);
|
||||
}
|
||||
var genNum = 0;
|
||||
for (var i = 0; i < fieldSizes.Field3Size; i++)
|
||||
{
|
||||
genNum += (lineBuffer[i + fieldSizes.Field1Size + fieldSizes.Field2Size] & 0x00ff) << ((fieldSizes.Field3Size - i - 1) * 8);
|
||||
var offset = ReadUnsigned(
|
||||
lineBuffer,
|
||||
fieldSizes.Field1Size,
|
||||
fieldSizes.Field2Size);
|
||||
|
||||
var genNum = ReadUnsigned(
|
||||
lineBuffer,
|
||||
fieldSizes.Field1Size + fieldSizes.Field2Size,
|
||||
fieldSizes.Field3Size);
|
||||
|
||||
if (offset < 0)
|
||||
{
|
||||
throw new PdfDocumentFormatException(
|
||||
$"Location with negative offset {offset} found for object {objectNumber}");
|
||||
}
|
||||
|
||||
results.Add((objectNumber, genNum, offset));
|
||||
results.Add((objectNumber, (int)genNum, XrefLocation.File(offset)));
|
||||
|
||||
break;
|
||||
case 2:
|
||||
@@ -205,28 +209,49 @@ internal static class XrefStreamParser
|
||||
* 2nd argument is object number of object stream
|
||||
* 3rd argument is index of object within object stream
|
||||
*
|
||||
* For sequential PDFParser we do not need this information
|
||||
* because
|
||||
* These objects are handled by the dereferenceObjects() method
|
||||
* since they're only pointing to object numbers
|
||||
*
|
||||
* However for XRef aware parsers we have to know which objects contain
|
||||
* object streams. We will store this information in normal xref mapping
|
||||
* table but add object stream number with minus sign in order to
|
||||
* distinguish from file offsets
|
||||
*/
|
||||
var objstmObjNr = 0;
|
||||
for (var i = 0; i < fieldSizes.Field2Size; i++)
|
||||
|
||||
var objectStreamNumber = ReadUnsigned(
|
||||
lineBuffer,
|
||||
fieldSizes.Field1Size,
|
||||
fieldSizes.Field2Size);
|
||||
|
||||
var streamIndex = ReadUnsigned(
|
||||
lineBuffer,
|
||||
fieldSizes.Field1Size + fieldSizes.Field2Size,
|
||||
fieldSizes.Field3Size);
|
||||
|
||||
if (objectStreamNumber < 0)
|
||||
{
|
||||
objstmObjNr += (lineBuffer[i + fieldSizes.Field1Size] & 0x00ff) << ((fieldSizes.Field2Size - i - 1) * 8);
|
||||
throw new PdfDocumentFormatException(
|
||||
$"Location with negative or zero object stream number {objectStreamNumber} found for object {objectNumber}");
|
||||
}
|
||||
|
||||
results.Add((objectNumber, 0, -objstmObjNr));
|
||||
if (streamIndex < 0)
|
||||
{
|
||||
throw new PdfDocumentFormatException(
|
||||
$"Location with negative stream index {streamIndex} found for object {objectNumber} in stream {objectStreamNumber}");
|
||||
}
|
||||
|
||||
results.Add((objectNumber, 0, XrefLocation.Stream(objectStreamNumber, (int)streamIndex)));
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Reads <paramref name="width"/> consecutive bytes from <paramref name="buffer"/>, starting at index
/// <paramref name="start"/>, and combines them into one value with the first byte as the most significant.
/// </summary>
/// <param name="buffer">The bytes of the entry being decoded.</param>
/// <param name="start">Index in <paramref name="buffer"/> of the first byte of the field.</param>
/// <param name="width">Number of bytes in the field; a width of zero yields 0.</param>
/// <returns>
/// The combined value. The accumulator is a signed 64-bit <see cref="long"/>, so an 8 byte field with its
/// top bit set comes back negative; the callers in this file test for negative results and throw.
/// No range validation is done here beyond what the span indexer itself enforces.
/// </returns>
private static long ReadUnsigned(ReadOnlySpan<byte> buffer, int start, int width)
|
||||
{
|
||||
long value = 0;
|
||||
|
||||
for (int i = 0; i < width; i++)
|
||||
{
|
||||
// Make room for the next byte, then append it as the new lowest 8 bits.
value <<= 8;
|
||||
value |= buffer[start + i];
|
||||
}
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
private static (long from, long? to) ReadStreamTolerant(IInputBytes bytes)
|
||||
{
|
||||
var buffer = new CircularByteBuffer("endstream ".Length);
|
||||
|
||||
@@ -13,7 +13,7 @@ internal sealed class XrefTable : IXrefSection
|
||||
/// <summary>
|
||||
/// The corresponding location (stored as an <see cref="XrefLocation"/>) for each keyed object in this document.
|
||||
/// </summary>
|
||||
public IReadOnlyDictionary<IndirectReference, long> ObjectOffsets { get; }
|
||||
public IReadOnlyDictionary<IndirectReference, XrefLocation> ObjectOffsets { get; }
|
||||
|
||||
public DictionaryToken? Dictionary { get; }
|
||||
|
||||
@@ -23,7 +23,7 @@ internal sealed class XrefTable : IXrefSection
|
||||
|
||||
public XrefTable(
|
||||
long offset,
|
||||
IReadOnlyDictionary<IndirectReference, long> objectOffsets,
|
||||
IReadOnlyDictionary<IndirectReference, XrefLocation> objectOffsets,
|
||||
DictionaryToken? trailer,
|
||||
XrefOffsetCorrection correctionType,
|
||||
long offsetCorrection)
|
||||
|
||||
@@ -152,7 +152,7 @@ internal static class XrefTableParser
|
||||
}
|
||||
}
|
||||
|
||||
var offsets = new Dictionary<IndirectReference, long>();
|
||||
var offsets = new Dictionary<IndirectReference, XrefLocation>();
|
||||
if (readNums.Count == 0)
|
||||
{
|
||||
if (trailer != null)
|
||||
@@ -233,7 +233,7 @@ internal static class XrefTableParser
|
||||
if (type == occupiedSentinel)
|
||||
{
|
||||
var indirectRef = new IndirectReference(objNum, (int)gen);
|
||||
offsets[indirectRef] = objOffset;
|
||||
offsets[indirectRef] = XrefLocation.File(objOffset);
|
||||
}
|
||||
|
||||
objNum++;
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
/// </summary>
|
||||
/// <param name="bytes">The bytes of the document.</param>
|
||||
/// <returns>The object keys and offsets for the objects in this document.</returns>
|
||||
public static IReadOnlyDictionary<IndirectReference, long> GetObjectLocations(IInputBytes bytes)
|
||||
public static IReadOnlyDictionary<IndirectReference, XrefLocation> GetObjectLocations(IInputBytes bytes)
|
||||
{
|
||||
if (bytes is null)
|
||||
{
|
||||
@@ -29,7 +29,7 @@
|
||||
|
||||
var lastEndOfFile = GetLastEndOfFileMarker(bytes);
|
||||
|
||||
var results = new Dictionary<IndirectReference, long>();
|
||||
var results = new Dictionary<IndirectReference, XrefLocation>();
|
||||
|
||||
var generationBytes = new StringBuilder();
|
||||
var objectNumberBytes = new StringBuilder();
|
||||
@@ -174,7 +174,7 @@
|
||||
var obj = long.Parse(objectNumberBytes.ToString(), CultureInfo.InvariantCulture);
|
||||
var generation = int.Parse(generationBytes.ToString(), CultureInfo.InvariantCulture);
|
||||
|
||||
results[new IndirectReference(obj, generation)] = bytes.CurrentOffset;
|
||||
results[new IndirectReference(obj, generation)] = XrefLocation.File(bytes.CurrentOffset);
|
||||
|
||||
generationBytes.Clear();
|
||||
objectNumberBytes.Clear();
|
||||
|
||||
@@ -6,9 +6,9 @@
|
||||
|
||||
internal interface IObjectLocationProvider
|
||||
{
|
||||
bool TryGetOffset(IndirectReference reference, out long offset);
|
||||
bool TryGetOffset(IndirectReference reference, out XrefLocation offset);
|
||||
|
||||
void UpdateOffset(IndirectReference reference, long offset);
|
||||
void UpdateOffset(IndirectReference reference, XrefLocation offset);
|
||||
|
||||
bool TryGetCached(IndirectReference reference, [NotNullWhen(true)] out ObjectToken? objectToken);
|
||||
|
||||
|
||||
@@ -13,16 +13,16 @@
|
||||
|
||||
private readonly IInputBytes bytes;
|
||||
|
||||
private IReadOnlyDictionary<IndirectReference, long>? bruteForcedOffsets;
|
||||
private IReadOnlyDictionary<IndirectReference, XrefLocation>? bruteForcedOffsets;
|
||||
|
||||
private readonly Dictionary<IndirectReference, long> offsets;
|
||||
private readonly Dictionary<IndirectReference, XrefLocation> offsets;
|
||||
|
||||
public ObjectLocationProvider(
|
||||
IReadOnlyDictionary<IndirectReference, long> xrefOffsets,
|
||||
IReadOnlyDictionary<IndirectReference, long>? bruteForcedOffsets,
|
||||
IReadOnlyDictionary<IndirectReference, XrefLocation> xrefOffsets,
|
||||
IReadOnlyDictionary<IndirectReference, XrefLocation>? bruteForcedOffsets,
|
||||
IInputBytes bytes)
|
||||
{
|
||||
offsets = new Dictionary<IndirectReference, long>();
|
||||
offsets = new Dictionary<IndirectReference, XrefLocation>();
|
||||
foreach (var xrefOffset in xrefOffsets)
|
||||
{
|
||||
offsets[xrefOffset.Key] = xrefOffset.Value;
|
||||
@@ -32,7 +32,7 @@
|
||||
this.bytes = bytes;
|
||||
}
|
||||
|
||||
public bool TryGetOffset(IndirectReference reference, out long offset)
|
||||
public bool TryGetOffset(IndirectReference reference, out XrefLocation offset)
|
||||
{
|
||||
if (bruteForcedOffsets != null && bruteForcedOffsets.TryGetValue(reference, out var bfOffset))
|
||||
{
|
||||
@@ -42,16 +42,6 @@
|
||||
|
||||
if (offsets.TryGetValue(reference, out offset))
|
||||
{
|
||||
if (offset + reference.ObjectNumber == 0)
|
||||
{
|
||||
// We have a case where 'offset' and
|
||||
// 'reference.ObjectNumber' have the same value
|
||||
// and opposite signs.
|
||||
// This results in an infinite recursion in
|
||||
// PdfTokenScanner.GetObjectFromStream() where
|
||||
// `var streamObjectNumber = offset * -1;`
|
||||
throw new PdfDocumentFormatException("Avoiding infinite recursion in ObjectLocationProvider.TryGetOffset() as 'offset' and 'reference.ObjectNumber' have the same value and opposite signs.");
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -63,7 +53,7 @@
|
||||
return bruteForcedOffsets.TryGetValue(reference, out offset);
|
||||
}
|
||||
|
||||
public void UpdateOffset(IndirectReference reference, long offset)
|
||||
public void UpdateOffset(IndirectReference reference, XrefLocation offset)
|
||||
{
|
||||
offsets[reference] = offset;
|
||||
}
|
||||
@@ -81,8 +71,9 @@
|
||||
}
|
||||
|
||||
// Don't cache incorrect locations.
|
||||
if (!force && offsets.TryGetValue(objectToken.Number, out var expected)
|
||||
&& objectToken.Position != expected)
|
||||
if (!force
|
||||
&& offsets.TryGetValue(objectToken.Number, out var expected)
|
||||
&& (objectToken.Position.Type != expected.Type || objectToken.Position.Value1 != expected.Value1))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
namespace UglyToad.PdfPig.Tokenization.Scanner
|
||||
{
|
||||
using Core;
|
||||
using Encryption;
|
||||
using Filters;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
@@ -7,9 +10,6 @@
|
||||
using System.Globalization;
|
||||
using System.Linq;
|
||||
using System.Text.RegularExpressions;
|
||||
using Core;
|
||||
using Encryption;
|
||||
using Filters;
|
||||
using Tokens;
|
||||
using UglyToad.PdfPig.Parser.FileStructure;
|
||||
|
||||
@@ -164,7 +164,7 @@
|
||||
var actualReference = new IndirectReference(objectNumber.Int, generation.Int);
|
||||
var actualToken = encryptionHandler.Decrypt(actualReference, readTokens[0]);
|
||||
|
||||
CurrentToken = new ObjectToken(startPosition, actualReference, actualToken);
|
||||
CurrentToken = new ObjectToken(XrefLocation.File(startPosition), actualReference, actualToken);
|
||||
|
||||
readTokens.Clear();
|
||||
coreTokenScanner.Seek(previousTokenPositions[0]);
|
||||
@@ -191,7 +191,7 @@
|
||||
var actualReference = new IndirectReference(objectNumber.Int, generation.Int);
|
||||
var actualToken = encryptionHandler.Decrypt(actualReference, readTokens[0]);
|
||||
|
||||
CurrentToken = new ObjectToken(startPosition, actualReference, actualToken);
|
||||
CurrentToken = new ObjectToken(XrefLocation.File(startPosition), actualReference, actualToken);
|
||||
readTokens.Clear();
|
||||
coreTokenScanner.Seek(previousTokenPositions[2]);
|
||||
|
||||
@@ -291,9 +291,9 @@
|
||||
|
||||
token = encryptionHandler.Decrypt(reference, token);
|
||||
|
||||
CurrentToken = new ObjectToken(startPosition, reference, token);
|
||||
CurrentToken = new ObjectToken(XrefLocation.File(startPosition), reference, token);
|
||||
|
||||
objectLocationProvider.UpdateOffset(reference, startPosition);
|
||||
objectLocationProvider.UpdateOffset(reference, XrefLocation.File(startPosition));
|
||||
|
||||
readTokens.Clear();
|
||||
return true;
|
||||
@@ -626,10 +626,10 @@
|
||||
// We can only find it if we know where it is.
|
||||
if (objectLocationProvider.TryGetOffset(lengthReference.Data, out var offset))
|
||||
{
|
||||
if (offset < 0)
|
||||
if (offset.Type == XrefEntryType.ObjectStream)
|
||||
{
|
||||
ushort searchDepth = 0;
|
||||
var result = GetObjectFromStream(lengthReference.Data, offset, ref searchDepth);
|
||||
Span<int> stack = stackalloc int[7];
|
||||
var result = GetObjectFromStream(lengthReference.Data, offset, stack, 0);
|
||||
|
||||
if (!(result.Data is NumericToken streamLengthToken))
|
||||
{
|
||||
@@ -639,8 +639,9 @@
|
||||
|
||||
return streamLengthToken.Long;
|
||||
}
|
||||
|
||||
// Move to the length object and read it.
|
||||
Seek(offset);
|
||||
Seek(offset.Value1);
|
||||
|
||||
// Keep a copy of the read tokens here since this list must be empty prior to move next.
|
||||
var oldData = new List<IToken>(readTokens);
|
||||
@@ -721,19 +722,31 @@
|
||||
|
||||
public ObjectToken? Get(IndirectReference reference)
|
||||
{
|
||||
ushort searchDepth = 0;
|
||||
return Get(reference, ref searchDepth);
|
||||
Span<int> stack = stackalloc int[7];
|
||||
return Get(reference, stack, 0);
|
||||
}
|
||||
|
||||
private ObjectToken? Get(IndirectReference reference, ref ushort searchDepth)
|
||||
private ObjectToken? Get(IndirectReference reference, Span<int> navSet, byte depth)
|
||||
{
|
||||
if (searchDepth > 100)
|
||||
if (depth >= navSet.Length)
|
||||
{
|
||||
throw new PdfDocumentFormatException("Reached maximum search depth while getting indirect reference.");
|
||||
var chain = string.Join(", ", navSet.ToArray());
|
||||
throw new PdfDocumentFormatException($"Deep object chain detected when looking for {reference}: {chain}.");
|
||||
}
|
||||
|
||||
searchDepth++;
|
||||
// Cycle detection (linear scan, but depth is tiny)
|
||||
for (var i = 0; i < depth; i++)
|
||||
{
|
||||
if (navSet[i] == reference.ObjectNumber)
|
||||
{
|
||||
var chain = string.Join(", ", navSet.ToArray());
|
||||
throw new PdfDocumentFormatException(
|
||||
$"Circular reference encountered when looking for object {reference}. Involved objects were: {chain}");
|
||||
}
|
||||
}
|
||||
|
||||
navSet[depth] = (int)reference.ObjectNumber;
|
||||
depth++;
|
||||
|
||||
if (isDisposed)
|
||||
{
|
||||
@@ -756,20 +769,20 @@
|
||||
}
|
||||
|
||||
// Object stream locations refer to the object stream with that object number (Value1).
|
||||
if (offset < 0)
|
||||
if (offset.Type == XrefEntryType.ObjectStream)
|
||||
{
|
||||
var result = GetObjectFromStream(reference, offset, ref searchDepth);
|
||||
if (offset.Value1 == reference.ObjectNumber)
|
||||
{
|
||||
throw new PdfDocumentFormatException(
|
||||
$"Object stream cannot contain itself, looking for object {reference} in {offset.Value1}");
|
||||
}
|
||||
|
||||
var result = GetObjectFromStream(reference, offset, navSet, depth);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
if (offset == 0 && reference.Generation > ushort.MaxValue)
|
||||
{
|
||||
// TODO - To remove as should not happen anymore
|
||||
return new ObjectToken(offset, reference, NullToken.Instance);
|
||||
}
|
||||
|
||||
Seek(offset);
|
||||
Seek(offset.Value1);
|
||||
|
||||
if (!MoveNext())
|
||||
{
|
||||
@@ -793,7 +806,7 @@
|
||||
{
|
||||
// Using 0 position as it isn't written to stream and this value doesn't
|
||||
// seem to be used by any callers. In future may need to revisit this.
|
||||
overwrittenTokens[reference] = new ObjectToken(0, reference, token);
|
||||
overwrittenTokens[reference] = new ObjectToken(XrefLocation.File(0), reference, token);
|
||||
}
|
||||
|
||||
private bool TryBruteForceFileToFindReference(IndirectReference reference, [NotNullWhen(true)] out ObjectToken? result)
|
||||
@@ -826,11 +839,11 @@
|
||||
}
|
||||
}
|
||||
|
||||
private ObjectToken GetObjectFromStream(IndirectReference reference, long offset, ref ushort searchDepth)
|
||||
private ObjectToken GetObjectFromStream(IndirectReference reference, XrefLocation offset, Span<int> navSet, byte depth)
|
||||
{
|
||||
var streamObjectNumber = offset * -1;
|
||||
var streamObjectNumber = offset.Value1;
|
||||
|
||||
var streamObject = Get(new IndirectReference(streamObjectNumber, 0), ref searchDepth);
|
||||
var streamObject = Get(new IndirectReference(streamObjectNumber, 0), navSet, depth);
|
||||
|
||||
if (!(streamObject?.Data is StreamToken stream))
|
||||
{
|
||||
@@ -853,7 +866,7 @@
|
||||
return result;
|
||||
}
|
||||
|
||||
private IReadOnlyList<ObjectToken> ParseObjectStream(StreamToken stream, long offset)
|
||||
private IReadOnlyList<ObjectToken> ParseObjectStream(StreamToken stream, XrefLocation offset)
|
||||
{
|
||||
if (!stream.StreamDictionary.TryGet(NameToken.N, out var numberToken)
|
||||
|| !(numberToken is NumericToken numberOfObjects))
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
|
||||
var profileBytes = ProfileStreamReader.GetSRgb2014();
|
||||
|
||||
var compressedBytes = DataCompresser.CompressBytes(profileBytes);
|
||||
var compressedBytes = DataCompressor.CompressBytes(profileBytes);
|
||||
|
||||
var profileStreamDictionary = new Dictionary<NameToken, IToken>
|
||||
{
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
using Filters;
|
||||
using Tokens;
|
||||
|
||||
internal static class DataCompresser
|
||||
internal static class DataCompressor
|
||||
{
|
||||
public static byte[] CompressBytes(IReadOnlyList<byte> bytes) => CompressBytes(bytes.ToArray());
|
||||
public static byte[] CompressBytes(byte[] bytes)
|
||||
@@ -15,7 +15,7 @@
|
||||
{
|
||||
var parameters = new DictionaryToken(new Dictionary<NameToken, IToken>());
|
||||
var flater = new FlateFilter();
|
||||
var result = flater.Encode(memoryStream, parameters, 0);
|
||||
var result = flater.Encode(memoryStream, parameters);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
@@ -51,7 +51,7 @@
|
||||
var newEncoding = new TrueTypeSubsetEncoding(characterMapping.Keys.ToList());
|
||||
var subsetBytes = TrueTypeSubsetter.Subset(fontFileBytes.ToArray(), newEncoding);
|
||||
|
||||
var embeddedFile = DataCompresser.CompressToStream(subsetBytes);
|
||||
var embeddedFile = DataCompressor.CompressToStream(subsetBytes);
|
||||
|
||||
var fileRef = writer.WriteToken(embeddedFile);
|
||||
|
||||
@@ -110,7 +110,7 @@
|
||||
var descriptor = writer.WriteToken(new DictionaryToken(descriptorDictionary));
|
||||
|
||||
var toUnicodeCMap = ToUnicodeCMapBuilder.ConvertToCMapStream(characterMapping);
|
||||
var toUnicodeStream = DataCompresser.CompressToStream(toUnicodeCMap);
|
||||
var toUnicodeStream = DataCompressor.CompressToStream(toUnicodeCMap);
|
||||
var toUnicode = writer.WriteToken(toUnicodeStream);
|
||||
|
||||
var dictionary = new Dictionary<NameToken, IToken>
|
||||
|
||||
@@ -107,7 +107,7 @@ namespace UglyToad.PdfPig.Writer
|
||||
}
|
||||
outputStreamT.Seek(0, SeekOrigin.Begin);
|
||||
|
||||
var compressedBytes = DataCompresser.CompressBytes(outputStreamT.ToArray());
|
||||
var compressedBytes = DataCompressor.CompressBytes(outputStreamT.ToArray());
|
||||
var outputStreamDictionary = new Dictionary<NameToken, IToken>()
|
||||
{
|
||||
{ NameToken.Length, new NumericToken(compressedBytes.Length) },
|
||||
|
||||
@@ -767,7 +767,7 @@
|
||||
}
|
||||
}
|
||||
|
||||
var compressedSmask = DataCompresser.CompressBytes(smaskData);
|
||||
var compressedSmask = DataCompressor.CompressBytes(smaskData);
|
||||
|
||||
// Create a soft-mask.
|
||||
var smaskDictionary = new Dictionary<NameToken, IToken>
|
||||
@@ -786,7 +786,7 @@
|
||||
smaskReference = documentBuilder.AddImage(new DictionaryToken(smaskDictionary), compressedSmask);
|
||||
}
|
||||
|
||||
var compressed = DataCompresser.CompressBytes(data);
|
||||
var compressed = DataCompressor.CompressBytes(data);
|
||||
|
||||
var imgDictionary = new Dictionary<NameToken, IToken>
|
||||
{
|
||||
@@ -1218,7 +1218,7 @@
|
||||
|
||||
var bytes = memoryStream.ToArray();
|
||||
|
||||
var stream = DataCompresser.CompressToStream(bytes);
|
||||
var stream = DataCompressor.CompressToStream(bytes);
|
||||
|
||||
return writer.WriteToken(stream);
|
||||
}
|
||||
|
||||
@@ -58,7 +58,7 @@
|
||||
|
||||
var ir = ReserveObjectNumber();
|
||||
offsets.Add(ir.Data, Stream.Position);
|
||||
var obj = new ObjectToken(Stream.Position, ir.Data, token);
|
||||
var obj = new ObjectToken(XrefLocation.File(Stream.Position), ir.Data, token);
|
||||
TokenWriter.WriteToken(obj, Stream);
|
||||
return ir;
|
||||
}
|
||||
@@ -71,7 +71,7 @@
|
||||
}
|
||||
|
||||
offsets.Add(indirectReference.Data, Stream.Position);
|
||||
var obj = new ObjectToken(Stream.Position, indirectReference.Data, token);
|
||||
var obj = new ObjectToken(XrefLocation.File(Stream.Position), indirectReference.Data, token);
|
||||
TokenWriter.WriteToken(obj, Stream);
|
||||
return indirectReference;
|
||||
}
|
||||
@@ -98,7 +98,6 @@
|
||||
TokenWriter.WriteCrossReferenceTable(offsets, catalogReference.Data, Stream, documentInformationReference?.Data);
|
||||
}
|
||||
|
||||
|
||||
public void Dispose()
|
||||
{
|
||||
if (DisposeStream)
|
||||
|
||||
Reference in New Issue
Block a user