File buffering read stream investigation (#1140)
Some checks failed
Build and test / build (push) Has been cancelled
Build and test [MacOS] / build (push) Has been cancelled
Run Common Crawl Tests / build (push) Has been cancelled
Run Integration Tests / build (push) Has been cancelled

* add test for filebufferingreadstream

* #1124 do not trust reported stream length if bytes can be read at end

The FileBufferingReadStream input stream does not report a length greater than
what has been read. The change to seek the xref in a sliding window from the end
broke because it assumed that the reported length was correct. Here we switch to
reading the window, or continue reading if we can read beyond the stream's
initially reported length, while seeking the startxref marker.

* remove rogue newlines
This commit is contained in:
Eliot Jones
2025-09-07 15:39:46 +02:00
committed by GitHub
parent e4ed4d1b39
commit dd5aa46c75
2 changed files with 26 additions and 0 deletions

View File

@@ -1,5 +1,9 @@
namespace UglyToad.PdfPig.Tests.Integration
{
#if NET9_0_OR_GREATER
using Microsoft.AspNetCore.WebUtilities;
#endif
public class SinglePageLibreOfficeImages
{
private static string GetFilePath() => IntegrationHelpers.GetDocumentPath(@"Single Page Images - from libre office.pdf");
@@ -17,6 +21,23 @@
}
}
#if NET9_0_OR_GREATER
/// <summary>
/// Opens the sample document through a <c>FileBufferingReadStream</c> (constructed with a
/// threshold argument of 256) layered over an in-memory copy of the file, then checks that
/// the first page yields non-empty text.
/// </summary>
[Fact]
public void CanUseFileBufferingReadStream()
{
    // Load the whole PDF up front so the buffering stream wraps a plain MemoryStream.
    using (var source = new MemoryStream(File.ReadAllBytes(GetFilePath())))
    using (var buffered = new FileBufferingReadStream(source, 256))
    using (var document = PdfDocument.Open(buffered))
    {
        var firstPage = document.GetPage(1);

        Assert.NotEmpty(firstPage.Text);
    }
}
#endif
[Fact]
public void ImagesHaveCorrectDimensionsAndLocations()
{

View File

@@ -208,4 +208,9 @@
<!-- Declares Xunit as a project-wide using so test files do not need their own `using Xunit;`. -->
<ItemGroup>
<Using Include="Xunit" />
</ItemGroup>
<!--
  Microsoft.AspNetCore.WebUtilities is imported by test code guarded with NET9_0_OR_GREATER.
  The condition mirrors that preprocessor symbol: it holds for net9.0 and for any later
  target framework compatible with it, so the guarded code always has its package available.
  The empty-string check keeps the condition from evaluating the framework function when
  TargetFramework has not been set.
-->
<ItemGroup Condition="'$(TargetFramework)' != '' And $([MSBuild]::IsTargetFrameworkCompatible('$(TargetFramework)', 'net9.0'))">
<PackageReference Include="Microsoft.AspNetCore.WebUtilities">
<Version>9.0.8</Version>
</PackageReference>
</ItemGroup>
</Project>