diff --git a/src/UglyToad.Pdf.Tests/Integration/Documents/Single Page Simple - from open office.pdf b/src/UglyToad.Pdf.Tests/Integration/Documents/Single Page Simple - from open office.pdf new file mode 100644 index 00000000..bb12f5f1 Binary files /dev/null and b/src/UglyToad.Pdf.Tests/Integration/Documents/Single Page Simple - from open office.pdf differ diff --git a/src/UglyToad.Pdf.Tests/Integration/FontSizeTestFromLibreOfficeTests.cs b/src/UglyToad.Pdf.Tests/Integration/FontSizeTestFromLibreOfficeTests.cs index 9882c047..8334c1f3 100644 --- a/src/UglyToad.Pdf.Tests/Integration/FontSizeTestFromLibreOfficeTests.cs +++ b/src/UglyToad.Pdf.Tests/Integration/FontSizeTestFromLibreOfficeTests.cs @@ -2,6 +2,7 @@ { using System; using System.IO; + using System.Linq; using Content; using Xunit; @@ -35,5 +36,18 @@ Assert.Equal(PageSize.A4, page.Size); } } + + [Fact] + public void GetsCorrectPageTextIgnoringHiddenCharacters() + { + using (var document = PdfDocument.Open(GetFilename())) + { + var page = document.GetPage(1); + + var text = string.Join(string.Empty, page.Letters.Select(x => x.Value)); + + Assert.Equal("36pt font14 pt font6pt font", text); + } + } } } diff --git a/src/UglyToad.Pdf.Tests/Integration/SinglePageSimpleOpenOfficeTests.cs b/src/UglyToad.Pdf.Tests/Integration/SinglePageSimpleOpenOfficeTests.cs new file mode 100644 index 00000000..f5aa0827 --- /dev/null +++ b/src/UglyToad.Pdf.Tests/Integration/SinglePageSimpleOpenOfficeTests.cs @@ -0,0 +1,39 @@ +namespace UglyToad.Pdf.Tests.Integration +{ + using System; + using System.IO; + using Content; + using Xunit; + + public class SinglePageSimpleOpenOfficeTests + { + private static string GetFilename() + { + var documentFolder = Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "Integration", "Documents")); + + return Path.Combine(documentFolder, "Single Page Simple - from open office.pdf"); + } + + [Fact] + public void HasCorrectNumberOfPages() + { + var file = GetFilename(); + + using (var document = PdfDocument.Open(File.ReadAllBytes(file))) + { + Assert.Equal(1, document.NumberOfPages); + } + } + + [Fact] + public void HasCorrectPageSize() + { + using (var document = PdfDocument.Open(GetFilename())) + { + var page = document.GetPage(1); + + Assert.Equal(PageSize.Letter, page.Size); + } + } + } +} diff --git a/src/UglyToad.Pdf.Tests/UglyToad.Pdf.Tests.csproj b/src/UglyToad.Pdf.Tests/UglyToad.Pdf.Tests.csproj index 49106bee..3f547ef0 100644 --- a/src/UglyToad.Pdf.Tests/UglyToad.Pdf.Tests.csproj +++ b/src/UglyToad.Pdf.Tests/UglyToad.Pdf.Tests.csproj @@ -13,6 +13,7 @@ + @@ -31,6 +32,9 @@ PreserveNewest + + PreserveNewest + diff --git a/src/UglyToad.Pdf/Content/IPageFactory.cs b/src/UglyToad.Pdf/Content/IPageFactory.cs index 5663d145..f07fdb5a 100644 --- a/src/UglyToad.Pdf/Content/IPageFactory.cs +++ b/src/UglyToad.Pdf/Content/IPageFactory.cs @@ -7,5 +7,7 @@ { Page Create(int number, PdfDictionary dictionary, PageTreeMembers pageTreeMembers, IRandomAccessRead reader, bool isLenientParsing); + + void LoadResources(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing); } } \ No newline at end of file diff --git a/src/UglyToad.Pdf/Content/PageFactory.cs b/src/UglyToad.Pdf/Content/PageFactory.cs index f9d0dc0c..6873b196 100644 --- a/src/UglyToad.Pdf/Content/PageFactory.cs +++ b/src/UglyToad.Pdf/Content/PageFactory.cs @@ -45,7 +45,7 @@ UserSpaceUnit userSpaceUnit = GetUserSpaceUnits(dictionary); - LoadResources(number, dictionary, reader, isLenientParsing); + LoadResources(dictionary, reader, isLenientParsing); PageContent content = default(PageContent); @@ -136,7 +136,7 @@ return mediaBox; } - private void LoadResources(int pageNumber, PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing) + public void LoadResources(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing) { var resources = dictionary.GetItemOrDefault(CosName.RESOURCES); @@ -155,13 +155,8 @@ if (resourceDictionary is PdfDictionary resolvedDictionary) { resourceStore.LoadResourceDictionary(resolvedDictionary, reader, isLenientParsing); - - return; } } - - throw new InvalidOperationException( - $"No resource dictionary was found for this page ({pageNumber}), the page dictionary was {dictionary}."); } } } diff --git a/src/UglyToad.Pdf/Content/Pages.cs b/src/UglyToad.Pdf/Content/Pages.cs index aa24a9ad..95d2131d 100644 --- a/src/UglyToad.Pdf/Content/Pages.cs +++ b/src/UglyToad.Pdf/Content/Pages.cs @@ -64,6 +64,7 @@ { if (locatedPages.TryGetValue(pageNumber, out PdfDictionary targetPageDictionary)) { + // TODO: cache the page return pageFactory.Create(pageNumber, targetPageDictionary, new PageTreeMembers(), reader, isLenientParsing); } @@ -119,6 +120,8 @@ var kids = currentPageDictionary.GetDictionaryObject(CosName.KIDS) as COSArray; + pageFactory.LoadResources(currentPageDictionary, reader, isLenientParsing); + bool childFound = false; foreach (var kid in kids.OfType()) { diff --git a/src/UglyToad.Pdf/Graphics/ContentStreamProcessor.cs b/src/UglyToad.Pdf/Graphics/ContentStreamProcessor.cs index b5cb0950..7f480319 100644 --- a/src/UglyToad.Pdf/Graphics/ContentStreamProcessor.cs +++ b/src/UglyToad.Pdf/Graphics/ContentStreamProcessor.cs @@ -79,10 +79,15 @@ public void ShowText(IInputBytes bytes) { - var font = resourceStore.GetFont(GetCurrentState().FontState.FontName); - var currentState = GetCurrentState(); + var font = resourceStore.GetFont(currentState.FontState.FontName); + + if (font == null) + { + throw new InvalidOperationException($"Could not find the font with name {currentState.FontState.FontName} in the resource store. It has not been loaded yet."); + } + var fontSize = currentState.FontState.FontSize; var horizontalScaling = currentState.FontState.HorizontalScaling; var characterSpacing = currentState.FontState.CharacterSpacing;