From bfdca3079f3a62c1ae759479ec85de635dc1be97 Mon Sep 17 00:00:00 2001 From: Eliot Jones Date: Wed, 3 Jan 2018 19:13:12 +0000 Subject: [PATCH] change the itext document test to reflect its text being form content. fix readme typo --- README.md | 2 +- ...ingle Page Form Content - from itext 1_1.pdf} | Bin ...t1.cs => SinglePageFormContentIText1Tests.cs} | 15 +++++++++++++-- src/UglyToad.Pdf.Tests/UglyToad.Pdf.Tests.csproj | 4 ++-- src/UglyToad.Pdf/Parser/PageFactory.cs | 5 ++++- 5 files changed, 20 insertions(+), 6 deletions(-) rename src/UglyToad.Pdf.Tests/Integration/Documents/{Single Page Simple - from itext 1_1.pdf => Single Page Form Content - from itext 1_1.pdf} (100%) rename src/UglyToad.Pdf.Tests/Integration/{SinglePageSimpleIText1.cs => SinglePageFormContentIText1Tests.cs} (68%) diff --git a/README.md b/README.md index 55a272f4..124c00ef 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ The ```Page``` contains the page width and height in points as well as mapping t PageSize size = Page.Size; - bool sA4 = size == PageSize.A4; + bool isA4 = size == PageSize.A4; The ```PdfDocument``` will also support opening from byte arrays (as well as streams eventually): diff --git a/src/UglyToad.Pdf.Tests/Integration/Documents/Single Page Simple - from itext 1_1.pdf b/src/UglyToad.Pdf.Tests/Integration/Documents/Single Page Form Content - from itext 1_1.pdf similarity index 100% rename from src/UglyToad.Pdf.Tests/Integration/Documents/Single Page Simple - from itext 1_1.pdf rename to src/UglyToad.Pdf.Tests/Integration/Documents/Single Page Form Content - from itext 1_1.pdf diff --git a/src/UglyToad.Pdf.Tests/Integration/SinglePageSimpleIText1.cs b/src/UglyToad.Pdf.Tests/Integration/SinglePageFormContentIText1Tests.cs similarity index 68% rename from src/UglyToad.Pdf.Tests/Integration/SinglePageSimpleIText1.cs rename to src/UglyToad.Pdf.Tests/Integration/SinglePageFormContentIText1Tests.cs index 45a49bd8..35156401 100644 --- a/src/UglyToad.Pdf.Tests/Integration/SinglePageSimpleIText1.cs +++ b/src/UglyToad.Pdf.Tests/Integration/SinglePageFormContentIText1Tests.cs @@ -5,13 +5,13 @@ using Content; using Xunit; - public class SinglePageSimpleIText1Tests + public class SinglePageFormContentIText1Tests { private static string GetFilename() { var documentFolder = Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "Integration", "Documents")); - return Path.Combine(documentFolder, "Single Page Simple - from itext 1_1.pdf"); + return Path.Combine(documentFolder, "Single Page Form Content - from itext 1_1.pdf"); } [Fact] @@ -35,5 +35,16 @@ Assert.Equal(PageSize.A4, page.Size); } } + + [Fact] + public void DoesNotExtractText() + { + using (var document = PdfDocument.Open(GetFilename())) + { + var page = document.GetPage(1); + + Assert.Empty(page.Letters); + } + } } } diff --git a/src/UglyToad.Pdf.Tests/UglyToad.Pdf.Tests.csproj b/src/UglyToad.Pdf.Tests/UglyToad.Pdf.Tests.csproj index 75b7bb07..fad17196 100644 --- a/src/UglyToad.Pdf.Tests/UglyToad.Pdf.Tests.csproj +++ b/src/UglyToad.Pdf.Tests/UglyToad.Pdf.Tests.csproj @@ -12,9 +12,9 @@ + - @@ -37,7 +37,7 @@ PreserveNewest - + PreserveNewest diff --git a/src/UglyToad.Pdf/Parser/PageFactory.cs b/src/UglyToad.Pdf/Parser/PageFactory.cs index cf206e2e..4d812f6f 100644 --- a/src/UglyToad.Pdf/Parser/PageFactory.cs +++ b/src/UglyToad.Pdf/Parser/PageFactory.cs @@ -8,6 +8,7 @@ using Geometry; using Graphics; using IO; + using Util; internal class PageFactory : IPageFactory { @@ -60,7 +61,9 @@ } var contents = contentStream.Decode(filterProvider); - + + var txt = OtherEncodings.BytesAsLatin1String(contents); + var operations = pageContentParser.Parse(new ByteArrayInputBytes(contents)); var context = new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit);