add content order text extractor and example of use

This commit is contained in:
Eliot Jones
2020-04-19 17:06:34 +01:00
parent f18bc0766a
commit 407ee5ca51
8 changed files with 202 additions and 3 deletions

View File

@@ -0,0 +1,22 @@
namespace UglyToad.Examples
{
    using System;
    using PdfPig;
    using PdfPig.DocumentLayoutAnalysis.TextExtractor;

    /// <summary>
    /// Example that prints the text of every page of a PDF document to the console,
    /// using <see cref="ContentOrderTextExtractor"/> to produce the text.
    /// </summary>
    internal static class ExtractTextWithNewlines
    {
        /// <summary>
        /// Opens the PDF at <paramref name="filePath"/> and writes the extracted text of each
        /// page to standard output, one <see cref="Console.WriteLine(string)"/> call per page.
        /// </summary>
        /// <param name="filePath">Path of the PDF file to open.</param>
        public static void Run(string filePath)
        {
            using (var pdf = PdfDocument.Open(filePath))
            {
                foreach (var currentPage in pdf.GetPages())
                {
                    // NOTE(review): the second argument (true) presumably enables extra newline
                    // insertion in the output - confirm against ContentOrderTextExtractor.GetText.
                    Console.WriteLine(ContentOrderTextExtractor.GetText(currentPage, true));
                }
            }
        }
    }
}

View File

@@ -17,12 +17,16 @@
// Table of the available examples, keyed by an integer id. Each value is a tuple of
// the example's name and the parameterless action that runs it against a file
// located under filesDirectory.
// NOTE(review): entry 1 below carries two consecutive name strings; this looks like the
// removed/added pair of a diff hunk shown together - confirm only one exists in the real file.
var examples = new Dictionary<int, (string name, Action action)>
{
{1,
("Extract Words with newline detection",
("Extract Words with newline detection (example with algorithm)",
() => OpenDocumentAndExtractWords.Run(Path.Combine(filesDirectory, "Two Page Text Only - from libre office.pdf")))
},
{2,
("Extract images",
() => ExtractImages.Run(Path.Combine(filesDirectory, "2006_Swedish_Touring_Car_Championship.pdf")))
},
// Entry 3 runs ExtractTextWithNewlines on the same input file as entry 1.
{3,
("Extract Text with newlines (using built-in content extractor)",
() => ExtractTextWithNewlines.Run(Path.Combine(filesDirectory, "Two Page Text Only - from libre office.pdf")))
}
};

View File

@@ -6,6 +6,7 @@
</PropertyGroup>
<!-- Projects referenced by this project: the DocumentLayoutAnalysis project and the core PdfPig project, both under ..\src. -->
<ItemGroup>
<ProjectReference Include="..\src\UglyToad.PdfPig.DocumentLayoutAnalysis\UglyToad.PdfPig.DocumentLayoutAnalysis.csproj" />
<ProjectReference Include="..\src\UglyToad.PdfPig\UglyToad.PdfPig.csproj" />
</ItemGroup>
</Project>

View File

@@ -15,6 +15,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "UglyToad.PdfPig.Tokens", ".
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "UglyToad.PdfPig", "..\src\UglyToad.PdfPig\UglyToad.PdfPig.csproj", "{75ED54D6-308F-44AD-B85E-C027F3AA80AE}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "UglyToad.PdfPig.DocumentLayoutAnalysis", "..\src\UglyToad.PdfPig.DocumentLayoutAnalysis\UglyToad.PdfPig.DocumentLayoutAnalysis.csproj", "{70FEC330-CF3F-4815-9BA6-E622907086C9}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -45,6 +47,10 @@ Global
{75ED54D6-308F-44AD-B85E-C027F3AA80AE}.Debug|Any CPU.Build.0 = Debug|Any CPU
{75ED54D6-308F-44AD-B85E-C027F3AA80AE}.Release|Any CPU.ActiveCfg = Release|Any CPU
{75ED54D6-308F-44AD-B85E-C027F3AA80AE}.Release|Any CPU.Build.0 = Release|Any CPU
{70FEC330-CF3F-4815-9BA6-E622907086C9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{70FEC330-CF3F-4815-9BA6-E622907086C9}.Debug|Any CPU.Build.0 = Debug|Any CPU
{70FEC330-CF3F-4815-9BA6-E622907086C9}.Release|Any CPU.ActiveCfg = Release|Any CPU
{70FEC330-CF3F-4815-9BA6-E622907086C9}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE