mirror of
https://github.com/UglyToad/PdfPig.git
synced 2026-03-10 00:23:29 +08:00
Add content-order text extractor and an example of its use
This commit is contained in:
22
examples/ExtractTextWithNewlines.cs
Normal file
22
examples/ExtractTextWithNewlines.cs
Normal file
@@ -0,0 +1,22 @@
|
||||
namespace UglyToad.Examples
{
    using System;
    using PdfPig;
    using PdfPig.DocumentLayoutAnalysis.TextExtractor;

    /// <summary>
    /// Example that writes the text of each page of a PDF document to the console,
    /// using <see cref="ContentOrderTextExtractor"/> to obtain the text.
    /// </summary>
    internal static class ExtractTextWithNewlines
    {
        /// <summary>
        /// Opens the document at <paramref name="filePath"/> and prints the extracted
        /// text of every page, one <see cref="Console.WriteLine(string)"/> call per page.
        /// </summary>
        /// <param name="filePath">Path of the PDF document to open.</param>
        public static void Run(string filePath)
        {
            // The document is disposed as soon as all pages have been printed.
            using (var pdf = PdfDocument.Open(filePath))
            {
                foreach (var currentPage in pdf.GetPages())
                {
                    // NOTE(review): the boolean flag presumably controls how newlines are
                    // inserted between pieces of content - confirm against GetText's signature.
                    Console.WriteLine(ContentOrderTextExtractor.GetText(currentPage, true));
                }
            }
        }
    }
}
|
||||
@@ -17,12 +17,16 @@
|
||||
var examples = new Dictionary<int, (string name, Action action)>
|
||||
{
|
||||
{1,
|
||||
("Extract Words with newline detection",
|
||||
("Extract Words with newline detection (example with algorithm)",
|
||||
() => OpenDocumentAndExtractWords.Run(Path.Combine(filesDirectory, "Two Page Text Only - from libre office.pdf")))
|
||||
},
|
||||
{2,
|
||||
("Extract images",
|
||||
() => ExtractImages.Run(Path.Combine(filesDirectory, "2006_Swedish_Touring_Car_Championship.pdf")))
|
||||
},
|
||||
{3,
|
||||
("Extract Text with newlines (using built-in content extractor)",
|
||||
() => ExtractTextWithNewlines.Run(Path.Combine(filesDirectory, "Two Page Text Only - from libre office.pdf")))
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\src\UglyToad.PdfPig.DocumentLayoutAnalysis\UglyToad.PdfPig.DocumentLayoutAnalysis.csproj" />
|
||||
<ProjectReference Include="..\src\UglyToad.PdfPig\UglyToad.PdfPig.csproj" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
|
||||
@@ -15,6 +15,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "UglyToad.PdfPig.Tokens", ".
|
||||
EndProject
|
||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "UglyToad.PdfPig", "..\src\UglyToad.PdfPig\UglyToad.PdfPig.csproj", "{75ED54D6-308F-44AD-B85E-C027F3AA80AE}"
|
||||
EndProject
|
||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "UglyToad.PdfPig.DocumentLayoutAnalysis", "..\src\UglyToad.PdfPig.DocumentLayoutAnalysis\UglyToad.PdfPig.DocumentLayoutAnalysis.csproj", "{70FEC330-CF3F-4815-9BA6-E622907086C9}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
@@ -45,6 +47,10 @@ Global
|
||||
{75ED54D6-308F-44AD-B85E-C027F3AA80AE}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{75ED54D6-308F-44AD-B85E-C027F3AA80AE}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{75ED54D6-308F-44AD-B85E-C027F3AA80AE}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{70FEC330-CF3F-4815-9BA6-E622907086C9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{70FEC330-CF3F-4815-9BA6-E622907086C9}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{70FEC330-CF3F-4815-9BA6-E622907086C9}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{70FEC330-CF3F-4815-9BA6-E622907086C9}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
|
||||
Reference in New Issue
Block a user