mirror of
https://github.com/UglyToad/PdfPig.git
synced 2026-03-10 00:23:29 +08:00
add examples directory and first example
This commit is contained in:
55
examples/OpenDocumentAndExtractWords.cs
Normal file
55
examples/OpenDocumentAndExtractWords.cs
Normal file
@@ -0,0 +1,55 @@
|
||||
namespace UglyToad.Examples
{
    using System;
    using System.Text;
    using PdfPig;
    using PdfPig.Content;

    /// <summary>
    /// Example: opens a PDF document, reads the words of every page and writes the
    /// reconstructed text to the console, starting a new output line whenever the
    /// baseline of a word differs noticeably from the baseline of the word before it.
    /// </summary>
    public static class OpenDocumentAndExtractWords
    {
        /// <summary>
        /// Absolute difference between the baseline Y values of two consecutive words
        /// above which the second word is treated as starting a new line.
        /// Declared as an integer so the comparison works regardless of the numeric
        /// type used for the baseline coordinates.
        /// </summary>
        private const int LineBreakBaselineGap = 3;

        /// <summary>
        /// Opens the document at <paramref name="filePath"/>, joins the words of all pages
        /// into a single text and prints it with <see cref="Console.WriteLine(string)"/>.
        /// </summary>
        /// <param name="filePath">Path of the PDF file passed to <c>PdfDocument.Open</c>.</param>
        public static void Run(string filePath)
        {
            var sb = new StringBuilder();

            using (var document = PdfDocument.Open(filePath))
            {
                // Tracks whether anything was emitted for an earlier page, so that a page
                // boundary can be rendered as a line break.
                var hasWrittenWord = false;

                foreach (var page in document.GetPages())
                {
                    // Reset for every page: comparing the baseline of a word with the baseline
                    // of a word from a different page is not meaningful (each page is assumed
                    // to have its own coordinate space), and could merge text from two pages
                    // onto one line when the Y values happen to be close.
                    Word previous = null;

                    foreach (var word in page.GetWords())
                    {
                        if (previous != null)
                        {
                            if (StartsNewLine(previous, word))
                            {
                                sb.AppendLine();
                            }
                            else
                            {
                                sb.Append(" ");
                            }
                        }
                        else if (hasWrittenWord)
                        {
                            // First word of a later page: always start on a fresh line.
                            sb.AppendLine();
                        }

                        sb.Append(word.Text);

                        previous = word;
                        hasWrittenWord = true;
                    }
                }
            }

            Console.WriteLine(sb.ToString());
        }

        /// <summary>
        /// Decides whether <paramref name="current"/> begins a new line relative to
        /// <paramref name="previous"/> by comparing the start baseline of the first
        /// letter of each word.
        /// </summary>
        /// <param name="previous">The word emitted immediately before.</param>
        /// <param name="current">The word about to be emitted.</param>
        /// <returns>
        /// <c>true</c> when both words have letters and their baseline Y values differ by more
        /// than <see cref="LineBreakBaselineGap"/>; otherwise <c>false</c>.
        /// </returns>
        private static bool StartsNewLine(Word previous, Word current)
        {
            // A word without letters has no baseline to compare, so fall back to a plain space.
            if (previous.Letters.Count == 0 || current.Letters.Count == 0)
            {
                return false;
            }

            var previousFirstLetter = previous.Letters[0];
            var currentFirstLetter = current.Letters[0];

            var baselineGap = Math.Abs(previousFirstLetter.StartBaseLine.Y - currentFirstLetter.StartBaseLine.Y);

            return baselineGap > LineBreakBaselineGap;
        }
    }
}
|
||||
Reference in New Issue
Block a user