mirror of
				https://github.com/UglyToad/PdfPig.git
				synced 2025-10-31 16:46:55 +08:00 
			
		
		
		
	
		
			
				
	
	
		
			56 lines
		
	
	
		
			1.8 KiB
		
	
	
	
		
			C#
		
	
	
	
	
	
			
		
		
	
	
			56 lines
		
	
	
		
			1.8 KiB
		
	
	
	
		
			C#
		
	
	
	
	
	
| namespace UglyToad.Examples
 | |
| {
 | |
|     using System;
 | |
|     using System.Text;
 | |
|     using PdfPig;
 | |
|     using PdfPig.Content;
 | |
| 
 | |
|     public static class OpenDocumentAndExtractWords
 | |
|     {
 | |
|         public static void Run(string filePath)
 | |
|         {
 | |
|             var sb = new StringBuilder();
 | |
| 
 | |
|             using (var document = PdfDocument.Open(filePath))
 | |
|             {
 | |
|                 Word previous = null;
 | |
|                 foreach (var page in document.GetPages())
 | |
|                 {
 | |
|                     foreach (var word in page.GetWords())
 | |
|                     {
 | |
|                         if (previous != null)
 | |
|                         {
 | |
|                             var hasInsertedWhitespace = false;
 | |
|                             var bothNonEmpty = previous.Letters.Count > 0 && word.Letters.Count > 0;
 | |
|                             if (bothNonEmpty)
 | |
|                             {
 | |
|                                 var prevLetter1 = previous.Letters[0];
 | |
|                                 var currentLetter1 = word.Letters[0];
 | |
| 
 | |
|                                 var baselineGap = Math.Abs(prevLetter1.StartBaseLine.Y - currentLetter1.StartBaseLine.Y);
 | |
| 
 | |
|                                 if (baselineGap > 3)
 | |
|                                 {
 | |
|                                     hasInsertedWhitespace = true;
 | |
|                                     sb.AppendLine();
 | |
|                                 }
 | |
|                             }
 | |
| 
 | |
|                             if (!hasInsertedWhitespace)
 | |
|                             {
 | |
|                                 sb.Append(" ");
 | |
|                             }
 | |
|                         }
 | |
| 
 | |
|                         sb.Append(word.Text);
 | |
| 
 | |
|                         previous = word;
 | |
|                     }
 | |
|                 }
 | |
|             }
 | |
| 
 | |
|             Console.WriteLine(sb.ToString());
 | |
|         }
 | |
|     }
 | |
| }
 | 
