mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-08-20 09:21:57 +08:00
Updated FAQ (markdown)
parent
a4351764a1
commit
b9575389de
83
FAQ.md
83
FAQ.md
@ -5,4 +5,85 @@ On Linux, installing additional font should help:
|
||||
```bash
|
||||
sudo apt install ttf*
|
||||
```
|
||||
See [issue](https://github.com/UglyToad/PdfPig/issues/840)
|
||||
See [issue](https://github.com/UglyToad/PdfPig/issues/840)
|
||||
|
||||
# Some images cannot be extracted
|
||||
PdfPig does not support all image filters out of the box.
|
||||
Filters requiring external implementation are: DCT, JPX and JBIG2. You can either implement your own, or use the following NuGet packages:
|
||||
- PdfPig.Filters.Dct.JpegLibrary
|
||||
- PdfPig.Filters.Jbig2.PdfboxJbig2
|
||||
- PdfPig.Filters.Jpx.OpenJpegDotNet
|
||||
|
||||
Once the NuGet packages are added, use the following:
|
||||
```csharp
|
||||
// Create your filter provider
|
||||
public sealed class MyFilterProvider : BaseFilterProvider
{
    /// <summary>
    /// The single instance of this provider.
    /// </summary>
    public static readonly IFilterProvider Instance = new MyFilterProvider();

    /// <summary>
    /// Private so that <see cref="Instance"/> is the only way to obtain the provider.
    /// </summary>
    private MyFilterProvider() : base(GetDictionary())
    {
    }

    /// <summary>
    /// Builds the lookup handed to the base provider: each filter name (and its
    /// abbreviation, where one exists) mapped to the filter that decodes it.
    /// </summary>
    /// <returns>A dictionary keyed by filter name.</returns>
    private static Dictionary<string, IFilter> GetDictionary()
    {
        // Filters supplied by the external NuGet packages.
        var jbig2 = new PdfboxJbig2DecodeFilter();
        var jpx = new OpenJpegJpxDecodeFilter();
        // Used for DCT streams instead of the built-in DctDecodeFilter. Declaring
        // both under the same local name 'dct' does not compile (CS0128).
        var dct = new JpegLibraryDctDecodeFilter();

        // Default filters
        var ascii85 = new Ascii85Filter();
        var asciiHex = new AsciiHexDecodeFilter();
        var ccitt = new CcittFaxDecodeFilter();
        var flate = new FlateFilter();
        var runLength = new RunLengthFilter();
        var lzw = new LzwFilter();

        return new Dictionary<string, IFilter>
        {
            { NameToken.Ascii85Decode.Data, ascii85 },
            { NameToken.Ascii85DecodeAbbreviation.Data, ascii85 },
            { NameToken.AsciiHexDecode.Data, asciiHex },
            { NameToken.AsciiHexDecodeAbbreviation.Data, asciiHex },
            { NameToken.CcittfaxDecode.Data, ccitt },
            { NameToken.CcittfaxDecodeAbbreviation.Data, ccitt },
            { NameToken.DctDecode.Data, dct },
            { NameToken.DctDecodeAbbreviation.Data, dct },
            { NameToken.FlateDecode.Data, flate },
            { NameToken.FlateDecodeAbbreviation.Data, flate },
            { NameToken.Jbig2Decode.Data, jbig2 },
            { NameToken.JpxDecode.Data, jpx },
            { NameToken.RunLengthDecode.Data, runLength },
            { NameToken.RunLengthDecodeAbbreviation.Data, runLength },
            { NameToken.LzwDecode.Data, lzw },
            { NameToken.LzwDecodeAbbreviation.Data, lzw }
        };
    }
}
|
||||
|
||||
// Plug the custom filter provider into the parsing options.
var parsingOption = new ParsingOptions()
{
    UseLenientParsing = true, // Optional
    SkipMissingFonts = true, // Optional
    FilterProvider = MyFilterProvider.Instance
};

using (var doc = PdfDocument.Open("my_document.pdf", parsingOption))
{
    // Running counter so every extracted image gets a distinct file name.
    int i = 0;
    foreach (var page in doc.GetPages())
    {
        foreach (var pdfImage in page.GetImages())
        {
            // Process your images, e.g. save each one as a PNG.
            // Images for which TryGetPng returns false are skipped here;
            // handle them separately if you need every image.
            if (pdfImage.TryGetPng(out var bytes))
            {
                File.WriteAllBytes($"image_{i++}.png", bytes);
            }
        }
    }
}
|
||||
```
|
||||
Loading…
Reference in New Issue
Block a user