Updated FAQ (markdown)

2025-08-20 09:21:57 +08:00 · 2025-04-21 14:09:50 +01:00 · 2025-04-21 14:09:50 +01:00 · b9575389de
commit b9575389de
parent a4351764a1
1 changed files with 82 additions and 1 deletions
--- a/FAQ.md
+++ b/FAQ.md
@@ -5,4 +5,85 @@ On Linux, installing additional font should help:
 ```bash
 sudo apt install ttf*
 ```
-See [issue](https://github.com/UglyToad/PdfPig/issues/840)
+See [issue](https://github.com/UglyToad/PdfPig/issues/840)
+
+# Some images cannot be extracted
+PdfPig does not support all image filters out of the box.
+The filters requiring an external implementation are DCT, JPX and JBIG2. You can either implement your own, or use the following NuGet packages:
+- PdfPig.Filters.Dct.JpegLibrary
+- PdfPig.Filters.Jbig2.PdfboxJbig2
+- PdfPig.Filters.Jpx.OpenJpegDotNet
+
+Once the NuGet packages are added, use the following:
+```csharp
+// Create your filter provider: the default filters plus the external DCT, JBIG2 and JPX decoders
+public sealed class MyFilterProvider : BaseFilterProvider
+{
+    /// <summary>
+    /// The single instance of this provider.
+    /// </summary>
+    public static readonly IFilterProvider Instance = new MyFilterProvider();
+
+    // Private constructor: the provider is only reachable through Instance.
+    private MyFilterProvider() : base(GetDictionary())
+    {
+    }
+
+    private static Dictionary<string, IFilter> GetDictionary()
+    {
+        // New filters, supplied by the external NuGet packages
+        var jbig2 = new PdfboxJbig2DecodeFilter();
+        var jpx = new OpenJpegJpxDecodeFilter();
+        var dct = new JpegLibraryDctDecodeFilter();
+
+        // Default filters
+        var ascii85 = new Ascii85Filter();
+        var asciiHex = new AsciiHexDecodeFilter();
+        var ccitt = new CcittFaxDecodeFilter();
+        // No default DctDecodeFilter here: 'dct' above is registered for the DCT names instead.
+        var flate = new FlateFilter();
+        var runLength = new RunLengthFilter();
+        var lzw = new LzwFilter();
+
+        return new Dictionary<string, IFilter>
+        {
+            { NameToken.Ascii85Decode.Data, ascii85 },
+            { NameToken.Ascii85DecodeAbbreviation.Data, ascii85 },
+            { NameToken.AsciiHexDecode.Data, asciiHex },
+            { NameToken.AsciiHexDecodeAbbreviation.Data, asciiHex },
+            { NameToken.CcittfaxDecode.Data, ccitt },
+            { NameToken.CcittfaxDecodeAbbreviation.Data, ccitt },
+            { NameToken.DctDecode.Data, dct },
+            { NameToken.DctDecodeAbbreviation.Data, dct },
+            { NameToken.FlateDecode.Data, flate },
+            { NameToken.FlateDecodeAbbreviation.Data, flate },
+            { NameToken.Jbig2Decode.Data, jbig2 },
+            { NameToken.JpxDecode.Data, jpx },
+            { NameToken.RunLengthDecode.Data, runLength },
+            { NameToken.RunLengthDecodeAbbreviation.Data, runLength },
+            { NameToken.LzwDecode.Data, lzw },
+            { NameToken.LzwDecodeAbbreviation.Data, lzw }
+        };
+    }
+}
+
+var parsingOption = new ParsingOptions()
+{
+	UseLenientParsing = true, // Optional
+	SkipMissingFonts = true, // Optional
+	FilterProvider = MyFilterProvider.Instance // Plug in the custom filters defined above
+};
+
+using (var doc = PdfDocument.Open("my_document.pdf", parsingOption))
+{
+	int i = 0; // Running index used to give each saved image a distinct file name
+	foreach (var page in doc.GetPages())
+	{
+		foreach (var pdfImage in page.GetImages())
+		{
+			// Process your images, e.g. save as PNG; images for which TryGetPng returns false are skipped:
+			if (pdfImage.TryGetPng(out var bytes)) { File.WriteAllBytes($"image_{i++}.png", bytes); }
+		}
+	}
+}
+```