From c8874c598446d30dce84fdd393bb1b81bbcf2666 Mon Sep 17 00:00:00 2001 From: Eliot Jones Date: Sun, 11 Dec 2022 16:18:09 -0500 Subject: [PATCH] #483 make skip missing fonts even more resilient to nonsense files --- .../CorruptCompressedDataException.cs | 34 +++++++++++++++++++ src/UglyToad.PdfPig/Content/IResourceStore.cs | 2 +- src/UglyToad.PdfPig/Content/ResourceStore.cs | 19 ++++++++--- src/UglyToad.PdfPig/Filters/FlateFilter.cs | 14 ++++++-- .../Graphics/ContentStreamProcessor.cs | 2 +- src/UglyToad.PdfPig/Parser/PageFactory.cs | 4 +-- 6 files changed, 64 insertions(+), 11 deletions(-) create mode 100644 src/UglyToad.PdfPig.Fonts/CorruptCompressedDataException.cs diff --git a/src/UglyToad.PdfPig.Fonts/CorruptCompressedDataException.cs b/src/UglyToad.PdfPig.Fonts/CorruptCompressedDataException.cs new file mode 100644 index 00000000..9cb4b394 --- /dev/null +++ b/src/UglyToad.PdfPig.Fonts/CorruptCompressedDataException.cs @@ -0,0 +1,34 @@ +namespace UglyToad.PdfPig.Fonts +{ + using System; + using System.Runtime.Serialization; + + /// <summary> + /// Thrown when a PDF contains an invalid compressed data stream. 
+ /// </summary> + [Serializable] + public class CorruptCompressedDataException : Exception + { + /// <inheritdoc /> + public CorruptCompressedDataException() + { + } + + /// <inheritdoc /> + public CorruptCompressedDataException(string message) : base(message) + { + } + + /// <inheritdoc /> + public CorruptCompressedDataException(string message, Exception inner) : base(message, inner) + { + } + + /// <inheritdoc /> + protected CorruptCompressedDataException( + SerializationInfo info, + StreamingContext context) : base(info, context) + { + } + } +} \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Content/IResourceStore.cs b/src/UglyToad.PdfPig/Content/IResourceStore.cs index 26984a6c..5cbfa8fc 100644 --- a/src/UglyToad.PdfPig/Content/IResourceStore.cs +++ b/src/UglyToad.PdfPig/Content/IResourceStore.cs @@ -6,7 +6,7 @@ internal interface IResourceStore { - void LoadResourceDictionary(DictionaryToken resourceDictionary); + void LoadResourceDictionary(DictionaryToken resourceDictionary, InternalParsingOptions parsingOptions); /// <summary> /// Remove any named resources and associated state for the last resource dictionary loaded. 
diff --git a/src/UglyToad.PdfPig/Content/ResourceStore.cs b/src/UglyToad.PdfPig/Content/ResourceStore.cs index de743aee..fa6e146e 100644 --- a/src/UglyToad.PdfPig/Content/ResourceStore.cs +++ b/src/UglyToad.PdfPig/Content/ResourceStore.cs @@ -33,7 +33,7 @@ this.fontFactory = fontFactory; } - public void LoadResourceDictionary(DictionaryToken resourceDictionary) + public void LoadResourceDictionary(DictionaryToken resourceDictionary, InternalParsingOptions parsingOptions) { lastLoadedFont = (null, null); @@ -43,7 +43,7 @@ { var fontDictionary = DirectObjectFinder.Get(fontBase, scanner); - LoadFontDictionary(fontDictionary); + LoadFontDictionary(fontDictionary, parsingOptions); } if (resourceDictionary.TryGet(NameToken.Xobject, out var xobjectBase)) @@ -132,7 +132,7 @@ currentResourceState.Pop(); } - private void LoadFontDictionary(DictionaryToken fontDictionary) + private void LoadFontDictionary(DictionaryToken fontDictionary, InternalParsingOptions parsingOptions) { lastLoadedFont = (null, null); @@ -157,7 +157,18 @@ continue; } - loadedFonts[reference] = fontFactory.Get(fontObject); + try + { + loadedFonts[reference] = fontFactory.Get(fontObject); + } + catch + { + if (!parsingOptions.SkipMissingFonts) + { + throw; + } + } + } else if (pair.Value is DictionaryToken fd) { diff --git a/src/UglyToad.PdfPig/Filters/FlateFilter.cs b/src/UglyToad.PdfPig/Filters/FlateFilter.cs index b67b2010..511080b3 100644 --- a/src/UglyToad.PdfPig/Filters/FlateFilter.cs +++ b/src/UglyToad.PdfPig/Filters/FlateFilter.cs @@ -1,5 +1,6 @@ namespace UglyToad.PdfPig.Filters { + using Fonts; using System; using System.Collections.Generic; using System.IO; @@ -79,10 +80,17 @@ memoryStream.ReadByte(); memoryStream.ReadByte(); - using (var deflate = new DeflateStream(memoryStream, CompressionMode.Decompress)) + try { - deflate.CopyTo(output); - return output.ToArray(); + using (var deflate = new DeflateStream(memoryStream, CompressionMode.Decompress)) + { + deflate.CopyTo(output); + return 
output.ToArray(); + } + } + catch (InvalidDataException ex) + { + throw new CorruptCompressedDataException("Invalid Flate compressed stream encountered", ex); } } } diff --git a/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs b/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs index 8f6d5440..1e5efffd 100644 --- a/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs +++ b/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs @@ -479,7 +479,7 @@ var hasResources = formStream.StreamDictionary.TryGet(NameToken.Resources, pdfScanner, out var formResources); if (hasResources) { - resourceStore.LoadResourceDictionary(formResources); + resourceStore.LoadResourceDictionary(formResources, parsingOptions); } // 1. Save current state. diff --git a/src/UglyToad.PdfPig/Parser/PageFactory.cs b/src/UglyToad.PdfPig/Parser/PageFactory.cs index 61e54289..591500b2 100644 --- a/src/UglyToad.PdfPig/Parser/PageFactory.cs +++ b/src/UglyToad.PdfPig/Parser/PageFactory.cs @@ -63,13 +63,13 @@ { var resource = pageTreeMembers.ParentResources.Dequeue(); - resourceStore.LoadResourceDictionary(resource); + resourceStore.LoadResourceDictionary(resource, parsingOptions); stackDepth++; } if (dictionary.TryGet(NameToken.Resources, pdfScanner, out DictionaryToken resources)) { - resourceStore.LoadResourceDictionary(resources); + resourceStore.LoadResourceDictionary(resources, parsingOptions); stackDepth++; }