From 9ae0a5ec1592e569d3c2a07cac0df3882cf9fbc6 Mon Sep 17 00:00:00 2001 From: Eliot Jones Date: Sun, 25 Apr 2021 16:22:22 -0400 Subject: [PATCH] allow stream filters to contain indirect references to name tokens --- .../TestFilterProvider.cs | 8 +- .../Scanner/PdfTokenScannerTests.cs | 84 ++++++++++++++++--- src/UglyToad.PdfPig.sln.DotSettings | 6 +- .../AcroForms/AcroFormFactory.cs | 7 +- .../AdvancedPdfDocumentAccess.cs | 6 +- src/UglyToad.PdfPig/Content/PageContent.cs | 4 +- src/UglyToad.PdfPig/Content/XmpMetadata.cs | 9 +- .../Filters/DefaultFilterProvider.cs | 1 + .../Filters/FilterProviderWithLookup.cs | 73 ++++++++++++++++ .../Filters/IFilterProvider.cs | 6 ++ .../Graphics/ContentStreamProcessor.cs | 8 +- .../Graphics/InlineImageBuilder.cs | 4 +- .../Graphics/MarkedContentStack.cs | 2 +- src/UglyToad.PdfPig/Parser/PageFactory.cs | 8 +- .../Parser/PdfDocumentFactory.cs | 2 +- src/UglyToad.PdfPig/PdfDocument.cs | 12 ++- src/UglyToad.PdfPig/PdfExtensions.cs | 13 +++ .../Parser/Handlers/TrueTypeFontHandler.cs | 8 +- .../Parser/Handlers/Type0FontHandler.cs | 8 +- .../Parser/Handlers/Type1FontHandler.cs | 8 +- .../Parser/Handlers/Type3FontHandler.cs | 6 +- .../PdfFonts/Parser/Parts/CidFontFactory.cs | 12 +-- .../Tokenization/Scanner/PdfTokenScanner.cs | 6 +- .../Util/ColorSpaceDetailsParser.cs | 4 +- .../XObjects/XObjectFactory.cs | 7 +- 25 files changed, 239 insertions(+), 73 deletions(-) create mode 100644 src/UglyToad.PdfPig/Filters/FilterProviderWithLookup.cs diff --git a/src/UglyToad.PdfPig.Tests/TestFilterProvider.cs b/src/UglyToad.PdfPig.Tests/TestFilterProvider.cs index f024771b..b6240dba 100644 --- a/src/UglyToad.PdfPig.Tests/TestFilterProvider.cs +++ b/src/UglyToad.PdfPig.Tests/TestFilterProvider.cs @@ -2,9 +2,10 @@ { using System.Collections.Generic; using PdfPig.Filters; + using PdfPig.Tokenization.Scanner; using PdfPig.Tokens; - internal class TestFilterProvider : IFilterProvider + internal class TestFilterProvider : ILookupFilterProvider { public IReadOnlyList GetFilters(DictionaryToken dictionary) { @@ -20,5 +21,10 @@ { return new List(); } + + public IReadOnlyList GetFilters(DictionaryToken dictionary, IPdfTokenScanner scanner) + { + return new List(); + } } } \ No newline at end of file diff --git a/src/UglyToad.PdfPig.Tests/Tokenization/Scanner/PdfTokenScannerTests.cs b/src/UglyToad.PdfPig.Tests/Tokenization/Scanner/PdfTokenScannerTests.cs index 4b72a319..6931a9b1 100644 --- a/src/UglyToad.PdfPig.Tests/Tokenization/Scanner/PdfTokenScannerTests.cs +++ b/src/UglyToad.PdfPig.Tests/Tokenization/Scanner/PdfTokenScannerTests.cs @@ -15,7 +15,9 @@ [Fact] public void ReadsSimpleObject() { - const string s = @"294 0 obj /WDKAAR+CMBX12 endobj"; + const string s = @"294 0 obj +/WDKAAR+CMBX12 +endobj"; var pdfScanner = GetScanner(s); @@ -116,7 +118,15 @@ endobj [Fact] public void ReadsArrayObject() { - const string s = @" endobj 295 0 obj [ 676 938 875 787 750 880 813 875 813 875 813 656 625 625 938 938 313 344 563 563 563 563 563 850 500 574 813 875 563 1019 1144 875 313 ] endobj"; + const string s = @" +endobj + +295 0 obj +[ +676 938 875 787 750 880 813 875 813 875 813 656 625 625 938 938 313 +344 563 563 563 563 563 850 500 574 813 875 563 1019 1144 875 313 +] +endobj"; var pdfScanner = GetScanner(s); @@ -143,8 +153,29 @@ endobj { const string s = @" -274 0 obj << /Type /Pages /Count 2 /Parent 275 0 R /Kids [ 121 0 R 125 0 R ] >> endobj - %Other parts... 310 0 obj /WPXNWT+CMR9 endobj 311 0 obj << /Type /Font /Subtype /Type1 /FirstChar 0 /LastChar 127 /Widths 313 0 R /BaseFont 310 0 R /FontDescriptor 312 0 R >> endobj"; +274 0 obj +<< +/Type /Pages +/Count 2 +/Parent 275 0 R +/Kids [ 121 0 R 125 0 R ] +>> +endobj + +%Other parts... + +310 0 obj +/WPXNWT+CMR9 +endobj 311 0 obj +<< +/Type /Font +/Subtype /Type1 +/FirstChar 0 +/LastChar 127 +/Widths 313 0 R +/BaseFont 310 0 R /FontDescriptor 312 0 R +>> +endobj"; var scanner = GetScanner(s); @@ -191,12 +222,17 @@ endobj public void ReadsStreamObject() { const string s = @" -352 0 obj << /S 1273 /Filter /FlateDecode /Length 353 0 R >> stream -H‰œUkLSgþÚh¹IÝÅlK(%[ÈÅ©+ ƒåꩊèæÇtnZ)Z¹¨Oå~9ŠÊµo”[éiK)÷B¹´ -É² ©¸˜ n±º×dKöcÏ÷ãœç{ßï}¾÷ÍÉs  Ô;€ -À»—ÀF`ÇF@ƒ 4 ˜ï @¥T¨³fY: žw̵;’’Îq®]cƒÿdp¨ÛI3F#G©#œ)TÇqW£NÚѬgOKbü‡µ#á¡£Þaîtƒƒ›ß– ¾“S>}µuÕõ5M±¢ª†»øÞû•q÷îÜ~¬PòžÞ~•¬ëɃGÅ-Ñ­ím·°gêêb,/,£P§õ^ v¾ãÁô¿¿ŠTE]²±{šuwÔ`LG³DªìTÈ A¡¬àð‰É©ˆ°‘¼›‚%¥×s³®í»š}%§X{{tøNåÝž¶ö¢ÖÞ¾–~´¼¬°À“Éððr¥8»P£ØêÁi½®Û(éhŽ‘ú;x#dÃÄ$m -+) )†…±n -9ùyŽA·n\ï»t!=3£½¡:®­µåâ¹Ô³ø¼ËiûSÎsë;•Dt—ö$WÉ4U‘¢ºÚšñá1íÐèÔó‚svõ(/(+D²#mZÏ6êüÝ7x‡—†”‡E„²‚|ê«êªDµ5q°šR¦RÈ£ n¾[è~“}ýƒÝ½SꞦ'æQŽzÝ‚mæ +352 0 obj +<< /S 1273 /Filter /FlateDecode /Length 353 0 R >> +stream +H‰œUkLSgþÚh¹IÝÅlK(%[ÈÅ©+ ƒåꩊèæÇtnZ)Z¹¨Oå~9ŠÊµo”[éiK)÷B¹´ +É² ©¸˜ n±º×dKöcÏ÷ãœç{ßï}¾÷ÍÉs  Ô;€ +À»—ÀF`ÇF@ƒ 4 ˜ï @¥T¨³fY: žw̵;’’Îq®]cƒÿdp¨ÛI3F#G©#œ)TÇqW£NÚѬgOKbü‡µ#á¡£Þaîtƒƒ›ß– +¾“S>}µuÕõ5M±¢ª†»øÞû•q÷îÜ~¬PòžÞ~•¬ëɃGÅ-Ñ­ím·°gêêb,/,£P§õ^ v¾ãÁô¿¿ŠTE]²±{šuwÔ`LG³DªìTÈ +A¡¬àð‰É©ˆ°‘¼›‚%¥×s³®í»š}%§X{{tøNåÝž¶ö¢ÖÞ¾–~´¼¬°À“Éððr¥8»P£ØêÁi½®Û(éhŽ‘ú;x#dÃÄ$m ++) +)†…±n +9ùyŽA·n\ï»t!=3£½¡:®­µåâ¹Ô³ø¼ËiûSÎsë;•Dt—ö$WÉ4U‘¢ºÚšñá1íÐèÔó‚svõ(/(+D²#mZÏ6êüÝ7x‡—†”‡E„²‚|ê«êªDµ5q°šR¦RÈ£ n¾[è~“}ýƒÝ½SꞦ'æQŽzÝ‚mæ óF+Õ%ù‡ƒß9SˆŒÓãšH¶~L-#T]êîÁ©ÎkbjÒp½¸$¤´(4<,""øfvΕ< VЍ«#4'2l'Ð1ñðn?sìûãI'OŸøñçŸN5(äÊ'âÎѾÞþíðƒQmu}]Õ£‡c›©.Œòµ9zz0Ѳ‚B¢«#š-3ªàŸŸ¦Pà8®Ó…¼æ¢BaÅÐkëÊŠukÈÊÖL£­ivvv…k2=µZMØ|Úl(ŠZ­V›ÍbI>Ÿl¹œ(â±Äb­ø”Uª ñeü©U*‹’“Oð,„E+¶Êà>ŽU”ÎÌõçlºFÃ_ÃÙl?¶=>>!>þC¿-×à©©©x¾€¢ŠÊåòtÃ0‹Æôz“‰ NÊ,¬‚kÀ°F‚XÛ4&“ÉfÃñÅæûæy=ÆãIðE _¾Èårår/XÞ„/·qò›m¶ìÖ|†óx8Wð¹hºÜÂÕalÎü’˜Ã0^Òòòü¼yÞ¶´´DX )¨ÇM8lüM…Oúý| 1Ïãk»:t<…ÂÚl¶e¾†” éKÜl6c¹¸É„› ”)‰'3¤œ\–™ËN–™ÿe^Ё² y÷ð¹f`3ëž´ ¸“$d:e†)!%2ºdvË@½N¼ªŠ Ùná¹ ¼¿@ €Ã.èšs ì÷ûM€2(E4_ | FÑ.@v@÷¤ÃÅ0È Pž~,€:»H¤k¾ hT Œ € êÇV:Ô…©@@oH¯(3T‰{""C½SñŠœþtz3€•ƒ ñf.¬SЍøzWþ*$9gj=~Ì·QD E6o¥Ûi/Â`1ígGMq,;}޼sÔ×®kDü˜J{e5‚²ìɐ~Y)}fA>:˜ù–""Yò ç¹=ù²yÛ¡¿i aœ‘ØÏºþÇoäO ôkÆ) endstream @@ -321,7 +357,7 @@ endobj << /Length 288 /Filter /FlateDecode >> stream -xœ]‘ËjÃ0E÷ÿÃ,ÓEð#NÒ€1¤N^ôA~€-]A- YYøï+Ï4¡t#qfîFWQY*­Dïv5:è”–§ñjB‹½Òa¤ •p7¤K  ƒÈûëyr8Tº!Ïà úð‚ÉÙVG9¶ø@Å7+Ñ*ÝÃ곬¹T_ùƵƒ8 Š$vË̗Ƽ6BDöu%½B¹yí$—Ù ¤\Hx71JœL#Ð6ºÇ0È㸀ü|. µüßõÏ""WÛ‰¯Æ.êÄ«ã8; ¤iL°!Ø %É`K°ßì¸ÃöÜáÜ)  [‚#CFðİ#(yƒg^ÿ¶æò +xœ]‘ËjÃ0E÷ÿÃ,ÓEð#NÒ€1¤N^ôA~€-]A- YYøï+Ï4¡t#qfîFWQY*­Dïv5:è”–§ñjB‹½Òa¤ •p7¤K  ƒÈûëyr8Tº!Ïà úð‚ÉÙVG9¶ø@Å7+Ñ*ÝÃ곬¹T_ùƵƒ8 Š$vË̗Ƽ6BDöu%½B¹yí$—Ù ¤\Hx71JœL#Ð6ºÇ0È㸀ü|. µüßõÏ""WÛ‰¯Æ.êÄ«ã8; ¤iL°!Ø %É`K°ßì¸ÃöÜáÜ)  [‚#CFðİ#(yƒg^ÿ¶æò ÿž“¸Zë#¢?¢h–P”Æû?šÑï÷ø¯‰Šendstream endobj @@ -339,7 +375,31 @@ endobj"; [Fact] public void ReadsStringsWithMissingEndBracket() { - const string input = @"5 0 obj << /Kids [4 0 R 12 0 R 17 0 R 20 0 R 25 0 R 28 0 R ] /Count 6 /Type /Pages /MediaBox [ 0 0 612 792 ] >> endobj 1 0 obj << /Creator (Corel WordPerfect - [D:\Wpdocs\WEBSITE\PROC&POL.WP6 (unmodified) /CreationDate (D:19980224130723) /Title (Proc&Pol.pdf) /Author (J. L. Swezey) /Producer (Acrobat PDFWriter 3.03 for Windows NT) /Keywords (Budapest Treaty; Patent deposits; IDA) /Subject (Patent Collection Procedures and Policies) >> endobj 3 0 obj << /Pages 5 0 R /Type /Catalog >> endobj"; + const string input = @"5 0 obj +<< +/Kids [4 0 R 12 0 R 17 0 R 20 0 R 25 0 R 28 0 R ] +/Count 6 +/Type /Pages +/MediaBox [ 0 0 612 792 ] +>> +endobj +1 0 obj +<< +/Creator (Corel WordPerfect - [D:\Wpdocs\WEBSITE\PROC&POL.WP6 (unmodified) +/CreationDate (D:19980224130723) +/Title (Proc&Pol.pdf) +/Author (J. L. Swezey) +/Producer (Acrobat PDFWriter 3.03 for Windows NT) +/Keywords (Budapest Treaty; Patent deposits; IDA) +/Subject (Patent Collection Procedures and Policies) +>> +endobj +3 0 obj +<< +/Pages 5 0 R +/Type /Catalog +>> +endobj"; var scanner = GetScanner(input); diff --git a/src/UglyToad.PdfPig.sln.DotSettings b/src/UglyToad.PdfPig.sln.DotSettings index 274f9a44..9ae00f92 100644 --- a/src/UglyToad.PdfPig.sln.DotSettings +++ b/src/UglyToad.PdfPig.sln.DotSettings @@ -9,4 +9,8 @@ RGB XY <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" /> - <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" /> \ No newline at end of file + <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" /> + True + True + True + True \ No newline at end of file diff --git a/src/UglyToad.PdfPig/AcroForms/AcroFormFactory.cs b/src/UglyToad.PdfPig/AcroForms/AcroFormFactory.cs index a0445e66..1e05043f 100644 --- a/src/UglyToad.PdfPig/AcroForms/AcroFormFactory.cs +++ b/src/UglyToad.PdfPig/AcroForms/AcroFormFactory.cs @@ -6,7 +6,6 @@ using Content; using Core; using CrossReference; - using Exceptions; using Fields; using Filters; using Parser.Parts; @@ -30,10 +29,10 @@ }; private readonly IPdfTokenScanner tokenScanner; - private readonly IFilterProvider filterProvider; + private readonly ILookupFilterProvider filterProvider; private readonly CrossReferenceTable crossReferenceTable; - public AcroFormFactory(IPdfTokenScanner tokenScanner, IFilterProvider filterProvider, CrossReferenceTable crossReferenceTable) + public AcroFormFactory(IPdfTokenScanner tokenScanner, ILookupFilterProvider filterProvider, CrossReferenceTable crossReferenceTable) { this.tokenScanner = tokenScanner ?? throw new ArgumentNullException(nameof(tokenScanner)); this.filterProvider = filterProvider ?? throw new ArgumentNullException(nameof(filterProvider)); @@ -314,7 +313,7 @@ } else if (DirectObjectFinder.TryGet(textValueToken, tokenScanner, out StreamToken valueStreamToken)) { - textValue = OtherEncodings.BytesAsLatin1String(valueStreamToken.Decode(filterProvider).ToArray()); + textValue = OtherEncodings.BytesAsLatin1String(valueStreamToken.Decode(filterProvider, tokenScanner).ToArray()); } } diff --git a/src/UglyToad.PdfPig/AdvancedPdfDocumentAccess.cs b/src/UglyToad.PdfPig/AdvancedPdfDocumentAccess.cs index 7569e801..5e3e0260 100644 --- a/src/UglyToad.PdfPig/AdvancedPdfDocumentAccess.cs +++ b/src/UglyToad.PdfPig/AdvancedPdfDocumentAccess.cs @@ -16,13 +16,13 @@ public class AdvancedPdfDocumentAccess : IDisposable { private readonly IPdfTokenScanner pdfScanner; - private readonly IFilterProvider filterProvider; + private readonly ILookupFilterProvider filterProvider; private readonly Catalog catalog; private bool isDisposed; internal AdvancedPdfDocumentAccess(IPdfTokenScanner pdfScanner, - IFilterProvider filterProvider, + ILookupFilterProvider filterProvider, Catalog catalog) { this.pdfScanner = pdfScanner ?? throw new ArgumentNullException(nameof(pdfScanner)); @@ -73,7 +73,7 @@ fileSpecification = fileSpecificationToken.Data; } - var fileBytes = fileStreamToken.Decode(filterProvider); + var fileBytes = fileStreamToken.Decode(filterProvider, pdfScanner); result.Add(new EmbeddedFile(keyValuePair.Key, fileSpecification, fileBytes, fileStreamToken)); } diff --git a/src/UglyToad.PdfPig/Content/PageContent.cs b/src/UglyToad.PdfPig/Content/PageContent.cs index f2a03104..c44d74be 100644 --- a/src/UglyToad.PdfPig/Content/PageContent.cs +++ b/src/UglyToad.PdfPig/Content/PageContent.cs @@ -21,7 +21,7 @@ private readonly IReadOnlyList> images; private readonly IReadOnlyList markedContents; private readonly IPdfTokenScanner pdfScanner; - private readonly IFilterProvider filterProvider; + private readonly ILookupFilterProvider filterProvider; private readonly IResourceStore resourceStore; internal IReadOnlyList GraphicsStateOperations { get; } @@ -37,7 +37,7 @@ IReadOnlyList> images, IReadOnlyList markedContents, IPdfTokenScanner pdfScanner, - IFilterProvider filterProvider, + ILookupFilterProvider filterProvider, IResourceStore resourceStore) { GraphicsStateOperations = graphicsStateOperations; diff --git a/src/UglyToad.PdfPig/Content/XmpMetadata.cs b/src/UglyToad.PdfPig/Content/XmpMetadata.cs index f874f566..d4160388 100644 --- a/src/UglyToad.PdfPig/Content/XmpMetadata.cs +++ b/src/UglyToad.PdfPig/Content/XmpMetadata.cs @@ -6,6 +6,7 @@ using System.Xml.Linq; using Core; using Filters; + using Tokenization.Scanner; using Tokens; using Util.JetBrains.Annotations; @@ -15,7 +16,8 @@ /// public class XmpMetadata { - private readonly IFilterProvider filterProvider; + private readonly ILookupFilterProvider filterProvider; + private readonly IPdfTokenScanner pdfTokenScanner; /// /// The underlying for this metadata. @@ -23,9 +25,10 @@ [NotNull] public StreamToken MetadataStreamToken { get; } - internal XmpMetadata(StreamToken stream, IFilterProvider filterProvider) + internal XmpMetadata(StreamToken stream, ILookupFilterProvider filterProvider, IPdfTokenScanner pdfTokenScanner) { this.filterProvider = filterProvider ?? throw new ArgumentNullException(nameof(filterProvider)); + this.pdfTokenScanner = pdfTokenScanner; MetadataStreamToken = stream ?? throw new ArgumentNullException(nameof(stream)); } @@ -35,7 +38,7 @@ /// The bytes for the metadata object with any filters removed. public IReadOnlyList GetXmlBytes() { - return MetadataStreamToken.Decode(filterProvider); + return MetadataStreamToken.Decode(filterProvider, pdfTokenScanner); } /// diff --git a/src/UglyToad.PdfPig/Filters/DefaultFilterProvider.cs b/src/UglyToad.PdfPig/Filters/DefaultFilterProvider.cs index 1ee3dba4..7c198e96 100644 --- a/src/UglyToad.PdfPig/Filters/DefaultFilterProvider.cs +++ b/src/UglyToad.PdfPig/Filters/DefaultFilterProvider.cs @@ -117,5 +117,6 @@ { return filterInstances.Values.Distinct().ToList(); } + } } \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Filters/FilterProviderWithLookup.cs b/src/UglyToad.PdfPig/Filters/FilterProviderWithLookup.cs new file mode 100644 index 00000000..5405782f --- /dev/null +++ b/src/UglyToad.PdfPig/Filters/FilterProviderWithLookup.cs @@ -0,0 +1,73 @@ +namespace UglyToad.PdfPig.Filters +{ + using System; + using System.Collections.Generic; + using System.Linq; + using Core; + using Parser.Parts; + using Tokenization.Scanner; + using Tokens; + + internal class FilterProviderWithLookup : ILookupFilterProvider + { + private readonly IFilterProvider inner; + + public FilterProviderWithLookup(IFilterProvider inner) + { + this.inner = inner; + } + + public IReadOnlyList GetFilters(DictionaryToken dictionary) + => inner.GetFilters(dictionary); + + public IReadOnlyList GetNamedFilters(IReadOnlyList names) + => inner.GetNamedFilters(names); + + public IReadOnlyList GetAllFilters() + => inner.GetAllFilters(); + + public IReadOnlyList GetFilters(DictionaryToken dictionary, IPdfTokenScanner scanner) + { + if (dictionary == null) + { + throw new ArgumentNullException(nameof(dictionary)); + } + + if (!dictionary.TryGet(NameToken.Filter, out var token)) + { + return EmptyArray.Instance; + } + + switch (token) + { + case ArrayToken filters: + var result = new NameToken[filters.Data.Count]; + for (var i = 0; i < filters.Data.Count; i++) + { + var filterToken = filters.Data[i]; + var filterName = (NameToken)filterToken; + result[i] = filterName; + } + + return GetNamedFilters(result); + case NameToken name: + return GetNamedFilters(new[] {name}); + case IndirectReferenceToken irt: + if (DirectObjectFinder.TryGet(irt, scanner, out var indirectName)) + { + return GetNamedFilters(new []{ indirectName }); + } + else if (DirectObjectFinder.TryGet(irt, scanner, out var indirectArray)) + { + return GetNamedFilters(indirectArray.Data.Select(x => (NameToken) x).ToList()); + } + else + { + throw new PdfDocumentFormatException($"The filter for the stream was not a valid object. Expected name or array, instead got: {token}."); + } + default: + throw new PdfDocumentFormatException($"The filter for the stream was not a valid object. Expected name or array, instead got: {token}."); + } + } + } +} \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Filters/IFilterProvider.cs b/src/UglyToad.PdfPig/Filters/IFilterProvider.cs index dad2422c..51bcfecc 100644 --- a/src/UglyToad.PdfPig/Filters/IFilterProvider.cs +++ b/src/UglyToad.PdfPig/Filters/IFilterProvider.cs @@ -1,6 +1,7 @@ namespace UglyToad.PdfPig.Filters { using System.Collections.Generic; + using Tokenization.Scanner; using Tokens; /// @@ -23,4 +24,9 @@ /// IReadOnlyList GetAllFilters(); } + + internal interface ILookupFilterProvider : IFilterProvider + { + IReadOnlyList GetFilters(DictionaryToken dictionary, IPdfTokenScanner scanner); + } } \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs b/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs index 482e2249..95ee0495 100644 --- a/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs +++ b/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs @@ -16,7 +16,7 @@ using System.Linq; using Tokenization.Scanner; using Tokens; - using UglyToad.PdfPig.Graphics.Operations.TextPositioning; + using Operations.TextPositioning; using XObjects; using static PdfPig.Core.PdfSubpath; @@ -47,7 +47,7 @@ private readonly PageRotationDegrees rotation; private readonly IPdfTokenScanner pdfScanner; private readonly IPageContentParser pageContentParser; - private readonly IFilterProvider filterProvider; + private readonly ILookupFilterProvider filterProvider; private readonly ILog log; private readonly bool clipPaths; private readonly PdfVector pageSize; @@ -88,7 +88,7 @@ public ContentStreamProcessor(PdfRectangle cropBox, IResourceStore resourceStore, UserSpaceUnit userSpaceUnit, PageRotationDegrees rotation, IPdfTokenScanner pdfScanner, IPageContentParser pageContentParser, - IFilterProvider filterProvider, + ILookupFilterProvider filterProvider, ILog log, bool clipPaths, PdfVector pageSize) @@ -448,7 +448,7 @@ startState.CurrentTransformationMatrix = resultingTransformationMatrix; - var contentStream = formStream.Decode(filterProvider); + var contentStream = formStream.Decode(filterProvider, pdfScanner); var operations = pageContentParser.Parse(pageNumber, new ByteArrayInputBytes(contentStream), log); diff --git a/src/UglyToad.PdfPig/Graphics/InlineImageBuilder.cs b/src/UglyToad.PdfPig/Graphics/InlineImageBuilder.cs index 56a9bedb..ec95c769 100644 --- a/src/UglyToad.PdfPig/Graphics/InlineImageBuilder.cs +++ b/src/UglyToad.PdfPig/Graphics/InlineImageBuilder.cs @@ -18,7 +18,7 @@ public IReadOnlyList Bytes { get; set; } - public InlineImage CreateInlineImage(TransformationMatrix transformationMatrix, IFilterProvider filterProvider, + public InlineImage CreateInlineImage(TransformationMatrix transformationMatrix, ILookupFilterProvider filterProvider, IPdfTokenScanner tokenScanner, RenderingIntent defaultRenderingIntent, IResourceStore resourceStore) @@ -28,8 +28,6 @@ throw new InvalidOperationException($"Inline image builder not completely defined before calling {nameof(CreateInlineImage)}."); } - - var bounds = transformationMatrix.Transform(new PdfRectangle(new PdfPoint(1, 1), new PdfPoint(0, 0))); diff --git a/src/UglyToad.PdfPig/Graphics/MarkedContentStack.cs b/src/UglyToad.PdfPig/Graphics/MarkedContentStack.cs index bef8b3ed..9d26487b 100644 --- a/src/UglyToad.PdfPig/Graphics/MarkedContentStack.cs +++ b/src/UglyToad.PdfPig/Graphics/MarkedContentStack.cs @@ -69,7 +69,7 @@ public void AddXObject(XObjectContentRecord xObject, IPdfTokenScanner scanner, - IFilterProvider filterProvider, + ILookupFilterProvider filterProvider, IResourceStore resourceStore) { if (top != null && xObject.Type == XObjectType.Image) diff --git a/src/UglyToad.PdfPig/Parser/PageFactory.cs b/src/UglyToad.PdfPig/Parser/PageFactory.cs index 287f8d38..20ffce3e 100644 --- a/src/UglyToad.PdfPig/Parser/PageFactory.cs +++ b/src/UglyToad.PdfPig/Parser/PageFactory.cs @@ -19,11 +19,11 @@ { private readonly IPdfTokenScanner pdfScanner; private readonly IResourceStore resourceStore; - private readonly IFilterProvider filterProvider; + private readonly ILookupFilterProvider filterProvider; private readonly IPageContentParser pageContentParser; private readonly ILog log; - public PageFactory(IPdfTokenScanner pdfScanner, IResourceStore resourceStore, IFilterProvider filterProvider, + public PageFactory(IPdfTokenScanner pdfScanner, IResourceStore resourceStore, ILookupFilterProvider filterProvider, IPageContentParser pageContentParser, ILog log) { @@ -122,7 +122,7 @@ throw new InvalidOperationException($"Could not find the contents for object {obj}."); } - bytes.AddRange(contentStream.Decode(filterProvider)); + bytes.AddRange(contentStream.Decode(filterProvider, pdfScanner)); if (i < array.Data.Count - 1) { @@ -141,7 +141,7 @@ throw new InvalidOperationException("Failed to parse the content for the page: " + number); } - var bytes = contentStream.Decode(filterProvider); + var bytes = contentStream.Decode(filterProvider, pdfScanner); content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, clipPaths, mediaBox); } diff --git a/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs b/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs index ad1e9be6..c5ac1eb4 100644 --- a/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs +++ b/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs @@ -83,7 +83,7 @@ private static PdfDocument OpenDocument(IInputBytes inputBytes, ISeekableTokenScanner scanner, ILog log, bool isLenientParsing, IReadOnlyList passwords, bool clipPaths) { - var filterProvider = DefaultFilterProvider.Instance; + var filterProvider = new FilterProviderWithLookup(DefaultFilterProvider.Instance); CrossReferenceTable crossReferenceTable = null; diff --git a/src/UglyToad.PdfPig/PdfDocument.cs b/src/UglyToad.PdfPig/PdfDocument.cs index cccd9cfc..bbc762d9 100644 --- a/src/UglyToad.PdfPig/PdfDocument.cs +++ b/src/UglyToad.PdfPig/PdfDocument.cs @@ -44,7 +44,7 @@ [NotNull] private readonly IPdfTokenScanner pdfScanner; - private readonly IFilterProvider filterProvider; + private readonly ILookupFilterProvider filterProvider; private readonly BookmarksProvider bookmarksProvider; [NotNull] @@ -92,7 +92,7 @@ DocumentInformation information, EncryptionDictionary encryptionDictionary, IPdfTokenScanner pdfScanner, - IFilterProvider filterProvider, + ILookupFilterProvider filterProvider, AcroFormFactory acroFormFactory, BookmarksProvider bookmarksProvider, bool clipPaths) @@ -202,7 +202,7 @@ return false; } - metadata = new XmpMetadata(xmpStreamToken, filterProvider); + metadata = new XmpMetadata(xmpStreamToken, filterProvider, pdfScanner); return true; } @@ -219,7 +219,11 @@ } bookmarks = bookmarksProvider.GetBookmarks(Structure.Catalog); - if (bookmarks != null) return true; + if (bookmarks != null) + { + return true; + } + return false; } diff --git a/src/UglyToad.PdfPig/PdfExtensions.cs b/src/UglyToad.PdfPig/PdfExtensions.cs index 93bffa7e..0d0809fb 100644 --- a/src/UglyToad.PdfPig/PdfExtensions.cs +++ b/src/UglyToad.PdfPig/PdfExtensions.cs @@ -62,5 +62,18 @@ return transform; } + + internal static IReadOnlyList Decode(this StreamToken stream, ILookupFilterProvider filterProvider, IPdfTokenScanner scanner) + { + var filters = filterProvider.GetFilters(stream.StreamDictionary, scanner); + + var transform = stream.Data; + for (var i = 0; i < filters.Count; i++) + { + transform = filters[i].Decode(transform, stream.StreamDictionary, i); + } + + return transform; + } } } diff --git a/src/UglyToad.PdfPig/PdfFonts/Parser/Handlers/TrueTypeFontHandler.cs b/src/UglyToad.PdfPig/PdfFonts/Parser/Handlers/TrueTypeFontHandler.cs index aa887dbe..95ddad0d 100644 --- a/src/UglyToad.PdfPig/PdfFonts/Parser/Handlers/TrueTypeFontHandler.cs +++ b/src/UglyToad.PdfPig/PdfFonts/Parser/Handlers/TrueTypeFontHandler.cs @@ -25,12 +25,12 @@ { private readonly ILog log; private readonly IPdfTokenScanner pdfScanner; - private readonly IFilterProvider filterProvider; + private readonly ILookupFilterProvider filterProvider; private readonly IEncodingReader encodingReader; private readonly ISystemFontFinder systemFontFinder; private readonly IFontHandler type1FontHandler; - public TrueTypeFontHandler(ILog log, IPdfTokenScanner pdfScanner, IFilterProvider filterProvider, + public TrueTypeFontHandler(ILog log, IPdfTokenScanner pdfScanner, ILookupFilterProvider filterProvider, IEncodingReader encodingReader, ISystemFontFinder systemFontFinder, IFontHandler type1FontHandler) @@ -109,7 +109,7 @@ { var toUnicode = DirectObjectFinder.Get(toUnicodeObj, pdfScanner); - var decodedUnicodeCMap = toUnicode.Decode(filterProvider); + var decodedUnicodeCMap = toUnicode.Decode(filterProvider, pdfScanner); if (decodedUnicodeCMap != null) { @@ -173,7 +173,7 @@ { var fontFileStream = DirectObjectFinder.Get(descriptor.FontFile.ObjectKey, pdfScanner); - var fontFile = fontFileStream.Decode(filterProvider); + var fontFile = fontFileStream.Decode(filterProvider, pdfScanner); if (descriptor.FontFile.FileType == DescriptorFontFile.FontFileType.FromSubtype) { diff --git a/src/UglyToad.PdfPig/PdfFonts/Parser/Handlers/Type0FontHandler.cs b/src/UglyToad.PdfPig/PdfFonts/Parser/Handlers/Type0FontHandler.cs index d17b144f..d12e2162 100644 --- a/src/UglyToad.PdfPig/PdfFonts/Parser/Handlers/Type0FontHandler.cs +++ b/src/UglyToad.PdfPig/PdfFonts/Parser/Handlers/Type0FontHandler.cs @@ -16,10 +16,10 @@ internal class Type0FontHandler : IFontHandler { private readonly CidFontFactory cidFontFactory; - private readonly IFilterProvider filterProvider; + private readonly ILookupFilterProvider filterProvider; private readonly IPdfTokenScanner scanner; - public Type0FontHandler(CidFontFactory cidFontFactory, IFilterProvider filterProvider, + public Type0FontHandler(CidFontFactory cidFontFactory, ILookupFilterProvider filterProvider, IPdfTokenScanner scanner) { this.cidFontFactory = cidFontFactory; @@ -66,7 +66,7 @@ if (DirectObjectFinder.TryGet(toUnicodeValue, scanner, out var toUnicodeStream)) { - var decodedUnicodeCMap = toUnicodeStream?.Decode(filterProvider); + var decodedUnicodeCMap = toUnicodeStream?.Decode(filterProvider, scanner); if (decodedUnicodeCMap != null) { @@ -155,7 +155,7 @@ } else if (dictionary.TryGet(NameToken.Encoding, scanner, out StreamToken stream)) { - var decoded = stream.Decode(filterProvider); + var decoded = stream.Decode(filterProvider, scanner); var cmap = CMapCache.Parse(new ByteArrayInputBytes(decoded)); diff --git a/src/UglyToad.PdfPig/PdfFonts/Parser/Handlers/Type1FontHandler.cs b/src/UglyToad.PdfPig/PdfFonts/Parser/Handlers/Type1FontHandler.cs index 4b187770..51a2dfe6 100644 --- a/src/UglyToad.PdfPig/PdfFonts/Parser/Handlers/Type1FontHandler.cs +++ b/src/UglyToad.PdfPig/PdfFonts/Parser/Handlers/Type1FontHandler.cs @@ -17,10 +17,10 @@ internal class Type1FontHandler : IFontHandler { private readonly IPdfTokenScanner pdfScanner; - private readonly IFilterProvider filterProvider; + private readonly ILookupFilterProvider filterProvider; private readonly IEncodingReader encodingReader; - public Type1FontHandler(IPdfTokenScanner pdfScanner, IFilterProvider filterProvider, + public Type1FontHandler(IPdfTokenScanner pdfScanner, ILookupFilterProvider filterProvider, IEncodingReader encodingReader) { this.pdfScanner = pdfScanner; @@ -92,7 +92,7 @@ { var toUnicode = DirectObjectFinder.Get(toUnicodeObj, pdfScanner); - var decodedUnicodeCMap = toUnicode?.Decode(filterProvider); + var decodedUnicodeCMap = toUnicode?.Decode(filterProvider, pdfScanner); if (decodedUnicodeCMap != null) { @@ -143,7 +143,7 @@ return null; } - var bytes = stream.Decode(filterProvider); + var bytes = stream.Decode(filterProvider, pdfScanner); // We have a Compact Font Format font rather than an Adobe Type 1 Font. if (stream.StreamDictionary.TryGet(NameToken.Subtype, out NameToken subTypeName) diff --git a/src/UglyToad.PdfPig/PdfFonts/Parser/Handlers/Type3FontHandler.cs b/src/UglyToad.PdfPig/PdfFonts/Parser/Handlers/Type3FontHandler.cs index c3d6030e..387569e3 100644 --- a/src/UglyToad.PdfPig/PdfFonts/Parser/Handlers/Type3FontHandler.cs +++ b/src/UglyToad.PdfPig/PdfFonts/Parser/Handlers/Type3FontHandler.cs @@ -13,11 +13,11 @@ internal class Type3FontHandler : IFontHandler { - private readonly IFilterProvider filterProvider; + private readonly ILookupFilterProvider filterProvider; private readonly IEncodingReader encodingReader; private readonly IPdfTokenScanner scanner; - public Type3FontHandler(IPdfTokenScanner scanner, IFilterProvider filterProvider, + public Type3FontHandler(IPdfTokenScanner scanner, ILookupFilterProvider filterProvider, IEncodingReader encodingReader) { this.filterProvider = filterProvider; @@ -42,7 +42,7 @@ { var toUnicode = DirectObjectFinder.Get(toUnicodeObj, scanner); - var decodedUnicodeCMap = toUnicode?.Decode(filterProvider); + var decodedUnicodeCMap = toUnicode?.Decode(filterProvider, scanner); if (decodedUnicodeCMap != null) { diff --git a/src/UglyToad.PdfPig/PdfFonts/Parser/Parts/CidFontFactory.cs b/src/UglyToad.PdfPig/PdfFonts/Parser/Parts/CidFontFactory.cs index 01f9d175..8cbdb01d 100644 --- a/src/UglyToad.PdfPig/PdfFonts/Parser/Parts/CidFontFactory.cs +++ b/src/UglyToad.PdfPig/PdfFonts/Parser/Parts/CidFontFactory.cs @@ -17,10 +17,10 @@ internal class CidFontFactory { - private readonly IFilterProvider filterProvider; + private readonly ILookupFilterProvider filterProvider; private readonly IPdfTokenScanner pdfScanner; - public CidFontFactory(IPdfTokenScanner pdfScanner, IFilterProvider filterProvider) + public CidFontFactory(IPdfTokenScanner pdfScanner, ILookupFilterProvider filterProvider) { this.pdfScanner = pdfScanner; this.filterProvider = filterProvider; @@ -109,7 +109,7 @@ return null; } - var fontFile = fontFileStream.Decode(filterProvider); + var fontFile = fontFileStream.Decode(filterProvider, pdfScanner); switch (descriptor.FontFile.FileType) { @@ -134,14 +134,14 @@ if (subtypeName == NameToken.CidFontType0C || subtypeName == NameToken.Type1C) { - var bytes = str.Decode(filterProvider); + var bytes = str.Decode(filterProvider, pdfScanner); var font = CompactFontFormatParser.Parse(new CompactFontFormatData(bytes)); return new PdfCidCompactFontFormatFont(font); } if (subtypeName == NameToken.OpenType) { - var bytes = str.Decode(filterProvider); + var bytes = str.Decode(filterProvider, pdfScanner); var ttf = TrueTypeFontParser.Parse(new TrueTypeDataBytes(new ByteArrayInputBytes(bytes))); return new PdfCidTrueTypeFont(ttf); } @@ -301,7 +301,7 @@ throw new PdfDocumentFormatException($"No stream or name token found for /CIDToGIDMap in dictionary: {dictionary}."); } - var bytes = stream.Decode(filterProvider); + var bytes = stream.Decode(filterProvider, pdfScanner); return new CharacterIdentifierToGlyphIndexMap(bytes); } diff --git a/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs b/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs index 360c131f..5006373c 100644 --- a/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs +++ b/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs @@ -22,7 +22,7 @@ private readonly IInputBytes inputBytes; private readonly IObjectLocationProvider objectLocationProvider; - private readonly IFilterProvider filterProvider; + private readonly ILookupFilterProvider filterProvider; private readonly CoreTokenScanner coreTokenScanner; private IEncryptionHandler encryptionHandler; @@ -50,7 +50,7 @@ public long Length => coreTokenScanner.Length; - public PdfTokenScanner(IInputBytes inputBytes, IObjectLocationProvider objectLocationProvider, IFilterProvider filterProvider, + public PdfTokenScanner(IInputBytes inputBytes, IObjectLocationProvider objectLocationProvider, ILookupFilterProvider filterProvider, IEncryptionHandler encryptionHandler) { this.inputBytes = inputBytes; @@ -794,7 +794,7 @@ } // Read the N integers - var bytes = new ByteArrayInputBytes(stream.Decode(filterProvider)); + var bytes = new ByteArrayInputBytes(stream.Decode(filterProvider, this)); var scanner = new CoreTokenScanner(bytes); diff --git a/src/UglyToad.PdfPig/Util/ColorSpaceDetailsParser.cs b/src/UglyToad.PdfPig/Util/ColorSpaceDetailsParser.cs index 47513598..66f6fb27 100644 --- a/src/UglyToad.PdfPig/Util/ColorSpaceDetailsParser.cs +++ b/src/UglyToad.PdfPig/Util/ColorSpaceDetailsParser.cs @@ -71,7 +71,7 @@ DictionaryToken imageDictionary, IPdfTokenScanner scanner, IResourceStore resourceStore, - IFilterProvider filterProvider, + ILookupFilterProvider filterProvider, bool cannotRecurse = false) { if (!colorSpace.HasValue) @@ -179,7 +179,7 @@ } else if (DirectObjectFinder.TryGet(fourth, scanner, out StreamToken tableStreamToken)) { - tableBytes = tableStreamToken.Decode(filterProvider); + tableBytes = tableStreamToken.Decode(filterProvider, scanner); } else { diff --git a/src/UglyToad.PdfPig/XObjects/XObjectFactory.cs b/src/UglyToad.PdfPig/XObjects/XObjectFactory.cs index f9bd802d..57c487f0 100644 --- a/src/UglyToad.PdfPig/XObjects/XObjectFactory.cs +++ b/src/UglyToad.PdfPig/XObjects/XObjectFactory.cs @@ -9,7 +9,6 @@ using Graphics; using Graphics.Colors; using Graphics.Core; - using Parser.Parts; using Tokenization.Scanner; using Tokens; using Util; @@ -17,7 +16,7 @@ internal static class XObjectFactory { public static XObjectImage ReadImage(XObjectContentRecord xObject, IPdfTokenScanner pdfScanner, - IFilterProvider filterProvider, + ILookupFilterProvider filterProvider, IResourceStore resourceStore) { if (xObject == null) @@ -89,7 +88,7 @@ var supportsFilters = filterDictionary != null; if (filterDictionary != null) { - var filters = filterProvider.GetFilters(filterDictionary); + var filters = filterProvider.GetFilters(filterDictionary, pdfScanner); foreach (var filter in filters) { if (!filter.IsSupported) @@ -100,7 +99,7 @@ } } - var decodedBytes = supportsFilters ? new Lazy>(() => xObject.Stream.Decode(filterProvider)) + var decodedBytes = supportsFilters ? new Lazy>(() => xObject.Stream.Decode(filterProvider, pdfScanner)) : null; var decode = EmptyArray.Instance;