From 06253966e4726beff15c969075a20894648a8610 Mon Sep 17 00:00:00 2001 From: mvantzet Date: Fri, 13 Jan 2023 12:35:25 +0100 Subject: [PATCH 1/2] Added Letter properties RenderingMode, StrokeColor, FillColor and added those as mandatory constructor arguments. Kept property Color, which contains either StrokeColor (if rendering mode is Stroke) or FillColor (for all other rendering modes). In PdfPageBuilder opted for default text rendering mode "Fill" which seems like a sensible default. --- .../DuplicateOverlappingTextProcessor.cs | 4 +- .../ContentOrderTextExtractor.cs | 4 +- src/UglyToad.PdfPig/Content/Letter.cs | 39 +++++++++++++++++-- .../Graphics/ContentStreamProcessor.cs | 35 +++++------------ src/UglyToad.PdfPig/Writer/PdfPageBuilder.cs | 11 +++++- 5 files changed, 62 insertions(+), 31 deletions(-) diff --git a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/DuplicateOverlappingTextProcessor.cs b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/DuplicateOverlappingTextProcessor.cs index b3e3e2b1..e6900dfe 100644 --- a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/DuplicateOverlappingTextProcessor.cs +++ b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/DuplicateOverlappingTextProcessor.cs @@ -77,7 +77,9 @@ letter.Width, letter.FontSize, fontDetails, - letter.Color, + letter.RenderingMode, + letter.StrokeColor, + letter.FillColor, letter.PointSize, letter.TextSequence); diff --git a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextExtractor/ContentOrderTextExtractor.cs b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextExtractor/ContentOrderTextExtractor.cs index 8766a694..80f5f58a 100644 --- a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextExtractor/ContentOrderTextExtractor.cs +++ b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextExtractor/ContentOrderTextExtractor.cs @@ -65,7 +65,9 @@ letter.Width, letter.FontSize, letter.Font, - letter.Color, + letter.RenderingMode, + letter.StrokeColor, + letter.FillColor, letter.PointSize, letter.TextSequence); } diff --git 
a/src/UglyToad.PdfPig/Content/Letter.cs b/src/UglyToad.PdfPig/Content/Letter.cs index b15ea4f3..08abfcd3 100644 --- a/src/UglyToad.PdfPig/Content/Letter.cs +++ b/src/UglyToad.PdfPig/Content/Letter.cs @@ -61,12 +61,33 @@ /// Details about the font for this letter. /// </summary> public FontDetails Font { get; } + + /// <summary> + /// Text rendering mode that indicates whether we should draw this letter's strokes, + /// fill, both, neither (in case of hidden text), etc. + /// If it calls for stroking the <see cref="StrokeColor"/> is used. + /// If it calls for filling, the <see cref="FillColor"/> is used. + /// In modes that perform both filling and stroking, the effect is as if each glyph outline were filled and then stroked in separate operations. + /// </summary> + public TextRenderingMode RenderingMode { get; } /// <summary> - /// The color of the letter. + /// The primary color of the letter, which is either the <see cref="StrokeColor"/> in case + /// <see cref="RenderingMode"/> is <see cref="TextRenderingMode.Stroke"/>, or otherwise + /// it is the <see cref="FillColor"/>. /// </summary> public IColor Color { get; } + /// <summary> + /// Stroking color + /// </summary> + public IColor StrokeColor { get; } + + /// <summary> + /// Non-stroking (fill) color + /// </summary> + public IColor FillColor { get; } + /// <summary> /// The size of the font in points. /// </summary> @@ -86,7 +107,9 @@ double width, double fontSize, FontDetails font, - IColor color, + TextRenderingMode renderingMode, + IColor strokeColor, + IColor fillColor, double pointSize, int textSequence) { @@ -97,7 +120,17 @@ Width = width; FontSize = fontSize; Font = font; - Color = color ?? GrayColor.Black; + RenderingMode = renderingMode; + if (renderingMode == TextRenderingMode.Stroke) + { + Color = StrokeColor = strokeColor ?? GrayColor.Black; + FillColor = fillColor; + } + else + { + Color = FillColor = fillColor ?? 
GrayColor.Black; + StrokeColor = strokeColor; + } PointSize = pointSize; TextSequence = textSequence; TextOrientation = GetTextOrientation(); diff --git a/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs b/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs index 1e5efffd..7759c9cc 100644 --- a/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs +++ b/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs @@ -292,14 +292,7 @@ var transformedPdfBounds = PerformantRectangleTransformer .Transform(renderingMatrix, textMatrix, transformationMatrix, new PdfRectangle(0, 0, boundingBox.Width, 0)); - // If the text rendering mode calls for filling, the current nonstroking color in the graphics state is used; - // if it calls for stroking, the current stroking color is used. - // In modes that perform both filling and stroking, the effect is as if each glyph outline were filled and then stroked in separate operations. - // TODO: expose color as something more advanced - var color = currentState.FontState.TextRenderingMode != TextRenderingMode.Stroke - ? currentState.CurrentNonStrokingColor - : currentState.CurrentStrokingColor; - + Letter letter = null; if (Diacritics.IsInCombiningDiacriticRange(unicode) && bytes.CurrentOffset > 0 && letters.Count > 0) { @@ -319,26 +312,16 @@ attachTo.Width, attachTo.FontSize, attachTo.Font, - attachTo.Color, + attachTo.RenderingMode, + attachTo.StrokeColor, + attachTo.FillColor, attachTo.PointSize, attachTo.TextSequence); } - else - { - letter = new Letter( - unicode, - transformedGlyphBounds, - transformedPdfBounds.BottomLeft, - transformedPdfBounds.BottomRight, - transformedPdfBounds.Width, - fontSize, - font.Details, - color, - pointSize, - textSequence); - } } - else + + // If we did not create a letter for a combined diacritic, create one here. 
+ if (letter == null) { letter = new Letter( unicode, @@ -348,7 +331,9 @@ transformedPdfBounds.Width, fontSize, font.Details, - color, + currentState.FontState.TextRenderingMode, + currentState.CurrentStrokingColor, + currentState.CurrentNonStrokingColor, pointSize, textSequence); } diff --git a/src/UglyToad.PdfPig/Writer/PdfPageBuilder.cs b/src/UglyToad.PdfPig/Writer/PdfPageBuilder.cs index c814f75b..7552a9ac 100644 --- a/src/UglyToad.PdfPig/Writer/PdfPageBuilder.cs +++ b/src/UglyToad.PdfPig/Writer/PdfPageBuilder.cs @@ -895,7 +895,16 @@ var documentSpace = textMatrix.Transform(renderingMatrix.Transform(fontMatrix.Transform(rect))); - var letter = new Letter(c.ToString(), documentSpace, advanceRect.BottomLeft, advanceRect.BottomRight, width, (double)fontSize, FontDetails.GetDefault(name), + var letter = new Letter( + c.ToString(), + documentSpace, + advanceRect.BottomLeft, + advanceRect.BottomRight, + width, + (double)fontSize, + FontDetails.GetDefault(name), + TextRenderingMode.Fill, + GrayColor.Black, GrayColor.Black, (double)fontSize, textSequence); From 2acca329874679f787ab754d5369c620c7ac3f28 Mon Sep 17 00:00:00 2001 From: mvantzet Date: Fri, 13 Jan 2023 14:11:13 +0100 Subject: [PATCH 2/2] Added integration test to see if we can detect the presence of invisible text (text rendering mode = Neither), visible text, the presence of images and the presence of paths. Certain combinations thereof potentially must be run through OCR. 
--- .../Documents/Various Content Types.pdf | Bin 0 -> 31327 bytes .../Integration/PageContentTests.cs | 24 ++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 src/UglyToad.PdfPig.Tests/Integration/Documents/Various Content Types.pdf create mode 100644 src/UglyToad.PdfPig.Tests/Integration/PageContentTests.cs diff --git a/src/UglyToad.PdfPig.Tests/Integration/Documents/Various Content Types.pdf b/src/UglyToad.PdfPig.Tests/Integration/Documents/Various Content Types.pdf new file mode 100644 index 0000000000000000000000000000000000000000..19c833951ee40368337d6e52ba310733892c575c GIT binary patch literal 31327 zcmeFZc|26#|2Tf{%rFdM7)zFnHL{LfmKg~lS<6<9eN7ZfvbCzoQW2#>mQpE%7GbE+ z`VwhD_7*KNN=(d*@42IT_4<6?kKgC_`F_5i&p+SqW$r!ad7kHap4-{ay(%uNZM8_+ zMmUwxvCd~W5>b~J9S}jpnVaKuVuC{ABB?I^p~36oqlhG&j$3eSbo}y_z;nat zSVwP-3*H(ROVnSCyZCPhQKG?O%wtzfFi{7|kJE7q4hr{Q6`erz)dh)$riR-3y85R2 z#=5%3CdLLt69baAiHWJPk-o99siB_9CY+Al_UQPSMMkXMi8`x;cZ3H9yVDUwF zw#NgS0P3;be`{@$ z>e!NqBt4=>2+`MEm!zv_sb{XMZ)s?*YhbAbf`&-kh#Ms78e5W#&2>!>5Esvs3pcUc z}d>6;P* zqkv_i42Y|viLN+=MbJZ8gPr;LewW2E>idnFhE!$85=KV!08(<^7NY`P!5-e z-vGl!fQkOEGL05_Sd?lD(SWqbgYnNCIIb-K*CxNjFyNM%`~>_Wp>M)vQ_uKsL5+a2^?wzL)6-jw|6+<{ z%w{x23uh)V)B7#sbx=0Ch) z3o#5AvI5j(Jt%#W5P~r|yfr)DIvEf@oiS`b5Lf&7DQ8B+(JKO4WUseVI5GNo+U^K{s;o;}u;U@?Q2oe6%*Fp<~=Yt=kLea1ygu=sU zJiKrZk_STaz}yS^dBG^aBQKVZA1441>O~+Fj7Fm{XoPEkoea_;44y|+fn>!i=IW1C z+(yvbbLR;@QB#B_#HbFb|voHf8b#Hp~JM}Ik_kD@=u;BC@C!~KYyX(;-zb~ zb@kV8+`QFr=kC3>_Ky1x9(F(LdH&+%tKPnm(Xlsg-@PCI@Nw$vx9RUQv&>{XtW&f2X)JyDtuc_rk0Cu^F1mH?ftnCxCmEbg8uNfWCFr4XQ^ z1B9FJGC%=LP!1qmbU^-q7T{rdgwW5DQV4ZOfhk<{T(_TWXd#@9B1|(%8C+giYT&^d zhsih|gn)|t0YJb7l|uMQfGbA|!L5tJpB+J(u#tL~Cb-*etS<-KotGv?2*?U6MiT=2 zXlhVLz0^zzfGz6@?8`4$7sdsYmldOz^#tS{2f18no5R(hHmPzT2I1yRSqT@mQQ83_ zInvhkG>yX~$Lt7)&C^g|Uk;%z7XmnKI*IFgKs;mjFAf~-DZFZJK#%}{v!o6>S-Pyp z?I|iCe_7Aji(xRy*5V>yNp`xF;7Z}OxJa6Cd;)MYcDvd@swN+-mi07M7%UbL3(_k( z>IwgNwh>%l8p+N8xlh<2r7Y{I1Cg~{2C~$*P1u*kUSc+cFM89%(V?t4Ue^V9844JDo`tir1jfm-L_E#3iNGM 
zAnmXxfL#zIKp`1jgIUuE4=P&F($S3W9p&7@O^VUxT?GhHecLcr#VA6?1UQo5l$F8R zi%!teQiE+a+`Ao@$YS`9J7C}^PO-FFF{YDnt89$WxEm?fjqnO( z$Q|Kh=1Sprq^V6J*r+r?w;koB^$-D=kC0mzkcM;cQG-EF7nEDqE0C)%FRRA|1|gBPS&XjN z0tSTC$t!#uAnHRz+ccAstT1Q{G!Vs@2}k?BeFcyY7gUPWAVh)aucb{uAW#v39=N<> zfu(^>9oOgs#rUk-=19RV>)8gJ90;C^rml{P?FVy z)H6W10I_`cyW;?CL#TsJaj_E0C+#B4klf#FbP|C@cXzv>-sKq7ovnbwL2U&UY%QRx zg;Y`KiPrK&A81`q2o1DNr^1scV3vVa4`&AQsD>dS2OplFzRvX+Br{@gUA2~C8i~TcXVQ@dTo!6m<XdYLN|+;{!-%}=tZ6CL#j+VwfBD5p?pm)enT6Z%4a=S{7H+Pw>zTQ|{|mrM~YQ?1LAeqoY1|PtUwzvh{7YreIMcK^wbh#*HXv2i*XUFpnYM%874?!#RS7iwSSYyw?d@ebUQ;u-9wrM!jITUv z@kO(jF0sot6$i1i+oayn4b4?R3AY*R-?fgW9xRy2DQb*6Jsw>%J*ZF>!N_Nc@1BXB zvTSRZvMn9CMJ-EgK4$X%X`O~*_PxW+5FRFd%4l5Cei%$=WI1Zudul(UdZ3qZ;~yg(L?@p0^u!(`cQtb1jNy z4EFiF-uL~RV#JI*Pikv$vr!nY$@dA3yUWzml4lG4at!Yu?^+|0I>g&7P?ARWO+wtg zg)wnQky$#j$oXV@9C+Pf(7&y8XosgG0lrkJo+ zGq+3?;h3N9ZC>W_VcFSBPJ*GYN~+x~4z(p)Gfs1GV^f_YEJoX?Mw4;JT)N$x>uM6_Ri6;spAICtx_o?ZgT| zPX)RbWK36}4|=pxgAioc1_p2t2xdHm3)X^a`4qLI{0SK8fhb7f=H#ZSf$?r`U2Yw? z6`j?BWL?`-2vD6{02uX4&>Vi~&s2jdk)347F7R*c`h3;Bz0fR!uu0hIq|`(ar!ZtG zeyl-|?wpG*faa7&)7zr?lcX5Q?`h`YSAE8%RVsdb(e};X8gr;r^Gi(3C4P$oDJxT6 z6*1NeOC@cI91=`2HQvm}u>L}GEK4rPD>=Jl)_p#g{M!1s{;fl1c8}%jBq!9zhWbL= zbw-6{BZU&~dwG9&8e(QfAh!zcQb-i6d1S;p8u_RKTE2Yw6)+7=q?TG1pVGuQ}5+-xJJKd2-1Q*wK)qo=SVUy3T# z?ljHpiFnibd~% z6k%72e2?%R&hiBE=+?sMB&E@_^7m$ngUh?4OKu4iO>POSo0tA@`A0~X;e_*DpEE17 zj#UhN@Y#=z^W5QHr`MjmzGp8PO$}u6AVsl)nNC_wMKtXaW=^rHc<~G0;Ez@Fk`BgV zlf)cGvGBkOld61rXc1Gbv$d*Er0@M3_vH;aLe#H%=vL|)}`#Mc^w)bCNfR?Z<3qG|p^Qx45^Ghx` zF&Fpv+C_FZXqz?1EbfQ+#aK!z*C-#if0F}-p2!66zvJiGL$3whW9;bc=Z`NM5+gd= zmx&$0#c3tlo(Qz+<8Nkp$w4aES)rtQ8z^;^{IyvY0XwpD!dg~3<#dI-5VJM^J!A0d z#~jlROKWy-2le*BmIv#r%0K+DV* zPSH-$&SWVG$p#Y9#=C}7NQ8@?OV zVihn)%54P0U+z>W#Ur;4o+@IAr;omdo94O|uX^7A>@93_g(6PJ~Fx{WPx&+yKw z_aD8$o4qunvzacT#;g#BeHawoIT~e`dH>h(LSOk4Lrv6B>`^fCw2_G%%QA1W#RQ#@BVqBM!(vrt_v+M9jD@>gm zU)?##at+aLJDC@--Tq@ZzoxzN@mZC)&82bg7pW`;JBcq zeL&ZhCX(5*@4khVIE%9Ywc0E|A!KGqRiCiS5y$$k=JKN!b5}F#V~%MUPhRO@$Hu0S 
zZ+6YSoaQND=5TORjP6J6TC4g^)lwVAE?Meb=#jwAVL{P1?xfZU-yJY(-z?O#VF7Zy zz0o`r73ocp897DX^U^$vC9;Nibjep5cNBq4xNQibpMdX$fMo_(Wc)yI{lWFc0`5%I z!DlV#B-*EnG7ZS-h2lLd`%$NLLv~N6U#*?pdBdq*(Q5AaItZtZ0Ua9*MSifuYh6ZP*CU(MX*O>6( zY8J7pN}4R3u*GiMrAnu^=kvwE8#>n0j5Z2S-hO{ViuIuwLfwP(oGEHmQS8GUIh{7= zz8fR?>E&q~`G6{igeeW`Qg)UH?!(v~O?mu9w zp%uYO1G3uiKk{peoQYvqB<-5lPIxrGyEQRCU$7t8#J=HC?@+P1=EY<8AKbktc+12@ zSzZN=dik(mPNYx$i(;>$AD=W#1f0 zSO31|4j6h|r|zSi@R>@tRJF;o}T?DUJ=Q+FpAmO)Gjv=c(ox zPJT_#cY&q2EXSpf$1LYVsdvSHAR|7>ZgLGdib~6y-!a*vc&mdQf1O5U7p7Tm-InRtFmI+%~M!5O#CG_y(LAbhh?VV_th#|`%`!}+h9+vY!O4n@O_U9_G8EAZ-Dz^ zn&WspaWV*`$?orHJw^? zRw=ds;+!VgYK#@Vy3E0ujp+1+I!P}-?9oIuZmDl}u?*Kbo4xhCkxK4% z0cg!!_oFsxqf%nlGtZiaqlKa}x!Y5rFtnqr>prkPCN<|4fGl>)N06q$%9tQB$O_5m z_H!F{!G5cmPh_bU4Zol8pTgROt=(YUx9^h#R%*kITdIDNz21f7%8=&Ns@KlU6*`}H z?wnthWKuVkzrQkbC@V=zZ1jcre9U#ZBu4sBG+O)zJlL_{vK4Zh911bW>D6yZroz zS--i_0sl^p6v-aY(W0A4R7y4}nj&#U=9{YNVyz(oVskQUqlk5rf zaML#TEs5y|U%V=sG&j~4O;tadf=VNsb34kPD}Vp9;j-)33uZ=5v_^NmKeo5bRXrpi z5yC1z#l*!`olB8SX^~&X(W=u6ColPKVbs1>{>G<=2IH%WMwe~du#GcXpL~O1&EdNc z8gAIM0JUXHC)2|P=X4Hwlx<0ten*kYx~CbDES+X)!%nZC?9*u@6Y7S9S@Kis>bD$g zu$b+;u&eI0iT>k!yyf^*MjGj6p=1GjzSe%M?tpf%{g?Gnp4XV#)mLv0Jm}+}$jNr! 
zNWUw*zi^J0aHY$h)ta&vhN;8$g3^IsEjVK(ghJh zGLVJfQX1GP2nP$y->Nb|hdabnnY~-pr?a7P%IL^;4r=P`1HFx#U$v;zHO4-Fw@>3+ z&hxI^GsjyqlTZEVeu|}W?vrabrGLK2a9+!By3+U2C;hC-%!P`wV4mkaCk0z~ueekq z_-#Mtt5Z@esDq6R_suLbX6R^7#<2?#`92%7uq@W^ZfZ*f`p4A&WJB|-tjj*@a&}imTu%%+9jP^Q zZkL{Kzfb?UU8=`ktthE@b9@aX?DK^j+Iisc)QvIfzD5D9Pamp3HSG0%uu~cv=JKYN zM;S%I(%9wfywu@SMN7yb(Gne_&r*}ebO`->H75;Cx=pq`kkTPIERn}#K7Nvx(BT73 zMfcw3@QsYfEw_FapPeCn;9b+zN79ds)2BahI)k@3Te?3iYCMs?!`AA|o|}F}x0|I| ze9WDU9ld@G&%M)Bp^+V@Dm9O(ZsWYZIk*6A2;4M)Z1cIUQG<7bU5dQL%^t9%30fE+ zCJa_ZZX%mDtc%+y{4PV@z8^PuO60qgx7FW^BGjzWyPBc<&@RSG@q8%(W!C@yWAnC9jqe3roCM|0?SVm^=k8Bz4y4|oGCZiS8d9?hd5?5(M6MYx zGh;lzt$F?E)u5iwR+?QElgdH0#UHI#esvg|BJAbI_+_>KV=w2=eVzZVutn{R>I-)( z_Ek&#fN9^AQAe{aMRSqFBS5~Y@kvA0!z7fxaMsWgq3cd9Q>v%mj$NIsx66J(Ki>#8 zPaHOaVIUYYf(^G&+QzOdP@TY*=Fj~Ou%Y4t|8V``YL^I+SE!qUEk{Cb(?vBn@~b=t zS31vsVPAA~iat}ong86tBl^9>P1m-C>Yquz0)3FFxtl!_AX6$wuM&6k>DrQHS2`5F zvb@Z5E^1mhDn4@M)~v9z4h^~%R+zmOX={&Q*d&A74uQcF|$V(KFkI< z@++H$)SZy<+P=HmZvFE4hmrRy{s%kjxNUXanqlQ<53P6Y$>~e8ybXpMR8D7KdOKL^ zSoPAUmp&rH2%7rjIct+*r6(E{fSrG`;$X*YV5$TA{p)Bq z*v`s34m_V7R&qQI7uPq5N`ma-tM8YI;-A6AE&(2h3GuIl|1H3Zw^H<=MvW z&T|RzCLZK(57wEQonp3Nb2)>lOY7*_^7Z7*B-e8e+Hne+8zdwJx)X{I2Yj=;bX&t@ z#-BmgwA<-@P~<__B66YSk<#h8tBi@hWy2UHARkP zUElK><-Tr8=;DpQtUC!u_BuC4SbQ4M$&{}ZZWHePI^8-($3{qN^32#(Y!UN*xEphp zKcjFyw#Y>d{ix#qaL269SUdBqLo;;CvFdos{j#)RFdMW?Qez%I&+=hLjub5o7p?3| zx#2JVIe5tqNf&S7QjM){X5vcq@w=*8t6l|iaB@{C4@Hd+);}_BbYi@F7pVWrc6j55 zuf~JNU)8i?FWcTj?c%Wy7^}XSBe0QWN!nd ztV(U&8h>RiYaKJb%<%I9^y*RRqaAs^ow>{P_3BRVzW&^`uw`4%%Xj4|XHr{Z>aM4^ z7vT&=>00eYsau6FX07oBywO-4|9&1ux1kT8A~T%FIT`~#EKg?UgeNg6EAd=t6b&C+f{=a_6_~6Vy_S zp44sKA-~Pp#XEMstL%f3&yxJm^B=Asef41`ro5@?N&_ZK>iLdKE3%M-Ow+{z>eB7B zOXt?xqV}Zmm=R8vq)CcLOj~cU{=^>4Zs}TEHT0-( zwmidLXCK(>s_|J9p>3V`D-0%%HF1O|P=*bqAIi$yuwv zkq>n7oNk(morz`!PxUjk}?HU)a=-z)xeldX$dUsUP}O~E>6fzguVwr$>P#;xRp>};UqrHmUKoaEH( z$cnp5PEYYJK%URWTIPihp+4eI$Po;R8?u9fs|JSBEcIVBE3xp5Jq=#qyvKB}U^%ppGlIDUM%>V~m zI_kNHR=^}0+293xuSg@mwyRAWoG&T;T8?bwxOEjktO3xg^e0$+7v3)~Y`*T;mrWYJ 
z1+A}y8?^m~awbmuS(bT@Jx{fzUIzx<)oj6eY-z$M&s~;sEsBS8lBV}09?kD6IWTh) zj6g%^XWqGGtlzT%i^1PFfO1_o-=>DTywzd*OwGReK3LS_AJC zd@VYVDilx8pyJKR!}KM->dW7D4VPxI0 z;cTJRf<~9$ZKmq+Y)pKo06$2Y{v}(n=uucPbv{{i&ZM$}?0~rTTooq)(B(p3kUi^-&%AJc%8bfa2!0Phq?k zpk@zGJDxhDn*9gWMy$F5T}r=-8-b-m6~ZmBj0LvEhOaLZbaEcvxu?;k=3vp9mworE zWh*^(%}8@wZ)Bt>(q%Pf!S?FiQtgZLVhPvZTwQlI`NAgkY1NM=wLcsp86k^fV~v*! zP|#5C%(t1-%nu`zsQyPP<(ZG_|GKbSENQ&#&{k`)M!K{9B+BXmbP-~aOHSmLiu^4gkt9hiq ztPE6P9tQR^cAk>7h6FTw*Qa*Rk8j`0HW`xGlhb?mXU|&AR24UB0C(|awqCk-{8Ne= zBX7S{EUwey@#XIVBGd<&DcxXq7duu6rra1uW_EbM*Dwy5(Ow?MyIOMFMqiEHytoCZ zqUt!uh|@s@r(Z(vY7Xi#HhsUEm(H=4@Z02d?A58#hv#==`;sruQ922|f%YW{Z)}bm z^LWb!fnhe7_FQqLK8A=w>Ii>4e%xmPy1QM5SNXfDA^VxwOU?Z+ zA2l0;CW$yMsiRyd%sh87{Gq91aUO?JXg^+s->7Vq-lsx z>>YW@?5ii#I44qG&IqxKC;9L!l_|ydoWy><1!!7P4c!`6bv|yAsd{5V{Sy8SOS@?; zIK|O^E?Ah8u6vbk2Y7PebQPSU)-iXk>5lCgjMOzEPj)3j%t0EP?# zS@_8bKck|YbH&2DKuM7)51yv{MaE3jpWXaaC#%s@aA#l6eY!=Z^45briBZ_t?8zFe z!iMH1u-gx|Un>Z=z|nv+u8xIjPoU*s27p9!w@o;LqXPVGNJT{3EH{XyV83NXtO~=9TPhY zroBmPJ16ml`}MN=PqHvGM%!o+aw=LL>DRopM|k-Ffk&sD0MN`x6|8I(T}vE&9Nq9J7lt z@5>0KRpu=~tDhI^u1!B^{XEE;rJaG+*s((6$5VD`3g%N$ZE72PcT2&zo}<6^yl=~G zx3AA{(Sm1!KkCJ_=3gq{_J^C#1JtV5=NU*;V&JiXZ;| z^>yXGuC>i|F!M6#l(6H*EPQmU(cJq9{Af1*we2;d1I5?+kCUH2F|RnCPS3km#o~K8 zA(XNT2rE+0H^$7LT6yo-d#5s=$ED3Zr}nUeeAv6*AqOE%i+NaK$Z1JxJ8c~}2rM~O zl2fob3FDRjugbb@mocDo_eAcPrjpJgS0a$2>K9D^Mu5>_c8boTlA?q~7hC zWed=Fap=<`A9iBZ1F*jO{;1uJ$1^;7B-Q;hs z)P1>-=Y%{9zi(9hI(^=ZdE`!rOT4gfWVZSd^CP)r<1zN)r|bNZlV0%st- znBOjZy3whW;Oyc2d0;z%=g^H1%v#CYt6RtyH+{7H4y9VhpBjx`@+yITx8Ul5OZ`W# z=y=rSJ^aY~LD@~gj@sr$t2rN$SMcI=ct!XuY4fXrdpZ5DTSmt6jNlW`(=j4enD{3? 
zPoPv?GNF-Y7vsjLCoM@n#c|WP&)N2~O2;zasV_xmhd}txHrv0?*s;VQ~y> zL?f?|$A@1}Y^ac0`pPes*>qd%i&IiGMINk*wJIm2%I{r(wpi$_E3lHaRO!Atolyfe zux{`(^KRfQ`sF^&Zl>pV9voarXM(l+yD8e;e;Eo1yY6#M6H9wS*fUcUm?0k!ITy^3 z&y1rV;^3Y(t}k(($z1Mz?cm&tb?;0?bg@q}hO_SBDKgVL%RtM=Q`!WMNP)dc#9k7# zwP5rF_MwmyPmY5*2BF|svARANtsX(sBAhDui3Q+bj2Bkci6H%`<>1UYRM1VVo$K|@ z{_7*2+8Lbc%F$vJW{!0VMQmO+wd8bkWmrRo>fo3559p2+%ZjsmmKCSj@>RWepJ`@_ zl0$|2B-@&K_q;jnSa)`-L2tQfXx)j`$AV^qg{libA51gfN(a5(@L@XtNKd2x*wCSu zG3im-eR9iMPX=L6o>KK6U)3SiS(ZsXB|Z;oHfCK@U25;A_L)PEX2u)czCJ9i$ehr+ z6WXu7{7o2o+V)m7txu)nFxa!dyQa=<*5_d6u5esd`5<<^{3-FCP?|nP{^9GV527{N zRv2tA6%X2W(9MLH)qa{1^4VxtW7N6+=JJa&>-i-vZrK<2 zWJQ+wbIM=Jve&z>oViGC`v}d2a`+!j73l0OsxR92U7)wE`B-IbnDLhmoYKuZ?T>e_ zcwAkx{8|BL-{l|P8`N5JkG8zQ_jpb#06949^Cj%_(Op!KgOoB}--cnjmy=w8Gwh|&Hq>Y}+~ZGKu2pw=N)Ms?X2z{NZh zHmV7Kg7hf@0J9jCHI3ka$r6}6f-{w}!X5H^EhX6Jnq@yOs1Q#NzP{;^(% zqHa*U|7-lyy%ttg`z=jUTNT039XmXf)#yAj;Mu;j?_GAuV1&_z8^$x8Vvh(~ikDaP z^ZGeVW5hmD%j+JLWQ|JFyk2;fxn0eIH(>YjeNjXI*jUosz%ET4K0}q_$R(|wGH+hW zOum?~uQjvcwSC#fu+!1h&j)Qu8a7Myc=Xh9b9>Tqs8E{_z>S7YrUKDKV&)aKgy|3 z7oZ8(d5Hw~nHy8`Y0*i(7l-;j6iOVQD0}wV=p5afb*9wuIi$P9Bj}*XiWoa^Xo2D0 z`O2(Co}aTMoG#KVoe-B07jC@GJK{y&H%Dc+{zqT!NZ*zoiq8pnlRRmB=BOI_>nPoh zK3qU$xJ*b6zc^SVV5z#+bf^gONT2oN4Bsr{SKBKn{Kl{$ux-o4j~T zLTwTxdyZDIByt?HI%i541Fs_p1I%^3WlGFbTd(GW37RX@b7FmpL2B!k0RMUG1l3iR z8yMHrEJweLksrHY-ycXm3@T4M`;rzpR6g@-c#2|!M6=-3cDumjZA$)H+Tz*0U!-PS zHg#nOAFY|MpBez`hHjU!E)T6RK_UqT^*@m8)$B&)DOsP?R;v6^eot}sL&l5CF|60N zr5y7&fvgDZmj@q9LtSpu*Nvke&?A!rhOF6n9Ib%?5yfTVJ)>}gzD3=kBg|SgR^*Rs zRpV3+PJVJLO)-x{5uHaGm!Pm?{7lGibuk0Qayej%m zTe(z;tT)GcbX4}`!@ z;n8!YATYf6H|y3rOTg<)f1U+&0mq!c3MzsI2I0R>tO`8g9@AeE#yzaR zjeAzYuZ#Oy$Ge}aJ#EPO2~YAE7!r)963itlneO+lt6vQBX{z-Z=)0wqsQcM&Ma%F( z*}bqVZ5*8EM}ITxpfgrGU89B(sXb;-7nY3Jh*R>lcor zi?S@EzHzF1*U8I&nPx|Rp9cp%EiKt#-)H!Yr4;-4_fOg48$W$x(%bcY4a;^`ZPB_i zp%fr+sN{9Wwice=h8WbF_n)Paw+Kp`xCcFucL!vZ|!y7Unu3FwDjQp z)kCx0!<_mLQ~{1S(|5RIpO*RZ;Q{qHs<4mwHLIXn`xWioYq~9^Z9`f^0yBm~GjPzy 
zd9HrJ0o`BP?E6F0h~NA=sSDoNKyi~w3lJ0iZ3^ItzcVkI!WOLR=j8zFy&vktL0c&||lyZ{C! zt_ZnLHjw=L@?WhnvOcN?sDSM+BoXQDX%<{Q5MH=%^&!>o*Xwk)yB5XC0^1vlTqDf> z@%y>&fy~0X>=L=u!nl+stV-*7|3C#xv94CX@*)wSG8Z*45AQ(n-VspSxO|8sIpB9OKZy!i^%A=0?H&4DQ$>ZgG9 z1Vn3Q-EOl$1Pe6+w6hd|Kp4>)*xr+%2_brNQG>wrvBw2Q=pZ_C2!keVeMkjSGSff#dVb*COdL+k<1)76i0`_!bC49|bV(*Al=}5CnOV_}?EA z5ENPh0ROjLJeiO9_kSn=T?s9PPJ!=Bd`7^3-#Kd+=Kg{BUySfA=06FY zhw7kAs2AGwlb|i+0)<2I0P^S63UvV}a=Ac0&?SiWXW}2>O6Vp?M=lr056VU)`Inaq zln>QH9selB1&V@p177N(UYG46$pd1I;f|3?5Tgd!p=s^{XcO6RW`kqpGw9qyUhTe>KsMSgi^k z{(pe|swn_7>fRjVn*)WB+%FgJiw zvsqb#g7}1xVu7SK{z26)u3k2)WfT;=wKVOkmnZ>BAPKVOlC0*nuEugn62pO1)zA@Q zjb;lig~zC%c2HOjggcJop|E*V>36&lS} ziD*mnqxs)lNDELRiYU=)1O;g^HbC8wi&r%a;0;G)s%Q=5W^^m zR4gLI8fc8-|IG8o~lXOUc#i7Noy0 zTw_3#snv%JAn(-xTrFV@yiC=G<`ooDE$=521nOM~(Mv|5n&J&C?5*S#?2Wh`(2)8t zC=OgeEZ{31UXJccj;=mlz^V$~+!Q1MA%{`1`8Q;gykr*100*q*Q{f|ep=8kB08s=0 zh_)c0w8|XZtFbD4;3onU&V?e5yULvC1O`C1)j#o&_&NjsLs>vgkOD~NYVM~pt8F!Y>kv_HwH-J9OV_|> z{+Vq5E15{+Q*v~tA(l}3Ne}{@+gC$~3sX$+Kx|O+kqWH@+FqgvsM6dDO{<|$kjfmk z5_ls`$v%jtdUwkYHXJ9@0r(gaQ0sv0HukZTPEKIHD>bxQ<{3lDjC zT>~Fhvi4i$U84-rylxRer@@7y5GVpfP>4tbq2vZ4d|Z5)TNDUN10MmefxEfZn1ZGmQ1dQ+sR^ya{ zS5*rrtwP{r1WsOr^CEDp(yD4$0tVUBK)rLRhQZy#*GmCZNuu-v8JfrhVkvLXsL({- zY60fVAc)5uj6ubrWzbfNrMQTQjF1cr^GXO2@S=DjUW`0Z1k4=#yk%tgG5kyYy!o+I zZ+`HNFkTEV)ti736(Pb;iEs=NrV!yxn>KBS$od+1NMB!ytf7Gyg-oS%kSJahmNdod z|NIjLL6R`6@$}0?GWaR|K{Jg1x(Sj%~K^P4QiU^|kc?1P9GBN}pB24v`5yc4jd1FviZwz`^ z4lb61W8`2LIhZ1cNVjPd1jK^=mW{f36Lr%jAm~r=NV;Sg5eToe91%moSRaHG@CrmS ztdCYOtE@zSSw)QL>4ziaDHM559?S=$TufJq&37#d!=d*sVNVxxf=bu}B1}#pgvjEt z@R=u_XDvj8S}3UmUTHyy7gA6p%E-x3z2)T4{QP)e5)4|B>WvoLp$La4!nTUAsvBJ8IKQxsvM;-V6$+kRc!H*>YZy#YlaV4xvHLI1G5DKI!NK1o+)39Lkf@FI|c zo+QwWHi)6{BBH?DTt5&8-&~8EJIZMQCnx;j_0wb4GB&M|VA+0p(&MIXB|N#$R@DIZ zgRQ}DlR0PjJjsyL$!X{Gayr#y;bdteBWa*74WO?x!1hv54#iJQP>NTOcd4`}4w8;fD2VRDc(>>&-WmWBG`|4V8^Xd%&{!-#KN^TDT8D?r@Nh03-j9dl@Nfto z-hhW~@vtf$mH@RG4^QLacX+r554Ypt>v;G)9^S!4^Z14K0uSHA!*zeq`Hip|59{G! 
zn23i7co@RNrqg)%4VPFh9uC682EP(e3_LuHho9o%7Can-hbeei7mwr<`2${!hl{zX zE?k6P9`?)Uwow;Vyq)WO!0UcrzcIFL`-K9Yz!0DXlF|Wnb!I2-sC zzeHFVBPt{!D2T<1V9;U&fsWdckfz!(BN{h!1){r|||@dtH4x`zOR1BR$-0$fah zj}qWC0vtntT?jCR0P7N9A^{d5z{6am;(sDI6Bd0QX#H<}gBIZrjf3DH?ZTpY{>EJl z{pu$`N1?ATYO15J0crp4CPcZF#ncou7)TcZ>myx*p)pzk4Wkv{Cmg$z@PlT!{}TH_ z-2pZu9X|37erAaD2h4dsq+fvdags4%?08@<&>pCOzMvJ<69Q~b84yJYp!m?@5`qvP z%#Y^d zEVDvFLRdyRgs`z=OQZde)~t56c2{8RDx)eNjxUY_$N9kaaWQc@E*IenIoOW>;EeKd zAC-V}}zxTSQduB9mwuUa( z(D52NT0_G%1f(kQ2j^<&N)4TrW$7B~uOXK#F!0kg^r|e6F9+ z2~699YzNh$;J7S6bts#YcmtWvGpLdWMW|>-HiNoQy1j&#gb_}! z`heQzfAm3d`DX{$g$x@sV9dNGug9;4leX&hwN;fB#k`hSSG3A*Rh*^D+G=4_Y{LiE zC@Y6+$yG}?Yw1EQ9j&GQTH0Pqj#^r%rR%jcEw9PAAd6aR!|6mVP1TZGOS83fwU%b` zSM3o=VX6fE&ikaH0-hJl$r&EFa&@a*wHOmM+)nyiDaq-%aFR%(_BZBtzeqowf9kYl z>p85pn%|(mcoB<}zkBJ{KcAaFJHQ@jdCU^#*?NAIutveT?AAjPFYme$?j8xtDD+WI z<>4RN>=orjC0I|DD25J{l_%^pWv8^AqITM6r!G6Gc5>RuYNt6n&D!ao?DRKwdOKg* zXs55+>0g)NU$@hX`ASCeWIjo@O4id>p#G+vaQy%3Sb>SQ1m8g|t*odVK2TL$u~Jmi_HZ?+)ihg8 zXR9GDI|6Jkjn9*hG<@L#jfDy>3&v%6(~>1BS)bv{v-bIq-kxthf1XZR{@l1Wf40*4 z?q{#$SEHZf!?`@?tX}vR+cadX6h8|;UxBaWmY3OVH zvrf92r!ny9C6u5M&cf6!hXqVxP6Eqj2RStIgQKvtH5M<`#k6U0RM@7!bL&^ZTh~up z>fWYP^j-8gwD!~gw%O%EWwY|p+7NW{t%YAJzrp&oK^zD-t}fY9Csu4(;q}y&th)Pd zD1z>;T7{MDnpM_0FU@-Cs+VTG1Qtq+^75`t8;hz})gcva+IZKN=Q}oSe&?Y-{C<1n>0?hv+UGue?fs5T@l(&9 zZjT&2b~MudpR>uI4^iSB@2=xd?|Q7G!PBz;=%Z)ft)Kt**LP2hbnI{6=52fAnUTNx z@U9K8u_D?QKDB-V8<&Wc;x2J5{GHN`V!f@hvbx%}exudBWwUeSz~+@MTZOG+ra4&!P_u5j>ZY@@t^vtexcEYS%ZzMV-dLsPw`#mZTEUw(UcWEdvH5+4 zZCRDvpVdIsHe<=8%Dr11Rj;yc*sQ$&$`7u5^xDg@OIPOp;LM+$`+N;Xa10MIzWvNQ zKc4^Xf|%c@{O?ogdE3-{%?r=m`dQJpe`PGJPQQNWjrCPOc9FJ8-wK!Utm3C#SO1=em+`|0dkg6c3tt9*N6;j-pn~9{WX` z^_QZ}_7kxWsosQFaW#N91J__VCwAk^G^?#y@h#?&RVBqg(Bt$A%PUGo$ygh$muwH) z&KCuW{8<=!;k@#o36oH*iV0gprFD}DE27EjH(@Ji#!T2I)>?nx zgo{L-^-oN=Sp3F%*@R1k+jh}}OEH!{GT}1I$F~15;d0SZ@{cB5A%3s4+=T7o@zQ)- zm7=lqgRaC_BA0kF7FDBKPE#X#`bb886W$%!ia-f|pW37AW67A>sb|u9M$0Ail&7ro zcCvuF7gg-ia@*A2RK(Ml7>yaU>R>FBi0_S!O(wNWM>c}b+2b4Z4eIR_x99iAGFddG 
z20Vyuu~;Z;TgiR+MPh8JHP(&qYSR#2G!F<4~ zt6DUQjze2|WO9P-1gFd8h0`J#9cAgHmP0KQSBmZ0zdVjX787k zal{1E9vQA>BH0J?r`U9p(m}ZsgS~xfe;WGfhH!9LNw7`LdnSU^m`y;9iF7XO$tIE> zJu~L*@9q;_cnmy-ltX$_#6(o6NE-4Qpoq{#S{xA>Ni`0vD%`+s#W^5+!jGhi9#HA1 zB?*2Nkq~syr{$6BNdQV=8ZDD0UtU`P{CiC)c1bC=0n-cq2x{mHv>&lwFVl8-}1rMI8+=+8hwmX%o zs3i-TSkpP#E0%OZN`DYI9ew&TeW>8!9&;@@DVJft1p2IsAxJfdq~g&%+krK~_S2+B zRI%?4E%u|uIO=1)EmCqW16~a!T(b^%T;f#mIP}fh&qB-0m$!EoEq~DhclM1zaK-Ro zp4)st%F6ky^qU>XXOUySWItlLSqdFk_Km}kgxejLFw?UCGB^8m%H$k{53{HI?smAh zhAp@aZh_l4B&A76*|;^P8SCuqhX&oavAWj zMx(N}21s!yoiinm5~EceJgk!_xLNy*je;VAzOg-w@np<&JUW(Zjy1+wH~7+!k?m{9 zG6Adyn$6)~THJ$$r5DoiKM$yu&s!`h&!-+!X7B%-k1fhtT0TBlXND{jkdfypW7f=% zx=HktIaq^vOAKO8W?S~l`q;YNrsj+pcCiK?HMh^1JYV{6>xJQs3Dn2h_aLzsdSu_n zpq+lSfbD^?78eW0AkrOvZ0&%1%! zg>#R2?goA*@;l9ZFY+iCBUFqf`gVD|OxEy4n!QC=ZjpM6j($c1pV8DOCqA+K*W6au zg}KXfmj0W=H!s{&d^gE)6R%X=aNZcWF>)h)w5dOpWS6=qGUhb)Loj$e2HG69Al;ypJ znTx0FE4nXHKlO=r=y1@e??9dF8%VPQ za|4#}Kxsv{qu^>&B5>o#<+-?eV2CRjbRf*eE6pkoN{K@Cw*40#<34vWJv>^SOB z9Adk$;GVCI-lG$5?HjDGzyH1B1-w5~GVsVWJvY61kdNVSJUm_W+_V^a_>qU+BKo5P z$6k0r?5MqeIxzUq^hoW2`=_G-!VF9SaMr$6BX%6fW^?uONA>l9Cvg<@ll4G8mIWSI zQ79MnS;}IH!Vl$C&jd!EfUV~;AefK9{a9A;AqyVV$xO`6B=X11vIZl!=CM1yri;*p zi?7gPgE6sRZ9!PC-W&pIMLJTvheKBGU=Z7@6(KcwUAqTws;b@{9sH|b+*rwdR+-m$ zotNplV#X?0&lr3NCKd~*{Ms%;ads!Qv8>u;l2xwC_aiG`h54P_jY&7;Gwsb`5O(E(Kzq6KdOa;$|Dj&@ht(lh5TK^Y)Rl)$h%||Be*N~ckj}{!O?FEZ{y2q zB@f!kkA+9Ph^wo5_r;#bsfdb-sDPd3xz{}JKqt)zfku#9j-AR;1!YT1#o-tI3$69R zVO4i5)iA_k!k8O6an~rRkD+z6lg&rp^Z8pE z;Zw_SKz)HO&>G@Ew#(=Ot<8S;(lT7o7YKb3*VuAi9|$`0QR? 
l$KQ>}KR&@I#a&BQzx<03Et8W5L9AF`OWEelJNvuK{ttj8yO#g} literal 0 HcmV?d00001 diff --git a/src/UglyToad.PdfPig.Tests/Integration/PageContentTests.cs b/src/UglyToad.PdfPig.Tests/Integration/PageContentTests.cs new file mode 100644 index 00000000..222c74a0 --- /dev/null +++ b/src/UglyToad.PdfPig.Tests/Integration/PageContentTests.cs @@ -0,0 +1,24 @@ +namespace UglyToad.PdfPig.Tests.Integration +{ + using PdfPig.Core; + using Xunit; + + public class PageContentTests + { + [Fact] + public void DetectPageContents() + { + var file = IntegrationHelpers.GetDocumentPath("Various Content Types"); + + using (var document = PdfDocument.Open(file, ParsingOptions.LenientParsingOff)) + { + var page = document.GetPage(1); + var letters = page.Letters; + Assert.Contains(letters, l => l.RenderingMode == TextRenderingMode.Stroke); // "REGULAR TEXT" + Assert.Contains(letters, l => l.RenderingMode == TextRenderingMode.Neither); // "INVISIBLE TEXT" + Assert.NotEmpty(page.Content.GetImages()); + Assert.NotEmpty(page.Content.Paths); + } + } + } +}