From 1fb416eee39a4ee2f6b7fa2b91ebc1a7a3d05017 Mon Sep 17 00:00:00 2001 From: Eliot Jones Date: Wed, 18 Dec 2019 11:41:02 +0000 Subject: [PATCH] add convenience method to retrieve all hyperlinks and their text from annotations on a page --- ...gle Page Hyperlinks - from open office.pdf | Bin 0 -> 14835 bytes .../SinglePageHyperlinksOpenOffice.cs | 52 ++++++++++++++ .../SwedishTouringCarChampionshipTests.cs | 34 +++++++++ .../PublicApiScannerTests.cs | 1 + .../Annotations/HyperlinkFactory.cs | 65 ++++++++++++++++++ src/UglyToad.PdfPig/Content/Hyperlink.cs | 57 +++++++++++++++ src/UglyToad.PdfPig/Content/Page.cs | 19 ++++- src/UglyToad.PdfPig/Geometry/PdfPoint.cs | 6 +- src/UglyToad.PdfPig/Geometry/PdfRectangle.cs | 6 +- src/UglyToad.PdfPig/Parser/PageFactory.cs | 4 +- 10 files changed, 233 insertions(+), 11 deletions(-) create mode 100644 src/UglyToad.PdfPig.Tests/Integration/Documents/Single Page Hyperlinks - from open office.pdf create mode 100644 src/UglyToad.PdfPig.Tests/Integration/SinglePageHyperlinksOpenOffice.cs create mode 100644 src/UglyToad.PdfPig/Annotations/HyperlinkFactory.cs create mode 100644 src/UglyToad.PdfPig/Content/Hyperlink.cs diff --git a/src/UglyToad.PdfPig.Tests/Integration/Documents/Single Page Hyperlinks - from open office.pdf b/src/UglyToad.PdfPig.Tests/Integration/Documents/Single Page Hyperlinks - from open office.pdf new file mode 100644 index 0000000000000000000000000000000000000000..cc183201cad5e18181f58b675b532db59a326a51 GIT binary patch literal 14835 zcma)j1yo#1+HFE`4-hQ42kmZ}Zrt77-CdjDAvlDPAc5d6K?1?u2`+))1b6oU!CsS@ zJ9F=ynf1S3>rkialzg?%uIk0!UG&Q0l59YBFgkr_Zf8?xO=ku=hzdaEWNM4f&(9%m z;b;Z3rs9N`sB%c!*uyMbIV9~(U>4#QW=`f7f`aI7Fjor`2XxO=_vk*`P5^G`q4yge zqTJzLT~^${X`Rg4sz3@^rTI*7$Xe{wh_q|l3$DOX4)EMX4h?rCEfwYSlCf{kRRvN7 zyH3e(6oo#qsyaLW!V@E$bP2pjLyuluekCVNda@}eE}90j&k~EHTdd*NkF60;pzy(r zpefyOZF@Sk4`kS;b<~})dxe|A`m+gjLIc%ubrBmOKJ0WT_B~$!RYW_sFhTN7K*4M- zgCg15ek;|}+86#tJ;N-s)_i%p&mB8^Wlrw$g{PSfqR!u=JM7SK5^8C>RnSL9X>J^T z$~vdC`{?fawUFkvLZcc>PXa^I3)25#uqZ{%r|yyNuJbQ+3rF*RA_2d901y2EvxgfH z7uUnn-(}GMK$iP|Ap1|`fmA>M4*>jUoY#4L)b%dBS`Q7IJ!KId$6A!I7(IWELA?~{ zfH&8RfMyQ}|Hv{E!4`n1iXfeU*yS~7?m~gm6O~eBbIKhxwQg?nF-_mJ)~%u>%VzAe z)o$|V?b#~#&ClE0HQiAu4fBeGk$$ZirC;{y_GgCg=AKtIk%BI}oBU@yklYh7{cU%Rjk1{dV)^43x4Gormnl4BT4LY?^{Dm~w->J56P@Kb>2 zCj)f~Ecj@gVJVCv57l-nc~!$UN>fW+UsYGLl=1!$`&>p# zMpT>8>8ltqQr2`({+qSRHO#AGyOPU3a6N|K703%Exer>dfw%VLTZ~U%%BS-uUHgqO z%%x-;_?W3oZFkZ9U(=|3>OEYJ5%ZkchlLlYlsc)$Q`@mME zAgPNJlbFvVcJWpZbS5NtsiKN}ej}+0qh+3wJ)6O)PlzA5K~|NQ)M8(EVQFS*UFn=5 znJ}5qJy)DqykUQytW=nZ%=J^GgEpXqO8bhscO zFfZt&RbW-yXf`!W4qY5?@td2h*`RXX6)ew1^ULcUlW%UnMH#1r}vrKo3auxvP}dJMMiV`;1lA*_cszbmH?)wFz@5#KV+mAs;-JWm0G?8OXxWtXH$lg}&J{!lHShg(n@hu@*vd9u>EiD}Nd#uODEbHw&I2`6Wd z=FmXjR~f>)=3OpV+InihDM=Ckyo3tbzfzzryhSA8DlOd94R) z{1hPjfR#o3gFs#M0_(g#J_zLrsCJaGt0TO<6??R0-WFPVa_Rp($ygV_lW5YImcQJ^ z110GUX#x=UentXW`bE?kyvrO6VY1*+DkRPO5W;Vv!B1!E;1l*Fvch&XJ$&oy+YHnJ z46|v2+R=`1^!2B>wNvR|9z6xctYhJ7(UX4jOTp=Yp& z+tT^W?mqCG5*zSTLLTDYPX{iQ?qlbe(+czej>o$N?BJn1;lQ_9?@%o^7NCvM#M{dr zoJgCta9{W;Y%W?8Qu6eN2fk>V?Z=vNeNG2iIyZnbEoOyQb60w zuQ6l+MQRRbh6R$C+HSUA9$U;yrr|D?_~Mdah2#37hNqKz##cXg_ap@@W6C|BsuV+g z9lusTidAoTB-uzH*w7+bm|UY~rDT{A%C4eh^?rh)Y7^V)Ntn3^@GTy1@_0HwS`Psp zY&s)N08_%vCSQRAvFj5tfmz_*PZVo=umJ0TEJ%AeUN=g=l3AKCKVmv2)##gfzhGE3 zpxza#SH|vBN_*HB7b1~1&DSIyANfW!F#RwF&B5g2WlsDMLPM5BYIES@TJYSrNI6I7 z{9t*?Cp;G=a}_e|J>wtTN(bc&G7cKihFj#v+|Qk>yb{Dct)2U@aIh#*o9x>zI>ICo zI9%G?L;2hDu%t1XSeL|tOnBbpNjKnb1=^B!guFY}GkS?%(e{Zs)Oh#UO_*5mW6xs} zp`LZ;(9|qm%U%WU+)u6q9_e}^+y>aSZO(hzG+>JZ(`vmlO@W8y z62ikDL`k$iD%*x^$JY71j~40nb}mL*dN;Bu?IQL_+l^!WQ7Q7 zHi-s9`sdrHO_`*j&kMW9mMD9v4mv;)jo+Rs6za1|3b;mWWnE&wIqY5LA}ISx7-B1Y zj8xKWggpBg<;lw*u^q`yxE>V#NA?xx-Hmy<_ozLcNo%j(|a#r3Ib8XB6v z(6uTySjCyd!)Lc;y)(8^(4g-roaoNVoOPZ>CbIux|0D=29BbCar5{=5d9eLbZNMC| z0tDCI*vy=cZLLt+J5r_}!fo)nmCJ)HOUm|~yURJZ(}XTo1>_-35RL0h%#ztW&$KQ$ zq+mRDfKZs%(66m#;WTGy|7^s?tMAI)<7HtEojkGF;m)otW7OyA7GnnL-&Pg@|g{ie$EN%(EbVe7(MAOvE%IrAbIFM6m$Y^Abs*|A@oSv;S%;??>1>n&TrZk~>v)59(d0_v! z+9T@v;8oYp&o&=hi<|uw6FE!$Bj(y;E54hC7Ra5utJj5@Y&<^foQ03BhAfS&_zZK#zf%7s57EigwDu(pw~ z3bQ)*Ar!DV_an$)1hXz#^=o>}E_V#Q+?oswVtUr2UM|lCbuf=GtuKTSwjCvE*`EK( zCCn(3IPub>KV%yc5Ew#r-K5J<>OlHT|H@~5c0#Zt?)(6qVw_n~f;ocpWvZr_G?vsm zGSL+B67Y}$7maibs;|>vuHb90xyBQUd(Kp)cADZSF*zV5@lb4k95E%C72@cGpM!zFL^{vi{Uy$XRhOz7A3H7Ppaj~(a8N{WwX}% zikYkB0YXOK-D3aph-;D>-i@9&!1Nxc+le*KgU;M+`8?(#rXpZ5OEC+u#I_lo(n_oy zB<61ORB@%~yJekms0AnSpzt8~tDz-`SR4srO}EPjuvj?tQ^|u6en~**gow|6cyq&i z-tJG{c}B1({er?eRcYam45|BT_=6L8jT~gTFie5 zaH$*8d3#H2CUmfAi|2X>D3U9hnBcNZqgWj!DN<;%0rYC+oHlCGwlA?KEr0fX6z)HY z8D{gSI4Y$~+tGss+bsT@7%TpKuVh0^$d8is>l7pI==q;chy0%2DQ4Vp%{M*%zqZt` zL%lE8N6MxvC%(tb5WEDb#VnHtixyr5M&VSCnP{0TuJNwJ`CWlDQ~GCH2m z;cEvP>h*Los+x2{={T8hc z7AA)e4$j|gP{>saSyP9C3^XjB?eDsNe#22%GWG}umRJ_5r}Dk!7b_wPMHtS!EaOGY zho>@EC^_n*il-{#ks5hys@-nOall6x??e{ue}WJhDtdleReDHN^lT^zb2g!kXr~P) z?{SpgWFu$1IZ8sQq%0AnaA8EL5WFLgf95tJ`{T&#LN;hX!)&Z%Mx!y!o}Go(J$qKS zv0J@ruCw+khznb)d{&pS=+d-B8-NbI4hn3aUcn)qNQx30cN=z+x8shevlD$YrS(| zs79P)H?61hIH>X?|H1kAKyBhj5XLqE#*6KMXq04Px8>*|oF&?+=2LxWWV~Z5_3BP) z_a>>JpqqQa^JQBUCbvj|Er`ujfO&zET|>||;^HK#!`ufI!^+Q+dPA!@bB5R&uvYt! zgwGD2`^};=Ja1XVTCiKJSk(Ow%4htqa@L)G2G3p#-CvcpRshZTIMTODF5Inzn)CJ# zLWHhde-56ti?=2$q03_A_rOnRijl7V%2rGAugH$-Rl?#% zMm8iIwe^ipO?iZVDn64J^mxU)aDyp!g<)yw_?EDLQ&Le8v#i!I<5rBaSXqQia}$?% z?+PUkk|90Fa<5vmxqs+?FLi$zROx@z=2v-oL1FZo(lTns=x!zF{&M2N$bY><>Fxxp z!*V0&a@hHUFVP~&-~nFt=!b-LLjvi$W#9ccl6T&E+uyZMUMQJR?l|LSmm9OEEYn}S zDOaz2`8mygVGlMd=T(?jKT2so=~Pq3mB34`=>IL>LtO`)oK5px`AhF>v*=AIHj-Co zD&jT$G;=#LS`qy>>7(ap3-L=1Xf4$3m3aK@?TLs>A2a0w8M>`vh3Q51RMU@35J}K} z;8lS@W9)KAVrU8U(Mj)C@xfGde!Eg@UgPH@N3*18{OOLESU0a zCfDeW(;0~v9c}Ytti0ab%!t@T?HhYPmr){Pyf)Va>rXM_5p`7AwIT($7oF z`_^UDtE;kpDEZ{!C70E}?ctIWW$}LfuL0-Dg7Qz>96Jpf1pPMqIviKx)Emj2nKlJV zdZsJ05om`mTN2(<-5{}tEk+i^3(5RCZb2J)lzzL_AH=k|bfQba;3P(wx~|JDbiupW z&rvj4bWwD#=qF=0mP}BLT=6_ZZ!gR_*kFwiv1Qo*iOYuuY)ehQxz-&Jot2ZtPspI9>sk_@1x^z4Qt&jzCllmPzS}J zTdhPSb*~KX&gceHtB>LjgHe4fyPsdLn2nN^0mu}-GKOZyRjMtIADnr@81NL(6qmi< z?M_|i6oL(^zz{Ajh*6&52$>Rw33VL0RQC7f@qJO#6jpSX?dq?Fed4RA*G6 zP-?`mdGL|EII+$%vc$yj_fjOT{K@^nF+4F5S;e(O`77JfzNxx$&{9d|=y%KVFm~oW z3@gwm%(NR{F^BEUo=z}M8j5;ae(5Qnh*J6Y#yRy1mSeT<-xlJbD|4>{_lFt1$ zD$TILdgMz3j1w6R#(gIaa30h&^?jK4(( zeBLQ*(_IdV`ZROiXq}YP_eu3V!30QPB5XY)D=)?3JWmAka2p>vHj$E&myrn*OSFY6 z<$208{KRfgP@JQDPXQ@@q0H3lviGnUZn*Xmw3$XHOC#RK`F0l|x=JS!yT&8~EG0HY zsvs#p_jICG$R$wU)rD*if-v7@h&Pk%NC` z-f-J)iPZyh33s0L0y?`~hE@AyK#{M4pH`;isxZIdf7(rr%b4D()N`0~Xr=5OKNk3k zYq{WQb8F1r;de4A1B3zhP?b+*-v{sKo_-XrUyOhH_$s^Pl})br40A`7>U>TwaIo%1 zK6Znqvt$TR8uMJ6OgIvsNod$MX_L&V!A5Bf%!vE$g-$aTXSykcc%r^w4OsWi6U*@j zdFNmwkEI)B=XdwyQT{qb7MFEbNJo=2RlSOUqbq!KN0bszpbnLtI5MaTjOKExYQ^iO= zkcCIjtY0zN>Kc?U&Oda9o=bUnTEc=?;>h{)yrf&JGIlowEl&-JAw`_KJ-c^vw9i(I zHUgydg22+pACo>maX-}wI7x1zIrm9*7c-J7qbwZn#W`;uKyk&wPsGDl9j;e8tDem( zo3RIlG|Sk5zTPMhZKazmNNSo5TBNutyhw0;s#G2UySDXX&~9A^k($h=w$g9nvqDL} z+6He@SK(phUmP)f1@8el=pUJ06a$toD7h0#c$DT_{R6k z#@tW4&aOLN;QOb`y=U(*G9cS#clP`xkyr}4zyB;!3B5~|yj z8L>ReyjZX)eIc%tnWiSl8+~ZEOPa}Dt`7Ti9n3gsN?{DYJ%MC$mX~MS$UUkt{ujjJ zXu_BbGW6vl;b)yDAoRvQMd#J|C@{_fpNqm_)R#C`szt@F!;W+8uEn>V-$SRc`B9F1 z5tW{$^SonNb6%d$D@m(!6pnCOeKsvqP9C0j(Oh@X?o;b}LA?;^QC6`KWY-jjYke%y z6l7rZB>oyxzYbxs1)<{^_fMmxdk*c(rE!!W92^sKhD~R)^o~onorkTLf{SN@TZ<^< z_11M;t;7w9O`gB9^R@3OKE!4O9WxTu%-z{+UX4;7-i$KwypKt9;yn~oqank`<)E5}eZQ=h-jb{)>3|IqX-+XAd-Dl-=v#*P#ao9Tdl=*|=dfwNw?9#+ zc`|=qBU@;y9`(35T4iMJ68qI_rK9&pWyiYn(f(-;xFZ;Ma<7K>IhfD4QS6_TKEC_W zkK|D`^=|ej!v*`^`o-mrgTVZp!!Jj@+T!cE`TOzPSG60?Q2rqQ79zj0E6>`pg>|>< z$r(R8pOTyj)rn6}&vKAug!&&7>eA$}-@ZxzzKgT_etTP~GFa~uGFNJlnjCE>w$vfd ze!p^A$tmTBWooZgKyw2MPH%FUImmm-*cfdouKijmoi2UWu-<~3D$F*h*%k3I6p0WQ zFwU-luLMXN=AM3xjnYx+N&ij{<5 zIEzRYwif&ermNBu>_&u~h;s`~O3v~BFILKfN%G*IKscd*e|af9|MpVUy__vLB%K^# z;udaZt~SmvC%8)m&nlWYSa67nJbYN>ZA>j(O<*=oj%pUJHkRlQHkOqe6$E|*uPG|( z#$f(n4nA>w9c;RvJR<^iH}JPh`b zVFLl-y@;7OOIz4jS;OG%;1AVc77m(J-0&t3-NT(T3lP1#%PKl)tT9I@~?Z{X8D> zne#m#sS%@wo8#M`eZ?u}80k2(Oem)?H#|=qEeGvnm%O}IspAAQD;>P5eUK0Fb541Q z_SjnGlxDrlolp4`6xt^%1l$g#zUL~{)Q(P*ui8<&-P-$OS%^DQ3x)+fXz5g{9o3oa zT1$e~%9j1g+bW3*s@|j$4A#Ug@wy>tqN|^N-NYK#SwJY^|+WNSnXVm?ELYvG0?}^>;P@g+V~zzmfhITR7C* zO<@nzQFnE>_+2V$;%4#izWz69Y+T)7V%8?E4`h%x`R6^53!OvD#vEqtrU!*Wski_D zD$qYuPHvvx_kZLco`K;sdr1GdKwJQ-hj#E3-QXXD`8ScN9(b;b&Y|w4;b`;VxxyX6 z|KQPoI<~<7W!e7AScGzM|J7I2_VFb+uNG?c+Bc`1k%Ba)3-1FwNGfM z1*yK6#F0+Z&+uCt(qTM)8bP3I(1v1KZmUu(!8GZ(le6W((!8$bkY`#x6-lIP7ul|9 zw$JuPopU>Bh8|JxP-;%-aPZEub@S%c=FKoq>#5J6c<5x92u6-@P+2nvcuS@d^^r5# z>%>{8IAZH*GeTEur)$33mtv>yD2=d3XE=|EeY6hqhkx$hFg%)g@o4Dt{IYV7hhc*r zRy3YCxy5{Mop(jB)GxzaD~}5a2*ftMNRo6ttHL8w;LLO9+B;WBRCf!*feai+`t0=# z!}r^)mLuq%063%ZX9C0WSJ zI;2rUPdr{A#|X=%g=lzyD$qm4Es^%iha{ZHEk#J2JzbevWFcx=|Btq@y~jT{_k4cf zfy57J-z#5le?5yj3)*9Fq^On1)6k(xyX?bPr#;7X$XYb}H7&~?)fl!G-8!mALhBZO z?w00S(V@{5!q&Mav12ukW*b;)%xwvo>mzTwW;+Pn(Q^(^k1@(|DlQxv_dBbGL2vnQ zdDVvO;U2((akmZ*Ust!X54DHD)DZcmVj-z|bJ52Dn#OJXGq0bR9+V#H)!Obv8C`Ri z^qv<-tt;_x^~ zCDZz8Ke~-;X<=cRJNMmQQ?7{TE8I+sbJvaHjnHyo6;nezvHRMPiR*@B_ExT>%wwR6 zSX9y)nU)woTEoL4B@tIW8BMF^%w%9#sru}z-miQ;ydY(?_ygZ8O#;)_lBXfGx$ZiT zjt@x7CBhY$E_IPO*+afjFFR)jE8FdsJ~gFd8t>kLz@+SgxIQ__=ado_ciMUgzAfZJ zltJ1dnt!NsrJRdSiHQCG1wY4{^EM>o4A?%>g&^fv?{61#q zrHcY)S5cy%k`2OyEEHJEo3GmN)Z5&{xB!sg4{q2Ee>{nJT19RuwG}G-7Q4FFkNDIy z@@w++%JY%gA6HVn0nY-oRnMOyupo?d#1@X0hmIyE6WWvH+2xFu8O;1hTbw3v&J-}+ zXBUBdd679CIutP!G(W&U6Hv!uQm(myIE7wLS1!Bvt`utpFr5Rmo?W|a@ryW|DO_xi z)unErq2e{g`2ICPS)n_TBwHaR-Tnv-OKY<3v6SF7w94&KV2yuCv7wN^2MX;t3 zI!z24-gD<8GttJ^@ShkE#|s#}3f>f}@)Lw~7Ka#@cmqJ~pRz=;fhi8SAzLV~#aI=a zeCQcJEIH4F;s}Sj(#SL7Pa1WUH#KQHmZm-_vLypo*$0SzY&eB>k$x2k!nNy)ZXbJu zUajs4Si=tEf5rFy(ijV`eAFU@a8~ZVE3a&~<23j;ruhz2%weP?e{0m$;wo zjE|~WlijZZsJ5zYBa3O*iNAW->Nz*iHUt1@9y3*b~uWalgfX0ApidAWi>1gy|%*g?TVnln_WRHW?Wy%{AcQ) zgAv#&Nv9{yI+tP?F!8@QQww5Xyotg#tLfY#&NFfSfPgklbPw_W;Pv!eKasabD$x>` z3a9jZT`YONJv@QOp(>9}E$91pQ#*#V-0TS(Z$4`>xPdn?GUEifwmNr7KaI1(!k3Jj zusicEXNmLZ*g2Q)(g{Jbj@p97lV;ZS>V}4>71pV{xjfC)UMDq^c_Rl?1%-V8+DYck zy)2!Vg&6%HsuU_vxUeMQ6*bhb9^XH~N(^0Pv{5hgNwo8yGX!Bbfj zRq|uA^|q%#2B$nhW<)wt#)EddRiR-w&Pe$w1!`9QaCxyaQI%NyNC>v}+*a=6Ax*42 z8^{?$t8<6Aym0Y;v@JSA-Y*_(8r~7aUrQpC<{xLfg;MQLNf}1lZNg;x)woA>bZuX- zGZ#F2FCT2+E~bQ(9{--qUSa!h$*e~r4-(y_&`{i-< zV#6=o(YQ@#%s#xT1HiRlt{67!5syfL(n#-`eMg(U9_MGmvG#bAsx5HBBdaqzFzzzJ z940;khb|EguP!g=H2wOzMIJp5@qzj&tPTjVSWlIOGz1IW_!MsSE;(4rWx1u8$yh2{ zju4)FQxftLsQnrVdc#voK`rNiomnW(u(PW@_mJz%?6R2JT~8ih1>A9+U@EDNV9MuVs+>QNN1@p zF=bUAEfGOpvTYk}4Zt%?t_YJo?&qXNF*qKCy~rSBXo^?EOtYt5Vgl(75qqogN9uQ{ z;>pu?^bPAiBT`$TW_phjpr-O_OmWO#Y~3qG$w=Ep(i_}R9l4fZ|50(Oz_7SD5x++7 z45=^#qoVBlckp)h9qWV>?-c+Ta~q;6y>lP!nM^qky!xnD(W+}xIgN~jP$pHT<5S&l zO;YrO*4&g%UTsrd4mfaPQazF~YKv0rm%tJx_wq|U^Jku!(S(#@Qt5?cd3tDyHyfK} zwyIvBF58#Mw>s?XJ?5FWJae(%{9*GuT_Ga2=E6PtQh;9r@6X|P#u8+A6np!ABEyUH%i z|B7>LwizmmqpHl+%?79M6b$%>$$Q0e3`*I~l zX&LX=b5Z|!++&SxwYR1}f9OkojGOGRMTY6Qe<;;iOc}GI(~16`oH0U*R-B$97okJ) z?n9>J>KDXI`(D(wmuhL)3+0N3JdvDxC8*KdoBb2;-DKIdJe; z@N+uGFPJT~!iW?~c-eZg)$6pKC*#C=y$EXu|Aq2EndF!=XU{Rx$$BP3rn$o0hIJ|AaJioEN|N{U^ zm?@(xlh217X$WJZ{Yx7(N7=QB_jf5PhR2jCKiJZIcC^enM}N4sR>^zcYtcDvO}d;! z!$^%pOE08-=$7|0@EN^~^%mpC8byZge*l@&+ELTu1y>M#SNj9Hfp$AFg{{9fl(&<3 z`z@zb+LDkM?fR@YsuMKRW%5ak%?YifPoFzZc`}&ex970>gVZGj1UJB63;WknQD)3V zbQbipA4z)U4)hNoJu5w1w;-z)8h0u3Keh8G{%DxjB1u70C{kT`;JytVdoz{cdwI*# zbT;NzT0P=4BR7)bEBUKi`owUBPf?b!S7H48N&2g(_1EfDiiPj`l5zStX^4rFFHawq&H44~y2Wv2P|l~+RdE#d&69jhNu@kF zrNsspqw6LyJ0eZ}(>%Woubdq39(NX|&f(Ci^<4%d&B_!CyH^o=%!7uU$Fo_Ku^i-s zx_X*Ei-3vagT2xa?4WU%lYM~-Ba{Mf4Z;J4qYlT#F-*N0fhkkw$zAjr)o11#btG!V z!}c}cxx$VORfED|MJp$*dbbLJHp?***qh1JwISvUA6G3O2+`ulw5Q;d!|Yobna6WG zdw$*{U6*0p*@;1`tE(sW@5&q}FDW+LBg&Pr+dI&TFHSZ-q9O~JeO3||qqrusXm(2k zfa=E8z^bWCu7$`*sNJ!yI}>b{4t0g&O>PJCi-S=M950bdXEJpBSA+cPqht-|)ykP| zVTx5LrIFLv>sC=*Ra-*Q3oz9l#vJHc?lAMP!hUwT-E38$>b7BFDp!3fnP7RTs)EYc zbzA|_{pOR!_*b<{R%=Pk?@ji?iAEk3C&nvdv-pK%PFOr6bP4R4l=PXEm9>665)~pb z%-YNJy@>6V;-Qw+Wtzp~+S&cw$d^kl-9@R@mZ>)RQ@2WKG;*qI3eEg2IXyU#F0kL& z@&~^5nw1Zb1b~NBUQg?!$L{Rl2WvSbcB!kh_fK6_=hm=}-ll`LxvS0gGo&xj3)Q@D z*oyRi1)a6|rCh{45!2D3DIDT^ReiogiFgrvYd%A6Sy?UrF~98F_%lmF-|Jz{@r7)q z1?tA~6K;|I`Lj1^88i7j{NT5@=pm>5{i{)yRW2iYOPSv|S%ZVK zoV7-r>G%l_aB0M{-Z>jtZw+(^Zwlcc`jSkV_2Y=)*uS>Jt)<<0joSj4TCJo!MxDG6 zMwAnRQTykMuM#Dszi>g_7X6XfbV-nj?qz@*YDg^n;<5glgvYtp(R^>!pX^wNd4&^F z*Am^Q5F|N{3zaClHUf4QUyoqtt*chi#tNH!r6DmosDJeS>Z619;}m1uIkYeO+g{D4 zSC9I!R(D5QP1`pkWS?Z?+U~X|WN^IUq-XvxUH`@5>s!xPE&@Yu<~z~+Efh{OF>70r zlJDv%F08zd5O=m!XlV6-!2+-sF!612RR^&4;=iKYIRdr)=BvR=;+5rIIe#ay~BsJ>qdKDVnxDy$S%__qBn3bjcayJsY_D*V7 z?sI|OqK!pJXKw?g9I+f<0)%zwT%yRkA}bv>f&Zyk%s|fSRMBv$@)C1lc+L~UX+xnY zKxw18DS6ZDot5Vs6bfmxJ!?KN{GVh>cB1#V=M|I0ESfQig-=Pf)MwI>vUwZKYUvLt zIZ~p`buoi5(t{|!j|4Dq6Wn!AEjjVBXQ6v#j^r(xslr>an;yh{ zaZ=p-7b2~+laFr5Jqvbb{+pEd#}DhlP%fT-OR3=hBJllNMfM z(cHt;24>@E#r6-S_YYkd2>P=y3<7}R!Z3)73dqUHL&d|z^S^{)9xm=b3B&Lfe-VZs z9{x9B82F&y{Z$#}`uBAHP=>jo|Ccg6I^-Bkh5KlUPpRF-XZljjzysf`Vz)FR>@K$k z=QTh#_6_3k(%i=Yhm^C-E6YS`fVjJlLwwa}agrBG0=6pJlYu5u?5Z#o=Ga_yX~)Bu z`_l*;y!r)4Jy!Y8q#G4QeS-m`_UrtBd_lzyl}lFR(YHY+wuq|TE)oOzg{Vx&PHcUm zazaN+mijg;va6TT>cfQ|q&bvgEs|F4j4%pcG0H%WEgEhK%An_GeU03iBh(o=91A)D zrD6)9EulA0{V=Y67_GfBZNeMAu~Pp~)?Ej@3Ohb+?~_J4yEr~8DIfA*bOyhb{BKNx z{2S!o^6ziy_kWAR|2K91yE+X1mpc5Q_(FMrRGgepDlh=@TM6cdrw}kV6@&{)1%bj< zVF;KCF3nOwf$%zTwsFJrP;LO#e=5Vk2hRUh8h)V7Uy1amBPWvZ(`*}cu3Le1k{*_&w%+xGkdK~bhLLBNAo-l*Im>BR%59SQK)o&1$O{^?9 zlucY8tdZYiaHv|iIk~%2?B9*av5-lI66AP-1N|Y&zI`=93SA2a&>Zdh7YR-7w}CS-JBoBH1pyR zQ-fPGFKx^$RHZ~YWT;?pabHl7LkvD6ct~zkzt`HIOqjou;30lD^#2GNo#S6o!-M`E z_Cur(amd-2yXjH=M)n~q=H&hW-k*l}``+|>KL2tb<_YZtIv~_f;9Nq2h|M3R^$6MaS(Td5!kxfeU#h)h% z`jfdP20x<)HwF16B_t&Q04^v100jX65FU66hNtjl2?YtFbHIrX2hQS;dSJ1C)Pcb3 z{;m(N2ZpzYK>m5b+e0|v7rY!^1;F``O2AVvyszIw!e5X${IDF8I4=kQ`z<;X;1i^p#^q*zif5!j_g7Cms*I&i}ass&i9t)5Y1pWKm;FyBoo5A1O z@__%NKQQ3WbA!2>z};Nehy6~?#@hn^+Bj65oZt)ofza^XSjN%PiRw4`9x~!odQ|-4 z+>)Z=P%d#v38<(9mnfVwK!7AvQWU~10_TG$k0AR0UFBhC5qB~Zv$im^b8~l~;t+#~ ziNe2203b1l1XKhh0)aq*JfdJQR76aS8^pu;uom24Ca$pG^a6rGKp;9jy@Zk^`u_tF Cdb=zD literal 0 HcmV?d00001 diff --git a/src/UglyToad.PdfPig.Tests/Integration/SinglePageHyperlinksOpenOffice.cs b/src/UglyToad.PdfPig.Tests/Integration/SinglePageHyperlinksOpenOffice.cs new file mode 100644 index 00000000..e0737914 --- /dev/null +++ b/src/UglyToad.PdfPig.Tests/Integration/SinglePageHyperlinksOpenOffice.cs @@ -0,0 +1,52 @@ +namespace UglyToad.PdfPig.Tests.Integration +{ + using Xunit; + + public class SinglePageHyperlinksOpenOffice + { + private static string GetFilename() + { + return IntegrationHelpers.GetDocumentPath("Single Page Hyperlinks - from open office.pdf"); + } + + [Fact] + public void GetsCorrectText() + { + using (var document = PdfDocument.Open(GetFilename(), ParsingOptions.LenientParsingOff)) + { + var page = document.GetPage(1); + + Assert.Equal("https://duckduckgo.com/ a link aboveGitHub", page.Text); + } + } + + [Fact] + public void GetsHyperlinks() + { + using (var document = PdfDocument.Open(GetFilename(), ParsingOptions.LenientParsingOff)) + { + var page = document.GetPage(1); + + var links = page.GetHyperlinks(); + + Assert.Equal(2, links.Count); + + var ddg = links[0]; + + Assert.Equal("https://duckduckgo.com/", ddg.Text); + Assert.Equal("https://duckduckgo.com/", ddg.Uri); + Assert.Equal("https://duckduckgo.com/ ".Length, ddg.Letters.Count); + + Assert.NotNull(ddg.Annotation); + + var github = links[1]; + + Assert.Equal("GitHub", github.Text); + Assert.Equal("https://github.com/", github.Uri); + Assert.Equal(6, github.Letters.Count); + + Assert.NotNull(github.Annotation); + } + } + } +} diff --git a/src/UglyToad.PdfPig.Tests/Integration/SwedishTouringCarChampionshipTests.cs b/src/UglyToad.PdfPig.Tests/Integration/SwedishTouringCarChampionshipTests.cs index ddda24b2..9af4cb79 100644 --- a/src/UglyToad.PdfPig.Tests/Integration/SwedishTouringCarChampionshipTests.cs +++ b/src/UglyToad.PdfPig.Tests/Integration/SwedishTouringCarChampionshipTests.cs @@ -55,5 +55,39 @@ Assert.Contains("Söderberg", page.Text); } } + + [Fact] + public void GetsHyperlinks() + { + using (var document = PdfDocument.Open(GetFilename(), ParsingOptions.LenientParsingOff)) + { + var page = document.GetPage(1); + + var links = page.GetHyperlinks(); + + Assert.Equal(4, links.Count); + + var pageLink = links[0]; + + Assert.Equal("Swedish Touring Car Championship", pageLink.Text); + Assert.Equal("https://en.wikipedia.org/wiki/Swedish_Touring_Car_Championship", pageLink.Uri); + + var year2005 = links[1]; + + Assert.Equal("2005", year2005.Text); + Assert.Equal("https://en.wikipedia.org/wiki/2005_Swedish_Touring_Car_Championship", year2005.Uri); + + var year2007 = links[2]; + + Assert.Equal("2007", year2007.Text); + Assert.Equal("https://en.wikipedia.org/wiki/2007_Swedish_Touring_Car_Championship", year2007.Uri); + + var fullLink = links[3]; + + Assert.Equal("The 2006 Swedish Touring Car Championship season was the 11th Swedish Touring Car Championship (STCC) season. " + + "In total nine racing weekends at six different circuits were held; each", fullLink.Text); + Assert.Equal("https://en.wikipedia.org/wiki/Swedish_Touring_Car_Championship", fullLink.Uri); + } + } } } diff --git a/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs b/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs index 84ff6e61..4a8f7575 100644 --- a/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs +++ b/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs @@ -65,6 +65,7 @@ "UglyToad.PdfPig.Content.Catalog", "UglyToad.PdfPig.Content.CropBox", "UglyToad.PdfPig.Content.DocumentInformation", + "UglyToad.PdfPig.Content.Hyperlink", "UglyToad.PdfPig.Content.InlineImage", "UglyToad.PdfPig.Content.IPdfImage", "UglyToad.PdfPig.Content.Letter", diff --git a/src/UglyToad.PdfPig/Annotations/HyperlinkFactory.cs b/src/UglyToad.PdfPig/Annotations/HyperlinkFactory.cs new file mode 100644 index 00000000..f9fa0526 --- /dev/null +++ b/src/UglyToad.PdfPig/Annotations/HyperlinkFactory.cs @@ -0,0 +1,65 @@ +namespace UglyToad.PdfPig.Annotations +{ + using System.Collections.Generic; + using System.Linq; + using Content; + using Geometry; + using Tokenization.Scanner; + using Tokens; + using Util; + + internal static class HyperlinkFactory + { + public static IReadOnlyList GetHyperlinks(Page page, IPdfTokenScanner pdfScanner, AnnotationProvider annotationProvider) + { + var result = new List(); + + var annotations = annotationProvider.GetAnnotations(); + + foreach (var annotation in annotations) + { + if (annotation.Type != AnnotationType.Link) + { + continue; + } + + // Must be a link annotation with an action of type /URI. + if (!annotation.AnnotationDictionary.TryGet(NameToken.A, pdfScanner, out DictionaryToken actionDictionary) + || !actionDictionary.TryGet(NameToken.S, pdfScanner, out NameToken actionType) + || actionType != NameToken.Uri) + { + continue; + } + + // (Required) The uniform resource identifier to resolve, encoded in 7-bit ASCII. + if (!actionDictionary.TryGet(NameToken.Uri, pdfScanner, out IDataToken uriStringToken)) + { + continue; + } + + var bounds = annotation.Rectangle; + + // Build in tolerance for letters close to the link region. + var tolerantBounds = new PdfRectangle(bounds.TopLeft.Translate(-0.5m, 0), bounds.BottomRight.Translate(0.5m, 0)); + + var linkLetters = new List(); + + foreach (var letter in page.Letters) + { + if (tolerantBounds.Contains(letter.Location, true)) + { + linkLetters.Add(letter); + } + } + + var words = DefaultWordExtractor.Instance.GetWords(linkLetters); + + var presentationText = string.Join(" ", words.Select(x => x.Text)); + + result.Add(new Hyperlink(bounds, linkLetters, presentationText, uriStringToken.Data, annotation)); + } + + return result; + } + } +} diff --git a/src/UglyToad.PdfPig/Content/Hyperlink.cs b/src/UglyToad.PdfPig/Content/Hyperlink.cs new file mode 100644 index 00000000..01895900 --- /dev/null +++ b/src/UglyToad.PdfPig/Content/Hyperlink.cs @@ -0,0 +1,57 @@ +namespace UglyToad.PdfPig.Content +{ + using System; + using System.Collections.Generic; + using Annotations; + using Geometry; + + /// + /// Full details for a link annotation which references an external resource. + /// A link to an external resource in a document. + /// + public class Hyperlink + { + /// + /// The area on the page which when clicked will open the hyperlink. + /// + public PdfRectangle Bounds { get; } + + /// + /// The text in the link region (if any). + /// + public string Text { get; } + + /// + /// The letters in the link region. + /// + public IReadOnlyList Letters { get; } + + /// + /// The URI the link directs to. + /// + public string Uri { get; set; } + + /// + /// The underlying link annotation. + /// + public Annotation Annotation { get; } + + /// + /// Create a new . + /// + public Hyperlink(PdfRectangle bounds, IReadOnlyList letters, string text, string uri, Annotation annotation) + { + Bounds = bounds; + Text = text ?? string.Empty; + Letters = letters ?? throw new ArgumentNullException(nameof(letters)); + Uri = uri ?? string.Empty; + Annotation = annotation ?? throw new ArgumentNullException(nameof(annotation)); + } + + /// + public override string ToString() + { + return $"Link: {Text} ({Uri})"; + } + } +} diff --git a/src/UglyToad.PdfPig/Content/Page.cs b/src/UglyToad.PdfPig/Content/Page.cs index b7843679..375229ef 100644 --- a/src/UglyToad.PdfPig/Content/Page.cs +++ b/src/UglyToad.PdfPig/Content/Page.cs @@ -9,12 +9,15 @@ using Util; using Util.JetBrains.Annotations; using Geometry; + using Tokenization.Scanner; /// /// Contains the content and provides access to methods of a single page in the . /// public class Page { + private readonly AnnotationProvider annotationProvider; + private readonly IPdfTokenScanner pdfScanner; private readonly Lazy textLazy; /// @@ -78,13 +81,14 @@ public Experimental ExperimentalAccess { get; } internal Page(int number, DictionaryToken dictionary, MediaBox mediaBox, CropBox cropBox, PageRotationDegrees rotation, PageContent content, - AnnotationProvider annotationProvider) + AnnotationProvider annotationProvider, + IPdfTokenScanner pdfScanner) { if (number <= 0) { throw new ArgumentOutOfRangeException(nameof(number), "Page number cannot be 0 or negative."); } - + Dictionary = dictionary ?? throw new ArgumentNullException(nameof(dictionary)); Number = number; @@ -99,6 +103,8 @@ Size = mediaBox.Bounds.GetPageSize(); ExperimentalAccess = new Experimental(this, annotationProvider); + this.annotationProvider = annotationProvider; + this.pdfScanner = pdfScanner ?? throw new ArgumentNullException(nameof(pdfScanner)); } private static string GetText(PageContent content) @@ -133,6 +139,15 @@ return (wordExtractor ?? DefaultWordExtractor.Instance).GetWords(Letters); } + /// + /// Get the hyperlinks which link to external resources on the page. + /// These are based on the annotations on the page with a type of '/Link'. + /// + public IReadOnlyList GetHyperlinks() + { + return HyperlinkFactory.GetHyperlinks(this, pdfScanner, annotationProvider); + } + /// /// Gets any images on the page. /// diff --git a/src/UglyToad.PdfPig/Geometry/PdfPoint.cs b/src/UglyToad.PdfPig/Geometry/PdfPoint.cs index 8fd13aed..9663e0ac 100644 --- a/src/UglyToad.PdfPig/Geometry/PdfPoint.cs +++ b/src/UglyToad.PdfPig/Geometry/PdfPoint.cs @@ -101,7 +101,7 @@ { if (obj is PdfPoint point) { - return point.X == this.X && point.Y == this.Y; + return point.X == X && point.Y == Y; } return false; } @@ -114,9 +114,7 @@ return (X, Y).GetHashCode(); } - /// - /// Get a string representation of this point. - /// + /// public override string ToString() { return $"(x:{X}, y:{Y})"; diff --git a/src/UglyToad.PdfPig/Geometry/PdfRectangle.cs b/src/UglyToad.PdfPig/Geometry/PdfRectangle.cs index 5ccc8430..f50f5385 100644 --- a/src/UglyToad.PdfPig/Geometry/PdfRectangle.cs +++ b/src/UglyToad.PdfPig/Geometry/PdfRectangle.cs @@ -134,12 +134,10 @@ /// A new rectangle shifted on the y axis by the given delta value. public PdfRectangle Translate(decimal dx, decimal dy) { - return new PdfRectangle(this.BottomLeft.Translate(dx, dy), this.TopRight.Translate(dx, dy)); + return new PdfRectangle(BottomLeft.Translate(dx, dy), TopRight.Translate(dx, dy)); } - /// - /// To string override. - /// + /// public override string ToString() { return $"[{TopLeft}, {Width}, {Height}]"; diff --git a/src/UglyToad.PdfPig/Parser/PageFactory.cs b/src/UglyToad.PdfPig/Parser/PageFactory.cs index e3dc71c5..be0336fe 100644 --- a/src/UglyToad.PdfPig/Parser/PageFactory.cs +++ b/src/UglyToad.PdfPig/Parser/PageFactory.cs @@ -126,7 +126,9 @@ content = GetContent(bytes, cropBox, userSpaceUnit, rotation, isLenientParsing); } - var page = new Page(number, dictionary, mediaBox, cropBox, rotation, content, new AnnotationProvider(pdfScanner, dictionary, isLenientParsing)); + var page = new Page(number, dictionary, mediaBox, cropBox, rotation, content, + new AnnotationProvider(pdfScanner, dictionary, isLenientParsing), + pdfScanner); for (var i = 0; i < stackDepth; i++) {