From df0b60c2e1a756b28a66f85e497276b678e6ee2a Mon Sep 17 00:00:00 2001 From: Eliot Jones Date: Tue, 23 Oct 2018 20:02:20 +0100 Subject: [PATCH] port type 1 lexer from pdf box and add test data --- .../Fonts/Type1/CMBX10.pfa | Bin 0 -> 11365 bytes .../Fonts/Type1/CMBX12.pfa | Bin 0 -> 8940 bytes .../Fonts/Type1/CMCSC10.pfa | Bin 0 -> 4880 bytes .../Fonts/Type1/Type1FontParserTests.cs | 10 +- .../Integration/LaTexTests.cs | 11 + .../Parser/Type1EncryptedPortionParser.cs | 29 +- .../Fonts/Type1/Parser/Type1FontParser.cs | 7 +- .../Fonts/Type1/Parser/Type1Token.cs | 90 ++++ .../Fonts/Type1/Parser/Type1Tokenizer.cs | 387 ++++++++++++++++++ 9 files changed, 528 insertions(+), 6 deletions(-) create mode 100644 src/UglyToad.PdfPig.Tests/Fonts/Type1/CMBX10.pfa create mode 100644 src/UglyToad.PdfPig.Tests/Fonts/Type1/CMBX12.pfa create mode 100644 src/UglyToad.PdfPig.Tests/Fonts/Type1/CMCSC10.pfa create mode 100644 src/UglyToad.PdfPig/Fonts/Type1/Parser/Type1Token.cs create mode 100644 src/UglyToad.PdfPig/Fonts/Type1/Parser/Type1Tokenizer.cs diff --git a/src/UglyToad.PdfPig.Tests/Fonts/Type1/CMBX10.pfa b/src/UglyToad.PdfPig.Tests/Fonts/Type1/CMBX10.pfa new file mode 100644 index 0000000000000000000000000000000000000000..db940c298bd8f3f6b5767969b886c21b439e3c7f GIT binary patch literal 11365 zcmeI2RaBc_x9)LwTA)Y??(P(KcbB5UNpQDPptu(+?i5dPX(?8m;_mJa#drGOdw=cz z&gDKA=i(%}czETGdCNcBQ%GFnFBx*Y+(y@^JWEzJ2(I|e&68=&;YsuT|9u6tY{n@ z083j7H-I_N+7^Vy_FI!Y$O;VL`_ExZcc=fJ@&LNHLPY`S|B=Z6fQqyPgB-j8mOv{s zHWe_m8W5`G|Awr8PcGx`;GkmW_&fbSPW>PAW{$QF-v1Q=c64%g1G)f|!InT5(7%W3 z0{_!939y6Zzt73LnK{^6h=Z&hfB^RYd@Ng68Cx%)rJAjqg$=;U%)u4-&o~fd`ESqv z+qi!Ou*vI7C~7G%{Z~)_F-OhJ7UZVo?ewp-zoY+QsQbU&`_0g%x!8IE4B4TpfCE|| z`u^`L<9{nA4YB}R+JdYBoIJb$GZz;#Z#3xYff5hEmjhr6vIKeofL>6=Y^)%#8?*ud zYKcF<3haXRyP+JM05&Vz-(&wEH!pzA3A%E?mVbr>0Bm3o@L$Zs4q$Wh1pi?UDBHLI z|BCpX6YTEtuMl1y0Gs$9g4U4uLwo=>$v?yoV3Ybo(9+WXB0gwgnLor0U{m-*&|KqWdp`?l3m9KLk~6{)eEdE&fFu z?9hUL+~k1vlkJaH9PGcn`YRZ!!1^zQO0fA0p-=gzR~+n65%z!0LRC2Yg-{uee-L`6 zuz~(UsEj{7dHz98 zs4B0&5UR@iAN-#u%I`fPAp!RCW#NMQ$HEC+gB%>Z+yDV?UjP3YsSUDqb_dE!0eINi z+4*?+|JhR(?k+AsklR0p8+5y({TEx=LN_-M=moSuo0|q(ya};OZ3rt1kS_T78IciI z!n!eD|@ z?F^r0620>bW$!H~%A5BG*sTqdmxnyCs^iOpWy$)5Jz1x{2YG7rQd&F6ixe>54L$0< ze+uE{)04??(Ebn#q2;2`z)5p13+Mi@93s!N9mnub;y= z$p-LdleK^Kab9A$L=5s%?IjG!XB(eN$AwK!XBF?vJNenXJo*dPD0)QdwjlAdZIux* z&F&nuL)#4ZCOR0_oR7hdU#=y&;zEOPVPd%=BjN!m6dt2i#jJuS2#yJT(I1v;gtmuP zR2$UOv9ZF&+)&RbJcVsMS71U{Ahrb^KE+lDuiCM(E_#IXIim8y(7ve9xAg9^ERafW z0R;P35@OkTbOl0#x*?7_)gBxO4J5KGj}b zN{XnQ{CF#x#(seEHURvU)~dolAe!Zz9*J=@XFx|s0PW>_KZW!|KCxNX(GyncD5(K_ znWU0nedV9FOv>NL3A%?5ixa!Cq#nQCAmHEIqSF6NzcaIRp%HHch(xU@(M4o#Sjj0d9$zHGhT<3jrSD@itS!YvB{grl# z?%UPTs>-EP6w${a9ZJO@DwF{+kXY)a{+WLcmm&r9@Id#b}gtD?C(*rh+-%p71 z7bR0T1U436FfbzYE0rveKOcI^Gs)ycLBilsj~QH#s6OOvbK(`AYpI2&pv6)uVNpFI zm99}kwxcd5o^{Q~*Z3Riyo;&dVDe!dG3pHvDKR@2r|#Ao^P^F!H)(c5uESz7$??oF zKi(&KKTS6DJX1^o?D^5vCfB4=qrWWG(@fz==EET3yxHY9AqS~-`9%8dZqk(AVL$Ad zf8nc-R-{Tq??!;XjAYy^niz7eceTJ( zu7!F3fr0U%7bT-`uIPr$@`@kGOh-Kj)Z$Z()^cF_&_YMG!ZoGOeI4>K+k_jN=iT#3 z31a9zGR*b)-Nts1-5}!jeK#nDJ>kgBYP2=%$v@QQfnyOGsM_dnod!kum z41K+7O4}-uoasfHVzYmTrtXb)?Yg^`7Sd{WLIMp*+PG(g!SsMp2{9M`(VEKXmjqLNc4LJ!LL>`fbin$MJRo-Hn zNJtWuLoZJcgUn#6E_^wyv2+;{^*CCRBE>C!!sO<&j|F0ij*>N9pNcP&(snTm$RZh> zWXRI{q)#Sr_+kMVY0HT+F2VDvGOJJav(KW8-~o_=s8(bv*B9T+eYbLuGlSNo`Ivcw zPb_G40qZsDKqGI)5o2VW>DmeZGG41cx5;woS8k%ps8;^{PRCQZlb;7NEO|@{C=D3V z6~AJ|6!KOnR_oFalw7?lk!qry5?uuW(iB!y#G&+5!S|k!(APMmD$TZ& zEfTsz4;=aTNCGWfpV3$8t;I<#9@;UT>|mX@h&y{MmT+hXW84VowTqSpgP&=?e3obb z(Jrfv<;xb?xj7PeA!_3#03wdT73*YZwhY{8QHUx(9o7j~ z$=ZW9noy(^dd@d(msO|cAB)LKKjm(JTKeZfZpP2Q`4WDs^0m%$>^Qh&VD39s^k1vx z)ruza1dd|lius^8J2d)y;>hH0H6$l$+cLhI$V&FtcssdpX1=K1*cMHQLD#Vgh#`pr znN(fV0-8}DV9P>BuG}4v=C~`W1pu5+?{#aV>M80Zdi);qV7|?gArv3mmRQv&n)}Nl zd{y2*Lg(HtK_!2vOH2k5i+Cd;C*{QAsoKwf94|6fIr1A;29uGR2IoXtSWdG& zVDj$h&ke`Y%fuW0vPW{!uDgx`^^Rvn!OMo-cPfY}MrI_a{0fPk2HwX3ty5f`G8eXFEOggE`MBVU0hQ{oVK)$cAeh8B@QA|iMim(eQUtf|W&+;;Ss z4y~0YF!~9+=8`KvHVIMbb+d2Xwl>r52#M3E8SngQ2#Hp5$^hRS*$}T9c>}N$tNTTh zzI8tt&rk$SR!`7hGCiYaIv;ry{kqgE>QPGqx34HI&>w3l?LN6T2r!bTtuQ*O!H&l` zJefd%BgaXdmoGo@n4SMB#+uefnur5zFh~|054UvJKNy1su{<2r1MncXOkZ#o7b!Ef z^naP~+e^T7SV5+}>xishjHg?s_afg98&G1g%$YFPJa|TNd|vlN(-(ob(v=XrFZ#CK zufQ{8WNV2B zQS%VZ`!rCSzkoksHl83I_(fP0oS&R`f*^ZRbkk|+P|LStogUe~;t-djjtd{Zeg&S>Rndrp0QTPW}nhXe=|t9b@F8ia5x zY5YIn@F7SPenPA^8xKeeJ&`_IFglqhEaZN&}za~5TLc* zIQgryl_PlJ9HwyjnIQX4Y=$QV^WF`6O1x!aa+Y|&8SaJ5B`^$=8V`fMLCeQ7cS^IX zogC5OP+mAv_>p&>L4JyiM3Dz26kssJkd?afkcrJKx&@bhVRGm)P0Awp(>s`ds(zS@ z7G2=clhWEFm={~ zJnhdS8oY5_!KuWjArWEfhjJ~__pNpX1aW{aa4TYg@AO31)V5&@*(-v8KFYxHxRenZ zqj{d?h-WZ2pY^=6BiqYxb}l!8&w4+8mh|>7p213AcV^0~L!8g_ohKEjRh>ve@zjT- zT<^_0TnArQ(b8_sTtqS@J2Nx-VZIH1CJ+DFmtR0uvjcyhjd1~Q-V$zp7p8qa&h_)q zUqFYK=GD}fSW7Na1kAU%qUfyCQ-dG7rDC6$C~CGJ<9Dr%$~anDuN?7& zckYtS`bnAhFqu36pT`Cy$hFvBK0VQ_o0dtibC(WThuKX_XZ61YZ>^YqbN4ihwyS;e zPPWVU5qY;bW$n*p_mDY)U9}x-4xcO}0Vc*k$-Sk{^hG;|AA3bXDmr$Uf#VYz`owjo7#46qq z+w8g>4W==7(Zw9(UQS;@Ae`KrVPqngXPdsZx&?muhF;O?{IdV1TLut+n!6rDIHFL( zADpv+NWQdSd=t+bl3fT+%&}@5VKz1*Bfd8wY2>pJLw_RcvJG2rQDDasCNqD}kAm#a zPK~jdY;CBm(_k?JY7C2_s*%a{Yg8zRN7dM;N$Nq;*Jq(tiu&+@xbOjdT1Ycjc(W{0 zB3bx$Vw_0eWX%TK_*;-yd$*FgT=ZW5OGx~EYFj4u#T^hoDuyVP_k@wLPW-0OkOcVC zFWODG$<`KiIjY}$J@vLG@&s2J>0ChPs=h`joKdrQ&WuS)*42btG0SeVn8R^*oM6le z4M;o4StpB&sY%^^L9{Z9UAa9KmzWqFW-#J_u=^J1mX-?%Av@nT=-0ri_X0bNT&HMP z3?Xn;#YI$UFlF2kHgI7#d*SqPQgNG@oMF#nVfQ7Mg~RMl=rIf zSt)2_K4J9p!OV&YjUBZOK75^%s%{$0_4uH~gomSgM@suJ1CJUJ-4m*)EK{v+U;I@p z;OwOTO1PV#kH%N)#I5#zi97fdOosS3KuPAEkSn=wGhxFCw}!G>M(NzseYza5Nq*U? zF&p<)Z!hI26zrAYTP|x>YaaW$UuHiVCgIMt!3aK_(Rv=ryia!_jM)A5F~;<&Evmu; zLpuYqA)AguaX7_o7C(>nMzOr>YTclP?6cT%R#F>wrn=IQ$Ci zI8mTRziFLyZMShex8mH@TyqNtW=JvSlsD0F2E&D8qgXi8tBxQ%vs<`?RgBA9EDQO6&*&&gQP6-v!?L zAl1bAW1;MOPi4lE<5G~~uD0n-4l+*cSB%@DT?{vUls$5F`JMlE%EE6%5~o|h3ic*s}-xB2un^}0l>d!_1N7`87efO4oVKY#?1 zMzHn-T2Y#sDfvl<^J)2}z@x;UX)SlF!0W2$nh|_FM_UP$9_VGZLhX`ZAQq<)-k5=T zIkv)B{`7qLN(I>r6i3No-NHGVjd(F`K%Ci}XB-M78vseRtzmzv114lh@Ot9F$#mJ; z6$EpGXYb{D9L765sfE^`U*#mz-yqJ8*yy;mdo#NpJua}AVq8dMMUmPnF*(vw9}2;f z(=%-uX@6`p>5fv2E%GWMiO>nAx@ktPI z(i40Li`bLH_m8XR!Y?GvG`sR`9;?xQiXffLjl5=SOeU4wuI;)@3P4k>IDaP$idFbp z@#LELD^17TuOv(WvUQUXMe==SJ}4TEN2)ovQXWUdIO$r!=UOB$-EONa+@2t&C|+CTn79>= za3I&Pu8ndq0gn_Wchp~4IJt1O^TPqJG>rtA7f)TNUExz82$7nP!F#u z`@Rn>cWm!ub>19WFg``XJ+*ZGWML9Bs?Y_!AU_aj@BONHJtx1w$MM!{#5c)5IQvMp zJ;Y*nig7f<$1Iq*9VMfnkFm{tigIUSFq75_*^XGF60Vuyh&{xGjIcD<=5nE^q|>Ci zO3j;EItnKqYxhONMDdRU&OLcS!?#g^`L>XE=Qu{c66SF!at?={2rO15VB3vERhKl4 zZ^H-hEGt)BNcNZsll@*r(HK$OdA(-6tN{AvGmz>k}e(&6VwEU!eFtrLS`yu zIKpGp$y1F#+=rdzCyHv9{)nvaw8(iJgOlbfPlQ0qU*hJBe9l7z_n_(zM{$vUe>&lh zEAg6e7A1K}5w%a<>Rt6UZ_}!CA?@Icr&g;yn-a}AJK7ef@Pny|C=ska%qT`qtRJ{# zpYc8qak{`<{@MWqWC{~ahZ5^C_u^O|^66_JSkN_dWi++?N(O6q@?=Z=hxW`}=DZemwZoc$$cbv9rOz7iHhO96%~~b9Uo315MW89N&5|&cuV*b#yMEyx9TOqp6^s43=!2LyUU1~srlX{O zke*T*H~!*^YrjLhkYDtPC-%x{ZW*?7C{_mi>RGBwA$O^M=k3*Ov2~yHK|)iGA^H5T zLzFfHFZ*1ISMW))^N!QSJHe=7&(G=sVC5ovj+Z@dg@JeVj8&7J*DWVBB?jwN?G?yd z{A0M5F>g;l!p~4F$e9(`7H!hOJ23(D5fe{Ik8rJ8T86y*EWM`U z(v;Ii?(9@Btd7xlqj0+MZG%!B0^5>jC3tB=XGD=xnTxF^(Fm4sUYAY2Vz>c;9v~Yy zb_Kn-sKG1$oZ{;dYP>NWs(hd0m)&2#e?iBv?oDov>6z@*XDXR|;NA%OQEc3r57EyH z+6$r~F$}Y4>N6dbCNE&9L^A+OS4$zC*1mTNJwUg!sT<+DM!Q=rUcQ|z_j}LAr%Lx2 z5T_M@F@_eH*>U#`TQak-aOIhl8A+VbLY)Tld{(Uj!`lcq>eXA9d>jVE#Wy|7+|uGp zPTL%J3%6UZNDk}#j{rvwGuO^5lB(|I;z2b$vsb5B53iyE8S5Yx6gbbL+tS zcEI{r=$G5vidY!>YK{6~v}N`O=2`&{^iY8c21Vwu`sz=isTWf?dVZK7^RrKuVVJCb zgq}X3=Mo(2EI66{?vaegYrEaZmExHb*Mme;E~PUQNxjZfAq9Ip1tX;$iVoHC0p?u@ zdDE8{D+D)B87B#43@!$HYWi=1HHE`wYz`fyPEJKQ`GQo+(maCk(JmrBoBJPsC@nEW zC^y>Qh0Y-h{z~I}kZ5TN4)4Q#5B*veumSc(3G+j$@N=13?iMIBXS8#!;;m6XcnG|E zQ>h!b*NCPuY|B`h-VZRBwbd7)Q%w-PrU<>KdmL4P!^XIQG z`>=~l-rbahf@D_H+sc>m!;Ek6`_|1!00QPxQ*!>L^srfjYPuVPyi-_5QvYm^1uqXS z!ae8FpedTAS5e)Lkn`gRtU86qBnaj9>mvbFuv*P7i&*WI zl{(+B$H(3IP6C3GS}EdZrUW~@_;-@E%v-NV?yJL1CA7KGxkYzdE~xJE-@43_kdii98g+gRaw*r`x`e{ zG4dU`;ipffR37!Dz>MVqkc(f*il!zW7K(F|jqB|)x6xyeDfhOx96s7+oF*fa zDC@b%2rq-QM%SS-k(XFz|1Fued^WEilv8A z9TS+BV$q0;fA5ZfhI+`WO<0Td3SL_ML&cqIOcd>mg@x3$jq7OtF~!d+CEmTUMRcF6 z_vtK`kCB^ffqn8Z6dv}a+?{;#0z6}I(c-V+6e)F8>IZyR5H5v6qF&Sax`fttQFD@D zJD`pn8QMX7{c_#<#=}418z<_&9JKgwH?QASly0 z<`{Cw5G@6XGR*IgQ^}p48W+c?>o&|}Zp3%rxEf^&SGg4;<{`YS!^6Jo=a&C*B38Zo zu*zv{@}27pp-SO+{*@0a#YCL^FlbX)1d~S;cBLqqMkR=J`*`QH>@qE8@}XrUsn0(< zKQfa>$_DvDoTuzYB5|*IAFufJrAXDO;>M@-e147=0-alVuHm%(kP!OZOq(U8Im;Bu z%cDIGj<&RJf=4iXGk(kDJMJUUPfoPb*loCq3mI6W28w};QudZhoXPRM4Um3#4#5cgszqC9T4%(smZ6jHJaU9zTA0#^n!z*c0rUOBEn!qN-{&PEz3y{c5gP>WpC{_ zPO{5)L0Mz(E~-DV6(83s%QY^*6gifDH_1qxKArK=$Yh+6WRDlZ%$oxtp%x_zbF0<$6;3_w_?ia_0huy`AkW-{29{ zwQ*QiJP&?4z6zHZ2)~1sn^@vNAT;rX=kQ}fcq(v#kZPA>SGsoarwh1hsZ8o_nM{&q zuZ3;baA5k325qeA!2Ar2Xq_DQKU((rrnm_B_)a!r!B=Z_TRXG6<=E)30jCiMsicB! z#WOP9K$w-kLzYDhw$b$cl|V5GH8w> zW5!SFt%tAE*|_fa;$1R&y$0B8koR6wvE1_Kh}@sHC9&29r)r@w>A)B}jhG$1&@1Hj zS#zUxi)h#6IeL$b94z?|jlA0D%E@)O%sED{R;JZIRsI*N$r6DWAn4;~6ex;B| z4FA3Jbs#2*uNOp=Cn&jTvng-*)Hd$Ms^T88U!}g(7;^W+EpY6`z8S2ERCL-f=cmZ! zd;O$J8i`9I?$@U!Ze@b0^cMwnIqhDtvggYaD}d-B zM&=mwL-+}K&X90hdT=r|pCC?|oruE0>4Ab)Y1tDJ2_7bG@3z3QFCezM_17-8{)xR% z(cbngj<7#Q@tFvB6Tf@dQrx=gxz!#gfHql#$T{0x_(iOc0PR6HqPYIPjL{gMf>1sz z7F)D8l2bA+CkD%&>l z?#KD!=auBgf;e(7$9qY$gc$8rO*0Hy8g0=sLUSrQUfxaQg-Z2-T6CwA9jHW)${#= z<9$Z^^JEO3RIN|R%fgPMnar;|hp!12v&-&6dTrwQW?3a%^m2rb&Or8Bx~45^J|5Gi z-(MVTBkaATgHiIuu(UDx+)fh0ez4~(^Ae?#U?Nno8zQ8Y;@iGP!JAC1=`~&({7dd> z{tk84P2yT^x#r>i*?uE;N?FCP!ketN?J6g82!fJcy-LhMOX;@$e4P42O)i{ticDH5 zZiWr=<+aWh=;KD>SW4&3UHB$0+JbjrQ{mM-5mErFmY$sjQOBi4q2CEkd3oA`@#dBl z(CH>E=;UJy=b@7GG@^ri>(=c-efglsZFhwj=ZDC?F$$DqQLi`JrurBu79z!jn`B4y0 ztaT@%OFnr^d9Wl4J{ZOR^_^kLHe#zPr^=`JyvIZih-|F4N5@eWV^H5J8Udi-IgV5ZL1F|QZaWV z4&#Iz2+xSTFN}~ZhYITIC<5_QZM|D68KB>#%xV*?7534Tmg<`vIG}P z9gZueL=o@s;6rCYe3Pzk=ljYeDQ-+uVt77L>CrAY2l_~({CZsqKdc`uD3@viia;*j zeaW{+zofc-fS+HR#1r7-Q)S0Jc@Rn(f+R+Ry|I!A##-rc6#o4iRnVkupUk6{8Sbne z4_v9~(1Ky_H5hRbuj+ZMy6xE=IF0PaLsl1|pD@gZr>a%P&BO~zS5$J+7X4p7r2tp4;4PqRh#$ET_g?7+=#7!0n?qyxh#Df?aC@90E?GmGfp zg0Fk{noc@Ko2qvM1A?ADB6ER8tQWqsxy1`wi>&=Dg=0v!-AdMPzRN7b&};kn0_40= z7&kQ)7gNi%3as;n=&)F@>7JqgdHP?y(Ee9`wtNs Bc+3C* literal 0 HcmV?d00001 diff --git a/src/UglyToad.PdfPig.Tests/Fonts/Type1/CMBX12.pfa b/src/UglyToad.PdfPig.Tests/Fonts/Type1/CMBX12.pfa new file mode 100644 index 0000000000000000000000000000000000000000..f1299b51a6810ae221de48525d0bfece5361e11d GIT binary patch literal 8940 zcmeI1Rao0wx9*`pi#wE}DOM;NAW+;TP~0gF0Rn*_f#UA&MOvVx6nA&G;x45~ad)Wz zJ6+$mSJ(c| z2LR;c#%5xYc7~WCpl}BnGXz8k02CAi0wi6n0X*CQAioeVpAd)-n+YHdcl2Ng_#3D*$iO=u}9{xFoOX!;TBK`!h;ha34;MNey?x=Xh2*b&TbG( zPHZ3$U*QV zh!r-M3LL2gK|1-rVd_7V%elf}DrWY-)Bjba|6|_F9t!jLcLdzt(G>x41}MWVAj$_(ZL`D+~FVEG@< z|GR8|8Q{{DQIeF@VEcDJ|0<4}8Poxx<>B~u+TYQCF|zi*tM{9cEpvvt172}+a&rTb z`pEZxe=_(go}vh_qq<2Z2Z%4*x);jX%u+{_d~i zKW34-fBFIhB6VH+B>f{Om%K$*!+}r~Ef`6R`3s+}nhy&uUO9^?Z|g zP~@?MK-?h~*bB383*lhfH+7+fzOuRFMHtUfrL61YRjNCtT5*Er6HsBFJ}WsrU#`;o zQJg`7zlE_y_xcx=OJs|;`iR_dfKzC6n~7toW-fN6(?R59~D)gVlBDh%ff z`}&$NIT8KqSdg!67-j?8ms5C)EAfw9%$%e<6Ew>a9n)*2qf`&Pb%P)A!`H->{ze6n z>#jM<>H4vyzsz4P(Y*z}f}KTYPCq+mM;NA;wK*=9O=o>H_^yiigs(_GLQwz@8rjtP z@u@aSWqdia-&p6-NBW`Gm>(Y{)fX}5*PY33Dy4e8As1Pvn9_F-2(nieP-3j`HgPiN zJ`^~ow*a%O<^l+*8gR3=?GoM4OB#KAkd=ps36NOFa@ziKgaqVaj!Fb%=o)4+$us1EOr)j_2 z0vl?&j2GvHupEkR!b@2T{?Xn|IL!__?HKtjCIG@be--JNAX?j+GYPKy{u~a5Z$uZ_)?NbNitx*ym()5qTm$i{}7OwjF z-Uqah(=va#pHfd25G!EvSZO8Mpr$jzfz7|Q_DS|Oj?yUp>q15!Doi5gVl7vW;TOkf3=pdAA!>PVe96FQByWJEdM!g^NbI-%eflz7d%W`FKS;+Lv)L{bw=#W=@+cuep2W!WL3DZ+00PnEW0}`_{ zMYqXYI1-|d`amD?)eFM0Wj1>wi!TMB<4_l-v1I+`>C*mKGe`(=DdQmQgRm1D2FhJ6 zmAUNVG=URY%s3)b9>=VP=!hS^Nbd45y07J6BlW;L5oq>npTu?&AlxpM9j0=fPjvjr z)?K-^GOxFNlvA@ay@*ry_7`fMwlFQlQ&g9#li^-ZJc{F}^NH7e0lxj8S~;T) zWmu=%dt<=_feRr9;_3*i%&U%TR+PezZk`)$tCOl3QGo(8Su#6L{Oo;`(H-h9q*ihJ z{m;GWJ6m{k(Qk>G5TX<3=d~{{?#RB9Lu_27MvmVp*6AaZc`==RatGq81kNA6we3eY zG>3L`UIrHE-~pPxs*vtys^iIDrkvR&)x>813YcqAf4dv9|A^DRKtC3Zl&p*VIyxrA zsI$2p^s2PuMDe8dTgdET2-nwkrd8Vu=}moI!UNga{X8zL5Y>7zP^#kEEhl1HZ!B!s z-S%oQ{w`p|_d;jy;2KuuP#C03woG)=y5(ygYx_#<{5?lD)Nhe2hI>iYgLO`%Y^@xzO54le$z!b26xvJCWxs7}+7+mgIHh{I;*0+%)g2hF7jhRW%xovS~3i9o&`J-cC)dl2kl? z-ST5_za~|Xl|(b%U%uBMgB?&UXC6EpY`}9re<9w-B=2jX6_va0%%1AtXBSW_}{)S9*(gY@E zd)8DfIc}WRGyrwEc_U(#)1*qoAs5~+-&>Z!LX=2h$$Ii>G*1+?zsu~ehr09KM!r3B z-P4y{_AGiwP?8{kzht5wypZhM_aMx1nRAWqzDhiKj^q;bvf<=Ou#1GGp<5piAMJae zw%*~n{MJtc1zIl^%%LaK*_cees5Ty*Bmkz{5ubfH|0NK z#}twO%6uXhM3BsHK&wlc>I7@Xga#s{_B2*7pPUq{-o;rNx5%hrx_czj76=$en#$o6 zu9&s>G1mr-Z@v^kS2-!$a^GVG6FDHYpKizypU{l-}W0%g-nh=i27sttY0jiy?(bCjY zdxe*~Mtt-pLqXg5lC|B5q zAnt%G(zrosG|3&pi25veEQUWv712F$MB5_oV!VmKoZ0wo=Eje0P_hD@-9t`mp&9eGVsspc5Rmq;c^r#hc!VJI9;CW0=_S-u8&Dk8 zy3Ki4&;NBX3VdPhH`;eC;4{|37@Nv0{9ez~6YDd|RJfw5TPJa{dU2dHyOozSRTQQ- zdz8_O=MJLiRW|Ej1(0koH+;=Qa(*eaC2Q%P={aszu=)E~v7V3s7O&>6RKJ~}%Gu}j z_tq1{Fn4A=ucihZAchrPCa^<}u z7((LPW6RR`Ah9IVdFN%|VuGr8UDs>!?Dpb>4fXe}jw9$#%`)4%9S{BSOcGo4W(1zp z8NOeaAuE-;PLpi@+G{&*e5KHHk7K!>(v4X#)FU(v%EI(qso9CKP4iGqbtr<}HYf~1nY-YYCb{R)f>ssl$E0~I7=_Jz}%=I5$ zlYSeZKcgS|p!Vo-8fmQW^c$1V`EOYXArB)fC^E}Fr-Sihs$yHOs+M6wrLBQBo)9KK4j zAsGfM1wTF$Y&xy)HsIycAn4b@T0T54TlV4fn8d%Z18wO;JKC+p5uh;P&4wRW?K^$d z+inYM8e3qa(BYrTiXV$BQXcu-FY5X%O>f@ztQivDNl_<%IXsOY*bbE<`V7c_|gPkzOJ(ObVj-uD(aE&t_Uyvttj=5$0FR9Owl|T zU;@79nk^9p3~p4{R*qrDX|2JqBoR)K(JYn?;p)(BRH8#6j4W1Bt1N1Md9iCA-b#rM z8*n#>UHaN6VFX?4g*s^?i!n7tRoV>d@W__wIW*JEPa@=L%;ozPj4xd} z%z|3UOE^FL5||&TsKPl9wD|bL6?EPzhm*q(G>iJ7jo1?7jr_nj!a?u$L}o6Kw7iS{ zlAn;N!SA^z3n6g~&R`QY$+2bfhE3f2i(BaITJ}6Mb<%qe?jK3d% z-78~kC{d&)DxCAEE0OXs#wV{;*XVnB>->Y`w=_J)6>svN>^*?f+(a)mbdwW750hEe zEof)SPQEI(6`HLo4}Ot(<)q68xV!lN3VT)y?L0$+1katPH`Gh1m>#zCU9=4~K}K;J zx>5iMN5@a{^(z-$Kay1{R2MrHDgX5=1kTyF^?u0egE~j9C_Ef@K12YQkA5(T zxJmnwHO~|@5&3N_t6bXQOjcm!_f14`C@zkp!e+4~zc!a@S)?8_Q(q?Ey~ZJv^u)z~ zeB*MPDZplj=Wp)Z1=ZDM5X`r2Buv&Hy#mx48edby8ph4f>oRp0(Vt}ZJGwnsVQx#b)sIts_ZNXrqz#e?eZxS%^90D?^$QW&JHpm8ynY=1qaa zT)z)2Z~D^4pGtII*;T;J=Z=T~H(6M?O&d3;iwp;@VgiJ5v-8A9r97$Q6Te!$OZ2GbA;gvEWg9%pb0<0^792{qG7sT67s?K+#N2NfU z5&1QSxZBg1`DlZeMmOm^B5~A)xVt4KySZPZPhEF_i6n4{?A- z`Xt!P@KV^I5wq2hP>@xKHjJv~8j2pqgn4XF-%O>)%--sTy&*sUB0$_PnEZ%!PQ;FY zcO4H6478D3@WhB_oY$8y>vbz8QWe#v;$C}1@=cLof45pJfsDF)fZk7}v~*w910$C` z$s_8s%kEO#7xR}tdK#Chre?|7_Rxi15Pusx#rq!+3t|fW3 zsb=YW(93PJVa$^ze{@ZQbcQ)vE|TV4Uh4a&I6uW4#~`=VEL!akswj*G+|D&7Z#Wre zn}4OWxbe{i z+ei@X6hAU-g8jPf;g_^F=`u;J>XPrN;R>kcE)eFi^}V~JQ(QjRBjDPXtR}-G?zUW_ zEU-%o`Zak0(kMGc;XN{-3opKmKIoM0425-ejlH9{MX&nkkiS7Sr8RnbXX%`sP(MzF-m3?#zvK82q7bqa0K4d{ z$luvg$d0(faMf;Y~&(zn`XQ)+Uw)!U#ZRWv|JP(B=gm6EdBJj zcAPDNhJOMGRwsx+IcjlbAgzk$h6P*pcx zuI%))&?Q7RVW+?JA-hnpGEIGR%0q7^u;l?SyQHa!zL<+-hvXbJ?XOC7FL$OXP-=dW3%K%f{)Ypx^~>) zLKnda-a_wY+~oZc!`H^p7lxR6TOrtuo>PND3rc$;!qX`=xt;M7G(=*C&60B2(^KA7 zZsIt1^DB&bt-=_kn=;qcs2R&dkmE9d$;7;z-)yKu^KqIWP1wuG|KJR zdw{9fWozBw6KLY6c6g$5a)$wzQtq2sERz^_?C<3tEfjyktM1^|*YQELi3ISVIQxFg zO2LS3jNC6noFnkqD;fFBN8~RX`CK?iHAdy64CJY3Ci&_=N4zf^Slm7&2Gzg$e4pAi z2=&2{fV)0X56+2jpTv)$83zL>Qx-1ih~++?Zah;G*ohw{ZE58>A7v!mMAwpL6W{YX ze5lOOVH)w+>|TS=Mfrthxsr6)>i2+b7Bp;#oc;Kz9Tl8HW}Kgb0F~#Jk}XO8*1-&W z`<0{Hc@yb?Tl8+@p50Octeo&mK_bDy&uD$V7gr6V7t$=1)P`Y7>tQUZxM#1X!VVOB z)PmwhTar#4X+r#YTDaaA;y9VwlNj!UiKHfs=<{=!JKZ^Guhl#eLwDDQCy?@x&eAwf&LRt*Xw%?JWmNvO0^7j}jQ*q0( zK@c~$Bu?@KHKv2NmI1D%L<)Y3CktIG!rfOr$w;XFMRwav=a?VJxS zk(iZMuG7`eOn{bdz=-qiRaUiWVzS1gYRIoKPCz%xu9-zm-b${WuV3keN)$|icvHqr zUvmuJD!2w~7NKYC&S8gT1lM>>CVE(qyt*_u0*94USoYT(@S=Z5Y)9Y4Gpjn4g?&)0 zIaKaGS;W$%oBQh7P(ojVC#s;j;G=(#g7xTR6_wdp1ej$Jty73mWG*s{Yn=M4f9QZ_ zrIznGsX7lQW3uF8Mw|4xC@&g$bJpvt1eOHr5wSjtn>OE}Qjt0Ixp~w$rtUAL73bXt zIeU5p8%ZY6iS&AmI(M8)jaden>TaS`rdmU#cuq|$z^zPZXTR(c$vWYS(V%ZUkW2*iIKVYUv z7DDpVCC+Gi!T!>V&>ChcJ7Ar7CNb8Uj*?@&qIgE{Jjvx6SGXA@{S)<49F0+$30_aj zR0JXsm-BT{sQtG5+@NPBQl_CLu-B4R8TW80I)^l+=ptopkUk{wAhHTVZnUbqo)sk6 zz+N>JqVs-YR4KJaN-qU517aaxe(3VO$hU^@d7@5)caZO!+#-sa6o2ujG~!U#14Epo zjce;r+dbpnLrq^xDXPy78XwFg6*C~8e=bUt*v|7}JUP1ZWZm1D>>{<=*+|@7W`r=y zsZhoy3RPE``^bG^xyi#+Gyepr&fgp#%mvi<+ z>F%(TlKN~s&B;MZRJB=(wFr{CDt5JsgI=IUsjd(TP2=eXB-56>wiH%vJ_PnYMc6PRDL1SM2?4L zhSg@y(Yn<`nE9w-kMz2}n1{r+^BI~SKCh2l*J1D(*wS2RQz76de`kwT&?P?ZW`9bl zLY*QiuTNt2RG#hv8 zHazP@&Payk1gO`9z%@QUZVEF~?z!KO&->y1@SgMK{I2Wz|E}NpUDx?=LKLbL0MK_Syl-u0%#FD z{fIbc7ZM<@CGn$30nqROi8u@z4;Z0IE}#d61A}%4%m^49Nb-XN8t(3Z>5mg$fGOw& z5`91{9Ew5#SR94~ID*bNJQVRm(s{fS0Z{lOjP>^XGvxyky(pppaf(O@fFcn~z`Oea zSkMWIFeXrXffObG8>0S6uIugYZjAQ$LH~zM|Dyoyfpho!cL~A6)0+em0V4tyB;x;B zY6<>fnGuM^d3*dbd!B@L$6++^&h8+9{Nv4VUb;A65Nm=XVO#(ww7VDhGY;ah|FGbP zb3Y{@EX>W%>uQ|-bEZEHGC||;By&H{zj%L?|HKske>nI9Q^rQb`2w~`I1-7X2%y}5 zKH2}Hn+_gBz~b=EfQ+0xfF=^reo)HxP>>uDfC6xMEa(e>z7)v_IG#YFbO0!>1OZM2 zBJ{^dW#s{cCuLa(*dOtqA$eH(x&8tv%x=FxB>>^^H>7+72>dUQqKfznq)`2>5rv{qdH<4C1Q0$y;eT%D4{tR! z3BCa+SwLDwj$#y2K><*ZLk9h4-6cHksyBFE8<0aHDHSq5Cxh`O5%k~j%fu_U_YCPuvDrq{!|AHu>5&q?PE-9*)=!nx?6m`oDn2qerOxf)i(bo-c|9vs?dEd1+(3k9{)R*tfK(2~v#J|aO(T!Ei zHIAB#5e>LYN8nAv<(EGfzii7MUz(jn;B$v1@L7(w@`;p*osi?qk{X3uL{ADLqZ>uz z@@Kt8dgDuq4SjQM7G*uHqDzzypWRWhmA(Gy^e(+R2Kw<9w1p$+wwJKPeRAl6$r^Qk ztyn5TYq-F-O;f9%>}%H8QKGYNNB>yNs609;+v;q0UyI?#j@Tp?=1V2{YaY@afu6Ss zS6l8}J2GN3uUl{&sXo^$To=;+m?pAJG4IIU>MT#mD^9MkLN4{M^K{?%?YnwRdQZOd zl;(`L7-Abm_zb{IOU4tKXM`1BHLTA)gatqxm>aZwPxN#wOQddJ@#uDJ+n=U8lo7k$ zs`XKOp#AV@z8&T`KrZ89;I;7bKC4wv=R9K1qr=;w$PoLa?S0t7eoSE|Iax3wVlC_J zsECEfaO9>&&KM7G>+?mpd(dU(lUs@YkpevRqpsf;oV`pqSTYm^2G4w!R^#_f-oM>n zG4Lkr*8Y8fn_BDm(p7n&i9yDkJsPzK2D-kf(wAfmqGKSXMZ@o>NN>hR^{jTC;#qgw zZj<|6%0<}#zR|m21w48z8^La)&W2>E&+?H7(fzP>zegu^WVWYdM?LXWk*i^6OxzIh z;Hh;%jCKx=*OJn{FbIQ5G>t>SA`=YGRji?1nEp0j3(TwYl zXq3DzVaW>|bBXG_+Gx(h%A+XW(tc2E5QA*DMX1E{m>BgzwpEwOFAenJ1JCW3OCELr6 zow}Vj=4xWQ7&u&c6Bz1hZGCl9^c^IM{2X=`^`~CFV?H(PfL+?$5f9GxJN$ON{?o{o zNxPK3xBaY-Teaha>8#OCa&mJEp0+CRA}H*dls!g?bs%BCW$`$APdNWaP_sYF97+AONRP;J8h9LA? zq7I;q`EK{dkuAh_ z(o5(+O!+C-4co_4J?6{1_vQH=?GOR_#&*7hD^xZ18e98IOsRdR3#v`|s#=ataR)$r z*59E^wiUg2MzLLAQ_bVt>gC3B*Q;8)V=jl)uG6e&+m)X49D85L zaF^O%_Ho+{jqr-@D%k|lUXI^V;@as{GaPSApV5^P1XqEvw^Ucz69|ur_a@|J?~hhI zOZU&#XWZtm`NSoAw-L5~YOaS^eR<<-s?EDvnp;^DXX3{-(-fwUL)G=~*}I5-H{NQV zrmbe|SrnX*ZwwXL_boniahzXnUxdf1%5871NhyaR5Ek%(HMNFe{&k;)PuN7+QA>~I zd?w-7RxYXWopiG&(;Mit@@q0VCj>(qD-V0+vO|^WpMEW?m5k+MPYz9lC<; zXkuL|veV%;4yTwk3pO_5Y=+=trMhfy6nljCLtvo}I}ZdIF~=j1xsx0X0HT9tv!%59 zgE@l1>91?pK`V*k`gO=--_qA1M^3K9#2G1{*OuBucEi@S9hTMF4Y00P>W{s!ec5Jz zye(~9sK}{+pXma*ESxPisHhcY&TNoPrD`KO=783@fokfe#Bg_TEg+RQVhVBKl0bxxH$bL-{g#!kWgn%r-FE2U63Cfjoj!F)Gje12<4 zt)gKL9Q%MXO;Ra0$%;?f@_NHwd-XI*^%CRQ=vF}8r;#Y@^8~}8d+epcvQ^d`+j6^E z{(E}mS689XNNg@+Ww=5}zE4lO(guSoBZ&@4KWKQjQfU4(UBJBgpfi`!%Ibif^B~ZRTlBSdveV4X3T<8#WhdIg3nRyIcB>!(!C2_7?n!O1tO2kB(jNImYL$RnOQp6JW{Ku9EHCg!1@a5Ucqo<%D!aZE@c zHhM#;{_?)(h{U2Nw4N(qYkMy)s+;M5jHEC9%IE0 zhvf&K;0mxT`3;+g_fLtd@1aoN*pW@gfGbWx?G?XYB)@yDuUP6T{L-(V&C^P=r3V_d zJMCmKvEm9VHs;Y*z2w>4cu!=!xG*Yp2FGDI9lxX~P@%DXB+r}bQj*A%cCl+O+c$V( zVy4;^c|oaFNtyF$*qyqt(H$S&n3%Psm9QlHfsG69R4j@)3hXS#X9V%bCaYKi#+ST< z^rj7Wu0SfiXs{vzlEdxI1*g$}xpH1T_h$={x z^*5|6rsFdb=E8cs*KW|?l{@$P*DsTqVbR9~>dehGhy8Y*NkTrn2qs%}m}#=f^PUj$ z^07LIN{P}8x)-d9?MfmWZAe!80Ey3NNhj!Pj#W&BrWz7;GgK?vyE~WeW9>d*v`;LV zDPj1v%j0iy9c8bMsmlvo8Me%I3cMPjGV@6^kGg@`v7oZ(>cGUHs?{rV?R-b|3`x%> z#MY5;r&lxgC82|P?`MOb9NpYh+8N^xVRm^0D|31gQI5GZKQ^EjkTJBG*D4dO7Mq{j zHnSEjW%-43e?1)#Z!U*R+RuOVPpdq~>h5A1cI&C|j!4@XI+{-{`R(9_tk}L~)TZ&|P)4s0iz)`&0MKSw0Z8nG?&C zRrb2R^BjC$pi954G#xY{b&mV&M46W=Z6f5Y1V7=C<2<9bVyzriFg%hNtQDZZnlX_w z$AvbnE5w(joSu@N;|g=jWS_1AAJ9Zcl;!XL_nT23%g`H`R}mL&$;TQfg7yq7h% zEHz?k4;VTp!8M_1o9@mA(leyyE|)Bhj5R)56A$fA<@rogMRtox(=$T785zCp;ZnT8 zQ4{^K#E;Ks()?TWf^YIvl#pRPDfvL3CD!rVF{6fr<-n`Ph3M1DamAwcG!G5aU10`? z+jp{~$*hG6p#(_cLUJ&RC=3X>nO zs4EcQ?(i?;m)v-z%t#{&3yRQFn}62ImaC2*cIrkFV9|Nx_=KESn%{0t`@i#Bmorek zKD2VaL1-|dvIVlp^xf}$6JMFW@E*5%fXqRx9-tzc{d8RLiiS~F{G#q7mlc_gn*^Ew z-scVE@S)9@cz$b3(wJ4?t!eVf@dj^>wmd?d#Sybxk^FO5_65E5%4JQo*yc#0kBN7) CrWhpv literal 0 HcmV?d00001 diff --git a/src/UglyToad.PdfPig.Tests/Fonts/Type1/Type1FontParserTests.cs b/src/UglyToad.PdfPig.Tests/Fonts/Type1/Type1FontParserTests.cs index 88a86698..d2ac5622 100644 --- a/src/UglyToad.PdfPig.Tests/Fonts/Type1/Type1FontParserTests.cs +++ b/src/UglyToad.PdfPig.Tests/Fonts/Type1/Type1FontParserTests.cs @@ -3,10 +3,8 @@ using System; using System.IO; using System.Linq; - using System.Text; using PdfPig.Fonts.Type1.Parser; using PdfPig.IO; - using PdfPig.Util; using Xunit; public class Type1FontParserTests @@ -30,6 +28,14 @@ parser.Parse(new ByteArrayInputBytes(bytes), 0, 0); } + [Fact] + public void CanReadCharStrings() + { + var bytes = GetFileBytes("CMBX10.pfa"); + + parser.Parse(new ByteArrayInputBytes(bytes), 0, 0); + } + [Fact] public void CanReadAsciiPart() { diff --git a/src/UglyToad.PdfPig.Tests/Integration/LaTexTests.cs b/src/UglyToad.PdfPig.Tests/Integration/LaTexTests.cs index 8e913674..0d6e7a7f 100644 --- a/src/UglyToad.PdfPig.Tests/Integration/LaTexTests.cs +++ b/src/UglyToad.PdfPig.Tests/Integration/LaTexTests.cs @@ -28,6 +28,17 @@ } } + [Fact] + public void LettersHaveHeight() + { + using (var document = PdfDocument.Open(GetFilename())) + { + var page = document.GetPage(1); + + Assert.NotEqual(0, page.Letters[0].GlyphRectangle.Height); + } + } + [Fact] public void HasCorrectNumberOfPages() { diff --git a/src/UglyToad.PdfPig/Fonts/Type1/Parser/Type1EncryptedPortionParser.cs b/src/UglyToad.PdfPig/Fonts/Type1/Parser/Type1EncryptedPortionParser.cs index 46aca41f..f04b7a68 100644 --- a/src/UglyToad.PdfPig/Fonts/Type1/Parser/Type1EncryptedPortionParser.cs +++ b/src/UglyToad.PdfPig/Fonts/Type1/Parser/Type1EncryptedPortionParser.cs @@ -2,6 +2,7 @@ { using System.Collections.Generic; using System.Linq; + using IO; using PdfPig.Parser.Parts; using Tokenization.Tokens; using Util; @@ -11,7 +12,7 @@ private const ushort EexecEncryptionKey = 55665; private const int EexecRandomBytes = 4; - public void Parse(IReadOnlyList bytes) + public IReadOnlyList Parse(IReadOnlyList bytes) { if (!IsBinary(bytes)) { @@ -20,7 +21,23 @@ var decrypted = Decrypt(bytes, EexecEncryptionKey, EexecRandomBytes); + // line 461 of type1parser.java var str = OtherEncodings.BytesAsLatin1String(decrypted.ToArray()); + + var tokenizer = new Type1Tokenizer(new ByteArrayInputBytes(decrypted)); + while (tokenizer.CurrentToken != null) + { + tokenizer.GetNext(); + } + + /* + * After 4 random characters follows the /Private dictionary and the /CharString dictionary. + * The first defines a number of technical terms involving character construction, and contains also an array of subroutines used in character paths. + * The second contains the character descriptions themselves. + * Both the subroutines and the character descriptions are yet again encrypted in a fashion similar to the entire binary segment, but now with an initial value of R = 4330 instead of 55665. + */ + + return decrypted; } /// @@ -89,6 +106,16 @@ private static IReadOnlyList Decrypt(IReadOnlyList bytes, int key, int randomBytes) { + /* + * We start with three constants R = 55665, c1 = 52845 and c2 = 22719. + * Then we apply to the entire binary array c[i] of length n the decryption procedure: + * for in [0, n): + * p[i] = c[i]^(R >> 8) + * R = ((c[i] + R)*c1 + c2) & ((1 << 16) - 1) + * + * Here ^ means xor addition, in which one interprets the bits modulo 2. + * The encryption key R changes as the procedure is carried out. + */ if (randomBytes == -1) { return bytes; diff --git a/src/UglyToad.PdfPig/Fonts/Type1/Parser/Type1FontParser.cs b/src/UglyToad.PdfPig/Fonts/Type1/Parser/Type1FontParser.cs index 6edc69af..8fa7a520 100644 --- a/src/UglyToad.PdfPig/Fonts/Type1/Parser/Type1FontParser.cs +++ b/src/UglyToad.PdfPig/Fonts/Type1/Parser/Type1FontParser.cs @@ -5,6 +5,7 @@ using Exceptions; using Geometry; using IO; + using PdfPig.Parser.Parts; using Tokenization; using Tokenization.Scanner; using Tokenization.Tokens; @@ -14,7 +15,8 @@ private const string ClearToMark = "cleartomark"; private const int PfbFileIndicator = 0x80; - + private const int EexecKey = 55665; + private readonly Type1EncryptedPortionParser encryptedPortionParser; public Type1FontParser(Type1EncryptedPortionParser encryptedPortionParser) @@ -146,7 +148,7 @@ var matrix = GetFontMatrix(dictionaries); var boundingBox = GetBoundingBox(dictionaries); - encryptedPortionParser.Parse(eexecPortion); + var binaryPortion = encryptedPortionParser.Parse(eexecPortion); return new Type1Font(name, encoding, matrix, boundingBox ?? new PdfRectangle()); } @@ -349,7 +351,6 @@ return new ArrayToken(result); } - private static Dictionary GetEncoding(IReadOnlyList dictionaries) { var result = new Dictionary(); diff --git a/src/UglyToad.PdfPig/Fonts/Type1/Parser/Type1Token.cs b/src/UglyToad.PdfPig/Fonts/Type1/Parser/Type1Token.cs new file mode 100644 index 00000000..1339bd05 --- /dev/null +++ b/src/UglyToad.PdfPig/Fonts/Type1/Parser/Type1Token.cs @@ -0,0 +1,90 @@ +namespace UglyToad.PdfPig.Fonts.Type1.Parser +{ + using System; + using System.Collections.Generic; + + internal class Type1DataToken : Type1Token + { + public IReadOnlyList Data { get; } + + public Type1DataToken(TokenType type, IReadOnlyList data) : base(type) + { + if (type != TokenType.Charstring) + { + throw new ArgumentException($"Invalid token type for type 1 token receiving bytes, expected Charstring, got {type}."); + } + + Data = data; + } + + public override string ToString() + { + return $"Token[type = {Type}, data = {Data.Count} bytes]"; + + } + } + + internal class Type1TextToken : Type1Token + { + public string Text { get; } + + public Type1TextToken(char c, TokenType type) : this(c.ToString(), type) { } + public Type1TextToken(string text, TokenType type) : base(type) + { + Text = text; + } + + public int AsInt() + { + return (int)AsFloat(); + } + + public float AsFloat() + { + return float.Parse(Text); + } + + public bool AsBool() + { + return string.Equals(Text, "true", StringComparison.OrdinalIgnoreCase); + } + + public override string ToString() + { + return $"Token[type={Type}, text={Text}]"; + } + } + + internal class Type1Token + { + public TokenType Type { get; } + + public Type1Token(TokenType type) + { + Type = type; + } + + public enum TokenType + { + None, + String, + Name, + Literal, + Real, + Integer, + /// + /// An array must begin with either '[' or '{'. + /// + StartArray, + /// + /// An array must end with either ']' or '}'. + /// + EndArray, + StartProc, + EndProc, + StartDict, + EndDict, + Charstring + } + } +} diff --git a/src/UglyToad.PdfPig/Fonts/Type1/Parser/Type1Tokenizer.cs b/src/UglyToad.PdfPig/Fonts/Type1/Parser/Type1Tokenizer.cs new file mode 100644 index 00000000..7888adc0 --- /dev/null +++ b/src/UglyToad.PdfPig/Fonts/Type1/Parser/Type1Tokenizer.cs @@ -0,0 +1,387 @@ +namespace UglyToad.PdfPig.Fonts.Type1.Parser +{ + using System; + using System.Text; + using IO; + using PdfPig.Parser.Parts; + + internal class Type1Tokenizer + { + private readonly StringBuilder commentBuffer = new StringBuilder(); + private readonly StringBuilder literalBuffer = new StringBuilder(); + private readonly StringBuilder stringBuffer = new StringBuilder(); + + private readonly IInputBytes bytes; + + private int openParens; + private Type1Token previousToken; + + public Type1Token CurrentToken { get; private set; } + + public Type1Tokenizer(IInputBytes bytes) + { + this.bytes = bytes; + CurrentToken = ReadNextToken(); + } + + public Type1Token GetNext() + { + CurrentToken = ReadNextToken(); + return CurrentToken; + } + + private Type1Token ReadNextToken() + { + previousToken = CurrentToken; + bool skip; + do + { + skip = false; + while (bytes.MoveNext()) + { + var b = bytes.CurrentByte; + var c = (char)b; + + switch (c) + { + case '%': + var comment = ReadComment(); + break; + case '(': + return ReadString(); + case ')': + throw new InvalidOperationException("Encountered an end of string ')' outside of string."); + case '[': + return new Type1TextToken(c, Type1Token.TokenType.StartArray); + case ']': + return new Type1TextToken(c, Type1Token.TokenType.EndArray); + case '{': + return new Type1TextToken(c, Type1Token.TokenType.StartProc); + case '}': + return new Type1TextToken(c, Type1Token.TokenType.EndProc); + case '/': + { + var name = ReadLiteral(); + return new Type1TextToken(name, Type1Token.TokenType.Literal); + } + case '<': + { + var following = bytes.Peek(); + if (following == '<') + { + bytes.MoveNext(); + return new Type1TextToken("<<", Type1Token.TokenType.StartDict); + } + + return new Type1TextToken(c, Type1Token.TokenType.Name); + } + case '>': + { + var following = bytes.Peek(); + if (following == '>') + { + bytes.MoveNext(); + return new Type1TextToken(">>", Type1Token.TokenType.EndDict); + } + + return new Type1TextToken(c, Type1Token.TokenType.Name); + } + default: + { + if (ReadHelper.IsWhitespace(b)) + { + skip = true; + break; + } + + if (b == 0) + { + skip = true; + break; + } + + if (TryReadNumber(out var number)) + { + return number; + } + + var name = ReadLiteral(c); + if (name == null) + { + throw new InvalidOperationException($"The binary portion of the type 1 font was invalid at position {bytes.CurrentOffset}."); + } + + if (name.Equals("RD") || name.Equals("-|")) + { + if (previousToken.Type == Type1Token.TokenType.Integer) + { + return ReadCharString(((Type1TextToken)previousToken).AsInt()); + } + + throw new InvalidOperationException($"Expected integer token before {name} at offset {bytes.CurrentOffset}."); + } + + return new Type1TextToken(name, Type1Token.TokenType.Name); + } + } + } + } while (skip); + + return null; + } + + private Type1TextToken ReadString() + { + char GetNext() + { + bytes.MoveNext(); + return (char)bytes.CurrentByte; + } + stringBuffer.Clear(); + + while (bytes.MoveNext()) + { + var c = (char)bytes.CurrentByte; + + // string context + switch (c) + { + case '(': + openParens++; + stringBuffer.Append('('); + break; + case ')': + if (openParens == 0) + { + // end of string + return new Type1TextToken(stringBuffer.ToString(), Type1Token.TokenType.String); + } + stringBuffer.Append(')'); + openParens--; + break; + case '\\': + // escapes: \n \r \t \b \f \\ \( \) + char c1 = GetNext(); + switch (c1) + { + case 'n': + case 'r': stringBuffer.Append("\n"); break; + case 't': stringBuffer.Append('\t'); break; + case 'b': stringBuffer.Append('\b'); break; + case 'f': stringBuffer.Append('\f'); break; + case '\\': stringBuffer.Append('\\'); break; + case '(': stringBuffer.Append('('); break; + case ')': stringBuffer.Append(')'); break; + } + // octal \ddd + if (char.IsDigit(c1)) + { + var rawOctal = new string(new[] { c1, GetNext(), GetNext() }); + var code = Convert.ToInt32(rawOctal, 8); + stringBuffer.Append((char)code); + } + break; + case '\r': + case '\n': + stringBuffer.Append("\n"); + break; + default: + stringBuffer.Append(c); + break; + } + } + return null; + } + + private bool TryReadNumber(out Type1TextToken numberToken) + { + char GetNext() + { + bytes.MoveNext(); + return (char)bytes.CurrentByte; + } + + numberToken = null; + + var currentPosition = bytes.CurrentOffset; + + var sb = new StringBuilder(); + StringBuilder radix = null; + + char c = GetNext(); + var hasDigit = false; + + // optional + or - + if (c == '+' || c == '-') + { + sb.Append(c); + c = GetNext(); + } + + // optional digits + while (char.IsDigit(c)) + { + sb.Append(c); + c = GetNext(); + hasDigit = true; + } + + // optional . + if (c == '.') + { + sb.Append(c); + c = GetNext(); + } + else if (c == '#') + { + // PostScript radix number takes the form base#number + radix = sb; + sb = new StringBuilder(); + c = GetNext(); + } + else if (sb.Length == 0 || !hasDigit) + { + // failure + bytes.Seek(currentPosition); + return false; + } + else + { + // integer + bytes.Seek(bytes.CurrentOffset - 1); + + numberToken = new Type1TextToken(sb.ToString(), Type1Token.TokenType.Integer); + return true; + } + + // required digit + if (char.IsDigit(c)) + { + sb.Append(c); + c = GetNext(); + } + else + { + bytes.Seek(currentPosition); + return false; + } + + // optional digits + while (char.IsDigit(c)) + { + sb.Append(c); + c = GetNext(); + } + + // optional E + if (c == 'E') + { + sb.Append(c); + c = GetNext(); + + // optional minus + if (c == '-') + { + sb.Append(c); + c = GetNext(); + } + + // required digit + if (char.IsDigit(c)) + { + sb.Append(c); + c = GetNext(); + } + else + { + bytes.Seek(currentPosition); + return false; + } + + // optional digits + while (char.IsDigit(c)) + { + sb.Append(c); + c = GetNext(); + } + } + + bytes.Seek(bytes.CurrentOffset - 1); + if (radix != null) + { + var number = Convert.ToInt32(sb.ToString(), int.Parse(radix.ToString())); + numberToken = new Type1TextToken(number.ToString(), Type1Token.TokenType.Integer); + } + else + { + numberToken = new Type1TextToken(sb.ToString(), Type1Token.TokenType.Real); + } + + return true; + } + + private string ReadLiteral(char? previousCharacter = null) + { + literalBuffer.Clear(); + if (previousCharacter.HasValue) + { + literalBuffer.Append(previousCharacter); + } + + do + { + var b = bytes.Peek(); + if (!b.HasValue) + { + break; + } + + var c = (char)b; + + if (char.IsWhiteSpace(c) || c == '(' || c == ')' || c == '<' || c == '>' || + c == '[' || c == ']' || c == '{' || c == '}' || c == '/' || c == '%') + { + break; + } + + literalBuffer.Append(c); + } while (bytes.MoveNext()); + + var literal = literalBuffer.ToString(); + return literal.Length == 0 ? null : literal; + } + + private string ReadComment() + { + commentBuffer.Clear(); + + while (bytes.MoveNext()) + { + var c = (char)bytes.CurrentByte; + if (ReadHelper.IsEndOfLine(c)) + { + continue; + } + + commentBuffer.Append(c); + } + + return commentBuffer.ToString(); + } + + private Type1DataToken ReadCharString(int length) + { + // Skip preceding space. + bytes.MoveNext(); + + byte[] data = new byte[length]; + for (int i = 0; i < length; i++) + { + bytes.MoveNext(); + data[i] = bytes.CurrentByte; + } + + return new Type1DataToken(Type1Token.TokenType.Charstring, data); + } + } +}