From 04874c25f5bba33febdaedb0e542a3355c39c96b Mon Sep 17 00:00:00 2001 From: BobLd Date: Tue, 1 Dec 2020 14:57:47 +0000 Subject: [PATCH 1/6] - implement flexpoints - deactivate SetCurrentPointCommand() - need more investigation - make SbwCommand static - add data.pdf and test --- .../Arithmetic/CallOtherSubrCommand.cs | 55 +++++++++++++----- .../Arithmetic/SetCurrentPointCommand.cs | 6 +- .../PathConstruction/HMoveToCommand.cs | 14 ++--- .../PathConstruction/RMoveToCommand.cs | 2 +- .../PathConstruction/VMoveToCommand.cs | 4 +- .../Commands/StartFinishOutline/SbwCommand.cs | 2 +- .../Commands/Type1BuildCharContext.cs | 6 +- .../Fonts/Type1/Type1CharStringParserTests.cs | 39 +++++++++++++ .../Integration/Documents/data.pdf | Bin 0 -> 28330 bytes 9 files changed, 100 insertions(+), 28 deletions(-) create mode 100644 src/UglyToad.PdfPig.Tests/Fonts/Type1/Type1CharStringParserTests.cs create mode 100644 src/UglyToad.PdfPig.Tests/Integration/Documents/data.pdf diff --git a/src/UglyToad.PdfPig.Fonts/Type1/CharStrings/Commands/Arithmetic/CallOtherSubrCommand.cs b/src/UglyToad.PdfPig.Fonts/Type1/CharStrings/Commands/Arithmetic/CallOtherSubrCommand.cs index b7fb85ed..2505f4cf 100644 --- a/src/UglyToad.PdfPig.Fonts/Type1/CharStrings/Commands/Arithmetic/CallOtherSubrCommand.cs +++ b/src/UglyToad.PdfPig.Fonts/Type1/CharStrings/Commands/Arithmetic/CallOtherSubrCommand.cs @@ -3,6 +3,8 @@ using System; using System.Collections.Generic; using System.Diagnostics; + using UglyToad.PdfPig.Core; + using UglyToad.PdfPig.Fonts.Type1.CharStrings.Commands.PathConstruction; /// /// Call other subroutine command. Arguments are pushed onto the PostScript interpreter operand stack then @@ -23,13 +25,13 @@ public static bool TakeFromStackBottom { get; } = false; public static bool ClearsOperandStack { get; } = false; - + public static LazyType1Command Lazy { get; } = new LazyType1Command(Name, Run); public static void Run(Type1BuildCharContext context) { - var index = (int) context.Stack.PopTop(); - + var index = (int)context.Stack.PopTop(); + // What it should do var numberOfArguments = (int)context.Stack.PopTop(); var otherSubroutineArguments = new List(numberOfArguments); @@ -42,17 +44,44 @@ { // Other subrs 0-2 implement flex case FlexEnd: - { - context.IsFlexing = false; - // TODO: I don't really care about flexpoints, but we should probably handle them... one day. - //if (context.FlexPoints.Count < 7) - //{ - // throw new NotSupportedException("There must be at least 7 flex points defined by an other subroutine."); - //} + { + // https://github.com/apache/pdfbox/blob/2c23d8b4e3ad61852f0b6ee2b95b907eefba1fcf/fontbox/src/main/java/org/apache/fontbox/cff/Type1CharString.java#L339 + context.IsFlexing = false; + if (context.FlexPoints.Count < 7) + { + throw new NotSupportedException("There must be at least 7 flex points defined by an other subroutine."); + } - context.ClearFlexPoints(); - break; - } + // reference point is relative to start point + PdfPoint reference = context.FlexPoints[0]; + reference = reference.Translate(context.CurrentPosition.X, context.CurrentPosition.Y); + + // first point is relative to reference point + PdfPoint first = context.FlexPoints[1]; + first = first.Translate(reference.X, reference.Y); + + // make the first point relative to the start point + first = first.Translate(-context.CurrentPosition.X, -context.CurrentPosition.Y); + + context.Stack.Push(first.X); + context.Stack.Push(first.Y); + context.Stack.Push(context.FlexPoints[2].X); + context.Stack.Push(context.FlexPoints[2].Y); + context.Stack.Push(context.FlexPoints[3].X); + context.Stack.Push(context.FlexPoints[3].Y); + RelativeRCurveToCommand.Run(context); + + context.Stack.Push(context.FlexPoints[4].X); + context.Stack.Push(context.FlexPoints[4].Y); + context.Stack.Push(context.FlexPoints[5].X); + context.Stack.Push(context.FlexPoints[5].Y); + context.Stack.Push(context.FlexPoints[6].X); + context.Stack.Push(context.FlexPoints[6].Y); + RelativeRCurveToCommand.Run(context); + + context.ClearFlexPoints(); + break; + } case FlexBegin: Debug.Assert(otherSubroutineArguments.Count == 0, "Flex begin should have no arguments."); diff --git a/src/UglyToad.PdfPig.Fonts/Type1/CharStrings/Commands/Arithmetic/SetCurrentPointCommand.cs b/src/UglyToad.PdfPig.Fonts/Type1/CharStrings/Commands/Arithmetic/SetCurrentPointCommand.cs index a6679225..f49634e3 100644 --- a/src/UglyToad.PdfPig.Fonts/Type1/CharStrings/Commands/Arithmetic/SetCurrentPointCommand.cs +++ b/src/UglyToad.PdfPig.Fonts/Type1/CharStrings/Commands/Arithmetic/SetCurrentPointCommand.cs @@ -4,6 +4,8 @@ /// /// Sets the current point to (x, y) in absolute character space coordinates without performing a charstring moveto command. + /// This establishes the current point for a subsequent relative path building command. + /// The 'setcurrentpoint' command is used only in conjunction with results from 'OtherSubrs' procedures. /// internal static class SetCurrentPointCommand { @@ -22,8 +24,8 @@ var x = context.Stack.PopBottom(); var y = context.Stack.PopBottom(); - context.CurrentPosition = new PdfPoint(x, y); - + //context.CurrentPosition = new PdfPoint(x, y); + // TODO: need to investigate why odd behavior when the current point is actualy set. context.Stack.Clear(); } } diff --git a/src/UglyToad.PdfPig.Fonts/Type1/CharStrings/Commands/PathConstruction/HMoveToCommand.cs b/src/UglyToad.PdfPig.Fonts/Type1/CharStrings/Commands/PathConstruction/HMoveToCommand.cs index 9e4567cd..0c415f81 100644 --- a/src/UglyToad.PdfPig.Fonts/Type1/CharStrings/Commands/PathConstruction/HMoveToCommand.cs +++ b/src/UglyToad.PdfPig.Fonts/Type1/CharStrings/Commands/PathConstruction/HMoveToCommand.cs @@ -19,19 +19,19 @@ public static void Run(Type1BuildCharContext context) { - var x = context.Stack.PopBottom(); - - var actualX = context.CurrentPosition.X + x; - var y = context.CurrentPosition.Y; + var deltaX = context.Stack.PopBottom(); if (context.IsFlexing) { - // TODO: flex support + // not in the Type 1 spec, but exists in some fonts + context.AddFlexPoint(new PdfPoint(deltaX, 0)); } else { - context.CurrentPosition = new PdfPoint(actualX, y); - context.Path.MoveTo(actualX, y); + var x = context.CurrentPosition.X + deltaX; + var y = context.CurrentPosition.Y; + context.CurrentPosition = new PdfPoint(x, y); + context.Path.MoveTo(x, y); } context.Stack.Clear(); diff --git a/src/UglyToad.PdfPig.Fonts/Type1/CharStrings/Commands/PathConstruction/RMoveToCommand.cs b/src/UglyToad.PdfPig.Fonts/Type1/CharStrings/Commands/PathConstruction/RMoveToCommand.cs index 1aa75dfa..8d4efcd0 100644 --- a/src/UglyToad.PdfPig.Fonts/Type1/CharStrings/Commands/PathConstruction/RMoveToCommand.cs +++ b/src/UglyToad.PdfPig.Fonts/Type1/CharStrings/Commands/PathConstruction/RMoveToCommand.cs @@ -30,7 +30,7 @@ if (context.IsFlexing) { - + context.AddFlexPoint(new PdfPoint(deltaX, deltaY)); } else { diff --git a/src/UglyToad.PdfPig.Fonts/Type1/CharStrings/Commands/PathConstruction/VMoveToCommand.cs b/src/UglyToad.PdfPig.Fonts/Type1/CharStrings/Commands/PathConstruction/VMoveToCommand.cs index 0099e208..f7a9beb6 100644 --- a/src/UglyToad.PdfPig.Fonts/Type1/CharStrings/Commands/PathConstruction/VMoveToCommand.cs +++ b/src/UglyToad.PdfPig.Fonts/Type1/CharStrings/Commands/PathConstruction/VMoveToCommand.cs @@ -1,6 +1,7 @@ namespace UglyToad.PdfPig.Fonts.Type1.CharStrings.Commands.PathConstruction { using Core; + using System; /// /// Vertical move to. Moves relative to the current point. @@ -23,7 +24,8 @@ if (context.IsFlexing) { - // TODO: flex commands + // not in the Type 1 spec, but exists in some fonts + context.AddFlexPoint(new PdfPoint(0, deltaY)); } else { diff --git a/src/UglyToad.PdfPig.Fonts/Type1/CharStrings/Commands/StartFinishOutline/SbwCommand.cs b/src/UglyToad.PdfPig.Fonts/Type1/CharStrings/Commands/StartFinishOutline/SbwCommand.cs index 93e749fe..e6b7a194 100644 --- a/src/UglyToad.PdfPig.Fonts/Type1/CharStrings/Commands/StartFinishOutline/SbwCommand.cs +++ b/src/UglyToad.PdfPig.Fonts/Type1/CharStrings/Commands/StartFinishOutline/SbwCommand.cs @@ -6,7 +6,7 @@ /// Sets left sidebearing and the character width vector. /// This command also sets the current point to(sbx, sby), but does not place the point in the character path. /// - internal class SbwCommand + internal static class SbwCommand { public const string Name = "sbw"; diff --git a/src/UglyToad.PdfPig.Fonts/Type1/CharStrings/Commands/Type1BuildCharContext.cs b/src/UglyToad.PdfPig.Fonts/Type1/CharStrings/Commands/Type1BuildCharContext.cs index 603a63c9..ff422bd4 100644 --- a/src/UglyToad.PdfPig.Fonts/Type1/CharStrings/Commands/Type1BuildCharContext.cs +++ b/src/UglyToad.PdfPig.Fonts/Type1/CharStrings/Commands/Type1BuildCharContext.cs @@ -28,7 +28,7 @@ public CharStringStack PostscriptStack { get; } = new CharStringStack(); - public IReadOnlyList FlexPoints { get; } + public List FlexPoints { get; } = new List(); public Type1BuildCharContext(IReadOnlyDictionary subroutines, Func characterByIndexFactory, @@ -41,7 +41,7 @@ public void AddFlexPoint(PdfPoint point) { - + FlexPoints.Add(point); } public PdfSubpath GetCharacter(int characterCode) @@ -61,7 +61,7 @@ public void ClearFlexPoints() { - + FlexPoints.Clear(); } } } diff --git a/src/UglyToad.PdfPig.Tests/Fonts/Type1/Type1CharStringParserTests.cs b/src/UglyToad.PdfPig.Tests/Fonts/Type1/Type1CharStringParserTests.cs new file mode 100644 index 00000000..19b013e8 --- /dev/null +++ b/src/UglyToad.PdfPig.Tests/Fonts/Type1/Type1CharStringParserTests.cs @@ -0,0 +1,39 @@ +using System; +using System.IO; +using UglyToad.PdfPig.Core; +using Xunit; + +namespace UglyToad.PdfPig.Tests.Fonts.Type1 +{ + public class Type1CharStringParserTests + { + [Fact] + public void CorrectBoundingBoxesFlexPoints() + { + PointComparer pointComparer = new PointComparer(new DoubleComparer(3)); + + var documentFolder = Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "Integration", "Documents")); + + var filePath = Path.Combine(documentFolder, "data.pdf"); + + using (var doc = PdfDocument.Open(filePath)) + { + var page = doc.GetPage(1); + + var letters = page.Letters; + + // check 'm' + var m = letters[0]; + Assert.Equal("m", m.Value); + Assert.Equal(new PdfPoint(253.4458, 658.431), m.GlyphRectangle.BottomLeft, pointComparer); + Assert.Equal(new PdfPoint(261.22659, 662.83446), m.GlyphRectangle.TopRight, pointComparer); + + // check 'p' + var p = letters[1]; + Assert.Equal("p", p.Value); + Assert.Equal(new PdfPoint(261.70778, 656.49825), p.GlyphRectangle.BottomLeft, pointComparer); + Assert.Equal(new PdfPoint(266.6193, 662.83446), p.GlyphRectangle.TopRight, pointComparer); + } + } + } +} diff --git a/src/UglyToad.PdfPig.Tests/Integration/Documents/data.pdf b/src/UglyToad.PdfPig.Tests/Integration/Documents/data.pdf new file mode 100644 index 0000000000000000000000000000000000000000..8bdda7f411ecd3958f53e62e74d84afc02367b93 GIT binary patch literal 28330 zcmbrk1yr3&(cM0wm95(Lm8r(Iwy99R#uE8N#aCdi~os;ve|DQXv)}1?d zueD!Vs_J>GclT4(-B2otOER!9a==ke{~BC^V!`gs2>)x=$-*H(JkhH{-;}S?g>@y()k(`+ns&?R^jXL1+v)4n9t+c zQ;VNqa_0HPXu;z!QK6#4td%qKoHLNr3A;)gZaP6=5A4A0J-Os5P`~7I&SYpB+ zKDAL`^?Pt-MWzKj z5e5Tdd5Uy>Z+Fgim^3T2*V7}MT%2+HUpv7|o^P7=)B5=*lJ{GK+hVc3y?6!n${8ir zUlWuWu5(Mr9B)@r%%oZ0OGPdBDU-Lh58vOe_WM&R2n=|#Y1DbTJlk-(x&`$uCn4F=8f%d60k);ufIl<&1S531VKVCm?I&7B%|{`N9@^px55oY z2^-7D7-9#Bgk9F{YlPd}SW=QSWFh6-V@4{RLJBYzbxI#>^{Ois-hKzj%i?^CLT>^I zg&n2S>E2xfuxTa2Qh#(q9nhmkIL#YA<#)bca5;PWzCs93I>C)Z41+A&jU$Z&R|MHq z!tA~Sz?Z2WgsA|SfYcBY^a8m0nDc;D16z`ge2Nz}(2?M(pwrk(3g=HCtfZABEA(LL z@oMpFIbiWGNr-?Ezq-E>f=FY3I|8-%$;-K)7GIn~+K6Nb1}b6K=d znO!zl2ryLQDTe3_!kh2L=t~%duY3^9`E(a^y!y?UXK|dqXC!dA zlcBKcgNt3I&N$??sY>eJwwTkhG;}{&mv+yXI6bePmNzLeF!ej*CiG|*+L%{U3ONU^ zbYEM-=H4deaMx{#Ue8@#PBPiMyFlTP)fIT)`rJ%*KV#%M_^Y8dE3+e?Gjay6*=bZc z_^X@uHbO zja8R^W zB0G(}2tc6{$24lGF~S4wdz%lf{SN0vE}9mv9=tY*L`oVAt1YqgqYlJKR0XwJI@rO# zp7eBGHGrA49Oi;xDYTa2leratHrJ_->EqbW+{6(-$0T_xtFBa_sV)r=H@yA@sH%?I ztj>;puE-f|>ts(QH$vCPnF%MFh62Rs3dQ&fTeAR5`lrVZLURj_aI?aEQ{_l``V_>L0f~(Z zMZQ;Jlse4ynYn_l_B&bNOOm`{W;Cml841uT*ve@TRc~+!oK2?oIvCMGt2L1Q0F|2uB5k0|Uar=q^<1MUd7;7kSbe!19v^HVAW$ zV+7g?GLnJ@!m48~Y0M06z5~#g&JMXlP+{aoj$L6=X9$B{Dq{={26IPH z>t$;B`c5(tVWDoKo}_WdMh9bTh2d) zF|e<&(i}@v3B7NBegd*TH4o}#1pSMxis-#Dj-MborLtdT3H@HNNcf`zolP<(YOIAo zd)MS4mpEs_9Dno<)gYFX_eG(sDGx3ZF5|9}tmdkD**g7Dg_&MivAO(hmOvHM!dG$H zZo(H5v~(6r*UlL!7;u6_*T&Z?X=h&>crt_x(!y7^dJci#f~HH1H#=pk4ai8p;jr`f zNM9i{k9i#jS+os6&~x|zFKkaZsR+;&Rhe-eC3L}Ej?we?LG6e>gsO=$_JCC{ekkBI z6=ovu*z)EHk_rG8u1@L$3?!YvJ($Wt{*0!-q5)M}!YazTEM1?q6HPQ<;FxazV(t$C zd;~`RWFA3#CUBz$(7FQa?&vZ+n}Tp49g;6$JU~#XsT+DbkoE+ZfZ1g5z28e^b~Ex$ zgd2#eKe>isuRr&cRdkg^UIIzaK-cE}9y{~L{TswBTBHmUBd2FEH)HhO=*dc34&>Be z%WzzayZdm=#>KgvUCw&Z(VhKCWOUfgHTToO?K3&Er>^KkGXB@KlF<$Lo)j2Kb>(LD zA}9a)a+@SmnZqGNNe%7h=e{ZF1X1iqV%G`z?AHsh`9F-5L=F+2w){G zrvbi4p6(tX8@B}x+j9`DMZ(M8U-0eSqcm#&{q5!{5RI!;2gZrkXfl~&+|I;U1Ub=A z4HKXvK?0$luguM)~ zg9kaonHH?_oL--dfd#>;Q!Z)r{zeqNezN9bM7m)wY;ul1YAKD4xSuls>OP~$#C)CVlviVUSv!u&x;?R}G`T*4}agZ&e! zb-o)uu@(Z!&LoNn;}1PHMV9mgSD~HO18GtPc3qnD4})xGH#DOD zLzl2vQ={zKa>lD1oqo=m)hpCt7mMcQ0#%D){{HWW=h6ax+ybj>gn=Xd6k?h6dCRs+ zlSZIq=oNIpDUk4zL|4zCaP^yQH)ABYEYhZuyj@1lqGIHHN#FOE7CLYeh&RvE_)AHU zq1V&Zk1;wBLX%elBO?lM1|+yA$^&y6bk75NTQc-Sy6Z`O&IC$r>LI87&XGwZQ-fV^ z5GQ>rngplZx|JLZF<)GOq$~KcIxx{G(x-77ZGF#wXB~|`l?2+I8>3*$z#pxqRYkh& zCCsm&cuBzc3G*2kj}v`0fNBMWwg8Y6xtk@YsVSkFSx$8w(8%`0~&Kc5SmawyKE;@g<>MA#=@z5nNOI% zM9(=$u*K!bT(M&A_P!YucZwrlyu^N`8U{NbC1#q3JHW2REnLX!k5D*X8>V`Vv5VCcWLx~6cKSNWh ztOW&$6_L$DKA_i_6Dk8#Xt_{35W{g9Jk8YR^>rlf@2T;-IMY9Mn#Uxl``Tg&UT34K z`vEfoSRX^FGERSASIM|1BLJo$SI}ZN8h8LOWGHzn|1 zSO8Q-Q%<$f=TENaIelFDD0}G4IO_Xr>ifHUw?7@-Ltw4!v4$Gu(FeF?e$Dk8Eu$N4 zj{FQJSM)QX4&gEVw8%5l;c&4;moLlyN2k-=GDRtzM>HsBi0c(2yiB7p7RD zCj(BnMEArP;DjSz{!tnL%*dphrP-Mn?>y~cDGsFR37G83-(_FTK%vZWp+o97m+#Ph z&`=61pjPOfgR6zw)>yY9=fEJT#P4o+bR-^7*)rg2`I>Pyfef>xvG)9bzH`bBBdZ1qMS)y9(pk$o}CU`NnZV|#JqN*HjQkw6W z)2-dV+1<#chiKbHMMf%y&V31M*70ZAsQ~IGnG$or{Ex%U8;%_~U@^A!U@@@L^Gk^2 zd36+FA$8+C!x&b-?b2ViwvNVjJOy9RQs?@a4s$pXOlgw`C+yHK>yaeYF@e#>!C%FE zpt)7eV;GOLVv)jzL-LZ{LsAXZ&Az-08s)qOn44FJdfA%e|C5{6ljoqJ*GJZlfd#`# zO8}Dx6d-!AEgC71Z2CAOP`>S|(YT}5fP2gqPfWR^<_hKz`sI)kPhrQ{D-7Qt;PN+c zGnkaInGQCrRA|jSxG+~7*MASH91I3z60QKMQS~Yf0qSO%@+O|zW|C^A&B~o!UzKcG zJILvUf{s|oD_J{G=?TEnakW#H&3bRs64cVya=Pf*NS6W4(j;u;<*XgOFZJWrtW(hv zQ+N#jBrBImsChpM;{1CF?4KYUIc$QjEI`i$#H( zycpwL3F1|j0{lTnIwz9Imr9wsN-9)rWw{YMj2CiUmcigt$+|JJNZ8RBNwzeP`)r)9DJxf*1~zcAu##}`u z7V5NEyqMvq>HrFP>%zB}?C+`Un)@pF4hpP>I!qD*Uz{v6kNjBYrl1@6-(733EvWdk zBT=d{J0IdzGVHnu*8Q31vVCsO03fPa?{6Pc-EtO);(({;o7?ltm95g9tzj-lnfT6| zxvM!qdQa)7GlVD~;YGs zflf?!q+X+j_m`(?+&CD{;{g+}F-O9WyP-kN(&5Q5sb)iFi)av@fVxhOpcq<`6VrEX z6Odg3?!Ks}54W3s{@|6Hqb<`POPOmZam%u(X_SgYaxjXXP%A7_NHoj&EoI)2 z!@oT+WdH1+$1gd>)tTDD)OD&Rc?EdS_JANL=aj zK|iT}H!6=MON%YOvS?Q0dijQtv8+OPrh^xUCF^=yK}oF`pgf$|5J0L5M<{cRLsp?w zk+GDqNoLuZPN^>aQ~@qcnog=d(N%b5=BUO|!9SnpBOJw_`MMHg&>BSt+0LBoqd%)`gmVmD$%v9FFXnQg5@G{oReIk9tyH%%K~$- zWNk1()FrUd%1x6qW}nOw^Grr1+)SqOkV7VmU;xF1UIL7rR;M7g*aC3nO84!UhO@tuF}x;GvI@PQQ%IQf>Lh=%j} zN}<2t&gC*iY7F?Uz)GaOA;07rDgxmIs~OIh^Uzj@xzXyMOT8c-!x>3NzOHdbNR2}} zg2hu7Ei70&I=cYVtiUWDa#zFxQULL7b@29rw>Rx%_2X3UdW}~=ES7b|heZ4X-|$oXtBn_cHx z%((jr70p^)>pfY1I)x=c+GX3U+hDc#PmsCa*)^XdN3LC&mFRFiY^Jb!Rt7Cw*$W&D#S!dYxC#1JJL4O1D^&h<`k)iDoA44CqByKx*T!yN zrEs%eSC=?LX=q8rUPj2;1ngd$X2_1PBUFr<}1i@Y61U&0+GXKtejbn*lI)~PzC6K8;ystr~T>e5AeyqgUC5t;XQ zQThMMtz7?&u>QB)`iIf}CvN5Z+wlL9TmNYN|KwJd|IW|>f&Mj4( zTbaAv7jwj}A31!B{;yt!6s6^VeO^sxEmb<;;LD(%nmkfmDwAIR~y* zfL#blsw^l;>@UR6|9xmP-=VH-sOK8Xrz!`y_YiDYK-8*;27pB)2p$wiqF@?E2XLf; zw9GK2*6DVth7WhJOyb=w-3dq5iWd@{!H*G3XV8kowzBWBN~0(qu=qtJK@NI%iyUb2W;XJrwAKIoO+Rp5ive7NL*Rl@DIA()8%;fN#;$ z(<%t847uc|_wh+#9;Kp=k8sdAgNt)_-)gRePx2vwkG5^ zFV&G5aqdy0^6h5jxqXL3+gK)`&yD}_nSN$7YA`vTbIU6@*`(NMAx{Z_Nj1DD_IFT_ zD}C?Xeg{)#M9>GD;^yya!g?vc~yZ1qC&%AVO%D5!Bv#7%!B9kh1J%zpl?qx<^S$W2rW~whd+#YNfE3RddK7F~| zhajy!8203+?a~cQ9F=@jIs*fGw+Zx4mi-O+@_z3+$?bYJ**}BKe&mG6r!A@_bp-Mi z5`Dc<;D24bEPv9P|E#M2?dtt!&HPWxm*@X#`To)R577T*Y6Q%_{@T!CV`F1vBVpy@ zV&o>_U}pY5{{GeY?@z9OY_M~&GP08Veg19X;$SA>V&(>-aIyaBL9}Nx;ptXPP z|G@)(S(sV>IQ0)au*%8u$Mk*f2u(7{}f4Bc-YzhUXT92vvYI{q>}DRGh?(hST7--`=3v# zqdVd`Qt#SV=pb1KU>*Mae#t!7yMF5JqvD~WvVD>372i3!twI_WGuaYS{xmwk*sXc8 zG1>(|7sN=LQW6RT9Gs0192}j1wmdSVf#Y(+m|KM-uP`Yjn_*E1A(${Qbds1ARqOzn zPn%CCAUTR7AOW{$WN2_?a&7>DR>hxoq($R2A=7C^1`}&4&mj=C9uQOD+5v* zkMZp6v$n&dwbj)_1lz2`Q-X;{8cPs6tjT>4$~^Sx8B7bX$0%mu$!VxZE;e3nm4L{$ zkhu*fQ$AI5E-@*RO)*1TBZNlR=Szn=4p3e&!9?(6N>h;Qd*PMt7}IyoJkZw;E)YrH zp^xB~mIs4CdYg|u8w(p7a}#@u6C)#QDCVYGD6ri@z+u)}wk1etGnGFs-WR@Q-$(p* zyf0e#c)mEVl}=&-X?bKlmtQx#k9ctTzAW+a&G2YzeMls{*Dw?7VKs4}HaFCP{^sTD zeN)d34RJmh=CkpYFA?&*=bEdzmWKqA=(=MzfrTS7W$ zHfR+B0>Ww5I7l8i$f2E$!iS~@CbIWtcXG6Ugb>c7$MVWDxSY^u5MO$^Kw=>WeTC3! z;eduh`03%3+{h0}hv;Y!ba)f<0MzO6KCE-aTV$5$Tl-hX$5^a$5ZlnWWfKVI*Sn{w zr1N2q^rhu5-mfeli%+vrQDZRW<;ouuk3zhpqq;*vJKn^zM{Th$_>D=BPK$Kp+d1ydC2xTYFO{@YzB89}N`+I&GzFrMJ ze()cEI6Zx2UUnC}ZiK%2V^i!K+rBYz40hjs5I5z=gftCzBMOEt3_h8F0?q^%dhY!* zE5JYWzi8)0ghWkzEXeTU`_TvGeNVsh|822FVYY?Du3YQD$@$^^<^5abdz0w`Rc;wnqa-Zma zpBVd!I0`v^P-s)CZQ&z;`0n>>An_vT8?nIxazQh3{~huhG-!v=kzm%@yX>2C^&9aW zw9rf3VAG2fw4i>_EfP>?@8c`Mj?{Mr|Jjd_fa(vD{%51}TgR0*$H&iCareGz2R7$0 zuTmd^kK7&0Z~a?e6UTsPT)H2iLNIM_fpWxG{O_PbOP=pW4_n8uF0Xv|{6yDv^Q?w+Cn z3I$b@2ulu~=yG_+@Ad8EGLD~Wv!vM=_AfQyh4@AZDyTzxY)SpI1-@|v^l$C1hYD!a^X z+h~;>>px0wnWMZwK!uvj#z2ICc^$&HeR{Tr;50eF7pTV1ku_z>?wpXiK&3jqlXdVV z1TIOH8VmLt78TJW!}TFXdg`=MJ;qSb;zPmwyTpT84p~sv6gzEYqro9kI%@?(GMO#D zx0S9`svpbwJ{4%th~u-e8@ja&zVaPm#O_Rm&KiW>P5IFK$}p2|`$AFg&X+E`Q~GOVSlsvS%> zi-o`O#09!mfKw`WPt$i$Id%%;LEU&j>cR;avDsm>GFvzb-v`8Qzne|mbwyv5S9Qm~ zH1R|=8lHt#;Xe|MP(9fFN^o*i4mX{ow~EIVO-LpEjhWLzBwIbA2Va$oNMG2!R3%=D zPJ2fZFTG@@*GWJ^Yn*^_v8+>u7dZ`kS?jY)pZD@zqx?7C@Rz*Lt=@Rjd~RA7LNpjS zt7D$#POZM0`iw1M^}aOz*)((cMdWzZ*KxgcJTwuQv1wjHnF zkRzqzOx7$#=`Wu)2cFNwVysf_mqfXL(jh+$|)={6qAA@XXI z`JMGj!tw5W1zA>{R@h`WBS@MfT7F8Gx)I)eqi)U8ELBBM28aa<$=%_&IemmM(3=0& z<5!$<%Oju`0JB~!$Z&br;oR!O$U;Dek&JP3sIY6MW?MmT$#2mB~l#ZW(bJC*KBD=?oc5&?P&nJ;{_4NwFCkXIKjynTAwR$+vJvj3K?8MeUjIqlzNi)T zTl>C)=BANRR)5F7YmP@eh_>YG_{6t`)!iMMvp$c3*Q{zf1Tf2lSbgd(U2Q&&9p{6< z2%(Srd7NA|^Zu<>#nNKaT#1~)6eT{f5B++|kKw1OW9-Iv-<{7kp@^N=pR~)RmI>rT zBWJWUnVJ#D?QeLI1wfdG3BvMS$js>1y)W&x>E<$KT`FhOhQ*Tg*m1V|XZ1sk`o4&6 zFP}L)dJuiWfI*$zB^8jnMqsm|Rkulmqo7!rl2XK!Z;Zgn{GFXf-Oy8)KN1FNgF}Bb z1IbFI(qJz+pOgQfQmAFg&gf=f`YFweMPN}6ZPrQ5e7S5^r#w>V!PTEJ*X!UE=cB84 zLx_y#K>(F98&$JbIJ0 zddQj6A|C5*FdOhyyuNgp^9NYWu`_8(T1X0KrDPF;Y0k2zj6WB99(wl>7Dfh6y$_huj2?>&D zX7r|~KK`~{2_{Nk0XtX$zx7N|raUB4MTcCiWjM2&{Zf;f^@+5QD<8Y5B~k)8Ahqce zV1}@HOjz!zO-LBS;1*7#wMiVyT73kYN2dLzn2-q)AIa*((xj>N#yrcNO`6h!60{YcL@aNmQ8q7yn2+LpViS2ITaEdYCL4A=+#ixlE?S(a{9FYMch43h z1tZ~CCbysRvYb~$zL%8)8e!r$i_BfWT>#LMjG{JiHz2|Fv>lX2IO0AfokbMme|1sb zhapx@Ho!WhqrU#6qLG!IB(5^F9Ye3|FoR$|wvaIa&aMW>sUg?(9w~+Ab|G$5r6ee+ zgyOu!K{4a*`dul%d+<}ODarVi2=a2nynnM-sZ+D~m4nYc;#loHmqa$bdO)Ncajh8J1 zDEM;(nt(<(#I87+7?h6_J032Mjvos7nS5CF`_0^0>v)wG^GnA@WO%H0vF}#S^&W3; z3xf&7kX@V9Cz8qK8_`ge1dJ|=jK8|wzlG;iKeaMh_y~j@mwetj{Zh&0wAKratC9^C zzYkp(KaV@OO6@mARAK`%zHYN!c(OLnZ+DWKP*NZ65+TrST#~%hq3`ND*#{q?*r9X! zqHsVnfU6K>wo*i^OFRDf>qOU#OxWim8yqkz!|1}P@(gWpj^HkGaAi}_)V+ibiScTj zyy_~>Li4b%kuVCpFTKIF5Zy~nMXYFs$$8)IEXo#4CP`*>D4jG`$nm;bn(8Zn>a~)3 z+Rr=_`%z>O8L?w5;Rbj1V@36IU>Aj{6aOWto*!PD87`(Tt&jv zdE&O+F3fBs-icKtA8QJf09M4;r(;P{-k@=`42_)^goHuIv8Z>-NsibxLT1qLM6 zC|WoLauwIA)e;#3&3K_-Xl>usR_bVWOqFh$k@w{0Ko93M8p2=7X|Da0+0?di5O#wM?0EYUteGXrtNVs7m`N6WBs* z`14WbyFC~fEL+(Is1`R}e_Kc9Thm`M&BKHB81x?R??-`yM;G@p!s(wbf3f1o%GtSK zGhFJk$6JQZTTiNW@`(uKM>rjx2`zvGw%r_3W3hy28XU`jMoPnF&FJOIowasRMiML6KSP+eR&;}rchY)mU@an&QzU-k zYRWLL(AF(b_vL-|LiV!j63eduD}j+p+-~@2LoWT56T`;wO#R$ z7C5?+%9Jv*i%gS$h17aOTi`6Asof%C-Fez<@B+gR6Q`gTyXijV&gUtBY^|Z{Kbv#C zVNjgZ)I5;aDpq6Ad2mXi)I`4_pfCMs1shV%P&kY9G{a#>V!uAP_>A`zhDJ7?@}^U) znesIhr2Ti0L*Ao#1mx!NY1w=-%Yz=i1{|7~N8_1;-lS5Y89q8ztAQ{TG>-g8*F>28 zOy)DQJ%xEzGzRi1N<=CZxjIpN=1*REktsBbvdXUK&eeG?PRsiRsgvH93C8#@v4|)# zue@=|sUBgtcu}O&{x;#ef(YLUyRTz5zF9dQRQ0YX>ok>5^h|9z2-K@4%`I z`3zbM;6Kf{(#C5=q(f%fX(=B_MP*POS`(l;0c=Z=zEI0LV#4pi%xZ~t8rL#gRvwwQ z8TXvpBJM|Onof%{T4ui-cW8^XYCa^o^A%1D_#EFc5X&_3-etM3u3su4ulvuuim$`1 zl2l%~55>Oq&y~s`VaKVX4Oi|qolszNs}h@p5m_xT#?=JZShqWCD{>=em%>!Ar{r~K zInzEcv{F<%`Hw)n>k%jE^kE{U=;v~HGD?n66xYM3a}Yol>IdM*p% z;>er0th%S|w6M2e-KC-RmXOPuM=gJ)NBYx1bx~n|+orpjYHt9dJOz9zT=#j?T2AGC z`OTaqKz^7>%7d)MUAJy#jOuY(Mq<-CKwQMUNRhhH)RDa`d4(a=G(?PHe+513cLOEk z1@lknw5(ZJEs@VZ=w^#U@hCd%%)}xPuLFOt7@j9^BY_7(5}_nwj9%D&Gte@X`ANYS zbK=`Mam6RSEg;dXtVZ3wv-*XjuzbbfQ)BDU6tWjJ+zTr%U-j{K-_(F?Zty%PMr1I< zrF9g{QV5HiN~18>qc^WH8}9C-27!_sLiHAqjZ z@VFb=Dd&xCnIUEv@a18r$Kl@i9%qq(9L3xkzU}umFK`N%-#NBrs&aH+88CyeMVmOf zHOdcM0kh0b6b5sk!?=M0-^VeQ4SU%h+ZsWMd-BftEL%jJ-Y=SBNlRJZu@(~+`~~EJ#%{9;uBYS-Vu;9D>ha2sp@XOb|#t6 zX@CR1;!~zhg{0b;F(~UNecBNPmt9J!YMtxD!ey)^(h2jOwb8rrpdL`JSDp6Qh?Z{?H}ii{ zA78)K#_nt2?3x1BR{(9D0%)9zUrcQ<<%=A)F~{lBlij~vC2l~!;*y~6x|co(#B~|t zP+4uW-mw3IK2)&4+_n$N^tzQVMsgKE5p&PmSN09ZjEnp7h?yfoRy+xon|2X6a)iqh zRv!iC^DUYGcps{x-qy(H0{%(t!k{G})7*+o#d)2UfdMaEP?fST7WFI68{OBSuVI%Z zX*GuBkgyvVKCPtW;by;!F3iG*chsHmQ3uX*U65)GF7C|Gd#`GHx7y|m`F_NufROS2 z0L`V;HCtZCe9@;?9c{o!hmL7oi^w#FNvnZPmTLP+9}_>R@i{ZAcsu{-DfDJ{!{2jK#4l%Xz(_74-3Ac#wuVX^G_-I{ zFlc<$La3{G#y!`Y3EZq$UQgJ?gS`XK7ug02uedQ-qS$hUnGHhHxOmiW4gGbS+Z+10 zAIVcrf$KAEe(W0Fi8W(5nXaBH@WK?P>o&+L-z$|0_thSgvkO@@awx=+1gf(P%T%yU zSwk-^UpzXGXWYp)LkkU?L$jtJBwdUTp$FTvnP=Iz#1`vwKoS(TdE6Rl#jQpb0^|yl z_iu0uJx^XwXy`|b+n?j}w1wgrj!)^|CYBphT;N%AhbIka`q}QTjR&W!KiDmG<$Mx; z{?dMnh&71t4zcrTk3M?14nIWX*nS;LbgU2(yX?7Q0Q+9yTyczOt@g7iQ!=9mALG7) z(SuCdR)CQao$-!RjHOJ$+cjINN%ku|HkqMk$VS?8tV;vbyYEeTax2FeZTq{#rFFWN`0BlYmnV@p33{i3gf^ACpC?82Ej;x0(=fiZA>s=<3ZRS^#oEJdtg2sh@e*a(vf{pK8U zL7SiLi=A-KeCCJmTX%E;oD!JcgJe9EXNlq7o*jLfuAa@Tyh-ohWo$tEzJ?@=i3pt1 zMT1*DR;X`D+Nm7O&pd5vc#Zg)9#h@yVs--p=9*G;l@^?7cG)!UM7+(( zaTwSwn4bk243&NJ!eUZ&U@h0y zoXrfaDFwrljH7kskTvKVUeZ7&R!7Pp%fWJTtzqpE{Agt_wmK3< z3;J;x1fRUz@@fiE&=coBc8BV&Y)@Vr{d!y=R91|S()~zaao~`JN)qsjRU;rG3~8xwUie+Jn(YF z4pCwL98#%I;guT!3lYSrYY_QZCBPQAQC?3sqip|erg08Y$-U-Ioxn0Av`C@fsHi8s z4MBd5+Z);(K>svm!*fb=Ww{0tB6=u3A^bRlP^MKiGjZ)a^LdoAYojXj_f= z21W)oyp?b`mESy{fIiEDk;ZE3F~3h*x(?0F!OWh)+~Ur)pK!LcD{;S>@Jd?5=0iV~ zOTBE%F|eJNx@Y~dIEa;gEgmmn%Oo+wd!`VtVnaxok+sPzg=L6WgG3N@`rYz4Z6;zf zAb$nHMrZ+?9dKV9&H?$+_@O1KSz7J@3T9@gYLgX=SK5t2G3<7P_WNMd#n0E@Tm~j* z=~9P}KY2Lb5ZN}KMV6rlwAd$D&jK*R}GdJ{jKk4HDN0m(*|8lA$-}g|0LJ4i8OxkXSX-9+CW%)-o<_8 zlRoXlkPCL<(MNSod3djeQTkrK=%X-ADvIENhLbO`LliWyq&=%LnuzEaL|W}y%3VJS zOT>c$6f5+ryt36|7?>-^a(iJ5as~DLgVJI80*Aav2bFiqYO;(}&IVmyddyM> z?@zz{`qkr5+90B;{L*332K@l7BVqC8;5l65Zmn#XA338G2Ll5zBA{M~Xu`{V>A^3n z-Hgd}1?B4Qly$o~fOe!$&0$*GMDE@*T)#894e9J08M$bMS>rXjXZbz3i%uf)%FoDZ zl?!fF(u|CScayV#w&K~Ij~(-P>|FK8h4Z-w{t;X>6`uklGTf}y2%S~rxWK|LHb+D~ zuF#t(wjm>AZLV%Q*svbfgmI%Ts#mi!5M`mJ1IHzj{(D?I^hV=A9S%nrn}j{TROF$h zj4{Yi*F%ZB+G+Ks>WTYc9X1!!`*)biAbi_AY6+@e9@B}T8RktA!tdPLxZ)cK&^gT> zL`R)Y-KN12&whCMCB%0Gb(%(gma6@lzhCjJoxj#88=>NTO@~di6xfK~W{^K+H!R}! zVxN?(Vd_BFxp=jQnAQ41`D@xE84zr*b1#8=!|G^K5|hXAO5M2F@9gE6oQ}b9Vg9BC z$;H@t1>fo7QtC>RyltBPnfeUe-(_ILUl|h&9tKL2n;BJ+YY+{qv3Bqa5u#7IDQ)k? zz8UOpj^&{ed2oyOfm)87+$;&=(i$qL+>d)Q!>v8YuHf=sBhUBHU3c@JJO_SVG*as{ zcM)Rn7bXZs4e(p{MI#LWBhe6^;GJgwW*gMEdcPk^8`kp&U&v{wbeS${Q){w`5<*1$ zMWww_29y{VjJCc|owU4E4Qwq;CFS{~r(HU5{+y$~hH||kuah+M9n|9It9pnEHpw#B z&5i}}qV()e-<54JHTR0!Wq&_``iyh(v~L+@q~NINg2nUI9?sR5G3*2bTdM?ZPe1)b{lwjy0u#dbsY4tbhvGVIP@&Mgvsr5V7z={arSGKPSdRSx zroEOate&4-w_!<2?{%(Xqs5m zKo@>{?K(%}FNr1SH5q-QhXlT$@af#XEV;lu4#(SpQyZc-UUSCA+cnoX{lx@NaW^)xWu26@Lbzf*-vQN=fA}w`SJ4p2^<$OWyo7s0$C4{F$0;h290c$wul%{_NT3Fh%dV4D*o$<1yL2ulBfER*&J+LpD4Z}Noj1nJZ zi_X83DYP3)E1@h~ zy`+>h$;=(oi`am+*7d#C@#1!YZBLT0jS>`Y(^A@%^22xU zvJSK?7u?;Zm;)AMlX(ucD0ccUHy3W-HHmL}@I$U?7jQ_Mhd zT7@o}K%8}gCNzDh2K;sEPNh^~x6A$o-{j~q)C@1=Y}B8u-M91Czc96T3!j3bDY$ej z_l|hB+SPi~w?v1g6SgrQ`J~0iq7CLna56HI0#;bDj*L#`n9kU|&11jy8?oG`;&CDS0$w!xhDLBv3@}RX8!?$ z{niG26z`y0_eu2Jg6b^Gy5~8x^mTq+1U~7NK5t1#pr@)d`ln6-nVnwb4e!m4yJ^XM z6*jqiN2(g6`1S-yW*#hc6}k%a7Syb7OG_sv z@}_$zto!GU`&!z3t1s!Ca!~L`Tr)34FfJ3MNx9NE2$SSRPANe$zNN(}Zq;MJVjaj8U#U;(Jv!*I$5M_|YukeU@vBQJ0y#Y(6A{KEkkCx9H8!zr+ zLE4h=9u331ZW?HQRe*J*Bqdp+3Y?LAMj!n&l7`MEmJQD`FrC;>syotBnU;g)%(1$! z>8>tM3|i-*^dm|f4E_^1GepW#fCF;G|0wJ%fZ}SFhMgcGxVyWvi+ga_ z;O>jF$l|g0+8(bc8*uQZE0pX46T;o8zQv#EgczclbZD>S%AQ5KR2yQy=6j z4JtZHCsDI=!rA7_QEfJr$O`ZsXguX63ZX*WeFYZs;k#xJ!s)tI)6BzDDRt?b6oH~? zzf~>tpsFJ~o939UpDEbuwU`>8eCnjcK=b=|Z%zi2B5sn1IMVst5wxiz=PxrfNRmf= z!dVfkZ_nkCz=XVay$T|ya#z!goX_t+ceIxtba$5PLr&5F2u`{&^+2e-FQTYoFchdwLF<6ulrq3oWkcKHubsZn4q-s;utKi%g?Od6{ zP&X2VzZd^9UpBimjrIMBE+GKE%PKN?qI>V#A7y~}W>tT6B4qYbp896SaNKNfaKSQJ zaj0fg*W@)Sh9-$v@58E$?j(8MEf>Zw?fejpH`GKl5}_HtD`~BE#SETAjpTQn@7!nd zyI7nQ&x+QlvHjkCyS`0`42p8rK4cF=*#(g*y;o`(uN}kA^rml#G8FK680_mOjpo_A zVP@i(d7p6pU6e=Z>y5QD<{})?=<;maGa9mY5{ICEYK{{UG$0M&)8D9JIX@XX zi0!UgNrJBK%CDOi(Tl@=&*gFX3qQLEz$xz#aA$T-ozzylE+mOwCwGK*2{yHKKy3EY zz(-m@5k4R+M%ou%scW0}lpM_3M@Dy&*{R61ZY~+hyXKOjffF)qb+|we|8}4fRUJ=t z#>>xfsbv^Hl8==vbr}uvjE3Uhg(d_YufQGkGdO;Z(oTSMv%k6T*s+(@ZaF<{CAj#q z+{z4OZf&YbST$%T(P$&1pY2T&4wDZtrYoY?{JOazA3)I0JM7+7wPrQG_OX4pJv8NT zd#CRMc81!rwh98DBt$x;JQ^i>>gH5*&r(s(YwkDOq6(w$tx)pCP3Kn@uN3X_FiFq^ z!qrp$?6kC$Mf|V*1Kdm&77Q7olNet?4s)(8*Y|mt6=7ZALkNFgiJB7&0n=o;@XIi+ z6Hic-BQPB~BxYkm(*tidg9(WK**H>DY?u%6D6dS6`?NG)bDsX!nGOkO#BvYd5iL;Q z)pe6EF=6ibSO}{{ZQPS!R7ufd;$FHI%i~0om*Q}oVlq7yrdk~Yr`s-)gt z7iG%d8_`D0D_9bv-mstDH;g>AWS(0X`0?Xs?2{e-rG_}y%<321fugp2as!%O5^`3l zQ$(EavhT>XgV~Cu1f1<``d?gF#=c8oC5&u-)Sv1Laz?Lg#?SP5d~1fCOg52-{H9b9 zwX*!Zywz(FZpK%5hV~`Dpo1w-dOm{CZWMa7ouz)hOWTn>3&yhASK7{nO#bMqu^&_O z_tigaRJh*ZE53=WRk}S*7p6-bE=8yexx!V}$s=SgqQ-8P%KkpjCCWQFAFPV{|0HLd=8u z^)bI*Z`~JDag6l~aZqqF84a90we)5A8b9EX*+V3xL+OG=dP3f|L5;J;Bk3JC`%Uzf z-=3+$+oz+4ZwFaw+7G!~*A6PzmF1eL7&d;p)`%LTvI4sv zr+Bvw!T@^%^wLjrE40aIGYM;KjDm{z*d zl@w-q<#85+3zEmXx#O%4a~%{1qrv=caGCwi)}Z@OD3NR8q7|eBdvjl>5U(V_uP9c8 zh|~GgUdQ#3_mA&PcAQzL zn3_!iqju1oEgVm^{OJZgP+kXpQmKvJ9vHVN)xI5yiB~z7kDT5ehSbax4GBeHH61`O z8D8jpAe-+Tt6^j&zR&U0`P2hocF+?VI(oy7ok}~fqg6v3UxPb%;da6Jqofyi3Ro|H zkcR<3(I#AlpfK<6SKNxPD!3fKz|81KK1UZcc3*looS`wD8|puabX`90 zGJE&>LB?(ug)Y!&`BN+*cXK--aQUsuef!T%yi~Yq@+uafVXiuE4Vp0E17}_IKI#(b zDypL*(+Xu4i0HFl;|a=W>1 zIAwjiU5uWP=b}iH!Nx0L=K8Cn=ZxYP)*eRznFPW~$(Fk8Q#5rd6l4lw!k-TCk|fU% zYb4nMB8*?zRHvjk2Vg^qqihm$J}F;$qa825-QPApX5~k}q^}VAU?mud`4Xzb5@vFA zU#7;@Rl&1D-tCow^3q0d$>>2&zhA?7m31hHB}YREuOCZ+|bq3`b5ev5^wLho1F0yNuq+DNr`-8%<0(`cl6 z7y9x)0mW*e8p&Ifx%xG;bUWgKxMf@$UW`9jTvY5>)5Mqqz zAm!U_tSu+TPppd+IszwZWl&=2u~r>Mj@y&a;5}<7z2zCRRrc5#*uj@S6ifzWf+;l; z498Q=?`drJ-bL69fBwrtr;ql_y12-{xVzLpsezOHzpaE`&;imc@Y$LXFYvdP2$0z}_@yzOl+ z^lsH23G3`q&*Ady{+M#;5d6zqBW~i}TUL`@7Zf||7cK^k|K5Og-WCtq4 z5u7nQ-1WTUt)U9Vq32qyi0B{OxcByKUD8sv{y$Y@@c(dfiK=Hx%!sTv@;IX69X z*6sCiTSXl;Y0!XQR>$}4s# zZP`_HwcR4qXW7S)?Hng^sd<&N|+p#Q=*)kd`MHz?~& zRp~O_l8J%-psH9?n!zX0d8Zk6o7^t0rs|+ppI;;H&Tn4H=T{th5Ovm4oVeuwmeEm1 zA*!Ko+5vG#maUJ~ChBnO0y*H(Ty8@~q|{fY@j+p9{;~6F6VoIcH0NN;M^&>>9aX>4k{5gkhVV62*84xgTg z6|3;i1Q8b#cET~Q-E8V6p%nSr+wo0~LtEt0VadWf0~s;H##XUD)Hf}k?yhXNnAiIq zjVoz|C9st?6@L$05F6dZD%xzU`^>-goT}5|?OiX!81UK$q zu_ui2G3}g18^uw>!BmAR_i=q-FBtS&vzIEjvBw-8ZcJ3L9DV|P7(Og&d~H@oBXGV1j3rX9 zGdSMHMq4$0T{88fnQ?AZ6nss9?VehoGVgHM1Ckekl0s0VP)qQe~&(HQB=Q^J}nyXfb=n?NwZ<=tCo zt&vvd;Reus>XqJ*n@3D2+9K7PnpiGV!EWyJp651H++Jtg^=$hQ5Dn4bi28_K?GxFg zTaf5rxsV3vXkLD$_NXm>F`$uT1|V%Y^PG@A6*j=3j_i59=qp?+8Q}K0y7qz3xsM&& z!kEzhDN&Stwvb^qh}az)?f7j$q7!ZJCa)D`^KQGwUl=ok#02k6O=Clz>ft*PqBc$A zBDaf|KmiouZt$2o+U-OG^Z5MF)(2kBPRhjn#abfBqLXUDxS&7<9_br;xMoA~RHSce zPIEt5wu-a$wGC_Mvs@L=0L!{lG_XfH)};v@8X{-khQG%=P$-fm&nqW%EYk(unD!;^ zwnPp$uH4WOg6v8$NUhd(2r@;RF;O>09_PqH_<`{$#azDG%^&^#1c+gt%n;lo?$&!EM-JIy;zn| z8vp#`-NG={(~_>Pbt;_g(npFKj4l}uwG9L984Rcjk_7n}iB!%8L$})el+G3Obakl*d9ebs_1S6hm zI@#coJfA0B5L4&JG3A;NwIJyL+D)t>6T|d0+RL|DRS&0zC~q7HMt)XI`DgAJX$@}Z zAS-8V^d>2D&=Lik`Vk%h0zVNgnlT`1z;-9d&|k!bm?B^W*2%REGV`@cDb%T;5uEia z^Kn11*e8E^hc>BDbEEfezUj(jCzbS5LYZ0@ORDV~zf-wynZr^>pNR_WR6t};12WqF zW>ab~ZXH@XJ~`&npkXoD-P}Zpdv(|^uB_frT~Sb0d(RngP<3T5 zwdH?LipdeRc5PSQYufKhNAn(4<}S}i{GKQ(uqb1e#_&PhG+HcIjnn!1=t|b?N}ES&>m~# zywUy4yR720ow$Xtm(@L$fga(~Q>wFP+nR;v<+f^j7nJwh)j^&6~H86Z2DJ`qmXP zcvxFdq9xw$c}&g47`dKT&ye0bA(#>P1z&_dO)*b@Bv?bK^AV=5>!E}zfnOM$;tz^&1H>n;yH&r_e04M%C>@2aQEViM&wv~5pY-E zAP(<@yQ0Q&A^Le|#B(3<^p!~aA6PmfRSFDa;&Mahv>8@WOCq(6=@SnMqfF!#w|;Nm|pNteW&dG2T+GGevr$NoCv|pRD0Fv8xvxId_Z;qdzJx+y( zPbe^{Dxn9W?9|oDz4jU5M9!(ode-V2BBh@b2~~beZxE7&J=bwScM!;P9*@35Y&_3E zp6pb-U4)QWP}Z}I;WyowGHcdc&CP{3P%FbDiltef^|A3mmHWs&!37N@`M1Hd%CoXT z@QD}J6f}U|!|@-j^H&@<^O4x>gn%Vh_SOeVhEoWFV;w4VU zKNC>9<3Md=GrGK}Fwca)dS@)&vB4xHrpvJ{10N$(|NH$`uqI`(UNw2v8g#MXcc&Zs z3}c)D=kd*{f$OL~PHDDR|NdjD(&yvSLgHxF*p0 z;!0-&fBsY_h{?w`^G!+duW6jE;xUDhw+HJV`9VUuNI$}aER z!2VHvhniz#c&V8~(`EM13*nEe`36~kKl`=z9V<@c)HmzSglv#h$*K;jSNc^=R}bsn zDTS9ng7|{>q-kfU$mbjxg5Zb`a$KHqFWg$nEO>zU#vJme%QzgN7Ym$A_DSB)6y`(& z)IEh^yyqf`BEEM&m()47vEjk(d+!+i$%Z`bq<&10};%eP275O)o1? zDWc*DwZStBSr(!*;1sIn{+5b28ho7}-{X`oq3>BvBvx-z`^3c1W`<$0+RJ9LRU43j zKt9-&_@Kwav=nQZnte-%cSAhsZnCM^($cyg;u%cl2Dx5he-55DeqpRZJ^osyDGv)? z=cnHGxyM**hQ{trjW$j@@1m7%%;MhB=Ch^$^HCIFt}E!o7oS2%fBqfLYC(rDW>;0; z{0v4Kv&OHJCCvr#H`@-8x$R4chQ%D+KdTNaCBYSsa^Wp&bdm4D^&h7LY*Q=6#9O@O zdGmZ|zev9C(_?fug1_s;J1yR(SI`VQ|&q15~0?uuuWXFl4S>>HHoU?4l zOH%fG#z@q9xgc2#8CHzf(kXNsgAgkkRU67I62G?j#}r^fUv+!Nbgj<${!IUS!-4PY zuS9Y5F^_AqW?}rG+83{$M$YZ%5k(@H4KUOBGU-IgY>FNZaiR>bKRG;K;L61ou{Q8x z6AZZi=#As%P3rO=aTseJ`i%=lNxXh-EbHu1J0o7$$!CPBlg+ID5^DFI*?jxbdbVJO z&q0?V;awx8mTrYlwO7BKs!F+#v{Tt2=f?T;8BcCNvzhnoKEqQfkygp6x(Lyi*s{%Q zWOSt^jiFtUI=PSIC++{WfeIUm5#2t`&fv3c}=0t z=LND`W+IyOI1+NIqCQ2HHtJ_DtnBGUg%)<%n)DL**me9k)DK+Ek@|2=A^aiP?;+Xq zdne_mqJ1j{u`w0xbLE++pd_n4T1Eu7+|Mro;{=VEW25h{a5fE;aUqCy_oO>+$M-f1 z)+HY?Pt-5fT+{<~lu#U?lOR_2@!%`i`wE;=$|7g$H^T|M2$ZTIlu6^0y|>>3bP@&_ z>TLQaxfPjpF~V8oiI58oO-etl4nm%|_k&uVqf((x3<`eN8B>qgk7=-5jn=HC&?xUQL!!RPU+dekbgX4Zx3!&3{3Vm^J;Reu-B`^(<#!)M>~lUD`gHHHl)85t@g1{Eo?J|;z@c{pPH z#n#2n7E7w+87LT6(K;0~5`1!^GkMv=HmGFPuXUb+Z$UO&>`*DE&{Y?lx;=}jOgoa@DL)g58aw42_nvTY|3F#94f=cSN9*>M(cI`O*;@g zH|t^LE#}hWtS8GDVn+c9v@6bRNMXXZ(=o$MzAy)q`1biRO}uo*#so8JIi{QT-e&v6 zw>_QM`4gOtp-NeBR5aHe`P4G#?mg+xO7ZB0wdI45qgg zk(Km5orT^V=e4$-_xy{`Cd5XbH!TEHxrp#)o_jeYUx+!{a4Wypu-z*7&cmPxAz0=q;J06qo9jSa4bCkT4Tg$f|;!yV558 z+xR}r>TwWDK*W2s;GEU$hqk8v&q4Z{b8~$QuHPV)+tDb9`0UkJmmO4 z-uSX5m-|a!J2p!+nmoES327mQyHp`kAAL}NtK0?O2C;8|W7Q$_QOyrsKD>`Jl=V&W zYiFf+`+C>^=rT2fTLB-!J3?sGLcS-xd=t^KU{pbVi6rmJlfA#5c#c=Jh~CNf^G4bm zS;{9mkXo%&HTL_2@6_q{@9NYeVRPd+kv@yQw?UOX5hHYfuU*j7^u?^7?~TVOV)wzIc+`*e zGF3397`2TcI4xi>x*?~b{%z|_Q$U2B&|={zFMIcATg_Bl_i?vUOwg+O`@9scXnjt~ zJ*4Ez{8;wr39M1mk3PC5r7kA@k7a5cj^1B{*?BQb8zMS`m;g|OMfZ+t%_4HOobKcD z^*a_IjeXtsJDibG0DjSxxxz9cUHX;gW8EYXS|1slO13N^wX?3jyZ$0)Isyvz8G4`v zr*+k?h8Gps(p?A9$ZW>>1FnzLnT-qiRnP9hw*NAGx#dUs6+WQ&E|U-S=r-y*q&{S~ zHTTF^A$s-)#+_FC2Sm#}i6I8A(Ba@^c*0MP+aBFcTJ6o;62fI%2+a24YpaOnZZFgq z26n#pZiyt5(;}k-L;wLw-dD0X`}%x~q7v!XOmZSKWI`GnsWti@Z( zO?IE8-V&(nD?g%h%+o?s_sa)tTt%aYG|u726+S0Yv`aH~F$b*bUgd9rM|liAKa5bC zmf_{Mk{xhFN(!O-bjNM{(6gF8qlcG+Ru8zd^}!qV8Kfp=zM>tV)}WDybVDZ>-Pwdr z$vk}e^5TRjWv?~QpLcaB{y2r>NYAKe6{S);ZevoPpaP?Ges4TW!|dW|Y({6)r=EuA zIj3TE*}cZ}S+lXb<^?&9r1mAj)_X1X7U8A*)hZ$ePVF_ zMu`WxE;waeu~LcK<6(-4XZ|8|Zl}7v?Px)n76tUKux-Qrie02#x%5KW+-3vY%lu#6S#9UHbi&hcJ=5 z^!2r6_?r&O#v7xqhchw#Ci*V>LPMR(W%E+LIC{wEp~Z|J^@umsKYx&bCHg-Ekxt>x zj|vYV%D71!Ffq~;68vD&cUsY_N=0c1V$XwW48)d}oqXPuSX1p$VS9pTMb-@$q-LyP zDlf4O)BBLi3r`XEXo4AWYm80P#94OOk z_+6IL&jQnKz$mvmq&csf{sS)Bp&5-LAvE`}_h2nm&q85N9MWZ)?cd2zKbT2ms>y@k z+x&~kA7CSVgld($jW3Pi4QQze7k!n5Gb?Ddq5m2OI}(g(=oX#-w30(vTT$y0A}n5R zmXwGxy~fr`t5gp+Z{}|^=nmqu%s}kZC;hi692L#!EOmt zeLG!i5`*>NHyZUCUt3&iwu~38=Ib^A)g2PZXe23;nabz#^{L}{T)Cu*N(#YIr@o7nz-{k1sz>QuQ17Ep~!T*olQwz7dP3Hz1K z|AMpo1KRx;W$qt1yFaWsX%NKH&CbQ$*$qY!gSA!7onQ!HIXPuX9UWFF6%9@pa12&+ z$p%6NgOL4Eko*hz%gzI&V(0n`$_9jy-FSg8@UH~K@(=izPY|XlW$q#mva_*;H3hg) z0Dr9f`HdY0Ekgk)xSKoLSxSIy96^70W}5CGCmmQf6oB48`|xmc!iKOlcl(0_W&%h6 zBmq(YX@Cqs79bB$04M^K0V)7>fCfMlpaswY=mPWr1^{z_1;7$u1pop5G7PY`^8^8` zVa#HH4Zs#)>*Hbz0t4&-jsPbB7yz~dg8JKoMD&gVd4H4 zU-?h)Vb>>V4gviM=zm4~Z&{(@{4*X5GAL{3266ur6Dl5F7z57ypA8uQ7zLnfXXS1S z12g}D0RFoxe;CYvPx&9VGvp84*#l-6mKY^FD+rb0U;U{5&`LF6l)XQvf5$F!{`W9a z=CBkx+x&q&!^Zn>*7LsuNnu!LD-TOp?Jg!2J0&fU9+iugHS7c{{lx?1;N*kFQVLeX z3pN3%m|)_6HSL_i(y&TjR7}!BTtH4fAP*M|8ps3W0kQ%)8G%4Xn4XHW)&HfUUtcRncg*h1f$0aRWI|vo*^AFe9|KS=H7l$ARFFy>P$n-BK zng1=p-fkdk6c~<>9|ib72Mm13%gsw=P4!P153e8$sQTxi0{@SUM*tS$zsrEYKhgc0 zjEkR}7nYF!kn#NYc)WsqFdpFF^nknqyfB*N-(@^}yfDt}-(|4(+5gi9kWYXUmbd@V z19HRW!+*#G1^&whKM&V`+Th|7{4bxm_<8yMGcH{GFp}f{x8d$)4qH}kusU#508Kkz z*aG8+!N;AQ-KqYXhp?5P0Je6f`s3=KWuYKVWhlZWCBP*h!z;nb#VNrlDa|h}%OxYj sFDVV=<&u<^~HKaBc_LQgNFCX4d_09KU8_W%F@ literal 0 HcmV?d00001 From 9bfe69aef1d19f2132d8a82e94f2792c65872063 Mon Sep 17 00:00:00 2001 From: Plaisted Date: Tue, 19 Jan 2021 18:06:50 -0600 Subject: [PATCH 2/6] removing locking --- .../NumericTokenizer.cs | 316 +++++----- .../Scanner/CoreTokenScanner.cs | 7 +- .../StringTokenizer.cs | 560 +++++++++--------- .../UglyToad.PdfPig.Tokenization.csproj | 48 +- 4 files changed, 466 insertions(+), 465 deletions(-) diff --git a/src/UglyToad.PdfPig.Tokenization/NumericTokenizer.cs b/src/UglyToad.PdfPig.Tokenization/NumericTokenizer.cs index 3b3dd813..798892f4 100644 --- a/src/UglyToad.PdfPig.Tokenization/NumericTokenizer.cs +++ b/src/UglyToad.PdfPig.Tokenization/NumericTokenizer.cs @@ -1,158 +1,158 @@ -namespace UglyToad.PdfPig.Tokenization -{ - using System; - using System.Globalization; - using System.Text; - using Core; - using Tokens; - - internal class NumericTokenizer : ITokenizer - { - private static readonly StringBuilderPool StringBuilderPool = new StringBuilderPool(10); - - private const byte Zero = 48; - private const byte Nine = 57; - - public bool ReadsNextByte { get; } = true; - - public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token) - { - token = null; - - StringBuilder characters; - - if ((currentByte >= Zero && currentByte <= Nine) || currentByte == '-' || currentByte == '+' || currentByte == '.') - { - characters = StringBuilderPool.Borrow(); - characters.Append((char)currentByte); - } - else - { - return false; - } - - while (inputBytes.MoveNext()) - { - var b = inputBytes.CurrentByte; - - if ((b >= Zero && b <= Nine) || - b == '-' || - b == '+' || - b == '.' || - b == 'E' || - b == 'e') - { - characters.Append((char)b); - } - else - { - break; - } - } - - try - { - var str = characters.ToString(); - StringBuilderPool.Return(characters); - - switch (str) - { - case "-1": - token = NumericToken.MinusOne; - return true; - case "-": - case ".": - case "0": - case "0000": - token = NumericToken.Zero; - return true; - case "1": - token = NumericToken.One; - return true; - case "2": - token = NumericToken.Two; - return true; - case "3": - token = NumericToken.Three; - return true; - case "4": - token = NumericToken.Four; - return true; - case "5": - token = NumericToken.Five; - return true; - case "6": - token = NumericToken.Six; - return true; - case "7": - token = NumericToken.Seven; - return true; - case "8": - token = NumericToken.Eight; - return true; - case "9": - token = NumericToken.Nine; - return true; - case "10": - token = NumericToken.Ten; - return true; - case "11": - token = NumericToken.Eleven; - return true; - case "12": - token = NumericToken.Twelve; - return true; - case "13": - token = NumericToken.Thirteen; - return true; - case "14": - token = NumericToken.Fourteen; - return true; - case "15": - token = NumericToken.Fifteen; - return true; - case "16": - token = NumericToken.Sixteen; - return true; - case "17": - token = NumericToken.Seventeen; - return true; - case "18": - token = NumericToken.Eighteen; - return true; - case "19": - token = NumericToken.Nineteen; - return true; - case "20": - token = NumericToken.Twenty; - return true; - case "100": - token = NumericToken.OneHundred; - return true; - case "500": - token = NumericToken.FiveHundred; - return true; - case "1000": - token = NumericToken.OneThousand; - return true; - default: - if (!decimal.TryParse(str, NumberStyles.Any, CultureInfo.InvariantCulture, out var value)) - { - return false; - } - - token = new NumericToken(value); - return true; - } - } - catch (FormatException) - { - return false; - } - catch (OverflowException) - { - return false; - } - } - } -} +namespace UglyToad.PdfPig.Tokenization +{ + using System; + using System.Globalization; + using System.Text; + using Core; + using Tokens; + + internal class NumericTokenizer : ITokenizer + { + private readonly StringBuilder stringBuilder = new(); + + private const byte Zero = 48; + private const byte Nine = 57; + + public bool ReadsNextByte { get; } = true; + + public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token) + { + token = null; + + StringBuilder characters; + + if ((currentByte >= Zero && currentByte <= Nine) || currentByte == '-' || currentByte == '+' || currentByte == '.') + { + characters = stringBuilder; + characters.Append((char)currentByte); + } + else + { + return false; + } + + while (inputBytes.MoveNext()) + { + var b = inputBytes.CurrentByte; + + if ((b >= Zero && b <= Nine) || + b == '-' || + b == '+' || + b == '.' || + b == 'E' || + b == 'e') + { + characters.Append((char)b); + } + else + { + break; + } + } + + try + { + var str = characters.ToString(); + characters.Clear(); + + switch (str) + { + case "-1": + token = NumericToken.MinusOne; + return true; + case "-": + case ".": + case "0": + case "0000": + token = NumericToken.Zero; + return true; + case "1": + token = NumericToken.One; + return true; + case "2": + token = NumericToken.Two; + return true; + case "3": + token = NumericToken.Three; + return true; + case "4": + token = NumericToken.Four; + return true; + case "5": + token = NumericToken.Five; + return true; + case "6": + token = NumericToken.Six; + return true; + case "7": + token = NumericToken.Seven; + return true; + case "8": + token = NumericToken.Eight; + return true; + case "9": + token = NumericToken.Nine; + return true; + case "10": + token = NumericToken.Ten; + return true; + case "11": + token = NumericToken.Eleven; + return true; + case "12": + token = NumericToken.Twelve; + return true; + case "13": + token = NumericToken.Thirteen; + return true; + case "14": + token = NumericToken.Fourteen; + return true; + case "15": + token = NumericToken.Fifteen; + return true; + case "16": + token = NumericToken.Sixteen; + return true; + case "17": + token = NumericToken.Seventeen; + return true; + case "18": + token = NumericToken.Eighteen; + return true; + case "19": + token = NumericToken.Nineteen; + return true; + case "20": + token = NumericToken.Twenty; + return true; + case "100": + token = NumericToken.OneHundred; + return true; + case "500": + token = NumericToken.FiveHundred; + return true; + case "1000": + token = NumericToken.OneThousand; + return true; + default: + if (!decimal.TryParse(str, NumberStyles.Any, CultureInfo.InvariantCulture, out var value)) + { + return false; + } + + token = new NumericToken(value); + return true; + } + } + catch (FormatException) + { + return false; + } + catch (OverflowException) + { + return false; + } + } + } +} diff --git a/src/UglyToad.PdfPig.Tokenization/Scanner/CoreTokenScanner.cs b/src/UglyToad.PdfPig.Tokenization/Scanner/CoreTokenScanner.cs index 22f68b34..5182ba59 100644 --- a/src/UglyToad.PdfPig.Tokenization/Scanner/CoreTokenScanner.cs +++ b/src/UglyToad.PdfPig.Tokenization/Scanner/CoreTokenScanner.cs @@ -15,9 +15,10 @@ private static readonly DictionaryTokenizer DictionaryTokenizer = new DictionaryTokenizer(); private static readonly HexTokenizer HexTokenizer = new HexTokenizer(); private static readonly NameTokenizer NameTokenizer = new NameTokenizer(); - private static readonly NumericTokenizer NumericTokenizer = new NumericTokenizer(); - private static readonly PlainTokenizer PlainTokenizer = new PlainTokenizer(); - private static readonly StringTokenizer StringTokenizer = new StringTokenizer(); + private static readonly PlainTokenizer PlainTokenizer = new PlainTokenizer(); + + private readonly NumericTokenizer NumericTokenizer = new NumericTokenizer(); + private readonly StringTokenizer StringTokenizer = new StringTokenizer(); private readonly ScannerScope scope; private readonly IInputBytes inputBytes; diff --git a/src/UglyToad.PdfPig.Tokenization/StringTokenizer.cs b/src/UglyToad.PdfPig.Tokenization/StringTokenizer.cs index bc68e919..1b9985f8 100644 --- a/src/UglyToad.PdfPig.Tokenization/StringTokenizer.cs +++ b/src/UglyToad.PdfPig.Tokenization/StringTokenizer.cs @@ -1,281 +1,281 @@ -namespace UglyToad.PdfPig.Tokenization -{ - using System.Text; - using Core; - using Tokens; - - internal class StringTokenizer : ITokenizer - { - private static readonly StringBuilderPool StringBuilderPool = new StringBuilderPool(16); - public bool ReadsNextByte { get; } = false; - - public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token) - { - token = null; - - if (inputBytes == null) - { - return false; - } - - if (currentByte != '(') - { - return false; - } - - var builder = StringBuilderPool.Borrow(); - var numberOfBrackets = 1; - var isEscapeActive = false; - var isLineBreaking = false; - - var octalModeActive = false; - - short[] octal = { 0, 0, 0 }; - var octalsRead = 0; - - while (inputBytes.MoveNext()) - { - var b = inputBytes.CurrentByte; - var c = (char)b; - - if (octalModeActive) - { - var nextCharacterOctal = c >= '0' && c <= '7'; - - if (nextCharacterOctal) - { - // left shift the octals. - LeftShiftOctal(c, octalsRead, octal); - octalsRead++; - } - - if (octalsRead == 3 || !nextCharacterOctal) - { - var characterCode = OctalHelpers.FromOctalDigits(octal); - - // For now :( - // TODO: I have a sneaking suspicion this is wrong, not sure what behaviour is for large octal numbers - builder.Append((char)characterCode); - - octal[0] = 0; - octal[1] = 0; - octal[2] = 0; - octalsRead = 0; - octalModeActive = false; - } - - if (nextCharacterOctal) - { - continue; - } - } - - switch (c) - { - case ')': - isLineBreaking = false; - if (!isEscapeActive) - { - numberOfBrackets--; - } - - isEscapeActive = false; - if (numberOfBrackets > 0) - { - builder.Append(c); - } - - // TODO: Check for other ends of string where the string is improperly formatted. See commented method - numberOfBrackets = CheckForEndOfString(numberOfBrackets, inputBytes); - - break; - case '(': - isLineBreaking = false; - - if (!isEscapeActive) - { - numberOfBrackets++; - } - - isEscapeActive = false; - builder.Append(c); - break; - // Escape - case '\\': - isLineBreaking = false; - // Escaped backslash - if (isEscapeActive) - { - builder.Append(c); - isEscapeActive = false; - } - else - { - isEscapeActive = true; - } - break; - default: - if (isLineBreaking) - { - if (ReadHelper.IsEndOfLine(c)) - { - continue; - } - - isLineBreaking = false; - builder.Append(c); - } - else if (isEscapeActive) - { - ProcessEscapedCharacter(c, builder, octal, ref octalModeActive, ref octalsRead, ref isLineBreaking); - isEscapeActive = false; - } - else - { - builder.Append(c); - } - - break; - } - - if (numberOfBrackets <= 0) - { - break; - } - } - - StringToken.Encoding encodedWith; - string tokenStr; - if (builder.Length >= 2) - { - if (builder[0] == 0xFE && builder[1] == 0xFF) - { - var rawBytes = OtherEncodings.StringAsLatin1Bytes(builder.ToString()); - - tokenStr = Encoding.BigEndianUnicode.GetString(rawBytes).Substring(1); - - encodedWith = StringToken.Encoding.Utf16BE; - } - else if (builder[0] == 0xFF && builder[1] == 0xFE) - { - var rawBytes = OtherEncodings.StringAsLatin1Bytes(builder.ToString()); - - tokenStr = Encoding.Unicode.GetString(rawBytes).Substring(1); - - encodedWith = StringToken.Encoding.Utf16; - } - else - { - tokenStr = builder.ToString(); - - encodedWith = StringToken.Encoding.Iso88591; - } - } - else - { - tokenStr = builder.ToString(); - - encodedWith = StringToken.Encoding.Iso88591; - } - - StringBuilderPool.Return(builder); - - token = new StringToken(tokenStr, encodedWith); - - return true; - } - - private static void LeftShiftOctal(char nextOctalChar, int octalsRead, short[] octals) - { - for (var i = octalsRead; i > 0; i--) - { - octals[i] = octals[i - 1]; - } - - var value = nextOctalChar.CharacterToShort(); - - octals[0] = value; - } - - private static void ProcessEscapedCharacter(char c, StringBuilder builder, short[] octal, ref bool isOctalActive, - ref int octalsRead, ref bool isLineBreaking) - { - switch (c) - { - case 'n': - builder.Append('\n'); - break; - case 'r': - builder.Append('\r'); - break; - case 't': - builder.Append('\t'); - break; - case 'b': - builder.Append('\b'); - break; - case 'f': - builder.Append('\f'); - break; - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - octal[0] = c.CharacterToShort(); - isOctalActive = true; - octalsRead = 1; - break; - default: - if (c == ReadHelper.AsciiCarriageReturn || c == ReadHelper.AsciiLineFeed) - { - isLineBreaking = true; - } - else - { - // Drop the backslash - builder.Append(c); - } - break; - } - } - - private static int CheckForEndOfString(int numberOfBrackets, IInputBytes bytes) - { - const byte lineFeed = 10; - const byte carriageReturn = 13; - - var braces = numberOfBrackets; - var nextThreeBytes = new byte[3]; - - var startAt = bytes.CurrentOffset; - - var amountRead = bytes.Read(nextThreeBytes); - - // Check the next 3 bytes if available - // The following cases are valid indicators for the end of the string - // 1. Next line contains another COSObject: CR + LF + '/' - // 2. COSDictionary ends in the next line: CR + LF + '>' - // 3. Next line contains another COSObject: CR + '/' - // 4. COSDictionary ends in the next line: CR + '>' - if (amountRead == 3 && nextThreeBytes[0] == carriageReturn) - { - if ((nextThreeBytes[1] == lineFeed && (nextThreeBytes[2] == '/') || nextThreeBytes[2] == '>') - || nextThreeBytes[1] == '/' || nextThreeBytes[1] == '>') - { - braces = 0; - } - } - - if (amountRead > 0) - { - bytes.Seek(startAt); - } - - return braces; - } - } +namespace UglyToad.PdfPig.Tokenization +{ + using System.Text; + using Core; + using Tokens; + + internal class StringTokenizer : ITokenizer + { + private readonly StringBuilder stringBuilder = new(); + public bool ReadsNextByte { get; } = false; + + public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token) + { + token = null; + + if (inputBytes == null) + { + return false; + } + + if (currentByte != '(') + { + return false; + } + + var builder = stringBuilder; + var numberOfBrackets = 1; + var isEscapeActive = false; + var isLineBreaking = false; + + var octalModeActive = false; + + short[] octal = { 0, 0, 0 }; + var octalsRead = 0; + + while (inputBytes.MoveNext()) + { + var b = inputBytes.CurrentByte; + var c = (char)b; + + if (octalModeActive) + { + var nextCharacterOctal = c >= '0' && c <= '7'; + + if (nextCharacterOctal) + { + // left shift the octals. + LeftShiftOctal(c, octalsRead, octal); + octalsRead++; + } + + if (octalsRead == 3 || !nextCharacterOctal) + { + var characterCode = OctalHelpers.FromOctalDigits(octal); + + // For now :( + // TODO: I have a sneaking suspicion this is wrong, not sure what behaviour is for large octal numbers + builder.Append((char)characterCode); + + octal[0] = 0; + octal[1] = 0; + octal[2] = 0; + octalsRead = 0; + octalModeActive = false; + } + + if (nextCharacterOctal) + { + continue; + } + } + + switch (c) + { + case ')': + isLineBreaking = false; + if (!isEscapeActive) + { + numberOfBrackets--; + } + + isEscapeActive = false; + if (numberOfBrackets > 0) + { + builder.Append(c); + } + + // TODO: Check for other ends of string where the string is improperly formatted. See commented method + numberOfBrackets = CheckForEndOfString(numberOfBrackets, inputBytes); + + break; + case '(': + isLineBreaking = false; + + if (!isEscapeActive) + { + numberOfBrackets++; + } + + isEscapeActive = false; + builder.Append(c); + break; + // Escape + case '\\': + isLineBreaking = false; + // Escaped backslash + if (isEscapeActive) + { + builder.Append(c); + isEscapeActive = false; + } + else + { + isEscapeActive = true; + } + break; + default: + if (isLineBreaking) + { + if (ReadHelper.IsEndOfLine(c)) + { + continue; + } + + isLineBreaking = false; + builder.Append(c); + } + else if (isEscapeActive) + { + ProcessEscapedCharacter(c, builder, octal, ref octalModeActive, ref octalsRead, ref isLineBreaking); + isEscapeActive = false; + } + else + { + builder.Append(c); + } + + break; + } + + if (numberOfBrackets <= 0) + { + break; + } + } + + StringToken.Encoding encodedWith; + string tokenStr; + if (builder.Length >= 2) + { + if (builder[0] == 0xFE && builder[1] == 0xFF) + { + var rawBytes = OtherEncodings.StringAsLatin1Bytes(builder.ToString()); + + tokenStr = Encoding.BigEndianUnicode.GetString(rawBytes).Substring(1); + + encodedWith = StringToken.Encoding.Utf16BE; + } + else if (builder[0] == 0xFF && builder[1] == 0xFE) + { + var rawBytes = OtherEncodings.StringAsLatin1Bytes(builder.ToString()); + + tokenStr = Encoding.Unicode.GetString(rawBytes).Substring(1); + + encodedWith = StringToken.Encoding.Utf16; + } + else + { + tokenStr = builder.ToString(); + + encodedWith = StringToken.Encoding.Iso88591; + } + } + else + { + tokenStr = builder.ToString(); + + encodedWith = StringToken.Encoding.Iso88591; + } + + builder.Clear(); + + token = new StringToken(tokenStr, encodedWith); + + return true; + } + + private static void LeftShiftOctal(char nextOctalChar, int octalsRead, short[] octals) + { + for (var i = octalsRead; i > 0; i--) + { + octals[i] = octals[i - 1]; + } + + var value = nextOctalChar.CharacterToShort(); + + octals[0] = value; + } + + private static void ProcessEscapedCharacter(char c, StringBuilder builder, short[] octal, ref bool isOctalActive, + ref int octalsRead, ref bool isLineBreaking) + { + switch (c) + { + case 'n': + builder.Append('\n'); + break; + case 'r': + builder.Append('\r'); + break; + case 't': + builder.Append('\t'); + break; + case 'b': + builder.Append('\b'); + break; + case 'f': + builder.Append('\f'); + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + octal[0] = c.CharacterToShort(); + isOctalActive = true; + octalsRead = 1; + break; + default: + if (c == ReadHelper.AsciiCarriageReturn || c == ReadHelper.AsciiLineFeed) + { + isLineBreaking = true; + } + else + { + // Drop the backslash + builder.Append(c); + } + break; + } + } + + private static int CheckForEndOfString(int numberOfBrackets, IInputBytes bytes) + { + const byte lineFeed = 10; + const byte carriageReturn = 13; + + var braces = numberOfBrackets; + var nextThreeBytes = new byte[3]; + + var startAt = bytes.CurrentOffset; + + var amountRead = bytes.Read(nextThreeBytes); + + // Check the next 3 bytes if available + // The following cases are valid indicators for the end of the string + // 1. Next line contains another COSObject: CR + LF + '/' + // 2. COSDictionary ends in the next line: CR + LF + '>' + // 3. Next line contains another COSObject: CR + '/' + // 4. COSDictionary ends in the next line: CR + '>' + if (amountRead == 3 && nextThreeBytes[0] == carriageReturn) + { + if ((nextThreeBytes[1] == lineFeed && (nextThreeBytes[2] == '/') || nextThreeBytes[2] == '>') + || nextThreeBytes[1] == '/' || nextThreeBytes[1] == '>') + { + braces = 0; + } + } + + if (amountRead > 0) + { + bytes.Seek(startAt); + } + + return braces; + } + } } \ No newline at end of file diff --git a/src/UglyToad.PdfPig.Tokenization/UglyToad.PdfPig.Tokenization.csproj b/src/UglyToad.PdfPig.Tokenization/UglyToad.PdfPig.Tokenization.csproj index a8effc40..fa40e532 100644 --- a/src/UglyToad.PdfPig.Tokenization/UglyToad.PdfPig.Tokenization.csproj +++ b/src/UglyToad.PdfPig.Tokenization/UglyToad.PdfPig.Tokenization.csproj @@ -1,25 +1,25 @@ - - - netstandard2.0;net45;net451;net452;net46;net461;net462;net47 - latest - 0.1.4 - False - true - true - ..\pdfpig.snk - - - true - - - - - - - - - - - - + + + netstandard2.0;net45;net451;net452;net46;net461;net462;net47 + latest + 0.1.4 + False + true + true + ..\pdfpig.snk + + + true + + + + + + + + + + + + \ No newline at end of file From 0b716a759f32a1e0adb4981a36dbf8ed11eb8d85 Mon Sep 17 00:00:00 2001 From: Plaisted Date: Tue, 19 Jan 2021 18:18:33 -0600 Subject: [PATCH 3/6] adding comment for non-static tokenizer --- src/UglyToad.PdfPig.Tokenization/Scanner/CoreTokenScanner.cs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/UglyToad.PdfPig.Tokenization/Scanner/CoreTokenScanner.cs b/src/UglyToad.PdfPig.Tokenization/Scanner/CoreTokenScanner.cs index 5182ba59..902cdb4d 100644 --- a/src/UglyToad.PdfPig.Tokenization/Scanner/CoreTokenScanner.cs +++ b/src/UglyToad.PdfPig.Tokenization/Scanner/CoreTokenScanner.cs @@ -16,7 +16,9 @@ private static readonly HexTokenizer HexTokenizer = new HexTokenizer(); private static readonly NameTokenizer NameTokenizer = new NameTokenizer(); private static readonly PlainTokenizer PlainTokenizer = new PlainTokenizer(); - + + // NOTE: these are not thread safe so should not be static. Each instance includes a + // StringBuilder it re-uses. private readonly NumericTokenizer NumericTokenizer = new NumericTokenizer(); private readonly StringTokenizer StringTokenizer = new StringTokenizer(); From feb6117e1e91f01155bf3a40400104b1b0d2f4c9 Mon Sep 17 00:00:00 2001 From: Plaisted Date: Tue, 19 Jan 2021 18:39:51 -0600 Subject: [PATCH 4/6] fix EOL issues --- .../NumericTokenizer.cs | 316 +++++----- .../Scanner/CoreTokenScanner.cs | 8 +- .../StringTokenizer.cs | 560 +++++++++--------- .../UglyToad.PdfPig.Tokenization.csproj | 48 +- 4 files changed, 466 insertions(+), 466 deletions(-) diff --git a/src/UglyToad.PdfPig.Tokenization/NumericTokenizer.cs b/src/UglyToad.PdfPig.Tokenization/NumericTokenizer.cs index 798892f4..ad7585cb 100644 --- a/src/UglyToad.PdfPig.Tokenization/NumericTokenizer.cs +++ b/src/UglyToad.PdfPig.Tokenization/NumericTokenizer.cs @@ -1,158 +1,158 @@ -namespace UglyToad.PdfPig.Tokenization -{ - using System; - using System.Globalization; - using System.Text; - using Core; - using Tokens; - - internal class NumericTokenizer : ITokenizer - { - private readonly StringBuilder stringBuilder = new(); - - private const byte Zero = 48; - private const byte Nine = 57; - - public bool ReadsNextByte { get; } = true; - - public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token) - { - token = null; - - StringBuilder characters; - - if ((currentByte >= Zero && currentByte <= Nine) || currentByte == '-' || currentByte == '+' || currentByte == '.') - { - characters = stringBuilder; - characters.Append((char)currentByte); - } - else - { - return false; - } - - while (inputBytes.MoveNext()) - { - var b = inputBytes.CurrentByte; - - if ((b >= Zero && b <= Nine) || - b == '-' || - b == '+' || - b == '.' || - b == 'E' || - b == 'e') - { - characters.Append((char)b); - } - else - { - break; - } - } - - try - { - var str = characters.ToString(); - characters.Clear(); - - switch (str) - { - case "-1": - token = NumericToken.MinusOne; - return true; - case "-": - case ".": - case "0": - case "0000": - token = NumericToken.Zero; - return true; - case "1": - token = NumericToken.One; - return true; - case "2": - token = NumericToken.Two; - return true; - case "3": - token = NumericToken.Three; - return true; - case "4": - token = NumericToken.Four; - return true; - case "5": - token = NumericToken.Five; - return true; - case "6": - token = NumericToken.Six; - return true; - case "7": - token = NumericToken.Seven; - return true; - case "8": - token = NumericToken.Eight; - return true; - case "9": - token = NumericToken.Nine; - return true; - case "10": - token = NumericToken.Ten; - return true; - case "11": - token = NumericToken.Eleven; - return true; - case "12": - token = NumericToken.Twelve; - return true; - case "13": - token = NumericToken.Thirteen; - return true; - case "14": - token = NumericToken.Fourteen; - return true; - case "15": - token = NumericToken.Fifteen; - return true; - case "16": - token = NumericToken.Sixteen; - return true; - case "17": - token = NumericToken.Seventeen; - return true; - case "18": - token = NumericToken.Eighteen; - return true; - case "19": - token = NumericToken.Nineteen; - return true; - case "20": - token = NumericToken.Twenty; - return true; - case "100": - token = NumericToken.OneHundred; - return true; - case "500": - token = NumericToken.FiveHundred; - return true; - case "1000": - token = NumericToken.OneThousand; - return true; - default: - if (!decimal.TryParse(str, NumberStyles.Any, CultureInfo.InvariantCulture, out var value)) - { - return false; - } - - token = new NumericToken(value); - return true; - } - } - catch (FormatException) - { - return false; - } - catch (OverflowException) - { - return false; - } - } - } -} +namespace UglyToad.PdfPig.Tokenization +{ + using System; + using System.Globalization; + using System.Text; + using Core; + using Tokens; + + internal class NumericTokenizer : ITokenizer + { + private readonly StringBuilder stringBuilder = new(); + + private const byte Zero = 48; + private const byte Nine = 57; + + public bool ReadsNextByte { get; } = true; + + public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token) + { + token = null; + + StringBuilder characters; + + if ((currentByte >= Zero && currentByte <= Nine) || currentByte == '-' || currentByte == '+' || currentByte == '.') + { + characters = stringBuilder; + characters.Append((char)currentByte); + } + else + { + return false; + } + + while (inputBytes.MoveNext()) + { + var b = inputBytes.CurrentByte; + + if ((b >= Zero && b <= Nine) || + b == '-' || + b == '+' || + b == '.' || + b == 'E' || + b == 'e') + { + characters.Append((char)b); + } + else + { + break; + } + } + + try + { + var str = characters.ToString(); + characters.Clear(); + + switch (str) + { + case "-1": + token = NumericToken.MinusOne; + return true; + case "-": + case ".": + case "0": + case "0000": + token = NumericToken.Zero; + return true; + case "1": + token = NumericToken.One; + return true; + case "2": + token = NumericToken.Two; + return true; + case "3": + token = NumericToken.Three; + return true; + case "4": + token = NumericToken.Four; + return true; + case "5": + token = NumericToken.Five; + return true; + case "6": + token = NumericToken.Six; + return true; + case "7": + token = NumericToken.Seven; + return true; + case "8": + token = NumericToken.Eight; + return true; + case "9": + token = NumericToken.Nine; + return true; + case "10": + token = NumericToken.Ten; + return true; + case "11": + token = NumericToken.Eleven; + return true; + case "12": + token = NumericToken.Twelve; + return true; + case "13": + token = NumericToken.Thirteen; + return true; + case "14": + token = NumericToken.Fourteen; + return true; + case "15": + token = NumericToken.Fifteen; + return true; + case "16": + token = NumericToken.Sixteen; + return true; + case "17": + token = NumericToken.Seventeen; + return true; + case "18": + token = NumericToken.Eighteen; + return true; + case "19": + token = NumericToken.Nineteen; + return true; + case "20": + token = NumericToken.Twenty; + return true; + case "100": + token = NumericToken.OneHundred; + return true; + case "500": + token = NumericToken.FiveHundred; + return true; + case "1000": + token = NumericToken.OneThousand; + return true; + default: + if (!decimal.TryParse(str, NumberStyles.Any, CultureInfo.InvariantCulture, out var value)) + { + return false; + } + + token = new NumericToken(value); + return true; + } + } + catch (FormatException) + { + return false; + } + catch (OverflowException) + { + return false; + } + } + } +} diff --git a/src/UglyToad.PdfPig.Tokenization/Scanner/CoreTokenScanner.cs b/src/UglyToad.PdfPig.Tokenization/Scanner/CoreTokenScanner.cs index 902cdb4d..e69fc3ac 100644 --- a/src/UglyToad.PdfPig.Tokenization/Scanner/CoreTokenScanner.cs +++ b/src/UglyToad.PdfPig.Tokenization/Scanner/CoreTokenScanner.cs @@ -15,10 +15,10 @@ private static readonly DictionaryTokenizer DictionaryTokenizer = new DictionaryTokenizer(); private static readonly HexTokenizer HexTokenizer = new HexTokenizer(); private static readonly NameTokenizer NameTokenizer = new NameTokenizer(); - private static readonly PlainTokenizer PlainTokenizer = new PlainTokenizer(); - - // NOTE: these are not thread safe so should not be static. Each instance includes a - // StringBuilder it re-uses. + private static readonly PlainTokenizer PlainTokenizer = new PlainTokenizer(); + + // NOTE: these are not thread safe so should not be static. Each instance includes a + // StringBuilder it re-uses. private readonly NumericTokenizer NumericTokenizer = new NumericTokenizer(); private readonly StringTokenizer StringTokenizer = new StringTokenizer(); diff --git a/src/UglyToad.PdfPig.Tokenization/StringTokenizer.cs b/src/UglyToad.PdfPig.Tokenization/StringTokenizer.cs index 1b9985f8..bb8fc2a1 100644 --- a/src/UglyToad.PdfPig.Tokenization/StringTokenizer.cs +++ b/src/UglyToad.PdfPig.Tokenization/StringTokenizer.cs @@ -1,281 +1,281 @@ -namespace UglyToad.PdfPig.Tokenization -{ - using System.Text; - using Core; - using Tokens; - - internal class StringTokenizer : ITokenizer - { - private readonly StringBuilder stringBuilder = new(); - public bool ReadsNextByte { get; } = false; - - public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token) - { - token = null; - - if (inputBytes == null) - { - return false; - } - - if (currentByte != '(') - { - return false; - } - - var builder = stringBuilder; - var numberOfBrackets = 1; - var isEscapeActive = false; - var isLineBreaking = false; - - var octalModeActive = false; - - short[] octal = { 0, 0, 0 }; - var octalsRead = 0; - - while (inputBytes.MoveNext()) - { - var b = inputBytes.CurrentByte; - var c = (char)b; - - if (octalModeActive) - { - var nextCharacterOctal = c >= '0' && c <= '7'; - - if (nextCharacterOctal) - { - // left shift the octals. - LeftShiftOctal(c, octalsRead, octal); - octalsRead++; - } - - if (octalsRead == 3 || !nextCharacterOctal) - { - var characterCode = OctalHelpers.FromOctalDigits(octal); - - // For now :( - // TODO: I have a sneaking suspicion this is wrong, not sure what behaviour is for large octal numbers - builder.Append((char)characterCode); - - octal[0] = 0; - octal[1] = 0; - octal[2] = 0; - octalsRead = 0; - octalModeActive = false; - } - - if (nextCharacterOctal) - { - continue; - } - } - - switch (c) - { - case ')': - isLineBreaking = false; - if (!isEscapeActive) - { - numberOfBrackets--; - } - - isEscapeActive = false; - if (numberOfBrackets > 0) - { - builder.Append(c); - } - - // TODO: Check for other ends of string where the string is improperly formatted. See commented method - numberOfBrackets = CheckForEndOfString(numberOfBrackets, inputBytes); - - break; - case '(': - isLineBreaking = false; - - if (!isEscapeActive) - { - numberOfBrackets++; - } - - isEscapeActive = false; - builder.Append(c); - break; - // Escape - case '\\': - isLineBreaking = false; - // Escaped backslash - if (isEscapeActive) - { - builder.Append(c); - isEscapeActive = false; - } - else - { - isEscapeActive = true; - } - break; - default: - if (isLineBreaking) - { - if (ReadHelper.IsEndOfLine(c)) - { - continue; - } - - isLineBreaking = false; - builder.Append(c); - } - else if (isEscapeActive) - { - ProcessEscapedCharacter(c, builder, octal, ref octalModeActive, ref octalsRead, ref isLineBreaking); - isEscapeActive = false; - } - else - { - builder.Append(c); - } - - break; - } - - if (numberOfBrackets <= 0) - { - break; - } - } - - StringToken.Encoding encodedWith; - string tokenStr; - if (builder.Length >= 2) - { - if (builder[0] == 0xFE && builder[1] == 0xFF) - { - var rawBytes = OtherEncodings.StringAsLatin1Bytes(builder.ToString()); - - tokenStr = Encoding.BigEndianUnicode.GetString(rawBytes).Substring(1); - - encodedWith = StringToken.Encoding.Utf16BE; - } - else if (builder[0] == 0xFF && builder[1] == 0xFE) - { - var rawBytes = OtherEncodings.StringAsLatin1Bytes(builder.ToString()); - - tokenStr = Encoding.Unicode.GetString(rawBytes).Substring(1); - - encodedWith = StringToken.Encoding.Utf16; - } - else - { - tokenStr = builder.ToString(); - - encodedWith = StringToken.Encoding.Iso88591; - } - } - else - { - tokenStr = builder.ToString(); - - encodedWith = StringToken.Encoding.Iso88591; - } - - builder.Clear(); - - token = new StringToken(tokenStr, encodedWith); - - return true; - } - - private static void LeftShiftOctal(char nextOctalChar, int octalsRead, short[] octals) - { - for (var i = octalsRead; i > 0; i--) - { - octals[i] = octals[i - 1]; - } - - var value = nextOctalChar.CharacterToShort(); - - octals[0] = value; - } - - private static void ProcessEscapedCharacter(char c, StringBuilder builder, short[] octal, ref bool isOctalActive, - ref int octalsRead, ref bool isLineBreaking) - { - switch (c) - { - case 'n': - builder.Append('\n'); - break; - case 'r': - builder.Append('\r'); - break; - case 't': - builder.Append('\t'); - break; - case 'b': - builder.Append('\b'); - break; - case 'f': - builder.Append('\f'); - break; - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - octal[0] = c.CharacterToShort(); - isOctalActive = true; - octalsRead = 1; - break; - default: - if (c == ReadHelper.AsciiCarriageReturn || c == ReadHelper.AsciiLineFeed) - { - isLineBreaking = true; - } - else - { - // Drop the backslash - builder.Append(c); - } - break; - } - } - - private static int CheckForEndOfString(int numberOfBrackets, IInputBytes bytes) - { - const byte lineFeed = 10; - const byte carriageReturn = 13; - - var braces = numberOfBrackets; - var nextThreeBytes = new byte[3]; - - var startAt = bytes.CurrentOffset; - - var amountRead = bytes.Read(nextThreeBytes); - - // Check the next 3 bytes if available - // The following cases are valid indicators for the end of the string - // 1. Next line contains another COSObject: CR + LF + '/' - // 2. COSDictionary ends in the next line: CR + LF + '>' - // 3. Next line contains another COSObject: CR + '/' - // 4. COSDictionary ends in the next line: CR + '>' - if (amountRead == 3 && nextThreeBytes[0] == carriageReturn) - { - if ((nextThreeBytes[1] == lineFeed && (nextThreeBytes[2] == '/') || nextThreeBytes[2] == '>') - || nextThreeBytes[1] == '/' || nextThreeBytes[1] == '>') - { - braces = 0; - } - } - - if (amountRead > 0) - { - bytes.Seek(startAt); - } - - return braces; - } - } +namespace UglyToad.PdfPig.Tokenization +{ + using System.Text; + using Core; + using Tokens; + + internal class StringTokenizer : ITokenizer + { + private readonly StringBuilder stringBuilder = new(); + public bool ReadsNextByte { get; } = false; + + public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token) + { + token = null; + + if (inputBytes == null) + { + return false; + } + + if (currentByte != '(') + { + return false; + } + + var builder = stringBuilder; + var numberOfBrackets = 1; + var isEscapeActive = false; + var isLineBreaking = false; + + var octalModeActive = false; + + short[] octal = { 0, 0, 0 }; + var octalsRead = 0; + + while (inputBytes.MoveNext()) + { + var b = inputBytes.CurrentByte; + var c = (char)b; + + if (octalModeActive) + { + var nextCharacterOctal = c >= '0' && c <= '7'; + + if (nextCharacterOctal) + { + // left shift the octals. + LeftShiftOctal(c, octalsRead, octal); + octalsRead++; + } + + if (octalsRead == 3 || !nextCharacterOctal) + { + var characterCode = OctalHelpers.FromOctalDigits(octal); + + // For now :( + // TODO: I have a sneaking suspicion this is wrong, not sure what behaviour is for large octal numbers + builder.Append((char)characterCode); + + octal[0] = 0; + octal[1] = 0; + octal[2] = 0; + octalsRead = 0; + octalModeActive = false; + } + + if (nextCharacterOctal) + { + continue; + } + } + + switch (c) + { + case ')': + isLineBreaking = false; + if (!isEscapeActive) + { + numberOfBrackets--; + } + + isEscapeActive = false; + if (numberOfBrackets > 0) + { + builder.Append(c); + } + + // TODO: Check for other ends of string where the string is improperly formatted. See commented method + numberOfBrackets = CheckForEndOfString(numberOfBrackets, inputBytes); + + break; + case '(': + isLineBreaking = false; + + if (!isEscapeActive) + { + numberOfBrackets++; + } + + isEscapeActive = false; + builder.Append(c); + break; + // Escape + case '\\': + isLineBreaking = false; + // Escaped backslash + if (isEscapeActive) + { + builder.Append(c); + isEscapeActive = false; + } + else + { + isEscapeActive = true; + } + break; + default: + if (isLineBreaking) + { + if (ReadHelper.IsEndOfLine(c)) + { + continue; + } + + isLineBreaking = false; + builder.Append(c); + } + else if (isEscapeActive) + { + ProcessEscapedCharacter(c, builder, octal, ref octalModeActive, ref octalsRead, ref isLineBreaking); + isEscapeActive = false; + } + else + { + builder.Append(c); + } + + break; + } + + if (numberOfBrackets <= 0) + { + break; + } + } + + StringToken.Encoding encodedWith; + string tokenStr; + if (builder.Length >= 2) + { + if (builder[0] == 0xFE && builder[1] == 0xFF) + { + var rawBytes = OtherEncodings.StringAsLatin1Bytes(builder.ToString()); + + tokenStr = Encoding.BigEndianUnicode.GetString(rawBytes).Substring(1); + + encodedWith = StringToken.Encoding.Utf16BE; + } + else if (builder[0] == 0xFF && builder[1] == 0xFE) + { + var rawBytes = OtherEncodings.StringAsLatin1Bytes(builder.ToString()); + + tokenStr = Encoding.Unicode.GetString(rawBytes).Substring(1); + + encodedWith = StringToken.Encoding.Utf16; + } + else + { + tokenStr = builder.ToString(); + + encodedWith = StringToken.Encoding.Iso88591; + } + } + else + { + tokenStr = builder.ToString(); + + encodedWith = StringToken.Encoding.Iso88591; + } + + builder.Clear(); + + token = new StringToken(tokenStr, encodedWith); + + return true; + } + + private static void LeftShiftOctal(char nextOctalChar, int octalsRead, short[] octals) + { + for (var i = octalsRead; i > 0; i--) + { + octals[i] = octals[i - 1]; + } + + var value = nextOctalChar.CharacterToShort(); + + octals[0] = value; + } + + private static void ProcessEscapedCharacter(char c, StringBuilder builder, short[] octal, ref bool isOctalActive, + ref int octalsRead, ref bool isLineBreaking) + { + switch (c) + { + case 'n': + builder.Append('\n'); + break; + case 'r': + builder.Append('\r'); + break; + case 't': + builder.Append('\t'); + break; + case 'b': + builder.Append('\b'); + break; + case 'f': + builder.Append('\f'); + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + octal[0] = c.CharacterToShort(); + isOctalActive = true; + octalsRead = 1; + break; + default: + if (c == ReadHelper.AsciiCarriageReturn || c == ReadHelper.AsciiLineFeed) + { + isLineBreaking = true; + } + else + { + // Drop the backslash + builder.Append(c); + } + break; + } + } + + private static int CheckForEndOfString(int numberOfBrackets, IInputBytes bytes) + { + const byte lineFeed = 10; + const byte carriageReturn = 13; + + var braces = numberOfBrackets; + var nextThreeBytes = new byte[3]; + + var startAt = bytes.CurrentOffset; + + var amountRead = bytes.Read(nextThreeBytes); + + // Check the next 3 bytes if available + // The following cases are valid indicators for the end of the string + // 1. Next line contains another COSObject: CR + LF + '/' + // 2. COSDictionary ends in the next line: CR + LF + '>' + // 3. Next line contains another COSObject: CR + '/' + // 4. COSDictionary ends in the next line: CR + '>' + if (amountRead == 3 && nextThreeBytes[0] == carriageReturn) + { + if ((nextThreeBytes[1] == lineFeed && (nextThreeBytes[2] == '/') || nextThreeBytes[2] == '>') + || nextThreeBytes[1] == '/' || nextThreeBytes[1] == '>') + { + braces = 0; + } + } + + if (amountRead > 0) + { + bytes.Seek(startAt); + } + + return braces; + } + } } \ No newline at end of file diff --git a/src/UglyToad.PdfPig.Tokenization/UglyToad.PdfPig.Tokenization.csproj b/src/UglyToad.PdfPig.Tokenization/UglyToad.PdfPig.Tokenization.csproj index fa40e532..09fa1ff5 100644 --- a/src/UglyToad.PdfPig.Tokenization/UglyToad.PdfPig.Tokenization.csproj +++ b/src/UglyToad.PdfPig.Tokenization/UglyToad.PdfPig.Tokenization.csproj @@ -1,25 +1,25 @@ - - - netstandard2.0;net45;net451;net452;net46;net461;net462;net47 - latest - 0.1.4 - False - true - true - ..\pdfpig.snk - - - true - - - - - - - - - - - - + + + netstandard2.0;net45;net451;net452;net46;net461;net462;net47 + latest + 0.1.4 + False + true + true + ..\pdfpig.snk + + + true + + + + + + + + + + + + \ No newline at end of file From 4c807691b78877202b2f9e5825eabaa644ab0238 Mon Sep 17 00:00:00 2001 From: Plaisted Date: Tue, 19 Jan 2021 18:52:14 -0600 Subject: [PATCH 5/6] adding in PlainTokenizer to unpooled SB changes --- src/UglyToad.PdfPig.Tokenization/PlainTokenizer.cs | 7 ++++--- .../Scanner/CoreTokenScanner.cs | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/UglyToad.PdfPig.Tokenization/PlainTokenizer.cs b/src/UglyToad.PdfPig.Tokenization/PlainTokenizer.cs index 8ef87512..5d774638 100644 --- a/src/UglyToad.PdfPig.Tokenization/PlainTokenizer.cs +++ b/src/UglyToad.PdfPig.Tokenization/PlainTokenizer.cs @@ -1,11 +1,12 @@ namespace UglyToad.PdfPig.Tokenization { using Core; + using System.Text; using Tokens; internal class PlainTokenizer : ITokenizer { - private static readonly StringBuilderPool StringBuilderPool = new StringBuilderPool(10); + private readonly StringBuilder stringBuilder = new(); public bool ReadsNextByte { get; } = true; @@ -18,7 +19,7 @@ return false; } - var builder = StringBuilderPool.Borrow(); + var builder = stringBuilder; builder.Append((char)currentByte); while (inputBytes.MoveNext()) { @@ -39,7 +40,7 @@ } var text = builder.ToString(); - StringBuilderPool.Return(builder); + builder.Clear(); switch (text) { diff --git a/src/UglyToad.PdfPig.Tokenization/Scanner/CoreTokenScanner.cs b/src/UglyToad.PdfPig.Tokenization/Scanner/CoreTokenScanner.cs index e69fc3ac..2f676076 100644 --- a/src/UglyToad.PdfPig.Tokenization/Scanner/CoreTokenScanner.cs +++ b/src/UglyToad.PdfPig.Tokenization/Scanner/CoreTokenScanner.cs @@ -15,10 +15,10 @@ private static readonly DictionaryTokenizer DictionaryTokenizer = new DictionaryTokenizer(); private static readonly HexTokenizer HexTokenizer = new HexTokenizer(); private static readonly NameTokenizer NameTokenizer = new NameTokenizer(); - private static readonly PlainTokenizer PlainTokenizer = new PlainTokenizer(); // NOTE: these are not thread safe so should not be static. Each instance includes a // StringBuilder it re-uses. + private readonly PlainTokenizer PlainTokenizer = new PlainTokenizer(); private readonly NumericTokenizer NumericTokenizer = new NumericTokenizer(); private readonly StringTokenizer StringTokenizer = new StringTokenizer(); From a0f0c4d6c70636cfc0cea357f269d2ad3d6df6ba Mon Sep 17 00:00:00 2001 From: Plaisted Date: Tue, 19 Jan 2021 18:53:44 -0600 Subject: [PATCH 6/6] switch to old syntax for build server --- src/UglyToad.PdfPig.Tokenization/NumericTokenizer.cs | 2 +- src/UglyToad.PdfPig.Tokenization/PlainTokenizer.cs | 2 +- src/UglyToad.PdfPig.Tokenization/StringTokenizer.cs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/UglyToad.PdfPig.Tokenization/NumericTokenizer.cs b/src/UglyToad.PdfPig.Tokenization/NumericTokenizer.cs index ad7585cb..10c7b9fa 100644 --- a/src/UglyToad.PdfPig.Tokenization/NumericTokenizer.cs +++ b/src/UglyToad.PdfPig.Tokenization/NumericTokenizer.cs @@ -8,7 +8,7 @@ internal class NumericTokenizer : ITokenizer { - private readonly StringBuilder stringBuilder = new(); + private readonly StringBuilder stringBuilder = new StringBuilder(); private const byte Zero = 48; private const byte Nine = 57; diff --git a/src/UglyToad.PdfPig.Tokenization/PlainTokenizer.cs b/src/UglyToad.PdfPig.Tokenization/PlainTokenizer.cs index 5d774638..20afccb4 100644 --- a/src/UglyToad.PdfPig.Tokenization/PlainTokenizer.cs +++ b/src/UglyToad.PdfPig.Tokenization/PlainTokenizer.cs @@ -6,7 +6,7 @@ internal class PlainTokenizer : ITokenizer { - private readonly StringBuilder stringBuilder = new(); + private readonly StringBuilder stringBuilder = new StringBuilder(); public bool ReadsNextByte { get; } = true; diff --git a/src/UglyToad.PdfPig.Tokenization/StringTokenizer.cs b/src/UglyToad.PdfPig.Tokenization/StringTokenizer.cs index bb8fc2a1..6592dddd 100644 --- a/src/UglyToad.PdfPig.Tokenization/StringTokenizer.cs +++ b/src/UglyToad.PdfPig.Tokenization/StringTokenizer.cs @@ -6,7 +6,7 @@ internal class StringTokenizer : ITokenizer { - private readonly StringBuilder stringBuilder = new(); + private readonly StringBuilder stringBuilder = new StringBuilder(); public bool ReadsNextByte { get; } = false; public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)