From fa09b9afb1833f3fcbe49028a8919505ad6732d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Tue, 18 Jan 2022 15:53:13 +0100 Subject: [PATCH] post workers: Share resource accounting code --- build/openrpc/miner.json.gz | Bin 12696 -> 12801 bytes build/openrpc/worker.json.gz | Bin 4212 -> 4323 bytes documentation/en/api-v0-methods-miner.md | 164 +++++++++++++++++++ documentation/en/api-v0-methods-worker.md | 164 +++++++++++++++++++ extern/sector-storage/manager_post.go | 16 +- extern/sector-storage/sched_post.go | 106 +++++------- extern/sector-storage/storiface/resources.go | 100 ++++++++++- itests/worker_test.go | 9 +- 8 files changed, 482 insertions(+), 77 deletions(-) diff --git a/build/openrpc/miner.json.gz b/build/openrpc/miner.json.gz index e5253cf432dd4e1ba2e5be4f1df4ddeb1eafbb4c..c9d4a43d5a8fefa01b8625388140c295bbc42f09 100644 GIT binary patch delta 893 zcmV-@1A_dRV}WF_k}iK%wo93!e89xCA6EVaTe>H{KB1+}CatG)MBrom!N-fwpDDKR zSO3OU-sS>2(6W$)-KFHw$BWNg0l$MTO7!s5(YlF_$NlkmsK@&bo;BFL2lJ_;^%59# z^-+H`=#G1X7!uuKS08qJqyDf_Ji^|XHPcVv((m=Uz24Da)YE_C$mo52H0gDZy81u~ zodY>O!xTP|l+dS+Hb~$z91MDcBYiR$B=Ct5YLoa3$D{6KFdB~f!>5jR^ju^*e0DM& z?Gc%d_l!&@dqt*Cov4&0n9N%-n-;U%A9}uM&ElWPg zq!fKdqI~KE9vy#Znk)Yn--_Z>e_rufqxCp(jOd6dhw-+Jl+;66{To>+N>o-8=*~A({u;4nG~qcNSjG<`fRir5vHDR zyxXiWO+DJJFjeh-+rm^8k6dB;jHonVls1J*bHcPFZ8m=`Ov}^ec7$oARcOFEZ3>kJ zY|~~?Nek0nyRqLJ$CR@FG(bjg*b!9Cf7;UrdRLzehDXEJf7<#_cTJ|M{?px%X+*@% z5H%Ju`P$V7<`DgaVGI~mPo&$-tA3}2EFbe zZZJeN*ETSb3d5e>JDTX@L2sOxl>TVYA5V_@gHhAoqyU-L&-vVxnUs~!NSP_o`FxZq zm8Es|<`Seb%-Nj@Q|0~}-JRD#B;VH=Zf~--JOWSC>Hk}iLIMF~xFy!iZ?VhexuF3Z$; zn+xbb%R&}*m)dbR`RL=tXWsA~bWx&*r;gT5bUf~l$3s2dckryi?md`K9j%wZpsSDi zqd|Av8^n-^K5h_?5bRS&>nBhd^d^JJaMYU&W2i(QZwr+{a$SFp#*;~Za5U+~mnQOf zTc{j87i|uooi<1NL!0CMq0Py@(B@Mo5T@RiquZ=7O+DJJFjeh-+rm^8k6dB;jHo1q z=`*6zoG>j(n@tPT^0c`fVOnVwlEU;EQArBZXG0||OnWtiX-b;!)-tWpeR*Lz?CQgA zZ`7Bl+!}aTodV4UY85V4x?d zL+|R7!SHCqhNJ$lx?HVKrnTg1BV?*7S9d|CNx}NuWSW$&&r7Be5j#Uv zT^_{jYchz@9(uL=N4R6lBdr#+BKkDnl{&2h-XA)m$iS!%l-QMJA(CZH3 z217)1Z37djFzo5QqlrEq^u~!v>5m5e@#LsK7&Yxp3Xp02oX<^}Nm==fl$jEp&qtY3 zSz2drE@5oSnVv_Vdfm17t zO!UI5K4GsD?S4BA80=Xeaz{?mftl`z?PF4(h{Hi3bf3mlh%P5&f3_(2q@P zEDSjr(I!JE_PWfB@c1;9`S|FyR4;xQi?UP7T=m23N;FnYON~VKDdxt8^O-|@&Cq3n zg%p<~#<3DYjBvYR-f)cJ%YVZqg3@PY#;OuaUhPR-{*<60F1~hIQ(|Akw~zYT)6?zK S{|^iR0RR8o2Ool@S^@x1KC2lbhV!!EO?4 zX^FNukttn?PU;5!?h8t?^;Se%vYnI|pc7Lh&mnmpKT_g3&(Lu(^;{F4!EQ(E^q~R? zojCC988X6b4A0>24qN!(>hc_Xyu5%4e8iql92&If_B!f`g&*Ncfpg4ej^)F%+h<78 zoHzZA*w}Ok?R>PXOM*=d6=-4K^oYyCF1~&HHg)dkWI;{v&IiBz<#-R+yJp_PWbeVc z**jMSrb8M2&N}De{}teNfzKQdO>yTMo6Pa9@I(Qw25#@sm~?Kai!2L=9bA(iSOKmM z%AOdTzCI&{-h+4iCz&7o_B_XX51#Mn9U~LqcP=Qy9!2(Mbl}4WK#3hz zxnmw?3y*?dD1ORy_g(?Ndfp!nyI#1i<1uXUHY@&5)U2TYcdp8Z%?HT`!`Xg+M^|_B z+qZ8}f&OAXN8S>i!I)Y&pPZ(DN1*~;o(Rg{{Iz9=OBYK=#Lbsb+v)rtw zX!9TD49^jlA^aW9T^olzUVP?7UkSw?JnO2es=z-M^D+N5S}E|-bDYUPmoA29!_~b4 zk!1yJA^XyETMqVyYP=1fqKf;M7!Ha4_jIj7Gd zMF42yhnI-WqGBUa{Lu`i{i|bPL0B8G+7_l=&qF;xE0OC@h+NC$zCBRa0HVtS)4Yb( zk~-*PWD`1t38sAJ1trbE%$$9w$Jg9>#9K2Zk)}kNZw+bo^%8hMD6+{99Ib^QPDnKt zJpfymUm-dbcKa#Ht@;1#_|`x`0)V4X(qjMs89AK9D0hgjq{{u?iU^PG8fSSsRQOZyRX zwI;DT=fo0pv8BY@^0rl@m5%qQ6!{n&ZzR<(Z?ppip$uGDm@+aU1)J`3VgZkn_c3MM zIQ!V1oP>48gib?cqYNO+^04n?3y@839dN<8NuN0099v+#1PJ^qeE+Nf1kN1`o8aBk zjF>Y(s73e?IgRYs09%C%4^M#KkKB=0iNM0mvS^&YU3Zs z#7KYgtyzDmBfZbYy8nRZ?%&S$f4hsNGk5C(AoTe|VMgEIe(#-wwFr-bu}EnYLQvw;`q&pvW#;BMR1qYZR) zXy^DW6qWNCvTaPK;R!pTyx(5XmiKmrsO8Kl_WcwXh#9*Il)4Tqh}+b{16dMVA%oNe zW)J20KwE_eC5zMAN$;?Mlis}BOo&m7A0DV`E63C?$b8++_caczs560Q!?lgwxDzFK zjIHy~_BY{W6&SO4c57(&3cPVG#BlWa!`6HLM7K?Qsk|rJ%vr4yhU6SpBC)7x1V2{U z^{7K^J(X2vjfj3f!^;e|=gqUKRciwblQ<{xmb`f6eeeK70s7{eieHAIohixV;xd ztZ9C>X;@F^l4E;@b8L}dZpMTnZ}}@V9gC|ox1B!j8wrcDR`jA>_)R6LM2O;-}}AH-7lnm*B74Gz0e; zQm*5((?|96DcF0uYqeFQ&CFLkclb~>)fg)9EBc-;_;Pu3!-!3Opm55%L7hwFaW82T ze_l}Z6nbVKir2KrF% z^SLoH{PYb&GkWUCFf#mT!=W)!yTjp7ZI++@aL^qYgZ@D8uN2rjl$iDpE~bMc64T+4 ziRtL5#B`-3tt{V6OEa4{v+WPP+O(GGPoj8#c6HCpNTXU}RQD~ESwnhIq0Cy*0}ACz z3GVrD6N)U$4Ps<$Z$8z-n$I1`i=)(v#o2+6xZ46tYG@;4)E{W0eu|Z3aa*ip?dY&K z9F26NH`3BelPGSBl`XC4?y|X)6>UT|>q65#$Y$1{9#A&3=Ja5)nJ}iNGoKT7+HKaD z=8E=fOzVd5H#`ttl#o}cbpOzWPX?ng`$CUz2p5-PB- zUDX)&2K|ijkIEXf{Az)T-6!q5aor#7JT%=O?K~FUAMLD>+$Zgl$gmknmh>a8hUt zHC5}4)S;mbGn1kZ41GB2>BgYx)1(bE@|^R~l9{!Y2b9d5={%fd7TVHX_T~jlmB87( z8PnSI8{M7n!btU5XS}^B+x9fO%cwl=EZc;o%3I^6IkPG%q)pq^Erhgzy?J9onnR)o z71FGMJ*1E(EbM~vza&sS1~?#rssySMsJ^Zdu#PVmJm)WS`7JqUry;XR3(4eD1;U#UQhVUJ#>X0G$g3y%a= zAtOmOmm?*(&jkaHg}&DOWaM7FO`sUHnu(0HB1+VDC2!(NeNXKUi?5Jq)*K<0Z;?iEquPX`^DFc8S=`7xZR9 zo8pVn$sib!H{i9ad{fX{Wo(whE`^-an&Vb3NS82J86YHGuU15YzzhuflkR9Z9G~Ec zuAUgy0H2I?Y@UozUmHw}G3ucqRN!BjOlK^-+x)v@B0C_IyI_5e=yh-y6-qPz$sBL@ z$RF!&i}q77k)o$jOpfdZC^3eDr|Z$~HAMcgD!QSTD7xpE*!Qoo?}suir17c7#j3wp z5UX=@Qw>DZ9aU)krZ+J?&+=KU@L@yl*}Nbo3gCo*GXYP*sDU50SHLui>|{k1?In1c zVH(xDqi#qK_zZbWEFW%>CD{x*w>-%wHA1)4LJO^gc6r(VzYR6(l_Z0_^qfb0gkdI+YDYFV3FsndiFg&?3 z$FVZ`XmE06WF`)IvLsKI8biYUl3dn#G?#baqDWY46bT2}HZ}3gL3Z- zm9_mrW$h4IHY8k@a9O&3376l>%1F3;tSghxh*IUa>%CLSM{-WT^2qTlv*}9IFHwJh z`UfQ`>aVbkk&kbJuiUXEi1;(D_nD+VL%J>Co*$w7RO+Y{^%#JX)iv%4_#xiXXKp}J46x37qP-g7yG*P7s{vLPd3Az(B~8{ zCC6Hiv!=F}<@Xa(+jccTP~MAd%F8I_VC`{~dH=lXf_A6JeQAyV5)JdwajwRZP zBGw=`6Tv19Qoe{H)&$}9@M?N=60IJC?jIRiO-XU>M0XKB)inhMjT=mAw?js*1Sf*2 z;UxZ*_*dfJ9bI%+Ey-tljgfsGgr*Vwpfb@?%sq#=5!rF##NbUbFAn@8G8uWqxf({+ zuczKKM`=NcCHi(bF^smHeR`(k+hpnXUr5%Qmi^b5R&So zJ!kAVYMRB(JDA2YMPE1CpP(&{iHoggavDThHhIt@LAeCwuNjngOOh(bfKl|@&O}S` zh#0&y!l{2Q*w#3@wXZYNgOf*QN2Qy`fSvO~{2I}u^mNkGy){p#mBfDHCjHXG+n+$i zjTlk3xTs!5jVrCrR>bMZP1GAVq?dW*aJSi! z$Bnn!l*itFyM1{~e=nFs?#_=+RgVFW6iV}Jge21UN#FO@d|$65zTI|#?RJl7*H$RI zmtxrcP9AV3E}sI%3kEj9?P&B#cOqgP*8+?9Xl!%ZGFshF60NS=FNs!njoGg>kOpDB zSvM>LietyID=rnp9qf9sAjL|><${>t9C;5uU^Bed3giPEK094&M+f}jFu-Pr|3iG> zW5x)b-qAT)0)`$iKwyHOfR9awT0US7;N~4o5ySikL;eHc;%+VM2Cp*kS*@j1TtXA5 zVI5*s-W8~=GlP=2HqkAWkCqigr4?12bnEhMmPhEcN`KP*?5spNw#qfBu{}|6Dpbou zq~aDg8GP%flj>o~z3qP@|KW3J`vfIA(Xv#UF|xTq^aCMU!rrv ztSQ}5YPe12O)23!NcgDa0wrIQ5CLo<;9 zw<0gsuXEs5oU%Fis&?DlX>a@JNb8ndEegL74&G`CYEPo&wt2m7sEaHMTc4H;Ur-Bw z-`p`;KMA_OCO>d+x2W)YklvQT-4pz`^};1RMer z*QVL7r(a3>#o`x}8rzQf;DZHa#0G4JJq(bC0S({N`M_IH8ob@xQDT3(<0xYUSF8Kg R{|5j7|NmytG%5Dy007!QhyDNn literal 4212 zcmV-)5R300iwFP!00000|Lk3RZ`-(*|0)FcpJt#qvLwF(^p7^3>1KD@$xUu2 zTB2=EWJ*_}le&Sw`+|~ey=~E!Y$qiK=)@Gsb4Z@YkFUcQ=(w1At_ja!yQQ_dP=SPw z9C-c$8DTbt=kQO5&3tfubpbwHT|xyuVb3QH4N9~-E%nsGPjI2YDP|ML^5Oa23#4eu zi~d1uY&wLtK3LWj!6t?Zw6Je_#ARU>-@kt!JNI-nqb7LggWtY6-Xr#Im^U-o30M|; z=c>STD8oNk>mvNW0^Bz6nd6}`Zrxy$Io>rMDZtgh-2)nu)-83BW#O=b8}bt?z|}$7 z7h_e|C&bVRc*lQ|5i)V>0@?QECk0^7bG#Gqa!>CW83Dg_Ng4JivRADG@81JT>@dqc z^Dvuv6#PcxY1q9pj}Osse(E|t35Fy3oEk#6og1@Uucv79 zALay45tkwS15I5Uhc#Y);z?f!#U4CwtE#HNue0fp{~9b5c;z|H=$~^J!}I>)L4nA! z0=AHSo4?CSoC{AtMT80Rzk5iu1}6x(9@p@o6Ls5!&8T0 zU=#WX6*%*3cn&9wx&Hat**M73GM(w!C;}0uuIF&R&TI}y&W5%#Jewk)VebsNS{oAd0?8?&`MGV zeT-~E$1uT^&%7X|9+;W457qdZI!}0OrXk5X1?o!lDac z%k*nR$HHzuMY%EmpKad~2uJ{M6iRvw03ajB)t0PY;9sMWj~O2)%}`*l3>5smNUvlG z^!c*JDxDtE+pfp9To;p0RXWwT=2Uw|u~YqwDfSSH{KkLd`5&G$jTTEq{A+1FVy@OC zR_lUTf-bg{cvIeXU2CP|Jt{>$2FDvo_2!9opdgfiOAAv*MxQx&A$<9InP(l4cpR<#}$IxxE^ z&l}n(JSZ8Q##Va!HLUdJ-DX0J8a#NQs*NmDZ;<(_lkXcGSW#yL&-+UoyK*N=@EKbd zq3y53%PKHt@%+xv9u#=%T8QE3_`}w7{zSEPd#Su9TJ>3_6^3LVu0>)|(FlGlvg1*O z*s?QUmY{yjXJ^H%GTXx8nl!K4LQ ztDw`B9|%-0_6ppx1b=;;GoBUx54F|*z43>b|Ki>uf64vbafN$0~zBM2` z=xzkUl+f{RNO)PpzMya+1ipK4d8MsH^_7tClKC~zH&^wxh&Rzudp(CD;jVvQneU7CT13@INY z`|L?QLzd+RR}ar$pc>6&zTm0D+o`EWUx8oHk95N4tJ_;fZ1NL@{iOzZu8_xFq)q%O zJa1={K40Ai89yQ)XDBQb*v=5{>-~OLP19bb)kr?7bD=;j!$Di^={=*}*NhYs$>bXO zh^t*FP|vVpXai%=?P-H+--aq0921KsEhwDiy5^Ss7E)nNA5iYu9T`b4mL(rd7!|ZA`23vDTO#kd>@4Js>Of8B;;otlOB1 z%VsmibgNOw8q))^k~O9WVo?H8ETu3>0KM;#c3nw1W!HOoYh4d3rU?@}3T|i@*w?md^gF$7 z#`s5RHClePz{Kv8cHX$|k9HoK?vHjJi|&th)=2J?cGgbrlXl)h?vHjJYVMDA9$W5< zcEZxR9&ToG{_0mc`)Ws5yLz|3pT(qLoC~a9S8Z#9j-j=UG+;=0t|mAsG=`e0bp~qR z(E6E9(R+s8A9Qr1SNCetiW#}id1%SZ+R6h;X3lgTPBIH^=`M5g0;Wpf?B0xNW%{+w z&UayC{aR-{y(yda)H};qyWCke2}`9n#!Y=@Ra8jpwyRqRX$^Dp+JrQRL=P&YSp$1W zAx&7=8RdUTpn42&Kmt_>R3%V-ML>125WKe%pV@lJpOMWS+sYq(&77MU6m607570-3|(XB7)n&z$N{7cquCdX?(A)^8g=35-HUk}3{I zN^oBa8XOCKrT)pty?B{GF)9@kY3rKH_uHksiJM9twcRg1nft|g!Z%?w7r_I%iIVjM zz!i+Tlc3*JoSk$NWn`TryIXkp$U3hlrmYM^Bg4?VHJ_#pihbG@ViRA`n+0Wx@0%p= z7DnEH=U(TVg4)(zc`d9`$T=-JZsguc34?3zU8eI@y66>{fnImi9`yUeQ#{huQ^V@v z)1i*d(*f#gy^%3Q9n^;k{D#SR!on*fe>x_z146lbD;J2~1dCCjH1nU#@pivZxvaKm zJ{1!w`Wjtz%uaw3Z7BGHG^(V%sQ;b+|KB*7gUL zwL=6imT+0ZW$F4QTz)GfBjNI~j!Zt#^g72~ZBHd1ATRUaf0k$YNSQ?a67>hDzgP0| z=QXx5^6_nO%3b)a`Aq43zWtdY-4t-ow@@CHIx0my2B0oq|CEkZI@Vv)vGz+||GdV* z>+_=ilowj}7(zI-m!D2T*I4Wsc_qaTQN-~@tZ>rBzHWI!`3(HUCfE}?PO<6Dfn_@@ zYJ1tUDMD)7tOf|mdy%dDI#FIy2okxny=!x_J!PIWNSo5lu=@Cq3O;^K@EC?5FVMX1x6hR9v{0MX24j zj?xS@=>}8HPOd0PZ+H~?dko(27g$M>Yb$b(Oo9{%QtAOIdPy*^l*8R-MjjX5Zc-k5 z`|alCG5x(@5;;3RI#oReJW?pluMv_+-zR^8-)`;$E4 zOk6$%3}+0ig4@yPNOvM)9oGVj_-JTz+A><*FA}Y;-7Se$cZ1pQG>`^iy;&A41Bye( zvDaKGh%4CjVnK?PHJ1xwf>Y!@`hZRFQY(-TaQN+PsU0ow)nR~55dVkxz{iXcI=-h< zGzSblVt~L1KLa0|4z+y19Kg*x8Y71J4~G0lz{TB~*)?8e;JsQ&sknqDP{T6Bs=O;u zS!a4Bac!boDjzH>h)OG}IO)>m(=3nB@jCrU*R!({<=86Mq}t|0!KzRt50Q#nTxamD zUrws~CHJ=fiTuY;q3sit=tRjz6tORg4$CxnAtO+oERl-%}RuUqOO%fi;jIm4IK!ar7bjFw-5 z>Tk$T9NaA`JP*>@GPrw!|E4xv(p@C)6v>^JN^gYa)S|g(!c;@m`^L79g>Ae*lvCpd zTcXv`jQq`@H+MiMf6GS3d6Z$%qTZY9&`@B3%{)no+6ZN!rRhlmope%Nmdu z 0 { @@ -189,11 +193,11 @@ func (m *Manager) generateWindowPoSt(ctx context.Context, minerID abi.ActorID, s return out, skipped, retErr } -func (m *Manager) generatePartitionWindowPost(ctx context.Context, ppt abi.RegisteredPoStProof, minerID abi.ActorID, partIndex int, sc []storiface.PostSectorChallenge, randomness abi.PoStRandomness) (proof.PoStProof, []abi.SectorID, error) { +func (m *Manager) generatePartitionWindowPost(ctx context.Context, spt abi.RegisteredSealProof, ppt abi.RegisteredPoStProof, minerID abi.ActorID, partIndex int, sc []storiface.PostSectorChallenge, randomness abi.PoStRandomness) (proof.PoStProof, []abi.SectorID, error) { log.Infow("generateWindowPost", "index", partIndex) var result storiface.WindowPoStResult - err := m.windowPoStSched.Schedule(ctx, true, func(ctx context.Context, w Worker) error { + err := m.windowPoStSched.Schedule(ctx, true, spt, func(ctx context.Context, w Worker) error { out, err := w.GenerateWindowPoSt(ctx, ppt, minerID, sc, partIndex, randomness) if err != nil { return err diff --git a/extern/sector-storage/sched_post.go b/extern/sector-storage/sched_post.go index b04142059..a3d29d62e 100644 --- a/extern/sector-storage/sched_post.go +++ b/extern/sector-storage/sched_post.go @@ -2,31 +2,31 @@ package sectorstorage import ( "context" - "sort" + "math/rand" "sync" "time" xerrors "golang.org/x/xerrors" + "github.com/filecoin-project/go-state-types/abi" + sealtasks "github.com/filecoin-project/lotus/extern/sector-storage/sealtasks" "github.com/filecoin-project/lotus/extern/sector-storage/stores" "github.com/filecoin-project/lotus/extern/sector-storage/storiface" ) type poStScheduler struct { - lk sync.RWMutex - workers map[storiface.WorkerID]*workerHandle - cond *sync.Cond - postType sealtasks.TaskType + lk sync.RWMutex + workers map[storiface.WorkerID]*workerHandle + cond *sync.Cond - GPUUtilization float64 + postType sealtasks.TaskType } func newPoStScheduler(t sealtasks.TaskType) *poStScheduler { ps := &poStScheduler{ - workers: map[storiface.WorkerID]*workerHandle{}, - postType: t, - GPUUtilization: storiface.GPUUtilizationProof, + workers: map[storiface.WorkerID]*workerHandle{}, + postType: t, } ps.cond = sync.NewCond(&ps.lk) return ps @@ -76,7 +76,7 @@ func (ps *poStScheduler) CanSched(ctx context.Context) bool { return false } -func (ps *poStScheduler) Schedule(ctx context.Context, primary bool, work WorkerAction) error { +func (ps *poStScheduler) Schedule(ctx context.Context, primary bool, spt abi.RegisteredSealProof, work WorkerAction) error { ps.lk.Lock() defer ps.lk.Unlock() @@ -85,83 +85,61 @@ func (ps *poStScheduler) Schedule(ctx context.Context, primary bool, work Worker } // Get workers by resource - canDo, accepts := ps.canHandleWorkers() - + canDo, candidates := ps.readyWorkers(spt) for !canDo { //if primary is true, it must be dispatched to a worker if primary { ps.cond.Wait() - canDo, accepts = ps.canHandleWorkers() + canDo, candidates = ps.readyWorkers(spt) } else { - return xerrors.Errorf("cant find %s post worker", ps.postType) + return xerrors.Errorf("can't find %s post worker", ps.postType) } } - return ps.withResources(accepts[0], func(worker Worker) error { + defer func() { + if ps.cond != nil { + ps.cond.Broadcast() + } + }() + + selected := candidates[0] + worker := ps.workers[selected.id] + + return worker.active.withResources(selected.id, worker.info, selected.res, &ps.lk, func() error { ps.lk.Unlock() defer ps.lk.Lock() - return work(ctx, worker) + return work(ctx, worker.workerRpc) }) } -func (ps *poStScheduler) canHandleWorkers() (bool, []storiface.WorkerID) { +type candidateWorker struct { + id storiface.WorkerID + res storiface.Resources +} - var accepts []storiface.WorkerID +func (ps *poStScheduler) readyWorkers(spt abi.RegisteredSealProof) (bool, []candidateWorker) { + var accepts []candidateWorker //if the gpus of the worker are insufficient or its disable, it cannot be scheduled for wid, wr := range ps.workers { - if wr.active.gpuUsed >= float64(len(wr.info.Resources.GPUs)) || !wr.enabled { + needRes := wr.info.Resources.ResourceSpec(spt, ps.postType) + + if !wr.active.canHandleRequest(needRes, wid, "post-readyWorkers", wr.info) { continue } - accepts = append(accepts, wid) + + accepts = append(accepts, candidateWorker{ + id: wid, + res: needRes, + }) } - freeGPU := func(i int) float64 { - w := ps.workers[accepts[i]] - return float64(len(w.info.Resources.GPUs)) - w.active.gpuUsed - } - - sort.Slice(accepts[:], func(i, j int) bool { - return freeGPU(i) > freeGPU(j) + // todo: round robin or something + rand.Shuffle(len(accepts), func(i, j int) { + accepts[i], accepts[j] = accepts[j], accepts[i] }) - if len(accepts) == 0 { - return false, accepts - } - - return true, accepts -} - -func (ps *poStScheduler) withResources(wid storiface.WorkerID, cb func(wid Worker) error) error { - ps.addResource(wid) - - worker := ps.workers[wid].workerRpc - - err := cb(worker) - - ps.freeResource(wid) - - if ps.cond != nil { - ps.cond.Broadcast() - } - - return err -} - -func (ps *poStScheduler) freeResource(wid storiface.WorkerID) { - if _, ok := ps.workers[wid]; !ok { - log.Warnf("release PoSt Worker not found worker") - return - } - if ps.workers[wid].active.gpuUsed > 0 { - ps.workers[wid].active.gpuUsed -= ps.GPUUtilization - } - - return -} - -func (ps *poStScheduler) addResource(wid storiface.WorkerID) { - ps.workers[wid].active.gpuUsed += ps.GPUUtilization + return len(accepts) != 0, accepts } func (ps *poStScheduler) disable(wid storiface.WorkerID) { diff --git a/extern/sector-storage/storiface/resources.go b/extern/sector-storage/storiface/resources.go index 47e1f8c2a..116fe6fc3 100644 --- a/extern/sector-storage/storiface/resources.go +++ b/extern/sector-storage/storiface/resources.go @@ -62,8 +62,6 @@ func (r Resources) Threads(wcpus uint64, gpus int) uint64 { return uint64(mp) } -var GPUUtilizationProof float64 = 1.0 // todo use resource tablo - var ResourceTable = map[sealtasks.TaskType]map[abi.RegisteredSealProof]Resources{ sealtasks.TTAddPiece: { abi.RegisteredSealProof_StackedDrg64GiBV1: Resources{ @@ -333,6 +331,104 @@ var ResourceTable = map[sealtasks.TaskType]map[abi.RegisteredSealProof]Resources BaseMinMemory: 0, }, }, + sealtasks.TTGenerateWindowPoSt: { + abi.RegisteredSealProof_StackedDrg64GiBV1: Resources{ + MaxMemory: 120 << 30, // TODO: Confirm + MinMemory: 60 << 30, + + MaxParallelism: -1, + MaxParallelismGPU: 6, + GPUUtilization: 1.0, + + BaseMinMemory: 64 << 30, // params + }, + abi.RegisteredSealProof_StackedDrg32GiBV1: Resources{ + MaxMemory: 96 << 30, + MinMemory: 30 << 30, + + MaxParallelism: -1, + MaxParallelismGPU: 6, + GPUUtilization: 1.0, + + BaseMinMemory: 32 << 30, // params + }, + abi.RegisteredSealProof_StackedDrg512MiBV1: Resources{ + MaxMemory: 3 << 29, // 1.5G + MinMemory: 1 << 30, + + MaxParallelism: 1, // This is fine + GPUUtilization: 1.0, + + BaseMinMemory: 10 << 30, + }, + abi.RegisteredSealProof_StackedDrg2KiBV1: Resources{ + MaxMemory: 2 << 10, + MinMemory: 2 << 10, + + MaxParallelism: 1, + GPUUtilization: 1.0, + + BaseMinMemory: 2 << 10, + }, + abi.RegisteredSealProof_StackedDrg8MiBV1: Resources{ + MaxMemory: 8 << 20, + MinMemory: 8 << 20, + + MaxParallelism: 1, + GPUUtilization: 1.0, + + BaseMinMemory: 8 << 20, + }, + }, + sealtasks.TTGenerateWinningPoSt: { + abi.RegisteredSealProof_StackedDrg64GiBV1: Resources{ + MaxMemory: 1 << 30, + MinMemory: 1 << 30, + + MaxParallelism: -1, + MaxParallelismGPU: 6, + GPUUtilization: 1.0, + + BaseMinMemory: 64 << 30, // params + }, + abi.RegisteredSealProof_StackedDrg32GiBV1: Resources{ + MaxMemory: 1 << 30, + MinMemory: 1 << 30, + + MaxParallelism: -1, + MaxParallelismGPU: 6, + GPUUtilization: 1.0, + + BaseMinMemory: 32 << 30, // params + }, + abi.RegisteredSealProof_StackedDrg512MiBV1: Resources{ + MaxMemory: 2 << 10, + MinMemory: 2 << 10, + + MaxParallelism: 1, // This is fine + GPUUtilization: 1.0, + + BaseMinMemory: 10 << 30, + }, + abi.RegisteredSealProof_StackedDrg2KiBV1: Resources{ + MaxMemory: 2 << 10, + MinMemory: 2 << 10, + + MaxParallelism: 1, + GPUUtilization: 1.0, + + BaseMinMemory: 2 << 10, + }, + abi.RegisteredSealProof_StackedDrg8MiBV1: Resources{ + MaxMemory: 8 << 20, + MinMemory: 8 << 20, + + MaxParallelism: 1, + GPUUtilization: 1.0, + + BaseMinMemory: 8 << 20, + }, + }, } func init() { diff --git a/itests/worker_test.go b/itests/worker_test.go index 8964c75d0..3bc136b5e 100644 --- a/itests/worker_test.go +++ b/itests/worker_test.go @@ -80,19 +80,18 @@ func TestWindowPostWorker(t *testing.T) { t.Log("Waiting for post message") bm.Stop() + var lastPending []*types.SignedMessage for i := 0; i < 500; i++ { - n, err := client.MpoolPending(ctx, types.EmptyTSK) + lastPending, err = client.MpoolPending(ctx, types.EmptyTSK) require.NoError(t, err) - if len(n) > 0 { + if len(lastPending) > 0 { break } time.Sleep(40 * time.Millisecond) } - n, err := client.MpoolPending(ctx, types.EmptyTSK) - require.NoError(t, err) - require.Greater(t, len(n), 0) + require.Greater(t, len(lastPending), 0) t.Log("post message landed")