From 54dc885ae636095179ff45ef4daa618a59cec21b Mon Sep 17 00:00:00 2001 From: "Andrew Jackson (Ajax)" Date: Mon, 14 Aug 2023 11:40:12 -0500 Subject: [PATCH 01/17] feat:miner:harmonytask --- cmd/lotus-worker/main.go | 1 + go.mod | 24 +- go.sum | 43 + htask.patch | 1660 ++++++++++++++++++++ itests/harmonytask_test.go | 247 +++ lib/harmony/harmonydb/harmonydb.go | 19 +- lib/harmony/harmonydb/sql/20230706.sql | 1 + lib/harmony/harmonydb/sql/20230719.sql | 52 + lib/harmony/harmonytask/doc.go | 79 + lib/harmony/harmonytask/harmonytask.go | 386 +++++ lib/harmony/harmonytask/notifyingMx.go | 16 + lib/harmony/harmonytask/taskTypeHandler.go | 276 ++++ lib/harmony/resources/memsys.go | 22 + lib/harmony/resources/resources.go | 180 +++ 14 files changed, 2997 insertions(+), 9 deletions(-) create mode 100644 htask.patch create mode 100644 itests/harmonytask_test.go create mode 100644 lib/harmony/harmonydb/sql/20230719.sql create mode 100644 lib/harmony/harmonytask/doc.go create mode 100644 lib/harmony/harmonytask/harmonytask.go create mode 100644 lib/harmony/harmonytask/notifyingMx.go create mode 100644 lib/harmony/harmonytask/taskTypeHandler.go create mode 100644 lib/harmony/resources/memsys.go create mode 100644 lib/harmony/resources/resources.go diff --git a/cmd/lotus-worker/main.go b/cmd/lotus-worker/main.go index 944791275..995a3cbe0 100644 --- a/cmd/lotus-worker/main.go +++ b/cmd/lotus-worker/main.go @@ -609,6 +609,7 @@ var runCmd = &cli.Command{ if err := srv.Shutdown(context.TODO()); err != nil { log.Errorf("shutting down RPC server failed: %s", err) } + //taskManager.GracefullyTerminate(5*time.Hour) log.Warn("Graceful shutdown successful") }() diff --git a/go.mod b/go.mod index 2da784ad6..661495e89 100644 --- a/go.mod +++ b/go.mod @@ -156,7 +156,7 @@ require ( golang.org/x/exp v0.0.0-20230321023759-10a507213a29 golang.org/x/net v0.10.0 golang.org/x/sync v0.2.0 - golang.org/x/sys v0.9.0 + golang.org/x/sys v0.10.0 golang.org/x/term v0.9.0 golang.org/x/time 
v0.0.0-20220722155302-e5dcc9cfc0b9 golang.org/x/tools v0.9.1 @@ -167,6 +167,8 @@ require ( require ( github.com/GeertJohan/go.incremental v1.0.0 // indirect + github.com/Inkeliz/go-opencl v0.0.0-20200806180703-5f0707fba006 // indirect + github.com/Nv7-Github/go-cl v0.0.0-20210426150049-f121093b60ef // indirect github.com/PuerkitoBio/purell v1.1.1 // indirect github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect github.com/StackExchange/wmi v1.2.1 // indirect @@ -177,8 +179,10 @@ require ( github.com/beorn7/perks v1.0.1 // indirect github.com/bep/debounce v1.2.1 // indirect github.com/boltdb/bolt v1.3.1 // indirect + github.com/bytedance/sonic v1.9.1 // indirect github.com/cespare/xxhash v1.1.0 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect github.com/cilium/ebpf v0.9.1 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect github.com/crackcomm/go-gitignore v0.0.0-20170627025303-887ab5e44cc3 // indirect @@ -202,7 +206,10 @@ require ( github.com/flynn/noise v1.0.0 // indirect github.com/francoispqt/gojay v1.2.13 // indirect github.com/fsnotify/fsnotify v1.6.0 // indirect + github.com/gabriel-vasile/mimetype v1.4.2 // indirect github.com/gdamore/encoding v1.0.0 // indirect + github.com/gin-contrib/sse v0.1.0 // indirect + github.com/gin-gonic/gin v1.9.1 // indirect github.com/go-kit/log v0.2.1 // indirect github.com/go-logfmt/logfmt v0.5.1 // indirect github.com/go-logr/logr v1.2.4 // indirect @@ -211,7 +218,11 @@ require ( github.com/go-openapi/jsonpointer v0.19.3 // indirect github.com/go-openapi/jsonreference v0.19.4 // indirect github.com/go-openapi/swag v0.19.11 // indirect + github.com/go-playground/locales v0.14.1 // indirect + github.com/go-playground/universal-translator v0.18.1 // indirect + github.com/go-playground/validator/v10 v10.14.0 // indirect github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect + 
github.com/goccy/go-json v0.10.2 // indirect github.com/godbus/dbus/v5 v5.1.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/glog v1.1.0 // indirect @@ -256,10 +267,12 @@ require ( github.com/joeshaw/multierror v0.0.0-20140124173710-69b34d4ec901 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/jpillora/backoff v1.0.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect github.com/kilic/bls12-381 v0.1.0 // indirect github.com/klauspost/compress v1.16.5 // indirect github.com/klauspost/cpuid/v2 v2.2.5 // indirect github.com/koron/go-ssdp v0.0.4 // indirect + github.com/leodido/go-urn v1.2.4 // indirect github.com/libp2p/go-cidranger v1.1.0 // indirect github.com/libp2p/go-flow-metrics v0.1.0 // indirect github.com/libp2p/go-libp2p-asn-util v0.3.0 // indirect @@ -280,6 +293,8 @@ require ( github.com/mikioh/tcpinfo v0.0.0-20190314235526-30a79bb1804b // indirect github.com/mikioh/tcpopt v0.0.0-20190314235656-172688c1accc // indirect github.com/minio/sha256-simd v1.0.1 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect github.com/mr-tron/base58 v1.2.0 // indirect github.com/multiformats/go-base36 v0.2.0 // indirect github.com/multiformats/go-multiaddr-fmt v0.1.0 // indirect @@ -291,6 +306,7 @@ require ( github.com/opencontainers/runtime-spec v1.0.2 // indirect github.com/opentracing/opentracing-go v1.2.0 // indirect github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 // indirect + github.com/pelletier/go-toml/v2 v2.0.8 // indirect github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect @@ -306,12 +322,15 @@ require ( github.com/rivo/uniseg v0.1.0 // indirect github.com/rs/cors v1.7.0 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect + github.com/samber/lo v1.38.1 // indirect + 
github.com/samuel/go-opencl v0.0.0-20171108220231-cbcfd10c32ad // indirect github.com/shirou/gopsutil v2.18.12+incompatible // indirect github.com/sirupsen/logrus v1.9.0 // indirect github.com/spaolacci/murmur3 v1.1.0 // indirect github.com/tidwall/gjson v1.14.4 // indirect + github.com/twitchyliquid64/golang-asm v0.15.1 // indirect github.com/twmb/murmur3 v1.1.6 // indirect - github.com/ugorji/go/codec v1.2.6 // indirect + github.com/ugorji/go/codec v1.2.11 // indirect github.com/valyala/bytebufferpool v1.0.0 // indirect github.com/valyala/fasttemplate v1.0.1 // indirect github.com/whyrusleeping/cbor v0.0.0-20171005072247-63513f603b11 // indirect @@ -327,6 +346,7 @@ require ( go.opentelemetry.io/otel/trace v1.16.0 // indirect go.uber.org/dig v1.17.0 // indirect go4.org v0.0.0-20230225012048-214862532bf5 // indirect + golang.org/x/arch v0.3.0 // indirect golang.org/x/mod v0.10.0 // indirect golang.org/x/text v0.10.0 // indirect gonum.org/v1/gonum v0.13.0 // indirect diff --git a/go.sum b/go.sum index ebbc4dcc8..74127c535 100644 --- a/go.sum +++ b/go.sum @@ -59,6 +59,8 @@ github.com/GeertJohan/go.rice v1.0.3 h1:k5viR+xGtIhF61125vCE1cmJ5957RQGXG6dmbaWZ github.com/GeertJohan/go.rice v1.0.3/go.mod h1:XVdrU4pW00M4ikZed5q56tPf1v2KwnIKeIdc9CBYNt4= github.com/Gurpartap/async v0.0.0-20180927173644-4f7f499dd9ee h1:8doiS7ib3zi6/K172oDhSKU0dJ/miJramo9NITOMyZQ= github.com/Gurpartap/async v0.0.0-20180927173644-4f7f499dd9ee/go.mod h1:W0GbEAA4uFNYOGG2cJpmFJ04E6SD1NLELPYZB57/7AY= +github.com/Inkeliz/go-opencl v0.0.0-20200806180703-5f0707fba006 h1:TKWkFaRW5EPQyrS1pM0vm3vvqw/jmHu+FkV8gRD+7/w= +github.com/Inkeliz/go-opencl v0.0.0-20200806180703-5f0707fba006/go.mod h1:9ILtD1/UTP/Y7JMCU8loWZMDvhrQuTgHzHatG6z9ZdQ= github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible/go.mod h1:r7JcOSlj0wfOMncg0iLm8Leh48TZaKVeNIfJntJ2wa0= github.com/Kubuxu/go-os-helper v0.0.1/go.mod h1:N8B+I7vPCT80IcP58r50u4+gEEcsZETFUpAzWW2ep1Y= github.com/Kubuxu/imtui 
v0.0.0-20210401140320-41663d68d0fa h1:1PPxEyGdIGVkX/kqMvLJ95a1dGS1Sz7tpNEgehEYYt0= @@ -66,6 +68,8 @@ github.com/Kubuxu/imtui v0.0.0-20210401140320-41663d68d0fa/go.mod h1:WUmMvh9wMtq github.com/Masterminds/glide v0.13.2/go.mod h1:STyF5vcenH/rUqTEv+/hBXlSTo7KYwg2oc2f4tzPWic= github.com/Masterminds/semver v1.4.2/go.mod h1:MB6lktGJrhw8PrUyiEoblNEGEQ+RzHPF078ddwwvV3Y= github.com/Masterminds/vcs v1.13.0/go.mod h1:N09YCmOQr6RLxC6UNHzuVwAdodYbbnycGHSmwVJjcKA= +github.com/Nv7-Github/go-cl v0.0.0-20210426150049-f121093b60ef h1:DiNnYI6NBdeXGOJXptJcrYeDavJf4tImz/B4MOVQtMs= +github.com/Nv7-Github/go-cl v0.0.0-20210426150049-f121093b60ef/go.mod h1:RRVtxaQlBBnbo+n2fgYHhxQmXDkRLKWcWX93lJL0Yhw= github.com/OneOfOne/xxhash v1.2.2 h1:KMrpdQIwFcEqXDklaen+P1axHaj9BSKzvpUUfnHldSE= github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tNFfI= @@ -142,6 +146,9 @@ github.com/btcsuite/winsvc v1.0.0/go.mod h1:jsenWakMcC0zFBFurPLEAyrnc/teJEM1O46f github.com/buger/goterm v1.0.3 h1:7V/HeAQHrzPk/U4BvyH2g9u+xbUW9nr4yRPyG59W4fM= github.com/buger/goterm v1.0.3/go.mod h1:HiFWV3xnkolgrBV3mY8m0X0Pumt4zg4QhbdOzQtB8tE= github.com/buger/jsonparser v0.0.0-20181115193947-bf1c66bbce23/go.mod h1:bbYlZJ7hK1yFx9hf58LP0zeX7UjIGs20ufpu3evjr+s= +github.com/bytedance/sonic v1.5.0/go.mod h1:ED5hyg4y6t3/9Ku1R6dU/4KyJ48DZ4jPhfY1O2AihPM= +github.com/bytedance/sonic v1.9.1 h1:6iJ6NqdoxCDr6mbY8h18oSO+cShGSMRGCEo7F2h0x8s= +github.com/bytedance/sonic v1.9.1/go.mod h1:i736AoUSYt75HyZLoJW9ERYxcy6eaN6h4BZXU064P/U= github.com/casbin/casbin/v2 v2.1.2/go.mod h1:YcPU1XXisHhLzuxH9coDNf2FbKpjGlbCg3n9yuLkIJQ= github.com/cenkalti/backoff v2.2.1+incompatible/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQJJJ3NXXM= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= @@ -152,6 +159,9 @@ github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XL 
github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cheekybits/genny v1.0.0/go.mod h1:+tQajlRqAUrPI7DOSpB0XAqZYtQakVtB7wXkRAgjxjQ= +github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY= +github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 h1:qSGYFH7+jGhDF8vLC+iwCD4WpbV1EBDSzWkJODFLams= +github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/logex v1.2.1 h1:XHDu3E6q+gdHgsdTPH6ImJMIp436vR6MPtH8gP05QzM= github.com/chzyer/logex v1.2.1/go.mod h1:JLbx6lG2kDbNRFnfkgvh4eRJRPX1QCoOIWomwysCBrQ= @@ -386,6 +396,8 @@ github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMo github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY= github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw= +github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU= +github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA= github.com/gbrlsnchs/jwt/v3 v3.0.1 h1:lbUmgAKpxnClrKloyIwpxm4OuWeDl5wLk52G91ODPw4= github.com/gbrlsnchs/jwt/v3 v3.0.1/go.mod h1:AncDcjXz18xetI3A6STfXq2w+LuTx8pQ8bGEwRN8zVM= github.com/gdamore/encoding v1.0.0 h1:+7OoQ1Bc6eTm5niUzBa0Ctsh6JbMW6Ra+YNuAtDBdko= @@ -399,6 +411,8 @@ github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= github.com/gin-gonic/gin v1.6.3 h1:ahKqKTFpO5KTPHxWZjEdPScmYaGtLo8Y4DMHoEsnp14= github.com/gin-gonic/gin v1.6.3/go.mod 
h1:75u5sXoLsGZoRN5Sgbi1eraJ4GU3++wFwWzhwvtwp4M= +github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg= +github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU= github.com/gliderlabs/ssh v0.1.1/go.mod h1:U7qILu1NlMHj9FlMhZLlkCdDnU1DBEAqr0aevW3Awn0= github.com/go-check/check v0.0.0-20180628173108-788fd7840127/go.mod h1:9ES+weclKsC9YodN5RgxqK/VD9HM9JsCSh7rNhMZE98= github.com/go-chi/chi v1.5.4 h1:QHdzF2szwjqVV4wmByUnTcsbIg7UGaQ0tPF2t5GcAIs= @@ -445,10 +459,16 @@ github.com/go-openapi/swag v0.19.11/go.mod h1:Uc0gKkdR+ojzsEpjh39QChyu92vPgIr72P github.com/go-playground/assert/v2 v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= github.com/go-playground/locales v0.13.0 h1:HyWk6mgj5qFqCT5fjGBuRArbVDfE4hi8+e8ceBS/t7Q= github.com/go-playground/locales v0.13.0/go.mod h1:taPMhCMXrRLJO55olJkUXHZBHCxTMfnGwq/HNwmWNS8= +github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= +github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY= github.com/go-playground/universal-translator v0.17.0 h1:icxd5fm+REJzpZx7ZfpaD876Lmtgy7VtROAbHHXk8no= github.com/go-playground/universal-translator v0.17.0/go.mod h1:UkSxE5sNxxRwHyU+Scu5vgOQjsIJAF8j9muTVoKLVtA= +github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY= +github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY= github.com/go-playground/validator/v10 v10.2.0 h1:KgJ0snyC2R9VXYN2rneOtQcw5aHQB1Vv0sFl1UcHBOY= github.com/go-playground/validator/v10 v10.2.0/go.mod h1:uOYAAleCW8F/7oMFd6aG0GOhaH6EGOAJShg8Id5JGkI= +github.com/go-playground/validator/v10 v10.14.0 h1:vgvQWe3XCz3gIeFDm/HnTIbj6UGmg/+t63MyGU2n5js= +github.com/go-playground/validator/v10 v10.14.0/go.mod h1:9iXMNT7sEkjXb0I+enO7QXmzG6QCsPWY4zveKFVRSyU= github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= 
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= @@ -464,6 +484,8 @@ github.com/gobwas/pool v0.2.0 h1:QEmUOlnSjWtnpRGHF3SauEiOsy82Cup83Vf2LcMlnc8= github.com/gobwas/pool v0.2.0/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw= github.com/gobwas/ws v1.0.2 h1:CoAavW/wd/kulfZmSIBt6p24n4j7tHgNVCjsfHVNUbo= github.com/gobwas/ws v1.0.2/go.mod h1:szmBTxLgaFppYjEmNtny/v3w89xOydFnnZMcgRRu/EM= +github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= +github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= github.com/godbus/dbus/v5 v5.0.3/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/godbus/dbus/v5 v5.1.0 h1:4KLkAxT3aOY8Li4FRJe/KvhoNFFxo0m6fNuFUO8QJUk= @@ -970,6 +992,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/leodido/go-urn v1.2.0 h1:hpXL4XnriNwQ/ABnpepYM/1vCLWNDfUNts8dX3xTG6Y= github.com/leodido/go-urn v1.2.0/go.mod h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII= +github.com/leodido/go-urn v1.2.4 h1:XlAE/cm/ms7TE/VMVoduSpNBoyc2dOxHs5MZSwAN63Q= +github.com/leodido/go-urn v1.2.4/go.mod h1:7ZrI8mTSeBSHl/UaRyKQW1qZeMgak41ANeCNaVckg+4= github.com/lib/pq v1.10.0 h1:Zx5DJFEYQXio93kgXnQ09fXNiUKsqv4OUEu2UtGcB1E= github.com/libp2p/go-addr-util v0.0.1/go.mod h1:4ac6O7n9rIAKB1dnd+s8IbbMXkt+oBpzX4/+RACcnlQ= github.com/libp2p/go-addr-util v0.0.2/go.mod h1:Ecd6Fb3yIuLzq4bD7VcywcVSBtefcAwnUISBM3WG15E= @@ -1405,7 +1429,10 @@ github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144T github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 h1:onHthvaw9LFnH4t2DcNVpwGmV9E1BkGknEliJkfwQj0= github.com/pbnjay/memory 
v0.0.0-20210728143218-7b4eea64cf58/go.mod h1:DXv8WO4yhMYhSNPKjeNKa5WY9YCIEBRbNzFFPJbWO6Y= github.com/pborman/uuid v1.2.0/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtPdI/k= +github.com/pelletier/go-toml v1.2.0 h1:T5zMGML61Wp+FlcbWjRDT7yAxhJNAiPPLOFECq181zc= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= +github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ= +github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4= github.com/performancecopilot/speed v3.0.0+incompatible/go.mod h1:/CLtqpZ5gBg1M9iaPbIdPPGyKcA8hKdoy6hAWba7Yac= github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9 h1:1/WtZae0yGtPq+TI6+Tv1WTxkukpXeMlviSxvL7SRgk= github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9/go.mod h1:x3N5drFsm2uilKKuuYo6LdyD8vZAW55sH/9w+pbo1sw= @@ -1512,6 +1539,10 @@ github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd/go.mod h1:hPqNNc0+uJM6H+SuU8sEs5K5IQeKccPqeSjfgcKGgPk= github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= +github.com/samber/lo v1.38.1 h1:j2XEAqXKb09Am4ebOg31SpvzUTTs6EN3VfgeLUhPdXM= +github.com/samber/lo v1.38.1/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA= +github.com/samuel/go-opencl v0.0.0-20171108220231-cbcfd10c32ad h1:zyvTnsJPPAqVg2v3bbvTI+RdbVPJufZ+CWCPOX0Dtp8= +github.com/samuel/go-opencl v0.0.0-20171108220231-cbcfd10c32ad/go.mod h1:KCqoxhWgoxCWg13iOq53YFf50jlonuuhIpO916aWEkg= github.com/samuel/go-zookeeper v0.0.0-20190923202752-2cc03de413da/go.mod h1:gi+0XIa01GRL2eRQVjQkKGqKF3SF9vZR/HnPullcV2E= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= github.com/sercand/kuberesolver v2.4.0+incompatible 
h1:WE2OlRf6wjLxHwNkkFLQGaZcVLEXjMjBPjjEU5vksH8= @@ -1598,6 +1629,8 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stvp/go-udp-testing v0.0.0-20201019212854-469649b16807/go.mod h1:7jxmlfBCDBXRzr0eAQJ48XC1hBu1np4CS5+cHEYfwpc= @@ -1618,6 +1651,8 @@ github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs= github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM= +github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= +github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= github.com/twmb/murmur3 v1.1.6 h1:mqrRot1BRxm+Yct+vavLMou2/iJt0tNVTTC0QoIjaZg= github.com/twmb/murmur3 v1.1.6/go.mod h1:Qq/R7NUyOfr65zD+6Q5IHKsJLwP7exErjN6lyyq3OSQ= github.com/uber/jaeger-client-go v2.30.0+incompatible h1:D6wyKGCecFaSRUpo8lCVbaOOb6ThwMmTEbhRwtKR97o= @@ -1628,6 +1663,8 @@ github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljT github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= 
github.com/ugorji/go/codec v1.2.6 h1:7kbGefxLoDBuYXOms4yD7223OpNMMPNPZxXk5TvFcyQ= github.com/ugorji/go/codec v1.2.6/go.mod h1:V6TCNZ4PHqoHGFZuSG1W8nrCzzdgA2DozYxWFFpvxTw= +github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU= +github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= github.com/urfave/cli v1.22.2/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= @@ -1789,6 +1826,9 @@ go4.org v0.0.0-20180809161055-417644f6feb5/go.mod h1:MkTOUMDaeVYJUOUsaDXIhWPZYa1 go4.org v0.0.0-20200411211856-f5505b9728dd/go.mod h1:CIiUVy99QCPfoE13bO4EZaz5GZMZXMSBGhxRdsvzbkg= go4.org v0.0.0-20230225012048-214862532bf5 h1:nifaUDeh+rPaBCMPMQHZmvJf+QdpLFnuQPwx+LxVmtc= go4.org v0.0.0-20230225012048-214862532bf5/go.mod h1:F57wTi5Lrj6WLyswp5EYV1ncrEbFGHD4hhz6S1ZYeaU= +golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= +golang.org/x/arch v0.3.0 h1:02VY4/ZcO/gBOH6PUaoiptASxtXU10jazRCP865E97k= +golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= golang.org/x/build v0.0.0-20190111050920-041ab4dc3f9d/go.mod h1:OWs+y06UdEOHN4y+MfF/py+xQ/tYqIWW03b70/CG9Rw= golang.org/x/crypto v0.0.0-20170930174604-9419663f5a44/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= @@ -2066,6 +2106,8 @@ golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.9.0 h1:KS/R3tvhPqvJvwcKfnBHJwwthS11LRhmM5D59eEXa0s= golang.org/x/sys v0.9.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.10.0 h1:SqMFp9UcQJZa+pmYuAKjd9xq1f0j5rLcDIk0mj4qAsA= 
+golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20201210144234-2321bbc49cbf/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= @@ -2319,6 +2361,7 @@ lukechampine.com/blake3 v1.2.1/go.mod h1:0OFRp7fBtAylGVCO40o87sbupkyIGgbpv1+M1k1 nhooyr.io/websocket v1.8.7 h1:usjR2uOr/zjjkVMy0lW+PPohFok7PCow5sDjLgX4P4g= nhooyr.io/websocket v1.8.7/go.mod h1:B70DZP8IakI65RVQ51MsWP/8jndNma26DVA/nFSCgW0= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= +rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= diff --git a/htask.patch b/htask.patch new file mode 100644 index 000000000..0c2d12b4e --- /dev/null +++ b/htask.patch @@ -0,0 +1,1660 @@ +diff --git a/cmd/lotus-worker/main.go b/cmd/lotus-worker/main.go +index 944791275..995a3cbe0 100644 +--- a/cmd/lotus-worker/main.go ++++ b/cmd/lotus-worker/main.go +@@ -609,6 +609,7 @@ var runCmd = &cli.Command{ + if err := srv.Shutdown(context.TODO()); err != nil { + log.Errorf("shutting down RPC server failed: %s", err) + } ++ //taskManager.GracefullyTerminate(5*time.Hour) + log.Warn("Graceful shutdown successful") + }() + +diff --git a/go.mod b/go.mod +index 2da784ad6..661495e89 100644 +--- a/go.mod ++++ b/go.mod +@@ -156,7 +156,7 @@ require ( + golang.org/x/exp v0.0.0-20230321023759-10a507213a29 + golang.org/x/net v0.10.0 + golang.org/x/sync v0.2.0 +- golang.org/x/sys v0.9.0 ++ golang.org/x/sys v0.10.0 + golang.org/x/term v0.9.0 + golang.org/x/time v0.0.0-20220722155302-e5dcc9cfc0b9 + 
golang.org/x/tools v0.9.1 +@@ -167,6 +167,8 @@ require ( + + require ( + github.com/GeertJohan/go.incremental v1.0.0 // indirect ++ github.com/Inkeliz/go-opencl v0.0.0-20200806180703-5f0707fba006 // indirect ++ github.com/Nv7-Github/go-cl v0.0.0-20210426150049-f121093b60ef // indirect + github.com/PuerkitoBio/purell v1.1.1 // indirect + github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect + github.com/StackExchange/wmi v1.2.1 // indirect +@@ -177,8 +179,10 @@ require ( + github.com/beorn7/perks v1.0.1 // indirect + github.com/bep/debounce v1.2.1 // indirect + github.com/boltdb/bolt v1.3.1 // indirect ++ github.com/bytedance/sonic v1.9.1 // indirect + github.com/cespare/xxhash v1.1.0 // indirect + github.com/cespare/xxhash/v2 v2.2.0 // indirect ++ github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect + github.com/cilium/ebpf v0.9.1 // indirect + github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect + github.com/crackcomm/go-gitignore v0.0.0-20170627025303-887ab5e44cc3 // indirect +@@ -202,7 +206,10 @@ require ( + github.com/flynn/noise v1.0.0 // indirect + github.com/francoispqt/gojay v1.2.13 // indirect + github.com/fsnotify/fsnotify v1.6.0 // indirect ++ github.com/gabriel-vasile/mimetype v1.4.2 // indirect + github.com/gdamore/encoding v1.0.0 // indirect ++ github.com/gin-contrib/sse v0.1.0 // indirect ++ github.com/gin-gonic/gin v1.9.1 // indirect + github.com/go-kit/log v0.2.1 // indirect + github.com/go-logfmt/logfmt v0.5.1 // indirect + github.com/go-logr/logr v1.2.4 // indirect +@@ -211,7 +218,11 @@ require ( + github.com/go-openapi/jsonpointer v0.19.3 // indirect + github.com/go-openapi/jsonreference v0.19.4 // indirect + github.com/go-openapi/swag v0.19.11 // indirect ++ github.com/go-playground/locales v0.14.1 // indirect ++ github.com/go-playground/universal-translator v0.18.1 // indirect ++ github.com/go-playground/validator/v10 v10.14.0 // indirect + github.com/go-task/slim-sprig 
v0.0.0-20230315185526-52ccab3ef572 // indirect ++ github.com/goccy/go-json v0.10.2 // indirect + github.com/godbus/dbus/v5 v5.1.0 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/golang/glog v1.1.0 // indirect +@@ -256,10 +267,12 @@ require ( + github.com/joeshaw/multierror v0.0.0-20140124173710-69b34d4ec901 // indirect + github.com/josharian/intern v1.0.0 // indirect + github.com/jpillora/backoff v1.0.0 // indirect ++ github.com/json-iterator/go v1.1.12 // indirect + github.com/kilic/bls12-381 v0.1.0 // indirect + github.com/klauspost/compress v1.16.5 // indirect + github.com/klauspost/cpuid/v2 v2.2.5 // indirect + github.com/koron/go-ssdp v0.0.4 // indirect ++ github.com/leodido/go-urn v1.2.4 // indirect + github.com/libp2p/go-cidranger v1.1.0 // indirect + github.com/libp2p/go-flow-metrics v0.1.0 // indirect + github.com/libp2p/go-libp2p-asn-util v0.3.0 // indirect +@@ -280,6 +293,8 @@ require ( + github.com/mikioh/tcpinfo v0.0.0-20190314235526-30a79bb1804b // indirect + github.com/mikioh/tcpopt v0.0.0-20190314235656-172688c1accc // indirect + github.com/minio/sha256-simd v1.0.1 // indirect ++ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect ++ github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/mr-tron/base58 v1.2.0 // indirect + github.com/multiformats/go-base36 v0.2.0 // indirect + github.com/multiformats/go-multiaddr-fmt v0.1.0 // indirect +@@ -291,6 +306,7 @@ require ( + github.com/opencontainers/runtime-spec v1.0.2 // indirect + github.com/opentracing/opentracing-go v1.2.0 // indirect + github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 // indirect ++ github.com/pelletier/go-toml/v2 v2.0.8 // indirect + github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9 // indirect + github.com/pkg/errors v0.9.1 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect +@@ -306,12 +322,15 @@ require ( + github.com/rivo/uniseg v0.1.0 // indirect + github.com/rs/cors v1.7.0 // indirect + 
github.com/russross/blackfriday/v2 v2.1.0 // indirect ++ github.com/samber/lo v1.38.1 // indirect ++ github.com/samuel/go-opencl v0.0.0-20171108220231-cbcfd10c32ad // indirect + github.com/shirou/gopsutil v2.18.12+incompatible // indirect + github.com/sirupsen/logrus v1.9.0 // indirect + github.com/spaolacci/murmur3 v1.1.0 // indirect + github.com/tidwall/gjson v1.14.4 // indirect ++ github.com/twitchyliquid64/golang-asm v0.15.1 // indirect + github.com/twmb/murmur3 v1.1.6 // indirect +- github.com/ugorji/go/codec v1.2.6 // indirect ++ github.com/ugorji/go/codec v1.2.11 // indirect + github.com/valyala/bytebufferpool v1.0.0 // indirect + github.com/valyala/fasttemplate v1.0.1 // indirect + github.com/whyrusleeping/cbor v0.0.0-20171005072247-63513f603b11 // indirect +@@ -327,6 +346,7 @@ require ( + go.opentelemetry.io/otel/trace v1.16.0 // indirect + go.uber.org/dig v1.17.0 // indirect + go4.org v0.0.0-20230225012048-214862532bf5 // indirect ++ golang.org/x/arch v0.3.0 // indirect + golang.org/x/mod v0.10.0 // indirect + golang.org/x/text v0.10.0 // indirect + gonum.org/v1/gonum v0.13.0 // indirect +diff --git a/go.sum b/go.sum +index ebbc4dcc8..74127c535 100644 +--- a/go.sum ++++ b/go.sum +@@ -59,6 +59,8 @@ github.com/GeertJohan/go.rice v1.0.3 h1:k5viR+xGtIhF61125vCE1cmJ5957RQGXG6dmbaWZ + github.com/GeertJohan/go.rice v1.0.3/go.mod h1:XVdrU4pW00M4ikZed5q56tPf1v2KwnIKeIdc9CBYNt4= + github.com/Gurpartap/async v0.0.0-20180927173644-4f7f499dd9ee h1:8doiS7ib3zi6/K172oDhSKU0dJ/miJramo9NITOMyZQ= + github.com/Gurpartap/async v0.0.0-20180927173644-4f7f499dd9ee/go.mod h1:W0GbEAA4uFNYOGG2cJpmFJ04E6SD1NLELPYZB57/7AY= ++github.com/Inkeliz/go-opencl v0.0.0-20200806180703-5f0707fba006 h1:TKWkFaRW5EPQyrS1pM0vm3vvqw/jmHu+FkV8gRD+7/w= ++github.com/Inkeliz/go-opencl v0.0.0-20200806180703-5f0707fba006/go.mod h1:9ILtD1/UTP/Y7JMCU8loWZMDvhrQuTgHzHatG6z9ZdQ= + github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible/go.mod 
h1:r7JcOSlj0wfOMncg0iLm8Leh48TZaKVeNIfJntJ2wa0= + github.com/Kubuxu/go-os-helper v0.0.1/go.mod h1:N8B+I7vPCT80IcP58r50u4+gEEcsZETFUpAzWW2ep1Y= + github.com/Kubuxu/imtui v0.0.0-20210401140320-41663d68d0fa h1:1PPxEyGdIGVkX/kqMvLJ95a1dGS1Sz7tpNEgehEYYt0= +@@ -66,6 +68,8 @@ github.com/Kubuxu/imtui v0.0.0-20210401140320-41663d68d0fa/go.mod h1:WUmMvh9wMtq + github.com/Masterminds/glide v0.13.2/go.mod h1:STyF5vcenH/rUqTEv+/hBXlSTo7KYwg2oc2f4tzPWic= + github.com/Masterminds/semver v1.4.2/go.mod h1:MB6lktGJrhw8PrUyiEoblNEGEQ+RzHPF078ddwwvV3Y= + github.com/Masterminds/vcs v1.13.0/go.mod h1:N09YCmOQr6RLxC6UNHzuVwAdodYbbnycGHSmwVJjcKA= ++github.com/Nv7-Github/go-cl v0.0.0-20210426150049-f121093b60ef h1:DiNnYI6NBdeXGOJXptJcrYeDavJf4tImz/B4MOVQtMs= ++github.com/Nv7-Github/go-cl v0.0.0-20210426150049-f121093b60ef/go.mod h1:RRVtxaQlBBnbo+n2fgYHhxQmXDkRLKWcWX93lJL0Yhw= + github.com/OneOfOne/xxhash v1.2.2 h1:KMrpdQIwFcEqXDklaen+P1axHaj9BSKzvpUUfnHldSE= + github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= + github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tNFfI= +@@ -142,6 +146,9 @@ github.com/btcsuite/winsvc v1.0.0/go.mod h1:jsenWakMcC0zFBFurPLEAyrnc/teJEM1O46f + github.com/buger/goterm v1.0.3 h1:7V/HeAQHrzPk/U4BvyH2g9u+xbUW9nr4yRPyG59W4fM= + github.com/buger/goterm v1.0.3/go.mod h1:HiFWV3xnkolgrBV3mY8m0X0Pumt4zg4QhbdOzQtB8tE= + github.com/buger/jsonparser v0.0.0-20181115193947-bf1c66bbce23/go.mod h1:bbYlZJ7hK1yFx9hf58LP0zeX7UjIGs20ufpu3evjr+s= ++github.com/bytedance/sonic v1.5.0/go.mod h1:ED5hyg4y6t3/9Ku1R6dU/4KyJ48DZ4jPhfY1O2AihPM= ++github.com/bytedance/sonic v1.9.1 h1:6iJ6NqdoxCDr6mbY8h18oSO+cShGSMRGCEo7F2h0x8s= ++github.com/bytedance/sonic v1.9.1/go.mod h1:i736AoUSYt75HyZLoJW9ERYxcy6eaN6h4BZXU064P/U= + github.com/casbin/casbin/v2 v2.1.2/go.mod h1:YcPU1XXisHhLzuxH9coDNf2FbKpjGlbCg3n9yuLkIJQ= + github.com/cenkalti/backoff v2.2.1+incompatible/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQJJJ3NXXM= + 
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +@@ -152,6 +159,9 @@ github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XL + github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= + github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= + github.com/cheekybits/genny v1.0.0/go.mod h1:+tQajlRqAUrPI7DOSpB0XAqZYtQakVtB7wXkRAgjxjQ= ++github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY= ++github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 h1:qSGYFH7+jGhDF8vLC+iwCD4WpbV1EBDSzWkJODFLams= ++github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk= + github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= + github.com/chzyer/logex v1.2.1 h1:XHDu3E6q+gdHgsdTPH6ImJMIp436vR6MPtH8gP05QzM= + github.com/chzyer/logex v1.2.1/go.mod h1:JLbx6lG2kDbNRFnfkgvh4eRJRPX1QCoOIWomwysCBrQ= +@@ -386,6 +396,8 @@ github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMo + github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= + github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY= + github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw= ++github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU= ++github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA= + github.com/gbrlsnchs/jwt/v3 v3.0.1 h1:lbUmgAKpxnClrKloyIwpxm4OuWeDl5wLk52G91ODPw4= + github.com/gbrlsnchs/jwt/v3 v3.0.1/go.mod h1:AncDcjXz18xetI3A6STfXq2w+LuTx8pQ8bGEwRN8zVM= + github.com/gdamore/encoding v1.0.0 h1:+7OoQ1Bc6eTm5niUzBa0Ctsh6JbMW6Ra+YNuAtDBdko= +@@ -399,6 +411,8 @@ github.com/gin-contrib/sse v0.1.0 
h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE + github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= + github.com/gin-gonic/gin v1.6.3 h1:ahKqKTFpO5KTPHxWZjEdPScmYaGtLo8Y4DMHoEsnp14= + github.com/gin-gonic/gin v1.6.3/go.mod h1:75u5sXoLsGZoRN5Sgbi1eraJ4GU3++wFwWzhwvtwp4M= ++github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg= ++github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU= + github.com/gliderlabs/ssh v0.1.1/go.mod h1:U7qILu1NlMHj9FlMhZLlkCdDnU1DBEAqr0aevW3Awn0= + github.com/go-check/check v0.0.0-20180628173108-788fd7840127/go.mod h1:9ES+weclKsC9YodN5RgxqK/VD9HM9JsCSh7rNhMZE98= + github.com/go-chi/chi v1.5.4 h1:QHdzF2szwjqVV4wmByUnTcsbIg7UGaQ0tPF2t5GcAIs= +@@ -445,10 +459,16 @@ github.com/go-openapi/swag v0.19.11/go.mod h1:Uc0gKkdR+ojzsEpjh39QChyu92vPgIr72P + github.com/go-playground/assert/v2 v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= + github.com/go-playground/locales v0.13.0 h1:HyWk6mgj5qFqCT5fjGBuRArbVDfE4hi8+e8ceBS/t7Q= + github.com/go-playground/locales v0.13.0/go.mod h1:taPMhCMXrRLJO55olJkUXHZBHCxTMfnGwq/HNwmWNS8= ++github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= ++github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY= + github.com/go-playground/universal-translator v0.17.0 h1:icxd5fm+REJzpZx7ZfpaD876Lmtgy7VtROAbHHXk8no= + github.com/go-playground/universal-translator v0.17.0/go.mod h1:UkSxE5sNxxRwHyU+Scu5vgOQjsIJAF8j9muTVoKLVtA= ++github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY= ++github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY= + github.com/go-playground/validator/v10 v10.2.0 h1:KgJ0snyC2R9VXYN2rneOtQcw5aHQB1Vv0sFl1UcHBOY= + github.com/go-playground/validator/v10 v10.2.0/go.mod h1:uOYAAleCW8F/7oMFd6aG0GOhaH6EGOAJShg8Id5JGkI= 
++github.com/go-playground/validator/v10 v10.14.0 h1:vgvQWe3XCz3gIeFDm/HnTIbj6UGmg/+t63MyGU2n5js= ++github.com/go-playground/validator/v10 v10.14.0/go.mod h1:9iXMNT7sEkjXb0I+enO7QXmzG6QCsPWY4zveKFVRSyU= + github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= + github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= + github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= +@@ -464,6 +484,8 @@ github.com/gobwas/pool v0.2.0 h1:QEmUOlnSjWtnpRGHF3SauEiOsy82Cup83Vf2LcMlnc8= + github.com/gobwas/pool v0.2.0/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw= + github.com/gobwas/ws v1.0.2 h1:CoAavW/wd/kulfZmSIBt6p24n4j7tHgNVCjsfHVNUbo= + github.com/gobwas/ws v1.0.2/go.mod h1:szmBTxLgaFppYjEmNtny/v3w89xOydFnnZMcgRRu/EM= ++github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= ++github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= + github.com/godbus/dbus/v5 v5.0.3/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= + github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= + github.com/godbus/dbus/v5 v5.1.0 h1:4KLkAxT3aOY8Li4FRJe/KvhoNFFxo0m6fNuFUO8QJUk= +@@ -970,6 +992,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= + github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= + github.com/leodido/go-urn v1.2.0 h1:hpXL4XnriNwQ/ABnpepYM/1vCLWNDfUNts8dX3xTG6Y= + github.com/leodido/go-urn v1.2.0/go.mod h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII= ++github.com/leodido/go-urn v1.2.4 h1:XlAE/cm/ms7TE/VMVoduSpNBoyc2dOxHs5MZSwAN63Q= ++github.com/leodido/go-urn v1.2.4/go.mod h1:7ZrI8mTSeBSHl/UaRyKQW1qZeMgak41ANeCNaVckg+4= + github.com/lib/pq v1.10.0 h1:Zx5DJFEYQXio93kgXnQ09fXNiUKsqv4OUEu2UtGcB1E= + github.com/libp2p/go-addr-util v0.0.1/go.mod h1:4ac6O7n9rIAKB1dnd+s8IbbMXkt+oBpzX4/+RACcnlQ= + 
github.com/libp2p/go-addr-util v0.0.2/go.mod h1:Ecd6Fb3yIuLzq4bD7VcywcVSBtefcAwnUISBM3WG15E= +@@ -1405,7 +1429,10 @@ github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144T + github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 h1:onHthvaw9LFnH4t2DcNVpwGmV9E1BkGknEliJkfwQj0= + github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58/go.mod h1:DXv8WO4yhMYhSNPKjeNKa5WY9YCIEBRbNzFFPJbWO6Y= + github.com/pborman/uuid v1.2.0/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtPdI/k= ++github.com/pelletier/go-toml v1.2.0 h1:T5zMGML61Wp+FlcbWjRDT7yAxhJNAiPPLOFECq181zc= + github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= ++github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ= ++github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4= + github.com/performancecopilot/speed v3.0.0+incompatible/go.mod h1:/CLtqpZ5gBg1M9iaPbIdPPGyKcA8hKdoy6hAWba7Yac= + github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9 h1:1/WtZae0yGtPq+TI6+Tv1WTxkukpXeMlviSxvL7SRgk= + github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9/go.mod h1:x3N5drFsm2uilKKuuYo6LdyD8vZAW55sH/9w+pbo1sw= +@@ -1512,6 +1539,10 @@ github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf + github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= + github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd/go.mod h1:hPqNNc0+uJM6H+SuU8sEs5K5IQeKccPqeSjfgcKGgPk= + github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= ++github.com/samber/lo v1.38.1 h1:j2XEAqXKb09Am4ebOg31SpvzUTTs6EN3VfgeLUhPdXM= ++github.com/samber/lo v1.38.1/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA= ++github.com/samuel/go-opencl v0.0.0-20171108220231-cbcfd10c32ad h1:zyvTnsJPPAqVg2v3bbvTI+RdbVPJufZ+CWCPOX0Dtp8= ++github.com/samuel/go-opencl 
v0.0.0-20171108220231-cbcfd10c32ad/go.mod h1:KCqoxhWgoxCWg13iOq53YFf50jlonuuhIpO916aWEkg= + github.com/samuel/go-zookeeper v0.0.0-20190923202752-2cc03de413da/go.mod h1:gi+0XIa01GRL2eRQVjQkKGqKF3SF9vZR/HnPullcV2E= + github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= + github.com/sercand/kuberesolver v2.4.0+incompatible h1:WE2OlRf6wjLxHwNkkFLQGaZcVLEXjMjBPjjEU5vksH8= +@@ -1598,6 +1629,8 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ + github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals= + github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= + github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= ++github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= ++github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= + github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= + github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= + github.com/stvp/go-udp-testing v0.0.0-20201019212854-469649b16807/go.mod h1:7jxmlfBCDBXRzr0eAQJ48XC1hBu1np4CS5+cHEYfwpc= +@@ -1618,6 +1651,8 @@ github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs= + github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= + github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= + github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM= ++github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= ++github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= + github.com/twmb/murmur3 v1.1.6 
h1:mqrRot1BRxm+Yct+vavLMou2/iJt0tNVTTC0QoIjaZg= + github.com/twmb/murmur3 v1.1.6/go.mod h1:Qq/R7NUyOfr65zD+6Q5IHKsJLwP7exErjN6lyyq3OSQ= + github.com/uber/jaeger-client-go v2.30.0+incompatible h1:D6wyKGCecFaSRUpo8lCVbaOOb6ThwMmTEbhRwtKR97o= +@@ -1628,6 +1663,8 @@ github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljT + github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= + github.com/ugorji/go/codec v1.2.6 h1:7kbGefxLoDBuYXOms4yD7223OpNMMPNPZxXk5TvFcyQ= + github.com/ugorji/go/codec v1.2.6/go.mod h1:V6TCNZ4PHqoHGFZuSG1W8nrCzzdgA2DozYxWFFpvxTw= ++github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU= ++github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= + github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= + github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= + github.com/urfave/cli v1.22.2/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= +@@ -1789,6 +1826,9 @@ go4.org v0.0.0-20180809161055-417644f6feb5/go.mod h1:MkTOUMDaeVYJUOUsaDXIhWPZYa1 + go4.org v0.0.0-20200411211856-f5505b9728dd/go.mod h1:CIiUVy99QCPfoE13bO4EZaz5GZMZXMSBGhxRdsvzbkg= + go4.org v0.0.0-20230225012048-214862532bf5 h1:nifaUDeh+rPaBCMPMQHZmvJf+QdpLFnuQPwx+LxVmtc= + go4.org v0.0.0-20230225012048-214862532bf5/go.mod h1:F57wTi5Lrj6WLyswp5EYV1ncrEbFGHD4hhz6S1ZYeaU= ++golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= ++golang.org/x/arch v0.3.0 h1:02VY4/ZcO/gBOH6PUaoiptASxtXU10jazRCP865E97k= ++golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= + golang.org/x/build v0.0.0-20190111050920-041ab4dc3f9d/go.mod h1:OWs+y06UdEOHN4y+MfF/py+xQ/tYqIWW03b70/CG9Rw= + golang.org/x/crypto v0.0.0-20170930174604-9419663f5a44/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= + golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod 
h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +@@ -2066,6 +2106,8 @@ golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= + golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= + golang.org/x/sys v0.9.0 h1:KS/R3tvhPqvJvwcKfnBHJwwthS11LRhmM5D59eEXa0s= + golang.org/x/sys v0.9.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= ++golang.org/x/sys v0.10.0 h1:SqMFp9UcQJZa+pmYuAKjd9xq1f0j5rLcDIk0mj4qAsA= ++golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= + golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= + golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= + golang.org/x/term v0.0.0-20201210144234-2321bbc49cbf/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +@@ -2319,6 +2361,7 @@ lukechampine.com/blake3 v1.2.1/go.mod h1:0OFRp7fBtAylGVCO40o87sbupkyIGgbpv1+M1k1 + nhooyr.io/websocket v1.8.7 h1:usjR2uOr/zjjkVMy0lW+PPohFok7PCow5sDjLgX4P4g= + nhooyr.io/websocket v1.8.7/go.mod h1:B70DZP8IakI65RVQ51MsWP/8jndNma26DVA/nFSCgW0= + rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= ++rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= + rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= + rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= + sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= +diff --git a/itests/harmonytask_test.go b/itests/harmonytask_test.go +new file mode 100644 +index 000000000..2c8523d82 +--- /dev/null ++++ b/itests/harmonytask_test.go +@@ -0,0 +1,247 @@ ++package itests ++ ++import ( ++ "context" ++ "errors" ++ "fmt" ++ "sort" ++ "strings" ++ "sync" ++ "testing" ++ "time" ++ ++ "github.com/filecoin-project/lotus/itests/kit" ++ "github.com/filecoin-project/lotus/lib/harmony/harmonydb" ++ 
"github.com/filecoin-project/lotus/lib/harmony/harmonytask" ++ "github.com/filecoin-project/lotus/lib/harmony/resources" ++ "github.com/filecoin-project/lotus/node/impl" ++ "github.com/stretchr/testify/require" ++) ++ ++type task1 struct { ++ toAdd []int ++ myPersonalTableLock sync.Mutex ++ myPersonalTable map[harmonytask.TaskID]int // This would typicallyb be a DB table ++ WorkCompleted []string ++} ++ ++func (t *task1) Do(tID harmonytask.TaskID, stillOwned func() bool) (done bool, err error) { ++ if !stillOwned() { ++ return false, errors.New("Why not still owned?") ++ } ++ t.myPersonalTableLock.Lock() ++ defer t.myPersonalTableLock.Unlock() ++ t.WorkCompleted = append(t.WorkCompleted, fmt.Sprintf("taskResult%d", t.myPersonalTable[tID])) ++ return true, nil ++} ++func (t *task1) CanAccept(list []harmonytask.TaskID) (*harmonytask.TaskID, error) { ++ return &list[0], nil ++} ++func (t *task1) TypeDetails() harmonytask.TaskTypeDetails { ++ return harmonytask.TaskTypeDetails{ ++ Max: 100, ++ Name: "ThingOne", ++ MaxFailures: 1, ++ Cost: resources.Resources{ ++ Cpu: 1, ++ Ram: 100 << 10, // at 100kb, it's tiny ++ }, ++ } ++} ++func (t *task1) Adder(add harmonytask.AddTaskFunc) { ++ for _, v := range t.toAdd { ++ add(func(tID harmonytask.TaskID, tx *harmonydb.Tx) bool { ++ t.myPersonalTableLock.Lock() ++ defer t.myPersonalTableLock.Unlock() ++ ++ t.myPersonalTable[tID] = v ++ return true ++ }) ++ } ++} ++ ++func TestHarmonyTasks(t *testing.T) { ++ withSetup(t, func(m *kit.TestMiner) { ++ cdb := m.BaseAPI.(*impl.StorageMinerAPI).HarmonyDB ++ t1 := &task1{ ++ toAdd: []int{56, 73}, ++ myPersonalTable: map[harmonytask.TaskID]int{}, ++ } ++ e, err := harmonytask.New(cdb, []harmonytask.TaskInterface{t1}, "test:1") ++ require.NoError(t, err) ++ time.Sleep(3 * time.Second) // do the work. FLAKYNESS RISK HERE. 
++ e.GracefullyTerminate(time.Minute) ++ require.Equal(t, t1.WorkCompleted, 2, "wrong amount of work complete: expected 2 got:") ++ sort.Strings(t1.WorkCompleted) ++ got := strings.Join(t1.WorkCompleted, ",") ++ expected := "taskResult56,taskResult73" ++ if got != expected { ++ t.Fatal("Unexpected results! Wanted " + expected + " got " + got) ++ } ++ // TODO test history table looks right. ++ }) ++} ++ ++type passthru struct { ++ dtl harmonytask.TaskTypeDetails ++ do func(tID harmonytask.TaskID, stillOwned func() bool) (done bool, err error) ++ canAccept func(list []harmonytask.TaskID) (*harmonytask.TaskID, error) ++ adder func(add harmonytask.AddTaskFunc) ++} ++ ++func (t *passthru) Do(tID harmonytask.TaskID, stillOwned func() bool) (done bool, err error) { ++ return t.do(tID, stillOwned) ++} ++func (t *passthru) CanAccept(list []harmonytask.TaskID) (*harmonytask.TaskID, error) { ++ return t.canAccept(list) ++} ++func (t *passthru) TypeDetails() harmonytask.TaskTypeDetails { ++ return t.dtl ++} ++func (t *passthru) Adder(add harmonytask.AddTaskFunc) { ++ if t.adder != nil { ++ t.adder(add) ++ } ++} ++ ++// Common stuff ++var dtl = harmonytask.TaskTypeDetails{Name: "foo", Max: -1, Cost: resources.Resources{}} ++var letters []string ++var lettersMutex sync.Mutex ++ ++func fooLetterAdder(t *testing.T, cdb *harmonydb.DB) *passthru { ++ return &passthru{ ++ dtl: dtl, ++ canAccept: func(list []harmonytask.TaskID) (*harmonytask.TaskID, error) { return nil, nil }, ++ adder: func(add harmonytask.AddTaskFunc) { ++ for _, v := range []string{"A", "B"} { ++ add(func(tID harmonytask.TaskID, tx *harmonydb.Tx) bool { ++ _, err := tx.Exec("INSERT INTO itest_scratch (some_int, content) VALUES ($1,$2)", tID, v) ++ require.NoError(t, err) ++ return true ++ }) ++ } ++ }, ++ } ++} ++func fooLetterSaver(t *testing.T, cdb *harmonydb.DB) *passthru { ++ return &passthru{ ++ dtl: dtl, ++ canAccept: func(list []harmonytask.TaskID) (*harmonytask.TaskID, error) { return &list[0], nil }, ++ 
do: func(tID harmonytask.TaskID, stillOwned func() bool) (done bool, err error) { ++ var content string ++ err = cdb.QueryRow(context.Background(), ++ "SELECT content FROM itest_scratch WHERE some_int=$1", tID).Scan(&content) ++ require.NoError(t, err) ++ lettersMutex.Lock() ++ defer lettersMutex.Unlock() ++ letters = append(letters, content) ++ return true, nil ++ }, ++ } ++} ++ ++func TestHarmonyTasksWith2PartiesPolling(t *testing.T) { ++ withSetup(t, func(m *kit.TestMiner) { ++ cdb := m.BaseAPI.(*impl.StorageMinerAPI).HarmonyDB ++ senderParty := fooLetterAdder(t, cdb) ++ workerParty := fooLetterSaver(t, cdb) ++ harmonytask.POLL_DURATION = time.Millisecond * 100 ++ sender, err := harmonytask.New(cdb, []harmonytask.TaskInterface{senderParty}, "test:1") ++ require.NoError(t, err) ++ worker, err := harmonytask.New(cdb, []harmonytask.TaskInterface{workerParty}, "test:2") ++ require.NoError(t, err) ++ time.Sleep(3 * time.Second) // do the work. FLAKYNESS RISK HERE. ++ sender.GracefullyTerminate(time.Second * 5) ++ worker.GracefullyTerminate(time.Second * 5) ++ sort.Strings(letters) ++ require.Equal(t, letters, []string{"A", "B"}) ++ }) ++} ++ ++func TestWorkStealing(t *testing.T) { ++ withSetup(t, func(m *kit.TestMiner) { ++ cdb := m.BaseAPI.(*impl.StorageMinerAPI).HarmonyDB ++ ctx := context.Background() ++ ++ // The dead worker will be played by a few SQL INSERTS. 
++ _, err := cdb.Exec(ctx, `INSERT INTO harmony_machines ++ (id, last_contact,host_and_port, cpu, ram, gpu, gpuram) ++ VALUES (300, DATE '2000-01-01', 'test:1', 4, 400000, 1, 1000000)`) ++ require.ErrorIs(t, err, nil) ++ _, err = cdb.Exec(ctx, `INSERT INTO harmony_task ++ (id, name, owner_id, posted_time, added_by) ++ VALUES (1234, 'foo', 300, DATE '2000-01-01', 300)`) ++ require.ErrorIs(t, err, nil) ++ _, err = cdb.Exec(ctx, "INSERT INTO itest_scratch (some_int, content) VALUES (1234, 'M')") ++ require.ErrorIs(t, err, nil) ++ ++ harmonytask.POLL_DURATION = time.Millisecond * 100 ++ harmonytask.CLEANUP_FREQUENCY = time.Millisecond * 100 ++ worker, err := harmonytask.New(cdb, []harmonytask.TaskInterface{fooLetterSaver(t, cdb)}, "test:2") ++ require.ErrorIs(t, err, nil) ++ time.Sleep(3 * time.Second) // do the work. FLAKYNESS RISK HERE. ++ worker.GracefullyTerminate(time.Second * 5) ++ require.Equal(t, []string{"M"}, letters) ++ }) ++} ++ ++func TestTaskRetry(t *testing.T) { ++ withSetup(t, func(m *kit.TestMiner) { ++ cdb := m.BaseAPI.(*impl.StorageMinerAPI).HarmonyDB ++ senderParty := fooLetterAdder(t, cdb) ++ harmonytask.POLL_DURATION = time.Millisecond * 100 ++ sender, err := harmonytask.New(cdb, []harmonytask.TaskInterface{senderParty}, "test:1") ++ require.NoError(t, err) ++ ++ alreadyFailed := map[string]bool{} ++ fails2xPerMsg := &passthru{ ++ dtl: dtl, ++ canAccept: func(list []harmonytask.TaskID) (*harmonytask.TaskID, error) { return &list[0], nil }, ++ do: func(tID harmonytask.TaskID, stillOwned func() bool) (done bool, err error) { ++ var content string ++ err = cdb.QueryRow(context.Background(), ++ "SELECT content FROM itest_scratch WHERE some_int=$1", tID).Scan(&content) ++ require.NoError(t, err) ++ lettersMutex.Lock() ++ defer lettersMutex.Unlock() ++ if !alreadyFailed[content] { ++ alreadyFailed[content] = true ++ return false, errors.New("intentional 'error'") ++ } ++ letters = append(letters, content) ++ return true, nil ++ }, ++ } ++ rcv, err := 
harmonytask.New(cdb, []harmonytask.TaskInterface{fails2xPerMsg}, "test:2") ++ require.NoError(t, err) ++ time.Sleep(3 * time.Second) ++ sender.GracefullyTerminate(time.Hour) ++ rcv.GracefullyTerminate(time.Hour) ++ sort.Strings(letters) ++ require.Equal(t, []string{"A", "B"}, letters) ++ type hist struct { ++ TaskID int ++ Result bool ++ Err string ++ } ++ var res []hist ++ require.NoError(t, cdb.Select(context.Background(), &res, ++ `SELECT task_id, result, err FROM harmony_task_history ++ ORDER BY result DESC, task_id`)) ++ ++ require.Equal(t, []hist{ ++ {1, true, ""}, ++ {2, true, ""}, ++ {1, false, "error: intentional 'error'"}, ++ {2, false, "error: intentional 'error'"}}, res) ++ }) ++} ++ ++/* ++FUTURE test fast-pass round-robin via http calls (3party) once the API for that is set ++It's necessary for WinningPoSt. ++ ++FUTURE test follows. ++It's necessary for sealing work. ++*/ +diff --git a/lib/harmony/harmonydb/harmonydb.go b/lib/harmony/harmonydb/harmonydb.go +index fd31e7a13..48e3db6fa 100644 +--- a/lib/harmony/harmonydb/harmonydb.go ++++ b/lib/harmony/harmonydb/harmonydb.go +@@ -118,21 +118,25 @@ type tracer struct { + + type ctxkey string + +-var sqlStart = ctxkey("sqlStart") ++const SQL_START = ctxkey("sqlStart") ++const SQL_STRING = ctxkey("sqlString") + + func (t tracer) TraceQueryStart(ctx context.Context, conn *pgx.Conn, data pgx.TraceQueryStartData) context.Context { +- return context.WithValue(ctx, sqlStart, time.Now()) ++ return context.WithValue(context.WithValue(ctx, SQL_START, time.Now()), SQL_STRING, data.SQL) + } + func (t tracer) TraceQueryEnd(ctx context.Context, conn *pgx.Conn, data pgx.TraceQueryEndData) { + DBMeasures.Hits.M(1) +- ms := time.Since(ctx.Value(sqlStart).(time.Time)).Milliseconds() ++ ms := time.Since(ctx.Value(SQL_START).(time.Time)).Milliseconds() + DBMeasures.TotalWait.M(ms) + DBMeasures.Waits.Observe(float64(ms)) + if data.Err != nil { + DBMeasures.Errors.M(1) + } +- // Can log what type of query it is, but not what 
tables +- // Can log rows affected. ++ logger.Debugw("SQL run", ++ "query", ctx.Value(SQL_STRING).(string), ++ "err", data.Err, ++ "rowCt", data.CommandTag.RowsAffected(), ++ "milliseconds", ms) + } + + // addStatsAndConnect connects a prometheus logger. Be sure to run this before using the DB. +@@ -250,8 +254,9 @@ func (db *DB) upgrade() error { + } + _, err = db.pgx.Exec(context.Background(), s) + if err != nil { +- db.log(fmt.Sprintf("Could not upgrade! File %s, Query: %s, Returned: %s", name, s, err.Error())) +- return err ++ msg := fmt.Sprintf("Could not upgrade! File %s, Query: %s, Returned: %s", name, s, err.Error()) ++ db.log(msg) ++ return errors.New(msg) // makes devs lives easier by placing message at the end. + } + } + +diff --git a/lib/harmony/harmonydb/sql/20230706.sql b/lib/harmony/harmonydb/sql/20230706.sql +index b45aca7fa..a4a333b81 100644 +--- a/lib/harmony/harmonydb/sql/20230706.sql ++++ b/lib/harmony/harmonydb/sql/20230706.sql +@@ -2,5 +2,6 @@ CREATE TABLE itest_scratch ( + id SERIAL PRIMARY KEY, + content TEXT, + some_int INTEGER, ++ second_int INTEGER, + update_time TIMESTAMP DEFAULT current_timestamp + ) +\ No newline at end of file +diff --git a/lib/harmony/harmonydb/sql/20230719.sql b/lib/harmony/harmonydb/sql/20230719.sql +new file mode 100644 +index 000000000..0a676526b +--- /dev/null ++++ b/lib/harmony/harmonydb/sql/20230719.sql +@@ -0,0 +1,52 @@ ++/* For HarmonyTask base implementation. 
*/ ++ ++CREATE TABLE harmony_machines ( ++ id SERIAL PRIMARY KEY NOT NULL, ++ last_contact TIMESTAMP NOT NULL DEFAULT current_timestamp, ++ host_and_port varchar(300) NOT NULL, ++ cpu INTEGER NOT NULL, ++ ram BIGINT NOT NULL, ++ gpu FLOAT NOT NULL, ++ gpuram BIGINT NOT NULL ++); ++ ++CREATE TABLE harmony_task ( ++ id SERIAL PRIMARY KEY NOT NULL, ++ initiated_by INTEGER, ++ update_time TIMESTAMP NOT NULL DEFAULT current_timestamp, ++ posted_time TIMESTAMP NOT NULL, ++ owner_id INTEGER REFERENCES harmony_machines (id) ON DELETE SET NULL, ++ added_by INTEGER NOT NULL, ++ previous_task INTEGER, ++ name varchar(8) NOT NULL ++); ++COMMENT ON COLUMN harmony_task.initiated_by IS 'The task ID whose completion occasioned this task.'; ++COMMENT ON COLUMN harmony_task.owner_id IS 'The foreign key to harmony_machines.'; ++COMMENT ON COLUMN harmony_task.name IS 'The name of the task type.'; ++COMMENT ON COLUMN harmony_task.owner_id IS 'may be null if between owners or not yet taken'; ++COMMENT ON COLUMN harmony_task.update_time IS 'When it was last modified. 
not a heartbeat'; ++ ++CREATE TABLE harmony_task_history ( ++ id SERIAL PRIMARY KEY NOT NULL, ++ task_id INTEGER NOT NULL, ++ name VARCHAR(8) NOT NULL, ++ posted TIMESTAMP NOT NULL, ++ work_start TIMESTAMP NOT NULL, ++ work_end TIMESTAMP NOT NULL, ++ result BOOLEAN NOT NULL, ++ err varchar ++); ++COMMENT ON COLUMN harmony_task_history.result IS 'Use to detemine if this was a successful run.'; ++ ++CREATE TABLE harmony_task_follow ( ++ id SERIAL PRIMARY KEY NOT NULL, ++ owner_id INTEGER NOT NULL REFERENCES harmony_machines (id) ON DELETE CASCADE, ++ to_type VARCHAR(8) NOT NULL, ++ from_type VARCHAR(8) NOT NULL ++); ++ ++CREATE TABLE harmony_task_impl ( ++ id SERIAL PRIMARY KEY NOT NULL, ++ owner_id INTEGER NOT NULL REFERENCES harmony_machines (id) ON DELETE CASCADE, ++ name VARCHAR(8) NOT NULL ++); +\ No newline at end of file +diff --git a/lib/harmony/harmonytask/doc.go b/lib/harmony/harmonytask/doc.go +new file mode 100644 +index 000000000..357c3e15c +--- /dev/null ++++ b/lib/harmony/harmonytask/doc.go +@@ -0,0 +1,79 @@ ++/* ++ Package harmonytask implements a pure (no task logic), distributed ++ task manager. This clean interface allows a task implementer to completely ++ ++avoid being concerned with task scheduling and management. ++It's based on the idea of tasks as small units of work broken from other ++work by hardware, parallelizability, reliability, or any other reason. ++Workers will be Greedy: vacuuming up their favorite jobs from a list. ++Once 1 task is accepted, harmonydb tries to get other task runner ++machines to accept work (round robin) before trying again to accept. ++* ++Mental Model: ++ ++ Things that block tasks: ++ - task not registered for any running server ++ - max was specified and reached ++ - resource exhaustion ++ - CanAccept() interface (per-task implementation) does not accept it. 
++ Ways tasks start: (slowest first) ++ - DB Read every 1 minute ++ - Bump via HTTP if registered in DB ++ - Task was added (to db) by this process ++ Ways tasks get added: ++ - Async Listener task (for chain, etc) ++ - Followers: Tasks get added because another task completed ++ When Follower collectors run: ++ - If both sides are process-local, then ++ - Otherwise, at the listen interval during db scrape ++ How duplicate tasks are avoided: ++ - that's up to the task definition, but probably a unique key ++ ++* ++To use: ++1. Implement TaskInterface for a new task. ++2. Have New() receive this & all other ACTIVE implementations. ++* ++* ++As we are not expecting DBAs in this database, it's important to know ++what grows uncontrolled. The only harmony_* tables that do are _task_history ++(somewhat quickly) and harmony_machines (slowly). These will need a ++clean-up once the task data can no longer be acted upon, ++but the design **requires** extraInfo tables to grow until the task's ++info could not possibly be used by a following task, including slow ++release rollout. This would normally be on the order of months old. ++* ++Other possible enhancements include more collaborative coordination ++to assign a task to machines closer to the data. ++ ++__Database_Behavior__ ++harmony_task is the list of work that has not been completed. ++ ++ AddTaskFunc manages the additions, but is designed to have its ++ transactions failed-out on overlap with a similar task already written. ++ It's up to the TaskInterface implementer to discover this overlap via ++ some other table it uses (since overlap can mean very different things). ++ ++harmony_task_history ++ ++ This holds transactions that completed or saw too many retries. It also ++ serves as input for subsequent (follower) tasks to kick off. This is not ++ done machine-internally because a follower may not be on the same machine ++ as the previous task. 
++ ++harmony_machines ++ ++ Managed by lib/harmony/resources, this is a reference to machines registered ++ via the resources. This registration does not obligate the machine to ++ anything, but serves as a discovery mechanism. Paths are hostnames + ports ++ which are presumed to support http, but this assumption is only used by ++ the task system. ++ ++harmony_task_follow / harmony_task_impl ++ ++ These tables are used to fast-path notifications to other machines instead ++ of waiting for polling. _impl helps round-robin work pick-up. _follow helps ++ discover the machines that are interested in creating tasks following the ++ task that just completed. ++*/ ++package harmonytask +diff --git a/lib/harmony/harmonytask/harmonytask.go b/lib/harmony/harmonytask/harmonytask.go +new file mode 100644 +index 000000000..1f5662959 +--- /dev/null ++++ b/lib/harmony/harmonytask/harmonytask.go +@@ -0,0 +1,386 @@ ++package harmonytask ++ ++import ( ++ "context" ++ "fmt" ++ "strconv" ++ "sync/atomic" ++ "time" ++ ++ "github.com/filecoin-project/lotus/lib/harmony/resources" ++ "github.com/gin-gonic/gin" ++ ++ "github.com/filecoin-project/lotus/lib/harmony/harmonydb" ++) ++ ++// Consts (except for unit test) ++var POLL_DURATION = time.Minute // Poll for Work this frequently ++var CLEANUP_FREQUENCY = 5 * time.Minute // Check for dead workers this often * everyone ++ ++type TaskTypeDetails struct { ++ // Max returns how many tasks this machine can run of this type. ++ // Negative means unrestricted. ++ Max int ++ ++ // Name is the task name to be added to the task list. ++ Name string ++ ++ // Peak costs to Do() the task. ++ Cost resources.Resources ++ ++ // Max Failure count before the job is dropped. ++ // 0 = retry forever ++ MaxFailures uint ++ ++ // Follow another task's completion via this task's creation. ++ // The function should populate extraInfo from data ++ // available from the previous task's tables, using the given TaskID. 
++ // It should also return success if the trigger succeeded. ++ // NOTE: if refactoring tasks, see if your task is ++ // necessary. Ex: Is the sector state correct for your stage to run? ++ Follows map[string]func(TaskID, AddTaskFunc) bool ++} ++ ++// TaskInterface must be implemented in order to have a task used by harmonytask. ++type TaskInterface interface { ++ // Do the task assigned. Call stillOwned before making single-writer-only ++ // changes to ensure the work has not been stolen. ++ // This is the ONLY function that should attempt to do the work, and must ++ // ONLY be called by harmonytask. ++ // Indicate if the task no longer needs scheduling with done=true including ++ // cases where it's past the deadline. ++ Do(taskID TaskID, stillOwned func() bool) (done bool, err error) ++ ++ // CanAccept should return if the task can run on this machine. It should ++ // return nil if the task type is not allowed on this machine. ++ // It should select the task it most wants to accomplish. ++ // It is also responsible for determining disk space (including scratch). ++ CanAccept([]TaskID) (*TaskID, error) ++ ++ // TypeDetails() returns static details about how this task behaves and ++ // how this machine will run it. Read once at the beginning. ++ TypeDetails() TaskTypeDetails ++ ++ // This listener will consume all external sources continuously for work. ++ // Do() may also be called from a backlog of work. This must not ++ // start doing the work (it still must be scheduled). ++ // Note: Task de-duplication should happen in ExtraInfoFunc by ++ // returning false, typically by determining from the tx that the work ++ // exists already. The easy way is to have a unique joint index ++ // across all fields that will be common. ++ // Adder should typically only add its own task type, but multiple ++ // is possible for when 1 trigger starts 2 things. 
++ // Usage Example: ++ // func (b *BazType)Adder(addTask AddTaskFunc) { ++ // for { ++ // bazMaker := <- bazChannel ++ // addTask(func(id harmonytask.TaskID, txn *harmonydb.Tx) bool { ++ // _, err := txn.Exec(`INSERT INTO bazInfoTable (taskID, qix, mot) ++ // VALUES ($1,$2,$3)`, id, bazMaker.qix, bazMaker.mot) ++ // if err != nil { ++ // scream(err) ++ // return false ++ // } ++ // return true ++ // }) ++ // } ++ // } ++ Adder(AddTaskFunc) ++} ++ ++type AddTaskFunc func(extraInfo func(TaskID, *harmonydb.Tx) bool) ++ ++type TaskEngine struct { ++ ctx context.Context ++ handlers []*taskTypeHandler ++ db *harmonydb.DB ++ workAdderMutex *notifyingMx ++ reg *resources.Reg ++ grace context.CancelFunc ++ taskMap map[string]*taskTypeHandler ++ ownerID int ++ tryAllWork chan bool // notify if work completed ++ follows map[string][]followStruct ++ lastFollowTime time.Time ++ lastCleanup atomic.Value ++} ++type followStruct struct { ++ f func(TaskID, AddTaskFunc) bool ++ h *taskTypeHandler ++} ++ ++type TaskID int ++ ++// New creates all the task definitions. 
Note that TaskEngine ++// knows nothing about the tasks themselves and serves to be a ++// generic container for common work ++func New( ++ db *harmonydb.DB, ++ impls []TaskInterface, ++ hostnameAndPort string) (*TaskEngine, error) { ++ ++ reg, err := resources.Register(db, hostnameAndPort) ++ if err != nil { ++ return nil, fmt.Errorf("cannot get resources: %w", err) ++ } ++ ctx, grace := context.WithCancel(context.Background()) ++ e := &TaskEngine{ ++ ctx: ctx, ++ grace: grace, ++ db: db, ++ reg: reg, ++ ownerID: reg.Resources.MachineID, // The current number representing "hostAndPort" ++ workAdderMutex: &notifyingMx{}, ++ taskMap: make(map[string]*taskTypeHandler, len(impls)), ++ tryAllWork: make(chan bool), ++ follows: make(map[string][]followStruct), ++ } ++ e.lastCleanup.Store(time.Now()) ++ for _, c := range impls { ++ h := taskTypeHandler{ ++ TaskInterface: c, ++ TaskTypeDetails: c.TypeDetails(), ++ TaskEngine: e, ++ } ++ e.handlers = append(e.handlers, &h) ++ e.taskMap[h.TaskTypeDetails.Name] = &h ++ ++ _, err := db.Exec(e.ctx, `INSERT INTO harmony_task_impl (owner_id, name) ++ VALUES ($1,$2)`, e.ownerID, h.Name) ++ if err != nil { ++ return nil, fmt.Errorf("can't update impl: %w", err) ++ } ++ ++ for name, fn := range c.TypeDetails().Follows { ++ e.follows[name] = append(e.follows[name], followStruct{fn, &h}) ++ ++ // populate harmony_task_follows ++ _, err := db.Exec(e.ctx, `INSERT INTO harmony_task_follows (owner_id, from_task, to_task) ++ VALUES ($1,$2,$3)`, e.ownerID, name, h.Name) ++ if err != nil { ++ return nil, fmt.Errorf("can't update harmony_task_follows: %w", err) ++ } ++ } ++ } ++ ++ // resurrect old work ++ { ++ var taskRet []struct { ++ ID int ++ Name string ++ } ++ ++ err := db.Select(e.ctx, &taskRet, `SELECT id, name from harmony_task WHERE owner_id=$1`, e.ownerID) ++ if err != nil { ++ return nil, err ++ } ++ for _, w := range taskRet { ++ // edge-case: if old assignments are not available tasks, unlock them. 
++ h := e.taskMap[w.Name] ++ if h == nil { ++ _, err := db.Exec(e.ctx, `UPDATE harmony_task SET owner=NULL WHERE id=$1`, w.ID) ++ if err != nil { ++ log.Error("Cannot remove self from owner field: ", err) ++ continue // not really fatal, but not great ++ } ++ } ++ if !h.considerWork([]TaskID{TaskID(w.ID)}) { ++ log.Error("Strange: Unable to accept previously owned task: ", w.ID, w.Name) ++ } ++ } ++ } ++ for _, h := range e.handlers { ++ go h.Adder(h.AddTask) ++ } ++ go e.poller() ++ ++ return e, nil ++} ++ ++// GracefullyTerminate hangs until all present tasks have completed. ++// Call this to cleanly exit the process. As some processes are long-running, ++// passing a deadline will ignore those still running (to be picked-up later). ++func (e *TaskEngine) GracefullyTerminate(deadline time.Duration) { ++ e.grace() ++ e.reg.Shutdown() ++ deadlineChan := time.NewTimer(deadline).C ++ ++ // block bumps & follows by unreg from DBs. ++ _, err := e.db.Exec(context.Background(), `DELETE FROM harmony_task_impl WHERE owner_id=$1`, e.ownerID) ++ if err != nil { ++ log.Warn("Could not clean-up impl table: %w", err) ++ } ++ _, err = e.db.Exec(context.Background(), `DELETE FROM harmony_task_follow WHERE owner_id=$1`, e.ownerID) ++ if err != nil { ++ log.Warn("Could not clean-up impl table: %w", err) ++ } ++top: ++ for _, h := range e.handlers { ++ if h.Count.Load() > 0 { ++ select { ++ case <-deadlineChan: ++ return ++ default: ++ time.Sleep(time.Millisecond) ++ goto top ++ } ++ } ++ } ++} ++ ++func (e *TaskEngine) poller() { ++ for { ++ select { ++ case <-e.tryAllWork: ///////////////////// Find work after some work finished ++ case <-time.NewTicker(POLL_DURATION).C: // Find work periodically ++ case <-e.ctx.Done(): ///////////////////// Graceful exit ++ return ++ } ++ e.followWorkInDB() // "Follows" the slow way ++ e.pollerTryAllWork() // "Bumps" (round robin tasks) the slow way ++ } ++} ++ ++// followWorkInDB implements "Follows" the slow way ++func (e *TaskEngine) 
followWorkInDB() { ++ // Step 1: What are we following? ++ var lastFollowTime time.Time ++ lastFollowTime, e.lastFollowTime = e.lastFollowTime, time.Now() ++ ++ for from_name, srcs := range e.follows { ++ var cList []int // Which work is done (that we follow) since we last checked? ++ err := e.db.Select(e.ctx, &cList, `SELECT h.task_id FROM harmony_task_history ++ WHERE h.work_end>$1 AND h.name=$2`, lastFollowTime, from_name) ++ if err != nil { ++ log.Error("Could not query DB: ", err) ++ return ++ } ++ for _, src := range srcs { ++ for _, workAlreadyDone := range cList { // Were any tasks made to follow these tasks? ++ var ct int ++ err := e.db.QueryRow(e.ctx, `SELECT COUNT(*) FROM harmony_task ++ WHERE name=$1 AND previous_task=$2`, src.h.Name, workAlreadyDone).Scan(&ct) ++ if err != nil { ++ log.Error("Could not query harmony_task: ", err) ++ return // not recoverable here ++ } ++ if ct > 0 { ++ continue ++ } ++ // we need to create this task ++ if !src.h.Follows[from_name](TaskID(workAlreadyDone), src.h.AddTask) { ++ // But someone may have beaten us to it. 
++ log.Infof("Unable to add task %s following Task(%d, %s)", src.h.Name, workAlreadyDone, from_name) ++ } ++ } ++ } ++ } ++} ++ ++// pollerTryAllWork implements "Bumps" (next task) the slow way ++func (e *TaskEngine) pollerTryAllWork() { ++ if time.Since(e.lastCleanup.Load().(time.Time)) > CLEANUP_FREQUENCY { ++ e.lastCleanup.Store(time.Now()) ++ resources.CleanupMachines(e.ctx, e.db) ++ } ++ for _, v := range e.handlers { ++ rerun: ++ if v.AssertMachineHasCapacity() != nil { ++ continue ++ } ++ var unownedTasks []TaskID ++ err := e.db.Select(e.ctx, &unownedTasks, `SELECT id ++ FROM harmony_task ++ WHERE owner_id IS NULL AND name=$1 ++ ORDER BY update_time`, v.Name) ++ if err != nil { ++ log.Error("Unable to read work ", err) ++ continue ++ } ++ accepted := v.considerWork(unownedTasks) ++ if !accepted { ++ log.Warn("Work not accepted") ++ continue ++ } ++ if len(unownedTasks) > 1 { ++ e.bump(v.Name) // wait for others before trying again to add work. ++ goto rerun ++ } ++ } ++} ++ ++// AddHttpHandlers TODO this needs to be called by the http server to register routes. ++// This implements the receiver-side of "follows" and "bumps" the fast way. ++func (e *TaskEngine) AddHttpHandlers(root gin.IRouter) { ++ s := root.Group("/scheduler/") ++ f := s.Group("/follows") ++ for name, v := range e.follows { ++ f.GET("/"+name+"/:tID", func(c *gin.Context) { ++ tIDString := c.Param("tID") ++ tID, err := strconv.Atoi(tIDString) ++ if err != nil { ++ c.AbortWithError(401, err) ++ return ++ } ++ taskAdded := false ++ for _, v := range v { ++ taskAdded = taskAdded || v.f(TaskID(tID), v.h.AddTask) ++ } ++ if taskAdded { ++ e.tryAllWork <- true ++ c.Status(200) ++ } ++ c.Status(202) // NOTE: 202 for "accepted" but not worked. 
++ }) ++ } ++ b := s.Group("/bump") ++ for _, h := range e.handlers { ++ b.GET("/"+h.Name+"/:tID", func(c *gin.Context) { ++ tIDString := c.Param("tID") ++ tID, err := strconv.Atoi(tIDString) ++ if err != nil { ++ c.AbortWithError(401, err) ++ return ++ } ++ // We NEED to block while trying to deliver ++ // this work to ease the network impact. ++ if h.considerWork([]TaskID{TaskID(tID)}) { ++ c.Status(200) ++ } ++ c.Status(202) // NOTE: 202 for "accepted" but not worked. ++ }) ++ } ++} ++ ++func (e *TaskEngine) bump(taskType string) { ++ var res []string ++ err := e.db.Select(e.ctx, &res, `SELECT host_and_port FROM harmony_machines m ++ JOIN harmony_task_impl i ON i.owner_id=m.id ++ WHERE i.name=$1`, taskType) ++ if err != nil { ++ log.Error("Could not read db for bump: ", err) ++ return ++ } ++ for _, url := range res { ++ resp, err := hClient.Get(url + "/scheduler/bump/" + taskType) ++ if err != nil { ++ log.Info("Server unreachable to bump: ", err) ++ continue ++ } ++ if resp.StatusCode == 200 { ++ return // just want 1 taker. ++ } ++ } ++} ++ ++// resourcesInUse requires workListsMutex to be already locked. 
++func (e *TaskEngine) resourcesInUse() resources.Resources { ++ tmp := e.reg.Resources ++ for _, t := range e.handlers { ++ ct := t.Count.Load() ++ tmp.Cpu -= int(ct) * t.Cost.Cpu ++ tmp.Gpu -= float64(ct) * t.Cost.Gpu ++ tmp.Ram -= uint64(ct) * t.Cost.Ram ++ } ++ return tmp ++} +diff --git a/lib/harmony/harmonytask/notifyingMx.go b/lib/harmony/harmonytask/notifyingMx.go +new file mode 100644 +index 000000000..51c4e0a53 +--- /dev/null ++++ b/lib/harmony/harmonytask/notifyingMx.go +@@ -0,0 +1,16 @@ ++package harmonytask ++ ++import "sync" ++ ++type notifyingMx struct { ++ sync.Mutex ++ UnlockNotify func() ++} ++ ++func (n *notifyingMx) Unlock() { ++ tmp := n.UnlockNotify ++ n.Mutex.Unlock() ++ if tmp != nil { ++ tmp() ++ } ++} +diff --git a/lib/harmony/harmonytask/taskTypeHandler.go b/lib/harmony/harmonytask/taskTypeHandler.go +new file mode 100644 +index 000000000..079f33704 +--- /dev/null ++++ b/lib/harmony/harmonytask/taskTypeHandler.go +@@ -0,0 +1,276 @@ ++package harmonytask ++ ++import ( ++ "context" ++ "errors" ++ "io" ++ "net/http" ++ "strconv" ++ "sync/atomic" ++ "time" ++ ++ "github.com/filecoin-project/lotus/lib/harmony/harmonydb" ++ logging "github.com/ipfs/go-log/v2" ++) ++ ++var log = logging.Logger("harmonytask") ++ ++type taskTypeHandler struct { ++ TaskInterface ++ TaskTypeDetails ++ TaskEngine *TaskEngine ++ Count atomic.Int32 /// locked by TaskEngine's mutex ++ ++} ++ ++func (h *taskTypeHandler) AddTask(extra func(TaskID, *harmonydb.Tx) bool) { ++ var tID TaskID ++ did, err := h.TaskEngine.db.BeginTransaction(h.TaskEngine.ctx, func(tx *harmonydb.Tx) bool { ++ // create taskID (from DB) ++ _, err := tx.Exec(`INSERT INTO harmony_task (name, added_by, posted_time) ++ VALUES ($1, $2, CURRENT_TIMESTAMP) `, h.Name, h.TaskEngine.ownerID) ++ if err != nil { ++ log.Error("Could not insert into harmonyTask", err) ++ return false ++ } ++ err = tx.QueryRow("SELECT id FROM harmony_task ORDER BY update_time DESC LIMIT 1").Scan(&tID) ++ if err != nil { ++ 
log.Error("Could not select ID: ", err) ++ } ++ return extra(tID, tx) ++ }) ++ if err != nil { ++ log.Error(err) ++ } ++ if !did { ++ return ++ } ++ ++ if !h.considerWork([]TaskID{tID}) { ++ h.TaskEngine.bump(h.Name) // We can't do it. How about someone else. ++ } ++} ++ ++func (h *taskTypeHandler) considerWork(ids []TaskID) (workAccepted bool) { ++ if len(ids) == 0 { ++ return true // stop looking for takers ++ } ++ ++ // 1. Can we do any more of this task type? ++ if h.Max > -1 && int(h.Count.Load()) == h.Max { ++ log.Infow("did not accept task", "name", h.Name, "reason", "at max already") ++ return false ++ } ++ ++ h.TaskEngine.workAdderMutex.Lock() ++ defer h.TaskEngine.workAdderMutex.Unlock() ++ ++ // 2. Can we do any more work? ++ err := h.AssertMachineHasCapacity() ++ if err != nil { ++ log.Info(err) ++ return false ++ } ++ ++ // 3. What does the impl say? ++ tID, err := h.CanAccept(ids) ++ if err != nil { ++ log.Error(err) ++ return false ++ } ++ if tID == nil { ++ log.Infow("did not accept task", "task_id", ids[0], "reason", "CanAccept() refused") ++ return false ++ } ++ ++ // 4. Can we claim the work for our hostname? 
++ ct, err := h.TaskEngine.db.Exec(h.TaskEngine.ctx, "UPDATE harmony_task SET owner_id=$1 WHERE id=$2 AND owner_id IS NULL", h.TaskEngine.ownerID, *tID) ++ if err != nil { ++ log.Error(err) ++ return false ++ } ++ if ct == 0 { ++ log.Infow("did not accept task", "task_id", strconv.Itoa(int(*tID)), "reason", "already Taken") ++ return false ++ } ++ ++ go func() { ++ h.Count.Add(1) ++ ++ var done bool ++ var doErr error ++ workStart := time.Now() ++ ++ defer func() { ++ if r := recover(); r != nil { ++ log.Error("Recovered from a serious error "+ ++ "while processing "+h.Name+" task "+strconv.Itoa(int(*tID))+": ", r) ++ } ++ h.Count.Add(-1) ++ ++ h.recordCompletion(*tID, workStart, done, doErr) ++ if done { ++ h.triggerCompletionListeners(*tID) ++ } ++ ++ h.TaskEngine.tryAllWork <- true // Activate tasks in this machine ++ }() ++ ++ done, doErr = h.Do(*tID, func() bool { ++ var owner int ++ // Background here because we don't want GracefulRestart to block this save. ++ err := h.TaskEngine.db.QueryRow(context.Background(), ++ `SELECT owner_id FROM harmony_task WHERE id=$1`, *tID).Scan(&owner) ++ if err != nil { ++ log.Error("Cannot determine ownership: ", err) ++ return false ++ } ++ return owner == h.TaskEngine.ownerID ++ }) ++ if doErr != nil { ++ log.Error("Do("+h.Name+", taskID="+strconv.Itoa(int(*tID))+") returned error: ", doErr) ++ } ++ }() ++ return true ++} ++ ++func (h *taskTypeHandler) recordCompletion(tID TaskID, workStart time.Time, done bool, doErr error) { ++ workEnd := time.Now() ++ ++ cm, err := h.TaskEngine.db.BeginTransaction(h.TaskEngine.ctx, func(tx *harmonydb.Tx) bool { ++ var postedTime time.Time ++ err := tx.QueryRow(`SELECT posted_time FROM harmony_task WHERE id=$1`, tID).Scan(&postedTime) ++ if err != nil { ++ log.Error("Could not log completion: ", err) ++ return false ++ } ++ result := "unspecified error" ++ if done { ++ _, err = tx.Exec("DELETE FROM harmony_task WHERE id=$1", tID) ++ if err != nil { ++ log.Error("Could not log completion: 
", err) ++ return false ++ } ++ result = "" ++ } else { ++ if doErr != nil { ++ result = "error: " + doErr.Error() ++ } ++ var deleteTask bool ++ if h.MaxFailures > 0 { ++ ct := uint(0) ++ err = tx.QueryRow(`SELECT count(*) FROM harmony_task_history ++ WHERE task_id=$1 AND result=FALSE`, tID).Scan(&ct) ++ if err != nil { ++ log.Error("Could not read task history:", err) ++ return false ++ } ++ if ct >= h.MaxFailures { ++ deleteTask = true ++ } ++ } ++ if deleteTask { ++ _, err = tx.Exec("DELETE FROM harmony_task WHERE id=$1", tID) ++ if err != nil { ++ log.Error("Could not delete failed job: ", err) ++ return false ++ } ++ // Note: Extra Info is left laying around for later review & clean-up ++ } else { ++ tx.Exec(`UPDATE harmony_task SET owner_id=NULL WHERE id=$1`, tID) ++ if err != nil { ++ log.Error("Could not disown failed task: ", tID, err) ++ return false ++ } ++ } ++ } ++ _, err = tx.Exec(`INSERT INTO harmony_task_history ++ (task_id, name, posted, work_start, work_end, result, err) ++ VALUES ($1, $2, $3, $4, $5, $6, $7)`, tID, h.Name, postedTime, workStart, workEnd, done, result) ++ if err != nil { ++ log.Error("Could not write history: ", err) ++ return false ++ } ++ return true ++ }) ++ if err != nil { ++ log.Error("Could not record transaction: ", err) ++ return ++ } ++ if !cm { ++ log.Error("Committing the task records failed") ++ } ++} ++ ++func (h *taskTypeHandler) AssertMachineHasCapacity() error { ++ r := h.TaskEngine.resourcesInUse() ++ ++ if r.Cpu-h.Cost.Cpu < 0 { ++ return errors.New("Did not accept " + h.Name + " task: out of cpu") ++ } ++ if h.Cost.Ram > r.Ram { ++ return errors.New("Did not accept " + h.Name + " task: out of RAM") ++ } ++ if r.Gpu-h.Cost.Gpu < 0 { ++ return errors.New("Did not accept " + h.Name + " task: out of available GPU") ++ } ++ return nil ++} ++ ++var hClient = http.Client{} ++ ++func init() { ++ hClient.Timeout = 3 * time.Second ++} ++ ++// triggerCompletionListeners does in order: ++// 1. 
Trigger all in-process followers (b/c it's fast). ++// 2. Trigger all living processes with followers via DB ++// 3. Future followers (think partial upgrade) can read harmony_task_history ++// 3a. The Listen() handles slow follows. ++func (h *taskTypeHandler) triggerCompletionListeners(tID TaskID) { ++ // InProcess (#1 from Description) ++ inProcessDefs := h.TaskEngine.follows[h.Name] ++ inProcessFollowers := make([]string, len(inProcessDefs)) ++ for _, fs := range inProcessDefs { ++ if fs.f(tID, fs.h.AddTask) { ++ inProcessFollowers = append(inProcessFollowers, fs.h.Name) ++ } ++ } ++ ++ // Over HTTP (#2 from Description) ++ var hps []struct { ++ HostAndPort string ++ ToType string ++ } ++ err := h.TaskEngine.db.Select(h.TaskEngine.ctx, &hps, `SELECT m.host_and_port, to_type ++ FROM harmony_task_follow f JOIN harmony_machines m ON m.id=f.owner_id ++ WHERE from_type=$1 AND to_type NOT IN $2 AND f.owner_id != $3`, ++ h.Name, inProcessFollowers, h.TaskEngine.ownerID) ++ if err != nil { ++ log.Warn("Could not fast-trigger partner processes.", err) ++ return ++ } ++ hostsVisited := map[string]bool{} ++ tasksVisited := map[string]bool{} ++ for _, v := range hps { ++ if hostsVisited[v.HostAndPort] || tasksVisited[v.ToType] { ++ continue ++ } ++ resp, err := hClient.Get(v.HostAndPort + "/scheduler/follows/" + h.Name) ++ if err != nil { ++ log.Warn("Couldn't hit http endpoint: ", err) ++ continue ++ } ++ b, err := io.ReadAll(resp.Body) ++ if err != nil { ++ log.Warn("Couldn't hit http endpoint: ", err) ++ continue ++ } ++ hostsVisited[v.HostAndPort], tasksVisited[v.ToType] = true, true ++ if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusAccepted { ++ log.Error("IO failed for fast nudge: ", string(b)) ++ continue ++ } ++ } ++} +diff --git a/lib/harmony/resources/memsys.go b/lib/harmony/resources/memsys.go +new file mode 100644 +index 000000000..1a45b5b22 +--- /dev/null ++++ b/lib/harmony/resources/memsys.go +@@ -0,0 +1,22 @@ ++//go:build darwin || 
freebsd || openbsd || dragonfly || netbsd ++// +build darwin freebsd openbsd dragonfly netbsd ++ ++package resources ++ ++import ( ++ "encoding/binary" ++ "syscall" ++) ++ ++func sysctlUint64(name string) (uint64, error) { ++ s, err := syscall.Sysctl(name) ++ if err != nil { ++ return 0, err ++ } ++ // hack because the string conversion above drops a \0 ++ b := []byte(s) ++ if len(b) < 8 { ++ b = append(b, 0) ++ } ++ return binary.LittleEndian.Uint64(b), nil ++} +diff --git a/lib/harmony/resources/resources.go b/lib/harmony/resources/resources.go +new file mode 100644 +index 000000000..77200b873 +--- /dev/null ++++ b/lib/harmony/resources/resources.go +@@ -0,0 +1,180 @@ ++package resources ++ ++import ( ++ "bytes" ++ "context" ++ "fmt" ++ "os/exec" ++ "regexp" ++ "runtime" ++ "strings" ++ "sync/atomic" ++ "time" ++ ++ cl "github.com/Nv7-Github/go-cl" ++ ffi "github.com/filecoin-project/filecoin-ffi" ++ "github.com/filecoin-project/lotus/lib/harmony/harmonydb" ++ logging "github.com/ipfs/go-log/v2" ++ "github.com/pbnjay/memory" ++ ++ "golang.org/x/sys/unix" ++ ++ "github.com/samber/lo" ++) ++ ++var LOOKS_DEAD_TIMEOUT = 10 * time.Minute // Time w/o minute heartbeats ++ ++type Resources struct { ++ Cpu int ++ Gpu float64 ++ GpuRam uint64 ++ Ram uint64 ++ MachineID int ++} ++type Reg struct { ++ Resources ++ shutdown atomic.Bool ++} ++ ++var logger = logging.Logger("harmonytask") ++ ++var lotusRE = regexp.MustCompile("lotus-worker|lotus-harmony|yugabyted") ++ ++func Register(db *harmonydb.DB, hostnameAndPort string) (*Reg, error) { ++ var reg Reg ++ var err error ++ reg.Resources, err = getResources() ++ if err != nil { ++ return nil, err ++ } ++ ctx := context.Background() ++ { // Learn our owner_id while updating harmony_machines ++ var ownerID []int ++ err := db.Select(ctx, &ownerID, `SELECT id FROM harmony_machines WHERE host_and_port=$1`, hostnameAndPort) ++ if err != nil { ++ return nil, fmt.Errorf("could not read from harmony_machines: %w", err) ++ } ++ if 
len(ownerID) == 0 { ++ err = db.QueryRow(ctx, `INSERT INTO harmony_machines ++ (host_and_port, cpu, ram, gpu, gpuram) VALUES ++ ($1,$2,$3,$4,$5) RETURNING id`, ++ hostnameAndPort, reg.Cpu, reg.Ram, reg.Gpu, reg.GpuRam).Scan(&reg.Resources.MachineID) ++ if err != nil { ++ return nil, err ++ } ++ ++ } else { ++ reg.MachineID = ownerID[0] ++ _, err := db.Exec(ctx, `UPDATE harmony_machines SET ++ cpu=$1, ram=$2, gpu=$3, gpuram=$4 WHERE id=$6`, ++ reg.Cpu, reg.Ram, reg.Gpu, reg.GpuRam, reg.Resources.MachineID) ++ if err != nil { ++ return nil, err ++ } ++ } ++ CleanupMachines(context.Background(), db) ++ } ++ go func() { ++ for { ++ time.Sleep(time.Minute) ++ if reg.shutdown.Load() { ++ return ++ } ++ _, err := db.Exec(ctx, `UPDATE harmony_machines SET last_contact=CURRENT_TIMESTAMP`) ++ if err != nil { ++ logger.Error("Cannot keepalive ", err) ++ } ++ } ++ }() ++ ++ return &reg, nil ++} ++func CleanupMachines(ctx context.Context, db *harmonydb.DB) int { ++ ct, err := db.Exec(ctx, `DELETE FROM harmony_machines WHERE last_contact < $1`, ++ time.Now().Add(-1*LOOKS_DEAD_TIMEOUT)) ++ if err != nil { ++ logger.Warn("unable to delete old machines: ", err) ++ } ++ return ct ++} ++ ++func (res *Reg) Shutdown() { ++ res.shutdown.Store(true) ++} ++ ++func getResources() (res Resources, err error) { ++ b, err := exec.Command(`ps`, `-ef`).CombinedOutput() ++ if err != nil { ++ logger.Warn("Could not safety check for 2+ processes: ", err) ++ } else { ++ found := 0 ++ for _, b := range bytes.Split(b, []byte("\n")) { ++ if lotusRE.Match(b) { ++ found++ ++ } ++ } ++ if found > 1 { ++ logger.Error("This Lotus process should run alone on a machine. 
Use CGroup.") ++ } ++ } ++ ++ res = Resources{ ++ Cpu: runtime.NumCPU(), ++ Ram: memory.FreeMemory(), ++ GpuRam: getGpuRam(), ++ } ++ ++ { // GPU boolean ++ gpus, err := ffi.GetGPUDevices() ++ if err != nil { ++ logger.Errorf("getting gpu devices failed: %+v", err) ++ } ++ all := strings.ToLower(strings.Join(gpus, ",")) ++ if len(gpus) > 1 || strings.Contains(all, "ati") || strings.Contains(all, "nvidia") { ++ res.Gpu = 1 ++ } ++ } ++ ++ return res, nil ++} ++ ++func getGpuRam() uint64 { ++ platforms, err := cl.GetPlatforms() ++ if err != nil { ++ logger.Error(err) ++ return 0 ++ } ++ ++ return uint64(lo.SumBy(platforms, func(p *cl.Platform) int64 { ++ d, err := p.GetDevices(cl.DeviceTypeAll) ++ if err != nil { ++ logger.Error(err) ++ return 0 ++ } ++ return lo.SumBy(d, func(d *cl.Device) int64 { return d.GlobalMemSize() }) ++ })) ++} ++ ++func DiskFree(path string) (uint64, error) { ++ s := unix.Statfs_t{} ++ err := unix.Statfs(path, &s) ++ if err != nil { ++ return 0, err ++ } ++ ++ return s.Bfree * uint64(s.Bsize), nil ++} ++ ++/* NOT for Darwin. ++func GetMemFree() uint64 { ++ in := unix.Sysinfo_t{} ++ err := unix.Sysinfo(&in) ++ if err != nil { ++ return 0 ++ } ++ // If this is a 32-bit system, then these fields are ++ // uint32 instead of uint64. ++ // So we always convert to uint64 to match signature. 
++ return uint64(in.Freeram) * uint64(in.Unit) ++} ++*/ diff --git a/itests/harmonytask_test.go b/itests/harmonytask_test.go new file mode 100644 index 000000000..2c8523d82 --- /dev/null +++ b/itests/harmonytask_test.go @@ -0,0 +1,247 @@ +package itests + +import ( + "context" + "errors" + "fmt" + "sort" + "strings" + "sync" + "testing" + "time" + + "github.com/filecoin-project/lotus/itests/kit" + "github.com/filecoin-project/lotus/lib/harmony/harmonydb" + "github.com/filecoin-project/lotus/lib/harmony/harmonytask" + "github.com/filecoin-project/lotus/lib/harmony/resources" + "github.com/filecoin-project/lotus/node/impl" + "github.com/stretchr/testify/require" +) + +type task1 struct { + toAdd []int + myPersonalTableLock sync.Mutex + myPersonalTable map[harmonytask.TaskID]int // This would typically be a DB table + WorkCompleted []string +} + +func (t *task1) Do(tID harmonytask.TaskID, stillOwned func() bool) (done bool, err error) { + if !stillOwned() { + return false, errors.New("Why not still owned?") + } + t.myPersonalTableLock.Lock() + defer t.myPersonalTableLock.Unlock() + t.WorkCompleted = append(t.WorkCompleted, fmt.Sprintf("taskResult%d", t.myPersonalTable[tID])) + return true, nil +} +func (t *task1) CanAccept(list []harmonytask.TaskID) (*harmonytask.TaskID, error) { + return &list[0], nil +} +func (t *task1) TypeDetails() harmonytask.TaskTypeDetails { + return harmonytask.TaskTypeDetails{ + Max: 100, + Name: "ThingOne", + MaxFailures: 1, + Cost: resources.Resources{ + Cpu: 1, + Ram: 100 << 10, // at 100kb, it's tiny + }, + } +} +func (t *task1) Adder(add harmonytask.AddTaskFunc) { + for _, v := range t.toAdd { + add(func(tID harmonytask.TaskID, tx *harmonydb.Tx) bool { + t.myPersonalTableLock.Lock() + defer t.myPersonalTableLock.Unlock() + + t.myPersonalTable[tID] = v + return true + }) + } +} + +func TestHarmonyTasks(t *testing.T) { + withSetup(t, func(m *kit.TestMiner) { + cdb := m.BaseAPI.(*impl.StorageMinerAPI).HarmonyDB + t1 := &task1{ + toAdd: 
[]int{56, 73}, + myPersonalTable: map[harmonytask.TaskID]int{}, + } + e, err := harmonytask.New(cdb, []harmonytask.TaskInterface{t1}, "test:1") + require.NoError(t, err) + time.Sleep(3 * time.Second) // do the work. FLAKYNESS RISK HERE. + e.GracefullyTerminate(time.Minute) + require.Equal(t, t1.WorkCompleted, 2, "wrong amount of work complete: expected 2 got:") + sort.Strings(t1.WorkCompleted) + got := strings.Join(t1.WorkCompleted, ",") + expected := "taskResult56,taskResult73" + if got != expected { + t.Fatal("Unexpected results! Wanted " + expected + " got " + got) + } + // TODO test history table looks right. + }) +} + +type passthru struct { + dtl harmonytask.TaskTypeDetails + do func(tID harmonytask.TaskID, stillOwned func() bool) (done bool, err error) + canAccept func(list []harmonytask.TaskID) (*harmonytask.TaskID, error) + adder func(add harmonytask.AddTaskFunc) +} + +func (t *passthru) Do(tID harmonytask.TaskID, stillOwned func() bool) (done bool, err error) { + return t.do(tID, stillOwned) +} +func (t *passthru) CanAccept(list []harmonytask.TaskID) (*harmonytask.TaskID, error) { + return t.canAccept(list) +} +func (t *passthru) TypeDetails() harmonytask.TaskTypeDetails { + return t.dtl +} +func (t *passthru) Adder(add harmonytask.AddTaskFunc) { + if t.adder != nil { + t.adder(add) + } +} + +// Common stuff +var dtl = harmonytask.TaskTypeDetails{Name: "foo", Max: -1, Cost: resources.Resources{}} +var letters []string +var lettersMutex sync.Mutex + +func fooLetterAdder(t *testing.T, cdb *harmonydb.DB) *passthru { + return &passthru{ + dtl: dtl, + canAccept: func(list []harmonytask.TaskID) (*harmonytask.TaskID, error) { return nil, nil }, + adder: func(add harmonytask.AddTaskFunc) { + for _, v := range []string{"A", "B"} { + add(func(tID harmonytask.TaskID, tx *harmonydb.Tx) bool { + _, err := tx.Exec("INSERT INTO itest_scratch (some_int, content) VALUES ($1,$2)", tID, v) + require.NoError(t, err) + return true + }) + } + }, + } +} +func fooLetterSaver(t 
*testing.T, cdb *harmonydb.DB) *passthru { + return &passthru{ + dtl: dtl, + canAccept: func(list []harmonytask.TaskID) (*harmonytask.TaskID, error) { return &list[0], nil }, + do: func(tID harmonytask.TaskID, stillOwned func() bool) (done bool, err error) { + var content string + err = cdb.QueryRow(context.Background(), + "SELECT content FROM itest_scratch WHERE some_int=$1", tID).Scan(&content) + require.NoError(t, err) + lettersMutex.Lock() + defer lettersMutex.Unlock() + letters = append(letters, content) + return true, nil + }, + } +} + +func TestHarmonyTasksWith2PartiesPolling(t *testing.T) { + withSetup(t, func(m *kit.TestMiner) { + cdb := m.BaseAPI.(*impl.StorageMinerAPI).HarmonyDB + senderParty := fooLetterAdder(t, cdb) + workerParty := fooLetterSaver(t, cdb) + harmonytask.POLL_DURATION = time.Millisecond * 100 + sender, err := harmonytask.New(cdb, []harmonytask.TaskInterface{senderParty}, "test:1") + require.NoError(t, err) + worker, err := harmonytask.New(cdb, []harmonytask.TaskInterface{workerParty}, "test:2") + require.NoError(t, err) + time.Sleep(3 * time.Second) // do the work. FLAKYNESS RISK HERE. + sender.GracefullyTerminate(time.Second * 5) + worker.GracefullyTerminate(time.Second * 5) + sort.Strings(letters) + require.Equal(t, letters, []string{"A", "B"}) + }) +} + +func TestWorkStealing(t *testing.T) { + withSetup(t, func(m *kit.TestMiner) { + cdb := m.BaseAPI.(*impl.StorageMinerAPI).HarmonyDB + ctx := context.Background() + + // The dead worker will be played by a few SQL INSERTS. 
+ _, err := cdb.Exec(ctx, `INSERT INTO harmony_machines + (id, last_contact,host_and_port, cpu, ram, gpu, gpuram) + VALUES (300, DATE '2000-01-01', 'test:1', 4, 400000, 1, 1000000)`) + require.ErrorIs(t, err, nil) + _, err = cdb.Exec(ctx, `INSERT INTO harmony_task + (id, name, owner_id, posted_time, added_by) + VALUES (1234, 'foo', 300, DATE '2000-01-01', 300)`) + require.ErrorIs(t, err, nil) + _, err = cdb.Exec(ctx, "INSERT INTO itest_scratch (some_int, content) VALUES (1234, 'M')") + require.ErrorIs(t, err, nil) + + harmonytask.POLL_DURATION = time.Millisecond * 100 + harmonytask.CLEANUP_FREQUENCY = time.Millisecond * 100 + worker, err := harmonytask.New(cdb, []harmonytask.TaskInterface{fooLetterSaver(t, cdb)}, "test:2") + require.ErrorIs(t, err, nil) + time.Sleep(3 * time.Second) // do the work. FLAKYNESS RISK HERE. + worker.GracefullyTerminate(time.Second * 5) + require.Equal(t, []string{"M"}, letters) + }) +} + +func TestTaskRetry(t *testing.T) { + withSetup(t, func(m *kit.TestMiner) { + cdb := m.BaseAPI.(*impl.StorageMinerAPI).HarmonyDB + senderParty := fooLetterAdder(t, cdb) + harmonytask.POLL_DURATION = time.Millisecond * 100 + sender, err := harmonytask.New(cdb, []harmonytask.TaskInterface{senderParty}, "test:1") + require.NoError(t, err) + + alreadyFailed := map[string]bool{} + fails2xPerMsg := &passthru{ + dtl: dtl, + canAccept: func(list []harmonytask.TaskID) (*harmonytask.TaskID, error) { return &list[0], nil }, + do: func(tID harmonytask.TaskID, stillOwned func() bool) (done bool, err error) { + var content string + err = cdb.QueryRow(context.Background(), + "SELECT content FROM itest_scratch WHERE some_int=$1", tID).Scan(&content) + require.NoError(t, err) + lettersMutex.Lock() + defer lettersMutex.Unlock() + if !alreadyFailed[content] { + alreadyFailed[content] = true + return false, errors.New("intentional 'error'") + } + letters = append(letters, content) + return true, nil + }, + } + rcv, err := harmonytask.New(cdb, 
[]harmonytask.TaskInterface{fails2xPerMsg}, "test:2") + require.NoError(t, err) + time.Sleep(3 * time.Second) + sender.GracefullyTerminate(time.Hour) + rcv.GracefullyTerminate(time.Hour) + sort.Strings(letters) + require.Equal(t, []string{"A", "B"}, letters) + type hist struct { + TaskID int + Result bool + Err string + } + var res []hist + require.NoError(t, cdb.Select(context.Background(), &res, + `SELECT task_id, result, err FROM harmony_task_history + ORDER BY result DESC, task_id`)) + + require.Equal(t, []hist{ + {1, true, ""}, + {2, true, ""}, + {1, false, "error: intentional 'error'"}, + {2, false, "error: intentional 'error'"}}, res) + }) +} + +/* +FUTURE test fast-pass round-robin via http calls (3party) once the API for that is set +It's necessary for WinningPoSt. + +FUTURE test follows. +It's necessary for sealing work. +*/ diff --git a/lib/harmony/harmonydb/harmonydb.go b/lib/harmony/harmonydb/harmonydb.go index fd31e7a13..48e3db6fa 100644 --- a/lib/harmony/harmonydb/harmonydb.go +++ b/lib/harmony/harmonydb/harmonydb.go @@ -118,21 +118,25 @@ type tracer struct { type ctxkey string -var sqlStart = ctxkey("sqlStart") +const SQL_START = ctxkey("sqlStart") +const SQL_STRING = ctxkey("sqlString") func (t tracer) TraceQueryStart(ctx context.Context, conn *pgx.Conn, data pgx.TraceQueryStartData) context.Context { - return context.WithValue(ctx, sqlStart, time.Now()) + return context.WithValue(context.WithValue(ctx, SQL_START, time.Now()), SQL_STRING, data.SQL) } func (t tracer) TraceQueryEnd(ctx context.Context, conn *pgx.Conn, data pgx.TraceQueryEndData) { DBMeasures.Hits.M(1) - ms := time.Since(ctx.Value(sqlStart).(time.Time)).Milliseconds() + ms := time.Since(ctx.Value(SQL_START).(time.Time)).Milliseconds() DBMeasures.TotalWait.M(ms) DBMeasures.Waits.Observe(float64(ms)) if data.Err != nil { DBMeasures.Errors.M(1) } - // Can log what type of query it is, but not what tables - // Can log rows affected. 
+ logger.Debugw("SQL run", + "query", ctx.Value(SQL_STRING).(string), + "err", data.Err, + "rowCt", data.CommandTag.RowsAffected(), + "milliseconds", ms) } // addStatsAndConnect connects a prometheus logger. Be sure to run this before using the DB. @@ -250,8 +254,9 @@ func (db *DB) upgrade() error { } _, err = db.pgx.Exec(context.Background(), s) if err != nil { - db.log(fmt.Sprintf("Could not upgrade! File %s, Query: %s, Returned: %s", name, s, err.Error())) - return err + msg := fmt.Sprintf("Could not upgrade! File %s, Query: %s, Returned: %s", name, s, err.Error()) + db.log(msg) + return errors.New(msg) // makes devs lives easier by placing message at the end. } } diff --git a/lib/harmony/harmonydb/sql/20230706.sql b/lib/harmony/harmonydb/sql/20230706.sql index b45aca7fa..a4a333b81 100644 --- a/lib/harmony/harmonydb/sql/20230706.sql +++ b/lib/harmony/harmonydb/sql/20230706.sql @@ -2,5 +2,6 @@ CREATE TABLE itest_scratch ( id SERIAL PRIMARY KEY, content TEXT, some_int INTEGER, + second_int INTEGER, update_time TIMESTAMP DEFAULT current_timestamp ) \ No newline at end of file diff --git a/lib/harmony/harmonydb/sql/20230719.sql b/lib/harmony/harmonydb/sql/20230719.sql new file mode 100644 index 000000000..0a676526b --- /dev/null +++ b/lib/harmony/harmonydb/sql/20230719.sql @@ -0,0 +1,52 @@ +/* For HarmonyTask base implementation. 
*/ + +CREATE TABLE harmony_machines ( + id SERIAL PRIMARY KEY NOT NULL, + last_contact TIMESTAMP NOT NULL DEFAULT current_timestamp, + host_and_port varchar(300) NOT NULL, + cpu INTEGER NOT NULL, + ram BIGINT NOT NULL, + gpu FLOAT NOT NULL, + gpuram BIGINT NOT NULL +); + +CREATE TABLE harmony_task ( + id SERIAL PRIMARY KEY NOT NULL, + initiated_by INTEGER, + update_time TIMESTAMP NOT NULL DEFAULT current_timestamp, + posted_time TIMESTAMP NOT NULL, + owner_id INTEGER REFERENCES harmony_machines (id) ON DELETE SET NULL, + added_by INTEGER NOT NULL, + previous_task INTEGER, + name varchar(8) NOT NULL +); +COMMENT ON COLUMN harmony_task.initiated_by IS 'The task ID whose completion occasioned this task.'; +COMMENT ON COLUMN harmony_task.owner_id IS 'The foreign key to harmony_machines.'; +COMMENT ON COLUMN harmony_task.name IS 'The name of the task type.'; +COMMENT ON COLUMN harmony_task.owner_id IS 'may be null if between owners or not yet taken'; +COMMENT ON COLUMN harmony_task.update_time IS 'When it was last modified. 
not a heartbeat'; + +CREATE TABLE harmony_task_history ( + id SERIAL PRIMARY KEY NOT NULL, + task_id INTEGER NOT NULL, + name VARCHAR(8) NOT NULL, + posted TIMESTAMP NOT NULL, + work_start TIMESTAMP NOT NULL, + work_end TIMESTAMP NOT NULL, + result BOOLEAN NOT NULL, + err varchar +); +COMMENT ON COLUMN harmony_task_history.result IS 'Use to detemine if this was a successful run.'; + +CREATE TABLE harmony_task_follow ( + id SERIAL PRIMARY KEY NOT NULL, + owner_id INTEGER NOT NULL REFERENCES harmony_machines (id) ON DELETE CASCADE, + to_type VARCHAR(8) NOT NULL, + from_type VARCHAR(8) NOT NULL +); + +CREATE TABLE harmony_task_impl ( + id SERIAL PRIMARY KEY NOT NULL, + owner_id INTEGER NOT NULL REFERENCES harmony_machines (id) ON DELETE CASCADE, + name VARCHAR(8) NOT NULL +); \ No newline at end of file diff --git a/lib/harmony/harmonytask/doc.go b/lib/harmony/harmonytask/doc.go new file mode 100644 index 000000000..357c3e15c --- /dev/null +++ b/lib/harmony/harmonytask/doc.go @@ -0,0 +1,79 @@ +/* + Package harmomnytask implements a pure (no task logic), distributed + task manager. This clean interface allows a task implementer to completely + +avoid being concerned with task scheduling and management. +It's based on the idea of tasks as small units of work broken from other +work by hardware, parallelizabilty, reliability, or any other reason. +Workers will be Greedy: vaccuuming up their favorite jobs from a list. +Once 1 task is accepted, harmonydb tries to get other task runner +machines to accept work (round robin) before trying again to accept. +* +Mental Model: + + Things that block tasks: + - task not registered for any running server + - max was specified and reached + - resource exhaustion + - CanAccept() interface (per-task implmentation) does not accept it. 
+ Ways tasks start: (slowest first) + - DB Read every 1 minute + - Bump via HTTP if registered in DB + - Task was added (to db) by this process + Ways tasks get added: + - Async Listener task (for chain, etc) + - Followers: Tasks get added because another task completed + When Follower collectors run: + - If both sides are process-local, then + - Otherwise, at the listen interval during db scrape + How duplicate tasks are avoided: + - that's up to the task definition, but probably a unique key + +* +To use: +1.Implement TaskInterface for a new task. +2 Have New() receive this & all other ACTIVE implementations. +* +* +As we are not expecting DBAs in this database, it's important to know +what grows uncontrolled. The only harmony_* table is _task_history +(somewhat quickly) and harmony_machines (slowly). These will need a +clean-up for after the task data could never be acted upon. +but the design **requires** extraInfo tables to grow until the task's +info could not possibly be used by a following task, including slow +release rollout. This would normally be in the order of months old. +* +Other possible enhancements include more collaboative coordination +to assign a task to machines closer to the data. + +__Database_Behavior__ +harmony_task is the list of work that has not been completed. + + AddTaskFunc manages the additions, but is designed to have its + transactions failed-out on overlap with a similar task already written. + It's up to the TaskInterface implementer to discover this overlap via + some other table it uses (since overlap can mean very different things). + +harmony_task_history + + This holds transactions that completed or saw too many retries. It also + serves as input for subsequent (follower) tasks to kick off. This is not + done machine-internally because a follower may not be on the same machine + as the previous task. 
+ +harmony_task_machines + + Managed by lib/harmony/resources, this is a reference to machines registered + via the resources. This registration does not obligate the machine to + anything, but serves as a discovery mechanism. Paths are hostnames + ports + which are presumed to support http, but this assumption is only used by + the task system. + +harmony_task_follow / harmony_task_impl + + These tables are used to fast-path notifications to other machines instead + of waiting for polling. _impl helps round-robin work pick-up. _follow helps + discover the machines that are interested in creating tasks following the + task that just completed. +*/ +package harmonytask diff --git a/lib/harmony/harmonytask/harmonytask.go b/lib/harmony/harmonytask/harmonytask.go new file mode 100644 index 000000000..1f5662959 --- /dev/null +++ b/lib/harmony/harmonytask/harmonytask.go @@ -0,0 +1,386 @@ +package harmonytask + +import ( + "context" + "fmt" + "strconv" + "sync/atomic" + "time" + + "github.com/filecoin-project/lotus/lib/harmony/resources" + "github.com/gin-gonic/gin" + + "github.com/filecoin-project/lotus/lib/harmony/harmonydb" +) + +// Consts (except for unit test) +var POLL_DURATION = time.Minute // Poll for Work this frequently +var CLEANUP_FREQUENCY = 5 * time.Minute // Check for dead workers this often * everyone + +type TaskTypeDetails struct { + // Max returns how many tasks this machine can run of this type. + // Negative means unrestricted. + Max int + + // Name is the task name to be added to the task list. + Name string + + // Peak costs to Do() the task. + Cost resources.Resources + + // Max Failure count before the job is dropped. + // 0 = retry forever + MaxFailures uint + + // Follow another task's completion via this task's creation. + // The function should populate extraInfo from data + // available from the previous task's tables, using the given TaskID. + // It should also return success if the trigger succeeded. 
+ // NOTE: if refatoring tasks, see if your task is + // necessary. Ex: Is the sector state correct for your stage to run? + Follows map[string]func(TaskID, AddTaskFunc) bool +} + +// TaskInterface must be implemented in order to have a task used by harmonytask. +type TaskInterface interface { + // Do the task assigned. Call stillOwned before making single-writer-only + // changes to ensure the work has not been stolen. + // This is the ONLY function that should attempt to do the work, and must + // ONLY be called by harmonytask. + // Indicate if the task no-longer needs scheduling with done=true including + // cases where it's past the deadline. + Do(taskID TaskID, stillOwned func() bool) (done bool, err error) + + // CanAccept should return if the task can run on this machine. It should + // return null if the task type is not allowed on this machine. + // It should select the task it most wants to accomplish. + // It is also responsible for determining disk space (including scratch). + CanAccept([]TaskID) (*TaskID, error) + + // TypeDetails() returns static details about how this task behaves and + // how this machine will run it. Read once at the beginning. + TypeDetails() TaskTypeDetails + + // This listener will consume all external sources continuously for work. + // Do() may also be called from a backlog of work. This must not + // start doing the work (it still must be scheduled). + // Note: Task de-duplication should happen in ExtraInfoFunc by + // returning false, typically by determining from the tx that the work + // exists already. The easy way is to have a unique joint index + // across all fields that will be common. + // Adder should typically only add its own task type, but multiple + // is possible for when 1 trigger starts 2 things. 
+ // Usage Example: + // func (b *BazType)Adder(addTask AddTaskFunc) { + // for { + // bazMaker := <- bazChannel + // addTask("baz", func(t harmonytask.TaskID, txn db.Transaction) bool { + // _, err := txn.Exec(`INSERT INTO bazInfoTable (taskID, qix, mot) + // VALUES ($1,$2,$3)`, id, bazMaker.qix, bazMaker.mot) + // if err != nil { + // scream(err) + // return false + // } + // return true + // }) + // } + // } + Adder(AddTaskFunc) +} + +type AddTaskFunc func(extraInfo func(TaskID, *harmonydb.Tx) bool) + +type TaskEngine struct { + ctx context.Context + handlers []*taskTypeHandler + db *harmonydb.DB + workAdderMutex *notifyingMx + reg *resources.Reg + grace context.CancelFunc + taskMap map[string]*taskTypeHandler + ownerID int + tryAllWork chan bool // notify if work completed + follows map[string][]followStruct + lastFollowTime time.Time + lastCleanup atomic.Value +} +type followStruct struct { + f func(TaskID, AddTaskFunc) bool + h *taskTypeHandler +} + +type TaskID int + +// New creates all the task definitions. 
Note that TaskEngine +// knows nothing about the tasks themselves and serves to be a +// generic container for common work +func New( + db *harmonydb.DB, + impls []TaskInterface, + hostnameAndPort string) (*TaskEngine, error) { + + reg, err := resources.Register(db, hostnameAndPort) + if err != nil { + return nil, fmt.Errorf("cannot get resources: %w", err) + } + ctx, grace := context.WithCancel(context.Background()) + e := &TaskEngine{ + ctx: ctx, + grace: grace, + db: db, + reg: reg, + ownerID: reg.Resources.MachineID, // The current number representing "hostAndPort" + workAdderMutex: ¬ifyingMx{}, + taskMap: make(map[string]*taskTypeHandler, len(impls)), + tryAllWork: make(chan bool), + follows: make(map[string][]followStruct), + } + e.lastCleanup.Store(time.Now()) + for _, c := range impls { + h := taskTypeHandler{ + TaskInterface: c, + TaskTypeDetails: c.TypeDetails(), + TaskEngine: e, + } + e.handlers = append(e.handlers, &h) + e.taskMap[h.TaskTypeDetails.Name] = &h + + _, err := db.Exec(e.ctx, `INSERT INTO harmony_task_impl (owner_id, name) + VALUES ($1,$2)`, e.ownerID, h.Name) + if err != nil { + return nil, fmt.Errorf("can't update impl: %w", err) + } + + for name, fn := range c.TypeDetails().Follows { + e.follows[name] = append(e.follows[name], followStruct{fn, &h}) + + // populate harmony_task_follows + _, err := db.Exec(e.ctx, `INSERT INTO harmony_task_follows (owner_id, from_task, to_task) + VALUES ($1,$2,$3)`, e.ownerID, name, h.Name) + if err != nil { + return nil, fmt.Errorf("can't update harmony_task_follows: %w", err) + } + } + } + + // resurrect old work + { + var taskRet []struct { + ID int + Name string + } + + err := db.Select(e.ctx, &taskRet, `SELECT id, name from harmony_task WHERE owner_id=$1`, e.ownerID) + if err != nil { + return nil, err + } + for _, w := range taskRet { + // edge-case: if old assignments are not available tasks, unlock them. 
+ h := e.taskMap[w.Name] + if h == nil { + _, err := db.Exec(e.ctx, `UPDATE harmony_task SET owner=NULL WHERE id=$1`, w.ID) + if err != nil { + log.Error("Cannot remove self from owner field: ", err) + continue // not really fatal, but not great + } + } + if !h.considerWork([]TaskID{TaskID(w.ID)}) { + log.Error("Strange: Unable to accept previously owned task: ", w.ID, w.Name) + } + } + } + for _, h := range e.handlers { + go h.Adder(h.AddTask) + } + go e.poller() + + return e, nil +} + +// GracefullyTerminate hangs until all present tasks have completed. +// Call this to cleanly exit the process. As some processes are long-running, +// passing a deadline will ignore those still running (to be picked-up later). +func (e *TaskEngine) GracefullyTerminate(deadline time.Duration) { + e.grace() + e.reg.Shutdown() + deadlineChan := time.NewTimer(deadline).C + + // block bumps & follows by unreg from DBs. + _, err := e.db.Exec(context.Background(), `DELETE FROM harmony_task_impl WHERE owner_id=$1`, e.ownerID) + if err != nil { + log.Warn("Could not clean-up impl table: %w", err) + } + _, err = e.db.Exec(context.Background(), `DELETE FROM harmony_task_follow WHERE owner_id=$1`, e.ownerID) + if err != nil { + log.Warn("Could not clean-up impl table: %w", err) + } +top: + for _, h := range e.handlers { + if h.Count.Load() > 0 { + select { + case <-deadlineChan: + return + default: + time.Sleep(time.Millisecond) + goto top + } + } + } +} + +func (e *TaskEngine) poller() { + for { + select { + case <-e.tryAllWork: ///////////////////// Find work after some work finished + case <-time.NewTicker(POLL_DURATION).C: // Find work periodically + case <-e.ctx.Done(): ///////////////////// Graceful exit + return + } + e.followWorkInDB() // "Follows" the slow way + e.pollerTryAllWork() // "Bumps" (round robin tasks) the slow way + } +} + +// followWorkInDB implements "Follows" the slow way +func (e *TaskEngine) followWorkInDB() { + // Step 1: What are we following? 
+ var lastFollowTime time.Time + lastFollowTime, e.lastFollowTime = e.lastFollowTime, time.Now() + + for from_name, srcs := range e.follows { + var cList []int // Which work is done (that we follow) since we last checked? + err := e.db.Select(e.ctx, &cList, `SELECT h.task_id FROM harmony_task_history + WHERE h.work_end>$1 AND h.name=$2`, lastFollowTime, from_name) + if err != nil { + log.Error("Could not query DB: ", err) + return + } + for _, src := range srcs { + for _, workAlreadyDone := range cList { // Were any tasks made to follow these tasks? + var ct int + err := e.db.QueryRow(e.ctx, `SELECT COUNT(*) FROM harmony_task + WHERE name=$1 AND previous_task=$2`, src.h.Name, workAlreadyDone).Scan(&ct) + if err != nil { + log.Error("Could not query harmony_task: ", err) + return // not recoverable here + } + if ct > 0 { + continue + } + // we need to create this task + if !src.h.Follows[from_name](TaskID(workAlreadyDone), src.h.AddTask) { + // But someone may have beaten us to it. + log.Infof("Unable to add task %s following Task(%d, %s)", src.h.Name, workAlreadyDone, from_name) + } + } + } + } +} + +// pollerTryAllWork implements "Bumps" (next task) the slow way +func (e *TaskEngine) pollerTryAllWork() { + if time.Since(e.lastCleanup.Load().(time.Time)) > CLEANUP_FREQUENCY { + e.lastCleanup.Store(time.Now()) + resources.CleanupMachines(e.ctx, e.db) + } + for _, v := range e.handlers { + rerun: + if v.AssertMachineHasCapacity() != nil { + continue + } + var unownedTasks []TaskID + err := e.db.Select(e.ctx, &unownedTasks, `SELECT id + FROM harmony_task + WHERE owner_id IS NULL AND name=$1 + ORDER BY update_time`, v.Name) + if err != nil { + log.Error("Unable to read work ", err) + continue + } + accepted := v.considerWork(unownedTasks) + if !accepted { + log.Warn("Work not accepted") + continue + } + if len(unownedTasks) > 1 { + e.bump(v.Name) // wait for others before trying again to add work. 
+ goto rerun + } + } +} + +// AddHttpHandlers TODO this needs to be called by the http server to register routes. +// This implements the receiver-side of "follows" and "bumps" the fast way. +func (e *TaskEngine) AddHttpHandlers(root gin.IRouter) { + s := root.Group("/scheduler/") + f := s.Group("/follows") + for name, v := range e.follows { + f.GET("/"+name+"/:tID", func(c *gin.Context) { + tIDString := c.Param("tID") + tID, err := strconv.Atoi(tIDString) + if err != nil { + c.AbortWithError(401, err) + return + } + taskAdded := false + for _, v := range v { + taskAdded = taskAdded || v.f(TaskID(tID), v.h.AddTask) + } + if taskAdded { + e.tryAllWork <- true + c.Status(200) + } + c.Status(202) // NOTE: 202 for "accepted" but not worked. + }) + } + b := s.Group("/bump") + for _, h := range e.handlers { + b.GET("/"+h.Name+"/:tID", func(c *gin.Context) { + tIDString := c.Param("tID") + tID, err := strconv.Atoi(tIDString) + if err != nil { + c.AbortWithError(401, err) + return + } + // We NEED to block while trying to deliver + // this work to ease the network impact. + if h.considerWork([]TaskID{TaskID(tID)}) { + c.Status(200) + } + c.Status(202) // NOTE: 202 for "accepted" but not worked. + }) + } +} + +func (e *TaskEngine) bump(taskType string) { + var res []string + err := e.db.Select(e.ctx, &res, `SELECT host_and_port FROM harmony_machines m + JOIN harmony_task_impl i ON i.owner_id=m.id + WHERE i.name=$1`, taskType) + if err != nil { + log.Error("Could not read db for bump: ", err) + return + } + for _, url := range res { + resp, err := hClient.Get(url + "/scheduler/bump/" + taskType) + if err != nil { + log.Info("Server unreachable to bump: ", err) + continue + } + if resp.StatusCode == 200 { + return // just want 1 taker. + } + } +} + +// resourcesInUse requires workListsMutex to be already locked. 
+func (e *TaskEngine) resourcesInUse() resources.Resources { + tmp := e.reg.Resources + for _, t := range e.handlers { + ct := t.Count.Load() + tmp.Cpu -= int(ct) * t.Cost.Cpu + tmp.Gpu -= float64(ct) * t.Cost.Gpu + tmp.Ram -= uint64(ct) * t.Cost.Ram + } + return tmp +} diff --git a/lib/harmony/harmonytask/notifyingMx.go b/lib/harmony/harmonytask/notifyingMx.go new file mode 100644 index 000000000..51c4e0a53 --- /dev/null +++ b/lib/harmony/harmonytask/notifyingMx.go @@ -0,0 +1,16 @@ +package harmonytask + +import "sync" + +type notifyingMx struct { + sync.Mutex + UnlockNotify func() +} + +func (n *notifyingMx) Unlock() { + tmp := n.UnlockNotify + n.Mutex.Unlock() + if tmp != nil { + tmp() + } +} diff --git a/lib/harmony/harmonytask/taskTypeHandler.go b/lib/harmony/harmonytask/taskTypeHandler.go new file mode 100644 index 000000000..079f33704 --- /dev/null +++ b/lib/harmony/harmonytask/taskTypeHandler.go @@ -0,0 +1,276 @@ +package harmonytask + +import ( + "context" + "errors" + "io" + "net/http" + "strconv" + "sync/atomic" + "time" + + "github.com/filecoin-project/lotus/lib/harmony/harmonydb" + logging "github.com/ipfs/go-log/v2" +) + +var log = logging.Logger("harmonytask") + +type taskTypeHandler struct { + TaskInterface + TaskTypeDetails + TaskEngine *TaskEngine + Count atomic.Int32 /// locked by TaskEngine's mutex + +} + +func (h *taskTypeHandler) AddTask(extra func(TaskID, *harmonydb.Tx) bool) { + var tID TaskID + did, err := h.TaskEngine.db.BeginTransaction(h.TaskEngine.ctx, func(tx *harmonydb.Tx) bool { + // create taskID (from DB) + _, err := tx.Exec(`INSERT INTO harmony_task (name, added_by, posted_time) + VALUES ($1, $2, CURRENT_TIMESTAMP) `, h.Name, h.TaskEngine.ownerID) + if err != nil { + log.Error("Could not insert into harmonyTask", err) + return false + } + err = tx.QueryRow("SELECT id FROM harmony_task ORDER BY update_time DESC LIMIT 1").Scan(&tID) + if err != nil { + log.Error("Could not select ID: ", err) + } + return extra(tID, tx) + }) + if err 
!= nil { + log.Error(err) + } + if !did { + return + } + + if !h.considerWork([]TaskID{tID}) { + h.TaskEngine.bump(h.Name) // We can't do it. How about someone else. + } +} + +func (h *taskTypeHandler) considerWork(ids []TaskID) (workAccepted bool) { + if len(ids) == 0 { + return true // stop looking for takers + } + + // 1. Can we do any more of this task type? + if h.Max > -1 && int(h.Count.Load()) == h.Max { + log.Infow("did not accept task", "name", h.Name, "reason", "at max already") + return false + } + + h.TaskEngine.workAdderMutex.Lock() + defer h.TaskEngine.workAdderMutex.Unlock() + + // 2. Can we do any more work? + err := h.AssertMachineHasCapacity() + if err != nil { + log.Info(err) + return false + } + + // 3. What does the impl say? + tID, err := h.CanAccept(ids) + if err != nil { + log.Error(err) + return false + } + if tID == nil { + log.Infow("did not accept task", "task_id", ids[0], "reason", "CanAccept() refused") + return false + } + + // 4. Can we claim the work for our hostname? + ct, err := h.TaskEngine.db.Exec(h.TaskEngine.ctx, "UPDATE harmony_task SET owner_id=$1 WHERE id=$2 AND owner_id IS NULL", h.TaskEngine.ownerID, *tID) + if err != nil { + log.Error(err) + return false + } + if ct == 0 { + log.Infow("did not accept task", "task_id", strconv.Itoa(int(*tID)), "reason", "already Taken") + return false + } + + go func() { + h.Count.Add(1) + + var done bool + var doErr error + workStart := time.Now() + + defer func() { + if r := recover(); r != nil { + log.Error("Recovered from a serious error "+ + "while processing "+h.Name+" task "+strconv.Itoa(int(*tID))+": ", r) + } + h.Count.Add(-1) + + h.recordCompletion(*tID, workStart, done, doErr) + if done { + h.triggerCompletionListeners(*tID) + } + + h.TaskEngine.tryAllWork <- true // Activate tasks in this machine + }() + + done, doErr = h.Do(*tID, func() bool { + var owner int + // Background here because we don't want GracefulRestart to block this save. 
+ err := h.TaskEngine.db.QueryRow(context.Background(), + `SELECT owner_id FROM harmony_task WHERE id=$1`, *tID).Scan(&owner) + if err != nil { + log.Error("Cannot determine ownership: ", err) + return false + } + return owner == h.TaskEngine.ownerID + }) + if doErr != nil { + log.Error("Do("+h.Name+", taskID="+strconv.Itoa(int(*tID))+") returned error: ", doErr) + } + }() + return true +} + +func (h *taskTypeHandler) recordCompletion(tID TaskID, workStart time.Time, done bool, doErr error) { + workEnd := time.Now() + + cm, err := h.TaskEngine.db.BeginTransaction(h.TaskEngine.ctx, func(tx *harmonydb.Tx) bool { + var postedTime time.Time + err := tx.QueryRow(`SELECT posted_time FROM harmony_task WHERE id=$1`, tID).Scan(&postedTime) + if err != nil { + log.Error("Could not log completion: ", err) + return false + } + result := "unspecified error" + if done { + _, err = tx.Exec("DELETE FROM harmony_task WHERE id=$1", tID) + if err != nil { + log.Error("Could not log completion: ", err) + return false + } + result = "" + } else { + if doErr != nil { + result = "error: " + doErr.Error() + } + var deleteTask bool + if h.MaxFailures > 0 { + ct := uint(0) + err = tx.QueryRow(`SELECT count(*) FROM harmony_task_history + WHERE task_id=$1 AND result=FALSE`, tID).Scan(&ct) + if err != nil { + log.Error("Could not read task history:", err) + return false + } + if ct >= h.MaxFailures { + deleteTask = true + } + } + if deleteTask { + _, err = tx.Exec("DELETE FROM harmony_task WHERE id=$1", tID) + if err != nil { + log.Error("Could not delete failed job: ", err) + return false + } + // Note: Extra Info is left laying around for later review & clean-up + } else { + tx.Exec(`UPDATE harmony_task SET owner_id=NULL WHERE id=$1`, tID) + if err != nil { + log.Error("Could not disown failed task: ", tID, err) + return false + } + } + } + _, err = tx.Exec(`INSERT INTO harmony_task_history + (task_id, name, posted, work_start, work_end, result, err) + VALUES ($1, $2, $3, $4, $5, $6, $7)`, 
tID, h.Name, postedTime, workStart, workEnd, done, result) + if err != nil { + log.Error("Could not write history: ", err) + return false + } + return true + }) + if err != nil { + log.Error("Could not record transaction: ", err) + return + } + if !cm { + log.Error("Committing the task records failed") + } +} + +func (h *taskTypeHandler) AssertMachineHasCapacity() error { + r := h.TaskEngine.resourcesInUse() + + if r.Cpu-h.Cost.Cpu < 0 { + return errors.New("Did not accept " + h.Name + " task: out of cpu") + } + if h.Cost.Ram > r.Ram { + return errors.New("Did not accept " + h.Name + " task: out of RAM") + } + if r.Gpu-h.Cost.Gpu < 0 { + return errors.New("Did not accept " + h.Name + " task: out of available GPU") + } + return nil +} + +var hClient = http.Client{} + +func init() { + hClient.Timeout = 3 * time.Second +} + +// triggerCompletionListeners does in order: +// 1. Trigger all in-process followers (b/c it's fast). +// 2. Trigger all living processes with followers via DB +// 3. Future followers (think partial upgrade) can read harmony_task_history +// 3a. The Listen() handles slow follows. 
+func (h *taskTypeHandler) triggerCompletionListeners(tID TaskID) { + // InProcess (#1 from Description) + inProcessDefs := h.TaskEngine.follows[h.Name] + inProcessFollowers := make([]string, len(inProcessDefs)) + for _, fs := range inProcessDefs { + if fs.f(tID, fs.h.AddTask) { + inProcessFollowers = append(inProcessFollowers, fs.h.Name) + } + } + + // Over HTTP (#2 from Description) + var hps []struct { + HostAndPort string + ToType string + } + err := h.TaskEngine.db.Select(h.TaskEngine.ctx, &hps, `SELECT m.host_and_port, to_type + FROM harmony_task_follow f JOIN harmony_machines m ON m.id=f.owner_id + WHERE from_type=$1 AND to_type NOT IN $2 AND f.owner_id != $3`, + h.Name, inProcessFollowers, h.TaskEngine.ownerID) + if err != nil { + log.Warn("Could not fast-trigger partner processes.", err) + return + } + hostsVisited := map[string]bool{} + tasksVisited := map[string]bool{} + for _, v := range hps { + if hostsVisited[v.HostAndPort] || tasksVisited[v.ToType] { + continue + } + resp, err := hClient.Get(v.HostAndPort + "/scheduler/follows/" + h.Name) + if err != nil { + log.Warn("Couldn't hit http endpoint: ", err) + continue + } + b, err := io.ReadAll(resp.Body) + if err != nil { + log.Warn("Couldn't hit http endpoint: ", err) + continue + } + hostsVisited[v.HostAndPort], tasksVisited[v.ToType] = true, true + if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusAccepted { + log.Error("IO failed for fast nudge: ", string(b)) + continue + } + } +} diff --git a/lib/harmony/resources/memsys.go b/lib/harmony/resources/memsys.go new file mode 100644 index 000000000..1a45b5b22 --- /dev/null +++ b/lib/harmony/resources/memsys.go @@ -0,0 +1,22 @@ +//go:build darwin || freebsd || openbsd || dragonfly || netbsd +// +build darwin freebsd openbsd dragonfly netbsd + +package resources + +import ( + "encoding/binary" + "syscall" +) + +func sysctlUint64(name string) (uint64, error) { + s, err := syscall.Sysctl(name) + if err != nil { + return 0, err + } + // 
hack because the string conversion above drops a \0 + b := []byte(s) + if len(b) < 8 { + b = append(b, 0) + } + return binary.LittleEndian.Uint64(b), nil +} diff --git a/lib/harmony/resources/resources.go b/lib/harmony/resources/resources.go new file mode 100644 index 000000000..77200b873 --- /dev/null +++ b/lib/harmony/resources/resources.go @@ -0,0 +1,180 @@ +package resources + +import ( + "bytes" + "context" + "fmt" + "os/exec" + "regexp" + "runtime" + "strings" + "sync/atomic" + "time" + + cl "github.com/Nv7-Github/go-cl" + ffi "github.com/filecoin-project/filecoin-ffi" + "github.com/filecoin-project/lotus/lib/harmony/harmonydb" + logging "github.com/ipfs/go-log/v2" + "github.com/pbnjay/memory" + + "golang.org/x/sys/unix" + + "github.com/samber/lo" +) + +var LOOKS_DEAD_TIMEOUT = 10 * time.Minute // Time w/o minute heartbeats + +type Resources struct { + Cpu int + Gpu float64 + GpuRam uint64 + Ram uint64 + MachineID int +} +type Reg struct { + Resources + shutdown atomic.Bool +} + +var logger = logging.Logger("harmonytask") + +var lotusRE = regexp.MustCompile("lotus-worker|lotus-harmony|yugabyted") + +func Register(db *harmonydb.DB, hostnameAndPort string) (*Reg, error) { + var reg Reg + var err error + reg.Resources, err = getResources() + if err != nil { + return nil, err + } + ctx := context.Background() + { // Learn our owner_id while updating harmony_machines + var ownerID []int + err := db.Select(ctx, &ownerID, `SELECT id FROM harmony_machines WHERE host_and_port=$1`, hostnameAndPort) + if err != nil { + return nil, fmt.Errorf("could not read from harmony_machines: %w", err) + } + if len(ownerID) == 0 { + err = db.QueryRow(ctx, `INSERT INTO harmony_machines + (host_and_port, cpu, ram, gpu, gpuram) VALUES + ($1,$2,$3,$4,$5) RETURNING id`, + hostnameAndPort, reg.Cpu, reg.Ram, reg.Gpu, reg.GpuRam).Scan(®.Resources.MachineID) + if err != nil { + return nil, err + } + + } else { + reg.MachineID = ownerID[0] + _, err := db.Exec(ctx, `UPDATE harmony_machines SET 
+ cpu=$1, ram=$2, gpu=$3, gpuram=$4 WHERE id=$6`, + reg.Cpu, reg.Ram, reg.Gpu, reg.GpuRam, reg.Resources.MachineID) + if err != nil { + return nil, err + } + } + CleanupMachines(context.Background(), db) + } + go func() { + for { + time.Sleep(time.Minute) + if reg.shutdown.Load() { + return + } + _, err := db.Exec(ctx, `UPDATE harmony_machines SET last_contact=CURRENT_TIMESTAMP`) + if err != nil { + logger.Error("Cannot keepalive ", err) + } + } + }() + + return ®, nil +} +func CleanupMachines(ctx context.Context, db *harmonydb.DB) int { + ct, err := db.Exec(ctx, `DELETE FROM harmony_machines WHERE last_contact < $1`, + time.Now().Add(-1*LOOKS_DEAD_TIMEOUT)) + if err != nil { + logger.Warn("unable to delete old machines: ", err) + } + return ct +} + +func (res *Reg) Shutdown() { + res.shutdown.Store(true) +} + +func getResources() (res Resources, err error) { + b, err := exec.Command(`ps`, `-ef`).CombinedOutput() + if err != nil { + logger.Warn("Could not safety check for 2+ processes: ", err) + } else { + found := 0 + for _, b := range bytes.Split(b, []byte("\n")) { + if lotusRE.Match(b) { + found++ + } + } + if found > 1 { + logger.Error("This Lotus process should run alone on a machine. 
Use CGroup.") + } + } + + res = Resources{ + Cpu: runtime.NumCPU(), + Ram: memory.FreeMemory(), + GpuRam: getGpuRam(), + } + + { // GPU boolean + gpus, err := ffi.GetGPUDevices() + if err != nil { + logger.Errorf("getting gpu devices failed: %+v", err) + } + all := strings.ToLower(strings.Join(gpus, ",")) + if len(gpus) > 1 || strings.Contains(all, "ati") || strings.Contains(all, "nvidia") { + res.Gpu = 1 + } + } + + return res, nil +} + +func getGpuRam() uint64 { + platforms, err := cl.GetPlatforms() + if err != nil { + logger.Error(err) + return 0 + } + + return uint64(lo.SumBy(platforms, func(p *cl.Platform) int64 { + d, err := p.GetDevices(cl.DeviceTypeAll) + if err != nil { + logger.Error(err) + return 0 + } + return lo.SumBy(d, func(d *cl.Device) int64 { return d.GlobalMemSize() }) + })) +} + +func DiskFree(path string) (uint64, error) { + s := unix.Statfs_t{} + err := unix.Statfs(path, &s) + if err != nil { + return 0, err + } + + return s.Bfree * uint64(s.Bsize), nil +} + +/* NOT for Darwin. +func GetMemFree() uint64 { + in := unix.Sysinfo_t{} + err := unix.Sysinfo(&in) + if err != nil { + return 0 + } + // If this is a 32-bit system, then these fields are + // uint32 instead of uint64. + // So we always convert to uint64 to match signature. 
+ return uint64(in.Freeram) * uint64(in.Unit) +} +*/ From 225f093103ae83cda8bf8b3567b78845c04f9de8 Mon Sep 17 00:00:00 2001 From: "Andrew Jackson (Ajax)" Date: Mon, 14 Aug 2023 11:42:39 -0500 Subject: [PATCH 02/17] oops committed a patch --- htask.patch | 1660 --------------------------------------------------- 1 file changed, 1660 deletions(-) delete mode 100644 htask.patch diff --git a/htask.patch b/htask.patch deleted file mode 100644 index 0c2d12b4e..000000000 --- a/htask.patch +++ /dev/null @@ -1,1660 +0,0 @@ -diff --git a/cmd/lotus-worker/main.go b/cmd/lotus-worker/main.go -index 944791275..995a3cbe0 100644 ---- a/cmd/lotus-worker/main.go -+++ b/cmd/lotus-worker/main.go -@@ -609,6 +609,7 @@ var runCmd = &cli.Command{ - if err := srv.Shutdown(context.TODO()); err != nil { - log.Errorf("shutting down RPC server failed: %s", err) - } -+ //taskManager.GracefullyTerminate(5*time.Hour) - log.Warn("Graceful shutdown successful") - }() - -diff --git a/go.mod b/go.mod -index 2da784ad6..661495e89 100644 ---- a/go.mod -+++ b/go.mod -@@ -156,7 +156,7 @@ require ( - golang.org/x/exp v0.0.0-20230321023759-10a507213a29 - golang.org/x/net v0.10.0 - golang.org/x/sync v0.2.0 -- golang.org/x/sys v0.9.0 -+ golang.org/x/sys v0.10.0 - golang.org/x/term v0.9.0 - golang.org/x/time v0.0.0-20220722155302-e5dcc9cfc0b9 - golang.org/x/tools v0.9.1 -@@ -167,6 +167,8 @@ require ( - - require ( - github.com/GeertJohan/go.incremental v1.0.0 // indirect -+ github.com/Inkeliz/go-opencl v0.0.0-20200806180703-5f0707fba006 // indirect -+ github.com/Nv7-Github/go-cl v0.0.0-20210426150049-f121093b60ef // indirect - github.com/PuerkitoBio/purell v1.1.1 // indirect - github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect - github.com/StackExchange/wmi v1.2.1 // indirect -@@ -177,8 +179,10 @@ require ( - github.com/beorn7/perks v1.0.1 // indirect - github.com/bep/debounce v1.2.1 // indirect - github.com/boltdb/bolt v1.3.1 // indirect -+ github.com/bytedance/sonic v1.9.1 // 
indirect - github.com/cespare/xxhash v1.1.0 // indirect - github.com/cespare/xxhash/v2 v2.2.0 // indirect -+ github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect - github.com/cilium/ebpf v0.9.1 // indirect - github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect - github.com/crackcomm/go-gitignore v0.0.0-20170627025303-887ab5e44cc3 // indirect -@@ -202,7 +206,10 @@ require ( - github.com/flynn/noise v1.0.0 // indirect - github.com/francoispqt/gojay v1.2.13 // indirect - github.com/fsnotify/fsnotify v1.6.0 // indirect -+ github.com/gabriel-vasile/mimetype v1.4.2 // indirect - github.com/gdamore/encoding v1.0.0 // indirect -+ github.com/gin-contrib/sse v0.1.0 // indirect -+ github.com/gin-gonic/gin v1.9.1 // indirect - github.com/go-kit/log v0.2.1 // indirect - github.com/go-logfmt/logfmt v0.5.1 // indirect - github.com/go-logr/logr v1.2.4 // indirect -@@ -211,7 +218,11 @@ require ( - github.com/go-openapi/jsonpointer v0.19.3 // indirect - github.com/go-openapi/jsonreference v0.19.4 // indirect - github.com/go-openapi/swag v0.19.11 // indirect -+ github.com/go-playground/locales v0.14.1 // indirect -+ github.com/go-playground/universal-translator v0.18.1 // indirect -+ github.com/go-playground/validator/v10 v10.14.0 // indirect - github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect -+ github.com/goccy/go-json v0.10.2 // indirect - github.com/godbus/dbus/v5 v5.1.0 // indirect - github.com/gogo/protobuf v1.3.2 // indirect - github.com/golang/glog v1.1.0 // indirect -@@ -256,10 +267,12 @@ require ( - github.com/joeshaw/multierror v0.0.0-20140124173710-69b34d4ec901 // indirect - github.com/josharian/intern v1.0.0 // indirect - github.com/jpillora/backoff v1.0.0 // indirect -+ github.com/json-iterator/go v1.1.12 // indirect - github.com/kilic/bls12-381 v0.1.0 // indirect - github.com/klauspost/compress v1.16.5 // indirect - github.com/klauspost/cpuid/v2 v2.2.5 // indirect - github.com/koron/go-ssdp v0.0.4 // indirect -+ 
github.com/leodido/go-urn v1.2.4 // indirect - github.com/libp2p/go-cidranger v1.1.0 // indirect - github.com/libp2p/go-flow-metrics v0.1.0 // indirect - github.com/libp2p/go-libp2p-asn-util v0.3.0 // indirect -@@ -280,6 +293,8 @@ require ( - github.com/mikioh/tcpinfo v0.0.0-20190314235526-30a79bb1804b // indirect - github.com/mikioh/tcpopt v0.0.0-20190314235656-172688c1accc // indirect - github.com/minio/sha256-simd v1.0.1 // indirect -+ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect -+ github.com/modern-go/reflect2 v1.0.2 // indirect - github.com/mr-tron/base58 v1.2.0 // indirect - github.com/multiformats/go-base36 v0.2.0 // indirect - github.com/multiformats/go-multiaddr-fmt v0.1.0 // indirect -@@ -291,6 +306,7 @@ require ( - github.com/opencontainers/runtime-spec v1.0.2 // indirect - github.com/opentracing/opentracing-go v1.2.0 // indirect - github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 // indirect -+ github.com/pelletier/go-toml/v2 v2.0.8 // indirect - github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9 // indirect - github.com/pkg/errors v0.9.1 // indirect - github.com/pmezard/go-difflib v1.0.0 // indirect -@@ -306,12 +322,15 @@ require ( - github.com/rivo/uniseg v0.1.0 // indirect - github.com/rs/cors v1.7.0 // indirect - github.com/russross/blackfriday/v2 v2.1.0 // indirect -+ github.com/samber/lo v1.38.1 // indirect -+ github.com/samuel/go-opencl v0.0.0-20171108220231-cbcfd10c32ad // indirect - github.com/shirou/gopsutil v2.18.12+incompatible // indirect - github.com/sirupsen/logrus v1.9.0 // indirect - github.com/spaolacci/murmur3 v1.1.0 // indirect - github.com/tidwall/gjson v1.14.4 // indirect -+ github.com/twitchyliquid64/golang-asm v0.15.1 // indirect - github.com/twmb/murmur3 v1.1.6 // indirect -- github.com/ugorji/go/codec v1.2.6 // indirect -+ github.com/ugorji/go/codec v1.2.11 // indirect - github.com/valyala/bytebufferpool v1.0.0 // indirect - github.com/valyala/fasttemplate v1.0.1 // indirect 
- github.com/whyrusleeping/cbor v0.0.0-20171005072247-63513f603b11 // indirect -@@ -327,6 +346,7 @@ require ( - go.opentelemetry.io/otel/trace v1.16.0 // indirect - go.uber.org/dig v1.17.0 // indirect - go4.org v0.0.0-20230225012048-214862532bf5 // indirect -+ golang.org/x/arch v0.3.0 // indirect - golang.org/x/mod v0.10.0 // indirect - golang.org/x/text v0.10.0 // indirect - gonum.org/v1/gonum v0.13.0 // indirect -diff --git a/go.sum b/go.sum -index ebbc4dcc8..74127c535 100644 ---- a/go.sum -+++ b/go.sum -@@ -59,6 +59,8 @@ github.com/GeertJohan/go.rice v1.0.3 h1:k5viR+xGtIhF61125vCE1cmJ5957RQGXG6dmbaWZ - github.com/GeertJohan/go.rice v1.0.3/go.mod h1:XVdrU4pW00M4ikZed5q56tPf1v2KwnIKeIdc9CBYNt4= - github.com/Gurpartap/async v0.0.0-20180927173644-4f7f499dd9ee h1:8doiS7ib3zi6/K172oDhSKU0dJ/miJramo9NITOMyZQ= - github.com/Gurpartap/async v0.0.0-20180927173644-4f7f499dd9ee/go.mod h1:W0GbEAA4uFNYOGG2cJpmFJ04E6SD1NLELPYZB57/7AY= -+github.com/Inkeliz/go-opencl v0.0.0-20200806180703-5f0707fba006 h1:TKWkFaRW5EPQyrS1pM0vm3vvqw/jmHu+FkV8gRD+7/w= -+github.com/Inkeliz/go-opencl v0.0.0-20200806180703-5f0707fba006/go.mod h1:9ILtD1/UTP/Y7JMCU8loWZMDvhrQuTgHzHatG6z9ZdQ= - github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible/go.mod h1:r7JcOSlj0wfOMncg0iLm8Leh48TZaKVeNIfJntJ2wa0= - github.com/Kubuxu/go-os-helper v0.0.1/go.mod h1:N8B+I7vPCT80IcP58r50u4+gEEcsZETFUpAzWW2ep1Y= - github.com/Kubuxu/imtui v0.0.0-20210401140320-41663d68d0fa h1:1PPxEyGdIGVkX/kqMvLJ95a1dGS1Sz7tpNEgehEYYt0= -@@ -66,6 +68,8 @@ github.com/Kubuxu/imtui v0.0.0-20210401140320-41663d68d0fa/go.mod h1:WUmMvh9wMtq - github.com/Masterminds/glide v0.13.2/go.mod h1:STyF5vcenH/rUqTEv+/hBXlSTo7KYwg2oc2f4tzPWic= - github.com/Masterminds/semver v1.4.2/go.mod h1:MB6lktGJrhw8PrUyiEoblNEGEQ+RzHPF078ddwwvV3Y= - github.com/Masterminds/vcs v1.13.0/go.mod h1:N09YCmOQr6RLxC6UNHzuVwAdodYbbnycGHSmwVJjcKA= -+github.com/Nv7-Github/go-cl v0.0.0-20210426150049-f121093b60ef 
h1:DiNnYI6NBdeXGOJXptJcrYeDavJf4tImz/B4MOVQtMs= -+github.com/Nv7-Github/go-cl v0.0.0-20210426150049-f121093b60ef/go.mod h1:RRVtxaQlBBnbo+n2fgYHhxQmXDkRLKWcWX93lJL0Yhw= - github.com/OneOfOne/xxhash v1.2.2 h1:KMrpdQIwFcEqXDklaen+P1axHaj9BSKzvpUUfnHldSE= - github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= - github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tNFfI= -@@ -142,6 +146,9 @@ github.com/btcsuite/winsvc v1.0.0/go.mod h1:jsenWakMcC0zFBFurPLEAyrnc/teJEM1O46f - github.com/buger/goterm v1.0.3 h1:7V/HeAQHrzPk/U4BvyH2g9u+xbUW9nr4yRPyG59W4fM= - github.com/buger/goterm v1.0.3/go.mod h1:HiFWV3xnkolgrBV3mY8m0X0Pumt4zg4QhbdOzQtB8tE= - github.com/buger/jsonparser v0.0.0-20181115193947-bf1c66bbce23/go.mod h1:bbYlZJ7hK1yFx9hf58LP0zeX7UjIGs20ufpu3evjr+s= -+github.com/bytedance/sonic v1.5.0/go.mod h1:ED5hyg4y6t3/9Ku1R6dU/4KyJ48DZ4jPhfY1O2AihPM= -+github.com/bytedance/sonic v1.9.1 h1:6iJ6NqdoxCDr6mbY8h18oSO+cShGSMRGCEo7F2h0x8s= -+github.com/bytedance/sonic v1.9.1/go.mod h1:i736AoUSYt75HyZLoJW9ERYxcy6eaN6h4BZXU064P/U= - github.com/casbin/casbin/v2 v2.1.2/go.mod h1:YcPU1XXisHhLzuxH9coDNf2FbKpjGlbCg3n9yuLkIJQ= - github.com/cenkalti/backoff v2.2.1+incompatible/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQJJJ3NXXM= - github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -@@ -152,6 +159,9 @@ github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XL - github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= - github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= - github.com/cheekybits/genny v1.0.0/go.mod h1:+tQajlRqAUrPI7DOSpB0XAqZYtQakVtB7wXkRAgjxjQ= -+github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY= -+github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 
h1:qSGYFH7+jGhDF8vLC+iwCD4WpbV1EBDSzWkJODFLams= -+github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk= - github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= - github.com/chzyer/logex v1.2.1 h1:XHDu3E6q+gdHgsdTPH6ImJMIp436vR6MPtH8gP05QzM= - github.com/chzyer/logex v1.2.1/go.mod h1:JLbx6lG2kDbNRFnfkgvh4eRJRPX1QCoOIWomwysCBrQ= -@@ -386,6 +396,8 @@ github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMo - github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= - github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY= - github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw= -+github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU= -+github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA= - github.com/gbrlsnchs/jwt/v3 v3.0.1 h1:lbUmgAKpxnClrKloyIwpxm4OuWeDl5wLk52G91ODPw4= - github.com/gbrlsnchs/jwt/v3 v3.0.1/go.mod h1:AncDcjXz18xetI3A6STfXq2w+LuTx8pQ8bGEwRN8zVM= - github.com/gdamore/encoding v1.0.0 h1:+7OoQ1Bc6eTm5niUzBa0Ctsh6JbMW6Ra+YNuAtDBdko= -@@ -399,6 +411,8 @@ github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE - github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= - github.com/gin-gonic/gin v1.6.3 h1:ahKqKTFpO5KTPHxWZjEdPScmYaGtLo8Y4DMHoEsnp14= - github.com/gin-gonic/gin v1.6.3/go.mod h1:75u5sXoLsGZoRN5Sgbi1eraJ4GU3++wFwWzhwvtwp4M= -+github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg= -+github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU= - github.com/gliderlabs/ssh v0.1.1/go.mod h1:U7qILu1NlMHj9FlMhZLlkCdDnU1DBEAqr0aevW3Awn0= - github.com/go-check/check v0.0.0-20180628173108-788fd7840127/go.mod h1:9ES+weclKsC9YodN5RgxqK/VD9HM9JsCSh7rNhMZE98= - 
github.com/go-chi/chi v1.5.4 h1:QHdzF2szwjqVV4wmByUnTcsbIg7UGaQ0tPF2t5GcAIs= -@@ -445,10 +459,16 @@ github.com/go-openapi/swag v0.19.11/go.mod h1:Uc0gKkdR+ojzsEpjh39QChyu92vPgIr72P - github.com/go-playground/assert/v2 v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= - github.com/go-playground/locales v0.13.0 h1:HyWk6mgj5qFqCT5fjGBuRArbVDfE4hi8+e8ceBS/t7Q= - github.com/go-playground/locales v0.13.0/go.mod h1:taPMhCMXrRLJO55olJkUXHZBHCxTMfnGwq/HNwmWNS8= -+github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= -+github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY= - github.com/go-playground/universal-translator v0.17.0 h1:icxd5fm+REJzpZx7ZfpaD876Lmtgy7VtROAbHHXk8no= - github.com/go-playground/universal-translator v0.17.0/go.mod h1:UkSxE5sNxxRwHyU+Scu5vgOQjsIJAF8j9muTVoKLVtA= -+github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY= -+github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY= - github.com/go-playground/validator/v10 v10.2.0 h1:KgJ0snyC2R9VXYN2rneOtQcw5aHQB1Vv0sFl1UcHBOY= - github.com/go-playground/validator/v10 v10.2.0/go.mod h1:uOYAAleCW8F/7oMFd6aG0GOhaH6EGOAJShg8Id5JGkI= -+github.com/go-playground/validator/v10 v10.14.0 h1:vgvQWe3XCz3gIeFDm/HnTIbj6UGmg/+t63MyGU2n5js= -+github.com/go-playground/validator/v10 v10.14.0/go.mod h1:9iXMNT7sEkjXb0I+enO7QXmzG6QCsPWY4zveKFVRSyU= - github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= - github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= - github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= -@@ -464,6 +484,8 @@ github.com/gobwas/pool v0.2.0 h1:QEmUOlnSjWtnpRGHF3SauEiOsy82Cup83Vf2LcMlnc8= - github.com/gobwas/pool v0.2.0/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw= - 
github.com/gobwas/ws v1.0.2 h1:CoAavW/wd/kulfZmSIBt6p24n4j7tHgNVCjsfHVNUbo= - github.com/gobwas/ws v1.0.2/go.mod h1:szmBTxLgaFppYjEmNtny/v3w89xOydFnnZMcgRRu/EM= -+github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= -+github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= - github.com/godbus/dbus/v5 v5.0.3/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= - github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= - github.com/godbus/dbus/v5 v5.1.0 h1:4KLkAxT3aOY8Li4FRJe/KvhoNFFxo0m6fNuFUO8QJUk= -@@ -970,6 +992,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= - github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= - github.com/leodido/go-urn v1.2.0 h1:hpXL4XnriNwQ/ABnpepYM/1vCLWNDfUNts8dX3xTG6Y= - github.com/leodido/go-urn v1.2.0/go.mod h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII= -+github.com/leodido/go-urn v1.2.4 h1:XlAE/cm/ms7TE/VMVoduSpNBoyc2dOxHs5MZSwAN63Q= -+github.com/leodido/go-urn v1.2.4/go.mod h1:7ZrI8mTSeBSHl/UaRyKQW1qZeMgak41ANeCNaVckg+4= - github.com/lib/pq v1.10.0 h1:Zx5DJFEYQXio93kgXnQ09fXNiUKsqv4OUEu2UtGcB1E= - github.com/libp2p/go-addr-util v0.0.1/go.mod h1:4ac6O7n9rIAKB1dnd+s8IbbMXkt+oBpzX4/+RACcnlQ= - github.com/libp2p/go-addr-util v0.0.2/go.mod h1:Ecd6Fb3yIuLzq4bD7VcywcVSBtefcAwnUISBM3WG15E= -@@ -1405,7 +1429,10 @@ github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144T - github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 h1:onHthvaw9LFnH4t2DcNVpwGmV9E1BkGknEliJkfwQj0= - github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58/go.mod h1:DXv8WO4yhMYhSNPKjeNKa5WY9YCIEBRbNzFFPJbWO6Y= - github.com/pborman/uuid v1.2.0/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtPdI/k= -+github.com/pelletier/go-toml v1.2.0 h1:T5zMGML61Wp+FlcbWjRDT7yAxhJNAiPPLOFECq181zc= - github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= 
-+github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ= -+github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4= - github.com/performancecopilot/speed v3.0.0+incompatible/go.mod h1:/CLtqpZ5gBg1M9iaPbIdPPGyKcA8hKdoy6hAWba7Yac= - github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9 h1:1/WtZae0yGtPq+TI6+Tv1WTxkukpXeMlviSxvL7SRgk= - github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9/go.mod h1:x3N5drFsm2uilKKuuYo6LdyD8vZAW55sH/9w+pbo1sw= -@@ -1512,6 +1539,10 @@ github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf - github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= - github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd/go.mod h1:hPqNNc0+uJM6H+SuU8sEs5K5IQeKccPqeSjfgcKGgPk= - github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= -+github.com/samber/lo v1.38.1 h1:j2XEAqXKb09Am4ebOg31SpvzUTTs6EN3VfgeLUhPdXM= -+github.com/samber/lo v1.38.1/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA= -+github.com/samuel/go-opencl v0.0.0-20171108220231-cbcfd10c32ad h1:zyvTnsJPPAqVg2v3bbvTI+RdbVPJufZ+CWCPOX0Dtp8= -+github.com/samuel/go-opencl v0.0.0-20171108220231-cbcfd10c32ad/go.mod h1:KCqoxhWgoxCWg13iOq53YFf50jlonuuhIpO916aWEkg= - github.com/samuel/go-zookeeper v0.0.0-20190923202752-2cc03de413da/go.mod h1:gi+0XIa01GRL2eRQVjQkKGqKF3SF9vZR/HnPullcV2E= - github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= - github.com/sercand/kuberesolver v2.4.0+incompatible h1:WE2OlRf6wjLxHwNkkFLQGaZcVLEXjMjBPjjEU5vksH8= -@@ -1598,6 +1629,8 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ - github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals= - github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= - 
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -+github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -+github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= - github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= - github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= - github.com/stvp/go-udp-testing v0.0.0-20201019212854-469649b16807/go.mod h1:7jxmlfBCDBXRzr0eAQJ48XC1hBu1np4CS5+cHEYfwpc= -@@ -1618,6 +1651,8 @@ github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs= - github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= - github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= - github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM= -+github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= -+github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= - github.com/twmb/murmur3 v1.1.6 h1:mqrRot1BRxm+Yct+vavLMou2/iJt0tNVTTC0QoIjaZg= - github.com/twmb/murmur3 v1.1.6/go.mod h1:Qq/R7NUyOfr65zD+6Q5IHKsJLwP7exErjN6lyyq3OSQ= - github.com/uber/jaeger-client-go v2.30.0+incompatible h1:D6wyKGCecFaSRUpo8lCVbaOOb6ThwMmTEbhRwtKR97o= -@@ -1628,6 +1663,8 @@ github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljT - github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= - github.com/ugorji/go/codec v1.2.6 h1:7kbGefxLoDBuYXOms4yD7223OpNMMPNPZxXk5TvFcyQ= - github.com/ugorji/go/codec v1.2.6/go.mod h1:V6TCNZ4PHqoHGFZuSG1W8nrCzzdgA2DozYxWFFpvxTw= -+github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU= -+github.com/ugorji/go/codec v1.2.11/go.mod 
h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= - github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= - github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= - github.com/urfave/cli v1.22.2/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= -@@ -1789,6 +1826,9 @@ go4.org v0.0.0-20180809161055-417644f6feb5/go.mod h1:MkTOUMDaeVYJUOUsaDXIhWPZYa1 - go4.org v0.0.0-20200411211856-f5505b9728dd/go.mod h1:CIiUVy99QCPfoE13bO4EZaz5GZMZXMSBGhxRdsvzbkg= - go4.org v0.0.0-20230225012048-214862532bf5 h1:nifaUDeh+rPaBCMPMQHZmvJf+QdpLFnuQPwx+LxVmtc= - go4.org v0.0.0-20230225012048-214862532bf5/go.mod h1:F57wTi5Lrj6WLyswp5EYV1ncrEbFGHD4hhz6S1ZYeaU= -+golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= -+golang.org/x/arch v0.3.0 h1:02VY4/ZcO/gBOH6PUaoiptASxtXU10jazRCP865E97k= -+golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= - golang.org/x/build v0.0.0-20190111050920-041ab4dc3f9d/go.mod h1:OWs+y06UdEOHN4y+MfF/py+xQ/tYqIWW03b70/CG9Rw= - golang.org/x/crypto v0.0.0-20170930174604-9419663f5a44/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= - golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= -@@ -2066,6 +2106,8 @@ golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= - golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= - golang.org/x/sys v0.9.0 h1:KS/R3tvhPqvJvwcKfnBHJwwthS11LRhmM5D59eEXa0s= - golang.org/x/sys v0.9.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -+golang.org/x/sys v0.10.0 h1:SqMFp9UcQJZa+pmYuAKjd9xq1f0j5rLcDIk0mj4qAsA= -+golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= - golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= - golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod 
h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= - golang.org/x/term v0.0.0-20201210144234-2321bbc49cbf/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -@@ -2319,6 +2361,7 @@ lukechampine.com/blake3 v1.2.1/go.mod h1:0OFRp7fBtAylGVCO40o87sbupkyIGgbpv1+M1k1 - nhooyr.io/websocket v1.8.7 h1:usjR2uOr/zjjkVMy0lW+PPohFok7PCow5sDjLgX4P4g= - nhooyr.io/websocket v1.8.7/go.mod h1:B70DZP8IakI65RVQ51MsWP/8jndNma26DVA/nFSCgW0= - rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= -+rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= - rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= - rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= - sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= -diff --git a/itests/harmonytask_test.go b/itests/harmonytask_test.go -new file mode 100644 -index 000000000..2c8523d82 ---- /dev/null -+++ b/itests/harmonytask_test.go -@@ -0,0 +1,247 @@ -+package itests -+ -+import ( -+ "context" -+ "errors" -+ "fmt" -+ "sort" -+ "strings" -+ "sync" -+ "testing" -+ "time" -+ -+ "github.com/filecoin-project/lotus/itests/kit" -+ "github.com/filecoin-project/lotus/lib/harmony/harmonydb" -+ "github.com/filecoin-project/lotus/lib/harmony/harmonytask" -+ "github.com/filecoin-project/lotus/lib/harmony/resources" -+ "github.com/filecoin-project/lotus/node/impl" -+ "github.com/stretchr/testify/require" -+) -+ -+type task1 struct { -+ toAdd []int -+ myPersonalTableLock sync.Mutex -+ myPersonalTable map[harmonytask.TaskID]int // This would typicallyb be a DB table -+ WorkCompleted []string -+} -+ -+func (t *task1) Do(tID harmonytask.TaskID, stillOwned func() bool) (done bool, err error) { -+ if !stillOwned() { -+ return false, errors.New("Why not still owned?") -+ } -+ t.myPersonalTableLock.Lock() -+ defer t.myPersonalTableLock.Unlock() -+ t.WorkCompleted = append(t.WorkCompleted, fmt.Sprintf("taskResult%d", t.myPersonalTable[tID])) 
-+ return true, nil -+} -+func (t *task1) CanAccept(list []harmonytask.TaskID) (*harmonytask.TaskID, error) { -+ return &list[0], nil -+} -+func (t *task1) TypeDetails() harmonytask.TaskTypeDetails { -+ return harmonytask.TaskTypeDetails{ -+ Max: 100, -+ Name: "ThingOne", -+ MaxFailures: 1, -+ Cost: resources.Resources{ -+ Cpu: 1, -+ Ram: 100 << 10, // at 100kb, it's tiny -+ }, -+ } -+} -+func (t *task1) Adder(add harmonytask.AddTaskFunc) { -+ for _, v := range t.toAdd { -+ add(func(tID harmonytask.TaskID, tx *harmonydb.Tx) bool { -+ t.myPersonalTableLock.Lock() -+ defer t.myPersonalTableLock.Unlock() -+ -+ t.myPersonalTable[tID] = v -+ return true -+ }) -+ } -+} -+ -+func TestHarmonyTasks(t *testing.T) { -+ withSetup(t, func(m *kit.TestMiner) { -+ cdb := m.BaseAPI.(*impl.StorageMinerAPI).HarmonyDB -+ t1 := &task1{ -+ toAdd: []int{56, 73}, -+ myPersonalTable: map[harmonytask.TaskID]int{}, -+ } -+ e, err := harmonytask.New(cdb, []harmonytask.TaskInterface{t1}, "test:1") -+ require.NoError(t, err) -+ time.Sleep(3 * time.Second) // do the work. FLAKYNESS RISK HERE. -+ e.GracefullyTerminate(time.Minute) -+ require.Equal(t, t1.WorkCompleted, 2, "wrong amount of work complete: expected 2 got:") -+ sort.Strings(t1.WorkCompleted) -+ got := strings.Join(t1.WorkCompleted, ",") -+ expected := "taskResult56,taskResult73" -+ if got != expected { -+ t.Fatal("Unexpected results! Wanted " + expected + " got " + got) -+ } -+ // TODO test history table looks right. 
-+ }) -+} -+ -+type passthru struct { -+ dtl harmonytask.TaskTypeDetails -+ do func(tID harmonytask.TaskID, stillOwned func() bool) (done bool, err error) -+ canAccept func(list []harmonytask.TaskID) (*harmonytask.TaskID, error) -+ adder func(add harmonytask.AddTaskFunc) -+} -+ -+func (t *passthru) Do(tID harmonytask.TaskID, stillOwned func() bool) (done bool, err error) { -+ return t.do(tID, stillOwned) -+} -+func (t *passthru) CanAccept(list []harmonytask.TaskID) (*harmonytask.TaskID, error) { -+ return t.canAccept(list) -+} -+func (t *passthru) TypeDetails() harmonytask.TaskTypeDetails { -+ return t.dtl -+} -+func (t *passthru) Adder(add harmonytask.AddTaskFunc) { -+ if t.adder != nil { -+ t.adder(add) -+ } -+} -+ -+// Common stuff -+var dtl = harmonytask.TaskTypeDetails{Name: "foo", Max: -1, Cost: resources.Resources{}} -+var letters []string -+var lettersMutex sync.Mutex -+ -+func fooLetterAdder(t *testing.T, cdb *harmonydb.DB) *passthru { -+ return &passthru{ -+ dtl: dtl, -+ canAccept: func(list []harmonytask.TaskID) (*harmonytask.TaskID, error) { return nil, nil }, -+ adder: func(add harmonytask.AddTaskFunc) { -+ for _, v := range []string{"A", "B"} { -+ add(func(tID harmonytask.TaskID, tx *harmonydb.Tx) bool { -+ _, err := tx.Exec("INSERT INTO itest_scratch (some_int, content) VALUES ($1,$2)", tID, v) -+ require.NoError(t, err) -+ return true -+ }) -+ } -+ }, -+ } -+} -+func fooLetterSaver(t *testing.T, cdb *harmonydb.DB) *passthru { -+ return &passthru{ -+ dtl: dtl, -+ canAccept: func(list []harmonytask.TaskID) (*harmonytask.TaskID, error) { return &list[0], nil }, -+ do: func(tID harmonytask.TaskID, stillOwned func() bool) (done bool, err error) { -+ var content string -+ err = cdb.QueryRow(context.Background(), -+ "SELECT content FROM itest_scratch WHERE some_int=$1", tID).Scan(&content) -+ require.NoError(t, err) -+ lettersMutex.Lock() -+ defer lettersMutex.Unlock() -+ letters = append(letters, content) -+ return true, nil -+ }, -+ } -+} -+ -+func 
TestHarmonyTasksWith2PartiesPolling(t *testing.T) { -+ withSetup(t, func(m *kit.TestMiner) { -+ cdb := m.BaseAPI.(*impl.StorageMinerAPI).HarmonyDB -+ senderParty := fooLetterAdder(t, cdb) -+ workerParty := fooLetterSaver(t, cdb) -+ harmonytask.POLL_DURATION = time.Millisecond * 100 -+ sender, err := harmonytask.New(cdb, []harmonytask.TaskInterface{senderParty}, "test:1") -+ require.NoError(t, err) -+ worker, err := harmonytask.New(cdb, []harmonytask.TaskInterface{workerParty}, "test:2") -+ require.NoError(t, err) -+ time.Sleep(3 * time.Second) // do the work. FLAKYNESS RISK HERE. -+ sender.GracefullyTerminate(time.Second * 5) -+ worker.GracefullyTerminate(time.Second * 5) -+ sort.Strings(letters) -+ require.Equal(t, letters, []string{"A", "B"}) -+ }) -+} -+ -+func TestWorkStealing(t *testing.T) { -+ withSetup(t, func(m *kit.TestMiner) { -+ cdb := m.BaseAPI.(*impl.StorageMinerAPI).HarmonyDB -+ ctx := context.Background() -+ -+ // The dead worker will be played by a few SQL INSERTS. -+ _, err := cdb.Exec(ctx, `INSERT INTO harmony_machines -+ (id, last_contact,host_and_port, cpu, ram, gpu, gpuram) -+ VALUES (300, DATE '2000-01-01', 'test:1', 4, 400000, 1, 1000000)`) -+ require.ErrorIs(t, err, nil) -+ _, err = cdb.Exec(ctx, `INSERT INTO harmony_task -+ (id, name, owner_id, posted_time, added_by) -+ VALUES (1234, 'foo', 300, DATE '2000-01-01', 300)`) -+ require.ErrorIs(t, err, nil) -+ _, err = cdb.Exec(ctx, "INSERT INTO itest_scratch (some_int, content) VALUES (1234, 'M')") -+ require.ErrorIs(t, err, nil) -+ -+ harmonytask.POLL_DURATION = time.Millisecond * 100 -+ harmonytask.CLEANUP_FREQUENCY = time.Millisecond * 100 -+ worker, err := harmonytask.New(cdb, []harmonytask.TaskInterface{fooLetterSaver(t, cdb)}, "test:2") -+ require.ErrorIs(t, err, nil) -+ time.Sleep(3 * time.Second) // do the work. FLAKYNESS RISK HERE. 
-+ worker.GracefullyTerminate(time.Second * 5) -+ require.Equal(t, []string{"M"}, letters) -+ }) -+} -+ -+func TestTaskRetry(t *testing.T) { -+ withSetup(t, func(m *kit.TestMiner) { -+ cdb := m.BaseAPI.(*impl.StorageMinerAPI).HarmonyDB -+ senderParty := fooLetterAdder(t, cdb) -+ harmonytask.POLL_DURATION = time.Millisecond * 100 -+ sender, err := harmonytask.New(cdb, []harmonytask.TaskInterface{senderParty}, "test:1") -+ require.NoError(t, err) -+ -+ alreadyFailed := map[string]bool{} -+ fails2xPerMsg := &passthru{ -+ dtl: dtl, -+ canAccept: func(list []harmonytask.TaskID) (*harmonytask.TaskID, error) { return &list[0], nil }, -+ do: func(tID harmonytask.TaskID, stillOwned func() bool) (done bool, err error) { -+ var content string -+ err = cdb.QueryRow(context.Background(), -+ "SELECT content FROM itest_scratch WHERE some_int=$1", tID).Scan(&content) -+ require.NoError(t, err) -+ lettersMutex.Lock() -+ defer lettersMutex.Unlock() -+ if !alreadyFailed[content] { -+ alreadyFailed[content] = true -+ return false, errors.New("intentional 'error'") -+ } -+ letters = append(letters, content) -+ return true, nil -+ }, -+ } -+ rcv, err := harmonytask.New(cdb, []harmonytask.TaskInterface{fails2xPerMsg}, "test:2") -+ require.NoError(t, err) -+ time.Sleep(3 * time.Second) -+ sender.GracefullyTerminate(time.Hour) -+ rcv.GracefullyTerminate(time.Hour) -+ sort.Strings(letters) -+ require.Equal(t, []string{"A", "B"}, letters) -+ type hist struct { -+ TaskID int -+ Result bool -+ Err string -+ } -+ var res []hist -+ require.NoError(t, cdb.Select(context.Background(), &res, -+ `SELECT task_id, result, err FROM harmony_task_history -+ ORDER BY result DESC, task_id`)) -+ -+ require.Equal(t, []hist{ -+ {1, true, ""}, -+ {2, true, ""}, -+ {1, false, "error: intentional 'error'"}, -+ {2, false, "error: intentional 'error'"}}, res) -+ }) -+} -+ -+/* -+FUTURE test fast-pass round-robin via http calls (3party) once the API for that is set -+It's necessary for WinningPoSt. 
-+ -+FUTURE test follows. -+It's necessary for sealing work. -+*/ -diff --git a/lib/harmony/harmonydb/harmonydb.go b/lib/harmony/harmonydb/harmonydb.go -index fd31e7a13..48e3db6fa 100644 ---- a/lib/harmony/harmonydb/harmonydb.go -+++ b/lib/harmony/harmonydb/harmonydb.go -@@ -118,21 +118,25 @@ type tracer struct { - - type ctxkey string - --var sqlStart = ctxkey("sqlStart") -+const SQL_START = ctxkey("sqlStart") -+const SQL_STRING = ctxkey("sqlString") - - func (t tracer) TraceQueryStart(ctx context.Context, conn *pgx.Conn, data pgx.TraceQueryStartData) context.Context { -- return context.WithValue(ctx, sqlStart, time.Now()) -+ return context.WithValue(context.WithValue(ctx, SQL_START, time.Now()), SQL_STRING, data.SQL) - } - func (t tracer) TraceQueryEnd(ctx context.Context, conn *pgx.Conn, data pgx.TraceQueryEndData) { - DBMeasures.Hits.M(1) -- ms := time.Since(ctx.Value(sqlStart).(time.Time)).Milliseconds() -+ ms := time.Since(ctx.Value(SQL_START).(time.Time)).Milliseconds() - DBMeasures.TotalWait.M(ms) - DBMeasures.Waits.Observe(float64(ms)) - if data.Err != nil { - DBMeasures.Errors.M(1) - } -- // Can log what type of query it is, but not what tables -- // Can log rows affected. -+ logger.Debugw("SQL run", -+ "query", ctx.Value(SQL_STRING).(string), -+ "err", data.Err, -+ "rowCt", data.CommandTag.RowsAffected(), -+ "milliseconds", ms) - } - - // addStatsAndConnect connects a prometheus logger. Be sure to run this before using the DB. -@@ -250,8 +254,9 @@ func (db *DB) upgrade() error { - } - _, err = db.pgx.Exec(context.Background(), s) - if err != nil { -- db.log(fmt.Sprintf("Could not upgrade! File %s, Query: %s, Returned: %s", name, s, err.Error())) -- return err -+ msg := fmt.Sprintf("Could not upgrade! File %s, Query: %s, Returned: %s", name, s, err.Error()) -+ db.log(msg) -+ return errors.New(msg) // makes devs lives easier by placing message at the end. 
- } - } - -diff --git a/lib/harmony/harmonydb/sql/20230706.sql b/lib/harmony/harmonydb/sql/20230706.sql -index b45aca7fa..a4a333b81 100644 ---- a/lib/harmony/harmonydb/sql/20230706.sql -+++ b/lib/harmony/harmonydb/sql/20230706.sql -@@ -2,5 +2,6 @@ CREATE TABLE itest_scratch ( - id SERIAL PRIMARY KEY, - content TEXT, - some_int INTEGER, -+ second_int INTEGER, - update_time TIMESTAMP DEFAULT current_timestamp - ) -\ No newline at end of file -diff --git a/lib/harmony/harmonydb/sql/20230719.sql b/lib/harmony/harmonydb/sql/20230719.sql -new file mode 100644 -index 000000000..0a676526b ---- /dev/null -+++ b/lib/harmony/harmonydb/sql/20230719.sql -@@ -0,0 +1,52 @@ -+/* For HarmonyTask base implementation. */ -+ -+CREATE TABLE harmony_machines ( -+ id SERIAL PRIMARY KEY NOT NULL, -+ last_contact TIMESTAMP NOT NULL DEFAULT current_timestamp, -+ host_and_port varchar(300) NOT NULL, -+ cpu INTEGER NOT NULL, -+ ram BIGINT NOT NULL, -+ gpu FLOAT NOT NULL, -+ gpuram BIGINT NOT NULL -+); -+ -+CREATE TABLE harmony_task ( -+ id SERIAL PRIMARY KEY NOT NULL, -+ initiated_by INTEGER, -+ update_time TIMESTAMP NOT NULL DEFAULT current_timestamp, -+ posted_time TIMESTAMP NOT NULL, -+ owner_id INTEGER REFERENCES harmony_machines (id) ON DELETE SET NULL, -+ added_by INTEGER NOT NULL, -+ previous_task INTEGER, -+ name varchar(8) NOT NULL -+); -+COMMENT ON COLUMN harmony_task.initiated_by IS 'The task ID whose completion occasioned this task.'; -+COMMENT ON COLUMN harmony_task.owner_id IS 'The foreign key to harmony_machines.'; -+COMMENT ON COLUMN harmony_task.name IS 'The name of the task type.'; -+COMMENT ON COLUMN harmony_task.owner_id IS 'may be null if between owners or not yet taken'; -+COMMENT ON COLUMN harmony_task.update_time IS 'When it was last modified. 
not a heartbeat'; -+ -+CREATE TABLE harmony_task_history ( -+ id SERIAL PRIMARY KEY NOT NULL, -+ task_id INTEGER NOT NULL, -+ name VARCHAR(8) NOT NULL, -+ posted TIMESTAMP NOT NULL, -+ work_start TIMESTAMP NOT NULL, -+ work_end TIMESTAMP NOT NULL, -+ result BOOLEAN NOT NULL, -+ err varchar -+); -+COMMENT ON COLUMN harmony_task_history.result IS 'Use to detemine if this was a successful run.'; -+ -+CREATE TABLE harmony_task_follow ( -+ id SERIAL PRIMARY KEY NOT NULL, -+ owner_id INTEGER NOT NULL REFERENCES harmony_machines (id) ON DELETE CASCADE, -+ to_type VARCHAR(8) NOT NULL, -+ from_type VARCHAR(8) NOT NULL -+); -+ -+CREATE TABLE harmony_task_impl ( -+ id SERIAL PRIMARY KEY NOT NULL, -+ owner_id INTEGER NOT NULL REFERENCES harmony_machines (id) ON DELETE CASCADE, -+ name VARCHAR(8) NOT NULL -+); -\ No newline at end of file -diff --git a/lib/harmony/harmonytask/doc.go b/lib/harmony/harmonytask/doc.go -new file mode 100644 -index 000000000..357c3e15c ---- /dev/null -+++ b/lib/harmony/harmonytask/doc.go -@@ -0,0 +1,79 @@ -+/* -+ Package harmomnytask implements a pure (no task logic), distributed -+ task manager. This clean interface allows a task implementer to completely -+ -+avoid being concerned with task scheduling and management. -+It's based on the idea of tasks as small units of work broken from other -+work by hardware, parallelizabilty, reliability, or any other reason. -+Workers will be Greedy: vaccuuming up their favorite jobs from a list. -+Once 1 task is accepted, harmonydb tries to get other task runner -+machines to accept work (round robin) before trying again to accept. -+* -+Mental Model: -+ -+ Things that block tasks: -+ - task not registered for any running server -+ - max was specified and reached -+ - resource exhaustion -+ - CanAccept() interface (per-task implmentation) does not accept it. 
-+ Ways tasks start: (slowest first) -+ - DB Read every 1 minute -+ - Bump via HTTP if registered in DB -+ - Task was added (to db) by this process -+ Ways tasks get added: -+ - Async Listener task (for chain, etc) -+ - Followers: Tasks get added because another task completed -+ When Follower collectors run: -+ - If both sides are process-local, then -+ - Otherwise, at the listen interval during db scrape -+ How duplicate tasks are avoided: -+ - that's up to the task definition, but probably a unique key -+ -+* -+To use: -+1.Implement TaskInterface for a new task. -+2 Have New() receive this & all other ACTIVE implementations. -+* -+* -+As we are not expecting DBAs in this database, it's important to know -+what grows uncontrolled. The only harmony_* table is _task_history -+(somewhat quickly) and harmony_machines (slowly). These will need a -+clean-up for after the task data could never be acted upon. -+but the design **requires** extraInfo tables to grow until the task's -+info could not possibly be used by a following task, including slow -+release rollout. This would normally be in the order of months old. -+* -+Other possible enhancements include more collaboative coordination -+to assign a task to machines closer to the data. -+ -+__Database_Behavior__ -+harmony_task is the list of work that has not been completed. -+ -+ AddTaskFunc manages the additions, but is designed to have its -+ transactions failed-out on overlap with a similar task already written. -+ It's up to the TaskInterface implementer to discover this overlap via -+ some other table it uses (since overlap can mean very different things). -+ -+harmony_task_history -+ -+ This holds transactions that completed or saw too many retries. It also -+ serves as input for subsequent (follower) tasks to kick off. This is not -+ done machine-internally because a follower may not be on the same machine -+ as the previous task. 
-+ -+harmony_task_machines -+ -+ Managed by lib/harmony/resources, this is a reference to machines registered -+ via the resources. This registration does not obligate the machine to -+ anything, but serves as a discovery mechanism. Paths are hostnames + ports -+ which are presumed to support http, but this assumption is only used by -+ the task system. -+ -+harmony_task_follow / harmony_task_impl -+ -+ These tables are used to fast-path notifications to other machines instead -+ of waiting for polling. _impl helps round-robin work pick-up. _follow helps -+ discover the machines that are interested in creating tasks following the -+ task that just completed. -+*/ -+package harmonytask -diff --git a/lib/harmony/harmonytask/harmonytask.go b/lib/harmony/harmonytask/harmonytask.go -new file mode 100644 -index 000000000..1f5662959 ---- /dev/null -+++ b/lib/harmony/harmonytask/harmonytask.go -@@ -0,0 +1,386 @@ -+package harmonytask -+ -+import ( -+ "context" -+ "fmt" -+ "strconv" -+ "sync/atomic" -+ "time" -+ -+ "github.com/filecoin-project/lotus/lib/harmony/resources" -+ "github.com/gin-gonic/gin" -+ -+ "github.com/filecoin-project/lotus/lib/harmony/harmonydb" -+) -+ -+// Consts (except for unit test) -+var POLL_DURATION = time.Minute // Poll for Work this frequently -+var CLEANUP_FREQUENCY = 5 * time.Minute // Check for dead workers this often * everyone -+ -+type TaskTypeDetails struct { -+ // Max returns how many tasks this machine can run of this type. -+ // Negative means unrestricted. -+ Max int -+ -+ // Name is the task name to be added to the task list. -+ Name string -+ -+ // Peak costs to Do() the task. -+ Cost resources.Resources -+ -+ // Max Failure count before the job is dropped. -+ // 0 = retry forever -+ MaxFailures uint -+ -+ // Follow another task's completion via this task's creation. -+ // The function should populate extraInfo from data -+ // available from the previous task's tables, using the given TaskID. 
-+ // It should also return success if the trigger succeeded. -+ // NOTE: if refatoring tasks, see if your task is -+ // necessary. Ex: Is the sector state correct for your stage to run? -+ Follows map[string]func(TaskID, AddTaskFunc) bool -+} -+ -+// TaskInterface must be implemented in order to have a task used by harmonytask. -+type TaskInterface interface { -+ // Do the task assigned. Call stillOwned before making single-writer-only -+ // changes to ensure the work has not been stolen. -+ // This is the ONLY function that should attempt to do the work, and must -+ // ONLY be called by harmonytask. -+ // Indicate if the task no-longer needs scheduling with done=true including -+ // cases where it's past the deadline. -+ Do(taskID TaskID, stillOwned func() bool) (done bool, err error) -+ -+ // CanAccept should return if the task can run on this machine. It should -+ // return null if the task type is not allowed on this machine. -+ // It should select the task it most wants to accomplish. -+ // It is also responsible for determining disk space (including scratch). -+ CanAccept([]TaskID) (*TaskID, error) -+ -+ // TypeDetails() returns static details about how this task behaves and -+ // how this machine will run it. Read once at the beginning. -+ TypeDetails() TaskTypeDetails -+ -+ // This listener will consume all external sources continuously for work. -+ // Do() may also be called from a backlog of work. This must not -+ // start doing the work (it still must be scheduled). -+ // Note: Task de-duplication should happen in ExtraInfoFunc by -+ // returning false, typically by determining from the tx that the work -+ // exists already. The easy way is to have a unique joint index -+ // across all fields that will be common. -+ // Adder should typically only add its own task type, but multiple -+ // is possible for when 1 trigger starts 2 things. 
-+ // Usage Example: -+ // func (b *BazType)Adder(addTask AddTaskFunc) { -+ // for { -+ // bazMaker := <- bazChannel -+ // addTask("baz", func(t harmonytask.TaskID, txn db.Transaction) bool { -+ // _, err := txn.Exec(`INSERT INTO bazInfoTable (taskID, qix, mot) -+ // VALUES ($1,$2,$3)`, id, bazMaker.qix, bazMaker.mot) -+ // if err != nil { -+ // scream(err) -+ // return false -+ // } -+ // return true -+ // }) -+ // } -+ // } -+ Adder(AddTaskFunc) -+} -+ -+type AddTaskFunc func(extraInfo func(TaskID, *harmonydb.Tx) bool) -+ -+type TaskEngine struct { -+ ctx context.Context -+ handlers []*taskTypeHandler -+ db *harmonydb.DB -+ workAdderMutex *notifyingMx -+ reg *resources.Reg -+ grace context.CancelFunc -+ taskMap map[string]*taskTypeHandler -+ ownerID int -+ tryAllWork chan bool // notify if work completed -+ follows map[string][]followStruct -+ lastFollowTime time.Time -+ lastCleanup atomic.Value -+} -+type followStruct struct { -+ f func(TaskID, AddTaskFunc) bool -+ h *taskTypeHandler -+} -+ -+type TaskID int -+ -+// New creates all the task definitions. 
Note that TaskEngine -+// knows nothing about the tasks themselves and serves to be a -+// generic container for common work -+func New( -+ db *harmonydb.DB, -+ impls []TaskInterface, -+ hostnameAndPort string) (*TaskEngine, error) { -+ -+ reg, err := resources.Register(db, hostnameAndPort) -+ if err != nil { -+ return nil, fmt.Errorf("cannot get resources: %w", err) -+ } -+ ctx, grace := context.WithCancel(context.Background()) -+ e := &TaskEngine{ -+ ctx: ctx, -+ grace: grace, -+ db: db, -+ reg: reg, -+ ownerID: reg.Resources.MachineID, // The current number representing "hostAndPort" -+ workAdderMutex: ¬ifyingMx{}, -+ taskMap: make(map[string]*taskTypeHandler, len(impls)), -+ tryAllWork: make(chan bool), -+ follows: make(map[string][]followStruct), -+ } -+ e.lastCleanup.Store(time.Now()) -+ for _, c := range impls { -+ h := taskTypeHandler{ -+ TaskInterface: c, -+ TaskTypeDetails: c.TypeDetails(), -+ TaskEngine: e, -+ } -+ e.handlers = append(e.handlers, &h) -+ e.taskMap[h.TaskTypeDetails.Name] = &h -+ -+ _, err := db.Exec(e.ctx, `INSERT INTO harmony_task_impl (owner_id, name) -+ VALUES ($1,$2)`, e.ownerID, h.Name) -+ if err != nil { -+ return nil, fmt.Errorf("can't update impl: %w", err) -+ } -+ -+ for name, fn := range c.TypeDetails().Follows { -+ e.follows[name] = append(e.follows[name], followStruct{fn, &h}) -+ -+ // populate harmony_task_follows -+ _, err := db.Exec(e.ctx, `INSERT INTO harmony_task_follows (owner_id, from_task, to_task) -+ VALUES ($1,$2,$3)`, e.ownerID, name, h.Name) -+ if err != nil { -+ return nil, fmt.Errorf("can't update harmony_task_follows: %w", err) -+ } -+ } -+ } -+ -+ // resurrect old work -+ { -+ var taskRet []struct { -+ ID int -+ Name string -+ } -+ -+ err := db.Select(e.ctx, &taskRet, `SELECT id, name from harmony_task WHERE owner_id=$1`, e.ownerID) -+ if err != nil { -+ return nil, err -+ } -+ for _, w := range taskRet { -+ // edge-case: if old assignments are not available tasks, unlock them. 
-+ h := e.taskMap[w.Name] -+ if h == nil { -+ _, err := db.Exec(e.ctx, `UPDATE harmony_task SET owner=NULL WHERE id=$1`, w.ID) -+ if err != nil { -+ log.Error("Cannot remove self from owner field: ", err) -+ continue // not really fatal, but not great -+ } -+ } -+ if !h.considerWork([]TaskID{TaskID(w.ID)}) { -+ log.Error("Strange: Unable to accept previously owned task: ", w.ID, w.Name) -+ } -+ } -+ } -+ for _, h := range e.handlers { -+ go h.Adder(h.AddTask) -+ } -+ go e.poller() -+ -+ return e, nil -+} -+ -+// GracefullyTerminate hangs until all present tasks have completed. -+// Call this to cleanly exit the process. As some processes are long-running, -+// passing a deadline will ignore those still running (to be picked-up later). -+func (e *TaskEngine) GracefullyTerminate(deadline time.Duration) { -+ e.grace() -+ e.reg.Shutdown() -+ deadlineChan := time.NewTimer(deadline).C -+ -+ // block bumps & follows by unreg from DBs. -+ _, err := e.db.Exec(context.Background(), `DELETE FROM harmony_task_impl WHERE owner_id=$1`, e.ownerID) -+ if err != nil { -+ log.Warn("Could not clean-up impl table: %w", err) -+ } -+ _, err = e.db.Exec(context.Background(), `DELETE FROM harmony_task_follow WHERE owner_id=$1`, e.ownerID) -+ if err != nil { -+ log.Warn("Could not clean-up impl table: %w", err) -+ } -+top: -+ for _, h := range e.handlers { -+ if h.Count.Load() > 0 { -+ select { -+ case <-deadlineChan: -+ return -+ default: -+ time.Sleep(time.Millisecond) -+ goto top -+ } -+ } -+ } -+} -+ -+func (e *TaskEngine) poller() { -+ for { -+ select { -+ case <-e.tryAllWork: ///////////////////// Find work after some work finished -+ case <-time.NewTicker(POLL_DURATION).C: // Find work periodically -+ case <-e.ctx.Done(): ///////////////////// Graceful exit -+ return -+ } -+ e.followWorkInDB() // "Follows" the slow way -+ e.pollerTryAllWork() // "Bumps" (round robin tasks) the slow way -+ } -+} -+ -+// followWorkInDB implements "Follows" the slow way -+func (e *TaskEngine) 
followWorkInDB() { -+ // Step 1: What are we following? -+ var lastFollowTime time.Time -+ lastFollowTime, e.lastFollowTime = e.lastFollowTime, time.Now() -+ -+ for from_name, srcs := range e.follows { -+ var cList []int // Which work is done (that we follow) since we last checked? -+ err := e.db.Select(e.ctx, &cList, `SELECT h.task_id FROM harmony_task_history -+ WHERE h.work_end>$1 AND h.name=$2`, lastFollowTime, from_name) -+ if err != nil { -+ log.Error("Could not query DB: ", err) -+ return -+ } -+ for _, src := range srcs { -+ for _, workAlreadyDone := range cList { // Were any tasks made to follow these tasks? -+ var ct int -+ err := e.db.QueryRow(e.ctx, `SELECT COUNT(*) FROM harmony_task -+ WHERE name=$1 AND previous_task=$2`, src.h.Name, workAlreadyDone).Scan(&ct) -+ if err != nil { -+ log.Error("Could not query harmony_task: ", err) -+ return // not recoverable here -+ } -+ if ct > 0 { -+ continue -+ } -+ // we need to create this task -+ if !src.h.Follows[from_name](TaskID(workAlreadyDone), src.h.AddTask) { -+ // But someone may have beaten us to it. 
-+ log.Infof("Unable to add task %s following Task(%d, %s)", src.h.Name, workAlreadyDone, from_name) -+ } -+ } -+ } -+ } -+} -+ -+// pollerTryAllWork implements "Bumps" (next task) the slow way -+func (e *TaskEngine) pollerTryAllWork() { -+ if time.Since(e.lastCleanup.Load().(time.Time)) > CLEANUP_FREQUENCY { -+ e.lastCleanup.Store(time.Now()) -+ resources.CleanupMachines(e.ctx, e.db) -+ } -+ for _, v := range e.handlers { -+ rerun: -+ if v.AssertMachineHasCapacity() != nil { -+ continue -+ } -+ var unownedTasks []TaskID -+ err := e.db.Select(e.ctx, &unownedTasks, `SELECT id -+ FROM harmony_task -+ WHERE owner_id IS NULL AND name=$1 -+ ORDER BY update_time`, v.Name) -+ if err != nil { -+ log.Error("Unable to read work ", err) -+ continue -+ } -+ accepted := v.considerWork(unownedTasks) -+ if !accepted { -+ log.Warn("Work not accepted") -+ continue -+ } -+ if len(unownedTasks) > 1 { -+ e.bump(v.Name) // wait for others before trying again to add work. -+ goto rerun -+ } -+ } -+} -+ -+// AddHttpHandlers TODO this needs to be called by the http server to register routes. -+// This implements the receiver-side of "follows" and "bumps" the fast way. -+func (e *TaskEngine) AddHttpHandlers(root gin.IRouter) { -+ s := root.Group("/scheduler/") -+ f := s.Group("/follows") -+ for name, v := range e.follows { -+ f.GET("/"+name+"/:tID", func(c *gin.Context) { -+ tIDString := c.Param("tID") -+ tID, err := strconv.Atoi(tIDString) -+ if err != nil { -+ c.AbortWithError(401, err) -+ return -+ } -+ taskAdded := false -+ for _, v := range v { -+ taskAdded = taskAdded || v.f(TaskID(tID), v.h.AddTask) -+ } -+ if taskAdded { -+ e.tryAllWork <- true -+ c.Status(200) -+ } -+ c.Status(202) // NOTE: 202 for "accepted" but not worked. 
-+ }) -+ } -+ b := s.Group("/bump") -+ for _, h := range e.handlers { -+ b.GET("/"+h.Name+"/:tID", func(c *gin.Context) { -+ tIDString := c.Param("tID") -+ tID, err := strconv.Atoi(tIDString) -+ if err != nil { -+ c.AbortWithError(401, err) -+ return -+ } -+ // We NEED to block while trying to deliver -+ // this work to ease the network impact. -+ if h.considerWork([]TaskID{TaskID(tID)}) { -+ c.Status(200) -+ } -+ c.Status(202) // NOTE: 202 for "accepted" but not worked. -+ }) -+ } -+} -+ -+func (e *TaskEngine) bump(taskType string) { -+ var res []string -+ err := e.db.Select(e.ctx, &res, `SELECT host_and_port FROM harmony_machines m -+ JOIN harmony_task_impl i ON i.owner_id=m.id -+ WHERE i.name=$1`, taskType) -+ if err != nil { -+ log.Error("Could not read db for bump: ", err) -+ return -+ } -+ for _, url := range res { -+ resp, err := hClient.Get(url + "/scheduler/bump/" + taskType) -+ if err != nil { -+ log.Info("Server unreachable to bump: ", err) -+ continue -+ } -+ if resp.StatusCode == 200 { -+ return // just want 1 taker. -+ } -+ } -+} -+ -+// resourcesInUse requires workListsMutex to be already locked. 
-+func (e *TaskEngine) resourcesInUse() resources.Resources { -+ tmp := e.reg.Resources -+ for _, t := range e.handlers { -+ ct := t.Count.Load() -+ tmp.Cpu -= int(ct) * t.Cost.Cpu -+ tmp.Gpu -= float64(ct) * t.Cost.Gpu -+ tmp.Ram -= uint64(ct) * t.Cost.Ram -+ } -+ return tmp -+} -diff --git a/lib/harmony/harmonytask/notifyingMx.go b/lib/harmony/harmonytask/notifyingMx.go -new file mode 100644 -index 000000000..51c4e0a53 ---- /dev/null -+++ b/lib/harmony/harmonytask/notifyingMx.go -@@ -0,0 +1,16 @@ -+package harmonytask -+ -+import "sync" -+ -+type notifyingMx struct { -+ sync.Mutex -+ UnlockNotify func() -+} -+ -+func (n *notifyingMx) Unlock() { -+ tmp := n.UnlockNotify -+ n.Mutex.Unlock() -+ if tmp != nil { -+ tmp() -+ } -+} -diff --git a/lib/harmony/harmonytask/taskTypeHandler.go b/lib/harmony/harmonytask/taskTypeHandler.go -new file mode 100644 -index 000000000..079f33704 ---- /dev/null -+++ b/lib/harmony/harmonytask/taskTypeHandler.go -@@ -0,0 +1,276 @@ -+package harmonytask -+ -+import ( -+ "context" -+ "errors" -+ "io" -+ "net/http" -+ "strconv" -+ "sync/atomic" -+ "time" -+ -+ "github.com/filecoin-project/lotus/lib/harmony/harmonydb" -+ logging "github.com/ipfs/go-log/v2" -+) -+ -+var log = logging.Logger("harmonytask") -+ -+type taskTypeHandler struct { -+ TaskInterface -+ TaskTypeDetails -+ TaskEngine *TaskEngine -+ Count atomic.Int32 /// locked by TaskEngine's mutex -+ -+} -+ -+func (h *taskTypeHandler) AddTask(extra func(TaskID, *harmonydb.Tx) bool) { -+ var tID TaskID -+ did, err := h.TaskEngine.db.BeginTransaction(h.TaskEngine.ctx, func(tx *harmonydb.Tx) bool { -+ // create taskID (from DB) -+ _, err := tx.Exec(`INSERT INTO harmony_task (name, added_by, posted_time) -+ VALUES ($1, $2, CURRENT_TIMESTAMP) `, h.Name, h.TaskEngine.ownerID) -+ if err != nil { -+ log.Error("Could not insert into harmonyTask", err) -+ return false -+ } -+ err = tx.QueryRow("SELECT id FROM harmony_task ORDER BY update_time DESC LIMIT 1").Scan(&tID) -+ if err != nil { -+ 
log.Error("Could not select ID: ", err) -+ } -+ return extra(tID, tx) -+ }) -+ if err != nil { -+ log.Error(err) -+ } -+ if !did { -+ return -+ } -+ -+ if !h.considerWork([]TaskID{tID}) { -+ h.TaskEngine.bump(h.Name) // We can't do it. How about someone else. -+ } -+} -+ -+func (h *taskTypeHandler) considerWork(ids []TaskID) (workAccepted bool) { -+ if len(ids) == 0 { -+ return true // stop looking for takers -+ } -+ -+ // 1. Can we do any more of this task type? -+ if h.Max > -1 && int(h.Count.Load()) == h.Max { -+ log.Infow("did not accept task", "name", h.Name, "reason", "at max already") -+ return false -+ } -+ -+ h.TaskEngine.workAdderMutex.Lock() -+ defer h.TaskEngine.workAdderMutex.Unlock() -+ -+ // 2. Can we do any more work? -+ err := h.AssertMachineHasCapacity() -+ if err != nil { -+ log.Info(err) -+ return false -+ } -+ -+ // 3. What does the impl say? -+ tID, err := h.CanAccept(ids) -+ if err != nil { -+ log.Error(err) -+ return false -+ } -+ if tID == nil { -+ log.Infow("did not accept task", "task_id", ids[0], "reason", "CanAccept() refused") -+ return false -+ } -+ -+ // 4. Can we claim the work for our hostname? 
-+ ct, err := h.TaskEngine.db.Exec(h.TaskEngine.ctx, "UPDATE harmony_task SET owner_id=$1 WHERE id=$2 AND owner_id IS NULL", h.TaskEngine.ownerID, *tID) -+ if err != nil { -+ log.Error(err) -+ return false -+ } -+ if ct == 0 { -+ log.Infow("did not accept task", "task_id", strconv.Itoa(int(*tID)), "reason", "already Taken") -+ return false -+ } -+ -+ go func() { -+ h.Count.Add(1) -+ -+ var done bool -+ var doErr error -+ workStart := time.Now() -+ -+ defer func() { -+ if r := recover(); r != nil { -+ log.Error("Recovered from a serious error "+ -+ "while processing "+h.Name+" task "+strconv.Itoa(int(*tID))+": ", r) -+ } -+ h.Count.Add(-1) -+ -+ h.recordCompletion(*tID, workStart, done, doErr) -+ if done { -+ h.triggerCompletionListeners(*tID) -+ } -+ -+ h.TaskEngine.tryAllWork <- true // Activate tasks in this machine -+ }() -+ -+ done, doErr = h.Do(*tID, func() bool { -+ var owner int -+ // Background here because we don't want GracefulRestart to block this save. -+ err := h.TaskEngine.db.QueryRow(context.Background(), -+ `SELECT owner_id FROM harmony_task WHERE id=$1`, *tID).Scan(&owner) -+ if err != nil { -+ log.Error("Cannot determine ownership: ", err) -+ return false -+ } -+ return owner == h.TaskEngine.ownerID -+ }) -+ if doErr != nil { -+ log.Error("Do("+h.Name+", taskID="+strconv.Itoa(int(*tID))+") returned error: ", doErr) -+ } -+ }() -+ return true -+} -+ -+func (h *taskTypeHandler) recordCompletion(tID TaskID, workStart time.Time, done bool, doErr error) { -+ workEnd := time.Now() -+ -+ cm, err := h.TaskEngine.db.BeginTransaction(h.TaskEngine.ctx, func(tx *harmonydb.Tx) bool { -+ var postedTime time.Time -+ err := tx.QueryRow(`SELECT posted_time FROM harmony_task WHERE id=$1`, tID).Scan(&postedTime) -+ if err != nil { -+ log.Error("Could not log completion: ", err) -+ return false -+ } -+ result := "unspecified error" -+ if done { -+ _, err = tx.Exec("DELETE FROM harmony_task WHERE id=$1", tID) -+ if err != nil { -+ log.Error("Could not log completion: 
", err) -+ return false -+ } -+ result = "" -+ } else { -+ if doErr != nil { -+ result = "error: " + doErr.Error() -+ } -+ var deleteTask bool -+ if h.MaxFailures > 0 { -+ ct := uint(0) -+ err = tx.QueryRow(`SELECT count(*) FROM harmony_task_history -+ WHERE task_id=$1 AND result=FALSE`, tID).Scan(&ct) -+ if err != nil { -+ log.Error("Could not read task history:", err) -+ return false -+ } -+ if ct >= h.MaxFailures { -+ deleteTask = true -+ } -+ } -+ if deleteTask { -+ _, err = tx.Exec("DELETE FROM harmony_task WHERE id=$1", tID) -+ if err != nil { -+ log.Error("Could not delete failed job: ", err) -+ return false -+ } -+ // Note: Extra Info is left laying around for later review & clean-up -+ } else { -+ tx.Exec(`UPDATE harmony_task SET owner_id=NULL WHERE id=$1`, tID) -+ if err != nil { -+ log.Error("Could not disown failed task: ", tID, err) -+ return false -+ } -+ } -+ } -+ _, err = tx.Exec(`INSERT INTO harmony_task_history -+ (task_id, name, posted, work_start, work_end, result, err) -+ VALUES ($1, $2, $3, $4, $5, $6, $7)`, tID, h.Name, postedTime, workStart, workEnd, done, result) -+ if err != nil { -+ log.Error("Could not write history: ", err) -+ return false -+ } -+ return true -+ }) -+ if err != nil { -+ log.Error("Could not record transaction: ", err) -+ return -+ } -+ if !cm { -+ log.Error("Committing the task records failed") -+ } -+} -+ -+func (h *taskTypeHandler) AssertMachineHasCapacity() error { -+ r := h.TaskEngine.resourcesInUse() -+ -+ if r.Cpu-h.Cost.Cpu < 0 { -+ return errors.New("Did not accept " + h.Name + " task: out of cpu") -+ } -+ if h.Cost.Ram > r.Ram { -+ return errors.New("Did not accept " + h.Name + " task: out of RAM") -+ } -+ if r.Gpu-h.Cost.Gpu < 0 { -+ return errors.New("Did not accept " + h.Name + " task: out of available GPU") -+ } -+ return nil -+} -+ -+var hClient = http.Client{} -+ -+func init() { -+ hClient.Timeout = 3 * time.Second -+} -+ -+// triggerCompletionListeners does in order: -+// 1. 
Trigger all in-process followers (b/c it's fast). -+// 2. Trigger all living processes with followers via DB -+// 3. Future followers (think partial upgrade) can read harmony_task_history -+// 3a. The Listen() handles slow follows. -+func (h *taskTypeHandler) triggerCompletionListeners(tID TaskID) { -+ // InProcess (#1 from Description) -+ inProcessDefs := h.TaskEngine.follows[h.Name] -+ inProcessFollowers := make([]string, len(inProcessDefs)) -+ for _, fs := range inProcessDefs { -+ if fs.f(tID, fs.h.AddTask) { -+ inProcessFollowers = append(inProcessFollowers, fs.h.Name) -+ } -+ } -+ -+ // Over HTTP (#2 from Description) -+ var hps []struct { -+ HostAndPort string -+ ToType string -+ } -+ err := h.TaskEngine.db.Select(h.TaskEngine.ctx, &hps, `SELECT m.host_and_port, to_type -+ FROM harmony_task_follow f JOIN harmony_machines m ON m.id=f.owner_id -+ WHERE from_type=$1 AND to_type NOT IN $2 AND f.owner_id != $3`, -+ h.Name, inProcessFollowers, h.TaskEngine.ownerID) -+ if err != nil { -+ log.Warn("Could not fast-trigger partner processes.", err) -+ return -+ } -+ hostsVisited := map[string]bool{} -+ tasksVisited := map[string]bool{} -+ for _, v := range hps { -+ if hostsVisited[v.HostAndPort] || tasksVisited[v.ToType] { -+ continue -+ } -+ resp, err := hClient.Get(v.HostAndPort + "/scheduler/follows/" + h.Name) -+ if err != nil { -+ log.Warn("Couldn't hit http endpoint: ", err) -+ continue -+ } -+ b, err := io.ReadAll(resp.Body) -+ if err != nil { -+ log.Warn("Couldn't hit http endpoint: ", err) -+ continue -+ } -+ hostsVisited[v.HostAndPort], tasksVisited[v.ToType] = true, true -+ if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusAccepted { -+ log.Error("IO failed for fast nudge: ", string(b)) -+ continue -+ } -+ } -+} -diff --git a/lib/harmony/resources/memsys.go b/lib/harmony/resources/memsys.go -new file mode 100644 -index 000000000..1a45b5b22 ---- /dev/null -+++ b/lib/harmony/resources/memsys.go -@@ -0,0 +1,22 @@ -+//go:build darwin || 
freebsd || openbsd || dragonfly || netbsd -+// +build darwin freebsd openbsd dragonfly netbsd -+ -+package resources -+ -+import ( -+ "encoding/binary" -+ "syscall" -+) -+ -+func sysctlUint64(name string) (uint64, error) { -+ s, err := syscall.Sysctl(name) -+ if err != nil { -+ return 0, err -+ } -+ // hack because the string conversion above drops a \0 -+ b := []byte(s) -+ if len(b) < 8 { -+ b = append(b, 0) -+ } -+ return binary.LittleEndian.Uint64(b), nil -+} -diff --git a/lib/harmony/resources/resources.go b/lib/harmony/resources/resources.go -new file mode 100644 -index 000000000..77200b873 ---- /dev/null -+++ b/lib/harmony/resources/resources.go -@@ -0,0 +1,180 @@ -+package resources -+ -+import ( -+ "bytes" -+ "context" -+ "fmt" -+ "os/exec" -+ "regexp" -+ "runtime" -+ "strings" -+ "sync/atomic" -+ "time" -+ -+ cl "github.com/Nv7-Github/go-cl" -+ ffi "github.com/filecoin-project/filecoin-ffi" -+ "github.com/filecoin-project/lotus/lib/harmony/harmonydb" -+ logging "github.com/ipfs/go-log/v2" -+ "github.com/pbnjay/memory" -+ -+ "golang.org/x/sys/unix" -+ -+ "github.com/samber/lo" -+) -+ -+var LOOKS_DEAD_TIMEOUT = 10 * time.Minute // Time w/o minute heartbeats -+ -+type Resources struct { -+ Cpu int -+ Gpu float64 -+ GpuRam uint64 -+ Ram uint64 -+ MachineID int -+} -+type Reg struct { -+ Resources -+ shutdown atomic.Bool -+} -+ -+var logger = logging.Logger("harmonytask") -+ -+var lotusRE = regexp.MustCompile("lotus-worker|lotus-harmony|yugabyted") -+ -+func Register(db *harmonydb.DB, hostnameAndPort string) (*Reg, error) { -+ var reg Reg -+ var err error -+ reg.Resources, err = getResources() -+ if err != nil { -+ return nil, err -+ } -+ ctx := context.Background() -+ { // Learn our owner_id while updating harmony_machines -+ var ownerID []int -+ err := db.Select(ctx, &ownerID, `SELECT id FROM harmony_machines WHERE host_and_port=$1`, hostnameAndPort) -+ if err != nil { -+ return nil, fmt.Errorf("could not read from harmony_machines: %w", err) -+ } -+ if 
len(ownerID) == 0 { -+ err = db.QueryRow(ctx, `INSERT INTO harmony_machines -+ (host_and_port, cpu, ram, gpu, gpuram) VALUES -+ ($1,$2,$3,$4,$5) RETURNING id`, -+ hostnameAndPort, reg.Cpu, reg.Ram, reg.Gpu, reg.GpuRam).Scan(®.Resources.MachineID) -+ if err != nil { -+ return nil, err -+ } -+ -+ } else { -+ reg.MachineID = ownerID[0] -+ _, err := db.Exec(ctx, `UPDATE harmony_machines SET -+ cpu=$1, ram=$2, gpu=$3, gpuram=$4 WHERE id=$6`, -+ reg.Cpu, reg.Ram, reg.Gpu, reg.GpuRam, reg.Resources.MachineID) -+ if err != nil { -+ return nil, err -+ } -+ } -+ CleanupMachines(context.Background(), db) -+ } -+ go func() { -+ for { -+ time.Sleep(time.Minute) -+ if reg.shutdown.Load() { -+ return -+ } -+ _, err := db.Exec(ctx, `UPDATE harmony_machines SET last_contact=CURRENT_TIMESTAMP`) -+ if err != nil { -+ logger.Error("Cannot keepalive ", err) -+ } -+ } -+ }() -+ -+ return ®, nil -+} -+func CleanupMachines(ctx context.Context, db *harmonydb.DB) int { -+ ct, err := db.Exec(ctx, `DELETE FROM harmony_machines WHERE last_contact < $1`, -+ time.Now().Add(-1*LOOKS_DEAD_TIMEOUT)) -+ if err != nil { -+ logger.Warn("unable to delete old machines: ", err) -+ } -+ return ct -+} -+ -+func (res *Reg) Shutdown() { -+ res.shutdown.Store(true) -+} -+ -+func getResources() (res Resources, err error) { -+ b, err := exec.Command(`ps`, `-ef`).CombinedOutput() -+ if err != nil { -+ logger.Warn("Could not safety check for 2+ processes: ", err) -+ } else { -+ found := 0 -+ for _, b := range bytes.Split(b, []byte("\n")) { -+ if lotusRE.Match(b) { -+ found++ -+ } -+ } -+ if found > 1 { -+ logger.Error("This Lotus process should run alone on a machine. 
Use CGroup.") -+ } -+ } -+ -+ res = Resources{ -+ Cpu: runtime.NumCPU(), -+ Ram: memory.FreeMemory(), -+ GpuRam: getGpuRam(), -+ } -+ -+ { // GPU boolean -+ gpus, err := ffi.GetGPUDevices() -+ if err != nil { -+ logger.Errorf("getting gpu devices failed: %+v", err) -+ } -+ all := strings.ToLower(strings.Join(gpus, ",")) -+ if len(gpus) > 1 || strings.Contains(all, "ati") || strings.Contains(all, "nvidia") { -+ res.Gpu = 1 -+ } -+ } -+ -+ return res, nil -+} -+ -+func getGpuRam() uint64 { -+ platforms, err := cl.GetPlatforms() -+ if err != nil { -+ logger.Error(err) -+ return 0 -+ } -+ -+ return uint64(lo.SumBy(platforms, func(p *cl.Platform) int64 { -+ d, err := p.GetDevices(cl.DeviceTypeAll) -+ if err != nil { -+ logger.Error(err) -+ return 0 -+ } -+ return lo.SumBy(d, func(d *cl.Device) int64 { return d.GlobalMemSize() }) -+ })) -+} -+ -+func DiskFree(path string) (uint64, error) { -+ s := unix.Statfs_t{} -+ err := unix.Statfs(path, &s) -+ if err != nil { -+ return 0, err -+ } -+ -+ return s.Bfree * uint64(s.Bsize), nil -+} -+ -+/* NOT for Darwin. -+func GetMemFree() uint64 { -+ in := unix.Sysinfo_t{} -+ err := unix.Sysinfo(&in) -+ if err != nil { -+ return 0 -+ } -+ // If this is a 32-bit system, then these fields are -+ // uint32 instead of uint64. -+ // So we always convert to uint64 to match signature. 
-+ return uint64(in.Freeram) * uint64(in.Unit) -+} -+*/ From dfb029cb301d8df40b1ff3d1224b11caa679bab5 Mon Sep 17 00:00:00 2001 From: "Andrew Jackson (Ajax)" Date: Tue, 15 Aug 2023 19:44:33 -0500 Subject: [PATCH 03/17] harmonytask: consider remaining tasks too --- lib/harmony/harmonytask/taskTypeHandler.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/lib/harmony/harmonytask/taskTypeHandler.go b/lib/harmony/harmonytask/taskTypeHandler.go index 079f33704..ed2bd4c8a 100644 --- a/lib/harmony/harmonytask/taskTypeHandler.go +++ b/lib/harmony/harmonytask/taskTypeHandler.go @@ -52,6 +52,7 @@ func (h *taskTypeHandler) AddTask(extra func(TaskID, *harmonydb.Tx) bool) { } func (h *taskTypeHandler) considerWork(ids []TaskID) (workAccepted bool) { +top: if len(ids) == 0 { return true // stop looking for takers } @@ -91,7 +92,14 @@ func (h *taskTypeHandler) considerWork(ids []TaskID) (workAccepted bool) { } if ct == 0 { log.Infow("did not accept task", "task_id", strconv.Itoa(int(*tID)), "reason", "already Taken") - return false + var tryAgain = make([]TaskID, 0, len(ids)-1) + for _, id := range ids { + if id != *tID { + tryAgain = append(tryAgain, id) + } + } + ids = tryAgain + goto top } go func() { From 497e4e5ab5d9a99ddc659bd1593dd5ae0f7a7a10 Mon Sep 17 00:00:00 2001 From: "Andrew Jackson (Ajax)" Date: Wed, 16 Aug 2023 16:56:09 -0500 Subject: [PATCH 04/17] harmonytask: better messages --- lib/harmony/harmonytask/doc.go | 34 +++++------ lib/harmony/harmonytask/harmonytask.go | 65 ++++++++++++---------- lib/harmony/harmonytask/notifyingMx.go | 16 ------ lib/harmony/harmonytask/taskTypeHandler.go | 10 ++-- lib/harmony/resources/resources.go | 4 +- 5 files changed, 60 insertions(+), 69 deletions(-) delete mode 100644 lib/harmony/harmonytask/notifyingMx.go diff --git a/lib/harmony/harmonytask/doc.go b/lib/harmony/harmonytask/doc.go index 357c3e15c..07641976a 100644 --- a/lib/harmony/harmonytask/doc.go +++ b/lib/harmony/harmonytask/doc.go @@ -11,23 
+11,23 @@ machines to accept work (round robin) before trying again to accept. * Mental Model: - Things that block tasks: - - task not registered for any running server - - max was specified and reached - - resource exhaustion - - CanAccept() interface (per-task implmentation) does not accept it. - Ways tasks start: (slowest first) - - DB Read every 1 minute - - Bump via HTTP if registered in DB - - Task was added (to db) by this process - Ways tasks get added: - - Async Listener task (for chain, etc) - - Followers: Tasks get added because another task completed - When Follower collectors run: - - If both sides are process-local, then - - Otherwise, at the listen interval during db scrape - How duplicate tasks are avoided: - - that's up to the task definition, but probably a unique key + Things that block tasks: + - task not registered for any running server + - max was specified and reached + - resource exhaustion + - CanAccept() interface (per-task implmentation) does not accept it. + Ways tasks start: (slowest first) + - DB Read every 1 minute + - Bump via HTTP if registered in DB + - Task was added (to db) by this process + Ways tasks get added: + - Async Listener task (for chain, etc) + - Followers: Tasks get added because another task completed + When Follower collectors run: + - If both sides are process-local, then + - Otherwise, at the listen interval during db scrape + How duplicate tasks are avoided: + - that's up to the task definition, but probably a unique key * To use: diff --git a/lib/harmony/harmonytask/harmonytask.go b/lib/harmony/harmonytask/harmonytask.go index 1f5662959..3f1ede4f9 100644 --- a/lib/harmony/harmonytask/harmonytask.go +++ b/lib/harmony/harmonytask/harmonytask.go @@ -3,12 +3,14 @@ package harmonytask import ( "context" "fmt" + "net/http" "strconv" + "sync" "sync/atomic" "time" "github.com/filecoin-project/lotus/lib/harmony/resources" - "github.com/gin-gonic/gin" + "github.com/gorilla/mux" 
"github.com/filecoin-project/lotus/lib/harmony/harmonydb" ) @@ -94,7 +96,7 @@ type TaskEngine struct { ctx context.Context handlers []*taskTypeHandler db *harmonydb.DB - workAdderMutex *notifyingMx + workAdderMutex sync.Mutex reg *resources.Reg grace context.CancelFunc taskMap map[string]*taskTypeHandler @@ -125,15 +127,14 @@ func New( } ctx, grace := context.WithCancel(context.Background()) e := &TaskEngine{ - ctx: ctx, - grace: grace, - db: db, - reg: reg, - ownerID: reg.Resources.MachineID, // The current number representing "hostAndPort" - workAdderMutex: ¬ifyingMx{}, - taskMap: make(map[string]*taskTypeHandler, len(impls)), - tryAllWork: make(chan bool), - follows: make(map[string][]followStruct), + ctx: ctx, + grace: grace, + db: db, + reg: reg, + ownerID: reg.Resources.MachineID, // The current number representing "hostAndPort" + taskMap: make(map[string]*taskTypeHandler, len(impls)), + tryAllWork: make(chan bool), + follows: make(map[string][]followStruct), } e.lastCleanup.Store(time.Now()) for _, c := range impls { @@ -184,7 +185,7 @@ func New( continue // not really fatal, but not great } } - if !h.considerWork([]TaskID{TaskID(w.ID)}) { + if !h.considerWork("recovered", []TaskID{TaskID(w.ID)}) { log.Error("Strange: Unable to accept previously owned task: ", w.ID, w.Name) } } @@ -297,7 +298,7 @@ func (e *TaskEngine) pollerTryAllWork() { log.Error("Unable to read work ", err) continue } - accepted := v.considerWork(unownedTasks) + accepted := v.considerWork("poller", unownedTasks) if !accepted { log.Warn("Work not accepted") continue @@ -309,17 +310,20 @@ func (e *TaskEngine) pollerTryAllWork() { } } -// AddHttpHandlers TODO this needs to be called by the http server to register routes. +// GetHttpHandlers needs to be used by the http server to register routes. // This implements the receiver-side of "follows" and "bumps" the fast way. 
-func (e *TaskEngine) AddHttpHandlers(root gin.IRouter) { - s := root.Group("/scheduler/") - f := s.Group("/follows") +func (e *TaskEngine) GetHttpHandlers() http.Handler { + root := mux.NewRouter() + s := root.PathPrefix("/scheduler") + f := s.PathPrefix("/follows") + b := s.PathPrefix("/bump") for name, v := range e.follows { - f.GET("/"+name+"/:tID", func(c *gin.Context) { - tIDString := c.Param("tID") + f.Path("/" + name + "/{tID}").Methods("GET").HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + tIDString := mux.Vars(r)["tID"] tID, err := strconv.Atoi(tIDString) if err != nil { - c.AbortWithError(401, err) + w.WriteHeader(401) + fmt.Fprint(w, err.Error()) return } taskAdded := false @@ -328,28 +332,31 @@ func (e *TaskEngine) AddHttpHandlers(root gin.IRouter) { } if taskAdded { e.tryAllWork <- true - c.Status(200) + w.WriteHeader(200) + return } - c.Status(202) // NOTE: 202 for "accepted" but not worked. + w.WriteHeader(202) // NOTE: 202 for "accepted" but not worked. }) } - b := s.Group("/bump") for _, h := range e.handlers { - b.GET("/"+h.Name+"/:tID", func(c *gin.Context) { - tIDString := c.Param("tID") + b.Path("/" + h.Name + "/{tID}").Methods("GET").HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + tIDString := mux.Vars(r)["tID"] tID, err := strconv.Atoi(tIDString) if err != nil { - c.AbortWithError(401, err) + w.WriteHeader(401) + fmt.Fprint(w, err.Error()) return } // We NEED to block while trying to deliver // this work to ease the network impact. - if h.considerWork([]TaskID{TaskID(tID)}) { - c.Status(200) + if h.considerWork("bump", []TaskID{TaskID(tID)}) { + w.WriteHeader(200) + return } - c.Status(202) // NOTE: 202 for "accepted" but not worked. + w.WriteHeader(202) // NOTE: 202 for "accepted" but not worked. 
}) } + return root } func (e *TaskEngine) bump(taskType string) { diff --git a/lib/harmony/harmonytask/notifyingMx.go b/lib/harmony/harmonytask/notifyingMx.go deleted file mode 100644 index 51c4e0a53..000000000 --- a/lib/harmony/harmonytask/notifyingMx.go +++ /dev/null @@ -1,16 +0,0 @@ -package harmonytask - -import "sync" - -type notifyingMx struct { - sync.Mutex - UnlockNotify func() -} - -func (n *notifyingMx) Unlock() { - tmp := n.UnlockNotify - n.Mutex.Unlock() - if tmp != nil { - tmp() - } -} diff --git a/lib/harmony/harmonytask/taskTypeHandler.go b/lib/harmony/harmonytask/taskTypeHandler.go index ed2bd4c8a..6693102f2 100644 --- a/lib/harmony/harmonytask/taskTypeHandler.go +++ b/lib/harmony/harmonytask/taskTypeHandler.go @@ -19,8 +19,7 @@ type taskTypeHandler struct { TaskInterface TaskTypeDetails TaskEngine *TaskEngine - Count atomic.Int32 /// locked by TaskEngine's mutex - + Count atomic.Int32 } func (h *taskTypeHandler) AddTask(extra func(TaskID, *harmonydb.Tx) bool) { @@ -46,12 +45,12 @@ func (h *taskTypeHandler) AddTask(extra func(TaskID, *harmonydb.Tx) bool) { return } - if !h.considerWork([]TaskID{tID}) { + if !h.considerWork("adder", []TaskID{tID}) { h.TaskEngine.bump(h.Name) // We can't do it. How about someone else. 
} } -func (h *taskTypeHandler) considerWork(ids []TaskID) (workAccepted bool) { +func (h *taskTypeHandler) considerWork(from string, ids []TaskID) (workAccepted bool) { top: if len(ids) == 0 { return true // stop looking for takers @@ -104,6 +103,7 @@ top: go func() { h.Count.Add(1) + log.Infow("Beginning work on Task", "id", *tID, "from", from, "type", h.Name) var done bool var doErr error @@ -136,7 +136,7 @@ top: return owner == h.TaskEngine.ownerID }) if doErr != nil { - log.Error("Do("+h.Name+", taskID="+strconv.Itoa(int(*tID))+") returned error: ", doErr) + log.Errorw("Do() returned error", "type", h.Name, "id", strconv.Itoa(int(*tID)), "error", doErr) } }() return true diff --git a/lib/harmony/resources/resources.go b/lib/harmony/resources/resources.go index 77200b873..8f9d69db5 100644 --- a/lib/harmony/resources/resources.go +++ b/lib/harmony/resources/resources.go @@ -38,7 +38,7 @@ type Reg struct { var logger = logging.Logger("harmonytask") -var lotusRE = regexp.MustCompile("lotus-worker|lotus-harmony|yugabyted") +var lotusRE = regexp.MustCompile("lotus-worker|lotus-harmony|yugabyted|yb-master|yb-tserver") func Register(db *harmonydb.DB, hostnameAndPort string) (*Reg, error) { var reg Reg @@ -114,7 +114,7 @@ func getResources() (res Resources, err error) { } } if found > 1 { - logger.Error("This Lotus process should run alone on a machine. Use CGroup.") + logger.Warn("lotus-provider's defaults are for running alone. 
Use task maximums or CGroups.") } } From 11c33b6faff06831de1d44098302e1aa95c78a9b Mon Sep 17 00:00:00 2001 From: "Andrew Jackson (Ajax)" Date: Wed, 16 Aug 2023 17:54:26 -0500 Subject: [PATCH 05/17] harmonytask: linter --- lib/harmony/harmonytask/harmonytask.go | 3 +-- lib/harmony/resources/resources.go | 4 +--- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/lib/harmony/harmonytask/harmonytask.go b/lib/harmony/harmonytask/harmonytask.go index 3f1ede4f9..6a9511547 100644 --- a/lib/harmony/harmonytask/harmonytask.go +++ b/lib/harmony/harmonytask/harmonytask.go @@ -9,10 +9,9 @@ import ( "sync/atomic" "time" + "github.com/filecoin-project/lotus/lib/harmony/harmonydb" "github.com/filecoin-project/lotus/lib/harmony/resources" "github.com/gorilla/mux" - - "github.com/filecoin-project/lotus/lib/harmony/harmonydb" ) // Consts (except for unit test) diff --git a/lib/harmony/resources/resources.go b/lib/harmony/resources/resources.go index 8f9d69db5..fb1505af0 100644 --- a/lib/harmony/resources/resources.go +++ b/lib/harmony/resources/resources.go @@ -16,10 +16,8 @@ import ( "github.com/filecoin-project/lotus/lib/harmony/harmonydb" logging "github.com/ipfs/go-log/v2" "github.com/pbnjay/memory" - - "golang.org/x/sys/unix" - "github.com/samber/lo" + "golang.org/x/sys/unix" ) var LOOKS_DEAD_TIMEOUT = 10 * time.Minute // Time w/o minute heartbeats From 6cdecf60d9d82171b4223dcccd66670cd0fe3b6f Mon Sep 17 00:00:00 2001 From: "Andrew Jackson (Ajax)" Date: Thu, 17 Aug 2023 23:22:23 -0500 Subject: [PATCH 06/17] lint error: go mod tidy --- go.mod | 23 +++-------------------- go.sum | 31 ------------------------------- 2 files changed, 3 insertions(+), 51 deletions(-) diff --git a/go.mod b/go.mod index 661495e89..774d2842d 100644 --- a/go.mod +++ b/go.mod @@ -13,6 +13,7 @@ require ( github.com/GeertJohan/go.rice v1.0.3 github.com/Gurpartap/async v0.0.0-20180927173644-4f7f499dd9ee github.com/Kubuxu/imtui v0.0.0-20210401140320-41663d68d0fa + github.com/Nv7-Github/go-cl 
v0.0.0-20210426150049-f121093b60ef github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d github.com/alecthomas/jsonschema v0.0.0-20200530073317-71f438968921 github.com/buger/goterm v1.0.3 @@ -128,11 +129,13 @@ require ( github.com/multiformats/go-multihash v0.2.3 github.com/multiformats/go-varint v0.0.7 github.com/open-rpc/meta-schema v0.0.0-20201029221707-1b72ef2ea333 + github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 github.com/polydawn/refmt v0.89.0 github.com/prometheus/client_golang v1.16.0 github.com/puzpuzpuz/xsync/v2 v2.4.0 github.com/raulk/clock v1.1.0 github.com/raulk/go-watchdog v1.3.0 + github.com/samber/lo v1.38.1 github.com/stretchr/testify v1.8.4 github.com/syndtr/goleveldb v1.0.1-0.20210819022825-2ae1ddf74ef7 github.com/urfave/cli/v2 v2.25.5 @@ -167,8 +170,6 @@ require ( require ( github.com/GeertJohan/go.incremental v1.0.0 // indirect - github.com/Inkeliz/go-opencl v0.0.0-20200806180703-5f0707fba006 // indirect - github.com/Nv7-Github/go-cl v0.0.0-20210426150049-f121093b60ef // indirect github.com/PuerkitoBio/purell v1.1.1 // indirect github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect github.com/StackExchange/wmi v1.2.1 // indirect @@ -179,10 +180,8 @@ require ( github.com/beorn7/perks v1.0.1 // indirect github.com/bep/debounce v1.2.1 // indirect github.com/boltdb/bolt v1.3.1 // indirect - github.com/bytedance/sonic v1.9.1 // indirect github.com/cespare/xxhash v1.1.0 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect - github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect github.com/cilium/ebpf v0.9.1 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect github.com/crackcomm/go-gitignore v0.0.0-20170627025303-887ab5e44cc3 // indirect @@ -206,9 +205,7 @@ require ( github.com/flynn/noise v1.0.0 // indirect github.com/francoispqt/gojay v1.2.13 // indirect github.com/fsnotify/fsnotify v1.6.0 // indirect - github.com/gabriel-vasile/mimetype v1.4.2 // indirect 
github.com/gdamore/encoding v1.0.0 // indirect - github.com/gin-contrib/sse v0.1.0 // indirect github.com/gin-gonic/gin v1.9.1 // indirect github.com/go-kit/log v0.2.1 // indirect github.com/go-logfmt/logfmt v0.5.1 // indirect @@ -218,11 +215,7 @@ require ( github.com/go-openapi/jsonpointer v0.19.3 // indirect github.com/go-openapi/jsonreference v0.19.4 // indirect github.com/go-openapi/swag v0.19.11 // indirect - github.com/go-playground/locales v0.14.1 // indirect - github.com/go-playground/universal-translator v0.18.1 // indirect - github.com/go-playground/validator/v10 v10.14.0 // indirect github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect - github.com/goccy/go-json v0.10.2 // indirect github.com/godbus/dbus/v5 v5.1.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/glog v1.1.0 // indirect @@ -267,12 +260,10 @@ require ( github.com/joeshaw/multierror v0.0.0-20140124173710-69b34d4ec901 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/jpillora/backoff v1.0.0 // indirect - github.com/json-iterator/go v1.1.12 // indirect github.com/kilic/bls12-381 v0.1.0 // indirect github.com/klauspost/compress v1.16.5 // indirect github.com/klauspost/cpuid/v2 v2.2.5 // indirect github.com/koron/go-ssdp v0.0.4 // indirect - github.com/leodido/go-urn v1.2.4 // indirect github.com/libp2p/go-cidranger v1.1.0 // indirect github.com/libp2p/go-flow-metrics v0.1.0 // indirect github.com/libp2p/go-libp2p-asn-util v0.3.0 // indirect @@ -293,8 +284,6 @@ require ( github.com/mikioh/tcpinfo v0.0.0-20190314235526-30a79bb1804b // indirect github.com/mikioh/tcpopt v0.0.0-20190314235656-172688c1accc // indirect github.com/minio/sha256-simd v1.0.1 // indirect - github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect - github.com/modern-go/reflect2 v1.0.2 // indirect github.com/mr-tron/base58 v1.2.0 // indirect github.com/multiformats/go-base36 v0.2.0 // indirect github.com/multiformats/go-multiaddr-fmt 
v0.1.0 // indirect @@ -305,8 +294,6 @@ require ( github.com/onsi/ginkgo/v2 v2.9.7 // indirect github.com/opencontainers/runtime-spec v1.0.2 // indirect github.com/opentracing/opentracing-go v1.2.0 // indirect - github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 // indirect - github.com/pelletier/go-toml/v2 v2.0.8 // indirect github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect @@ -322,13 +309,10 @@ require ( github.com/rivo/uniseg v0.1.0 // indirect github.com/rs/cors v1.7.0 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect - github.com/samber/lo v1.38.1 // indirect - github.com/samuel/go-opencl v0.0.0-20171108220231-cbcfd10c32ad // indirect github.com/shirou/gopsutil v2.18.12+incompatible // indirect github.com/sirupsen/logrus v1.9.0 // indirect github.com/spaolacci/murmur3 v1.1.0 // indirect github.com/tidwall/gjson v1.14.4 // indirect - github.com/twitchyliquid64/golang-asm v0.15.1 // indirect github.com/twmb/murmur3 v1.1.6 // indirect github.com/ugorji/go/codec v1.2.11 // indirect github.com/valyala/bytebufferpool v1.0.0 // indirect @@ -346,7 +330,6 @@ require ( go.opentelemetry.io/otel/trace v1.16.0 // indirect go.uber.org/dig v1.17.0 // indirect go4.org v0.0.0-20230225012048-214862532bf5 // indirect - golang.org/x/arch v0.3.0 // indirect golang.org/x/mod v0.10.0 // indirect golang.org/x/text v0.10.0 // indirect gonum.org/v1/gonum v0.13.0 // indirect diff --git a/go.sum b/go.sum index 74127c535..7d580a256 100644 --- a/go.sum +++ b/go.sum @@ -59,8 +59,6 @@ github.com/GeertJohan/go.rice v1.0.3 h1:k5viR+xGtIhF61125vCE1cmJ5957RQGXG6dmbaWZ github.com/GeertJohan/go.rice v1.0.3/go.mod h1:XVdrU4pW00M4ikZed5q56tPf1v2KwnIKeIdc9CBYNt4= github.com/Gurpartap/async v0.0.0-20180927173644-4f7f499dd9ee h1:8doiS7ib3zi6/K172oDhSKU0dJ/miJramo9NITOMyZQ= github.com/Gurpartap/async v0.0.0-20180927173644-4f7f499dd9ee/go.mod 
h1:W0GbEAA4uFNYOGG2cJpmFJ04E6SD1NLELPYZB57/7AY= -github.com/Inkeliz/go-opencl v0.0.0-20200806180703-5f0707fba006 h1:TKWkFaRW5EPQyrS1pM0vm3vvqw/jmHu+FkV8gRD+7/w= -github.com/Inkeliz/go-opencl v0.0.0-20200806180703-5f0707fba006/go.mod h1:9ILtD1/UTP/Y7JMCU8loWZMDvhrQuTgHzHatG6z9ZdQ= github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible/go.mod h1:r7JcOSlj0wfOMncg0iLm8Leh48TZaKVeNIfJntJ2wa0= github.com/Kubuxu/go-os-helper v0.0.1/go.mod h1:N8B+I7vPCT80IcP58r50u4+gEEcsZETFUpAzWW2ep1Y= github.com/Kubuxu/imtui v0.0.0-20210401140320-41663d68d0fa h1:1PPxEyGdIGVkX/kqMvLJ95a1dGS1Sz7tpNEgehEYYt0= @@ -146,9 +144,7 @@ github.com/btcsuite/winsvc v1.0.0/go.mod h1:jsenWakMcC0zFBFurPLEAyrnc/teJEM1O46f github.com/buger/goterm v1.0.3 h1:7V/HeAQHrzPk/U4BvyH2g9u+xbUW9nr4yRPyG59W4fM= github.com/buger/goterm v1.0.3/go.mod h1:HiFWV3xnkolgrBV3mY8m0X0Pumt4zg4QhbdOzQtB8tE= github.com/buger/jsonparser v0.0.0-20181115193947-bf1c66bbce23/go.mod h1:bbYlZJ7hK1yFx9hf58LP0zeX7UjIGs20ufpu3evjr+s= -github.com/bytedance/sonic v1.5.0/go.mod h1:ED5hyg4y6t3/9Ku1R6dU/4KyJ48DZ4jPhfY1O2AihPM= github.com/bytedance/sonic v1.9.1 h1:6iJ6NqdoxCDr6mbY8h18oSO+cShGSMRGCEo7F2h0x8s= -github.com/bytedance/sonic v1.9.1/go.mod h1:i736AoUSYt75HyZLoJW9ERYxcy6eaN6h4BZXU064P/U= github.com/casbin/casbin/v2 v2.1.2/go.mod h1:YcPU1XXisHhLzuxH9coDNf2FbKpjGlbCg3n9yuLkIJQ= github.com/cenkalti/backoff v2.2.1+incompatible/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQJJJ3NXXM= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= @@ -159,9 +155,7 @@ github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XL github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cheekybits/genny v1.0.0/go.mod h1:+tQajlRqAUrPI7DOSpB0XAqZYtQakVtB7wXkRAgjxjQ= -github.com/chenzhuoyu/base64x 
v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY= github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 h1:qSGYFH7+jGhDF8vLC+iwCD4WpbV1EBDSzWkJODFLams= -github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/logex v1.2.1 h1:XHDu3E6q+gdHgsdTPH6ImJMIp436vR6MPtH8gP05QzM= github.com/chzyer/logex v1.2.1/go.mod h1:JLbx6lG2kDbNRFnfkgvh4eRJRPX1QCoOIWomwysCBrQ= @@ -397,7 +391,6 @@ github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4 github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY= github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw= github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU= -github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA= github.com/gbrlsnchs/jwt/v3 v3.0.1 h1:lbUmgAKpxnClrKloyIwpxm4OuWeDl5wLk52G91ODPw4= github.com/gbrlsnchs/jwt/v3 v3.0.1/go.mod h1:AncDcjXz18xetI3A6STfXq2w+LuTx8pQ8bGEwRN8zVM= github.com/gdamore/encoding v1.0.0 h1:+7OoQ1Bc6eTm5niUzBa0Ctsh6JbMW6Ra+YNuAtDBdko= @@ -409,7 +402,6 @@ github.com/georgysavva/scany/v2 v2.0.0/go.mod h1:sigOdh+0qb/+aOs3TVhehVT10p8qJL7 github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE= github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= -github.com/gin-gonic/gin v1.6.3 h1:ahKqKTFpO5KTPHxWZjEdPScmYaGtLo8Y4DMHoEsnp14= github.com/gin-gonic/gin v1.6.3/go.mod h1:75u5sXoLsGZoRN5Sgbi1eraJ4GU3++wFwWzhwvtwp4M= github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg= github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU= @@ -457,18 
+449,12 @@ github.com/go-openapi/swag v0.19.8/go.mod h1:ao+8BpOPyKdpQz3AOJfbeEVpLmWAvlT1IfT github.com/go-openapi/swag v0.19.11 h1:RFTu/dlFySpyVvJDfp/7674JY4SDglYWKztbiIGFpmc= github.com/go-openapi/swag v0.19.11/go.mod h1:Uc0gKkdR+ojzsEpjh39QChyu92vPgIr72POcgHMAgSY= github.com/go-playground/assert/v2 v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= -github.com/go-playground/locales v0.13.0 h1:HyWk6mgj5qFqCT5fjGBuRArbVDfE4hi8+e8ceBS/t7Q= github.com/go-playground/locales v0.13.0/go.mod h1:taPMhCMXrRLJO55olJkUXHZBHCxTMfnGwq/HNwmWNS8= github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= -github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY= -github.com/go-playground/universal-translator v0.17.0 h1:icxd5fm+REJzpZx7ZfpaD876Lmtgy7VtROAbHHXk8no= github.com/go-playground/universal-translator v0.17.0/go.mod h1:UkSxE5sNxxRwHyU+Scu5vgOQjsIJAF8j9muTVoKLVtA= github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY= -github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY= -github.com/go-playground/validator/v10 v10.2.0 h1:KgJ0snyC2R9VXYN2rneOtQcw5aHQB1Vv0sFl1UcHBOY= github.com/go-playground/validator/v10 v10.2.0/go.mod h1:uOYAAleCW8F/7oMFd6aG0GOhaH6EGOAJShg8Id5JGkI= github.com/go-playground/validator/v10 v10.14.0 h1:vgvQWe3XCz3gIeFDm/HnTIbj6UGmg/+t63MyGU2n5js= -github.com/go-playground/validator/v10 v10.14.0/go.mod h1:9iXMNT7sEkjXb0I+enO7QXmzG6QCsPWY4zveKFVRSyU= github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= @@ -485,7 +471,6 @@ github.com/gobwas/pool v0.2.0/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6Wezm github.com/gobwas/ws v1.0.2 
h1:CoAavW/wd/kulfZmSIBt6p24n4j7tHgNVCjsfHVNUbo= github.com/gobwas/ws v1.0.2/go.mod h1:szmBTxLgaFppYjEmNtny/v3w89xOydFnnZMcgRRu/EM= github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= -github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= github.com/godbus/dbus/v5 v5.0.3/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/godbus/dbus/v5 v5.1.0 h1:4KLkAxT3aOY8Li4FRJe/KvhoNFFxo0m6fNuFUO8QJUk= @@ -990,10 +975,8 @@ github.com/kr/pty v1.1.5/go.mod h1:9r2w37qlBe7rQ6e1fg1S/9xpWHSnaqNdHD3WcMdbPDA= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/leodido/go-urn v1.2.0 h1:hpXL4XnriNwQ/ABnpepYM/1vCLWNDfUNts8dX3xTG6Y= github.com/leodido/go-urn v1.2.0/go.mod h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII= github.com/leodido/go-urn v1.2.4 h1:XlAE/cm/ms7TE/VMVoduSpNBoyc2dOxHs5MZSwAN63Q= -github.com/leodido/go-urn v1.2.4/go.mod h1:7ZrI8mTSeBSHl/UaRyKQW1qZeMgak41ANeCNaVckg+4= github.com/lib/pq v1.10.0 h1:Zx5DJFEYQXio93kgXnQ09fXNiUKsqv4OUEu2UtGcB1E= github.com/libp2p/go-addr-util v0.0.1/go.mod h1:4ac6O7n9rIAKB1dnd+s8IbbMXkt+oBpzX4/+RACcnlQ= github.com/libp2p/go-addr-util v0.0.2/go.mod h1:Ecd6Fb3yIuLzq4bD7VcywcVSBtefcAwnUISBM3WG15E= @@ -1432,7 +1415,6 @@ github.com/pborman/uuid v1.2.0/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtP github.com/pelletier/go-toml v1.2.0 h1:T5zMGML61Wp+FlcbWjRDT7yAxhJNAiPPLOFECq181zc= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ= -github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4= github.com/performancecopilot/speed 
v3.0.0+incompatible/go.mod h1:/CLtqpZ5gBg1M9iaPbIdPPGyKcA8hKdoy6hAWba7Yac= github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9 h1:1/WtZae0yGtPq+TI6+Tv1WTxkukpXeMlviSxvL7SRgk= github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9/go.mod h1:x3N5drFsm2uilKKuuYo6LdyD8vZAW55sH/9w+pbo1sw= @@ -1541,8 +1523,6 @@ github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd/go.mod h1:hPqNNc0 github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= github.com/samber/lo v1.38.1 h1:j2XEAqXKb09Am4ebOg31SpvzUTTs6EN3VfgeLUhPdXM= github.com/samber/lo v1.38.1/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA= -github.com/samuel/go-opencl v0.0.0-20171108220231-cbcfd10c32ad h1:zyvTnsJPPAqVg2v3bbvTI+RdbVPJufZ+CWCPOX0Dtp8= -github.com/samuel/go-opencl v0.0.0-20171108220231-cbcfd10c32ad/go.mod h1:KCqoxhWgoxCWg13iOq53YFf50jlonuuhIpO916aWEkg= github.com/samuel/go-zookeeper v0.0.0-20190923202752-2cc03de413da/go.mod h1:gi+0XIa01GRL2eRQVjQkKGqKF3SF9vZR/HnPullcV2E= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= github.com/sercand/kuberesolver v2.4.0+incompatible h1:WE2OlRf6wjLxHwNkkFLQGaZcVLEXjMjBPjjEU5vksH8= @@ -1629,8 +1609,6 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= 
github.com/stvp/go-udp-testing v0.0.0-20201019212854-469649b16807/go.mod h1:7jxmlfBCDBXRzr0eAQJ48XC1hBu1np4CS5+cHEYfwpc= @@ -1652,17 +1630,13 @@ github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhso github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM= github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= -github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= github.com/twmb/murmur3 v1.1.6 h1:mqrRot1BRxm+Yct+vavLMou2/iJt0tNVTTC0QoIjaZg= github.com/twmb/murmur3 v1.1.6/go.mod h1:Qq/R7NUyOfr65zD+6Q5IHKsJLwP7exErjN6lyyq3OSQ= github.com/uber/jaeger-client-go v2.30.0+incompatible h1:D6wyKGCecFaSRUpo8lCVbaOOb6ThwMmTEbhRwtKR97o= github.com/uber/jaeger-lib v2.4.1+incompatible h1:td4jdvLcExb4cBISKIpHuGoVXh+dVKhn2Um6rjCsSsg= github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw= -github.com/ugorji/go v1.2.6/go.mod h1:anCg0y61KIhDlPZmnH+so+RQbysYVyDko0IMgJv0Nn0= github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= -github.com/ugorji/go/codec v1.2.6 h1:7kbGefxLoDBuYXOms4yD7223OpNMMPNPZxXk5TvFcyQ= -github.com/ugorji/go/codec v1.2.6/go.mod h1:V6TCNZ4PHqoHGFZuSG1W8nrCzzdgA2DozYxWFFpvxTw= github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU= github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= @@ -1826,9 +1800,7 @@ go4.org v0.0.0-20180809161055-417644f6feb5/go.mod h1:MkTOUMDaeVYJUOUsaDXIhWPZYa1 go4.org v0.0.0-20200411211856-f5505b9728dd/go.mod 
h1:CIiUVy99QCPfoE13bO4EZaz5GZMZXMSBGhxRdsvzbkg= go4.org v0.0.0-20230225012048-214862532bf5 h1:nifaUDeh+rPaBCMPMQHZmvJf+QdpLFnuQPwx+LxVmtc= go4.org v0.0.0-20230225012048-214862532bf5/go.mod h1:F57wTi5Lrj6WLyswp5EYV1ncrEbFGHD4hhz6S1ZYeaU= -golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= golang.org/x/arch v0.3.0 h1:02VY4/ZcO/gBOH6PUaoiptASxtXU10jazRCP865E97k= -golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= golang.org/x/build v0.0.0-20190111050920-041ab4dc3f9d/go.mod h1:OWs+y06UdEOHN4y+MfF/py+xQ/tYqIWW03b70/CG9Rw= golang.org/x/crypto v0.0.0-20170930174604-9419663f5a44/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= @@ -2104,8 +2076,6 @@ golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.9.0 h1:KS/R3tvhPqvJvwcKfnBHJwwthS11LRhmM5D59eEXa0s= -golang.org/x/sys v0.9.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.10.0 h1:SqMFp9UcQJZa+pmYuAKjd9xq1f0j5rLcDIk0mj4qAsA= golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= @@ -2361,7 +2331,6 @@ lukechampine.com/blake3 v1.2.1/go.mod h1:0OFRp7fBtAylGVCO40o87sbupkyIGgbpv1+M1k1 nhooyr.io/websocket v1.8.7 h1:usjR2uOr/zjjkVMy0lW+PPohFok7PCow5sDjLgX4P4g= nhooyr.io/websocket v1.8.7/go.mod h1:B70DZP8IakI65RVQ51MsWP/8jndNma26DVA/nFSCgW0= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= -rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= 
rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= From d732a7b747775ea50ba2139a55cb7c9f3e976efd Mon Sep 17 00:00:00 2001 From: "Andrew Jackson (Ajax)" Date: Fri, 18 Aug 2023 12:53:03 -0500 Subject: [PATCH 07/17] harmonytask: doc fixes --- lib/harmony/harmonytask/doc.go | 7 +++---- lib/harmony/harmonytask/harmonytask.go | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/lib/harmony/harmonytask/doc.go b/lib/harmony/harmonytask/doc.go index 07641976a..44fccb644 100644 --- a/lib/harmony/harmonytask/doc.go +++ b/lib/harmony/harmonytask/doc.go @@ -1,7 +1,6 @@ /* - Package harmomnytask implements a pure (no task logic), distributed - task manager. This clean interface allows a task implementer to completely - +Package harmonytask implements a pure (no task logic), distributed +task manager. This clean interface allows a task implementer to completely avoid being concerned with task scheduling and management. It's based on the idea of tasks as small units of work broken from other work by hardware, parallelizabilty, reliability, or any other reason. @@ -43,7 +42,7 @@ but the design **requires** extraInfo tables to grow until the task's info could not possibly be used by a following task, including slow release rollout. This would normally be in the order of months old. * -Other possible enhancements include more collaboative coordination +Other possible enhancements include more collaborative coordination to assign a task to machines closer to the data. 
__Database_Behavior__ diff --git a/lib/harmony/harmonytask/harmonytask.go b/lib/harmony/harmonytask/harmonytask.go index 6a9511547..9f6fe5fd3 100644 --- a/lib/harmony/harmonytask/harmonytask.go +++ b/lib/harmony/harmonytask/harmonytask.go @@ -206,7 +206,7 @@ func (e *TaskEngine) GracefullyTerminate(deadline time.Duration) { deadlineChan := time.NewTimer(deadline).C // block bumps & follows by unreg from DBs. - _, err := e.db.Exec(context.Background(), `DELETE FROM harmony_task_impl WHERE owner_id=$1`, e.ownerID) + _, err := e.db.Exec(context.TODO(), `DELETE FROM harmony_task_impl WHERE owner_id=$1`, e.ownerID) if err != nil { log.Warn("Could not clean-up impl table: %w", err) } From 2912cf6f9cd4ec60f9c19aae40f71b1d954f346c Mon Sep 17 00:00:00 2001 From: "Andrew Jackson (Ajax)" Date: Mon, 21 Aug 2023 11:26:26 -0500 Subject: [PATCH 08/17] harmonytask review comments --- itests/harmonytask_test.go | 3 ++- lib/harmony/harmonytask/harmonytask.go | 3 ++- lib/harmony/harmonytask/taskTypeHandler.go | 3 ++- lib/harmony/resources/resources.go | 6 ++++-- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/itests/harmonytask_test.go b/itests/harmonytask_test.go index 2c8523d82..16a282728 100644 --- a/itests/harmonytask_test.go +++ b/itests/harmonytask_test.go @@ -10,12 +10,13 @@ import ( "testing" "time" + "github.com/stretchr/testify/require" + "github.com/filecoin-project/lotus/itests/kit" "github.com/filecoin-project/lotus/lib/harmony/harmonydb" "github.com/filecoin-project/lotus/lib/harmony/harmonytask" "github.com/filecoin-project/lotus/lib/harmony/resources" "github.com/filecoin-project/lotus/node/impl" - "github.com/stretchr/testify/require" ) type task1 struct { diff --git a/lib/harmony/harmonytask/harmonytask.go b/lib/harmony/harmonytask/harmonytask.go index 9f6fe5fd3..0aab9241b 100644 --- a/lib/harmony/harmonytask/harmonytask.go +++ b/lib/harmony/harmonytask/harmonytask.go @@ -9,9 +9,10 @@ import ( "sync/atomic" "time" + "github.com/gorilla/mux" + 
"github.com/filecoin-project/lotus/lib/harmony/harmonydb" "github.com/filecoin-project/lotus/lib/harmony/resources" - "github.com/gorilla/mux" ) // Consts (except for unit test) diff --git a/lib/harmony/harmonytask/taskTypeHandler.go b/lib/harmony/harmonytask/taskTypeHandler.go index 6693102f2..41e098083 100644 --- a/lib/harmony/harmonytask/taskTypeHandler.go +++ b/lib/harmony/harmonytask/taskTypeHandler.go @@ -9,8 +9,9 @@ import ( "sync/atomic" "time" - "github.com/filecoin-project/lotus/lib/harmony/harmonydb" logging "github.com/ipfs/go-log/v2" + + "github.com/filecoin-project/lotus/lib/harmony/harmonydb" ) var log = logging.Logger("harmonytask") diff --git a/lib/harmony/resources/resources.go b/lib/harmony/resources/resources.go index fb1505af0..918dbea11 100644 --- a/lib/harmony/resources/resources.go +++ b/lib/harmony/resources/resources.go @@ -12,12 +12,14 @@ import ( "time" cl "github.com/Nv7-Github/go-cl" - ffi "github.com/filecoin-project/filecoin-ffi" - "github.com/filecoin-project/lotus/lib/harmony/harmonydb" logging "github.com/ipfs/go-log/v2" "github.com/pbnjay/memory" "github.com/samber/lo" "golang.org/x/sys/unix" + + ffi "github.com/filecoin-project/filecoin-ffi" + + "github.com/filecoin-project/lotus/lib/harmony/harmonydb" ) var LOOKS_DEAD_TIMEOUT = 10 * time.Minute // Time w/o minute heartbeats From 82d0c2889bd85f874806c5702e87575bc2ee3e71 Mon Sep 17 00:00:00 2001 From: "Andrew Jackson (Ajax)" Date: Mon, 21 Aug 2023 15:31:00 -0500 Subject: [PATCH 09/17] opengl export --- go.mod | 2 +- go.sum | 4 ++-- lib/harmony/resources/resources.go | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/go.mod b/go.mod index 774d2842d..2241f33b5 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,6 @@ require ( github.com/GeertJohan/go.rice v1.0.3 github.com/Gurpartap/async v0.0.0-20180927173644-4f7f499dd9ee github.com/Kubuxu/imtui v0.0.0-20210401140320-41663d68d0fa - github.com/Nv7-Github/go-cl v0.0.0-20210426150049-f121093b60ef 
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d github.com/alecthomas/jsonschema v0.0.0-20200530073317-71f438968921 github.com/buger/goterm v1.0.3 @@ -136,6 +135,7 @@ require ( github.com/raulk/clock v1.1.0 github.com/raulk/go-watchdog v1.3.0 github.com/samber/lo v1.38.1 + github.com/samuel/go-opencl v0.0.0-20171108220231-cbcfd10c32ad github.com/stretchr/testify v1.8.4 github.com/syndtr/goleveldb v1.0.1-0.20210819022825-2ae1ddf74ef7 github.com/urfave/cli/v2 v2.25.5 diff --git a/go.sum b/go.sum index 7d580a256..81bc9dbfb 100644 --- a/go.sum +++ b/go.sum @@ -66,8 +66,6 @@ github.com/Kubuxu/imtui v0.0.0-20210401140320-41663d68d0fa/go.mod h1:WUmMvh9wMtq github.com/Masterminds/glide v0.13.2/go.mod h1:STyF5vcenH/rUqTEv+/hBXlSTo7KYwg2oc2f4tzPWic= github.com/Masterminds/semver v1.4.2/go.mod h1:MB6lktGJrhw8PrUyiEoblNEGEQ+RzHPF078ddwwvV3Y= github.com/Masterminds/vcs v1.13.0/go.mod h1:N09YCmOQr6RLxC6UNHzuVwAdodYbbnycGHSmwVJjcKA= -github.com/Nv7-Github/go-cl v0.0.0-20210426150049-f121093b60ef h1:DiNnYI6NBdeXGOJXptJcrYeDavJf4tImz/B4MOVQtMs= -github.com/Nv7-Github/go-cl v0.0.0-20210426150049-f121093b60ef/go.mod h1:RRVtxaQlBBnbo+n2fgYHhxQmXDkRLKWcWX93lJL0Yhw= github.com/OneOfOne/xxhash v1.2.2 h1:KMrpdQIwFcEqXDklaen+P1axHaj9BSKzvpUUfnHldSE= github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tNFfI= @@ -1523,6 +1521,8 @@ github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd/go.mod h1:hPqNNc0 github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= github.com/samber/lo v1.38.1 h1:j2XEAqXKb09Am4ebOg31SpvzUTTs6EN3VfgeLUhPdXM= github.com/samber/lo v1.38.1/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA= +github.com/samuel/go-opencl v0.0.0-20171108220231-cbcfd10c32ad h1:zyvTnsJPPAqVg2v3bbvTI+RdbVPJufZ+CWCPOX0Dtp8= +github.com/samuel/go-opencl 
v0.0.0-20171108220231-cbcfd10c32ad/go.mod h1:KCqoxhWgoxCWg13iOq53YFf50jlonuuhIpO916aWEkg= github.com/samuel/go-zookeeper v0.0.0-20190923202752-2cc03de413da/go.mod h1:gi+0XIa01GRL2eRQVjQkKGqKF3SF9vZR/HnPullcV2E= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= github.com/sercand/kuberesolver v2.4.0+incompatible h1:WE2OlRf6wjLxHwNkkFLQGaZcVLEXjMjBPjjEU5vksH8= diff --git a/lib/harmony/resources/resources.go b/lib/harmony/resources/resources.go index 918dbea11..98d177328 100644 --- a/lib/harmony/resources/resources.go +++ b/lib/harmony/resources/resources.go @@ -11,10 +11,10 @@ import ( "sync/atomic" "time" - cl "github.com/Nv7-Github/go-cl" logging "github.com/ipfs/go-log/v2" "github.com/pbnjay/memory" "github.com/samber/lo" + "github.com/samuel/go-opencl/cl" "golang.org/x/sys/unix" ffi "github.com/filecoin-project/filecoin-ffi" From eb294c971458dce069a5485e5c169d9ba5d30737 Mon Sep 17 00:00:00 2001 From: "Andrew Jackson (Ajax)" Date: Mon, 21 Aug 2023 16:41:00 -0500 Subject: [PATCH 10/17] opencl harmonytask another try --- lib/harmony/resources/miniopencl/cl.h | 15 ++++ .../resources/miniopencl/miniopencl.go | 87 +++++++++++++++++++ lib/harmony/resources/resources.go | 4 +- 3 files changed, 104 insertions(+), 2 deletions(-) create mode 100644 lib/harmony/resources/miniopencl/cl.h create mode 100644 lib/harmony/resources/miniopencl/miniopencl.go diff --git a/lib/harmony/resources/miniopencl/cl.h b/lib/harmony/resources/miniopencl/cl.h new file mode 100644 index 000000000..5e9b25447 --- /dev/null +++ b/lib/harmony/resources/miniopencl/cl.h @@ -0,0 +1,15 @@ + +#ifndef CL_H +#define CL_H + +#define CL_USE_DEPRECATED_OPENCL_1_1_APIS +#define CL_USE_DEPRECATED_OPENCL_1_2_APIS +#define CL_USE_DEPRECATED_OPENCL_2_0_APIS + +#ifdef __APPLE__ +#include "OpenCL/opencl.h" +#else +#include "CL/opencl.h" +#endif + +#endif /* CL_H */ \ No newline at end of file diff --git a/lib/harmony/resources/miniopencl/miniopencl.go 
b/lib/harmony/resources/miniopencl/miniopencl.go new file mode 100644 index 000000000..6b07e1cba --- /dev/null +++ b/lib/harmony/resources/miniopencl/miniopencl.go @@ -0,0 +1,87 @@ +package cl + +// #include "cl.h" +import "C" +import ( + "fmt" + "unsafe" +) + +const maxPlatforms = 32 + +type Platform struct { + id C.cl_platform_id +} + +// Obtain the list of platforms available. +func GetPlatforms() ([]*Platform, error) { + var platformIds [maxPlatforms]C.cl_platform_id + var nPlatforms C.cl_uint + if err := C.clGetPlatformIDs(C.cl_uint(maxPlatforms), &platformIds[0], &nPlatforms); err != C.CL_SUCCESS { + return nil, toError(err) + } + platforms := make([]*Platform, nPlatforms) + for i := 0; i < int(nPlatforms); i++ { + platforms[i] = &Platform{id: platformIds[i]} + } + return platforms, nil +} + +const maxDeviceCount = 64 + +type DeviceType uint + +const ( + DeviceTypeAll DeviceType = C.CL_DEVICE_TYPE_ALL +) + +type Device struct { + id C.cl_device_id +} + +func (p *Platform) GetAllDevices() ([]*Device, error) { + var deviceIds [maxDeviceCount]C.cl_device_id + var numDevices C.cl_uint + var platformId C.cl_platform_id + if p != nil { + platformId = p.id + } + if err := C.clGetDeviceIDs(platformId, C.cl_device_type(DeviceTypeAll), C.cl_uint(maxDeviceCount), &deviceIds[0], &numDevices); err != C.CL_SUCCESS { + return nil, toError(err) + } + if numDevices > maxDeviceCount { + numDevices = maxDeviceCount + } + devices := make([]*Device, numDevices) + for i := 0; i < int(numDevices); i++ { + devices[i] = &Device{id: deviceIds[i]} + } + return devices, nil +} + +func toError(code C.cl_int) error { + return ErrOther(code) +} + +type ErrOther int + +func (e ErrOther) Error() string { + return fmt.Sprintf("cl: error %d", int(e)) +} + +// Size of global device memory in bytes. 
+func (d *Device) GlobalMemSize() int64 { + val, _ := d.getInfoUlong(C.CL_DEVICE_GLOBAL_MEM_SIZE, true) + return val +} + +func (d *Device) getInfoUlong(param C.cl_device_info, panicOnError bool) (int64, error) { + var val C.cl_ulong + if err := C.clGetDeviceInfo(d.id, param, C.size_t(unsafe.Sizeof(val)), unsafe.Pointer(&val), nil); err != C.CL_SUCCESS { + if panicOnError { + panic("Should never fail") + } + return 0, toError(err) + } + return int64(val), nil +} diff --git a/lib/harmony/resources/resources.go b/lib/harmony/resources/resources.go index 98d177328..94df047d8 100644 --- a/lib/harmony/resources/resources.go +++ b/lib/harmony/resources/resources.go @@ -14,12 +14,12 @@ import ( logging "github.com/ipfs/go-log/v2" "github.com/pbnjay/memory" "github.com/samber/lo" - "github.com/samuel/go-opencl/cl" "golang.org/x/sys/unix" ffi "github.com/filecoin-project/filecoin-ffi" "github.com/filecoin-project/lotus/lib/harmony/harmonydb" + cl "github.com/filecoin-project/lotus/lib/harmony/resources/miniopencl" ) var LOOKS_DEAD_TIMEOUT = 10 * time.Minute // Time w/o minute heartbeats @@ -146,7 +146,7 @@ func getGpuRam() uint64 { } return uint64(lo.SumBy(platforms, func(p *cl.Platform) int64 { - d, err := p.GetDevices(cl.DeviceTypeAll) + d, err := p.GetAllDevices() if err != nil { logger.Error(err) return 0 From 610a8c55e9f72dc15a8b569bd1853e897c3c7654 Mon Sep 17 00:00:00 2001 From: "Andrew Jackson (Ajax)" Date: Mon, 21 Aug 2023 17:12:57 -0500 Subject: [PATCH 11/17] harmonytask more linter cleanups --- lib/harmony/harmonytask/taskTypeHandler.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/harmony/harmonytask/taskTypeHandler.go b/lib/harmony/harmonytask/taskTypeHandler.go index 41e098083..7d11a957f 100644 --- a/lib/harmony/harmonytask/taskTypeHandler.go +++ b/lib/harmony/harmonytask/taskTypeHandler.go @@ -186,7 +186,7 @@ func (h *taskTypeHandler) recordCompletion(tID TaskID, workStart time.Time, done } // Note: Extra Info is left laying around for 
later review & clean-up } else { - tx.Exec(`UPDATE harmony_task SET owner_id=NULL WHERE id=$1`, tID) + _, err := tx.Exec(`UPDATE harmony_task SET owner_id=NULL WHERE id=$1`, tID) if err != nil { log.Error("Could not disown failed task: ", tID, err) return false From 84f4cdfc15ba038ac6cad78cdd1d2538a46dde20 Mon Sep 17 00:00:00 2001 From: "Andrew Jackson (Ajax)" Date: Mon, 21 Aug 2023 17:13:17 -0500 Subject: [PATCH 12/17] harmonytask cleanups --- .circleci/config.yml | 6 ++++++ go.mod | 1 - itests/harmonytask_test.go | 6 ++++-- lib/harmony/harmonytask/harmonytask.go | 15 ++++++++------- lib/harmony/resources/miniopencl/cl.h | 2 ++ 5 files changed, 20 insertions(+), 10 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 7abb9d5fc..64da19e34 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -783,6 +783,12 @@ workflows: - build suite: itest-harmonydb target: "./itests/harmonydb_test.go" + - test: + name: test-itest-harmonytask + requires: + - build + suite: itest-harmonytask + target: "./itests/harmonytask_test.go" - test: name: test-itest-lite_migration requires: diff --git a/go.mod b/go.mod index 2241f33b5..714444dcf 100644 --- a/go.mod +++ b/go.mod @@ -135,7 +135,6 @@ require ( github.com/raulk/clock v1.1.0 github.com/raulk/go-watchdog v1.3.0 github.com/samber/lo v1.38.1 - github.com/samuel/go-opencl v0.0.0-20171108220231-cbcfd10c32ad github.com/stretchr/testify v1.8.4 github.com/syndtr/goleveldb v1.0.1-0.20210819022825-2ae1ddf74ef7 github.com/urfave/cli/v2 v2.25.5 diff --git a/itests/harmonytask_test.go b/itests/harmonytask_test.go index 16a282728..0754d8cca 100644 --- a/itests/harmonytask_test.go +++ b/itests/harmonytask_test.go @@ -50,7 +50,8 @@ func (t *task1) TypeDetails() harmonytask.TaskTypeDetails { } } func (t *task1) Adder(add harmonytask.AddTaskFunc) { - for _, v := range t.toAdd { + for _, vTmp := range t.toAdd { + v := vTmp add(func(tID harmonytask.TaskID, tx *harmonydb.Tx) bool { t.myPersonalTableLock.Lock() defer 
t.myPersonalTableLock.Unlock() @@ -115,7 +116,8 @@ func fooLetterAdder(t *testing.T, cdb *harmonydb.DB) *passthru { dtl: dtl, canAccept: func(list []harmonytask.TaskID) (*harmonytask.TaskID, error) { return nil, nil }, adder: func(add harmonytask.AddTaskFunc) { - for _, v := range []string{"A", "B"} { + for _, vTmp := range []string{"A", "B"} { + v := vTmp add(func(tID harmonytask.TaskID, tx *harmonydb.Tx) bool { _, err := tx.Exec("INSERT INTO itest_scratch (some_int, content) VALUES ($1,$2)", tID, v) require.NoError(t, err) diff --git a/lib/harmony/harmonytask/harmonytask.go b/lib/harmony/harmonytask/harmonytask.go index 0aab9241b..078a19be4 100644 --- a/lib/harmony/harmonytask/harmonytask.go +++ b/lib/harmony/harmonytask/harmonytask.go @@ -248,10 +248,10 @@ func (e *TaskEngine) followWorkInDB() { var lastFollowTime time.Time lastFollowTime, e.lastFollowTime = e.lastFollowTime, time.Now() - for from_name, srcs := range e.follows { + for fromName, srcs := range e.follows { var cList []int // Which work is done (that we follow) since we last checked? err := e.db.Select(e.ctx, &cList, `SELECT h.task_id FROM harmony_task_history - WHERE h.work_end>$1 AND h.name=$2`, lastFollowTime, from_name) + WHERE h.work_end>$1 AND h.name=$2`, lastFollowTime, fromName) if err != nil { log.Error("Could not query DB: ", err) return @@ -269,9 +269,9 @@ func (e *TaskEngine) followWorkInDB() { continue } // we need to create this task - if !src.h.Follows[from_name](TaskID(workAlreadyDone), src.h.AddTask) { + if !src.h.Follows[fromName](TaskID(workAlreadyDone), src.h.AddTask) { // But someone may have beaten us to it. 
- log.Infof("Unable to add task %s following Task(%d, %s)", src.h.Name, workAlreadyDone, from_name) + log.Infof("Unable to add task %s following Task(%d, %s)", src.h.Name, workAlreadyDone, fromName) } } } @@ -317,7 +317,7 @@ func (e *TaskEngine) GetHttpHandlers() http.Handler { s := root.PathPrefix("/scheduler") f := s.PathPrefix("/follows") b := s.PathPrefix("/bump") - for name, v := range e.follows { + for name, vs := range e.follows { f.Path("/" + name + "/{tID}").Methods("GET").HandlerFunc(func(w http.ResponseWriter, r *http.Request) { tIDString := mux.Vars(r)["tID"] tID, err := strconv.Atoi(tIDString) @@ -327,7 +327,7 @@ func (e *TaskEngine) GetHttpHandlers() http.Handler { return } taskAdded := false - for _, v := range v { + for _, v := range vs { taskAdded = taskAdded || v.f(TaskID(tID), v.h.AddTask) } if taskAdded { @@ -338,7 +338,8 @@ func (e *TaskEngine) GetHttpHandlers() http.Handler { w.WriteHeader(202) // NOTE: 202 for "accepted" but not worked. }) } - for _, h := range e.handlers { + for _, hTmp := range e.handlers { + h := hTmp b.Path("/" + h.Name + "/{tID}").Methods("GET").HandlerFunc(func(w http.ResponseWriter, r *http.Request) { tIDString := mux.Vars(r)["tID"] tID, err := strconv.Atoi(tIDString) diff --git a/lib/harmony/resources/miniopencl/cl.h b/lib/harmony/resources/miniopencl/cl.h index 5e9b25447..e90fb7692 100644 --- a/lib/harmony/resources/miniopencl/cl.h +++ b/lib/harmony/resources/miniopencl/cl.h @@ -6,6 +6,8 @@ #define CL_USE_DEPRECATED_OPENCL_1_2_APIS #define CL_USE_DEPRECATED_OPENCL_2_0_APIS +#define CL_TARGET_OPENCL_VERSION 300 + #ifdef __APPLE__ #include "OpenCL/opencl.h" #else From 415a0ac364b946ac67b37996d3382308b933d142 Mon Sep 17 00:00:00 2001 From: "Andrew Jackson (Ajax)" Date: Mon, 21 Aug 2023 17:33:25 -0500 Subject: [PATCH 13/17] harmonytask more lints --- go.sum | 2 -- itests/harmonytask_test.go | 17 +++++++++++++---- lib/harmony/harmonytask/harmonytask.go | 3 ++- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git 
a/go.sum b/go.sum index 81bc9dbfb..496fe2205 100644 --- a/go.sum +++ b/go.sum @@ -1521,8 +1521,6 @@ github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd/go.mod h1:hPqNNc0 github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= github.com/samber/lo v1.38.1 h1:j2XEAqXKb09Am4ebOg31SpvzUTTs6EN3VfgeLUhPdXM= github.com/samber/lo v1.38.1/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA= -github.com/samuel/go-opencl v0.0.0-20171108220231-cbcfd10c32ad h1:zyvTnsJPPAqVg2v3bbvTI+RdbVPJufZ+CWCPOX0Dtp8= -github.com/samuel/go-opencl v0.0.0-20171108220231-cbcfd10c32ad/go.mod h1:KCqoxhWgoxCWg13iOq53YFf50jlonuuhIpO916aWEkg= github.com/samuel/go-zookeeper v0.0.0-20190923202752-2cc03de413da/go.mod h1:gi+0XIa01GRL2eRQVjQkKGqKF3SF9vZR/HnPullcV2E= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= github.com/sercand/kuberesolver v2.4.0+incompatible h1:WE2OlRf6wjLxHwNkkFLQGaZcVLEXjMjBPjjEU5vksH8= diff --git a/itests/harmonytask_test.go b/itests/harmonytask_test.go index 0754d8cca..158a7dc5c 100644 --- a/itests/harmonytask_test.go +++ b/itests/harmonytask_test.go @@ -26,6 +26,15 @@ type task1 struct { WorkCompleted []string } +func withDbSetup(t *testing.T, f func(*kit.TestMiner)) { + _, miner, _ := kit.EnsembleMinimal(t, + kit.LatestActorsAt(-1), + kit.MockProofs(), + ) + + f(miner) +} + func (t *task1) Do(tID harmonytask.TaskID, stillOwned func() bool) (done bool, err error) { if !stillOwned() { return false, errors.New("Why not still owned?") @@ -63,7 +72,7 @@ func (t *task1) Adder(add harmonytask.AddTaskFunc) { } func TestHarmonyTasks(t *testing.T) { - withSetup(t, func(m *kit.TestMiner) { + withDbSetup(t, func(m *kit.TestMiner) { cdb := m.BaseAPI.(*impl.StorageMinerAPI).HarmonyDB t1 := &task1{ toAdd: []int{56, 73}, @@ -145,7 +154,7 @@ func fooLetterSaver(t *testing.T, cdb *harmonydb.DB) *passthru { } func TestHarmonyTasksWith2PartiesPolling(t 
*testing.T) { - withSetup(t, func(m *kit.TestMiner) { + withDbSetup(t, func(m *kit.TestMiner) { cdb := m.BaseAPI.(*impl.StorageMinerAPI).HarmonyDB senderParty := fooLetterAdder(t, cdb) workerParty := fooLetterSaver(t, cdb) @@ -163,7 +172,7 @@ func TestHarmonyTasksWith2PartiesPolling(t *testing.T) { } func TestWorkStealing(t *testing.T) { - withSetup(t, func(m *kit.TestMiner) { + withDbSetup(t, func(m *kit.TestMiner) { cdb := m.BaseAPI.(*impl.StorageMinerAPI).HarmonyDB ctx := context.Background() @@ -190,7 +199,7 @@ func TestWorkStealing(t *testing.T) { } func TestTaskRetry(t *testing.T) { - withSetup(t, func(m *kit.TestMiner) { + withDbSetup(t, func(m *kit.TestMiner) { cdb := m.BaseAPI.(*impl.StorageMinerAPI).HarmonyDB senderParty := fooLetterAdder(t, cdb) harmonytask.POLL_DURATION = time.Millisecond * 100 diff --git a/lib/harmony/harmonytask/harmonytask.go b/lib/harmony/harmonytask/harmonytask.go index 078a19be4..eefe5b8a0 100644 --- a/lib/harmony/harmonytask/harmonytask.go +++ b/lib/harmony/harmonytask/harmonytask.go @@ -327,7 +327,8 @@ func (e *TaskEngine) GetHttpHandlers() http.Handler { return } taskAdded := false - for _, v := range vs { + for _, vTmp := range vs { + v := vTmp taskAdded = taskAdded || v.f(TaskID(tID), v.h.AddTask) } if taskAdded { From 6fd468dfc0d7b721fcd3e6d2ddc7c97627e5b019 Mon Sep 17 00:00:00 2001 From: "Andrew Jackson (Ajax)" Date: Mon, 21 Aug 2023 17:47:43 -0500 Subject: [PATCH 14/17] harmonytask parallel test run gotcha --- itests/harmonytask_test.go | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/itests/harmonytask_test.go b/itests/harmonytask_test.go index 158a7dc5c..cd8c09818 100644 --- a/itests/harmonytask_test.go +++ b/itests/harmonytask_test.go @@ -117,7 +117,6 @@ func (t *passthru) Adder(add harmonytask.AddTaskFunc) { // Common stuff var dtl = harmonytask.TaskTypeDetails{Name: "foo", Max: -1, Cost: resources.Resources{}} -var letters []string var lettersMutex sync.Mutex func 
fooLetterAdder(t *testing.T, cdb *harmonydb.DB) *passthru { @@ -136,7 +135,7 @@ func fooLetterAdder(t *testing.T, cdb *harmonydb.DB) *passthru { }, } } -func fooLetterSaver(t *testing.T, cdb *harmonydb.DB) *passthru { +func fooLetterSaver(t *testing.T, cdb *harmonydb.DB, dest *[]string) *passthru { return &passthru{ dtl: dtl, canAccept: func(list []harmonytask.TaskID) (*harmonytask.TaskID, error) { return &list[0], nil }, @@ -147,7 +146,7 @@ func fooLetterSaver(t *testing.T, cdb *harmonydb.DB) *passthru { require.NoError(t, err) lettersMutex.Lock() defer lettersMutex.Unlock() - letters = append(letters, content) + *dest = append(*dest, content) return true, nil }, } @@ -157,7 +156,8 @@ func TestHarmonyTasksWith2PartiesPolling(t *testing.T) { withDbSetup(t, func(m *kit.TestMiner) { cdb := m.BaseAPI.(*impl.StorageMinerAPI).HarmonyDB senderParty := fooLetterAdder(t, cdb) - workerParty := fooLetterSaver(t, cdb) + var dest []string + workerParty := fooLetterSaver(t, cdb, &dest) harmonytask.POLL_DURATION = time.Millisecond * 100 sender, err := harmonytask.New(cdb, []harmonytask.TaskInterface{senderParty}, "test:1") require.NoError(t, err) @@ -166,8 +166,8 @@ func TestHarmonyTasksWith2PartiesPolling(t *testing.T) { time.Sleep(3 * time.Second) // do the work. FLAKYNESS RISK HERE. sender.GracefullyTerminate(time.Second * 5) worker.GracefullyTerminate(time.Second * 5) - sort.Strings(letters) - require.Equal(t, letters, []string{"A", "B"}) + sort.Strings(dest) + require.Equal(t, dest, []string{"A", "B"}) }) } @@ -190,11 +190,12 @@ func TestWorkStealing(t *testing.T) { harmonytask.POLL_DURATION = time.Millisecond * 100 harmonytask.CLEANUP_FREQUENCY = time.Millisecond * 100 - worker, err := harmonytask.New(cdb, []harmonytask.TaskInterface{fooLetterSaver(t, cdb)}, "test:2") + var dest []string + worker, err := harmonytask.New(cdb, []harmonytask.TaskInterface{fooLetterSaver(t, cdb, &dest)}, "test:2") require.ErrorIs(t, err, nil) time.Sleep(3 * time.Second) // do the work. 
FLAKYNESS RISK HERE. worker.GracefullyTerminate(time.Second * 5) - require.Equal(t, []string{"M"}, letters) + require.Equal(t, []string{"M"}, dest) }) } @@ -207,6 +208,7 @@ func TestTaskRetry(t *testing.T) { require.NoError(t, err) alreadyFailed := map[string]bool{} + var dest []string fails2xPerMsg := &passthru{ dtl: dtl, canAccept: func(list []harmonytask.TaskID) (*harmonytask.TaskID, error) { return &list[0], nil }, @@ -221,7 +223,7 @@ func TestTaskRetry(t *testing.T) { alreadyFailed[content] = true return false, errors.New("intentional 'error'") } - letters = append(letters, content) + dest = append(dest, content) return true, nil }, } @@ -230,8 +232,8 @@ func TestTaskRetry(t *testing.T) { time.Sleep(3 * time.Second) sender.GracefullyTerminate(time.Hour) rcv.GracefullyTerminate(time.Hour) - sort.Strings(letters) - require.Equal(t, []string{"A", "B"}, letters) + sort.Strings(dest) + require.Equal(t, []string{"A", "B"}, dest) type hist struct { TaskID int Result bool From c7aaa16e03136f046242b81ac76090e233ee7ab1 Mon Sep 17 00:00:00 2001 From: "Andrew Jackson (Ajax)" Date: Mon, 21 Aug 2023 18:02:04 -0500 Subject: [PATCH 15/17] harmonytask lints --- itests/harmonytask_test.go | 10 ++-------- lib/harmony/harmonytask/harmonytask.go | 3 ++- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/itests/harmonytask_test.go b/itests/harmonytask_test.go index cd8c09818..979d277d4 100644 --- a/itests/harmonytask_test.go +++ b/itests/harmonytask_test.go @@ -5,7 +5,6 @@ import ( "errors" "fmt" "sort" - "strings" "sync" "testing" "time" @@ -82,14 +81,9 @@ func TestHarmonyTasks(t *testing.T) { require.NoError(t, err) time.Sleep(3 * time.Second) // do the work. FLAKYNESS RISK HERE. 
e.GracefullyTerminate(time.Minute) - require.Equal(t, t1.WorkCompleted, 2, "wrong amount of work complete: expected 2 got:") + expected := []string{"taskResult56", "taskResult73"} sort.Strings(t1.WorkCompleted) - got := strings.Join(t1.WorkCompleted, ",") - expected := "taskResult56,taskResult73" - if got != expected { - t.Fatal("Unexpected results! Wanted " + expected + " got " + got) - } - // TODO test history table looks right. + require.Equal(t, len(t1.WorkCompleted), expected, "unexpected results") }) } diff --git a/lib/harmony/harmonytask/harmonytask.go b/lib/harmony/harmonytask/harmonytask.go index eefe5b8a0..8e5c89e26 100644 --- a/lib/harmony/harmonytask/harmonytask.go +++ b/lib/harmony/harmonytask/harmonytask.go @@ -317,7 +317,8 @@ func (e *TaskEngine) GetHttpHandlers() http.Handler { s := root.PathPrefix("/scheduler") f := s.PathPrefix("/follows") b := s.PathPrefix("/bump") - for name, vs := range e.follows { + for name, vsTmp := range e.follows { + vs := vsTmp f.Path("/" + name + "/{tID}").Methods("GET").HandlerFunc(func(w http.ResponseWriter, r *http.Request) { tIDString := mux.Vars(r)["tID"] tID, err := strconv.Atoi(tIDString) From ec8fd2883460b22d6f5b4f507dc6911b4bc6a048 Mon Sep 17 00:00:00 2001 From: "Andrew Jackson (Ajax)" Date: Mon, 21 Aug 2023 21:55:49 -0500 Subject: [PATCH 16/17] harmonytask work --- itests/harmonytask_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/itests/harmonytask_test.go b/itests/harmonytask_test.go index 979d277d4..80923b1d5 100644 --- a/itests/harmonytask_test.go +++ b/itests/harmonytask_test.go @@ -83,7 +83,7 @@ func TestHarmonyTasks(t *testing.T) { e.GracefullyTerminate(time.Minute) expected := []string{"taskResult56", "taskResult73"} sort.Strings(t1.WorkCompleted) - require.Equal(t, len(t1.WorkCompleted), expected, "unexpected results") + require.Equal(t, t1.WorkCompleted, expected, "unexpected results") }) } From 72917c19cd8e472fa3abd043711ed1fda13827f1 Mon Sep 17 00:00:00 2001 From: "Andrew 
Jackson (Ajax)" Date: Fri, 25 Aug 2023 16:11:31 -0500 Subject: [PATCH 17/17] harmonytask - final review comments --- cmd/lotus-worker/main.go | 1 - itests/harmonytask_test.go | 2 +- lib/harmony/harmonytask/doc.go | 5 +-- lib/harmony/harmonytask/harmonytask.go | 22 +++++++++--- ...askTypeHandler.go => task_type_handler.go} | 7 ++++ .../{miniopencl.go => mini_opencl.go} | 2 ++ lib/harmony/resources/resources.go | 34 +++++++------------ 7 files changed, 42 insertions(+), 31 deletions(-) rename lib/harmony/harmonytask/{taskTypeHandler.go => task_type_handler.go} (97%) rename lib/harmony/resources/miniopencl/{miniopencl.go => mini_opencl.go} (94%) diff --git a/cmd/lotus-worker/main.go b/cmd/lotus-worker/main.go index 995a3cbe0..944791275 100644 --- a/cmd/lotus-worker/main.go +++ b/cmd/lotus-worker/main.go @@ -609,7 +609,6 @@ var runCmd = &cli.Command{ if err := srv.Shutdown(context.TODO()); err != nil { log.Errorf("shutting down RPC server failed: %s", err) } - //taskManager.GracefullyTerminate(5*time.Hour) log.Warn("Graceful shutdown successful") }() diff --git a/itests/harmonytask_test.go b/itests/harmonytask_test.go index 80923b1d5..b36e9ab11 100644 --- a/itests/harmonytask_test.go +++ b/itests/harmonytask_test.go @@ -21,7 +21,7 @@ import ( type task1 struct { toAdd []int myPersonalTableLock sync.Mutex - myPersonalTable map[harmonytask.TaskID]int // This would typicallyb be a DB table + myPersonalTable map[harmonytask.TaskID]int // This would typically be a DB table WorkCompleted []string } diff --git a/lib/harmony/harmonytask/doc.go b/lib/harmony/harmonytask/doc.go index 44fccb644..772b674cd 100644 --- a/lib/harmony/harmonytask/doc.go +++ b/lib/harmony/harmonytask/doc.go @@ -23,8 +23,9 @@ Mental Model: - Async Listener task (for chain, etc) - Followers: Tasks get added because another task completed When Follower collectors run: - - If both sides are process-local, then - - Otherwise, at the listen interval during db scrape + - If both sides are process-local, then 
this process will pick it up. + - If properly registered already, the http endpoint will be tried to start it. + - Otherwise, at the listen interval during db scrape it will be found. How duplicate tasks are avoided: - that's up to the task definition, but probably a unique key diff --git a/lib/harmony/harmonytask/harmonytask.go b/lib/harmony/harmonytask/harmonytask.go index 8e5c89e26..cd401f6d2 100644 --- a/lib/harmony/harmonytask/harmonytask.go +++ b/lib/harmony/harmonytask/harmonytask.go @@ -56,7 +56,7 @@ type TaskInterface interface { // CanAccept should return if the task can run on this machine. It should // return null if the task type is not allowed on this machine. // It should select the task it most wants to accomplish. - // It is also responsible for determining disk space (including scratch). + // It is also responsible for determining & reserving disk space (including scratch). CanAccept([]TaskID) (*TaskID, error) // TypeDetails() returns static details about how this task behaves and @@ -181,7 +181,7 @@ func New( if h == nil { _, err := db.Exec(e.ctx, `UPDATE harmony_task SET owner=NULL WHERE id=$1`, w.ID) if err != nil { - log.Error("Cannot remove self from owner field: ", err) + log.Errorw("Cannot remove self from owner field", "error", err) continue // not really fatal, but not great } } @@ -206,12 +206,14 @@ func (e *TaskEngine) GracefullyTerminate(deadline time.Duration) { e.reg.Shutdown() deadlineChan := time.NewTimer(deadline).C + ctx := context.TODO() + // block bumps & follows by unreg from DBs. 
- _, err := e.db.Exec(context.TODO(), `DELETE FROM harmony_task_impl WHERE owner_id=$1`, e.ownerID) + _, err := e.db.Exec(ctx, `DELETE FROM harmony_task_impl WHERE owner_id=$1`, e.ownerID) if err != nil { log.Warn("Could not clean-up impl table: %w", err) } - _, err = e.db.Exec(context.Background(), `DELETE FROM harmony_task_follow WHERE owner_id=$1`, e.ownerID) + _, err = e.db.Exec(ctx, `DELETE FROM harmony_task_follow WHERE owner_id=$1`, e.ownerID) if err != nil { log.Warn("Could not clean-up impl table: %w", err) } @@ -271,7 +273,7 @@ func (e *TaskEngine) followWorkInDB() { // we need to create this task if !src.h.Follows[fromName](TaskID(workAlreadyDone), src.h.AddTask) { // But someone may have beaten us to it. - log.Infof("Unable to add task %s following Task(%d, %s)", src.h.Name, workAlreadyDone, fromName) + log.Debugf("Unable to add task %s following Task(%d, %s)", src.h.Name, workAlreadyDone, fromName) } } } @@ -386,11 +388,21 @@ func (e *TaskEngine) bump(taskType string) { // resourcesInUse requires workListsMutex to be already locked. 
func (e *TaskEngine) resourcesInUse() resources.Resources { tmp := e.reg.Resources + copy(tmp.GpuRam, e.reg.Resources.GpuRam) for _, t := range e.handlers { ct := t.Count.Load() tmp.Cpu -= int(ct) * t.Cost.Cpu tmp.Gpu -= float64(ct) * t.Cost.Gpu tmp.Ram -= uint64(ct) * t.Cost.Ram + for i := int32(0); i < ct; i++ { + for grIdx, j := range tmp.GpuRam { + if j > t.Cost.GpuRam[0] { + tmp.GpuRam[grIdx] = j - t.Cost.GpuRam[0] + break + } + } + log.Warn("We should never get out of gpuram for what's consumed.") + } } return tmp } diff --git a/lib/harmony/harmonytask/taskTypeHandler.go b/lib/harmony/harmonytask/task_type_handler.go similarity index 97% rename from lib/harmony/harmonytask/taskTypeHandler.go rename to lib/harmony/harmonytask/task_type_handler.go index 7d11a957f..932cfc297 100644 --- a/lib/harmony/harmonytask/taskTypeHandler.go +++ b/lib/harmony/harmonytask/task_type_handler.go @@ -223,6 +223,13 @@ func (h *taskTypeHandler) AssertMachineHasCapacity() error { if r.Gpu-h.Cost.Gpu < 0 { return errors.New("Did not accept " + h.Name + " task: out of available GPU") } + for _, u := range r.GpuRam { + if u > h.Cost.GpuRam[0] { + goto enoughGpuRam + } + } + return errors.New("Did not accept " + h.Name + " task: out of GPURam") +enoughGpuRam: return nil } diff --git a/lib/harmony/resources/miniopencl/miniopencl.go b/lib/harmony/resources/miniopencl/mini_opencl.go similarity index 94% rename from lib/harmony/resources/miniopencl/miniopencl.go rename to lib/harmony/resources/miniopencl/mini_opencl.go index 6b07e1cba..a6bac9582 100644 --- a/lib/harmony/resources/miniopencl/miniopencl.go +++ b/lib/harmony/resources/miniopencl/mini_opencl.go @@ -1,3 +1,5 @@ +// Package cl was borrowed from the go-opencl library which is more complex and +// doesn't compile well for our needs. 
package cl // #include "cl.h" diff --git a/lib/harmony/resources/resources.go b/lib/harmony/resources/resources.go index 94df047d8..115859d75 100644 --- a/lib/harmony/resources/resources.go +++ b/lib/harmony/resources/resources.go @@ -27,7 +27,7 @@ var LOOKS_DEAD_TIMEOUT = 10 * time.Minute // Time w/o minute heartbeats type Resources struct { Cpu int Gpu float64 - GpuRam uint64 + GpuRam []uint64 Ram uint64 MachineID int } @@ -72,7 +72,8 @@ func Register(db *harmonydb.DB, hostnameAndPort string) (*Reg, error) { return nil, err } } - CleanupMachines(context.Background(), db) + cleaned := CleanupMachines(context.Background(), db) + logger.Infow("Cleaned up machines", "count", cleaned) } go func() { for { @@ -138,21 +139,24 @@ func getResources() (res Resources, err error) { return res, nil } -func getGpuRam() uint64 { +func getGpuRam() (res []uint64) { platforms, err := cl.GetPlatforms() if err != nil { logger.Error(err) - return 0 + return res } - return uint64(lo.SumBy(platforms, func(p *cl.Platform) int64 { + lo.ForEach(platforms, func(p *cl.Platform, i int) { d, err := p.GetAllDevices() if err != nil { logger.Error(err) - return 0 + return } - return lo.SumBy(d, func(d *cl.Device) int64 { return d.GlobalMemSize() }) - })) + lo.ForEach(d, func(d *cl.Device, i int) { + res = append(res, uint64(d.GlobalMemSize())) + }) + }) + return res } func DiskFree(path string) (uint64, error) { @@ -164,17 +168,3 @@ func DiskFree(path string) (uint64, error) { return s.Bfree * uint64(s.Bsize), nil } - -/* NOT for Darwin. -func GetMemFree() uint64 { - in := unix.Sysinfo_t{} - err := unix.Sysinfo(&in) - if err != nil { - return 0 - } - // If this is a 32-bit system, then these fields are - // uint32 instead of uint64. - // So we always convert to uint64 to match signature. - return uint64(in.Freeram) * uint64(in.Unit) -} -*/