harmonytask: better messages
This commit is contained in:
parent
dfb029cb30
commit
497e4e5ab5
@ -3,12 +3,14 @@ package harmonytask
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"net/http"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
"sync"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/filecoin-project/lotus/lib/harmony/resources"
|
"github.com/filecoin-project/lotus/lib/harmony/resources"
|
||||||
"github.com/gin-gonic/gin"
|
"github.com/gorilla/mux"
|
||||||
|
|
||||||
"github.com/filecoin-project/lotus/lib/harmony/harmonydb"
|
"github.com/filecoin-project/lotus/lib/harmony/harmonydb"
|
||||||
)
|
)
|
||||||
@ -94,7 +96,7 @@ type TaskEngine struct {
|
|||||||
ctx context.Context
|
ctx context.Context
|
||||||
handlers []*taskTypeHandler
|
handlers []*taskTypeHandler
|
||||||
db *harmonydb.DB
|
db *harmonydb.DB
|
||||||
workAdderMutex *notifyingMx
|
workAdderMutex sync.Mutex
|
||||||
reg *resources.Reg
|
reg *resources.Reg
|
||||||
grace context.CancelFunc
|
grace context.CancelFunc
|
||||||
taskMap map[string]*taskTypeHandler
|
taskMap map[string]*taskTypeHandler
|
||||||
@ -130,7 +132,6 @@ func New(
|
|||||||
db: db,
|
db: db,
|
||||||
reg: reg,
|
reg: reg,
|
||||||
ownerID: reg.Resources.MachineID, // The current number representing "hostAndPort"
|
ownerID: reg.Resources.MachineID, // The current number representing "hostAndPort"
|
||||||
workAdderMutex: &notifyingMx{},
|
|
||||||
taskMap: make(map[string]*taskTypeHandler, len(impls)),
|
taskMap: make(map[string]*taskTypeHandler, len(impls)),
|
||||||
tryAllWork: make(chan bool),
|
tryAllWork: make(chan bool),
|
||||||
follows: make(map[string][]followStruct),
|
follows: make(map[string][]followStruct),
|
||||||
@ -184,7 +185,7 @@ func New(
|
|||||||
continue // not really fatal, but not great
|
continue // not really fatal, but not great
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if !h.considerWork([]TaskID{TaskID(w.ID)}) {
|
if !h.considerWork("recovered", []TaskID{TaskID(w.ID)}) {
|
||||||
log.Error("Strange: Unable to accept previously owned task: ", w.ID, w.Name)
|
log.Error("Strange: Unable to accept previously owned task: ", w.ID, w.Name)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -297,7 +298,7 @@ func (e *TaskEngine) pollerTryAllWork() {
|
|||||||
log.Error("Unable to read work ", err)
|
log.Error("Unable to read work ", err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
accepted := v.considerWork(unownedTasks)
|
accepted := v.considerWork("poller", unownedTasks)
|
||||||
if !accepted {
|
if !accepted {
|
||||||
log.Warn("Work not accepted")
|
log.Warn("Work not accepted")
|
||||||
continue
|
continue
|
||||||
@ -309,17 +310,20 @@ func (e *TaskEngine) pollerTryAllWork() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// AddHttpHandlers TODO this needs to be called by the http server to register routes.
|
// GetHttpHandlers needs to be used by the http server to register routes.
|
||||||
// This implements the receiver-side of "follows" and "bumps" the fast way.
|
// This implements the receiver-side of "follows" and "bumps" the fast way.
|
||||||
func (e *TaskEngine) AddHttpHandlers(root gin.IRouter) {
|
func (e *TaskEngine) GetHttpHandlers() http.Handler {
|
||||||
s := root.Group("/scheduler/")
|
root := mux.NewRouter()
|
||||||
f := s.Group("/follows")
|
s := root.PathPrefix("/scheduler")
|
||||||
|
f := s.PathPrefix("/follows")
|
||||||
|
b := s.PathPrefix("/bump")
|
||||||
for name, v := range e.follows {
|
for name, v := range e.follows {
|
||||||
f.GET("/"+name+"/:tID", func(c *gin.Context) {
|
f.Path("/" + name + "/{tID}").Methods("GET").HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
tIDString := c.Param("tID")
|
tIDString := mux.Vars(r)["tID"]
|
||||||
tID, err := strconv.Atoi(tIDString)
|
tID, err := strconv.Atoi(tIDString)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
c.AbortWithError(401, err)
|
w.WriteHeader(401)
|
||||||
|
fmt.Fprint(w, err.Error())
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
taskAdded := false
|
taskAdded := false
|
||||||
@ -328,28 +332,31 @@ func (e *TaskEngine) AddHttpHandlers(root gin.IRouter) {
|
|||||||
}
|
}
|
||||||
if taskAdded {
|
if taskAdded {
|
||||||
e.tryAllWork <- true
|
e.tryAllWork <- true
|
||||||
c.Status(200)
|
w.WriteHeader(200)
|
||||||
|
return
|
||||||
}
|
}
|
||||||
c.Status(202) // NOTE: 202 for "accepted" but not worked.
|
w.WriteHeader(202) // NOTE: 202 for "accepted" but not worked.
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
b := s.Group("/bump")
|
|
||||||
for _, h := range e.handlers {
|
for _, h := range e.handlers {
|
||||||
b.GET("/"+h.Name+"/:tID", func(c *gin.Context) {
|
b.Path("/" + h.Name + "/{tID}").Methods("GET").HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
tIDString := c.Param("tID")
|
tIDString := mux.Vars(r)["tID"]
|
||||||
tID, err := strconv.Atoi(tIDString)
|
tID, err := strconv.Atoi(tIDString)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
c.AbortWithError(401, err)
|
w.WriteHeader(401)
|
||||||
|
fmt.Fprint(w, err.Error())
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
// We NEED to block while trying to deliver
|
// We NEED to block while trying to deliver
|
||||||
// this work to ease the network impact.
|
// this work to ease the network impact.
|
||||||
if h.considerWork([]TaskID{TaskID(tID)}) {
|
if h.considerWork("bump", []TaskID{TaskID(tID)}) {
|
||||||
c.Status(200)
|
w.WriteHeader(200)
|
||||||
|
return
|
||||||
}
|
}
|
||||||
c.Status(202) // NOTE: 202 for "accepted" but not worked.
|
w.WriteHeader(202) // NOTE: 202 for "accepted" but not worked.
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
return root
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *TaskEngine) bump(taskType string) {
|
func (e *TaskEngine) bump(taskType string) {
|
||||||
|
@ -1,16 +0,0 @@
|
|||||||
package harmonytask
|
|
||||||
|
|
||||||
import "sync"
|
|
||||||
|
|
||||||
type notifyingMx struct {
|
|
||||||
sync.Mutex
|
|
||||||
UnlockNotify func()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (n *notifyingMx) Unlock() {
|
|
||||||
tmp := n.UnlockNotify
|
|
||||||
n.Mutex.Unlock()
|
|
||||||
if tmp != nil {
|
|
||||||
tmp()
|
|
||||||
}
|
|
||||||
}
|
|
@ -19,8 +19,7 @@ type taskTypeHandler struct {
|
|||||||
TaskInterface
|
TaskInterface
|
||||||
TaskTypeDetails
|
TaskTypeDetails
|
||||||
TaskEngine *TaskEngine
|
TaskEngine *TaskEngine
|
||||||
Count atomic.Int32 /// locked by TaskEngine's mutex
|
Count atomic.Int32
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h *taskTypeHandler) AddTask(extra func(TaskID, *harmonydb.Tx) bool) {
|
func (h *taskTypeHandler) AddTask(extra func(TaskID, *harmonydb.Tx) bool) {
|
||||||
@ -46,12 +45,12 @@ func (h *taskTypeHandler) AddTask(extra func(TaskID, *harmonydb.Tx) bool) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if !h.considerWork([]TaskID{tID}) {
|
if !h.considerWork("adder", []TaskID{tID}) {
|
||||||
h.TaskEngine.bump(h.Name) // We can't do it. How about someone else.
|
h.TaskEngine.bump(h.Name) // We can't do it. How about someone else.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h *taskTypeHandler) considerWork(ids []TaskID) (workAccepted bool) {
|
func (h *taskTypeHandler) considerWork(from string, ids []TaskID) (workAccepted bool) {
|
||||||
top:
|
top:
|
||||||
if len(ids) == 0 {
|
if len(ids) == 0 {
|
||||||
return true // stop looking for takers
|
return true // stop looking for takers
|
||||||
@ -104,6 +103,7 @@ top:
|
|||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
h.Count.Add(1)
|
h.Count.Add(1)
|
||||||
|
log.Infow("Beginning work on Task", "id", *tID, "from", from, "type", h.Name)
|
||||||
|
|
||||||
var done bool
|
var done bool
|
||||||
var doErr error
|
var doErr error
|
||||||
@ -136,7 +136,7 @@ top:
|
|||||||
return owner == h.TaskEngine.ownerID
|
return owner == h.TaskEngine.ownerID
|
||||||
})
|
})
|
||||||
if doErr != nil {
|
if doErr != nil {
|
||||||
log.Error("Do("+h.Name+", taskID="+strconv.Itoa(int(*tID))+") returned error: ", doErr)
|
log.Errorw("Do() returned error", "type", h.Name, "id", strconv.Itoa(int(*tID)), "error", doErr)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
return true
|
return true
|
||||||
|
@ -38,7 +38,7 @@ type Reg struct {
|
|||||||
|
|
||||||
var logger = logging.Logger("harmonytask")
|
var logger = logging.Logger("harmonytask")
|
||||||
|
|
||||||
var lotusRE = regexp.MustCompile("lotus-worker|lotus-harmony|yugabyted")
|
var lotusRE = regexp.MustCompile("lotus-worker|lotus-harmony|yugabyted|yb-master|yb-tserver")
|
||||||
|
|
||||||
func Register(db *harmonydb.DB, hostnameAndPort string) (*Reg, error) {
|
func Register(db *harmonydb.DB, hostnameAndPort string) (*Reg, error) {
|
||||||
var reg Reg
|
var reg Reg
|
||||||
@ -114,7 +114,7 @@ func getResources() (res Resources, err error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if found > 1 {
|
if found > 1 {
|
||||||
logger.Error("This Lotus process should run alone on a machine. Use CGroup.")
|
logger.Warn("lotus-provider's defaults are for running alone. Use task maximums or CGroups.")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user