Merge pull request #11498 from filecoin-project/fix/harmony-reclaim

harmony: Fix task reclaim on restart
This commit is contained in:
Andrew Jackson (Ajax) 2023-12-07 09:46:33 -06:00 committed by GitHub
commit cf8fed9440
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 25 additions and 17 deletions

View File

@ -176,7 +176,7 @@ func New(
continue // not really fatal, but not great continue // not really fatal, but not great
} }
} }
if !h.considerWork("recovered", []TaskID{TaskID(w.ID)}) { if !h.considerWork(workSourceRecover, []TaskID{TaskID(w.ID)}) {
log.Error("Strange: Unable to accept previously owned task: ", w.ID, w.Name) log.Error("Strange: Unable to accept previously owned task: ", w.ID, w.Name)
} }
} }
@ -285,7 +285,7 @@ func (e *TaskEngine) pollerTryAllWork() {
continue continue
} }
if len(unownedTasks) > 0 { if len(unownedTasks) > 0 {
accepted := v.considerWork("poller", unownedTasks) accepted := v.considerWork(workSourcePoller, unownedTasks)
if accepted { if accepted {
return // accept new work slowly and in priority order return // accept new work slowly and in priority order
} }

View File

@ -49,6 +49,11 @@ func (h *taskTypeHandler) AddTask(extra func(TaskID, *harmonydb.Tx) (bool, error
} }
} }
// Values passed as the `from` argument of considerWork to identify where a
// batch of task IDs originated.
const (
// workSourcePoller is used by the poller (pollerTryAllWork) when offering
// tasks that are not yet owned.
workSourcePoller = "poller"
// workSourceRecover is used at startup for tasks this node already owned;
// considerWork skips the ownership-claim step for this source.
workSourceRecover = "recovered"
)
// considerWork is called to attempt to start work on a task-id of this task type. // considerWork is called to attempt to start work on a task-id of this task type.
// It presumes single-threaded calling, so there should not be a multi-threaded re-entry. // It presumes single-threaded calling, so there should not be a multi-threaded re-entry.
// The only caller should be the one work poller thread. This does spin off other threads, // The only caller should be the one work poller thread. This does spin off other threads,
@ -87,6 +92,8 @@ top:
return false return false
} }
// if recovering we don't need to try to claim anything because those tasks are already claimed by us
if from != workSourceRecover {
// 4. Can we claim the work for our hostname? // 4. Can we claim the work for our hostname?
ct, err := h.TaskEngine.db.Exec(h.TaskEngine.ctx, "UPDATE harmony_task SET owner_id=$1 WHERE id=$2 AND owner_id IS NULL", h.TaskEngine.ownerID, *tID) ct, err := h.TaskEngine.db.Exec(h.TaskEngine.ctx, "UPDATE harmony_task SET owner_id=$1 WHERE id=$2 AND owner_id IS NULL", h.TaskEngine.ownerID, *tID)
if err != nil { if err != nil {
@ -104,6 +111,7 @@ top:
ids = tryAgain ids = tryAgain
goto top goto top
} }
}
h.Count.Add(1) h.Count.Add(1)
go func() { go func() {