fix(test): wait for kind cluster cleanup before recreating

Replace the fixed `sleep 20` with a polling loop that checks `kind get clusters` every 2 seconds, up to 60 times, until it reports no clusters; if a cluster is still listed after the last attempt, the test exits with status 1. The previous approach was flaky on CI runners where Docker takes longer to tear down cgroup hierarchies after `kind delete cluster`.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-10 05:26:48 +00:00 · 2026-03-10 05:26:48 +00:00 · 108f13a09b
commit 108f13a09b
parent d64046df55
1 changed files with 16 additions and 3 deletions
--- a/tests/k8s-deploy/run-deploy-test.sh
+++ b/tests/k8s-deploy/run-deploy-test.sh
@@ -46,6 +46,18 @@ wait_for_log_output () {
 }


+wait_for_cluster_destroyed () {
+    for i in {1..60}
+    do
+        if ! kind get clusters 2>/dev/null | grep -q .; then
+            return
+        fi
+        sleep 2
+    done
+    echo "waiting for kind cluster cleanup: FAILED"
+    exit 1
+}
+
 delete_cluster_exit () {
    $TEST_TARGET_SO deployment --dir $test_deployment_dir stop --delete-volumes
    exit 1
@@ -227,9 +239,10 @@ fi

 # Stop then start again and check the volume was preserved
 $TEST_TARGET_SO deployment --dir $test_deployment_dir stop
-# Sleep a bit just in case
-# sleep for longer to check if that's why the subsequent create cluster fails
-sleep 20
+# Wait for the kind cluster to be fully destroyed before recreating it.
+# Without this, the second 'kind create cluster' can fail with cgroup
+# detection errors because Docker hasn't finished cleaning up.
+wait_for_cluster_destroyed
 $TEST_TARGET_SO deployment --dir $test_deployment_dir start
 wait_for_pods_started
 wait_for_log_output