From 35c07be18051854d4f19a799beb1952d40ebb905 Mon Sep 17 00:00:00 2001 From: David Boreham Date: Wed, 14 Aug 2024 13:55:41 -0600 Subject: [PATCH] Add CI job for k8s deployment control test --- .../workflows/test-k8s-deployment-control.yml | 69 +++++++++++++++++++ .../triggers/test-k8s-deployment-control | 0 tests/k8s-deployment-control/run-test.sh | 43 +++++++++--- 3 files changed, 103 insertions(+), 9 deletions(-) create mode 100644 .gitea/workflows/test-k8s-deployment-control.yml create mode 100644 .gitea/workflows/triggers/test-k8s-deployment-control diff --git a/.gitea/workflows/test-k8s-deployment-control.yml b/.gitea/workflows/test-k8s-deployment-control.yml new file mode 100644 index 00000000..f4848a6a --- /dev/null +++ b/.gitea/workflows/test-k8s-deployment-control.yml @@ -0,0 +1,69 @@ +name: K8s Deployment Control Test + +on: + pull_request: + branches: '*' + push: + branches: '*' + paths: + - '!**' + - '.gitea/workflows/triggers/test-k8s-deployment-control' + - '.gitea/workflows/test-k8s-deployment-control.yml' + - 'tests/k8s-deployment-control/run-test.sh' + schedule: # Note: coordinate with other tests to not overload runners at the same time of day + - cron: '30 3 * * *' + +jobs: + test: + name: "Run deployment control suite on kind/k8s" + runs-on: ubuntu-22.04 + steps: + - name: "Clone project repository" + uses: actions/checkout@v3 + # At present the stock setup-python action fails on Linux/aarch64 + # Conditional steps below workaround this by using deadsnakes for that case only + - name: "Install Python for ARM on Linux" + if: ${{ runner.arch == 'arm64' && runner.os == 'Linux' }} + uses: deadsnakes/action@v3.0.1 + with: + python-version: '3.8' + - name: "Install Python cases other than ARM on Linux" + if: ${{ ! 
(runner.arch == 'arm64' && runner.os == 'Linux') }} + uses: actions/setup-python@v4 + with: + python-version: '3.8' + - name: "Print Python version" + run: python3 --version + - name: "Install shiv" + run: pip install shiv + - name: "Generate build version file" + run: ./scripts/create_build_tag_file.sh + - name: "Build local shiv package" + run: ./scripts/build_shiv_package.sh + - name: "Check cgroups version" + run: mount | grep cgroup + - name: "Install kind" + run: ./tests/scripts/install-kind.sh + - name: "Install Kubectl" + run: ./tests/scripts/install-kubectl.sh + - name: "Run k8s deployment control test" + run: | + source /opt/bash-utils/cgroup-helper.sh + join_cgroup + ./tests/k8s-deployment-control/run-test.sh + - name: Notify Vulcanize Slack on CI failure + if: ${{ always() && github.ref_name == 'main' }} + uses: ravsamhq/notify-slack-action@v2 + with: + status: ${{ job.status }} + notify_when: 'failure' + env: + SLACK_WEBHOOK_URL: ${{ secrets.VULCANIZE_SLACK_CI_ALERTS }} + - name: Notify DeepStack Slack on CI failure + if: ${{ always() && github.ref_name == 'main' }} + uses: ravsamhq/notify-slack-action@v2 + with: + status: ${{ job.status }} + notify_when: 'failure' + env: + SLACK_WEBHOOK_URL: ${{ secrets.DEEPSTACK_SLACK_CI_ALERTS }} diff --git a/.gitea/workflows/triggers/test-k8s-deployment-control b/.gitea/workflows/triggers/test-k8s-deployment-control new file mode 100644 index 00000000..e69de29b diff --git a/tests/k8s-deployment-control/run-test.sh b/tests/k8s-deployment-control/run-test.sh index ac9bf004..31f9c7dd 100755 --- a/tests/k8s-deployment-control/run-test.sh +++ b/tests/k8s-deployment-control/run-test.sh @@ -113,10 +113,18 @@ echo "deploy create output file test: passed" # hostPort: 80 # We need to change it to this: +# Note we also turn up the log level on the scheduler in order to diagnose placement errors +# See logs like: kubectl -n kube-system logs kube-scheduler-laconic-f185cd245d8dba98-control-plane 
kind_config_file=${test_deployment_dir}/kind-config.yml cat << EOF > ${kind_config_file} kind: Cluster apiVersion: kind.x-k8s.io/v1alpha4 +kubeadmConfigPatches: +- | + kind: ClusterConfiguration + scheduler: + extraArgs: + v: "3" nodes: - role: control-plane kubeadmConfigPatches: @@ -143,7 +151,7 @@ nodes: nodeRegistration: taints: - key: "nodeavoid" - value: "a" + value: "c" effect: "NoSchedule" EOF @@ -165,32 +173,49 @@ EOF # We can now modify the deployment spec file to require a set of affinity and/or taint combinations # then bring up the deployment and check that the pod is scheduled to an expected node. -# Add a requirement to schedule on a node labeled nodetype=c +# Add a requirement to schedule on a node labeled nodetype=c and +# a toleration such that no other pods schedule on that node deployment_spec_file=${test_deployment_dir}/spec.yml cat << EOF >> ${deployment_spec_file} node-affinities: - label: nodetype value: c +node-tolerations: + - key: nodeavoid + value: c EOF +# Get the deployment ID so we can generate low level kubectl commands later +deployment_id=$(cat ${test_deployment_dir}/deployment.yml | cut -d ' ' -f 2) + # Try to start the deployment $TEST_TARGET_SO deployment --dir $test_deployment_dir start wait_for_pods_started # Check logs command works wait_for_log_output sleep 1 -log_output_3=$( $TEST_TARGET_SO deployment --dir $test_deployment_dir logs ) -if [[ "$log_output_3" == *"filesystem is fresh"* ]]; then - echo "deployment logs test: passed" +log_output_1=$( $TEST_TARGET_SO deployment --dir $test_deployment_dir logs ) +if [[ "$log_output_1" == *"filesystem is fresh"* ]]; then + echo "deployment of pod test: passed" else - echo "deployment logs test: FAILED" - echo $log_output_3 + echo "deployment pod test: FAILED" + echo $log_output_1 delete_cluster_exit fi # The deployment's pod should be scheduled onto node: worker3 - -exit 1 +# Check that's what happened +# Get the node onto which the stack pod has been deployed 
+deployment_node=$(kubectl get pods -l app=${deployment_id} -o=jsonpath='{.items..spec.nodeName}') +expected_node=${deployment_id}-worker3 +echo "Stack pod deployed to node: ${deployment_node}" +if [[ ${deployment_node} == ${expected_node} ]]; then + echo "deployment of pod test: passed" +else + echo "deployment pod test: FAILED" + echo "Stack pod deployed to node: ${deployment_node}, expected node: ${expected_node}" + delete_cluster_exit +fi # Stop and clean up $TEST_TARGET_SO deployment --dir $test_deployment_dir stop --delete-volumes