From 35c07be18051854d4f19a799beb1952d40ebb905 Mon Sep 17 00:00:00 2001 From: David Boreham Date: Wed, 14 Aug 2024 13:55:41 -0600 Subject: [PATCH] Add CI job for k8s deployment control test --- .../workflows/test-k8s-deployment-control.yml | 69 +++++++++++++++++++ .../triggers/test-k8s-deployment-control | 0 tests/k8s-deployment-control/run-test.sh | 43 +++++++++--- 3 files changed, 103 insertions(+), 9 deletions(-) create mode 100644 .gitea/workflows/test-k8s-deployment-control.yml create mode 100644 .gitea/workflows/triggers/test-k8s-deployment-control diff --git a/.gitea/workflows/test-k8s-deployment-control.yml b/.gitea/workflows/test-k8s-deployment-control.yml new file mode 100644 index 00000000..f4848a6a --- /dev/null +++ b/.gitea/workflows/test-k8s-deployment-control.yml @@ -0,0 +1,69 @@ +name: K8s Deployment Control Test + +on: + pull_request: + branches: '*' + push: + branches: '*' + paths: + - '!**' + - '.gitea/workflows/triggers/test-k8s-deployment-control' + - '.gitea/workflows/test-k8s-deployment-control.yml' + - 'tests/k8s-deployment-control/run-test.sh' + schedule: # Note: coordinate with other tests to not overload runners at the same time of day + - cron: '30 3 * * *' + +jobs: + test: + name: "Run deployment control suite on kind/k8s" + runs-on: ubuntu-22.04 + steps: + - name: "Clone project repository" + uses: actions/checkout@v3 + # At present the stock setup-python action fails on Linux/aarch64 + # Conditional steps below workaround this by using deadsnakes for that case only + - name: "Install Python for ARM on Linux" + if: ${{ runner.arch == 'arm64' && runner.os == 'Linux' }} + uses: deadsnakes/action@v3.0.1 + with: + python-version: '3.8' + - name: "Install Python cases other than ARM on Linux" + if: ${{ ! 
(runner.arch == 'arm64' && runner.os == 'Linux') }} + uses: actions/setup-python@v4 + with: + python-version: '3.8' + - name: "Print Python version" + run: python3 --version + - name: "Install shiv" + run: pip install shiv + - name: "Generate build version file" + run: ./scripts/create_build_tag_file.sh + - name: "Build local shiv package" + run: ./scripts/build_shiv_package.sh + - name: "Check cgroups version" + run: mount | grep cgroup + - name: "Install kind" + run: ./tests/scripts/install-kind.sh + - name: "Install Kubectl" + run: ./tests/scripts/install-kubectl.sh + - name: "Run k8s deployment control test" + run: | + source /opt/bash-utils/cgroup-helper.sh + join_cgroup + ./tests/k8s-deployment-control/run-test.sh + - name: Notify Vulcanize Slack on CI failure + if: ${{ always() && github.ref_name == 'main' }} + uses: ravsamhq/notify-slack-action@v2 + with: + status: ${{ job.status }} + notify_when: 'failure' + env: + SLACK_WEBHOOK_URL: ${{ secrets.VULCANIZE_SLACK_CI_ALERTS }} + - name: Notify DeepStack Slack on CI failure + if: ${{ always() && github.ref_name == 'main' }} + uses: ravsamhq/notify-slack-action@v2 + with: + status: ${{ job.status }} + notify_when: 'failure' + env: + SLACK_WEBHOOK_URL: ${{ secrets.DEEPSTACK_SLACK_CI_ALERTS }} diff --git a/.gitea/workflows/triggers/test-k8s-deployment-control b/.gitea/workflows/triggers/test-k8s-deployment-control new file mode 100644 index 00000000..e69de29b diff --git a/tests/k8s-deployment-control/run-test.sh b/tests/k8s-deployment-control/run-test.sh index ac9bf004..31f9c7dd 100755 --- a/tests/k8s-deployment-control/run-test.sh +++ b/tests/k8s-deployment-control/run-test.sh @@ -113,10 +113,18 @@ echo "deploy create output file test: passed" # hostPort: 80 # We need to change it to this: +# Note we also turn up the log level on the scheduler in order to diagnose placement errors +# See logs like: kubectl -n kube-system logs kube-scheduler-laconic-f185cd245d8dba98-control-plane 
kind_config_file=${test_deployment_dir}/kind-config.yml cat << EOF > ${kind_config_file} kind: Cluster apiVersion: kind.x-k8s.io/v1alpha4 +kubeadmConfigPatches: +- | + kind: ClusterConfiguration + scheduler: + extraArgs: + v: "3" nodes: - role: control-plane kubeadmConfigPatches: @@ -143,7 +151,7 @@ nodes: nodeRegistration: taints: - key: "nodeavoid" - value: "a" + value: "c" effect: "NoSchedule" EOF @@ -165,32 +173,49 @@ EOF # We can now modify the deployment spec file to require a set of affinity and/or taint combinations # then bring up the deployment and check that the pod is scheduled to an expected node. -# Add a requirement to schedule on a node labeled nodetype=c +# Add a requirement to schedule on a node labeled nodetype=c and +# a toleration such that no other pods schedule on that node deployment_spec_file=${test_deployment_dir}/spec.yml cat << EOF >> ${deployment_spec_file} node-affinities: - label: nodetype value: c +node-tolerations: + - key: nodeavoid + value: c EOF +# Get the deployment ID so we can generate low level kubectl commands later +deployment_id=$(cat ${test_deployment_dir}/deployment.yml | cut -d ' ' -f 2) + # Try to start the deployment $TEST_TARGET_SO deployment --dir $test_deployment_dir start wait_for_pods_started # Check logs command works wait_for_log_output sleep 1 -log_output_3=$( $TEST_TARGET_SO deployment --dir $test_deployment_dir logs ) -if [[ "$log_output_3" == *"filesystem is fresh"* ]]; then - echo "deployment logs test: passed" +log_output_1=$( $TEST_TARGET_SO deployment --dir $test_deployment_dir logs ) +if [[ "$log_output_1" == *"filesystem is fresh"* ]]; then + echo "deployment of pod test: passed" else - echo "deployment logs test: FAILED" - echo $log_output_3 + echo "deployment pod test: FAILED" + echo $log_output_1 delete_cluster_exit fi # The deployment's pod should be scheduled onto node: worker3 - -exit 1 +# Check that's what happened +# Get the node onto which the stack pod has been deployed 
+deployment_node=$(kubectl get pods -l app=${deployment_id} -o=jsonpath='{.items..spec.nodeName}') +expected_node=${deployment_id}-worker3 +echo "Stack pod deployed to node: ${deployment_node}" +if [[ ${deployment_node} == ${expected_node} ]]; then + echo "deployment of pod test: passed" +else + echo "deployment pod test: FAILED" + echo "Stack pod deployed to node: ${deployment_node}, expected node: ${expected_node}" + delete_cluster_exit +fi # Stop and clean up $TEST_TARGET_SO deployment --dir $test_deployment_dir stop --delete-volumes