Add support for k8s pod to node affinity and taint toleration (#917)

All checks were successful

Lint Checks / Run linter (push) Successful in 38s
Publish / Build and publish (push) Successful in 1m15s
Smoke Test / Run basic test suite (push) Successful in 4m40s
Webapp Test / Run webapp test suite (push) Successful in 5m5s
Deploy Test / Run deploy test suite (push) Successful in 5m42s
K8s Deployment Control Test / Run deployment control suite on kind/k8s (push) Successful in 6m16s
Database Test / Run database hosting test on kind/k8s (push) Successful in 9m22s
Container Registry Test / Run contaier registry hosting test on kind/k8s (push) Successful in 3m30s
External Stack Test / Run external stack test suite (push) Successful in 4m31s
Fixturenet-Laconicd-Test / Run Laconicd fixturenet and Laconic CLI tests (push) Successful in 13m12s
K8s Deploy Test / Run deploy test suite on kind/k8s (push) Successful in 7m24s
Reviewed-on: #917
Reviewed-by: Thomas E Lackey <telackey@noreply.git.vdb.to>
Co-authored-by: David Boreham <david@bozemanpass.com>
Co-committed-by: David Boreham <david@bozemanpass.com>
This commit is contained in:
parent 60d34217f8
commit e56da7dcc1

.gitea/workflows/test-k8s-deployment-control.yml (new file, 69 lines)
@@ -0,0 +1,69 @@
name: K8s Deployment Control Test

on:
  pull_request:
    branches: '*'
  push:
    branches: '*'
    paths:
      - '!**'
      - '.gitea/workflows/triggers/test-k8s-deployment-control'
      - '.gitea/workflows/test-k8s-deployment-control.yml'
      - 'tests/k8s-deployment-control/run-test.sh'
  schedule: # Note: coordinate with other tests to not overload runners at the same time of day
    - cron: '3 30 * * *'

jobs:
  test:
    name: "Run deployment control suite on kind/k8s"
    runs-on: ubuntu-22.04
    steps:
      - name: "Clone project repository"
        uses: actions/checkout@v3
      # At present the stock setup-python action fails on Linux/aarch64
      # Conditional steps below workaround this by using deadsnakes for that case only
      - name: "Install Python for ARM on Linux"
        if: ${{ runner.arch == 'arm64' && runner.os == 'Linux' }}
        uses: deadsnakes/action@v3.0.1
        with:
          python-version: '3.8'
      - name: "Install Python for cases other than ARM on Linux"
        if: ${{ ! (runner.arch == 'arm64' && runner.os == 'Linux') }}
        uses: actions/setup-python@v4
        with:
          python-version: '3.8'
      - name: "Print Python version"
        run: python3 --version
      - name: "Install shiv"
        run: pip install shiv
      - name: "Generate build version file"
        run: ./scripts/create_build_tag_file.sh
      - name: "Build local shiv package"
        run: ./scripts/build_shiv_package.sh
      - name: "Check cgroups version"
        run: mount | grep cgroup
      - name: "Install kind"
        run: ./tests/scripts/install-kind.sh
      - name: "Install Kubectl"
        run: ./tests/scripts/install-kubectl.sh
      - name: "Run k8s deployment control test"
        run: |
          source /opt/bash-utils/cgroup-helper.sh
          join_cgroup
          ./tests/k8s-deployment-control/run-test.sh
      - name: Notify Vulcanize Slack on CI failure
        if: ${{ always() && github.ref_name == 'main' }}
        uses: ravsamhq/notify-slack-action@v2
        with:
          status: ${{ job.status }}
          notify_when: 'failure'
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.VULCANIZE_SLACK_CI_ALERTS }}
      - name: Notify DeepStack Slack on CI failure
        if: ${{ always() && github.ref_name == 'main' }}
        uses: ravsamhq/notify-slack-action@v2
        with:
          status: ${{ job.status }}
          notify_when: 'failure'
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.DEEPSTACK_SLACK_CI_ALERTS }}
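For local debugging, the CI job above can be approximated with the repository's own scripts. This is a sketch only, assuming a Linux host with Docker and Python 3.8 available; the cgroup helper sourced in CI is specific to the self-hosted runner image and is omitted here.
```
# Rough local equivalent of the CI steps above (paths are those used in the workflow)
pip install shiv
./scripts/create_build_tag_file.sh
./scripts/build_shiv_package.sh
./tests/scripts/install-kind.sh
./tests/scripts/install-kubectl.sh
./tests/k8s-deployment-control/run-test.sh
```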
							
								
								
									
docs/k8s-deployment-enhancements.md (new file, 27 lines)
@@ -0,0 +1,27 @@
# K8S Deployment Enhancements
## Controlling pod placement
The placement of pods created as part of a stack deployment can be controlled either to avoid certain nodes or to require certain nodes.
### Pod/Node Affinity
Node affinity rules applied to pods target node labels. The effect is that a pod can only be placed on a node having the specified label value. Note that other pods with no node affinity rules can also be placed on those same nodes. Thus node affinity controls where a given pod can be placed, but not where other pods are placed.

Node affinity for stack pods is specified in the deployment's `spec.yml` file as follows:
```
node-affinities:
  - label: nodetype
    value: typeb
```
This example denotes that the stack's pods should only be placed on nodes that have the label `nodetype` with value `typeb`.
### Node Taint Toleration
K8s nodes can be given one or more "taints". These are special fields (distinct from labels) with a name (key) and optional value.
When placing pods, the k8s scheduler will only assign a pod to a tainted node if the pod possesses a corresponding "toleration": metadata associated with the pod specifying that it "tolerates" a given taint.
Taint toleration therefore provides a mechanism by which only certain pods can be placed on specific nodes, complementing node affinity.

Taint toleration for stack pods is specified in the deployment's `spec.yml` file as follows:
```
node-tolerations:
  - key: nodetype
    value: typeb
```
This example denotes that the stack's pods will tolerate the taint `nodetype=typeb`.
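For these rules to match anything, the target nodes must carry the corresponding label and taint. A minimal sketch with kubectl; the node name `worker-1` is a placeholder, not something defined by this commit:
```
# Label a node so that pods with the node-affinities rule above can land on it
kubectl label nodes worker-1 nodetype=typeb
# Taint the same node so that only pods carrying a matching toleration are scheduled there
kubectl taint nodes worker-1 nodetype=typeb:NoSchedule
```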
@@ -35,5 +35,7 @@ security_key = "security"
 annotations_key = "annotations"
 labels_key = "labels"
 replicas_key = "replicas"
+node_affinities_key = "node-affinities"
+node_tolerations_key = "node-tolerations"
 kind_config_filename = "kind-config.yml"
 kube_config_filename = "kubeconfig.yml"
@@ -365,6 +365,8 @@ class ClusterInfo:
 
         annotations = None
         labels = {"app": self.app_name}
+        affinity = None
+        tolerations = None
 
         if self.spec.get_annotations():
             annotations = {}
@@ -377,12 +379,52 @@
                 for service_name in services:
                     labels[key.replace("{name}", service_name)] = value
 
+        if self.spec.get_node_affinities():
+            affinities = []
+            for rule in self.spec.get_node_affinities():
+                # TODO add some input validation here
+                label_name = rule['label']
+                label_value = rule['value']
+                affinities.append(client.V1NodeSelectorTerm(
+                            match_expressions=[client.V1NodeSelectorRequirement(
+                                key=label_name,
+                                operator="In",
+                                values=[label_value]
+                            )]
+                        )
+                    )
+            affinity = client.V1Affinity(
+                node_affinity=client.V1NodeAffinity(
+                    required_during_scheduling_ignored_during_execution=client.V1NodeSelector(
+                        node_selector_terms=affinities
+                    ))
+                )
+
+        if self.spec.get_node_tolerations():
+            tolerations = []
+            for toleration in self.spec.get_node_tolerations():
+                # TODO add some input validation here
+                toleration_key = toleration['key']
+                toleration_value = toleration['value']
+                tolerations.append(client.V1Toleration(
+                    effect="NoSchedule",
+                    key=toleration_key,
+                    operator="Equal",
+                    value=toleration_value
+                ))
+
         template = client.V1PodTemplateSpec(
             metadata=client.V1ObjectMeta(
                 annotations=annotations,
                 labels=labels
             ),
-            spec=client.V1PodSpec(containers=containers, image_pull_secrets=image_pull_secrets, volumes=volumes),
+            spec=client.V1PodSpec(
+                containers=containers,
+                image_pull_secrets=image_pull_secrets,
+                volumes=volumes,
+                affinity=affinity,
+                tolerations=tolerations
+                ),
         )
         spec = client.V1DeploymentSpec(
             replicas=self.spec.get_replicas(),
@@ -120,6 +120,12 @@ class Spec:
     def get_replicas(self):
         return self.obj.get(constants.replicas_key, 1)
 
+    def get_node_affinities(self):
+        return self.obj.get(constants.node_affinities_key, [])
+
+    def get_node_tolerations(self):
+        return self.obj.get(constants.node_tolerations_key, [])
+
     def get_labels(self):
         return self.obj.get(constants.labels_key, {})
 
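Once a deployment created from such a spec is up, the placement fields generated by the code above can be inspected directly. A sketch using kubectl, where `<deployment-name>` is a placeholder for the stack's k8s Deployment name:
```
# Inspect the affinity and tolerations rendered into the pod template
kubectl get deployment <deployment-name> -o jsonpath='{.spec.template.spec.affinity}'
kubectl get deployment <deployment-name> -o jsonpath='{.spec.template.spec.tolerations}'
```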
							
								
								
									
tests/k8s-deployment-control/run-test.sh (new executable file, 222 lines)
@@ -0,0 +1,222 @@
#!/usr/bin/env bash
set -e
if [ -n "$CERC_SCRIPT_DEBUG" ]; then
    set -x
    # Dump environment variables for debugging
    echo "Environment variables:"
    env
fi

if [ "$1" == "from-path" ]; then
    TEST_TARGET_SO="laconic-so"
else
    TEST_TARGET_SO=$( ls -t1 ./package/laconic-so* | head -1 )
fi

# Helper functions: TODO move into a separate file
wait_for_pods_started () {
    for i in {1..50}
    do
        local ps_output=$( $TEST_TARGET_SO deployment --dir $test_deployment_dir ps )

        if [[ "$ps_output" == *"Running containers:"* ]]; then
            # if ready, return
            return
        else
            # if not ready, wait
            sleep 5
        fi
    done
    # Timed out, error exit
    echo "waiting for pods to start: FAILED"
    delete_cluster_exit
}

wait_for_log_output () {
    for i in {1..50}
    do

        local log_output=$( $TEST_TARGET_SO deployment --dir $test_deployment_dir logs )

        if [[ ! -z "$log_output" ]]; then
            # if ready, return
            return
        else
            # if not ready, wait
            sleep 5
        fi
    done
    # Timed out, error exit
    echo "waiting for pods log content: FAILED"
    delete_cluster_exit
}

delete_cluster_exit () {
    $TEST_TARGET_SO deployment --dir $test_deployment_dir stop --delete-volumes
    exit 1
}

# Set a non-default repo dir
export CERC_REPO_BASE_DIR=~/stack-orchestrator-test/repo-base-dir
echo "Testing this package: $TEST_TARGET_SO"
echo "Test version command"
reported_version_string=$( $TEST_TARGET_SO version )
echo "Version reported is: ${reported_version_string}"
echo "Cloning repositories into: $CERC_REPO_BASE_DIR"
rm -rf $CERC_REPO_BASE_DIR
mkdir -p $CERC_REPO_BASE_DIR
$TEST_TARGET_SO --stack test setup-repositories
$TEST_TARGET_SO --stack test build-containers
# Test basic stack-orchestrator deploy to k8s
test_deployment_dir=$CERC_REPO_BASE_DIR/test-deployment-dir
test_deployment_spec=$CERC_REPO_BASE_DIR/test-deployment-spec.yml

# Create a deployment that we can use to check our test cases
$TEST_TARGET_SO --stack test deploy --deploy-to k8s-kind init --output $test_deployment_spec
# Check the file now exists
if [ ! -f "$test_deployment_spec" ]; then
    echo "deploy init test: spec file not present"
    echo "deploy init test: FAILED"
    exit 1
fi
echo "deploy init test: passed"

$TEST_TARGET_SO --stack test deploy create --spec-file $test_deployment_spec --deployment-dir $test_deployment_dir
# Check the deployment dir exists
if [ ! -d "$test_deployment_dir" ]; then
    echo "deploy create test: deployment directory not present"
    echo "deploy create test: FAILED"
    exit 1
fi
echo "deploy create test: passed"
# Check the file written by the create command in the stack now exists
if [ ! -f "$test_deployment_dir/create-file" ]; then
    echo "deploy create test: create output file not present"
    echo "deploy create test: FAILED"
    exit 1
fi
echo "deploy create output file test: passed"

# At this point the deployment's kind-config.yml will look like this:
# kind: Cluster
# apiVersion: kind.x-k8s.io/v1alpha4
# nodes:
# - role: control-plane
#   kubeadmConfigPatches:
#     - |
#       kind: InitConfiguration
#       nodeRegistration:
#         kubeletExtraArgs:
#           node-labels: "ingress-ready=true"
#   extraPortMappings:
#   - containerPort: 80
#     hostPort: 80

# We need to change it to this:
# Note we also turn up the log level on the scheduler in order to diagnose placement errors
# See logs like: kubectl -n kube-system logs kube-scheduler-laconic-f185cd245d8dba98-control-plane
kind_config_file=${test_deployment_dir}/kind-config.yml
cat << EOF > ${kind_config_file}
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
kubeadmConfigPatches:
- |
  kind: ClusterConfiguration
  scheduler:
    extraArgs:
      v: "3"
nodes:
- role: control-plane
  kubeadmConfigPatches:
    - |
      kind: InitConfiguration
      nodeRegistration:
        kubeletExtraArgs:
          node-labels: "ingress-ready=true"
  extraPortMappings:
  - containerPort: 80
    hostPort: 80
- role: worker
  labels:
    nodetype: a
- role: worker
  labels:
    nodetype: b
- role: worker
  labels:
    nodetype: c
  kubeadmConfigPatches:
  - |
    kind: JoinConfiguration
    nodeRegistration:
      taints:
        - key: "nodeavoid"
          value: "c"
          effect: "NoSchedule"
EOF

# At this point we should have 4 nodes, three labeled like this:
# $ kubectl get nodes --show-labels=true
# NAME                                     STATUS   ROLES           AGE     VERSION   LABELS
# laconic-3af549a3ba0e3a3c-control-plane   Ready    control-plane   2m37s   v1.30.0   ...,ingress-ready=true
# laconic-3af549a3ba0e3a3c-worker          Ready    <none>          2m18s   v1.30.0   ...,nodetype=a
# laconic-3af549a3ba0e3a3c-worker2         Ready    <none>          2m18s   v1.30.0   ...,nodetype=b
# laconic-3af549a3ba0e3a3c-worker3         Ready    <none>          2m18s   v1.30.0   ...,nodetype=c

# And with taints like this:
# $ kubectl get nodes -o custom-columns=NAME:.metadata.name,TAINTS:.spec.taints --no-headers
# laconic-3af549a3ba0e3a3c-control-plane   [map[effect:NoSchedule key:node-role.kubernetes.io/control-plane]]
# laconic-3af549a3ba0e3a3c-worker          <none>
# laconic-3af549a3ba0e3a3c-worker2         <none>
# laconic-3af549a3ba0e3a3c-worker3         [map[effect:NoSchedule key:nodeavoid value:c]]

# We can now modify the deployment spec file to require a set of affinity and/or taint combinations
# then bring up the deployment and check that the pod is scheduled to an expected node.

# Add a requirement to schedule on a node labeled nodetype=c and
# a toleration such that no other pods schedule on that node
deployment_spec_file=${test_deployment_dir}/spec.yml
cat << EOF >> ${deployment_spec_file}
node-affinities:
  - label: nodetype
    value: c
node-tolerations:
  - key: nodeavoid
    value: c
EOF

# Get the deployment ID so we can generate low level kubectl commands later
deployment_id=$(cat ${test_deployment_dir}/deployment.yml | cut -d ' ' -f 2)

# Try to start the deployment
$TEST_TARGET_SO deployment --dir $test_deployment_dir start
wait_for_pods_started
# Check logs command works
wait_for_log_output
sleep 1
log_output_1=$( $TEST_TARGET_SO deployment --dir $test_deployment_dir logs )
if [[ "$log_output_1" == *"filesystem is fresh"* ]]; then
    echo "deployment of pod test: passed"
else
    echo "deployment pod test: FAILED"
    echo $log_output_1
    delete_cluster_exit
fi

# The deployment's pod should be scheduled onto node: worker3
# Check that's what happened
# Get the node onto which the stack pod has been deployed
deployment_node=$(kubectl get pods -l app=${deployment_id} -o=jsonpath='{.items..spec.nodeName}')
expected_node=${deployment_id}-worker3
echo "Stack pod deployed to node: ${deployment_node}"
if [[ ${deployment_node} == ${expected_node} ]]; then
    echo "deployment of pod test: passed"
else
    echo "deployment pod test: FAILED"
    echo "Stack pod deployed to node: ${deployment_node}, expected node: ${expected_node}"
    delete_cluster_exit
fi

# Stop and clean up
$TEST_TARGET_SO deployment --dir $test_deployment_dir stop --delete-volumes
echo "Test passed"