From 1f9653e6f7d8a4ef1e9745888157f5625a8d4350 Mon Sep 17 00:00:00 2001 From: David Boreham Date: Tue, 16 Jan 2024 15:55:58 -0700 Subject: [PATCH] Fix kind mode and add k8s deployment test (#704) * Fix kind mode and add k8s deployment test * Fix lint errors --- stack_orchestrator/deploy/k8s/cluster_info.py | 6 +- stack_orchestrator/deploy/k8s/deploy_k8s.py | 63 ++++++++------- stack_orchestrator/deploy/k8s/helpers.py | 12 ++- tests/k8s-deploy/run-deploy-test.sh | 76 ++++++++++++++++--- 4 files changed, 115 insertions(+), 42 deletions(-) diff --git a/stack_orchestrator/deploy/k8s/cluster_info.py b/stack_orchestrator/deploy/k8s/cluster_info.py index 7718c777..85fd63a8 100644 --- a/stack_orchestrator/deploy/k8s/cluster_info.py +++ b/stack_orchestrator/deploy/k8s/cluster_info.py @@ -168,8 +168,8 @@ class ClusterInfo: result.append(pv) return result - # to suit the deployment, and also annotate the container specs to point at said volumes - def get_deployment(self): + # TODO: put things like image pull policy into an object-scope struct + def get_deployment(self, image_pull_policy: str = None): containers = [] for pod_name in self.parsed_pod_yaml_map: pod = self.parsed_pod_yaml_map[pod_name] @@ -189,7 +189,7 @@ class ClusterInfo: container = client.V1Container( name=container_name, image=image_to_use, - image_pull_policy="Always", + image_pull_policy=image_pull_policy, env=envs_from_environment_variables_map(self.environment_variables.map), ports=[client.V1ContainerPort(container_port=port)], volume_mounts=volume_mounts, diff --git a/stack_orchestrator/deploy/k8s/deploy_k8s.py b/stack_orchestrator/deploy/k8s/deploy_k8s.py index bf82ebdf..0a339fe9 100644 --- a/stack_orchestrator/deploy/k8s/deploy_k8s.py +++ b/stack_orchestrator/deploy/k8s/deploy_k8s.py @@ -111,7 +111,7 @@ class K8sDeployer(Deployer): print("PVCs created:") print(f"{pvc_resp}") # Process compose files into a Deployment - deployment = self.cluster_info.get_deployment() + deployment = 
self.cluster_info.get_deployment(image_pull_policy=None if self.is_kind() else "Always") # Create the k8s objects if opts.o.debug: print(f"Sending this deployment: {deployment}") @@ -132,18 +132,18 @@ class K8sDeployer(Deployer): print("Service created:") print(f"{service_resp}") - # TODO: disable ingress for kind - ingress: client.V1Ingress = self.cluster_info.get_ingress() + if not self.is_kind(): + ingress: client.V1Ingress = self.cluster_info.get_ingress() - if opts.o.debug: - print(f"Sending this ingress: {ingress}") - ingress_resp = self.networking_api.create_namespaced_ingress( - namespace=self.k8s_namespace, - body=ingress - ) - if opts.o.debug: - print("Ingress created:") - print(f"{ingress_resp}") + if opts.o.debug: + print(f"Sending this ingress: {ingress}") + ingress_resp = self.networking_api.create_namespaced_ingress( + namespace=self.k8s_namespace, + body=ingress + ) + if opts.o.debug: + print("Ingress created:") + print(f"{ingress_resp}") def down(self, timeout, volumes): self.connect_api() @@ -196,16 +196,16 @@ class K8sDeployer(Deployer): except client.exceptions.ApiException as e: _check_delete_exception(e) - # TODO: disable ingress for kind - ingress: client.V1Ingress = self.cluster_info.get_ingress() - if opts.o.debug: - print(f"Deleting this ingress: {ingress}") - try: - self.networking_api.delete_namespaced_ingress( - name=ingress.metadata.name, namespace=self.k8s_namespace - ) - except client.exceptions.ApiException as e: - _check_delete_exception(e) + if not self.is_kind(): + ingress: client.V1Ingress = self.cluster_info.get_ingress() + if opts.o.debug: + print(f"Deleting this ingress: {ingress}") + try: + self.networking_api.delete_namespaced_ingress( + name=ingress.metadata.name, namespace=self.k8s_namespace + ) + except client.exceptions.ApiException as e: + _check_delete_exception(e) if self.is_kind(): # Destroy the kind cluster @@ -219,7 +219,7 @@ class K8sDeployer(Deployer): if all_pods.items: for p in all_pods.items: - if 
self.cluster_info.app_name in p.metadata.name: + if f"{self.cluster_info.app_name}-deployment" in p.metadata.name: pods.append(p) if not pods: @@ -266,7 +266,7 @@ class K8sDeployer(Deployer): ret = [] for p in pods.items: - if self.cluster_info.app_name in p.metadata.name: + if f"{self.cluster_info.app_name}-deployment" in p.metadata.name: pod_ip = p.status.pod_ip ports = AttrDict() for c in p.spec.containers: @@ -299,11 +299,20 @@ class K8sDeployer(Deployer): def logs(self, services, tail, follow, stream): self.connect_api() - pods = pods_in_deployment(self.core_api, "test-deployment") + pods = pods_in_deployment(self.core_api, self.cluster_info.app_name) if len(pods) > 1: print("Warning: more than one pod in the deployment") - k8s_pod_name = pods[0] - log_data = self.core_api.read_namespaced_pod_log(k8s_pod_name, namespace="default", container="test") + if len(pods) == 0: + log_data = "******* Pods not running ********\n" + else: + k8s_pod_name = pods[0] + # If the pod is not yet started, the logs request below will throw an exception + try: + log_data = self.core_api.read_namespaced_pod_log(k8s_pod_name, namespace="default", container="test") + except client.exceptions.ApiException as e: + if opts.o.debug: + print(f"Error from read_namespaced_pod_log: {e}") + log_data = "******* No logs available ********\n" return log_stream_from_string(log_data) def update(self): diff --git a/stack_orchestrator/deploy/k8s/helpers.py b/stack_orchestrator/deploy/k8s/helpers.py index 9f968dbf..62545dfd 100644 --- a/stack_orchestrator/deploy/k8s/helpers.py +++ b/stack_orchestrator/deploy/k8s/helpers.py @@ -21,6 +21,7 @@ from typing import Set, Mapping, List from stack_orchestrator.opts import opts from stack_orchestrator.deploy.deploy_util import parsed_pod_files_map_from_file_names +from stack_orchestrator.deploy.deployer import DeployerException def _run_command(command: str): @@ -29,10 +30,13 @@ def _run_command(command: str): result = subprocess.run(command, shell=True) if 
opts.o.debug: print(f"Result: {result}") + return result def create_cluster(name: str, config_file: str): - _run_command(f"kind create cluster --name {name} --config {config_file}") + result = _run_command(f"kind create cluster --name {name} --config {config_file}") + if result.returncode != 0: + raise DeployerException(f"kind create cluster failed: {result}") def destroy_cluster(name: str): @@ -41,12 +45,14 @@ def load_images_into_kind(kind_cluster_name: str, image_set: Set[str]): for image in image_set: - _run_command(f"kind load docker-image {image} --name {kind_cluster_name}") + result = _run_command(f"kind load docker-image {image} --name {kind_cluster_name}") + if result.returncode != 0: + raise DeployerException(f"kind load docker-image failed: {result}") def pods_in_deployment(core_api: client.CoreV1Api, deployment_name: str): pods = [] - pod_response = core_api.list_namespaced_pod(namespace="default", label_selector="app=test-app") + pod_response = core_api.list_namespaced_pod(namespace="default", label_selector=f"app={deployment_name}") if opts.o.debug: print(f"pod_response: {pod_response}") for pod_info in pod_response.items: diff --git a/tests/k8s-deploy/run-deploy-test.sh b/tests/k8s-deploy/run-deploy-test.sh index b7ee9dd0..5f1c0102 100755 --- a/tests/k8s-deploy/run-deploy-test.sh +++ b/tests/k8s-deploy/run-deploy-test.sh @@ -1,14 +1,59 @@ #!/usr/bin/env bash set -e if [ -n "$CERC_SCRIPT_DEBUG" ]; then - set -x + set -x + # Dump environment variables for debugging + echo "Environment variables:" + env fi + +# Helper functions: TODO move into a separate file +wait_for_pods_started () { + for i in {1..5} + do + local ps_output=$( $TEST_TARGET_SO deployment --dir $test_deployment_dir ps ) + + if [[ "$ps_output" == *"Running containers:"* ]]; then + # if ready, return + return + else + # if not ready, wait + sleep 5 + fi + done + # Timed out, error exit + echo "waiting for pods to start: FAILED" + delete_cluster_exit +} + 
+wait_for_log_output () { + for i in {1..5} + do + + local log_output=$( $TEST_TARGET_SO deployment --dir $test_deployment_dir logs ) + + if [[ ! -z "$log_output" ]]; then + # if ready, return + return + else + # if not ready, wait + sleep 5 + fi + done + # Timed out, error exit + echo "waiting for pods log content: FAILED" + delete_cluster_exit +} + + +delete_cluster_exit () { + $TEST_TARGET_SO deployment --dir $test_deployment_dir stop --delete-volumes + exit 1 +} + # Note: eventually this test should be folded into ../deploy/ # but keeping it separate for now for convenience TEST_TARGET_SO=$( ls -t1 ./package/laconic-so* | head -1 ) -# Dump environment variables for debugging -echo "Environment variables:" -env # Set a non-default repo dir export CERC_REPO_BASE_DIR=~/stack-orchestrator-test/repo-base-dir echo "Testing this package: $TEST_TARGET_SO" @@ -53,23 +98,36 @@ fi echo "deploy create output file test: passed" # Try to start the deployment $TEST_TARGET_SO deployment --dir $test_deployment_dir start -# TODO: add a check to see if the container is up -# Sleep because k8s not up yet -sleep 30 +wait_for_pods_started # Check logs command works +wait_for_log_output log_output_3=$( $TEST_TARGET_SO deployment --dir $test_deployment_dir logs ) if [[ "$log_output_3" == *"Filesystem is fresh"* ]]; then echo "deployment logs test: passed" else echo "deployment logs test: FAILED" - exit 1 + delete_cluster_exit fi # Check the config variable CERC_TEST_PARAM_1 was passed correctly if [[ "$log_output_3" == *"Test-param-1: PASSED"* ]]; then echo "deployment config test: passed" else echo "deployment config test: FAILED" - exit 1 + delete_cluster_exit +fi +# Stop then start again and check the volume was preserved +$TEST_TARGET_SO deployment --dir $test_deployment_dir stop +# Sleep a bit just in case +sleep 2 +$TEST_TARGET_SO deployment --dir $test_deployment_dir start +wait_for_pods_started +wait_for_log_output +log_output_4=$( $TEST_TARGET_SO deployment --dir 
$test_deployment_dir logs ) +if [[ "$log_output_4" == *"Filesystem is old"* ]]; then + echo "Retain volumes test: passed" +else + echo "Retain volumes test: FAILED" + delete_cluster_exit fi # Stop and clean up $TEST_TARGET_SO deployment --dir $test_deployment_dir stop --delete-volumes