From 8cc0a9a19ae080b8490dd00253614d41e79e4f90 Mon Sep 17 00:00:00 2001 From: AFDudley Date: Mon, 9 Mar 2026 20:04:58 +0000 Subject: [PATCH] add/local-test-runner (#996) Co-authored-by: A. F. Dudley Reviewed-on: https://git.vdb.to/cerc-io/stack-orchestrator/pulls/996 --- TODO.md | 19 +++++++ stack_orchestrator/deploy/k8s/cluster_info.py | 44 +++++++++++++-- stack_orchestrator/deploy/spec.py | 21 ++++++++ tests/scripts/run-test-local.sh | 53 +++++++++++++++++++ 4 files changed, 133 insertions(+), 4 deletions(-) create mode 100755 tests/scripts/run-test-local.sh diff --git a/TODO.md b/TODO.md index 349530c8..65439ab5 100644 --- a/TODO.md +++ b/TODO.md @@ -7,6 +7,25 @@ We need an "update stack" command in stack orchestrator and cleaner documentatio **Context**: Currently, `deploy init` generates a spec file and `deploy create` creates a deployment directory. The `deployment update` command (added by Thomas Lackey) only syncs env vars and restarts - it doesn't regenerate configurations. There's a gap in the workflow for updating stack configurations after initial deployment. +## Bugs + +### `deploy create` doesn't auto-generate volume mappings for new pods + +When a new pod is added to `stack.yml` (e.g. `monitoring`), `deploy create` +does not generate default host path mappings in spec.yml for the new pod's +volumes. The deployment then fails at scheduling because the PVCs don't exist. + +**Expected**: `deploy create` enumerates all volumes from all compose files +in the stack and generates default host paths for any that aren't already +mapped in the spec.yml `volumes:` section. + +**Actual**: Only volumes already in spec.yml get PVs. New volumes are silently +missing, causing `FailedScheduling: persistentvolumeclaim not found`. + +**Workaround**: Manually add volume entries to spec.yml and create host dirs. 
+ +**Files**: `deployment_create.py` (`_write_config_file`, volume handling) + ## Architecture Refactoring ### Separate Deployer from Stack Orchestrator CLI diff --git a/stack_orchestrator/deploy/k8s/cluster_info.py b/stack_orchestrator/deploy/k8s/cluster_info.py index da24bdc2..2ebf96f2 100644 --- a/stack_orchestrator/deploy/k8s/cluster_info.py +++ b/stack_orchestrator/deploy/k8s/cluster_info.py @@ -394,13 +394,43 @@ class ClusterInfo: result.append(pv) return result + def _any_service_has_host_network(self): + for pod_name in self.parsed_pod_yaml_map: + pod = self.parsed_pod_yaml_map[pod_name] + for svc in pod.get("services", {}).values(): + if svc.get("network_mode") == "host": + return True + return False + + def _resolve_container_resources( + self, container_name: str, service_info: dict, global_resources: Resources + ) -> Resources: + """Resolve resources for a container using layered priority. + + Priority: spec per-container > compose deploy.resources + > spec global > DEFAULT + """ + # 1. Check spec.yml for per-container override + per_container = self.spec.get_container_resources_for(container_name) + if per_container: + return per_container + + # 2. Check compose service_info for deploy.resources + deploy_block = service_info.get("deploy", {}) + compose_resources = deploy_block.get("resources", {}) if deploy_block else {} + if compose_resources: + return Resources(compose_resources) + + # 3. 
Fall back to spec.yml global (already resolved with DEFAULT fallback) + return global_resources + # TODO: put things like image pull policy into an object-scope struct def get_deployment(self, image_pull_policy: Optional[str] = None): containers = [] services = {} - resources = self.spec.get_container_resources() - if not resources: - resources = DEFAULT_CONTAINER_RESOURCES + global_resources = self.spec.get_container_resources() + if not global_resources: + global_resources = DEFAULT_CONTAINER_RESOURCES for pod_name in self.parsed_pod_yaml_map: pod = self.parsed_pod_yaml_map[pod_name] services = pod["services"] @@ -483,6 +513,9 @@ class ClusterInfo: ) ) ] + container_resources = self._resolve_container_resources( + container_name, service_info, global_resources + ) container = client.V1Container( name=container_name, image=image_to_use, @@ -501,7 +534,7 @@ class ClusterInfo: if self.spec.get_capabilities() else None, ), - resources=to_k8s_resource_requirements(resources), + resources=to_k8s_resource_requirements(container_resources), ) containers.append(container) volumes = volumes_for_pod_files( @@ -568,6 +601,7 @@ class ClusterInfo: ) ) + use_host_network = self._any_service_has_host_network() template = client.V1PodTemplateSpec( metadata=client.V1ObjectMeta(annotations=annotations, labels=labels), spec=client.V1PodSpec( @@ -577,6 +611,8 @@ class ClusterInfo: affinity=affinity, tolerations=tolerations, runtime_class_name=self.spec.get_runtime_class(), + host_network=use_host_network or None, + dns_policy=("ClusterFirstWithHostNet" if use_host_network else None), ), ) spec = client.V1DeploymentSpec( diff --git a/stack_orchestrator/deploy/spec.py b/stack_orchestrator/deploy/spec.py index e5647b04..bd62779e 100644 --- a/stack_orchestrator/deploy/spec.py +++ b/stack_orchestrator/deploy/spec.py @@ -120,6 +120,27 @@ class Spec: self.obj.get(constants.resources_key, {}).get("containers", {}) ) + def get_container_resources_for( + self, container_name: str + ) -> 
typing.Optional[Resources]:
+        """Look up per-container resource overrides from spec.yml.
+
+        Checks resources.containers.<container_name> in the spec. Returns None
+        if no per-container override exists (caller falls back to other sources).
+        """
+        containers_block = self.obj.get(constants.resources_key, {}).get(
+            "containers", {}
+        )
+        if container_name in containers_block:
+            entry = containers_block[container_name]
+            # Only treat it as a per-container override if it's a dict with
+            # reservations/limits nested inside (not a top-level global key)
+            if isinstance(entry, dict) and (
+                "reservations" in entry or "limits" in entry
+            ):
+                return Resources(entry)
+        return None
+
     def get_volume_resources(self):
         return Resources(
             self.obj.get(constants.resources_key, {}).get(constants.volumes_key, {})
diff --git a/tests/scripts/run-test-local.sh b/tests/scripts/run-test-local.sh
new file mode 100755
index 00000000..f6f32346
--- /dev/null
+++ b/tests/scripts/run-test-local.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+# Run a test suite locally in an isolated venv.
+#
+# Usage:
+#   ./tests/scripts/run-test-local.sh <test-script> [args...]
+#
+# Examples:
+#   ./tests/scripts/run-test-local.sh tests/webapp-test/run-webapp-test.sh
+#   ./tests/scripts/run-test-local.sh tests/smoke-test/run-smoke-test.sh
+#   ./tests/scripts/run-test-local.sh tests/k8s-deploy/run-deploy-test.sh
+#
+# The script creates a temporary venv, installs shiv, builds the laconic-so
+# package, runs the requested test, then cleans up.
+
+set -euo pipefail
+
+if [ $# -lt 1 ]; then
+    echo "Usage: $0 <test-script> [args...]"
+    exit 1
+fi
+
+TEST_SCRIPT="$1"
+shift
+
+if [ ! -f "$TEST_SCRIPT" ]; then
+    echo "Error: $TEST_SCRIPT not found"
+    exit 1
+fi
+
+REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.."
&& pwd)" +VENV_DIR=$(mktemp -d /tmp/so-test-XXXXXX) + +cleanup() { + echo "Cleaning up venv: $VENV_DIR" + rm -rf "$VENV_DIR" +} +trap cleanup EXIT + +cd "$REPO_DIR" + +echo "==> Creating venv in $VENV_DIR" +python3 -m venv "$VENV_DIR" +source "$VENV_DIR/bin/activate" + +echo "==> Installing shiv" +pip install -q shiv + +echo "==> Building laconic-so package" +./scripts/create_build_tag_file.sh +./scripts/build_shiv_package.sh + +echo "==> Running: $TEST_SCRIPT $*" +exec "./$TEST_SCRIPT" "$@"