diff --git a/stack_orchestrator/deploy/deploy.py b/stack_orchestrator/deploy/deploy.py
index 5f9665d4..87130c0d 100644
--- a/stack_orchestrator/deploy/deploy.py
+++ b/stack_orchestrator/deploy/deploy.py
@@ -203,13 +203,12 @@ def logs_operation(ctx, tail: int, follow: bool, extra_args: str):
         print(stream_content.decode("utf-8"), end="")
 
 
-def run_job_operation(ctx, job_name: str, release_name: str = None):
+def run_job_operation(ctx, job_name: str, helm_release: str = None):
     global_context = ctx.parent.parent.obj
     if not global_context.dry_run:
         print(f"Running job: {job_name}")
         try:
-            ctx.obj.deployer.run_job(job_name, release_name)
-            print(f"Job {job_name} completed successfully")
+            ctx.obj.deployer.run_job(job_name, helm_release)
         except Exception as e:
             print(f"Error running job {job_name}: {e}")
             sys.exit(1)
diff --git a/stack_orchestrator/deploy/deployment.py b/stack_orchestrator/deploy/deployment.py
index 6b254225..196b3301 100644
--- a/stack_orchestrator/deploy/deployment.py
+++ b/stack_orchestrator/deploy/deployment.py
@@ -171,10 +171,10 @@ def update(ctx):
 
 @command.command()
 @click.argument('job_name')
-@click.option('--release-name', help='Helm release name (only for k8s helm chart deployments, defaults to chart name)')
+@click.option('--helm-release', help='Helm release name (only for k8s helm chart deployments, defaults to chart name)')
 @click.pass_context
-def run_job(ctx, job_name, release_name):
+def run_job(ctx, job_name, helm_release):
     '''run a one-time job from the stack'''
     from stack_orchestrator.deploy.deploy import run_job_operation
     ctx.obj = make_deploy_context(ctx)
-    run_job_operation(ctx, job_name, release_name)
+    run_job_operation(ctx, job_name, helm_release)
diff --git a/stack_orchestrator/deploy/k8s/deploy_k8s.py b/stack_orchestrator/deploy/k8s/deploy_k8s.py
index 1207260c..fdc29f51 100644
--- a/stack_orchestrator/deploy/k8s/deploy_k8s.py
+++ b/stack_orchestrator/deploy/k8s/deploy_k8s.py
@@ -510,7 +510,7 @@ class K8sDeployer(Deployer):
         # We need to figure out how to do this -- check why we're being called first
         pass
 
-    def run_job(self, job_name: str, release_name: str = None):
+    def run_job(self, job_name: str, helm_release: str = None):
         if not opts.o.dry_run:
             from stack_orchestrator.deploy.k8s.helm.job_runner import run_helm_job
 
@@ -524,7 +524,7 @@ class K8sDeployer(Deployer):
             run_helm_job(
                 chart_dir=chart_dir,
                 job_name=job_name,
-                release_name=release_name,
+                release=helm_release,
                 namespace=self.k8s_namespace,
                 timeout=600,
                 verbose=opts.o.verbose
diff --git a/stack_orchestrator/deploy/k8s/helm/chart_generator.py b/stack_orchestrator/deploy/k8s/helm/chart_generator.py
index 01976ea3..5964d755 100644
--- a/stack_orchestrator/deploy/k8s/helm/chart_generator.py
+++ b/stack_orchestrator/deploy/k8s/helm/chart_generator.py
@@ -70,6 +70,46 @@ def _wrap_job_templates_with_conditionals(chart_dir: Path, jobs: list) -> None:
             print(f"Wrapped job template with conditional: {job_template_file.name}")
 
 
+def _add_pvc_retention_policy(chart_dir: Path) -> None:
+    """
+    Add helm.sh/resource-policy: keep annotation to PVC templates.
+
+    This ensures PVCs are not deleted when the Helm release is uninstalled,
+    preserving data for future deployments or job executions.
+    """
+    templates_dir = chart_dir / "templates"
+    if not templates_dir.exists():
+        return
+
+    yaml = get_yaml()
+
+    # Find all PVC template files
+    pvc_files = list(templates_dir.glob("*-persistentvolumeclaim.yaml"))
+
+    for pvc_file in pvc_files:
+        try:
+            # Read the PVC template
+            content = yaml.load(open(pvc_file, "r"))
+
+            # Add the resource policy annotation
+            if "metadata" not in content:
+                content["metadata"] = {}
+            if "annotations" not in content["metadata"]:
+                content["metadata"]["annotations"] = {}
+
+            content["metadata"]["annotations"]["helm.sh/resource-policy"] = "keep"
+
+            # Write back
+            with open(pvc_file, "w") as f:
+                yaml.dump(content, f)
+
+            if opts.o.debug:
+                print(f"Added retention policy to: {pvc_file.name}")
+        except Exception as e:
+            if opts.o.debug:
+                print(f"Warning: Failed to add retention policy to {pvc_file.name}: {e}")
+
+
 def _post_process_chart(chart_dir: Path, chart_name: str, jobs: list) -> None:
     """
     Post-process Kompose-generated chart to fix common issues.
@@ -77,6 +117,7 @@ def _post_process_chart(chart_dir: Path, chart_name: str, jobs: list) -> None:
     Fixes:
     1. Chart.yaml name, description and keywords
     2. Add conditional wrappers to job templates (default: disabled)
+    3. Add resource retention policy to PVCs (prevent deletion on uninstall)
 
     TODO:
     - Add defaultMode: 0755 to ConfigMap volumes containing scripts (.sh files)
@@ -105,6 +146,9 @@ def _post_process_chart(chart_dir: Path, chart_name: str, jobs: list) -> None:
     if jobs:
         _wrap_job_templates_with_conditionals(chart_dir, jobs)
 
+    # Add resource retention policy to PVCs
+    _add_pvc_retention_policy(chart_dir)
+
 
 def generate_helm_chart(stack_path: str, spec_file: str, deployment_dir_path: Path) -> None:
     """
diff --git a/stack_orchestrator/deploy/k8s/helm/job_runner.py b/stack_orchestrator/deploy/k8s/helm/job_runner.py
index 7a8e3726..00829971 100644
--- a/stack_orchestrator/deploy/k8s/helm/job_runner.py
+++ b/stack_orchestrator/deploy/k8s/helm/job_runner.py
@@ -50,7 +50,7 @@ def get_release_name_from_chart(chart_dir: Path) -> str:
 def run_helm_job(
     chart_dir: Path,
     job_name: str,
-    release_name: str = None,
+    release: str = None,
     namespace: str = "default",
     timeout: int = 600,
     verbose: bool = False
@@ -67,7 +67,7 @@ def run_helm_job(
     Args:
         chart_dir: Path to the Helm chart directory
         job_name: Name of the job to run (without -job suffix)
-        release_name: Optional Helm release name (defaults to chart name from Chart.yaml)
+        release: Optional Helm release name (defaults to chart name from Chart.yaml)
         namespace: Kubernetes namespace
         timeout: Timeout in seconds for job completion (default: 600)
         verbose: Enable verbose output
@@ -79,13 +79,13 @@ def run_helm_job(
         raise Exception(f"Chart directory not found: {chart_dir}")
 
     # Use provided release name, or get it from Chart.yaml
-    if release_name is None:
-        release_name = get_release_name_from_chart(chart_dir)
+    if release is None:
+        release = get_release_name_from_chart(chart_dir)
         if verbose:
-            print(f"Using release name from Chart.yaml: {release_name}")
+            print(f"Using release name from Chart.yaml: {release}")
     else:
         if verbose:
-            print(f"Using provided release name: {release_name}")
+            print(f"Using provided release name: {release}")
 
     job_template_file = f"templates/{job_name}-job.yaml"
 
@@ -97,9 +97,10 @@ def run_helm_job(
     try:
         # Render job template with job enabled
        # Use --set-json to properly handle job names with dashes
-        values_json = json.dumps({"jobs": {job_name: {"enabled": True}}})
+        jobs_dict = {job_name: {"enabled": True}}
+        values_json = json.dumps(jobs_dict)
         helm_cmd = [
-            "helm", "template", release_name, str(chart_dir),
+            "helm", "template", release, str(chart_dir),
             "--show-only", job_template_file,
             "--set-json", f"jobs={values_json}"
         ]
@@ -114,18 +115,22 @@ def run_helm_job(
         if verbose:
             print(f"Generated job manifest:\n{result.stdout}")
 
+        # Parse the manifest to get the actual job name
+        yaml = get_yaml()
+        manifest = yaml.load(result.stdout)
+        actual_job_name = manifest.get("metadata", {}).get("name", job_name)
+
         # Apply the job manifest
         kubectl_apply_cmd = ["kubectl", "apply", "-f", tmp_file.name, "-n", namespace]
         subprocess.run(kubectl_apply_cmd, check=True, capture_output=True, text=True)
 
         if verbose:
-            print(f"Job {job_name} created, waiting for completion...")
+            print(f"Job {actual_job_name} created, waiting for completion...")
 
         # Wait for job completion
-        job_full_name = f"{release_name}-{job_name}"
         wait_cmd = [
             "kubectl", "wait", "--for=condition=complete",
-            f"job/{job_full_name}",
+            f"job/{actual_job_name}",
             f"--timeout={timeout}s",
             "-n", namespace
         ]
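
Usage sketch for the renamed option. The `laconic-so` entry point, the `my-deployment` directory, and the `migrate-db` job name are illustrative assumptions, not taken from this diff; only the `run-job` command and `--helm-release` option come from the click definitions above:

    # Run a one-time job; the release name defaults to the chart name from Chart.yaml
    laconic-so deployment --dir my-deployment run-job migrate-db

    # Run the same job against an explicitly named Helm release
    laconic-so deployment --dir my-deployment run-job migrate-db --helm-release my-release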
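
After `_add_pvc_retention_policy` runs, each generated PVC template carries Helm's keep annotation. A minimal sketch of the result (the claim name and storage request are placeholders):

    apiVersion: v1
    kind: PersistentVolumeClaim
    metadata:
      name: example-data                # placeholder
      annotations:
        helm.sh/resource-policy: keep   # added by _add_pvc_retention_policy
    spec:
      accessModes:
        - ReadWriteOnce
      resources:
        requests:
          storage: 1Gi                  # placeholder

Helm honors this annotation by leaving the PVC in place on `helm uninstall`, so data survives for future deployments or job reruns against the same release.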
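
For reference, the `run_helm_job` flow above is roughly equivalent to this shell sequence (release and job names are placeholders; `yq` stands in here for the Python-side manifest parsing, since `kubectl wait` now targets the `metadata.name` of the rendered manifest rather than assuming a `<release>-<job>` naming convention):

    helm template my-release ./chart \
        --show-only templates/migrate-db-job.yaml \
        --set-json 'jobs={"migrate-db": {"enabled": true}}' > /tmp/job.yaml
    kubectl apply -f /tmp/job.yaml -n default
    kubectl wait --for=condition=complete \
        "job/$(yq '.metadata.name' /tmp/job.yaml)" --timeout=600s -n default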