feat: update-in-place deployments with rolling updates

Replace the destroy-and-recreate deployment model with in-place updates.

deploy_k8s.py: All resource creation (Deployment, Service, Ingress,
NodePort, ConfigMap) now uses create-or-update semantics. If a resource
already exists (409 Conflict), it patches instead of failing. For
Deployments, this triggers a k8s rolling update — old pods serve traffic
until new pods pass readiness checks.

deployment.py: restart() no longer calls down(). It just calls up()
which patches existing resources. No namespace deletion, no downtime
gap, no race conditions. k8s handles the rollout.

This gives:
- Zero-downtime deploys (old pods serve during rollout)
- Safe failure mode (if new pods fail readiness, the rollout stalls while old pods keep serving traffic)
- Manual rollback via kubectl rollout undo

Closes so-l2l (parts A and B).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
A. F. Dudley 2026-03-18 19:40:20 +00:00
parent ba39c991f1
commit 2d11ca7bb0
2 changed files with 93 additions and 47 deletions

View File

@@ -17,7 +17,7 @@ import click
from pathlib import Path
import subprocess
import sys
import time
from stack_orchestrator import constants
from stack_orchestrator.deploy.images import push_images_operation
from stack_orchestrator.deploy.deploy import (
@@ -383,23 +383,17 @@ def restart(ctx, stack_path, spec_file, config_file, force, expected_ip):
deployment_context.init(deployment_context.deployment_dir)
ctx.obj = deployment_context
# Stop deployment
print("\n[4/4] Restarting deployment...")
# Apply updated deployment (create-or-update triggers rolling update).
# No down() — k8s rolling update keeps old pods serving traffic until
# new pods pass readiness checks.
print("\n[4/4] Applying deployment update...")
ctx.obj = make_deploy_context(ctx)
down_operation(
ctx, delete_volumes=False, extra_args_list=[], skip_cluster_management=True
)
# Brief pause to ensure clean shutdown
time.sleep(5)
# Start deployment
up_operation(
ctx, services_list=None, stay_attached=False, skip_cluster_management=True
)
print("\n=== Restart Complete ===")
print("Deployment restarted with git-tracked configuration.")
print("Deployment updated via rolling update.")
if new_hostname and new_hostname != current_hostname:
print(f"\nNew hostname: {new_hostname}")
print("Caddy will automatically provision TLS certificate.")

View File

@@ -384,12 +384,20 @@ class K8sDeployer(Deployer):
if opts.o.debug:
print(f"Sending this ConfigMap: {cfg_map}")
if not opts.o.dry_run:
cfg_rsp = self.core_api.create_namespaced_config_map(
body=cfg_map, namespace=self.k8s_namespace
)
if opts.o.debug:
print("ConfigMap created:")
print(f"{cfg_rsp}")
cm_name = cfg_map.metadata.name
try:
self.core_api.create_namespaced_config_map(
body=cfg_map, namespace=self.k8s_namespace
)
except ApiException as e:
if e.status == 409:
self.core_api.patch_namespaced_config_map(
name=cm_name,
namespace=self.k8s_namespace,
body=cfg_map,
)
else:
raise
def _create_deployment(self):
# Skip if there are no pods to deploy (e.g. jobs-only stacks)
@@ -401,38 +409,64 @@ class K8sDeployer(Deployer):
deployment = self.cluster_info.get_deployment(
image_pull_policy="Always"
)
# Create the k8s objects
# Create or update the k8s Deployment
if opts.o.debug:
print(f"Sending this deployment: {deployment}")
if not opts.o.dry_run:
deployment_resp = cast(
client.V1Deployment,
self.apps_api.create_namespaced_deployment(
body=deployment, namespace=self.k8s_namespace
),
)
name = deployment.metadata.name
try:
deployment_resp = cast(
client.V1Deployment,
self.apps_api.create_namespaced_deployment(
body=deployment, namespace=self.k8s_namespace
),
)
print(f"Created Deployment {name}")
except ApiException as e:
if e.status == 409:
# Already exists — patch to trigger rolling update
deployment_resp = cast(
client.V1Deployment,
self.apps_api.patch_namespaced_deployment(
name=name,
namespace=self.k8s_namespace,
body=deployment,
),
)
print(f"Updated Deployment {name} (rolling update)")
else:
raise
if opts.o.debug:
print("Deployment created:")
meta = deployment_resp.metadata
spec = deployment_resp.spec
if meta and spec and spec.template.spec:
ns = meta.namespace
name = meta.name
gen = meta.generation
containers = spec.template.spec.containers
img = containers[0].image if containers else None
print(f"{ns} {name} {gen} {img}")
print(f" {meta.namespace} {meta.name} gen={meta.generation} {img}")
service = self.cluster_info.get_service()
if opts.o.debug:
print(f"Sending this service: {service}")
if service and not opts.o.dry_run:
service_resp = self.core_api.create_namespaced_service(
namespace=self.k8s_namespace, body=service
)
svc_name = service.metadata.name
try:
service_resp = self.core_api.create_namespaced_service(
namespace=self.k8s_namespace, body=service
)
print(f"Created Service {svc_name}")
except ApiException as e:
if e.status == 409:
# Service exists — patch it (preserves clusterIP)
service_resp = self.core_api.patch_namespaced_service(
name=svc_name,
namespace=self.k8s_namespace,
body=service,
)
print(f"Updated Service {svc_name}")
else:
raise
if opts.o.debug:
print("Service created:")
print(f"{service_resp}")
print(f" {service_resp}")
def _create_jobs(self):
# Process job compose files into k8s Jobs
@@ -570,12 +604,22 @@ class K8sDeployer(Deployer):
if opts.o.debug:
print(f"Sending this ingress: {ingress}")
if not opts.o.dry_run:
ingress_resp = self.networking_api.create_namespaced_ingress(
namespace=self.k8s_namespace, body=ingress
)
if opts.o.debug:
print("Ingress created:")
print(f"{ingress_resp}")
ing_name = ingress.metadata.name
try:
self.networking_api.create_namespaced_ingress(
namespace=self.k8s_namespace, body=ingress
)
print(f"Created Ingress {ing_name}")
except ApiException as e:
if e.status == 409:
self.networking_api.patch_namespaced_ingress(
name=ing_name,
namespace=self.k8s_namespace,
body=ingress,
)
print(f"Updated Ingress {ing_name}")
else:
raise
else:
if opts.o.debug:
print("No ingress configured")
@@ -585,12 +629,20 @@ class K8sDeployer(Deployer):
if opts.o.debug:
print(f"Sending this nodeport: {nodeport}")
if not opts.o.dry_run:
nodeport_resp = self.core_api.create_namespaced_service(
namespace=self.k8s_namespace, body=nodeport
)
if opts.o.debug:
print("NodePort created:")
print(f"{nodeport_resp}")
np_name = nodeport.metadata.name
try:
self.core_api.create_namespaced_service(
namespace=self.k8s_namespace, body=nodeport
)
except ApiException as e:
if e.status == 409:
self.core_api.patch_namespaced_service(
name=np_name,
namespace=self.k8s_namespace,
body=nodeport,
)
else:
raise
# Call start() hooks — stacks can create additional k8s resources
if self.deployment_context: