feat: update-in-place deployments with rolling updates
Replace the destroy-and-recreate deployment model with in-place updates.

deploy_k8s.py: All resource creation (Deployment, Service, Ingress, NodePort, ConfigMap) now uses create-or-update semantics. If a resource already exists (the create call returns 409 Conflict), it is patched instead of the deploy failing. For Deployments, the patch triggers a k8s rolling update — old pods serve traffic until new pods pass readiness checks.

deployment.py: restart() no longer calls down(). It just calls up(), which patches the existing resources. No namespace deletion, no downtime gap, no race conditions. k8s handles the rollout.

This gives:
- Zero-downtime deploys (old pods serve during rollout)
- Automatic rollback (if new pods fail readiness, the rollout stalls and old pods keep serving)
- Manual rollback via kubectl rollout undo

Closes so-l2l (parts A and B).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
ba39c991f1
commit
2d11ca7bb0
@ -17,7 +17,7 @@ import click
|
||||
from pathlib import Path
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
|
||||
from stack_orchestrator import constants
|
||||
from stack_orchestrator.deploy.images import push_images_operation
|
||||
from stack_orchestrator.deploy.deploy import (
|
||||
@ -383,23 +383,17 @@ def restart(ctx, stack_path, spec_file, config_file, force, expected_ip):
|
||||
deployment_context.init(deployment_context.deployment_dir)
|
||||
ctx.obj = deployment_context
|
||||
|
||||
# Stop deployment
|
||||
print("\n[4/4] Restarting deployment...")
|
||||
# Apply updated deployment (create-or-update triggers rolling update).
|
||||
# No down() — k8s rolling update keeps old pods serving traffic until
|
||||
# new pods pass readiness checks.
|
||||
print("\n[4/4] Applying deployment update...")
|
||||
ctx.obj = make_deploy_context(ctx)
|
||||
down_operation(
|
||||
ctx, delete_volumes=False, extra_args_list=[], skip_cluster_management=True
|
||||
)
|
||||
|
||||
# Brief pause to ensure clean shutdown
|
||||
time.sleep(5)
|
||||
|
||||
# Start deployment
|
||||
up_operation(
|
||||
ctx, services_list=None, stay_attached=False, skip_cluster_management=True
|
||||
)
|
||||
|
||||
print("\n=== Restart Complete ===")
|
||||
print("Deployment restarted with git-tracked configuration.")
|
||||
print("Deployment updated via rolling update.")
|
||||
if new_hostname and new_hostname != current_hostname:
|
||||
print(f"\nNew hostname: {new_hostname}")
|
||||
print("Caddy will automatically provision TLS certificate.")
|
||||
|
||||
@ -384,12 +384,20 @@ class K8sDeployer(Deployer):
|
||||
if opts.o.debug:
|
||||
print(f"Sending this ConfigMap: {cfg_map}")
|
||||
if not opts.o.dry_run:
|
||||
cfg_rsp = self.core_api.create_namespaced_config_map(
|
||||
body=cfg_map, namespace=self.k8s_namespace
|
||||
)
|
||||
if opts.o.debug:
|
||||
print("ConfigMap created:")
|
||||
print(f"{cfg_rsp}")
|
||||
cm_name = cfg_map.metadata.name
|
||||
try:
|
||||
self.core_api.create_namespaced_config_map(
|
||||
body=cfg_map, namespace=self.k8s_namespace
|
||||
)
|
||||
except ApiException as e:
|
||||
if e.status == 409:
|
||||
self.core_api.patch_namespaced_config_map(
|
||||
name=cm_name,
|
||||
namespace=self.k8s_namespace,
|
||||
body=cfg_map,
|
||||
)
|
||||
else:
|
||||
raise
|
||||
|
||||
def _create_deployment(self):
|
||||
# Skip if there are no pods to deploy (e.g. jobs-only stacks)
|
||||
@ -401,38 +409,64 @@ class K8sDeployer(Deployer):
|
||||
deployment = self.cluster_info.get_deployment(
|
||||
image_pull_policy="Always"
|
||||
)
|
||||
# Create the k8s objects
|
||||
# Create or update the k8s Deployment
|
||||
if opts.o.debug:
|
||||
print(f"Sending this deployment: {deployment}")
|
||||
if not opts.o.dry_run:
|
||||
deployment_resp = cast(
|
||||
client.V1Deployment,
|
||||
self.apps_api.create_namespaced_deployment(
|
||||
body=deployment, namespace=self.k8s_namespace
|
||||
),
|
||||
)
|
||||
name = deployment.metadata.name
|
||||
try:
|
||||
deployment_resp = cast(
|
||||
client.V1Deployment,
|
||||
self.apps_api.create_namespaced_deployment(
|
||||
body=deployment, namespace=self.k8s_namespace
|
||||
),
|
||||
)
|
||||
print(f"Created Deployment {name}")
|
||||
except ApiException as e:
|
||||
if e.status == 409:
|
||||
# Already exists — patch to trigger rolling update
|
||||
deployment_resp = cast(
|
||||
client.V1Deployment,
|
||||
self.apps_api.patch_namespaced_deployment(
|
||||
name=name,
|
||||
namespace=self.k8s_namespace,
|
||||
body=deployment,
|
||||
),
|
||||
)
|
||||
print(f"Updated Deployment {name} (rolling update)")
|
||||
else:
|
||||
raise
|
||||
if opts.o.debug:
|
||||
print("Deployment created:")
|
||||
meta = deployment_resp.metadata
|
||||
spec = deployment_resp.spec
|
||||
if meta and spec and spec.template.spec:
|
||||
ns = meta.namespace
|
||||
name = meta.name
|
||||
gen = meta.generation
|
||||
containers = spec.template.spec.containers
|
||||
img = containers[0].image if containers else None
|
||||
print(f"{ns} {name} {gen} {img}")
|
||||
print(f" {meta.namespace} {meta.name} gen={meta.generation} {img}")
|
||||
|
||||
service = self.cluster_info.get_service()
|
||||
if opts.o.debug:
|
||||
print(f"Sending this service: {service}")
|
||||
if service and not opts.o.dry_run:
|
||||
service_resp = self.core_api.create_namespaced_service(
|
||||
namespace=self.k8s_namespace, body=service
|
||||
)
|
||||
svc_name = service.metadata.name
|
||||
try:
|
||||
service_resp = self.core_api.create_namespaced_service(
|
||||
namespace=self.k8s_namespace, body=service
|
||||
)
|
||||
print(f"Created Service {svc_name}")
|
||||
except ApiException as e:
|
||||
if e.status == 409:
|
||||
# Service exists — patch it (preserves clusterIP)
|
||||
service_resp = self.core_api.patch_namespaced_service(
|
||||
name=svc_name,
|
||||
namespace=self.k8s_namespace,
|
||||
body=service,
|
||||
)
|
||||
print(f"Updated Service {svc_name}")
|
||||
else:
|
||||
raise
|
||||
if opts.o.debug:
|
||||
print("Service created:")
|
||||
print(f"{service_resp}")
|
||||
print(f" {service_resp}")
|
||||
|
||||
def _create_jobs(self):
|
||||
# Process job compose files into k8s Jobs
|
||||
@ -570,12 +604,22 @@ class K8sDeployer(Deployer):
|
||||
if opts.o.debug:
|
||||
print(f"Sending this ingress: {ingress}")
|
||||
if not opts.o.dry_run:
|
||||
ingress_resp = self.networking_api.create_namespaced_ingress(
|
||||
namespace=self.k8s_namespace, body=ingress
|
||||
)
|
||||
if opts.o.debug:
|
||||
print("Ingress created:")
|
||||
print(f"{ingress_resp}")
|
||||
ing_name = ingress.metadata.name
|
||||
try:
|
||||
self.networking_api.create_namespaced_ingress(
|
||||
namespace=self.k8s_namespace, body=ingress
|
||||
)
|
||||
print(f"Created Ingress {ing_name}")
|
||||
except ApiException as e:
|
||||
if e.status == 409:
|
||||
self.networking_api.patch_namespaced_ingress(
|
||||
name=ing_name,
|
||||
namespace=self.k8s_namespace,
|
||||
body=ingress,
|
||||
)
|
||||
print(f"Updated Ingress {ing_name}")
|
||||
else:
|
||||
raise
|
||||
else:
|
||||
if opts.o.debug:
|
||||
print("No ingress configured")
|
||||
@ -585,12 +629,20 @@ class K8sDeployer(Deployer):
|
||||
if opts.o.debug:
|
||||
print(f"Sending this nodeport: {nodeport}")
|
||||
if not opts.o.dry_run:
|
||||
nodeport_resp = self.core_api.create_namespaced_service(
|
||||
namespace=self.k8s_namespace, body=nodeport
|
||||
)
|
||||
if opts.o.debug:
|
||||
print("NodePort created:")
|
||||
print(f"{nodeport_resp}")
|
||||
np_name = nodeport.metadata.name
|
||||
try:
|
||||
self.core_api.create_namespaced_service(
|
||||
namespace=self.k8s_namespace, body=nodeport
|
||||
)
|
||||
except ApiException as e:
|
||||
if e.status == 409:
|
||||
self.core_api.patch_namespaced_service(
|
||||
name=np_name,
|
||||
namespace=self.k8s_namespace,
|
||||
body=nodeport,
|
||||
)
|
||||
else:
|
||||
raise
|
||||
|
||||
# Call start() hooks — stacks can create additional k8s resources
|
||||
if self.deployment_context:
|
||||
|
||||
Loading…
Reference in New Issue
Block a user