From dc15c0f4a54527a473f6862a9b44690ca7f57f08 Mon Sep 17 00:00:00 2001
From: "A. F. Dudley"
Date: Wed, 18 Mar 2026 19:43:09 +0000
Subject: [PATCH] feat: auto-generate readiness probes from http-proxy routes

Containers referenced in spec.yml http-proxy routes now get TCP
readiness probes on the proxied port. This tells k8s when a container
is actually ready to serve traffic.

Without readiness probes, k8s considers pods ready immediately after
start, which means:
- Rolling updates cut over before the app is listening
- Broken containers look "ready" and receive traffic (502s)
- kubectl rollout undo has nothing to roll back to

The probes use TCP socket checks (not HTTP) to work with any protocol.
Initial delay 5s, check every 10s, fail after 3 consecutive failures.

Closes so-l2l part C.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
Reviewer notes (text in this area is ignored when the patch is applied):

* This patch was recovered from a copy whose line breaks and leading
  whitespace had been collapsed. The line structure was rebuilt from the
  hunk headers (22 + 1 + 10 + 1 = 34 added lines). Indentation of the
  context lines is inferred, not copied - TODO confirm against the target
  file, or apply with `git apply --ignore-whitespace`.
* The author and co-author e-mail addresses were already missing from the
  recovered copy and have not been guessed.
* NOTE(review): in _get_readiness_probe_ports, `int(port_str)` is not
  guarded, so a proxy-to value whose text after the last ':' is not a
  plain integer raises ValueError. Confirm that routes are validated
  before this method runs.
* NOTE(review): the probe is attached with `if probe_port:`, a truthiness
  test, so a parsed port of 0 yields no probe.

 stack_orchestrator/deploy/k8s/cluster_info.py | 34 +++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/stack_orchestrator/deploy/k8s/cluster_info.py b/stack_orchestrator/deploy/k8s/cluster_info.py
index eed8ba91..9524d8f3 100644
--- a/stack_orchestrator/deploy/k8s/cluster_info.py
+++ b/stack_orchestrator/deploy/k8s/cluster_info.py
@@ -237,6 +237,28 @@ class ClusterInfo:
         )
         return ingress
 
+    def _get_readiness_probe_ports(self) -> dict:
+        """Map container names to TCP readiness probe ports.
+
+        Derives probe ports from http-proxy routes in the spec. If a container
+        has an http-proxy route (proxy-to: container:port), we probe that port.
+        This tells k8s when the container is ready to serve traffic, which is
+        required for safe rolling updates.
+        """
+        probe_ports: dict = {}
+        http_proxy_list = self.spec.get_http_proxy()
+        if http_proxy_list:
+            for http_proxy in http_proxy_list:
+                for route in http_proxy.get("routes", []):
+                    proxy_to = route.get("proxy-to", "")
+                    if ":" in proxy_to:
+                        container, port_str = proxy_to.rsplit(":", 1)
+                        port = int(port_str)
+                        # Use the first route's port for each container
+                        if container not in probe_ports:
+                            probe_ports[container] = port
+        return probe_ports
+
     # TODO: suppoprt multiple services
     def get_service(self):
         # Collect all ports from http-proxy routes
@@ -471,6 +493,7 @@ class ClusterInfo:
         containers = []
         init_containers = []
         services = {}
+        readiness_probe_ports = self._get_readiness_probe_ports()
         global_resources = self.spec.get_container_resources()
         if not global_resources:
             global_resources = DEFAULT_CONTAINER_RESOURCES
@@ -569,6 +592,16 @@ class ClusterInfo:
                 container_resources = self._resolve_container_resources(
                     container_name, service_info, global_resources
                 )
+                # Readiness probe from http-proxy routes
+                readiness_probe = None
+                probe_port = readiness_probe_ports.get(container_name)
+                if probe_port:
+                    readiness_probe = client.V1Probe(
+                        tcp_socket=client.V1TCPSocketAction(port=probe_port),
+                        initial_delay_seconds=5,
+                        period_seconds=10,
+                        failure_threshold=3,
+                    )
                 container = client.V1Container(
                     name=container_name,
                     image=image_to_use,
@@ -579,6 +612,7 @@ class ClusterInfo:
                     env_from=env_from,
                     ports=container_ports if container_ports else None,
                     volume_mounts=volume_mounts,
+                    readiness_probe=readiness_probe,
                     security_context=client.V1SecurityContext(
                         privileged=self.spec.get_privileged(),
                         run_as_user=int(service_info["user"]) if "user" in service_info else None,