diff --git a/.gitignore b/.gitignore
index 3aaa220b..6abbf941 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,4 @@ __pycache__
 package
 stack_orchestrator/data/build_tag.txt
 /build
+.worktrees
diff --git a/.pebbles/events.jsonl b/.pebbles/events.jsonl
index 9e07f970..ed5117b7 100644
--- a/.pebbles/events.jsonl
+++ b/.pebbles/events.jsonl
@@ -1,2 +1,7 @@
 {"type": "create", "timestamp": "2026-03-18T14:45:07.038870Z", "issue_id": "so-a1a", "payload": {"title": "deploy create should support external credential injection", "type": "feature", "priority": "2", "description": "deploy create generates config.env but provides no mechanism to inject external credentials (API keys, tokens, etc.) at creation time. Operators must append to config.env after the fact, which mutates a build artifact. deploy create should accept --credentials-file or similar to include secrets in the generated config.env."}}
 {"type": "create", "timestamp": "2026-03-18T14:45:07.038942Z", "issue_id": "so-b2b", "payload": {"title": "REGISTRY_TOKEN / imagePullSecret flow undocumented", "type": "bug", "priority": "2", "description": "create_registry_secret() exists in deployment_create.py and is called during up(), but REGISTRY_TOKEN is not documented in spec.yml or any user-facing docs. The restart command warns \"Registry token env var REGISTRY_TOKEN not set, skipping registry secret\" but doesn't explain how to set it. For GHCR private images, this is required and the flow from spec.yml -> config.env -> imagePullSecret needs documentation."}}
+{"type": "create", "timestamp": "2026-03-18T19:10:00.000000Z", "issue_id": "so-k1k", "payload": {"title": "Stack path resolution differs between deploy create and deployment restart", "type": "bug", "priority": "2", "description": "deploy create resolves --stack as a relative path from cwd. deployment restart resolves --stack-path as absolute, then computes repo_root as 4 parents up (assuming stack_orchestrator/data/stacks/name structure). External stacks with different nesting depths (e.g. stack-orchestrator/stacks/name = 3 levels) get wrong repo_root, causing --spec-file resolution to fail. The two commands should use the same path resolution logic."}}
+{"type": "create", "timestamp": "2026-03-18T19:25:00.000000Z", "issue_id": "so-l2l", "payload": {"title": "deployment restart should update in place, not delete/recreate", "type": "bug", "priority": "1", "description": "deployment restart deletes the entire namespace then recreates everything from scratch. This causes:\n\n1. **Downtime** — nothing serves traffic between delete and successful recreate\n2. **No rollback** — deleting the namespace destroys ReplicaSet revision history\n3. **Race conditions** — namespace may still be terminating when up() tries to create\n4. **Cascading failures** — if ANY container fails to start, the entire site is down with no fallback\n\nFix: three changes needed.\n\n**A. up() should create-or-update, not just create.** Use patch/apply semantics for Deployments, Services, Ingresses. When the pod spec changes (new env vars, new image), k8s creates a new ReplicaSet, scales it up, waits for readiness probes, then scales the old one down. Old pods serve traffic until new pods are healthy.\n\n**B. down() should never delete the namespace on restart.** Only on explicit teardown. The namespace owns the revision history. Current code: _delete_namespace() on every down(). Should: delete individual resources by label for teardown, do nothing for restart (let update-in-place handle it).\n\n**C. All containers need readiness probes.** Without them k8s considers pods ready immediately, defeating rolling update safety. laconic-so should generate readiness probes from the http-proxy routes in spec.yml (if a container has an http route, probe that port).\n\nWith these changes, k8s native rolling updates provide zero-downtime deploys and automatic rollback (if new pods fail readiness, rollout stalls, old pods keep serving).\n\nSource files:\n- deploy_k8s.py: up(), down(), _create_deployment(), _delete_namespace()\n- cluster_info.py: pod spec generation (needs readiness probes)\n- deployment.py: restart() orchestration"}}
+{"type": "create", "timestamp": "2026-03-18T20:15:03.000000Z", "issue_id": "so-m3m", "payload": {"title": "Add credentials-files spec key for on-disk credential injection", "type": "feature", "priority": "1", "description": "deployment restart regenerates config.env from spec.yml, wiping credentials that were appended from on-disk files (e.g. ~/.credentials/*.env). Operators must append credentials after deploy create, which is fragile and breaks on restart.\n\nFix: New top-level spec key credentials-files. _write_config_file() reads each file and appends its contents to config.env after writing config vars. Files are read at deploy time from the deployment host.\n\nSpec syntax:\n  credentials-files:\n    - ~/.credentials/dumpster-secrets.env\n    - ~/.credentials/dumpster-r2.env\n\nFiles:\n- deploy/spec.py: add get_credentials_files() returning list of paths\n- deploy/deployment_create.py: in _write_config_file(), after writing config vars, read and append each credentials file (expand ~ to home dir)\n\nAlso update dumpster-stack spec.yml to use the new key and remove the ansible credential append workaround from woodburn_deployer (group_vars/all.yml credentials_env_files, stack_deploy role append tasks, restart_dumpster.yml credential steps). Those cleanups are in the woodburn_deployer repo."}}
+{"type":"status_update","timestamp":"2026-03-18T21:54:12.59148256Z","issue_id":"so-m3m","payload":{"status":"in_progress"}}
+{"type":"close","timestamp":"2026-03-18T21:55:31.6035544Z","issue_id":"so-m3m","payload":{}}
diff --git a/stack_orchestrator/deploy/deployment.py b/stack_orchestrator/deploy/deployment.py
index 0804b5a6..2c140f30 100644
--- a/stack_orchestrator/deploy/deployment.py
+++ b/stack_orchestrator/deploy/deployment.py
@@ -399,11 +399,60 @@ def restart(ctx, stack_path, spec_file, config_file, force, expected_ip, image):
     deployment_context.init(deployment_context.deployment_dir)
     ctx.obj = deployment_context
 
-    # Apply updated deployment (create-or-update triggers rolling update).
-    # No down() — k8s rolling update keeps old pods serving traffic until
-    # new pods pass readiness checks.
+    # Apply updated deployment.
+    # If maintenance-service is configured, swap Ingress to maintenance
+    # backend during the Recreate window so users see a branded page
+    # instead of bare 502s.
     print("\n[4/4] Applying deployment update...")
     ctx.obj = make_deploy_context(ctx)
+
+    # Check for maintenance service in the (reloaded) spec
+    maintenance_svc = deployment_context.spec.get_maintenance_service()
+    if maintenance_svc:
+        print(f"Maintenance service configured: {maintenance_svc}")
+        _restart_with_maintenance(
+            ctx, deployment_context, maintenance_svc, image_overrides
+        )
+    else:
+        up_operation(
+            ctx,
+            services_list=None,
+            stay_attached=False,
+            skip_cluster_management=True,
+            image_overrides=image_overrides or None,
+        )
+
+    print("\n=== Restart Complete ===")
+    print("Deployment updated via rolling update.")
+    if new_hostname and new_hostname != current_hostname:
+        print(f"\nNew hostname: {new_hostname}")
+        print("Caddy will automatically provision TLS certificate.")
+
+
+def _restart_with_maintenance(
+    ctx, deployment_context, maintenance_svc, image_overrides
+):
+    """Restart with Ingress swap to maintenance service during Recreate.
+
+    Flow:
+    1. Deploy all pods (including maintenance pod) with up_operation
+    2. Patch Ingress: swap all route backends to maintenance service
+    3. Scale main (non-maintenance) Deployments to 0; wait up to 120s for pods
+    4. Scale main Deployments back up (triggers Recreate with new spec)
+    5. Wait up to 300s for readiness (a timeout does not abort step 6)
+    6. Patch Ingress: restore original backends
+
+    This ensures the maintenance pod is already running before we touch
+    the Ingress, and the main pods get a clean Recreate.
+    """
+    import time
+
+    from kubernetes.client.exceptions import ApiException
+
+    from stack_orchestrator.deploy.deploy import up_operation
+
+    # Step 1: Apply the full deployment (creates/updates all pods + services)
+    # This ensures maintenance pod exists before we swap Ingress to it.
     up_operation(
         ctx,
         services_list=None,
@@ -412,8 +461,146 @@ def restart(ctx, stack_path, spec_file, config_file, force, expected_ip, image):
         image_overrides=image_overrides or None,
     )
 
-    print("\n=== Restart Complete ===")
-    print("Deployment updated via rolling update.")
-    if new_hostname and new_hostname != current_hostname:
-        print(f"\nNew hostname: {new_hostname}")
-        print("Caddy will automatically provision TLS certificate.")
+    # Parse maintenance service spec: "container-name:port"
+    maint_container = maintenance_svc.split(":")[0]
+    maint_port = int(maintenance_svc.split(":")[1])
+
+    # Connect to k8s API
+    deploy_ctx = ctx.obj
+    deployer = deploy_ctx.deployer
+    deployer.connect_api()
+    namespace = deployer.k8s_namespace
+    app_name = deployer.cluster_info.app_name
+    networking_api = deployer.networking_api
+    apps_api = deployer.apps_api
+
+    ingress_name = f"{app_name}-ingress"
+
+    # Step 2: Read current Ingress and save original backends
+    try:
+        ingress = networking_api.read_namespaced_ingress(
+            name=ingress_name, namespace=namespace
+        )
+    except ApiException:
+        print("Warning: No Ingress found, skipping maintenance swap")
+        return
+
+    # Resolve which service the maintenance container belongs to
+    maint_service_name = deployer.cluster_info._resolve_service_name_for_container(
+        maint_container
+    )
+
+    # Save original backends for restoration
+    original_backends = []
+    for rule in ingress.spec.rules:
+        rule_backends = []
+        for path in rule.http.paths:
+            rule_backends.append(
+                {
+                    "name": path.backend.service.name,
+                    "port": path.backend.service.port.number,
+                }
+            )
+        original_backends.append(rule_backends)
+
+    # Patch all Ingress backends to point to maintenance service
+    print("Swapping Ingress to maintenance service...")
+    for rule in ingress.spec.rules:
+        for path in rule.http.paths:
+            path.backend.service.name = maint_service_name
+            path.backend.service.port.number = maint_port
+
+    networking_api.replace_namespaced_ingress(
+        name=ingress_name, namespace=namespace, body=ingress
+    )
+    print("Ingress now points to maintenance service")
+
+    # Step 3: Find main (non-maintenance) Deployments and scale to 0
+    # then back up to trigger a clean Recreate
+    deployments_resp = apps_api.list_namespaced_deployment(
+        namespace=namespace, label_selector=f"app={app_name}"
+    )
+    main_deployments = []
+    for dep in deployments_resp.items:
+        dep_name = dep.metadata.name
+        # Skip maintenance deployments
+        component = (dep.metadata.labels or {}).get("app.kubernetes.io/component", "")
+        is_maintenance = maint_container in component
+        if not is_maintenance:
+            main_deployments.append(dep_name)
+
+    if main_deployments:
+        # Scale down main deployments
+        for dep_name in main_deployments:
+            print(f"Scaling down {dep_name}...")
+            apps_api.patch_namespaced_deployment_scale(
+                name=dep_name,
+                namespace=namespace,
+                body={"spec": {"replicas": 0}},
+            )
+
+        # Wait for pods to terminate
+        print("Waiting for main pods to terminate...")
+        deadline = time.monotonic() + 120
+        while time.monotonic() < deadline:
+            pods = deployer.core_api.list_namespaced_pod(
+                namespace=namespace,
+                label_selector=f"app={app_name}",
+            )
+            # Count non-maintenance pods
+            active = sum(
+                1
+                for p in pods.items
+                if p.metadata
+                and p.metadata.deletion_timestamp is None
+                and not any(
+                    maint_container in (c.name or "") for c in (p.spec.containers or [])
+                )
+            )
+            if active == 0:
+                break
+            time.sleep(2)
+
+        # Step 4: Scale back up to the replica count from the spec
+        replicas = deployment_context.spec.get_replicas()
+        for dep_name in main_deployments:
+            print(f"Scaling up {dep_name} to {replicas} replicas...")
+            apps_api.patch_namespaced_deployment_scale(
+                name=dep_name,
+                namespace=namespace,
+                body={"spec": {"replicas": replicas}},
+            )
+
+        # Step 5: Wait for readiness
+        print("Waiting for main pods to become ready...")
+        deadline = time.monotonic() + 300
+        while time.monotonic() < deadline:
+            all_ready = True
+            for dep_name in main_deployments:
+                dep = apps_api.read_namespaced_deployment(
+                    name=dep_name, namespace=namespace
+                )
+                ready = dep.status.ready_replicas or 0
+                desired = dep.spec.replicas or 1
+                if ready < desired:
+                    all_ready = False
+                    break
+            if all_ready:
+                break
+            time.sleep(5)
+
+    # Step 6: Restore original Ingress backends
+    print("Restoring original Ingress backends...")
+    ingress = networking_api.read_namespaced_ingress(
+        name=ingress_name, namespace=namespace
+    )
+    for i, rule in enumerate(ingress.spec.rules):
+        for j, path in enumerate(rule.http.paths):
+            if i < len(original_backends) and j < len(original_backends[i]):
+                path.backend.service.name = original_backends[i][j]["name"]
+                path.backend.service.port.number = original_backends[i][j]["port"]
+
+    networking_api.replace_namespaced_ingress(
+        name=ingress_name, namespace=namespace, body=ingress
+    )
+    print("Ingress restored to original backends")
diff --git a/stack_orchestrator/deploy/k8s/cluster_info.py b/stack_orchestrator/deploy/k8s/cluster_info.py
index d9ede7f1..6890b430 100644
--- a/stack_orchestrator/deploy/k8s/cluster_info.py
+++ b/stack_orchestrator/deploy/k8s/cluster_info.py
@@ -167,6 +167,28 @@ class ClusterInfo:
                         nodeports.append(service)
         return nodeports
 
+    def _resolve_service_name_for_container(self, container_name: str) -> str:
+        """Resolve the k8s Service name that routes to a given container.
+
+        For multi-pod stacks, each pod has its own Service. We find which
+        pod file contains this container and return the corresponding service
+        name. Single-pod stacks and unknown containers get the legacy name.
+        """
+        pod_files = list(self.parsed_pod_yaml_map.keys())
+        multi_pod = len(pod_files) > 1
+
+        if not multi_pod:
+            return f"{self.app_name}-service"
+
+        for pod_file in pod_files:
+            pod = self.parsed_pod_yaml_map[pod_file]
+            if container_name in pod.get("services", {}):
+                pod_name = self._pod_name_from_file(pod_file)
+                return f"{self.app_name}-{pod_name}-service"
+
+        # Fallback: container not found in any pod file
+        return f"{self.app_name}-service"
+
     def get_ingress(
         self, use_tls=False, certificates=None, cluster_issuer="letsencrypt-prod"
     ):
@@ -186,12 +208,16 @@ class ClusterInfo:
                 if use_tls:
                     tls.append(
                         client.V1IngressTLS(
-                            hosts=certificate["spec"]["dnsNames"]
-                            if certificate
-                            else [host_name],
-                            secret_name=certificate["spec"]["secretName"]
-                            if certificate
-                            else f"{self.app_name}-{host_name}-tls",
+                            hosts=(
+                                certificate["spec"]["dnsNames"]
+                                if certificate
+                                else [host_name]
+                            ),
+                            secret_name=(
+                                certificate["spec"]["secretName"]
+                                if certificate
+                                else f"{self.app_name}-{host_name}-tls"
+                            ),
                         )
                     )
 
@@ -202,16 +228,18 @@ class ClusterInfo:
                     if opts.o.debug:
                         print(f"proxy config: {path} -> {proxy_to}")
                     # proxy_to has the form <service>:<port>
+                    container_name = proxy_to.split(":")[0]
                     proxy_to_port = int(proxy_to.split(":")[1])
+                    service_name = self._resolve_service_name_for_container(
+                        container_name
+                    )
                     paths.append(
                         client.V1HTTPIngressPath(
                             path_type="Prefix",
                             path=path,
                             backend=client.V1IngressBackend(
                                 service=client.V1IngressServiceBackend(
-                                    # TODO: this looks wrong
-                                    name=f"{self.app_name}-service",
-                                    # TODO: pull port number from the service
+                                    name=service_name,
                                     port=client.V1ServiceBackendPort(
                                         number=proxy_to_port
                                     ),
@@ -618,14 +646,16 @@ class ClusterInfo:
                     readiness_probe=readiness_probe,
                     security_context=client.V1SecurityContext(
                         privileged=self.spec.get_privileged(),
-                        run_as_user=int(service_info["user"])
-                        if "user" in service_info
-                        else None,
-                        capabilities=client.V1Capabilities(
-                            add=self.spec.get_capabilities()
-                        )
-                        if self.spec.get_capabilities()
-                        else None,
+                        run_as_user=(
+                            int(service_info["user"])
+                            if "user" in service_info
+                            else None
+                        ),
+                        capabilities=(
+                            client.V1Capabilities(add=self.spec.get_capabilities())
+                            if self.spec.get_capabilities()
+                            else None
+                        ),
                     ),
                     resources=to_k8s_resource_requirements(container_resources),
                 )
@@ -647,18 +677,34 @@ class ClusterInfo:
         volumes = volumes_for_pod_files(parsed_yaml_map, self.spec, self.app_name)
         return containers, init_containers, services, volumes
 
-    # TODO: put things like image pull policy into an object-scope struct
-    def get_deployment(self, image_pull_policy: Optional[str] = None):
-        containers, init_containers, services, volumes = self._build_containers(
-            self.parsed_pod_yaml_map, image_pull_policy
-        )
-        registry_config = self.spec.get_image_registry_config()
-        if registry_config:
-            secret_name = f"{self.app_name}-image-pull-secret"
-            image_pull_secrets = [client.V1LocalObjectReference(name=secret_name)]
-        else:
-            image_pull_secrets = []
+    def _pod_name_from_file(self, pod_file: str) -> str:
+        """Extract pod name from compose file path.
 
+        docker-compose-dumpster.yml -> dumpster
+        docker-compose-dumpster-maintenance.yml -> dumpster-maintenance
+        """
+        import os
+
+        base = os.path.basename(pod_file)
+        name = base
+        if name.startswith("docker-compose-"):
+            name = name[len("docker-compose-") :]
+        if name.endswith(".yml"):
+            name = name[: -len(".yml")]
+        elif name.endswith(".yaml"):
+            name = name[: -len(".yaml")]
+        return name
+
+    def _pod_has_pvcs(self, parsed_pod_file: Any) -> bool:
+        """Check if a parsed compose file declares named volumes (PVCs)."""
+        volumes = parsed_pod_file.get("volumes", {})
+        return len(volumes) > 0
+
+    def _build_common_pod_metadata(self, services: dict) -> tuple:
+        """Build shared annotations, labels, affinity, tolerations for pods.
+
+        Returns (annotations, labels, affinity, tolerations).
+        """
         annotations = None
         labels = {"app": self.app_name}
         if self.stack_name:
@@ -680,7 +726,6 @@ class ClusterInfo:
         if self.spec.get_node_affinities():
             affinities = []
             for rule in self.spec.get_node_affinities():
-                # TODO add some input validation here
                 label_name = rule["label"]
                 label_value = rule["value"]
                 affinities.append(
@@ -703,7 +748,6 @@ class ClusterInfo:
         if self.spec.get_node_tolerations():
             tolerations = []
             for toleration in self.spec.get_node_tolerations():
-                # TODO add some input validation here
                 toleration_key = toleration["key"]
                 toleration_value = toleration["value"]
                 tolerations.append(
@@ -715,44 +759,210 @@ class ClusterInfo:
                     )
                 )
 
-        use_host_network = self._any_service_has_host_network()
-        template = client.V1PodTemplateSpec(
-            metadata=client.V1ObjectMeta(annotations=annotations, labels=labels),
-            spec=client.V1PodSpec(
-                containers=containers,
-                init_containers=init_containers or None,
-                image_pull_secrets=image_pull_secrets,
-                volumes=volumes,
-                affinity=affinity,
-                tolerations=tolerations,
-                runtime_class_name=self.spec.get_runtime_class(),
-                host_network=use_host_network or None,
-                dns_policy=("ClusterFirstWithHostNet" if use_host_network else None),
-            ),
-        )
-        spec = client.V1DeploymentSpec(
-            replicas=self.spec.get_replicas(),
-            template=template,
-            selector={"matchLabels": {"app": self.app_name}},
-        )
+        return annotations, labels, affinity, tolerations
 
-        deployment = client.V1Deployment(
-            api_version="apps/v1",
-            kind="Deployment",
-            metadata=client.V1ObjectMeta(
-                name=f"{self.app_name}-deployment",
-                labels={
-                    "app": self.app_name,
-                    **(
-                        {"app.kubernetes.io/stack": self.stack_name}
-                        if self.stack_name
-                        else {}
+    # TODO: put things like image pull policy into an object-scope struct
+    def get_deployment(self, image_pull_policy: Optional[str] = None):
+        """Return the first Deployment from get_deployments(), or None if empty.
+
+        Kept for backward compatibility. With several pod files only the first
+        pod file's Deployment is returned; use get_deployments() for all of them.
+        """
+        deployments = self.get_deployments(image_pull_policy)
+        if not deployments:
+            return None
+        # Legacy: return the first (and usually only) deployment
+        return deployments[0]
+
+    def get_deployments(
+        self, image_pull_policy: Optional[str] = None
+    ) -> List[client.V1Deployment]:
+        """Build one k8s Deployment per pod file.
+
+        Each pod file (docker-compose-<name>.yml) becomes its own Deployment
+        with independent lifecycle and update strategy:
+        - Pods with PVCs get strategy=Recreate (can't do rolling updates
+          with ReadWriteOnce volumes)
+        - Pods without PVCs get strategy=RollingUpdate
+
+        This enables maintenance services to survive main pod restarts.
+        """
+        if not self.parsed_pod_yaml_map:
+            return []
+
+        registry_config = self.spec.get_image_registry_config()
+        if registry_config:
+            secret_name = f"{self.app_name}-image-pull-secret"
+            image_pull_secrets = [client.V1LocalObjectReference(name=secret_name)]
+        else:
+            image_pull_secrets = []
+
+        use_host_network = self._any_service_has_host_network()
+        pod_files = list(self.parsed_pod_yaml_map.keys())
+
+        # Single pod file: preserve legacy naming ({app_name}-deployment)
+        # Multiple pod files: use {app_name}-{pod_name}-deployment
+        multi_pod = len(pod_files) > 1
+
+        deployments = []
+        for pod_file in pod_files:
+            pod_name = self._pod_name_from_file(pod_file)
+            single_pod_map = {pod_file: self.parsed_pod_yaml_map[pod_file]}
+            containers, init_containers, services, volumes = self._build_containers(
+                single_pod_map, image_pull_policy
+            )
+            annotations, labels, affinity, tolerations = (
+                self._build_common_pod_metadata(services)
+            )
+
+            # Add pod-name label so Services can target specific pods
+            if multi_pod:
+                labels["app.kubernetes.io/component"] = pod_name
+
+            has_pvcs = self._pod_has_pvcs(self.parsed_pod_yaml_map[pod_file])
+            if has_pvcs:
+                strategy = client.V1DeploymentStrategy(type="Recreate")
+            else:
+                strategy = client.V1DeploymentStrategy(
+                    type="RollingUpdate",
+                    rolling_update=client.V1RollingUpdateDeployment(
+                        max_unavailable=0, max_surge=1
                     ),
-                },
-            ),
-            spec=spec,
-        )
-        return deployment
+                )
+
+            # Pod selector: for multi-pod, select by both app and component
+            selector_labels = {"app": self.app_name}
+            if multi_pod:
+                selector_labels["app.kubernetes.io/component"] = pod_name
+
+            template = client.V1PodTemplateSpec(
+                metadata=client.V1ObjectMeta(annotations=annotations, labels=labels),
+                spec=client.V1PodSpec(
+                    containers=containers,
+                    init_containers=init_containers or None,
+                    image_pull_secrets=image_pull_secrets,
+                    volumes=volumes,
+                    affinity=affinity,
+                    tolerations=tolerations,
+                    runtime_class_name=self.spec.get_runtime_class(),
+                    host_network=use_host_network or None,
+                    dns_policy=(
+                        "ClusterFirstWithHostNet" if use_host_network else None
+                    ),
+                ),
+            )
+
+            if multi_pod:
+                deployment_name = f"{self.app_name}-{pod_name}-deployment"
+            else:
+                deployment_name = f"{self.app_name}-deployment"
+
+            spec = client.V1DeploymentSpec(
+                replicas=self.spec.get_replicas(),
+                template=template,
+                selector={"matchLabels": selector_labels},
+                strategy=strategy,
+            )
+
+            deployment = client.V1Deployment(
+                api_version="apps/v1",
+                kind="Deployment",
+                metadata=client.V1ObjectMeta(
+                    name=deployment_name,
+                    labels={
+                        "app": self.app_name,
+                        **(
+                            {
+                                "app.kubernetes.io/stack": self.stack_name,
+                            }
+                            if self.stack_name
+                            else {}
+                        ),
+                        **(
+                            {"app.kubernetes.io/component": pod_name}
+                            if multi_pod
+                            else {}
+                        ),
+                    },
+                ),
+                spec=spec,
+            )
+            deployments.append(deployment)
+
+        return deployments
+
+    def get_services(self) -> List[client.V1Service]:
+        """Build per-pod ClusterIP Services for multi-pod stacks.
+
+        Each pod with containers targeted by http-proxy routes gets its own
+        Service so Ingress can route to specific pods. Single-pod stacks get
+        the legacy get_service() result as a list (empty if there is none).
+        """
+        pod_files = list(self.parsed_pod_yaml_map.keys())
+        multi_pod = len(pod_files) > 1
+
+        if not multi_pod:
+            # Legacy: single service for all pods
+            svc = self.get_service()
+            return [svc] if svc else []
+
+        # Multi-pod: one service per pod, only for pods that have
+        # ports referenced by http-proxy routes
+        http_proxy_list = self.spec.get_http_proxy()
+        if not http_proxy_list:
+            return []
+
+        # Build map: container_name -> port from http-proxy routes
+        container_ports: dict = {}
+        for http_proxy in http_proxy_list:
+            for route in http_proxy.get("routes", []):
+                proxy_to = route.get("proxy-to", "")
+                if ":" in proxy_to:
+                    container, port_str = proxy_to.rsplit(":", 1)
+                    port = int(port_str)
+                    if container not in container_ports:
+                        container_ports[container] = set()
+                    container_ports[container].add(port)
+
+        # Build map: pod_file -> set of service names in that pod
+        pod_services_map: dict = {}
+        for pod_file in pod_files:
+            pod = self.parsed_pod_yaml_map[pod_file]
+            pod_services_map[pod_file] = set(pod.get("services", {}).keys())
+
+        services = []
+        for pod_file in pod_files:
+            pod_name = self._pod_name_from_file(pod_file)
+            svc_names = pod_services_map[pod_file]
+            # Collect ports from http-proxy that belong to this pod's containers
+            ports_set: Set[int] = set()
+            for svc_name in svc_names:
+                if svc_name in container_ports:
+                    ports_set.update(container_ports[svc_name])
+
+            if not ports_set:
+                continue
+
+            service_ports = [
+                client.V1ServicePort(port=p, target_port=p, name=f"port-{p}")
+                for p in sorted(ports_set)
+            ]
+            service = client.V1Service(
+                metadata=client.V1ObjectMeta(
+                    name=f"{self.app_name}-{pod_name}-service",
+                    labels={"app": self.app_name},
+                ),
+                spec=client.V1ServiceSpec(
+                    type="ClusterIP",
+                    ports=service_ports,
+                    selector={
+                        "app": self.app_name,
+                        "app.kubernetes.io/component": pod_name,
+                    },
+                ),
+            )
+            services.append(service)
+        return services
 
     def get_jobs(self, image_pull_policy: Optional[str] = None) -> List[client.V1Job]:
         """Build k8s Job objects from parsed job compose files.
diff --git a/stack_orchestrator/deploy/k8s/deploy_k8s.py b/stack_orchestrator/deploy/k8s/deploy_k8s.py
index 787f20fd..eb257ef6 100644
--- a/stack_orchestrator/deploy/k8s/deploy_k8s.py
+++ b/stack_orchestrator/deploy/k8s/deploy_k8s.py
@@ -411,91 +411,102 @@ class K8sDeployer(Deployer):
             if opts.o.debug:
                 print("No pods defined, skipping Deployment creation")
             return
-        # Process compose files into a Deployment
-        deployment = self.cluster_info.get_deployment(image_pull_policy="Always")
-        # Apply image overrides if provided
-        if self.image_overrides:
-            for container in deployment.spec.template.spec.containers:
-                if container.name in self.image_overrides:
-                    container.image = self.image_overrides[container.name]
-                    if opts.o.debug:
-                        print(
-                            f"Overriding image for {container.name}: {container.image}"
-                        )
-        # Create or update the k8s Deployment
-        if opts.o.debug:
-            print(f"Sending this deployment: {deployment}")
-        if not opts.o.dry_run:
-            name = deployment.metadata.name
-            try:
-                deployment_resp = cast(
-                    client.V1Deployment,
-                    self.apps_api.create_namespaced_deployment(
-                        body=deployment, namespace=self.k8s_namespace
-                    ),
-                )
-                print(f"Created Deployment {name}")
-            except ApiException as e:
-                if e.status == 409:
-                    # Already exists — replace to ensure removed fields
-                    # (volumes, mounts, env vars) are actually deleted.
-                    # Patch uses strategic merge which preserves old fields.
-                    existing = self.apps_api.read_namespaced_deployment(
-                        name=name, namespace=self.k8s_namespace
-                    )
-                    deployment.metadata.resource_version = (
-                        existing.metadata.resource_version
-                    )
+        # Process compose files into Deployments (one per pod file)
+        deployments = self.cluster_info.get_deployments(image_pull_policy="Always")
+        for deployment in deployments:
+            # Apply image overrides if provided
+            if self.image_overrides:
+                for container in deployment.spec.template.spec.containers:
+                    if container.name in self.image_overrides:
+                        container.image = self.image_overrides[container.name]
+                        if opts.o.debug:
+                            print(
+                                f"Overriding image for {container.name}:"
+                                f" {container.image}"
+                            )
+            # Create or update the k8s Deployment
+            if opts.o.debug:
+                print(f"Sending this deployment: {deployment}")
+            if not opts.o.dry_run:
+                name = deployment.metadata.name
+                try:
                     deployment_resp = cast(
                         client.V1Deployment,
-                        self.apps_api.replace_namespaced_deployment(
-                            name=name,
-                            namespace=self.k8s_namespace,
-                            body=deployment,
+                        self.apps_api.create_namespaced_deployment(
+                            body=deployment, namespace=self.k8s_namespace
                         ),
                     )
-                    print(f"Updated Deployment {name} (rolling update)")
-                else:
-                    raise
-            if opts.o.debug:
-                meta = deployment_resp.metadata
-                spec = deployment_resp.spec
-                if meta and spec and spec.template.spec:
-                    containers = spec.template.spec.containers
-                    img = containers[0].image if containers else None
-                    print(f"  {meta.namespace} {meta.name} gen={meta.generation} {img}")
+                    strategy = (
+                        deployment.spec.strategy.type
+                        if deployment.spec.strategy
+                        else "default"
+                    )
+                    print(f"Created Deployment {name} (strategy: {strategy})")
+                except ApiException as e:
+                    if e.status == 409:
+                        # Already exists — replace to ensure removed fields
+                        # (volumes, mounts, env vars) are actually deleted.
+                        existing = self.apps_api.read_namespaced_deployment(
+                            name=name, namespace=self.k8s_namespace
+                        )
+                        deployment.metadata.resource_version = (
+                            existing.metadata.resource_version
+                        )
+                        deployment_resp = cast(
+                            client.V1Deployment,
+                            self.apps_api.replace_namespaced_deployment(
+                                name=name,
+                                namespace=self.k8s_namespace,
+                                body=deployment,
+                            ),
+                        )
+                        print(f"Updated Deployment {name} (rolling update)")
+                    else:
+                        raise
+                if opts.o.debug:
+                    meta = deployment_resp.metadata
+                    spec = deployment_resp.spec
+                    if meta and spec and spec.template.spec:
+                        containers = spec.template.spec.containers
+                        img = containers[0].image if containers else None
+                        print(
+                            f"  {meta.namespace} {meta.name}"
+                            f" gen={meta.generation} {img}"
+                        )
 
-        service = self.cluster_info.get_service()
-        if opts.o.debug:
-            print(f"Sending this service: {service}")
-        if service and not opts.o.dry_run:
-            svc_name = service.metadata.name
-            try:
-                service_resp = self.core_api.create_namespaced_service(
-                    namespace=self.k8s_namespace, body=service
-                )
-                print(f"Created Service {svc_name}")
-            except ApiException as e:
-                if e.status == 409:
-                    # Replace to ensure removed ports are deleted.
-                    # Must preserve clusterIP (immutable) and resourceVersion.
-                    existing = self.core_api.read_namespaced_service(
-                        name=svc_name, namespace=self.k8s_namespace
-                    )
-                    service.metadata.resource_version = (
-                        existing.metadata.resource_version
-                    )
-                    service.spec.cluster_ip = existing.spec.cluster_ip
-                    service_resp = self.core_api.replace_namespaced_service(
-                        name=svc_name,
-                        namespace=self.k8s_namespace,
-                        body=service,
-                    )
-                    print(f"Updated Service {svc_name}")
-                else:
-                    raise
+        # Create Services (one per pod for multi-pod, or one for single-pod)
+        services = self.cluster_info.get_services()
+        for service in services:
             if opts.o.debug:
-                print(f"  {service_resp}")
+                print(f"Sending this service: {service}")
+            if service and not opts.o.dry_run:
+                svc_name = service.metadata.name
+                try:
+                    service_resp = self.core_api.create_namespaced_service(
+                        namespace=self.k8s_namespace, body=service
+                    )
+                    print(f"Created Service {svc_name}")
+                except ApiException as e:
+                    if e.status == 409:
+                        # Replace to ensure removed ports are deleted.
+                        # Must preserve clusterIP (immutable) and resourceVersion.
+                        existing = self.core_api.read_namespaced_service(
+                            name=svc_name, namespace=self.k8s_namespace
+                        )
+                        service.metadata.resource_version = (
+                            existing.metadata.resource_version
+                        )
+                        service.spec.cluster_ip = existing.spec.cluster_ip
+                        service_resp = self.core_api.replace_namespaced_service(
+                            name=svc_name,
+                            namespace=self.k8s_namespace,
+                            body=service,
+                        )
+                        print(f"Updated Service {svc_name}")
+                    else:
+                        raise
+                if opts.o.debug:
+                    print(f"  {service_resp}")
 
     def _create_jobs(self):
         # Process job compose files into k8s Jobs
@@ -880,48 +891,49 @@ class K8sDeployer(Deployer):
                 print("No pods defined, skipping update")
             return
         self.connect_api()
-        ref_deployment = self.cluster_info.get_deployment()
-        if not ref_deployment or not ref_deployment.metadata:
-            return
-        ref_name = ref_deployment.metadata.name
-        if not ref_name:
-            return
+        ref_deployments = self.cluster_info.get_deployments()
+        for ref_deployment in ref_deployments:
+            if not ref_deployment or not ref_deployment.metadata:
+                continue
+            ref_name = ref_deployment.metadata.name
+            if not ref_name:
+                continue
 
-        deployment = cast(
-            client.V1Deployment,
-            self.apps_api.read_namespaced_deployment(
-                name=ref_name, namespace=self.k8s_namespace
-            ),
-        )
-        if not deployment.spec or not deployment.spec.template:
-            return
-        template_spec = deployment.spec.template.spec
-        if not template_spec or not template_spec.containers:
-            return
+            deployment = cast(
+                client.V1Deployment,
+                self.apps_api.read_namespaced_deployment(
+                    name=ref_name, namespace=self.k8s_namespace
+                ),
+            )
+            if not deployment.spec or not deployment.spec.template:
+                continue
+            template_spec = deployment.spec.template.spec
+            if not template_spec or not template_spec.containers:
+                continue
 
-        ref_spec = ref_deployment.spec
-        if ref_spec and ref_spec.template and ref_spec.template.spec:
-            ref_containers = ref_spec.template.spec.containers
-            if ref_containers:
-                new_env = ref_containers[0].env
-                for container in template_spec.containers:
-                    old_env = container.env
-                    if old_env != new_env:
-                        container.env = new_env
+            ref_spec = ref_deployment.spec
+            if ref_spec and ref_spec.template and ref_spec.template.spec:
+                ref_containers = ref_spec.template.spec.containers
+                if ref_containers:
+                    new_env = ref_containers[0].env
+                    for container in template_spec.containers:
+                        old_env = container.env
+                        if old_env != new_env:
+                            container.env = new_env
 
-        template_meta = deployment.spec.template.metadata
-        if template_meta:
-            template_meta.annotations = {
-                "kubectl.kubernetes.io/restartedAt": datetime.utcnow()
-                .replace(tzinfo=timezone.utc)
-                .isoformat()
-            }
+            template_meta = deployment.spec.template.metadata
+            if template_meta:
+                template_meta.annotations = {
+                    "kubectl.kubernetes.io/restartedAt": datetime.utcnow()
+                    .replace(tzinfo=timezone.utc)
+                    .isoformat()
+                }
 
-        self.apps_api.patch_namespaced_deployment(
-            name=ref_name,
-            namespace=self.k8s_namespace,
-            body=deployment,
-        )
+            self.apps_api.patch_namespaced_deployment(
+                name=ref_name,
+                namespace=self.k8s_namespace,
+                body=deployment,
+            )
 
     def run(
         self,
diff --git a/stack_orchestrator/deploy/spec.py b/stack_orchestrator/deploy/spec.py
index 2cef0e4a..ef37bc3c 100644
--- a/stack_orchestrator/deploy/spec.py
+++ b/stack_orchestrator/deploy/spec.py
@@ -264,5 +264,14 @@ class Spec:
     def is_kind_deployment(self):
         return self.get_deployment_type() in [constants.k8s_kind_deploy_type]
 
+    def get_maintenance_service(self) -> typing.Optional[str]:
+        """Return the spec's ``maintenance-service`` value, or None if unset.
+
+        Example value: 'dumpster-maintenance:8000'. When set, the restart command
+        swaps Ingress backends to this service during the main pod Recreate, so
+        users see a branded maintenance page instead of a bare 502.
+        """
+        return self.obj.get("maintenance-service")
+
     def is_docker_deployment(self):
         return self.get_deployment_type() in [constants.compose_deploy_type]