Merge pull request 'fix(k8s): query resources by label in down() for proper cleanup' (#987) from fix-down-cleanup-by-label into main

Reviewed-on: cerc-io/stack-orchestrator#987
AFDudley 2026-02-03 22:57:52 +00:00
commit b41e0cb2f5
2 changed files with 109 additions and 84 deletions
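The gist of the change: down() used to reconstruct resource names from the deployment config and delete those exact names, which missed objects left behind when the generated names changed. With every object now stamped with an "app" label, cleanup can instead ask the cluster what actually exists. A minimal standalone sketch of that pattern, assuming the official kubernetes Python client; the namespace and app name below are placeholders, not values from this PR:

    from kubernetes import client, config
    from kubernetes.client.rest import ApiException

    config.load_kube_config()
    core = client.CoreV1Api()

    namespace = "default"          # placeholder namespace
    label_selector = "app=my-app"  # matches the label added in ClusterInfo

    try:
        # List what is actually running, rather than recomputing names from config
        services = core.list_namespaced_service(
            namespace=namespace, label_selector=label_selector
        )
        for svc in services.items:
            # Delete by observed name, so orphans from older deploy IDs still match
            core.delete_namespaced_service(name=svc.metadata.name, namespace=namespace)
    except ApiException as e:
        print(f"Error cleaning up services: {e}")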


@@ -125,7 +125,8 @@ class ClusterInfo:
                     name=(
                         f"{self.app_name}-nodeport-"
                         f"{pod_port}-{protocol.lower()}"
-                    )
+                    ),
+                    labels={"app": self.app_name},
                 ),
                 spec=client.V1ServiceSpec(
                     type="NodePort",
@@ -208,7 +209,9 @@ class ClusterInfo:
         ingress = client.V1Ingress(
             metadata=client.V1ObjectMeta(
-                name=f"{self.app_name}-ingress", annotations=ingress_annotations
+                name=f"{self.app_name}-ingress",
+                labels={"app": self.app_name},
+                annotations=ingress_annotations,
             ),
             spec=spec,
         )
@@ -238,7 +241,10 @@ class ClusterInfo:
         ]
         service = client.V1Service(
-            metadata=client.V1ObjectMeta(name=f"{self.app_name}-service"),
+            metadata=client.V1ObjectMeta(
+                name=f"{self.app_name}-service",
+                labels={"app": self.app_name},
+            ),
             spec=client.V1ServiceSpec(
                 type="ClusterIP",
                 ports=service_ports,
@@ -320,7 +326,7 @@ class ClusterInfo:
         spec = client.V1ConfigMap(
             metadata=client.V1ObjectMeta(
                 name=f"{self.app_name}-{cfg_map_name}",
-                labels={"configmap-label": cfg_map_name},
+                labels={"app": self.app_name, "configmap-label": cfg_map_name},
             ),
             binary_data=data,
         )
@@ -377,7 +383,10 @@ class ClusterInfo:
         pv = client.V1PersistentVolume(
             metadata=client.V1ObjectMeta(
                 name=f"{self.app_name}-{volume_name}",
-                labels={"volume-label": f"{self.app_name}-{volume_name}"},
+                labels={
+                    "app": self.app_name,
+                    "volume-label": f"{self.app_name}-{volume_name}",
+                },
             ),
             spec=spec,
         )
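Every object ClusterInfo creates (NodePort Services, the Ingress, the ClusterIP Service, ConfigMaps, and PersistentVolumes) now carries the same "app" label, so a single selector string matches them all. A small sketch of the labeling side; the app name is a placeholder:

    from kubernetes import client

    app_name = "laconic-1234abcd"  # placeholder for self.app_name

    # The label stamped onto each object at creation time...
    metadata = client.V1ObjectMeta(
        name=f"{app_name}-service",
        labels={"app": app_name},
    )

    # ...and the selector down() later uses to find all of them
    label_selector = f"app={app_name}"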


@@ -384,104 +384,120 @@ class K8sDeployer(Deployer):
     def down(self, timeout, volumes, skip_cluster_management):  # noqa: C901
         self.skip_cluster_management = skip_cluster_management
         self.connect_api()
-        # Delete the k8s objects
+        # Query K8s for resources by label selector instead of generating names
+        # from config. This ensures we clean up orphaned resources when deployment
+        # IDs change (e.g., after force_redeploy).
+        label_selector = f"app={self.cluster_info.app_name}"
         if volumes:
-            # Create the host-path-mounted PVs for this deployment
-            pvs = self.cluster_info.get_pvs()
-            for pv in pvs:
-                if opts.o.debug:
-                    print(f"Deleting this pv: {pv}")
-                try:
-                    pv_resp = self.core_api.delete_persistent_volume(
-                        name=pv.metadata.name
-                    )
-                    if opts.o.debug:
-                        print("PV deleted:")
-                        print(f"{pv_resp}")
-                except ApiException as e:
-                    _check_delete_exception(e)
-            # Figure out the PVCs for this deployment
-            pvcs = self.cluster_info.get_pvcs()
-            for pvc in pvcs:
-                if opts.o.debug:
-                    print(f"Deleting this pvc: {pvc}")
-                try:
-                    pvc_resp = self.core_api.delete_namespaced_persistent_volume_claim(
-                        name=pvc.metadata.name, namespace=self.k8s_namespace
-                    )
-                    if opts.o.debug:
-                        print("PVCs deleted:")
-                        print(f"{pvc_resp}")
-                except ApiException as e:
-                    _check_delete_exception(e)
-        # Figure out the ConfigMaps for this deployment
-        cfg_maps = self.cluster_info.get_configmaps()
-        for cfg_map in cfg_maps:
-            if opts.o.debug:
-                print(f"Deleting this ConfigMap: {cfg_map}")
-            try:
-                cfg_map_resp = self.core_api.delete_namespaced_config_map(
-                    name=cfg_map.metadata.name, namespace=self.k8s_namespace
-                )
-                if opts.o.debug:
-                    print("ConfigMap deleted:")
-                    print(f"{cfg_map_resp}")
-            except ApiException as e:
-                _check_delete_exception(e)
-        deployment = self.cluster_info.get_deployment()
-        if opts.o.debug:
-            print(f"Deleting this deployment: {deployment}")
-        if deployment and deployment.metadata and deployment.metadata.name:
-            try:
-                self.apps_api.delete_namespaced_deployment(
-                    name=deployment.metadata.name, namespace=self.k8s_namespace
-                )
-            except ApiException as e:
-                _check_delete_exception(e)
-        service = self.cluster_info.get_service()
-        if opts.o.debug:
-            print(f"Deleting service: {service}")
-        if service and service.metadata and service.metadata.name:
-            try:
-                self.core_api.delete_namespaced_service(
-                    namespace=self.k8s_namespace, name=service.metadata.name
-                )
-            except ApiException as e:
-                _check_delete_exception(e)
-        ingress = self.cluster_info.get_ingress(use_tls=not self.is_kind())
-        if ingress and ingress.metadata and ingress.metadata.name:
-            if opts.o.debug:
-                print(f"Deleting this ingress: {ingress}")
-            try:
-                self.networking_api.delete_namespaced_ingress(
-                    name=ingress.metadata.name, namespace=self.k8s_namespace
-                )
-            except ApiException as e:
-                _check_delete_exception(e)
-        else:
-            if opts.o.debug:
-                print("No ingress to delete")
-        nodeports: List[client.V1Service] = self.cluster_info.get_nodeports()
-        for nodeport in nodeports:
-            if opts.o.debug:
-                print(f"Deleting this nodeport: {nodeport}")
-            if nodeport.metadata and nodeport.metadata.name:
-                try:
-                    self.core_api.delete_namespaced_service(
-                        namespace=self.k8s_namespace, name=nodeport.metadata.name
-                    )
-                except ApiException as e:
-                    _check_delete_exception(e)
-            else:
-                if opts.o.debug:
-                    print("No nodeport to delete")
+            # Delete PVs for this deployment (PVs use volume-label pattern)
+            try:
+                pvs = self.core_api.list_persistent_volume(
+                    label_selector=f"app={self.cluster_info.app_name}"
+                )
+                for pv in pvs.items:
+                    if opts.o.debug:
+                        print(f"Deleting PV: {pv.metadata.name}")
+                    try:
+                        self.core_api.delete_persistent_volume(name=pv.metadata.name)
+                    except ApiException as e:
+                        _check_delete_exception(e)
+            except ApiException as e:
+                if opts.o.debug:
+                    print(f"Error listing PVs: {e}")
+            # Delete PVCs for this deployment
+            try:
+                pvcs = self.core_api.list_namespaced_persistent_volume_claim(
+                    namespace=self.k8s_namespace, label_selector=label_selector
+                )
+                for pvc in pvcs.items:
+                    if opts.o.debug:
+                        print(f"Deleting PVC: {pvc.metadata.name}")
+                    try:
+                        self.core_api.delete_namespaced_persistent_volume_claim(
+                            name=pvc.metadata.name, namespace=self.k8s_namespace
+                        )
+                    except ApiException as e:
+                        _check_delete_exception(e)
+            except ApiException as e:
+                if opts.o.debug:
+                    print(f"Error listing PVCs: {e}")
+        # Delete ConfigMaps for this deployment
+        try:
+            cfg_maps = self.core_api.list_namespaced_config_map(
+                namespace=self.k8s_namespace, label_selector=label_selector
+            )
+            for cfg_map in cfg_maps.items:
+                if opts.o.debug:
+                    print(f"Deleting ConfigMap: {cfg_map.metadata.name}")
+                try:
+                    self.core_api.delete_namespaced_config_map(
+                        name=cfg_map.metadata.name, namespace=self.k8s_namespace
+                    )
+                except ApiException as e:
+                    _check_delete_exception(e)
+        except ApiException as e:
+            if opts.o.debug:
+                print(f"Error listing ConfigMaps: {e}")
+        # Delete Deployments for this deployment
+        try:
+            deployments = self.apps_api.list_namespaced_deployment(
+                namespace=self.k8s_namespace, label_selector=label_selector
+            )
+            for deployment in deployments.items:
+                if opts.o.debug:
+                    print(f"Deleting Deployment: {deployment.metadata.name}")
+                try:
+                    self.apps_api.delete_namespaced_deployment(
+                        name=deployment.metadata.name, namespace=self.k8s_namespace
+                    )
+                except ApiException as e:
+                    _check_delete_exception(e)
+        except ApiException as e:
+            if opts.o.debug:
+                print(f"Error listing Deployments: {e}")
+        # Delete Services for this deployment (includes both ClusterIP and NodePort)
+        try:
+            services = self.core_api.list_namespaced_service(
+                namespace=self.k8s_namespace, label_selector=label_selector
+            )
+            for service in services.items:
+                if opts.o.debug:
+                    print(f"Deleting Service: {service.metadata.name}")
+                try:
+                    self.core_api.delete_namespaced_service(
+                        namespace=self.k8s_namespace, name=service.metadata.name
+                    )
+                except ApiException as e:
+                    _check_delete_exception(e)
+        except ApiException as e:
+            if opts.o.debug:
+                print(f"Error listing Services: {e}")
+        # Delete Ingresses for this deployment
+        try:
+            ingresses = self.networking_api.list_namespaced_ingress(
+                namespace=self.k8s_namespace, label_selector=label_selector
+            )
+            for ingress in ingresses.items:
+                if opts.o.debug:
+                    print(f"Deleting Ingress: {ingress.metadata.name}")
+                try:
+                    self.networking_api.delete_namespaced_ingress(
+                        name=ingress.metadata.name, namespace=self.k8s_namespace
+                    )
+                except ApiException as e:
+                    _check_delete_exception(e)
+            if not ingresses.items and opts.o.debug:
+                print("No ingress to delete")
+        except ApiException as e:
+            if opts.o.debug:
+                print(f"Error listing Ingresses: {e}")
         if self.is_kind() and not self.skip_cluster_management:
             # Destroy the kind cluster
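The rewritten down() repeats one list-then-delete shape per resource type, each wrapped in its own try so a failure listing one kind does not abort cleanup of the rest. Factored into a helper, the shape looks roughly like this; the helper is purely illustrative and not part of the PR, which keeps the blocks inline, partly because PVs are cluster-scoped and take no namespace argument:

    from kubernetes.client.rest import ApiException

    def _delete_by_label(list_fn, delete_fn, namespace, label_selector, debug=False):
        # List resources matching the selector; a failed list is logged and
        # skipped so other resource types can still be cleaned up.
        try:
            found = list_fn(namespace=namespace, label_selector=label_selector)
        except ApiException as e:
            if debug:
                print(f"Error listing resources: {e}")
            return
        for item in found.items:
            if debug:
                print(f"Deleting: {item.metadata.name}")
            try:
                delete_fn(name=item.metadata.name, namespace=namespace)
            except ApiException as e:
                if debug:
                    print(f"Error deleting {item.metadata.name}: {e}")

    # Hypothetical usage, mirroring the Service block above:
    # _delete_by_label(core_api.list_namespaced_service,
    #                  core_api.delete_namespaced_service,
    #                  "default", "app=my-app", debug=True)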