From 579cd467cd4e9d875c4891582fa18bbb6a9a807b Mon Sep 17 00:00:00 2001 From: Jacob Tomlinson Date: Tue, 1 Aug 2023 11:31:08 +0100 Subject: [PATCH] Make waiting for scheduler Pod more robust (#793) --- dask_kubernetes/common/networking.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/dask_kubernetes/common/networking.py b/dask_kubernetes/common/networking.py index a589a976a..d29411214 100644 --- a/dask_kubernetes/common/networking.py +++ b/dask_kubernetes/common/networking.py @@ -200,13 +200,17 @@ async def wait_for_scheduler(cluster_name, namespace, timeout=None): while True: async with kubernetes.client.api_client.ApiClient() as api_client: k8s_api = kubernetes.client.CoreV1Api(api_client) - pods = await k8s_api.list_namespaced_pod( - namespace=namespace, - label_selector=f"dask.org/component=scheduler,dask.org/cluster-name={cluster_name}", - ) - pod = await Pod.objects(api, namespace=namespace).get_by_name( - pods.items[0].metadata.name - ) + try: + [pod] = ( + await k8s_api.list_namespaced_pod( + namespace=namespace, + label_selector=f"dask.org/component=scheduler,dask.org/cluster-name={cluster_name}", + ) + ).items + except ValueError: + await asyncio.sleep(0.25) + continue + pod = await Pod.objects(api, namespace=namespace).get_by_name(pod.metadata.name) phase = pod.obj["status"]["phase"] if phase == "Running": if not pod_start_time: