Skip to content

Commit

Permalink
Beautify code and add comments.
Browse files Browse the repository at this point in the history
  • Loading branch information
OrenZ1 committed Jun 2, 2024
1 parent e0244fb commit eff70e1
Showing 1 changed file with 12 additions and 16 deletions.
28 changes: 12 additions & 16 deletions enterprise_gateway/services/processproxies/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,12 @@ def __init__(self, kernel_manager: RemoteKernelManager, proxy_config: dict):
self._initialize_kernel_launch_terminate_on_events()

def _initialize_kernel_launch_terminate_on_events(self):
"""
Parse the `kernel_launch_terminate_on_events` configuration, for easier access during startup.
[{"type": "Warning", "reason": "FailedMount", "timeout_in_seconds": 0},
{"type": "Warning", "reason": "Unschedulable", "timeout_in_seconds": 30}] ->
{"Warning": {"FailedMount": 0, "Unschedulable": 30}}
"""
self.kernel_launch_terminate_on_events = defaultdict(dict)
for configuration in self.kernel_manager.parent.kernel_launch_terminate_on_events:
self.kernel_launch_terminate_on_events[
Expand Down Expand Up @@ -201,14 +207,7 @@ async def confirm_remote_startup(self) -> None:
self.log.debug("Trying to confirm kernel container startup status")
self.start_time = RemoteProcessProxy.get_current_time()
self.kernel_events_to_occurrence_time = {}
i = 1
container_status = self.get_container_status(i)
while not container_status:
i += 1
self.detect_launch_failure()
await self.handle_timeout()
container_status self.get_container_status(i)

i = 0
ready_to_connect = False # we're ready to connect when we have a connection file to use
while not ready_to_connect:
i += 1
Expand All @@ -231,23 +230,20 @@ async def confirm_remote_startup(self) -> None:
)
self.pgid = 0
else:
self.log_and_raise(
http_status_code=500,
reason="Error starting kernel container; status was not available. Perhaps the kernel pod died unexpectedly"
)
self.detect_launch_failure()
self.kernel_events_to_occurrence_time = {}

def _handle_pending_kernel(self):
self.log.debug("Sampling kernel container events")
kernel_pod_events = self.get_container_events()
for event in kernel_pod_events:
if event.type in self.kernel_launch_terminate_on_events and event.reason in self.kernel_launch_terminate_on_events[event.type]:
hashed_event = hash(f"{event.type}{event.reason}")
if hashed_event not in self.kernel_events_to_occurrence_time:
self.kernel_events_to_occurrence_time[hashed_event] = RemoteProcessProxy.get_current_time()
event_key = f"{event.type}{event.reason}"
if event_key not in self.kernel_events_to_occurrence_time:
self.kernel_events_to_occurrence_time[event_key] = RemoteProcessProxy.get_current_time()
if RemoteProcessProxy.get_time_diff(
RemoteProcessProxy.get_current_time(),
self.kernel_events_to_occurrence_time[hashed_event]
self.kernel_events_to_occurrence_time[event_key]
) >= self.kernel_launch_terminate_on_events[event.type][event.reason]:
self.kill()
self.log_and_raise(
Expand Down

0 comments on commit eff70e1

Please sign in to comment.