Skip to content

Commit

Permalink
controller: Rewrite timeout mechanism
Browse files Browse the repository at this point in the history
Kill the qemu process, because it stalls the ARCHIE execution. Once the
qemu process has been killed, the qemu thread terminates. Because the
pipes are then closed, the Python worker terminates as well and writes
the results it has received to the hdf5collector.
  • Loading branch information
aewag committed Nov 2, 2023
1 parent bfd88b0 commit 42e218e
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 7 deletions.
27 changes: 20 additions & 7 deletions controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import logging
from multiprocessing import Manager, Process, Value
from pathlib import Path
import psutil
import signal
from statistics import mean
import subprocess
Expand Down Expand Up @@ -788,21 +789,33 @@ def controller(
if len(times) > 0:
time_max = max(times)

for i in range(len(p_list)):
p = p_list[i]
for i, p in enumerate(p_list):
# Find finished processes
p["process"].join(timeout=0)

# Kill process if timeout exceeded and gdb is not used
# Halt experiment if timeout duration exceeded
# If gdb is used the timeout is not applicable
if (
p["process"].is_alive()
and (time.time() - p["start_time"]) > config_qemu["timeout"]
and not config_qemu.get("gdb", False)
):
clogger.error(
f"Process {p['process'].name} ran into timeout and was killed!"
)
p["process"].terminate()
clogger.warning(f"Experiment {p['faults']['index']} ran into timeout")
# Search for qemu thread and kill qemu process if found
# qemu process is a child process of qemu thread
qemu_thread_name = f"qemu{p['faults']['index']}"
for process in psutil.process_iter():
if process.name() != qemu_thread_name:
continue
clogger.debug(f"{process.name()} killed")
assert (
len(process.children()) == 1
), "qemu thread should only have qemu child process"
process.children()[0].terminate()
break
else:
clogger.debug(f"{qemu_thread_name} not found to kill")
# Wait for the worker process to terminate
p["process"].join()

if p["process"].is_alive() is False:
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ tables==3.7.0
json5==0.9.10
protobuf==4.21.12
tqdm==4.65.0
psutil==5.9.6

0 comments on commit 42e218e

Please sign in to comment.