Parsl · ClaudiaCumberbatch · Mar 5, 2024 · Mar 5, 2024 · Mar 6, 2024 · Mar 6, 2024
diff --git a/Makefile b/Makefile
@@ -122,4 +122,4 @@ coverage: ## show the coverage report
 
 .PHONY: clean
 clean: ## clean up the environment by deleting the .venv, dist, eggs, mypy caches, coverage info, etc
-	rm -rf .venv $(DEPS) dist *.egg-info .mypy_cache build .pytest_cache .coverage runinfo_* $(WORKQUEUE_INSTALL)
+	rm -rf .venv $(DEPS) dist *.egg-info .mypy_cache build .pytest_cache .coverage runinfo $(WORKQUEUE_INSTALL)
diff --git a/mypy.ini b/mypy.ini
@@ -203,3 +203,6 @@ ignore_missing_imports = True
 
 [mypy-proxystore.*]
 ignore_missing_imports = True
+
+[mypy-diaspora_event_sdk.*]
+ignore_missing_imports = True
diff --git a/parsl/executors/high_throughput/executor.py b/parsl/executors/high_throughput/executor.py
@@ -217,6 +217,9 @@ class HighThroughputExecutor(BlockProviderExecutor, RepresentationMixin):
 
     encrypted : bool
         Flag to enable/disable encryption (CurveZMQ). Default is False.
+
+    radio_mode : str
+        The radio mode to use. Options include "htex" and "diaspora". Default is "htex".
     """
 
     @typeguard.typechecked
@@ -246,7 +249,8 @@ def __init__(self,
                  enable_mpi_mode: bool = False,
                  mpi_launcher: str = "mpiexec",
                  block_error_handler: Union[bool, Callable[[BlockProviderExecutor, Dict[str, JobStatus]], None]] = True,
-                 encrypted: bool = False):
+                 encrypted: bool = False,
+                 radio_mode: str = "htex"):
 
         logger.debug("Initializing HighThroughputExecutor")
 
@@ -308,6 +312,7 @@ def __init__(self,
         self.worker_logdir_root = worker_logdir_root
         self.cpu_affinity = cpu_affinity
         self.encrypted = encrypted
+        self.radio_mode = radio_mode
         self.cert_dir = None
 
         self.enable_mpi_mode = enable_mpi_mode
@@ -323,7 +328,7 @@ def __init__(self,
             launch_cmd = DEFAULT_LAUNCH_CMD
         self.launch_cmd = launch_cmd
 
-    radio_mode = "htex"
+    # radio_mode is no longer a class attribute: it is set per instance in __init__.
 
     def _warn_deprecated(self, old: str, new: str):
         warnings.warn(

diff --git a/parsl/monitoring/monitoring.py b/parsl/monitoring/monitoring.py
@@ -269,7 +269,7 @@ def start(self, run_id: str, dfk_run_dir: str, config_run_dir: Union[str, os.Pat
 
     # TODO: tighten the Any message format
     def send(self, mtype: MessageType, message: Any) -> None:
-        self.logger.debug("Sending message type {}".format(mtype))
+        self.logger.debug("Sending message type {} content {}".format(mtype, message))
         try:
             self._dfk_channel.send_pyobj((mtype, message))
         except zmq.Again:

diff --git a/parsl/monitoring/radios.py b/parsl/monitoring/radios.py
@@ -1,3 +1,5 @@
+import datetime
+import json
 import os
 import socket
 import pickle
@@ -6,7 +8,7 @@
 
 from abc import ABCMeta, abstractmethod
 
-from typing import Optional
+from typing import Optional, Any
 
 from parsl.serialize import serialize
 
@@ -22,6 +24,42 @@ def send(self, message: object) -> None:
         pass
 
 
+class DateTimeEncoder(json.JSONEncoder):
+    """JSON encoder that writes datetime, date and time objects as ISO 8601 strings."""
+    def default(self, obj: Any) -> Any:
+        temporal_types = (datetime.datetime, datetime.date, datetime.time)
+        return obj.isoformat() if isinstance(obj, temporal_types) else super().default(obj)
+
+
+class DiasporaRadio(MonitoringRadio):
+    """MonitoringRadio that publishes monitoring messages as JSON to a Diaspora (Kafka) topic."""
+    def __init__(self, monitoring_url: str, source_id: int, timeout: int = 10):
+        from diaspora_event_sdk import Client, KafkaProducer
+        self.source_id = source_id
+        # Publish to the per-user topic that diaspora_login.py registers and the test consumer
+        # subscribes to; the bare "radio-test" prefix on its own never matches that topic.
+        self.topic = "radio-test" + Client().subject_openid[-12:]
+        self.producer = KafkaProducer(value_serializer=DiasporaRadio.serialize)
+        logger.info("Diaspora-based monitoring channel initializing")
+
+    def send(self, message: object) -> None:
+        """Publish ``message``: a tuple sends its second element keyed by run_id, anything else is sent whole."""
+        key: bytes = b"payload"
+        value: Any = message
+        if isinstance(message, tuple):
+            value = message[1]
+            # TODO: make the key and topic configurable
+            has_run_id = isinstance(value, dict) and 'run_id' in value
+            key = value['run_id'].encode("utf-8") if has_run_id else b"init"
+        self.producer.send(topic=self.topic, key=key, value=value)
+        logger.debug("Sent message")
+
+    @staticmethod
+    def serialize(value: Any) -> bytes:
+        """Encode ``value`` as UTF-8 JSON, using DateTimeEncoder for date/time objects."""
+        return json.dumps(value, cls=DateTimeEncoder).encode("utf-8")
+
+
 class FilesystemRadio(MonitoringRadio):
     """A MonitoringRadio that sends messages over a shared filesystem.
 
@@ -173,3 +211,16 @@ def send(self, message: object) -> None:
             logging.error("Could not send message within timeout limit")
             return
         return
+
+
+def get_monitoring_radio(monitoring_url: str, source_id: int, radio_mode: str, run_dir: str) -> MonitoringRadio:
+    """Build the MonitoringRadio implementation selected by ``radio_mode``.
+
+    Raises ValueError if ``radio_mode`` is not "udp", "htex", "filesystem" or "diaspora".
+    """
+    if radio_mode == "filesystem":
+        return FilesystemRadio(monitoring_url=monitoring_url, source_id=source_id, run_dir=run_dir)
+    for mode, radio_cls in (("udp", UDPRadio), ("htex", HTEXRadio), ("diaspora", DiasporaRadio)):
+        if radio_mode == mode:
+            return radio_cls(monitoring_url, source_id)
+    raise ValueError(f"Unknown radio mode {radio_mode}")
diff --git a/parsl/monitoring/remote.py b/parsl/monitoring/remote.py
@@ -2,14 +2,14 @@
 import time
 import logging
 import datetime
+import parsl.monitoring.radios as radios
 from functools import wraps
 
 from parsl.multiprocessing import ForkProcess
 from multiprocessing import Event
 from parsl.process_loggers import wrap_with_logs
 
 from parsl.monitoring.message_type import MessageType
-from parsl.monitoring.radios import MonitoringRadio, UDPRadio, HTEXRadio, FilesystemRadio
 from typing import Any, Callable, Dict, List, Sequence, Tuple
 
 logger = logging.getLogger(__name__)
@@ -121,18 +121,8 @@ def send_first_last_message(try_id: int,
     import platform
     import os
 
-    radio: MonitoringRadio
-    if radio_mode == "udp":
-        radio = UDPRadio(monitoring_hub_url,
-                         source_id=task_id)
-    elif radio_mode == "htex":
-        radio = HTEXRadio(monitoring_hub_url,
-                          source_id=task_id)
-    elif radio_mode == "filesystem":
-        radio = FilesystemRadio(monitoring_url=monitoring_hub_url,
-                                source_id=task_id, run_dir=run_dir)
-    else:
-        raise RuntimeError(f"Unknown radio mode: {radio_mode}")
+    radio: radios.MonitoringRadio
+    radio = radios.get_monitoring_radio(monitoring_hub_url, task_id, radio_mode, run_dir)
 
     msg = (MessageType.RESOURCE_INFO,
            {'run_id': run_id,
@@ -177,18 +167,8 @@ def monitor(pid: int,
 
     setproctitle("parsl: task resource monitor")
 
-    radio: MonitoringRadio
-    if radio_mode == "udp":
-        radio = UDPRadio(monitoring_hub_url,
-                         source_id=task_id)
-    elif radio_mode == "htex":
-        radio = HTEXRadio(monitoring_hub_url,
-                          source_id=task_id)
-    elif radio_mode == "filesystem":
-        radio = FilesystemRadio(monitoring_url=monitoring_hub_url,
-                                source_id=task_id, run_dir=run_dir)
-    else:
-        raise RuntimeError(f"Unknown radio mode: {radio_mode}")
+    radio: radios.MonitoringRadio
+    radio = radios.get_monitoring_radio(monitoring_hub_url, task_id, radio_mode, run_dir)
 
     logging.debug("start of monitor")
 

diff --git a/parsl/tests/test_radio/__init__.py b/parsl/tests/test_radio/__init__.py
diff --git a/parsl/tests/test_radio/diaspora_login.py b/parsl/tests/test_radio/diaspora_login.py
@@ -0,0 +1,11 @@
+'''
+Before using the diaspora radio, the user must first log in to the diaspora event service.
+This cannot be folded into the test file, because it needs an authentication token that is
+obtained interactively on the command line, which pytest does not support.
+'''
+from diaspora_event_sdk import Client as GlobusClient
+c = GlobusClient()
+print(c.retrieve_key())
+topic = "radio-test" + c.subject_openid[-12:]  # per-user topic: fixed prefix + last 12 characters of subject_openid
+print(c.register_topic(topic))
+print(c.list_topics())
diff --git a/parsl/tests/test_radio/test_basic.py b/parsl/tests/test_radio/test_basic.py
@@ -0,0 +1,72 @@
+import logging
+import os
+import parsl
+import pytest
+import threading
+import time
+
+from diaspora_event_sdk import KafkaConsumer
+from diaspora_event_sdk import Client as GlobusClient
+
+
+logger = logging.getLogger(__name__)
+
+
+def consumer_check(consumer, timeout=60):
+    """Return once ``consumer`` delivers a record; raise AssertionError after ``timeout`` seconds without one."""
+    deadline = time.monotonic() + timeout
+    while time.monotonic() < deadline:
+        # poll() blocks for at most one second, so the deadline is honoured even on a silent topic
+        if consumer.poll(timeout_ms=1000):
+            return
+    raise AssertionError("No messages received")
+
+
+@parsl.python_app
+def this_app():
+    """Sleep for three seconds, then return 5."""
+    # The sleep has to last several resource monitoring periods (as set in
+    # the test configuration): a short app is not guaranteed to produce any
+    # resource message beyond the first one.
+    from time import sleep
+
+    sleep(3)
+    return 5
+
+
+@pytest.mark.skip(reason="requires diaspora login")
+def test_diaspora_radio():
+    """Run one app with the diaspora radio and check that a record arrives on the user's topic."""
+    topic = "radio-test" + GlobusClient().subject_openid[-12:]
+    consumer = KafkaConsumer(topic)
+    received = threading.Event()
+
+    def watch_topic():
+        consumer_check(consumer)
+        received.set()  # not reached if consumer_check raises
+
+    # Daemon thread: a silent topic must not keep the interpreter alive at exit.
+    threading.Thread(target=watch_topic, daemon=True).start()
+
+    from parsl.tests.configs.htex_local_alternate import fresh_config
+
+    if os.path.exists("runinfo/monitoring.db"):
+        logger.info("Monitoring database already exists - deleting")
+        os.remove("runinfo/monitoring.db")
+
+    logger.info("loading parsl")
+    config = fresh_config()
+    config.executors[0].radio_mode = "diaspora"
+    parsl.load(config)
+    try:
+        logger.info("invoking and waiting for result")
+        assert this_app().result() == 5
+    finally:
+        # always shut parsl down, even when the app or the assertion above fails
+        logger.info("cleaning up parsl")
+        parsl.dfk().cleanup()
+        parsl.clear()
+
+    # An exception in the watcher thread cannot fail this test by itself, so wait on the event instead of join().
+    assert received.wait(timeout=60), "No messages received"
+    logger.info("all done")
diff --git a/setup.py b/setup.py
@@ -35,6 +35,7 @@
     'flux': ['pyyaml', 'cffi', 'jsonschema'],
     'proxystore': ['proxystore'],
     'radical-pilot': ['radical.pilot'],
+    'diaspora_radio': ['diaspora-event-sdk[kafka-python]'],
     # Disabling psi-j since github direct links are not allowed by pypi
     # 'psij': ['psi-j-parsl@git+https://github.com/ExaWorks/psi-j-parsl']
 }

diff --git a/test-requirements.txt b/test-requirements.txt
@@ -21,3 +21,7 @@ sqlalchemy2-stubs
 Sphinx==4.5.0
 twine
 wheel
+
+botocore==1.29.125
+diaspora-event-sdk[kafka-python]
+cloudpickle