diff --git a/docs/source/operators/config-cli.md b/docs/source/operators/config-cli.md index baf40d175..585def30c 100644 --- a/docs/source/operators/config-cli.md +++ b/docs/source/operators/config-cli.md @@ -283,6 +283,29 @@ FileKernelSessionManager(KernelSessionManager) options reside. This directory should exist. (EG_PERSISTENCE_ROOT env var) Default: '' +WebhookKernelSessionManager(KernelSessionManager) options +--------------------------------------------------------- +--WebhookKernelSessionManager.enable_persistence= + Enable kernel session persistence (True or False). Default = False + (EG_KERNEL_SESSION_PERSISTENCE env var) + Default: False +--WebhookKernelSessionManager.persistence_root= + Identifies the root 'directory' under which the 'kernel_sessions' node will + reside. This directory should exist. (EG_PERSISTENCE_ROOT env var) + Default: None +--WebhookKernelSessionManager.webhook_url= + URL endpoint for webhook kernel session manager + Default: None +--WebhookKernelSessionManager.auth_type= + Authentication type for webhook kernel session manager API. Either basic, digest or None + Default: None +--WebhookKernelSessionManager.webhook_username= + Username for webhook kernel session manager API auth + Default: None +--WebhookKernelSessionManager.webhook_password= + Password for webhook kernel session manager API auth + Default: None + RemoteMappingKernelManager(AsyncMappingKernelManager) options ------------------------------------------------------------- --RemoteMappingKernelManager.allowed_message_types=... diff --git a/docs/source/operators/config-kernel-persistence.md b/docs/source/operators/config-kernel-persistence.md new file mode 100644 index 000000000..46289972e --- /dev/null +++ b/docs/source/operators/config-kernel-persistence.md @@ -0,0 +1,39 @@ +# Kernel Session Persistence + +Enabling kernel session persistence allows Jupyter Notebooks to reconnect to kernels when Enterprise Gateway is restarted. 
There are two ways of persisting kernel sessions: _File Kernel Session Persistence_ and _Webhook Kernel Session Persistence_. + +NOTE: Kernel Session Persistence should be considered experimental! + +## File Kernel Session Persistence + +File Kernel Session Persistence stores all kernel sessions as a file in a specified directory. To enable this, set the environment variable `EG_KERNEL_SESSION_PERSISTENCE=True` or configure `FileKernelSessionManager.enable_persistence=True`. To change the directory in which the kernel session file is being saved, either set the environment variable `EG_PERSISTENCE_ROOT` or configure `FileKernelSessionManager.persistence_root` to the directory. + +## Webhook Kernel Session Persistence + +Webhook Kernel Session Persistence stores all kernel sessions to any database. In order for this to work, an API must be created. The API must include four endpoints: + +- A GET that will retrieve a list of all kernel sessions from a database +- A GET that will take the kernel id as a path variable and retrieve that information from a database +- A DELETE that will delete all kernel sessions, where the body of the request is a list of kernel ids +- A POST that will take kernel id as a path variable and kernel session in the body of the request and save it to a database where the object being saved is: + +``` + { + kernel_id: UUID string, + kernel_session: JSON + } +``` + +To enable the webhook kernel session persistence, set the environment variable `EG_KERNEL_SESSION_PERSISTENCE=True` or configure `WebhookKernelSessionManager.enable_persistence=True`. To connect the API, set the environment variable `EG_WEBHOOK_URL` or configure `WebhookKernelSessionManager.webhook_url` to the API endpoint. + +### Enabling Authentication + +Enabling authentication is an option if the API requires it for requests. Set the environment variable `EG_AUTH_TYPE` or configure `WebhookKernelSessionManager.auth_type` to be either `Basic` or `Digest`. 
If it is set to an empty string, authentication won't be enabled. + +Then set the environment variables `EG_WEBHOOK_USERNAME` and `EG_WEBHOOK_PASSWORD` or configure `WebhookKernelSessionManager.webhook_username` and `WebhookKernelSessionManager.webhook_password` to provide the username and password for authentication. + +## Testing Kernel Session Persistence + +Once kernel session persistence has been enabled and configured, create a kernel by opening up a Jupyter Notebook. Save some variable in that notebook and shut down Enterprise Gateway using `kill -9 PID`, where `PID` is the PID of the gateway. Restart Enterprise Gateway and refresh your notebook tab. If all worked correctly, the variable should be loaded without the need to rerun the cell. + +If you are using docker, ensure the container isn't tied to the PID of Enterprise Gateway. The container should still run after killing that PID. diff --git a/docs/source/operators/index.rst b/docs/source/operators/index.rst index e4dbe61f2..6b0f8ef9e 100644 --- a/docs/source/operators/index.rst +++ b/docs/source/operators/index.rst @@ -65,4 +65,5 @@ Jupyter Enterprise Gateway adheres to config-kernel-override config-dynamic config-culling + config-kernel-persistence config-security diff --git a/enterprise_gateway/enterprisegatewayapp.py b/enterprise_gateway/enterprisegatewayapp.py index ced8214af..0bb698b63 100644 --- a/enterprise_gateway/enterprisegatewayapp.py +++ b/enterprise_gateway/enterprisegatewayapp.py @@ -34,7 +34,10 @@ default_handlers as default_kernelspec_handlers, ) from .services.sessions.handlers import default_handlers as default_session_handlers -from .services.sessions.kernelsessionmanager import FileKernelSessionManager +from .services.sessions.kernelsessionmanager import ( + FileKernelSessionManager, + WebhookKernelSessionManager, +) from .services.sessions.sessionmanager import SessionManager try: @@ -77,7 +80,12 @@ class EnterpriseGatewayApp(EnterpriseGatewayConfigMixin, JupyterApp): """ # Also include 
class WebhookKernelSessionManager(KernelSessionManager):
    """
    Performs kernel session persistence operations against the URL provided (EG_WEBHOOK_URL).

    The URL must have 4 endpoints associated with it:

    - 1 delete endpoint that takes a list of kernel ids in the body,
    - 1 post endpoint that takes the kernel id as a url param and the kernel session as the body,
    - 1 get endpoint that returns all kernel sessions, and
    - 1 get endpoint that returns a specific kernel session based on kernel id as url param.

    Each stored object is expected to look like
    ``{"kernel_id": <UUID string>, "kernel_session": <JSON>}``.
    """

    # Seconds to wait on the webhook before giving up. Without a timeout a
    # stalled endpoint would block the calling thread indefinitely.
    _request_timeout = 60

    # Webhook URL
    webhook_url_env = "EG_WEBHOOK_URL"
    webhook_url = Unicode(
        config=True,
        allow_none=True,
        help="""URL endpoint for webhook kernel session manager""",
    )

    @default("webhook_url")
    def webhook_url_default(self):
        """Default the webhook URL from the EG_WEBHOOK_URL environment variable."""
        return os.getenv(self.webhook_url_env, None)

    # Webhook Username
    webhook_username_env = "EG_WEBHOOK_USERNAME"
    webhook_username = Unicode(
        config=True,
        allow_none=True,
        help="""Username for webhook kernel session manager API auth""",
    )

    @default("webhook_username")
    def webhook_username_default(self):
        """Default the username from the EG_WEBHOOK_USERNAME environment variable."""
        return os.getenv(self.webhook_username_env, None)

    # Webhook Password
    webhook_password_env = "EG_WEBHOOK_PASSWORD"
    webhook_password = Unicode(
        config=True,
        allow_none=True,
        help="""Password for webhook kernel session manager API auth""",
    )

    @default("webhook_password")
    def webhook_password_default(self):
        """Default the password from the EG_WEBHOOK_PASSWORD environment variable."""
        return os.getenv(self.webhook_password_env, None)

    # Auth Type
    auth_type_env = "EG_AUTH_TYPE"
    auth_type = CaselessStrEnum(
        config=True,
        allow_none=True,
        values=["basic", "digest"],
        help="""Authentication type for webhook kernel session manager API. Either basic, digest or None""",
    )

    @default("auth_type")
    def auth_type_default(self):
        """Default the auth type from the EG_AUTH_TYPE environment variable."""
        # An empty EG_AUTH_TYPE means "no authentication"; map it to None
        # because the enum trait only accepts "basic"/"digest" (or None).
        return os.getenv(self.auth_type_env) or None

    def __init__(self, kernel_manager, **kwargs):
        """
        Initializes the manager and, when persistence is enabled, the auth handler.

        :param kernel_manager: The kernel manager handed through to the base class
        :param kwargs: Additional keyword arguments handed through to the base class
        """
        super().__init__(kernel_manager, **kwargs)
        # Always define ``auth`` so the attribute exists even if persistence
        # is disabled; None tells requests to send no credentials.
        self.auth = None
        if not self.enable_persistence:
            return

        self.log.info("Webhook kernel session persistence activated")
        if not self.webhook_url:
            self.log.error("No webhook URL is set (webhook_url/EG_WEBHOOK_URL)")
        if not self.auth_type:
            return
        if not (self.webhook_username and self.webhook_password):
            self.log.error("Username and/or password aren't set")
            return

        auth_classes = {"basic": HTTPBasicAuth, "digest": HTTPDigestAuth}
        auth_class = auth_classes.get(self.auth_type.lower())
        if auth_class is None:
            self.log.error("No such option for auth_type/EG_AUTH_TYPE")
            return
        self.auth = auth_class(self.webhook_username, self.webhook_password)

    def _report_unexpected_status(self, response, action):
        """
        Logs an unexpected webhook response and raises for HTTP error statuses.

        :param response: The ``requests`` response that carried the unexpected status
        :param string action: Short description of the attempted operation, used in the log
        :raises requests.HTTPError: If the status is a 4xx/5xx error
        """
        self.log.error(
            f"Webhook kernel session {action} failed: "
            f"HTTP {response.status_code} {response.reason}"
        )
        # Keeps the original contract: HTTP error statuses propagate to the caller.
        response.raise_for_status()

    def delete_sessions(self, kernel_ids):
        """
        Deletes kernel sessions from database

        :param list of strings kernel_ids: A list of kernel ids
        """
        if not self.enable_persistence:
            return
        response = requests.delete(
            self.webhook_url, auth=self.auth, json=kernel_ids, timeout=self._request_timeout
        )
        self.log.debug(f"Webhook kernel session deleting: {kernel_ids}")
        if response.status_code != 204:
            self._report_unexpected_status(response, "delete")

    def save_session(self, kernel_id):
        """
        Saves kernel session to database

        :param string kernel_id: A kernel id
        """
        if not self.enable_persistence or kernel_id is None:
            return
        body = KernelSessionManager.pre_save_transformation(
            {kernel_id: self._sessions[kernel_id]}
        )
        response = requests.post(
            f"{self.webhook_url}/{kernel_id}",
            auth=self.auth,
            json=body,
            timeout=self._request_timeout,
        )
        self.log.debug(f"Webhook kernel session saving: {kernel_id}")
        if response.status_code != 204:
            self._report_unexpected_status(response, "save")

    def load_sessions(self):
        """
        Loads kernel sessions from database
        """
        if not self.enable_persistence:
            return
        response = requests.get(self.webhook_url, auth=self.auth, timeout=self._request_timeout)
        if response.status_code != 200:
            self._report_unexpected_status(response, "load")
            return
        # Decode the body first: iterating the raw bytes would yield integers
        # rather than the individual session objects.
        for kernel_session in response.json() or []:
            self._load_session_from_response(kernel_session)

    def load_session(self, kernel_id):
        """
        Loads a kernel session from database

        :param string kernel_id: A kernel id
        """
        if not self.enable_persistence or kernel_id is None:
            return
        response = requests.get(
            f"{self.webhook_url}/{kernel_id}", auth=self.auth, timeout=self._request_timeout
        )
        if response.status_code != 200:
            self._report_unexpected_status(response, "load")
            return
        self._load_session_from_response(response.json())

    def _load_session_from_response(self, kernel_session):
        """
        Loads kernel session to current session

        :param kernel_session: Kernel session information, either as an already
            decoded dictionary or as JSON text (str/bytes) holding a
            ``kernel_session`` entry
        """
        self.log.debug("Loading saved session(s)")
        # Raw JSON text is still accepted for callers that pass the response body.
        if isinstance(kernel_session, (str, bytes, bytearray)):
            kernel_session = json.loads(kernel_session)
        self._sessions.update(
            KernelSessionManager.post_load_transformation(kernel_session["kernel_session"])
        )