From 5057175219da55277ece2a9c1c4051a11cf3628d Mon Sep 17 00:00:00 2001 From: Ian Stuart Date: Thu, 14 Mar 2024 06:59:19 +0000 Subject: [PATCH 01/22] Update the server-side api --- jupyter_resource_usage/api.py | 13 ++++++++ jupyter_resource_usage/config.py | 46 +++++++++++++++++++++++++++ jupyter_resource_usage/metrics.py | 22 +++++++++---- jupyter_resource_usage/prometheus.py | 7 +++- jupyter_resource_usage/static/main.js | 38 ++++++++++++++++++++++ 5 files changed, 118 insertions(+), 8 deletions(-) diff --git a/jupyter_resource_usage/api.py b/jupyter_resource_usage/api.py index 97e515a..91c433d 100644 --- a/jupyter_resource_usage/api.py +++ b/jupyter_resource_usage/api.py @@ -75,6 +75,19 @@ async def get(self): metrics.update(cpu_percent=cpu_percent, cpu_count=cpu_count) + # Optionally report disk usage for the partition containing config.disk_path + if config.track_disk_usage: + try: + disk_info = psutil.disk_usage(config.disk_path) + except Exception: + pass + else: + metrics.update(disk_used=disk_info.used, disk_total=disk_info.total) + if config.disk_warning_threshold != 0: + limits.setdefault("disk", {})["warn"] = (disk_info.total - disk_info.used) < ( + disk_info.total * config.disk_warning_threshold + ) + self.write(json.dumps(metrics)) @run_on_executor diff --git a/jupyter_resource_usage/config.py b/jupyter_resource_usage/config.py index 7263fff..37f3211 100644 --- a/jupyter_resource_usage/config.py +++ b/jupyter_resource_usage/config.py @@ -6,6 +6,7 @@ from traitlets import Float from traitlets import Int from traitlets import List from traitlets import TraitType +from traitlets import Unicode from traitlets import Union from traitlets.config import Configurable @@ -56,6 +57,19 @@ class ResourceUseDisplay(Configurable): trait=PSUtilMetric(), default_value=[{"name": "cpu_count"}] ) + process_disk_metrics = List( + trait=PSUtilMetric(), + default_value=[], + ) + + system_disk_metrics = List( + trait=PSUtilMetric(), + default_value=[ + {"name": "disk_usage", "args": ['/home' ], "attribute": "total"}, + {"name": 
"disk_usage", "args": ['/home' ], "attribute": "used"} + ], + ) + mem_warning_threshold = Float( default_value=0.1, help=""" @@ -123,6 +137,38 @@ def _mem_limit_default(self): def _cpu_limit_default(self): return float(os.environ.get("CPU_LIMIT", 0)) + track_disk_usage = Bool( + default_value=False, + help=""" + Set to True in order to enable reporting of disk usage statistics. + """, + ).tag(config=True) + + disk_path = Union( + trait_types=[Unicode(), Callable()], + default_value='/home/jovyan', + help=""" + A path in the partition to be reported on. + """, + ).tag(config=True) + + @default("disk_path") + def _disk_path_default(self): + return str(os.environ.get("HOME", '/home/jovyan')) + + disk_warning_threshold = Float( + default_value=0.1, + help=""" + Warn user with flashing lights when disk usage is within this fraction + total space. + + For example, if total size is 10G, `disk_warning_threshold` is 0.1, + we will start warning the user when they use (10 - (10 * 0.1)) G. + + Set to 0 to disable warning. + """, + ).tag(config=True) + enable_prometheus_metrics = Bool( default_value=True, help=""" diff --git a/jupyter_resource_usage/metrics.py b/jupyter_resource_usage/metrics.py index ae5e457..4208ecf 100644 --- a/jupyter_resource_usage/metrics.py +++ b/jupyter_resource_usage/metrics.py @@ -13,10 +13,10 @@ def __init__(self, server_app: ServerApp): ] self.server_app = server_app - def get_process_metric_value(self, process, name, kwargs, attribute=None): + def get_process_metric_value(self, process, name, args, kwargs, attribute=None): try: # psutil.Process methods will either return... - metric_value = getattr(process, name)(**kwargs) + metric_value = getattr(process, name)(*args, **kwargs) if attribute is not None: # ... a named tuple return getattr(metric_value, attribute) else: # ... 
or a number @@ -26,7 +26,7 @@ def get_process_metric_value(self, process, name, kwargs, attribute=None): except BaseException: return 0 - def process_metric(self, name, kwargs={}, attribute=None): + def process_metric(self, name, args=(), kwargs={}, attribute=None): if psutil is None: return None else: @@ -34,17 +34,20 @@ def process_metric(self, name, kwargs={}, attribute=None): all_processes = [current_process] + current_process.children(recursive=True) process_metric_value = lambda process: self.get_process_metric_value( - process, name, kwargs, attribute + process, name, args, kwargs, attribute ) return sum([process_metric_value(process) for process in all_processes]) - def system_metric(self, name, kwargs={}, attribute=None): + def system_metric(self, name, args=(), kwargs={}, attribute=None): if psutil is None: return None else: - # psutil functions will either return... - metric_value = getattr(psutil, name)(**kwargs) + # psutil functions will either raise an error, or return... + try: + metric_value = getattr(psutil, name)(*args, **kwargs) + except Exception: + return None if attribute is not None: # ... a named tuple return getattr(metric_value, attribute) else: # ... 
or a number @@ -80,3 +83,8 @@ def cpu_metrics(self): return self.metrics( self.config.process_cpu_metrics, self.config.system_cpu_metrics ) + + def disk_metrics(self): + return self.metrics( + self.config.process_disk_metrics, self.config.system_disk_metrics + ) diff --git a/jupyter_resource_usage/prometheus.py b/jupyter_resource_usage/prometheus.py index 511ce79..be9bf5a 100644 --- a/jupyter_resource_usage/prometheus.py +++ b/jupyter_resource_usage/prometheus.py @@ -18,7 +18,7 @@ def __init__(self, metricsloader: PSUtilMetricsLoader): self.config = metricsloader.config self.session_manager = metricsloader.server_app.session_manager - gauge_names = ["total_memory", "max_memory", "total_cpu", "max_cpu"] + gauge_names = ["total_memory", "max_memory", "total_cpu", "max_cpu", "max_disk", "current_disk"] for name in gauge_names: phrase = name + "_usage" gauge = Gauge(phrase, "counter for " + phrase.replace("_", " "), []) @@ -34,6 +34,11 @@ async def __call__(self, *args, **kwargs): if cpu_metric_values is not None: self.TOTAL_CPU_USAGE.set(cpu_metric_values["cpu_percent"]) self.MAX_CPU_USAGE.set(self.apply_cpu_limit(cpu_metric_values)) + if self.config.track_disk_usage: + disk_metric_values = self.metricsloader.disk_metrics() + if disk_metric_values is not None: + self.CURRENT_DISK_USAGE.set(disk_metric_values["disk_usage_used"]) + self.MAX_DISK_USAGE.set(disk_metric_values["disk_usage_total"]) def apply_memory_limit(self, memory_metric_values) -> Optional[int]: if memory_metric_values is None: diff --git a/jupyter_resource_usage/static/main.js b/jupyter_resource_usage/static/main.js index b4e09d2..ecfff0c 100644 --- a/jupyter_resource_usage/static/main.js +++ b/jupyter_resource_usage/static/main.js @@ -25,6 +25,17 @@ define([ .attr('title', 'Actively used CPU (updates every 5s)') ) ); + $('#maintoolbar-container').append( + $('
').attr('id', 'jupyter-resource-usage-display-disk') + .addClass('btn-group') + .addClass('jupyter-resource-usage-hide') + .addClass('pull-right').append( + $('').text(' Disk: ') + ).append( + $('').attr('id', 'jupyter-resource-usage-disk') + .attr('title', 'Disk usage (updates every 5s)') + ) + ); // FIXME: Do something cleaner to get styles in here? $('head').append( $('