diff --git a/config/clusters/jupyter-meets-the-earth/common.values.yaml b/config/clusters/jupyter-meets-the-earth/common.values.yaml
index bfbb057556..11ee63bdd2 100644
--- a/config/clusters/jupyter-meets-the-earth/common.values.yaml
+++ b/config/clusters/jupyter-meets-the-earth/common.values.yaml
@@ -284,65 +284,3 @@ dask-gateway:
         memory:
           request: 2G
           limit: 500G
-
-    # Note that we are overriding options provided in 2i2c's helm chart that has
-    # default values for these config entries.
-    #
-    extraConfig:
-      # This configuration represents options that can be presented to users
-      # that want to create a Dask cluster using dask-gateway. For more
-      # details, see https://gateway.dask.org/cluster-options.html
-      #
-      # The goal is to provide a simple configuration that allow the user some
-      # flexibility while also fitting well well on AWS nodes that are all
-      # having 1:4 ratio between CPU and GB of memory. By providing the
-      # username label, we help administrators to track user pods.
-      option_handler: |
-        from dask_gateway_server.options import Options, Select, String, Mapping
-        def cluster_options(user):
-            def option_handler(options):
-                if ":" not in options.image:
-                    raise ValueError("When specifying an image you must also provide a tag")
-                extra_labels = {}
-                scheduler_extra_pod_annotations = {
-                    "prometheus.io/scrape": "true",
-                    "prometheus.io/port": "8787",
-                }
-                chosen_worker_cpu = int(options.worker_specification.split("CPU")[0])
-                chosen_worker_memory = 4 * chosen_worker_cpu
-                # We multiply the requests by a fraction to ensure that the
-                # worker fit well within a node that need some resources
-                # reserved for system pods.
-                return {
-                    # A default image is suggested via DASK_GATEWAY__CLUSTER__OPTIONS__IMAGE env variable
-                    "image": options.image,
-                    "scheduler_extra_pod_labels": extra_labels,
-                    "scheduler_extra_pod_annotations": scheduler_extra_pod_annotations,
-                    "worker_extra_pod_labels": extra_labels,
-                    "worker_cores": 0.85 * chosen_worker_cpu,
-                    "worker_cores_limit": chosen_worker_cpu,
-                    "worker_memory": "%fG" % (0.85 * chosen_worker_memory),
-                    "worker_memory_limit": "%fG" % chosen_worker_memory,
-                    "environment": options.environment,
-                }
-            return Options(
-                Select(
-                    "worker_specification",
-                    [
-                        "1CPU, 4GB",
-                        "2CPU, 8GB",
-                        "4CPU, 16GB",
-                        "8CPU, 32GB",
-                        "16CPU, 64GB",
-                        "32CPU, 128GB",
-                        "64CPU, 256GB",
-                    ],
-                    default="1CPU, 4GB",
-                    label="Worker specification",
-                ),
-                # The default image is set via DASK_GATEWAY__CLUSTER__OPTIONS__IMAGE env variable
-                String("image", label="Image"),
-                Mapping("environment", {}, label="Environment variables"),
-                handler=option_handler,
-            )
-        c.Backend.cluster_options = cluster_options
diff --git a/helm-charts/basehub/templates/configmap-cluster-info.yaml b/helm-charts/basehub/templates/configmap-cluster-info.yaml
new file mode 100644
index 0000000000..38527de654
--- /dev/null
+++ b/helm-charts/basehub/templates/configmap-cluster-info.yaml
@@ -0,0 +1,19 @@
+kind: ConfigMap
+apiVersion: v1
+metadata:
+  name: basehub-cluster-info
+  labels:
+    helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
+    app.kubernetes.io/name: basehub
+    app.kubernetes.io/instance: {{ .Release.Name }}
+    app.kubernetes.io/managed-by: {{ .Release.Service }}
+data:
+  {{- $k8s_dist := "" }}
+  {{- if (.Capabilities.KubeVersion.Version | contains "gke") }}
+  {{- $k8s_dist = "gke" }}
+  {{- else if (.Capabilities.KubeVersion.Version | contains "eks") }}
+  {{- $k8s_dist = "eks" }}
+  {{- else }}
+  {{- $k8s_dist = "aks" }}
+  {{- end }}
+  K8S_DIST: {{ $k8s_dist }}
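The new template above derives `K8S_DIST` from the Kubernetes server version string, which GKE and EKS embed their name into, with AKS as the fallback branch. A minimal Python sketch of that heuristic, using illustrative version strings rather than values taken from this diff:

```python
# Sketch of the distribution heuristic used by configmap-cluster-info.yaml:
# GKE and EKS embed their name in the reported Kubernetes version string,
# while anything else is assumed to be AKS (the template's else branch).
def detect_k8s_dist(kube_version: str) -> str:
    if "gke" in kube_version:
        return "gke"
    if "eks" in kube_version:
        return "eks"
    return "aks"

assert detect_k8s_dist("v1.27.4-gke.900") == "gke"      # illustrative GKE version string
assert detect_k8s_dist("v1.27.4-eks-8ccc7ba") == "eks"  # illustrative EKS version string
assert detect_k8s_dist("v1.27.4") == "aks"              # AKS reports an unsuffixed version
```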
diff --git a/helm-charts/basehub/values.yaml b/helm-charts/basehub/values.yaml
index c2b494e781..75062c5d2c 100644
--- a/helm-charts/basehub/values.yaml
+++ b/helm-charts/basehub/values.yaml
@@ -461,6 +461,12 @@ jupyterhub:
           - value: "/rstudio"
             title: RStudio
            description: An IDE For R, created by the RStudio company
+    extraEnv:
+      BASEHUB_K8S_DIST:
+        valueFrom:
+          configMapKeyRef:
+            name: basehub-cluster-info
+            key: K8S_DIST
     initContainers:
       - name: templates-clone
         image: alpine/git:2.40.1
diff --git a/helm-charts/daskhub/values.yaml b/helm-charts/daskhub/values.yaml
index 6ca37074f6..202d220105 100644
--- a/helm-charts/daskhub/values.yaml
+++ b/helm-charts/daskhub/values.yaml
@@ -146,13 +146,32 @@ dask-gateway:
           nodeSelector:
             # Dask workers get their own pre-emptible pool
             k8s.dask.org/node-purpose: worker
+    env:
+      - name: BASEHUB_K8S_DIST
+        valueFrom:
+          configMapKeyRef:
+            name: basehub-cluster-info
+            key: K8S_DIST

-    # TODO: figure out a replacement for userLimits.
     extraConfig:

+      # This configuration represents options that can be presented to users
+      # that want to create a Dask cluster using dask-gateway client.
+      #
+      # This configuration is meant to enable the user to request dask worker
+      # pods that fits well on 2i2c's clusters. Currently the only kind of
+      # instance types used are n2-highmem-16 or r5.4xlarge.
+      #
+      # - Documentation about exposing cluster options to users:
+      #   https://gateway.dask.org/cluster-options.html and the
+      # - Reference for KubeClusterConfig, which is what can be configured:
+      #   https://gateway.dask.org/api-server.html#kubeclusterconfig.
+      #
       optionHandler: |
-        from dask_gateway_server.options import Options, Integer, Float, String, Mapping
+        import os
         import string
+        from dask_gateway_server.options import Integer, Mapping, Options, Select, String
+
         # Escape a string to be dns-safe in the same way that KubeSpawner does it.
         # Reference https://github.com/jupyterhub/kubespawner/blob/616f72c4aee26c3d2127c6af6086ec50d6cda383/kubespawner/spawner.py#L1828-L1835
         # Adapted from https://github.com/minrk/escapism to avoid installing the package
@@ -177,40 +196,131 @@ dask-gateway:
                 chars.append(escaped_hex_char)
             return u''.join(chars)

+        # Decide on available instance types and their resource allocation
+        # choices to expose based on cloud provider. For each daskhub hub
+        # managed by 2i2c, there should be these instance types available.
+        #
+        cloud_provider = os.environ["BASEHUB_K8S_DIST"]  # gke, eks, or aks
+        instance_types = {
+            "gke": ["n2-highmem-16"],
+            "eks": ["r5.4xlarge"],
+            # 2i2c doesn't yet manage any dask-gateway installations on AKS, so
+            # this hasn't been configured yet and may cause an error - but that
+            # is good as we really should have this if we setup dask-gateway for
+            # AKS anyhow.
+            # aks: [],
+        }
+
+        # NOTE: Data mentioned below comes from manual inspection of data
+        # collected and currently only available at
+        # https://github.com/2i2c-org/infrastructure/pull/3337.
+        #
+        resource_allocations = {
+            # n2-highmem-16 nodes in our clusters have 15.89 allocatable cores
+            # and 116.549Gi allocatable memory, and daemonset are expected to
+            # not add more than 400m cores and 800Mi (0.781Gi) memory with some
+            # margin, so we get 15.49 cores and 115.768Gi available for worker
+            # pods to request.
+            #
+            # This is an initial conservative strategy, allowing a slight
+            # oversubscription of CPU but not any oversubscription of memory.
+            #
+            # To workaround https://github.com/dask/dask-gateway/issues/765, we
+            # round worker_cores down from [0.968, 1.936, 3.872, 7.745, 15.49]
+            # to [0.9, 1.9, 3.8, 7.7, 15.4].
+            #
+            "n2-highmem-16": {
+                "1CPU, 7.2Gi": {"worker_cores": 0.9, "worker_cores_limit": 1, "worker_memory": "7.235G", "worker_memory_limit": "7.235G"},
+                "2CPU, 14.5Gi": {"worker_cores": 1.9, "worker_cores_limit": 2, "worker_memory": "14.471G", "worker_memory_limit": "14.471G"},
+                "4CPU, 28.9Gi": {"worker_cores": 3.8, "worker_cores_limit": 4, "worker_memory": "28.942G", "worker_memory_limit": "28.942G"},
+                "8CPU, 57.9Gi": {"worker_cores": 7.7, "worker_cores_limit": 8, "worker_memory": "57.884G", "worker_memory_limit": "57.884G"},
+                "16CPU, 115.8Gi": {"worker_cores": 15.4, "worker_cores_limit": 16, "worker_memory": "115.768G", "worker_memory_limit": "115.768G"},
+            },
+            # r5.4xlarge nodes in our clusters have 15.89 allocatable cores and
+            # 121.504Gi allocatable memory, and daemonset are expected to not
+            # add more than 400m cores and 800Mi (0.781Gi) memory with some
+            # margin, so we get 15.49 cores and 120.723Gi available for worker
+            # pods to request.
+            #
+            # This is an initial conservative strategy, allowing a slight
+            # oversubscription of CPU but not any oversubscription of memory.
+            #
+            # To workaround https://github.com/dask/dask-gateway/issues/765, we
+            # round worker_cores down from [0.968, 1.936, 3.872, 7.745, 15.49]
+            # to [0.9, 1.9, 3.8, 7.7, 15.4].
+            #
+            "r5.4xlarge": {
+                "1CPU, 7.5Gi": {"worker_cores": 0.9, "worker_cores_limit": 1, "worker_memory": "7.545G", "worker_memory_limit": "7.545G"},
+                "2CPU, 15.1Gi": {"worker_cores": 1.9, "worker_cores_limit": 2, "worker_memory": "15.090G", "worker_memory_limit": "15.090G"},
+                "4CPU, 30.2Gi": {"worker_cores": 3.8, "worker_cores_limit": 4, "worker_memory": "30.180G", "worker_memory_limit": "30.180G"},
+                "8CPU, 60.4Gi": {"worker_cores": 7.7, "worker_cores_limit": 8, "worker_memory": "60.361G", "worker_memory_limit": "60.361G"},
+                "16CPU, 120.7Gi": {"worker_cores": 15.4, "worker_cores_limit": 16, "worker_memory": "120.723G", "worker_memory_limit": "120.723G"},
+            },
+        }
+
+        # for now we support only on one instance type per cluster, listing it
+        # as an option is a way to help convey how things work a bit better
+        it = instance_types[cloud_provider][0]
+        ra = resource_allocations[it]
+        ra_keys = list(ra.keys())
+
         def cluster_options(user):
-            safe_username = escape_string_label_safe(user.name)
             def option_handler(options):
                 if ":" not in options.image:
                     raise ValueError("When specifying an image you must also provide a tag")
+                extra_labels = {
+                    "hub.jupyter.org/username": escape_string_label_safe(user.name),
+                }
                 scheduler_extra_pod_annotations = {
-                    "hub.jupyter.org/username": safe_username,
+                    "hub.jupyter.org/username": user.name,
                     "prometheus.io/scrape": "true",
                     "prometheus.io/port": "8787",
                 }
-                extra_labels = {
-                    "hub.jupyter.org/username": safe_username,
+                worker_extra_pod_annotations = {
+                    "hub.jupyter.org/username": user.name,
                 }
+                picked_ra = ra[options.worker_resource_allocation]
+
                 return {
-                    "worker_cores_limit": options.worker_cores,
-                    "worker_cores": options.worker_cores,
-                    "worker_memory": "%fG" % options.worker_memory,
+                    # A default image is suggested via DASK_GATEWAY__CLUSTER__OPTIONS__IMAGE env variable
                     "image": options.image,
-                    "scheduler_extra_pod_annotations": scheduler_extra_pod_annotations,
                     "scheduler_extra_pod_labels": extra_labels,
+                    "scheduler_extra_pod_annotations": scheduler_extra_pod_annotations,
                     "worker_extra_pod_labels": extra_labels,
"worker_extra_pod_annotations": worker_extra_pod_annotations, + "worker_cores": picked_ra["worker_cores"], + "worker_cores_limit": picked_ra["worker_cores_limit"], + "worker_memory": picked_ra["worker_memory"], + "worker_memory_limit": picked_ra["worker_memory_limit"], "environment": options.environment, + "idle_timeout": options.idle_timeout_minutes * 60, } return Options( - Integer("worker_cores", 2, min=1, label="Worker Cores"), - Float("worker_memory", 4, min=1, label="Worker Memory (GiB)"), - # The default image is set via DASK_GATEWAY__CLUSTER__OPTIONS__IMAGE env variable + Select( + "instance_type", + [it], + default=it, + label="Instance type running worker containers", + ), + Select( + "worker_resource_allocation", + ra_keys, + default=ra_keys[0], + label="Resources per worker container", + ), + # The default image is pre-specified by the dask-gateway client + # via the env var DASK_GATEWAY__CLUSTER__OPTIONS__IMAGE set on + # the jupyterhub user pods String("image", label="Image"), - Mapping("environment", {}, label="Environment Variables"), + Mapping("environment", {}, label="Environment variables (YAML)"), + Integer("idle_timeout_minutes", 30, min=0, label="Idle cluster terminated after (minutes)"), handler=option_handler, ) c.Backend.cluster_options = cluster_options - idle: | - # timeout after 30 minutes of inactivity + + # timeout after 30 minutes of inactivity by default, keep this in sync + # with the user exposed option idle_timeout_minutes's default value + # configured above c.KubeClusterConfig.idle_timeout = 1800 prefix: "/services/dask-gateway" # Users connect to the Gateway through the JupyterHub service. auth: