From 44ce0146e665dd920c120ac6f4d74526a32587c3 Mon Sep 17 00:00:00 2001 From: Melody Wang <98235366+melodywang060@users.noreply.github.com> Date: Mon, 14 Oct 2024 09:03:07 -0400 Subject: [PATCH] Azure Infiniband Updated Ubuntu Versions (#462) Co-authored-by: Jacob Tomlinson Co-authored-by: James Lamb --- source/cloud/azure/azureml.md | 2 +- .../rapids-azureml-hpo/notebook.ipynb | 12 +++++++---- source/guides/azure/infiniband.md | 20 ++++++++++++------- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/source/cloud/azure/azureml.md b/source/cloud/azure/azureml.md index 5bfc1a99..ffe502ac 100644 --- a/source/cloud/azure/azureml.md +++ b/source/cloud/azure/azureml.md @@ -32,7 +32,7 @@ The compute instance provides an integrated Jupyter notebook service, JupyterLab Sign in to [Azure Machine Learning Studio](https://ml.azure.com/) and navigate to your workspace on the left-side menu. -Select **Compute** > **+ New** > choose a [RAPIDS compatible GPU](https://medium.com/dropout-analytics/which-gpus-work-with-rapids-ai-f562ef29c75f) VM size (e.g., `Standard_NC12s_v3`) +Select **Compute** > **+ New** (Create compute instance) > choose a [RAPIDS compatible GPU](https://medium.com/dropout-analytics/which-gpus-work-with-rapids-ai-f562ef29c75f) VM size (e.g., `Standard_NC12s_v3`) ![Screenshot of create new notebook with a gpu-instance](../../images/azureml-create-notebook-instance.png) diff --git a/source/examples/rapids-azureml-hpo/notebook.ipynb b/source/examples/rapids-azureml-hpo/notebook.ipynb index fd553e13..d6f6736e 100644 --- a/source/examples/rapids-azureml-hpo/notebook.ipynb +++ b/source/examples/rapids-azureml-hpo/notebook.ipynb @@ -12,7 +12,7 @@ ] }, "source": [ - "# Train and Hyperparameter-Tune with RAPIDS" + "# Train and Hyperparameter-Tune with RAPIDS on AzureML" ] }, { @@ -97,12 +97,16 @@ "from azure.ai.ml import MLClient\n", "from azure.identity import DefaultAzureCredential\n", "\n", + "subscription_id = \"FILL IN WITH YOUR AZURE ML CREDENTIALS\"\n", + "resource_group_name = \"FILL IN WITH YOUR AZURE ML CREDENTIALS\"\n", + "workspace_name = \"FILL IN WITH YOUR AZURE ML CREDENTIALS\"\n", + "\n", "# Get a handle to the workspace\n", "ml_client = MLClient(\n", " credential=DefaultAzureCredential(),\n", - " subscription_id=\"fc4f4a6b-4041-4b1c-8249-854d68edcf62\",\n", - " resource_group_name=\"rapidsai-deployment\",\n", - " workspace_name=\"rapids-aml-cluster\",\n", + " subscription_id=subscription_id,\n", + " resource_group_name=resource_group_name,\n", + " workspace_name=workspace_name,\n", ")\n", "\n", "print(\n", diff --git a/source/guides/azure/infiniband.md b/source/guides/azure/infiniband.md index daca2391..aaaff5a4 100644 --- a/source/guides/azure/infiniband.md +++ b/source/guides/azure/infiniband.md @@ -13,8 +13,8 @@ for demonstration. - Select `East US` region. - Change `Availability options` to `Availability set` and create a set. - If building multiple instances put additional instances in the same set. -- Use the 2nd Gen Ubuntu 20.04 image. - - Search all images for `Ubuntu Server 20.04` and choose the second one down on the list. +- Use the 2nd Gen Ubuntu 24.04 image. + - Search all images for `Ubuntu Server 24.04` and choose the second one down on the list. - Change size to `ND40rs_v2`. - Set password login with credentials. - User `someuser` @@ -39,8 +39,8 @@ The commands below should work for Ubuntu. See the [CUDA Toolkit documentation]( ```shell sudo apt-get install -y linux-headers-$(uname -r) distribution=$(. /etc/os-release;echo $ID$VERSION_ID | sed -e 's/\.//g') -wget https://developer.download.nvidia.com/compute/cuda/repos/$distribution/x86_64/cuda-keyring_1.0-1_all.deb -sudo dpkg -i cuda-keyring_1.0-1_all.deb +wget https://developer.download.nvidia.com/compute/cuda/repos/$distribution/x86_64/cuda-keyring_1.1-1_all.deb +sudo dpkg -i cuda-keyring_1.1-1_all.deb sudo apt-get update sudo apt-get -y install cuda-drivers ``` @@ -118,11 +118,11 @@ Mon Nov 14 20:32:39 2022 ### InfiniBand Driver -On Ubuntu 20.04 +On Ubuntu 24.04 ```shell sudo apt-get install -y automake dh-make git libcap2 libnuma-dev libtool make pkg-config udev curl librdmacm-dev rdma-core \ - libgfortran5 bison chrpath flex graphviz gfortran tk dpatch quilt swig tcl ibverbs-utils + libgfortran5 bison chrpath flex graphviz gfortran tk quilt swig tcl ibverbs-utils ``` Check install @@ -247,7 +247,13 @@ wget https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforg bash Mambaforge-Linux-x86_64.sh ``` -Accept the default and allow conda init to run. Then start a new shell. +Accept the default and allow conda init to run. + +```shell +~/mambaforge/bin/conda init +``` + +Then start a new shell. Create a conda environment (see [UCX-Py](https://ucx-py.readthedocs.io/en/latest/install.html) docs)