04_optional-aws-sagemaker-notebook

rasbt · Nov 28, 2024 · ffcc55a · ffcc55a
1 parent bb31de8
commit ffcc55a
Show file tree

Hide file tree

Showing 2 changed files with 198 additions and 0 deletions.
diff --git a/setup/04_optional-aws-sagemaker-notebook/README.md b/setup/04_optional-aws-sagemaker-notebook/README.md
@@ -0,0 +1,31 @@
+# AWS CloudFormation Template: Jupyter Notebook with LLMs-from-scratch Repo
+
+This CloudFormation template creates a GPU-enabled Jupyter notebook in Amazon SageMaker with an execution role and the LLMs-from-scratch GitHub repository.
+
+## What it does:
+
+1. Creates an IAM role with the necessary permissions for the SageMaker notebook instance.
+2. Creates a KMS key and an alias for encrypting the notebook instance.
+3. Configures a notebook instance lifecycle configuration script that:
+   - Installs a separate Miniconda installation in the user's home directory.
+   - Creates a custom Python environment with TensorFlow 2.15.0 and PyTorch 2.1.0, both with CUDA support.
+   - Installs additional packages like Jupyter Lab, Matplotlib, and other useful libraries.
+   - Registers the custom environment as a Jupyter kernel.
+4. Creates the SageMaker notebook instance with the specified configuration, including the GPU-enabled instance type, the execution role, and the default code repository.
+
+## How to use:
+
+1. Download the CloudFormation template file (`cloudformation-template.yml`).
+2. In the AWS Management Console, navigate to the CloudFormation service.
+3. Create a new stack and upload the template file.
+4. Provide a name for the notebook instance (defaults to "LLMsFromScratchNotebook") and, optionally, the URL of the default code repository (defaults to the LLMs-from-scratch GitHub repo).
+5. Review and accept the template's parameters, then create the stack.
+6. Once the stack creation is complete, the SageMaker notebook instance will be available in the SageMaker console.
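+7. Open the notebook instance and start using the pre-configured environment to work on your LLMs-from-scratch projects. Note that the custom environment is installed in the background after the instance is created: progress is written to `setup.log` in the `SageMaker` directory, and a `setup-complete` file appears there once the installation has finished. The custom Jupyter kernel is registered when the instance starts, so stop and start the instance once after the setup has completed.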
+
+## Key Points:
+
+- The template creates a GPU-enabled (ml.g4dn.xlarge) notebook instance with 50GB of storage.
+- It sets up a custom Miniconda environment with TensorFlow 2.15.0 and PyTorch 2.1.0, both with CUDA support.
+- The custom environment is registered as a Jupyter kernel, making it available for use in the notebook.
+- The template also creates a KMS key for encrypting the notebook instance and an IAM role with the necessary permissions.
diff --git a/setup/04_optional-aws-sagemaker-notebook/cloudformation-template.yml b/setup/04_optional-aws-sagemaker-notebook/cloudformation-template.yml
@@ -0,0 +1,167 @@
+AWSTemplateFormatVersion: '2010-09-09'
+# Folded block scalar: yields the same one-line description as before without
+# relying on an unindented continuation line inside a quoted string (and
+# without the trailing whitespace the old first line carried).
+Description: >-
+  CloudFormation template to create a GPU-enabled Jupyter notebook in
+  SageMaker with an execution role and LLMs-from-scratch Repo
+
+# Stack inputs. Both have defaults, so the stack can be created without
+# supplying any values.
+Parameters:
+  # Used as the notebook instance name and as part of the KMS key alias.
+  NotebookName:
+    Type: String
+    Default: 'LLMsFromScratchNotebook'
+    Description: 'Name of the SageMaker notebook instance (also used to derive the KMS key alias)'
+    # Validate up front so an invalid name is rejected when the stack is
+    # submitted rather than halfway through resource creation.
+    MaxLength: 63
+    AllowedPattern: '^[a-zA-Z0-9](-*[a-zA-Z0-9])*$'
+    ConstraintDescription: 'Must be at most 63 characters: letters, digits and hyphens, starting and ending with a letter or digit'
+  # Passed to the notebook instance as its default code repository.
+  DefaultRepoUrl:
+    Type: String
+    Default: 'https://github.com/rasbt/LLMs-from-scratch.git'
+    Description: 'URL of the Git repository to use as the default code repository of the notebook instance'
+
+Resources:
+  # IAM role passed to the notebook instance as its execution role.
+  SageMakerExecutionRole:
+    Type: AWS::IAM::Role
+    Properties:
+      # Trust policy: only the SageMaker service principal may assume the role.
+      AssumeRolePolicyDocument:
+        Version: '2012-10-17'
+        Statement:
+          - Effect: Allow
+            Principal:
+              Service:
+                - sagemaker.amazonaws.com
+            Action:
+              - sts:AssumeRole
+      # NOTE(review): both attached policies are AWS-managed "FullAccess"
+      # policies, and nothing else in this template references Bedrock —
+      # confirm the Bedrock policy is actually needed.
+      ManagedPolicyArns:
+        - arn:aws:iam::aws:policy/AmazonSageMakerFullAccess
+        - arn:aws:iam::aws:policy/AmazonBedrockFullAccess
+
+  # KMS key referenced by the notebook instance (via KmsKeyId) for encryption.
+  KmsKey:
+    Type: AWS::KMS::Key
+    Properties:
+      Description: 'KMS key for SageMaker notebook'
+      # Key policy with a single statement: the account root principal is
+      # allowed every KMS action on this key.
+      KeyPolicy:
+        Version: '2012-10-17'
+        Statement:
+          - Effect: Allow
+            Principal:
+              AWS: !Sub 'arn:aws:iam::${AWS::AccountId}:root'
+            Action: 'kms:*'
+            Resource: '*'
+      EnableKeyRotation: true
+
+  # Friendly alias for the key above; the alias name is derived from the
+  # NotebookName parameter.
+  KmsKeyAlias:
+    Type: AWS::KMS::Alias
+    Properties:
+      AliasName: !Sub 'alias/${NotebookName}-kms-key'
+      TargetKeyId: !Ref KmsKey
+
+  # Lifecycle configuration that carries the OnCreate / OnStart scripts of the
+  # notebook instance.
+  TensorConfigLifecycle:
+    Type: AWS::SageMaker::NotebookInstanceLifecycleConfig
+    Properties:
+      # Derived from NotebookName so that several stacks can coexist in one
+      # account/region; a fixed name collides on the second deployment.
+      # Lifecycle configuration names are limited to 63 characters, so the
+      # notebook name plus this suffix must stay within that limit.
+      NotebookInstanceLifecycleConfigName: !Sub '${NotebookName}-lifecycle'
+      # OnCreate hook: writes the environment installer to the notebook's
+      # SageMaker directory and launches it in the background (output goes to
+      # setup.log) instead of running the long installation inside the hook.
+      # The installer drops a `setup-complete` flag file that OnStart checks.
+      OnCreate:
+        - Content: !Base64 |
+            #!/bin/bash
+            set -e
+
+            # Create a startup script that will run in the background
+            cat << 'EOF' > /home/ec2-user/SageMaker/setup-environment.sh
+            #!/bin/bash
+
+            sudo -u ec2-user -i <<'INNEREOF'
+            unset SUDO_UID
+
+            # Install a separate conda installation via Miniconda
+            WORKING_DIR=/home/ec2-user/SageMaker/custom-miniconda
+            CONDA_BIN="$WORKING_DIR/miniconda/bin/conda"
+            mkdir -p "$WORKING_DIR"
+            wget https://repo.anaconda.com/miniconda/Miniconda3-4.7.12.1-Linux-x86_64.sh -O "$WORKING_DIR/miniconda.sh"
+            bash "$WORKING_DIR/miniconda.sh" -b -u -p "$WORKING_DIR/miniconda"
+            rm -rf "$WORKING_DIR/miniconda.sh"
+
+            # Ensure we're using the Miniconda conda
+            export PATH="$WORKING_DIR/miniconda/bin:$PATH"
+
+            # Initialize conda
+            "$CONDA_BIN" init bash
+            source ~/.bashrc
+
+            # Create and activate environment
+            KERNEL_NAME="tensorflow2_p39"
+            PYTHON="3.9"
+            # All pip installs below go through the new environment's own bin
+            # directory, derived from KERNEL_NAME so the name lives in one place.
+            ENV_BIN="$WORKING_DIR/miniconda/envs/$KERNEL_NAME/bin"
+            "$CONDA_BIN" create --yes --name "$KERNEL_NAME" python="$PYTHON"
+            eval "$("$CONDA_BIN" shell.bash activate "$KERNEL_NAME")"
+
+            # Install CUDA toolkit and cuDNN
+            "$CONDA_BIN" install --yes cudatoolkit=11.8 cudnn
+
+            # Install ipykernel
+            "$ENV_BIN/pip" install --quiet ipykernel
+
+            # Install PyTorch with CUDA support
+            "$ENV_BIN/pip3" install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu118
+
+            # Install other packages
+            # NOTE(review): TensorFlow is installed three times (pip extra, conda
+            # package, pinned pip version); presumably the final pin decides the
+            # version that ends up in the environment. Confirm whether the first
+            # two steps are still required.
+            # The requirement is quoted so the shell never treats [gpu] as a glob.
+            "$ENV_BIN/pip" install 'tensorflow[gpu]'
+            "$CONDA_BIN" install --yes tensorflow-gpu
+            "$ENV_BIN/pip" install tensorflow==2.15.0
+            "$CONDA_BIN" install --yes setuptools tiktoken tqdm numpy pandas psutil
+
+            "$CONDA_BIN" install -y jupyterlab==4.0
+            "$ENV_BIN/pip" install matplotlib==3.7.1
+
+            # Create a flag file to indicate setup is complete, but only when the
+            # new environment can actually import ipykernel (OnStart needs it to
+            # register the kernel); a failed installation must not look like success.
+            if "$ENV_BIN/python" -c "import ipykernel"; then
+                touch /home/ec2-user/SageMaker/setup-complete
+            else
+                echo "Environment setup failed; see the output above for details." >&2
+                exit 1
+            fi
+
+            INNEREOF
+            EOF
+
+            # Make the script executable and run it in the background
+            chmod +x /home/ec2-user/SageMaker/setup-environment.sh
+            sudo -u ec2-user nohup /home/ec2-user/SageMaker/setup-environment.sh > /home/ec2-user/SageMaker/setup.log 2>&1 &
+
+      # OnStart hook. Exits successfully without doing anything while the
+      # `setup-complete` flag file is missing. Once the flag exists, it
+      # registers every environment found under custom-miniconda/miniconda/envs
+      # as a Jupyter kernel named "Custom (<env>)" for ec2-user and then
+      # restarts the Jupyter server (initctl or systemctl, chosen from the
+      # contents of /etc/os-release).
+      OnStart:
+        - Content: !Base64 |
+            #!/bin/bash
+            set -e
+
+            # Check if setup is still running or not started
+            if ! [ -f /home/ec2-user/SageMaker/setup-complete ]; then
+                echo "Setup still in progress or not started. Check setup.log for details."
+                exit 0
+            fi
+
+            sudo -u ec2-user -i <<'EOF'
+            unset SUDO_UID
+
+            WORKING_DIR=/home/ec2-user/SageMaker/custom-miniconda
+            source "$WORKING_DIR/miniconda/bin/activate"
+
+            for env in $WORKING_DIR/miniconda/envs/*; do
+                BASENAME=$(basename "$env")
+                source activate "$BASENAME"
+                python -m ipykernel install --user --name "$BASENAME" --display-name "Custom ($BASENAME)"
+            done
+            EOF
+
+            echo "Restarting the Jupyter server.."
+            CURR_VERSION=$(cat /etc/os-release)
+            if [[ $CURR_VERSION == *$"http://aws.amazon.com/amazon-linux-ami/"* ]]; then
+                sudo initctl restart jupyter-server --no-wait
+            else
+                sudo systemctl --no-block restart jupyter-server.service
+            fi
+
+  # The notebook instance itself. It ties together the resources defined in
+  # this template: execution role, KMS key, lifecycle configuration, and the
+  # default code repository taken from the DefaultRepoUrl parameter.
+  SageMakerNotebookInstance:
+    Type: AWS::SageMaker::NotebookInstance
+    Properties:
+      InstanceType: ml.g4dn.xlarge
+      NotebookInstanceName: !Ref NotebookName
+      RoleArn: !GetAtt SageMakerExecutionRole.Arn
+      DefaultCodeRepository: !Ref DefaultRepoUrl
+      KmsKeyId: !GetAtt KmsKey.Arn
+      PlatformIdentifier: notebook-al2-v2
+      VolumeSizeInGB: 50
+      # Referencing the lifecycle config through GetAtt (rather than repeating
+      # its name) keeps the two resources in sync.
+      LifecycleConfigName: !GetAtt TensorConfigLifecycle.NotebookInstanceLifecycleConfigName
+
+# Values exposed by the stack after creation: the notebook instance reference
+# plus the ARNs of the execution role and the KMS key.
+Outputs:
+  NotebookInstanceName:
+    Description: The name of the created SageMaker Notebook Instance
+    Value: !Ref SageMakerNotebookInstance
+  ExecutionRoleArn:
+    Description: The ARN of the created SageMaker Execution Role
+    Value: !GetAtt SageMakerExecutionRole.Arn
+  KmsKeyArn:
+    Description: The ARN of the created KMS Key for the notebook
+    Value: !GetAtt KmsKey.Arn