diff --git a/setup/04_optional-aws-sagemaker-notebook/README.md b/setup/04_optional-aws-sagemaker-notebook/README.md new file mode 100644 index 00000000..b2049d2b --- /dev/null +++ b/setup/04_optional-aws-sagemaker-notebook/README.md @@ -0,0 +1,31 @@ +# AWS CloudFormation Template: Jupyter Notebook with LLMs-from-scratch Repo + +This CloudFormation template creates a GPU-enabled Jupyter notebook in Amazon SageMaker with an execution role and the LLMs-from-scratch GitHub repository. + +## What it does: + +1. Creates an IAM role with the necessary permissions for the SageMaker notebook instance. +2. Creates a KMS key and an alias for encrypting the notebook instance. +3. Configures a notebook instance lifecycle configuration script that: + - Sets up a separate Miniconda installation under the user's home directory. + - Creates a custom Python environment with TensorFlow 2.15.0 and PyTorch 2.1.0, both with CUDA support. + - Installs additional packages like Jupyter Lab, Matplotlib, and other useful libraries. + - Registers the custom environment as a Jupyter kernel. +4. Creates the SageMaker notebook instance with the specified configuration, including the GPU-enabled instance type, the execution role, and the default code repository. + +## How to use: + +1. Download the CloudFormation template file (`cloudformation-template.yml`). +2. In the AWS Management Console, navigate to the CloudFormation service. +3. Create a new stack and upload the template file. +4. Provide a name for the notebook instance (defaults to "LLMsFromScratchNotebook") and, optionally, the Git repository URL to attach (defaults to the LLMs-from-scratch GitHub repo). +5. Review and accept the template's parameters, then create the stack. +6. Once the stack creation is complete, the SageMaker notebook instance will be available in the SageMaker console. +7. Open the notebook instance and start using the pre-configured environment to work on your LLMs-from-scratch projects. 
+
+## Key Points:
+
+- The template creates a GPU-enabled (ml.g4dn.xlarge) notebook instance with 50GB of storage.
+- It sets up a custom Miniconda environment with TensorFlow 2.15.0 and PyTorch 2.1.0, both with CUDA support.
+- The custom environment is registered as a Jupyter kernel, making it available for use in the notebook.
+- The template also creates a KMS key for encrypting the notebook instance and an IAM role with the necessary permissions.
\ No newline at end of file
diff --git a/setup/04_optional-aws-sagemaker-notebook/cloudformation-template.yml b/setup/04_optional-aws-sagemaker-notebook/cloudformation-template.yml
new file mode 100755
index 00000000..50b3f53f
--- /dev/null
+++ b/setup/04_optional-aws-sagemaker-notebook/cloudformation-template.yml
@@ -0,0 +1,189 @@
+AWSTemplateFormatVersion: '2010-09-09'
+Description: 'CloudFormation template to create a GPU-enabled Jupyter notebook in SageMaker with an execution role and
+LLMs-from-scratch Repo'
+
+Parameters:
+  # Name of the SageMaker notebook instance; also used in the KMS key alias.
+  NotebookName:
+    Type: String
+    Default: 'LLMsFromScratchNotebook'
+  # Git repository passed to the notebook instance as DefaultCodeRepository.
+  DefaultRepoUrl:
+    Type: String
+    Default: 'https://github.com/rasbt/LLMs-from-scratch.git'
+
+Resources:
+  # IAM role that sagemaker.amazonaws.com may assume; used as the notebook's RoleArn.
+  SageMakerExecutionRole:
+    Type: AWS::IAM::Role
+    Properties:
+      AssumeRolePolicyDocument:
+        Version: '2012-10-17'
+        Statement:
+          - Effect: Allow
+            Principal:
+              Service:
+                - sagemaker.amazonaws.com
+            Action:
+              - sts:AssumeRole
+      ManagedPolicyArns:
+        - arn:aws:iam::aws:policy/AmazonSageMakerFullAccess
+        # NOTE(review): nothing else in this template references Bedrock; confirm
+        # this policy is needed before keeping the extra permissions.
+        - arn:aws:iam::aws:policy/AmazonBedrockFullAccess
+
+  # Key passed to the notebook instance as KmsKeyId; the key policy allows
+  # kms:* for the account root principal.
+  KmsKey:
+    Type: AWS::KMS::Key
+    Properties:
+      Description: 'KMS key for SageMaker notebook'
+      KeyPolicy:
+        Version: '2012-10-17'
+        Statement:
+          - Effect: Allow
+            Principal:
+              AWS: !Sub 'arn:aws:iam::${AWS::AccountId}:root'
+            Action: 'kms:*'
+            Resource: '*'
+      EnableKeyRotation: true
+
+  KmsKeyAlias:
+    Type: AWS::KMS::Alias
+    Properties:
+      AliasName: !Sub 'alias/${NotebookName}-kms-key'
+      TargetKeyId: !Ref KmsKey
+
+  # OnCreate builds the conda environment in the background; OnStart registers
+  # the environments as Jupyter kernels once the setup-complete flag exists.
+  TensorConfigLifecycle:
+    Type: AWS::SageMaker::NotebookInstanceLifecycleConfig
+    Properties:
+      NotebookInstanceLifecycleConfigName: "TensorConfigv241128"
+      OnCreate:
+        - Content: !Base64 |
+            #!/bin/bash
+            set -e
+
+            # Create a startup script that will run in the background
+            cat << 'EOF' > /home/ec2-user/SageMaker/setup-environment.sh
+            #!/bin/bash
+
+            sudo -u ec2-user -i <<'INNEREOF'
+            unset SUDO_UID
+
+            # Install a separate conda installation via Miniconda
+            WORKING_DIR=/home/ec2-user/SageMaker/custom-miniconda
+            mkdir -p "$WORKING_DIR"
+            wget https://repo.anaconda.com/miniconda/Miniconda3-4.7.12.1-Linux-x86_64.sh -O "$WORKING_DIR/miniconda.sh"
+            bash "$WORKING_DIR/miniconda.sh" -b -u -p "$WORKING_DIR/miniconda"
+            rm -rf "$WORKING_DIR/miniconda.sh"
+
+            # Ensure we're using the Miniconda conda
+            export PATH="$WORKING_DIR/miniconda/bin:$PATH"
+
+            # Initialize conda
+            "$WORKING_DIR/miniconda/bin/conda" init bash
+            source ~/.bashrc
+
+            # Create and activate environment
+            KERNEL_NAME="tensorflow2_p39"
+            PYTHON="3.9"
+            "$WORKING_DIR/miniconda/bin/conda" create --yes --name "$KERNEL_NAME" python="$PYTHON"
+            eval "$("$WORKING_DIR/miniconda/bin/conda" shell.bash activate "$KERNEL_NAME")"
+
+            # Install CUDA toolkit and cuDNN
+            "$WORKING_DIR/miniconda/bin/conda" install --yes cudatoolkit=11.8 cudnn
+
+            # Install ipykernel
+            "$WORKING_DIR/miniconda/envs/$KERNEL_NAME/bin/pip" install --quiet ipykernel
+
+            # Install PyTorch with CUDA support
+            "$WORKING_DIR/miniconda/envs/$KERNEL_NAME/bin/pip3" install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu118
+
+            # Install other packages (the extras spec is quoted so the shell
+            # cannot treat the brackets as a glob pattern)
+            "$WORKING_DIR/miniconda/envs/$KERNEL_NAME/bin/pip" install 'tensorflow[gpu]'
+            "$WORKING_DIR/miniconda/bin/conda" install --yes tensorflow-gpu
+            "$WORKING_DIR/miniconda/envs/$KERNEL_NAME/bin/pip" install tensorflow==2.15.0
+            "$WORKING_DIR/miniconda/bin/conda" install --yes setuptools tiktoken tqdm numpy pandas psutil
+
+            "$WORKING_DIR/miniconda/bin/conda" install -y jupyterlab==4.0
+            "$WORKING_DIR/miniconda/envs/$KERNEL_NAME/bin/pip" install matplotlib==3.7.1
+
+            # Create a flag file to indicate setup is complete, but only when the
+            # kernel environment is usable: OnStart runs "python -m ipykernel install"
+            # for the environments and exits non-zero if that fails, so a broken
+            # environment must not be marked as complete.
+            if "$WORKING_DIR/miniconda/envs/$KERNEL_NAME/bin/python" -c "import ipykernel"; then
+              touch /home/ec2-user/SageMaker/setup-complete
+            else
+              echo "Environment setup failed: ipykernel is not importable in $KERNEL_NAME." >&2
+              exit 1
+            fi
+
+            INNEREOF
+            EOF
+
+            # Make the script executable and run it in the background
+            chmod +x /home/ec2-user/SageMaker/setup-environment.sh
+            sudo -u ec2-user nohup /home/ec2-user/SageMaker/setup-environment.sh > /home/ec2-user/SageMaker/setup.log 2>&1 &
+
+      OnStart:
+        - Content: !Base64 |
+            #!/bin/bash
+            set -e
+
+            # Check if setup is still running or not started
+            if ! [ -f /home/ec2-user/SageMaker/setup-complete ]; then
+              echo "Setup still in progress or not started. Check setup.log for details."
+              exit 0
+            fi
+
+            sudo -u ec2-user -i <<'EOF'
+            unset SUDO_UID
+
+            WORKING_DIR=/home/ec2-user/SageMaker/custom-miniconda
+            source "$WORKING_DIR/miniconda/bin/activate"
+
+            for env in "$WORKING_DIR"/miniconda/envs/*; do
+              # Skip anything that is not a conda environment (including the
+              # unexpanded pattern when the envs directory is empty).
+              [ -x "$env/bin/python" ] || continue
+              BASENAME=$(basename "$env")
+              source activate "$BASENAME"
+              python -m ipykernel install --user --name "$BASENAME" --display-name "Custom ($BASENAME)"
+            done
+            EOF
+
+            echo "Restarting the Jupyter server.."
+            # NOTE(review): presumably the amazon-linux-ami URL identifies images that use upstart (initctl) rather than systemd - confirm.
+            CURR_VERSION=$(cat /etc/os-release)
+            if [[ $CURR_VERSION == *$"http://aws.amazon.com/amazon-linux-ami/"* ]]; then
+              sudo initctl restart jupyter-server --no-wait
+            else
+              sudo systemctl --no-block restart jupyter-server.service
+            fi
+
+  SageMakerNotebookInstance:
+    Type: AWS::SageMaker::NotebookInstance
+    Properties:
+      InstanceType: ml.g4dn.xlarge
+      NotebookInstanceName: !Ref NotebookName
+      RoleArn: !GetAtt SageMakerExecutionRole.Arn
+      DefaultCodeRepository: !Ref DefaultRepoUrl
+      KmsKeyId: !GetAtt KmsKey.Arn
+      PlatformIdentifier: notebook-al2-v2
+      VolumeSizeInGB: 50
+      LifecycleConfigName: !GetAtt TensorConfigLifecycle.NotebookInstanceLifecycleConfigName
+
+Outputs:
+  NotebookInstanceName:
+    Description: The name of the created SageMaker Notebook Instance
+    Value: !Ref SageMakerNotebookInstance
+  ExecutionRoleArn:
+    Description: The ARN of the created SageMaker Execution Role
+    Value: !GetAtt SageMakerExecutionRole.Arn
+  KmsKeyArn:
+    Description: The ARN of the created KMS Key for the notebook
+    Value: !GetAtt KmsKey.Arn
\ No newline at end of file