Merge branch 'main' into ko3n1g/ci/multi-tenancy-for-tests

NVIDIA · May 16, 2024 · 32950d4 · 32950d4
2 parents 226245e + 526b6ad
commit 32950d4
Show file tree

Hide file tree

Showing 63 changed files with 2,757 additions and 1,082 deletions.
diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml
@@ -15,7 +15,9 @@ name: "CICD NeMo"
 
 on:
   pull_request:
-    branches: [ "main" ]
+    branches:
+      - 'main'
+      - 'r**'
     types: [ labeled ]
 
 concurrency:
@@ -73,92 +75,45 @@ jobs:
       uses: actions/checkout@v4
       with:
         path: ${{ github.run_id }}
-
-    - name: Container setup
-      run: |
-        # Pull base PyTorch container
-        docker pull nvcr.io/nvidia/pytorch:24.02-py3
-        docker run --device=/dev/nvidia0 --gpus all --shm-size=8g --env TRANSFORMERS_OFFLINE=0 --env HYDRA_FULL_ERROR=1 --env PYTHONUNBUFFERED=1 --volume ${{ github.workspace }}/${{ github.run_id }}:/workspace --volume /mnt/datadrive/TestData:/home/TestData nvcr.io/nvidia/pytorch:24.02-py3 /bin/bash -c '
-            set -x
-
-            # PyTorch version
-            python -c "import torch; print(torch.__version__)"
-            python -c "import torchvision; print(torchvision.__version__)"
-
-            # Install test requirements
-            apt-get update && apt-get install -y bc && pip install -r requirements/requirements_test.txt && pip install -r requirements/requirements_lightning.txt
-
-            # Code formatting checks
-            python setup.py style
-
-            # Copyright Headers check
-            python tests/check_copyright_header.py --dir .
-
-            # NeMo Installation
-            ./reinstall.sh release
-
-            # Transformer Engine installation
-            git clone https://github.com/NVIDIA/TransformerEngine.git && \
-                pushd TransformerEngine && \
-                git fetch origin bfe21c3d68b0a9951e5716fb520045db53419c5e && \
-                git checkout FETCH_HEAD && \
-                git submodule init && git submodule update && \
-                NVTE_FRAMEWORK=pytorch NVTE_WITH_USERBUFFERS=1 MPI_HOME=/usr/local/mpi pip install .  && \
-                popd
-
-            # Apex installation
-            git clone https://github.com/NVIDIA/apex.git && \
-                pushd apex && \
-                git checkout 810ffae374a2b9cb4b5c5e28eaeca7d7998fca0c && \
-                cp -R apex /usr/local/lib/python3.10/dist-packages && \
-                popd
-
-            # pip package should be working with main, if not we can update the commit here
-            # until the pip package is updated
-            # Megatron Core installation
-            git clone https://github.com/NVIDIA/Megatron-LM.git && \
-                pushd Megatron-LM && \
-                git checkout c90aa1671fc0b97f80fa6c3bb892ce6f8e88e7c9 && \
-                pip install . && \
-                  pushd megatron/core/datasets && \
-                  make && \
-                  popd && \
-                popd
-            export PYTHONPATH="${PYTHONPATH}:/workspace/Megatron-LM"
-
-            # Install only for test: L2: Segmentation Tool
-            pushd tools/ctc_segmentation && \
-                pip install -r requirements.txt && \
-                apt-get update && apt-get install libsox-fmt-all -y && \
-                popd
-
-            # AMMO installation
-            pip install nvidia-ammo~=0.9.0 --extra-index-url https://pypi.nvidia.com --no-cache-dir
-
-            # PyTorch Lightning version
-            python -c "import pytorch_lightning; print(pytorch_lightning.__version__)"
-
-            # PyTorch Lightning DDP Checks
-            CUDA_VISIBLE_DEVICES="0,1" python "tests/core_ptl/check_for_ranks.py"
-
-            # Basic Import Checks
-            python -c "import nemo.collections.asr as nemo_asr"
-            python -c "import nemo.collections.nlp as nemo_nlp"
-            python -c "import nemo.collections.tts as nemo_tts"
-
-            # set permission
-            chmod 777 -R /workspace
-            '
-            ### \'\'
-
-    - name: Push container to registry for future use
+
+    - name: Set up Docker Buildx
+      uses: docker/setup-buildx-action@v3
+      with:
+        # Use the `docker` driver, as this speeds things up for
+        # trivial (non-multi-stage) builds.
+        driver: docker
+
+    - name: Build and push  # builds Dockerfile.ci; pushes a per-run tag and refreshes :latest
+      uses: docker/build-push-action@v5
+      with:
+        file: Dockerfile.ci
+        push: true
+        cache-from: type=registry,ref=nemoci.azurecr.io/nemo_container:latest  # explicit form of the bare-ref shorthand
+        cache-to: type=inline  # cache metadata travels inside the pushed image
+        tags: |
+          nemoci.azurecr.io/nemo_container_${{ github.run_id }}
+          nemoci.azurecr.io/nemo_container:latest
+
+    - name: Run some checks
       run: |
-        # Push container
-        echo "Docker: List containers" && docker ps -a
-        DOCKER_COMMIT=$(docker ps --latest --quiet)  # latest container
-        docker commit $DOCKER_COMMIT nemoci.azurecr.io/nemo_container_${{ github.run_id }}
-        docker tag nemoci.azurecr.io/nemo_container_${{ github.run_id }} nemoci.azurecr.io/nemo_container_${{ github.run_id }}
-        docker push nemoci.azurecr.io/nemo_container_${{ github.run_id }}
+        docker run --rm --device=/dev/nvidia0 --gpus all --shm-size=8g --env TRANSFORMERS_OFFLINE=0 --env HYDRA_FULL_ERROR=1 --env PYTHONUNBUFFERED=1 nemoci.azurecr.io/nemo_container_${{ github.run_id }} bash -c '\
+          # Print the installed PyTorch Lightning version
+          python -c "import pytorch_lightning; print(pytorch_lightning.__version__)"
+
+          # PyTorch Lightning DDP checks, restricted to GPUs 0 and 1
+          CUDA_VISIBLE_DEVICES="0,1" python "tests/core_ptl/check_for_ranks.py"
+
+          # Basic import checks for the ASR, NLP and TTS collections
+          python -c "import nemo.collections.asr as nemo_asr"
+          python -c "import nemo.collections.nlp as nemo_nlp"
+          python -c "import nemo.collections.tts as nemo_tts"
+
+          python setup.py style
+          python tests/check_copyright_header.py --dir .
+
+          # These checks are advisory only: always exit 0 so a failing check above does not fail this step
+          exit 0
+        '
 
     # - name: Build and push to local registry
     #   uses: docker/build-push-action@v5
@@ -399,7 +354,7 @@ jobs:
         - name: Checkout repository
           uses: actions/checkout@v4
         - run: |
-            python examples/nlp/language_modeling/megatron_llama_quantization.py \
+            python examples/nlp/language_modeling/megatron_quantization.py \
             model_file=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
             quantization.algorithm=null \
             model_save=/home/TestData/nlp/megatron_llama/ci_baseline
@@ -408,69 +363,70 @@ jobs:
         - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
           if: "failure()"
 
-  # L2_PTQ_Llama2_FP8:
-  #   needs: [cicd-test-container-setup]
-  #   runs-on: self-hosted-azure
-  #   timeout-minutes: 10
-  #   container:
-  #     image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
-  #     options:
-  #       # --user 0:128
-  #       --device=/dev/nvidia0
-  #       --gpus all
-  #       --shm-size=8g
-  #       --env TRANSFORMERS_OFFLINE=0
-  #       --env HYDRA_FULL_ERROR=1
-  #       --volume /mnt/datadrive/TestData:/home/TestData
-  #   steps:
-  #       - name: Checkout repository
-  #         uses: actions/checkout@v4
-  #       - run: |
-  #           python examples/nlp/language_modeling/megatron_llama_quantization.py \
-  #           model_file=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
-  #           tensor_model_parallel_size=2 \
-  #           trainer.devices=2 \
-  #           quantization.calib_dataset=/home/TestData/nlp/test_quantization/test.json \
-  #           quantization.algorithm=fp8 \
-  #           quantization.num_calib_size=8 \
-  #           inference.batch_size=2 \
-  #           export.inference_tensor_parallel=2 \
-  #           model_save=/home/TestData/nlp/megatron_llama/ci_fp8.qnemo
-
-  #           rm -rf /home/TestData/nlp/megatron_llama/ci_fp8.qnemo
-  #       - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
-  #         if: "failure()"
-
-  # L2_PTQ_Llama2_INT8_SQ:
-  #   needs: [cicd-test-container-setup]
-  #   runs-on: self-hosted-azure
-  #   timeout-minutes: 10
-  #   container:
-  #     image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
-  #     options:
-  #       # --user 0:128
-  #       --device=/dev/nvidia0
-  #       --gpus all
-  #       --shm-size=8g
-  #       --env TRANSFORMERS_OFFLINE=0
-  #       --env HYDRA_FULL_ERROR=1
-  #       --volume /mnt/datadrive/TestData:/home/TestData
-  #   steps:
-  #       - name: Checkout repository
-  #         uses: actions/checkout@v4
-  #       - run: |
-  #           python examples/nlp/language_modeling/megatron_llama_quantization.py \
-  #           model_file=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
-  #           quantization.calib_dataset=/home/TestData/nlp/test_quantization/test.json \
-  #           quantization.algorithm=int8_sq \
-  #           quantization.num_calib_size=8 \
-  #           inference.batch_size=2 \
-  #           model_save=/home/TestData/nlp/megatron_llama/ci_int8_sq.qnemo
-
-  #           rm -rf /home/TestData/nlp/megatron_llama/ci_int8_sq.qnemo
-  #       - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
-  #         if: "failure()"
-
+  L2_PTQ_Llama2_FP8:  # runs megatron_quantization.py with quantization.algorithm=fp8 on 2 devices (tensor parallel size 2)
+    needs: [cicd-test-container-setup]
+    runs-on: self-hosted-azure
+    timeout-minutes: 10
+    container:
+      image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
+      options:
+        # --user 0:128
+        --device=/dev/nvidia0
+        --gpus all
+        --shm-size=8g
+        --env TRANSFORMERS_OFFLINE=0
+        --env HYDRA_FULL_ERROR=1
+        --volume /mnt/datadrive/TestData:/home/TestData
+    steps:
+        - name: Checkout repository
+          uses: actions/checkout@v4
+        - run: |
+            python examples/nlp/language_modeling/megatron_quantization.py \
+            model_file=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
+            tensor_model_parallel_size=2 \
+            trainer.devices=2 \
+            quantization.calib_dataset=/home/TestData/nlp/test_quantization/test.json \
+            quantization.algorithm=fp8 \
+            quantization.num_calib_size=8 \
+            inference.batch_size=2 \
+            export.inference_tensor_parallel=2 \
+            model_save=/home/TestData/nlp/megatron_llama/ci_fp8.qnemo
+
+            rm -rf /home/TestData/nlp/megatron_llama/ci_fp8.qnemo
+        - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"  # runs only when an earlier step failed
+          if: "failure()"
+
+  L2_PTQ_Llama2_INT8_SQ:  # runs megatron_quantization.py with quantization.algorithm=int8_sq
+    needs: [cicd-test-container-setup]
+    runs-on: self-hosted-azure
+    timeout-minutes: 10
+    container:
+      image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
+      options:
+        # --user 0:128
+        --device=/dev/nvidia0
+        --gpus all
+        --shm-size=8g
+        --env TRANSFORMERS_OFFLINE=0
+        --env HYDRA_FULL_ERROR=1
+        --volume /mnt/datadrive/TestData:/home/TestData
+    steps:
+        - name: Checkout repository
+          uses: actions/checkout@v4
+        - run: |
+            python examples/nlp/language_modeling/megatron_quantization.py \
+            model_file=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
+            quantization.calib_dataset=/home/TestData/nlp/test_quantization/test.json \
+            quantization.algorithm=int8_sq \
+            quantization.num_calib_size=8 \
+            inference.batch_size=2 \
+            model_save=/home/TestData/nlp/megatron_llama/ci_int8_sq.qnemo
+
+            rm -rf /home/TestData/nlp/megatron_llama/ci_int8_sq.qnemo
+        - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"  # runs only when an earlier step failed
+          if: "failure()"
+
+  # TODO: investigate why the int4_awq test gets stuck, then restore the test
   #L2_PTQ_Llama2_INT4_AWQ:
   #  needs: [cicd-test-container-setup]
   #  runs-on: self-hosted-azure
@@ -489,7 +445,7 @@ jobs:
   #      - name: Checkout repository
   #        uses: actions/checkout@v4
   #      - run: |
-  #          python examples/nlp/language_modeling/megatron_llama_quantization.py \
+  #          python examples/nlp/language_modeling/megatron_quantization.py \
   #          model_file=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
   #          tensor_model_parallel_size=1 \
   #          trainer.devices=1 \
@@ -6489,7 +6445,7 @@ jobs:
   Speech_Checkpoints_tests:
     needs: [cicd-test-container-setup]
     runs-on: self-hosted-azure
-    timeout-minutes: 10
+    timeout-minutes: 20
     container:
       image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
       options: 

diff --git a/Dockerfile b/Dockerfile
@@ -133,8 +133,6 @@ RUN for f in $(ls requirements*.txt); do pip3 install --disable-pip-version-chec
 RUN pip install flash-attn
 # install numba for latest containers
 RUN pip install numba>=0.57.1
-# install ammo
-RUN pip install nvidia-ammo~=0.9.0 --extra-index-url https://pypi.nvidia.com --no-cache-dir
 
 # copy nemo source into a scratch image
 FROM scratch as nemo-src

diff --git a/Dockerfile.ci b/Dockerfile.ci
@@ -0,0 +1,74 @@
+# syntax=docker/dockerfile:1-labs
+
+# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:24.02-py3
+
+FROM ${BASE_IMAGE}
+
+ENV TRANSFORMERS_OFFLINE=0
+ENV HYDRA_FULL_ERROR=1
+ENV PYTHONUNBUFFERED=1
+
+# APT packages (bc, libsox-fmt-all); bash -ex echoes each command and aborts the layer on the first failure
+RUN <<"EOF" bash -ex
+apt-get update
+apt-get install -y bc libsox-fmt-all
+apt-get clean
+EOF
+
+WORKDIR /workspace
+
+# Install NeMo requirements and the pinned TransformerEngine / Megatron-LM / ModelOpt / Apex versions
+ARG TE_TAG=bfe21c3d68b0a9951e5716fb520045db53419c5e
+ARG MODELOPT_VERSION=0.11.0
+ARG MCORE_TAG=c90aa1671fc0b97f80fa6c3bb892ce6f8e88e7c9
+ARG APEX_TAG=810ffae374a2b9cb4b5c5e28eaeca7d7998fca0c
+RUN \
+--mount=type=bind,source=requirements,target=requirements \
+--mount=type=bind,source=tools,target=tools \
+--mount=type=bind,source=setup.py,target=setup.py \
+--mount=type=bind,source=nemo/package_info.py,target=nemo/package_info.py \
+--mount=type=bind,source=nemo/__init__.py,target=nemo/__init__.py <<"EOF" bash -ex
+pip install --no-cache-dir --no-build-isolation --extra-index-url https://pypi.nvidia.com \
+"transformer-engine @ git+https://github.com/NVIDIA/TransformerEngine.git@${TE_TAG}" \
+"megatron_core @ git+https://github.com/NVIDIA/Megatron-LM.git@${MCORE_TAG}" \
+"nvidia-modelopt[torch]~=${MODELOPT_VERSION}" \
+"apex @ git+https://github.com/NVIDIA/apex.git@${APEX_TAG}" \
+-r tools/ctc_segmentation/requirements.txt \
+".[all]"
+
+# Clone Megatron-LM at the pinned tag and run make in megatron/core/datasets (one command per line so bash -e stops on any failure)
+git clone https://github.com/NVIDIA/Megatron-LM.git
+pushd Megatron-LM
+git checkout ${MCORE_TAG}
+pushd megatron/core/datasets
+make
+popd
+popd
+# PYTHONPATH is set by the ENV instruction at the end of this file; an export here would not outlive this RUN
+EOF
+
+# Copy the build context into the working directory (/workspace) and install it with the "all" extras
+COPY ./ ./
+RUN <<"EOF" bash -ex
+pip install --no-cache-dir --no-build-isolation ".[all]"
+
+# Make everything under /workspace readable, writable and executable by any user
+chmod 777 -R /workspace
+EOF
+
+ENV PYTHONPATH="${PYTHONPATH}:/workspace/Megatron-LM"
+