switch benchmarking and testing jobs to run using "test" label (#273)

SUMMARY:
* update benchmarking, testing, and accuracy jobs to run on label `aws-test-a10g-24G` or `aws-test-4-a10g-96G`, which are based on the "vanilla deeplearning" AMI
* update relevant GHA actions and workflows so they no longer depend on a `pyenv` virtualenv
* update "model cache" to use local disk as opposed to "EFS"

TEST PLAN:
runs on remote push

---------

Co-authored-by: andy-neuma <[email protected]>
Co-authored-by: Domenic Barbuzzi <[email protected]>
neuralmagic · May 30, 2024 · a160eb9 · a160eb9 · github-actions · May 31, 2024
1 parent 4fabcfc
commit a160eb9
Show file tree

Hide file tree

Showing 15 changed files with 112 additions and 108 deletions.
diff --git a/.github/actions/nm-benchmark/action.yml b/.github/actions/nm-benchmark/action.yml
@@ -22,9 +22,11 @@ runs:
       # move source directories
       mv vllm vllm-ignore || echo "no 'vllm' folder to move"
       mv csrc csrc-ignore || echo "no 'csrc' folder to move"
-      COMMIT=${{ github.sha }}
-      VENV="${{ inputs.venv }}-${COMMIT:0:7}"
-      source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate
+      if [ ! -z "${{ inputs.venv }}" ]; then
+        COMMIT=${{ github.sha }}
+        VENV="${{ inputs.venv }}-${COMMIT:0:7}"
+        source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate
+      fi
       pip3 install -r neuralmagic/benchmarks/requirements-benchmark.txt
       SUCCESS=0
       .github/scripts/nm-run-benchmarks.sh ${{ inputs.benchmark_config_list_file }} ${{ inputs.output_directory }} || SUCCESS=$?

diff --git a/.github/actions/nm-hf-cache/action.yml b/.github/actions/nm-hf-cache/action.yml
@@ -2,13 +2,12 @@ name: HF cache
 description: 'mount HF cache'
 inputs:
   fs_cache:
-    description: 'filesystem to use for HF cache'
+    description: '(deprecated) filesystem to use for HF cache'
     required: true
 runs:
   using: composite
   steps:
   - run: |
-      sudo mkdir -m 777 -p /EFS
-      sudo mount -t nfs4 -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,noresvport ${{ inputs.fs_cache }}:/ /EFS
+      sudo mkdir -m 777 -p ${HF_HOME}
       sudo chown -R $(whoami):$(whoami) ${HF_HOME}
     shell: bash
diff --git a/.github/actions/nm-install-test-whl/action.yml b/.github/actions/nm-install-test-whl/action.yml
@@ -38,9 +38,11 @@ runs:
         mv vllm vllm-ignore
         mv csrc csrc-ignore
         # activate venv and installs
-        COMMIT=${{ github.sha }}
-        VENV="${{ inputs.venv }}-${COMMIT:0:7}"
-        source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate
+        if [ -n "${{ inputs.venv }}" ]; then
+          COMMIT=${{ github.sha }}
+          VENV="${{ inputs.venv }}-${COMMIT:0:7}"
+          source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate
+        fi
         pip3 install coverage
         pip3 install pytest-cov
         pip3 install pytest-xdist
@@ -60,7 +62,7 @@ runs:
         # test and collect code coverage
         SUCCESS=0
         PYTHON_MAJOR_MINOR=$(echo "python${{ inputs.python }}" | cut -d'.' -f1,2)
-        VLLM_SRC=$(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/lib/${PYTHON_MAJOR_MINOR}/site-packages/vllm
+        VLLM_SRC=$(python -c "import vllm; print(vllm.__path__[0])")
         ./.github/scripts/run-tests -s ${VLLM_SRC} -t ${{ inputs.test_directory }} -r ${{ inputs.test_results }} -f ${{ inputs.test_skip_list }}|| SUCCESS=$?
         echo "status=${SUCCESS}" >> "$GITHUB_OUTPUT"
         exit ${SUCCESS}

diff --git a/.github/actions/nm-install-whl/action.yml b/.github/actions/nm-install-whl/action.yml
@@ -16,9 +16,11 @@ runs:
         mv vllm vllm-ignore
         mv csrc csrc-ignore
         # activate and install
-        COMMIT=${{ github.sha }}
-        VENV="${{ env.VENV_BASE }}-${COMMIT:0:7}"
-        source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate
+        if [ ! -z "${{ inputs.venv }}" ]; then
+          COMMIT=${{ github.sha }}
+          VENV="${{ inputs.venv }}-${COMMIT:0:7}"
+          source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate
+        fi
         pip3 install -r requirements-dev.txt
         BASE=$(./.github/scripts/convert-version ${{ inputs.python }})
         WHL=$(find . -type f -iname "*${BASE}*.whl")

diff --git a/.github/actions/nm-lm-eval-accuracy/action.yml b/.github/actions/nm-lm-eval-accuracy/action.yml
@@ -16,9 +16,11 @@ runs:
       mv vllm vllm-ignore || echo "no 'vllm' folder to move"
       mv csrc csrc-ignore || echo "no 'csrc' folder to move"
 
-      COMMIT=${{ github.sha }}
-      VENV="${{ inputs.venv }}-${COMMIT:0:7}"
-      source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate
+      if [ -n "${{ inputs.venv }}" ]; then
+        COMMIT=${{ github.sha }}
+        VENV="${{ inputs.venv }}-${COMMIT:0:7}"
+        source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate
+      fi
 
       pip3 install git+https://github.com/EleutherAI/lm-evaluation-harness.git@262f879a06aa5de869e5dd951d0ff2cf2f9ba380
       pip3 install pytest openai==1.3.9

diff --git a/.github/actions/nm-lm-eval-smoke/action.yml b/.github/actions/nm-lm-eval-smoke/action.yml
@@ -16,9 +16,11 @@ runs:
       mv vllm vllm-ignore || echo "no 'vllm' folder to move"
       mv csrc csrc-ignore || echo "no 'csrc' folder to move"
 
-      COMMIT=${{ github.sha }}
-      VENV="${{ inputs.venv }}-${COMMIT:0:7}"
-      source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate
+      if [ -n "${{ inputs.venv }}" ]; then
+        COMMIT=${{ github.sha }}
+        VENV="${{ inputs.venv }}-${COMMIT:0:7}"
+        source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate
+      fi
 
       pip3 install git+https://github.com/EleutherAI/lm-evaluation-harness.git@9516087b81a61d0e220b22cc1b75be76de23bc10
       pip3 install optimum auto-gptq

diff --git a/.github/actions/nm-produce-gha-benchmark-json/action.yml b/.github/actions/nm-produce-gha-benchmark-json/action.yml
@@ -11,7 +11,7 @@ inputs:
     description: 'Path to a file where the GHA CustomSmallerIsBetter JSON is to be stored'
     required: true
   observation_metrics_output_file_path:
-    description: 'Path to a file where metrics that we only want to observe are stored' 
+    description: 'Path to a file where metrics that we only want to observe are stored'
   python:
     description: 'python version, e.g. 3.10.12'
     required: true
@@ -21,11 +21,13 @@ inputs:
 runs:
   using: composite
   steps:
-  - id: produce_gha_benchmark_jsons 
+  - id: produce_gha_benchmark_jsons
     run: |
-      COMMIT=${{ github.sha }}
-      VENV="${{ inputs.venv }}-${COMMIT:0:7}"
-      source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate
+      if [ ! -z "${{ inputs.venv }}" ]; then
+        COMMIT=${{ github.sha }}
+        VENV="${{ inputs.venv }}-${COMMIT:0:7}"
+        source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate
+      fi
       SUCCESS=0
       python3 -m neuralmagic.benchmarks.scripts.logging.gha_benchmark_logging -i ${{inputs.vllm_benchmark_jsons_path}} --bigger-is-better-metrics-output-file-path ${{ inputs.bigger_is_better_output_file_path }} --smaller-is-better-metrics-output-file-path ${{ inputs.smaller_is_better_output_file_path }} --observation-metrics-output-file-path ${{ inputs.observation_metrics_output_file_path }} || SUCCESS=$?
       echo "test=${SUCCESS}" >> "$GITHUB_OUTPUT"

diff --git a/.github/actions/nm-set-env/action.yml b/.github/actions/nm-set-env/action.yml
@@ -23,11 +23,11 @@ runs:
       fi
       # CUDA
       echo "TORCH_CUDA_ARCH_LIST=7.0 7.5 8.0 8.6 8.9 9.0+PTX" >> $GITHUB_ENV
-      echo "PATH=/usr/local/apps/pyenv/plugins/pyenv-virtualenv/shims:/usr/local/apps/pyenv/shims:/usr/local/apps/pyenv/bin:/usr/local/apps/nvm/versions/node/v19.9.0/bin:/usr/local/apps/nvm/versions/node/v16.20.2/bin:/usr/local/cuda-12.1/bin:/usr/local/cuda-12.1/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/home/${WHOAMI}/.local/bin:" >> $GITHUB_ENV
-      echo "LD_LIBRARY_PATH=/usr/local/cuda-12.1/lib64::/usr/local/cuda-12.1/lib64:" >> $GITHUB_ENV
+      echo "PATH=${PATH}:/usr/local/apps/pyenv/plugins/pyenv-virtualenv/shims:/usr/local/apps/pyenv/shims:/usr/local/apps/pyenv/bin:/usr/local/apps/nvm/versions/node/v19.9.0/bin:/usr/local/apps/nvm/versions/node/v16.20.2/bin:/usr/local/cuda-12.1/bin:/usr/local/cuda-12.1/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/home/${WHOAMI}/.local/bin:" >> $GITHUB_ENV
+      echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda-12.1/lib64::/usr/local/cuda-12.1/lib64:" >> $GITHUB_ENV
       # HF Cache
       echo "HF_TOKEN=${HF_TOKEN_SECRET}" >> $GITHUB_ENV
-      echo "HF_HOME=/EFS/hf_home" >> $GITHUB_ENV
+      echo "HF_HOME=/model-cache" >> $GITHUB_ENV
       # build
       NUM_THREADS=$(./.github/scripts/determine-threading -G ${{ inputs.Gi_per_thread }})
       echo "MAX_JOBS=${NUM_THREADS}" >> $GITHUB_ENV

diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml
@@ -24,12 +24,12 @@ jobs:
             python: 3.8.17
             gitref: ${{ github.ref }}
 
-            test_label_solo: aws-avx2-32G-a10g-24G
-            test_label_multi: aws-avx2-192G-4-a10g-96G
+            test_label_solo: aws-test-a10g-24G
+            test_label_multi: aws-test-4-a10g-96G
             test_timeout: 480
             test_skip_list: neuralmagic/tests/skip-for-remote-push-tmp.txt
 
-            benchmark_label: aws-avx2-32G-a10g-24G
+            benchmark_label: aws-test-a10g-24G
             benchmark_config_list_file: ./.github/data/nm_benchmark_nightly_configs_list.txt
             benchmark_timeout: 720
             push_benchmark_results_to_gh_pages: "${{ github.event_name == 'schedule' || inputs.push_benchmark_results_to_gh_pages }}"
@@ -42,12 +42,12 @@ jobs:
             python: 3.9.17
             gitref: ${{ github.ref }}
 
-            test_label_solo: aws-avx2-32G-a10g-24G
-            test_label_multi: aws-avx2-192G-4-a10g-96G
+            test_label_solo: aws-test-a10g-24G
+            test_label_multi: aws-test-4-a10g-96G
             test_timeout: 480
             test_skip_list: neuralmagic/tests/skip-for-remote-push-tmp.txt
 
-            benchmark_label: aws-avx2-32G-a10g-24G
+            benchmark_label: aws-test-a10g-24G
             benchmark_config_list_file: ./.github/data/nm_benchmark_nightly_configs_list.txt
             benchmark_timeout: 720
             push_benchmark_results_to_gh_pages: "${{ github.event_name == 'schedule' || inputs.push_benchmark_results_to_gh_pages }}"
@@ -60,12 +60,12 @@ jobs:
             python: 3.10.12
             gitref: ${{ github.ref }}
 
-            test_label_solo: aws-avx2-32G-a10g-24G
-            test_label_multi: aws-avx2-192G-4-a10g-96G
+            test_label_solo: aws-test-a10g-24G
+            test_label_multi: aws-test-4-a10g-96G
             test_timeout: 480
             test_skip_list: neuralmagic/tests/skip-for-nightly.txt
 
-            benchmark_label: aws-avx2-32G-a10g-24G
+            benchmark_label: aws-test-a10g-24G
             benchmark_config_list_file: ./.github/data/nm_benchmark_nightly_configs_list.txt
             benchmark_timeout: 720
             push_benchmark_results_to_gh_pages: "${{ github.event_name == 'schedule' || inputs.push_benchmark_results_to_gh_pages }}"
@@ -78,12 +78,12 @@ jobs:
             python: 3.11.4
             gitref: ${{ github.ref }}
 
-            test_label_solo: aws-avx2-32G-a10g-24G
-            test_label_multi: aws-avx2-192G-4-a10g-96G
+            test_label_solo: aws-test-a10g-24G
+            test_label_multi: aws-test-4-a10g-96G
             test_timeout: 480
             test_skip_list: neuralmagic/tests/skip-for-remote-push-tmp.txt
 
-            benchmark_label: aws-avx2-32G-a10g-24G
+            benchmark_label: aws-test-a10g-24G
             benchmark_config_list_file: ./.github/data/nm_benchmark_nightly_configs_list.txt
             benchmark_timeout: 720
             push_benchmark_results_to_gh_pages: "${{ github.event_name == 'schedule' || inputs.push_benchmark_results_to_gh_pages }}"

diff --git a/.github/workflows/nm-benchmark.yml b/.github/workflows/nm-benchmark.yml
@@ -67,9 +67,6 @@ on:
           - 'false'
         default: 'false'
 
-env:
-    VENV_BASE: "BENCHMARK"
-
 jobs:
 
   BENCHMARK:
@@ -81,6 +78,13 @@ jobs:
       gh_action_benchmark_input_artifact_name: ${{ steps.set_gh_action_benchmark_input_artifact_name.outputs.gh_action_benchmark_input_artifact_name}}
 
     steps:
+
+      - name: set python
+        id: set_python
+        uses: actions/setup-python@v5
+        with:
+            python-version: ${{ inputs.python }}
+
       - name: checkout repository code
         uses: actions/checkout@v4
         with:
@@ -96,13 +100,6 @@ jobs:
           Gi_per_thread: 1
           nvcc_threads: 0
 
-      - name: set python
-        id: set_python
-        uses: ./.github/actions/nm-set-python/
-        with:
-          python: ${{ inputs.python }}
-          venv: ${{ env.VENV_BASE }}
-
       - name: hf cache
         id: hf_cache
         uses: ./.github/actions/nm-hf-cache/
@@ -121,15 +118,15 @@ jobs:
         uses: ./.github/actions/nm-install-whl/
         with:
             python: ${{ inputs.python }}
-            venv: ${{ env.VENV_BASE }}
+            venv:
 
       - name: run benchmarks
         uses: ./.github/actions/nm-benchmark/
         with:
           benchmark_config_list_file: ${{ inputs.benchmark_config_list_file }}
           output_directory: benchmark-results
           python: ${{ inputs.python }}
-          venv: ${{ env.VENV_BASE }}
+          venv:
 
       - name: store benchmark result artifacts
         if: success()
@@ -139,6 +136,12 @@ jobs:
           path: benchmark-results
           retention-days: 2
 
+      - name: mount EFS
+        run: |
+            sudo mkdir -m 777 -p /EFS
+            sudo mount -t nfs4 -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,noresvport ${{ secrets.HF_FS_CACHE }}:/ /EFS
+            sudo chown -R $(whoami):$(whoami) /EFS
+
       - name: copy benchmark results to EFS store
         if: success()
         uses: ./.github/actions/nm-copy-benchmark-data-to-efs
@@ -160,7 +163,7 @@ jobs:
           # Metrics that we only want to observe are stored here
           observation_metrics_output_file_path: gh-action-benchmark-jsons/observation_metrics.json
           python: ${{ inputs.python }}
-          venv: ${{ env.VENV_BASE }}
+          venv:
 
       - name: set gh action benchmark input artifact name
         id: set_gh_action_benchmark_input_artifact_name

diff --git a/.github/workflows/nm-test-accuracy-full.yml b/.github/workflows/nm-test-accuracy-full.yml
@@ -64,16 +64,20 @@ on:
         type: string
         required: true
 
-env:
-  VENV_BASE: "LM_EVAL"
-
 jobs:
   TEST-ACCURACY-FULL:
 
     runs-on: ${{ inputs.label }}
     timeout-minutes: ${{ fromJSON(inputs.timeout) }}
 
     steps:
+
+      - name: set python
+        id: set_python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ inputs.python }}
+
       - name: checkout repository code
         uses: actions/checkout@v4
         with:
@@ -89,13 +93,6 @@ jobs:
           Gi_per_thread: ${{ inputs.Gi_per_thread }}
           nvcc_threads: ${{ inputs.nvcc_threads }}
 
-      - name: set python
-        id: set_python
-        uses: ./.github/actions/nm-set-python/
-        with:
-          python: ${{ inputs.python }}
-          venv: ${{ env.VENV_BASE }}
-
       - name: hf cache
         id: hf_cache
         uses: ./.github/actions/nm-hf-cache/
@@ -114,10 +111,10 @@ jobs:
         uses: ./.github/actions/nm-install-whl/
         with:
             python: ${{ inputs.python }}
-            venv: ${{ env.VENV_BASE }}
+            venv:
 
       - name: run lm-eval-accuracy
         uses: ./.github/actions/nm-lm-eval-accuracy/
         with:
           python: ${{ inputs.python }}
-          venv: ${{ env.VENV_BASE }}
+          venv:
diff --git a/.github/workflows/nm-test-accuracy-smoke.yml b/.github/workflows/nm-test-accuracy-smoke.yml
@@ -64,16 +64,20 @@ on:
         type: string
         required: true
 
-env:
-  VENV_BASE: "LM_EVAL"
-
 jobs:
   TEST-ACCURACY-SMOKE:
 
     runs-on: ${{ inputs.label }}
     timeout-minutes: ${{ fromJSON(inputs.timeout) }}
 
     steps:
+
+      - name: set python
+        id: set_python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ inputs.python }}
+
       - name: checkout repository code
         uses: actions/checkout@v4
         with:
@@ -89,13 +93,6 @@ jobs:
           Gi_per_thread: ${{ inputs.Gi_per_thread }}
           nvcc_threads: ${{ inputs.nvcc_threads }}
 
-      - name: set python
-        id: set_python
-        uses: ./.github/actions/nm-set-python/
-        with:
-          python: ${{ inputs.python }}
-          venv: ${{ env.VENV_BASE }}
-
       - name: hf cache
         id: hf_cache
         uses: ./.github/actions/nm-hf-cache/
@@ -114,10 +111,10 @@ jobs:
         uses: ./.github/actions/nm-install-whl/
         with:
             python: ${{ inputs.python }}
-            venv: ${{ env.VENV_BASE }}
+            venv:
 
       - name: run lm-eval-smoke
         uses: ./.github/actions/nm-lm-eval-smoke/
         with:
           python: ${{ inputs.python }}
-          venv: ${{ env.VENV_BASE }}
+          venv: