From 3a89020dfd3f1a4e974b1d1f0181a59cdddd5806 Mon Sep 17 00:00:00 2001 From: wugeshui Date: Tue, 30 Jan 2024 15:49:55 +0800 Subject: [PATCH 01/30] add v100 on 1424 --- .github/workflows/runs_on_1424.yml | 214 +++++++++++++++++++++++++++++ 1 file changed, 214 insertions(+) create mode 100644 .github/workflows/runs_on_1424.yml diff --git a/.github/workflows/runs_on_1424.yml b/.github/workflows/runs_on_1424.yml new file mode 100644 index 000000000..7640ed70f --- /dev/null +++ b/.github/workflows/runs_on_1424.yml @@ -0,0 +1,214 @@ +name: runs on 1424 + +on: + workflow_dispatch: + pull_request: + paths-ignore: + - "**.md" + - ".github/ISSUE_TEMPLATE/**" + - ".git*" + - "CODE_OF_CONDUCT**" + +concurrency: + group: 1424-${{ github.head_ref || github.ref }} + cancel-in-progress: true + +env: + NFS_PATH: '/mnt/cache/share/parrotsci/github/cibuild/${{ github.repository }}' + ENV_PATH: '/mnt/cache/share/platform/cienv' + CLUSTER_V100: SH1424 + +jobs: + Checkout-code: + name: Checkout code + runs-on: github-poc-ci + steps: + - name: clone repo + run: | + set -ex + cd ${GITHUB_WORKSPACE} && rm -rf source + if [ -n "${{ github.event.pull_request.head.repo.full_name }}" ] && [[ ! "${{ github.event.pull_request.head.repo.full_name }}" == "${{ github.repository }}" ]]; then + git clone ${{ github.event.pull_request.head.repo.ssh_url }} source + cd ${GITHUB_WORKSPACE}/source && git checkout ${{ github.event.pull_request.head.sha }} + git remote add mainrepo git@github.com:${GITHUB_REPOSITORY}.git + git fetch mainrepo && git merge --no-edit mainrepo/${{ github.base_ref }} + else + git clone ${{ github.event.repository.clone_url }} source && cd source + if [ $GITHUB_EVENT_NAME == "pull_request" ]; then + echo "${{ github.base_ref }} " + git checkout ${{ github.event.pull_request.head.sha }} && git merge --no-edit origin/${{ github.base_ref }} + else + echo $GITHUB_EVENT_NAME + git checkout ${{ github.sha }} + fi + fi + cd ${GITHUB_WORKSPACE}/source/dipu/third_party && rm -rf DIOPI && git clone https://github.com/DeepLink-org/DIOPI.git + rm -rf kineto && git clone --reference /home/autolink/rsync/sourcecode/DeepLink-org/kineto https://github.com/DeepLink-org/kineto.git kineto + cd ../.. && git submodule update --init && cd dipu/third_party/kineto && git submodule update --init + cd ${GITHUB_WORKSPACE}/source/dipu && bash /home/autolink/rsync/sourcecode/update_code.sh + rsync -a /home/autolink/rsync/sourcecode/mmlab_pack . && cd mmlab_pack + bash ../scripts/ci/ci_one_iter.sh clone + - name: Rsync to Server + run: | + ssh ${CLUSTER_V100} "mkdir -p ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source-main" \ + && rsync -a --delete ${GITHUB_WORKSPACE}/${REPO}/ ${CLUSTER_V100}:${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source/ \ + && rsync -a --delete ${GITHUB_WORKSPACE}/${REPO}_DIOPI/ ${CLUSTER_V100}:${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source-main/ || echo "failure to connect to cuda" + + Build-Cuda: + name: Build-dipu-cuda + needs: [Rsync] + runs-on: github-poc-ci + env: + GPU_REQUESTS: 1 + steps: + - name: Build dipu + run: | + ssh ${CUDA_CLUSTER} """ + set -e + export USE_COVERAGE=ON + cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source ${GITHUB_JOB} && cd ${GITHUB_JOB}/dipu + source scripts/ci/nv/ci_nv_env.sh + rsync -a /mnt/lustre/share_data/PAT/datasets/huggingface mmlab_pack/ + srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:${GPU_REQUESTS} --time=30 bash scripts/ci/nv/ci_nv_script.sh build_dipu \ + || ( cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 ) + """ + + Tidy-Cuda: + name: Run tidy (cuda) + needs: [Build-Cuda] + runs-on: github-poc-ci + steps: + - name: clang-tidy + run: | + ssh $CUDA_CLUSTER """ + set -eo pipefail + source ~/.bashrc + cd $DEEPLINK_PATH/$GITHUB_RUN_NUMBER/Build-Cuda && + git diff -s --exit-code dipu/SupportedDiopiFunctions.txt || + { echo "::error file=dipu/SupportedDiopiFunctions.txt,title=File Not Match::Please commit your compiled SupportedDiopiFunctions.txt" && exit 1; } + srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} bash -c "bash $DEEPLINK_PATH/$GITHUB_RUN_NUMBER/Build-Cuda/dipu/scripts/ci/nv/ci_nv_tidy.sh" + """ + + Test-Cuda: + name: Test-dipu-cuda + needs: [Build-Cuda, Tidy-Cuda] + runs-on: github-poc-ci + env: + GPU_REQUESTS: 1 + steps: + - name: Run-test + run: | + ssh ${CUDA_CLUSTER} """ + set -ex + export USE_COVERAGE=ON + cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu + source scripts/ci/nv/ci_nv_env.sh + srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:${GPU_REQUESTS} --cpus-per-task=5 --mem=16G --time=70 sh tests/run_nv_tests.sh + if [ "${ALL_COVERAGE}" = "ON" ]; then + bash /mnt/cache/share/platform/dep/sonar/coverage_DIPU_nv.sh ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda ${GITHUB_RUN_NUMBER} || echo "get coverage fail" + fi + """ + - name: increment coverage check + if: ${{ contains( github.event_name, 'pull_request' ) && contains( github.base_ref, 'main' ) }} + run: | + ssh ${CUDA_CLUSTER} """ + set -e + cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/ + rm -rf scripts + ln -s ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/source-main/dipu/third_party/DIOPI/scripts scripts + source /mnt/cache/share/platform/env/pt2.0_diopi + bash scripts/increment_coverage.sh ${REQUIRE_COVERAGE} + """ + + Test-One-Iter_Cuda: + name: Test-one-iter-cuda + needs: [Build-Cuda, Tidy-Cuda] + runs-on: github-poc-ci + env: + GPU_REQUESTS: 1 + steps: + - name: build some env + run: | + ssh ${CUDA_CLUSTER} """ + set -ex + cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu + source scripts/ci/nv/ci_nv_env.sh + basic_path=${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack + export PYTHONPATH=\${basic_path}/mmengine:\$PYTHONPATH + export PYTHONPATH=\${basic_path}/mmcv:\$PYTHONPATH + export PYTHONPATH=\$(pwd):\$PYTHONPATH + cd mmlab_pack + srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:${GPU_REQUESTS} --time=20 bash ../scripts/ci/ci_one_iter.sh build_cuda + """ + - name: run-one-iter-for-tradition + run: | + ssh ${CUDA_CLUSTER} """ + cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu + source scripts/ci/nv/ci_nv_env.sh + basic_path=${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack + source scripts/ci/ci_one_iter.sh export_pythonpath_cuda \${basic_path} + export PYTHONPATH=\$(pwd):\$PYTHONPATH + cd mmlab_pack + rm -rf one_iter_data + python ../scripts/ci/ci_run_one_iter.py cuda ${GITHUB_RUN_NUMBER}_${GITHUB_JOB} "gpu:${GPU_REQUESTS}" \"${CUDA_PARTATION}\" && rm -rf one_iter_data || (rm -rf one_iter_data && exit 1) + """ + - name: run-one-iter-for-llm + run: | + ssh ${CUDA_CLUSTER} """ + cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu + source scripts/ci/nv/ci_nv_env.sh + basic_path=${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack + source scripts/ci/ci_one_iter.sh export_pythonpath_cuda \${basic_path} + export PYTHONPATH=\$(pwd):\$PYTHONPATH + cd mmlab_pack + rm -rf one_iter_data + python ../scripts/ci/ci_run_one_iter.py cuda ${GITHUB_RUN_NUMBER}_${GITHUB_JOB} "gpu:${GPU_REQUESTS}" \"${CUDA_PARTATION}\" "llm" && rm -rf one_iter_data || (rm -rf one_iter_data && exit 1) + """ + - name: Perform cleanup one iter data + if: always() + run: | + ssh ${CUDA_CLUSTER} """ + set -ex + echo "${GITHUB_RUN_NUMBER}_${GITHUB_JOB}" + scancel -n "${GITHUB_RUN_NUMBER}_${GITHUB_JOB}" + cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack + rm -rf one_iter_data + touch one_iter_data # 用于占位,防止创建新的 one_iter_data 文件夹 + """ + - name: Check for failure + if: ${{ failure() }} + run: exit 1 + + Build-Cuda-Latest-Target: + name: Build-dipu-cuda-latest-target + needs: [Tidy-Cuda] + runs-on: github-poc-ci + env: + GPU_REQUESTS: 1 + steps: + - name: Build dipu diopi-latest-target + run: | + ssh ${CUDA_CLUSTER} """ + set -ex + cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source-main ${GITHUB_JOB} && cd ${GITHUB_JOB}/dipu + source scripts/ci/nv/ci_nv_env.sh + srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:${GPU_REQUESTS} --cpus-per-task=5 --mem=16G --time=30 bash scripts/ci/nv/ci_nv_script.sh build_dipu \ + || ( cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 ) + """ + + Test-Cuda-Latest-Target: + name: Test-dipu-cuda-latest-target + needs: [Build-Cuda-Latest-Target] + runs-on: github-poc-ci + env: + GPU_REQUESTS: 1 + steps: + - name: Run-test + run: | + ssh ${CUDA_CLUSTER} """ + set -ex + cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda-Latest-Target/dipu + source scripts/ci/nv/ci_nv_env.sh + srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:${GPU_REQUESTS} --cpus-per-task=5 --mem=16G --time=60 sh tests/run_nv_tests.sh && cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf Build-Cuda-Latest-Target \ + || ( cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 ) + """ \ No newline at end of file From b38a1e4483dd95482a47b735334c42a2e9c7ea98 Mon Sep 17 00:00:00 2001 From: wugeshui Date: Tue, 30 Jan 2024 15:52:27 +0800 Subject: [PATCH 02/30] add v100 on 1424 --- .github/workflows/runs_on_1424.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/runs_on_1424.yml b/.github/workflows/runs_on_1424.yml index 7640ed70f..eca9b495b 100644 --- a/.github/workflows/runs_on_1424.yml +++ b/.github/workflows/runs_on_1424.yml @@ -19,8 +19,8 @@ env: CLUSTER_V100: SH1424 jobs: - Checkout-code: - name: Checkout code + Rsync: + name: Rsync code runs-on: github-poc-ci steps: - name: clone repo From 2788c0da6415615fca80fa01b27ccd1702680461 Mon Sep 17 00:00:00 2001 From: wugeshui Date: Tue, 30 Jan 2024 16:02:32 +0800 Subject: [PATCH 03/30] add v100 on 1424 --- .github/workflows/runs_on_1424.yml | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/.github/workflows/runs_on_1424.yml b/.github/workflows/runs_on_1424.yml index eca9b495b..f3bcc0ad5 100644 --- a/.github/workflows/runs_on_1424.yml +++ b/.github/workflows/runs_on_1424.yml @@ -26,14 +26,14 @@ jobs: - name: clone repo run: | set -ex - cd ${GITHUB_WORKSPACE} && rm -rf source + cd ${GITHUB_WORKSPACE} && rm -rf DIPU ${REPO}_DIOPI ${REPO} ${REPO}.dev if [ -n "${{ github.event.pull_request.head.repo.full_name }}" ] && [[ ! "${{ github.event.pull_request.head.repo.full_name }}" == "${{ github.repository }}" ]]; then - git clone ${{ github.event.pull_request.head.repo.ssh_url }} source - cd ${GITHUB_WORKSPACE}/source && git checkout ${{ github.event.pull_request.head.sha }} + git clone ${{ github.event.pull_request.head.repo.ssh_url }} ${REPO} + cd ${GITHUB_WORKSPACE}/${REPO} && git checkout ${{ github.event.pull_request.head.sha }} git remote add mainrepo git@github.com:${GITHUB_REPOSITORY}.git git fetch mainrepo && git merge --no-edit mainrepo/${{ github.base_ref }} else - git clone ${{ github.event.repository.clone_url }} source && cd source + git clone https://github.com/DeepLink-org/${REPO}.git && cd ${REPO} if [ $GITHUB_EVENT_NAME == "pull_request" ]; then echo "${{ github.base_ref }} " git checkout ${{ github.event.pull_request.head.sha }} && git merge --no-edit origin/${{ github.base_ref }} @@ -42,12 +42,18 @@ jobs: git checkout ${{ github.sha }} fi fi - cd ${GITHUB_WORKSPACE}/source/dipu/third_party && rm -rf DIOPI && git clone https://github.com/DeepLink-org/DIOPI.git - rm -rf kineto && git clone --reference /home/autolink/rsync/sourcecode/DeepLink-org/kineto https://github.com/DeepLink-org/kineto.git kineto - cd ../.. && git submodule update --init && cd dipu/third_party/kineto && git submodule update --init - cd ${GITHUB_WORKSPACE}/source/dipu && bash /home/autolink/rsync/sourcecode/update_code.sh + cd ${GITHUB_WORKSPACE}/${REPO}/dipu && rm -rf third_party/kineto + git clone --reference /home/autolink/rsync/sourcecode/DeepLink-org/kineto https://github.com/DeepLink-org/kineto.git third_party/kineto + git submodule update --init && cd third_party/kineto && git submodule update --init + cd ${GITHUB_WORKSPACE} && cp -R ${REPO} ${REPO}_DIOPI + cd ${REPO}/dipu && bash /home/autolink/rsync/sourcecode/update_code.sh rsync -a /home/autolink/rsync/sourcecode/mmlab_pack . && cd mmlab_pack bash ../scripts/ci/ci_one_iter.sh clone + # dipu_diopi depend on latest target diopi branch, not diopi in submodule. here assume diopi and dipu use same 'target branch' " github.base_ref " + cd ${GITHUB_WORKSPACE}/${REPO}_DIOPI/dipu/third_party && rm -rf DIOPI && git clone https://github.com/DeepLink-org/DIOPI.git + if [ $GITHUB_EVENT_NAME == "pull_request" ]; then + cd ./DIOPI && git checkout ${{ github.base_ref }} + fi - name: Rsync to Server run: | ssh ${CLUSTER_V100} "mkdir -p ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source-main" \ From 22928d390e6a4bfa3672bebe9a5418d19daa51c5 Mon Sep 17 00:00:00 2001 From: wugeshui Date: Tue, 30 Jan 2024 16:41:35 +0800 Subject: [PATCH 04/30] add v100 on 1424 --- .github/workflows/runs_on_1424.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/runs_on_1424.yml b/.github/workflows/runs_on_1424.yml index f3bcc0ad5..e00bcd89e 100644 --- a/.github/workflows/runs_on_1424.yml +++ b/.github/workflows/runs_on_1424.yml @@ -17,6 +17,9 @@ env: NFS_PATH: '/mnt/cache/share/parrotsci/github/cibuild/${{ github.repository }}' ENV_PATH: '/mnt/cache/share/platform/cienv' CLUSTER_V100: SH1424 + REPO: ${{ github.event.repository.name }} + ALL_COVERAGE: ${{ (contains( github.ref, 'main') || startsWith(github.ref, 'refs/heads/v') || startsWith(github.ref, 'refs/heads/dev')) && 'ON' || 'OFF' }} + REQUIRE_COVERAGE: ${{ vars.REQUIRE_COVERAGE != '' && vars.REQUIRE_COVERAGE || '0' }} jobs: Rsync: From d41a583989241ab79c0372448aa4eef07185914a Mon Sep 17 00:00:00 2001 From: wugeshui Date: Tue, 30 Jan 2024 16:46:44 +0800 Subject: [PATCH 05/30] add v100 on 1424 --- .github/workflows/runs_on_1424.yml | 37 +++++++++++++++--------------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/.github/workflows/runs_on_1424.yml b/.github/workflows/runs_on_1424.yml index e00bcd89e..b4d0bfd0f 100644 --- a/.github/workflows/runs_on_1424.yml +++ b/.github/workflows/runs_on_1424.yml @@ -14,6 +14,7 @@ concurrency: cancel-in-progress: true env: + DEEPLINK_PATH: '/mnt/cache/share/deeplinkci/github/${{ github.repository }}' NFS_PATH: '/mnt/cache/share/parrotsci/github/cibuild/${{ github.repository }}' ENV_PATH: '/mnt/cache/share/platform/cienv' CLUSTER_V100: SH1424 @@ -75,11 +76,11 @@ jobs: ssh ${CUDA_CLUSTER} """ set -e export USE_COVERAGE=ON - cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source ${GITHUB_JOB} && cd ${GITHUB_JOB}/dipu + cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source ${GITHUB_JOB} && cd ${GITHUB_JOB}/dipu source scripts/ci/nv/ci_nv_env.sh rsync -a /mnt/lustre/share_data/PAT/datasets/huggingface mmlab_pack/ srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:${GPU_REQUESTS} --time=30 bash scripts/ci/nv/ci_nv_script.sh build_dipu \ - || ( cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 ) + || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 ) """ Tidy-Cuda: @@ -110,11 +111,11 @@ jobs: ssh ${CUDA_CLUSTER} """ set -ex export USE_COVERAGE=ON - cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu + cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu source scripts/ci/nv/ci_nv_env.sh srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:${GPU_REQUESTS} --cpus-per-task=5 --mem=16G --time=70 sh tests/run_nv_tests.sh if [ "${ALL_COVERAGE}" = "ON" ]; then - bash /mnt/cache/share/platform/dep/sonar/coverage_DIPU_nv.sh ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda ${GITHUB_RUN_NUMBER} || echo "get coverage fail" + bash /mnt/cache/share/platform/dep/sonar/coverage_DIPU_nv.sh ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda ${GITHUB_RUN_NUMBER} || echo "get coverage fail" fi """ - name: increment coverage check @@ -122,9 +123,9 @@ jobs: run: | ssh ${CUDA_CLUSTER} """ set -e - cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/ + cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/ rm -rf scripts - ln -s ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/source-main/dipu/third_party/DIOPI/scripts scripts + ln -s ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source-main/dipu/third_party/DIOPI/scripts scripts source /mnt/cache/share/platform/env/pt2.0_diopi bash scripts/increment_coverage.sh ${REQUIRE_COVERAGE} """ @@ -140,9 +141,9 @@ jobs: run: | ssh ${CUDA_CLUSTER} """ set -ex - cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu + cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu source scripts/ci/nv/ci_nv_env.sh - basic_path=${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack + basic_path=${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack export PYTHONPATH=\${basic_path}/mmengine:\$PYTHONPATH export PYTHONPATH=\${basic_path}/mmcv:\$PYTHONPATH export PYTHONPATH=\$(pwd):\$PYTHONPATH @@ -152,9 +153,9 @@ jobs: - name: run-one-iter-for-tradition run: | ssh ${CUDA_CLUSTER} """ - cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu + cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu source scripts/ci/nv/ci_nv_env.sh - basic_path=${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack + basic_path=${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack source scripts/ci/ci_one_iter.sh export_pythonpath_cuda \${basic_path} export PYTHONPATH=\$(pwd):\$PYTHONPATH cd mmlab_pack @@ -164,9 +165,9 @@ jobs: - name: run-one-iter-for-llm run: | ssh ${CUDA_CLUSTER} """ - cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu + cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu source scripts/ci/nv/ci_nv_env.sh - basic_path=${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack + basic_path=${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack source scripts/ci/ci_one_iter.sh export_pythonpath_cuda \${basic_path} export PYTHONPATH=\$(pwd):\$PYTHONPATH cd mmlab_pack @@ -180,7 +181,7 @@ jobs: set -ex echo "${GITHUB_RUN_NUMBER}_${GITHUB_JOB}" scancel -n "${GITHUB_RUN_NUMBER}_${GITHUB_JOB}" - cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack + cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack rm -rf one_iter_data touch one_iter_data # 用于占位,防止创建新的 one_iter_data 文件夹 """ @@ -199,10 +200,10 @@ jobs: run: | ssh ${CUDA_CLUSTER} """ set -ex - cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source-main ${GITHUB_JOB} && cd ${GITHUB_JOB}/dipu + cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source-main ${GITHUB_JOB} && cd ${GITHUB_JOB}/dipu source scripts/ci/nv/ci_nv_env.sh srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:${GPU_REQUESTS} --cpus-per-task=5 --mem=16G --time=30 bash scripts/ci/nv/ci_nv_script.sh build_dipu \ - || ( cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 ) + || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 ) """ Test-Cuda-Latest-Target: @@ -216,8 +217,8 @@ jobs: run: | ssh ${CUDA_CLUSTER} """ set -ex - cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda-Latest-Target/dipu + cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda-Latest-Target/dipu source scripts/ci/nv/ci_nv_env.sh - srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:${GPU_REQUESTS} --cpus-per-task=5 --mem=16G --time=60 sh tests/run_nv_tests.sh && cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf Build-Cuda-Latest-Target \ - || ( cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 ) + srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:${GPU_REQUESTS} --cpus-per-task=5 --mem=16G --time=60 sh tests/run_nv_tests.sh && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf Build-Cuda-Latest-Target \ + || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 ) """ \ No newline at end of file From 49d8397bc44d19d9177d31a1d3a5d0c00c1b9b87 Mon Sep 17 00:00:00 2001 From: wugeshui Date: Tue, 30 Jan 2024 16:59:03 +0800 Subject: [PATCH 06/30] add v100 on 1424 --- .github/workflows/runs_on_1424.yml | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/runs_on_1424.yml b/.github/workflows/runs_on_1424.yml index b4d0bfd0f..636fb0a56 100644 --- a/.github/workflows/runs_on_1424.yml +++ b/.github/workflows/runs_on_1424.yml @@ -16,7 +16,7 @@ concurrency: env: DEEPLINK_PATH: '/mnt/cache/share/deeplinkci/github/${{ github.repository }}' NFS_PATH: '/mnt/cache/share/parrotsci/github/cibuild/${{ github.repository }}' - ENV_PATH: '/mnt/cache/share/platform/cienv' + CUDA_PARTATION: ${{ vars.SH1988_SLURM_PAR != '' && vars.SH1988_SLURM_PAR || 'pat_dev' }} CLUSTER_V100: SH1424 REPO: ${{ github.event.repository.name }} ALL_COVERAGE: ${{ (contains( github.ref, 'main') || startsWith(github.ref, 'refs/heads/v') || startsWith(github.ref, 'refs/heads/dev')) && 'ON' || 'OFF' }} @@ -73,7 +73,7 @@ jobs: steps: - name: Build dipu run: | - ssh ${CUDA_CLUSTER} """ + ssh ${CLUSTER_V100} """ set -e export USE_COVERAGE=ON cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source ${GITHUB_JOB} && cd ${GITHUB_JOB}/dipu @@ -90,7 +90,7 @@ jobs: steps: - name: clang-tidy run: | - ssh $CUDA_CLUSTER """ + ssh ${CLUSTER_V100} """ set -eo pipefail source ~/.bashrc cd $DEEPLINK_PATH/$GITHUB_RUN_NUMBER/Build-Cuda && @@ -108,7 +108,7 @@ jobs: steps: - name: Run-test run: | - ssh ${CUDA_CLUSTER} """ + ssh ${CLUSTER_V100} """ set -ex export USE_COVERAGE=ON cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu @@ -121,7 +121,7 @@ jobs: - name: increment coverage check if: ${{ contains( github.event_name, 'pull_request' ) && contains( github.base_ref, 'main' ) }} run: | - ssh ${CUDA_CLUSTER} """ + ssh ${CLUSTER_V100} """ set -e cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/ rm -rf scripts @@ -139,7 +139,7 @@ jobs: steps: - name: build some env run: | - ssh ${CUDA_CLUSTER} """ + ssh ${CLUSTER_V100} """ set -ex cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu source scripts/ci/nv/ci_nv_env.sh @@ -152,7 +152,7 @@ jobs: """ - name: run-one-iter-for-tradition run: | - ssh ${CUDA_CLUSTER} """ + ssh ${CLUSTER_V100} """ cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu source scripts/ci/nv/ci_nv_env.sh basic_path=${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack @@ -164,7 +164,7 @@ jobs: """ - name: run-one-iter-for-llm run: | - ssh ${CUDA_CLUSTER} """ + ssh ${CLUSTER_V100} """ cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu source scripts/ci/nv/ci_nv_env.sh basic_path=${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack @@ -177,7 +177,7 @@ jobs: - name: Perform cleanup one iter data if: always() run: | - ssh ${CUDA_CLUSTER} """ + ssh ${CLUSTER_V100} """ set -ex echo "${GITHUB_RUN_NUMBER}_${GITHUB_JOB}" scancel -n "${GITHUB_RUN_NUMBER}_${GITHUB_JOB}" @@ -198,7 +198,7 @@ jobs: steps: - name: Build dipu diopi-latest-target run: | - ssh ${CUDA_CLUSTER} """ + ssh ${CLUSTER_V100} """ set -ex cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source-main ${GITHUB_JOB} && cd ${GITHUB_JOB}/dipu source scripts/ci/nv/ci_nv_env.sh @@ -215,7 +215,7 @@ jobs: steps: - name: Run-test run: | - ssh ${CUDA_CLUSTER} """ + ssh ${CLUSTER_V100} """ set -ex cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda-Latest-Target/dipu source scripts/ci/nv/ci_nv_env.sh From 1f4b15dbb5846c63a2e7d0ef97b71444d9f03cfb Mon Sep 17 00:00:00 2001 From: wugeshui Date: Tue, 30 Jan 2024 17:17:04 +0800 Subject: [PATCH 07/30] add v100 on 1424 --- .github/workflows/runs_on_1424.yml | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/.github/workflows/runs_on_1424.yml b/.github/workflows/runs_on_1424.yml index 636fb0a56..c9e4f292d 100644 --- a/.github/workflows/runs_on_1424.yml +++ b/.github/workflows/runs_on_1424.yml @@ -16,6 +16,7 @@ concurrency: env: DEEPLINK_PATH: '/mnt/cache/share/deeplinkci/github/${{ github.repository }}' NFS_PATH: '/mnt/cache/share/parrotsci/github/cibuild/${{ github.repository }}' + ENV_PATH: '/mnt/cache/share/deeplinkci/github' CUDA_PARTATION: ${{ vars.SH1988_SLURM_PAR != '' && vars.SH1988_SLURM_PAR || 'pat_dev' }} CLUSTER_V100: SH1424 REPO: ${{ github.event.repository.name }} @@ -77,7 +78,7 @@ jobs: set -e export USE_COVERAGE=ON cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source ${GITHUB_JOB} && cd ${GITHUB_JOB}/dipu - source scripts/ci/nv/ci_nv_env.sh + source ${ENV_PATH}/dipu_env rsync -a /mnt/lustre/share_data/PAT/datasets/huggingface mmlab_pack/ srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:${GPU_REQUESTS} --time=30 bash scripts/ci/nv/ci_nv_script.sh build_dipu \ || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 ) @@ -112,7 +113,7 @@ jobs: set -ex export USE_COVERAGE=ON cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu - source scripts/ci/nv/ci_nv_env.sh + source ${ENV_PATH}/dipu_env srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:${GPU_REQUESTS} --cpus-per-task=5 --mem=16G --time=70 sh tests/run_nv_tests.sh if [ "${ALL_COVERAGE}" = "ON" ]; then bash /mnt/cache/share/platform/dep/sonar/coverage_DIPU_nv.sh ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda ${GITHUB_RUN_NUMBER} || echo "get coverage fail" @@ -142,7 +143,7 @@ jobs: ssh ${CLUSTER_V100} """ set -ex cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu - source scripts/ci/nv/ci_nv_env.sh + source ${ENV_PATH}/dipu_env basic_path=${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack export PYTHONPATH=\${basic_path}/mmengine:\$PYTHONPATH export PYTHONPATH=\${basic_path}/mmcv:\$PYTHONPATH @@ -154,7 +155,7 @@ jobs: run: | ssh ${CLUSTER_V100} """ cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu - source scripts/ci/nv/ci_nv_env.sh + source ${ENV_PATH}/dipu_env basic_path=${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack source scripts/ci/ci_one_iter.sh export_pythonpath_cuda \${basic_path} export PYTHONPATH=\$(pwd):\$PYTHONPATH @@ -166,7 +167,7 @@ jobs: run: | ssh ${CLUSTER_V100} """ cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu - source scripts/ci/nv/ci_nv_env.sh + source ${ENV_PATH}/dipu_env basic_path=${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack source scripts/ci/ci_one_iter.sh export_pythonpath_cuda \${basic_path} export PYTHONPATH=\$(pwd):\$PYTHONPATH @@ -201,7 +202,7 @@ jobs: ssh ${CLUSTER_V100} """ set -ex cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source-main ${GITHUB_JOB} && cd ${GITHUB_JOB}/dipu - source scripts/ci/nv/ci_nv_env.sh + source ${ENV_PATH}/dipu_env srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:${GPU_REQUESTS} --cpus-per-task=5 --mem=16G --time=30 bash scripts/ci/nv/ci_nv_script.sh build_dipu \ || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 ) """ @@ -218,7 +219,7 @@ jobs: ssh ${CLUSTER_V100} """ set -ex cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda-Latest-Target/dipu - source scripts/ci/nv/ci_nv_env.sh + source ${ENV_PATH}/dipu_env srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:${GPU_REQUESTS} --cpus-per-task=5 --mem=16G --time=60 sh tests/run_nv_tests.sh && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf Build-Cuda-Latest-Target \ || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 ) """ \ No newline at end of file From 7a5721a8716bb37898c477b75601bbb9f0963c47 Mon Sep 17 00:00:00 2001 From: wugeshui Date: Tue, 6 Feb 2024 10:55:26 +0800 Subject: [PATCH 08/30] add sco and 1424 --- .github/workflows/_runs-on-nv-step1.yml | 80 ++++++++ .github/workflows/_runs-on-nv-step2.yml | 236 ++++++++++++++++++++++++ .github/workflows/main.yml | 140 +++----------- .github/workflows/runs_on_1424.yml | 12 +- 4 files changed, 347 insertions(+), 121 deletions(-) create mode 100644 .github/workflows/_runs-on-nv-step1.yml create mode 100644 .github/workflows/_runs-on-nv-step2.yml diff --git a/.github/workflows/_runs-on-nv-step1.yml b/.github/workflows/_runs-on-nv-step1.yml new file mode 100644 index 000000000..474a5594c --- /dev/null +++ b/.github/workflows/_runs-on-nv-step1.yml @@ -0,0 +1,80 @@ +name: runs on nv step 1 + +on: + workflow_call: + inputs: + runner: + description: Set up the runner + type: string + required: false + default: "tps-sco-ci" + deeplink_path: + description: ci work home + type: string + required: false + default: '/mnt/cache/share/deeplinkci/github/${{ github.repository }}/' + run_result: + description: select which job to run + type: string + required: true + default: '' + env_path: + description: env file path + type: string + required: false + default: '/mnt/cache/share/platform/env' + +jobs: + Build-Cuda: + name: Build-dipu-cuda + runs-on: ${{ inputs.runner }} + env: + GETRUNNER: ${{ inputs.runner }} + DEEPLINK_PATH: ${{ inputs.deeplink_path }} + ENV_PATH: ${{ inputs.env_path }} + CUDA_PARTATION: "pat_dev" + steps: + - name: Build dipu + run: | + if [[ "${GETRUNNER}" == *sco* ]];then + set -e + cd ${DEEPLINK_PATH}/ && ls -al && find ${DEEPLINK_PATH}/ -maxdepth 1 -mmin +240 -type d |xargs rm -rf + cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source ${GITHUB_JOB} + srun --job-name=${GITHUB_JOB} bash -c "export USE_COVERAGE=ON && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${GITHUB_JOB}/dipu \ + && source ${ENV_PATH}/dipu_env \ + && rsync -a /mnt/lustre/share_data/PAT/datasets/huggingface mmlab_pack/ \ + && bash scripts/ci/nv/ci_nv_script.sh build_dipu" || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 ) + else + ssh SH1424 """ + set -e + export USE_COVERAGE=ON + cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source ${GITHUB_JOB} && cd ${GITHUB_JOB}/dipu + source ${ENV_PATH}/dipu_env + rsync -a /mnt/lustre/share_data/PAT/datasets/huggingface mmlab_pack/ + srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:1 --time=30 bash scripts/ci/nv/ci_nv_script.sh build_dipu \ + || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 ) + """ + fi + + Tidy-Cuda: + name: Run tidy (cuda) + needs: [Build-Cuda] + runs-on: ${{ inputs.runner }} + env: + GETRUNNER: ${{ inputs.runner }} + DEEPLINK_PATH: ${{ inputs.deeplink_path }} + ENV_PATH: ${{ inputs.env_path }} + CUDA_PARTATION: "pat_dev" + steps: + - name: Check SupportedDiopiFunctions.txt + run: | + cd $DEEPLINK_PATH/$GITHUB_RUN_NUMBER/Build-Cuda && + git diff -s --exit-code dipu/SupportedDiopiFunctions.txt || + { echo "::error file=dipu/SupportedDiopiFunctions.txt,title=File Not Match::Please commit your compiled SupportedDiopiFunctions.txt" && exit 1; } + - name: Run clang-tidy + run: | + if [[ "${GETRUNNER}" == *sco* ]];then + srun --job-name=$GITHUB_JOB bash -c "bash $DEEPLINK_PATH/$GITHUB_RUN_NUMBER/Build-Cuda/dipu/scripts/ci/nv/ci_nv_tidy.sh" + else + srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} bash -c "bash $DEEPLINK_PATH/$GITHUB_RUN_NUMBER/Build-Cuda/dipu/scripts/ci/nv/ci_nv_tidy.sh" + fi \ No newline at end of file diff --git a/.github/workflows/_runs-on-nv-step2.yml b/.github/workflows/_runs-on-nv-step2.yml new file mode 100644 index 000000000..33e3e4732 --- /dev/null +++ b/.github/workflows/_runs-on-nv-step2.yml @@ -0,0 +1,236 @@ +name: runs on nv step 2 + +on: + workflow_call: + inputs: + runner: + description: Set up the runner + type: string + required: false + default: "tps-sco-ci" + deeplink_path: + description: ci work home + type: string + required: false + default: '/mnt/cache/share/deeplinkci/github/${{ github.repository }}/' + run_result: + description: select which job to run + type: string + required: true + default: '' + env_path: + description: env file path + type: string + required: false + default: '/mnt/cache/share/platform/env' + all_coverage: + description: all coverage + type: string + required: true + default: 'false' + require_coverage: + description: input coverage rate + type: string + required: false + default: '0' +jobs: + Test-Cuda: + name: Test-dipu-cuda + runs-on: ${{ inputs.runner }} + env: + GETRUNNER: ${{ inputs.runner }} + DEEPLINK_PATH: ${{ inputs.deeplink_path }} + ENV_PATH: ${{ inputs.env_path }} + CUDA_PARTATION: "pat_dev" + ALL_COVERAGE: ${{ inputs.all_coverage }} + REQUIRE_COVERAGE: ${{ inputs.require_coverage }} + steps: + - name: Run-test + run: | + if [[ "${GETRUNNER}" == *sco* ]];then + set -e + srun --job-name=${GITHUB_JOB} bash -c "export USE_COVERAGE=ON && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu \ + && source ${ENV_PATH}/dipu_env \ + && bash tests/run_nv_tests.sh" + if [ "${ALL_COVERAGE}" = "ON" ]; then + bash /mnt/cache/share/platform/dep/sonar/coverage_DIPU_nv.sh ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda ${GITHUB_RUN_NUMBER} || echo "get coverage fail" + fi + else + ssh SH1424 """ + set -ex + export USE_COVERAGE=ON + cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu + source ${ENV_PATH}/dipu_env + srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:1 --cpus-per-task=5 --mem=16G --time=70 sh tests/run_nv_tests.sh + if [ "${ALL_COVERAGE}" = "ON" ]; then + bash /mnt/cache/share/platform/dep/sonar/coverage_DIPU_nv.sh ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda ${GITHUB_RUN_NUMBER} || echo "get coverage fail" + fi + """ + fi + - name: increment coverage check + if: ${{ contains( github.event_name, 'pull_request' ) && contains( github.base_ref, 'main' ) }} + run: | + if [[ "${GETRUNNER}" == *sco* ]];then + set -e + cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda + ln -s ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source-main/dipu/third_party/DIOPI/scripts scripts + source ${ENV_PATH}/dipu_env + bash scripts/increment_coverage.sh ${REQUIRE_COVERAGE} + else + ssh SH1424 """ + set -e + cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/ + rm -rf scripts + ln -s ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source-main/dipu/third_party/DIOPI/scripts scripts + source /mnt/cache/share/platform/env/pt2.0_diopi + bash scripts/increment_coverage.sh ${REQUIRE_COVERAGE} + """ + fi + + Test-One-Iter_Cuda: + name: Test-one-iter-cuda + needs: [Build-Cuda, Tidy-Cuda] + runs-on: ${{ inputs.runner }} + env: + GETRUNNER: ${{ inputs.runner }} + DEEPLINK_PATH: ${{ inputs.deeplink_path }} + ENV_PATH: ${{ inputs.env_path }} + CUDA_PARTATION: "pat_dev" + steps: + - name: build some env + run: | + if [[ "${GETRUNNER}" == *sco* ]];then + set -e + export basic_path=${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/ + srun --job-name=${GITHUB_JOB} bash -c "cd ${basic_path} \ + && export PYTHONPATH=${basic_path}/mmlab_pack:${basic_path}/mmlab_pack/mmengine:${basic_path}/mmlab_pack/mmcv:$PYTHONPATH \ + && source ${ENV_PATH}/dipu_env && cd mmlab_pack \ + && bash ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/scripts/ci/ci_one_iter.sh build_cuda" + else + ssh SH1424 """ + set -ex + cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu + source ${ENV_PATH}/dipu_env + basic_path=${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack + export PYTHONPATH=\${basic_path}/mmengine:\$PYTHONPATH + export PYTHONPATH=\${basic_path}/mmcv:\$PYTHONPATH + export PYTHONPATH=\$(pwd):\$PYTHONPATH + cd mmlab_pack + srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:1 --time=20 bash ../scripts/ci/ci_one_iter.sh build_cuda + """ + fi + - name: run-one-iter-for-tradition + run: | + if [[ "${GETRUNNER}" == *sco* ]];then + set -e + cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/ \ + && source ${ENV_PATH}/dipu_env && cd mmlab_pack \ + && rm -rf one_iter_data \ + && python ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/scripts/ci/ci_run_one_iter.py sco ${GITHUB_JOB} gpu sco && rm -rf one_iter_data || (rm -rf one_iter_data && exit 1) + else + ssh SH1424 """ + cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu + source ${ENV_PATH}/dipu_env + basic_path=${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack + source scripts/ci/ci_one_iter.sh export_pythonpath_cuda \${basic_path} + export PYTHONPATH=\$(pwd):\$PYTHONPATH + cd mmlab_pack + rm -rf one_iter_data + python ../scripts/ci/ci_run_one_iter.py cuda ${GITHUB_RUN_NUMBER}_${GITHUB_JOB} "gpu:1" \"${CUDA_PARTATION}\" && rm -rf one_iter_data || (rm -rf one_iter_data && exit 1) + """ + fi + - name: run-one-iter-for-llm + run: | + if [[ "${GETRUNNER}" == *sco* ]];then + set -e + cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/ \ + && source ${ENV_PATH}/dipu_env && cd mmlab_pack \ + && rm -rf one_iter_data \ + && python ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/scripts/ci/ci_run_one_iter.py sco ${GITHUB_JOB} gpu sco "llm" && rm -rf one_iter_data || (rm -rf one_iter_data && exit 1) + else + ssh SH1424 """ + cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu + source ${ENV_PATH}/dipu_env + basic_path=${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack + source scripts/ci/ci_one_iter.sh export_pythonpath_cuda \${basic_path} + export PYTHONPATH=\$(pwd):\$PYTHONPATH + cd mmlab_pack + rm -rf one_iter_data + python ../scripts/ci/ci_run_one_iter.py cuda ${GITHUB_RUN_NUMBER}_${GITHUB_JOB} "gpu:1" \"${CUDA_PARTATION}\" "llm" && rm -rf one_iter_data || (rm -rf one_iter_data && exit 1) + """ + fi + - name: Perform cleanup one iter data + if: always() + run: | + if [[ "${GETRUNNER}" == *sco* ]];then + set -e + cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack + rm -rf one_iter_data + touch one_iter_data #用于占位,防止创建新的 one_iter_data 文件夹 + else + ssh SH1424 """ + set -ex + echo "${GITHUB_RUN_NUMBER}_${GITHUB_JOB}" + scancel -n "${GITHUB_RUN_NUMBER}_${GITHUB_JOB}" + cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack + rm -rf one_iter_data + touch one_iter_data # 用于占位,防止创建新的 one_iter_data 文件夹 + """ + fi + - name: Check for failure + if: ${{ failure() }} + run: exit 1 + + Build-Cuda-Latest-Target: + name: Build-dipu-cuda-latest-target + runs-on: ${{ inputs.runner }} + env: + GETRUNNER: ${{ inputs.runner }} + DEEPLINK_PATH: ${{ inputs.deeplink_path }} + ENV_PATH: ${{ inputs.env_path }} + CUDA_PARTATION: "pat_dev" + steps: + - name: Build dipu diopi-latest-target + run: | + if [[ "${GETRUNNER}" == *sco* ]];then + set -e + cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source-main ${GITHUB_JOB} + srun --job-name=${GITHUB_JOB} bash -c "export USE_COVERAGE=ON && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${GITHUB_JOB}/dipu \ + && source ${ENV_PATH}/dipu_env \ + && bash scripts/ci/nv/ci_nv_script.sh build_dipu" || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 ) + else + ssh SH1424 """ + set -ex + cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source-main ${GITHUB_JOB} && cd ${GITHUB_JOB}/dipu + source ${ENV_PATH}/dipu_env + srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:1 --cpus-per-task=5 --mem=16G --time=30 bash scripts/ci/nv/ci_nv_script.sh build_dipu \ + || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 ) + """ + fi + + Test-Cuda-Latest-Target: + name: Test-dipu-cuda-latest-target + needs: [Build-Cuda-Latest-Target] + runs-on: ${{ inputs.runner }} + env: + GETRUNNER: ${{ inputs.runner }} + DEEPLINK_PATH: ${{ inputs.deeplink_path }} + ENV_PATH: ${{ inputs.env_path }} + CUDA_PARTATION: "pat_dev" + steps: + - name: Run-test + run: | + if [[ "${GETRUNNER}" == *sco* ]];then + set -e + srun --job-name=${GITHUB_JOB} bash -c "cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda-Latest-Target/dipu \ + && source ${ENV_PATH}/dipu_env \ + && bash tests/run_nv_tests.sh" && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf Build-Cuda-Latest-Target + else + ssh SH1424 """ + set -ex + cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda-Latest-Target/dipu + source ${ENV_PATH}/dipu_env + srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:1 --cpus-per-task=5 --mem=16G --time=60 sh tests/run_nv_tests.sh && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf Build-Cuda-Latest-Target \ + || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 ) + """ + fi diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ddc633014..283d65520 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -36,6 +36,8 @@ jobs: Rsync: name: Rsync code runs-on: github-poc-ci + outputs: + getrunner: ${{ steps.set-run-vars.outputs.GETRUNNER }} steps: - name: clone repo run: | @@ -68,8 +70,14 @@ jobs: if [ $GITHUB_EVENT_NAME == "pull_request" ]; then cd ./DIOPI && git checkout ${{ github.base_ref }} fi - - name: Rsync to Server + - name: set output and Rsync to Server + id: set-run-vars run: | + cd ~/rsync && GETRUNNER=$(cat cuda_runner) + if [[ ${GETRUNNER} == *diopi* ]]; then + CUDA_CLUSTER="SH1424" + fi + echo "GETRUNNER=$GETRUNNER" >> "$GITHUB_OUTPUT" ssh ${CAMB_CLUSTER} "mkdir -p ${CAMB_CI_PATH}/${GITHUB_RUN_NUMBER}/source ${CAMB_CI_PATH}/${GITHUB_RUN_NUMBER}/source-main" \ && rsync -a --delete ${GITHUB_WORKSPACE}/${REPO}/ ${CAMB_CLUSTER}:${CAMB_CI_PATH}/${GITHUB_RUN_NUMBER}/source/ \ && rsync -a --delete ${GITHUB_WORKSPACE}/${REPO}_DIOPI/ ${CAMB_CLUSTER}:${CAMB_CI_PATH}/${GITHUB_RUN_NUMBER}/source-main/ || echo "failure to connect to camb" @@ -82,7 +90,7 @@ jobs: Build-Camb: name: Build-dipu-camb - needs: [Tidy-Cuda] + needs: [Runs-On-Nv-Step1] runs-on: github-poc-ci env: MLU_REQUESTS: 1 @@ -203,7 +211,7 @@ jobs: Build-Camb-Latest-Target: name: Build-dipu-camb-latest-target - needs: [Tidy-Cuda] + needs: [Runs-On-Nv-Step1] runs-on: github-poc-ci env: MLU_REQUESTS: 1 @@ -237,124 +245,26 @@ jobs: srun --job-name=${GITHUB_JOB} --partition=${CAMB_PARTATION} --time=40 --gres=mlu:${MLU_REQUESTS} sh tests/run_camb_tests.sh """ - Build-Cuda: - name: Build-dipu-cuda + Runs-On-Nv-Step1: + name: runs on nv step1 needs: [Rsync] - runs-on: tps-sco-ci - steps: - - name: Build dipu - run: | - set -e - cd ${DEEPLINK_PATH}/ && ls -al && find ${DEEPLINK_PATH}/ -maxdepth 1 -mmin +240 -type d |xargs rm -rf - cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source ${GITHUB_JOB} - srun --job-name=${GITHUB_JOB} bash -c "export USE_COVERAGE=ON && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${GITHUB_JOB}/dipu \ - && source ${ENV_PATH}/dipu_env \ - && rsync -a /mnt/lustre/share_data/PAT/datasets/huggingface mmlab_pack/ \ - && bash scripts/ci/nv/ci_nv_script.sh build_dipu" || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 ) - - Tidy-Cuda: - name: Run tidy (cuda) - needs: [Build-Cuda] - runs-on: tps-sco-ci - steps: - - name: Check SupportedDiopiFunctions.txt - run: | - cd $DEEPLINK_PATH/$GITHUB_RUN_NUMBER/Build-Cuda && - git diff -s --exit-code dipu/SupportedDiopiFunctions.txt || - { echo "::error file=dipu/SupportedDiopiFunctions.txt,title=File Not Match::Please commit your compiled SupportedDiopiFunctions.txt" && exit 1; } - - name: Run clang-tidy - run: | - srun --job-name=$GITHUB_JOB bash -c "bash $DEEPLINK_PATH/$GITHUB_RUN_NUMBER/Build-Cuda/dipu/scripts/ci/nv/ci_nv_tidy.sh" - - Test-Cuda: - name: Test-dipu-cuda - needs: [Build-Cuda, Tidy-Cuda] - runs-on: tps-sco-ci - steps: - - name: Run-test - run: | - set -e - srun --job-name=${GITHUB_JOB} bash -c "export USE_COVERAGE=ON && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu \ - && source ${ENV_PATH}/dipu_env \ - && bash tests/run_nv_tests.sh" - if [ "${ALL_COVERAGE}" = "ON" ]; then - bash /mnt/cache/share/platform/dep/sonar/coverage_DIPU_nv.sh ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda ${GITHUB_RUN_NUMBER} || echo "get coverage fail" - fi - - name: increment coverage check - if: ${{ contains( github.event_name, 'pull_request' ) && contains( github.base_ref, 'main' ) }} - run: | - set -e - cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda - ln -s ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source-main/dipu/third_party/DIOPI/scripts scripts - source ${ENV_PATH}/dipu_env - bash scripts/increment_coverage.sh ${REQUIRE_COVERAGE} - - Test-One-Iter_Cuda: - name: Test-one-iter-cuda - needs: [Build-Cuda, Tidy-Cuda] - runs-on: tps-sco-ci - steps: - - name: build some env - run: | - set -e - export basic_path=${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/ - srun --job-name=${GITHUB_JOB} bash -c "cd ${basic_path} \ - && export PYTHONPATH=${basic_path}/mmlab_pack:${basic_path}/mmlab_pack/mmengine:${basic_path}/mmlab_pack/mmcv:$PYTHONPATH \ - && source ${ENV_PATH}/dipu_env && cd mmlab_pack \ - && bash ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/scripts/ci/ci_one_iter.sh build_cuda" - - name: run-one-iter-for-tradition - run: | - set -e - cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/ \ - && source ${ENV_PATH}/dipu_env && cd mmlab_pack \ - && rm -rf one_iter_data \ - && python ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/scripts/ci/ci_run_one_iter.py sco ${GITHUB_JOB} gpu sco && rm -rf one_iter_data || (rm -rf one_iter_data && exit 1) - - name: run-one-iter-for-llm - run: | - set -e - cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/ \ - && source ${ENV_PATH}/dipu_env && cd mmlab_pack \ - && rm -rf one_iter_data \ - && python ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/scripts/ci/ci_run_one_iter.py sco ${GITHUB_JOB} gpu sco "llm" && rm -rf one_iter_data || (rm -rf one_iter_data && exit 1) - - name: Perform cleanup one iter data - if: always() - run: | - set -e - cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack - rm -rf one_iter_data - touch one_iter_data #用于占位,防止创建新的 one_iter_data 文件夹 - - name: Check for failure - if: ${{ failure() }} - run: exit 1 + uses: ./.github/workflows/_runs-on-nv-step1.yml + with: + runner: ${{ needs.Rsync.outputs.getrunner }} - Build-Cuda-Latest-Target: - name: Build-dipu-cuda-latest-target - needs: [Tidy-Cuda] - runs-on: tps-sco-ci - steps: - - name: Build dipu diopi-latest-target - run: | - set -e - cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source-main ${GITHUB_JOB} - srun --job-name=${GITHUB_JOB} bash -c "export USE_COVERAGE=ON && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${GITHUB_JOB}/dipu \ - && source ${ENV_PATH}/dipu_env \ - && bash scripts/ci/nv/ci_nv_script.sh build_dipu" || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 ) + Runs-On-Nv-Step2: + name: runs on nv step2 + needs: [Runs-On-Nv-Step1] + uses: ./.github/workflows/_runs-on-nv-step2.yml + with: + runner: ${{ needs.Rsync.outputs.getrunner }} + all_coverage: ${ALL_COVERAGE} + require_coverage: ${REQUIRE_COVERAGE} - Test-Cuda-Latest-Target: - name: Test-dipu-cuda-latest-target - needs: [Build-Cuda-Latest-Target] - runs-on: tps-sco-ci - steps: - - name: Run-test - run: | - set -e - srun --job-name=${GITHUB_JOB} bash -c "cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda-Latest-Target/dipu \ - && source ${ENV_PATH}/dipu_env \ - && bash tests/run_nv_tests.sh" && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf Build-Cuda-Latest-Target Build-PyTorch-For-Ascend-910b: name: Build-dipu-pytorch-for-ascend-910b - needs: [Tidy-Cuda] + needs: [Runs-On-Nv-Step1] runs-on: tps-ascend-ci-910b steps: - name: Build PyTorch diff --git a/.github/workflows/runs_on_1424.yml b/.github/workflows/runs_on_1424.yml index c9e4f292d..4754fbd5a 100644 --- a/.github/workflows/runs_on_1424.yml +++ b/.github/workflows/runs_on_1424.yml @@ -2,12 +2,12 @@ name: runs on 1424 on: workflow_dispatch: - pull_request: - paths-ignore: - - "**.md" - - ".github/ISSUE_TEMPLATE/**" - - ".git*" - - "CODE_OF_CONDUCT**" +# pull_request: +# paths-ignore: +# - "**.md" +# - ".github/ISSUE_TEMPLATE/**" +# - ".git*" +# - "CODE_OF_CONDUCT**" concurrency: group: 1424-${{ github.head_ref || github.ref }} From 1a1176dce3335a8c81e94479a325ca1380de936b Mon Sep 17 00:00:00 2001 From: wugeshui Date: Tue, 6 Feb 2024 11:02:52 +0800 Subject: [PATCH 09/30] add sco and 1424 --- .github/workflows/_runs-on-nv-step1.yml | 5 ----- .github/workflows/_runs-on-nv-step2.yml | 5 ----- 2 files changed, 10 deletions(-) diff --git a/.github/workflows/_runs-on-nv-step1.yml b/.github/workflows/_runs-on-nv-step1.yml index 474a5594c..c5649de90 100644 --- a/.github/workflows/_runs-on-nv-step1.yml +++ b/.github/workflows/_runs-on-nv-step1.yml @@ -13,11 +13,6 @@ on: type: string required: false default: '/mnt/cache/share/deeplinkci/github/${{ github.repository }}/' - run_result: - description: select which job to run - type: string - required: true - default: '' env_path: description: env file path type: string diff --git a/.github/workflows/_runs-on-nv-step2.yml b/.github/workflows/_runs-on-nv-step2.yml index 33e3e4732..1d090264c 100644 --- a/.github/workflows/_runs-on-nv-step2.yml +++ b/.github/workflows/_runs-on-nv-step2.yml @@ -13,11 +13,6 @@ on: type: string required: false default: '/mnt/cache/share/deeplinkci/github/${{ github.repository }}/' - run_result: - description: select which job to run - type: string - required: true - default: '' env_path: description: env file path type: string From a2510950c41e76fd2d4092160ecd869c70ef24c9 Mon Sep 17 00:00:00 2001 From: wugeshui Date: Tue, 6 Feb 2024 11:05:00 +0800 Subject: [PATCH 10/30] add sco and 1424 --- .github/workflows/_runs-on-nv-step2.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/_runs-on-nv-step2.yml b/.github/workflows/_runs-on-nv-step2.yml index 1d090264c..6f91a1838 100644 --- a/.github/workflows/_runs-on-nv-step2.yml +++ b/.github/workflows/_runs-on-nv-step2.yml @@ -84,7 +84,6 @@ jobs: Test-One-Iter_Cuda: name: Test-one-iter-cuda - needs: [Build-Cuda, Tidy-Cuda] runs-on: ${{ inputs.runner }} env: GETRUNNER: ${{ inputs.runner }} From a56af446e264d84fc3e658796942ce09c780f474 Mon Sep 17 00:00:00 2001 From: wugeshui Date: Tue, 6 Feb 2024 11:24:45 +0800 Subject: [PATCH 11/30] add sco and 1424 --- .github/workflows/_runs-on-nv-step1.yml | 2 +- .github/workflows/_runs-on-nv-step2.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/_runs-on-nv-step1.yml b/.github/workflows/_runs-on-nv-step1.yml index c5649de90..d1ac3b383 100644 --- a/.github/workflows/_runs-on-nv-step1.yml +++ b/.github/workflows/_runs-on-nv-step1.yml @@ -17,7 +17,7 @@ on: description: env file path type: string required: false - default: '/mnt/cache/share/platform/env' + default: '/mnt/cache/share/deeplinkci/github' jobs: Build-Cuda: diff --git a/.github/workflows/_runs-on-nv-step2.yml b/.github/workflows/_runs-on-nv-step2.yml index 6f91a1838..4d5c600f6 100644 --- a/.github/workflows/_runs-on-nv-step2.yml +++ b/.github/workflows/_runs-on-nv-step2.yml @@ -17,7 +17,7 @@ on: description: env file path type: string required: false - default: '/mnt/cache/share/platform/env' + default: '/mnt/cache/share/deeplinkci/github' all_coverage: description: all coverage type: string From 6af1cd1967964501ff06157cf1a4190bc4bb9da6 Mon Sep 17 00:00:00 2001 From: wugeshui Date: Tue, 6 Feb 2024 13:22:42 +0800 Subject: [PATCH 12/30] add sco and 1424 --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 283d65520..17b1d5b99 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -254,7 +254,7 @@ jobs: Runs-On-Nv-Step2: name: runs on nv step2 - needs: [Runs-On-Nv-Step1] + needs: [Rsync,Runs-On-Nv-Step1] uses: ./.github/workflows/_runs-on-nv-step2.yml with: runner: ${{ needs.Rsync.outputs.getrunner }} From 93d998d955cb8b623a3071e895bf8b531ccf36db Mon Sep 17 00:00:00 2001 From: wugeshui Date: Tue, 6 Feb 2024 15:20:20 +0800 Subject: [PATCH 13/30] add sco and 1424 --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 17b1d5b99..707c31b60 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -73,7 +73,7 @@ jobs: - name: set output and Rsync to Server id: set-run-vars run: | - cd ~/rsync && GETRUNNER=$(cat cuda_runner) + GETRUNNER=$(cat ~/rsync/cuda_runner_dipu) if [[ ${GETRUNNER} == *diopi* ]]; then CUDA_CLUSTER="SH1424" fi From 7d84ac66596aa27802ff07459f43edd0dae7ad11 Mon Sep 17 00:00:00 2001 From: wugeshui Date: Tue, 6 Feb 2024 16:02:59 +0800 Subject: [PATCH 14/30] add sco and 1424 --- .github/workflows/_runs-on-nv-step1.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/_runs-on-nv-step1.yml b/.github/workflows/_runs-on-nv-step1.yml index d1ac3b383..bf4d8512b 100644 --- a/.github/workflows/_runs-on-nv-step1.yml +++ b/.github/workflows/_runs-on-nv-step1.yml @@ -63,9 +63,9 @@ jobs: steps: - name: Check SupportedDiopiFunctions.txt run: | - cd $DEEPLINK_PATH/$GITHUB_RUN_NUMBER/Build-Cuda && - git diff -s --exit-code dipu/SupportedDiopiFunctions.txt || - { echo "::error file=dipu/SupportedDiopiFunctions.txt,title=File Not Match::Please commit your compiled SupportedDiopiFunctions.txt" && exit 1; } + cd $DEEPLINK_PATH/$GITHUB_RUN_NUMBER/Build-Cuda && \ + git diff -s --exit-code dipu/SupportedDiopiFunctions.txt || \ + { echo "::error file=dipu/SupportedDiopiFunctions.txt,title=File Not Match::Please commit your compiled SupportedDiopiFunctions.txt" && exit 1; } - name: Run clang-tidy run: | if [[ "${GETRUNNER}" == *sco* ]];then From c276aded73c63900aefcdc149fab767f74caae95 Mon Sep 17 00:00:00 2001 From: wugeshui Date: Tue, 6 Feb 2024 16:27:56 +0800 Subject: [PATCH 15/30] add sco and 1424 --- .github/workflows/_runs-on-nv-step1.yml | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/.github/workflows/_runs-on-nv-step1.yml b/.github/workflows/_runs-on-nv-step1.yml index bf4d8512b..0d4eccde8 100644 --- a/.github/workflows/_runs-on-nv-step1.yml +++ b/.github/workflows/_runs-on-nv-step1.yml @@ -63,13 +63,23 @@ jobs: steps: - name: Check SupportedDiopiFunctions.txt run: | - cd $DEEPLINK_PATH/$GITHUB_RUN_NUMBER/Build-Cuda && \ - git diff -s --exit-code dipu/SupportedDiopiFunctions.txt || \ - { echo "::error file=dipu/SupportedDiopiFunctions.txt,title=File Not Match::Please commit your compiled SupportedDiopiFunctions.txt" && exit 1; } + if [[ "${GETRUNNER}" == *sco* ]];then + cd $DEEPLINK_PATH/$GITHUB_RUN_NUMBER/Build-Cuda && \ + git diff -s --exit-code dipu/SupportedDiopiFunctions.txt || \ + { echo "::error file=dipu/SupportedDiopiFunctions.txt,title=File Not Match::Please commit your compiled SupportedDiopiFunctions.txt" && exit 1; } + else + ssh SH1424 """ + cd $DEEPLINK_PATH/$GITHUB_RUN_NUMBER/Build-Cuda && \ + git diff -s --exit-code dipu/SupportedDiopiFunctions.txt || \ + { echo "::error file=dipu/SupportedDiopiFunctions.txt,title=File Not Match::Please commit your compiled SupportedDiopiFunctions.txt" && exit 1; } + """ + fi - name: Run clang-tidy run: | if [[ "${GETRUNNER}" == *sco* ]];then srun --job-name=$GITHUB_JOB bash -c "bash $DEEPLINK_PATH/$GITHUB_RUN_NUMBER/Build-Cuda/dipu/scripts/ci/nv/ci_nv_tidy.sh" else + ssh SH1424 """ srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} bash -c "bash $DEEPLINK_PATH/$GITHUB_RUN_NUMBER/Build-Cuda/dipu/scripts/ci/nv/ci_nv_tidy.sh" + """ fi \ No newline at end of file From 57f6f902e33c5a7b2a41172e12b5fb43e8c6c8f3 Mon Sep 17 00:00:00 2001 From: wugeshui Date: Tue, 6 Feb 2024 18:39:33 +0800 Subject: [PATCH 16/30] add sco and 1424 --- .github/workflows/_runs-on-nv-step1.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_runs-on-nv-step1.yml b/.github/workflows/_runs-on-nv-step1.yml index 0d4eccde8..452658b88 100644 --- a/.github/workflows/_runs-on-nv-step1.yml +++ b/.github/workflows/_runs-on-nv-step1.yml @@ -80,6 +80,6 @@ jobs: srun --job-name=$GITHUB_JOB bash -c "bash $DEEPLINK_PATH/$GITHUB_RUN_NUMBER/Build-Cuda/dipu/scripts/ci/nv/ci_nv_tidy.sh" else ssh SH1424 """ - srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} bash -c "bash $DEEPLINK_PATH/$GITHUB_RUN_NUMBER/Build-Cuda/dipu/scripts/ci/nv/ci_nv_tidy.sh" + bash $DEEPLINK_PATH/$GITHUB_RUN_NUMBER/Build-Cuda/dipu/scripts/ci/nv/ci_nv_tidy.sh """ fi \ No newline at end of file From 5454e46d661f29bc11de1e5e368cf1e6016f9dfa Mon Sep 17 00:00:00 2001 From: wugeshui Date: Wed, 7 Feb 2024 18:01:23 +0800 Subject: [PATCH 17/30] add sco and 1424 --- dipu/scripts/ci/test_one_iter_traditional_model_list.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml b/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml index eed5d7f47..e6db5cbcd 100644 --- a/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml +++ b/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml @@ -35,7 +35,7 @@ camb: - model_cfg: "mmpose body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py workdirs_hrnet" # # # mmaction2 - - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn" + #- model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn" # # # mmocr - model_cfg: "mmocr textrecog/crnn/crnn_mini-vgg_5e_mj.py workdirs_crnn" From 874503915f0ed250a9ef8654b9656dbe2a242e21 Mon Sep 17 00:00:00 2001 From: wugeshui Date: Wed, 21 Feb 2024 13:39:16 +0800 Subject: [PATCH 18/30] add sco and 1424 --- .github/workflows/_runs-on-nv-step2.yml | 2 ++ .github/workflows/main.yml | 4 ++-- dipu/scripts/ci/test_one_iter_traditional_model_list.yaml | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/_runs-on-nv-step2.yml b/.github/workflows/_runs-on-nv-step2.yml index 4d5c600f6..319ec2496 100644 --- a/.github/workflows/_runs-on-nv-step2.yml +++ b/.github/workflows/_runs-on-nv-step2.yml @@ -42,6 +42,8 @@ jobs: steps: - name: Run-test run: | + echo "ALL_COVERAGE: ${ALL_COVERAGE}" + echo "REQUIRE_COVERAGE: ${REQUIRE_COVERAGE}" if [[ "${GETRUNNER}" == *sco* ]];then set -e srun --job-name=${GITHUB_JOB} bash -c "export USE_COVERAGE=ON && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu \ diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 707c31b60..fb371140e 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -258,8 +258,8 @@ jobs: uses: ./.github/workflows/_runs-on-nv-step2.yml with: runner: ${{ needs.Rsync.outputs.getrunner }} - all_coverage: ${ALL_COVERAGE} - require_coverage: ${REQUIRE_COVERAGE} + all_coverage: "${ALL_COVERAGE}" + require_coverage: "${REQUIRE_COVERAGE}" Build-PyTorch-For-Ascend-910b: diff --git a/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml b/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml index e6db5cbcd..b7b9beb52 100644 --- a/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml +++ b/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml @@ -35,7 +35,7 @@ camb: - model_cfg: "mmpose body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py workdirs_hrnet" # # # mmaction2 - #- model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn" + - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn" # # # mmocr - model_cfg: "mmocr textrecog/crnn/crnn_mini-vgg_5e_mj.py workdirs_crnn" @@ -74,7 +74,7 @@ cuda: # mmpose - model_cfg: "mmpose body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py workdirs_hrnet" # mmaction2 - - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn" + # - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn" # mmocr - model_cfg: "mmocr textrecog/crnn/crnn_mini-vgg_5e_mj.py workdirs_crnn" - model_cfg: "mmocr textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py workdirs_dbnet" From af19601595aeeab22cc005e528858a463c6f9fef Mon Sep 17 00:00:00 2001 From: wugeshui Date: Wed, 21 Feb 2024 14:58:09 +0800 Subject: [PATCH 19/30] add sco and 1424 --- .github/workflows/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index fb371140e..3ec4e25c6 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -258,8 +258,8 @@ jobs: uses: ./.github/workflows/_runs-on-nv-step2.yml with: runner: ${{ needs.Rsync.outputs.getrunner }} - all_coverage: "${ALL_COVERAGE}" - require_coverage: "${REQUIRE_COVERAGE}" + all_coverage: "${{ env.ALL_COVERAGE }}" + require_coverage: "${{ env.REQUIRE_COVERAGE }}" Build-PyTorch-For-Ascend-910b: From 16d13fec570e2a5809e222de89f70465093ce89e Mon Sep 17 00:00:00 2001 From: wugeshui Date: Wed, 21 Feb 2024 15:26:59 +0800 Subject: [PATCH 20/30] add sco and 1424 --- .github/workflows/_runs-on-nv-step2.yml | 56 ++++++++++++++++++++++++- .github/workflows/main.yml | 26 ------------ 2 files changed, 55 insertions(+), 27 deletions(-) diff --git a/.github/workflows/_runs-on-nv-step2.yml b/.github/workflows/_runs-on-nv-step2.yml index 319ec2496..83e50aced 100644 --- a/.github/workflows/_runs-on-nv-step2.yml +++ b/.github/workflows/_runs-on-nv-step2.yml @@ -227,6 +227,60 @@ jobs: cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda-Latest-Target/dipu source ${ENV_PATH}/dipu_env srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:1 --cpus-per-task=5 --mem=16G --time=60 sh tests/run_nv_tests.sh && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf Build-Cuda-Latest-Target \ - || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 ) + || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf Build-Cuda-Latest-Target && exit 1 ) """ fi + + + Build-Cuda-Pt211: + name: Build-dipu-cuda-pt211 + runs-on: ${{ inputs.runner }} + env: + GETRUNNER: ${{ inputs.runner }} + DEEPLINK_PATH: ${{ inputs.deeplink_path }} + ENV_PATH: ${{ inputs.env_path }} + CUDA_PARTATION: "pat_dev" + steps: + - name: Build dipu + run: | + if [[ "${GETRUNNER}" == *sco* ]];then + set -e + cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source ${GITHUB_JOB} + srun --job-name=${GITHUB_JOB} bash -c "cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${GITHUB_JOB}/dipu \ + && source ${ENV_PATH}/dipu_env 2.1.1 \ + && bash scripts/ci/nv/ci_nv_script.sh build_dipu " || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 ) + else + ssh SH1424 """ + set -ex + cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source ${GITHUB_JOB} + source ${ENV_PATH}/dipu_env 2.1.1 + srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:1 --cpus-per-task=5 --mem=16G --time=60 bash scripts/ci/nv/ci_nv_script.sh build_dipu \ + || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 ) + """ + fi + + Test-Cuda-Pt211: + name: Test-dipu-cuda-pt211 + needs: [Build-Cuda-Pt211] + runs-on: ${{ inputs.runner }} + env: + GETRUNNER: ${{ inputs.runner }} + DEEPLINK_PATH: ${{ inputs.deeplink_path }} + ENV_PATH: ${{ inputs.env_path }} + CUDA_PARTATION: "pat_dev" + steps: + - name: Run-test + run: | + if [[ "${GETRUNNER}" == *sco* ]];then + set -e + srun --job-name=${GITHUB_JOB} bash -c "cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda-Pt211/dipu \ + && source ${ENV_PATH}/dipu_env 2.1.1 \ + && bash tests/run_nv_tests.sh" && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf Build-Cuda-Pt211 + else + ssh SH1424 """ + set -ex + source ${ENV_PATH}/dipu_env 2.1.1 + srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:1 --cpus-per-task=5 bash tests/run_nv_tests.sh \ + || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf Build-Cuda-Pt211 && exit 1 ) + """ + fi \ No newline at end of file diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 794e88979..69ae9f734 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -286,32 +286,6 @@ jobs: all_coverage: "${{ env.ALL_COVERAGE }}" require_coverage: "${{ env.REQUIRE_COVERAGE }}" - Build-Cuda-Pt211: - name: Build-dipu-cuda-pt211 - needs: [Tidy-Cuda] - runs-on: tps-sco-ci - steps: - - name: Build dipu - run: | - set -e - cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source ${GITHUB_JOB} - srun --job-name=${GITHUB_JOB} bash -c "cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${GITHUB_JOB}/dipu \ - && source ${CUDA_ENV_PATH}/dipu_env 2.1.1 \ - && bash scripts/ci/nv/ci_nv_script.sh build_dipu " || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 ) - - - Test-Cuda-Pt211: - name: Test-dipu-cuda-pt211 - needs: [Build-Cuda-Pt211] - runs-on: tps-sco-ci - steps: - - name: Run-test - run: | - set -e - srun --job-name=${GITHUB_JOB} bash -c "cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda-Pt211/dipu \ - && source ${CUDA_ENV_PATH}/dipu_env 2.1.1 \ - && bash tests/run_nv_tests.sh" && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf Build-Cuda-Pt211 - Build-PyTorch-For-Ascend-910b: name: Build-dipu-pytorch-for-ascend-910b needs: [Runs-On-Nv-Step1] From 3fc868188b0ca74de835e01820273b8409d8f217 Mon Sep 17 00:00:00 2001 From: wugeshui Date: Wed, 21 Feb 2024 15:30:03 +0800 Subject: [PATCH 21/30] add sco and 1424 --- .github/workflows/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 69ae9f734..2c637174b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -283,8 +283,8 @@ jobs: uses: ./.github/workflows/_runs-on-nv-step2.yml with: runner: ${{ needs.Rsync.outputs.getrunner }} - all_coverage: "${{ env.ALL_COVERAGE }}" - require_coverage: "${{ env.REQUIRE_COVERAGE }}" + all_coverage: ${{ (contains( github.ref, 'main') || startsWith(github.ref, 'refs/heads/v') || startsWith(github.ref, 'refs/heads/dev')) && 'ON' || 'OFF' }} + require_coverage: ${{ vars.REQUIRE_COVERAGE != '' && vars.REQUIRE_COVERAGE || '0' }} Build-PyTorch-For-Ascend-910b: name: Build-dipu-pytorch-for-ascend-910b From dac37a1f69b9ffed3ecdbb59cb53a8897d81973c Mon Sep 17 00:00:00 2001 From: wugeshui Date: Wed, 21 Feb 2024 15:31:47 +0800 Subject: [PATCH 22/30] add sco and 1424 --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 2c637174b..78ee33b56 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -141,7 +141,7 @@ jobs: Build-Camb-Pt211: name: Build-dipu-camb-pt211 - needs: [Tidy-Cuda] + needs: [Runs-On-Nv-Step1] runs-on: github-poc-ci env: MLU_REQUESTS: 1 From 19e94f4e8fed7c6e659d5a07edfcb2f383d45295 Mon Sep 17 00:00:00 2001 From: wugeshui Date: Wed, 21 Feb 2024 16:03:23 +0800 Subject: [PATCH 23/30] add sco and 1424 --- .github/workflows/_runs-on-nv-step2.yml | 3 ++- dipu/scripts/ci/test_one_iter_traditional_model_list.yaml | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/_runs-on-nv-step2.yml b/.github/workflows/_runs-on-nv-step2.yml index 83e50aced..6815a95c6 100644 --- a/.github/workflows/_runs-on-nv-step2.yml +++ b/.github/workflows/_runs-on-nv-step2.yml @@ -252,7 +252,7 @@ jobs: else ssh SH1424 """ set -ex - cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source ${GITHUB_JOB} + cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source ${GITHUB_JOB} && cd ${GITHUB_JOB}/dipu source ${ENV_PATH}/dipu_env 2.1.1 srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:1 --cpus-per-task=5 --mem=16G --time=60 bash scripts/ci/nv/ci_nv_script.sh build_dipu \ || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 ) @@ -279,6 +279,7 @@ jobs: else ssh SH1424 """ set -ex + cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda-Pt211/dipu source ${ENV_PATH}/dipu_env 2.1.1 srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:1 --cpus-per-task=5 bash tests/run_nv_tests.sh \ || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf Build-Cuda-Pt211 && exit 1 ) diff --git a/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml b/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml index b7b9beb52..fd444cbb9 100644 --- a/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml +++ b/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml @@ -74,7 +74,7 @@ cuda: # mmpose - model_cfg: "mmpose body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py workdirs_hrnet" # mmaction2 - # - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn" + - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn" # mmocr - model_cfg: "mmocr textrecog/crnn/crnn_mini-vgg_5e_mj.py workdirs_crnn" - model_cfg: "mmocr textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py workdirs_dbnet" From cb13d97fd76d83f4ceed103fc7b0fdcb3c402f31 Mon Sep 17 00:00:00 2001 From: wugeshui Date: Wed, 21 Feb 2024 16:03:57 +0800 Subject: [PATCH 24/30] add sco and 1424 --- .github/workflows/_runs-on-nv-step2.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/_runs-on-nv-step2.yml b/.github/workflows/_runs-on-nv-step2.yml index 6815a95c6..f97e9089d 100644 --- a/.github/workflows/_runs-on-nv-step2.yml +++ b/.github/workflows/_runs-on-nv-step2.yml @@ -42,8 +42,6 @@ jobs: steps: - name: Run-test run: | - echo "ALL_COVERAGE: ${ALL_COVERAGE}" - echo "REQUIRE_COVERAGE: ${REQUIRE_COVERAGE}" if [[ "${GETRUNNER}" == *sco* ]];then set -e srun --job-name=${GITHUB_JOB} bash -c "export USE_COVERAGE=ON && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu \ From 63faf6c97a81f0eb6b40e0420aa3a2157f6782a5 Mon Sep 17 00:00:00 2001 From: wugeshui Date: Wed, 21 Feb 2024 16:42:23 +0800 Subject: [PATCH 25/30] add sco and 1424 --- dipu/scripts/ci/test_one_iter_traditional_model_list.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml b/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml index fd444cbb9..eed5d7f47 100644 --- a/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml +++ b/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml @@ -74,7 +74,7 @@ cuda: # mmpose - model_cfg: "mmpose body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py workdirs_hrnet" # mmaction2 - - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn" + - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn" # mmocr - model_cfg: "mmocr textrecog/crnn/crnn_mini-vgg_5e_mj.py workdirs_crnn" - model_cfg: "mmocr textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py workdirs_dbnet" From 69311461654e59c694338ebbe99e9048d7215a1b Mon Sep 17 00:00:00 2001 From: wugeshui Date: Thu, 22 Feb 2024 11:29:16 +0800 Subject: [PATCH 26/30] add sco and 1424 --- .github/workflows/_runs-on-nv-step2.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/_runs-on-nv-step2.yml b/.github/workflows/_runs-on-nv-step2.yml index f97e9089d..8ecaf01c4 100644 --- a/.github/workflows/_runs-on-nv-step2.yml +++ b/.github/workflows/_runs-on-nv-step2.yml @@ -143,6 +143,7 @@ jobs: && python ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/scripts/ci/ci_run_one_iter.py sco ${GITHUB_JOB} gpu sco "llm" && rm -rf one_iter_data || (rm -rf one_iter_data && exit 1) else ssh SH1424 """ + env |grep http cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu source ${ENV_PATH}/dipu_env basic_path=${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack From 19688a3b9a237717c3a800bd9e429a40fbc5a2b0 Mon Sep 17 00:00:00 2001 From: wugeshui Date: Thu, 22 Feb 2024 13:05:52 +0800 Subject: [PATCH 27/30] add sco and 1424 --- .github/workflows/_runs-on-nv-step2.yml | 3 +- .github/workflows/runs_on_1424.yml | 225 ------------------------ 2 files changed, 2 insertions(+), 226 deletions(-) delete mode 100644 .github/workflows/runs_on_1424.yml diff --git a/.github/workflows/_runs-on-nv-step2.yml b/.github/workflows/_runs-on-nv-step2.yml index 8ecaf01c4..87fe37287 100644 --- a/.github/workflows/_runs-on-nv-step2.yml +++ b/.github/workflows/_runs-on-nv-step2.yml @@ -115,6 +115,8 @@ jobs: fi - name: run-one-iter-for-tradition run: | + env |grep http + env if [[ "${GETRUNNER}" == *sco* ]];then set -e cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/ \ @@ -143,7 +145,6 @@ jobs: && python ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/scripts/ci/ci_run_one_iter.py sco ${GITHUB_JOB} gpu sco "llm" && rm -rf one_iter_data || (rm -rf one_iter_data && exit 1) else ssh SH1424 """ - env |grep http cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu source ${ENV_PATH}/dipu_env basic_path=${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack diff --git a/.github/workflows/runs_on_1424.yml b/.github/workflows/runs_on_1424.yml deleted file mode 100644 index 4754fbd5a..000000000 --- a/.github/workflows/runs_on_1424.yml +++ /dev/null @@ -1,225 +0,0 @@ -name: runs on 1424 - -on: - workflow_dispatch: -# pull_request: -# paths-ignore: -# - "**.md" -# - ".github/ISSUE_TEMPLATE/**" -# - ".git*" -# - "CODE_OF_CONDUCT**" - -concurrency: - group: 1424-${{ github.head_ref || github.ref }} - cancel-in-progress: true - -env: - DEEPLINK_PATH: '/mnt/cache/share/deeplinkci/github/${{ github.repository }}' - NFS_PATH: '/mnt/cache/share/parrotsci/github/cibuild/${{ github.repository }}' - ENV_PATH: '/mnt/cache/share/deeplinkci/github' - CUDA_PARTATION: ${{ vars.SH1988_SLURM_PAR != '' && vars.SH1988_SLURM_PAR || 'pat_dev' }} - CLUSTER_V100: SH1424 - REPO: ${{ github.event.repository.name }} - ALL_COVERAGE: ${{ (contains( github.ref, 'main') || startsWith(github.ref, 'refs/heads/v') || startsWith(github.ref, 'refs/heads/dev')) && 'ON' || 'OFF' }} - REQUIRE_COVERAGE: ${{ vars.REQUIRE_COVERAGE != '' && vars.REQUIRE_COVERAGE || '0' }} - -jobs: - Rsync: - name: Rsync code - runs-on: github-poc-ci - steps: - - name: clone repo - run: | - set -ex - cd ${GITHUB_WORKSPACE} && rm -rf DIPU ${REPO}_DIOPI ${REPO} ${REPO}.dev - if [ -n "${{ github.event.pull_request.head.repo.full_name }}" ] && [[ ! "${{ github.event.pull_request.head.repo.full_name }}" == "${{ github.repository }}" ]]; then - git clone ${{ github.event.pull_request.head.repo.ssh_url }} ${REPO} - cd ${GITHUB_WORKSPACE}/${REPO} && git checkout ${{ github.event.pull_request.head.sha }} - git remote add mainrepo git@github.com:${GITHUB_REPOSITORY}.git - git fetch mainrepo && git merge --no-edit mainrepo/${{ github.base_ref }} - else - git clone https://github.com/DeepLink-org/${REPO}.git && cd ${REPO} - if [ $GITHUB_EVENT_NAME == "pull_request" ]; then - echo "${{ github.base_ref }} " - git checkout ${{ github.event.pull_request.head.sha }} && git merge --no-edit origin/${{ github.base_ref }} - else - echo $GITHUB_EVENT_NAME - git checkout ${{ github.sha }} - fi - fi - cd ${GITHUB_WORKSPACE}/${REPO}/dipu && rm -rf third_party/kineto - git clone --reference /home/autolink/rsync/sourcecode/DeepLink-org/kineto https://github.com/DeepLink-org/kineto.git third_party/kineto - git submodule update --init && cd third_party/kineto && git submodule update --init - cd ${GITHUB_WORKSPACE} && cp -R ${REPO} ${REPO}_DIOPI - cd ${REPO}/dipu && bash /home/autolink/rsync/sourcecode/update_code.sh - rsync -a /home/autolink/rsync/sourcecode/mmlab_pack . && cd mmlab_pack - bash ../scripts/ci/ci_one_iter.sh clone - # dipu_diopi depend on latest target diopi branch, not diopi in submodule. here assume diopi and dipu use same 'target branch' " github.base_ref " - cd ${GITHUB_WORKSPACE}/${REPO}_DIOPI/dipu/third_party && rm -rf DIOPI && git clone https://github.com/DeepLink-org/DIOPI.git - if [ $GITHUB_EVENT_NAME == "pull_request" ]; then - cd ./DIOPI && git checkout ${{ github.base_ref }} - fi - - name: Rsync to Server - run: | - ssh ${CLUSTER_V100} "mkdir -p ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source-main" \ - && rsync -a --delete ${GITHUB_WORKSPACE}/${REPO}/ ${CLUSTER_V100}:${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source/ \ - && rsync -a --delete ${GITHUB_WORKSPACE}/${REPO}_DIOPI/ ${CLUSTER_V100}:${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source-main/ || echo "failure to connect to cuda" - - Build-Cuda: - name: Build-dipu-cuda - needs: [Rsync] - runs-on: github-poc-ci - env: - GPU_REQUESTS: 1 - steps: - - name: Build dipu - run: | - ssh ${CLUSTER_V100} """ - set -e - export USE_COVERAGE=ON - cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source ${GITHUB_JOB} && cd ${GITHUB_JOB}/dipu - source ${ENV_PATH}/dipu_env - rsync -a /mnt/lustre/share_data/PAT/datasets/huggingface mmlab_pack/ - srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:${GPU_REQUESTS} --time=30 bash scripts/ci/nv/ci_nv_script.sh build_dipu \ - || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 ) - """ - - Tidy-Cuda: - name: Run tidy (cuda) - needs: [Build-Cuda] - runs-on: github-poc-ci - steps: - - name: clang-tidy - run: | - ssh ${CLUSTER_V100} """ - set -eo pipefail - source ~/.bashrc - cd $DEEPLINK_PATH/$GITHUB_RUN_NUMBER/Build-Cuda && - git diff -s --exit-code dipu/SupportedDiopiFunctions.txt || - { echo "::error file=dipu/SupportedDiopiFunctions.txt,title=File Not Match::Please commit your compiled SupportedDiopiFunctions.txt" && exit 1; } - srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} bash -c "bash $DEEPLINK_PATH/$GITHUB_RUN_NUMBER/Build-Cuda/dipu/scripts/ci/nv/ci_nv_tidy.sh" - """ - - Test-Cuda: - name: Test-dipu-cuda - needs: [Build-Cuda, Tidy-Cuda] - runs-on: github-poc-ci - env: - GPU_REQUESTS: 1 - steps: - - name: Run-test - run: | - ssh ${CLUSTER_V100} """ - set -ex - export USE_COVERAGE=ON - cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu - source ${ENV_PATH}/dipu_env - srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:${GPU_REQUESTS} --cpus-per-task=5 --mem=16G --time=70 sh tests/run_nv_tests.sh - if [ "${ALL_COVERAGE}" = "ON" ]; then - bash /mnt/cache/share/platform/dep/sonar/coverage_DIPU_nv.sh ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda ${GITHUB_RUN_NUMBER} || echo "get coverage fail" - fi - """ - - name: increment coverage check - if: ${{ contains( github.event_name, 'pull_request' ) && contains( github.base_ref, 'main' ) }} - run: | - ssh ${CLUSTER_V100} """ - set -e - cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/ - rm -rf scripts - ln -s ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source-main/dipu/third_party/DIOPI/scripts scripts - source /mnt/cache/share/platform/env/pt2.0_diopi - bash scripts/increment_coverage.sh ${REQUIRE_COVERAGE} - """ - - Test-One-Iter_Cuda: - name: Test-one-iter-cuda - needs: [Build-Cuda, Tidy-Cuda] - runs-on: github-poc-ci - env: - GPU_REQUESTS: 1 - steps: - - name: build some env - run: | - ssh ${CLUSTER_V100} """ - set -ex - cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu - source ${ENV_PATH}/dipu_env - basic_path=${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack - export PYTHONPATH=\${basic_path}/mmengine:\$PYTHONPATH - export PYTHONPATH=\${basic_path}/mmcv:\$PYTHONPATH - export PYTHONPATH=\$(pwd):\$PYTHONPATH - cd mmlab_pack - srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:${GPU_REQUESTS} --time=20 bash ../scripts/ci/ci_one_iter.sh build_cuda - """ - - name: run-one-iter-for-tradition - run: | - ssh ${CLUSTER_V100} """ - cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu - source ${ENV_PATH}/dipu_env - basic_path=${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack - source scripts/ci/ci_one_iter.sh export_pythonpath_cuda \${basic_path} - export PYTHONPATH=\$(pwd):\$PYTHONPATH - cd mmlab_pack - rm -rf one_iter_data - python ../scripts/ci/ci_run_one_iter.py cuda ${GITHUB_RUN_NUMBER}_${GITHUB_JOB} "gpu:${GPU_REQUESTS}" \"${CUDA_PARTATION}\" && rm -rf one_iter_data || (rm -rf one_iter_data && exit 1) - """ - - name: run-one-iter-for-llm - run: | - ssh ${CLUSTER_V100} """ - cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu - source ${ENV_PATH}/dipu_env - basic_path=${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack - source scripts/ci/ci_one_iter.sh export_pythonpath_cuda \${basic_path} - export PYTHONPATH=\$(pwd):\$PYTHONPATH - cd mmlab_pack - rm -rf one_iter_data - python ../scripts/ci/ci_run_one_iter.py cuda ${GITHUB_RUN_NUMBER}_${GITHUB_JOB} "gpu:${GPU_REQUESTS}" \"${CUDA_PARTATION}\" "llm" && rm -rf one_iter_data || (rm -rf one_iter_data && exit 1) - """ - - name: Perform cleanup one iter data - if: always() - run: | - ssh ${CLUSTER_V100} """ - set -ex - echo "${GITHUB_RUN_NUMBER}_${GITHUB_JOB}" - scancel -n "${GITHUB_RUN_NUMBER}_${GITHUB_JOB}" - cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack - rm -rf one_iter_data - touch one_iter_data # 用于占位,防止创建新的 one_iter_data 文件夹 - """ - - name: Check for failure - if: ${{ failure() }} - run: exit 1 - - Build-Cuda-Latest-Target: - name: Build-dipu-cuda-latest-target - needs: [Tidy-Cuda] - runs-on: github-poc-ci - env: - GPU_REQUESTS: 1 - steps: - - name: Build dipu diopi-latest-target - run: | - ssh ${CLUSTER_V100} """ - set -ex - cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source-main ${GITHUB_JOB} && cd ${GITHUB_JOB}/dipu - source ${ENV_PATH}/dipu_env - srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:${GPU_REQUESTS} --cpus-per-task=5 --mem=16G --time=30 bash scripts/ci/nv/ci_nv_script.sh build_dipu \ - || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 ) - """ - - Test-Cuda-Latest-Target: - name: Test-dipu-cuda-latest-target - needs: [Build-Cuda-Latest-Target] - runs-on: github-poc-ci - env: - GPU_REQUESTS: 1 - steps: - - name: Run-test - run: | - ssh ${CLUSTER_V100} """ - set -ex - cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda-Latest-Target/dipu - source ${ENV_PATH}/dipu_env - srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:${GPU_REQUESTS} --cpus-per-task=5 --mem=16G --time=60 sh tests/run_nv_tests.sh && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf Build-Cuda-Latest-Target \ - || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 ) - """ \ No newline at end of file From c455041adfbda2bfa1592373d5d7d4817f6b0dca Mon Sep 17 00:00:00 2001 From: wugeshui Date: Thu, 22 Feb 2024 13:38:19 +0800 Subject: [PATCH 28/30] add sco and 1424 --- .github/workflows/_runs-on-nv-step1.yml | 2 ++ .github/workflows/_runs-on-nv-step2.yml | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/_runs-on-nv-step1.yml b/.github/workflows/_runs-on-nv-step1.yml index 452658b88..067adcb1b 100644 --- a/.github/workflows/_runs-on-nv-step1.yml +++ b/.github/workflows/_runs-on-nv-step1.yml @@ -42,6 +42,8 @@ jobs: else ssh SH1424 """ set -e + env |grep http + env export USE_COVERAGE=ON cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source ${GITHUB_JOB} && cd ${GITHUB_JOB}/dipu source ${ENV_PATH}/dipu_env diff --git a/.github/workflows/_runs-on-nv-step2.yml b/.github/workflows/_runs-on-nv-step2.yml index 87fe37287..f97e9089d 100644 --- a/.github/workflows/_runs-on-nv-step2.yml +++ b/.github/workflows/_runs-on-nv-step2.yml @@ -115,8 +115,6 @@ jobs: fi - name: run-one-iter-for-tradition run: | - env |grep http - env if [[ "${GETRUNNER}" == *sco* ]];then set -e cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/ \ From adaf54bd16e987702146a4f0ddcb3d08bf045ddb Mon Sep 17 00:00:00 2001 From: wugeshui Date: Thu, 22 Feb 2024 14:10:39 +0800 Subject: [PATCH 29/30] add sco and 1424 --- .github/workflows/_runs-on-nv-step1.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/_runs-on-nv-step1.yml b/.github/workflows/_runs-on-nv-step1.yml index 067adcb1b..452658b88 100644 --- a/.github/workflows/_runs-on-nv-step1.yml +++ b/.github/workflows/_runs-on-nv-step1.yml @@ -42,8 +42,6 @@ jobs: else ssh SH1424 """ set -e - env |grep http - env export USE_COVERAGE=ON cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source ${GITHUB_JOB} && cd ${GITHUB_JOB}/dipu source ${ENV_PATH}/dipu_env From a728c2b252c10fceddb7af94368a1f0de2df1a89 Mon Sep 17 00:00:00 2001 From: wugeshui Date: Fri, 23 Feb 2024 18:56:50 +0800 Subject: [PATCH 30/30] add sco and 1424 --- dipu/scripts/ci/test_one_iter_large_language_model_list.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dipu/scripts/ci/test_one_iter_large_language_model_list.yaml b/dipu/scripts/ci/test_one_iter_large_language_model_list.yaml index f0cbccc9d..9f6898ff6 100644 --- a/dipu/scripts/ci/test_one_iter_large_language_model_list.yaml +++ b/dipu/scripts/ci/test_one_iter_large_language_model_list.yaml @@ -3,7 +3,7 @@ cuda: # - model_cfg: "alpaca-lora run_llama_finetune.py workdirs_alpaca_lora_llama_finetune" # transformers - model_cfg: "transformers examples/pytorch/language-modeling/llama_7b_infer.py workdirs_transformers_llama_infer" - - model_cfg: "transformers examples/pytorch/language-modeling/internlm_7b_infer.py workdirs_transformers_internlm_infer" +# - model_cfg: "transformers examples/pytorch/language-modeling/internlm_7b_infer.py workdirs_transformers_internlm_infer" # lightllm # - model_cfg: "lightllm llama_7b_via_lightllm_infer.py workdirs_lightllm_llama_infer"