diff --git a/.github/workflows/_runs-on-nv-step1.yml b/.github/workflows/_runs-on-nv-step1.yml
new file mode 100644
index 000000000..452658b88
--- /dev/null
+++ b/.github/workflows/_runs-on-nv-step1.yml
@@ -0,0 +1,88 @@
+# Reusable workflow: builds DIPU for CUDA (Build-Cuda), then runs the tidy checks (Tidy-Cuda).
+# Each step runs on the runner itself when the runner name contains "sco", otherwise over ssh on SH1424.
+name: runs on nv step 1
+
+on:
+  workflow_call:
+    inputs:
+      runner:
+        description: Set up the runner
+        type: string
+        required: false
+        default: "tps-sco-ci"
+      deeplink_path:
+        description: ci work home
+        type: string
+        required: false
+        default: '/mnt/cache/share/deeplinkci/github/${{ github.repository }}/'
+      env_path:
+        description: env file path
+        type: string
+        required: false
+        default: '/mnt/cache/share/deeplinkci/github'
+
+jobs:
+  Build-Cuda:
+    name: Build-dipu-cuda
+    runs-on: ${{ inputs.runner }}
+    env:
+      GETRUNNER: ${{ inputs.runner }}
+      DEEPLINK_PATH: ${{ inputs.deeplink_path }}
+      ENV_PATH: ${{ inputs.env_path }}
+      CUDA_PARTATION: "pat_dev"
+    steps:
+      - name: Build dipu
+        run: |
+          if [[ "${GETRUNNER}" == *sco* ]];then
+            set -e
+            # -mindepth 1 keeps the CI root itself out of the sweep of stale (>240 min) directories.
+            cd ${DEEPLINK_PATH}/ && ls -al && find ${DEEPLINK_PATH}/ -mindepth 1 -maxdepth 1 -mmin +240 -type d |xargs rm -rf
+            cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source ${GITHUB_JOB}
+            srun --job-name=${GITHUB_JOB} bash -c "export USE_COVERAGE=ON && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${GITHUB_JOB}/dipu \
+            && source ${ENV_PATH}/dipu_env \
+            && rsync -a /mnt/lustre/share_data/PAT/datasets/huggingface mmlab_pack/ \
+            && bash scripts/ci/nv/ci_nv_script.sh build_dipu" || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 )
+          else
+            ssh SH1424 """
+              set -e
+              export USE_COVERAGE=ON
+              cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source ${GITHUB_JOB} && cd ${GITHUB_JOB}/dipu
+              source ${ENV_PATH}/dipu_env
+              rsync -a /mnt/lustre/share_data/PAT/datasets/huggingface mmlab_pack/
+              srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:1 --time=30 bash scripts/ci/nv/ci_nv_script.sh build_dipu \
+              || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 )
+            """
+          fi
+
+  Tidy-Cuda:
+    name: Run tidy (cuda)
+    needs: [Build-Cuda]
+    runs-on: ${{ inputs.runner }}
+    env:
+      GETRUNNER: ${{ inputs.runner }}
+      DEEPLINK_PATH: ${{ inputs.deeplink_path }}
+      ENV_PATH: ${{ inputs.env_path }}
+      CUDA_PARTATION: "pat_dev"
+    steps:
+      - name: Check SupportedDiopiFunctions.txt
+        run: |
+          if [[ "${GETRUNNER}" == *sco* ]];then
+            cd $DEEPLINK_PATH/$GITHUB_RUN_NUMBER/Build-Cuda && \
+            git diff -s --exit-code dipu/SupportedDiopiFunctions.txt || \
+            { echo "::error file=dipu/SupportedDiopiFunctions.txt,title=File Not Match::Please commit your compiled SupportedDiopiFunctions.txt" && exit 1; }
+          else
+            ssh SH1424 """
+              cd $DEEPLINK_PATH/$GITHUB_RUN_NUMBER/Build-Cuda && \
+              git diff -s --exit-code dipu/SupportedDiopiFunctions.txt || \
+              { echo "::error file=dipu/SupportedDiopiFunctions.txt,title=File Not Match::Please commit your compiled SupportedDiopiFunctions.txt" && exit 1; }
+            """
+          fi
+      - name: Run clang-tidy
+        run: |
+          if [[ "${GETRUNNER}" == *sco* ]];then
+            srun --job-name=$GITHUB_JOB bash -c "bash $DEEPLINK_PATH/$GITHUB_RUN_NUMBER/Build-Cuda/dipu/scripts/ci/nv/ci_nv_tidy.sh"
+          else
+            ssh SH1424 """
+              bash $DEEPLINK_PATH/$GITHUB_RUN_NUMBER/Build-Cuda/dipu/scripts/ci/nv/ci_nv_tidy.sh
+            """
+          fi
\ No newline at end of file
diff --git a/.github/workflows/_runs-on-nv-step2.yml b/.github/workflows/_runs-on-nv-step2.yml
new file mode 100644
index 000000000..f97e9089d
--- /dev/null
+++ b/.github/workflows/_runs-on-nv-step2.yml
@@ -0,0 +1,290 @@
+# Reusable workflow: CUDA tests and one-iter runs on the Build-Cuda tree, plus latest-target and pt2.1.1 build/test jobs.
+# Each step runs on the runner itself when the runner name contains "sco", otherwise over ssh on SH1424.
+name: runs on nv step 2
+
+on:
+  workflow_call:
+    inputs:
+      runner:
+        description: Set up the runner
+        type: string
+        required: false
+        default: "tps-sco-ci"
+      deeplink_path:
+        description: ci work home
+        type: string
+        required: false
+        default: '/mnt/cache/share/deeplinkci/github/${{ github.repository }}/'
+      env_path:
+        description: env file path
+        type: string
+        required: false
+        default: '/mnt/cache/share/deeplinkci/github'
+      # 'ON' runs the coverage_DIPU_nv.sh report after the tests; any other value skips it.
+      all_coverage:
+        description: all coverage
+        type: string
+        required: false
+        default: 'OFF'
+      require_coverage:
+        description: input coverage rate
+        type: string
+        required: false
+        default: '0'
+jobs:
+  Test-Cuda:
+    name: Test-dipu-cuda
+    runs-on: ${{ inputs.runner }}
+    env:
+      GETRUNNER: ${{ inputs.runner }}
+      DEEPLINK_PATH: ${{ inputs.deeplink_path }}
+      ENV_PATH: ${{ inputs.env_path }}
+      CUDA_PARTATION: "pat_dev"
+      ALL_COVERAGE: ${{ inputs.all_coverage }}
+      REQUIRE_COVERAGE: ${{ inputs.require_coverage }}
+    steps:
+      - name: Run-test
+        run: |
+          if [[ "${GETRUNNER}" == *sco* ]];then
+            set -e
+            srun --job-name=${GITHUB_JOB} bash -c "export USE_COVERAGE=ON && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu \
+            && source ${ENV_PATH}/dipu_env \
+            && bash tests/run_nv_tests.sh"
+            if [ "${ALL_COVERAGE}" = "ON" ]; then
+              bash /mnt/cache/share/platform/dep/sonar/coverage_DIPU_nv.sh ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda ${GITHUB_RUN_NUMBER} || echo "get coverage fail"
+            fi
+          else
+            ssh SH1424 """
+              set -ex
+              export USE_COVERAGE=ON
+              cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu
+              source ${ENV_PATH}/dipu_env
+              srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:1 --cpus-per-task=5 --mem=16G --time=70 sh tests/run_nv_tests.sh
+              if [ "${ALL_COVERAGE}" = "ON" ]; then
+                bash /mnt/cache/share/platform/dep/sonar/coverage_DIPU_nv.sh ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda ${GITHUB_RUN_NUMBER} || echo "get coverage fail"
+              fi
+            """
+          fi
+      - name: increment coverage check
+        if: ${{ contains( github.event_name, 'pull_request' ) && contains( github.base_ref, 'main' ) }}
+        run: |
+          if [[ "${GETRUNNER}" == *sco* ]];then
+            set -e
+            cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda
+            # A re-run keeps the same run number, so remove a link left by an earlier attempt.
+            rm -rf scripts
+            ln -s ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source-main/dipu/third_party/DIOPI/scripts scripts
+            source ${ENV_PATH}/dipu_env
+            bash scripts/increment_coverage.sh ${REQUIRE_COVERAGE}
+          else
+            ssh SH1424 """
+              set -e
+              cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/
+              rm -rf scripts
+              ln -s ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source-main/dipu/third_party/DIOPI/scripts scripts
+              source /mnt/cache/share/platform/env/pt2.0_diopi
+              bash scripts/increment_coverage.sh ${REQUIRE_COVERAGE}
+            """
+          fi
+
+  Test-One-Iter_Cuda:
+    name: Test-one-iter-cuda
+    runs-on: ${{ inputs.runner }}
+    env:
+      GETRUNNER: ${{ inputs.runner }}
+      DEEPLINK_PATH: ${{ inputs.deeplink_path }}
+      ENV_PATH: ${{ inputs.env_path }}
+      CUDA_PARTATION: "pat_dev"
+    steps:
+      - name: build some env
+        run: |
+          if [[ "${GETRUNNER}" == *sco* ]];then
+            set -e
+            export basic_path=${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/
+            srun --job-name=${GITHUB_JOB} bash -c "cd ${basic_path} \
+            && export PYTHONPATH=${basic_path}/mmlab_pack:${basic_path}/mmlab_pack/mmengine:${basic_path}/mmlab_pack/mmcv:$PYTHONPATH \
+            && source ${ENV_PATH}/dipu_env && cd mmlab_pack \
+            && bash ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/scripts/ci/ci_one_iter.sh build_cuda"
+          else
+            ssh SH1424 """
+              set -ex
+              cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu
+              source ${ENV_PATH}/dipu_env
+              basic_path=${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack
+              export PYTHONPATH=\${basic_path}/mmengine:\$PYTHONPATH
+              export PYTHONPATH=\${basic_path}/mmcv:\$PYTHONPATH
+              export PYTHONPATH=\$(pwd):\$PYTHONPATH
+              cd mmlab_pack
+              srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:1 --time=20 bash ../scripts/ci/ci_one_iter.sh build_cuda
+            """
+          fi
+      - name: run-one-iter-for-tradition
+        run: |
+          if [[ "${GETRUNNER}" == *sco* ]];then
+            set -e
+            cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/ \
+            && source ${ENV_PATH}/dipu_env && cd mmlab_pack \
+            && rm -rf one_iter_data \
+            && python ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/scripts/ci/ci_run_one_iter.py sco ${GITHUB_JOB} gpu sco && rm -rf one_iter_data || (rm -rf one_iter_data && exit 1)
+          else
+            ssh SH1424 """
+              cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu
+              source ${ENV_PATH}/dipu_env
+              basic_path=${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack
+              source scripts/ci/ci_one_iter.sh export_pythonpath_cuda \${basic_path}
+              export PYTHONPATH=\$(pwd):\$PYTHONPATH
+              cd mmlab_pack
+              rm -rf one_iter_data
+              python ../scripts/ci/ci_run_one_iter.py cuda ${GITHUB_RUN_NUMBER}_${GITHUB_JOB} "gpu:1" \"${CUDA_PARTATION}\" && rm -rf one_iter_data || (rm -rf one_iter_data && exit 1)
+            """
+          fi
+      - name: run-one-iter-for-llm
+        run: |
+          if [[ "${GETRUNNER}" == *sco* ]];then
+            set -e
+            cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/ \
+            && source ${ENV_PATH}/dipu_env && cd mmlab_pack \
+            && rm -rf one_iter_data \
+            && python ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/scripts/ci/ci_run_one_iter.py sco ${GITHUB_JOB} gpu sco "llm" && rm -rf one_iter_data || (rm -rf one_iter_data && exit 1)
+          else
+            ssh SH1424 """
+              cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu
+              source ${ENV_PATH}/dipu_env
+              basic_path=${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack
+              source scripts/ci/ci_one_iter.sh export_pythonpath_cuda \${basic_path}
+              export PYTHONPATH=\$(pwd):\$PYTHONPATH
+              cd mmlab_pack
+              rm -rf one_iter_data
+              python ../scripts/ci/ci_run_one_iter.py cuda ${GITHUB_RUN_NUMBER}_${GITHUB_JOB} "gpu:1" \"${CUDA_PARTATION}\" "llm" && rm -rf one_iter_data || (rm -rf one_iter_data && exit 1)
+            """
+          fi
+      - name: Perform cleanup one iter data
+        if: always()
+        run: |
+          if [[ "${GETRUNNER}" == *sco* ]];then
+            set -e
+            cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack
+            rm -rf one_iter_data
+            touch one_iter_data # placeholder file: prevents a new one_iter_data directory from being created
+          else
+            ssh SH1424 """
+              set -ex
+              echo "${GITHUB_RUN_NUMBER}_${GITHUB_JOB}"
+              scancel -n "${GITHUB_RUN_NUMBER}_${GITHUB_JOB}"
+              cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack
+              rm -rf one_iter_data
+              touch one_iter_data # 用于占位,防止创建新的 one_iter_data 文件夹
+            """
+          fi
+      - name: Check for failure
+        if: ${{ failure() }}
+        run: exit 1
+
+  Build-Cuda-Latest-Target:
+    name: Build-dipu-cuda-latest-target
+    runs-on: ${{ inputs.runner }}
+    env:
+      GETRUNNER: ${{ inputs.runner }}
+      DEEPLINK_PATH: ${{ inputs.deeplink_path }}
+      ENV_PATH: ${{ inputs.env_path }}
+      CUDA_PARTATION: "pat_dev"
+    steps:
+      - name: Build dipu diopi-latest-target
+        run: |
+          if [[ "${GETRUNNER}" == *sco* ]];then
+            set -e
+            cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source-main ${GITHUB_JOB}
+            srun --job-name=${GITHUB_JOB} bash -c "export USE_COVERAGE=ON && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${GITHUB_JOB}/dipu \
+            && source ${ENV_PATH}/dipu_env \
+            && bash scripts/ci/nv/ci_nv_script.sh build_dipu" || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 )
+          else
+            ssh SH1424 """
+              set -ex
+              cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source-main ${GITHUB_JOB} && cd ${GITHUB_JOB}/dipu
+              source ${ENV_PATH}/dipu_env
+              srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:1 --cpus-per-task=5 --mem=16G --time=30 bash scripts/ci/nv/ci_nv_script.sh build_dipu \
+              || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 )
+            """
+          fi
+
+  Test-Cuda-Latest-Target:
+    name: Test-dipu-cuda-latest-target
+    needs: [Build-Cuda-Latest-Target]
+    runs-on: ${{ inputs.runner }}
+    env:
+      GETRUNNER: ${{ inputs.runner }}
+      DEEPLINK_PATH: ${{ inputs.deeplink_path }}
+      ENV_PATH: ${{ inputs.env_path }}
+      CUDA_PARTATION: "pat_dev"
+    steps:
+      - name: Run-test
+        run: |
+          if [[ "${GETRUNNER}" == *sco* ]];then
+            set -e
+            srun --job-name=${GITHUB_JOB} bash -c "cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda-Latest-Target/dipu \
+            && source ${ENV_PATH}/dipu_env \
+            && bash tests/run_nv_tests.sh" && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf Build-Cuda-Latest-Target
+          else
+            ssh SH1424 """
+              set -ex
+              cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda-Latest-Target/dipu
+              source ${ENV_PATH}/dipu_env
+              srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:1 --cpus-per-task=5 --mem=16G --time=60 sh tests/run_nv_tests.sh && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf Build-Cuda-Latest-Target \
+              || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf Build-Cuda-Latest-Target && exit 1 )
+            """
+          fi
+
+
+  Build-Cuda-Pt211:
+    name: Build-dipu-cuda-pt211
+    runs-on: ${{ inputs.runner }}
+    env:
+      GETRUNNER: ${{ inputs.runner }}
+      DEEPLINK_PATH: ${{ inputs.deeplink_path }}
+      ENV_PATH: ${{ inputs.env_path }}
+      CUDA_PARTATION: "pat_dev"
+    steps:
+      - name: Build dipu
+        run: |
+          if [[ "${GETRUNNER}" == *sco* ]];then
+            set -e
+            cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source ${GITHUB_JOB}
+            srun --job-name=${GITHUB_JOB} bash -c "cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${GITHUB_JOB}/dipu \
+            && source ${ENV_PATH}/dipu_env 2.1.1 \
+            && bash scripts/ci/nv/ci_nv_script.sh build_dipu " || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 )
+          else
+            ssh SH1424 """
+              set -ex
+              cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source ${GITHUB_JOB} && cd ${GITHUB_JOB}/dipu
+              source ${ENV_PATH}/dipu_env 2.1.1
+              srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:1 --cpus-per-task=5 --mem=16G --time=60 bash scripts/ci/nv/ci_nv_script.sh build_dipu \
+              || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 )
+            """
+          fi
+
+  Test-Cuda-Pt211:
+    name: Test-dipu-cuda-pt211
+    needs: [Build-Cuda-Pt211]
+    runs-on: ${{ inputs.runner }}
+    env:
+      GETRUNNER: ${{ inputs.runner }}
+      DEEPLINK_PATH: ${{ inputs.deeplink_path }}
+      ENV_PATH: ${{ inputs.env_path }}
+      CUDA_PARTATION: "pat_dev"
+    steps:
+      - name: Run-test
+        run: |
+          if [[ "${GETRUNNER}" == *sco* ]];then
+            set -e
+            srun --job-name=${GITHUB_JOB} bash -c "cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda-Pt211/dipu \
+            && source ${ENV_PATH}/dipu_env 2.1.1 \
+            && bash tests/run_nv_tests.sh" && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf Build-Cuda-Pt211
+          else
+            ssh SH1424 """
+              set -ex
+              cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda-Pt211/dipu
+              source ${ENV_PATH}/dipu_env 2.1.1
+              srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:1 --cpus-per-task=5 bash tests/run_nv_tests.sh && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf Build-Cuda-Pt211 \
+              || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf Build-Cuda-Pt211 && exit 1 )
+            """
+          fi
\ No newline at end of file
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index ee6b8b3b3..78ee33b56 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -36,6 +36,8 @@ jobs:
   Rsync:
     name: Rsync code
     runs-on: github-poc-ci
+    outputs:
+      getrunner: ${{ steps.set-run-vars.outputs.GETRUNNER }}
     steps:
       - name: clone repo
         run: |
@@ -68,8 +70,14 @@ jobs:
           if [ $GITHUB_EVENT_NAME == "pull_request" ]; then
             cd ./DIOPI && git checkout ${{ github.base_ref }}
           fi
-      - name: Rsync to Server
+      - name: set output and Rsync to Server
+        id: set-run-vars
         run: |
+          GETRUNNER=$(cat ~/rsync/cuda_runner_dipu)
+          if [[ ${GETRUNNER} == *diopi* ]]; then
+            CUDA_CLUSTER="SH1424"
+          fi
+          echo "GETRUNNER=$GETRUNNER" >> "$GITHUB_OUTPUT"
           ssh ${CAMB_CLUSTER} "mkdir -p ${CAMB_CI_PATH}/${GITHUB_RUN_NUMBER}/source ${CAMB_CI_PATH}/${GITHUB_RUN_NUMBER}/source-main" \
           && rsync -a --delete ${GITHUB_WORKSPACE}/${REPO}/ ${CAMB_CLUSTER}:${CAMB_CI_PATH}/${GITHUB_RUN_NUMBER}/source/ \
           && rsync -a --delete ${GITHUB_WORKSPACE}/${REPO}_DIOPI/ ${CAMB_CLUSTER}:${CAMB_CI_PATH}/${GITHUB_RUN_NUMBER}/source-main/ || echo "failure to connect to camb"
@@ -82,7 +90,7 @@ jobs:
 
   Build-Camb:
     name: Build-dipu-camb
-    needs: [Tidy-Cuda]
+    needs: [Runs-On-Nv-Step1]
     runs-on: github-poc-ci
     env:
       MLU_REQUESTS: 1
@@ -133,7 +141,7 @@ jobs:
 
   Build-Camb-Pt211:
     name: Build-dipu-camb-pt211
-    needs: [Tidy-Cuda]
+    needs: [Runs-On-Nv-Step1]
     runs-on: github-poc-ci
     env:
       MLU_REQUESTS: 1
@@ -230,7 +238,7 @@ jobs:
 
   Build-Camb-Latest-Target:
     name: Build-dipu-camb-latest-target
-    needs: [Tidy-Cuda]
+    needs: [Runs-On-Nv-Step1]
     runs-on: github-poc-ci
     env:
       MLU_REQUESTS: 1
@@ -262,150 +270,25 @@ jobs:
               srun --job-name=${GITHUB_JOB} --partition=${CAMB_PARTATION} --time=40 --gres=mlu:${MLU_REQUESTS} sh tests/run_camb_tests.sh
             """
 
-  Build-Cuda:
-    name: Build-dipu-cuda
+  Runs-On-Nv-Step1:
+    name: runs on nv step1
     needs: [Rsync]
-    runs-on: tps-sco-ci
-    steps:
-      - name: Build dipu
-        run: |
-          set -e
-          cd ${DEEPLINK_PATH}/ && ls -al && find ${DEEPLINK_PATH}/ -maxdepth 1 -mmin +240 -type d |xargs rm -rf
-          cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source ${GITHUB_JOB}
-          srun --job-name=${GITHUB_JOB} bash -c "export USE_COVERAGE=ON && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${GITHUB_JOB}/dipu \
-          && source ${CUDA_ENV_PATH}/dipu_env \
-          && rsync -a /mnt/lustre/share_data/PAT/datasets/huggingface mmlab_pack/ \
-          && bash scripts/ci/nv/ci_nv_script.sh build_dipu" || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 )
-
-  Tidy-Cuda:
-    name: Run tidy (cuda)
-    needs: [Build-Cuda]
-    runs-on: tps-sco-ci
-    steps:
-      - name: Check SupportedDiopiFunctions.txt
-        run: |
-          cd $DEEPLINK_PATH/$GITHUB_RUN_NUMBER/Build-Cuda &&
-          git diff -s --exit-code dipu/SupportedDiopiFunctions.txt ||
-          { echo "::error file=dipu/SupportedDiopiFunctions.txt,title=File Not Match::Please commit your compiled SupportedDiopiFunctions.txt" && exit 1; }
-      - name: Run clang-tidy
-        run: |
-          srun --job-name=$GITHUB_JOB bash -c "bash $DEEPLINK_PATH/$GITHUB_RUN_NUMBER/Build-Cuda/dipu/scripts/ci/nv/ci_nv_tidy.sh"
-
-  Test-Cuda:
-    name: Test-dipu-cuda
-    needs: [Build-Cuda, Tidy-Cuda]
-    runs-on: tps-sco-ci
-    steps:
-      - name: Run-test
-        run: |
-          set -e
-          srun --job-name=${GITHUB_JOB} bash -c "export USE_COVERAGE=ON && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu \
-          && source ${CUDA_ENV_PATH}/dipu_env \
-          && bash tests/run_nv_tests.sh"
-          if [ "${ALL_COVERAGE}" = "ON" ]; then
-            bash /mnt/cache/share/platform/dep/sonar/coverage_DIPU_nv.sh ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda ${GITHUB_RUN_NUMBER} || echo "get coverage fail"
-          fi
-      - name: increment coverage check
-        if: ${{ contains( github.event_name, 'pull_request' ) && contains( github.base_ref, 'main' ) }}
-        run: |
-          set -e
-          cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda
-          ln -s ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source-main/dipu/third_party/DIOPI/scripts scripts
-          source ${CUDA_ENV_PATH}/dipu_env
-          bash scripts/increment_coverage.sh ${REQUIRE_COVERAGE}
-
-  Test-One-Iter_Cuda:
-    name: Test-one-iter-cuda
-    needs: [Build-Cuda, Tidy-Cuda]
-    runs-on: tps-sco-ci
-    steps:
-      - name: build some env
-        run: |
-          set -e
-          export basic_path=${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/
-          srun --job-name=${GITHUB_JOB} bash -c "cd ${basic_path} \
-          && export PYTHONPATH=${basic_path}/mmlab_pack:${basic_path}/mmlab_pack/mmengine:${basic_path}/mmlab_pack/mmcv:$PYTHONPATH \
-          && source ${CUDA_ENV_PATH}/dipu_env && cd mmlab_pack \
-          && bash ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/scripts/ci/ci_one_iter.sh build_cuda"
-      - name: run-one-iter-for-tradition
-        run: |
-          set -e
-          cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/ \
-          && source ${CUDA_ENV_PATH}/dipu_env && cd mmlab_pack \
-          && rm -rf one_iter_data \
-          && python ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/scripts/ci/ci_run_one_iter.py sco ${GITHUB_JOB} gpu sco && rm -rf one_iter_data || (rm -rf one_iter_data && exit 1)
-      - name: run-one-iter-for-llm
-        run: |
-          set -e
-          cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/ \
-          && source ${CUDA_ENV_PATH}/dipu_env && cd mmlab_pack \
-          && rm -rf one_iter_data \
-          && python ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/scripts/ci/ci_run_one_iter.py sco ${GITHUB_JOB} gpu sco "llm" && rm -rf one_iter_data || (rm -rf one_iter_data && exit 1)
-      - name: Perform cleanup one iter data
-        if: always()
-        run: |
-          set -e
-          cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack
-          rm -rf one_iter_data
-          touch one_iter_data #用于占位,防止创建新的 one_iter_data 文件夹
-      - name: Check for failure
-        if: ${{ failure() }}
-        run: exit 1
-
-  Build-Cuda-Latest-Target:
-    name: Build-dipu-cuda-latest-target
-    needs: [Tidy-Cuda]
-    runs-on: tps-sco-ci
-    steps:
-      - name: Build dipu diopi-latest-target
-        run: |
-          set -e
-          cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source-main ${GITHUB_JOB}
-          srun --job-name=${GITHUB_JOB} bash -c "export USE_COVERAGE=ON && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${GITHUB_JOB}/dipu \
-          && source ${CUDA_ENV_PATH}/dipu_env \
-          && bash scripts/ci/nv/ci_nv_script.sh build_dipu" || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 )
+    uses: ./.github/workflows/_runs-on-nv-step1.yml
+    with:
+      runner: ${{ needs.Rsync.outputs.getrunner }}
 
-  Test-Cuda-Latest-Target:
-    name: Test-dipu-cuda-latest-target
-    needs: [Build-Cuda-Latest-Target]
-    runs-on: tps-sco-ci
-    steps:
-      - name: Run-test
-        run: |
-          set -e
-          srun --job-name=${GITHUB_JOB} bash -c "cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda-Latest-Target/dipu \
-          && source ${CUDA_ENV_PATH}/dipu_env \
-          && bash tests/run_nv_tests.sh" && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf Build-Cuda-Latest-Target
-
-  Build-Cuda-Pt211:
-    name: Build-dipu-cuda-pt211
-    needs: [Tidy-Cuda]
-    runs-on: tps-sco-ci
-    steps:
-      - name: Build dipu
-        run: |
-          set -e
-          cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source ${GITHUB_JOB}
-          srun --job-name=${GITHUB_JOB} bash -c "cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${GITHUB_JOB}/dipu \
-          && source ${CUDA_ENV_PATH}/dipu_env 2.1.1 \
-          && bash scripts/ci/nv/ci_nv_script.sh build_dipu " || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 )
-
-
-  Test-Cuda-Pt211:
-    name: Test-dipu-cuda-pt211
-    needs: [Build-Cuda-Pt211]
-    runs-on: tps-sco-ci
-    steps:
-      - name: Run-test
-        run: |
-          set -e
-          srun --job-name=${GITHUB_JOB} bash -c "cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda-Pt211/dipu \
-          && source ${CUDA_ENV_PATH}/dipu_env 2.1.1 \
-          && bash tests/run_nv_tests.sh" && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf Build-Cuda-Pt211
+  Runs-On-Nv-Step2:
+    name: runs on nv step2
+    needs: [Rsync,Runs-On-Nv-Step1]
+    uses: ./.github/workflows/_runs-on-nv-step2.yml
+    with:
+      runner: ${{ needs.Rsync.outputs.getrunner }}
+      all_coverage: ${{ (contains( github.ref, 'main') || startsWith(github.ref, 'refs/heads/v') || startsWith(github.ref, 'refs/heads/dev')) && 'ON' || 'OFF' }}
+      require_coverage: ${{ vars.REQUIRE_COVERAGE != '' && vars.REQUIRE_COVERAGE || '0' }}
 
   Build-PyTorch-For-Ascend-910b:
     name: Build-dipu-pytorch-for-ascend-910b
-    needs: [Tidy-Cuda]
+    needs: [Runs-On-Nv-Step1]
     runs-on: tps-ascend-ci-910b
     steps:
       - name: Build PyTorch
diff --git a/dipu/scripts/ci/test_one_iter_large_language_model_list.yaml b/dipu/scripts/ci/test_one_iter_large_language_model_list.yaml
index f0cbccc9d..9f6898ff6 100644
--- a/dipu/scripts/ci/test_one_iter_large_language_model_list.yaml
+++ b/dipu/scripts/ci/test_one_iter_large_language_model_list.yaml
@@ -3,7 +3,7 @@ cuda:
     # - model_cfg: "alpaca-lora run_llama_finetune.py workdirs_alpaca_lora_llama_finetune"
     # transformers
     - model_cfg: "transformers examples/pytorch/language-modeling/llama_7b_infer.py workdirs_transformers_llama_infer"
-    - model_cfg: "transformers examples/pytorch/language-modeling/internlm_7b_infer.py workdirs_transformers_internlm_infer"
+#    - model_cfg: "transformers examples/pytorch/language-modeling/internlm_7b_infer.py workdirs_transformers_internlm_infer"
 
     # lightllm
     # - model_cfg: "lightllm llama_7b_via_lightllm_infer.py workdirs_lightllm_llama_infer"