# add v100 on 1424 #4
# Workflow file for this run
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow identity and triggers.
name: runs on 1424

# Runs on manual dispatch and on pull requests, except pull requests that only
# touch the ignored documentation / repository-metadata paths listed below.
on:
  workflow_dispatch:
  pull_request:
    paths-ignore:
      - "**.md"
      - ".github/ISSUE_TEMPLATE/**"
      - ".git*"
      - "CODE_OF_CONDUCT**"

# One active run per pull-request head branch (or per ref when there is no
# head branch); a newer run cancels the one still in progress.
concurrency:
  group: 1424-${{ github.head_ref || github.ref }}
  cancel-in-progress: true
# Workflow-wide environment, visible to every step of every job.
# NOTE(review): the jobs of this workflow also read DEEPLINK_PATH,
# CUDA_CLUSTER, CUDA_CI_PATH and CUDA_PARTATION, none of which is defined in
# this block. Unless the self-hosted runner exports them, they expand to empty
# strings in the shell steps - confirm, or define them here.
env:
  NFS_PATH: '/mnt/cache/share/parrotsci/github/cibuild/${{ github.repository }}'
  ENV_PATH: '/mnt/cache/share/platform/cienv'
  # ssh host name used by the Rsync job.
  CLUSTER_V100: SH1424
  REPO: ${{ github.event.repository.name }}
  # 'ON' when the ref contains 'main' or is a branch starting with 'v' or
  # 'dev'; 'OFF' otherwise.
  ALL_COVERAGE: ${{ (contains( github.ref, 'main') || startsWith(github.ref, 'refs/heads/v') || startsWith(github.ref, 'refs/heads/dev')) && 'ON' || 'OFF' }}
  # Repository variable REQUIRE_COVERAGE when it is set, '0' otherwise.
  REQUIRE_COVERAGE: ${{ vars.REQUIRE_COVERAGE != '' && vars.REQUIRE_COVERAGE || '0' }}
# All jobs run on the self-hosted runner label github-poc-ci and drive the
# remote cluster over ssh. Every job works below a per-run directory named
# after GITHUB_RUN_NUMBER.
#
# Quoting note for the ssh steps: the remote script is passed as one
# double-quoted argument (the leading and trailing """ are an empty string
# plus an ordinary double quote). Unescaped ${VAR} is therefore expanded on the
# runner before ssh is started; \${VAR} and \$(...) are expanded on the cluster.
# Any double quote that must reach the remote shell has to be written as \".
jobs:
  # Clones the code under test, prepares two source trees (the submodule DIOPI
  # and the latest target-branch DIOPI) and copies both to the cluster.
  Rsync:
    name: Rsync code
    runs-on: github-poc-ci
    steps:
      - name: clone repo
        run: |
          set -ex
          cd ${GITHUB_WORKSPACE} && rm -rf DIPU ${REPO}_DIOPI ${REPO} ${REPO}.dev
          if [ -n "${{ github.event.pull_request.head.repo.full_name }}" ] && [[ ! "${{ github.event.pull_request.head.repo.full_name }}" == "${{ github.repository }}" ]]; then
            # Pull request from a fork: clone the fork, check out the head commit
            # and merge the base branch of the main repository into it.
            git clone ${{ github.event.pull_request.head.repo.ssh_url }} ${REPO}
            cd ${GITHUB_WORKSPACE}/${REPO} && git checkout ${{ github.event.pull_request.head.sha }}
            git remote add mainrepo git@github.com:${GITHUB_REPOSITORY}.git
            git fetch mainrepo && git merge --no-edit mainrepo/${{ github.base_ref }}
          else
            git clone https://github.com/DeepLink-org/${REPO}.git && cd ${REPO}
            if [ $GITHUB_EVENT_NAME == "pull_request" ]; then
              echo "${{ github.base_ref }} "
              git checkout ${{ github.event.pull_request.head.sha }} && git merge --no-edit origin/${{ github.base_ref }}
            else
              echo $GITHUB_EVENT_NAME
              git checkout ${{ github.sha }}
            fi
          fi
          cd ${GITHUB_WORKSPACE}/${REPO}/dipu && rm -rf third_party/kineto
          git clone --reference /home/autolink/rsync/sourcecode/DeepLink-org/kineto https://github.com/DeepLink-org/kineto.git third_party/kineto
          git submodule update --init && cd third_party/kineto && git submodule update --init
          cd ${GITHUB_WORKSPACE} && cp -R ${REPO} ${REPO}_DIOPI
          cd ${REPO}/dipu && bash /home/autolink/rsync/sourcecode/update_code.sh
          rsync -a /home/autolink/rsync/sourcecode/mmlab_pack . && cd mmlab_pack
          bash ../scripts/ci/ci_one_iter.sh clone
          # dipu_diopi depends on the latest target DIOPI branch, not on the DIOPI submodule.
          # This assumes DIOPI and DIPU use the same target branch (github.base_ref).
          cd ${GITHUB_WORKSPACE}/${REPO}_DIOPI/dipu/third_party && rm -rf DIOPI && git clone https://github.com/DeepLink-org/DIOPI.git
          if [ $GITHUB_EVENT_NAME == "pull_request" ]; then
            cd ./DIOPI && git checkout ${{ github.base_ref }}
          fi
      # NOTE(review): a failed mkdir/rsync only prints a message here, so this
      # step still succeeds and the failure surfaces later in the build jobs.
      # Presumably a deliberate best-effort - confirm.
      # NOTE(review): the trees are written below DEEPLINK_PATH while the build
      # jobs read them from CUDA_CI_PATH; both must name the same directory.
      - name: Rsync to Server
        run: |
          ssh ${CLUSTER_V100} "mkdir -p ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source-main" \
            && rsync -a --delete ${GITHUB_WORKSPACE}/${REPO}/ ${CLUSTER_V100}:${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source/ \
            && rsync -a --delete ${GITHUB_WORKSPACE}/${REPO}_DIOPI/ ${CLUSTER_V100}:${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source-main/ || echo "failure to connect to cuda"

  # Copies the synced "source" tree into a directory named after this job
  # (Build-Cuda) and builds dipu there through srun. The directory is removed
  # again when the build fails.
  Build-Cuda:
    name: Build-dipu-cuda
    needs: [Rsync]
    runs-on: github-poc-ci
    env:
      GPU_REQUESTS: 1
    steps:
      - name: Build dipu
        run: |
          ssh ${CUDA_CLUSTER} """
          set -e
          export USE_COVERAGE=ON
          cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source ${GITHUB_JOB} && cd ${GITHUB_JOB}/dipu
          source scripts/ci/nv/ci_nv_env.sh
          rsync -a /mnt/lustre/share_data/PAT/datasets/huggingface mmlab_pack/
          srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:${GPU_REQUESTS} --time=30 bash scripts/ci/nv/ci_nv_script.sh build_dipu \
          || ( cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 )
          """

  # Fails when the build changed dipu/SupportedDiopiFunctions.txt relative to
  # what is committed, then runs the clang-tidy script through srun.
  Tidy-Cuda:
    name: Run tidy (cuda)
    needs: [Build-Cuda]
    runs-on: github-poc-ci
    steps:
      # The inner double quotes are escaped on purpose: unescaped, they close
      # the outer ssh string and the remote side runs "bash -c bash <path>",
      # which starts a bare bash and never executes the tidy script.
      # NOTE(review): this job locates the build tree through DEEPLINK_PATH
      # while Build-Cuda creates it below CUDA_CI_PATH - confirm both agree.
      - name: clang-tidy
        run: |
          ssh $CUDA_CLUSTER """
          set -eo pipefail
          source ~/.bashrc
          cd $DEEPLINK_PATH/$GITHUB_RUN_NUMBER/Build-Cuda &&
          git diff -s --exit-code dipu/SupportedDiopiFunctions.txt ||
          { echo \"::error file=dipu/SupportedDiopiFunctions.txt,title=File Not Match::Please commit your compiled SupportedDiopiFunctions.txt\" && exit 1; }
          srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} bash -c \"bash $DEEPLINK_PATH/$GITHUB_RUN_NUMBER/Build-Cuda/dipu/scripts/ci/nv/ci_nv_tidy.sh\"
          """

  # Runs the test script inside the Build-Cuda tree and, depending on the
  # branch, collects coverage.
  Test-Cuda:
    name: Test-dipu-cuda
    needs: [Build-Cuda, Tidy-Cuda]
    runs-on: github-poc-ci
    env:
      GPU_REQUESTS: 1
    steps:
      # Coverage collection only runs when ALL_COVERAGE is ON and is
      # best-effort: a failure is reported with a message but does not fail
      # the step.
      - name: Run-test
        run: |
          ssh ${CUDA_CLUSTER} """
          set -ex
          export USE_COVERAGE=ON
          cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu
          source scripts/ci/nv/ci_nv_env.sh
          srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:${GPU_REQUESTS} --cpus-per-task=5 --mem=16G --time=70 sh tests/run_nv_tests.sh
          if [ "${ALL_COVERAGE}" = "ON" ]; then
          bash /mnt/cache/share/platform/dep/sonar/coverage_DIPU_nv.sh ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda ${GITHUB_RUN_NUMBER} || echo "get coverage fail"
          fi
          """
      # Only for pull requests whose base branch name contains 'main'. Replaces
      # the scripts directory of the build tree with a link to the scripts of
      # the target-branch DIOPI checkout before running the coverage check.
      - name: increment coverage check
        if: ${{ contains( github.event_name, 'pull_request' ) && contains( github.base_ref, 'main' ) }}
        run: |
          ssh ${CUDA_CLUSTER} """
          set -e
          cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/
          rm -rf scripts
          ln -s ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/source-main/dipu/third_party/DIOPI/scripts scripts
          source /mnt/cache/share/platform/env/pt2.0_diopi
          bash scripts/increment_coverage.sh ${REQUIRE_COVERAGE}
          """

  # Builds the mmlab_pack environment inside the Build-Cuda tree and runs the
  # one-iteration checks (default set, then the "llm" set).
  Test-One-Iter_Cuda:
    name: Test-one-iter-cuda
    needs: [Build-Cuda, Tidy-Cuda]
    runs-on: github-poc-ci
    env:
      GPU_REQUESTS: 1
    steps:
      - name: build some env
        run: |
          ssh ${CUDA_CLUSTER} """
          set -ex
          cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu
          source scripts/ci/nv/ci_nv_env.sh
          basic_path=${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack
          export PYTHONPATH=\${basic_path}/mmengine:\$PYTHONPATH
          export PYTHONPATH=\${basic_path}/mmcv:\$PYTHONPATH
          export PYTHONPATH=\$(pwd):\$PYTHONPATH
          cd mmlab_pack
          srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:${GPU_REQUESTS} --time=20 bash ../scripts/ci/ci_one_iter.sh build_cuda
          """
      # one_iter_data is removed before the run and again afterwards, whether
      # the run succeeded or failed; a failure still fails the step.
      - name: run-one-iter-for-tradition
        run: |
          ssh ${CUDA_CLUSTER} """
          cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu
          source scripts/ci/nv/ci_nv_env.sh
          basic_path=${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack
          source scripts/ci/ci_one_iter.sh export_pythonpath_cuda \${basic_path}
          export PYTHONPATH=\$(pwd):\$PYTHONPATH
          cd mmlab_pack
          rm -rf one_iter_data
          python ../scripts/ci/ci_run_one_iter.py cuda ${GITHUB_RUN_NUMBER}_${GITHUB_JOB} "gpu:${GPU_REQUESTS}" \"${CUDA_PARTATION}\" && rm -rf one_iter_data || (rm -rf one_iter_data && exit 1)
          """
      # Same as the previous step, with the extra "llm" argument.
      - name: run-one-iter-for-llm
        run: |
          ssh ${CUDA_CLUSTER} """
          cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu
          source scripts/ci/nv/ci_nv_env.sh
          basic_path=${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack
          source scripts/ci/ci_one_iter.sh export_pythonpath_cuda \${basic_path}
          export PYTHONPATH=\$(pwd):\$PYTHONPATH
          cd mmlab_pack
          rm -rf one_iter_data
          python ../scripts/ci/ci_run_one_iter.py cuda ${GITHUB_RUN_NUMBER}_${GITHUB_JOB} "gpu:${GPU_REQUESTS}" \"${CUDA_PARTATION}\" "llm" && rm -rf one_iter_data || (rm -rf one_iter_data && exit 1)
          """
      # Always runs: cancels cluster jobs carrying this job's name and replaces
      # one_iter_data with a placeholder file.
      - name: Perform cleanup one iter data
        if: always()
        run: |
          ssh ${CUDA_CLUSTER} """
          set -ex
          echo "${GITHUB_RUN_NUMBER}_${GITHUB_JOB}"
          scancel -n "${GITHUB_RUN_NUMBER}_${GITHUB_JOB}"
          cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack
          rm -rf one_iter_data
          touch one_iter_data # placeholder file that prevents a new one_iter_data directory from being created
          """
      - name: Check for failure
        if: ${{ failure() }}
        run: exit 1

  # Same build as Build-Cuda, but from the "source-main" tree, which carries
  # the target-branch DIOPI checkout prepared by the Rsync job.
  Build-Cuda-Latest-Target:
    name: Build-dipu-cuda-latest-target
    needs: [Tidy-Cuda]
    runs-on: github-poc-ci
    env:
      GPU_REQUESTS: 1
    steps:
      - name: Build dipu diopi-latest-target
        run: |
          ssh ${CUDA_CLUSTER} """
          set -ex
          cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source-main ${GITHUB_JOB} && cd ${GITHUB_JOB}/dipu
          source scripts/ci/nv/ci_nv_env.sh
          srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:${GPU_REQUESTS} --cpus-per-task=5 --mem=16G --time=30 bash scripts/ci/nv/ci_nv_script.sh build_dipu \
          || ( cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 )
          """

  # Runs the test script inside the Build-Cuda-Latest-Target tree and removes
  # that tree afterwards, on success as well as on failure.
  Test-Cuda-Latest-Target:
    name: Test-dipu-cuda-latest-target
    needs: [Build-Cuda-Latest-Target]
    runs-on: github-poc-ci
    env:
      GPU_REQUESTS: 1
    steps:
      # The failure path names the build directory explicitly: GITHUB_JOB is
      # this test job's id here, not the directory created by the build job.
      - name: Run-test
        run: |
          ssh ${CUDA_CLUSTER} """
          set -ex
          cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda-Latest-Target/dipu
          source scripts/ci/nv/ci_nv_env.sh
          srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:${GPU_REQUESTS} --cpus-per-task=5 --mem=16G --time=60 sh tests/run_nv_tests.sh && cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf Build-Cuda-Latest-Target \
          || ( cd ${CUDA_CI_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf Build-Cuda-Latest-Target && exit 1 )
          """