-
Notifications
You must be signed in to change notification settings - Fork 29
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add scripts to create benchmark in ci. (#896)
* add scripts and config files for the perf test. * add ci related files which make this ci decoupled with one_iter. * fix python black
- Loading branch information
1 parent
fb62c85
commit 1bffe33
Showing
3 changed files
with
406 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,204 @@ | ||
#!/bin/bash | ||
|
||
function check_and_clone_repository() {
    # Clone (or update) one dependency repository into the current directory.
    #   $1 - repository name
    #   $2 - branch, tag, or commit sha to check out
    # mmcv/mmengine live under open-mmlab; everything else under DeepLink-org.
    # Fix: quote all expansions (word-splitting safety) and mark variables
    # local so they do not leak into the calling shell.
    local repo_name=$1
    local branch_name=$2
    local current_path
    current_path=$(pwd)
    local repo_path="$current_path/$repo_name"
    local clone_url
    if [ "$repo_name" == "mmcv" ] || [ "$repo_name" == "mmengine" ]; then
        clone_url="https://github.com/open-mmlab/$repo_name.git"
    else
        clone_url="https://github.com/DeepLink-org/$repo_name.git"
    fi
    if [ -d "$repo_path" ]; then
        cd "$repo_name"
        # Concatenate branch name, HEAD sha and tag so $branch_name can match
        # any of the three ref spellings via the regex test below.
        local current_branch
        current_branch=$(git rev-parse --abbrev-ref HEAD)_$(git rev-parse HEAD)_$(git describe --tags 2>/dev/null || echo "none")
        if [[ "$current_branch" =~ "$branch_name" ]]; then
            echo "$repo_name $branch_name is right"
            cd ..
        else
            git checkout main && git pull && git checkout "$branch_name"
            cd ..
        fi
    else
        cd "$current_path" && rm -rf "$repo_name"
        # Fallback: commit shas cannot be cloned with -b, so clone then checkout.
        git clone -b "${branch_name}" "${clone_url}" || (git clone "${clone_url}" && cd "$repo_name" && git checkout "${branch_name}" && cd ..)
    fi
}
|
||
function clone_needed_repo() {
    # Clone every repository the perf CI needs, pinned to the refs below.
    # Fails fast: set -e aborts on the first failing clone/checkout.
    set -e
    # clone some repositories
    SMART_VERSION=dev_for_mmcv2.0
    TRANSFORMERS=main
    LIGHTLLM=main
    DEEPLINKEXT=2a47138de420a0147e8de70685e628d3732135d7
    ALPACALORA=sco_benchmark_finetune

    check_and_clone_repository "SMART" ${SMART_VERSION}
    check_and_clone_repository "transformers" ${TRANSFORMERS}
    check_and_clone_repository "lightllm" ${LIGHTLLM}
    check_and_clone_repository "DeepLinkExt" ${DEEPLINKEXT}
    check_and_clone_repository "alpaca-lora" ${ALPACALORA}
    # NOTE(review): each check_and_clone_repository call returns to the
    # starting directory, so this final `cd ..` leaves the shell one level
    # ABOVE where the function started — confirm callers rely on that.
    cd ..
}
|
||
function build_needed_repo_cuda() {
    # Build the in-place extensions needed on CUDA (mmcv with DIOPI ops and
    # DeepLinkExt) and install alpaca-lora's python requirements.
    # Fix: guard each cd so a missing checkout cannot make us run a build
    # step in the wrong directory.
    cd mmcv || return 1
    MMCV_WITH_DIOPI=1 MMCV_WITH_OPS=1 python setup.py build_ext -i
    cd ..
    cd DeepLinkExt || return 1
    python setup.py build_ext -i
    cd ..
    cd alpaca-lora || return 1
    pip install -r requirements.txt
    cd ..
}
|
||
function build_needed_repo_camb() {
    # Build mmcv (with DIOPI ops) in place for the Cambricon backend.
    # Fix: guard the cd so a missing checkout cannot run the build elsewhere.
    cd mmcv || return 1
    MMCV_WITH_DIOPI=1 MMCV_WITH_OPS=1 python setup.py build_ext -i
    cd ..
}
|
||
function build_needed_repo_ascend() {
    # Build mmcv (with DIOPI ops) in place for the Ascend backend.
    # Fix: guard the cd so a missing checkout cannot run the build elsewhere.
    cd mmcv || return 1
    MMCV_WITH_DIOPI=1 MMCV_WITH_OPS=1 python setup.py build_ext -i
    cd ..
}
|
||
function build_needed_repo_kunlunxin() {
    # Nothing needs building for Kunlunxin; print a marker so CI logs show
    # the step executed.
    echo "skip"
}
|
||
|
||
function export_repo_pythonpath(){
    # Prepend the cloned repositories to PYTHONPATH for the given backend.
    #   $1 - backend name (only "cuda" is handled)
    #   $2 - base path where the repositories were cloned
    # Must be sourced, not executed, for the exports to reach the caller.
    # NOTE(review): the dispatcher also calls this with camb/ascend/kunlunxin,
    # which silently do nothing here — confirm that is intended.
    basic_path="$2"
    if [ "$1" = "cuda" ]; then
        echo "Executing CUDA operation in pythonpath..."
        export PYTHONPATH=${basic_path}:$PYTHONPATH
        export PYTHONPATH=${basic_path}/transformers/src:$PYTHONPATH
        export PYTHONPATH=${basic_path}/lightllm:$PYTHONPATH

        # set the environment variable for the transformers repository
        export HF_HOME=${basic_path}/huggingface
        export HUGGINGFACE_HUB_CACHE=/mnt/lustre/share_data/PAT/datasets/hub

        export PYTHONPATH=${basic_path}/mmcv:$PYTHONPATH
        export PYTHONPATH=${basic_path}/SMART/tools/one_iter_tool/one_iter:$PYTHONPATH
        echo "python path: $PYTHONPATH"
    fi
}
|
||
|
||
function build_dataset(){
    # Recreate ./data and symlink the datasets/checkpoints the selected
    # backend needs.  $1 selects the backend.
    # Fixes: the ascend910b branch logged "ASCEND" (copy-paste), and the
    # error message omitted the valid option 'ascend910b'.
    # link dataset
    if [ "$1" = "cuda" ]; then
        echo "Executing CUDA operation in build dataset..."
        rm -rf data
        mkdir data
        ln -s /mnt/lustre/share_data/PAT/datasets/Imagenet data/imagenet
        ln -s /mnt/lustre/share_data/PAT/datasets/mscoco2017 data/coco
        ln -s /mnt/lustre/share_data/PAT/datasets/mmseg/cityscapes data/cityscapes
        ln -s /mnt/lustre/share_data/PAT/datasets/Kinetics400 data/kinetics400
        ln -s /mnt/lustre/share_data/PAT/datasets/icdar2015 data/icdar2015
        ln -s /mnt/lustre/share_data/PAT/datasets/mjsynth data/mjsynth
        ln -s /mnt/lustre/share_data/PAT/datasets/kitti data/kitti
        ln -s /mnt/lustre/share_data/PAT/datasets/mmdet/checkpoint/swin_large_patch4_window12_384_22k.pth data/swin_large_patch4_window12_384_22k.pth
        ln -s /mnt/lustre/share_data/PAT/datasets/stable-diffusion-v1-5 data/stable-diffusion-v1-5
        ln -s /mnt/lustre/share_data/PAT/datasets/llama_1B_oneiter data/llama_1B_oneiter

    elif [ "$1" = "camb" ]; then
        echo "Executing CAMB operation in build dataset..."
        rm -rf data
        mkdir data
        ln -s /mnt/lustre/share_data/PAT/datasets/Imagenet data/imagenet
        ln -s /mnt/lustre/share_data/PAT/datasets/mscoco2017 data/coco
        ln -s /mnt/lustre/share_data/PAT/datasets/mmseg/cityscapes data/cityscapes
        ln -s /mnt/lustre/share_data/PAT/datasets/mmdet3d/mmdet3d_kitti data/kitti
        ln -s /mnt/lustre/share_data/PAT/datasets/mmaction/Kinetics400 data/kinetics400
        ln -s /mnt/lustre/share_data/PAT/datasets/mmocr/icdar2015 data/icdar2015
        ln -s /mnt/lustre/share_data/PAT/datasets/mmocr/mjsynth data/mjsynth
        ln -s /mnt/lustre/share_data/PAT/datasets/mmdet/checkpoint/swin_large_patch4_window12_384_22k.pth data/swin_large_patch4_window12_384_22k.pth
        ln -s /mnt/lustre/share_data/PAT/datasets/pretrain/torchvision/resnet50-0676ba61.pth data/resnet50-0676ba61.pth
        ln -s /mnt/lustre/share_data/PAT/datasets/mmdet/pretrain/vgg16_caffe-292e1171.pth data/vgg16_caffe-292e1171.pth
        ln -s /mnt/lustre/share_data/PAT/datasets/mmdet/pretrain/darknet53-a628ea1b.pth data/darknet53-a628ea1b.pth
        ln -s /mnt/lustre/share_data/PAT/datasets/mmpose/pretrain/hrnet_w32-36af842e.pth data/hrnet_w32-36af842e.pth
        ln -s /mnt/lustre/share_data/PAT/datasets/pretrain/mmcv/resnet50_v1c-2cccc1ad.pth data/resnet50_v1c-2cccc1ad.pth

    elif [ "$1" = "ascend" ]; then
        echo "Executing ASCEND operation in build dataset..."
        rm -rf data
        mkdir data
        ln -s /mnt/lustre/share_data/PAT/datasets/Imagenet data/imagenet
        ln -s /mnt/lustre/share_data/PAT/datasets/mscoco2017 data/coco
        ln -s /mnt/lustre/share_data/PAT/datasets/mmseg/cityscapes data/cityscapes
        ln -s /mnt/lustre/share_data/PAT/datasets/kitti data/kitti
        ln -s /mnt/lustre/share_data/PAT/datasets/mmaction/Kinetics400 data/kinetics400
        ln -s /mnt/lustre/share_data/PAT/datasets/mmocr/icdar2015 data/icdar2015
        ln -s /mnt/lustre/share_data/PAT/datasets/mmocr/mjsynth data/mjsynth
        ln -s /mnt/lustre/share_data/PAT/datasets/mmdet/checkpoint/swin_large_patch4_window12_384_22k.pth data/swin_large_patch4_window12_384_22k.pth
        ln -s /mnt/lustre/share_data/PAT/datasets/pretrain/torchvision/resnet50-0676ba61.pth data/resnet50-0676ba61.pth
        ln -s /mnt/lustre/share_data/PAT/datasets/mmdet/pretrain/vgg16_caffe-292e1171.pth data/vgg16_caffe-292e1171.pth
        ln -s /mnt/lustre/share_data/PAT/datasets/mmdet/pretrain/darknet53-a628ea1b.pth data/darknet53-a628ea1b.pth
        ln -s /mnt/lustre/share_data/PAT/datasets/mmpose/pretrain/hrnet_w32-36af842e.pth data/hrnet_w32-36af842e.pth
        ln -s /mnt/lustre/share_data/PAT/datasets/pretrain/mmcv/resnet50_v1c-2cccc1ad.pth data/resnet50_v1c-2cccc1ad.pth
    elif [ "$1" = "ascend910b" ]; then
        echo "Executing ASCEND910B operation in build dataset..."
        rm -rf data
        mkdir data
        ln -s /mnt/cache/share/datasets/Imagenet data/imagenet
        ln -s /mnt/cache/share/datasets/mscoco2017 data/coco
        ln -s /mnt/cache/share/datasets/mmseg/cityscapes data/cityscapes
        ln -s /mnt/cache/share/datasets/kitti data/kitti
        ln -s /mnt/cache/share/datasets/mmaction/Kinetics400 data/kinetics400
        ln -s /mnt/cache/share/datasets/mmocr/icdar2015 data/icdar2015
        ln -s /mnt/cache/share/datasets/mmocr/mjsynth data/mjsynth
        ln -s /mnt/cache/share/datasets/mmdet/checkpoint/swin_large_patch4_window12_384_22k.pth data/swin_large_patch4_window12_384_22k.pth
        ln -s /mnt/cache/share/datasets/pretrain/torchvision/resnet50-0676ba61.pth data/resnet50-0676ba61.pth
        ln -s /mnt/cache/share/datasets/mmdet/pretrain/vgg16_caffe-292e1171.pth data/vgg16_caffe-292e1171.pth
        ln -s /mnt/cache/share/datasets/mmdet/pretrain/darknet53-a628ea1b.pth data/darknet53-a628ea1b.pth
        ln -s /mnt/cache/share/datasets/mmpose/pretrain/hrnet_w32-36af842e.pth data/hrnet_w32-36af842e.pth
        ln -s /mnt/cache/share/datasets/pretrain/mmcv/resnet50_v1c-2cccc1ad.pth data/resnet50_v1c-2cccc1ad.pth
    elif [ "$1" = "kunlunxin" ]; then
        echo "Executing KUNLUNXIN operation in build dataset..."
        rm -rf data
        mkdir data
        ln -s /mnt/cache/share/datasets/imagenet data/imagenet

    else
        echo "Invalid parameter. Please specify 'cuda', 'camb', 'ascend', 'ascend910b' or 'kunlunxin'."
        exit 1
    fi
}
|
||
|
||
# Dispatch on the sub-command given as $1.
# NOTE(review): the export_pythonpath_* commands mutate the environment, so
# this script is expected to be sourced for them; the `exit 1` added below
# for unknown options will then close the sourcing shell — confirm this is
# acceptable (build_dataset already exits 1 the same way).
case $1 in
    clone)
        clone_needed_repo;;
    build_cuda)
        build_needed_repo_cuda
        build_dataset cuda;;
    build_camb)
        build_needed_repo_camb
        build_dataset camb;;
    build_ascend)
        build_needed_repo_ascend
        build_dataset ascend;;
    build_ascend910b)
        build_needed_repo_ascend
        build_dataset ascend910b;;
    build_kunlunxin)
        build_needed_repo_kunlunxin
        build_dataset kunlunxin;;
    export_pythonpath_camb)
        export_repo_pythonpath camb $2;;
    export_pythonpath_cuda)
        export_repo_pythonpath cuda $2;;
    export_pythonpath_ascend)
        export_repo_pythonpath ascend $2;;
    export_pythonpath_kunlunxin)
        export_repo_pythonpath kunlunxin $2;;
    *)
        # Fix: previously fell through with status 0, so CI treated a typo'd
        # option as success.
        echo -e "[ERROR] Incorrect option:" $1;
        exit 1;;
esac
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,182 @@ | ||
import os | ||
import sys | ||
from multiprocessing import Pool | ||
import subprocess as sp | ||
import time | ||
import yaml | ||
import multiprocessing | ||
import argparse | ||
import logging | ||
import json | ||
|
||
# Module-wide logging: timestamped INFO-level messages on the root logger.
log_format = "%(asctime)s - %(levelname)s: %(message)s"
logging.basicConfig(level=logging.INFO, format=log_format, datefmt="%Y-%m-%d %H:%M:%S")
|
||
|
||
def run_cmd(cmd: str) -> None:
    """Run *cmd* through the shell and raise if it exits nonzero.

    Fix: the original interpolated ``cp.stderr`` into the error message, but
    never piped stderr, so the message always ended in ``None``.  stderr is
    now captured and included.

    Args:
        cmd: shell command line, executed with ``shell=True``.

    Raises:
        RuntimeError: if the command returns a nonzero exit status
            (``RuntimeError`` is an ``Exception`` subclass, so existing
            ``except Exception`` callers still catch it).
    """
    cp = sp.run(cmd, shell=True, encoding="utf-8", stderr=sp.PIPE)
    if cp.returncode != 0:
        raise RuntimeError(
            f"Some thing wrong has happened when running command [{cmd}]:{cp.stderr}"
        )
|
||
|
||
def parse_device_task(device):
    """Load the perf-test configuration entry for *device*.

    Reads ``test_perf_config.json`` located next to this script and returns
    the section keyed by *device*; an empty dict when the device has no
    section.
    """
    script_dir = os.path.dirname(os.path.realpath(__file__))
    config_path = script_dir + "/test_perf_config.json"
    with open(config_path) as fp:
        all_configs = json.load(fp)
    return all_configs.get(device, dict())
|
||
|
||
def process_test_perf(log_file, clear_log, task: dict) -> None:
    """Run a single perf-test task via srun and extract its perf number.

    Args:
        log_file: log file name (created under the starting cwd) that
            captures the run's stdout and stderr.
        clear_log: if True the log is truncated before the run, otherwise
            output is appended.
        task: task config with keys ``name``, ``partition``,
            ``gpu_requests``, ``relative_workdir``, ``script``, ``filter``,
            ``op_args`` and an optional ``fallback_op_list``.

    NOTE(review): reads the module-level global ``device`` (assigned in
    ``__main__``) to detect the sco cluster — confirm it is always defined
    before pool workers call in.
    """
    import re

    # READ CONFIG
    task_name = task["name"]
    storage_path = os.getcwd() + "/perf_data/" + task_name
    partition = task["partition"]
    job_name = "trial"
    gpu_requests = task["gpu_requests"]
    relative_workdir = task["relative_workdir"]
    task_script = task["script"]
    filter_pattern = task["filter"]
    op_args = task["op_args"]

    os.environ["ONE_ITER_TOOL_STORAGE_PATH"] = storage_path
    # An empty value disables forced fallbacks when the task sets none.
    os.environ["DIPU_FORCE_FALLBACK_OPS_LIST"] = task.get("fallback_op_list", "")

    logging.info(f"task_name = {task_name}")

    os.makedirs(storage_path, exist_ok=True)

    # GENERATE RUN COMMAND
    cmd_run_test_perf = (
        f"srun --job-name={job_name} --partition={partition} "
        f"--gres={gpu_requests} python {task_script} {op_args}"
    )
    if device == "sco":
        current_path = os.getcwd()
        parent_directory = os.path.dirname(current_path)
        cmd_run_test_perf = f"""srun --job-name={job_name} bash -c "cd {parent_directory} && source scripts/ci/ci_one_iter.sh export_pythonpath_cuda {current_path} && source /mnt/cache/share/deeplinkci/github/dipu_env && cd mmlab_pack && source environment_exported && export ONE_ITER_TOOL_STORAGE_PATH={storage_path} && python {current_path}/{task_script}" """

    print(cmd_run_test_perf)

    current_path = os.getcwd()
    os.chdir(relative_workdir)
    # Redirect stdout AND stderr into the log.  The original used
    # "cmd 2>&1 > file", which sends stderr to the terminal, not the file.
    redirect = ">" if clear_log else ">>"
    run_cmd(f"{cmd_run_test_perf} {redirect} {current_path}/{log_file} 2>&1")
    os.chdir(current_path)

    print("MATCH_PATTERN:", filter_pattern)

    with open(f"{current_path}/{log_file}") as fp:
        log_content = fp.read()
    run_perf = 0.0
    match_result = re.search(filter_pattern, log_content)
    if match_result:
        # Pull the first (possibly fractional) number out of the matched text;
        # guard against a filter match that contains no digits at all.
        float_match = re.search(r"\d+(\.\d+)?", match_result.group(0))
        if float_match:
            run_perf = float(float_match.group(0))
    print("RUNNING PERF:{}".format(run_perf))
|
||
|
||
def run_perf_task(device_config):
    """Fan the device's perf tasks out over a process pool and collect logs.

    Args:
        device_config: one device section from test_perf_config.json, with at
            least ``name`` and ``tasks`` keys.

    Reads the module-level global ``max_parall`` (set in ``__main__``) for
    the pool size.  Exits the process with status 1 on any task failure.
    """
    # NOTE(review): error_flag is a LOCAL here, but handle_error() references
    # ``error_flag`` (and ``pool``) as module globals — the callback will hit
    # NameError instead of setting this flag; confirm and wire them through
    # explicitly.
    error_flag = multiprocessing.Value("i", 0)  # set to 1 by the error callback

    device = device_config["name"]

    logging.info("we use {}!".format(device))
    logging.info(f"main process id (ppid): {os.getpid()} {os.getppid()}")
    logging.info(f"python path: {os.environ.get('PYTHONPATH', None)}")

    os.environ["DIPU_DUMP_OP_ARGS"] = "0"
    os.environ["DIPU_DEBUG_ALLOCATOR"] = "0"
    os.environ["ONE_ITER_TOOL_DEVICE"] = "dipu"

    # Snapshot DIPU/ONE_ITER environment variables into a sourceable file so
    # the sco srun command can re-export them inside its bash -c shell.
    # NOTE(review): the stale file is removed from the SCRIPT's directory but
    # the new one is written to the CURRENT directory — confirm these are
    # always the same directory when this runs.
    current_path = os.path.dirname(os.path.realpath(__file__))
    env_file_path = os.path.join(current_path, "environment_exported")
    env_variables = os.environ
    keywords_to_filter = ["DIPU", "ONE_ITER"]
    if os.path.exists(env_file_path):
        os.remove(env_file_path)

    with open("environment_exported", "w") as file:
        file.write("pwd\n")
        for key, value in env_variables.items():
            if any(keyword in key for keyword in keywords_to_filter):
                file.write(f'export {key}="{value}"\n')

    tasks = device_config["tasks"]
    logging.info(f"tasks nums: {len(tasks)}")

    if not os.path.exists("perf_data"):
        os.mkdir("perf_data")

    pool = Pool(max_parall)
    log_files = []
    try:
        for i in range(len(tasks)):
            task = tasks[i]
            # Workers sharing a slot reuse the same log file name, so logs of
            # tasks beyond max_parall overwrite earlier ones (clear_log=True).
            log_file = f"child_{i % max_parall}_log.txt"
            log_files.append(log_file)
            pool.apply_async(
                process_test_perf,
                args=(
                    log_file,
                    True,
                    task,
                ),
                error_callback=handle_error,
            )
        logging.info("Waiting for all subprocesses done...")
        pool.close()
        pool.join()
        for log_file in log_files:
            print_file(log_file)
        if error_flag.value != 0:
            exit(1)
        logging.info("All subprocesses done.")
    except Exception as e:
        logging.error(e)
        exit(1)
|
||
|
||
def handle_error(error: BaseException) -> None:
    """Pool error callback: log the failure, stop the pool, flag the error.

    ``multiprocessing.Pool`` passes the raised exception here, not a string
    (the original annotation said ``str``).

    NOTE(review): the original read ``pool`` and ``error_flag`` as bare
    globals, but both are locals of ``run_perf_task`` — the lookup raised
    ``NameError`` inside the callback, so the pool was never terminated and
    the flag never set.  This version looks them up defensively so the
    callback at least logs reliably; passing them in explicitly (e.g. via
    ``functools.partial``) is the proper fix.
    """
    logging.error(f"Error: {error}")
    worker_pool = globals().get("pool")
    if worker_pool is not None:
        logging.error("Kill all!")
        worker_pool.terminate()
    flag = globals().get("error_flag")
    if flag is not None:
        flag.value = 1
|
||
|
||
def print_file(file_name):
    """Dump the entire contents of *file_name* through the module logger."""
    with open(file_name) as fh:
        contents = fh.read()
    logging.info(contents)
|
||
|
||
if __name__ == "__main__":
    # set some params
    # NOTE: max_parall and device become de facto module globals —
    # run_perf_task reads max_parall and process_test_perf reads device.
    max_parall = 8  # pool size for concurrent perf tasks
    parser = argparse.ArgumentParser(description="set some params.")
    parser.add_argument("device", type=str, help="the device to use")
    parser.add_argument("job_name", type=str, help="the name of the job")
    args = parser.parse_args()

    device = args.device
    # NOTE(review): job_name is parsed and logged but never used for the srun
    # job name (process_test_perf hardcodes "trial") — confirm intended.
    job_name = args.job_name

    device_config = parse_device_task(device)
    print(device_config)

    logging.info(f"device: {device}, job_name: {job_name}")
    run_perf_task(device_config)
Oops, something went wrong.