Commit 340dd6b

Merge branch 'mlperf-inference' into nvidia-llama2

anandhu-eng authored Jul 11, 2024
2 parents 9e00e11 + 9908101
Showing 25 changed files with 584 additions and 16 deletions.
115 changes: 115 additions & 0 deletions script/app-mlperf-inference-intel/_cm.yaml
@@ -330,6 +330,121 @@ variations:
          - pip-package
          - optimum

    sdxl:
      group: model
      env:
        CM_BENCHMARK: STANDALONE_SDXL
        CM_MODEL: stable-diffusion-xl

    sdxl,pytorch:
      adr:
        conda-package:
          tags: _name.sdxl-pt
      deps:
        - tags: get,conda,_name.sdxl-pt
        - tags: get,python,_conda.sdxl-pt
          adr:
            conda-python:
              version: "3.9"
        - names:
          - conda-package
          - mkl
          tags: get,generic,conda-package,_package.mkl,_source.conda-forge
        - names:
          - conda-package
          - mkl-include
          tags: get,generic,conda-package,_package.mkl-include,_source.intel
        - names:
          - conda-package
          - llvm-openmp
          tags: get,generic,conda-package,_package.llvm-openmp,_source.conda-forge
        - names:
          - conda-package
          - ncurses
          tags: get,generic,conda-package,_package.ncurses,_source.conda-forge
        - tags: get,generic-sys-util,_numactl
        - tags: get,generic,conda-package,_package.jemalloc,_source.conda-forge
          names:
          - conda-package
          - jemalloc
        - tags: get,generic-python-lib,_package.torch,_path.https://download.pytorch.org/whl/nightly/cpu/torch-2.3.0.dev20231214%2Bcpu-cp39-cp39-linux_x86_64.whl
          names:
          - pip-package
          - pip-torch
        - tags: get,generic-python-lib,_package.torchvision,_path.https://download.pytorch.org/whl/nightly/cpu/torchvision-0.18.0.dev20231214%2Bcpu-cp39-cp39-linux_x86_64.whl
          names:
          - pip-package
          - pip-torchvision
        - tags: get,generic-python-lib,_torch
          names:
          - pip-package
          - torch
        - tags: install,diffusers,from.src,_for-intel-mlperf-inference-v4.0-sdxl
          names:
          - diffusers-from-src
        - tags: install,ipex,from.src,_for-intel-mlperf-inference-v4.0-sdxl
          names:
          - ipex-from-src
        - tags: get,generic,conda-package,_package.ninja
          names:
          - conda-package
          - ninja
        - tags: get,mlcommons,inference,src
          names:
          - inference-src
        - tags: get,mlcommons,inference,loadgen,_custom-python,_keep-build
          names:
          - inference-loadgen

    sdxl,build-harness:
      deps:
        - tags: get,generic-python-lib,_package.pybind11[global]
          names:
          - pip-package
          - pybind11

    sdxl,run-harness:
      deps:
        - tags: get,ml-model,sdxl,_fp32,_pytorch
        - tags: get,dataset,coco2014,original,_validation
        - tags: get,generic-python-lib,_package.opencv-python
          names:
          - pip-package
          - opencv
        - tags: get,generic-python-lib,_package.transformers
          names:
          - pip-package
          - transformers
        - tags: get,generic-python-lib,_package.accelerate
          names:
          - pip-package
          - accelerate
        - tags: get,generic-python-lib,_package.open-clip-torch
          names:
          - pip-package
          - open-clip-torch
        - tags: get,generic-python-lib,_package.pycocotools
          names:
          - pip-package
          - pycocotools
        - tags: get,generic-python-lib,_package.torchmetrics[image]
          names:
          - pip-package
          - torchmetrics
        - tags: get,generic-python-lib,_torchvision
          version: "0.17.1"
          names:
          - pip-package
          - torchvision
        - tags: get,generic-python-lib,_package.py-libnuma
          names:
          - pip-package
          - libnuma

    resnet50,pytorch:
      adr:
        conda-package:
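For orientation, these variations are selected through CM tags at run time. A hypothetical invocation of the Intel SDXL harness might look like the following (the exact tag set depends on the cm4mlops version, so treat this as a sketch, not part of the commit):

    # Hypothetical example: '_sdxl' picks the model group above and '_pytorch'
    # the framework; CM then resolves the conda/pip dependencies listed in deps.
    cm run script --tags=app,mlperf,inference,intel,_sdxl,_pytorch \
        --env.CM_MLPERF_LOADGEN_SCENARIO=Offline
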
27 changes: 27 additions & 0 deletions script/app-mlperf-inference-intel/build_sdxl_harness.sh
@@ -0,0 +1,27 @@
cd ${CM_HARNESS_CODE_ROOT}

cd utils
cmd=" python -m pip install ."

echo "$cmd"
eval "$cmd"
test "$?" -eq 0 || exit "$?"

cd ../tools
wget https://raw.githubusercontent.com/mlcommons/inference/master/text_to_image/tools/coco.py
test "$?" -eq 0 || exit "$?"
cd ..

mkdir -p coco2014/captions
wget -P coco2014/captions/ https://raw.githubusercontent.com/mlcommons/inference/master/text_to_image/coco2014/captions/captions_source.tsv
test "$?" -eq 0 || exit "$?"

mkdir -p coco2014/latents
wget -P coco2014/latents/ https://github.com/mlcommons/inference/raw/master/text_to_image/tools/latents.pt
test "$?" -eq 0 || exit "$?"

cd tools/
bash download-coco-2014-calibration.sh --download-path ${PWD}/../coco2014/warmup_dataset --num-workers 1
test "$?" -eq 0 || exit "$?"
cd ..

16 changes: 14 additions & 2 deletions script/app-mlperf-inference-intel/calibrate_dlrm_v2_model.sh
@@ -1,7 +1,19 @@
 #!/bin/bash
 
 export MODEL_DIR=${CM_ML_MODEL_FILE_WITH_PATH}
 export DATA_DIR=/mnt/dlrm_data
 echo ${CM_HARNESS_CODE_ROOT}
 cd ${CM_HARNESS_CODE_ROOT}
-numactl -m 1 python python/dump_torch_model.py --model-path=$MODEL_DIR --dataset-path=$DATA_DIR
-exit 1
+python -m pip install scikit-learn==1.3.0 torchsnapshot torchrec==0.3.2
+test $? -eq 0 || exit $?
+python -m pip install fbgemm-gpu==0.3.2 --index-url https://download.pytorch.org/whl/cpu
+test $? -eq 0 || exit $?
+python python/dump_torch_model.py --model-path=$MODEL_DIR --dataset-path=$DATA_DIR
+test $? -eq 0 || exit $?
+
+python python/calibration.py \
+    --max-batchsize=65536 \
+    --model-path=${MODEL_DIR}/../dlrm-multihot-pytorch.pt \
+    --dataset-path=/mnt/dlrm_data/ \
+    --use-int8 --calibration
+test $? -eq 0 || exit $?
12 changes: 12 additions & 0 deletions script/app-mlperf-inference-intel/customize.py
@@ -31,6 +31,8 @@ def preprocess(i):
     if env.get('CM_MLPERF_INFERENCE_CODE_VERSION', '') == 'v4.0':
         if 'gptj' in ml_model:
             code_base_folder = "ITREX"
+        if 'dlrm-v2' in ml_model:
+            code_base_folder = "pytorch-cpu-int8"
 
     harness_root = os.path.join(env['CM_MLPERF_INFERENCE_RESULTS_PATH'], 'closed', 'Intel', 'code', ml_model, code_base_folder)
@@ -91,6 +93,8 @@ def preprocess(i):
i['run_script_input']['script_name'] = "build_bert_harness"
env['CM_MLPERF_INFERENCE_INTEL_HARNESS_PATH'] = os.path.join(os.getcwd(), "harness", "build", "bert_inference")
env['DATA_PATH'] = os.path.join(os.getcwd(), "harness", "bert")
elif "stable-diffusion" in env['CM_MODEL']:
i['run_script_input']['script_name'] = "build_sdxl_harness"
elif "resnet50" in env['CM_MODEL']:
i['run_script_input']['script_name'] = "build_resnet50_harness"
env['CM_MLPERF_INFERENCE_INTEL_HARNESS_PATH'] = os.path.join(os.getcwd(), "harness", "build", "resnet50_inference")
@@ -162,6 +166,14 @@ def preprocess(i):
         env['CM_RUN_DIR'] = env['CM_MLPERF_OUTPUT_DIR']
         env['CM_RUN_CMD'] = f"bash {os.path.join(i['run_script_input']['path'],'run_3d-unet_harness.sh')} "
 
+    elif 'dlrm' in env['CM_MODEL']:
+        env['CM_RUN_DIR'] = i['run_script_input']['path']
+        env['CM_RUN_CMD'] = f"bash {os.path.join(i['run_script_input']['path'],'run_dlrm_v2_harness.sh')} "
+
+    elif 'stable-diffusion' in env['CM_MODEL']:
+        env['CM_RUN_DIR'] = i['run_script_input']['path']
+        env['CM_RUN_CMD'] = "bash run_sdxl_harness.sh " + ("--accuracy" if env['CM_MLPERF_LOADGEN_MODE'] == "accuracy" else "")
+
     elif "gptj" in env['CM_MODEL']:
         env['CM_RUN_DIR'] = i['run_script_input']['path']
         if env.get('CM_MLPERF_INFERENCE_CODE_VERSION', '') == "v3.1":
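The new branches above only assemble a shell command that CM later executes from CM_RUN_DIR. A sketch of what the stable-diffusion branch produces (mode value assumed for illustration):

    # Hypothetical illustration of the assembled command:
    CM_MLPERF_LOADGEN_MODE=accuracy
    CM_RUN_CMD="bash run_sdxl_harness.sh"
    [ "$CM_MLPERF_LOADGEN_MODE" = "accuracy" ] && CM_RUN_CMD="$CM_RUN_CMD --accuracy"
    echo "$CM_RUN_CMD"    # -> bash run_sdxl_harness.sh --accuracy
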
60 changes: 60 additions & 0 deletions script/app-mlperf-inference-intel/run_dlrm_v2_harness.sh
@@ -0,0 +1,60 @@
#!/bin/bash
export MODEL_DIR=${CM_ML_MODEL_FILE_WITH_PATH}
export DATA_DIR=/mnt/dlrm_data


NUM_SOCKETS=${CM_HOST_CPU_SOCKETS:-2}
export NUM_SOCKETS=$NUM_SOCKETS
export num_physical_cores=`lscpu -b -p=Core,Socket | grep -v '^#' | sort -u | wc -l`
export CPUS_PER_SOCKET=$((num_physical_cores/NUM_SOCKETS))
echo $CPUS_PER_SOCKET
export CPUS_PER_PROCESS=24
#${CPUS_PER_SOCKET}
export CPUS_PER_INSTANCE=1
export CPUS_FOR_LOADGEN=1
export BATCH_SIZE=100
export DNNL_MAX_CPU_ISA=AVX512_CORE_AMX

export LD_PRELOAD=${CM_CONDA_LIB_PATH}/libiomp5.so

export KMP_BLOCKTIME=1
export OMP_NUM_THREADS=$CPUS_PER_INSTANCE
export KMP_AFFINITY="granularity=fine,compact,1,0"
export DNNL_PRIMITIVE_CACHE_CAPACITY=20971520
export DLRM_DIR=$PWD/python/model
#export TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=30469645312

mode="Offline"
extra_option="--samples-per-query-offline=204800"

int8_cfg="--int8-configure-dir=int8_configure.json"
echo "Running $mode bs=$batch_size $dtype $test_type $DNNL_MAX_CPU_ISA"

export CUDA_VISIBLE_DEVICES=""
extra_option=" $extra_option --use-int8"
export EXTRA_OPS="$extra_option"

#export number_cores=`lscpu -b -p=Core,Socket | grep -v '^#' | sort -u | wc -l`

model_path="$MODEL_DIR/dlrm-multihot-pytorch.pt"
profile=dlrm-multihot-pytorch
cd ${CM_HARNESS_CODE_ROOT}
OUTPUT_DIR="${CM_MLPERF_OUTPUT_DIR}"

if [[ "${CM_MLPERF_LOADGEN_MODE}" == "accuracy" ]]; then
accuracy_opt=" --accuracy"
else
accuracy_opt=""
fi

USER_CONF="${CM_MLPERF_USER_CONF}"
cmd="python -u python/runner.py --profile $profile $common_opt --model dlrm --model-path $model_path \
--config ${CM_MLPERF_CONF} --user-config ${CM_MLPERF_USER_CONF} \
--dataset multihot-criteo --dataset-path $DATA_DIR --output $OUTPUT_DIR $EXTRA_OPS \
--max-ind-range=40000000 --samples-to-aggregate-quantile-file=${PWD}/tools/dist_quantile.txt \
--max-batchsize=$BATCH_SIZE --scenario=${CM_MLPERF_LOADGEN_SCENARIO} ${accuracy_opt}"


echo "$cmd"
#exit 1
eval "$cmd"
49 changes: 49 additions & 0 deletions script/app-mlperf-inference-intel/run_sdxl_harness.sh
@@ -0,0 +1,49 @@
#!/bin/bash

export KMP_BLOCKTIME=1
export KMP_AFFINITY=granularity=fine,compact,1,0
export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libiomp5.so
# export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libtcmalloc.so
#

BATCH_SIZE=${CM_MLPERF_LOADGEN_BATCH_SIZE}

export num_physical_cores=$(lscpu -b -p=Core,Socket | grep -v '^#' | sort -u | wc -l)
num_numa=$(numactl --hardware|grep available|awk -F' ' '{ print $2 }')



OUTPUT_DIR="${CM_MLPERF_OUTPUT_DIR}"
MODEL_PATH="${SDXL_CHECKPOINT_PATH}"
cd ${CM_HARNESS_CODE_ROOT}

NUM_PROC=1
CPUS_PER_PROC=16
WORKERS_PER_PROC=1
TOTAL_SAMPLE_COUNT=5000
BATCH_SIZE=8

FD_MAX=$(ulimit -n -H)
ulimit -n $((FD_MAX - 1))

echo "Start time: $(date)"
cmd="python -u main.py \
--dtype bfloat16 \
--device 'cpu' \
--scenario ${CM_MLPERF_LOADGEN_SCENARIO} \
--mode ${LOADGEN_MODE} \
--num-proc ${NUM_PROC} \
--cpus-per-proc ${CPUS_PER_PROC} \
--model-path ${MODEL_PATH} \
--batch-size ${BATCH_SIZE} \
--mlperf-conf ${CM_MLPERF_CONF} \
--user-conf ${CM_MLPERF_USER_CONF} \
--workers-per-proc ${WORKERS_PER_PROC} \
--total-sample-count ${TOTAL_SAMPLE_COUNT} \
--log-dir ${OUTPUT_DIR} "

echo "$cmd"
eval "$cmd"
test $? -eq 0 || exit $?
echo "End time: $(date)"

8 changes: 8 additions & 0 deletions script/app-mlperf-inference-nvidia/_cm.yaml
@@ -423,13 +423,17 @@ variations:
       - tags: get,generic-python-lib,_package.onnxruntime
         names:
         - onnxruntime
+      - tags: get,generic-python-lib,_package.colored
+        names:
+        - colored
       - tags: get,generic-python-lib,_package.nvidia-ammo
         names:
         - nvidia-ammo
         version: 0.7.4
         env:
           CM_GENERIC_PYTHON_PIP_EXTRA_INDEX_URL: "https://pypi.nvidia.com"
           CM_GENERIC_PYTHON_PIP_EXTRA: "--no-cache-dir"
+          CM_SDXL_ACCURACY_RUN_DEVICE: " gpu"
       - tags: get,generic-python-lib,_package.optimum
         names:
         - optimum
@@ -1015,6 +1019,10 @@ variations:
       group: device-memory
       env:
         CM_NVIDIA_GPU_MEMORY: "80"
+    gpu_memory.#:
+      group: device-memory
+      env:
+        CM_NVIDIA_GPU_MEMORY: "#"
 
     singlestream,resnet50:
       env:
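The `#` in `gpu_memory.#` is CM's wildcard suffix for variations: a tag such as `_gpu_memory.24` matches this variation and substitutes `24` for `#` in the env, so any memory size can be requested without defining a dedicated variation. A hypothetical selection (tag spelling inferred from the pattern above):

    # '#' is replaced by the suffix, so CM_NVIDIA_GPU_MEMORY becomes "24".
    cm run script --tags=app,mlperf,inference,nvidia,_sdxl,_gpu_memory.24
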
1 change: 0 additions & 1 deletion script/app-mlperf-inference/_cm.yaml
@@ -561,7 +561,6 @@ variations:
         CM_MLPERF_INFERENCE_TEST_QPS: "0.05"
       default_variations:
         precision: float16
-        device: cuda
       add_deps_recursive:
         mlperf-inference-implementation:
           tags: _sdxl
2 changes: 1 addition & 1 deletion script/get-dataset-coco2014/customize.py
@@ -18,7 +18,7 @@ def postprocess(i):
     env = i['env']
     if env.get('CM_DATASET_CALIBRATION','') == "no":
         env['CM_DATASET_PATH_ROOT'] = os.path.join(os.getcwd(), 'install')
-        env['CM_DATASET_PATH'] = os.path.join(os.getcwd(), 'install', 'validation', 'data')
+        #env['CM_DATASET_PATH'] = os.path.join(os.getcwd(), 'install', 'validation', 'data')
         env['CM_DATASET_CAPTIONS_DIR_PATH'] = os.path.join(os.getcwd(), 'install', 'captions')
         env['CM_DATASET_LATENTS_DIR_PATH'] = os.path.join(os.getcwd(), 'install', 'latents')
     else:
2 changes: 1 addition & 1 deletion script/get-dataset-openorca/_cm.json
@@ -39,7 +39,6 @@
"uid": "9252c4d90d5940b7",
"variations": {
"60": {
"default": true,
"env": {
"CM_DATASET_SIZE": "60"
},
@@ -58,6 +57,7 @@
"group": "dataset-type"
},
"full": {
"default": true,
"env": {
"CM_DATASET_SIZE": "24576"
},
2 changes: 1 addition & 1 deletion script/get-docker/customize.py
@@ -34,7 +34,7 @@ def preprocess(i):
     return {'return':0}
 
 def detect_version(i):
-    r = i['automation'].parse_version({'match_text': r'Docker version\s*([\d.]+)',
+    r = i['automation'].parse_version({'match_text': r'[Docker|podman] version\s*([\d.]+)',
                                        'group_number': 1,
                                        'env_key':'CM_DOCKER_VERSION',
                                        'which_env':i['env']})
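One caveat with the new pattern: square brackets in a regular expression define a character class, so `[Docker|podman]` matches any single character from that set rather than either word. It happens to work here because both `Docker version` and `podman version` end in a class character right before ` version`, but an alternation group is the stricter form. A quick illustration (not part of the commit):

    # The alternation form matches either full word:
    echo "Docker version 24.0.7" | grep -E '(Docker|podman) version'    # matches
    echo "podman version 4.9.3"  | grep -E '(Docker|podman) version'    # matches
    # The character-class form also accepts a lone character from the set:
    echo "D version 1.0"         | grep -E '[Docker|podman] version'    # matches too
    # Python equivalent of the stricter pattern: r'(?:Docker|podman) version\s*([\d.]+)'
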
(The diffs for the remaining changed files are not shown here.)
