
Commit

Merge branch 'GATEOverflow:mlperf-inference' into sdxl_accuracy_fixes
anandhu-eng authored Oct 17, 2024
2 parents 8232182 + 84b1de7 commit a08e402
Showing 5 changed files with 43 additions and 14 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,2 +1,2 @@
[build-system]
requires = ["setuptools>=60", "wheel", "cmind @ git+https://[email protected]/mlcommons/ck.git@a4c6a7b477af5f1e7099c55f5468a47854adaa6c#egg=cmind&subdirectory=cm"]
requires = ["setuptools>=60", "wheel", "cmind"]
9 changes: 7 additions & 2 deletions script/app-mlperf-inference-amd/_cm.yaml
@@ -255,8 +255,8 @@ variations:
- tags: get,generic-python-lib,_package.vllm
names:
- vllm
-- tags: get,git,repo,_repo.https://github.com/mlcommons/submissions_inference_v4.1
-  extra_cache_tags: inference,submissions
+- tags: get,git,repo,_repo.https://github.com/mlcommons/inference_results_v4.1,_branch.cm-code-only
+  extra_cache_tags: inference,results
env:
CM_GIT_CHECKOUT_PATH_ENV_NAME: CM_MLPERF_INFERENCE_RESULTS_PATH

@@ -315,5 +315,10 @@ variations:
env:
CM_MLPERF_INFERENCE_RESULTS_REPO: https://github.com/mlcommons/inference_results_v4.0

+r4.1_default:
+  group: version
+  env:
+    CM_MLPERF_INFERENCE_RESULTS_REPO: https://github.com/mlcommons/inference_results_v4.1

docker:
real_run: False
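With these changes the AMD harness pulls its reference code from the public inference_results_v4.1 repository (cm-code-only branch) instead of the private submissions repo, and a new r4.1_default variation in the version group points CM_MLPERF_INFERENCE_RESULTS_REPO at it. A sketch of selecting that variation from the CM command line; the leading script tags here are assumptions, not taken from this diff:

    # Hypothetical invocation: '_r4.1_default' picks the new version variation,
    # which sets CM_MLPERF_INFERENCE_RESULTS_REPO to the v4.1 results repo.
    cm run script --tags=app,mlperf,inference,amd,_r4.1_default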
32 changes: 26 additions & 6 deletions script/app-mlperf-inference-amd/run-llama2.sh
@@ -5,16 +5,13 @@ set -xeu
N_SAMPLES=${N_SAMPLES:-24576} #24576 #3072 #2457 #6
TP=1
DP=${DP:-8}
-WD=${WD:-0}
-SORTING=${SORTING:-descending} #ascending #descending #lexicographic #skip

export HIP_FORCE_DEV_KERNARG=1
export VLLM_USE_TRITON_FLASH_ATTN=0
export VLLM_FP8_PADDING=1
export VLLM_FP8_ACT_PADDING=1
export VLLM_FP8_WEIGHT_PADDING=1
export VLLM_FP8_REDUCE_CONV=1
-export VLLM_SCHED_PREFILL_KVC_FREEPCT=31.0

export HARNESS_DISABLE_VLLM_LOGS=1
export VLLM_LOGGING_LEVEL=ERROR
@@ -33,8 +30,8 @@ LOG_DIR=${CM_MLPERF_OUTPUT_DIR}

cp $USER_CONF ${LOG_DIR}/user.conf

cmd ="${CM_PYTHON_BIN_WITH_PATH} ${CM_MLPERF_AMD_LLAMA2_CODE_PATH}/mainVllmFp8_Offline.py \
--scenario ${CM_MLPERF_LOADGEN_SCENARIO \
COMMON_CMD_OPTIONS="\
--scenario ${CM_MLPERF_LOADGEN_SCENARIO} \
--output-log-dir ${LOG_DIR} \
--model-path $MODEL_PATH \
--mlperf-conf $MLPERF_CONF \
@@ -49,8 +46,31 @@ cmd ="${CM_PYTHON_BIN_WITH_PATH} ${CM_MLPERF_AMD_LLAMA2_CODE_PATH}/mainVllmFp8_Offline.py
-dp ${DP} \
--quantization fp8 \
--quantized-weights-path ${QUANTIZED_WEIGHTS_PATH} \
-    --quantization-param-path ${QUANTIZATION_PARAM_PATH} \
+    --quantization-param-path ${QUANTIZATION_PARAM_PATH}"

if [ "${CM_MLPERF_LOADGEN_MODE}" == "accuracy" ]; then
COMMON_CMD_OPTIONS+=" --accuracy"
fi

if [ "${CM_MLPERF_LOADGEN_SCENARIO}" == "Offline" ]; then
WD=${WD:-0}
SORTING=${SORTING:-descending} #ascending #descending #lexicographic #skip
export VLLM_SCHED_PREFILL_KVC_FREEPCT=31.0
# generate run command
cmd="${CM_PYTHON_BIN_WITH_PATH} ${CM_MLPERF_AMD_LLAMA2_CODE_PATH}/mainVllmFp8_Offline.py \
${COMMON_CMD_OPTIONS} \
--warmup-duration ${WD} \
--sorting ${SORTING} \
--enforce-eager True \
--gpu-memory-utilization 0.99"
else
# generate run command
cmd="${CM_PYTHON_BIN_WITH_PATH} ${CM_MLPERF_AMD_LLAMA2_CODE_PATH}/mainVllmFp8_SyncServer.py \
${COMMON_CMD_OPTIONS} \
--enable-warm-up \
--enable-batcher"
fi

echo "${cmd}"
# uncomment the below lines for testing
#eval "${cmd}"
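The run command is now assembled in two stages: scenario-independent flags accumulate in COMMON_CMD_OPTIONS, then the scenario picks the entry point (mainVllmFp8_Offline.py vs. mainVllmFp8_SyncServer.py) and appends its own flags. A self-contained sketch of the same pattern, with illustrative paths and defaults rather than the harness's real ones:

    #!/usr/bin/env bash
    # Minimal sketch of the two-stage command assembly above.
    set -eu
    SCENARIO=${1:-Offline}
    MODE=${2:-performance}

    COMMON_OPTS="--scenario ${SCENARIO} --output-log-dir ./logs"
    if [ "${MODE}" == "accuracy" ]; then
        COMMON_OPTS+=" --accuracy"   # accuracy runs add one flag to the shared set
    fi

    if [ "${SCENARIO}" == "Offline" ]; then
        # Offline adds warm-up and sorting knobs before launching the offline harness
        cmd="python3 mainVllmFp8_Offline.py ${COMMON_OPTS} --warmup-duration 0 --sorting descending"
    else
        # every other scenario goes through the synchronous server harness
        cmd="python3 mainVllmFp8_SyncServer.py ${COMMON_OPTS} --enable-warm-up --enable-batcher"
    fi
    echo "${cmd}"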
4 changes: 2 additions & 2 deletions script/get-ml-model-llama2/_cm.json
@@ -174,8 +174,8 @@
"tags": "get,preprocessed,dataset,openorca,_calibration,_mlc"
},
{
"tags": "get,git,repo,_repo.https://github.com/mlcommons/submissions_inference_v4.1",
"extra_cache_tags": "inference,submissions",
"tags": "get,git,repo,_repo.https://github.com/mlcommons/inference_results_v4.1,_branch.cm-code-only",
"extra_cache_tags": "inference,results",
"env": {
"CM_GIT_CHECKOUT_PATH_ENV_NAME": "CM_MLPERF_INFERENCE_RESULTS_PATH"
}
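The same repository swap is mirrored here for the llama2 model script. Since the tags are spelled out in the JSON, the dependency can also be fetched directly; a sketch, assuming the standard cm CLI:

    # Clones the cm-code-only branch of the public v4.1 results repo and caches it;
    # the checkout path is exported as CM_MLPERF_INFERENCE_RESULTS_PATH per the
    # env block above.
    cm run script --tags=get,git,repo,_repo.https://github.com/mlcommons/inference_results_v4.1,_branch.cm-code-only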
10 changes: 7 additions & 3 deletions setup.py
@@ -127,11 +127,15 @@ def custom_function(self):
def get_sys_platform(self):
self.system = platform.system()

with open("README.md", "r") as f:
long_description = f.read()

setup(
name='cm4mlops',
-    version='0.1',
-    long_description='CM automations and scripts for MLOps',
-    long_description_content_type='text/x-rst',
+    version='0.3',
+    long_description=long_description,
+    long_description_content_type='text/markdown',
url="https://github.com/mlcommons/cm4mlops",
packages=[],
install_requires=[
"setuptools>=60",
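setup.py now reads the package's long description from README.md and declares it as markdown, so the PyPI page renders the actual README; the version is bumped from 0.1 to 0.3. A quick check that the new metadata renders cleanly, using the standard build and twine tools (not part of this repo):

    python -m pip install build twine
    python -m build            # produces sdist and wheel under dist/
    twine check dist/*         # validates that long_description renders for PyPI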
