
Commit

Merge branch 'GATEOverflow:mlperf-inference' into sdxl_accuracy_fixes
anandhu-eng authored Oct 17, 2024
2 parents 8232182 + 84b1de7 commit a08e402
Showing 5 changed files with 43 additions and 14 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,2 +1,2 @@
[build-system]
requires = ["setuptools>=60", "wheel", "cmind @ git+https://[email protected]/mlcommons/ck.git@a4c6a7b477af5f1e7099c55f5468a47854adaa6c#egg=cmind&subdirectory=cm"]
requires = ["setuptools>=60", "wheel", "cmind"]
9 changes: 7 additions & 2 deletions script/app-mlperf-inference-amd/_cm.yaml
@@ -255,8 +255,8 @@ variations:
- tags: get,generic-python-lib,_package.vllm
names:
- vllm
-- tags: get,git,repo,_repo.https://github.com/mlcommons/submissions_inference_v4.1
-  extra_cache_tags: inference,submissions
+- tags: get,git,repo,_repo.https://github.com/mlcommons/inference_results_v4.1,_branch.cm-code-only
+  extra_cache_tags: inference,results
env:
CM_GIT_CHECKOUT_PATH_ENV_NAME: CM_MLPERF_INFERENCE_RESULTS_PATH

@@ -315,5 +315,10 @@ variations:
env:
CM_MLPERF_INFERENCE_RESULTS_REPO: https://github.com/mlcommons/inference_results_v4.0

+r4.1_default:
+  group: version
+  env:
+    CM_MLPERF_INFERENCE_RESULTS_REPO: https://github.com/mlcommons/inference_results_v4.1

docker:
real_run: False
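With these changes the AMD harness pulls its reference code from the public inference_results_v4.1 repository (cm-code-only branch) instead of the private submissions repo, and a new r4.1_default variation in the version group points CM_MLPERF_INFERENCE_RESULTS_REPO at it. A sketch of selecting that variation from the CM command line; the leading script tags here are assumptions, not taken from this diff:

    # Hypothetical invocation: '_r4.1_default' picks the new version variation,
    # which sets CM_MLPERF_INFERENCE_RESULTS_REPO to the v4.1 results repo.
    cm run script --tags=app,mlperf,inference,amd,_r4.1_default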
32 changes: 26 additions & 6 deletions script/app-mlperf-inference-amd/run-llama2.sh
@@ -5,16 +5,13 @@ set -xeu
N_SAMPLES=${N_SAMPLES:-24576} #24576 #3072 #2457 #6
TP=1
DP=${DP:-8}
-WD=${WD:-0}
-SORTING=${SORTING:-descending} #ascending #descending #lexicographic #skip

export HIP_FORCE_DEV_KERNARG=1
export VLLM_USE_TRITON_FLASH_ATTN=0
export VLLM_FP8_PADDING=1
export VLLM_FP8_ACT_PADDING=1
export VLLM_FP8_WEIGHT_PADDING=1
export VLLM_FP8_REDUCE_CONV=1
-export VLLM_SCHED_PREFILL_KVC_FREEPCT=31.0

export HARNESS_DISABLE_VLLM_LOGS=1
export VLLM_LOGGING_LEVEL=ERROR
@@ -33,8 +30,8 @@ LOG_DIR=${CM_MLPERF_OUTPUT_DIR}

cp $USER_CONF ${LOG_DIR}/user.conf

cmd ="${CM_PYTHON_BIN_WITH_PATH} ${CM_MLPERF_AMD_LLAMA2_CODE_PATH}/mainVllmFp8_Offline.py \
--scenario ${CM_MLPERF_LOADGEN_SCENARIO \
COMMON_CMD_OPTIONS="\
--scenario ${CM_MLPERF_LOADGEN_SCENARIO} \
--output-log-dir ${LOG_DIR} \
--model-path $MODEL_PATH \
--mlperf-conf $MLPERF_CONF \
@@ -49,8 +46,31 @@ cmd ="${CM_PYTHON_BIN_WITH_PATH} ${CM_MLPERF_AMD_LLAMA2_CODE_PATH}/mainVllmFp8_Offline.py
-dp ${DP} \
--quantization fp8 \
--quantized-weights-path ${QUANTIZED_WEIGHTS_PATH} \
-    --quantization-param-path ${QUANTIZATION_PARAM_PATH} \
+    --quantization-param-path ${QUANTIZATION_PARAM_PATH}"

if [ "${CM_MLPERF_LOADGEN_MODE}" == "accuracy" ]; then
COMMON_CMD_OPTIONS+=" --accuracy"
fi

if [ "${CM_MLPERF_LOADGEN_SCENARIO}" == "Offline" ]; then
WD=${WD:-0}
SORTING=${SORTING:-descending} #ascending #descending #lexicographic #skip
export VLLM_SCHED_PREFILL_KVC_FREEPCT=31.0
# generate run command
cmd="${CM_PYTHON_BIN_WITH_PATH} ${CM_MLPERF_AMD_LLAMA2_CODE_PATH}/mainVllmFp8_Offline.py \
${COMMON_CMD_OPTIONS} \
--warmup-duration ${WD} \
--sorting ${SORTING} \
--enforce-eager True \
--gpu-memory-utilization 0.99"
else
# generate run command
cmd="${CM_PYTHON_BIN_WITH_PATH} ${CM_MLPERF_AMD_LLAMA2_CODE_PATH}/mainVllmFp8_SyncServer.py \
${COMMON_CMD_OPTIONS} \
--enable-warm-up \
--enable-batcher"
fi

echo "${cmd}"
# uncomment the below lines for testing
#eval "${cmd}"
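The run command is now assembled in two stages: scenario-independent flags accumulate in COMMON_CMD_OPTIONS, then the scenario picks the entry point (mainVllmFp8_Offline.py vs. mainVllmFp8_SyncServer.py) and appends its own flags. A self-contained sketch of the same pattern, with illustrative paths and defaults rather than the harness's real ones:

    #!/usr/bin/env bash
    # Minimal sketch of the two-stage command assembly above.
    set -eu
    SCENARIO=${1:-Offline}
    MODE=${2:-performance}

    COMMON_OPTS="--scenario ${SCENARIO} --output-log-dir ./logs"
    if [ "${MODE}" == "accuracy" ]; then
        COMMON_OPTS+=" --accuracy"   # accuracy runs add one flag to the shared set
    fi

    if [ "${SCENARIO}" == "Offline" ]; then
        # Offline adds warm-up and sorting knobs before launching the offline harness
        cmd="python3 mainVllmFp8_Offline.py ${COMMON_OPTS} --warmup-duration 0 --sorting descending"
    else
        # every other scenario goes through the synchronous server harness
        cmd="python3 mainVllmFp8_SyncServer.py ${COMMON_OPTS} --enable-warm-up --enable-batcher"
    fi
    echo "${cmd}"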
4 changes: 2 additions & 2 deletions script/get-ml-model-llama2/_cm.json
@@ -174,8 +174,8 @@
"tags": "get,preprocessed,dataset,openorca,_calibration,_mlc"
},
{
"tags": "get,git,repo,_repo.https://github.com/mlcommons/submissions_inference_v4.1",
"extra_cache_tags": "inference,submissions",
"tags": "get,git,repo,_repo.https://github.com/mlcommons/inference_results_v4.1,_branch.cm-code-only",
"extra_cache_tags": "inference,results",
"env": {
"CM_GIT_CHECKOUT_PATH_ENV_NAME": "CM_MLPERF_INFERENCE_RESULTS_PATH"
}
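The same repository swap is mirrored here for the llama2 model script. Since the tags are spelled out in the JSON, the dependency can also be fetched directly; a sketch, assuming the standard cm CLI:

    # Clones the cm-code-only branch of the public v4.1 results repo and caches it;
    # the checkout path is exported as CM_MLPERF_INFERENCE_RESULTS_PATH per the
    # env block above.
    cm run script --tags=get,git,repo,_repo.https://github.com/mlcommons/inference_results_v4.1,_branch.cm-code-only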
10 changes: 7 additions & 3 deletions setup.py
@@ -127,11 +127,15 @@ def custom_function(self):
def get_sys_platform(self):
self.system = platform.system()

with open("README.md", "r") as f:
long_description = f.read()

setup(
name='cm4mlops',
-    version='0.1',
-    long_description='CM automations and scripts for MLOps',
-    long_description_content_type='text/x-rst',
+    version='0.3',
+    long_description=long_description,
+    long_description_content_type='text/markdown',
url="https://github.com/mlcommons/cm4mlops",
packages=[],
install_requires=[
"setuptools>=60",
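setup.py now reads the package's long description from README.md and declares it as markdown, so the PyPI page renders the actual README; the version is bumped from 0.1 to 0.3. A quick check that the new metadata renders cleanly, using the standard build and twine tools (not part of this repo):

    python -m pip install build twine
    python -m build            # produces sdist and wheel under dist/
    twine check dist/*         # validates that long_description renders for PyPI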
