
Commit

Merge branch 'GATEOverflow:mlperf-inference' into redhat_llama2
anandhu-eng authored Jul 17, 2024
2 parents 1cb8235 + 8a8f6bb commit 91eafcd
Showing 2 changed files with 7 additions and 0 deletions.
6 changes: 6 additions & 0 deletions script/app-mlperf-inference-mlcommons-python/customize.py
@@ -297,6 +297,7 @@ def get_run_cmd_reference(os_info, env, scenario_extra_options, mode_extra_optio
        env['RUN_DIR'] = os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "language", "llama2-70b")
        backend = env['CM_MLPERF_BACKEND']
        device = env['CM_MLPERF_DEVICE'] if env['CM_MLPERF_DEVICE'] != "gpu" else "cuda"

        cmd = env['CM_PYTHON_BIN_WITH_PATH'] + " main.py " \
            " --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + \
            " --dataset-path " + env['CM_DATASET_PREPROCESSED_PATH'] + \
@@ -305,12 +305,17 @@ def get_run_cmd_reference(os_info, env, scenario_extra_options, mode_extra_optio
            scenario_extra_options + mode_extra_options + \
            " --output-log-dir " + env['CM_MLPERF_OUTPUT_DIR'] + \
            ' --dtype ' + env['CM_MLPERF_MODEL_PRECISION']

        if env.get('CM_MLPERF_INFERENCE_API_SERVER', '') != '':
            env['CM_VLLM_SERVER_MODEL_NAME'] = env.get("CM_VLLM_SERVER_MODEL_NAME") or "NousResearch/Meta-Llama-3-8B-Instruct"
            #env['CM_MLPERF_INFERENCE_API_SERVER'] = "http://localhost:8000"
            cmd += f" --api-server {env['CM_MLPERF_INFERENCE_API_SERVER']} --model-path {env['CM_VLLM_SERVER_MODEL_NAME']} --api-model-name {env['CM_VLLM_SERVER_MODEL_NAME']} --vllm "
        else:
            cmd += f" --model-path {env['MODEL_DIR']}"

        if env.get('CM_MLPERF_INFERENCE_NUM_WORKERS', '') != '':
            cmd += f" --num-workers {env['CM_MLPERF_INFERENCE_NUM_WORKERS']}"

        cmd = cmd.replace("--count", "--total-sample-count")

    elif "mixtral-8x7b" in env['CM_MODEL']:
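For readers following the change, here is a minimal standalone sketch of how this branch assembles the reference-app command. The env values below (Python binary, server URL, scenario, sample count, model directory, worker count) are illustrative placeholders rather than values from this commit; only the branching logic mirrors the customize.py snippet above.

# Minimal sketch of the command assembly shown in the diff above.
# All env values here are illustrative; only the branching logic mirrors customize.py.
env = {
    'CM_PYTHON_BIN_WITH_PATH': 'python3',
    'CM_MLPERF_INFERENCE_API_SERVER': 'http://localhost:8000',             # assumed vLLM endpoint
    'CM_VLLM_SERVER_MODEL_NAME': 'NousResearch/Meta-Llama-3-8B-Instruct',
    'CM_MLPERF_INFERENCE_NUM_WORKERS': '2',                                # example worker count
    'MODEL_DIR': '/models/llama2-70b',                                     # used only when no API server is set
}

cmd = env['CM_PYTHON_BIN_WITH_PATH'] + " main.py --scenario Offline --count 100"

if env.get('CM_MLPERF_INFERENCE_API_SERVER', '') != '':
    # Point the harness at a running vLLM server instead of a local checkpoint
    model_name = env.get("CM_VLLM_SERVER_MODEL_NAME") or "NousResearch/Meta-Llama-3-8B-Instruct"
    cmd += f" --api-server {env['CM_MLPERF_INFERENCE_API_SERVER']}" \
           f" --model-path {model_name} --api-model-name {model_name} --vllm "
else:
    cmd += f" --model-path {env['MODEL_DIR']}"

# New in this commit: forward an optional worker count to the reference harness
if env.get('CM_MLPERF_INFERENCE_NUM_WORKERS', '') != '':
    cmd += f" --num-workers {env['CM_MLPERF_INFERENCE_NUM_WORKERS']}"

# The llama2-70b reference app expects --total-sample-count rather than --count
cmd = cmd.replace("--count", "--total-sample-count")
print(cmd)

Running the sketch prints a single command line with --api-server, --api-model-name, --vllm, and --num-workers appended, which is roughly the string the CM wrapper goes on to execute for the llama2-70b reference run.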
1 change: 1 addition & 0 deletions script/run-mlperf-inference-app/_cm.yaml
@@ -100,6 +100,7 @@ input_mapping:
  nvidia_llama2_dataset_file_path: CM_NVIDIA_LLAMA_DATASET_FILE_PATH
  tp_size: CM_NVIDIA_TP_SIZE
  vllm_model_name: CM_VLLM_SERVER_MODEL_NAME
  num_workers: CM_MLPERF_INFERENCE_NUM_WORKERS

new_state_keys:
- app_mlperf_inference_*
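With this mapping in place, a worker count supplied on the CM command line is exported as CM_MLPERF_INFERENCE_NUM_WORKERS before customize.py builds the run command. An illustrative invocation, assuming the usual run-mlperf-inference-app tags and flags (not taken from this commit):

cm run script --tags=run-mlperf,inference --model=llama2-70b-99 --num_workers=2

Here --num_workers=2 is translated through input_mapping into CM_MLPERF_INFERENCE_NUM_WORKERS, which the customize.py change above forwards to the reference harness as --num-workers.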
