Skip to content

Commit

Permalink
Merge branch 'mlperf-inference' into redhat_llama2
Browse files Browse the repository at this point in the history
  • Loading branch information
anandhu-eng authored Jul 15, 2024
2 parents 9ce6826 + 05e8196 commit 9ccdb4a
Show file tree
Hide file tree
Showing 9 changed files with 181 additions and 15 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
[![License](https://img.shields.io/badge/License-Apache%202.0-green)](LICENSE.md)
[![Python Version](https://img.shields.io/badge/python-3+-blue.svg)](https://github.com/mlcommons/ck/tree/master/cm/cmind)
[![Powered by CM](https://img.shields.io/badge/Powered_by-MLCommons%20CM-blue)](https://github.com/mlcommons/ck).
[![Downloads](https://static.pepy.tech/badge/cmind)](https://pepy.tech/project/cmind)
[![Downloads](https://static.pepy.tech/badge/cm4mlops)](https://pepy.tech/project/cm4mlops)

[![CM script automation test](https://github.com/mlcommons/cm4mlops/actions/workflows/test-cm-scripts.yml/badge.svg)](https://github.com/mlcommons/cm4mlops/actions/workflows/test-cm-scripts.yml)
[![CM script automation features test](https://github.com/mlcommons/cm4mlops/actions/workflows/test-cm-script-features.yml/badge.svg)](https://github.com/mlcommons/cm4mlops/actions/workflows/test-cm-script-features.yml)
Expand Down
14 changes: 10 additions & 4 deletions script/app-mlperf-inference-nvidia/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -161,9 +161,11 @@ deps:
CM_MODEL:
- dlrm-v2-99
- dlrm-v2-99.9
skip_if_env:
skip_if_any_env:
DLRM_DATA_PATH:
- 'on'
CM_RUN_STATE_DOCKER:
- 'yes'
names:
- criteo-preprocessed
tags: get,dataset,preprocessed,criteo
Expand All @@ -174,9 +176,11 @@ deps:
CM_MODEL:
- dlrm-v2-99
- dlrm-v2-99.9
skip_if_env:
skip_if_any_env:
DLRM_DATA_PATH:
- on
CM_RUN_STATE_DOCKER:
- 'yes'
names:
- dlrm-model
tags: get,ml-model,dlrm,_pytorch
Expand Down Expand Up @@ -379,13 +383,16 @@ variations:
version: 1.13.1

sdxl:
new_env_keys:
- CM_SDXL_ACCURACY_RUN_DEVICE
group: model
env:
CM_MODEL: stable-diffusion-xl
CM_NOT_ML_MODEL_STARTING_WEIGHTS_FILENAME: "https://github.com/mlcommons/cm4mlops/blob/main/script/get-ml-model-stable-diffusion/_cm.json#L174"
CM_ML_MODEL_WEIGHT_TRANSFORMATIONS: quantization, affine fusion
CM_ML_MODEL_WEIGHT_TRANSFORMATIONS: "quantization, affine fusion"
CM_ML_MODEL_INPUTS_DATA_TYPE: int32
CM_ML_MODEL_WEIGHTS_DATA_TYPE: int8
CM_SDXL_ACCURACY_RUN_DEVICE: "gpu"
deps:
- tags: get,generic-python-lib,_package.diffusers
names:
Expand Down Expand Up @@ -433,7 +440,6 @@ variations:
env:
CM_GENERIC_PYTHON_PIP_EXTRA_INDEX_URL: "https://pypi.nvidia.com"
CM_GENERIC_PYTHON_PIP_EXTRA: "--no-cache-dir"
CM_SDXL_ACCURACY_RUN_DEVICE: " gpu"
- tags: get,generic-python-lib,_package.optimum
names:
- optimum
Expand Down
73 changes: 64 additions & 9 deletions script/app-mlperf-inference/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ variations:
default:
true
default_variations:
reproducibility: r4.1_default
reproducibility: r4.1-dev_default
add_deps_recursive:
imagenet-accuracy-script:
tags: _float32
Expand Down Expand Up @@ -246,6 +246,17 @@ variations:
default_variations:
backend: onnxruntime

nvidia-original,r4.1-dev_default:
docker:
base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public

nvidia-original,r4.1-dev_default,gptj_:
docker:
deps:
- tags: get,ml-model,gptj,_nvidia,_fp8
update_tags_from_env_with_prefix:
tp_size: CM_NVIDIA_TP_SIZE

nvidia-original,r4.1_default:
docker:
base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public
Expand All @@ -260,6 +271,16 @@ variations:
env:
BUILD_TRTLLM: 1

nvidia-original,r4.1-dev_default,llama2-70b_:
docker:
deps:
- tags: get,ml-model,llama2-70b,_nvidia,_fp8
update_tags_from_env_with_prefix:
_tp_size.:
- CM_NVIDIA_TP_SIZE
env:
BUILD_TRTLLM: 1

nvidia-original,r4.1_default,llama2-70b_:
docker:
deps:
Expand Down Expand Up @@ -292,7 +313,7 @@ variations:
default_variations:
backend: tensorrt
device: cuda
reproducibility: r4.1_default
reproducibility: r4.1-dev_default
group:
implementation
add_deps_recursive:
Expand Down Expand Up @@ -350,7 +371,7 @@ variations:
default_variations:
device: cpu
backend: pytorch
reproducibility: r4.1_default
reproducibility: r4.1-dev_default
prehook_deps:
- names:
- intel
Expand All @@ -375,7 +396,7 @@ variations:
default_variations:
device: cuda
backend: openshift
reproducibility: r4.0_default
reproducibility: r4.1-dev_default
prehook_deps:
- names:
- redhat
Expand All @@ -399,7 +420,7 @@ variations:
default_variations:
device: qaic
backend: glow
reproducibility: r4.0_default
reproducibility: r4.1-dev_default
prehook_deps:
- names:
- kilt
Expand Down Expand Up @@ -571,9 +592,6 @@ variations:
- all
CM_MLPERF_ACCURACY_RESULTS_DIR:
- 'on'
skip_if_env:
CM_MLPERF_IMPLEMENTATION:
- nvidia
names:
- mlperf-accuracy-script
- coco2014-accuracy-script
Expand Down Expand Up @@ -1060,7 +1078,7 @@ variations:

valid,retinanet:
adr:
openimages-accuracy-script:
openimages-accuracy-script-disabled:
tags: _nvidia-pycocotools

valid:
Expand Down Expand Up @@ -1239,6 +1257,24 @@ variations:
env:
CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3-cp38-cp38-linux_x86_64.whl'

r4.0-dev_default:
group:
reproducibility
add_deps_recursive:
nvidia-inference-common-code:
version: r3.1
tags: _ctuning
nvidia-inference-server:
version: r3.1
tags: _ctuning
intel-harness:
tags: _v3.1
default_env:
CM_SKIP_SYS_UTILS: 'yes'
CM_REGENERATE_MEASURE_FILES: 'yes'
env:
CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3-cp38-cp38-linux_x86_64.whl'

r4.0_default:
group:
reproducibility
Expand All @@ -1257,6 +1293,25 @@ variations:
env:
CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3-cp38-cp38-linux_x86_64.whl'

#uses public code for inference v4.1
r4.1-dev_default:
group:
reproducibility
add_deps_recursive:
nvidia-inference-common-code:
version: r4.0
tags: _go
nvidia-inference-server:
version: r4.0
tags: _go
intel-harness:
tags: _v4.0
default_env:
CM_SKIP_SYS_UTILS: 'yes'
CM_REGENERATE_MEASURE_FILES: 'yes'
env:
CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp38-cp38-linux_x86_64.whl'

r4.1_default:
group:
reproducibility
Expand Down
3 changes: 3 additions & 0 deletions script/process-mlperf-accuracy/_cm.json
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,9 @@
"coco2014-original"
],
"tags": "get,dataset,coco2014,original"
},
{
"tags": "get,generic-python-lib,_package.ijson"
}
],
"env": {
Expand Down
2 changes: 1 addition & 1 deletion script/process-mlperf-accuracy/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def preprocess(i):

if env.get('CM_SDXL_COMPLIANCE_IMAGES_PATH', '') != '':
extra_options += f" --compliance-images-path '{env['CM_SDXL_COMPLIANCE_IMAGES_PATH']}' "
elif not os.path.exists(os.path.join(result_dir, "images")):
else:
extra_options += f" --compliance-images-path {os.path.join(result_dir, 'images')} "

if env.get('CM_SDXL_ACCURACY_RUN_DEVICE', '') != '':
Expand Down
12 changes: 12 additions & 0 deletions script/run-mlperf-inference-app/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -252,12 +252,24 @@ variations:
CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r3.1_default
group: benchmark-version

r4.0-dev:
env:
CM_MLPERF_INFERENCE_VERSION: '4.0-dev'
CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r4.0-dev_default
group: benchmark-version

r4.0:
env:
CM_MLPERF_INFERENCE_VERSION: '4.0'
CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r4.0_default
group: benchmark-version

r4.1-dev:
env:
CM_MLPERF_INFERENCE_VERSION: '4.1-dev'
CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r4.1-dev_default
group: benchmark-version

r4.1:
env:
CM_MLPERF_INFERENCE_VERSION: '4.1'
Expand Down
35 changes: 35 additions & 0 deletions script/run-vllm-server/_cm.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
uid: c3eff27c791048aa
alias: run-vllm-server

automation_alias: script
automation_uid: 5b4e0237da074764

cache: false

category: DevOps automation

tags:
- run
- server
- vllm
- vllm-server

# CLI inputs mapped into CM environment variables consumed by customize.py
input_mapping:
  model: CM_VLLM_SERVER_MODEL_NAME
  tp_size: CM_VLLM_SERVER_TP_SIZE
  pp_size: CM_VLLM_SERVER_PP_SIZE
  distributed-executor-backend: CM_VLLM_SERVER_DIST_EXEC_BACKEND
  api_key: CM_VLLM_SERVER_API_KEY

deps:
# Pin Python below 3.12 — presumably a vLLM wheel constraint; TODO confirm.
- tags: get,python3,get-python3
  version_max: "3.11.999"
  version_max_usable: "3.11.0"

# Clone the model repository only when a model name was supplied.
# NOTE(review): unquoted `on` is parsed as YAML boolean true (YAML 1.1
# "Norway problem"); sibling CM scripts write 'on' quoted. Kept as-is to
# preserve behavior — confirm intended CM enable_if_env semantics.
- tags: get,ml-model,huggingface,zoo,_clone-repo
  update_tags_from_env_with_prefix:
    _model-stub.:
    - CM_VLLM_SERVER_MODEL_NAME
  enable_if_env:
    CM_VLLM_SERVER_MODEL_NAME: [ on ]

- tags: get,generic-python-lib,_package.vllm
49 changes: 49 additions & 0 deletions script/run-vllm-server/customize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from cmind import utils
import os, subprocess

def preprocess(i):
    """
    Build the vLLM OpenAI-compatible API server launch command from CM env vars.

    Reads from i['env']:
      CM_VLLM_SERVER_MODEL_NAME        - served model name (required)
      CM_ML_MODEL_PATH                 - local model path passed to --model
      CM_VLLM_SERVER_TP_SIZE           - optional tensor-parallel size
      CM_VLLM_SERVER_PP_SIZE           - optional pipeline-parallel size
      CM_VLLM_SERVER_API_KEY           - API key (defaults to "root")
      CM_VLLM_SERVER_DIST_EXEC_BACKEND - optional distributed executor backend
      CM_PYTHON_BIN_WITH_PATH          - interpreter used to launch the server

    Writes env['CM_VLLM_RUN_CMD'] — the command line that run.sh executes.

    Returns {'return': 0} on success, or {'return': 1, 'error': ...} when the
    model name is missing.
    """
    env = i['env']

    model_name = env.get("CM_VLLM_SERVER_MODEL_NAME", False)
    if not model_name:
        return {'return': 1, 'error': 'Model name not specified'}

    cmd_args = f" --model {env['CM_ML_MODEL_PATH']} --served-model-name {model_name}"

    tp_size = env.get("CM_VLLM_SERVER_TP_SIZE", False)
    if tp_size:
        cmd_args += f" --tensor-parallel-size {tp_size}"

    pp_size = env.get("CM_VLLM_SERVER_PP_SIZE", False)
    if pp_size:
        cmd_args += f" --pipeline-parallel-size {pp_size}"

    # Bug fix: the original guarded this on pp_size (copy-paste error), so the
    # API key was only passed when pipeline parallelism happened to be set.
    api_key = env.get("CM_VLLM_SERVER_API_KEY", "root")
    if api_key:
        cmd_args += f" --api-key {api_key}"

    distributed_executor_backend = env.get("CM_VLLM_SERVER_DIST_EXEC_BACKEND", False)
    if distributed_executor_backend:
        cmd_args += f" --distributed-executor-backend {distributed_executor_backend}"

    cmd = f"{env['CM_PYTHON_BIN_WITH_PATH']} -m vllm.entrypoints.openai.api_server {cmd_args}"
    print(cmd)

    env['CM_VLLM_RUN_CMD'] = cmd

    return {'return': 0}

def postprocess(i):
    """
    No-op post-processing hook for the CM script interface.

    Accesses i['env'] (preserving the KeyError contract of the original) and
    reports success.
    """
    _env = i['env']  # required key; value intentionally unused

    return {'return': 0}
6 changes: 6 additions & 0 deletions script/run-vllm-server/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash

# Log the exact command before running it. Quoted so whitespace in the
# command string is printed verbatim (the original unquoted echo collapsed it).
echo "${CM_VLLM_RUN_CMD}"

# Intentionally unquoted: the stored command string must undergo word
# splitting into program + arguments.
${CM_VLLM_RUN_CMD}
test $? -eq 0 || exit 1

0 comments on commit 9ccdb4a

Please sign in to comment.