From 13f181492f664159bb60e95ed5889041bd4cdf4b Mon Sep 17 00:00:00 2001
From: Michael Clifford
Date: Thu, 14 Mar 2024 09:06:03 -0400
Subject: [PATCH] Add option for config file in playground (#72)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* add option for config file in playground

* Update playground/README.md

Co-authored-by: Sébastien Han

---------

Co-authored-by: Sébastien Han
---
 playground/README.md | 37 ++++++++++++++++++++++++++++++++++++-
 playground/run.sh    | 14 +++++++++++++-
 2 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/playground/README.md b/playground/README.md
index 4810099ac..556c98a6e 100644
--- a/playground/README.md
+++ b/playground/README.md
@@ -23,7 +23,9 @@ cd ../
 
 ### Deploy Model Service
 
-Deploy the LLM server and volume mount the model of choice.
+#### Single Model Service:
+
+Deploy the LLM server and volume mount the model of choice using the `MODEL_PATH` environment variable.
 
 ```bash
 podman run --rm -it -d \
@@ -34,3 +36,36 @@ podman run --rm -it -d \
 -e PORT=8001 \
 playground:image`
 ```
+
+#### Multiple Model Service:
+
+To enable dynamic loading and unloading of different models present on your machine, start the model service with a `CONFIG_PATH` instead of a `MODEL_PATH`.
+
+Here is an example `models_config.json` with two quantization variants of Mistral-7B:
+```json
+{
+  "host": "0.0.0.0",
+  "port": 8001,
+  "models": [
+    {
+      "model": "models/mistral-7b-instruct-v0.1.Q4_K_M.gguf",
+      "model_alias": "mistral_Q4",
+      "chat_format": "mistral"
+    },
+    {
+      "model": "models/mistral-7b-instruct-v0.1.Q5_K_M.gguf",
+      "model_alias": "mistral_Q5",
+      "chat_format": "mistral"
+    }
+  ]
+}
+```
+
+Now run the container, pointing `CONFIG_PATH` at the config file (here placed inside the mounted `models/` directory):
+```bash
+podman run --rm -it -d \
+-p 8001:8001 \
+-v Local/path/to/locallm/models:/locallm/models:ro,Z \
+-e CONFIG_PATH=models/models_config.json \
+playground:image
+```
\ No newline at end of file
diff --git a/playground/run.sh b/playground/run.sh
index a1670cb28..8611eda40 100644
--- a/playground/run.sh
+++ b/playground/run.sh
@@ -1,2 +1,14 @@
 #!/bin/bash
-python -m llama_cpp.server --model ${MODEL_PATH} --host ${HOST:=0.0.0.0} --port ${PORT:=8001} --n_gpu_layers ${GPU_LAYERS:=0} --clip_model_path ${CLIP_MODEL_PATH:=None} --chat_format ${CHAT_FORMAT:="llama-2"}
+# CONFIG_PATH takes precedence over MODEL_PATH when both are set.
+if [ -n "${CONFIG_PATH}" ]; then
+    python -m llama_cpp.server --config_file "${CONFIG_PATH}"
+    exit 0
+fi
+
+if [ -n "${MODEL_PATH}" ]; then
+    python -m llama_cpp.server --model "${MODEL_PATH}" --host "${HOST:=0.0.0.0}" --port "${PORT:=8001}" --n_gpu_layers "${GPU_LAYERS:=0}" --clip_model_path "${CLIP_MODEL_PATH:=None}" --chat_format "${CHAT_FORMAT:=llama-2}"
+    exit 0
+fi
+
+echo "Please set either a CONFIG_PATH or a MODEL_PATH"
+exit 1
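
Two usage sketches for the behavior above follow; neither is part of the patch itself. First, the single-model path: besides `MODEL_PATH`, `run.sh` honors `HOST`, `PORT`, `GPU_LAYERS`, `CLIP_MODEL_PATH`, and `CHAT_FORMAT`, so each default can be overridden per container. A minimal sketch, reusing the model filename from the config example above (the `GPU_LAYERS` value is an arbitrary illustration):

```bash
# Single-model service with optional overrides; the defaults in run.sh are
# HOST=0.0.0.0, PORT=8001, GPU_LAYERS=0, CHAT_FORMAT=llama-2.
podman run --rm -it -d \
-p 8001:8001 \
-v Local/path/to/locallm/models:/locallm/models:ro,Z \
-e MODEL_PATH=models/mistral-7b-instruct-v0.1.Q4_K_M.gguf \
-e CHAT_FORMAT=mistral \
-e GPU_LAYERS=32 \
playground:image
```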
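
Second, the multi-model path: each entry's `model_alias` becomes selectable through the llama-cpp-python server's OpenAI-compatible API via the request's `model` field. A quick check, assuming the host, port, and aliases from the example `models_config.json`:

```bash
# List the model aliases the service currently exposes.
curl http://localhost:8001/v1/models

# Route a chat completion to the Q4 variant by its alias.
curl http://localhost:8001/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{"model": "mistral_Q4", "messages": [{"role": "user", "content": "Hello!"}]}'
```

Switching the `model` field between `mistral_Q4` and `mistral_Q5` is what triggers the dynamic load and unload described above.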