Add option for config file in playground (containers#72)
* add option for config file in playground

* Update playground/README.md

Co-authored-by: Sébastien Han <[email protected]>

---------

Co-authored-by: Sébastien Han <[email protected]>
MichaelClifford and leseb authored Mar 14, 2024
1 parent 71a6cb3 commit 13f1814
Showing 2 changed files with 50 additions and 2 deletions.
38 changes: 37 additions & 1 deletion playground/README.md
@@ -23,7 +23,9 @@ cd ../

### Deploy Model Service

#### Single Model Service:

Deploy the LLM server and volume mount the model of choice using the `MODEL_PATH` environment variable.

```bash
podman run --rm -it -d \
@@ -34,3 +36,37 @@
-e PORT=8001 \
playground:image
```
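
Once that container is running, the server exposes llama-cpp-python's OpenAI-compatible endpoints, so a quick request can confirm the model loaded. This is a sketch, assuming the container publishes the service on localhost:8001; it is not part of the commit.

```bash
# Hypothetical smoke test: list the model the server has loaded.
curl -s http://localhost:8001/v1/models
```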

#### Multiple Model Service:

To enable dynamic loading and unloading of different models present on your machine, you can start the model service with a `CONFIG_PATH` instead of a `MODEL_PATH`.

Here is an example `models_config.json` with two quantization variants of mistral-7B.
```json
{
  "host": "0.0.0.0",
  "port": 8001,
  "models": [
    {
      "model": "models/mistral-7b-instruct-v0.1.Q4_K_M.gguf",
      "model_alias": "mistral_Q4",
      "chat_format": "mistral"
    },
    {
      "model": "models/mistral-7b-instruct-v0.1.Q5_K_M.gguf",
      "model_alias": "mistral_Q5",
      "chat_format": "mistral"
    }
  ]
}
```

Now run the container with the specified config file.
```bash
podman run --rm -it -d \
-p 8001:8001 \
-v Local/path/to/locallm/models:/locallm/models:ro,Z \
-e CONFIG_PATH=models/<config-filename> \
playground:image
```
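
With the multi-model container up, a request can target a specific variant by passing its `model_alias` as the `model` field of an OpenAI-compatible request, which the llama-cpp-python server should use to pick the backing model. A minimal sketch, assuming the container above is reachable on localhost:8001 and that the alias matches an entry in your config file; this is an illustration, not part of the commit.

```bash
# Hypothetical smoke test: send a chat request to the Q4 variant by its alias.
curl -s http://localhost:8001/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
        "model": "mistral_Q4",
        "messages": [{"role": "user", "content": "Say hello in one short sentence."}]
      }'
```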
14 changes: 13 additions & 1 deletion playground/run.sh
@@ -1,2 +1,14 @@
#!/bin/bash
# A config file takes precedence: if CONFIG_PATH is set, serve the models it lists.
if [ -n "${CONFIG_PATH}" ]; then
    python -m llama_cpp.server --config_file "${CONFIG_PATH}"
    exit 0
fi

# Otherwise fall back to serving the single model referenced by MODEL_PATH.
if [ -n "${MODEL_PATH}" ]; then
    python -m llama_cpp.server --model ${MODEL_PATH} --host ${HOST:=0.0.0.0} --port ${PORT:=8001} --n_gpu_layers ${GPU_LAYERS:=0} --clip_model_path ${CLIP_MODEL_PATH:=None} --chat_format ${CHAT_FORMAT:="llama-2"}
    exit 0
fi

echo "Please set either a CONFIG_PATH or a MODEL_PATH"
exit 1
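
For reference, both branches of the script can be exercised directly on a host with llama-cpp-python installed. This is a sketch with hypothetical local paths, not part of the commit.

```bash
# Multi-model mode: CONFIG_PATH wins even if MODEL_PATH is also set.
CONFIG_PATH=models/models_config.json ./run.sh

# Single-model mode: only MODEL_PATH is set; HOST and PORT fall back to the script defaults.
MODEL_PATH=models/mistral-7b-instruct-v0.1.Q4_K_M.gguf ./run.sh
```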
