From 13f181492f664159bb60e95ed5889041bd4cdf4b Mon Sep 17 00:00:00 2001
From: Michael Clifford
Date: Thu, 14 Mar 2024 09:06:03 -0400
Subject: [PATCH] Add option for config file in playground (#72)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* add option for config file in playground

* Update playground/README.md

Co-authored-by: Sébastien Han

---------

Co-authored-by: Sébastien Han
---
 playground/README.md | 37 ++++++++++++++++++++++++++++++++++++-
 playground/run.sh    | 14 +++++++++++++-
 2 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/playground/README.md b/playground/README.md
index 4810099ac..556c98a6e 100644
--- a/playground/README.md
+++ b/playground/README.md
@@ -23,7 +23,9 @@ cd ../
 
 ### Deploy Model Service
 
-Deploy the LLM server and volume mount the model of choice.
+#### Single Model Service:
+
+Deploy the LLM server and volume mount the model of choice using the `MODEL_PATH` environment variable.
 
 ```bash
 podman run --rm -it -d \
@@ -34,3 +36,36 @@ podman run --rm -it -d \
 -e PORT=8001 \
 playground:image`
 ```
+
+#### Multiple Model Service:
+
+To enable dynamic loading and unloading of different models present on your machine, start the model service with a `CONFIG_PATH` instead of a `MODEL_PATH`.
+
+Here is an example `models_config.json` with two quantization variants of Mistral-7B:
+```json
+{
+  "host": "0.0.0.0",
+  "port": 8001,
+  "models": [
+    {
+      "model": "models/mistral-7b-instruct-v0.1.Q4_K_M.gguf",
+      "model_alias": "mistral_Q4",
+      "chat_format": "mistral"
+    },
+    {
+      "model": "models/mistral-7b-instruct-v0.1.Q5_K_M.gguf",
+      "model_alias": "mistral_Q5",
+      "chat_format": "mistral"
+    }
+  ]
+}
+```
+
+Now run the container, pointing `CONFIG_PATH` at the config file (here placed inside the mounted `models/` directory):
+```bash
+podman run --rm -it -d \
+-p 8001:8001 \
+-v Local/path/to/locallm/models:/locallm/models:ro,Z \
+-e CONFIG_PATH=models/models_config.json \
+playground:image
+```
\ No newline at end of file
diff --git a/playground/run.sh b/playground/run.sh
index a1670cb28..8611eda40 100644
--- a/playground/run.sh
+++ b/playground/run.sh
@@ -1,2 +1,14 @@
 #!/bin/bash
-python -m llama_cpp.server --model ${MODEL_PATH} --host ${HOST:=0.0.0.0} --port ${PORT:=8001} --n_gpu_layers ${GPU_LAYERS:=0} --clip_model_path ${CLIP_MODEL_PATH:=None} --chat_format ${CHAT_FORMAT:="llama-2"}
+# CONFIG_PATH takes precedence over MODEL_PATH when both are set.
+if [ -n "${CONFIG_PATH}" ]; then
+    python -m llama_cpp.server --config_file "${CONFIG_PATH}"
+    exit 0
+fi
+
+if [ -n "${MODEL_PATH}" ]; then
+    python -m llama_cpp.server --model "${MODEL_PATH}" --host "${HOST:=0.0.0.0}" --port "${PORT:=8001}" --n_gpu_layers "${GPU_LAYERS:=0}" --clip_model_path "${CLIP_MODEL_PATH:=None}" --chat_format "${CHAT_FORMAT:=llama-2}"
+    exit 0
+fi
+
+echo "Please set either a CONFIG_PATH or a MODEL_PATH"
+exit 1
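
Two usage sketches for the behavior above follow; neither is part of the patch itself. First, the single-model path: besides `MODEL_PATH`, `run.sh` honors `HOST`, `PORT`, `GPU_LAYERS`, `CLIP_MODEL_PATH`, and `CHAT_FORMAT`, so each default can be overridden per container. A minimal sketch, reusing the model filename from the config example above (the `GPU_LAYERS` value is an arbitrary illustration):

```bash
# Single-model service with optional overrides; the defaults in run.sh are
# HOST=0.0.0.0, PORT=8001, GPU_LAYERS=0, CHAT_FORMAT=llama-2.
podman run --rm -it -d \
-p 8001:8001 \
-v Local/path/to/locallm/models:/locallm/models:ro,Z \
-e MODEL_PATH=models/mistral-7b-instruct-v0.1.Q4_K_M.gguf \
-e CHAT_FORMAT=mistral \
-e GPU_LAYERS=32 \
playground:image
```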
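
Second, the multi-model path: each entry's `model_alias` becomes selectable through the llama-cpp-python server's OpenAI-compatible API via the request's `model` field. A quick check, assuming the host, port, and aliases from the example `models_config.json`:

```bash
# List the model aliases the service currently exposes.
curl http://localhost:8001/v1/models

# Route a chat completion to the Q4 variant by its alias.
curl http://localhost:8001/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{"model": "mistral_Q4", "messages": [{"role": "user", "content": "Hello!"}]}'
```

Switching the `model` field between `mistral_Q4` and `mistral_Q5` is what triggers the dynamic load and unload described above.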