Commit
fixing llamacpp python docs and makefile
Signed-off-by: greg pereira <[email protected]>
Gregory-Pereira committed Apr 4, 2024
1 parent 9d63c26 commit 7a31c9e
Showing 3 changed files with 56 additions and 33 deletions.
4 changes: 4 additions & 0 deletions .gitignore
@@ -3,3 +3,7 @@
*_pycache_*
port_check.lock
*build
models/*
!models/convert_models/*
!models/Containerfile
!models/README.md
37 changes: 32 additions & 5 deletions model_servers/llamacpp_python/Makefile
@@ -1,6 +1,20 @@
APP := llamacpp_python
PORT := 8001
IMAGE := quay.io/ai-lab/model_servers/$(APP):latest
CUDA_IMAGE := quay.io/ai-lab/model_servers/$(APP)_cuda:latest
VULKAN_IMAGE := quay.io/ai-lab/model_servers/$(APP)_vulkan:latest

.PHONY: build
build:
podman build -t ghcr.io/ai-lab-recipes/model_servers .
podman build -t $(IMAGE) . -f base/Containerfile

.PHONY: build-cuda
build-cuda:
podman build -t $(CUDA_IMAGE) . -f cuda/Containerfile

.PHONY: build-vulkan
build-vulkan:
podman build -t $(VULKAN_IMAGE) . -f vulkan/Containerfile

llama-2-7b-chat.Q5_K_S.gguf:
curl -s -S -L -f https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf -z $@ -o $@.tmp && mv -f $@.tmp $@ 2>/dev/null || rm -f $@.tmp $@
@@ -12,10 +26,23 @@ mistral-7b-instruct-v0.1.Q4_K_M.gguf:
install:
pip install -r tests/requirements.txt

.PHONY: run
run:
podman run -it -d -p 8001:8001 -v ./models:/locallm/models:ro,Z -e MODEL_PATH=models/mistral-7b-instruct-v0.1.Q4_K_M.gguf -e HOST=0.0.0.0 -e PORT=8001 --net=host ghcr.io/redhat-et/model_servers
.PHONY: run-linux
run-linux:
cd ../..; podman run -it -d -p $(PORT):$(PORT) -v ./models:/locallm/models:ro,Z -e MODEL_PATH=/locallm/models/mistral-7b-instruct-v0.1.Q4_K_M.gguf -e HOST=0.0.0.0 -e PORT=$(PORT) --net=host $(IMAGE); cd model_servers/llamacpp_python;

.PHONY: run-darwin
run-darwin:
cd ../..; podman run -it -d -p $(PORT):$(PORT) -v ./models:/locallm/models -e MODEL_PATH=/locallm/models/mistral-7b-instruct-v0.1.Q4_K_M.gguf -e HOST=0.0.0.0 -e PORT=$(PORT) --net=host $(IMAGE); cd model_servers/llamacpp_python;

.PHONY: run-linux-local
run-linux-local:
cd ../..; podman run -it -d -p $(PORT):$(PORT) -v ./models:/locallm/models:ro,Z -e MODEL_PATH=/locallm/models/mistral-7b-instruct-v0.1.Q4_K_M.gguf -e HOST=0.0.0.0 -e PORT=$(PORT) $(IMAGE); cd model_servers/llamacpp_python;

.PHONY: run-darwin-local
run-darwin-local:
cd ../..; podman run -it -d -p $(PORT):$(PORT) -v ./models:/locallm/models -e MODEL_PATH=/locallm/models/mistral-7b-instruct-v0.1.Q4_K_M.gguf -e HOST=0.0.0.0 -e PORT=$(PORT) $(IMAGE); cd model_servers/llamacpp_python;


.PHONY: test
test:
pytest --log-cli-level NOTSET
pytest --log-cli-level NOTSET
48 changes: 20 additions & 28 deletions model_servers/llamacpp_python/README.md
@@ -1,16 +1,21 @@
### Build Model Service

For the standard model service image:

```bash
cd model_servers/llamacpp_python
podman build -t playground -f base/Containerfile .
make -f Makefile build
```

or
For the CUDA variant image:

```bash
cd model_servers/llamacpp_python
make -f base/Makefile build
make -f Makefile build-cuda
```

For the Vulkan variant image:

```bash
make -f Makefile build-vulkan
```

### Download Model
@@ -22,39 +27,26 @@ At the time of this writing, 2 models are known to work with this service
- **Mistral-7b**
- Download URL: [https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf)

It is suggested you place models in the [models](../../models/) directory. To retrieve them, either use `curl` with the download links above, or run the model file names as Makefile targets.

```bash
cd ../models
wget <Download URL>
cd ../
cd ../../models
curl -sLO <Download URL>
cd ../model_servers/llamacpp_python
```

or
or:

```bash
make -f Makefile models/mistral-7b-instruct-v0.1.Q4_K_M.gguf
make -f Makefile mistral-7b-instruct-v0.1.Q4_K_M.gguf
make -f Makefile llama-2-7b-chat.Q5_K_S.gguf
```

### Deploy Model Service

#### Single Model Service:

Deploy the LLM server and volume mount the model of choice using the `MODEL_PATH` environment variable.

```bash
podman run --rm -it -d \
-p 8001:8001 \
-v Local/path/to/locallm/models:/locallm/models:ro,Z \
-e MODEL_PATH=models/<model-filename> \
-e HOST=0.0.0.0 \
-e PORT=8001 \
playground
```

or

```bash
make -f Makefile run
```
Deploy the LLM server and volume mount the model of choice using the `MODEL_PATH` environment variable. Because the volume mount syntax differs between Linux and macOS (Darwin), call the make target for your OS, e.g. `make -f Makefile run-linux` or `make -f Makefile run-darwin`, or their local variants, e.g. `make -f Makefile run-linux-local` or `make -f Makefile run-darwin-local`.
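
The `podman run` command these targets wrap looks roughly like the following sketch (derived from the `run-linux` target in the Makefile; run it from the repository root so the `./models` mount resolves, drop the `:ro,Z` mount options on macOS, and note the `-local` variants also drop `--net=host`):

```bash
# Approximation of `make -f Makefile run-linux`, run from the repository root
podman run -it -d -p 8001:8001 \
	-v ./models:/locallm/models:ro,Z \
	-e MODEL_PATH=/locallm/models/mistral-7b-instruct-v0.1.Q4_K_M.gguf \
	-e HOST=0.0.0.0 \
	-e PORT=8001 \
	--net=host \
	quay.io/ai-lab/model_servers/llamacpp_python:latest
```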

#### Multiple Model Service:

@@ -100,4 +92,4 @@ Running tests

```bash
make -f Makefile test
```
```
