Commit
fixing llamacpp python docs and makefile
Signed-off-by: greg pereira <[email protected]>
Gregory-Pereira committed Apr 4, 2024
1 parent 9d63c26 commit 7a31c9e
Showing 3 changed files with 56 additions and 33 deletions.
4 changes: 4 additions & 0 deletions .gitignore
@@ -3,3 +3,7 @@
*_pycache_*
port_check.lock
*build
models/*
!models/convert_models/*
!models/Containerfile
!models/README.md
37 changes: 32 additions & 5 deletions model_servers/llamacpp_python/Makefile
@@ -1,6 +1,20 @@
APP := llamacpp_python
PORT := 8001
IMAGE := quay.io/ai-lab/model_servers/$(APP):latest
CUDA_IMAGE := quay.io/ai-lab/model_servers/$(APP)_cuda:latest
VULKAN_IMAGE := quay.io/ai-lab/model_servers/$(APP)_vulkan:latest

.PHONY: build
build:
podman build -t ghcr.io/ai-lab-recipes/model_servers .
podman build -t $(IMAGE) . -f base/Containerfile

.PHONY: build-cuda
build-cuda:
podman build -t $(CUDA_IMAGE) . -f cuda/Containerfile

.PHONY: build-vulkan
build-vulkan:
podman build -t $(VULKAN_IMAGE) . -f vulkan/Containerfile

llama-2-7b-chat.Q5_K_S.gguf:
curl -s -S -L -f https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf -z $@ -o $@.tmp && mv -f $@.tmp $@ 2>/dev/null || rm -f $@.tmp $@
@@ -12,10 +26,23 @@ mistral-7b-instruct-v0.1.Q4_K_M.gguf:
install:
pip install -r tests/requirements.txt

.PHONY: run
run:
podman run -it -d -p 8001:8001 -v ./models:/locallm/models:ro,Z -e MODEL_PATH=models/mistral-7b-instruct-v0.1.Q4_K_M.gguf -e HOST=0.0.0.0 -e PORT=8001 --net=host ghcr.io/redhat-et/model_servers
.PHONY: run-linux
run-linux:
cd ../..; podman run -it -d -p $(PORT):$(PORT) -v ./models:/locallm/models:ro,Z -e MODEL_PATH=/locallm/models/mistral-7b-instruct-v0.1.Q4_K_M.gguf -e HOST=0.0.0.0 -e PORT=$(PORT) --net=host $(IMAGE); cd model_servers/llamacpp_python;

.PHONY: run-darwin
run-darwin:
cd ../..; podman run -it -d -p $(PORT):$(PORT) -v ./models:/locallm/models -e MODEL_PATH=/locallm/models/mistral-7b-instruct-v0.1.Q4_K_M.gguf -e HOST=0.0.0.0 -e PORT=$(PORT) --net=host $(IMAGE); cd model_servers/llamacpp_python;

.PHONY: run-linux-local
run-linux-local:
cd ../..; podman run -it -d -p $(PORT):$(PORT) -v ./models:/locallm/models:ro,Z -e MODEL_PATH=/locallm/models/mistral-7b-instruct-v0.1.Q4_K_M.gguf -e HOST=0.0.0.0 -e PORT=$(PORT) $(IMAGE); cd model_servers/llamacpp_python;

.PHONY: run-darwin-local
run-darwin-local:
cd ../..; podman run -it -d -p $(PORT):$(PORT) -v ./models:/locallm/models -e MODEL_PATH=/locallm/models/mistral-7b-instruct-v0.1.Q4_K_M.gguf -e HOST=0.0.0.0 -e PORT=$(PORT) $(IMAGE); cd model_servers/llamacpp_python;


.PHONY: test
test:
pytest --log-cli-level NOTSET
pytest --log-cli-level NOTSET
48 changes: 20 additions & 28 deletions model_servers/llamacpp_python/README.md
@@ -1,16 +1,21 @@
### Build Model Service

For the standard model service image:

```bash
cd model_servers/llamacpp_python
podman build -t playground -f base/Containerfile .
make -f Makefile build
```

or
For the CUDA variant image:

```bash
cd model_servers/llamacpp_python
make -f base/Makefile build
make -f Makefile build-cuda
```

For the Vulkan variant image:

```bash
make -f Makefile build-vulkan
```

### Download Model
@@ -22,39 +27,26 @@ At the time of this writing, 2 models are known to work with this service
- **Mistral-7b**
- Download URL: [https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf)

It is suggested you place models in the [models](../../models/) directory. To retrieve them, either use `curl` with the download links above, or run the model file names as Makefile targets.

```bash
cd ../models
wget <Download URL>
cd ../
cd ../../models
curl -sLO <Download URL>
cd ../model_servers/llamacpp_python
```

or
or:

```bash
make -f Makefile models/mistral-7b-instruct-v0.1.Q4_K_M.gguf
make -f Makefile mistral-7b-instruct-v0.1.Q4_K_M.gguf
make -f Makefile llama-2-7b-chat.Q5_K_S.gguf
```

### Deploy Model Service

#### Single Model Service:

Deploy the LLM server and volume mount the model of choice using the `MODEL_PATH` environment variable.

```bash
podman run --rm -it -d \
-p 8001:8001 \
-v Local/path/to/locallm/models:/locallm/models:ro,Z \
-e MODEL_PATH=models/<model-filename> \
-e HOST=0.0.0.0 \
-e PORT=8001 \
playground
```

or

```bash
make -f Makefile run
```
Deploy the LLM server and volume mount the model of choice using the `MODEL_PATH` environment variable. Because the volume mount syntax differs between Linux and macOS (Darwin), call the make target for your OS, e.g. `make -f Makefile run-linux` or `make -f Makefile run-darwin`, or their local variants, e.g. `make -f Makefile run-linux-local` or `make -f Makefile run-darwin-local`.
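
The `podman run` command these targets wrap looks roughly like the following sketch (derived from the `run-linux` target in the Makefile; run it from the repository root so the `./models` mount resolves, drop the `:ro,Z` mount options on macOS, and note the `-local` variants also drop `--net=host`):

```bash
# Approximation of `make -f Makefile run-linux`, run from the repository root
podman run -it -d -p 8001:8001 \
	-v ./models:/locallm/models:ro,Z \
	-e MODEL_PATH=/locallm/models/mistral-7b-instruct-v0.1.Q4_K_M.gguf \
	-e HOST=0.0.0.0 \
	-e PORT=8001 \
	--net=host \
	quay.io/ai-lab/model_servers/llamacpp_python:latest
```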

#### Multiple Model Service:

@@ -100,4 +92,4 @@ Running tests

```bash
make -f Makefile test
```
```
