fixing llamacpp python docs and makefile
Signed-off-by: greg pereira <[email protected]>
Gregory-Pereira committed Apr 6, 2024
1 parent 9d63c26 commit 7346200
Showing 3 changed files with 75 additions and 36 deletions.
4 changes: 4 additions & 0 deletions .gitignore
@@ -3,3 +3,7 @@
*_pycache_*
port_check.lock
*build
models/*
!models/convert_models/*
!models/Containerfile
!models/README.md
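
These `.gitignore` entries ignore everything under `models/` while re-including the conversion assets, the Containerfile, and the README. A quick way to confirm the patterns behave as intended is `git check-ignore` (a sketch; substitute any model file you have actually downloaded):

```bash
# Run from the repository root; -v prints the pattern responsible for each decision.
git check-ignore -v models/mistral-7b-instruct-v0.1.Q4_K_M.gguf
git check-ignore -v models/README.md
```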
57 changes: 50 additions & 7 deletions model_servers/llamacpp_python/Makefile
@@ -1,21 +1,64 @@
APP := llamacpp_python
PORT := 8001

IMAGE := quay.io/ai-lab/model_servers/$(APP):latest
CUDA_IMAGE := quay.io/ai-lab/model_servers/$(APP)_cuda:latest
VULKAN_IMAGE := quay.io/ai-lab/model_servers/$(APP)_vulkan:latest

# ----- MODEL OPTIONS -----

LLAMA_MODEL_NAME := llama-2-7b-chat.Q5_K_S.gguf
LLAMA_MODEL_URL := https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf

MISTRAL_MODEL_NAME := mistral-7b-instruct-v0.1.Q4_K_M.gguf
MISTRAL_MODEL_URL := https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf

# --- END MODEL OPTIONS ---

MODEL_PATH_FROM_ROOT := locallm/models/$(MISTRAL_MODEL_NAME) # CHOOSE MODEL HERE BY NAME
RELATIVE_MODEL_PATH := ../../models

BIND_MOUNT_OPTIONS := ro
OS := $(shell uname -s)
ifeq ($(OS),Linux)
BIND_MOUNT_OPTIONS := ro,Z
endif

.PHONY: all
all: build download-mistral-7b-instruct-v0.1.Q4_K_M.gguf run

.PHONY: build
build:
podman build -t ghcr.io/ai-lab-recipes/model_servers .
podman build -t $(IMAGE) . -f base/Containerfile

.PHONY: build-cuda
build-cuda:
podman build -t $(CUDA_IMAGE) . -f cuda/Containerfile

.PHONY: build-vulkan
build-vulkan:
podman build -t $(VULKAN_IMAGE) . -f cuda/Containerfile

llama-2-7b-chat.Q5_K_S.gguf:
curl -s -S -L -f https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf -z $@ -o $@.tmp && mv -f $@.tmp $@ 2>/dev/null || rm -f $@.tmp $@
.PHONY: download-model-llama
download-model-llama:
cd ../../models && \
curl -s -S -L -f $(LLAMA_MODEL_URL) -z $@ -o $@.tmp && mv -f $@.tmp $@ 2>/dev/null || rm -f $@.tmp $@

mistral-7b-instruct-v0.1.Q4_K_M.gguf:
curl -s -S -L -f https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf -z $@ -o $@.tmp && mv -f $@.tmp $@ 2>/dev/null || rm -f $@.tmp $@
.PHONY: download-model-mistral
download-model-mistral:
cd ../../models && \
curl -s -S -L -f $(MISTRAL_MODEL_URL) -z $@ -o $@.tmp && mv -f $@.tmp $@ 2>/dev/null || rm -f $@.tmp $@

.PHONY: install
install:
pip install -r tests/requirements.txt

.PHONY: run
run:
podman run -it -d -p 8001:8001 -v ./models:/locallm/models:ro,Z -e MODEL_PATH=models/mistral-7b-instruct-v0.1.Q4_K_M.gguf -e HOST=0.0.0.0 -e PORT=8001 --net=host ghcr.io/redhat-et/model_servers
cd ../.. && \
podman run -it -d -p $(PORT):$(PORT) -v ./models:$(MODEL_PATH_FROM_ROOT):$(BIND_MOUNT_OPTIONS) -e MODEL_PATH=$(MODEL_PATH_FROM_ROOT) -e HOST=0.0.0.0 -e PORT=$(PORT) --net=host $(IMAGE);

.PHONY: test
test:
pytest --log-cli-level NOTSET
$(MAKE) all
pytest --log-cli-level NOTSET
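
Taken together, a typical flow with the updated Makefile might look like the following (a sketch using the targets defined above; run from `model_servers/llamacpp_python`):

```bash
cd model_servers/llamacpp_python

make build                    # build the base image from base/Containerfile
make download-model-mistral   # fetch the Mistral GGUF into ../../models
make run                      # start the server on port 8001 with the model bind-mounted
make install                  # install the pytest requirements from tests/requirements.txt
make test                     # note: the test target re-runs `all` before invoking pytest
```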
50 changes: 21 additions & 29 deletions model_servers/llamacpp_python/README.md
@@ -1,16 +1,21 @@
### Build Model Service

For the standard model service image:

```bash
cd model_servers/llamacpp_python
podman build -t playground -f base/Containerfile .
make -f Makefile build
```

or
For the CUDA variant image:

```bash
cd model_servers/llamacpp_python
make -f base/Makefile build
make -f Makefile build-cuda
```

For the Vulkan variant image:

```bash
make -f Makefile build-vulkan
```
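
After building, you can confirm the resulting tags with podman (assuming the default image names from the Makefile):

```bash
# Lists whichever llamacpp_python variants have been built locally
podman images | grep 'model_servers/llamacpp_python'
```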

### Download Model
@@ -22,39 +27,26 @@ At the time of this writing, 2 models are known to work with this service
- **Mistral-7b**
- Download URL: [https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf)

It is suggested you place models in the [models](../../models/) directory. To retrieve them, either download them directly using the links above, or use the Makefile download targets.

```bash
cd ../models
wget <Download URL>
cd ../
cd ../../models
curl -sLO <Download URL>
cd model_servers/llamacpp_python
```

or
or:

```bash
make -f Makefile models/mistral-7b-instruct-v0.1.Q4_K_M.gguf
make -f Makefile download-model-mistral
make -f Makefile download-model-llama
```

### Deploy Model Service

#### Single Model Service:

Deploy the LLM server and volume mount the model of choice using the `MODEL_PATH` environment variable.

```bash
podman run --rm -it -d \
-p 8001:8001 \
-v Local/path/to/locallm/models:/locallm/models:ro,Z \
-e MODEL_PATH=models/<model-filename> \
-e HOST=0.0.0.0 \
-e PORT=8001 \
playground
```

or

```bash
make -f Makefile run
```
Deploy the LLM server and volume mount the model of choice using the `MODEL_PATH` environment variable. The model server is most easily deployed by calling the make command: `make -f Makefile run`. A quick way to check the running service is sketched below.
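
Once the container is up, a minimal request against the OpenAI-compatible API exposed by llama-cpp-python should return a completion (a sketch; assumes the default port 8001 and the standard `/v1/chat/completions` endpoint):

```bash
curl -s http://localhost:8001/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"messages": [{"role": "user", "content": "Hello, are you running?"}]}'
```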

#### Multiple Model Service:

@@ -82,7 +74,7 @@ Here is an example `models_config.json` with two quantization variants of mistral
}
```

Now run the container with the specified config file.
Now run the container with the specified config file. Note: the following command uses Linux bind mount options; on Darwin (macOS), remove the `,Z` from the volume directive.

```bash
podman run --rm -it -d \
@@ -100,4 +92,4 @@ Running tests

```bash
make -f Makefile test
```
```
