abstracting model downloads and file-normalization to models dir
Signed-off-by: greg pereira <[email protected]>
Gregory-Pereira committed Apr 8, 2024
1 parent dbfa978 commit 4b754a0
Showing 13 changed files with 205 additions and 104 deletions.
11 changes: 3 additions & 8 deletions .github/workflows/model_servers.yaml
@@ -45,17 +45,12 @@ jobs:
- name: Build Image
id: build_image
uses: redhat-actions/[email protected]
with:
image: ${{ env.REGISTRY }}/${{ matrix.image_name }}
platforms: linux/amd64, linux/arm64
tags: latest
containerfiles: ./model_servers/${{ matrix.image_name }}/base/Containerfile
context: model_servers/${{ matrix.image_name }}/
working-directory: ./model_servers/${{ matrix.image_name }}/
run: make build

- name: Download model
working-directory: ./model_servers/${{ matrix.image_name }}/
run: make ${{ matrix.model }}
run: make trigger-download-model-${{ matrix.model }}

- name: Set up Python
uses: actions/[email protected]
6 changes: 3 additions & 3 deletions .gitignore
@@ -4,7 +4,7 @@
port_check.lock
*build
models/*
model_servers/llamacpp_python/model.gguf
model_servers/*/model.file
!models/convert_models/*
!models/Containerfile
!models/README.md
!models/README.md

66 changes: 35 additions & 31 deletions model_servers/llamacpp_python/Makefile
@@ -5,23 +5,6 @@ IMAGE := quay.io/ai-lab/model_servers/$(APP):latest
CUDA_IMAGE := quay.io/ai-lab/model_servers/$(APP)_cuda:latest
VULKAN_IMAGE := quay.io/ai-lab/model_servers/$(APP)_vulkan:latest

# ----- MODEL OPTIONS -----

LLAMA_MODEL_NAME := llama-2-7b-chat.Q5_K_S.gguf
LLAMA_MODEL_URL := https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf

TINY_LLAMA_MODEL_NAME := tinyllama-1.1b-chat-v1.0.Q4_K_S.gguf
TINY_LLAMA_MODEL_URL := https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/blob/main/tinyllama-1.1b-chat-v1.0.Q4_K_S.gguf

MISTRAL_MODEL_NAME := mistral-7b-instruct-v0.1.Q4_K_M.gguf
MISTRAL_MODEL_URL := https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf

# --- END MODEL OPTIONS ---

SELECTED_MODEL_NAME := $(or $(SELECTED_MODEL),$(MISTRAL_MODEL_NAME))
SELECTED_MODEL_URL := $(or $(SELECTED_MODEL_LINK),$(MISTRAL_MODEL_URL))

RELATIVE_MODELS_PATH := ../../models
MODELS_PATH := /locallm/models

BIND_MOUNT_OPTIONS := ro
@@ -31,7 +14,7 @@ ifeq ($(OS),Linux)
endif

.PHONY: all
all: build mistral run
all: build trigger-download-model-mistral run

.PHONY: build
build:
@@ -45,18 +28,24 @@ build-cuda:
build-vulkan:
podman build -t $(VULKAN_IMAGE) . -f vulkan/Containerfile

.PHONY: download-model-tiny-llama
download-model-tiny-llama:
curl -H "Cache-Control: no-cache" -s -S -L -f $(TINY_LLAMA_MODEL_URL) -z $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME) -o $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME).tmp && mv -f $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME) 2>/dev/null || rm -f $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME)
########################## TRIGGER DOWNLOAD MAKE TARGETS ##########################
### NOTE: do not add trigger make targets like these for new models.
# Adding new models should be implemented in the [models directory](../../models).
# These targets are provided here only for bootstrapping and testing different model types.

.PHONY: download-model-llama
download-model-llama:
curl -H "Cache-Control: no-cache" -s -S -L -f $(LLAMA_MODEL_URL) -z $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME) -o $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp && mv -f $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME) 2>/dev/null || rm -f $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME)
.PHONY: trigger-download-model-tiny-llama # small .gguf model for testing
trigger-download-model-tiny-llama:
cd ../../models && \
make -f Makefile download-model-tiny-llama && \
make -f Makefile normalize-model-file-tiny-llama

.PHONY: trigger-download-model-mistral # default model
trigger-download-model-mistral:
cd ../../models && \
make -f Makefile download-model-mistral && \
make -f Makefile normalize-model-file-mistral

.PHONY: mistral
mistral:
curl -H "Cache-Control: no-cache" -s -S -L -f $(MISTRAL_MODEL_URL) -z $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME) -o $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp && mv -f $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME) 2>/dev/null || rm -f $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME)
############################ END DOWNLOAD MAKE TARGETS ############################

.PHONY: install
install:
@@ -65,9 +54,24 @@ install:
.PHONY: run
run:
cd ../../models && \
podman run -it -d -p $(PORT):$(PORT) -v ./$(SELECTED_MODEL_NAME):$(MODELS_PATH)/model.gguf:$(BIND_MOUNT_OPTIONS) -e MODEL_PATH=$(MODELS_PATH)/model.gguf -e HOST=0.0.0.0 -e PORT=$(PORT) --net=host $(IMAGE)
podman run --rm -it -d -p $(PORT):$(PORT) -v ./model.file:$(MODELS_PATH)/model.file:$(BIND_MOUNT_OPTIONS) -e HOST=0.0.0.0 -e MODEL_PATH=$(MODELS_PATH)/model.file -e PORT=$(PORT) --network=host $(IMAGE)

.PHONY: run-test # Note: running run-test consumes `./model.file`; restore it with `make trigger-download-model-<selected-model>` or copy it back in from the `../../models` dir
run-test:
@if [ ! -f "../../models/model.file" ]; then \
echo "Model file not present in the models directory."; \
echo "Please call \`make -f Makefile trigger-download-model-mistral\`."; \
echo "Or download a .gguf model and name it \`model.file\` in \`ai-lab/models directory\`."; \
exit 1; \
fi; \
mv ../../models/model.file ./model.file
pytest --log-cli-level NOTSET
rm ./model.file

.PHONY: test
.PHONY: clean
clean:
-rm ./model.file

.PHONY: test # Note: running test calls run-test, which consumes the `model.file`; restore it with `make trigger-download-model-<selected-model>` or copy it back in from the `../../models` dir
test:
curl -H "Cache-Control: no-cache" -s -S -L -f $(SELECTED_MODEL_URL) -z ./model.gguf -o ./model.gguf.tmp && mv -f ./model.gguf.tmp ./model.gguf 2>/dev/null || rm -f ./model.gguf.tmp ./model.gguf
pytest --log-cli-level NOTSET
$(MAKE) -k run-test clean
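
The `trigger-download-model-*` targets delegate to the shared [models](../../models) Makefile, whose own changes are not expanded in this view. As a rough sketch only (the `download-model-mistral` and `normalize-model-file-mistral` names come from this diff, but the recipe bodies and the copy-vs-move choice are assumptions), the delegated steps amount to:

```bash
# Approximate expansion of `make trigger-download-model-mistral` (recipe bodies are assumed)
cd ../../models
# download-model-mistral: fetch the default .gguf into the shared models dir
curl -sSL -o mistral-7b-instruct-v0.1.Q4_K_M.gguf \
  https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf
# normalize-model-file-mistral: expose the download under the generic name the servers mount
cp mistral-7b-instruct-v0.1.Q4_K_M.gguf model.file
```
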
11 changes: 3 additions & 8 deletions model_servers/llamacpp_python/README.md
@@ -27,20 +27,15 @@ At the time of this writing, 2 models are known to work with this service
- **Mistral-7b**
- Download URL: [https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf)

It is suggested you place models in the [models](../../models/) directory. As for retrieving them, either use `wget` to download them with the download links above, or call the model names from the Makefile.
It is suggested you place models in the [models](../../models/) directory. It is also recommended that you use the [models dir Makefile](../../models/Makefile) to see which models are available and to download them:

```bash
cd ../../models
curl -sLO <Download URL>
make -f Makefile download-model-mistral
cd model_servers/llamacpp_python
```

or:

```bash
make -f Makefile download-model-mistral
make -f Makefile download-model-llama
```
However, we have also added Makefile targets [in this directory's Makefile](./Makefile) which in turn call those targets, e.g. `make -f Makefile trigger-download-model-mistral`.
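
A combined example of that flow, starting from the repository root (the models-dir target names are taken from this commit's Makefiles), might look like:

```bash
# Option 1: drive the shared models dir Makefile directly
cd models
make -f Makefile download-model-mistral
make -f Makefile normalize-model-file-mistral
cd ..

# Option 2: let this directory's wrapper target do both steps
cd model_servers/llamacpp_python
make trigger-download-model-mistral
```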

### Deploy Model Service

15 changes: 12 additions & 3 deletions model_servers/llamacpp_python/tests/conftest.py
@@ -1,17 +1,26 @@
import pytest_container
import os
import logging

logging.info("""
Starting pytest with the following ENV vars:
REGISTRY: {REGISTRY}
IMAGE_NAME: {IMAGE_NAME}
For:
model_server: llamacpp_python
""".format(REGISTRY=os.environ['REGISTRY'], IMAGE_NAME=os.environ['IMAGE_NAME']))

MS = pytest_container.Container(
url=f"containers-storage:{os.environ['REGISTRY']}/{os.environ['IMAGE_NAME']}",
volume_mounts=[
pytest_container.container.BindMount(
container_path="/locallm/models/model.gguf",
host_path=f"./model.gguf",
container_path="/locallm/models/model.file",
host_path=f"./model.file",
flags=["ro"]
)
],
extra_environment_variables={
"MODEL_PATH": "/locallm/models/model.gguf",
"MODEL_PATH": "/locallm/models/model.file",
"HOST": "0.0.0.0",
"PORT": "8001"
},
1 change: 0 additions & 1 deletion model_servers/llamacpp_python/tests/test_alive.py
@@ -4,7 +4,6 @@

CONTAINER_IMAGES = [MS]


def test_etc_os_release_present(auto_container: pytest_container.container.ContainerData):
assert auto_container.connection.file("/etc/os-release").exists

2 changes: 1 addition & 1 deletion model_servers/llamacpp_python/tooling_options.ipynb
@@ -23,7 +23,7 @@
"This notebook assumes that the playground image is running locally. Once built, you can use the below to start the model service image. \n",
"\n",
"```bash\n",
"podman run -it -p 8000:8000 -v <YOUR-LOCAL-PATH>/locallm/models:/locallm/models:Z -e MODEL_PATH=models/mistral-7b-instruct-v0.1.Q4_K_M.gguf playground\n",
"podman run -it -p 8000:8000 -v <YOUR-LOCAL-PATH>/locallm/models:/locallm/models:Z -e MODEL_PATH=models/mistral-7b-instruct-v0.1.Q4_K_M.file playground\n",
"```"
]
},
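
Once the container is running, the notebook talks to it over llama-cpp-python's OpenAI-compatible API. A quick smoke test from the shell (the route and payload below are a sketch assuming the default `/v1/chat/completions` endpoint) could be:

```bash
# Smoke test against the model service started above (assumes the OpenAI-compatible
# /v1/chat/completions route served by llama-cpp-python on port 8000)
curl -s http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"messages": [{"role": "user", "content": "Say hello"}], "max_tokens": 32}'
```
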
68 changes: 39 additions & 29 deletions model_servers/whispercpp/Makefile
@@ -1,21 +1,8 @@
PORT := 8001
APP := whispercpp
IMAGE := quay.io/ai-lab/model_servers/$(APP):latest
CUDA_IMAGE := quay.io/ai-lab/model_servers/$(APP)_cuda:latest
VULKAN_IMAGE :=quay.io/ai-lab/model_servers/$(APP)_vulkan:latest

# ----- MODEL OPTIONS -----

WHISPER_SMALL_MODEL_NAME := ggml-small.bin
WHISPER_SMALL_MODEL_URL := https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin

WHISPER_BASE_MODEL_NAME := ggml-base.en.bin
WHISPER_BASE_MODEL_URL := https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin

SELECTED_MODEL_NAME := $(or $(SELECTED_MODEL),$(WHISPER_SMALL_MODEL_NAME))
SELECTED_MODEL_URL := $(or $(SELECTED_MODEL_LINK),$(WHISPER_SMALL_MODEL_URL))

# --- END MODEL OPTIONS ---
# CUDA_IMAGE := quay.io/ai-lab/model_servers/$(APP)_cuda:latest
# VULKAN_IMAGE :=quay.io/ai-lab/model_servers/$(APP)_vulkan:latest

MODELS_PATH := /app/models

@@ -26,32 +13,55 @@ ifeq ($(OS),Linux)
endif

.PHONY: all
all: build whisper-small run
all: build trigger-download-model-whisper-small run

.PHONY: build
build:
podman build -t $(IMAGE) . -f Containerfile

.PHONY: whisper-small
whisper-small:
cd ../../models && \
curl -s -S -L -f $(WHISPER_SMALL_MODEL_URL) -z $(WHISPER_SMALL_MODEL_NAME) -o $(WHISPER_SMALL_MODEL_NAME).tmp && mv -f $(WHISPER_SMALL_MODEL_NAME).tmp $(WHISPER_SMALL_MODEL_NAME) 2>/dev/null || rm -f $(WHISPER_SMALL_MODEL_NAME).tmp $(WHISPER_SMALL_MODEL_NAME)
podman build -t $(IMAGE) . -f base/Containerfile

.PHONY: install
install:
pip install -r tests/requirements.txt

.PHONY: download-model-whisper-base
download-model-whisper-base:
########################## TRIGGER DOWNLOAD MAKE TARGETS ##########################
### NOTE: do not add trigger make targets like these for new models.
# Adding new models should be implemented in the [models directory](../../models).
# These targets are provided here only for bootstrapping and testing different model types.

.PHONY: trigger-download-model-whisper-small # small .bin model for testing
trigger-download-model-whisper-small:
cd ../../models && \
curl -s -S -L -f $(WHISPER_BASE_MODEL_URL) -z $(WHISPER_BASE_MODEL_NAME) -o $(WHISPER_BASE_MODEL_NAME).tmp && mv -f $(WHISPER_BASE_MODEL_NAME).tmp $(WHISPER_BASE_MODEL_NAME) 2>/dev/null || rm -f $(WHISPER_BASE_MODEL_NAME).tmp $(WHISPER_BASE_MODEL_NAME)
make -f Makefile download-model-whisper-small && \
make -f Makefile normalize-model-file-whisper-small

.PHONY: trigger-download-model-whisper-base # default model
trigger-download-model-whisper-base:
cd ../../models && \
make -f Makefile download-model-whisper-base && \
make -f Makefile normalize-model-file-whisper-base

############################ END DOWNLOAD MAKE TARGETS ############################

.PHONY: run
run:
cd ../../models && \
podman run -d --rm -it -p $(PORT):$(PORT) -v ./$(SELECTED_MODEL_NAME):$(MODELS_PATH)/$(SELECTED_MODEL_NAME):$(BIND_MOUNT_OPTIONS) -e HOST=0.0.0.0 -e MODEL_PATH=$(MODELS_PATH)/$(SELECTED_MODEL_NAME) -e PORT=$(PORT) $(IMAGE)
podman run --rm -it -d -p $(PORT):$(PORT) -v ./model.file:$(MODELS_PATH)/model.file:$(BIND_MOUNT_OPTIONS) -e HOST=0.0.0.0 -e MODEL_PATH=$(MODELS_PATH)/model.file -e PORT=$(PORT) --network=host $(IMAGE)

.PHONY: test
test:
curl -H "Cache-Control: no-cache" -s -S -L -f $(SELECTED_MODEL_URL) -z ./model.gguf -o ./model.gguf.tmp && mv -f ./model.gguf.tmp ./model.gguf 2>/dev/null || rm -f ./model.gguf.tmp ./model.gguf
.PHONY: run-test # Note: running run-test consumes the `model.file`; you will have to reset it
run-test:
@if [ ! -f "../../models/model.file" ]; then \
echo "Model file not present in the models directory."; \
echo "Please call \`make -f Makefile trigger-download-model-whisper-base\`."; \
echo "Or download a .bin model and name it \`model.file\` in \`ai-lab/models directory\`."; \
exit 1; \
fi; \
mv ../../models/model.file ./model.file
pytest --log-cli-level NOTSET

.PHONY: clean # removes the local `./model.file` copy
clean:
-rm ./model.file

.PHONY: test # Note: running test calls run-test, which consumes the `model.file`; you will have to reset it
test:
$(MAKE) -k run-test clean
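
Putting the whispercpp targets together, an end-to-end local run mirroring the `all` target (target names come straight from this Makefile; the ordering is an editorial sketch) would be:

```bash
# Build the image, stage ../../models/model.file, then start the server
cd model_servers/whispercpp
make build
make trigger-download-model-whisper-small   # or: trigger-download-model-whisper-base
make run    # serves the model on PORT (8001 by default) via podman

# Note: `make test` consumes the staged model.file, so re-run the trigger target afterwards
```
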
15 changes: 12 additions & 3 deletions model_servers/whispercpp/tests/conftest.py
@@ -1,17 +1,26 @@
import pytest_container
import os
import logging

logging.info("""
Starting pytest with the following ENV vars:
REGISTRY: {REGISTRY}
IMAGE_NAME: {IMAGE_NAME}
For:
model_server: whispercpp
""".format(REGISTRY=os.environ['REGISTRY'], IMAGE_NAME=os.environ['IMAGE_NAME']))

MS = pytest_container.Container(
url=f"containers-storage:{os.environ['REGISTRY']}/{os.environ['IMAGE_NAME']}",
volume_mounts=[
pytest_container.container.BindMount(
container_path="/locallm/models/model.gguf",
host_path=f"./model.gguf",
container_path="/app/models/model.file",
host_path=f"./model.file",
flags=["ro"]
)
],
extra_environment_variables={
"MODEL_PATH": "/locallm/models/model.gguf",
"MODEL_PATH": "/app/models/model.file",
"HOST": "0.0.0.0",
"PORT": "8001"
},
1 change: 0 additions & 1 deletion model_servers/whispercpp/tests/test_alive.py
@@ -4,7 +4,6 @@

CONTAINER_IMAGES = [MS]


def test_etc_os_release_present(auto_container: pytest_container.container.ContainerData):
assert auto_container.connection.file("/etc/os-release").exists

9 changes: 0 additions & 9 deletions models/Containerfile

This file was deleted.
