
Commit 8a51032
Merge pull request #179 from Gregory-Pereira/fix-llamacpp-builds
llamacpp ms fix: builds and allow variable model
rhatdan authored Apr 7, 2024
2 parents b809abc + c08ebd1
Showing 3 changed files with 26 additions and 18 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -4,6 +4,7 @@
port_check.lock
*build
models/*
+model_servers/llamacpp_python/model.gguf
!models/convert_models/*
!models/Containerfile
!models/README.md
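
The make test flow below downloads the selected model to model_servers/llamacpp_python/model.gguf, so this new ignore rule keeps the downloaded weights out of version control.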
35 changes: 22 additions & 13 deletions model_servers/llamacpp_python/Makefile
@@ -10,22 +10,28 @@ VULKAN_IMAGE := quay.io/ai-lab/model_servers/$(APP)_vulkan:latest
LLAMA_MODEL_NAME := llama-2-7b-chat.Q5_K_S.gguf
LLAMA_MODEL_URL := https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf

+TINY_LLAMA_MODEL_NAME := tinyllama-1.1b-chat-v1.0.Q4_K_S.gguf
+TINY_LLAMA_MODEL_URL := https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_S.gguf

MISTRAL_MODEL_NAME := mistral-7b-instruct-v0.1.Q4_K_M.gguf
MISTRAL_MODEL_URL := https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf

# --- END MODEL OPTIONS ---

-MODEL_PATH_FROM_ROOT := locallm/models/$(MISTRAL_MODEL_NAME) # CHOOSE MODEL HERE BY NAME
-RELATIVE_MODEL_PATH := ../../models
+SELECTED_MODEL_NAME := $(or $(SELECTED_MODEL),$(MISTRAL_MODEL_NAME))
+SELECTED_MODEL_URL := $(or $(SELECTED_MODEL_LINK),$(MISTRAL_MODEL_URL))
+
+RELATIVE_MODELS_PATH := ../../models
+MODELS_PATH := /locallm/models

BIND_MOUNT_OPTIONS := ro
OS := $(shell uname -s)
ifeq ($(OS),Linux)
BIND_MOUNT_OPTIONS := ro,Z
endif

.PHONY: all
-all: build download-mistral-7b-instruct-v0.1.Q4_K_M.gguf run
+all: build download-model-mistral run

.PHONY: build
build:
@@ -39,26 +45,29 @@ build-cuda:
build-vulkan:
podman build -t $(VULKAN_IMAGE) . -f vulkan/Containerfile

+.PHONY: download-model-tiny-llama
+download-model-tiny-llama:
+curl -H "Cache-Control: no-cache" -s -S -L -f $(TINY_LLAMA_MODEL_URL) -z $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME) -o $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp && mv -f $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME) 2>/dev/null || rm -f $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME)

.PHONY: download-model-llama
download-model-llama:
-cd ../../models && \
-curl -s -S -L -f $(LLAMA_MODEL_URL) -z $@ -o $@.tmp && mv -f $@.tmp $@ 2>/dev/null || rm -f $@.tmp $@
+curl -H "Cache-Control: no-cache" -s -S -L -f $(LLAMA_MODEL_URL) -z $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME) -o $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME).tmp && mv -f $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME) 2>/dev/null || rm -f $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME)


.PHONY: download-model-mistral
download-model-mistral:
-cd ../../models && \
-curl -s -S -L -f $(MISTRAL_MODEL_URL) -z $@ -o $@.tmp && mv -f $@.tmp $@ 2>/dev/null || rm -f $@.tmp $@
+curl -H "Cache-Control: no-cache" -s -S -L -f $(MISTRAL_MODEL_URL) -z $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME) -o $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp && mv -f $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME) 2>/dev/null || rm -f $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME)

.PHONY: install
install:
pip install -r tests/requirements.txt

.PHONY: run
run:
-cd ../.. && \
-podman run -it -d -p $(PORT):$(PORT) -v ./models:$(MODEL_PATH_FROM_ROOT):$(BIND_MOUNT_OPTIONS) -e MODEL_PATH=$(MODEL_PATH_FROM_ROOT) -e HOST=0.0.0.0 -e PORT=$(PORT) --net=host $(IMAGE);
+cd ../../models && \
+podman run -it -d -p $(PORT):$(PORT) -v ./$(SELECTED_MODEL_NAME):$(MODELS_PATH)/model.gguf:$(BIND_MOUNT_OPTIONS) -e MODEL_PATH=$(MODELS_PATH)/model.gguf -e HOST=0.0.0.0 -e PORT=$(PORT) --net=host $(IMAGE)

.PHONY: test
test:
-$(MAKE) all
-pytest --log-cli-level NOTSET
+curl -H "Cache-Control: no-cache" -s -S -L -f $(SELECTED_MODEL_URL) -z ./model.gguf -o ./model.gguf.tmp && mv -f ./model.gguf.tmp ./model.gguf 2>/dev/null || rm -f ./model.gguf.tmp ./model.gguf
+pytest --log-cli-level NOTSET
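
The download targets above share one atomic-fetch pattern: curl writes to a .tmp file and only renames it into place on success, while -z skips the transfer when the local copy is already up to date. With SELECTED_MODEL and SELECTED_MODEL_LINK now overridable (both default to the Mistral model), the same image can serve any GGUF file. A usage sketch, run from model_servers/llamacpp_python/; the targets and variables are the ones defined in this Makefile, but the exact invocations are illustrative:

    # Build the image, fetch the default Mistral model, and serve it
    make all

    # Or fetch a different bundled model and serve that instead
    make download-model-llama
    make run SELECTED_MODEL=llama-2-7b-chat.Q5_K_S.gguf

    # Point the test download at any other GGUF URL
    make test SELECTED_MODEL_LINK=https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_S.gguf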
8 changes: 3 additions & 5 deletions model_servers/llamacpp_python/tests/conftest.py
@@ -1,18 +1,17 @@
import pytest_container
import os


MS = pytest_container.Container(
url=f"containers-storage:{os.environ['REGISTRY']}/{os.environ['IMAGE_NAME']}",
volume_mounts=[
pytest_container.container.BindMount(
container_path="/locallm/models",
host_path="./",
container_path="/locallm/models/model.gguf",
host_path=f"./model.gguf",
flags=["ro"]
)
],
extra_environment_variables={
"MODEL_PATH": "models/mistral-7b-instruct-v0.1.Q4_K_M.gguf",
"MODEL_PATH": "/locallm/models/model.gguf",
"HOST": "0.0.0.0",
"PORT": "8001"
},
@@ -22,7 +21,6 @@
host_port=8001
)
],
-extra_launch_args=["--net=host"]
)

def pytest_generate_tests(metafunc):
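
The conftest now mounts a single model.gguf file into the container rather than the whole models directory, and resolves the image under test from the REGISTRY and IMAGE_NAME environment variables. A sketch of a local test run under that reading; the split of the Makefile's quay.io/ai-lab/model_servers image reference into REGISTRY and IMAGE_NAME below is an assumption for illustration, not taken from this diff:

    # from model_servers/llamacpp_python/
    make build
    make install
    REGISTRY=quay.io IMAGE_NAME=ai-lab/model_servers/llamacpp_python:latest make test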
