From c08ebd1a00cd0db50dc9c30acc4adea58397be87 Mon Sep 17 00:00:00 2001
From: greg pereira
Date: Sat, 6 Apr 2024 12:58:28 -0700
Subject: [PATCH] llamacpp ms fix: builds and allow variable model

Signed-off-by: greg pereira
---
 .gitignore                                  |  1 +
 model_servers/llamacpp_python/Makefile      | 35 ++++++++++++-------
 .../llamacpp_python/tests/conftest.py       |  8 ++---
 3 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/.gitignore b/.gitignore
index efabf539..1cb0446c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,7 @@
 port_check.lock
 *build
 models/*
+model_servers/llamacpp_python/model.gguf
 !models/convert_models/*
 !models/Containerfile
 !models/README.md
\ No newline at end of file
diff --git a/model_servers/llamacpp_python/Makefile b/model_servers/llamacpp_python/Makefile
index 64791d0a..4356b976 100644
--- a/model_servers/llamacpp_python/Makefile
+++ b/model_servers/llamacpp_python/Makefile
@@ -10,22 +10,28 @@ VULKAN_IMAGE := quay.io/ai-lab/model_servers/$(APP)_vulkan:latest
 LLAMA_MODEL_NAME := llama-2-7b-chat.Q5_K_S.gguf
 LLAMA_MODEL_URL := https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf
 
+TINY_LLAMA_MODEL_NAME := tinyllama-1.1b-chat-v1.0.Q4_K_S.gguf
+TINY_LLAMA_MODEL_URL := https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_S.gguf
+
 MISTRAL_MODEL_NAME := mistral-7b-instruct-v0.1.Q4_K_M.gguf
-MISTRAL_MODEL_URL := https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf
+MISTRAL_MODEL_URL := https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf
 
 # --- END MODEL OPTIONS ---
 
-MODEL_PATH_FROM_ROOT := locallm/models/$(MISTRAL_MODEL_NAME) # CHOOSE MODEL HERE BY NAME
-RELATIVE_MODEL_PATH := ../../models
+SELECTED_MODEL_NAME := $(or $(SELECTED_MODEL),$(MISTRAL_MODEL_NAME))
+SELECTED_MODEL_URL := $(or $(SELECTED_MODEL_LINK),$(MISTRAL_MODEL_URL))
+
+RELATIVE_MODELS_PATH := ../../models
+MODELS_PATH := /locallm/models
 
-BIND_MOUNT_OPTIONS := ro
+BIND_MOUNT_OPTIONS := ro
 OS := $(shell uname -s)
 ifeq ($(OS),Linux)
 BIND_MOUNT_OPTIONS := ro,Z
 endif
 
 .Phony: all
-all: build download-mistral-7b-instruct-v0.1.Q4_K_M.gguf run
+all: build download-model-mistral run
 
 .PHONY: build
 build:
@@ -39,15 +45,18 @@ build-cuda:
 build-vulkan:
	podman build -t $(VULKAN_IMAGE) . -f cuda/Containerfile
 
+.PHONY: download-model-tiny-llama
+download-model-tiny-llama:
+	curl -H "Cache-Control: no-cache" -s -S -L -f $(TINY_LLAMA_MODEL_URL) -z $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME) -o $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp && mv -f $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME) 2>/dev/null || rm -f $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME)
+
 .PHONY: download-model-llama
 download-model-llama:
-	cd ../../models && \
-	curl -s -S -L -f $(LLAMA_MODEL_URL) -z $@ -o $@.tmp && mv -f $@.tmp $@ 2>/dev/null || rm -f $@.tmp $@
+	curl -H "Cache-Control: no-cache" -s -S -L -f $(LLAMA_MODEL_URL) -z $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME) -o $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME).tmp && mv -f $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME) 2>/dev/null || rm -f $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME)
+
 .PHONY: download-model-mistral
 download-model-mistral:
-	cd ../../models && \
-	curl -s -S -L -f $(MISTRAL_MODEL_URL) -z $@ -o $@.tmp && mv -f $@.tmp $@ 2>/dev/null || rm -f $@.tmp $@
+	curl -H "Cache-Control: no-cache" -s -S -L -f $(MISTRAL_MODEL_URL) -z $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME) -o $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp && mv -f $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME) 2>/dev/null || rm -f $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME)
 
 .PHONY: install
 install:
@@ -55,10 +64,10 @@ install:
 
 .PHONY: run
 run:
-	cd ../.. && \
-	podman run -it -d -p $(PORT):$(PORT) -v ./models:$(MODEL_PATH_FROM_ROOT):$(BIND_MOUNT_OPTIONS) -e MODEL_PATH=$(MODEL_PATH_FROM_ROOT) -e HOST=0.0.0.0 -e PORT=$(PORT) --net=host $(IMAGE);
+	cd ../../models && \
+	podman run -it -d -p $(PORT):$(PORT) -v ./$(SELECTED_MODEL_NAME):$(MODELS_PATH)/model.gguf:$(BIND_MOUNT_OPTIONS) -e MODEL_PATH=$(MODELS_PATH)/model.gguf -e HOST=0.0.0.0 -e PORT=$(PORT) --net=host $(IMAGE)
 
 .PHONY: test
 test:
-	$(MAKE) all
-	pytest --log-cli-level NOTSET
\ No newline at end of file
+	curl -H "Cache-Control: no-cache" -s -S -L -f $(SELECTED_MODEL_URL) -z ./model.gguf -o ./model.gguf.tmp && mv -f ./model.gguf.tmp ./model.gguf 2>/dev/null || rm -f ./model.gguf.tmp ./model.gguf
+	pytest --log-cli-level NOTSET
diff --git a/model_servers/llamacpp_python/tests/conftest.py b/model_servers/llamacpp_python/tests/conftest.py
index 26fa4014..4cd3e203 100644
--- a/model_servers/llamacpp_python/tests/conftest.py
+++ b/model_servers/llamacpp_python/tests/conftest.py
@@ -1,18 +1,17 @@
 import pytest_container
 import os
 
-
 MS = pytest_container.Container(
     url=f"containers-storage:{os.environ['REGISTRY']}/{os.environ['IMAGE_NAME']}",
     volume_mounts=[
         pytest_container.container.BindMount(
-            container_path="/locallm/models",
-            host_path="./",
+            container_path="/locallm/models/model.gguf",
+            host_path="./model.gguf",
             flags=["ro"]
         )
     ],
     extra_environment_variables={
-        "MODEL_PATH": "models/mistral-7b-instruct-v0.1.Q4_K_M.gguf",
+        "MODEL_PATH": "/locallm/models/model.gguf",
         "HOST": "0.0.0.0",
         "PORT": "8001"
     },
@@ -22,7 +21,6 @@
         host_port=8001
         )
     ],
-    extra_launch_args=["--net=host"]
 )
 
 def pytest_generate_tests(metafunc):
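Usage sketch for the reworked targets (not part of the patch itself; it assumes
make is invoked from model_servers/llamacpp_python, and that SELECTED_MODEL and
SELECTED_MODEL_LINK are the override variables consumed by the $(or ...)
defaults above — anything else here, such as the REGISTRY/IMAGE_NAME split, is
an assumption, not something the patch states):

    # Default flow: build the image, fetch the Mistral model, serve it as model.gguf
    make build
    make download-model-mistral
    make run

    # Serve a different downloaded GGUF by overriding the selected model name
    make download-model-tiny-llama
    make run SELECTED_MODEL=tinyllama-1.1b-chat-v1.0.Q4_K_S.gguf

    # `make test` fetches $(SELECTED_MODEL_URL) to ./model.gguf before running
    # pytest; tests/conftest.py reads REGISTRY and IMAGE_NAME from the
    # environment (the split of the image reference below is assumed)
    REGISTRY=quay.io IMAGE_NAME=ai-lab/model_servers/llamacpp_python:latest \
        make test SELECTED_MODEL_LINK=https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_S.gguf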