abstracting model downloads and file-normalization to models dir
Signed-off-by: greg pereira <[email protected]>
Gregory-Pereira committed Apr 8, 2024
1 parent dbfa978 commit 4b754a0
Showing 13 changed files with 205 additions and 104 deletions.
11 changes: 3 additions & 8 deletions .github/workflows/model_servers.yaml
@@ -45,17 +45,12 @@ jobs:
- name: Build Image
id: build_image
uses: redhat-actions/[email protected]
with:
image: ${{ env.REGISTRY }}/${{ matrix.image_name }}
platforms: linux/amd64, linux/arm64
tags: latest
containerfiles: ./model_servers/${{ matrix.image_name }}/base/Containerfile
context: model_servers/${{ matrix.image_name }}/
working-directory: ./model_servers/${{ matrix.image_name }}/
run: make build

- name: Download model
working-directory: ./model_servers/${{ matrix.image_name }}/
run: make ${{ matrix.model }}
run: make trigger-download-model-${{ matrix.model }}

- name: Set up Python
uses: actions/[email protected]
6 changes: 3 additions & 3 deletions .gitignore
@@ -4,7 +4,7 @@
port_check.lock
*build
models/*
model_servers/llamacpp_python/model.gguf
model_servers/*/model.file
!models/convert_models/*
!models/Containerfile
!models/README.md
!models/README.md

66 changes: 35 additions & 31 deletions model_servers/llamacpp_python/Makefile
@@ -5,23 +5,6 @@ IMAGE := quay.io/ai-lab/model_servers/$(APP):latest
CUDA_IMAGE := quay.io/ai-lab/model_servers/$(APP)_cuda:latest
VULKAN_IMAGE := quay.io/ai-lab/model_servers/$(APP)_vulkan:latest

# ----- MODEL OPTIONS -----

LLAMA_MODEL_NAME := llama-2-7b-chat.Q5_K_S.gguf
LLAMA_MODEL_URL := https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf

TINY_LLAMA_MODEL_NAME := tinyllama-1.1b-chat-v1.0.Q4_K_S.gguf
TINY_LLAMA_MODEL_URL := https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/blob/main/tinyllama-1.1b-chat-v1.0.Q4_K_S.gguf

MISTRAL_MODEL_NAME := mistral-7b-instruct-v0.1.Q4_K_M.gguf
MISTRAL_MODEL_URL := https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf

# --- END MODEL OPTIONS ---

SELECTED_MODEL_NAME := $(or $(SELECTED_MODEL),$(MISTRAL_MODEL_NAME))
SELECTED_MODEL_URL := $(or $(SELECTED_MODEL_LINK),$(MISTRAL_MODEL_URL))

RELATIVE_MODELS_PATH := ../../models
MODELS_PATH := /locallm/models

BIND_MOUNT_OPTIONS := ro
@@ -31,7 +14,7 @@ ifeq ($(OS),Linux)
endif

.PHONY: all
all: build mistral run
all: build trigger-download-model-mistral run

.PHONY: build
build:
@@ -45,18 +28,24 @@ build-cuda:
build-vulkan:
podman build -t $(VULKAN_IMAGE) . -f vulkan/Containerfile

.PHONY: download-model-tiny-llama
download-model-tiny-llama:
curl -H "Cache-Control: no-cache" -s -S -L -f $(TINY_LLAMA_MODEL_URL) -z $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME) -o $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME).tmp && mv -f $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME) 2>/dev/null || rm -f $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME)
########################## TRIGGER DOWNLOAD MAKE TARGETS ##########################
### NOTE: do not add trigger make targets like these for new models.
# Adding new models should be implemented in the [models directory](../../models).
# These targets are provided here only for bootstrapping and testing different model types.

.PHONY: download-model-llama
download-model-llama:
curl -H "Cache-Control: no-cache" -s -S -L -f $(LLAMA_MODEL_URL) -z $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME) -o $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp && mv -f $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME) 2>/dev/null || rm -f $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME)
.PHONY: trigger-download-model-tiny-llama # small .gguf model for testing
trigger-download-model-tiny-llama:
cd ../../models && \
make -f Makefile download-model-tiny-llama && \
make -f Makefile normalize-model-file-tiny-llama

.PHONY: trigger-download-model-mistral # default model
trigger-download-model-mistral:
cd ../../models && \
make -f Makefile download-model-mistral && \
make -f Makefile normalize-model-file-mistral

.PHONY: mistral
mistral:
curl -H "Cache-Control: no-cache" -s -S -L -f $(MISTRAL_MODEL_URL) -z $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME) -o $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp && mv -f $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME) 2>/dev/null || rm -f $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME)
############################ END DOWNLOAD MAKE TARGETS ############################

.PHONY: install
install:
@@ -65,9 +54,24 @@ install:
.PHONY: run
run:
cd ../../models && \
podman run -it -d -p $(PORT):$(PORT) -v ./$(SELECTED_MODEL_NAME):$(MODELS_PATH)/model.gguf:$(BIND_MOUNT_OPTIONS) -e MODEL_PATH=$(MODELS_PATH)/model.gguf -e HOST=0.0.0.0 -e PORT=$(PORT) --net=host $(IMAGE)
podman run --rm -it -d -p $(PORT):$(PORT) -v ./model.file:$(MODELS_PATH)/model.file:$(BIND_MOUNT_OPTIONS) -e HOST=0.0.0.0 -e MODEL_PATH=$(MODELS_PATH)/model.file -e PORT=$(PORT) --network=host $(IMAGE)

.PHONY: run-test # Note: running run-test consumes `./model.file`; restore it with `make trigger-download-model-<selected-model>` or copy it back in from the `../../models` dir
run-test:
@if [ ! -f "../../models/model.file" ]; then \
echo "Model file not present in the models directory."; \
echo "Please call \`make -f Makefile trigger-download-model-mistral\`."; \
echo "Or download a .gguf model and name it \`model.file\` in \`ai-lab/models directory\`."; \
exit 1; \
fi; \
mv ../../models/model.file ./model.file
pytest --log-cli-level NOTSET
rm ./model.file

.PHONY: test
.PHONY: clean
clean:
-rm ./model.file

.PHONY: test # Note: running test calls run-test, which consumes the `model.file`; restore it with `make trigger-download-model-<selected-model>` or copy it back in from the `../../models` dir
test:
curl -H "Cache-Control: no-cache" -s -S -L -f $(SELECTED_MODEL_URL) -z ./model.gguf -o ./model.gguf.tmp && mv -f ./model.gguf.tmp ./model.gguf 2>/dev/null || rm -f ./model.gguf.tmp ./model.gguf
pytest --log-cli-level NOTSET
$(MAKE) -k run-test clean
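
The `trigger-download-model-*` targets delegate to the shared [models](../../models) Makefile, whose own changes are not expanded in this view. As a rough sketch only (the `download-model-mistral` and `normalize-model-file-mistral` names come from this diff, but the recipe bodies and the copy-vs-move choice are assumptions), the delegated steps amount to:

```bash
# Approximate expansion of `make trigger-download-model-mistral` (recipe bodies are assumed)
cd ../../models
# download-model-mistral: fetch the default .gguf into the shared models dir
curl -sSL -o mistral-7b-instruct-v0.1.Q4_K_M.gguf \
  https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf
# normalize-model-file-mistral: expose the download under the generic name the servers mount
cp mistral-7b-instruct-v0.1.Q4_K_M.gguf model.file
```
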
11 changes: 3 additions & 8 deletions model_servers/llamacpp_python/README.md
@@ -27,20 +27,15 @@ At the time of this writing, 2 models are known to work with this service
- **Mistral-7b**
- Download URL: [https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf)

It is suggested you place models in the [models](../../models/) directory. As for retrieving them, either use `wget` to download them with the download links above, or call the model names from the Makefile.
It is suggested you place models in the [models](../../models/) directory. It is also recommended that you use the [models dir Makefile](../../models/Makefile) to see which models are available and to download them:

```bash
cd ../../models
curl -sLO <Download URL>
make -f Makefile download-model-mistral
cd model_servers/llamacpp_python
```

or:

```bash
make -f Makefile download-model-mistral
make -f Makefile download-model-llama
```
However, we have also added Makefile targets [in this directory's Makefile](./Makefile) which in turn call those targets, e.g. `make -f Makefile trigger-download-model-mistral`.
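
A combined example of that flow, starting from the repository root (the models-dir target names are taken from this commit's Makefiles), might look like:

```bash
# Option 1: drive the shared models dir Makefile directly
cd models
make -f Makefile download-model-mistral
make -f Makefile normalize-model-file-mistral
cd ..

# Option 2: let this directory's wrapper target do both steps
cd model_servers/llamacpp_python
make trigger-download-model-mistral
```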

### Deploy Model Service

15 changes: 12 additions & 3 deletions model_servers/llamacpp_python/tests/conftest.py
@@ -1,17 +1,26 @@
import pytest_container
import os
import logging

logging.info("""
Starting pytest with the following ENV vars:
REGISTRY: {REGISTRY}
IMAGE_NAME: {IMAGE_NAME}
For:
model_server: llamacpp_python
""".format(REGISTRY=os.environ['REGISTRY'], IMAGE_NAME=os.environ['IMAGE_NAME']))

MS = pytest_container.Container(
url=f"containers-storage:{os.environ['REGISTRY']}/{os.environ['IMAGE_NAME']}",
volume_mounts=[
pytest_container.container.BindMount(
container_path="/locallm/models/model.gguf",
host_path=f"./model.gguf",
container_path="/locallm/models/model.file",
host_path=f"./model.file",
flags=["ro"]
)
],
extra_environment_variables={
"MODEL_PATH": "/locallm/models/model.gguf",
"MODEL_PATH": "/locallm/models/model.file",
"HOST": "0.0.0.0",
"PORT": "8001"
},
1 change: 0 additions & 1 deletion model_servers/llamacpp_python/tests/test_alive.py
@@ -4,7 +4,6 @@

CONTAINER_IMAGES = [MS]


def test_etc_os_release_present(auto_container: pytest_container.container.ContainerData):
assert auto_container.connection.file("/etc/os-release").exists

2 changes: 1 addition & 1 deletion model_servers/llamacpp_python/tooling_options.ipynb
@@ -23,7 +23,7 @@
"This notebook assumes that the playground image is running locally. Once built, you can use the below to start the model service image. \n",
"\n",
"```bash\n",
"podman run -it -p 8000:8000 -v <YOUR-LOCAL-PATH>/locallm/models:/locallm/models:Z -e MODEL_PATH=models/mistral-7b-instruct-v0.1.Q4_K_M.gguf playground\n",
"podman run -it -p 8000:8000 -v <YOUR-LOCAL-PATH>/locallm/models:/locallm/models:Z -e MODEL_PATH=models/mistral-7b-instruct-v0.1.Q4_K_M.file playground\n",
"```"
]
},
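
Once the container is running, the notebook talks to it over llama-cpp-python's OpenAI-compatible API. A quick smoke test from the shell (the route and payload below are a sketch assuming the default `/v1/chat/completions` endpoint) could be:

```bash
# Smoke test against the model service started above (assumes the OpenAI-compatible
# /v1/chat/completions route served by llama-cpp-python on port 8000)
curl -s http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"messages": [{"role": "user", "content": "Say hello"}], "max_tokens": 32}'
```
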
68 changes: 39 additions & 29 deletions model_servers/whispercpp/Makefile
@@ -1,21 +1,8 @@
PORT := 8001
APP := whispercpp
IMAGE := quay.io/ai-lab/model_servers/$(APP):latest
CUDA_IMAGE := quay.io/ai-lab/model_servers/$(APP)_cuda:latest
VULKAN_IMAGE :=quay.io/ai-lab/model_servers/$(APP)_vulkan:latest

# ----- MODEL OPTIONS -----

WHISPER_SMALL_MODEL_NAME := ggml-small.bin
WHISPER_SMALL_MODEL_URL := https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin

WHISPER_BASE_MODEL_NAME := ggml-base.en.bin
WHISPER_BASE_MODEL_URL := https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin

SELECTED_MODEL_NAME := $(or $(SELECTED_MODEL),$(WHISPER_SMALL_MODEL_NAME))
SELECTED_MODEL_URL := $(or $(SELECTED_MODEL_LINK),$(WHISPER_SMALL_MODEL_URL))

# --- END MODEL OPTIONS ---
# CUDA_IMAGE := quay.io/ai-lab/model_servers/$(APP)_cuda:latest
# VULKAN_IMAGE :=quay.io/ai-lab/model_servers/$(APP)_vulkan:latest

MODELS_PATH := /app/models

@@ -26,32 +13,55 @@ ifeq ($(OS),Linux)
endif

.PHONY: all
all: build whisper-small run
all: build trigger-download-model-whisper-small run

.PHONY: build
build:
podman build -t $(IMAGE) . -f Containerfile

.PHONY: whisper-small
whisper-small:
cd ../../models && \
curl -s -S -L -f $(WHISPER_SMALL_MODEL_URL) -z $(WHISPER_SMALL_MODEL_NAME) -o $(WHISPER_SMALL_MODEL_NAME).tmp && mv -f $(WHISPER_SMALL_MODEL_NAME).tmp $(WHISPER_SMALL_MODEL_NAME) 2>/dev/null || rm -f $(WHISPER_SMALL_MODEL_NAME).tmp $(WHISPER_SMALL_MODEL_NAME)
podman build -t $(IMAGE) . -f base/Containerfile

.PHONY: install
install:
pip install -r tests/requirements.txt

.PHONY: download-model-whisper-base
download-model-whisper-base:
########################## TRIGGER DOWNLOAD MAKE TARGETS ##########################
### NOTE: do not add trigger make targets like these for new models.
# Adding new models should be implemented in the [models directory](../../models).
# These targets are provided here only for bootstrapping and testing different model types.

.PHONY: trigger-download-model-whisper-small # small .bin model for testing
trigger-download-model-whisper-small:
cd ../../models && \
curl -s -S -L -f $(WHISPER_BASE_MODEL_URL) -z $(WHISPER_BASE_MODEL_NAME) -o $(WHISPER_BASE_MODEL_NAME).tmp && mv -f $(WHISPER_BASE_MODEL_NAME).tmp $(WHISPER_BASE_MODEL_NAME) 2>/dev/null || rm -f $(WHISPER_BASE_MODEL_NAME).tmp $(WHISPER_BASE_MODEL_NAME)
make -f Makefile download-model-whisper-small && \
make -f Makefile normalize-model-file-whisper-small

.PHONY: trigger-download-model-whisper-base # default model
trigger-download-model-whisper-base:
cd ../../models && \
make -f Makefile download-model-whisper-base && \
make -f Makefile normalize-model-file-whisper-base

############################ END DOWNLOAD MAKE TARGETS ############################

.PHONY: run
run:
cd ../../models && \
podman run -d --rm -it -p $(PORT):$(PORT) -v ./$(SELECTED_MODEL_NAME):$(MODELS_PATH)/$(SELECTED_MODEL_NAME):$(BIND_MOUNT_OPTIONS) -e HOST=0.0.0.0 -e MODEL_PATH=$(MODELS_PATH)/$(SELECTED_MODEL_NAME) -e PORT=$(PORT) $(IMAGE)
podman run --rm -it -d -p $(PORT):$(PORT) -v ./model.file:$(MODELS_PATH)/model.file:$(BIND_MOUNT_OPTIONS) -e HOST=0.0.0.0 -e MODEL_PATH=$(MODELS_PATH)/model.file -e PORT=$(PORT) --network=host $(IMAGE)

.PHONY: test
test:
curl -H "Cache-Control: no-cache" -s -S -L -f $(SELECTED_MODEL_URL) -z ./model.gguf -o ./model.gguf.tmp && mv -f ./model.gguf.tmp ./model.gguf 2>/dev/null || rm -f ./model.gguf.tmp ./model.gguf
.PHONY: run-test # Note: running run-test consumes the `model.file`; you will have to reset it
run-test:
@if [ ! -f "../../models/model.file" ]; then \
echo "Model file not present in the models directory."; \
echo "Please call \`make -f Makefile trigger-download-model-whisper-base\`."; \
echo "Or download a .bin model and name it \`model.file\` in \`ai-lab/models directory\`."; \
exit 1; \
fi; \
mv ../../models/model.file ./model.file
pytest --log-cli-level NOTSET

.PHONY: clean # removes the local `./model.file` copy
clean:
-rm ./model.file

.PHONY: test # Note: running test calls run-test, which consumes the `model.file`; you will have to reset it
test:
$(MAKE) -k run-test clean
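
Putting the whispercpp targets together, an end-to-end local run mirroring the `all` target (target names come straight from this Makefile; the ordering is an editorial sketch) would be:

```bash
# Build the image, stage ../../models/model.file, then start the server
cd model_servers/whispercpp
make build
make trigger-download-model-whisper-small   # or: trigger-download-model-whisper-base
make run    # serves the model on PORT (8001 by default) via podman

# Note: `make test` consumes the staged model.file, so re-run the trigger target afterwards
```
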
15 changes: 12 additions & 3 deletions model_servers/whispercpp/tests/conftest.py
@@ -1,17 +1,26 @@
import pytest_container
import os
import logging

logging.info("""
Starting pytest with the following ENV vars:
REGISTRY: {REGISTRY}
IMAGE_NAME: {IMAGE_NAME}
For:
model_server: whispercpp
""".format(REGISTRY=os.environ['REGISTRY'], IMAGE_NAME=os.environ['IMAGE_NAME']))

MS = pytest_container.Container(
url=f"containers-storage:{os.environ['REGISTRY']}/{os.environ['IMAGE_NAME']}",
volume_mounts=[
pytest_container.container.BindMount(
container_path="/locallm/models/model.gguf",
host_path=f"./model.gguf",
container_path="/app/models/model.file",
host_path=f"./model.file",
flags=["ro"]
)
],
extra_environment_variables={
"MODEL_PATH": "/locallm/models/model.gguf",
"MODEL_PATH": "/app/models/model.file",
"HOST": "0.0.0.0",
"PORT": "8001"
},
1 change: 0 additions & 1 deletion model_servers/whispercpp/tests/test_alive.py
@@ -4,7 +4,6 @@

CONTAINER_IMAGES = [MS]


def test_etc_os_release_present(auto_container: pytest_container.container.ContainerData):
assert auto_container.connection.file("/etc/os-release").exists

9 changes: 0 additions & 9 deletions models/Containerfile

This file was deleted.
