From c369779de29cd421eb65d03984d7c66e374899eb Mon Sep 17 00:00:00 2001
From: greg pereira <grpereir@redhat.com>
Date: Sun, 7 Apr 2024 12:13:49 -0700
Subject: [PATCH] abstracting model downloads and file-normalization to models
 dir

Signed-off-by: greg pereira <grpereir@redhat.com>
---
 .github/workflows/chatbot.yaml                |  4 +-
 .github/workflows/model_servers.yaml          | 11 +--
 .gitignore                                    |  5 +-
 model_servers/common/Makefile.common          | 47 +++++++++++++
 model_servers/llamacpp_python/Makefile        | 67 ++++---------------
 model_servers/llamacpp_python/README.md       | 17 ++---
 .../llamacpp_python/tests/conftest.py         | 43 ++++++++++--
 .../llamacpp_python/tests/test_alive.py       |  2 +-
 model_servers/whispercpp/Makefile             | 59 +++-------------
 model_servers/whispercpp/tests/conftest.py    | 43 ++++++++++--
 model_servers/whispercpp/tests/test_alive.py  |  1 -
 models/Containerfile                          |  9 ---
 models/Makefile                               | 12 ++--
 models/README.md                              | 22 +++---
 .../chatbot/Makefile                          |  5 +-
 .../chatbot/tests/functional/conftest.py      |  2 +-
 16 files changed, 187 insertions(+), 162 deletions(-)
 create mode 100644 model_servers/common/Makefile.common
 delete mode 100644 models/Containerfile

diff --git a/.github/workflows/chatbot.yaml b/.github/workflows/chatbot.yaml
index bb11a7faa..c6c50d707 100644
--- a/.github/workflows/chatbot.yaml
+++ b/.github/workflows/chatbot.yaml
@@ -59,8 +59,8 @@ jobs:
         run: make install
 
       - name: Download model
-        working-directory: ./model_servers/llamacpp_python
-        run: make mistral
+        working-directory: ./recipes/natural_language_processing/${{ env.IMAGE_NAME }}
+        run: make download-model-mistral
 
       - name: Run Functional Tests
         shell: bash
diff --git a/.github/workflows/model_servers.yaml b/.github/workflows/model_servers.yaml
index 1f1639cec..d295c4b42 100644
--- a/.github/workflows/model_servers.yaml
+++ b/.github/workflows/model_servers.yaml
@@ -18,6 +18,7 @@ on:
 
 env:
   REGISTRY: ghcr.io
+  REGISTRY_ORG: containers
 
 jobs:
   build-and-push-image:
@@ -26,8 +27,12 @@ jobs:
         include:
           - image_name: llamacpp_python
             model: mistral
+            model_name: mistral-7b-instruct-v0.1.Q4_K_M.gguf
+            model_path: /locallm/models
           - image_name: whispercpp
             model: whisper-small
+            model_name: ggml-small.bin
+            model_path: /app/models
     runs-on: ubuntu-latest
     permissions:
       contents: read
@@ -57,7 +62,7 @@ jobs:
 
       - name: Download model
         working-directory: ./model_servers/${{ matrix.image_name }}/
-        run: make ${{ matrix.model }}
+        run: make download-model-${{ matrix.model }}
 
       - name: Set up Python
         uses: actions/setup-python@v5.0.0
@@ -70,9 +75,7 @@ jobs:
 
       - name: Run tests
         working-directory: ./model_servers/${{ matrix.image_name }}/
-        run: make test
-        env:
-          IMAGE_NAME: ${{ matrix.image_name }}
+        run: make test REGISTRY=${{ env.REGISTRY }} IMAGE_NAME=${{ env.REGISTRY_ORG }}/model_servers/${{ matrix.image_name }}:latest MODEL_NAME=${{ matrix.model_name }} MODEL_PATH=${{ matrix.model_path }}
 
       - name: Login to Container Registry
         if: github.event_name == 'push' && github.ref == 'refs/heads/main'
diff --git a/.gitignore b/.gitignore
index 9bef8cace..fa79bafb8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,8 +4,7 @@
 port_check.lock
 *build
 models/*
-model_servers/llamacpp_python/model.gguf
-!models/convert_models/*
-!models/Containerfile
+!models/Makefile
 !models/README.md
+convert_models/converted_models
 recipes/chromedriver
diff --git a/model_servers/common/Makefile.common b/model_servers/common/Makefile.common
new file mode 100644
index 000000000..ee249451c
--- /dev/null
+++ b/model_servers/common/Makefile.common
@@ -0,0 +1,47 @@
+REGISTRY ?= quay.io
+REGISTRY_ORG ?= ai-lab
+COMPONENT ?= model_servers
+
+BIND_MOUNT_OPTIONS := ro
+OS := $(shell uname -s)
+ifeq ($(OS),Linux)
+    BIND_MOUNT_OPTIONS := Z,ro
+endif
+
+.PHONY: build
+build: # Build $(IMAGE) from base/Containerfile, passing PORT as a named build argument
+	podman build --squash-all --build-arg PORT=$(PORT) -t $(IMAGE) . -f base/Containerfile
+
+.PHONY: install
+install:
+	pip install -r tests/requirements.txt
+
+.PHONY: test
+test:
+	@if [ ! -f "../../models/$(MODEL_NAME)" ]; then \
+		echo "Model file -- $(MODEL_NAME) -- not present in the models directory."; \
+		exit 1; \
+	else \
+        if [ ! -f "./$(MODEL_NAME)" ]; then \
+            ln -s ../../models/$(MODEL_NAME) ./$(MODEL_NAME); \
+        fi; \
+		REGISTRY=$(REGISTRY) IMAGE_NAME=$(IMAGE_NAME) MODEL_NAME=$(MODEL_NAME) MODEL_PATH=$(MODEL_PATH) PORT=$(PORT) pytest -vvv -s ; \
+	fi;
+
+.PHONY: clean
+clean: # Remove the model symlink that the test target creates in this directory
+	-rm -f ./$(MODEL_NAME)
+
+.PHONY: run
+run:
+	cd ../../models && \
+	podman run -it -d -p $(PORT):$(PORT) -v ./$(MODEL_NAME):$(MODELS_PATH)/$(MODEL_NAME):$(BIND_MOUNT_OPTIONS) -e MODEL_PATH=$(MODELS_PATH)/$(MODEL_NAME) -e HOST=0.0.0.0 -e PORT=$(PORT) $(IMAGE)
+
+.PHONY: podman-clean
+podman-clean:
+	@container_ids=$$(podman ps --format "{{.ID}} {{.Image}}" | awk '$$2 == "$(IMAGE)" {print $$1}'); \
+	echo "removing all containers with IMAGE=$(IMAGE)"; \
+    for id in $$container_ids; do \
+        echo "Removing container: $$id,"; \
+        podman rm -f $$id; \
+    done
diff --git a/model_servers/llamacpp_python/Makefile b/model_servers/llamacpp_python/Makefile
index 09ab49b68..3c314560a 100644
--- a/model_servers/llamacpp_python/Makefile
+++ b/model_servers/llamacpp_python/Makefile
@@ -1,41 +1,20 @@
 APP := llamacpp_python
-PORT := 8001
+PORT ?= 8001
 
-IMAGE := quay.io/ai-lab/$(APP):latest
-CUDA_IMAGE := quay.io/ai-lab/$(APP)_cuda:latest
-VULKAN_IMAGE := quay.io/ai-lab/$(APP)_vulkan:latest
+include ../common/Makefile.common
 
-# ----- MODEL OPTIONS -----
+IMAGE_NAME ?= $(REGISTRY_ORG)/$(COMPONENT)/$(APP):latest
+IMAGE := $(REGISTRY)/$(IMAGE_NAME)
+CUDA_IMAGE := $(REGISTRY)/$(REGISTRY_ORG)/$(COMPONENT)/$(APP)_cuda:latest
+VULKAN_IMAGE := $(REGISTRY)/$(REGISTRY_ORG)/$(COMPONENT)/$(APP)_vulkan:latest
 
-LLAMA_MODEL_NAME := llama-2-7b-chat.Q5_K_S.gguf
-LLAMA_MODEL_URL := https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf
-
-TINY_LLAMA_MODEL_NAME := tinyllama-1.1b-chat-v1.0.Q4_K_S.gguf
-TINY_LLAMA_MODEL_URL := https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/blob/main/tinyllama-1.1b-chat-v1.0.Q4_K_S.gguf
-
-MISTRAL_MODEL_NAME := mistral-7b-instruct-v0.1.Q4_K_M.gguf
-MISTRAL_MODEL_URL := https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf
-
-# --- END MODEL OPTIONS ---
-
-SELECTED_MODEL_NAME := $(or $(SELECTED_MODEL),$(MISTRAL_MODEL_NAME))
-SELECTED_MODEL_URL := $(or $(SELECTED_MODEL_LINK),$(MISTRAL_MODEL_URL))
-
-RELATIVE_MODELS_PATH := ../../models
 MODELS_PATH := /locallm/models
+MODEL_NAME ?= mistral-7b-instruct-v0.1.Q4_K_M.gguf
 
-BIND_MOUNT_OPTIONS := ro
-OS := $(shell uname -s)
-ifeq ($(OS),Linux)
-    BIND_MOUNT_OPTIONS := ro,Z
-endif
 
-.Phony: all
-all: build mistral run
 
-.PHONY: build
-build:
-	podman build --squash-all -t $(IMAGE) . -f base/Containerfile
+.PHONY: all
+all: build download-model-mistral run
 
 .PHONY: build-cuda
 build-cuda:
@@ -45,29 +24,7 @@ build-cuda:
 build-vulkan:
 	podman build --squash-all -t $(VULKAN_IMAGE) . -f cuda/Containerfile
 
-.PHONY: download-model-tiny-llama
-download-model-tiny-llama:
-	curl -H "Cache-Control: no-cache" -s -S -L -f $(TINY_LLAMA_MODEL_URL) -z $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME) -o $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME).tmp && mv -f $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME) 2>/dev/null || rm -f $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME)
-
-.PHONY: download-model-llama
-download-model-llama:
-	curl -H "Cache-Control: no-cache" -s -S -L -f $(LLAMA_MODEL_URL) -z $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME) -o $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp && mv -f $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME) 2>/dev/null || rm -f $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME)
-
-
-.PHONY: mistral
-mistral:
-	curl -H "Cache-Control: no-cache" -s -S -L -f $(MISTRAL_MODEL_URL) -z $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME) -o $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp && mv -f $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME) 2>/dev/null || rm -f $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME)
-
-.PHONY: install
-install:
-	pip install -r tests/requirements.txt
-
-.PHONY: run
-run:
+.PHONY: download-model-mistral # default model
+download-model-mistral:
 	cd ../../models && \
-	podman run -it -d -p $(PORT):$(PORT) -v ./$(SELECTED_MODEL_NAME):$(MODELS_PATH)/model.gguf:$(BIND_MOUNT_OPTIONS) -e MODEL_PATH=$(MODELS_PATH)/model.gguf -e HOST=0.0.0.0 -e PORT=$(PORT) $(IMAGE)
-
-.PHONY: test
-test:
-	curl -H "Cache-Control: no-cache" -s -S -L -f $(SELECTED_MODEL_URL) -z ./model.gguf -o ./model.gguf.tmp && mv -f ./model.gguf.tmp ./model.gguf 2>/dev/null || rm -f ./model.gguf.tmp ./model.gguf
-	pytest --log-cli-level NOTSET
+	$(MAKE) MODEL_NAME=mistral-7b-instruct-v0.1.Q4_K_M.gguf MODEL_URL=https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf -f Makefile download-model
diff --git a/model_servers/llamacpp_python/README.md b/model_servers/llamacpp_python/README.md
index fccdd88e1..dc4638317 100644
--- a/model_servers/llamacpp_python/README.md
+++ b/model_servers/llamacpp_python/README.md
@@ -71,6 +71,7 @@ podman pull quay.io/ai-lab/llamacpp-python-vulkan
 ```
 
 
+
 ## Download Model(s)
 
 There are many models to choose from these days, most of which can be found on [huggingface.co](https://huggingface.co). In order to use a model with the llamacpp_python model server, it must be in GGUF format. You can either download pre-converted GGUF models directly or convert them yourself with the [model converter utility](../../convert_models/) available in this repo.
@@ -81,26 +82,26 @@ Download URL: [https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/res
 
 Place all models in the [models](../../models/) directory.
 
-You can use this snippet below to download models. 
+You can use this snippet below to download the default model:
 
 ```bash
-cd ../../models
-curl -sLO <Download URL> 
-cd model_servers/llamacpp_python
+make -f Makefile download-model-mistral
 ```
 
-or:
+Or you can use the generic `download-model` target from the `/models` directory to download any model file from huggingface:
 
 ```bash
-make -f Makefile download-model-mistral
-make -f Makefile download-model-llama
+cd ../../models
+make MODEL_NAME=<model_name> MODEL_URL=<model_url> -f  Makefile download-model
+# EX: make MODEL_NAME=mistral-7b-instruct-v0.1.Q4_K_M.gguf MODEL_URL=https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf -f  Makefile download-model
 ```
 
+
 ## Deploy Model Service
 
 ### Single Model Service:
 
-To deploy the LLM server you must specify a volume mount `-v` where your models are stored on the host machine and the `MODEL_PATH` for your model of choice. The model_server is most easily deploy from calling the make command: `make -f Makefile run`
+To deploy the LLM server you must specify a volume mount `-v` where your models are stored on the host machine and the `MODEL_PATH` for your model of choice. The model_server is most easily deployed by calling the make command: `make -f Makefile run`. As with all our make calls, you can pass any number of the following variables: `REGISTRY`, `IMAGE_NAME`, `MODEL_NAME`, `MODEL_PATH`, and `PORT`.
 
 ```bash
 podman run --rm -it \
diff --git a/model_servers/llamacpp_python/tests/conftest.py b/model_servers/llamacpp_python/tests/conftest.py
index 380262b1f..747f879a8 100644
--- a/model_servers/llamacpp_python/tests/conftest.py
+++ b/model_servers/llamacpp_python/tests/conftest.py
@@ -1,24 +1,53 @@
 import pytest_container
 import os
 
+def _env(name, default):
+    """Return environment variable `name`, or `default` when unset or empty.
+
+    The Makefile `test` target passes every option through the environment,
+    so an option that was not supplied arrives as an empty string instead
+    of being absent from os.environ.
+    """
+    return os.environ.get(name) or default
+
+
+# Registry host that prefixes the image reference.
+REGISTRY = _env('REGISTRY', 'ghcr.io')
+
+# Image path and tag below the registry.
+IMAGE_NAME = _env('IMAGE_NAME', 'containers/llamacpp_python:latest')
+
+# Model file name; bind-mounted from the current directory on the host.
+MODEL_NAME = _env('MODEL_NAME', 'mistral-7b-instruct-v0.1.Q4_K_M.gguf')
+
+# Directory inside the container that the model file is mounted into.
+MODEL_PATH = _env('MODEL_PATH', '/locallm/models')
+
+# Port passed to the server via PORT and forwarded to the host;
+# a value that is not an integer falls back to 8001.
+try:
+    PORT = int(_env('PORT', 8001))
+except ValueError:
+    PORT = 8001
+
 MS = pytest_container.Container(
-        url=f"containers-storage:{os.environ['REGISTRY']}/containers/{os.environ['IMAGE_NAME']}",
+        url=f"containers-storage:{REGISTRY}/{IMAGE_NAME}",
         volume_mounts=[
             pytest_container.container.BindMount(
-                container_path="/locallm/models/model.gguf",
-                host_path=f"./model.gguf",
+                container_path="{MODEL_PATH}/{MODEL_NAME}".format(MODEL_PATH=MODEL_PATH, MODEL_NAME=MODEL_NAME),
+                host_path=f"./{MODEL_NAME}",
                 flags=["ro"]
             )
         ],
         extra_environment_variables={
-            "MODEL_PATH": "/locallm/models/model.gguf",
+            "MODEL_PATH": "{MODEL_PATH}/{MODEL_NAME}".format(MODEL_PATH=MODEL_PATH, MODEL_NAME=MODEL_NAME),
             "HOST": "0.0.0.0",
-            "PORT": "8001"
+            "PORT": f"{PORT}"
         },
         forwarded_ports=[
             pytest_container.PortForwarding(
-                container_port=8001,
-                host_port=8001
+                container_port=PORT,
+                host_port=PORT
             )
         ],
     )
diff --git a/model_servers/llamacpp_python/tests/test_alive.py b/model_servers/llamacpp_python/tests/test_alive.py
index fcad510a0..b44e5467a 100644
--- a/model_servers/llamacpp_python/tests/test_alive.py
+++ b/model_servers/llamacpp_python/tests/test_alive.py
@@ -1,10 +1,10 @@
 import pytest_container
 from .conftest import MS
 import tenacity
+import os
 
 CONTAINER_IMAGES = [MS]
 
-
 def test_etc_os_release_present(auto_container: pytest_container.container.ContainerData):
     assert auto_container.connection.file("/etc/os-release").exists
 
diff --git a/model_servers/whispercpp/Makefile b/model_servers/whispercpp/Makefile
index 4657904b1..805acc1e7 100644
--- a/model_servers/whispercpp/Makefile
+++ b/model_servers/whispercpp/Makefile
@@ -1,57 +1,20 @@
-PORT := 8001
 APP := whispercpp
-IMAGE := quay.io/ai-lab/model_servers/$(APP):latest
-CUDA_IMAGE := quay.io/ai-lab/model_servers/$(APP)_cuda:latest
-VULKAN_IMAGE :=quay.io/ai-lab/model_servers/$(APP)_vulkan:latest
+PORT ?= 8001
 
-# ----- MODEL OPTIONS -----
+include ../common/Makefile.common
 
-WHISPER_SMALL_MODEL_NAME := ggml-small.bin
-WHISPER_SMALL_MODEL_URL := https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin
-
-WHISPER_BASE_MODEL_NAME := ggml-base.en.bin
-WHISPER_BASE_MODEL_URL := https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin
-
-SELECTED_MODEL_NAME := $(or $(SELECTED_MODEL),$(WHISPER_SMALL_MODEL_NAME))
-SELECTED_MODEL_URL := $(or $(SELECTED_MODEL_LINK),$(WHISPER_SMALL_MODEL_URL))
-
-# --- END MODEL OPTIONS ---
+IMAGE_NAME ?= $(REGISTRY_ORG)/$(COMPONENT)/$(APP):latest
+IMAGE ?= $(REGISTRY)/$(IMAGE_NAME)
+# CUDA_IMAGE_NAME := $(REGISTRY)/$(BASE_IMAGE_NAME)/$(APP)_cuda:latest
+# VULKAN_IMAGE := $(REGISTRY)/$(BASE_IMAGE_NAME)/$(APP)_vulkan:latest
 
 MODELS_PATH := /app/models
-
-BIND_MOUNT_OPTIONS :=  ro
-OS := $(shell uname -s)
-ifeq ($(OS),Linux)
-    BIND_MOUNT_OPTIONS := Z,ro
-endif
+MODEL_NAME ?= ggml-small.bin
 
 .PHONY: all
-all: build whisper-small run
-
-.PHONY: build
-build:
-	podman build -t $(IMAGE) . -f Containerfile
+all: build download-model-whisper-small run
 
-.PHONY: whisper-small
-whisper-small: 
+.PHONY: download-model-whisper-small # small .bin model type testing
+download-model-whisper-small:
 	cd ../../models && \
-	curl -s -S -L -f $(WHISPER_SMALL_MODEL_URL) -z $(WHISPER_SMALL_MODEL_NAME) -o $(WHISPER_SMALL_MODEL_NAME).tmp && mv -f $(WHISPER_SMALL_MODEL_NAME).tmp $(WHISPER_SMALL_MODEL_NAME) 2>/dev/null || rm -f $(WHISPER_SMALL_MODEL_NAME).tmp $(WHISPER_SMALL_MODEL_NAME)
-
-.PHONY: install
-install:
-	pip install -r tests/requirements.txt
-
-.PHONY: download-model-whisper-base
-download-model-whisper-base: 
-	cd ../../models && \
-	curl -s -S -L -f $(WHISPER_BASE_MODEL_URL) -z $(WHISPER_BASE_MODEL_NAME) -o $(WHISPER_BASE_MODEL_NAME).tmp && mv -f $(WHISPER_BASE_MODEL_NAME).tmp $(WHISPER_BASE_MODEL_NAME) 2>/dev/null || rm -f $(WHISPER_BASE_MODEL_NAME).tmp $(WHISPER_BASE_MODEL_NAME)
-
-.PHONY: run
-run:
-	cd ../../models && \
-	podman run -d --rm -it -p $(PORT):$(PORT) -v ./$(SELECTED_MODEL_NAME):$(MODELS_PATH)/$(SELECTED_MODEL_NAME):$(BIND_MOUNT_OPTIONS) -e HOST=0.0.0.0 -e MODEL_PATH=$(MODELS_PATH)/$(SELECTED_MODEL_NAME) -e PORT=$(PORT) $(IMAGE)
-
-.PHONY: test
-test:
-	curl -H "Cache-Control: no-cache" -s -S -L -f $(SELECTED_MODEL_URL) -z ./model.gguf -o ./model.gguf.tmp && mv -f ./model.gguf.tmp ./model.gguf 2>/dev/null || rm -f ./model.gguf.tmp ./model.gguf
-	pytest --log-cli-level NOTSET
+	$(MAKE) MODEL_NAME=ggml-small.bin MODEL_URL=https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin -f Makefile download-model
diff --git a/model_servers/whispercpp/tests/conftest.py b/model_servers/whispercpp/tests/conftest.py
index 380262b1f..3d8159ac7 100644
--- a/model_servers/whispercpp/tests/conftest.py
+++ b/model_servers/whispercpp/tests/conftest.py
@@ -1,24 +1,53 @@
 import pytest_container
 import os
 
+def _env(name, default):
+    """Return environment variable `name`, or `default` when unset or empty.
+
+    The Makefile `test` target passes every option through the environment,
+    so an option that was not supplied arrives as an empty string instead
+    of being absent from os.environ.
+    """
+    return os.environ.get(name) or default
+
+
+# Registry host that prefixes the image reference.
+REGISTRY = _env('REGISTRY', 'ghcr.io')
+
+# Image path and tag below the registry.
+IMAGE_NAME = _env('IMAGE_NAME', 'containers/whispercpp:latest')
+
+# Model file name; bind-mounted from the current directory on the host.
+MODEL_NAME = _env('MODEL_NAME', 'ggml-small.bin')
+
+# Directory inside the container that the model file is mounted into.
+MODEL_PATH = _env('MODEL_PATH', '/app/models')
+
+# Port passed to the server via PORT and forwarded to the host;
+# a value that is not an integer falls back to 8001.
+try:
+    PORT = int(_env('PORT', 8001))
+except ValueError:
+    PORT = 8001
+
 MS = pytest_container.Container(
-        url=f"containers-storage:{os.environ['REGISTRY']}/containers/{os.environ['IMAGE_NAME']}",
+        url=f"containers-storage:{REGISTRY}/{IMAGE_NAME}",
         volume_mounts=[
             pytest_container.container.BindMount(
-                container_path="/locallm/models/model.gguf",
-                host_path=f"./model.gguf",
+                container_path="{MODEL_PATH}/{MODEL_NAME}".format(MODEL_PATH=MODEL_PATH, MODEL_NAME=MODEL_NAME),
+                host_path=f"./{MODEL_NAME}",
                 flags=["ro"]
             )
         ],
         extra_environment_variables={
-            "MODEL_PATH": "/locallm/models/model.gguf",
+            "MODEL_PATH": "{MODEL_PATH}/{MODEL_NAME}".format(MODEL_PATH=MODEL_PATH, MODEL_NAME=MODEL_NAME),
             "HOST": "0.0.0.0",
-            "PORT": "8001"
+            "PORT": f"{PORT}"
         },
         forwarded_ports=[
             pytest_container.PortForwarding(
-                container_port=8001,
-                host_port=8001
+                container_port=PORT,
+                host_port=PORT
             )
         ],
     )
diff --git a/model_servers/whispercpp/tests/test_alive.py b/model_servers/whispercpp/tests/test_alive.py
index fcad510a0..226aac1c0 100644
--- a/model_servers/whispercpp/tests/test_alive.py
+++ b/model_servers/whispercpp/tests/test_alive.py
@@ -4,7 +4,6 @@
 
 CONTAINER_IMAGES = [MS]
 
-
 def test_etc_os_release_present(auto_container: pytest_container.container.ContainerData):
     assert auto_container.connection.file("/etc/os-release").exists
 
diff --git a/models/Containerfile b/models/Containerfile
deleted file mode 100644
index fefecb5be..000000000
--- a/models/Containerfile
+++ /dev/null
@@ -1,9 +0,0 @@
-#https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf
-#https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf
-#https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q4_K_M.gguf
-#https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin
-# podman build --build-arg MODEL_URL=https://... -t quay.io/yourimage .
-FROM registry.access.redhat.com/ubi9/ubi-micro:9.3-13
-ARG MODEL_URL=https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf
-WORKDIR /model
-ADD $MODEL_URL /model/model.file
diff --git a/models/Makefile b/models/Makefile
index 0953b54a9..587df303f 100644
--- a/models/Makefile
+++ b/models/Makefile
@@ -1,6 +1,8 @@
-MODEL ?= MODEL=https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf
-IMAGE ?= quay.io/ai-lab/llama:latest
+MODEL_URL ?=
+MODEL_NAME ?=
 
-.PHONY: build
-build:
-	podman build --build-arg ${MODEL} -f Containerfile -t ${IMAGE} .
+.PHONY: download-model
+download-model:
+	curl -H "Cache-Control: no-cache" -s -S -L -f $(MODEL_URL) -z $(MODEL_NAME) -o $(MODEL_NAME).tmp && \
+	mv -f $(MODEL_NAME).tmp $(MODEL_NAME) 2>/dev/null || \
+	rm -f $(MODEL_NAME).tmp $(MODEL_NAME)
diff --git a/models/README.md b/models/README.md
index f5cdd2544..27e7d0887 100644
--- a/models/README.md
+++ b/models/README.md
@@ -1,13 +1,19 @@
 # Directory to store model files
 
-The following suggested list of open models is available on huggingface.co.
+The models directory stores models and provides automation around downloading models.
 
-* https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf
-* https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q4_K_M.gguf
-* https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin
+Want to try one of our tested models? Try one or all of the following:
 
-You can easily build one of these models into a container image by executing
-
-```
-make MODEL=https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf IMAGE=your.registry.com/llama:latest
+```bash
+make -f Makefile download-model-llama
+make -f Makefile download-model-tiny-llama
+make -f Makefile download-model-mistral
+make -f Makefile download-model-whisper-small
+make -f Makefile download-model-whisper-base
 ```
+
+Want to download and run a model you don't see listed? This is supported with the `MODEL_NAME` and `MODEL_URL` params:
+
+```bash
+make -f Makefile download-model MODEL_URL=https://huggingface.co/andrewcanis/c4ai-command-r-v01-GGUF/resolve/main/c4ai-command-r-v01-Q4_K_S.gguf MODEL_NAME=c4ai-command-r-v01-Q4_K_S.gguf
+```
\ No newline at end of file
diff --git a/recipes/natural_language_processing/chatbot/Makefile b/recipes/natural_language_processing/chatbot/Makefile
index 7ab842dfc..aaebb5d0e 100644
--- a/recipes/natural_language_processing/chatbot/Makefile
+++ b/recipes/natural_language_processing/chatbot/Makefile
@@ -8,13 +8,12 @@ CHROMEDRIVER_INSTALLATION_PATH := $(shell realpath ../..)
 RELATIVE_MODELS_PATH := ../../../models
 
 MISTRAL_MODEL_NAME := mistral-7b-instruct-v0.1.Q4_K_M.gguf
-MISTRAL_MODEL_URL := https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf
-
 MODEL_NAME ?= $(MISTRAL_MODEL_NAME)
 
 .PHONY: download-model-mistral
 download-model-mistral:
-	curl -H "Cache-Control: no-cache" -s -S -L -f $(MISTRAL_MODEL_URL) -z $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME) -o $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp && mv -f $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME) 2>/dev/null || rm -f $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME)
+	cd ../../../models && \
+	$(MAKE) MODEL_NAME=mistral-7b-instruct-v0.1.Q4_K_M.gguf MODEL_URL=https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf -f Makefile download-model
 
 .PHONY: install
 install:
diff --git a/recipes/natural_language_processing/chatbot/tests/functional/conftest.py b/recipes/natural_language_processing/chatbot/tests/functional/conftest.py
index b7e2ff687..2f5483afa 100644
--- a/recipes/natural_language_processing/chatbot/tests/functional/conftest.py
+++ b/recipes/natural_language_processing/chatbot/tests/functional/conftest.py
@@ -50,7 +50,7 @@
                 host_port=8501
             )
         ],
-        extra_launch_args=["--net=host"]
+        extra_launch_args=["--network=host"]
     )
 
 def pytest_generate_tests(metafunc):