add flag for cuda model-server run
Signed-off-by: sallyom <[email protected]>
sallyom committed Apr 11, 2024
1 parent cbfa39e commit 11b7a28
Showing 4 changed files with 39 additions and 5 deletions.
6 changes: 5 additions & 1 deletion .github/workflows/model_servers.yaml
@@ -29,21 +29,25 @@ jobs:
flavor: base
directory: llamacpp_python
platforms: linux/amd64,linux/arm64
+test_cmd: test
#- image_name: llamacpp_python_vulkan
# model: mistral
# flavor: vulkan
# directory: llamacpp_python
# platforms: linux/arm64
+# test_cmd: test
- image_name: llamacpp_python_cuda
model: mistral
flavor: cuda
directory: llamacpp_python
platforms: linux/amd64
+test_cmd: run-cuda
- image_name: whispercpp
model: whisper-small
flavor: base
directory: whispercpp
platforms: linux/amd64,linux/arm64
+test_cmd: test
runs-on: ubuntu-latest
permissions:
contents: read
@@ -91,7 +95,7 @@ jobs:

- name: Run tests
working-directory: ./model_servers/${{ matrix.directory }}/
-run: make test
+run: make ${{ matrix.test_cmd }}
env:
IMAGE_NAME: ${{ matrix.image_name }}

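With test_cmd in the matrix, the test step now resolves to a different make target per flavor. Roughly what the step amounts to for the two llamacpp_python entries above (a sketch only; the workflow presumably sets additional environment, such as REGISTRY, outside this hunk):

# Sketch of what "make ${{ matrix.test_cmd }}" runs per matrix entry
cd model_servers/llamacpp_python
IMAGE_NAME=llamacpp_python make test            # base flavor: run the pytest suite
IMAGE_NAME=llamacpp_python_cuda make run-cuda   # cuda flavor: start the container with the GPU attached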
13 changes: 10 additions & 3 deletions model_servers/llamacpp_python/Makefile
@@ -1,9 +1,10 @@
APP := llamacpp_python
+IMAGE_BASE := llamacpp-python
PORT := 8001

-IMAGE := quay.io/ai-lab/$(APP):latest
-CUDA_IMAGE := quay.io/ai-lab/$(APP)_cuda:latest
-VULKAN_IMAGE := quay.io/ai-lab/$(APP)_vulkan:latest
+IMAGE := quay.io/ai-lab/$(IMAGE_BASE):latest
+CUDA_IMAGE := quay.io/ai-lab/$(IMAGE_BASE)-cuda:latest
+VULKAN_IMAGE := quay.io/ai-lab/$(IMAGE_BASE)-vulkan:latest

# ----- MODEL OPTIONS -----

@@ -67,6 +68,12 @@ run:
cd ../../models && \
podman run -it -d -p $(PORT):$(PORT) -v ./$(SELECTED_MODEL_NAME):$(MODELS_PATH)/model.gguf:$(BIND_MOUNT_OPTIONS) -e MODEL_PATH=$(MODELS_PATH)/model.gguf -e HOST=0.0.0.0 -e PORT=$(PORT) --net=host $(IMAGE)

+# TODO: set to fail if container isn't running (set || true to enable tests temporarily)
+.PHONY: run-cuda
+run-cuda:
+cd ../../models && \
+podman run -it -d -p $(PORT):$(PORT) -v ./$(SELECTED_MODEL_NAME):$(MODELS_PATH)/model.gguf:$(BIND_MOUNT_OPTIONS) -e MODEL_PATH=$(MODELS_PATH)/model.gguf -e HOST=0.0.0.0 -e PORT=$(PORT) --net=host --device nvidia.com/gpu=all $(IMAGE) || true
+
.PHONY: test
test:
curl -H "Cache-Control: no-cache" -s -S -L -f $(SELECTED_MODEL_URL) -z ./model.gguf -o ./model.gguf.tmp && mv -f ./model.gguf.tmp ./model.gguf 2>/dev/null || rm -f ./model.gguf.tmp ./model.gguf
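The new run-cuda target passes the GPU into the container through CDI (--device nvidia.com/gpu=all) and, per the TODO, appends || true so the target succeeds even if podman run fails. A minimal sketch of the host-side prerequisite and a manual follow-up check, assuming the NVIDIA Container Toolkit is installed on the host:

# Generate the CDI spec that --device nvidia.com/gpu=all resolves against
sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
# From model_servers/llamacpp_python, start the CUDA model server
make run-cuda
# Because of the trailing "|| true", confirm by hand that the container actually stayed up
podman ps --latest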
23 changes: 23 additions & 0 deletions model_servers/llamacpp_python/tests/conftest.py
@@ -1,6 +1,29 @@
import pytest_container
import os

+CUDA_MS = pytest_container.Container(
+url=f"containers-storage:{os.environ['REGISTRY']}/containers/{os.environ['IMAGE_NAME']}",
+volume_mounts=[
+pytest_container.container.BindMount(
+container_path="/locallm/models/model.gguf",
+host_path=f"./model.gguf",
+flags=["ro"]
+)
+],
+extra_environment_variables={
+"MODEL_PATH": "/locallm/models/model.gguf",
+"HOST": "0.0.0.0",
+"PORT": "8001"
+},
+forwarded_ports=[
+pytest_container.PortForwarding(
+container_port=8001,
+host_port=8001
+)
+],
+extra_launch_args=["--device", "nvidia.com/gpu=all"],
+)
+
MS = pytest_container.Container(
url=f"containers-storage:{os.environ['REGISTRY']}/containers/{os.environ['IMAGE_NAME']}",
volume_mounts=[
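CUDA_MS points at the same containers-storage image URL as the MS definition that follows, adding only extra_launch_args for the GPU device, so it will only launch on hosts where podman has a CDI spec for an NVIDIA GPU. A hedged sketch of a local run with pytest, pytest-container, and tenacity installed; the REGISTRY and IMAGE_NAME values are illustrative, not fixed by the commit:

# containers-storage: means the image must already be built into local podman storage,
# and ./model.gguf must exist in the working directory (the Makefile's test target downloads it)
cd model_servers/llamacpp_python
REGISTRY=ghcr.io IMAGE_NAME=llamacpp_python_cuda python -m pytest tests/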
2 changes: 1 addition & 1 deletion model_servers/llamacpp_python/tests/test_alive.py
@@ -2,7 +2,7 @@
-from .conftest import MS
+from .conftest import MS, CUDA_MS
import tenacity

-CONTAINER_IMAGES = [MS]
+CONTAINER_IMAGES = [MS, CUDA_MS]


def test_etc_os_release_present(auto_container: pytest_container.container.ContainerData):