From 73a8c76eb68ba7d11eee723734ba7c20436e23d0 Mon Sep 17 00:00:00 2001
From: sallyom <somalley@redhat.com>
Date: Thu, 11 Apr 2024 19:08:45 -0400
Subject: [PATCH] add flag for cuda model-server run

Signed-off-by: sallyom <somalley@redhat.com>
---
 .github/workflows/model_servers.yaml          |  6 ++++-
 model_servers/llamacpp_python/Makefile        |  6 +++++
 .../llamacpp_python/tests/conftest.py         | 23 +++++++++++++++++++
 .../llamacpp_python/tests/test_alive.py       |  2 +-
 4 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/model_servers.yaml b/.github/workflows/model_servers.yaml
index 12b4c0980..0c6662077 100644
--- a/.github/workflows/model_servers.yaml
+++ b/.github/workflows/model_servers.yaml
@@ -29,21 +29,25 @@ jobs:
             flavor: base
             directory: llamacpp_python
             platforms: linux/amd64,linux/arm64
+            test_cmd: test
           #- image_name: llamacpp_python_vulkan
           #  model: mistral
           #  flavor: vulkan
           #  directory: llamacpp_python
           #  platforms: linux/arm64
+          #  test_cmd: test
           - image_name: llamacpp_python_cuda
             model: mistral
             flavor: cuda
             directory: llamacpp_python
             platforms: linux/amd64
+            test_cmd: run-cuda
           - image_name: whispercpp
             model: whisper-small
             flavor: base
             directory: whispercpp
             platforms: linux/amd64,linux/arm64
+            test_cmd: test
     runs-on: ubuntu-latest
     permissions:
       contents: read
@@ -91,7 +95,7 @@ jobs:
 
       - name: Run tests
         working-directory: ./model_servers/${{ matrix.directory }}/
-        run: make test
+        run: make ${{ matrix.test_cmd }}
         env:
           IMAGE_NAME: ${{ matrix.image_name }}
 
diff --git a/model_servers/llamacpp_python/Makefile b/model_servers/llamacpp_python/Makefile
index 05cb7dc64..7c10aea56 100644
--- a/model_servers/llamacpp_python/Makefile
+++ b/model_servers/llamacpp_python/Makefile
@@ -67,6 +67,12 @@ run:
 	cd ../../models && \
 	podman run -it -d -p $(PORT):$(PORT) -v ./$(SELECTED_MODEL_NAME):$(MODELS_PATH)/model.gguf:$(BIND_MOUNT_OPTIONS) -e MODEL_PATH=$(MODELS_PATH)/model.gguf -e HOST=0.0.0.0 -e PORT=$(PORT) --net=host $(IMAGE)
 
+# TODO: set to fail if container isn't running (set || true to enable tests temporarily)
+.PHONY: run-cuda
+run-cuda:
+	cd ../../models && \
+	podman run -it -d -p $(PORT):$(PORT) -v ./$(SELECTED_MODEL_NAME):$(MODELS_PATH)/model.gguf:$(BIND_MOUNT_OPTIONS) -e MODEL_PATH=$(MODELS_PATH)/model.gguf -e HOST=0.0.0.0 -e PORT=$(PORT) --net=host --device nvidia.com/gpu=all $(IMAGE) || true
+
 .PHONY: test
 test:
 	curl -H "Cache-Control: no-cache" -s -S -L -f $(SELECTED_MODEL_URL) -z ./model.gguf -o ./model.gguf.tmp && mv -f ./model.gguf.tmp ./model.gguf 2>/dev/null || rm -f ./model.gguf.tmp ./model.gguf
diff --git a/model_servers/llamacpp_python/tests/conftest.py b/model_servers/llamacpp_python/tests/conftest.py
index 380262b1f..f629d3e6c 100644
--- a/model_servers/llamacpp_python/tests/conftest.py
+++ b/model_servers/llamacpp_python/tests/conftest.py
@@ -1,6 +1,29 @@
 import pytest_container
 import os
 
+CUDA_MS = pytest_container.Container(
+        url=f"containers-storage:{os.environ['REGISTRY']}/containers/{os.environ['IMAGE_NAME']}",
+        volume_mounts=[
+            pytest_container.container.BindMount(
+                container_path="/locallm/models/model.gguf",
+                host_path=f"./model.gguf",
+                flags=["ro"]
+            )
+        ],
+        extra_environment_variables={
+            "MODEL_PATH": "/locallm/models/model.gguf",
+            "HOST": "0.0.0.0",
+            "PORT": "8001"
+        },
+        forwarded_ports=[
+            pytest_container.PortForwarding(
+                container_port=8001,
+                host_port=8001
+            )
+        ],
+        extra_run_args=["--device", "nvidia.com/gpu=all"],
+    )
+
 MS = pytest_container.Container(
         url=f"containers-storage:{os.environ['REGISTRY']}/containers/{os.environ['IMAGE_NAME']}",
         volume_mounts=[
diff --git a/model_servers/llamacpp_python/tests/test_alive.py b/model_servers/llamacpp_python/tests/test_alive.py
index fcad510a0..894eba615 100644
--- a/model_servers/llamacpp_python/tests/test_alive.py
+++ b/model_servers/llamacpp_python/tests/test_alive.py
@@ -2,7 +2,7 @@
 from .conftest import MS
 import tenacity
 
-CONTAINER_IMAGES = [MS]
+CONTAINER_IMAGES = [MS, CUDA_MS]
 
 
 def test_etc_os_release_present(auto_container: pytest_container.container.ContainerData):