From 73a8c76eb68ba7d11eee723734ba7c20436e23d0 Mon Sep 17 00:00:00 2001 From: sallyom Date: Thu, 11 Apr 2024 19:08:45 -0400 Subject: [PATCH] add flag for cuda model-server run Signed-off-by: sallyom --- .github/workflows/model_servers.yaml | 6 ++++- model_servers/llamacpp_python/Makefile | 6 +++++ .../llamacpp_python/tests/conftest.py | 23 +++++++++++++++++++ .../llamacpp_python/tests/test_alive.py | 2 +- 4 files changed, 35 insertions(+), 2 deletions(-) diff --git a/.github/workflows/model_servers.yaml b/.github/workflows/model_servers.yaml index 12b4c0980..0c6662077 100644 --- a/.github/workflows/model_servers.yaml +++ b/.github/workflows/model_servers.yaml @@ -29,21 +29,25 @@ jobs: flavor: base directory: llamacpp_python platforms: linux/amd64,linux/arm64 + test_cmd: test #- image_name: llamacpp_python_vulkan # model: mistral # flavor: vulkan # directory: llamacpp_python # platforms: linux/arm64 + # test_cmd: test - image_name: llamacpp_python_cuda model: mistral flavor: cuda directory: llamacpp_python platforms: linux/amd64 + test_cmd: run-cuda - image_name: whispercpp model: whisper-small flavor: base directory: whispercpp platforms: linux/amd64,linux/arm64 + test_cmd: test runs-on: ubuntu-latest permissions: contents: read @@ -91,7 +95,7 @@ jobs: - name: Run tests working-directory: ./model_servers/${{ matrix.directory }}/ - run: make test + run: make ${{ matrix.test_cmd }} env: IMAGE_NAME: ${{ matrix.image_name }} diff --git a/model_servers/llamacpp_python/Makefile b/model_servers/llamacpp_python/Makefile index 05cb7dc64..7c10aea56 100644 --- a/model_servers/llamacpp_python/Makefile +++ b/model_servers/llamacpp_python/Makefile @@ -67,6 +67,12 @@ run: cd ../../models && \ podman run -it -d -p $(PORT):$(PORT) -v ./$(SELECTED_MODEL_NAME):$(MODELS_PATH)/model.gguf:$(BIND_MOUNT_OPTIONS) -e MODEL_PATH=$(MODELS_PATH)/model.gguf -e HOST=0.0.0.0 -e PORT=$(PORT) --net=host $(IMAGE) +# TODO: set to fail if container isn't running (set || true to enable tests temporarily) +.PHONY: run-cuda +run-cuda: + cd ../../models && \ + podman run -it -d -p $(PORT):$(PORT) -v ./$(SELECTED_MODEL_NAME):$(MODELS_PATH)/model.gguf:$(BIND_MOUNT_OPTIONS) -e MODEL_PATH=$(MODELS_PATH)/model.gguf -e HOST=0.0.0.0 -e PORT=$(PORT) --net=host --device nvidia.com/gpu=all $(IMAGE) || true + .PHONY: test test: curl -H "Cache-Control: no-cache" -s -S -L -f $(SELECTED_MODEL_URL) -z ./model.gguf -o ./model.gguf.tmp && mv -f ./model.gguf.tmp ./model.gguf 2>/dev/null || rm -f ./model.gguf.tmp ./model.gguf diff --git a/model_servers/llamacpp_python/tests/conftest.py b/model_servers/llamacpp_python/tests/conftest.py index 380262b1f..f629d3e6c 100644 --- a/model_servers/llamacpp_python/tests/conftest.py +++ b/model_servers/llamacpp_python/tests/conftest.py @@ -1,6 +1,29 @@ import pytest_container import os +CUDA_MS = pytest_container.Container( + url=f"containers-storage:{os.environ['REGISTRY']}/containers/{os.environ['IMAGE_NAME']}", + volume_mounts=[ + pytest_container.container.BindMount( + container_path="/locallm/models/model.gguf", + host_path=f"./model.gguf", + flags=["ro"] + ) + ], + extra_environment_variables={ + "MODEL_PATH": "/locallm/models/model.gguf", + "HOST": "0.0.0.0", + "PORT": "8001" + }, + forwarded_ports=[ + pytest_container.PortForwarding( + container_port=8001, + host_port=8001 + ) + ], + extra_run_args=["--device", "nvidia.com/gpu=all"], + ) + MS = pytest_container.Container( url=f"containers-storage:{os.environ['REGISTRY']}/containers/{os.environ['IMAGE_NAME']}", volume_mounts=[ diff --git a/model_servers/llamacpp_python/tests/test_alive.py b/model_servers/llamacpp_python/tests/test_alive.py index fcad510a0..894eba615 100644 --- a/model_servers/llamacpp_python/tests/test_alive.py +++ b/model_servers/llamacpp_python/tests/test_alive.py @@ -2,7 +2,7 @@ from .conftest import MS import tenacity -CONTAINER_IMAGES = [MS] +CONTAINER_IMAGES = [MS, CUDA_MS] def test_etc_os_release_present(auto_container: pytest_container.container.ContainerData):