add flag for cuda model-server run
Signed-off-by: sallyom <[email protected]>
sallyom committed Apr 11, 2024
1 parent cbfa39e commit 11b7a28
Showing 4 changed files with 39 additions and 5 deletions.
6 changes: 5 additions & 1 deletion .github/workflows/model_servers.yaml
@@ -29,21 +29,25 @@ jobs:
flavor: base
directory: llamacpp_python
platforms: linux/amd64,linux/arm64
+test_cmd: test
#- image_name: llamacpp_python_vulkan
# model: mistral
# flavor: vulkan
# directory: llamacpp_python
# platforms: linux/arm64
+# test_cmd: test
- image_name: llamacpp_python_cuda
model: mistral
flavor: cuda
directory: llamacpp_python
platforms: linux/amd64
+test_cmd: run-cuda
- image_name: whispercpp
model: whisper-small
flavor: base
directory: whispercpp
platforms: linux/amd64,linux/arm64
+test_cmd: test
runs-on: ubuntu-latest
permissions:
contents: read
@@ -91,7 +95,7 @@ jobs:

- name: Run tests
working-directory: ./model_servers/${{ matrix.directory }}/
-run: make test
+run: make ${{ matrix.test_cmd }}
env:
IMAGE_NAME: ${{ matrix.image_name }}

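With test_cmd in the matrix, the test step now resolves to a different make target per flavor. Roughly what the step amounts to for the two llamacpp_python entries above (a sketch only; the workflow presumably sets additional environment, such as REGISTRY, outside this hunk):

# Sketch of what "make ${{ matrix.test_cmd }}" runs per matrix entry
cd model_servers/llamacpp_python
IMAGE_NAME=llamacpp_python make test            # base flavor: run the pytest suite
IMAGE_NAME=llamacpp_python_cuda make run-cuda   # cuda flavor: start the container with the GPU attached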
13 changes: 10 additions & 3 deletions model_servers/llamacpp_python/Makefile
@@ -1,9 +1,10 @@
APP := llamacpp_python
+IMAGE_BASE := llamacpp-python
PORT := 8001

-IMAGE := quay.io/ai-lab/$(APP):latest
-CUDA_IMAGE := quay.io/ai-lab/$(APP)_cuda:latest
-VULKAN_IMAGE := quay.io/ai-lab/$(APP)_vulkan:latest
+IMAGE := quay.io/ai-lab/$(IMAGE_BASE):latest
+CUDA_IMAGE := quay.io/ai-lab/$(IMAGE_BASE)-cuda:latest
+VULKAN_IMAGE := quay.io/ai-lab/$(IMAGE_BASE)-vulkan:latest

# ----- MODEL OPTIONS -----

@@ -67,6 +68,12 @@ run:
cd ../../models && \
podman run -it -d -p $(PORT):$(PORT) -v ./$(SELECTED_MODEL_NAME):$(MODELS_PATH)/model.gguf:$(BIND_MOUNT_OPTIONS) -e MODEL_PATH=$(MODELS_PATH)/model.gguf -e HOST=0.0.0.0 -e PORT=$(PORT) --net=host $(IMAGE)

+# TODO: set to fail if container isn't running (set || true to enable tests temporarily)
+.PHONY: run-cuda
+run-cuda:
+cd ../../models && \
+podman run -it -d -p $(PORT):$(PORT) -v ./$(SELECTED_MODEL_NAME):$(MODELS_PATH)/model.gguf:$(BIND_MOUNT_OPTIONS) -e MODEL_PATH=$(MODELS_PATH)/model.gguf -e HOST=0.0.0.0 -e PORT=$(PORT) --net=host --device nvidia.com/gpu=all $(IMAGE) || true
+
.PHONY: test
test:
curl -H "Cache-Control: no-cache" -s -S -L -f $(SELECTED_MODEL_URL) -z ./model.gguf -o ./model.gguf.tmp && mv -f ./model.gguf.tmp ./model.gguf 2>/dev/null || rm -f ./model.gguf.tmp ./model.gguf
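The new run-cuda target passes the GPU into the container through CDI (--device nvidia.com/gpu=all) and, per the TODO, appends || true so the target succeeds even if podman run fails. A minimal sketch of the host-side prerequisite and a manual follow-up check, assuming the NVIDIA Container Toolkit is installed on the host:

# Generate the CDI spec that --device nvidia.com/gpu=all resolves against
sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
# From model_servers/llamacpp_python, start the CUDA model server
make run-cuda
# Because of the trailing "|| true", confirm by hand that the container actually stayed up
podman ps --latest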
23 changes: 23 additions & 0 deletions model_servers/llamacpp_python/tests/conftest.py
@@ -1,6 +1,29 @@
import pytest_container
import os

+CUDA_MS = pytest_container.Container(
+url=f"containers-storage:{os.environ['REGISTRY']}/containers/{os.environ['IMAGE_NAME']}",
+volume_mounts=[
+pytest_container.container.BindMount(
+container_path="/locallm/models/model.gguf",
+host_path=f"./model.gguf",
+flags=["ro"]
+)
+],
+extra_environment_variables={
+"MODEL_PATH": "/locallm/models/model.gguf",
+"HOST": "0.0.0.0",
+"PORT": "8001"
+},
+forwarded_ports=[
+pytest_container.PortForwarding(
+container_port=8001,
+host_port=8001
+)
+],
+extra_launch_args=["--device", "nvidia.com/gpu=all"],
+)
+
MS = pytest_container.Container(
url=f"containers-storage:{os.environ['REGISTRY']}/containers/{os.environ['IMAGE_NAME']}",
volume_mounts=[
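CUDA_MS points at the same containers-storage image URL as the MS definition that follows, adding only extra_launch_args for the GPU device, so it will only launch on hosts where podman has a CDI spec for an NVIDIA GPU. A hedged sketch of a local run with pytest, pytest-container, and tenacity installed; the REGISTRY and IMAGE_NAME values are illustrative, not fixed by the commit:

# containers-storage: means the image must already be built into local podman storage,
# and ./model.gguf must exist in the working directory (the Makefile's test target downloads it)
cd model_servers/llamacpp_python
REGISTRY=ghcr.io IMAGE_NAME=llamacpp_python_cuda python -m pytest tests/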
2 changes: 1 addition & 1 deletion model_servers/llamacpp_python/tests/test_alive.py
@@ -2,7 +2,7 @@
-from .conftest import MS
+from .conftest import MS, CUDA_MS
import tenacity

-CONTAINER_IMAGES = [MS]
+CONTAINER_IMAGES = [MS, CUDA_MS]


def test_etc_os_release_present(auto_container: pytest_container.container.ContainerData):