From e5fb7818aa3316f920868ff4eb9c1701a58574a0 Mon Sep 17 00:00:00 2001 From: Liora Milbaum Date: Thu, 11 Apr 2024 16:46:33 +0300 Subject: [PATCH] Build cuda image Signed-off-by: Liora Milbaum --- .github/workflows/model_servers.yaml | 50 +++++++++++++++---- model_servers/llamacpp_python/Makefile | 1 - .../llamacpp_python/cuda/Containerfile | 5 +- .../llamacpp_python/tests/conftest.py | 2 +- 4 files changed, 44 insertions(+), 14 deletions(-) diff --git a/.github/workflows/model_servers.yaml b/.github/workflows/model_servers.yaml index 1f1639cec..bd9bef537 100644 --- a/.github/workflows/model_servers.yaml +++ b/.github/workflows/model_servers.yaml @@ -25,9 +25,17 @@ jobs: matrix: include: - image_name: llamacpp_python + image_tag: latest model: mistral + type: base + - image_name: llamacpp_python + image_tag: latest-cuda + model: mistral + type: cuda - image_name: whispercpp + image_tag: latest model: whisper-small + type: base runs-on: ubuntu-latest permissions: contents: read @@ -45,14 +53,25 @@ jobs: sudo apt-get update sudo apt-get install -y qemu-user-static - - name: Build Image - id: build_image + - name: Build Image (base) + id: build_image_base + if: matrix.type == 'base' uses: redhat-actions/buildah-build@v2.13 with: image: ${{ env.REGISTRY }}/${{ github.repository_owner}}/${{ matrix.image_name }} platforms: linux/amd64, linux/arm64 - tags: latest - containerfiles: ./model_servers/${{ matrix.image_name }}/base/Containerfile + tags: ${{ matrix.image_tag }} + containerfiles: ./model_servers/${{ matrix.image_name }}/${{ matrix.type }}/Containerfile + context: model_servers/${{ matrix.image_name }}/ + + - name: Build Image (cuda) + id: build_image_cuda + if: matrix.type == 'cuda' + uses: redhat-actions/buildah-build@v2.13 + with: + image: ${{ env.REGISTRY }}/${{ github.repository_owner}}/${{ matrix.image_name }} + tags: ${{ matrix.image_tag }} + containerfiles: ./model_servers/${{ matrix.image_name }}/${{ matrix.type }}/Containerfile context: model_servers/${{ 
matrix.image_name }}/ - name: Download model @@ -68,11 +87,17 @@ jobs: working-directory: ./model_servers/${{ matrix.image_name }}/ run: make install + - name: Install cuda dependencies + if: matrix.type == 'cuda' + working-directory: ./model_servers/${{ matrix.image_name }}/ + run: sudo apt-get install -y cuda-drivers + - name: Run tests working-directory: ./model_servers/${{ matrix.image_name }}/ run: make test env: IMAGE_NAME: ${{ matrix.image_name }} + IMAGE_TAG: ${{ matrix.image_tag }} - name: Login to Container Registry if: github.event_name == 'push' && github.ref == 'refs/heads/main' @@ -82,11 +107,18 @@ username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - - name: Push image - id: push_image - if: github.event_name == 'push' && github.ref == 'refs/heads/main' + - name: Push image (base) + if: github.event_name == 'push' && github.ref == 'refs/heads/main' && matrix.type == 'base' + uses: redhat-actions/push-to-registry@v2.8 + with: + image: ${{ steps.build_image_base.outputs.image }} + tags: ${{ steps.build_image_base.outputs.tags }} + registry: ${{ env.REGISTRY }} + + - name: Push image (cuda) + if: github.event_name == 'push' && github.ref == 'refs/heads/main' && matrix.type == 'cuda' uses: redhat-actions/push-to-registry@v2.8 with: - image: ${{ steps.build_image.outputs.image }} - tags: ${{ steps.build_image.outputs.tags }} + image: ${{ steps.build_image_cuda.outputs.image }} + tags: ${{ steps.build_image_cuda.outputs.tags }} registry: ${{ env.REGISTRY }} diff --git a/model_servers/llamacpp_python/Makefile b/model_servers/llamacpp_python/Makefile index 62e3bc91d..418de8432 100644 --- a/model_servers/llamacpp_python/Makefile +++ b/model_servers/llamacpp_python/Makefile @@ -53,7 +53,6 @@ download-model-tiny-llama: download-model-llama: curl -H "Cache-Control: no-cache" -s -S -L -f $(LLAMA_MODEL_URL) -z $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME) -o $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp && mv -f 
$(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME) 2>/dev/null || rm -f $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME) - .PHONY: mistral mistral: curl -H "Cache-Control: no-cache" -s -S -L -f $(MISTRAL_MODEL_URL) -z $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME) -o $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp && mv -f $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME) 2>/dev/null || rm -f $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME) diff --git a/model_servers/llamacpp_python/cuda/Containerfile b/model_servers/llamacpp_python/cuda/Containerfile index ed8ff3754..68bed76d4 100644 --- a/model_servers/llamacpp_python/cuda/Containerfile +++ b/model_servers/llamacpp_python/cuda/Containerfile @@ -1,8 +1,7 @@ FROM quay.io/opendatahub/workbench-images:cuda-ubi9-python-3.9-20231206 WORKDIR /locallm COPY src . -RUN pip install --upgrade pip ENV CMAKE_ARGS="-DLLAMA_CUBLAS=on" ENV FORCE_CMAKE=1 -RUN pip install --no-cache-dir --upgrade -r /locallm/requirements.txt -ENTRYPOINT [ "sh", "run.sh" ] \ No newline at end of file +RUN pip install --no-cache-dir -r ./requirements.txt +ENTRYPOINT [ "sh", "run.sh" ] diff --git a/model_servers/llamacpp_python/tests/conftest.py b/model_servers/llamacpp_python/tests/conftest.py index 380262b1f..ea73133ee 100644 --- a/model_servers/llamacpp_python/tests/conftest.py +++ b/model_servers/llamacpp_python/tests/conftest.py @@ -2,7 +2,7 @@ import os MS = pytest_container.Container( - url=f"containers-storage:{os.environ['REGISTRY']}/containers/{os.environ['IMAGE_NAME']}", + url=f"containers-storage:{os.environ['REGISTRY']}/containers/{os.environ['IMAGE_NAME']}:{os.environ['IMAGE_TAG']}", volume_mounts=[ pytest_container.container.BindMount( container_path="/locallm/models/model.gguf",