From e5fb7818aa3316f920868ff4eb9c1701a58574a0 Mon Sep 17 00:00:00 2001 From: Liora Milbaum Date: Thu, 11 Apr 2024 16:46:33 +0300 Subject: [PATCH] Build cuda image Signed-off-by: Liora Milbaum --- .github/workflows/model_servers.yaml | 50 +++++++++++++++---- model_servers/llamacpp_python/Makefile | 1 - .../llamacpp_python/cuda/Containerfile | 5 +- .../llamacpp_python/tests/conftest.py | 2 +- 4 files changed, 44 insertions(+), 14 deletions(-) diff --git a/.github/workflows/model_servers.yaml b/.github/workflows/model_servers.yaml index 1f1639cec..bd9bef537 100644 --- a/.github/workflows/model_servers.yaml +++ b/.github/workflows/model_servers.yaml @@ -25,9 +25,17 @@ jobs: matrix: include: - image_name: llamacpp_python + image_tag: latest model: mistral + type: base + - image_name: llamacpp_python + image_tag: latest-cuda + model: mistral + type: cuda - image_name: whispercpp + image_tag: latest model: whisper-small + type: base runs-on: ubuntu-latest permissions: contents: read @@ -45,14 +53,25 @@ jobs: sudo apt-get update sudo apt-get install -y qemu-user-static - - name: Build Image - id: build_image + - name: Build Image (base) + id: build_image_base + if: matrix.type == 'base' uses: redhat-actions/buildah-build@v2.13 with: image: ${{ env.REGISTRY }}/${{ github.repository_owner}}/${{ matrix.image_name }} platforms: linux/amd64, linux/arm64 - tags: latest - containerfiles: ./model_servers/${{ matrix.image_name }}/base/Containerfile + tags: ${{ matrix.image_tag }} + containerfiles: ./model_servers/${{ matrix.image_name }}/${{ matrix.type }}/Containerfile + context: model_servers/${{ matrix.image_name }}/ + + - name: Build Image (cuda) + id: build_image_cuda + if: matrix.type == 'cuda' + uses: redhat-actions/buildah-build@v2.13 + with: + image: ${{ env.REGISTRY }}/${{ github.repository_owner}}/${{ matrix.image_name }} + tags: ${{ matrix.image_tag }} + containerfiles: ./model_servers/${{ matrix.image_name }}/${{ matrix.type }}/Containerfile context: model_servers/${{ 
matrix.image_name }}/ - name: Download model @@ -68,11 +87,17 @@ jobs: working-directory: ./model_servers/${{ matrix.image_name }}/ run: make install + - name: Install cuda dependencies + if: matrix.type == 'cuda' + working-directory: ./model_servers/${{ matrix.image_name }}/ + run: sudo apt-get install -y cuda-drivers + - name: Run tests working-directory: ./model_servers/${{ matrix.image_name }}/ run: make test env: IMAGE_NAME: ${{ matrix.image_name }} + IMAGE_TAG: ${{ matrix.image_tag }} - name: Login to Container Registry if: github.event_name == 'push' && github.ref == 'refs/heads/main' @@ -82,11 +107,18 @@ username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - - name: Push image - id: push_image - if: github.event_name == 'push' && github.ref == 'refs/heads/main' + - name: Push image (base) + if: github.event_name == 'push' && github.ref == 'refs/heads/main' && matrix.type == 'base' + uses: redhat-actions/push-to-registry@v2.8 + with: + image: ${{ steps.build_image_base.outputs.image }} + tags: ${{ steps.build_image_base.outputs.tags }} + registry: ${{ env.REGISTRY }} + + - name: Push image (cuda) + if: github.event_name == 'push' && github.ref == 'refs/heads/main' && matrix.type == 'cuda' uses: redhat-actions/push-to-registry@v2.8 with: - image: ${{ steps.build_image.outputs.image }} - tags: ${{ steps.build_image.outputs.tags }} + image: ${{ steps.build_image_cuda.outputs.image }} + tags: ${{ steps.build_image_cuda.outputs.tags }} registry: ${{ env.REGISTRY }} diff --git a/model_servers/llamacpp_python/Makefile b/model_servers/llamacpp_python/Makefile index 62e3bc91d..418de8432 100644 --- a/model_servers/llamacpp_python/Makefile +++ b/model_servers/llamacpp_python/Makefile @@ -53,7 +53,6 @@ download-model-tiny-llama: download-model-llama: curl -H "Cache-Control: no-cache" -s -S -L -f $(LLAMA_MODEL_URL) -z $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME) -o $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp && mv -f 
$(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME) 2>/dev/null || rm -f $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME) - .PHONY: mistral mistral: curl -H "Cache-Control: no-cache" -s -S -L -f $(MISTRAL_MODEL_URL) -z $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME) -o $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp && mv -f $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME) 2>/dev/null || rm -f $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME) diff --git a/model_servers/llamacpp_python/cuda/Containerfile b/model_servers/llamacpp_python/cuda/Containerfile index ed8ff3754..68bed76d4 100644 --- a/model_servers/llamacpp_python/cuda/Containerfile +++ b/model_servers/llamacpp_python/cuda/Containerfile @@ -1,8 +1,7 @@ FROM quay.io/opendatahub/workbench-images:cuda-ubi9-python-3.9-20231206 WORKDIR /locallm COPY src . -RUN pip install --upgrade pip ENV CMAKE_ARGS="-DLLAMA_CUBLAS=on" ENV FORCE_CMAKE=1 -RUN pip install --no-cache-dir --upgrade -r /locallm/requirements.txt -ENTRYPOINT [ "sh", "run.sh" ] \ No newline at end of file +RUN pip install --no-cache-dir -r ./requirements.txt +ENTRYPOINT [ "sh", "run.sh" ] diff --git a/model_servers/llamacpp_python/tests/conftest.py b/model_servers/llamacpp_python/tests/conftest.py index 380262b1f..ea73133ee 100644 --- a/model_servers/llamacpp_python/tests/conftest.py +++ b/model_servers/llamacpp_python/tests/conftest.py @@ -2,7 +2,7 @@ import os MS = pytest_container.Container( - url=f"containers-storage:{os.environ['REGISTRY']}/containers/{os.environ['IMAGE_NAME']}", + url=f"containers-storage:{os.environ['REGISTRY']}/containers/{os.environ['IMAGE_NAME']}:{os.environ['IMAGE_TAG']}", volume_mounts=[ pytest_container.container.BindMount( container_path="/locallm/models/model.gguf",