From 5cd53de1b007d40a0c2bb12e593b25660d4be8b7 Mon Sep 17 00:00:00 2001
From: Liora Milbaum
Date: Thu, 11 Apr 2024 16:46:33 +0300
Subject: [PATCH] Build cuda image

Signed-off-by: Liora Milbaum
---
 .github/workflows/model_servers.yaml   | 18 ++----------------
 model_servers/llamacpp_python/Makefile | 20 ++++++++++++++++++--
 .../llamacpp_python/cuda/Containerfile  |  5 ++---
 .../llamacpp_python/tests/conftest.py   |  2 +-
 4 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/.github/workflows/model_servers.yaml b/.github/workflows/model_servers.yaml
index 96959ce6d..7691ef3ba 100644
--- a/.github/workflows/model_servers.yaml
+++ b/.github/workflows/model_servers.yaml
@@ -32,12 +32,6 @@ jobs:
             directory: llamacpp_python
             platforms: linux/amd64,linux/arm64
             no_gpu: 1
-          #- image_name: llamacpp_python_vulkan
-          #  model: mistral
-          #  flavor: vulkan
-          #  directory: llamacpp_python
-          #  platforms: linux/arm64
-          #  vulkan: 1
           - image_name: llamacpp_python_cuda
             model: mistral
             flavor: cuda
@@ -100,13 +94,6 @@ jobs:
         if: ${{ matrix.no_gpu }}
         run: make test REGISTRY=${{ env.REGISTRY }} IMAGE_NAME=${{ env.REGISTRY_ORG }}/${{ matrix.image_name}}:latest
 
-      # - name: Run cuda test # we dont have cuda tests
-      #   working-directory: ./model_servers/${{ matrix.directory }}/
-      #   if: ${{ matrix.cuda }}
-      #   run: make test-cuda
-      #   env:
-      #     IMAGE_NAME: ${{ matrix.image_name }}
-
       - name: Login to Container Registry
         if: github.event_name == 'push' && github.ref == 'refs/heads/main'
         uses: redhat-actions/podman-login@v1.7
@@ -116,10 +103,9 @@ jobs:
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
       - name: Push image
-        id: push_image
         if: github.event_name == 'push' && github.ref == 'refs/heads/main'
        uses: redhat-actions/push-to-registry@v2.8
         with:
-          image: ${{ steps.build_image.outputs.image }}
-          tags: ${{ steps.build_image.outputs.tags }}
+          image: ${{ steps.build_image_base.outputs.image }}
+          tags: ${{ steps.build_image_base.outputs.tags }}
           registry: ${{ env.REGISTRY }}
diff --git a/model_servers/llamacpp_python/Makefile b/model_servers/llamacpp_python/Makefile
index 883d9f150..382a186a4 100644
--- a/model_servers/llamacpp_python/Makefile
+++ b/model_servers/llamacpp_python/Makefile
@@ -22,7 +22,23 @@ build-cuda:
 build-vulkan:
 	podman build --squash-all -t $(VULKAN_IMAGE) . -f vulkan/Containerfile
 
-.PHONY: download-model-mistral # default model
-download-model-mistral:
+.PHONY: download-model-tiny-llama
+download-model-tiny-llama:
+	curl -H "Cache-Control: no-cache" -s -S -L -f $(TINY_LLAMA_MODEL_URL) -z $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME) -o $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp && mv -f $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME) 2>/dev/null || rm -f $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME)
+
+.PHONY: download-model-llama
+download-model-llama:
+	curl -H "Cache-Control: no-cache" -s -S -L -f $(LLAMA_MODEL_URL) -z $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME) -o $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME).tmp && mv -f $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME) 2>/dev/null || rm -f $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME)
+
+.PHONY: mistral
+mistral:
+	curl -H "Cache-Control: no-cache" -s -S -L -f $(MISTRAL_MODEL_URL) -z $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME) -o $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp && mv -f $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME) 2>/dev/null || rm -f $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME)
+
+.PHONY: install
+install:
+	pip install -r tests/requirements.txt
+
+.PHONY: run
+run:
 	cd ../../models && \
 	make MODEL_NAME=mistral-7b-instruct-v0.1.Q4_K_M.gguf MODEL_URL=https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf -f Makefile download-model
diff --git a/model_servers/llamacpp_python/cuda/Containerfile b/model_servers/llamacpp_python/cuda/Containerfile
index ed8ff3754..68bed76d4 100644
--- a/model_servers/llamacpp_python/cuda/Containerfile
+++ b/model_servers/llamacpp_python/cuda/Containerfile
@@ -1,8 +1,7 @@
 FROM quay.io/opendatahub/workbench-images:cuda-ubi9-python-3.9-20231206
 WORKDIR /locallm
 COPY src .
-RUN pip install --upgrade pip
 ENV CMAKE_ARGS="-DLLAMA_CUBLAS=on"
 ENV FORCE_CMAKE=1
-RUN pip install --no-cache-dir --upgrade -r /locallm/requirements.txt
-ENTRYPOINT [ "sh", "run.sh" ]
\ No newline at end of file
+RUN pip install --no-cache-dir -r ./requirements.txt
+ENTRYPOINT [ "sh", "run.sh" ]
diff --git a/model_servers/llamacpp_python/tests/conftest.py b/model_servers/llamacpp_python/tests/conftest.py
index 6cafe06fe..835111ed1 100644
--- a/model_servers/llamacpp_python/tests/conftest.py
+++ b/model_servers/llamacpp_python/tests/conftest.py
@@ -32,7 +32,7 @@
 PORT = 8001
 
 MS = pytest_container.Container(
-    url=f"containers-storage:{REGISTRY}/{IMAGE_NAME}",
+    url=f"containers-storage:{os.environ['REGISTRY']}/containers/{os.environ['IMAGE_NAME']}:{os.environ['IMAGE_TAG']}",
     volume_mounts=[
         pytest_container.container.BindMount(
             container_path="{MODEL_PATH}/{MODEL_NAME}".format(MODEL_PATH=MODEL_PATH, MODEL_NAME=MODEL_NAME),
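--

Note (below the signature delimiter, so `git am` drops it; not part of the commit): a minimal local smoke test of the new Makefile targets. It assumes the variables the curl targets reference (MISTRAL_MODEL_URL, MISTRAL_MODEL_NAME, RELATIVE_MODELS_PATH) are already defined at the top of model_servers/llamacpp_python/Makefile, and that build-cuda builds cuda/Containerfile with podman:

  cd model_servers/llamacpp_python
  make install      # pip install -r tests/requirements.txt (test deps)
  make mistral      # curl the mistral gguf into $(RELATIVE_MODELS_PATH)
  make build-cuda   # podman build of the cuda image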