Skip to content

Commit

Permalink
Build cuda image
Browse files Browse the repository at this point in the history
Signed-off-by: Liora Milbaum <[email protected]>
  • Loading branch information
lmilbaum committed Apr 15, 2024
1 parent f42a8fc commit d918be0
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 20 deletions.
14 changes: 0 additions & 14 deletions .github/workflows/model_servers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,6 @@ jobs:
directory: llamacpp_python
platforms: linux/amd64,linux/arm64
no_gpu: 1
#- image_name: llamacpp_python_vulkan
# model: mistral
# flavor: vulkan
# directory: llamacpp_python
# platforms: linux/arm64
# vulkan: 1
- image_name: llamacpp_python_cuda
model: mistral
flavor: cuda
Expand Down Expand Up @@ -100,13 +94,6 @@ jobs:
if: ${{ matrix.no_gpu }}
run: make test REGISTRY=${{ env.REGISTRY }} IMAGE_NAME=${{ env.REGISTRY_ORG }}/${{ matrix.image_name}}:latest

# - name: Run cuda test # we dont have cuda tests
# working-directory: ./model_servers/${{ matrix.directory }}/
# if: ${{ matrix.cuda }}
# run: make test-cuda
# env:
# IMAGE_NAME: ${{ matrix.image_name }}

- name: Login to Container Registry
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
uses: redhat-actions/[email protected]
Expand All @@ -116,7 +103,6 @@ jobs:
password: ${{ secrets.GITHUB_TOKEN }}

- name: Push image
id: push_image
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
uses: redhat-actions/[email protected]
with:
Expand Down
20 changes: 18 additions & 2 deletions model_servers/llamacpp_python/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,23 @@ build-cuda:
# Build the Vulkan image from vulkan/Containerfile using the current directory
# as build context, squash all layers, and tag the result as $(VULKAN_IMAGE).
build-vulkan:
	podman build --squash-all --file vulkan/Containerfile --tag $(VULKAN_IMAGE) .

.PHONY: download-model-mistral # default model
download-model-mistral:
# Download the tiny-llama model from $(TINY_LLAMA_MODEL_URL) to
# $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).
#
# The file name must be the TINY_LLAMA one so it matches the URL being fetched;
# using $(LLAMA_MODEL_NAME) here would store the tiny model under the llama
# model's name.
#
# curl writes to a ".tmp" sibling that is moved into place only when the
# transfer succeeds; -z makes the request conditional on the timestamp of the
# already-present file. If the download or the move fails, both the ".tmp"
# file and the final file are removed.
.PHONY: download-model-tiny-llama
download-model-tiny-llama:
	curl -H "Cache-Control: no-cache" -s -S -L -f $(TINY_LLAMA_MODEL_URL) \
		-z $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME) \
		-o $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp \
	&& mv -f $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME) 2>/dev/null \
	|| rm -f $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME)

# Download the llama model from $(LLAMA_MODEL_URL) to
# $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME).
#
# The file name must be the LLAMA one so it matches the URL being fetched;
# using $(TINY_LLAMA_MODEL_NAME) here would store the llama model under the
# tiny-llama model's name.
#
# curl writes to a ".tmp" sibling that is moved into place only when the
# transfer succeeds; -z makes the request conditional on the timestamp of the
# already-present file. If the download or the move fails, both the ".tmp"
# file and the final file are removed.
.PHONY: download-model-llama
download-model-llama:
	curl -H "Cache-Control: no-cache" -s -S -L -f $(LLAMA_MODEL_URL) \
		-z $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME) \
		-o $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME).tmp \
	&& mv -f $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME) 2>/dev/null \
	|| rm -f $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME)

# Fetch the Mistral model from $(MISTRAL_MODEL_URL) into
# $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).
# The transfer goes to a ".tmp" file first and is renamed on success; on any
# failure of the download or the rename, the ".tmp" file and the final file
# are both deleted.
.PHONY: mistral
mistral:
	curl --header "Cache-Control: no-cache" --silent --show-error --location --fail \
		$(MISTRAL_MODEL_URL) \
		--time-cond $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME) \
		--output $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp \
	&& mv -f $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME) 2>/dev/null \
	|| rm -f $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME)

# Install the Python packages listed in tests/requirements.txt.
.PHONY: install
install:
	pip install --requirement tests/requirements.txt

# Delegate to the download-model target of ../../models/Makefile, passing the
# Mistral 7B Instruct (Q4_K_M, GGUF) file name and its Hugging Face URL.
#
# $(MAKE) -C is used instead of "cd ... && make" so that the sub-make inherits
# the parent's flags (e.g. -n, -j and the jobserver) as GNU Make intends for
# recursive invocations.
.PHONY: run
run:
	$(MAKE) -C ../../models -f Makefile \
		MODEL_NAME=mistral-7b-instruct-v0.1.Q4_K_M.gguf \
		MODEL_URL=https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf \
		download-model
5 changes: 2 additions & 3 deletions model_servers/llamacpp_python/cuda/Containerfile
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
FROM quay.io/opendatahub/workbench-images:cuda-ubi9-python-3.9-20231206
WORKDIR /locallm
COPY src .
RUN pip install --upgrade pip
ENV CMAKE_ARGS="-DLLAMA_CUBLAS=on"
ENV FORCE_CMAKE=1
RUN pip install --no-cache-dir --upgrade -r /locallm/requirements.txt
ENTRYPOINT [ "sh", "run.sh" ]
RUN pip install --no-cache-dir -r ./requirements.txt
ENTRYPOINT [ "sh", "run.sh" ]
2 changes: 1 addition & 1 deletion model_servers/llamacpp_python/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
PORT = 8001

MS = pytest_container.Container(
url=f"containers-storage:{REGISTRY}/{IMAGE_NAME}",
url=f"containers-storage:{os.environ['REGISTRY']}/containers/{os.environ['IMAGE_NAME']}:{os.environ['IMAGE_TAG']}",
volume_mounts=[
pytest_container.container.BindMount(
container_path="{MODEL_PATH}/{MODEL_NAME}".format(MODEL_PATH=MODEL_PATH, MODEL_NAME=MODEL_NAME),
Expand Down

0 comments on commit d918be0

Please sign in to comment.