diff --git a/.devcontainer/Containerfile b/.devcontainer/Containerfile
new file mode 100644
index 00000000..ccd4b0d6
--- /dev/null
+++ b/.devcontainer/Containerfile
@@ -0,0 +1,9 @@
+FROM quay.io/containers/podman:v4.9.3
+
+USER root
+
+COPY model_servers/llamacpp_python/base/tests/requirements.txt .
+
+RUN dnf install -y python3.11 python3-pip buildah git && \
+    dnf clean all && \
+    pip3 install -r requirements.txt
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
new file mode 100644
index 00000000..02bb0e80
--- /dev/null
+++ b/.devcontainer/devcontainer.json
@@ -0,0 +1,12 @@
+{
+    "name": "recipes",
+    "build": {
+        "dockerfile": "Containerfile",
+        "context": ".."
+    },
+    "privileged": true,
+    "containerEnv": {
+        "REGISTRY": "ghcr.io",
+        "IMAGE_NAME": "ai-lab-recipes/playground"
+    }
+}
diff --git a/.github/workflows/build-images.yaml b/.github/workflows/build-images.yaml
index f6e31081..3a29bf83 100644
--- a/.github/workflows/build-images.yaml
+++ b/.github/workflows/build-images.yaml
@@ -34,7 +34,7 @@ jobs:
       uses: tj-actions/changed-files@v42
       with:
         files: |
-          playground/**
+          model_servers/llamacpp_python/base/**
 
     - name: Get changed rag files
       id: changed-files-rag
@@ -96,8 +96,8 @@ jobs:
         image: ${{ env.MODEL_SERVICE_IMAGE }}
         tags: latest ${{ github.sha }}
        platforms: linux/amd64, linux/arm64
-        context: playground
-        containerfiles: ./playground/Containerfile
+        context: model_servers/llamacpp_python
+        containerfiles: ./model_servers/llamacpp_python/base/Containerfile
 
     - name: Push model-service image
       id: push_model_service
diff --git a/.github/workflows/llamacpp_python.yaml b/.github/workflows/llamacpp_python.yaml
new file mode 100644
index 00000000..7a8dcd75
--- /dev/null
+++ b/.github/workflows/llamacpp_python.yaml
@@ -0,0 +1,48 @@
+name: llamacpp_python
+
+on:
+  pull_request:
+    branches:
+      - main
+  push:
+    branches:
+      - main
+
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: ${{ github.repository_owner }}/playground
+
+jobs:
+  build-and-push-image:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+    services:
+      registry:
+        image: registry:2.8.3
+        ports:
+          - 5000:5000
+    steps:
+      - uses: actions/checkout@v4.1.1
+
+      - name: Login to ghcr
+        uses: docker/login-action@v3.1.0
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Buildah Action
+        uses: redhat-actions/buildah-build@v2.13
+        with:
+          image: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+          tags: latest
+          containerfiles: ./model_servers/llamacpp_python/base/Containerfile
+          context: model_servers/llamacpp_python/
+
+      - name: Set up Python
+        uses: actions/setup-python@v5.0.0
+
+      - name: Run tests
+        run: make -f model_servers/llamacpp_python/base/Makefile test
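The workflow above builds the model server image with Buildah from the base Containerfile, using `model_servers/llamacpp_python/` as the build context, before invoking the Makefile `test` target. A rough local stand-in for the build step, assuming podman is installed; the tag below mirrors devcontainer.json's `REGISTRY`/`IMAGE_NAME` rather than the workflow's `${{ github.repository_owner }}` expression:

```bash
# Local equivalent of the "Buildah Action" step (tag choice is an assumption).
podman build \
    -f model_servers/llamacpp_python/base/Containerfile \
    -t ghcr.io/ai-lab-recipes/playground \
    model_servers/llamacpp_python/
```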
diff --git a/.gitignore b/.gitignore
index 3c2129ef..50cf3e3c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 *.gguf
 *.bin
 *_pycache_*
+port_check.lock
diff --git a/model_servers/llamacpp_python/README.md b/model_servers/llamacpp_python/README.md
index cdc8caae..7925ec3a 100644
--- a/model_servers/llamacpp_python/README.md
+++ b/model_servers/llamacpp_python/README.md
@@ -3,7 +3,11 @@
 From this directory,
 
 ```bash
-podman build -t playground:image .
+podman build -t playground .
+```
+or
+```bash
+make -f Makefile build
 ```
 
 ### Download Model
@@ -20,6 +24,10 @@
 cd ../models
 wget
 cd ../
 ```
+or
+```bash
+make -f Makefile models/llama-2-7b-chat.Q5_K_S.gguf
+```
 
 ### Deploy Model Service
@@ -34,7 +42,11 @@ podman run --rm -it -d \
         -e MODEL_PATH=models/ \
         -e HOST=0.0.0.0 \
         -e PORT=8001 \
-        playground:image`
+        playground
+```
+or
+```bash
+make -f Makefile run
 ```
 
 #### Multiple Model Service:
@@ -68,5 +80,14 @@ podman run --rm -it -d \
         -p 8001:8001 \
         -v Local/path/to/locallm/models:/locallm/models:ro,Z \
         -e CONFIG_PATH=models/ \
-        playground:image
+        playground
+```
+
+### DEV environment
+
+The environment is implemented with devcontainer technology.
+
+Running tests:
+```bash
+make -f Makefile test
 ```
diff --git a/model_servers/llamacpp_python/base/Containerfile b/model_servers/llamacpp_python/base/Containerfile
index de459fb9..e32290f2 100644
--- a/model_servers/llamacpp_python/base/Containerfile
+++ b/model_servers/llamacpp_python/base/Containerfile
@@ -1,7 +1,6 @@
-FROM registry.access.redhat.com/ubi9/python-39:latest
+FROM registry.access.redhat.com/ubi9/python-311:1-52
 WORKDIR /locallm
 COPY src .
-RUN pip install --upgrade pip
-RUN pip install --no-cache-dir --upgrade -r /locallm/requirements.txt
+RUN pip install --no-cache-dir --verbose -r ./requirements.txt
 EXPOSE 8001
-ENTRYPOINT [ "sh", "run.sh" ]
\ No newline at end of file
+ENTRYPOINT [ "sh", "./run.sh" ]
diff --git a/model_servers/llamacpp_python/base/Makefile b/model_servers/llamacpp_python/base/Makefile
new file mode 100644
index 00000000..dacbd569
--- /dev/null
+++ b/model_servers/llamacpp_python/base/Makefile
@@ -0,0 +1,18 @@
+.PHONY: build
+build:
+	podman build -f Containerfile -t ghcr.io/ai-lab-recipes/playground --format docker .
+
+models/llama-2-7b-chat.Q5_K_S.gguf:
+	curl -s -S -L -f https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf -z $@ -o $@.tmp && mv -f $@.tmp $@ 2>/dev/null || rm -f $@.tmp $@
+
+.PHONY: install
+install:
+	pip install -r model_servers/llamacpp_python/base/tests/requirements.txt
+
+.PHONY: run
+run: models/llama-2-7b-chat.Q5_K_S.gguf install
+	podman run -it -d -p 8001:8001 -v ./models:/locallm/models:ro,Z -e MODEL_PATH=models/llama-2-7b-chat.Q5_K_S.gguf -e HOST=0.0.0.0 -e PORT=8001 --net=host ghcr.io/redhat-et/playground
+
+.PHONY: test
+test: models/llama-2-7b-chat.Q5_K_S.gguf install
+	pytest --log-cli-level NOTSET
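The `models/llama-2-7b-chat.Q5_K_S.gguf` target above leans on curl's `-z` (time-conditional) flag, so an up-to-date local file is not downloaded again, and a failed transfer removes the temporary file instead of leaving a partial model behind. With the `$@` automatic variable expanded, the recipe is equivalent to:

```bash
# The Makefile download recipe with $@ expanded (illustration only).
curl -s -S -L -f \
    https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf \
    -z models/llama-2-7b-chat.Q5_K_S.gguf \
    -o models/llama-2-7b-chat.Q5_K_S.gguf.tmp \
    && mv -f models/llama-2-7b-chat.Q5_K_S.gguf.tmp models/llama-2-7b-chat.Q5_K_S.gguf 2>/dev/null \
    || rm -f models/llama-2-7b-chat.Q5_K_S.gguf.tmp models/llama-2-7b-chat.Q5_K_S.gguf
```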
diff --git a/model_servers/llamacpp_python/base/tests/__init__.py b/model_servers/llamacpp_python/base/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/model_servers/llamacpp_python/base/tests/conftest.py b/model_servers/llamacpp_python/base/tests/conftest.py
new file mode 100644
index 00000000..96deb7dd
--- /dev/null
+++ b/model_servers/llamacpp_python/base/tests/conftest.py
@@ -0,0 +1,32 @@
+import pytest_container
+import os
+
+
+MS = pytest_container.Container(
+    url=f"containers-storage:{os.environ['REGISTRY']}/{os.environ['IMAGE_NAME']}",
+    volume_mounts=[
+        pytest_container.container.BindMount(
+            container_path="/locallm/models",
+            host_path="./models",
+            flags=["ro"]
+        )
+    ],
+    extra_environment_variables={
+        "MODEL_PATH": "models/llama-2-7b-chat.Q5_K_S.gguf",
+        "HOST": "0.0.0.0",
+        "PORT": "8001"
+    },
+    forwarded_ports=[
+        pytest_container.PortForwarding(
+            container_port=8001,
+            host_port=8001
+        )
+    ],
+    extra_launch_args=["--net=host"]
+)
+
+def pytest_generate_tests(metafunc):
+    pytest_container.auto_container_parametrize(metafunc)
+
+def pytest_addoption(parser):
+    pytest_container.add_logging_level_options(parser)
diff --git a/model_servers/llamacpp_python/base/tests/requirements.txt b/model_servers/llamacpp_python/base/tests/requirements.txt
new file mode 100644
index 00000000..751d336d
--- /dev/null
+++ b/model_servers/llamacpp_python/base/tests/requirements.txt
@@ -0,0 +1,6 @@
+pip==24.0
+pytest-container==0.4.0
+pytest-testinfra==10.1.0
+pytest==8.1.1
+requests==2.31.0
+tenacity==8.2.3
diff --git a/model_servers/llamacpp_python/base/tests/test_alive.py b/model_servers/llamacpp_python/base/tests/test_alive.py
new file mode 100644
index 00000000..fcad510a
--- /dev/null
+++ b/model_servers/llamacpp_python/base/tests/test_alive.py
@@ -0,0 +1,13 @@
+import pytest_container
+from .conftest import MS
+import tenacity
+
+CONTAINER_IMAGES = [MS]
+
+
+def test_etc_os_release_present(auto_container: pytest_container.container.ContainerData):
+    assert auto_container.connection.file("/etc/os-release").exists
+
+@tenacity.retry(stop=tenacity.stop_after_attempt(5), wait=tenacity.wait_exponential())
+def test_alive(auto_container: pytest_container.container.ContainerData, host):
+    host.run_expect([0], f"curl http://localhost:{auto_container.forwarded_ports[0].host_port}").stdout.strip()
diff --git a/model_servers/llamacpp_python/src/requirements.txt b/model_servers/llamacpp_python/src/requirements.txt
index bbea3dd8..a3ffc4c0 100644
--- a/model_servers/llamacpp_python/src/requirements.txt
+++ b/model_servers/llamacpp_python/src/requirements.txt
@@ -1 +1,2 @@
-llama-cpp-python[server]
\ No newline at end of file
+llama-cpp-python[server]==0.2.57
+pip==24.0
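conftest.py resolves the image from local containers-storage via the `REGISTRY` and `IMAGE_NAME` environment variables and bind-mounts `./models` into the container, so running the suite outside CI needs those variables exported and the model file present. A minimal sketch from the repository root, assuming the image was already built and tagged `ghcr.io/ai-lab-recipes/playground` (values taken from devcontainer.json; adjust to whatever tag you built):

```bash
# Hypothetical local test run, mirroring what the Makefile test target does in CI.
export REGISTRY=ghcr.io
export IMAGE_NAME=ai-lab-recipes/playground

pip install -r model_servers/llamacpp_python/base/tests/requirements.txt
make -f model_servers/llamacpp_python/base/Makefile models/llama-2-7b-chat.Q5_K_S.gguf
pytest --log-cli-level NOTSET model_servers/llamacpp_python/base/tests/
```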