From cc9a15fc3e6180ea4fa13a7aec9e474dde157e5f Mon Sep 17 00:00:00 2001
From: Liora Milbaum
Date: Sun, 7 Apr 2024 21:27:17 +0300
Subject: [PATCH] Refactor model servers

Signed-off-by: Liora Milbaum
---
 .github/workflows/model_servers.yaml          | 102 ++++++------
 model_servers/llamacpp_python/Makefile        |   6 +-
 model_servers/whispercpp/Makefile             |  16 ++-
 model_servers/whispercpp/base/Containerfile   |   2 +-
 model_servers/whispercpp/tests/__init__.py    |   0
 model_servers/whispercpp/tests/conftest.py    |  30 ++++++
 .../whispercpp/tests/requirements.txt         |   8 ++
 model_servers/whispercpp/tests/test_alive.py  |  13 +++
 .../chatbot/tests/functional/conftest.py      |   4 +-
 9 files changed, 100 insertions(+), 81 deletions(-)
 create mode 100644 model_servers/whispercpp/tests/__init__.py
 create mode 100644 model_servers/whispercpp/tests/conftest.py
 create mode 100644 model_servers/whispercpp/tests/requirements.txt
 create mode 100644 model_servers/whispercpp/tests/test_alive.py

diff --git a/.github/workflows/model_servers.yaml b/.github/workflows/model_servers.yaml
index 028c688f..712bf28a 100644
--- a/.github/workflows/model_servers.yaml
+++ b/.github/workflows/model_servers.yaml
@@ -1,31 +1,31 @@
-name: model_servers
+name: Model Servers
 
 on:
   pull_request:
     branches:
      - main
-    paths:
-      - ./model_servers/
-      - .github/workflows/model_servers.yaml
+    # paths:
+    #   - ./model_servers/
+    #   - .github/workflows/model_servers.yaml
   push:
     branches:
       - main
-    paths:
-      - ./model_servers/
-      - .github/workflows/model_servers.yaml
+    # paths:
+    #   - ./model_servers/
+    #   - .github/workflows/model_servers.yaml
 
 env:
   REGISTRY: ghcr.io
-  IMAGE_NAME: ${{ github.repository_owner }}/model_servers
-  # Image Repo Migration variables
-  NEW_REGISTRY: quay.io
-  NEW_REGISTRY_USER: ai-lab+ghrobot
-  NEW_IMAGE_NAME_LLAMA: quay.io/ai-lab/model_servers/llamacpp_python
-  IMAGE_NAME_WHISPER: quay.io/ai-lab/model_servers/whispercpp
-
 jobs:
   build-and-push-image:
+    strategy:
+      matrix:
+        include:
+          - image_name: llamacpp_python
+            model: mistral
+          - image_name: whispercpp
+            model: whisper-small
     runs-on: ubuntu-latest
     permissions:
       contents: read
@@ -38,18 +38,24 @@
     steps:
       - uses: actions/checkout@v4.1.1
 
+      - name: Install qemu dependency
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y qemu-user-static
+
       - name: Build Image
         id: build_image
         uses: redhat-actions/buildah-build@v2.13
         with:
-          image: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+          image: ${{ env.REGISTRY }}/${{ matrix.image_name }}
+          platforms: linux/amd64, linux/arm64
           tags: latest
-          containerfiles: ./model_servers/llamacpp_python/base/Containerfile
-          context: model_servers/llamacpp_python/
+          containerfiles: ./model_servers/${{ matrix.image_name }}/base/Containerfile
+          context: model_servers/${{ matrix.image_name }}/
 
       - name: Download model
-        working-directory: ./model_servers/llamacpp_python/
-        run: make download-model-mistral
+        working-directory: ./model_servers/${{ matrix.image_name }}/
+        run: make ${{ matrix.model }}
 
       - name: Set up Python
         uses: actions/setup-python@v5.0.0
@@ -57,17 +63,17 @@
           python-version: '3.11'
 
       - name: Install python dependencies
-        working-directory: ./model_servers/llamacpp_python/
+        working-directory: ./model_servers/${{ matrix.image_name }}/
         run: make install
 
       - name: Run tests
-        working-directory: ./model_servers/llamacpp_python/
+        working-directory: ./model_servers/${{ matrix.image_name }}/
         run: make test
+        env:
+          IMAGE_NAME: ${{ matrix.image_name }}
 
       - name: Login to container registry
-        if: >
-          (github.event_name == 'push' || github.event_name == 'schedule') &&
-          (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/v'))
+        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
         uses: docker/login-action@v3.1.0
         with:
           registry: ${{ env.REGISTRY }}
@@ -76,55 +82,9 @@
       - name: Push image
         id: push_image
-        if: >
-          (github.event_name == 'push' || github.event_name == 'schedule') &&
-          (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/v'))
+        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
         uses: redhat-actions/push-to-registry@v2.8
         with:
           image: ${{ steps.build_image.outputs.image }}
           tags: ${{ steps.build_image.outputs.tags }}
           registry: ${{ env.REGISTRY }}
-
-  build-and-push-image-whispercpp:
-    runs-on: ubuntu-latest
-    permissions:
-      contents: read
-      packages: write
-    services:
-      registry:
-        image: registry:2.8.3
-        ports:
-          - 5000:5000
-    steps:
-      - name: Build Image Whispercpp
-        id: build_image_whisper
-        uses: redhat-actions/buildah-build@v2.13
-        working-directory: ./model_servers/whispercpp/
-        run: make build
-
-      - name: Download model Whispercpp
-        working-directory: ./model_servers/whispercpp/
-        run: make download-model-whisper-small
-
-      - name: Login to container registry
-        if: >
-          (github.event_name == 'push' || github.event_name == 'schedule') &&
-          (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/v'))
-
-        uses: docker/login-action@v3.1.0
-        with:
-          registry: ${{ env.NEW_REGISTRY }}
-          username: ${{ env.NEW_REGISTRY_USER }}
-          password: ${{ secrets.AILAB_GHROBOT_TOKEN }} # THIS NEEDS TO BE CREATED
-
-      - name: Push image
-        id: push_image
-        if: >
-          (github.event_name == 'push' || github.event_name == 'schedule') &&
-          (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/v'))
-
-        uses: redhat-actions/push-to-registry@v2.8
-        with:
-          image: ${{ steps.build_image_whisper.outputs.image }}
-          tags: ${{ steps.build_image_whisper.outputs.tags }}
-          registry: ${{ env.NEW_REGISTRY }}
-
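The workflow change above folds the separate whispercpp job into a single matrix job, so every server goes through the same build, model-download, install, and test steps. A minimal sketch of reproducing the whispercpp leg locally, assuming podman and buildah are installed, the repo root is the working directory, and the Makefile's IMAGE tag (not shown in this patch) matches the ghcr.io/whispercpp reference that the test fixtures construct from REGISTRY and IMAGE_NAME:

    # Hypothetical local equivalent of one CI matrix leg (the image tag is assumed).
    cd model_servers/whispercpp
    make build              # podman build -t $(IMAGE) . -f Containerfile
    make whisper-small      # fetch the whisper-small weights into ../../models
    make install            # pip install -r tests/requirements.txt
    REGISTRY=ghcr.io IMAGE_NAME=whispercpp make test
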
diff --git a/model_servers/llamacpp_python/Makefile b/model_servers/llamacpp_python/Makefile
index 4356b976..c017559e 100644
--- a/model_servers/llamacpp_python/Makefile
+++ b/model_servers/llamacpp_python/Makefile
@@ -31,7 +31,7 @@ ifeq ($(OS),Linux)
 endif
 
 .Phony: all
-all: build download-model-mistral run
+all: build mistral run
 
 .PHONY: build
 build:
@@ -54,8 +54,8 @@
 download-model-llama:
 	curl -H "Cache-Control: no-cache" -s -S -L -f $(LLAMA_MODEL_URL) -z $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME) -o $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp && mv -f $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME) 2>/dev/null || rm -f $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME)
 
-.PHONY: download-model-mistral
-download-model-mistral:
+.PHONY: mistral
+mistral:
 	curl -H "Cache-Control: no-cache" -s -S -L -f $(MISTRAL_MODEL_URL) -z $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME) -o $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp && mv -f $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME) 2>/dev/null || rm -f $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME)
 
 .PHONY: install
diff --git a/model_servers/whispercpp/Makefile b/model_servers/whispercpp/Makefile
index 53ad2891..4657904b 100644
--- a/model_servers/whispercpp/Makefile
+++ b/model_servers/whispercpp/Makefile
@@ -13,6 +13,7 @@
 WHISPER_BASE_MODEL_NAME := ggml-base.en.bin
 WHISPER_BASE_MODEL_URL := https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin
 
 SELECTED_MODEL_NAME := $(or $(SELECTED_MODEL),$(WHISPER_SMALL_MODEL_NAME))
+SELECTED_MODEL_URL := $(or $(SELECTED_MODEL_LINK),$(WHISPER_SMALL_MODEL_URL))
 
 # --- END MODEL OPTIONS ---
@@ -25,17 +26,21 @@ ifeq ($(OS),Linux)
 endif
 
 .PHONY: all
-all: build download-model-whisper-small run
+all: build whisper-small run
 
 .PHONY: build
 build:
 	podman build -t $(IMAGE) . -f Containerfile
 
-.PHONY: download-model-whisper-small
-download-model-whisper-small:
+.PHONY: whisper-small
+whisper-small:
 	cd ../../models && \
 	curl -s -S -L -f $(WHISPER_SMALL_MODEL_URL) -z $(WHISPER_SMALL_MODEL_NAME) -o $(WHISPER_SMALL_MODEL_NAME).tmp && mv -f $(WHISPER_SMALL_MODEL_NAME).tmp $(WHISPER_SMALL_MODEL_NAME) 2>/dev/null || rm -f $(WHISPER_SMALL_MODEL_NAME).tmp $(WHISPER_SMALL_MODEL_NAME)
 
+.PHONY: install
+install:
+	pip install -r tests/requirements.txt
+
 .PHONY: download-model-whisper-base
 download-model-whisper-base:
 	cd ../../models && \
@@ -45,3 +50,8 @@
 run:
 	cd ../../models && \
 	podman run -d --rm -it -p $(PORT):$(PORT) -v ./$(SELECTED_MODEL_NAME):$(MODELS_PATH)/$(SELECTED_MODEL_NAME):$(BIND_MOUNT_OPTIONS) -e HOST=0.0.0.0 -e MODEL_PATH=$(MODELS_PATH)/$(SELECTED_MODEL_NAME) -e PORT=$(PORT) $(IMAGE)
+
+.PHONY: test
+test:
+	curl -H "Cache-Control: no-cache" -s -S -L -f $(SELECTED_MODEL_URL) -z ./model.gguf -o ./model.gguf.tmp && mv -f ./model.gguf.tmp ./model.gguf 2>/dev/null || rm -f ./model.gguf.tmp ./model.gguf
+	pytest --log-cli-level NOTSET
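Because run and test now resolve the model through SELECTED_MODEL_NAME and SELECTED_MODEL_URL, either target can be pointed at another model from the command line without editing the Makefile; a sketch using the base-model name and URL already defined at the top of this Makefile:

    # Serve the base English model instead of the small one.
    make run SELECTED_MODEL=ggml-base.en.bin
    # Have `make test` download the matching weights before running pytest.
    make test SELECTED_MODEL_LINK=https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin
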
diff --git a/model_servers/whispercpp/base/Containerfile b/model_servers/whispercpp/base/Containerfile
index 398b6d45..fa9dff1e 100644
--- a/model_servers/whispercpp/base/Containerfile
+++ b/model_servers/whispercpp/base/Containerfile
@@ -19,6 +19,6 @@
 COPY --from=builder /app /app
 COPY --from=mwader/static-ffmpeg:6.1.1 /ffmpeg /bin/
 COPY --from=mwader/static-ffmpeg:6.1.1 /ffprobe /bin/
-COPY run.sh /app/
+COPY src /app/
 ENV AUDIO_FILE=/app/jfk.wav
 ENTRYPOINT ["sh", "run.sh"]
diff --git a/model_servers/whispercpp/tests/__init__.py b/model_servers/whispercpp/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/model_servers/whispercpp/tests/conftest.py b/model_servers/whispercpp/tests/conftest.py
new file mode 100644
index 00000000..4cd3e203
--- /dev/null
+++ b/model_servers/whispercpp/tests/conftest.py
@@ -0,0 +1,30 @@
+import pytest_container
+import os
+
+MS = pytest_container.Container(
+    url=f"containers-storage:{os.environ['REGISTRY']}/{os.environ['IMAGE_NAME']}",
+    volume_mounts=[
+        pytest_container.container.BindMount(
+            container_path="/locallm/models/model.gguf",
+            host_path="./model.gguf",
+            flags=["ro"]
+        )
+    ],
+    extra_environment_variables={
+        "MODEL_PATH": "/locallm/models/model.gguf",
+        "HOST": "0.0.0.0",
+        "PORT": "8001"
+    },
+    forwarded_ports=[
+        pytest_container.PortForwarding(
+            container_port=8001,
+            host_port=8001
+        )
+    ],
+)
+
+def pytest_generate_tests(metafunc):
+    pytest_container.auto_container_parametrize(metafunc)
+
+def pytest_addoption(parser):
+    pytest_container.add_logging_level_options(parser)
diff --git a/model_servers/whispercpp/tests/requirements.txt b/model_servers/whispercpp/tests/requirements.txt
new file mode 100644
index 00000000..22fc97f2
--- /dev/null
+++ b/model_servers/whispercpp/tests/requirements.txt
@@ -0,0 +1,8 @@
+pip==24.0
+pytest-container==0.4.0
+pytest-selenium==4.1.0
+pytest-testinfra==10.1.0
+pytest==8.1.1
+requests==2.31.0
+selenium==4.19.0
+tenacity==8.2.3
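The conftest above derives the container URL from the REGISTRY and IMAGE_NAME environment variables and bind-mounts a local ./model.gguf into the container, so both variables must be set and the model file must exist before pytest starts; a minimal sketch, assuming the image was already built and the model fetched the way the Makefile's test target does:

    # Run the suite by hand; conftest.py raises KeyError if REGISTRY or
    # IMAGE_NAME is unset, and the read-only bind mount needs ./model.gguf.
    cd model_servers/whispercpp
    REGISTRY=ghcr.io IMAGE_NAME=whispercpp pytest --log-cli-level NOTSET
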
diff --git a/model_servers/whispercpp/tests/test_alive.py b/model_servers/whispercpp/tests/test_alive.py
new file mode 100644
index 00000000..fcad510a
--- /dev/null
+++ b/model_servers/whispercpp/tests/test_alive.py
@@ -0,0 +1,13 @@
+import pytest_container
+from .conftest import MS
+import tenacity
+
+CONTAINER_IMAGES = [MS]
+
+
+def test_etc_os_release_present(auto_container: pytest_container.container.ContainerData):
+    assert auto_container.connection.file("/etc/os-release").exists
+
+@tenacity.retry(stop=tenacity.stop_after_attempt(5), wait=tenacity.wait_exponential())
+def test_alive(auto_container: pytest_container.container.ContainerData, host):
+    host.run_expect([0], f"curl http://localhost:{auto_container.forwarded_ports[0].host_port}").stdout.strip()
diff --git a/recipes/natural_language_processing/chatbot/tests/functional/conftest.py b/recipes/natural_language_processing/chatbot/tests/functional/conftest.py
index 507bde88..81d1bd97 100644
--- a/recipes/natural_language_processing/chatbot/tests/functional/conftest.py
+++ b/recipes/natural_language_processing/chatbot/tests/functional/conftest.py
@@ -1,11 +1,9 @@
 import pytest_container
 import os
-import pytest
-from selenium import webdriver
 
 MS = pytest_container.Container(
-    url=f"containers-storage:{os.environ['REGISTRY']}/model_servers",
+    url=f"containers-storage:{os.environ['REGISTRY']}/llamacpp_python",
     volume_mounts=[
         pytest_container.container.BindMount(
            container_path="/locallm/models",
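test_alive wraps its probe in a tenacity retry with exponential backoff because the server may still be loading the model when the container first comes up; the same liveness check from a shell, assuming the default port forwarding (8001) declared in conftest.py:

    # Poll the forwarded port until the model server responds.
    until curl -sSf http://localhost:8001 > /dev/null; do
      sleep 2
    done
    echo "model server is alive"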