From d58410a084f9dcc9f647ea956b7fdf5eea3d7cfd Mon Sep 17 00:00:00 2001
From: sallyom
Date: Fri, 29 Mar 2024 13:54:11 -0400
Subject: [PATCH] update mistral model to K_M and image updates to ai-lab

Signed-off-by: sallyom
---
 .github/workflows/model_servers.yaml                   |  2 +-
 ai-lab-recipes-images.md                               | 10 ++++------
 model_servers/llamacpp_python/Makefile                 |  5 ++++-
 model_servers/llamacpp_python/README.md                |  4 ++--
 model_servers/llamacpp_python/tests/conftest.py        |  2 +-
 model_servers/llamacpp_python/tooling_options.ipynb    |  2 +-
 models/Containerfile                                   |  4 ++--
 .../natural_language_processing/chatbot/ai-lab.yaml    |  4 ++--
 .../chatbot/quadlet/chatbot.yaml                       |  4 ++--
 .../code-generation/ai-lab.yaml                        |  2 +-
 .../code-generation/quadlet/codegen.image              |  2 +-
 recipes/natural_language_processing/rag/ai-lab.yaml    |  4 ++--
 .../natural_language_processing/summarizer/ai-lab.yaml |  4 ++--
 .../summarizer/quadlet/summarizer.image                |  4 ++--
 .../summarizer/quadlet/summarizer.yaml                 |  8 ++++----
 15 files changed, 31 insertions(+), 30 deletions(-)

diff --git a/.github/workflows/model_servers.yaml b/.github/workflows/model_servers.yaml
index 5968a8c7b..76381c16f 100644
--- a/.github/workflows/model_servers.yaml
+++ b/.github/workflows/model_servers.yaml
@@ -49,7 +49,7 @@ jobs:
 
       - name: Download model
         working-directory: ./model_servers/llamacpp_python/
-        run: make llama-2-7b-chat.Q5_K_S.gguf
+        run: make mistral-7b-instruct-v0.1.Q4_K_M.gguf
 
       - name: Set up Python
         uses: actions/setup-python@v5.0.0
diff --git a/ai-lab-recipes-images.md b/ai-lab-recipes-images.md
index 85ec8f4d2..25e97e4b6 100644
--- a/ai-lab-recipes-images.md
+++ b/ai-lab-recipes-images.md
@@ -1,8 +1,8 @@
 ## Images (x86_64, aarch64) currently built from GH Actions in this repository
 
-- quay.io/redhat-et/locallm-model-service:latest
+- quay.io/ai-lab/llamacpp-python:latest
 - quay.io/redhat-et/locallm-text-summarizer:latest
-- quay.io/redhat-et/locallm-chatbot:latest
+- quay.io/ai-lab/chatbot:latest
 - quay.io/redhat-et/locallm-rag:latest
 - quay.io/redhat-et/locallm-codegen:latest
 - quay.io/redhat-et/locallm-chromadb:latest
@@ -11,9 +11,7 @@
 
 ## Model Images (x86_64, aarch64) currently in `quay.io/redhat-et/locallm-*`
 
-- quay.io/redhat-et/locallm-llama-2-7b:latest
-  - [model download link](https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf)
-- quay.io/redhat-et/locallm-mistral-7b-gguf:latest
-  - [model download link](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_S.gguf)
+- quay.io/ai-lab/mistral-7b-instruct:latest
+  - [model download link](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf)
 - quay.io/redhat-et/locallm-codellama-7b-gguf:latest
   - [model download link](https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q4_K_M.gguf)
diff --git a/model_servers/llamacpp_python/Makefile b/model_servers/llamacpp_python/Makefile
index 0e51b103c..ec152e0f0 100644
--- a/model_servers/llamacpp_python/Makefile
+++ b/model_servers/llamacpp_python/Makefile
@@ -5,13 +5,16 @@ build:
 llama-2-7b-chat.Q5_K_S.gguf:
 	curl -s -S -L -f https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf -z $@ -o $@.tmp && mv -f $@.tmp $@ 2>/dev/null || rm -f $@.tmp $@
 
+mistral-7b-instruct-v0.1.Q4_K_M.gguf:
+	curl -s -S -L -f https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf -z $@ -o $@.tmp && mv -f $@.tmp $@ 2>/dev/null || rm -f $@.tmp $@
+
 .PHONY: install
 install:
 	pip install -r tests/requirements-test.txt
 
 .PHONY: run
 run:
-	podman run -it -d -p 8001:8001 -v ./models:/locallm/models:ro,Z -e MODEL_PATH=models/llama-2-7b-chat.Q5_K_S.gguf -e HOST=0.0.0.0 -e PORT=8001 --net=host ghcr.io/redhat-et/model_servers
+	podman run -it -d -p 8001:8001 -v ./models:/locallm/models:ro,Z -e MODEL_PATH=models/mistral-7b-instruct-v0.1.Q4_K_M.gguf -e HOST=0.0.0.0 -e PORT=8001 --net=host ghcr.io/redhat-et/model_servers
 
 .PHONY: test
 test:
diff --git a/model_servers/llamacpp_python/README.md b/model_servers/llamacpp_python/README.md
index 90541ecf6..98b7a5daf 100644
--- a/model_servers/llamacpp_python/README.md
+++ b/model_servers/llamacpp_python/README.md
@@ -20,7 +20,7 @@ At the time of this writing, 2 models are known to work with this service
 - **Llama2-7b**
   - Download URL: [https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf](https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf)
 - **Mistral-7b**
-  - Download URL: [https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_S.gguf](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_S.gguf)
+  - Download URL: [https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf)
 
 ```bash
 cd ../models
@@ -29,7 +29,7 @@ cd ../
 ```
 or
 ```bash
-make -f Makefile models/llama-2-7b-chat.Q5_K_S.gguf
+make -f Makefile models/mistral-7b-instruct-v0.1.Q4_K_M.gguf
 ```
 
 ### Deploy Model Service
diff --git a/model_servers/llamacpp_python/tests/conftest.py b/model_servers/llamacpp_python/tests/conftest.py
index a9af975b6..26fa40148 100644
--- a/model_servers/llamacpp_python/tests/conftest.py
+++ b/model_servers/llamacpp_python/tests/conftest.py
@@ -12,7 +12,7 @@
         )
     ],
     extra_environment_variables={
-        "MODEL_PATH": "models/llama-2-7b-chat.Q5_K_S.gguf",
+        "MODEL_PATH": "models/mistral-7b-instruct-v0.1.Q4_K_M.gguf",
         "HOST": "0.0.0.0",
         "PORT": "8001"
     },
diff --git a/model_servers/llamacpp_python/tooling_options.ipynb b/model_servers/llamacpp_python/tooling_options.ipynb
index 5f830aa4a..ebad2174c 100644
--- a/model_servers/llamacpp_python/tooling_options.ipynb
+++ b/model_servers/llamacpp_python/tooling_options.ipynb
@@ -23,7 +23,7 @@
     "This notebook assumes that the playground image is running locally. Once built, you can use the below to start the model service image. \n",
\n", "\n", "```bash\n", - "podman run -it -p 8000:8000 -v /locallm/models:/locallm/models:Z -e MODEL_PATH=models/llama-2-7b-chat.Q5_K_S.gguf playground\n", + "podman run -it -p 8000:8000 -v /locallm/models:/locallm/models:Z -e MODEL_PATH=models/mistral-7b-instruct-v0.1.Q4_K_M.gguf playground\n", "```" ] }, diff --git a/models/Containerfile b/models/Containerfile index e359bf7cb..50f0abec6 100644 --- a/models/Containerfile +++ b/models/Containerfile @@ -1,9 +1,9 @@ #https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf -#https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_S.gguf +#https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf #https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q4_K_M.gguf #https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin # podman build --build-arg MODEL_URL=https://... -t quay.io/yourimage . FROM registry.access.redhat.com/ubi9/ubi-micro:9.3-13 -ARG MODEL_URL +ARG MODEL_URL=https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf WORKDIR /model ADD $MODEL_URL . diff --git a/recipes/natural_language_processing/chatbot/ai-lab.yaml b/recipes/natural_language_processing/chatbot/ai-lab.yaml index 812c9d288..ddac72f43 100644 --- a/recipes/natural_language_processing/chatbot/ai-lab.yaml +++ b/recipes/natural_language_processing/chatbot/ai-lab.yaml @@ -15,7 +15,7 @@ application: - amd64 ports: - 8001 - image: quay.io/redhat-et/locallm-model-service:latest + image: quay.io/ai-lab/llamacppp-python:latest - name: streamlit-chat-app contextdir: . containerfile: builds/Containerfile @@ -24,4 +24,4 @@ application: - amd64 ports: - 8501 - image: quay.io/redhat-et/locallm-chatbot:latest \ No newline at end of file + image: quay.io/ai-lab/chatbot:latest diff --git a/recipes/natural_language_processing/chatbot/quadlet/chatbot.yaml b/recipes/natural_language_processing/chatbot/quadlet/chatbot.yaml index 540d8d0d9..c7b479064 100644 --- a/recipes/natural_language_processing/chatbot/quadlet/chatbot.yaml +++ b/recipes/natural_language_processing/chatbot/quadlet/chatbot.yaml @@ -8,7 +8,7 @@ spec: initContainers: - name: model-file image: quay.io/ai-lab/mistral-7b-instruct:latest - command: ['/usr/bin/install', "/model/mistral-7b-instruct-v0.1.Q4_K_S.gguf", "/shared/"] + command: ['/usr/bin/install', "/model/mistral-7b-instruct-v0.1.Q4_K_M.gguf", "/shared/"] volumeMounts: - name: model-file mountPath: /shared @@ -29,7 +29,7 @@ spec: - name: PORT value: 8001 - name: MODEL_PATH - value: /model/mistral-7b-instruct-v0.1.Q4_K_S.gguf + value: /model/mistral-7b-instruct-v0.1.Q4_K_M.gguf image: quay.io/ai-lab/llamacpp-python:latest name: chatbot-model-service ports: diff --git a/recipes/natural_language_processing/code-generation/ai-lab.yaml b/recipes/natural_language_processing/code-generation/ai-lab.yaml index 56331ecf7..fa5e7eb96 100644 --- a/recipes/natural_language_processing/code-generation/ai-lab.yaml +++ b/recipes/natural_language_processing/code-generation/ai-lab.yaml @@ -15,7 +15,7 @@ application: - amd64 ports: - 8001 - image: quay.io/redhat-et/locallm-model-service:latest + image: quay.io/ai-lab/llamacpp-python:latest - name: codegen-app contextdir: . 
       containerfile: builds/Containerfile
diff --git a/recipes/natural_language_processing/code-generation/quadlet/codegen.image b/recipes/natural_language_processing/code-generation/quadlet/codegen.image
index f64815c82..8e733138c 100644
--- a/recipes/natural_language_processing/code-generation/quadlet/codegen.image
+++ b/recipes/natural_language_processing/code-generation/quadlet/codegen.image
@@ -3,5 +3,5 @@ WantedBy=codegen.service
 
 [Image]
 Image=quay.io/redhat-et/locallm-codellama-7b-gguf:latest
-Image=quay.io/redhat-et/locallm-model-service:latest
+Image=quay.io/ai-lab/llamacpp-python:latest
 Image=quay.io/redhat-et/locallm-codegen:latest
diff --git a/recipes/natural_language_processing/rag/ai-lab.yaml b/recipes/natural_language_processing/rag/ai-lab.yaml
index f22a71950..cc133910a 100644
--- a/recipes/natural_language_processing/rag/ai-lab.yaml
+++ b/recipes/natural_language_processing/rag/ai-lab.yaml
@@ -15,7 +15,7 @@ application:
         - amd64
       ports:
         - 8001
-      image: quay.io/redhat-et/locallm-model-service:latest
+      image: quay.io/ai-lab/llamacpp-python:latest
     - name: chromadb-server
       contextdir: ../../../vector_dbs/chromadb
       containerfile: Containerfile
@@ -34,4 +34,4 @@ application:
         - amd64
       ports:
         - 8501
-      image: quay.io/redhat-et/locallm-rag:latest
\ No newline at end of file
+      image: quay.io/redhat-et/locallm-rag:latest
diff --git a/recipes/natural_language_processing/summarizer/ai-lab.yaml b/recipes/natural_language_processing/summarizer/ai-lab.yaml
index 22e72778e..f3bbced46 100644
--- a/recipes/natural_language_processing/summarizer/ai-lab.yaml
+++ b/recipes/natural_language_processing/summarizer/ai-lab.yaml
@@ -15,7 +15,7 @@ application:
         - amd64
       ports:
         - 8001
-      image: quay.io/redhat-et/locallm-model-service:latest
+      image: quay.io/ai-lab/llamacpp-python:latest
     - name: streamlit-summary-app
       contextdir: .
       containerfile: builds/Containerfile
@@ -24,4 +24,4 @@ application:
         - amd64
       ports:
         - 8501
-      image: quay.io/redhat-et/locallm-text-summarizer:latest
\ No newline at end of file
+      image: quay.io/redhat-et/locallm-text-summarizer:latest
diff --git a/recipes/natural_language_processing/summarizer/quadlet/summarizer.image b/recipes/natural_language_processing/summarizer/quadlet/summarizer.image
index 9511d5dfc..c43364398 100644
--- a/recipes/natural_language_processing/summarizer/quadlet/summarizer.image
+++ b/recipes/natural_language_processing/summarizer/quadlet/summarizer.image
@@ -2,6 +2,6 @@
 WantedBy=summarizer.service
 
 [Image]
-Image=quay.io/redhat-et/locallm-mistral-7b-gguf:latest
-Image=quay.io/redhat-et/locallm-model-service:latest
+Image=quay.io/ai-lab/mistral-7b-instruct:latest
+Image=quay.io/ai-lab/llamacpp-python:latest
 Image=quay.io/redhat-et/locallm-text-summarizer:latest
diff --git a/recipes/natural_language_processing/summarizer/quadlet/summarizer.yaml b/recipes/natural_language_processing/summarizer/quadlet/summarizer.yaml
index a1d8c7086..94546b920 100644
--- a/recipes/natural_language_processing/summarizer/quadlet/summarizer.yaml
+++ b/recipes/natural_language_processing/summarizer/quadlet/summarizer.yaml
@@ -7,8 +7,8 @@
 spec:
   initContainers:
   - name: model-file
-    image: quay.io/redhat-et/locallm-mistral-7b-gguf:latest
-    command: ['/usr/bin/install', "/model/mistral-7b-instruct-v0.1.Q4_K_S.gguf", "/shared/"]
+    image: quay.io/ai-lab/mistral-7b-instruct:latest
+    command: ['/usr/bin/install', "/model/mistral-7b-instruct-v0.1.Q4_K_M.gguf", "/shared/"]
     volumeMounts:
     - name: model-file
       mountPath: /shared
@@ -29,8 +29,8 @@ spec:
     - name: PORT
      value: 8001
     - name: MODEL_PATH
-      value: /model/mistral-7b-instruct-v0.1.Q4_K_S.gguf
-    image: quay.io/redhat-et/locallm-model-service:latest
+      value: /model/mistral-7b-instruct-v0.1.Q4_K_M.gguf
+    image: quay.io/ai-lab/llamacpp-python:latest
     name: summarizer-model-service
     ports:
     - containerPort: 8001
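
A quick way to sanity-check this change locally (not part of the patch itself): the sketch below downloads the new Q4_K_M weights with the Makefile target added above, then starts the model service with arguments adapted from the Makefile's `run` target. It assumes the rebuilt `quay.io/ai-lab/llamacpp-python:latest` image serves llama-cpp-python's OpenAI-compatible API on port 8001; the Makefile's `run` target still references `ghcr.io/redhat-et/model_servers`, so substitute whichever image you actually built.

```bash
# Download the Q4_K_M weights via the Makefile target added in this patch
cd model_servers/llamacpp_python
make mistral-7b-instruct-v0.1.Q4_K_M.gguf

# Place the file where the run command's volume mount expects it
mkdir -p models && mv mistral-7b-instruct-v0.1.Q4_K_M.gguf models/

# Start the model service (flags adapted from the Makefile's run target)
podman run -d -p 8001:8001 \
  -v ./models:/locallm/models:ro,Z \
  -e MODEL_PATH=models/mistral-7b-instruct-v0.1.Q4_K_M.gguf \
  -e HOST=0.0.0.0 -e PORT=8001 \
  quay.io/ai-lab/llamacpp-python:latest

# Smoke test: llama-cpp-python exposes OpenAI-compatible endpoints, so the
# response should list the mounted .gguf file as the loaded model
curl -s http://localhost:8001/v1/models
```

If the service is healthy, the last command returns a JSON model list referencing the mounted `.gguf` path, confirming the renamed image and the K_M quantization work together end to end.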