Merge pull request #350 from Gregory-Pereira/311-swap-granite-for-mistral-cleanup

swap granite for mistral cleanup + model and recipe housekeeping
Gregory-Pereira authored Apr 29, 2024
2 parents 22a93cd + fcb16cf commit 2b94847
Showing 21 changed files with 66 additions and 41 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/chatbot.yaml
@@ -63,7 +63,7 @@ jobs:

       - name: Download model
         working-directory: ./models
-        run: make download-model-mistral
+        run: make download-model-granite

       - name: Run Functional Tests
         shell: bash
2 changes: 1 addition & 1 deletion .github/workflows/codegen.yaml
@@ -63,7 +63,7 @@ jobs:

       - name: Download model
         working-directory: ./models
-        run: make download-model-mistral
+        run: make download-model-mistral-code

       - name: Run Functional Tests
         shell: bash
3 changes: 3 additions & 0 deletions .github/workflows/model_image_build_push.yaml
@@ -43,6 +43,9 @@ jobs:
           label: Q4_K_M
           url: https://huggingface.co/instructlab/granite-7b-lab-GGUF/resolve/main/granite-7b-lab-Q4_K_M.gguf
           platforms: linux/amd64,linux/arm64
+        - image_name: whisper-small
+          url: https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin
+          platforms: linux/amd64,linux/arm64
     runs-on: ubuntu-latest
     permissions:
       contents: read
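For context, each matrix entry above feeds one model-image build. A rough shell sketch of what the new whisper-small entry drives — the `podman` invocation, tag, and context path are assumptions for illustration, not taken from the workflow:

```shell
# Hypothetical sketch: how a matrix entry could map onto a model-image build.
# Variable names mirror the matrix keys; the build command itself is assumed,
# so it is echoed rather than executed.
image_name="whisper-small"
url="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin"
platforms="linux/amd64,linux/arm64"
echo "podman build --platform ${platforms} --build-arg MODEL_URL=${url} -t quay.io/ai-lab/${image_name} models/"
```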
4 changes: 2 additions & 2 deletions .github/workflows/model_servers.yaml
@@ -27,13 +27,13 @@ jobs:
       matrix:
         include:
           - image_name: llamacpp_python
-            model: mistral
+            model: granite
             flavor: base
             directory: llamacpp_python
             platforms: linux/amd64,linux/arm64
             no_gpu: 1
           - image_name: llamacpp_python_cuda
-            model: mistral
+            model: granite
             flavor: cuda
             directory: llamacpp_python
             platforms: linux/amd64
2 changes: 1 addition & 1 deletion .github/workflows/rag.yaml
@@ -68,7 +68,7 @@ jobs:

       - name: Download model
         working-directory: ./models
-        run: make download-model-mistral
+        run: make download-model-granite

       - name: Run Functional Tests
         shell: bash
2 changes: 1 addition & 1 deletion .github/workflows/summarizer.yaml
@@ -63,7 +63,7 @@ jobs:

       - name: Download model
         working-directory: ./models
-        run: make download-model-mistral
+        run: make download-model-granite

       - name: Run Functional Tests
         shell: bash
35 changes: 27 additions & 8 deletions ailab-images.md
@@ -1,19 +1,38 @@
-## Images (x86_64, aarch64) currently built from GH Actions in this repository
+## Model Server Images (amd64, arm64) currently built from GH Actions in this repository
 
 - quay.io/ai-lab/llamacpp_python:latest
-- quay.io/ai-lab/llamacpp_python_cuda:latest
-- quay.io/ai-lab/llamacpp_python_vulkan:latest
+- quay.io/ai-lab/llamacpp-python-cuda:latest
+- quay.io/ai-lab/llamacpp-python-vulkan:latest
+- quay.io/redhat-et/locallm-object-detection-server:latest
 
+## Recipe Images (amd64, arm64)
 - quay.io/ai-lab/summarizer:latest
 - quay.io/ai-lab/chatbot:latest
 - quay.io/ai-lab/rag:latest
 - quay.io/ai-lab/codegen:latest
-- quay.io/ai-lab/chromadb:latest
 - quay.io/redhat-et/locallm-object-detection-client:latest
-- quay.io/redhat-et/locallm-object-detection-server:latest
 
-## Model Images (x86_64, aarch64)
+## Dependency images
+
+Images used in the `Bootc` aspect of this repo or tooling images
+
+- quay.io/ai-lab/nvidia-builder:latest
+- quay.io/ai-lab/instructlab-nvidia:latest
+- quay.io/ai-lab/nvidia-bootc:latest
+
+- quay.io/ai-lab/chromadb:latest
+- quay.io/ai-lab/model-converter:latest
+
+## Model Images (amd64, arm64)
 
 - quay.io/ai-lab/merlinite-7b-lab:latest
   - [model download link](https://huggingface.co/instructlab/merlinite-7b-lab-GGUF/resolve/main/merlinite-7b-lab-Q4_K_M.gguf)
+- quay.io/ai-lab/granite-7b-lab:latest
+  - [model download link](https://huggingface.co/instructlab/granite-7b-lab-GGUF/resolve/main/granite-7b-lab-Q4_K_M.gguf)
 - quay.io/ai-lab/mistral-7b-instruct:latest
   - [model download link](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf)
 - quay.io/ai-lab/codellama-7b:latest
   - [model download link](https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q4_K_M.gguf)
 - quay.io/ai-lab/mistral-7b-code-16k-qlora:latest
   - [model download link](https://huggingface.co/TheBloke/Mistral-7B-Code-16K-qlora-GGUF/resolve/main/mistral-7b-code-16k-qlora.Q4_K_M.gguf)
+- quay.io/ai-lab/whisper-small:latest
+  - [model download link](https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin)

4 changes: 2 additions & 2 deletions model_servers/llamacpp_python/Makefile
@@ -25,5 +25,5 @@ build-vulkan:
 
 .PHONY: download-model-granite # default model
 download-model-granite:
-	cd ../../models && \
-	make MODEL_NAME=granite-7b-lab-Q4_K_M.gguf MODEL_URL=https://huggingface.co/instructlab/granite-7b-lab-GGUF/resolve/main/granite-7b-lab-Q4_K_M.gguf -f Makefile download-model
+	cd ../../models/ && \
+	make download-model-granite
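With the change above, model names and URLs live only in the shared `models/Makefile`; the server Makefile just delegates. A hedged shell sketch of what the shared download step boils down to — the `curl` flags are an assumption, so the real call is left commented out:

```shell
# Sketch of the centralized download step (flags assumed; actual logic
# lives in models/Makefile). The echo stands in for the real download.
MODEL_NAME="granite-7b-lab-Q4_K_M.gguf"
MODEL_URL="https://huggingface.co/instructlab/granite-7b-lab-GGUF/resolve/main/${MODEL_NAME}"
# curl -fLo "${MODEL_NAME}" "${MODEL_URL}"   # real download, not run here
echo "download ${MODEL_NAME} from ${MODEL_URL}"
```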
2 changes: 1 addition & 1 deletion model_servers/llamacpp_python/tests/conftest.py
@@ -13,7 +13,7 @@
 IMAGE_NAME = os.environ['IMAGE_NAME']
 
 if not 'MODEL_NAME' in os.environ:
-    MODEL_NAME = 'mistral-7b-instruct-v0.2.Q4_K_M.gguf'
+    MODEL_NAME = 'granite-7b-lab-Q4_K_M.gguf'
 else:
     MODEL_NAME = os.environ['MODEL_NAME']
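The test default swapped above follows the usual environment-fallback pattern: honor `MODEL_NAME` if the caller set it, otherwise use the new granite default. The same logic in shell form (the file name comes from the diff; everything else is illustrative):

```shell
# Shell analogue of the conftest.py fallback: take MODEL_NAME from the
# environment if set, else fall back to the new granite default.
MODEL_NAME="${MODEL_NAME:-granite-7b-lab-Q4_K_M.gguf}"
echo "$MODEL_NAME"
```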
2 changes: 1 addition & 1 deletion model_servers/whispercpp/Makefile
@@ -14,7 +14,7 @@ MODEL_NAME ?= ggml-small.bin
 .PHONY: all
 all: build download-model-whisper-small run
 
-.PHONY: download-model-whisper-small # small .bin model type testing
+.PHONY: download-model-whisper-small
 download-model-whisper-small:
 	cd ../../models && \
 	make download-model-whisper-small
4 changes: 2 additions & 2 deletions models/Containerfile
@@ -1,8 +1,8 @@
 # Suggested alternative open AI Models
-# https://huggingface.co/instructlab/granite-7b-lab-GGUF/resolve/main/granite-7b-lab-Q4_K_M.gguf
+# https://huggingface.co/instructlab/granite-7b-lab-GGUF/resolve/main/granite-7b-lab-Q4_K_M.gguf (Default)
 # https://huggingface.co/instructlab/merlinite-7b-lab-GGUF/resolve/main/merlinite-7b-lab-Q4_K_M.gguf
 # https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf
-# https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf (Default)
+# https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf
 # https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q4_K_M.gguf
 # https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin
 # podman build --build-arg="MODEL_URL=https://..." -t quay.io/yourimage .
10 changes: 7 additions & 3 deletions models/Makefile
@@ -25,15 +25,19 @@ download-model-granite:
 download-model-merlinite:
 	$(MAKE) MODEL_URL=https://huggingface.co/instructlab/merlinite-7b-lab-GGUF/resolve/main/merlinite-7b-lab-Q4_K_M.gguf MODEL_NAME=merlinite-7b-lab-Q4_K_M.gguf download-model
 
-.PHONY: download-model-whisper-small # small .bin model type testing
+.PHONY: download-model-whisper-small
 download-model-whisper-small:
 	$(MAKE) MODEL_NAME=ggml-small.bin MODEL_URL=https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin download-model
 
 .PHONY: download-model-mistral
 download-model-mistral:
 	$(MAKE) MODEL_NAME=mistral-7b-instruct-v0.2.Q4_K_M.gguf MODEL_URL=https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf download-model
 
+.PHONY: download-model-mistral-code
+download-model-mistral-code:
+	$(MAKE) MODEL_NAME=mistral-7b-code-16k-qlora.Q4_K_M.gguf MODEL_URL=https://huggingface.co/TheBloke/Mistral-7B-Code-16K-qlora-GGUF/resolve/main/mistral-7b-code-16k-qlora.Q4_K_M.gguf download-model
+
 .PHONY: clean
 clean:
-	rm -f *tmp
-	rm -f mistral* whisper* granite* merlinite*
+	-rm -f *tmp
+	-rm -f mistral* ggml-* granite* merlinite*
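One subtlety in the `clean` target above: the new leading `-` on each recipe line tells make to keep going even if `rm` exits non-zero. The shell equivalent of that tolerance is `|| true`, sketched here in a scratch directory so nothing real is deleted:

```shell
# Shell equivalent of make's "-" recipe prefix: ignore a failing command
# and continue. Runs in a throwaway directory so no real files are touched.
tmpdir=$(mktemp -d)
cd "$tmpdir"
rm -f ./*tmp mistral* ggml-* granite* merlinite* || true  # no matches; -f keeps rm quiet
echo "clean finished"
```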
1 change: 1 addition & 0 deletions models/README.md
@@ -8,6 +8,7 @@ Want to try one of our tested models? Try one or all of the following:
 make download-model-granite
 make download-model-merlinite
 make download-model-mistral
+make download-model-mistral-code
 make download-model-whisper-small
 ```
4 changes: 2 additions & 2 deletions recipes/audio/audio_to_text/bootc/Containerfile
@@ -14,9 +14,9 @@ RUN set -eu; mkdir -p /usr/ssh && \
     echo ${SSHPUBKEY} > /usr/ssh/root.keys && chmod 0600 /usr/ssh/root.keys
 
 ARG RECIPE=audio-to-text
-ARG MODEL_IMAGE=quay.io/ai-lab/mistral-7b-instruct:latest
+ARG MODEL_IMAGE=quay.io/ai-lab/whisper-small:latest
 ARG APP_IMAGE=quay.io/ai-lab/${RECIPE}:latest
-ARG SERVER_IMAGE=quay.io/ai-lab/llamacpp_python:latest
+ARG SERVER_IMAGE=quay.io/ai-lab/whispercpp:latest
 ARG TARGETARCH
 
 # Add quadlet files to setup system to automatically run AI application on boot
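The `ARG` lines above only set defaults; both images can still be swapped at build time with `--build-arg`. A sketch of such an override — the tag and context path are assumptions, and the command is echoed rather than executed:

```shell
# Hypothetical build command showing the new whisper defaults being passed
# explicitly; echoed so this is safe to run anywhere.
echo podman build \
  --build-arg MODEL_IMAGE=quay.io/ai-lab/whisper-small:latest \
  --build-arg SERVER_IMAGE=quay.io/ai-lab/whispercpp:latest \
  -t quay.io/ai-lab/audio-to-text-bootc .
```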
8 changes: 4 additions & 4 deletions recipes/common/Makefile.common
@@ -4,7 +4,7 @@ REGISTRY_ORG ?= ai-lab
 IMAGE_NAME ?= $(REGISTRY_ORG)/${APP}:latest
 APP_IMAGE ?= $(REGISTRY)/$(IMAGE_NAME)
 CHROMADB_IMAGE ?= $(REGISTRY)/$(REGISTRY_ORG)/chromadb:latest
-MODEL_IMAGE ?= $(REGISTRY)/$(REGISTRY_ORG)/mistral-7b-instruct:latest
+MODEL_IMAGE ?= $(REGISTRY)/$(REGISTRY_ORG)/granite-7b-lab:latest
 SERVER_IMAGE ?= $(REGISTRY)/$(REGISTRY_ORG)/llamacpp_python:latest
 SSH_PUBKEY ?= $(shell cat ${HOME}/.ssh/id_rsa.pub;)
 BOOTC_IMAGE ?= quay.io/$(REGISTRY_ORG)/${APP}-bootc:latest
@@ -62,10 +62,10 @@ UNZIP_EXISTS ?= $(shell command -v unzip)
 RELATIVE_MODELS_PATH := ?=
 RELATIVE_TESTS_PATH := ?=
 
-MISTRAL_MODEL_NAME := mistral-7b-instruct-v0.2.Q4_K_M.gguf
-MISTRAL_MODEL_URL := https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf
+GRANITE_MODEL_NAME := granite-7b-lab-Q4_K_M.gguf
+GRANITE_MODEL_URL := https://huggingface.co/instructlab/granite-7b-lab-GGUF/resolve/main/granite-7b-lab-Q4_K_M.gguf
 
-MODEL_NAME ?= $(MISTRAL_MODEL_NAME)
+MODEL_NAME ?= $(GRANITE_MODEL_NAME)
 
 .PHONY: install
 install::
2 changes: 1 addition & 1 deletion recipes/common/README.md
@@ -30,7 +30,7 @@ used to override defaults for a variety of make targets.
 |DISK_TYPE | Disk type to be created by BOOTC_IMAGE_BUILDER | `qcow2` (Options: ami, iso, vmdk, raw) |
 |DISK_UID | Disk UID to be specified by BOOTC_IMAGE_BUILDER | `$(shell id -u)` |
 |DISK_GID | Disk GID to be specified by BOOTC_IMAGE_BUILDER | `$(shell id -g)` |
-|MODEL_IMAGE | AI Model to be used by application | `$(REGISTRY)/$(REGISTRY_ORG)/mistral-7b-instruct:latest`|
+|MODEL_IMAGE | AI Model to be used by application | `$(REGISTRY)/$(REGISTRY_ORG)/granite-7b-lab:latest`|
 |SERVER_IMAGE | AI Model Server Application | `$(REGISTRY)/$(REGISTRY_ORG)/llamacpp_python:latest` |
 |SSH_PUBKEY | SSH Public key preloaded in bootc image. | `$(shell cat ${HOME}/.ssh/id_rsa.pub;)` |
 |FROM | Overrides first FROM instruction within Containerfile| `FROM` line defined in the Containerfile |
@@ -14,7 +14,7 @@ RUN set -eu; mkdir -p /usr/ssh && \
     echo ${SSHPUBKEY} > /usr/ssh/root.keys && chmod 0600 /usr/ssh/root.keys
 
 ARG RECIPE=chatbot
-ARG MODEL_IMAGE=quay.io/ai-lab/mistral-7b-instruct:latest
+ARG MODEL_IMAGE=quay.io/ai-lab/granite-7b-lab:latest
 ARG APP_IMAGE=quay.io/ai-lab/${RECIPE}:latest
 ARG SERVER_IMAGE=quay.io/ai-lab/llamacpp_python:latest
 ARG TARGETARCH
1 change: 1 addition & 0 deletions recipes/natural_language_processing/codegen/Makefile
@@ -8,3 +8,4 @@ RECIPE_BINARIES_PATH := $(shell realpath ../../common/bin)
 RELATIVE_MODELS_PATH := ../../../models
 RELATIVE_TESTS_PATH := ../tests
 MODEL_IMAGE := quay.io/ai-lab/mistral-7b-code-16k-qlora:latest
+MODEL_NAME := mistral-7b-code-16k-qlora.Q4_K_M.gguf
13 changes: 5 additions & 8 deletions recipes/natural_language_processing/rag/README.md
@@ -52,18 +52,15 @@ In order to build this application we will need two models, a Vector Database, a
 
 ### Download models
 
-If you are just getting started, we recommend using [Mistral-7B-Instruct](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1). This is a well
-performant mid-sized model with an apache-2.0 license. In order to use it with our Model Service we need it converted
-and quantized into the [GGUF format](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md). There are a number of
-ways to get a GGUF version of Mistral-7B, but the simplest is to download a pre-converted one from
-[huggingface.co](https://huggingface.co) here: https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF.
+If you are just getting started, we recommend using [Granite-7B-Lab](https://huggingface.co/instructlab/granite-7b-lab-GGUF). This is a
+performant mid-sized model with an Apache-2.0 license, already quantized and published in the [GGUF format](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md).
 
 The recommended model can be downloaded using the code snippet below:
 
 ```bash
-cd models
-wget https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf
-cd ../
+cd ../../../models
+curl -sLO https://huggingface.co/instructlab/granite-7b-lab-GGUF/resolve/main/granite-7b-lab-Q4_K_M.gguf
+cd ../recipes/natural_language_processing/rag
 ```
 
 _A full list of supported open models is forthcoming._
@@ -15,7 +15,7 @@ RUN set -eu; mkdir -p /usr/ssh && \
     echo ${SSHPUBKEY} > /usr/ssh/root.keys && chmod 0600 /usr/ssh/root.keys
 
 ARG RECIPE=rag
-ARG MODEL_IMAGE=quay.io/ai-lab/mistral-7b-instruct:latest
+ARG MODEL_IMAGE=quay.io/ai-lab/granite-7b-lab:latest
 ARG APP_IMAGE=quay.io/ai-lab/${RECIPE}:latest
 ARG SERVER_IMAGE=quay.io/ai-lab/llamacpp_python:latest
 ARG CHROMADBImage=quay.io/ai-lab/chromadb
@@ -14,7 +14,7 @@ RUN set -eu; mkdir -p /usr/ssh && \
     echo ${SSHPUBKEY} > /usr/ssh/root.keys && chmod 0600 /usr/ssh/root.keys
 
 ARG RECIPE=summarizer
-ARG MODEL_IMAGE=quay.io/ai-lab/mistral-7b-instruct:latest
+ARG MODEL_IMAGE=quay.io/ai-lab/granite-7b-lab:latest
 ARG APP_IMAGE=quay.io/ai-lab/${RECIPE}:latest
 ARG SERVER_IMAGE=quay.io/ai-lab/llamacpp_python:latest
 ARG TARGETARCH
