diff --git a/.github/workflows/chatbot.yaml b/.github/workflows/chatbot.yaml index dc59ea21..ad243f6d 100644 --- a/.github/workflows/chatbot.yaml +++ b/.github/workflows/chatbot.yaml @@ -63,7 +63,7 @@ jobs: - name: Download model working-directory: ./models - run: make download-model-mistral + run: make download-model-granite - name: Run Functional Tests shell: bash diff --git a/.github/workflows/codegen.yaml b/.github/workflows/codegen.yaml index 5a3a5cd8..ce59f556 100644 --- a/.github/workflows/codegen.yaml +++ b/.github/workflows/codegen.yaml @@ -63,7 +63,7 @@ jobs: - name: Download model working-directory: ./models - run: make download-model-mistral + run: make download-model-mistral-code - name: Run Functional Tests shell: bash diff --git a/.github/workflows/model_image_build_push.yaml b/.github/workflows/model_image_build_push.yaml index 5eaa73f0..685b504b 100644 --- a/.github/workflows/model_image_build_push.yaml +++ b/.github/workflows/model_image_build_push.yaml @@ -43,6 +43,9 @@ jobs: label: Q4_K_M url: https://huggingface.co/instructlab/granite-7b-lab-GGUF/resolve/main/granite-7b-lab-Q4_K_M.gguf platforms: linux/amd64,linux/arm64 + - image_name: whisper-small + url: https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin + platforms: linux/amd64,linux/arm64 runs-on: ubuntu-latest permissions: contents: read diff --git a/.github/workflows/model_servers.yaml b/.github/workflows/model_servers.yaml index 6997c44c..ecbc928f 100644 --- a/.github/workflows/model_servers.yaml +++ b/.github/workflows/model_servers.yaml @@ -27,13 +27,13 @@ jobs: matrix: include: - image_name: llamacpp_python - model: mistral + model: granite flavor: base directory: llamacpp_python platforms: linux/amd64,linux/arm64 no_gpu: 1 - image_name: llamacpp_python_cuda - model: mistral + model: granite flavor: cuda directory: llamacpp_python platforms: linux/amd64 diff --git a/.github/workflows/rag.yaml b/.github/workflows/rag.yaml index 7daa03de..d72eedf3 100644 --- 
a/.github/workflows/rag.yaml +++ b/.github/workflows/rag.yaml @@ -68,7 +68,7 @@ jobs: - name: Download model working-directory: ./models - run: make download-model-mistral + run: make download-model-granite - name: Run Functional Tests shell: bash diff --git a/.github/workflows/summarizer.yaml b/.github/workflows/summarizer.yaml index 7c62509a..470e8996 100644 --- a/.github/workflows/summarizer.yaml +++ b/.github/workflows/summarizer.yaml @@ -63,7 +63,7 @@ jobs: - name: Download model working-directory: ./models - run: make download-model-mistral + run: make download-model-granite - name: Run Functional Tests shell: bash diff --git a/ailab-images.md b/ailab-images.md index 1c1e37ce..d79c24df 100644 --- a/ailab-images.md +++ b/ailab-images.md @@ -1,19 +1,38 @@ -## Images (x86_64, aarch64) currently built from GH Actions in this repository +## Model Server Images (amd64, arm64) currently built from GH Actions in this repository - quay.io/ai-lab/llamacpp_python:latest -- quay.io/ai-lab/llamacpp_python_cuda:latest -- quay.io/ai-lab/llamacpp_python_vulkan:latest +- quay.io/ai-lab/llamacpp-python-cuda:latest +- quay.io/ai-lab/llamacpp-python-vulkan:latest +- quay.io/redhat-et/locallm-object-detection-server:latest + +## Recipe Images (amd64, arm64) - quay.io/ai-lab/summarizer:latest - quay.io/ai-lab/chatbot:latest - quay.io/ai-lab/rag:latest - quay.io/ai-lab/codegen:latest -- quay.io/ai-lab/chromadb:latest - quay.io/redhat-et/locallm-object-detection-client:latest -- quay.io/redhat-et/locallm-object-detection-server:latest -## Model Images (x86_64, aarch64) +## Dependency images + +Images used in the `Bootc` aspect of this repo or tooling images +- quay.io/ai-lab/nvidia-builder:latest +- quay.io/ai-lab/instructlab-nvidia:latest +- quay.io/ai-lab/nvidia-bootc:latest + +- quay.io/ai-lab/chromadb:latest +- quay.io/ai-lab/model-converter:latest + +## Model Images (amd64, arm64) + +- quay.io/ai-lab/merlinite-7b-lab:latest + - [model download 
link](https://huggingface.co/instructlab/merlinite-7b-lab-GGUF/resolve/main/merlinite-7b-lab-Q4_K_M.gguf) +- quay.io/ai-lab/granite-7b-lab:latest + - [model download link](https://huggingface.co/instructlab/granite-7b-lab-GGUF/resolve/main/granite-7b-lab-Q4_K_M.gguf) - quay.io/ai-lab/mistral-7b-instruct:latest - [model download link](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf) -- quay.io/ai-lab/codellama-7b:latest - - [model download link](https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q4_K_M.gguf) +- quay.io/ai-lab/mistral-7b-code-16k-qlora:latest + - [model download link](https://huggingface.co/TheBloke/Mistral-7B-Code-16K-qlora-GGUF/resolve/main/mistral-7b-code-16k-qlora.Q4_K_M.gguf) +- quay.io/ai-lab/whisper-small:latest + - [model download link](https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin) + diff --git a/model_servers/llamacpp_python/Makefile b/model_servers/llamacpp_python/Makefile index 9fdf09bb..f2f24268 100644 --- a/model_servers/llamacpp_python/Makefile +++ b/model_servers/llamacpp_python/Makefile @@ -25,5 +25,5 @@ build-vulkan: .PHONY: download-model-granite # default model download-model-granite: - cd ../../models && \ - make MODEL_NAME=granite-7b-lab-Q4_K_M.gguf MODEL_URL=https://huggingface.co/instructlab/granite-7b-lab-GGUF/resolve/main/granite-7b-lab-Q4_K_M.gguf -f Makefile download-model + cd ../../models/ && \ + make download-model-granite diff --git a/model_servers/llamacpp_python/tests/conftest.py b/model_servers/llamacpp_python/tests/conftest.py index 50a59398..c1ba0381 100644 --- a/model_servers/llamacpp_python/tests/conftest.py +++ b/model_servers/llamacpp_python/tests/conftest.py @@ -13,7 +13,7 @@ IMAGE_NAME = os.environ['IMAGE_NAME'] if not 'MODEL_NAME' in os.environ: - MODEL_NAME = 'mistral-7b-instruct-v0.2.Q4_K_M.gguf' + MODEL_NAME = 'granite-7b-lab-Q4_K_M.gguf' else: MODEL_NAME = 
os.environ['MODEL_NAME'] diff --git a/model_servers/whispercpp/Makefile b/model_servers/whispercpp/Makefile index 7486379b..2f639c8f 100644 --- a/model_servers/whispercpp/Makefile +++ b/model_servers/whispercpp/Makefile @@ -14,7 +14,7 @@ MODEL_NAME ?= ggml-small.bin .PHONY: all all: build download-model-whisper-small run -.PHONY: download-model-whisper-small # small .bin model type testing +.PHONY: download-model-whisper-small download-model-whisper-small: cd ../../models && \ make download-model-whisper-small diff --git a/models/Containerfile b/models/Containerfile index b53ab9c2..e00a0edd 100644 --- a/models/Containerfile +++ b/models/Containerfile @@ -1,8 +1,8 @@ # Suggested alternative open AI Models -# https://huggingface.co/instructlab/granite-7b-lab-GGUF/resolve/main/granite-7b-lab-Q4_K_M.gguf +# https://huggingface.co/instructlab/granite-7b-lab-GGUF/resolve/main/granite-7b-lab-Q4_K_M.gguf (Default) # https://huggingface.co/instructlab/merlinite-7b-lab-GGUF/resolve/main/merlinite-7b-lab-Q4_K_M.gguf # https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf -# https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf (Default) +# https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf # https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q4_K_M.gguf # https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin # podman build --build-arg="MODEL_URL=https://..." -t quay.io/yourimage . 
diff --git a/models/Makefile b/models/Makefile index e58eb647..92ff4a0d 100644 --- a/models/Makefile +++ b/models/Makefile @@ -25,7 +25,7 @@ download-model-granite: download-model-merlinite: $(MAKE) MODEL_URL=https://huggingface.co/instructlab/merlinite-7b-lab-GGUF/resolve/main/merlinite-7b-lab-Q4_K_M.gguf MODEL_NAME=merlinite-7b-lab-Q4_K_M.gguf download-model -.PHONY: download-model-whisper-small # small .bin model type testing +.PHONY: download-model-whisper-small download-model-whisper-small: $(MAKE) MODEL_NAME=ggml-small.bin MODEL_URL=https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin download-model @@ -33,7 +33,11 @@ download-model-whisper-small: download-model-mistral: $(MAKE) MODEL_NAME=mistral-7b-instruct-v0.2.Q4_K_M.gguf MODEL_URL=https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf download-model +.PHONY: download-model-mistral-code +download-model-mistral-code: + $(MAKE) MODEL_NAME=mistral-7b-code-16k-qlora.Q4_K_M.gguf MODEL_URL=https://huggingface.co/TheBloke/Mistral-7B-Code-16K-qlora-GGUF/resolve/main/mistral-7b-code-16k-qlora.Q4_K_M.gguf download-model + .PHONY: clean clean: - rm -f *tmp - rm -f mistral* whisper* granite* merlinite* + -rm -f *tmp + -rm -f mistral* ggml-* granite* merlinite* diff --git a/models/README.md b/models/README.md index c8f6c27f..f2ecaf9f 100644 --- a/models/README.md +++ b/models/README.md @@ -8,6 +8,7 @@ Want to try one of our tested models? 
Try one or all of the following: make download-model-granite make download-model-merlinite make download-model-mistral +make download-model-mistral-code make download-model-whisper-small ``` diff --git a/recipes/audio/audio_to_text/bootc/Containerfile b/recipes/audio/audio_to_text/bootc/Containerfile index d53c26d1..6a6c0921 100644 --- a/recipes/audio/audio_to_text/bootc/Containerfile +++ b/recipes/audio/audio_to_text/bootc/Containerfile @@ -14,9 +14,9 @@ RUN set -eu; mkdir -p /usr/ssh && \ echo ${SSHPUBKEY} > /usr/ssh/root.keys && chmod 0600 /usr/ssh/root.keys ARG RECIPE=audio-to-text -ARG MODEL_IMAGE=quay.io/ai-lab/mistral-7b-instruct:latest +ARG MODEL_IMAGE=quay.io/ai-lab/whisper-small:latest ARG APP_IMAGE=quay.io/ai-lab/${RECIPE}:latest -ARG SERVER_IMAGE=quay.io/ai-lab/llamacpp_python:latest +ARG SERVER_IMAGE=quay.io/ai-lab/whispercpp:latest ARG TARGETARCH # Add quadlet files to setup system to automatically run AI application on boot diff --git a/recipes/common/Makefile.common b/recipes/common/Makefile.common index d3116b0d..de812b5a 100644 --- a/recipes/common/Makefile.common +++ b/recipes/common/Makefile.common @@ -4,7 +4,7 @@ REGISTRY_ORG ?= ai-lab IMAGE_NAME ?= $(REGISTRY_ORG)/${APP}:latest APP_IMAGE ?= $(REGISTRY)/$(IMAGE_NAME) CHROMADB_IMAGE ?= $(REGISTRY)/$(REGISTRY_ORG)/chromadb:latest -MODEL_IMAGE ?= $(REGISTRY)/$(REGISTRY_ORG)/mistral-7b-instruct:latest +MODEL_IMAGE ?= $(REGISTRY)/$(REGISTRY_ORG)/granite-7b-lab:latest SERVER_IMAGE ?= $(REGISTRY)/$(REGISTRY_ORG)/llamacpp_python:latest SSH_PUBKEY ?= $(shell cat ${HOME}/.ssh/id_rsa.pub;) BOOTC_IMAGE ?= quay.io/$(REGISTRY_ORG)/${APP}-bootc:latest @@ -62,10 +62,10 @@ UNZIP_EXISTS ?= $(shell command -v unzip) RELATIVE_MODELS_PATH := ?= RELATIVE_TESTS_PATH := ?= -MISTRAL_MODEL_NAME := mistral-7b-instruct-v0.2.Q4_K_M.gguf -MISTRAL_MODEL_URL := https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf +GRANITE_MODEL_NAME := granite-7b-lab-Q4_K_M.gguf 
+GRANITE_MODEL_URL := https://huggingface.co/instructlab/granite-7b-lab-GGUF/resolve/main/granite-7b-lab-Q4_K_M.gguf -MODEL_NAME ?= $(MISTRAL_MODEL_NAME) +MODEL_NAME ?= $(GRANITE_MODEL_NAME) .PHONY: install install:: diff --git a/recipes/common/README.md b/recipes/common/README.md index 3d0fc9e9..88b9d4db 100644 --- a/recipes/common/README.md +++ b/recipes/common/README.md @@ -30,7 +30,7 @@ used to override defaults for a variety of make targets. |DISK_TYPE | Disk type to be created by BOOTC_IMAGE_BUILDER | `qcow2` (Options: ami, iso, vmdk, raw) | |DISK_UID | Disk UID to be specified by BOOTC_IMAGE_BUILDER | `$(shell id -u)` | |DISK_GID | Disk GID to be specified by BOOTC_IMAGE_BUILDER | `$(shell id -g)` | -|MODEL_IMAGE | AI Model to be used by application | `$(REGISTRY)/$(REGISTRY_ORG)/mistral-7b-instruct:latest`| +|MODEL_IMAGE | AI Model to be used by application | `$(REGISTRY)/$(REGISTRY_ORG)/granite-7b-lab:latest`| |SERVER_IMAGE | AI Model Server Application | `$(REGISTRY)/$(REGISTRY_ORG)/llamacpp_python:latest` | |SSH_PUBKEY | SSH Public key preloaded in bootc image. 
| `$(shell cat ${HOME}/.ssh/id_rsa.pub;)` | |FROM | Overrides first FROM instruction within Containerfile| `FROM` line defined in the Containerfile | diff --git a/recipes/natural_language_processing/chatbot/bootc/Containerfile b/recipes/natural_language_processing/chatbot/bootc/Containerfile index 9907863e..c84e0ea0 100644 --- a/recipes/natural_language_processing/chatbot/bootc/Containerfile +++ b/recipes/natural_language_processing/chatbot/bootc/Containerfile @@ -14,7 +14,7 @@ RUN set -eu; mkdir -p /usr/ssh && \ echo ${SSHPUBKEY} > /usr/ssh/root.keys && chmod 0600 /usr/ssh/root.keys ARG RECIPE=chatbot -ARG MODEL_IMAGE=quay.io/ai-lab/mistral-7b-instruct:latest +ARG MODEL_IMAGE=quay.io/ai-lab/granite-7b-lab:latest ARG APP_IMAGE=quay.io/ai-lab/${RECIPE}:latest ARG SERVER_IMAGE=quay.io/ai-lab/llamacpp_python:latest ARG TARGETARCH diff --git a/recipes/natural_language_processing/codegen/Makefile b/recipes/natural_language_processing/codegen/Makefile index c45d8a13..b355dc4a 100644 --- a/recipes/natural_language_processing/codegen/Makefile +++ b/recipes/natural_language_processing/codegen/Makefile @@ -8,3 +8,4 @@ RECIPE_BINARIES_PATH := $(shell realpath ../../common/bin) RELATIVE_MODELS_PATH := ../../../models RELATIVE_TESTS_PATH := ../tests MODEL_IMAGE := quay.io/ai-lab/mistral-7b-code-16k-qlora:latest +MODEL_NAME := mistral-7b-code-16k-qlora.Q4_K_M.gguf diff --git a/recipes/natural_language_processing/rag/README.md b/recipes/natural_language_processing/rag/README.md index 9f17fe87..2595dcb2 100644 --- a/recipes/natural_language_processing/rag/README.md +++ b/recipes/natural_language_processing/rag/README.md @@ -52,18 +52,15 @@ In order to build this application we will need two models, a Vector Database, a ### Download models -If you are just getting started, we recommend using [Mistral-7B-Instruct](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1). This is a well -performant mid-sized model with an apache-2.0 license. 
In order to use it with our Model Service we need it converted -and quantized into the [GGUF format](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md). There are a number of -ways to get a GGUF version of Mistral-7B, but the simplest is to download a pre-converted one from -[huggingface.co](https://huggingface.co) here: https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF. +If you are just getting started, we recommend using [Granite-7B-Lab](https://huggingface.co/instructlab/granite-7b-lab-GGUF). This is a well +performant mid-sized model with an apache-2.0 license that has been quantized and converted into the [GGUF format](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md). The recommended model can be downloaded using the code snippet below: ```bash -cd models -wget https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf -cd ../ +cd ../../../models +curl -sLO https://huggingface.co/instructlab/granite-7b-lab-GGUF/resolve/main/granite-7b-lab-Q4_K_M.gguf +cd ../recipes/natural_language_processing/rag ``` _A full list of supported open models is forthcoming._ diff --git a/recipes/natural_language_processing/rag/bootc/Containerfile b/recipes/natural_language_processing/rag/bootc/Containerfile index 0e7e7927..021be3d2 100644 --- a/recipes/natural_language_processing/rag/bootc/Containerfile +++ b/recipes/natural_language_processing/rag/bootc/Containerfile @@ -15,7 +15,7 @@ RUN set -eu; mkdir -p /usr/ssh && \ echo ${SSHPUBKEY} > /usr/ssh/root.keys && chmod 0600 /usr/ssh/root.keys ARG RECIPE=rag -ARG MODEL_IMAGE=quay.io/ai-lab/mistral-7b-instruct:latest +ARG MODEL_IMAGE=quay.io/ai-lab/granite-7b-lab:latest ARG APP_IMAGE=quay.io/ai-lab/${RECIPE}:latest ARG SERVER_IMAGE=quay.io/ai-lab/llamacpp_python:latest ARG CHROMADBImage=quay.io/ai-lab/chromadb diff --git a/recipes/natural_language_processing/summarizer/bootc/Containerfile 
b/recipes/natural_language_processing/summarizer/bootc/Containerfile index f44199fc..aeada03b 100644 --- a/recipes/natural_language_processing/summarizer/bootc/Containerfile +++ b/recipes/natural_language_processing/summarizer/bootc/Containerfile @@ -14,7 +14,7 @@ RUN set -eu; mkdir -p /usr/ssh && \ echo ${SSHPUBKEY} > /usr/ssh/root.keys && chmod 0600 /usr/ssh/root.keys ARG RECIPE=summarizer -ARG MODEL_IMAGE=quay.io/ai-lab/mistral-7b-instruct:latest +ARG MODEL_IMAGE=quay.io/ai-lab/granite-7b-lab:latest ARG APP_IMAGE=quay.io/ai-lab/${RECIPE}:latest ARG SERVER_IMAGE=quay.io/ai-lab/llamacpp_python:latest ARG TARGETARCH