diff --git a/recipes/natural_language_processing/summarizer/Makefile b/recipes/natural_language_processing/summarizer/Makefile
new file mode 100644
index 000000000..c314cb6ab
--- /dev/null
+++ b/recipes/natural_language_processing/summarizer/Makefile
@@ -0,0 +1,50 @@
+APP ?= summarizer
+MODELIMAGE ?= quay.io/ai-lab/mistral-7b-instruct:latest
+APPIMAGE ?= quay.io/ai-lab/${APP}:latest
+SERVERIMAGE ?= quay.io/ai-lab/llamacpp-python:latest
+SSHPUBKEY ?= $(shell cat ${HOME}/.ssh/id_rsa.pub;)
+BOOTCIMAGE ?= quay.io/ai-lab/${APP}-bootc:latest
+
+.PHONY: build
+build:
+	podman build -f builds/Containerfile -t ${APPIMAGE} .
+
+.PHONY: bootc
+bootc:
+	podman build --cap-add SYS_ADMIN --build-arg "SSHPUBKEY=$(SSHPUBKEY)" -f bootc/Containerfile -t ${BOOTCIMAGE} .
+
+.PHONY: quadlet
+quadlet:
+	# Modify the quadlet files to match the server, model and app images
+	mkdir -p build
+	sed -e "s|SERVERIMAGE|${SERVERIMAGE}|g" \
+	    -e "s|APPIMAGE|${APPIMAGE}|g" \
+	    -e "s|MODELIMAGE|${MODELIMAGE}|g" \
+	    quadlet/${APP}.image \
+	    > build/${APP}.image
+	sed -e "s|SERVERIMAGE|${SERVERIMAGE}|g" \
+	    -e "s|APPIMAGE|${APPIMAGE}|g" \
+	    -e "s|MODELIMAGE|${MODELIMAGE}|g" \
+	    quadlet/${APP}.yaml \
+	    > build/${APP}.yaml
+	cp quadlet/${APP}.kube build/${APP}.kube
+
+.PHONY: install
+install:
+	wget https://www.slimjetbrowser.com/chrome/files/103.0.5060.53/google-chrome-stable_current_amd64.deb
+	sudo dpkg -i google-chrome-stable_current_amd64.deb
+	wget https://chromedriver.storage.googleapis.com/103.0.5060.53/chromedriver_linux64.zip
+	unzip chromedriver_linux64.zip
+	pip install -r tests/requirements.txt
+
+.PHONY: run
+run:
+	podman run -it -p 8501:8501 -e MODEL_SERVICE_ENDPOINT=http://10.88.0.1:8001/v1 ${APPIMAGE}
+
+.PHONY: test
+test:
+	python3 -m pytest -vvv --driver=Chrome --driver-path=./chromedriver tests
+
+.PHONY: clean
+clean:
+	rm -rf build
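+
+# Example check target (a sketch, not required by the recipe): render the
+# quadlet files and print the substituted image references so the sed
+# templating above can be verified at a glance. Assumes the templates keep
+# their references on "Image=" / "image:" lines, as the files in this recipe do.
+.PHONY: check-quadlet
+check-quadlet: quadlet
+	grep -E "Image=|image:" build/${APP}.image build/${APP}.yaml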
diff --git a/recipes/natural_language_processing/summarizer/bootc/Containerfile b/recipes/natural_language_processing/summarizer/bootc/Containerfile
new file mode 100644
index 000000000..c33decb60
--- /dev/null
+++ b/recipes/natural_language_processing/summarizer/bootc/Containerfile
@@ -0,0 +1,57 @@
+# Example: an AI-powered sample application is embedded as a systemd service
+# via Podman quadlet files in /usr/share/containers/systemd
+#
+# Use build command:
+# podman build --build-arg "SSHPUBKEY=$(cat $HOME/.ssh/id_rsa.pub)" -t quay.io/exampleos/myos .
+# The --build-arg "SSHPUBKEY=$(cat ~/.ssh/id_rsa.pub)" option inserts your
+# public key into the image, allowing root access via ssh.
+
+FROM quay.io/centos-bootc/centos-bootc:stream9
+ARG SSHPUBKEY
+
+RUN mkdir /usr/etc-system && \
+    echo 'AuthorizedKeysFile /usr/etc-system/%u.keys' >> /etc/ssh/sshd_config.d/30-auth-system.conf && \
+    echo ${SSHPUBKEY} > /usr/etc-system/root.keys && chmod 0600 /usr/etc-system/root.keys
+
+# Pre-pull the workload images:
+# Comment out the pull commands below to keep the bootc image smaller.
+# The quadlet .image file pulls the following images on boot if they were not
+# pre-pulled here.
+
+ARG RECIPE=summarizer
+ARG MODELIMAGE=quay.io/ai-lab/mistral-7b-instruct:latest
+ARG APPIMAGE=quay.io/ai-lab/${RECIPE}:latest
+ARG SERVERIMAGE=quay.io/ai-lab/llamacpp-python:latest
+
+# Add quadlet files to set the system up to automatically run the AI application on boot
+COPY quadlet/${RECIPE}.kube quadlet/${RECIPE}.yaml /usr/share/containers/systemd/
+
+# Modify the quadlet files to match the server, model and app images
+RUN sed -e "s|SERVERIMAGE|${SERVERIMAGE}|g" \
+        -e "s|APPIMAGE|${APPIMAGE}|g" \
+        -e "s|MODELIMAGE|${MODELIMAGE}|g" \
+        -i \
+        /usr/share/containers/systemd/${RECIPE}.yaml
+
+# Because the images are pre-pulled, the .image quadlet is not needed
+# COPY quadlet/${RECIPE}.image /usr/share/containers/systemd
+# RUN sed -e "s|SERVERIMAGE|${SERVERIMAGE}|g" \
+#         -e "s|APPIMAGE|${APPIMAGE}|g" \
+#         -e "s|MODELIMAGE|${MODELIMAGE}|g" \
+#         -i \
+#         /usr/share/containers/systemd/${RECIPE}.image
+
+# Set up /usr/lib/containers/storage as an additional store for images.
+# Remove this once the base images have it set by default.
+RUN sed -i -e '/additionalimage.*/a "/usr/lib/containers/storage",' \
+    /etc/containers/storage.conf
+
+# Added for running as an OCI container, to prevent overlay-on-overlay issues.
+VOLUME /var/lib/containers
+
+# Pre-pull the model, model server and application images to populate the system.
+RUN podman pull --root /usr/lib/containers/storage ${SERVERIMAGE}
+RUN podman pull --root /usr/lib/containers/storage ${APPIMAGE}
+RUN podman pull --root /usr/lib/containers/storage ${MODELIMAGE}
+
+RUN podman system reset --force 2>/dev/null
diff --git a/recipes/natural_language_processing/summarizer/bootc/README.md b/recipes/natural_language_processing/summarizer/bootc/README.md
new file mode 100644
index 000000000..ab98c25c0
--- /dev/null
+++ b/recipes/natural_language_processing/summarizer/bootc/README.md
@@ -0,0 +1,93 @@
+## Embed workloads (AI sample applications) in a bootable container image
+
+### Create a custom centos-bootc:stream9 image
+
+* [Containerfile](./Containerfile) - embeds an LLM-powered sample summarizer application.
+
+Details on the application can be found [in the summarizer/README.md](../README.md). By default, this Containerfile includes a model server
+that is meant to run on CPU - no additional GPU drivers or toolkits are embedded. You can substitute the llamacpp_python model-server image
+for one that has GPU drivers and toolkits by passing additional build args. When building for GPU-enabled systems, the `FROM` image must also be
+replaced with a base image that has the necessary kernel drivers and toolkits. For an example of an NVIDIA/CUDA base image,
+see the [NVIDIA bootable image example](https://gitlab.com/bootc-org/examples/-/tree/main/nvidia?ref_type=heads).
+
+In order to pre-pull the workload images, you need to build on the same architecture you are building for.
+If you are not pre-pulling the workload images, you can cross-build (i.e., build on a Mac for an x86_64 system).
+To build the derived bootc image for the x86_64 architecture, run the following:
+
+```bash
+cd recipes/natural_language_processing/summarizer
+
+# for the CPU-powered sample LLM application
+# to target an alternate platform such as aarch64, pass --platform linux/arm64
+# the --cap-add SYS_ADMIN switch is needed when you are embedding Podman
+# commands within the container build. If the registry you are pulling images
+# from requires authentication, you will also need to volume mount the
+# auth.json file with SELinux separation disabled.
+podman build --build-arg "SSHPUBKEY=$(cat ~/.ssh/id_rsa.pub)" \
+             --security-opt label=disable \
+             -v ${XDG_RUNTIME_DIR}/containers/auth.json:/run/containers/0/auth.json \
+             --cap-add SYS_ADMIN \
+             -t quay.io/yourrepo/youros:tag .
+
+# for the GPU-powered sample LLM application with the llamacpp CUDA model server;
+# replace <gpu-enabled-bootc-base-image> with a base image that has the
+# necessary kernel drivers and toolkits
+podman build --build-arg "SSHPUBKEY=$(cat ~/.ssh/id_rsa.pub)" \
+             --build-arg "SERVERIMAGE=quay.io/redhat-et/locallm-llamacpp-cuda-model-server:latest" \
+             --from <gpu-enabled-bootc-base-image> \
+             --cap-add SYS_ADMIN \
+             --platform linux/amd64 \
+             -t quay.io/yourrepo/youros:tag .
+
+podman push quay.io/yourrepo/youros:tag
+```
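+
+As a quick sanity check (a sketch; it assumes the build above succeeded and uses
+the image tag from the CPU example), you can confirm that the quadlet files were
+baked into the image before pushing it:
+
+```bash
+podman run --rm quay.io/yourrepo/youros:tag ls /usr/share/containers/systemd
+```
+
+The output should list `summarizer.kube` and `summarizer.yaml`.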
+
+### Update a bootc-enabled system with the new derived image
+
+To build a disk image from an OCI bootable image, you can refer to [bootc-org/examples](https://gitlab.com/bootc-org/examples).
+For this example, we will assume a bootc-enabled system is already running.
+On such a system, `bootc switch` can be used to update it to target a new bootable OCI image with embedded workloads.
+
+SSH into the bootc-enabled system and run:
+
+```bash
+bootc switch quay.io/yourrepo/youros:tag
+```
+
+The necessary image layers will be downloaded from the OCI registry, and the system will prompt you to reboot into the new operating system.
+From this point on, after any subsequent modification and push of the `quay.io/yourrepo/youros:tag` OCI image, your OS can be updated with:
+
+```bash
+bootc upgrade
+```
+
+### Accessing the embedded workloads
+
+The summarizer can be accessed by visiting port `8501` of the running bootc system.
+The workloads run as systemd services from Podman quadlet files placed at `/usr/share/containers/systemd/` on the bootc system.
+For more information about running containerized applications as systemd services with Podman, refer to this
+[Podman quadlet post](https://www.redhat.com/sysadmin/quadlet-podman) or the [Podman documentation](https://podman.io/docs).
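+
+Once the system has rebooted into the new image, you can verify from another
+machine that the application is serving (a sketch; replace `<bootc-host>` with
+the address of your bootc system):
+
+```bash
+curl -s -o /dev/null -w '%{http_code}\n' http://<bootc-host>:8501
+```
+
+A `200` response indicates the summarizer front end is up.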
+
+To monitor the sample application, SSH into the bootc system and run:
+
+```bash
+systemctl status summarizer
+```
+
+You can also view the pods and containers that are managed with systemd by running:
+
+```bash
+podman pod list
+podman ps -a
+```
+
+To stop the sample application, SSH into the bootc system and run:
+
+```bash
+systemctl stop summarizer
+```
+
+To run the sample application _not_ as a systemd service, stop the service, then
+run the appropriate command for the application you have embedded:
+
+```bash
+podman kube play /usr/share/containers/systemd/summarizer.yaml
+```
diff --git a/recipes/natural_language_processing/summarizer/quadlet/README.md b/recipes/natural_language_processing/summarizer/quadlet/README.md
index f73e67237..49fe11f41 100644
--- a/recipes/natural_language_processing/summarizer/quadlet/README.md
+++ b/recipes/natural_language_processing/summarizer/quadlet/README.md
@@ -1,9 +1,9 @@
-### Run summarizer-langchain as a systemd service
+### Run summarizer as a systemd service
 
 ```bash
-cp summarizer.yaml /etc/containers/systemd/summarizer.yaml
-cp summarizer.kube.example /etc/containers/summarizer.kube
-cp summarizer.image /etc/containers/summarizer.image
+cp ../build/summarizer.yaml /etc/containers/systemd/summarizer.yaml
+cp ../build/summarizer.kube /etc/containers/summarizer.kube
+cp ../build/summarizer.image /etc/containers/summarizer.image
 /usr/libexec/podman/quadlet --dryrun (optional)
 systemctl daemon-reload
 systemctl start summarizer
diff --git a/recipes/natural_language_processing/summarizer/quadlet/summarizer.kube.example b/recipes/natural_language_processing/summarizer/quadlet/summarizer.kube
similarity index 100%
rename from recipes/natural_language_processing/summarizer/quadlet/summarizer.kube.example
rename to recipes/natural_language_processing/summarizer/quadlet/summarizer.kube
diff --git a/recipes/natural_language_processing/summarizer/quadlet/summarizer.yaml b/recipes/natural_language_processing/summarizer/quadlet/summarizer.yaml
index 94546b920..4d64aad1e 100644
--- a/recipes/natural_language_processing/summarizer/quadlet/summarizer.yaml
+++ b/recipes/natural_language_processing/summarizer/quadlet/summarizer.yaml
@@ -7,7 +7,7 @@ metadata:
 spec:
   initContainers:
   - name: model-file
-    image: quay.io/ai-lab/mistral-7b-instruct:latest
+    image: MODELIMAGE
     command: ['/usr/bin/install', "/model/mistral-7b-instruct-v0.1.Q4_K_M.gguf", "/shared/"]
     volumeMounts:
     - name: model-file
@@ -16,7 +16,7 @@ spec:
   - env:
     - name: MODEL_SERVICE_ENDPOINT
       value: http://0.0.0.0:8001/v1
-    image: quay.io/redhat-et/locallm-text-summarizer:latest
+    image: APPIMAGE
     name: summarizer-inference
     ports:
     - containerPort: 8501
@@ -30,7 +30,7 @@ spec:
       value: 8001
     - name: MODEL_PATH
       value: /model/mistral-7b-instruct-v0.1.Q4_K_M.gguf
-    image: quay.io/ai-lab/llamacpp-python:latest
+    image: SERVERIMAGE
     name: summarizer-model-service
     ports:
     - containerPort: 8001