Merge pull request containers#160 from rhatdan/summarizer

Add bootc support for summarizer
mhdawson · Apr 4, 2024 · b3ca9ae · b3ca9ae
2 parents e630e7c + f79ca3f
commit b3ca9ae
Show file tree

Hide file tree

Showing 6 changed files with 207 additions and 7 deletions.
diff --git a/recipes/natural_language_processing/summarizer/Makefile b/recipes/natural_language_processing/summarizer/Makefile
@@ -0,0 +1,50 @@
+# Name of the recipe; the default application and bootc image names derive from it.
+APP ?= summarizer
+
+# Container images used by the targets below. Every one of them can be
+# overridden from the command line or the environment.
+APPIMAGE ?= quay.io/ai-lab/$(APP):latest
+BOOTCIMAGE ?= quay.io/ai-lab/$(APP)-bootc:latest
+MODELIMAGE ?= quay.io/ai-lab/mistral-7b-instruct:latest
+SERVERIMAGE ?= quay.io/ai-lab/llamacpp-python:latest
+
+# Public SSH key handed to the bootc image build. `?=` defines a deferred
+# variable, so `cat` only runs when the value is actually expanded.
+SSHPUBKEY ?= $(shell cat $(HOME)/.ssh/id_rsa.pub)
+
+# Build the application image from builds/Containerfile and tag it as APPIMAGE.
+.PHONY: build
+build:
+	podman build --file builds/Containerfile --tag $(APPIMAGE) .
+
+# Build the bootable container image from bootc/Containerfile, tagged as
+# BOOTCIMAGE, with the public SSH key passed in as a build argument.
+.PHONY: bootc
+bootc:
+	podman build \
+	    --cap-add SYS_ADMIN \
+	    --build-arg "SSHPUBKEY=$(SSHPUBKEY)" \
+	    --file bootc/Containerfile \
+	    --tag $(BOOTCIMAGE) \
+	    .
+
+# Generate the quadlet files in build/: the SERVERIMAGE, APPIMAGE and
+# MODELIMAGE placeholders in the .image and .yaml templates are replaced with
+# the configured image names; the .kube file is copied unchanged.
+# Every substitution carries the `g` flag so that all occurrences on a line
+# are replaced, not just the first one.
+.PHONY: quadlet
+quadlet:
+	mkdir -p build
+	sed -e "s|SERVERIMAGE|${SERVERIMAGE}|g" \
+	    -e "s|APPIMAGE|${APPIMAGE}|g" \
+	    -e "s|MODELIMAGE|${MODELIMAGE}|g" \
+	    quadlet/${APP}.image \
+	    > build/${APP}.image
+	sed -e "s|SERVERIMAGE|${SERVERIMAGE}|g" \
+	    -e "s|APPIMAGE|${APPIMAGE}|g" \
+	    -e "s|MODELIMAGE|${MODELIMAGE}|g" \
+	    quadlet/${APP}.yaml \
+	    > build/${APP}.yaml
+	cp quadlet/${APP}.kube build/${APP}.kube
+
+# Install the test prerequisites: Chrome 103.0.5060.53, the matching
+# chromedriver, and the Python packages listed in tests/requirements.txt.
+# `wget -O` and `unzip -o` overwrite earlier downloads, so the target can be
+# re-run without piling up `*.1` copies or stopping at an interactive prompt.
+# NOTE(review): the Chrome package is fetched from slimjetbrowser.com rather
+# than a Google host; confirm this source is acceptable.
+.PHONY: install
+install:
+	wget -O google-chrome-stable_current_amd64.deb https://www.slimjetbrowser.com/chrome/files/103.0.5060.53/google-chrome-stable_current_amd64.deb
+	sudo dpkg -i google-chrome-stable_current_amd64.deb
+	wget -O chromedriver_linux64.zip https://chromedriver.storage.googleapis.com/103.0.5060.53/chromedriver_linux64.zip
+	unzip -o chromedriver_linux64.zip
+	pip install -r tests/requirements.txt
+
+# Run the application image interactively, publishing port 8501 and pointing
+# the app at a model service on 10.88.0.1:8001.
+# Uses APPIMAGE, the same image the `build` target tags, so that
+# `make build && make run` runs what was just built. The previous image can
+# still be selected with `make run APPIMAGE=ghcr.io/ai-lab-recipes/summarizer`.
+.PHONY: run
+run:
+	podman run -it -p 8501:8501 -e MODEL_SERVICE_ENDPOINT=http://10.88.0.1:8001/v1 ${APPIMAGE}
+
+# Run the pytest suite in tests/ with Chrome, using the chromedriver binary
+# in the current directory.
+.PHONY: test
+test:
+	python3 -m pytest -vvv \
+	    --driver=Chrome \
+	    --driver-path=./chromedriver \
+	    tests
+
+# Remove the build/ directory created by the quadlet target.
+.PHONY: clean
+clean:
+	$(RM) -r build
diff --git a/recipes/natural_language_processing/summarizer/bootc/Containerfile b/recipes/natural_language_processing/summarizer/bootc/Containerfile
@@ -0,0 +1,57 @@
+# Example: an AI powered sample application is embedded as a systemd service
+# via Podman quadlet files in /usr/share/containers/systemd
+#
+# Use build command:
+# podman build --build-arg "SSHPUBKEY=$(cat $HOME/.ssh/id_rsa.pub)" -t quay.io/exampleos/myos .
+# The --build-arg "SSHPUBKEY=$(cat ~/.ssh/id_rsa.pub)" option inserts your
+# public key into the image, allowing root access via ssh.
+# Build argument names are case-sensitive: it must be spelled SSHPUBKEY.
+
+FROM quay.io/centos-bootc/centos-bootc:stream9
+ARG SSHPUBKEY
+
+# Make sshd read authorized keys from /usr/etc-system/<user>.keys and store
+# the supplied public key there for root. The key is quoted so the shell
+# writes it verbatim, without word splitting or glob expansion.
+RUN mkdir /usr/etc-system && \
+    echo 'AuthorizedKeysFile /usr/etc-system/%u.keys' >> /etc/ssh/sshd_config.d/30-auth-system.conf && \
+    echo "${SSHPUBKEY}" > /usr/etc-system/root.keys && chmod 0600 /usr/etc-system/root.keys
+
+# pre-pull workload images:
+# Comment the pull commands to keep bootc image smaller.
+# The quadlet .image file (commented out below) pulls the following images on
+# boot if they are not pre-pulled here
+
+ARG RECIPE=summarizer
+ARG MODELIMAGE=quay.io/ai-lab/mistral-7b-instruct:latest
+ARG APPIMAGE=quay.io/ai-lab/${RECIPE}:latest
+ARG SERVERIMAGE=quay.io/ai-lab/llamacpp-python:latest
+
+# Add quadlet files to setup system to automatically run AI application on boot
+# The destination ends with a slash because several sources are copied into
+# a directory.
+COPY quadlet/${RECIPE}.kube quadlet/${RECIPE}.yaml /usr/share/containers/systemd/
+
+# Modify quadlet files to match the server, model and app image
+# Every substitution uses the `g` flag so all occurrences on a line are replaced.
+RUN sed -e "s|SERVERIMAGE|${SERVERIMAGE}|g" \
+    -e "s|APPIMAGE|${APPIMAGE}|g" \
+    -e "s|MODELIMAGE|${MODELIMAGE}|g" \
+    -i \
+    /usr/share/containers/systemd/${RECIPE}.yaml
+
+# Because images are prepulled, no need for .image quadlet
+# COPY quadlet/${RECIPE}.image /usr/share/containers/systemd
+# RUN sed -e "s|SERVERIMAGE|${SERVERIMAGE}|" \
+#    -e "s|APPIMAGE|${APPIMAGE}|g" \
+#    -e "s|MODELIMAGE|${MODELIMAGE}|g" \
+#    -i \
+#    /usr/share/containers/systemd/${RECIPE}.image
+
+# Setup /usr/lib/containers/storage as an additional store for images.
+# Remove once the base images have this set by default.
+# The sed appends the store path on a new line after each line of
+# storage.conf that matches "additionalimage".
+RUN sed -i -e '/additionalimage.*/a "/usr/lib/containers/storage",' \
+        /etc/containers/storage.conf
+
+# Added for running as an OCI Container to prevent Overlay on Overlay issues.
+VOLUME /var/lib/containers
+
+# Prepull the model, model_server & application images to populate the system.
+# Each image is pulled into /usr/lib/containers/storage (via --root), the
+# directory registered above as an additional image store.
+RUN podman pull --root /usr/lib/containers/storage ${SERVERIMAGE}
+RUN podman pull --root /usr/lib/containers/storage ${APPIMAGE}
+RUN podman pull --root /usr/lib/containers/storage ${MODELIMAGE}
+
+# Reset podman storage without prompting; stderr output is discarded.
+# NOTE(review): presumably this clears state left in the default store by the
+# pulls above - confirm it does not touch /usr/lib/containers/storage.
+RUN podman system reset --force 2>/dev/null
diff --git a/recipes/natural_language_processing/summarizer/bootc/README.md b/recipes/natural_language_processing/summarizer/bootc/README.md
@@ -0,0 +1,93 @@
+## Embed workload (AI sample applications) in a bootable container image
+
+### Create a custom centos-bootc:stream9 image
+
+* [Containerfile](./Containerfile) - embeds an LLM-powered sample summarizer application.
+
+Details on the application can be found [in the summarizer/README.md](../README.md). By default, this Containerfile includes a model-server
+that is meant to run with CPU - no additional GPU drivers or toolkits are embedded. You can substitute the llamacpp_python model-server image
+for one that has GPU drivers and toolkits with additional build-args. The `FROM` must be replaced with a base image that has the necessary
+kernel drivers and toolkits if building for GPU enabled systems. For an example of an NVIDIA/CUDA base image,
+see [NVIDIA bootable image example](https://gitlab.com/bootc-org/examples/-/tree/main/nvidia?ref_type=heads)
+
+In order to pre-pull the workload images, you need to build on the same architecture as the system you are building for.
+If you are not pre-pulling the workload images, you can cross build (i.e., build on a Mac for an x86_64 system).
+To build the derived bootc image for x86_64 architecture, run the following:
+
+```bash
+cd recipes/natural_language_processing/summarizer
+
+# for CPU powered sample LLM application
+# to switch to an alternate platform like aarch64, pass --platform linux/arm64
+# the --cap-add SYS_ADMIN switch is needed when you are embedding Podman
+# commands within the container build. If the registry you are pulling images
+# from requires authentication, then you will need to volume mount the
+# auth_json file with SELinux separation disabled.
+podman build --build-arg "SSHPUBKEY=$(cat ~/.ssh/id_rsa.pub)" \
+           --security-opt label=disable \
+           -v ${XDG_RUNTIME_DIR}/containers/auth.json:/run/containers/0/auth.json \
+           --cap-add SYS_ADMIN \
+           -f bootc/Containerfile \
+           -t quay.io/yourrepo/youros:tag .
+
+# for GPU powered sample LLM application with llamacpp cuda model server
+podman build --build-arg "SSHPUBKEY=$(cat ~/.ssh/id_rsa.pub)" \
+           --build-arg "SERVERIMAGE=quay.io/redhat-et/locallm-llamacpp-cuda-model-server:latest" \
+           --from <YOUR BOOTABLE IMAGE WITH NVIDIA/CUDA> \
+           --cap-add SYS_ADMIN \
+           --platform linux/amd64 \
+           -f bootc/Containerfile \
+           -t quay.io/yourrepo/youros:tag .
+
+podman push quay.io/yourrepo/youros:tag
+```
+
+### Update a bootc-enabled system with the new derived image
+
+To build a disk image from an OCI bootable image, you can refer to [bootc-org/examples](https://gitlab.com/bootc-org/examples).
+For this example, we will assume a bootc enabled system is already running.
+If already running a bootc-enabled OS, `bootc switch` can be used to update the system to target a new bootable OCI image with embedded workloads.
+
+SSH into the bootc-enabled system and run:
+
+```bash
+bootc switch quay.io/yourrepo/youros:tag
+```
+
+The necessary image layers will be downloaded from the OCI registry, and the system will prompt you to reboot into the new operating system.
+From this point, with any subsequent modifications and pushes to the `quay.io/yourrepo/youros:tag` OCI image, your OS can be updated with:
+
+```bash
+bootc upgrade
+```
+
+### Accessing the embedded workloads
+
+The summarizer can be accessed by visiting port `8501` of the running bootc system.
+It will be running as a systemd service from Podman quadlet files placed at `/usr/share/containers/systemd/` on the bootc system.
+For more information about running containerized applications as systemd services with Podman, refer to this
+[Podman quadlet post](https://www.redhat.com/sysadmin/quadlet-podman) or, [podman documentation](https://podman.io/docs)
+
+To monitor the sample application, SSH into the bootc system and run:
+
+```bash
+systemctl status summarizer
+```
+
+You can also view the pods and containers that are managed with systemd by running:
+
+```
+podman pod list
+podman ps -a
+```
+
+To stop the sample applications, SSH into the bootc system and run:
+
+```bash
+systemctl stop summarizer
+```
+
+To run the sample application _not_ as a systemd service, stop the services then
+run the appropriate commands based on the application you have embedded.
+
+```bash
+podman kube play /usr/share/containers/systemd/summarizer.yaml
+```
diff --git a/recipes/natural_language_processing/summarizer/quadlet/README.md b/recipes/natural_language_processing/summarizer/quadlet/README.md
@@ -1,9 +1,9 @@
-### Run summarizer-langchain as a systemd service
+### Run summarizer as a systemd service
 
 ```bash
-cp summarizer.yaml /etc/containers/systemd/summarizer.yaml
-cp summarizer.kube.example /etc/containers/summarizer.kube
-cp summarizer.image /etc/containers/summarizer.image
+cp ../build/summarizer.yaml /etc/containers/systemd/summarizer.yaml
+cp ../build/summarizer.kube /etc/containers/systemd/summarizer.kube
+cp ../build/summarizer.image /etc/containers/systemd/summarizer.image
 /usr/libexec/podman/quadlet --dryrun (optional)
 systemctl daemon-reload
 systemctl start summarizer

diff --git a/...ummarizer/quadlet/summarizer.kube.example → ...essing/summarizer/quadlet/summarizer.kube b/...ummarizer/quadlet/summarizer.kube.example → ...essing/summarizer/quadlet/summarizer.kube
diff --git a/recipes/natural_language_processing/summarizer/quadlet/summarizer.yaml b/recipes/natural_language_processing/summarizer/quadlet/summarizer.yaml
@@ -7,7 +7,7 @@ metadata:
 spec:
   initContainers:
   - name: model-file
-    image: quay.io/ai-lab/mistral-7b-instruct:latest
+    image: MODELIMAGE
     command: ['/usr/bin/install', "/model/mistral-7b-instruct-v0.1.Q4_K_M.gguf", "/shared/"]
     volumeMounts:
     - name: model-file
@@ -16,7 +16,7 @@ spec:
   - env:
     - name: MODEL_SERVICE_ENDPOINT
       value: http://0.0.0.0:8001/v1
-    image: quay.io/redhat-et/locallm-text-summarizer:latest
+    image: APPIMAGE
     name: summarizer-inference
     ports:
     - containerPort: 8501
@@ -30,7 +30,7 @@ spec:
       value: 8001
     - name: MODEL_PATH
       value: /model/mistral-7b-instruct-v0.1.Q4_K_M.gguf
-    image: quay.io/ai-lab/llamacpp-python:latest
+    image: SERVERIMAGE
     name: summarizer-model-service
     ports:
     - containerPort: 8001