From 84d37ae47de9f2403b887410225da044ebae5458 Mon Sep 17 00:00:00 2001
From: sallyom
Date: Thu, 28 Mar 2024 17:12:20 -0400
Subject: [PATCH] update README

Signed-off-by: sallyom
Co-authored-by: MichaelClifford
---
 README.md                                     | 52 +++++-----
 locallm-images.md => ai-lab-recipes-images.md |  0
 embed-workloads/Containerfile-codegen         | 28 ------
 embed-workloads/Containerfile-nvidia          | 36 -------
 embed-workloads/README.md                     | 99 -------------------
 .../quadlets/ai-codegenerator/README.md       | 28 ------
 .../quadlets/ai-codegenerator/codegen.image   |  7 --
 .../ai-codegenerator/codegen.kube.example     | 16 ---
 .../quadlets/ai-codegenerator/codegen.yaml    | 45 ---------
 .../quadlets/ai-summarizer/README.md          | 28 ------
 .../quadlets/ai-summarizer/summarizer.image   |  7 --
 .../ai-summarizer/summarizer.kube.example     | 16 ---
 .../quadlets/ai-summarizer/summarizer.yaml    | 45 ---------
 13 files changed, 28 insertions(+), 379 deletions(-)
 rename locallm-images.md => ai-lab-recipes-images.md (100%)
 delete mode 100644 embed-workloads/Containerfile-codegen
 delete mode 100644 embed-workloads/Containerfile-nvidia
 delete mode 100644 embed-workloads/README.md
 delete mode 100644 embed-workloads/quadlets/ai-codegenerator/README.md
 delete mode 100644 embed-workloads/quadlets/ai-codegenerator/codegen.image
 delete mode 100644 embed-workloads/quadlets/ai-codegenerator/codegen.kube.example
 delete mode 100644 embed-workloads/quadlets/ai-codegenerator/codegen.yaml
 delete mode 100644 embed-workloads/quadlets/ai-summarizer/README.md
 delete mode 100644 embed-workloads/quadlets/ai-summarizer/summarizer.image
 delete mode 100644 embed-workloads/quadlets/ai-summarizer/summarizer.kube.example
 delete mode 100644 embed-workloads/quadlets/ai-summarizer/summarizer.yaml

diff --git a/README.md b/README.md
index 6d6e10488..c1e150195 100644
--- a/README.md
+++ b/README.md
@@ -1,47 +1,51 @@
-# Locallm
+# AI Lab Recipes
 
 This repo contains recipes for building and running containerized AI and LLM Applications locally with podman.
 
-These containerized AI recipes can be used to help developers quickly prototype new AI and LLM based applications, without the need for relying on any other externally hosted services. Since they are already containerized, it also helps developers move quickly from prototype to production.
+These containerized AI recipes can be used to help developers quickly prototype new AI and LLM based applications, without the need for relying
+on any other externally hosted services. Since they are already containerized, it also helps developers move quickly from prototype to production.
 
-## Current Recipes:
+## Model services
+
+[model servers examples](./model_servers)
 
-* [Model Service](#model-service)
-* [Chatbot](#chatbot)
-* [Text Summarization](#text-summarization)
-* [Code Generation](#code-generation)
-* [RAG](#rag-application) (Retrieval Augmented Generation)
-* [Fine-tuning](#fine-tuning)
+#### What's a model server?
 
-### Model service
+A model server is a program that serves machine-learning models and makes their functions available via API so that
+applications can incorporate AI. This repository provides descriptions and files for building several model servers.
 
-A model service that can be used for various applications with various models is included in this repository.
-Learn how to build and run the model service here: [Llamacpp_python model service](/model_servers/llamacpp_python/README.md).
+Many of the sample applications rely on the `llamacpp_python` model server by default. This server can be used for various applications with various models.
+However, each sample application can be paired with a variety of model servers.
 
-### Chatbot
+Learn how to build and run the llamacpp_python model server by following the [llamacpp_python model server README](/model_servers/llamacpp_python/README.md).
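+
+For a quick feel for what "available via API" means here, the hedged sketch below queries a running `llamacpp_python`
+model server. It assumes the server is already up and listening on localhost port 8001, and that it exposes an
+OpenAI-compatible `/v1` endpoint, which is the convention the sample applications in this repository use:
+
+```bash
+# Sketch: ask a locally running llamacpp_python model server a question.
+# Assumes the server listens on localhost:8001 with an OpenAI-compatible API.
+curl -s http://localhost:8001/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{"messages": [{"role": "user", "content": "Say hello in one sentence."}]}'
+```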
 
-A simple chatbot using the [Streamlit UI](https://docs.streamlit.io/). Learn how to build and run this application here: [Chatbot](/chatbot-langchain/).
+## Current Recipes:
 
-### Text Summarization
+There are several sample applications in this repository. They live in the [recipes](./recipes) folder.
+They fall under the following categories:
 
-An LLM app that can summarize arbitrarily long text inputs with the [Streamlit UI](https://docs.streamlit.io/). Learn how to build and run this application here:
-[Text Summarization](/summarizer-langchain/).
+* [audio](./recipes/audio)
+* [computer-vision](./recipes/computer_vision)
+* [multimodal](./recipes/multimodal)
+* [natural language processing](./recipes/natural_language_processing)
 
-### Code generation
-A simple chatbot using the [Streamlit UI](https://docs.streamlit.io/). Learn how to build and run this application here: [Code Generation](/code-generation/).
+Most of the sample applications follow a similar pattern that includes a model-server and an inference application.
+Many sample applications utilize the [Streamlit UI](https://docs.streamlit.io/).
 
-### RAG
+Learn how to build and run each application by visiting each of the categories above. For example, see
+the [chatbot recipe](./recipes/natural_language_processing/chatbot).
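+
+As a hedged sketch of that pattern (the image names below are placeholders; each recipe's README documents the real
+ones), a recipe can be run as two containers: a model server on port 8001 and an inference application on port 8501,
+wired together with the `MODEL_SERVICE_ENDPOINT` environment variable used throughout these recipes:
+
+```bash
+# Start the model server (placeholder image), serving on port 8001.
+podman run -d --name model-server -p 8001:8001 quay.io/yourrepo/model-server:latest
+
+# Start the inference application (placeholder image) on port 8501,
+# pointing it at the model server's OpenAI-compatible /v1 endpoint.
+podman run -d --name inference-app -p 8501:8501 \
+  -e MODEL_SERVICE_ENDPOINT=http://localhost:8001/v1 \
+  quay.io/yourrepo/inference-app:latest
+```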
 
-A chatbot using the [Streamlit UI](https://docs.streamlit.io/) and Retrieval Augmented Generation. Learn how to build and run this application here: [RAG](/rag-langchain/).
 
-### Fine Tuning
+## Fine Tuning
 
 This application allows a user to select a model and a data set they'd like to fine-tune that model on. Once the
 application finishes, it outputs a new fine-tuned model for the user to apply to other LLM services.
-Learn how to build and run this model training job here: [Fine-tuning](/finetune/).
+
+Learn how to build and run this model training job here: [Fine tuning example](/finetune/).
 
 ## Current Locallm Images built from this repository
 
-Images for all sample applications and models are tracked in [locallm-images.md](./locallm-images.md)
+Images for many sample applications and models are available in `quay.io`. All currently built images are tracked in
+[ai-lab-recipes-images.md](./ai-lab-recipes-images.md)
diff --git a/locallm-images.md b/ai-lab-recipes-images.md
similarity index 100%
rename from locallm-images.md
rename to ai-lab-recipes-images.md
diff --git a/embed-workloads/Containerfile-codegen b/embed-workloads/Containerfile-codegen
deleted file mode 100644
index af0fde7f0..000000000
--- a/embed-workloads/Containerfile-codegen
+++ /dev/null
@@ -1,28 +0,0 @@
-# In this example, an AI powered sample application will be embedded as a systemd service
-# by placing podman quadlet files in /etc/containers/systemd
-
-FROM quay.io/centos-bootc/centos-bootc:stream9
-# Build like this:
-#
-# podman build --build-arg "sshpubkey=$(cat ~/.ssh/mykey.pub)" -t quay.io/exampleos/example-image .
-#Substitute YOUR public key for the below-private key holder for the following public key will have root access
-ARG sshpubkey
-
-RUN mkdir /usr/etc-system && \
-    echo 'AuthorizedKeysFile /usr/etc-system/%u.keys' >> /etc/ssh/sshd_config.d/30-auth-system.conf && \
-    echo $sshpubkey > /usr/etc-system/root.keys && chmod 0600 /usr/etc-system/root.keys
-
-RUN dnf install -y vim && dnf clean all
-
-# Code-generation application
-COPY embed-workloads/quadlets/ai-codegenerator/codegen.kube.example /usr/share/containers/systemd/codegen.kube
-COPY embed-workloads/quadlets/ai-codegenerator/codegen.yaml /usr/share/containers/systemd/codegen.yaml
-COPY embed-workloads/quadlets/ai-codegenerator/codegen.image /usr/share/containers/systemd/codegen.image
-
-# pre-load workload images
-# uncomment out to pre-load the workload images.
-# Keep commented to keep bootc image smaller.
-# With above quadlet .image file, these will be pulled on boot
-#RUN podman pull quay.io/redhat-et/locallm-mistral-7b-gguf:latest
-#RUN podman pull quay.io/redhat-et/locallm-codegen:latest
-#RUN podman pull quay.io/redhat-et/locallm-model-service:latest
diff --git a/embed-workloads/Containerfile-nvidia b/embed-workloads/Containerfile-nvidia
deleted file mode 100644
index 012e3d78e..000000000
--- a/embed-workloads/Containerfile-nvidia
+++ /dev/null
@@ -1,36 +0,0 @@
-# In this example, an AI powered sample application will be embedded as a systemd service
-# by placing podman quadlet files in /etc/containers/systemd
-
-# In practice you will need a base image with
-# a compatible kernel+driver
-FROM quay.io/centos-bootc/centos-bootc:stream9
-# Build like this:
-#
-# podman build --build-arg "sshpubkey=$(cat ~/.ssh/mykey.pub)" -t quay.io/exampleos/example-image .
-#Substitute YOUR public key for the below-private key holder for the following public key will have root access
-ARG sshpubkey
-
-RUN mkdir /usr/etc-system && \
-    echo 'AuthorizedKeysFile /usr/etc-system/%u.keys' >> /etc/ssh/sshd_config.d/30-auth-system.conf && \
-    echo $sshpubkey > /usr/etc-system/root.keys && chmod 0600 /usr/etc-system/root.keys
-
-# cuda and nvidia repos and
-COPY nvidia/etc/ /etc/
-COPY nvidia/usr/ /usr/
-RUN dnf -y module install nvidia-driver && \
-    dnf install -y nvidia-container-toolkit && \
-    dnf install -y vim && \
-    dnf clean all
-
-# Code-generation application
-COPY embed-workloads/quadlets/ai-summarizer/summarizer.kube.example /usr/share/containers/systemd/summarizer.kube
-COPY embed-workloads/quadlets/ai-summarizer/summarizer.yaml /usr/share/containers/systemd/summarizer.yaml
-COPY embed-workloads/quadlets/ai-summarizer/summarizer.image /usr/share/containers/systemd/summarizer.image
-
-# pre-load workload images
-# uncomment to pre-load the workload images.
-# Keep commented to keep bootc image smaller.
-# With above quadlet .image file, these will be pulled on boot
-#RUN podman pull quay.io/redhat-et/locallm-mistral-7b-gguf:latest
-#RUN podman pull quay.io/redhat-et/locallm-text-summarizer:latest
-#RUN podman pull quay.io/redhat-et/locallm-llamacpp-cuda-model-server:latest
diff --git a/embed-workloads/README.md b/embed-workloads/README.md
deleted file mode 100644
index a37081d4e..000000000
--- a/embed-workloads/README.md
+++ /dev/null
@@ -1,99 +0,0 @@
-## Embed workload (AI sample applications) in a bootable container image
-
-### Create a custom centos-bootc:stream9 image
-
-This example assumes you will deploy on an x86_64 machine.
-This example assumes you will build from an x84_64 system.
-There is a [relevant issue](https://github.com/CentOS/centos-bootc/issues/282)
-for cross-building with `podman build` and running nested podman commands.
-The workload container images are pre-pulled into the bootable OCI image
-(using nested `podman pull` within the Containerfiles).
-This example was tested building from fedora.
-
-Two example Containerfiles are included in this folder.
-
-* [Containerfile-codegen](./Containerfile-codegen) - embeds an LLM-powered sample code generator chat application. Details on the application
-can be found [here](https://github.com/containers/ai-lab-recipes/tree/main/code-generation). This Containerfile includes a model-server
-that is meant to run with CPU - no additional GPU drivers or toolkits are embedded.
-
-* [Containerfile-nvidia](./Containerfile-nvidia) - embeds an LLM-powered sample text summarizer application. Details on the application
-can be found [here](https://github.com/containers/ai-lab-recipes/tree/main/summarizer). This Containerfile includes a model-server
-meant to run with GPU acceleration. NVIDIA kernel drivers and the NVIDIA CUDA toolkit is embedded. This makes for a very large (> 10G) OCI image.
-
-To build the derived bootc image for x86_64 architecture, run the following from the root of this repository:
-
-```bash
-# for CPU powered sample LLM application
-podman build --build-arg "sshpubkey=$(cat ~/.ssh/id_rsa.pub)" \
-  --cap-add SYS_ADMIN \
-  --platform linux/amd64 \
-  -t quay.io/yourrepo/youros:tag \
-  -f embed-workloads/Containerfile-codegen .
-
-# for GPU powered sample LLM application
-podman build --build-arg "sshpubkey=$(cat ~/.ssh/id_rsa.pub)" \
-  --cap-add SYS_ADMIN \
-  --platform linux/amd64 \
-  -t quay.io/yourrepo/youros:tag \
-  -f embed-workloads/Containerfile-nvidia .
-
-podman push quay.io/yourrepo/youros:tag
-```
-
-### Update a bootc-enabled system with the new derived image
-
-To build a disk image from an OCI bootable image, you can refer to other examples in this repository.
-For this example, we will assume a bootc enabled system is already running.
-If already running a bootc-enabled OS, `bootc switch` can be used to update the system to target a new bootable OCI image with embedded workloads.
-
-SSH into the bootc-enabled system and run:
-
-```bash
-bootc switch quay.io/yourrepo/youros:tag
-```
-
-The necessary image layers will be downloaded from the OCI registry, and the system will prompt you to reboot into the new operating system.
-From this point, with any subsequent modifications and pushes to the `quay.io/yourrepo/youreos:tag` OCI image, your OS can be updated with:
-
-```bash
-bootc upgrade
-```
-
-### Accessing the embedded workloads
-
-The text summarizer and code generator applications can be accessed by visiting port `8150` of the running bootc system.
-They will be running as systemd services from podman quadlet files placed at `/etc/containers/systemd/` on the bootc system. For more information
-about running containerized applications as systemd services with podman, refer to this
-[podman quadlet post](https://www.redhat.com/sysadmin/quadlet-podman) or, [podman documentation](https://podman.io/docs)
-
-To monitor the sample applications, SSH into the bootc system and run either:
-
-```bash
-systemctl status codegen
-or
-systemctl status summarizer
-```
-
-You can also view the pods and containers that are managed with systemd by running:
-
-```
-podman pod list
-podman ps -a
-```
-
-To stop the sample applications, SSH into the bootc system and run:
-
-```bash
-systemctl stop codegen
-or
-systemctl stop summarizer
-```
-
-To run the sample application _not_ as a systemd service, stop the services then
-run the appropriate commands based on the application you have embedded.
-
-```bash
-podman kube play /etc/containers/systemd/codegen.yaml
-or
-podman kube play /etc/containers/systemd/summarizer.yaml
-```
diff --git a/embed-workloads/quadlets/ai-codegenerator/README.md b/embed-workloads/quadlets/ai-codegenerator/README.md
deleted file mode 100644
index 957dba53b..000000000
--- a/embed-workloads/quadlets/ai-codegenerator/README.md
+++ /dev/null
@@ -1,28 +0,0 @@
-### Run code-generator chat application locally as a podman pod
-
-There are pre-built images and a pod definition to run the code-generation example application.
-To run locally,
-
-```bash
-podman kube play ./codegen.yaml
-```
-To monitor locally,
-
-```bash
-podman pod list
-podman ps
-podman logs
-```
-
-The application should be acessible at `http://localhost:8501`. It will take a few minutes for the model to load.
-
-### Run code-generator as a systemd service
-
-```bash
-cp codegen.yaml /etc/containers/systemd/codegen.yaml
-cp codegen.kube.example /etc/containers/codegen.kube
-cp codegen.image /etc/containers/codegen.image
-/usr/libexec/podman/quadlet --dryrun (optional)
-systemctl daemon-reload
-systemctl start codegen
-```
diff --git a/embed-workloads/quadlets/ai-codegenerator/codegen.image b/embed-workloads/quadlets/ai-codegenerator/codegen.image
deleted file mode 100644
index f64815c82..000000000
--- a/embed-workloads/quadlets/ai-codegenerator/codegen.image
+++ /dev/null
@@ -1,7 +0,0 @@
-[Install]
-WantedBy=codegen.service
-
-[Image]
-Image=quay.io/redhat-et/locallm-codellama-7b-gguf:latest
-Image=quay.io/redhat-et/locallm-model-service:latest
-Image=quay.io/redhat-et/locallm-codegen:latest
diff --git a/embed-workloads/quadlets/ai-codegenerator/codegen.kube.example b/embed-workloads/quadlets/ai-codegenerator/codegen.kube.example
deleted file mode 100644
index 3c64098e3..000000000
--- a/embed-workloads/quadlets/ai-codegenerator/codegen.kube.example
+++ /dev/null
@@ -1,16 +0,0 @@
-[Unit]
-Description=Python script to run against downloaded LLM
-Documentation=man:podman-generate-systemd(1)
-Wants=network-online.target
-After=network-online.target
-RequiresMountsFor=%t/containers
-
-[Kube]
-# Point to the yaml file in the same directory
-Yaml=codegen.yaml
-
-[Service]
-Restart=always
-
-[Install]
-WantedBy=default.target
diff --git a/embed-workloads/quadlets/ai-codegenerator/codegen.yaml b/embed-workloads/quadlets/ai-codegenerator/codegen.yaml
deleted file mode 100644
index 6e3b2e188..000000000
--- a/embed-workloads/quadlets/ai-codegenerator/codegen.yaml
+++ /dev/null
@@ -1,45 +0,0 @@
-apiVersion: v1
-kind: Pod
-metadata:
-  labels:
-    app: codegen
-  name: codegen
-spec:
-  initContainers:
-  - name: model-file
-    image: quay.io/redhat-et/locallm-codellama-7b-gguf:latest
-    command: ['/usr/bin/install', "/model/codellama-7b-instruct.Q4_K_M.gguf", "/shared/"]
-    volumeMounts:
-    - name: model-file
-      mountPath: /shared
-  containers:
-  - env:
-    - name: MODEL_SERVICE_ENDPOINT
-      value: http://0.0.0.0:8001/v1
-    image: quay.io/redhat-et/locallm-codegen:latest
-    name: codegen-inference
-    ports:
-    - containerPort: 8501
-      hostPort: 8501
-    securityContext:
-      runAsNonRoot: true
-  - env:
-    - name: HOST
-      value: 0.0.0.0
-    - name: PORT
-      value: 8001
-    - name: MODEL_PATH
-      value: /model/codellama-7b-instruct.Q4_K_M.gguf
-    image: quay.io/redhat-et/locallm-model-service:latest
-    name: codegen-model-service
-    ports:
-    - containerPort: 8001
-      hostPort: 8001
-    securityContext:
-      runAsNonRoot: true
-    volumeMounts:
-    - name: model-file
-      mountPath: /model
-  volumes:
-  - name: model-file
-    emptyDir: {}
diff --git a/embed-workloads/quadlets/ai-summarizer/README.md b/embed-workloads/quadlets/ai-summarizer/README.md
deleted file mode 100644
index 24b92a8c1..000000000
--- a/embed-workloads/quadlets/ai-summarizer/README.md
+++ /dev/null
@@ -1,28 +0,0 @@
-### Run text summarizer application locally as a podman pod
-
-There are pre-built images and a pod definition to run the text-summarizer example application.
-To run locally,
-
-```bash
-podman kube play ./summarizer.yaml
-```
-To monitor locally,
-
-```bash
-podman pod list
-podman ps
-podman logs
-```
-
-The application should be acessible at `http://localhost:8501`. It will take a few minutes for the model to load.
-
-### Run summarizer application as a systemd service
-
-```bash
-cp summarizer.yaml /etc/containers/systemd/summarizer.yaml
-cp summarizer.kube.example /etc/containers/summarizer.kube
-cp summarizer.image /etc/containers/summarizer.image
-/usr/libexec/podman/quadlet --dryrun (optional)
-systemctl daemon-reload
-systemctl start summarizer
-```
diff --git a/embed-workloads/quadlets/ai-summarizer/summarizer.image b/embed-workloads/quadlets/ai-summarizer/summarizer.image
deleted file mode 100644
index cf4815cbc..000000000
--- a/embed-workloads/quadlets/ai-summarizer/summarizer.image
+++ /dev/null
@@ -1,7 +0,0 @@
-[Install]
-WantedBy=summarizer.service
-
-[Image]
-Image=quay.io/redhat-et/locallm-mistral-7b-gguf:latest
-Image=quay.io/redhat-et/locallm-llamacpp-cuda-model-server:latest
-Image=quay.io/redhat-et/locallm-text-summarizer:latest
diff --git a/embed-workloads/quadlets/ai-summarizer/summarizer.kube.example b/embed-workloads/quadlets/ai-summarizer/summarizer.kube.example
deleted file mode 100644
index a89ca0727..000000000
--- a/embed-workloads/quadlets/ai-summarizer/summarizer.kube.example
+++ /dev/null
@@ -1,16 +0,0 @@
-[Unit]
-Description=Python script to run against downloaded LLM
-Documentation=man:podman-generate-systemd(1)
-Wants=network-online.target
-After=network-online.target
-RequiresMountsFor=%t/containers
-
-[Kube]
-# Point to the yaml file in the same directory
-Yaml=summarizer.yaml
-
-[Service]
-Restart=always
-
-[Install]
-WantedBy=default.target
diff --git a/embed-workloads/quadlets/ai-summarizer/summarizer.yaml b/embed-workloads/quadlets/ai-summarizer/summarizer.yaml
deleted file mode 100644
index 3d0a03a1b..000000000
--- a/embed-workloads/quadlets/ai-summarizer/summarizer.yaml
+++ /dev/null
@@ -1,45 +0,0 @@
-apiVersion: v1
-kind: Pod
-metadata:
-  labels:
-    app: summarizer
-  name: summarizer
-spec:
-  initContainers:
-  - name: model-file
-    image: quay.io/redhat-et/locallm-mistral-7b-gguf:latest
-    command: ['/usr/bin/install', "/model/mistral-7b-instruct-v0.1.Q4_K_S.gguf", "/shared/"]
-    volumeMounts:
-    - name: model-file
-      mountPath: /shared
-  containers:
-  - env:
-    - name: MODEL_SERVICE_ENDPOINT
-      value: http://0.0.0.0:8001/v1
-    image: quay.io/redhat-et/locallm-text-summarizer:latest
-    name: summarizer-inference
-    ports:
-    - containerPort: 8501
-      hostPort: 8501
-    securityContext:
-      runAsNonRoot: true
-  - env:
-    - name: HOST
-      value: 0.0.0.0
-    - name: PORT
-      value: 8001
-    - name: MODEL_PATH
-      value: /model/mistral-7b-instruct-v0.1.Q4_K_S.gguf
-    image: quay.io/redhat-et/locallm-llamacpp-cuda-model-server:latest
-    name: summarizer-model-service
-    ports:
-    - containerPort: 8001
-      hostPort: 8001
-    securityContext:
-      runAsNonRoot: true
-    volumeMounts:
-    - name: model-file
-      mountPath: /model
-  volumes:
-  - name: model-file
-    emptyDir: {}