From f7a2ff654cb28476c8f081d15d1ffb40cfef3c7b Mon Sep 17 00:00:00 2001
From: Daniel J Walsh
Date: Wed, 1 May 2024 06:07:58 -0400
Subject: [PATCH] Add cloud-init build feature

Improve help and README to describe new functionality.

Signed-off-by: Daniel J Walsh
---
 .gitignore               |  2 ++
 README.md                |  4 +++
 training/Makefile        | 74 +++++++++++++++++++++++++++++++++++-----
 training/Makefile.common | 47 +++++++++++++++++++++++++
 training/README.md       | 43 ++++++++++++++++-------
 training/cloud/Makefile  | 23 +++++++++++++
 6 files changed, 171 insertions(+), 22 deletions(-)
 create mode 100644 training/Makefile.common
 create mode 100644 training/cloud/Makefile

diff --git a/.gitignore b/.gitignore
index efc8276a..347e2d36 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,3 +10,5 @@ models/*
 convert_models/converted_models
 recipes/common/bin/*
 */.venv/
+training/cloud/examples
+training/instructlab/instructlab
diff --git a/README.md b/README.md
index df182bf9..2eb662ce 100644
--- a/README.md
+++ b/README.md
@@ -50,3 +50,7 @@ For example, learn how to run the [chatbot recipe here](./recipes/natural_langua
 
 Images for many sample applications and models are available in `quay.io`.
 All currently built images are tracked in [ailab-images.md](./ailab-images.md)
+
+## [Training](./training/README.md)
+
+Linux Operating System Bootable containers enabled for AI Training
diff --git a/training/Makefile b/training/Makefile
index 80f3d444..b073a846 100644
--- a/training/Makefile
+++ b/training/Makefile
@@ -1,15 +1,71 @@
 default: help
 
 help:
-	@echo "Please choose one of the following targets:"
-	@echo " - amd"
-	@echo " - nvidia"
-	@echo " - intel"
+	@echo "To build a bootable container image you first need to create instructlab container images for a particular vendor "
+	@echo
+	@echo " - make instruct-amd"
+	@echo " - make instruct-intel"
+	@echo " - make instruct-nvidia"
+	@echo " - make instruct-vllm"
+	@echo
+	@echo "Once instruct images are created, create bootc container images"
+	@echo
+	@echo " - make amd"
+	@echo " - make intel"
+	@echo " - make nvidia"
+	@echo " - make vllm"
+	@echo
+	@echo "If these images are going to be used on a cloud, you might want to add cloud-init."
+	@echo
+	@echo " - make cloud-amd"
+	@echo " - make cloud-intel"
+	@echo " - make cloud-nvidia"
+	@echo " - make cloud-vllm"
 
-.PHONY: amd nvidia intel
+#
+# Create instructlab AI container images
+#
+.PHONY: instruct-amd
+instruct-amd:
+	$(MAKE) -C instructlab amd
+
+.PHONY: instruct-nvidia
+instruct-nvidia:
+	$(MAKE) -C instructlab nvidia
+
+.PHONY: instruct
+instruct: instruct-amd instruct-nvidia
+
+#
+# Create bootc container images prepared for AI
+#
+.PHONY: amd nvidia intel vllm
 amd:
-	make -C amd-bootc/ image
-nvidia:
-	make -C nvidia-bootc/ dtk image
+	$(MAKE) -C amd-bootc/ bootc
 intel:
-	make -C intel-bootc/ image
+	$(MAKE) -C intel-bootc/ bootc
+nvidia:
+	$(MAKE) -C nvidia-bootc/ dtk bootc
+vllm:
+	$(MAKE) -C vllm/ image
+
+#
+# Make Bootc container images preinstalled with cloud-init
+#
+.PHONY: cloud-amd
+cloud-amd:
+	$(MAKE) VENDOR=amd -C cloud
+
+.PHONY: cloud-intel
+cloud-intel:
+	$(MAKE) VENDOR=intel -C cloud
+
+.PHONY: cloud-nvidia
+cloud-nvidia:
+	$(MAKE) VENDOR=nvidia -C cloud
+
+.PHONY: cloud
+cloud: cloud-amd cloud-intel cloud-nvidia
+
+clean:
+	rm -rf build
diff --git a/training/Makefile.common b/training/Makefile.common
new file mode 100644
index 00000000..b7ff2936
--- /dev/null
+++ b/training/Makefile.common
@@ -0,0 +1,47 @@
+FROM ?=
+VENDOR ?=
+
+REGISTRY ?= quay.io
+REGISTRY_ORG ?= ai-lab
+IMAGE_NAME ?= $(VENDOR)-bootc
+IMAGE_TAG ?= latest
+
+CONTAINER_TOOL ?= podman
+CONTAINER_TOOL_EXTRA_ARGS ?=
+
+ARCH ?=
+
+DRIVER_VERSION ?=
+KERNEL_VERSION ?=
+
+ARCH ?=
+
+INSTRUCTLAB_IMAGE = $(REGISTRY)/$(REGISTRY_ORG)/instructlab-$(VENDOR):$(IMAGE_TAG)
+INSTRUCTLAB_IMAGE_ID = $(shell $(CONTAINER_TOOL) image inspect $(INSTRUCTLAB_IMAGE) --format {{.Id}})
+WRAPPER = $(CURDIR)/../ilab-wrapper/ilab
+OUTDIR = $(CURDIR)/../build
+
+SSH_PUBKEY ?= $(shell cat ${HOME}/.ssh/id_rsa.pub 2> /dev/null)
+
+.PHONY: prepare-files
+prepare-files: $(OUTDIR)/$(notdir $(WRAPPER)) $(OUTDIR)/$(INSTRUCTLAB_IMAGE_ID)
+
+$(OUTDIR):
+	mkdir -p $(OUTDIR)
+
+$(OUTDIR)/$(notdir $(WRAPPER)): $(WRAPPER) | $(OUTDIR)
+	cp -f $< $@
+
+$(OUTDIR)/$(INSTRUCTLAB_IMAGE_ID):
+	@mkdir -p $(OUTDIR)/$(INSTRUCTLAB_IMAGE_ID)
+	$(CONTAINER_TOOL) push --compress=false $(INSTRUCTLAB_IMAGE) oci:$(OUTDIR)/$(INSTRUCTLAB_IMAGE_ID)/
+
+.PHONY: check-sshkey
+check-sshkey:
+	@test -n "$(SSH_PUBKEY)" || \
+		(echo -n "Error: no ssh key defined! "; \
+		echo "Create ~/.ssh/id_rsa.pub or set SSH_PUBKEY"; exit 1)
+
+.PHONY: push
+push:
+	$(CONTAINER_TOOL) push "${REGISTRY}/${REGISTRY_ORG}/${IMAGE_NAME}:${IMAGE_TAG}"
diff --git a/training/README.md b/training/README.md
index 470af845..11337bdf 100644
--- a/training/README.md
+++ b/training/README.md
@@ -1,15 +1,21 @@
-Bare metal bootc containers
+Linux Operating System Bootable containers enabled for AI Training
 ===
 
 In order to run accelerated AI workloads, we've prepared [bootc](https://github.com/containers/bootc) container images for the major AI platforms.
 
 # Makefile targets
 
-| Target | Description |
-|-------------|---------------------------------------------------------|
-| amd | Create bootable container for AMD platform |
-| nvidia | Create bootable container for NVidia platform |
-| intel | Create bootable container for Intel Habanalabs platform |
+| Target | Description |
+|-----------------|---------------------------------------------------------------------|
+| instruct-amd | Create instruct lab image for bootable container for AMD platform |
+| instruct-intel | Create instruct lab image for bootable container for Intel platform |
+| instruct-nvidia | Create instruct lab image for bootable container for Nvidia platform|
+| amd | Create bootable container for AMD platform |
+| intel | Create bootable container for Intel Habanalabs platform |
+| nvidia | Create bootable container for NVidia platform |
+| cloud-amd | Add cloud-init to bootable container for AMD platform |
+| cloud-intel | Add cloud-init to bootable container for Intel platform |
+| cloud-nvidia | Add cloud-init to bootable container for Nvidia platform |
 
 # Makefile variables
 
@@ -23,6 +29,14 @@ In order to run accelerated AI workloads, we've prepared [bootc](https://github.
 | CONTAINER_TOOL | Container tool used for build | `podman` |
 | CONTAINER_TOOL_EXTRA_ARGS | Container tool extra arguments | ` ` |
 
+# How to build Instructlab containers
+
+In order to do AI Training you need to build instructlab container images.
+
+Simply execute `make instruct-<platform>`. For example, `make instruct-amd`, `make instruct-intel` and `make instruct-nvidia` build the `instructlab-amd`, `instructlab-intel` and `instructlab-nvidia` containers, respectively.
+
+Once you have these container images built it is time to build bootc container images.
+
 # How to build bootc container images
 
 In order to build the images (by default based on CentOS Stream), a simple `make ` should be enough. For example to build the `nvidia`, `amd` and `intel` bootc containers, respectively:
@@ -33,13 +47,7 @@ make amd
 make intel
 ```
 
-Using the Makefile variables listed above, the builds can be customized. For example to build the NVidia image and tag it with `myregistry.com/ai-training/nvidia:v1`:
-
-```
-make nvidia REGISTRY=myregistry.com REGISTRY_ORG=ai-training IMAGE_NAME=nvidia IMAGE_TAG=v1
-```
-
-# How to build bootc container images based on Red Hat Enterprise Linux
+## How to build bootc container images based on Red Hat Enterprise Linux
 
 In order to build the training images based on Red Hat Enterprise Linux bootc images, the appropriate base container image must be used in the `FROM` field and the build process must be run on an *entitled Red Hat 9.x Enterprise Linux* with a valid subscription.
 
@@ -57,6 +65,15 @@ Of course, the other Makefile variables are still available, so the following is
 make nvidia REGISTRY=myregistry.com REGISTRY_ORG=ai-training IMAGE_NAME=nvidia IMAGE_TAG=v1 FROM=registry.redhat.io/rhel9/rhel-bootc:9.4
 ```
 
+# How to build Cloud ready images
+
+Bootc container images can be installed on physical machines, virtual machines and in the cloud. Often it is useful to add the cloud-init package when running the operating systems in the cloud.
+
+To add cloud-init to your existing bootc container image, executing `make cloud-<platform>` (`cloud-nvidia`, `cloud-amd` or `cloud-intel`) should be enough. For example, to add cloud-init to the NVidia image tagged `myregistry.com/ai-training/nvidia:v1`:
+```
+make cloud-nvidia REGISTRY=myregistry.com REGISTRY_ORG=ai-training IMAGE_NAME=nvidia IMAGE_TAG=v1
+```
+
 # Troubleshooting
 
 Sometimes, interrupting the build process may lead to wanting a complete restart of the process. For those cases, we can instruct `podman` to start from scratch and discard the cached layers. This is possible by passing the `--no-cache` parameter to the build process by using the `CONTAINER_TOOL_EXTRA_ARGS` variable:
diff --git a/training/cloud/Makefile b/training/cloud/Makefile
new file mode 100644
index 00000000..83c0cb62
--- /dev/null
+++ b/training/cloud/Makefile
@@ -0,0 +1,23 @@
+default: cloud
+
+include ../Makefile.common
+
+REGISTRY ?= quay.io
+REGISTRY_ORG ?= ai-lab
+IMAGE_TAG ?= latest
+
+.PHONY: init
+init:
+	test -d examples || git clone https://gitlab.com/bootc-org/examples.git
+	cd examples && git pull origin main
+
+.PHONY: cloud
+cloud: init
+	"${CONTAINER_TOOL}" build \
+		$(ARCH:%=--platform linux/%) \
+		--tag "${REGISTRY}/${REGISTRY_ORG}/${IMAGE_NAME}-cloud:${IMAGE_TAG}" \
+		--from="${REGISTRY}/${REGISTRY_ORG}/${IMAGE_NAME}:${IMAGE_TAG}" \
+		examples/cloud-init
+
+# Reuse the push recipe from ../Makefile.common, but publish the -cloud image built above.
+push: override IMAGE_NAME := $(IMAGE_NAME)-cloud