From 6a42054fe1f430c248894b08d3950af19586d464 Mon Sep 17 00:00:00 2001 From: Daniel J Walsh Date: Thu, 2 May 2024 07:15:19 -0400 Subject: [PATCH] Use OCI Transport for embedded images to save time No reason to use containers/storage for instructlab or vllm since we are only building for embedding within a bootc image. By storing directly in OCI, we can save many minutes and lots of disk space. Signed-off-by: Daniel J Walsh --- training/Makefile.common | 5 +---- training/amd-bootc/Containerfile | 12 ++++++------ training/amd-bootc/Makefile | 2 +- training/instructlab/Makefile | 15 ++------------- training/intel-bootc/Containerfile | 9 +++++++++ training/intel-bootc/Makefile | 2 ++ training/nvidia-bootc/Containerfile | 9 ++++++--- training/nvidia-bootc/Makefile | 2 +- training/vllm/Makefile | 12 +++--------- 9 files changed, 31 insertions(+), 37 deletions(-) diff --git a/training/Makefile.common b/training/Makefile.common index bdbb67e5..84057521 100644 --- a/training/Makefile.common +++ b/training/Makefile.common @@ -18,6 +18,7 @@ KERNEL_VERSION ?= ARCH ?= INSTRUCTLAB_IMAGE = $(REGISTRY)/$(REGISTRY_ORG)/instructlab-$(VENDOR):$(IMAGE_TAG) +VLLM_IMAGE = $(REGISTRY)/$(REGISTRY_ORG)/vllm:$(IMAGE_TAG) INSTRUCTLAB_IMAGE_ID = $(shell $(CONTAINER_TOOL) image inspect $(INSTRUCTLAB_IMAGE) --format {{.Id}}) WRAPPER = $(CURDIR)/../ilab-wrapper/ilab OUTDIR = $(CURDIR)/../build @@ -33,10 +34,6 @@ $(OUTDIR): $(OUTDIR)/$(WRAPPER): $(OUTDIR) cp -f $(WRAPPER) $(OUTDIR) -$(OUTDIR)/$(INSTRUCTLAB_IMAGE_ID): - @mkdir -p $(OUTDIR)/$(INSTRUCTLAB_IMAGE_ID) - $(CONTAINER_TOOL) push --compress=false $(INSTRUCTLAB_IMAGE) oci:$(OUTDIR)/$(INSTRUCTLAB_IMAGE_ID)/ - .PHONY: check-sshkey check-sshkey: @test -n "$(SSH_PUBKEY)" || \ diff --git a/training/amd-bootc/Containerfile b/training/amd-bootc/Containerfile index cbc3c013..50a8bd3d 100644 --- a/training/amd-bootc/Containerfile +++ b/training/amd-bootc/Containerfile @@ -8,16 +8,14 @@ RUN dnf install -y \ ${EXTRA_RPM_PACKAGES} \ && dnf clean all -ARG 
INSTRUCTLAB_IMAGE="quay.io/ai-lab/instructlab-amd:latest" - # Setup /usr/lib/containers/storage as an additional store for images. # Remove once the base images have this set by default. RUN sed -i -e '/additionalimage.*/a "/usr/lib/containers/storage",' \ /etc/containers/storage.conf && \ cp /run/.input/ilab /usr/local/bin/ilab -ARG INSTRUCTLAB_IMAGE -ARG INSTRUCTLAB_IMAGE_ID +ARG INSTRUCTLAB_IMAGE="quay.io/ai-lab/instructlab-amd:latest" +ARG VLLM_IMAGE RUN sed -i 's/__REPLACE_TRAIN_DEVICE__/cuda/' /usr/local/bin/ilab RUN sed -i 's/__REPLACE_CONTAINER_DEVICE__/nvidia.com\/gpu=all/' /usr/local/bin/ilab @@ -27,5 +25,7 @@ RUN sed -i "s%__REPLACE_CONTAINER_NAME__%${INSTRUCTLAB_IMAGE}%" /usr/local/bin/i VOLUME /var/lib/containers # Prepull the instructlab image -RUN podman --root /usr/lib/containers/storage pull oci:/run/.input/${INSTRUCTLAB_IMAGE_ID} -RUN podman --root /usr/lib/containers/storage image tag ${INSTRUCTLAB_IMAGE_ID} ${INSTRUCTLAB_IMAGE} +RUN IID=$(podman --root /usr/lib/containers/storage pull oci:/run/.input/vllm) && \ + podman --root /usr/lib/containers/storage image tag ${IID} ${VLLM_IMAGE} +RUN IID=$(podman --root /usr/lib/containers/storage pull oci:/run/.input/instructlab-amd) && \ + podman --root /usr/lib/containers/storage image tag ${IID} ${INSTRUCTLAB_IMAGE} diff --git a/training/amd-bootc/Makefile b/training/amd-bootc/Makefile index 66f6e4a4..2d88d0bd 100644 --- a/training/amd-bootc/Makefile +++ b/training/amd-bootc/Makefile @@ -13,7 +13,7 @@ bootc: prepare-files -v ${OUTDIR}:/run/.input:ro \ --tag "${REGISTRY}/${REGISTRY_ORG}/${IMAGE_NAME}:${IMAGE_TAG}" \ --build-arg "INSTRUCTLAB_IMAGE=$(INSTRUCTLAB_IMAGE)" \ - --build-arg "INSTRUCTLAB_IMAGE_ID=$(INSTRUCTLAB_IMAGE_ID)" \ + --build-arg "VLLM_IMAGE=$(VLLM_IMAGE)" \ $(EXTRA_RPM_PACKAGES:%=--build-arg EXTRA_RPM_PACKAGES=%) \ $(FROM:%=--from=%) \ ${CONTAINER_TOOL_EXTRA_ARGS} . 
diff --git a/training/instructlab/Makefile b/training/instructlab/Makefile index 697653f4..dacf7563 100644 --- a/training/instructlab/Makefile +++ b/training/instructlab/Makefile @@ -11,19 +11,8 @@ instructlab: .PHONY: nvidia nvidia: instructlab - podman build --squash-all -t ${REGISTRY}/${REGISTRY_ORG}/instructlab-$@:${IMAGE_TAG} instructlab/containers/cuda + podman build --layers=false --squash-all -t oci:../build/instructlab-$@ instructlab/containers/cuda .PHONY: amd amd: instructlab - podman build --squash-all -t ${IMAGE} ${REGISTRY}/${REGISTRY_ORG}/instructlab-$@:${IMAGE_TAG} -f instructlab/containers/rocm/Containerfile instructlab - -.PHONY: push-amd -push-amd: - podman push ${REGISTRY}/${REGISTRY_ORG}/instructlab-amd:${IMAGE_TAG} - -.PHONY: push-nvidia -push-nvidia: - podman push ${REGISTRY}/${REGISTRY_ORG}/instructlab-nvidia:${IMAGE_TAG} - -.PHONY: push -push: push-amd push-nvidia + podman build --layers=false --squash-all -t oci:../build/instructlab-$@ -f instructlab/containers/rocm/Containerfile instructlab diff --git a/training/intel-bootc/Containerfile b/training/intel-bootc/Containerfile index b2179ae3..afbd8042 100644 --- a/training/intel-bootc/Containerfile +++ b/training/intel-bootc/Containerfile @@ -55,6 +55,9 @@ COPY --from=builder --chown=0:0 /tmp/habanalabs/lib/firmware/habanalabs/gaudi3 / COPY --from=builder --chown=0:0 /tmp/habanalabs*.xz /tmp/ COPY --from=builder --chown=0:0 /tmp/hl-smi /usr/bin/ +ARG INSTRUCTLAB_IMAGE +ARG VLLM_IMAGE + RUN if [ "${KERNEL_VERSION}" == "" ]; then \ RELEASE=$(dnf info --installed kernel-core | grep Release | awk -F: '{print $2}' | tr -d '[:blank:]') \ && VERSION=$(dnf info --installed kernel-core | grep Version | awk -F: '{print $2}' | tr -d '[:blank:]') \ @@ -69,3 +72,9 @@ RUN if [ "${KERNEL_VERSION}" == "" ]; then \ && depmod -a ${KERNEL_VERSION}.${TARGET_ARCH} \ && xargs --no-run-if-empty dnf install -y <<< "${EXTRA_RPM_PACKAGES}" \ && dnf clean all + +# Prepull the instructlab image +RUN IID=$(podman --root 
/usr/lib/containers/storage pull oci:/run/.input/vllm) && \ + podman --root /usr/lib/containers/storage image tag ${IID} ${VLLM_IMAGE} +#RUN IID=$(podman --root /usr/lib/containers/storage pull oci:/run/.input/instructlab-intel) && \ +# podman --root /usr/lib/containers/storage image tag ${IID} ${INSTRUCTLAB_IMAGE} diff --git a/training/intel-bootc/Makefile b/training/intel-bootc/Makefile index 411ee707..c2c3eb10 100644 --- a/training/intel-bootc/Makefile +++ b/training/intel-bootc/Makefile @@ -13,4 +13,6 @@ bootc: $(FROM:%=--build-arg BASEIMAGE=%) \ $(DRIVER_VERSION:%=--build-arg DRIVER_VERSION=%) \ $(KERNEL_VERSION:%=--build-arg KERNEL_VERSION=%) \ + --build-arg "INSTRUCTLAB_IMAGE=$(INSTRUCTLAB_IMAGE)" \ + --build-arg "VLLM_IMAGE=$(VLLM_IMAGE)" \ ${CONTAINER_TOOL_EXTRA_ARGS} . diff --git a/training/nvidia-bootc/Containerfile b/training/nvidia-bootc/Containerfile index d43bc002..130c36b2 100644 --- a/training/nvidia-bootc/Containerfile +++ b/training/nvidia-bootc/Containerfile @@ -149,7 +149,7 @@ RUN grep -q /usr/lib/containers/storage /etc/containers/storage.conf || \ cp /run/.input/ilab /usr/local/bin/ilab ARG INSTRUCTLAB_IMAGE -ARG INSTRUCTLAB_IMAGE_ID +ARG VLLM_IMAGE RUN sed -i 's/__REPLACE_TRAIN_DEVICE__/cuda/' /usr/local/bin/ilab RUN sed -i 's/__REPLACE_CONTAINER_DEVICE__/nvidia.com\/gpu=all/' /usr/local/bin/ilab @@ -158,6 +158,9 @@ RUN sed -i "s%__REPLACE_CONTAINER_NAME__%${INSTRUCTLAB_IMAGE}%" /usr/local/bin/i # Added for running as an OCI Container to prevent Overlay on Overlay issues. 
VOLUME /var/lib/containers -RUN podman --root /usr/lib/containers/storage pull oci:/run/.input/${INSTRUCTLAB_IMAGE_ID} -RUN podman --root /usr/lib/containers/storage image tag ${INSTRUCTLAB_IMAGE_ID} ${INSTRUCTLAB_IMAGE} +RUN IID=$(podman --root /usr/lib/containers/storage pull oci:/run/.input/vllm) && \ + podman --root /usr/lib/containers/storage image tag ${IID} ${VLLM_IMAGE} +RUN IID=$(podman --root /usr/lib/containers/storage pull oci:/run/.input/instructlab-nvidia) && \ + podman --root /usr/lib/containers/storage image tag ${IID} ${INSTRUCTLAB_IMAGE} + RUN podman system reset --force 2>/dev/null diff --git a/training/nvidia-bootc/Makefile b/training/nvidia-bootc/Makefile index 3ff13afd..65f60a97 100644 --- a/training/nvidia-bootc/Makefile +++ b/training/nvidia-bootc/Makefile @@ -39,6 +39,6 @@ bootc: dtk check-sshkey prepare-files $(DRIVER_VERSION:%=--build-arg DRIVER_VERSION=%) \ $(CUDA_VERSION:%=--build-arg CUDA_VERSION=%) \ --build-arg "INSTRUCTLAB_IMAGE=$(INSTRUCTLAB_IMAGE)" \ - --build-arg "INSTRUCTLAB_IMAGE_ID=$(INSTRUCTLAB_IMAGE_ID)" \ + --build-arg "VLLM_IMAGE=$(VLLM_IMAGE)" \ --build-arg "SSHPUBKEY=$(SSH_PUBKEY)" \ ${CONTAINER_TOOL_EXTRA_ARGS} . diff --git a/training/vllm/Makefile b/training/vllm/Makefile index 2400b530..2da8d952 100644 --- a/training/vllm/Makefile +++ b/training/vllm/Makefile @@ -1,13 +1,5 @@ -REGISTRY ?= quay.io -REGISTRY_ORG ?= ai-lab -IMAGE_NAME ?= vllm -IMAGE_TAG ?= latest - CONTAINER_TOOL ?= podman -DRIVER_VERSION ?= -KERNEL_VERSION ?= - default: image .PHONY: image @@ -15,4 +7,6 @@ image: "${CONTAINER_TOOL}" build \ $(ARCH:%=--platform linux/%) \ --file Containerfile \ - --tag "${REGISTRY}/${REGISTRY_ORG}/${IMAGE_NAME}:${IMAGE_TAG}" \ \ No newline at end of file + --layers=false \ + --squash-all \ + --tag oci:../build/vllm .