From 2f2d608f71d82c433e7016199b9d5ab69466d50f Mon Sep 17 00:00:00 2001 From: "Jason T. Greene" Date: Sun, 18 Aug 2024 20:41:36 +0000 Subject: [PATCH] Fix long container startup times The use of a uid map leads to a new layer with all files chowned. This takes several seconds due to the size of the instructlab container (26GB). Normally this would be a one-time cost where the idmap layer is cached and reused across container creations; however, since the container is stored on a read-only additional image store, no caching is performed. Address the problem by creating a derived empty container in mutable container storage. This allows the 1k idmap layer to be created in the same area, yet reuses the layers in the additional image store. Signed-off-by: Jason T. Greene --- training/ilab-wrapper/ilab | 11 ++++++++++- training/nvidia-bootc/Containerfile | 4 +++- training/nvidia-bootc/duplicated/ilab-wrapper/ilab | 11 ++++++++++- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/training/ilab-wrapper/ilab b/training/ilab-wrapper/ilab index afed6432..6cd232dd 100755 --- a/training/ilab-wrapper/ilab +++ b/training/ilab-wrapper/ilab @@ -49,7 +49,8 @@ check_insights # Template values replaced by container build CONTAINER_DEVICE="__REPLACE_CONTAINER_DEVICE__" -IMAGE_NAME="__REPLACE_IMAGE_NAME__" +SOURCE_IMAGE="__REPLACE_IMAGE_NAME__" +IMAGE_NAME="localhost/instructlab:__REPLACE_IMAGE_TAG__" ENTRYPOINT="ilab" PARAMS=("$@") @@ -144,4 +145,12 @@ PODMAN_COMMAND=("sudo" "--preserve-env=$PRESERVE_ENV" "podman" "run" "--rm" "-it "--env" "HF_TOKEN" "${IMAGE_NAME}") +sudo podman image exists "$IMAGE_NAME" +if [ "$?" != "0" ]; then + echo "Initializing ilab container..." 
+ id=$(sudo podman create "$SOURCE_IMAGE") + sudo podman commit "$id" "$IMAGE_NAME" + sudo podman rm "$id" +fi + exec "${PODMAN_COMMAND[@]}" "${PARAMS[@]}" diff --git a/training/nvidia-bootc/Containerfile b/training/nvidia-bootc/Containerfile index 64a18716..95174a45 100644 --- a/training/nvidia-bootc/Containerfile +++ b/training/nvidia-bootc/Containerfile @@ -173,10 +173,12 @@ RUN chmod +x /usr/bin/ilab ARG INSTRUCTLAB_IMAGE="quay.io/ai-lab/instructlab-nvidia:latest" ARG INSTRUCTLAB_IMAGE_PULL_SECRET="instructlab-nvidia-pull" -RUN for i in /usr/bin/ilab*; do \ +RUN export INSTRUCTLAB_TAG=$(echo ${INSTRUCTLAB_IMAGE} | cut -f 2 -d ':') && \ + for i in /usr/bin/ilab*; do \ sed -i 's/__REPLACE_TRAIN_DEVICE__/cuda/' $i; \ sed -i 's/__REPLACE_CONTAINER_DEVICE__/nvidia.com\/gpu=all/' $i; \ sed -i "s%__REPLACE_IMAGE_NAME__%${INSTRUCTLAB_IMAGE}%" $i; \ + sed -i "s%__REPLACE_IMAGE_TAG__%${INSTRUCTLAB_TAG}%" $i; \ done # Added for running as an OCI Container to prevent Overlay on Overlay issues. diff --git a/training/nvidia-bootc/duplicated/ilab-wrapper/ilab b/training/nvidia-bootc/duplicated/ilab-wrapper/ilab index afed6432..6cd232dd 100755 --- a/training/nvidia-bootc/duplicated/ilab-wrapper/ilab +++ b/training/nvidia-bootc/duplicated/ilab-wrapper/ilab @@ -49,7 +49,8 @@ check_insights # Template values replaced by container build CONTAINER_DEVICE="__REPLACE_CONTAINER_DEVICE__" -IMAGE_NAME="__REPLACE_IMAGE_NAME__" +SOURCE_IMAGE="__REPLACE_IMAGE_NAME__" +IMAGE_NAME="localhost/instructlab:__REPLACE_IMAGE_TAG__" ENTRYPOINT="ilab" PARAMS=("$@") @@ -144,4 +145,12 @@ PODMAN_COMMAND=("sudo" "--preserve-env=$PRESERVE_ENV" "podman" "run" "--rm" "-it "--env" "HF_TOKEN" "${IMAGE_NAME}") +sudo podman image exists "$IMAGE_NAME" +if [ "$?" != "0" ]; then + echo "Initializing ilab container..." + id=$(sudo podman create "$SOURCE_IMAGE") + sudo podman commit "$id" "$IMAGE_NAME" + sudo podman rm "$id" +fi + exec "${PODMAN_COMMAND[@]}" "${PARAMS[@]}"