Skip to content

Commit

Permalink
Fix long container startup times
Browse files Browse the repository at this point in the history
The use of a uid map leads to a new layer with all files chowned.
This takes several seconds due to the size of the instructlab
container (26GB). Normally this would be a one time cost where
the idmap layer is cached and reused across container creations;
however, since the container is stored on a read-only additional
image store, no caching is performed.

Address the problem by creating a derived empty container in
mutable container storage. This allows the 1k idmap layer to be
created in the same area, yet reuses the layers in the additional
image store.

Signed-off-by: Jason T. Greene <[email protected]>
  • Loading branch information
n1hility committed Aug 19, 2024
1 parent b9aaba6 commit 2f2d608
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 3 deletions.
11 changes: 10 additions & 1 deletion training/ilab-wrapper/ilab
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ check_insights

# Template values replaced by container build
CONTAINER_DEVICE="__REPLACE_CONTAINER_DEVICE__"
IMAGE_NAME="__REPLACE_IMAGE_NAME__"
SOURCE_IMAGE="__REPLACE_IMAGE_NAME__"
IMAGE_NAME="localhost/instructlab:__REPLACE_IMAGE_TAG__"

ENTRYPOINT="ilab"
PARAMS=("$@")
Expand Down Expand Up @@ -144,4 +145,12 @@ PODMAN_COMMAND=("sudo" "--preserve-env=$PRESERVE_ENV" "podman" "run" "--rm" "-it
"--env" "HF_TOKEN"
"${IMAGE_NAME}")

# First-run initialization: derive $IMAGE_NAME from $SOURCE_IMAGE by creating
# an (unstarted) container from the source image and committing it under the
# local name. Subsequent runs find the image and skip this step.
#
# Each podman step is checked explicitly: without $IMAGE_NAME the final
# "podman run" cannot succeed, so fail early with a clear message instead of
# continuing with an empty container id or a missing image.
if ! sudo podman image exists "$IMAGE_NAME"; then
    echo "Initializing ilab container..."

    # A failed "create" would leave $id empty; never hand that to commit/rm.
    if ! id=$(sudo podman create "$SOURCE_IMAGE") || [ -z "$id" ]; then
        echo "Failed to create a container from $SOURCE_IMAGE" >&2
        exit 1
    fi

    if ! sudo podman commit "$id" "$IMAGE_NAME"; then
        # Do not leak the temporary container when the commit fails.
        sudo podman rm "$id"
        echo "Failed to commit $IMAGE_NAME from $SOURCE_IMAGE" >&2
        exit 1
    fi

    # The temporary container is only needed as the commit source.
    sudo podman rm "$id"
fi

exec "${PODMAN_COMMAND[@]}" "${PARAMS[@]}"
4 changes: 3 additions & 1 deletion training/nvidia-bootc/Containerfile
Original file line number Diff line number Diff line change
Expand Up @@ -173,10 +173,12 @@ RUN chmod +x /usr/bin/ilab
ARG INSTRUCTLAB_IMAGE="quay.io/ai-lab/instructlab-nvidia:latest"
ARG INSTRUCTLAB_IMAGE_PULL_SECRET="instructlab-nvidia-pull"

RUN for i in /usr/bin/ilab*; do \
RUN export INSTRUCTLAB_TAG=$(echo ${INSTRUCTLAB_IMAGE} | cut -f 2 -d ':') && \
for i in /usr/bin/ilab*; do \
sed -i 's/__REPLACE_TRAIN_DEVICE__/cuda/' $i; \
sed -i 's/__REPLACE_CONTAINER_DEVICE__/nvidia.com\/gpu=all/' $i; \
sed -i "s%__REPLACE_IMAGE_NAME__%${INSTRUCTLAB_IMAGE}%" $i; \
sed -i "s%__REPLACE_IMAGE_TAG__%${INSTRUCTLAB_TAG}%" $i; \
done

# Added for running as an OCI Container to prevent Overlay on Overlay issues.
Expand Down
11 changes: 10 additions & 1 deletion training/nvidia-bootc/duplicated/ilab-wrapper/ilab
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ check_insights

# Template values replaced by container build
CONTAINER_DEVICE="__REPLACE_CONTAINER_DEVICE__"
IMAGE_NAME="__REPLACE_IMAGE_NAME__"
SOURCE_IMAGE="__REPLACE_IMAGE_NAME__"
IMAGE_NAME="localhost/instructlab:__REPLACE_IMAGE_TAG__"

ENTRYPOINT="ilab"
PARAMS=("$@")
Expand Down Expand Up @@ -144,4 +145,12 @@ PODMAN_COMMAND=("sudo" "--preserve-env=$PRESERVE_ENV" "podman" "run" "--rm" "-it
"--env" "HF_TOKEN"
"${IMAGE_NAME}")

# First-run initialization: derive $IMAGE_NAME from $SOURCE_IMAGE by creating
# an (unstarted) container from the source image and committing it under the
# local name. Subsequent runs find the image and skip this step.
#
# Each podman step is checked explicitly: without $IMAGE_NAME the final
# "podman run" cannot succeed, so fail early with a clear message instead of
# continuing with an empty container id or a missing image.
if ! sudo podman image exists "$IMAGE_NAME"; then
    echo "Initializing ilab container..."

    # A failed "create" would leave $id empty; never hand that to commit/rm.
    if ! id=$(sudo podman create "$SOURCE_IMAGE") || [ -z "$id" ]; then
        echo "Failed to create a container from $SOURCE_IMAGE" >&2
        exit 1
    fi

    if ! sudo podman commit "$id" "$IMAGE_NAME"; then
        # Do not leak the temporary container when the commit fails.
        sudo podman rm "$id"
        echo "Failed to commit $IMAGE_NAME from $SOURCE_IMAGE" >&2
        exit 1
    fi

    # The temporary container is only needed as the commit source.
    sudo podman rm "$id"
fi

exec "${PODMAN_COMMAND[@]}" "${PARAMS[@]}"

0 comments on commit 2f2d608

Please sign in to comment.