From 73ada6bb9749618908a04ff7504dda490b12c993 Mon Sep 17 00:00:00 2001 From: Omer Tuchfeld Date: Wed, 17 Jul 2024 12:55:56 +0200 Subject: [PATCH 1/2] `ilab` wrapper script adjustments Ticket [RHELAI-442](https://issues.redhat.com/browse/RHELAI-442) # Background RHEL AI ships with a script in `/usr/local/bin` called `ilab` which makes running `ilab` commands feel native even though they're actually running in a podman container # Issues * The script is outdated and uses several different container images for different purposes, while it should just be using the single instructlab image * The volume mounts were incorrect, as instructlab now uses XDG paths * Unnecessary directory creation for `HF_CACHE` * Unnecessary GPU count logic * Script has unnecessary fiddling of `ilab` parameters, essentially creating a UX that deviates from the natural `ilab` CLI # Solutions * Changed script to use the single container image `IMAGE_NAME` (this was already the case mostly, except for old references to `VLLM_NAME` and `TRAIN_NAME`, which no longer get replaced, leading to a broken `PODMAN_COMMAND_SERVE`). 
Also adjusted entrypoint to use the `ilab` executable in the Python virtual environment (venv) * Will now mount the host's `~/.config` and `~/.local` into the container's corresponding directories, for `instructlab` to use and for its config / data to persist across invocations * Will now mount `~/.cache` into the container's corresponding `.cache` directory, so that the information stored in the default `HF_CACHE` is also persisted across invocations * Removed unnecessary GPU count logic * Removed all parameter parsing / fiddling # Other changes Added secret/fake "shell" `ilab` subcommand which opens a shell in the wrapper's container, useful for troubleshooting issues with the wrapper itself Signed-off-by: Omer Tuchfeld --- training/ilab-wrapper/ilab | 194 +++------------------------- training/nvidia-bootc/Containerfile | 4 - 2 files changed, 19 insertions(+), 179 deletions(-) diff --git a/training/ilab-wrapper/ilab b/training/ilab-wrapper/ilab index 86d72ade..4aba23bb 100755 --- a/training/ilab-wrapper/ilab +++ b/training/ilab-wrapper/ilab @@ -1,185 +1,29 @@ #!/bin/bash # Template values replaced by container build -ENDPOINT_URL="__REPLACE_ENDPOINT_URL__" -TRAIN_DEVICE="__REPLACE_TRAIN_DEVICE__" CONTAINER_DEVICE="__REPLACE_CONTAINER_DEVICE__" IMAGE_NAME="__REPLACE_IMAGE_NAME__" -VLLM_NAME="__REPLACE_VLLM_NAME__" -TRAIN_NAME="__REPLACE_TRAIN_NAME__" -GPU_COUNT_COMMAND="__REPLACE_GPU_COUNT_COMMAND__" -# ENDPOINT_URL="http://0.0.0.0:8080/v1" -# TRAIN_DEVICE="cuda" -# CONTAINER_DEVICE="nvidia.com/gpu=all" -# IMAGE_NAME="quay.io/ai-lab/instructlab-nvidia:latest" -# VLLM_NAME="quay.io/ai-lab/vllm:latest" -# TRAIN_NAME="quay.io/ai-lab/deepspeed-trainer:latest" -# GPU_COUNT_COMMAND="nvidia-ctk --quiet cdi list | grep -P nvidia.com/gpu='\d+' | wc -l" +export ENTRYPOINT="/opt/python3.11/venv/bin/ilab" +export PARAMS=("$@") -# HF caching uses relative symlink structures, so keep cache relative to -# the central working directory -CONTAINER_CACHE="/instructlab/cache" -HOST_CACHE="$(pwd)/cache" 
-WORKDIR="$(pwd)" -SCRIPT_DIR=$(dirname "$0") -DEFAULT_SERVE_MODEL="mistralai/Mixtral-8x7B-Instruct-v0.1" +for dir in "$HOME/.cache" "$HOME/.config" "$HOME/.local"; do + mkdir -p "$dir" +done -if [[ -z "${GPU_AMOUNT}" ]]; then - GPU_AMOUNT=$(bash -c "${GPU_COUNT_COMMAND}") - if [[ "$?" != "0" ]]; then - echo "Could not determine GPU count, set export GPU_AMOUNT= manually" - exit - fi +if [[ "$1" = "shell" ]]; then + export ENTRYPOINT=bash + export PARAMS=() fi -if [[ "$GPU_AMOUNT" -lt 2 ]]; then - echo "WARNING: You need at least 2 GPUs to load full precision models" -fi - -NPROC_PER_NODE=${GPU_AMOUNT} -EFFECTIVE_BATCH_SIZE=$((12*${GPU_AMOUNT})) -NUM_INSTRUCTIONS=5000 -NUM_EPOCHS=10 - -has_argument() { - match=$1 - shift - for arg in "$@"; do - if [[ "$arg" == *"$match"* ]]; then - return 0 - fi - done - return 1 -} - -get_argument() { - local match=$1 - shift - - local found=false - local arg - while [ "$#" -gt 0 ]; do - arg="$1" - shift - if [[ "$arg" == "$match" ]]; then - found=true - if [ "$#" -gt 0 ]; then - echo "$1" - return 0 - else - echo "" - return 0 - fi - fi - done - - if ! $found; then - echo "" - return 0 - fi -} - -get_argument_default() { - local match=$1 - local default=$2 - shift - shift - local result=$(get_argument ${match} "$@") - if [[ -z "${result}" ]]; then - echo $default - return 0 - fi - echo "${result}" -} - -get_model() { - model=$(get_argument_default "--model" "${DEFAULT_SERVE_MODEL}" "$@") - if [[ ! 
"${model}" =~ ^/instructlab/models.* ]]; then - echo /instructlab/models/"${model}" - else - echo "${model}" - fi -} - -mkdir -p "${HOST_CACHE}" -PODMAN_COMMAND=("podman" "run" "--rm" "-it" "--device" "${CONTAINER_DEVICE}" \ - "--security-opt" "label=disable" "--net" "host" \ - "-v" "${WORKDIR}:/instructlab" "--entrypoint" "" \ - "-e" "HF_HOME=${CONTAINER_CACHE}" \ - "-e" "HF_TOKEN=${HF_TOKEN}" \ - "${IMAGE_NAME}") -PODMAN_COMMAND_SERVE=("podman" "run" "--rm" "-it" "--device" "${CONTAINER_DEVICE}" \ - "--security-opt" "label=disable" "--net" "host" \ - "-v" "${WORKDIR}:/instructlab" \ - "--shm-size=10gb" \ - "-e" "HF_HOME=${CONTAINER_CACHE}/" \ - "-e" "HF_TOKEN=${HF_TOKEN}" \ - "${VLLM_NAME}" "--host=0.0.0.0" "--port=8080" "--tensor-parallel-size=${GPU_AMOUNT}") - -if [[ "$1" = "init" ]]; then - if ! has_argument "--repository" "$@"; then - shift - "${PODMAN_COMMAND[@]}" ilab init \ - --repository https://github.com/instructlab/taxonomy.git "$@" - exit $? - fi -elif [[ "$1" = "train" ]]; then - samples=$(get_argument_default "--num-samples" ${NUM_INSTRUCTIONS} "$@") - epochs=$(get_argument_default "--num-epochs" ${NUM_EPOCHS} "$@") - ${SCRIPT_DIR}/ilab-training-launcher ${NPROC_PER_NODE} ${EFFECTIVE_BATCH_SIZE} \ - ${TRAIN_DEVICE} ${samples} ${epochs} ${CONTAINER_DEVICE} ${TRAIN_NAME} - exit $? -elif [[ "$1" = "serve" ]]; then - # run vllm container which will serve vllm and ilab generate - args=() - model=$(get_model "$@") - if [[ "${model}" == *"${DEFAULT_SERVE_MODEL}" ]]; then - args+=("--chat-template=mixtral.jinja") - fi - args+=("--model" "${model}") - "${PODMAN_COMMAND_SERVE[@]}" "${args[@]}" - exit $? -elif [[ "$1" = "chat" ]]; then - shift - args=($@) - if ! has_argument "--endpoint-url" "$@"; then - args+=("--endpoint-url" "http://0.0.0.0:8080/v1") - fi - if ! has_argument "--model-family" "$@"; then - args+=("--model-family" "mixtral") - fi - args+=("--model" $(get_model "$@")) - "${PODMAN_COMMAND[@]}" ilab chat "${args[@]}" - exit $? 
-elif [[ "$1" = "generate" ]]; then - shift - args=($@) - if ! has_argument "--endpoint-url" "$@"; then - args+=("--endpoint-url" "http://0.0.0.0:8080/v1") - fi - if ! has_argument "--model-family" "$@"; then - args+=("--model-family" "mixtral") - fi - if ! has_argument "--num-instructions" "$@"; then - args+=("--num-instructions" "5000") - fi - args+=("--model" $(get_model "$@")) - echo ilab generate "${args[@]}" - - "${PODMAN_COMMAND[@]}" ilab generate "${args[@]}" - exit $? -elif [[ "$1" == "download" && $# -lt 2 ]]; then - echo "You must specify the model to download." - echo - echo "High-fidelity generation and training requires two models:" - echo - echo "Mixtral: ilab download --repository ${DEFAULT_SERVE_MODEL}" - echo "Granite: ilab download --repository ibm/granite-7b-base" - echo - echo "For more options type ilab --help" - exit 1 -fi - -"${PODMAN_COMMAND[@]}" ilab "$@" - +PODMAN_COMMAND=("podman" "run" "--rm" "-it" + "--device" "${CONTAINER_DEVICE}" + "--security-opt" "label=disable" "--net" "host" + "-v" "$HOME/.cache:/root/.cache" + "-v" "$HOME/.config:/root/.config" + "-v" "$HOME/.local:/root/.local" + "--entrypoint" "$ENTRYPOINT" + "--env" "HF_TOKEN" + "${IMAGE_NAME}") + +"${PODMAN_COMMAND[@]}" "${PARAMS[@]}" diff --git a/training/nvidia-bootc/Containerfile b/training/nvidia-bootc/Containerfile index 438394c8..438b9e34 100644 --- a/training/nvidia-bootc/Containerfile +++ b/training/nvidia-bootc/Containerfile @@ -188,15 +188,11 @@ RUN grep -q /usr/lib/containers/storage /etc/containers/storage.conf || \ && chmod +x /usr/bin/ilab ARG INSTRUCTLAB_IMAGE="quay.io/ai-lab/instructlab-nvidia:latest" -ARG GPU_COUNT_COMMAND="nvidia-ctk --quiet cdi list | grep -P nvidia.com/gpu='\\\\d+' | wc -l" RUN for i in /usr/bin/ilab*; do \ sed -i 's/__REPLACE_TRAIN_DEVICE__/cuda/' $i; \ sed -i 's/__REPLACE_CONTAINER_DEVICE__/nvidia.com\/gpu=all/' $i; \ sed -i "s%__REPLACE_IMAGE_NAME__%${INSTRUCTLAB_IMAGE}%" $i; \ - sed -i 
's%__REPLACE_ENDPOINT_URL__%http://0.0.0.0:8080/v1%' $i; \ - sed -i "s%__REPLACE_GPU_COUNT_COMMAND__%${GPU_COUNT_COMMAND}%" $i; \ - sed -i 's/__REPLACE_TRAIN_DEVICE__/cuda/' $i; \ done # Added for running as an OCI Container to prevent Overlay on Overlay issues. From 425e861dc20c8dc9a48443c4ea0f3a21f906266b Mon Sep 17 00:00:00 2001 From: Igal Tsoiref Date: Tue, 25 Jun 2024 18:27:21 +0300 Subject: [PATCH 2/2] RHELAI-429: Adding upgrade informer service The upgrade informer will run every couple of hours and will be triggered by a systemd timer. In order to also start it on boot and run it once, I enabled both the service and its timer. Disabled the auto upgrade service in order to prevent unexpected reboots. The service will run "bootc upgrade --check" and, in case a new version exists, it will create a motd file with upgrade info. Removed unused grow-part services Signed-off-by: Igal Tsoiref --- training/common/Makefile.common | 7 +++- .../system/bootc-generic-growpart.service | 20 --------- .../bootc-generic-growpart.service | 1 - .../systemd/system/upgrade-informer.service | 12 ++++++ .../lib/systemd/system/upgrade-informer.timer | 11 +++++ .../common/usr/libexec/bootc-generic-growpart | 41 ------------------- training/common/usr/libexec/upgrade-informer | 32 +++++++++++++++ training/nvidia-bootc/Containerfile | 13 +++++- 8 files changed, 73 insertions(+), 64 deletions(-) delete mode 100644 training/common/usr/lib/systemd/system/bootc-generic-growpart.service delete mode 120000 training/common/usr/lib/systemd/system/local-fs.target.wants/bootc-generic-growpart.service create mode 100644 training/common/usr/lib/systemd/system/upgrade-informer.service create mode 100644 training/common/usr/lib/systemd/system/upgrade-informer.timer delete mode 100755 training/common/usr/libexec/bootc-generic-growpart create mode 100755 training/common/usr/libexec/upgrade-informer diff --git a/training/common/Makefile.common b/training/common/Makefile.common index f4c3115a..306f28b3 100644 --- a/training/common/Makefile.common +++ 
b/training/common/Makefile.common @@ -52,7 +52,7 @@ ENABLE_RT ?= SSH_PUBKEY ?= $(shell cat ${HOME}/.ssh/id_rsa.pub 2> /dev/null) .PHONY: prepare-files -prepare-files: $(OUTDIR)/$(WRAPPER) $(OUTDIR)/$(QLORA_WRAPPER) $(OUTDIR)/$(TRAIN_WRAPPER) $(OUTDIR) +prepare-files: $(OUTDIR)/$(WRAPPER) $(OUTDIR)/$(QLORA_WRAPPER) $(OUTDIR)/$(TRAIN_WRAPPER) $(OUTDIR) common-services $(OUTDIR): mkdir -p $(OUTDIR) @@ -63,6 +63,11 @@ $(OUTDIR)/$(QLORA_WRAPPER): $(OUTDIR) $(OUTDIR)/$(TRAIN_WRAPPER): $(OUTDIR) cp -pf $(TRAIN_WRAPPER) $(OUTDIR) +.PHONY: common-services +common-services: + mkdir -p build; cp -pR ../common/usr build + + .PHONY: check-sshkey check-sshkey: @test -n "$(SSH_PUBKEY)" || \ diff --git a/training/common/usr/lib/systemd/system/bootc-generic-growpart.service b/training/common/usr/lib/systemd/system/bootc-generic-growpart.service deleted file mode 100644 index 77bb310b..00000000 --- a/training/common/usr/lib/systemd/system/bootc-generic-growpart.service +++ /dev/null @@ -1,20 +0,0 @@ -[Unit] -Description=Bootc Fallback Root Filesystem Grow -Documentation=https://gitlab.com/fedora/bootc/docs -# For now we skip bare metal cases, and we also have nothing to do -# for containers. -ConditionVirtualization=vm -# This helps verify that we're running in a bootc/ostree based target. -ConditionPathIsMountPoint=/sysroot -# We want to run before any e.g. large container images might be pulled. 
-DefaultDependencies=no -Requires=sysinit.target -After=sysinit.target -Before=basic.target - -[Service] -ExecStart=/usr/libexec/bootc-generic-growpart -# So we can temporarily remount the sysroot writable -MountFlags=slave -# Just to auto-cleanup our temporary files -PrivateTmp=yes diff --git a/training/common/usr/lib/systemd/system/local-fs.target.wants/bootc-generic-growpart.service b/training/common/usr/lib/systemd/system/local-fs.target.wants/bootc-generic-growpart.service deleted file mode 120000 index c8e2408d..00000000 --- a/training/common/usr/lib/systemd/system/local-fs.target.wants/bootc-generic-growpart.service +++ /dev/null @@ -1 +0,0 @@ -../bootc-generic-growpart.service \ No newline at end of file diff --git a/training/common/usr/lib/systemd/system/upgrade-informer.service b/training/common/usr/lib/systemd/system/upgrade-informer.service new file mode 100644 index 00000000..39458c49 --- /dev/null +++ b/training/common/usr/lib/systemd/system/upgrade-informer.service @@ -0,0 +1,12 @@ +[Unit] +Description=Check for available operating system updates +ConditionPathExists=/run/ostree-booted +After=network-online.target +StartLimitIntervalSec=400 +StartLimitBurst=3 + +[Service] +Type=oneshot +ExecStart=/usr/libexec/upgrade-informer +Restart=on-failure +RestartSec=90 diff --git a/training/common/usr/lib/systemd/system/upgrade-informer.timer b/training/common/usr/lib/systemd/system/upgrade-informer.timer new file mode 100644 index 00000000..1ac82e93 --- /dev/null +++ b/training/common/usr/lib/systemd/system/upgrade-informer.timer @@ -0,0 +1,11 @@ +[Unit] +Description=Runs upgrade informer periodically +ConditionPathExists=/run/ostree-booted + +[Timer] +OnBootSec=1h +OnUnitInactiveSec=8h +RandomizedDelaySec=2h + +[Install] +WantedBy=timers.target diff --git a/training/common/usr/libexec/bootc-generic-growpart b/training/common/usr/libexec/bootc-generic-growpart deleted file mode 100755 index c2277ba3..00000000 --- 
a/training/common/usr/libexec/bootc-generic-growpart +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash -set -eu - -backing_device=$(findmnt -vno SOURCE /sysroot) -echo "Backing device: ${backing_device}" -syspath=/sys/class/block/$(basename "${backing_device}") -if ! test -d "${syspath}"; then - echo "failed to find backing device ${syspath}"; exit 1 -fi - -# Handling devicemapper targets is a whole other thing -case $backing_device in - /dev/mapper/*) "Not growing $backing_device"; exit 0 ;; -esac - -# Note that we expect that the rootfs is on a partition -partition=$(cat "${syspath}"/partition) - -# Walk up to find the parent blockdev -parentpath=$(dirname "$(realpath "${syspath}")") -devmajmin=$(cat "${parentpath}"/dev) -parent="/dev/block/${devmajmin}" - -# Grow the partition -tmpf=$(mktemp) -# Ignore errors because growpart exits 1 if nothing changed; -# we need to check the output for NOCHANGE: -if ! /usr/bin/growpart "${parent}" "${partition}" > "${tmpf}"; then - cat "${tmpf}" - if grep -qEe '^NOCHANGE: ' "${tmpf}"; then - exit 0 - fi - echo "growpart failed" - exit 1 -fi -cat "${tmpf}" -# Now, temporarily remount the sysroot writable in our mount namespace -mount -o remount,rw /sysroot -# And defer to systemd's growfs wrapper which handles dispatching on -# the target filesystem type. -/usr/lib/systemd/systemd-growfs /sysroot diff --git a/training/common/usr/libexec/upgrade-informer b/training/common/usr/libexec/upgrade-informer new file mode 100755 index 00000000..0a36d503 --- /dev/null +++ b/training/common/usr/libexec/upgrade-informer @@ -0,0 +1,32 @@ +#!/bin/bash + +# Run the command and capture its output +output=$(bootc upgrade --check | sed -e 1q) +message_file="/etc/motd.d/upgrade-message" +bootc_auth="/etc/ostree/auth.json" + +if [[ $output == Update\ available* ]]; then + if [[ ! 
-f $message_file ]]; then + echo "New version was found" + bootc_image=$(echo "$output" | awk '{print $4}') + # If auth file exists we should use it + auth_params="" + if [[ -f $bootc_auth ]]; then + auth_params="--authfile $bootc_auth" + fi + + # Get image version + # shellcheck disable=SC2086 + image_version_id=$(skopeo inspect --format json $auth_params "$bootc_image" | jq '.Labels | .["image_version_id"] // empty' | tr -d '"') + + # If upgrade available, write the output to the file + echo -e "\n\n ** Attention! ** \n** A new $image_version_id version is available **\n\ +** In order to apply it run: bootc upgrade --apply \n\ +** Please note that the system will reboot after the upgrade ** \n\n" > $message_file + fi +else + echo "No upgrade was found" + rm $message_file 2> /dev/null +fi + +echo "Finished running upgrade informer" diff --git a/training/nvidia-bootc/Containerfile b/training/nvidia-bootc/Containerfile index 438b9e34..22019d76 100644 --- a/training/nvidia-bootc/Containerfile +++ b/training/nvidia-bootc/Containerfile @@ -101,6 +101,8 @@ COPY --from=builder /home/builder/yum-packaging-precompiled-kmod/RPMS/*/*.rpm /r COPY --from=builder --chmod=444 /home/builder/yum-packaging-precompiled-kmod/tmp/firmware/*.bin /lib/firmware/nvidia/${DRIVER_VERSION}/ # Temporary workaround until the permanent fix for libdnf is merged COPY nvidia-toolkit-firstboot.service /usr/lib/systemd/system/nvidia-toolkit-firstboot.service +# Enable common services +COPY build/usr /usr ARG IMAGE_VERSION_ID @@ -148,12 +150,20 @@ RUN mv /etc/selinux /etc/selinux.tmp \ dnf module enable -y nvidia-driver:${DRIVER_BRANCH} && \ dnf install -y nvidia-fabric-manager-${DRIVER_VERSION} libnvidia-nscq-${DRIVER_BRANCH}-${DRIVER_VERSION} ; \ fi \ - # Install rhc connect for insights telemetry gathering && . 
/etc/os-release && if [ "${ID}" == "rhel" ]; then \ + # Install rhc connect for insights telemetry gathering dnf install -y rhc rhc-worker-playbook; \ + # Adding rhel ai identity to os-release file for insights usage sed -i -e "/^VARIANT=/ {s/^VARIANT=.*/VARIANT=\"RHEL AI\"/; t}" -e "\$aVARIANT=\"RHEL AI\"" /usr/lib/os-release; \ sed -i -e "/^VARIANT_ID=/ {s/^VARIANT_ID=.*/VARIANT_ID=rhel_ai/; t}" -e "\$aVARIANT_ID=rhel_ai" /usr/lib/os-release; \ sed -i -e "/^RHEL_AI_VERSION_ID=/ {s/^RHEL_AI_VERSION_ID=.*/RHEL_AI_VERSION_ID='${IMAGE_VERSION_ID}'/; t}" -e "\$aRHEL_AI_VERSION_ID='${IMAGE_VERSION_ID}'" /usr/lib/os-release; \ + + # enable upgrade informer timer + ln -s /usr/lib/systemd/system/upgrade-informer.timer /usr/lib/systemd/system/timers.target.wants/upgrade-informer.timer; \ + # enable upgrade informer service, added as we need it to start on boot + ln -s /usr/lib/systemd/system/upgrade-informer.service /usr/lib/systemd/system/basic.target.wants/upgrade-informer.service; \ + # disable auto upgrade service + rm -f /usr/lib/systemd/system/default.target.wants/bootc-fetch-apply-updates.timer; \ fi \ && dnf clean all \ && ln -s ../cloud-init.target /usr/lib/systemd/system/default.target.wants \ @@ -164,6 +174,7 @@ RUN mv /etc/selinux /etc/selinux.tmp \ && ln -s /usr/lib/systemd/system/nvidia-fabricmanager.service /etc/systemd/system/multi-user.target.wants/nvidia-fabricmanager.service \ && ln -s /usr/lib/systemd/system/nvidia-persistenced.service /etc/systemd/system/multi-user.target.wants/nvidia-persistenced.service + ARG SSHPUBKEY # The --build-arg "SSHPUBKEY=$(cat ~/.ssh/id_rsa.pub)" option inserts your