Skip to content

Commit

Permalink
Merge pull request #408 from cdoern/vllm
Browse files Browse the repository at this point in the history
add vllm server container, ability to start the server, ilab generate
  • Loading branch information
n1hility authored May 1, 2024
2 parents 4fe591d + c6d34b1 commit d4870e4
Show file tree
Hide file tree
Showing 5 changed files with 105 additions and 1 deletion.
6 changes: 5 additions & 1 deletion training/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,17 @@ help:
@echo " - amd"
@echo " - nvidia"
@echo " - intel"
@echo " - vllm"

.PHONY: amd nvidia intel
# Each accelerator target delegates to the Makefile in its own subdirectory.
# $(MAKE) is used instead of a literal `make` so that the sub-make honours
# `make -n` and shares the parent's jobserver under `make -j`.
.PHONY: amd nvidia intel vllm
amd:
	$(MAKE) -C amd-bootc/ bootc
nvidia:
	$(MAKE) -C nvidia-bootc/ dtk bootc
intel:
	$(MAKE) -C intel-bootc/ bootc
vllm:
	$(MAKE) -C vllm/ image

.PHONY:
instruct-nvidia:
Expand All @@ -27,3 +30,4 @@ instruct: instruct-nvidia instruct-amd

# Remove the build output directory. Declared phony so that a file or
# directory named "clean" can never make this target look up to date.
.PHONY: clean
clean:
	rm -rf build

65 changes: 65 additions & 0 deletions training/ilab-wrapper/ilab-full
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#!/bin/bash

# Template values replaced by container build.
# Each "__REPLACE_*__" placeholder is substituted with a concrete value when
# the image is built, so the placeholder text must not be edited by hand.
# Endpoint passed to `ilab generate` when the caller does not supply one.
ENDPOINT_URL="__REPLACE_ENDPOINT_URL__"
# Value passed as `--device` to `ilab train` when the caller does not supply one.
TRAIN_DEVICE="__REPLACE_TRAIN_DEVICE__"
# Value passed as `--device` to `podman run`.
CONTAINER_DEVICE="__REPLACE_CONTAINER_DEVICE__"
# NOTE(review): presumably the container image to run — confirm against the
# podman invocations further down.
IMAGE_NAME="__REPLACE_IMAGE_NAME__"
# Value passed to the serve command as --tensor-parallel-size.
GPU_AMOUNT="__REPLACE_GPU_AMOUNT__"

# HF caching uses relative symlink structures, so keep cache relative to
# the central working directory
# CONTAINER_CACHE is exported into the container as HF_HOME.
CONTAINER_CACHE="/instructlab/cache"
# NOTE(review): WORKDIR (not $(pwd)) is what gets mounted at /instructlab, so
# CONTAINER_CACHE corresponds to "${WORKDIR}/cache" on the host, while
# HOST_CACHE points at "$(pwd)/cache" — confirm these are meant to differ.
HOST_CACHE="$(pwd)/cache"
WORKDIR="$(pwd)/model"

# has_argument MATCH [ARG...]
#
# Returns 0 when any ARG contains MATCH as a substring, so both the
# "--flag value" and "--flag=value" spellings are detected. Returns 1
# otherwise, including when no ARGs are given.
has_argument() {
    # Keep the needle and loop variable local so they do not leak into (or
    # clobber) variables of the same name in the calling script.
    local match=$1
    local arg
    shift
    for arg in "$@"; do
        if [[ "$arg" == *"$match"* ]]; then
            return 0
        fi
    done
    return 1
}

# Image to run. Only IMAGE_NAME is filled in by the container build, so fall
# back to it when CONTAINER_NAME is not provided by the environment; without
# this fallback podman would receive an empty image argument.
CONTAINER_NAME="${CONTAINER_NAME:-${IMAGE_NAME}}"

mkdir -p "${HOST_CACHE}"

# Base invocation for one-shot `ilab` subcommands. The image entrypoint is
# cleared so that the command appended after the image name is what runs.
PODMAN_COMMAND=("podman" "run" "--rm" "-it" "--device" "${CONTAINER_DEVICE}" \
    "--security-opt" "label=disable" "--net" "host" \
    "-v" "${WORKDIR}:/instructlab" "--entrypoint" "" \
    "-e" "HF_HOME=${CONTAINER_CACHE}" \
    "${CONTAINER_NAME}")

# Invocation for `serve`: keeps the image's own entrypoint and passes it the
# server options (chat template, bind address/port, model path, GPU count).
PODMAN_COMMAND_SERVE=("podman" "run" "--rm" "-it" "--device" "${CONTAINER_DEVICE}" \
    "--security-opt" "label=disable" "--net" "host" \
    "-v" "${WORKDIR}:/instructlab" \
    "-e" "HF_HOME=${CONTAINER_CACHE}" \
    "${CONTAINER_NAME}" "--chat-template=mixtral.jinja" "--host=0.0.0.0" "--port=8080" "--model=/instructlab" "--tensor-parallel-size=${GPU_AMOUNT}")

# Subcommand dispatch. For init/train/generate a default is injected only when
# the caller did not pass the corresponding option; otherwise control falls
# through to the generic pass-through at the bottom with "$@" left untouched.
if [[ "$1" = "init" ]]; then
    if ! has_argument "--repository" "$@"; then
        shift
        "${PODMAN_COMMAND[@]}" ilab init \
            --repository https://github.com/instructlab/taxonomy.git "$@"
        exit $?
    fi
elif [[ "$1" = "train" ]]; then
    if ! has_argument "--device" "$@"; then
        shift
        "${PODMAN_COMMAND[@]}" ilab train --device "${TRAIN_DEVICE}" "$@"
        exit $?
    fi
elif [[ "$1" = "serve" ]]; then
    # Start the vLLM server container; extra arguments are not forwarded.
    "${PODMAN_COMMAND_SERVE[@]}"
    exit $?
elif [[ "$1" = "generate" ]]; then
    # Run `ilab generate` against the configured endpoint.
    # NOTE(review): /mnt/model/ is not among the paths mounted by
    # PODMAN_COMMAND above — confirm it exists inside the image.
    if ! has_argument "--endpoint-url" "$@"; then
        shift
        "${PODMAN_COMMAND[@]}" ilab generate --model /mnt/model/ --model-family=mixtral --num-instructions 5000 --endpoint-url "${ENDPOINT_URL}" "$@"
        exit $?
    fi
fi

# Generic pass-through: run `ilab` with the caller's arguments unchanged.
"${PODMAN_COMMAND[@]}" ilab "$@"

5 changes: 5 additions & 0 deletions training/vllm/Containerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Builds on a pinned tgis-vllm release and adds the Mixtral chat template.
FROM quay.io/wxpe/tgis-vllm:release.4e3ff78

# Run the remaining build steps as root.
# NOTE(review): the user is never switched back afterwards, so root remains
# the image's configured user — confirm this is intended.
USER root
# Provide the unversioned libcuda.so name as a symlink to libcuda.so.1.
RUN ln -s /usr/lib64/libcuda.so.1 /usr/lib64/libcuda.so
# Copy the chat template into the image's current working directory.
COPY mixtral.jinja .
18 changes: 18 additions & 0 deletions training/vllm/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Image coordinates; override on the command line, e.g.
#   make image REGISTRY_ORG=myorg IMAGE_TAG=dev
REGISTRY ?= quay.io
REGISTRY_ORG ?= ai-lab
IMAGE_NAME ?= vllm
IMAGE_TAG ?= latest

CONTAINER_TOOL ?= podman

# Optional target architecture (e.g. amd64). When set, the build is given
# "--platform linux/$(ARCH)"; when empty, no --platform option is passed.
ARCH ?=

# NOTE(review): not referenced by any rule in this Makefile.
DRIVER_VERSION ?=
KERNEL_VERSION ?=

.PHONY: default
default: image

# Build the container image from ./Containerfile and tag it as
# $(REGISTRY)/$(REGISTRY_ORG)/$(IMAGE_NAME):$(IMAGE_TAG).
# The build context "." is passed explicitly so the recipe does not end on a
# dangling line continuation and also works with container tools that require
# a context argument.
.PHONY: image
image:
	"${CONTAINER_TOOL}" build \
		$(ARCH:%=--platform linux/%) \
		--file Containerfile \
		--tag "${REGISTRY}/${REGISTRY_ORG}/${IMAGE_NAME}:${IMAGE_TAG}" \
		.
12 changes: 12 additions & 0 deletions training/vllm/mixtral.jinja
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{#- Mixtral chat template: emits the BOS token, wraps each user message in [INST] ... [/INST], and appends the EOS token after each assistant message. Messages with any other role produce no output. -#}
{% set bos_token = "<s>" %}

{% set eos_token = "</s>" %}

{{ bos_token }}
{% for message in messages %}
{% if message['role'] == 'user' %}
{{ '[INST] ' + message['content'] + ' [/INST]' }}
{% elif message['role'] == 'assistant' %}
{{ message['content'] + eos_token}}
{% endif %}
{% endfor %}

0 comments on commit d4870e4

Please sign in to comment.