diff --git a/.devcontainer/Dockerfile-prod-vllm b/.devcontainer/Dockerfile-prod-vllm index fc55a90a..875e0905 100644 --- a/.devcontainer/Dockerfile-prod-vllm +++ b/.devcontainer/Dockerfile-prod-vllm @@ -28,5 +28,7 @@ RUN aicirt --module wasm/aici_guidance_ctrl.wasm --tag guidance RUN aicirt --module wasm/aici_pyctrl.wasm --tag pyctrl --gh-module gh:microsoft/aici/pyctrl RUN aicirt --module wasm/aici_jsctrl.wasm --tag jsctrl --gh-module gh:microsoft/aici/jsctrl +ENV RUST_LOG info,tokenizers=error + # not sure about --enforce-eager -ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server", "--enforce-eager", "--use-v2-block-manager", "--enable-chunked-prefill", "--aici-rt=/usr/bin/aicirt"] +ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server", "--enforce-eager", "--use-v2-block-manager", "--enable-chunked-prefill", "--aici-rt=/usr/bin/aicirt", "-A--restricted"] diff --git a/py/vllm b/py/vllm index c524af5b..7cc6ba87 160000 --- a/py/vllm +++ b/py/vllm @@ -1 +1 @@ -Subproject commit c524af5ba650674f2bcf3d4d92fd4ad718f229f4 +Subproject commit 7cc6ba87bec9f783dbed7982169a1eb2fb4a8d74 diff --git a/scripts/docker-run.sh b/scripts/docker-run.sh index 8e696014..db916b8c 100755 --- a/scripts/docker-run.sh +++ b/scripts/docker-run.sh @@ -10,6 +10,13 @@ case "$1" in shift ADD_ARGS="--model microsoft/Orca-2-13b --revision refs/pr/22 --aici-tokenizer=orca" ;; + --folder) + shift + D=`cd $1; pwd` + DOCKER_ARGS="--mount type=bind,source=$D,target=/vllm-workspace/model" + ADD_ARGS="--model ./model --aici-tokenizer ./model/tokenizer.json --tokenizer ./model" + shift + ;; --shell) shift DOCKER_ARGS="--entrypoint /bin/bash -it" @@ -17,6 +24,7 @@ case "$1" in ;; esac +set -x docker run \ --privileged \ --gpus=all \ diff --git a/scripts/vllm-server.sh b/scripts/vllm-server.sh index 6297b673..fa5b10c9 100755 --- a/scripts/vllm-server.sh +++ b/scripts/vllm-server.sh @@ -3,9 +3,11 @@ set -e set -x -MODEL="microsoft/Orca-2-13b" -MODEL_REV="refs/pr/22" -AICI_TOK=orca +if [ -z "$FOLDER" ]; then + MODEL_ARGS="--model microsoft/Orca-2-13b --revision refs/pr/22 --aici-tokenizer orca" +else + MODEL_ARGS="--model ./$FOLDER --aici-tokenizer ./$FOLDER/tokenizer.json --tokenizer ./$FOLDER" +fi (cd aicirt && cargo build --release) @@ -16,9 +18,7 @@ python3 -m vllm.entrypoints.openai.api_server \ --use-v2-block-manager \ --enable-chunked-prefill \ --aici-rt ./target/release/aicirt \ - --aici-tokenizer $AICI_TOK \ - --model $MODEL \ - --revision $MODEL_REV \ + $MODEL_ARGS \ --port 4242 --host 127.0.0.1 \ "$@"