Skip to content

Commit

Permalink
update vllm docker scripts; bump vllm
Browse files Browse the repository at this point in the history
  • Loading branch information
mmoskal committed Apr 17, 2024
1 parent 59e90ab commit 5a5a28f
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 8 deletions.
4 changes: 3 additions & 1 deletion .devcontainer/Dockerfile-prod-vllm
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,7 @@ RUN aicirt --module wasm/aici_guidance_ctrl.wasm --tag guidance
RUN aicirt --module wasm/aici_pyctrl.wasm --tag pyctrl --gh-module gh:microsoft/aici/pyctrl
RUN aicirt --module wasm/aici_jsctrl.wasm --tag jsctrl --gh-module gh:microsoft/aici/jsctrl

ENV RUST_LOG info,tokenizers=error

# TODO: not sure whether --enforce-eager is actually needed here
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server", "--enforce-eager", "--use-v2-block-manager", "--enable-chunked-prefill", "--aici-rt=/usr/bin/aicirt"]
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server", "--enforce-eager", "--use-v2-block-manager", "--enable-chunked-prefill", "--aici-rt=/usr/bin/aicirt", "-A--restricted"]
2 changes: 1 addition & 1 deletion py/vllm
8 changes: 8 additions & 0 deletions scripts/docker-run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,21 @@ case "$1" in
shift
ADD_ARGS="--model microsoft/Orca-2-13b --revision refs/pr/22 --aici-tokenizer=orca"
;;
--folder)
    # Use a model stored in a local directory. The argument following
    # --folder is bind-mounted into the container at /vllm-workspace/model,
    # and the server flags are pointed at ./model.
    shift
    if [ -z "$1" ]; then
        # Without this check an empty argument makes `cd` go to $HOME,
        # which would then be mounted as the model directory.
        echo "--folder requires a directory argument" >&2
        exit 1
    fi
    # Resolve to an absolute path (the --mount source is built from it).
    # `&&` plus the explicit failure branch stop the script when the
    # directory cannot be entered, instead of silently falling back to the
    # current directory. CDPATH is cleared so `cd` prints nothing extra.
    D=$(CDPATH= cd -- "$1" && pwd) || {
        echo "cannot access model folder: $1" >&2
        exit 1
    }
    DOCKER_ARGS="--mount type=bind,source=$D,target=/vllm-workspace/model"
    ADD_ARGS="--model ./model --aici-tokenizer ./model/tokenizer.json --tokenizer ./model"
    shift
    ;;
--shell)
# Start the container with an interactive bash shell (-it) as its
# entrypoint instead of the image's default one, and clear VLLM_ARGS
# (presumably so no server flags are handed to bash; the use of VLLM_ARGS
# is outside this excerpt).
shift
DOCKER_ARGS="--entrypoint /bin/bash -it"
VLLM_ARGS=""
;;
esac

set -x
docker run \
--privileged \
--gpus=all \
Expand Down
12 changes: 6 additions & 6 deletions scripts/vllm-server.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
set -e
set -x

MODEL="microsoft/Orca-2-13b"
MODEL_REV="refs/pr/22"
AICI_TOK=orca
# Select the model-related command-line flags for the server.
#   FOLDER unset or empty -> microsoft/Orca-2-13b at revision refs/pr/22
#                            with the "orca" AICI tokenizer.
#   FOLDER set            -> model, tokenizer and AICI tokenizer.json are
#                            all taken from ./$FOLDER.
# MODEL_ARGS is a plain string of several flags, so it is meant to be
# expanded unquoted where it is used.
case "$FOLDER" in
    "")
        MODEL_ARGS="--model microsoft/Orca-2-13b --revision refs/pr/22 --aici-tokenizer orca"
        ;;
    *)
        MODEL_ARGS="--model ./$FOLDER --aici-tokenizer ./$FOLDER/tokenizer.json --tokenizer ./$FOLDER"
        ;;
esac

(cd aicirt && cargo build --release)

Expand All @@ -16,9 +18,7 @@ python3 -m vllm.entrypoints.openai.api_server \
--use-v2-block-manager \
--enable-chunked-prefill \
--aici-rt ./target/release/aicirt \
--aici-tokenizer $AICI_TOK \
--model $MODEL \
--revision $MODEL_REV \
$MODEL_ARGS \
--port 4242 --host 127.0.0.1 \
"$@"

Expand Down

0 comments on commit 5a5a28f

Please sign in to comment.