better cmd line args and instructions
mmoskal committed Jan 24, 2024
1 parent a91de08 commit ef26c9c
Showing 5 changed files with 25 additions and 22 deletions.
22 changes: 14 additions & 8 deletions README.md
@@ -129,6 +129,20 @@ rustup target add wasm32-wasi
rustup component add rustfmt
```

### Running local server

If you have CUDA, go to `rllm/` and run `./server.sh orca`.
This will run the inference server with the Orca-2 13B model (which is expected by the test cases).

If you don't have CUDA, go to `cpp-rllm/` and run `./cpp-server.sh phi2`.

Both of these commands first compile aicirt and the inference engine,
and then run them.
You can also try other models; see the README files for [rllm](rllm/README.md) and
[cpp-rllm](cpp-rllm/README.md), as well as the shell scripts themselves, for details.

Any command-line arguments after the model name are passed through to the inference server; for example, `./server.sh phi2 --help` lists the available options, as in the sketch below.
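
For quick reference, a minimal sketch of the commands above (model names and flags come straight from the scripts; nothing beyond that is assumed):

```sh
# With CUDA: build aicirt and the rLLM engine, then serve the Orca-2 13B model
cd rllm && ./server.sh orca

# Without CUDA: build and run the cpp-rllm engine with the Phi-2 model
cd cpp-rllm && ./cpp-server.sh phi2

# Arguments after the model name are forwarded to the inference server
cd rllm && ./server.sh phi2 --help
```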

### Interacting with server

To get started interacting with a cloud AICI server, first export the API key.
@@ -152,14 +166,6 @@ Run `./aici.sh -h` to see usage info.
If the server is running with the Orca-2 13B model,
you can also run tests with `pytest` for the DeclCtrl, or with `./scripts/test-pyctrl.sh` for PyCtrl.
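
A quick sketch of the client-side commands mentioned above (it assumes the server is already running and the API key, where needed, has been exported as described):

```sh
./aici.sh -h                # usage info for the client script
pytest                      # DeclCtrl tests (expect the Orca-2 13B server)
./scripts/test-pyctrl.sh    # PyCtrl tests
```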

### Running local server

To run rLLM server, go to `rllm/` and run `./server.sh orca`.
This will run the inference server with Orca-2 13B model (which is expected by testcases).
If you don't have CUDA, go to `cpp-rllm/` and run `./cpp-server.sh phi2`.
You can also try other models, see [rllm/README.md](rllm/README.md) and
[cpp-rllm/README.md](cpp-rllm/README.md) for details.

## Security

- `aicirt` runs in a separate process, and can run under a different user than the LLM engine
1 change: 1 addition & 0 deletions cpp-rllm/cpp-server.sh
@@ -46,6 +46,7 @@ case "$1" in
;;
* )
echo "usage: $0 [--cuda] [--debug] [phi2|orca|build] [rllm_args...]"
echo "Try $0 phi2 --help to see available rllm_args"
exit 1
;;
esac
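
For illustration, invocations consistent with the usage string above (which flag/model combinations are useful is an assumption beyond what the usage line itself states):

```sh
./cpp-server.sh phi2              # build and run with the Phi-2 model
./cpp-server.sh --cuda orca       # CUDA build with the Orca model
./cpp-server.sh phi2 --help       # list the rllm_args the inference server accepts
```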
4 changes: 3 additions & 1 deletion rllm/README.md
@@ -1,7 +1,9 @@
# rLLM

This is a partial port of [vLLM](https://github.com/vllm-project/vllm)
to Rust and [tch-rs](https://github.com/LaurentMazare/tch-rs).
to Rust and [tch-rs](https://github.com/LaurentMazare/tch-rs)
(bindings for [libtorch](https://github.com/pytorch/pytorch/blob/main/docs/libtorch.rst),
which is the basis of [PyTorch](https://github.com/pytorch/pytorch)).
It is mostly meant as a proving ground for AICI (AI Controller Interface) integration.


18 changes: 6 additions & 12 deletions rllm/server.sh
@@ -1,7 +1,7 @@
#!/bin/sh

set -e
REL=
REL=--release
LOOP=
BUILD=
ADD_ARGS=
@@ -22,20 +22,13 @@ if [ "X$P" != "X" ] ; then
kill $P
fi

if [ "$1" = loop ] ; then
REL=--release
if [ "$1" = "--loop" ] ; then
LOOP=1
shift
fi

if [ "$1" = bench ] ; then
REL=--release
shift
fi

if [ "$1" = warm ] ; then
REL=--release
ADD_ARGS="--warmup-only"
if [ "$1" = "--debug" ] ; then
REL=
shift
fi

@@ -64,7 +57,8 @@ case "$1" in
REL=--release
;;
* )
echo "try one of models: phi, phi2, 7b, code, code34"
echo "usage: $0 [--loop] [--debug] [phi|phi2|7b|code|orca|build] [rllm_args...]"
echo "Try $0 phi2 --help to see available rllm_args"
exit 1
;;
esac
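
For illustration, invocations consistent with the usage string above (a sketch; `--debug` drops the `--release` build flag as shown earlier in this diff, and `--loop` enables the script's loop mode):

```sh
./server.sh orca              # release build, Orca-2 13B model (expected by the test suite)
./server.sh --debug phi2      # debug (non-release) build
./server.sh --loop phi2       # enable loop mode
./server.sh phi2 --help       # list the rllm_args the inference server accepts
```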
2 changes: 1 addition & 1 deletion scripts/host.sh
@@ -40,7 +40,7 @@ function docker_cmd() {

if [ "$INNER" = 1 ] ; then
echo "Running inner..."
docker_cmd "cd rllm && ./server.sh loop $MODEL"
docker_cmd "cd rllm && ./server.sh --loop $MODEL"
exit 0
fi

