
fixup of options
mmoskal committed Jan 23, 2024
1 parent a4cd3d7 commit cb0f973
Showing 4 changed files with 11 additions and 16 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -156,7 +156,7 @@ you can also run tests with `pytest` for the DeclCtrl, or with `./scripts/test-p

 To run rLLM server, go to `rllm/` and run `./server.sh orca`.
 This will run the inference server with Orca-2 13B model (which is expected by testcases).
-If you don't have CUDA, go to `cpp-rllm/` and run `./cpp-server.sh cpu phi2`.
+If you don't have CUDA, go to `cpp-rllm/` and run `./cpp-server.sh phi2`.
 You can also try other models, see [rllm/README.md](rllm/README.md) and
 [cpp-rllm/README.md](cpp-rllm/README.md) for details.
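
For quick reference, the two launch paths this paragraph describes, as shell commands (a sketch; assumes you start from the repository root and have the model weights the scripts expect):

```bash
# with CUDA: run the rLLM server with the Orca-2 13B model
cd rllm && ./server.sh orca

# without CUDA: run the llama.cpp-based server with the phi2 model
cd cpp-rllm && ./cpp-server.sh phi2
```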

5 changes: 2 additions & 3 deletions cpp-rllm/README.md
@@ -11,8 +11,7 @@ If you're not using the supplied docker container follow the
 To compile and run first aicirt and then the rllm server, run:
 
 ```bash
-./cpp-server.sh cpu phi2
+./cpp-server.sh phi2
 ```
 
-You can also try `gpu` instead of `gpu` which will try to use CUDA.
-
+You can also try passing `--cuda` before `phi2`.
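
Both flags go before the model name; a sketch of typical invocations of the updated script:

```bash
./cpp-server.sh phi2            # CPU-only, release build (the new default)
./cpp-server.sh --cuda phi2     # build with the cuda feature enabled
./cpp-server.sh --debug phi2    # debug build (--release is dropped)
```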
18 changes: 6 additions & 12 deletions cpp-rllm/cpp-server.sh
@@ -1,7 +1,7 @@
 #!/bin/sh
 
 set -e
-REL=
+REL=--release
 LOOP=
 BUILD=
 ADD_ARGS=
@@ -24,19 +24,14 @@ fi

 VER="--no-default-features"
 
-if [ "$1" = gpu ] ; then
-    REL=--release
+if [ "$1" = "--cuda" ] ; then
     VER="$VER --features cuda"
     shift
-elif [ "$1" = cpu ] ; then
-    REL=--release
-    shift
-elif [ "$1" = debug ] ; then
+fi
+
+if [ "$1" = "--debug" ] ; then
     REL=
     shift
-else
-    echo "usage: $0 [gpu|cpu|debug] [phi2|orca|build]"
-    exit 1
 fi
 
 case "$1" in
@@ -48,10 +43,9 @@ case "$1" in
         ;;
     build )
         BUILD=1
-        REL=--release
         ;;
     * )
-        echo "try one of models: phi2, orca"
+        echo "usage: $0 [--cuda] [--debug] [phi2|orca|build] [rllm_args...]"
         exit 1
         ;;
 esac
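
One consequence of the sequential `if`/`shift` parsing above: the flags are only recognized in the order `--cuda`, then `--debug`. A small sketch of accepted and rejected orderings (hypothetical invocations, inferred from the script):

```bash
./cpp-server.sh --cuda --debug phi2   # ok: --cuda is consumed first, then --debug
./cpp-server.sh --debug phi2          # ok: --cuda is optional
./cpp-server.sh --debug --cuda phi2   # fails: after --debug is consumed, "--cuda"
                                      # reaches the case statement, which prints usage
```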
2 changes: 2 additions & 0 deletions rllm/src/llamacpp/loader.rs
@@ -46,6 +46,8 @@ fn do_load(args: &mut LoaderArgs) -> Result<cpp::Model> {
     let mut mparams = cpp::ModelParams::default();
     // TODO: make this configurable
     mparams.set_split_mode(cpp::SplitMode::None);
+    // don't GPU offload on Intel macs - it just fails there
+    #[cfg(not(all(target_os = "macos", target_arch = "x86_64")))]
     mparams.n_gpu_layers = 1000;
 
     let m = cpp::Model::from_file(file.to_str().unwrap(), mparams)?;
