Fix ngl and unwrap issue (#87)
* [Example] ggml: breaking: Must specify the n_gpu_layers on macOS

Signed-off-by: hydai <[email protected]>

* [Example] ggml: never use unwrap to check the stream-stdout

Signed-off-by: hydai <[email protected]>
hydai committed Jan 23, 2024
1 parent a5bc02c commit 5db620f
Showing 3 changed files with 17 additions and 4 deletions.
13 changes: 12 additions & 1 deletion wasmedge-ggml-llama-interactive/README.md
@@ -110,7 +110,18 @@ wasmedge --dir .:. \

 #### macOS

-macOS will use the Metal framework by default. You don't have to specify the `n_gpu_layers` parameter.
+macOS uses the Metal framework by default. llama.cpp now supports the `n_gpu_layers` parameter, so make sure you set it to offload the model's tensor layers to the GPU.
+
+Use the following command to make sure the model's tensor layers are offloaded to the GPU:
+
+```
+# llama2-7b-chat provides 35 GPU layers, so set a value greater than or equal to 35.
+# If you use a larger model, this value may change.
+wasmedge --dir .:. \
+  --env n_gpu_layers=35 \
+  --nn-preload default:GGML:AUTO:llama-2-7b-chat.Q5_K_M.gguf \
+  wasmedge-ggml-llama-interactive.wasm default
+```

#### Linux + CUDA

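For context on how the flag reaches the backend, here is a minimal sketch of reading `n_gpu_layers` from the environment and folding it into the JSON options this example passes to the WASI-NN GGML backend. The dash-style `n-gpu-layers` key is an assumption inferred from the `stream-stdout` key in the main.rs diff below; this commit does not show the option-parsing code.

```rust
// Sketch only: how an env var such as `n_gpu_layers` could be collected
// into the JSON options handed to the GGML backend. The "n-gpu-layers"
// metadata key is an assumption, not confirmed by this commit.
use serde_json::{json, Value};
use std::env;

fn build_options() -> Value {
    let mut options = json!({});
    if let Ok(raw) = env::var("n_gpu_layers") {
        match raw.parse::<u64>() {
            // Offload up to `n` tensor layers to the GPU (Metal on macOS).
            Ok(n) => options["n-gpu-layers"] = json!(n),
            Err(_) => eprintln!("[WARN] n_gpu_layers is not a number: {raw}"),
        }
    }
    options
}

fn main() {
    // Run with e.g. `n_gpu_layers=35` set in the environment.
    println!("options = {}", build_options());
}
```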
8 changes: 5 additions & 3 deletions wasmedge-ggml-llama-interactive/src/main.rs
@@ -169,9 +169,11 @@ fn main() {
     }

     // Check streaming related options.
-    if is_compute_single && options["stream-stdout"].as_bool().unwrap() {
-        println!("[ERROR] compute_single and stream_stdout cannot be enabled at the same time.");
-        std::process::exit(1);
+    if is_compute_single {
+        if let Some(true) = options["stream-stdout"].as_bool() {
+            println!("[ERROR] compute_single and stream_stdout cannot be enabled at the same time.");
+            std::process::exit(1);
+        }
     }

     // We support both llama and chatml prompt format.
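Why the rewrite above is needed: indexing a `serde_json::Value` with a key that was never set yields `Value::Null`, so `.as_bool()` returns `None` and the old `.unwrap()` panics whenever `stream-stdout` is absent. A standalone sketch of the failure mode and the safe pattern (assuming the options are a `serde_json::Value`, as the `options[...]` indexing suggests):

```rust
use serde_json::json;

fn main() {
    // Options where the user never set "stream-stdout".
    let options = json!({ "enable-log": true });

    // Old pattern: panics, because options["stream-stdout"] is Value::Null
    // and Null.as_bool() is None.
    // let enabled = options["stream-stdout"].as_bool().unwrap();

    // New pattern: only fires when the key exists and is `true`.
    if let Some(true) = options["stream-stdout"].as_bool() {
        println!("stream-stdout enabled");
    } else {
        println!("stream-stdout disabled or unset");
    }
}
```

An equivalent one-liner, `options["stream-stdout"].as_bool() == Some(true)`, would keep the original single-condition shape; the nested `if let` in this commit behaves the same way.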
Binary file not shown.
