Skip to content

Commit

Permalink
[Example] ggml: add model not found test (#130)
Browse files Browse the repository at this point in the history
* [Example] ggml: add model not found test

Signed-off-by: dm4 <[email protected]>

* [Example] ggml: update wasmedge-wasi-nn to 0.7.1

Signed-off-by: dm4 <[email protected]>

* [Example] ggml: add unload test

Signed-off-by: dm4 <[email protected]>

---------

Signed-off-by: dm4 <[email protected]>
  • Loading branch information
dm4 authored Apr 23, 2024
1 parent 3300797 commit 7f67945
Show file tree
Hide file tree
Showing 22 changed files with 201 additions and 13 deletions.
22 changes: 22 additions & 0 deletions .github/workflows/llama.yml
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,28 @@ jobs:
default \
'JSON object with 5 country names as keys and their capitals as values: '
- name: Model Not Found
run: |
test -f ~/.wasmedge/env && source ~/.wasmedge/env
cd wasmedge-ggml/test/model-not-found
cargo build --target wasm32-wasi --release
time wasmedge --dir .:. \
--nn-preload default:GGML:AUTO:model-not-found.gguf \
target/wasm32-wasi/release/wasmedge-ggml-model-not-found.wasm \
default
- name: Unload
run: |
test -f ~/.wasmedge/env && source ~/.wasmedge/env
cd wasmedge-ggml/test/unload
curl -LO https://huggingface.co/second-state/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_M.gguf
cargo build --target wasm32-wasi --release
time wasmedge --dir .:. \
--nn-preload default:GGML:AUTO:llama-2-7b-chat.Q5_K_M.gguf \
target/wasm32-wasi/release/wasmedge-ggml-unload.wasm \
default \
$'[INST] <<SYS>>\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you do not know the answer to a question, please do not share false information.\n<</SYS>>\nWhat is the capital of Japan?[/INST]'
- name: Build llama-stream
run: |
cd wasmedge-ggml/llama-stream
Expand Down
2 changes: 1 addition & 1 deletion wasmedge-ggml/basic/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ edition = "2021"

[dependencies]
serde_json = "1.0"
wasmedge-wasi-nn = "0.7.0"
wasmedge-wasi-nn = "0.7.1"
2 changes: 1 addition & 1 deletion wasmedge-ggml/chatml/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ edition = "2021"

[dependencies]
serde_json = "1.0"
wasmedge-wasi-nn = "0.7.0"
wasmedge-wasi-nn = "0.7.1"
2 changes: 1 addition & 1 deletion wasmedge-ggml/command-r/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ edition = "2021"

[dependencies]
serde_json = "1.0"
wasmedge-wasi-nn = "0.7.0"
wasmedge-wasi-nn = "0.7.1"
2 changes: 1 addition & 1 deletion wasmedge-ggml/embedding/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ edition = "2021"

[dependencies]
serde_json = "1.0"
wasmedge-wasi-nn = "0.7.0"
wasmedge-wasi-nn = "0.7.1"
2 changes: 1 addition & 1 deletion wasmedge-ggml/gemma/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ edition = "2021"

[dependencies]
serde_json = "1.0"
wasmedge-wasi-nn = "0.7.0"
wasmedge-wasi-nn = "0.7.1"
2 changes: 1 addition & 1 deletion wasmedge-ggml/grammar/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ edition = "2021"

[dependencies]
serde_json = "1.0"
wasmedge-wasi-nn = "0.7.0"
wasmedge-wasi-nn = "0.7.1"
2 changes: 1 addition & 1 deletion wasmedge-ggml/llama-stream/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ edition = "2021"

[dependencies]
serde_json = "1.0"
wasmedge-wasi-nn = "0.7.0"
wasmedge-wasi-nn = "0.7.1"
2 changes: 1 addition & 1 deletion wasmedge-ggml/llama/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ edition = "2021"

[dependencies]
serde_json = "1.0"
wasmedge-wasi-nn = "0.7.0"
wasmedge-wasi-nn = "0.7.1"
2 changes: 1 addition & 1 deletion wasmedge-ggml/llava-base64-stream/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ edition = "2021"

[dependencies]
serde_json = "1.0"
wasmedge-wasi-nn = "0.7.0"
wasmedge-wasi-nn = "0.7.1"
2 changes: 1 addition & 1 deletion wasmedge-ggml/llava/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ edition = "2021"

[dependencies]
serde_json = "1.0"
wasmedge-wasi-nn = "0.7.0"
wasmedge-wasi-nn = "0.7.1"
2 changes: 1 addition & 1 deletion wasmedge-ggml/multimodel/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ edition = "2021"

[dependencies]
serde_json = "1.0"
wasmedge-wasi-nn = "0.7.0"
wasmedge-wasi-nn = "0.7.1"
2 changes: 1 addition & 1 deletion wasmedge-ggml/nnrpc/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ edition = "2021"

[dependencies]
serde_json = "1.0"
wasmedge-wasi-nn = "0.7.0"
wasmedge-wasi-nn = "0.7.1"
8 changes: 8 additions & 0 deletions wasmedge-ggml/test/model-not-found/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[package]
name = "wasmedge-ggml-model-not-found"
version = "0.1.0"
edition = "2021"

[dependencies]
serde_json = "1.0"
wasmedge-wasi-nn = "0.7.1"
11 changes: 11 additions & 0 deletions wasmedge-ggml/test/model-not-found/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# `model-not-found`

Ensure that we get the `ModelNotFound` error when the model does not exist.

## Execute

```console
$ wasmedge --dir .:. \
--nn-preload default:GGML:AUTO:model-not-found.gguf \
wasmedge-ggml-model-not-found.wasm default
```
24 changes: 24 additions & 0 deletions wasmedge-ggml/test/model-not-found/src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
use std::env;
use wasmedge_wasi_nn::{self, BackendError, Error, ExecutionTarget, GraphBuilder, GraphEncoding};

/// Verifies that building a graph for a model that does not exist fails with
/// `BackendError::ModelNotFound`.
///
/// Usage: `<program> <model_name>`, where `<model_name>` is the name handed to
/// `GraphBuilder::build_from_cache`.
///
/// Prints `Model not found` on the expected error. Exits with status 1 when
/// the model name argument is missing, and panics when the graph builds
/// successfully or fails with any other error.
fn main() {
    let args: Vec<String> = env::args().collect();

    // Validate the argument count up front so a missing model name produces a
    // usage message instead of an index-out-of-bounds panic.
    if args.len() < 2 {
        let program = args
            .first()
            .map(String::as_str)
            .unwrap_or("wasmedge-ggml-model-not-found");
        println!("Usage: {} <model_name>", program);
        std::process::exit(1);
    }
    let model_name: &str = &args[1];

    // Try to create the graph; this is expected to fail.
    let graph =
        GraphBuilder::new(GraphEncoding::Ggml, ExecutionTarget::AUTO).build_from_cache(model_name);

    // Only the ModelNotFound backend error counts as success.
    match graph {
        Err(Error::BackendError(BackendError::ModelNotFound)) => {
            println!("Model not found");
        }
        Err(err) => {
            // Surface the unexpected error so a failing run can be diagnosed.
            panic!("Should be model not found, got error: {}", err);
        }
        Ok(_) => {
            panic!("Should be model not found, but the graph was built");
        }
    }
}
Binary file not shown.
2 changes: 1 addition & 1 deletion wasmedge-ggml/test/set-input-twice/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ edition = "2021"

[dependencies]
serde_json = "1.0"
wasmedge-wasi-nn = "0.7.0"
wasmedge-wasi-nn = "0.7.1"
8 changes: 8 additions & 0 deletions wasmedge-ggml/test/unload/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[package]
name = "wasmedge-ggml-unload"
version = "0.1.0"
edition = "2021"

[dependencies]
serde_json = "1.0"
wasmedge-wasi-nn = "0.7.1"
13 changes: 13 additions & 0 deletions wasmedge-ggml/test/unload/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# `unload`

Ensure that we can unload and reload the graph multiple times without errors.

## Execute

```console
$ curl -LO https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_M.gguf
$ wasmedge --dir .:. \
--nn-preload default:GGML:AUTO:llama-2-7b-chat.Q5_K_M.gguf \
wasmedge-ggml-unload.wasm default \
<prompt>
```
102 changes: 102 additions & 0 deletions wasmedge-ggml/test/unload/src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
use serde_json::json;
use serde_json::Value;
use std::env;
use wasmedge_wasi_nn::{
self, BackendError, Error, ExecutionTarget, GraphBuilder, GraphEncoding, GraphExecutionContext,
TensorType,
};

/// Builds the graph options object from environment variables.
///
/// * `enable_log` is stored under the `"enable-log"` key; defaults to `false`.
/// * `n_gpu_layers` is stored under the `"n-gpu-layers"` key; defaults to `0`.
///
/// The value of each variable that is set is parsed as JSON.
///
/// # Panics
///
/// Panics if a variable is set but its value is not valid JSON.
fn get_options_from_env() -> Value {
    let enable_log: Value = match env::var("enable_log") {
        Ok(val) => serde_json::from_str(val.as_str())
            .expect("invalid value for enable-log option (true/false)"),
        Err(_) => json!(false),
    };
    let n_gpu_layers: Value = match env::var("n_gpu_layers") {
        Ok(val) => {
            serde_json::from_str(val.as_str()).expect("invalid ngl value (unsigned integer)")
        }
        Err(_) => json!(0),
    };

    json!({
        "enable-log": enable_log,
        "n-gpu-layers": n_gpu_layers,
    })
}

/// Reads the output at `index` from `context` and returns it as a `String`.
///
/// The output is copied into a fixed-size byte buffer. The size reported by
/// `get_output` is clamped to the buffer length, and the bytes are converted
/// lossily, so invalid UTF-8 sequences are replaced rather than rejected.
///
/// # Panics
///
/// Panics with "Failed to get output" if `get_output` returns an error.
fn get_data_from_context(context: &GraphExecutionContext, index: usize) -> String {
    // Reserve room for 4096 tokens with an average token length of 6 bytes.
    const MAX_OUTPUT_BUFFER_SIZE: usize = 4096 * 6;

    let mut buffer = vec![0u8; MAX_OUTPUT_BUFFER_SIZE];
    let reported_size = context
        .get_output(index, &mut buffer)
        .expect("Failed to get output");
    // Never slice past the end of the buffer, whatever size was reported.
    let length = reported_size.min(MAX_OUTPUT_BUFFER_SIZE);

    String::from_utf8_lossy(&buffer[..length]).into_owned()
}

/// Returns the output at index 0 of `context` as a `String`.
///
/// Thin convenience wrapper around `get_data_from_context`.
fn get_output_from_context(context: &GraphExecutionContext) -> String {
    const OUTPUT_INDEX: usize = 0;
    get_data_from_context(context, OUTPUT_INDEX)
}

/// Loads the model, runs one inference and unloads the graph, five times in a
/// row, to make sure the unload function works.
///
/// Usage: `<program> <model_name> <prompt>`. Both arguments are required.
///
/// Exits with status 1 when an argument is missing or when `compute` fails
/// with an error other than `ContextFull` / `PromptTooLong`. Panics if the
/// graph cannot be built, initialized, fed with input, or unloaded.
fn main() {
    let args: Vec<String> = env::args().collect();

    // Validate the argument count before indexing into `args`, so a missing
    // argument prints the usage message instead of panicking.
    if args.len() < 3 {
        let program = args
            .first()
            .map(String::as_str)
            .unwrap_or("wasmedge-ggml-unload");
        println!("Usage: {} <model_name> <prompt>", program);
        std::process::exit(1);
    }
    let model_name: &str = &args[1];
    let prompt = &args[2];

    // Set options for the graph. Check our README for more details:
    // https://github.com/second-state/WasmEdge-WASINN-examples/tree/master/wasmedge-ggml#parameters
    let options = get_options_from_env();
    // The options do not change between iterations, so serialize them once.
    let config = serde_json::to_string(&options).expect("Failed to serialize options");

    // Create and inference 5 times to make sure unload function works.
    for i in 0..5 {
        println!("----- Test {} -----", i);

        // Create graph and initialize context.
        let graph = GraphBuilder::new(GraphEncoding::Ggml, ExecutionTarget::AUTO)
            .config(config.clone())
            .build_from_cache(model_name)
            .expect("Failed to build graph");
        println!("Graph {} loaded.", graph);
        let mut context = graph
            .init_execution_context()
            .expect("Failed to init context");

        // Set the prompt.
        println!("Prompt:\n{}", prompt);
        let tensor_data = prompt.as_bytes().to_vec();
        context
            .set_input(0, TensorType::U8, &[1], &tensor_data)
            .expect("Failed to set input");
        println!("Response:");

        // Execute the inference. ContextFull and PromptTooLong are reported
        // but not fatal: the output is still retrieved and printed below.
        match context.compute() {
            Ok(_) => (),
            Err(Error::BackendError(BackendError::ContextFull)) => {
                println!("\n[INFO] Context full, we'll reset the context and continue.");
            }
            Err(Error::BackendError(BackendError::PromptTooLong)) => {
                println!("\n[INFO] Prompt too long, we'll reset the context and continue.");
            }
            Err(err) => {
                println!("\n[ERROR] {}", err);
                std::process::exit(1);
            }
        }

        // Retrieve the output.
        let output = get_output_from_context(&context);
        println!("{}", output.trim());

        // Unload the graph so the next iteration has to load it again.
        graph.unload().expect("Failed to unload graph");
    }
}
Binary file not shown.

0 comments on commit 7f67945

Please sign in to comment.