diff --git a/wasmedge-mlx/README.md b/wasmedge-mlx/README.md
index 95dbfd4..bc054b3 100644
--- a/wasmedge-mlx/README.md
+++ b/wasmedge-mlx/README.md
@@ -70,11 +70,20 @@ wasmedge --dir .:. \
 
 There are some metadata for MLX plugin you can set.
 
+### Basic settings
+
 - model_type (required): LLM model type.
 - tokenizer (required): tokenizer.json path
 - max_token (option): maximum generate token number, default is 1024.
 - enable_debug_log (option): if print debug log, default is false.
 
+### Quantization
+
+The following three parameters must be set together.
+- is_quantized (option): Whether the weights are already quantized. If is_quantized is false, the MLX backend will quantize the weights itself.
+- group_size (option): The group size to use for quantization.
+- q_bits (option): The number of bits to quantize to.
+
 ``` rust
 let graph = GraphBuilder::new(GraphEncoding::Mlx, ExecutionTarget::AUTO)
     .config(serde_json::to_string(&json!({"model_type": "tiny_llama_1.1B_chat_v1.0", "tokenizer":tokenizer_path, "max_token":100}))
diff --git a/wasmedge-mlx/src/main.rs b/wasmedge-mlx/src/main.rs
index 14df217..098e75e 100644
--- a/wasmedge-mlx/src/main.rs
+++ b/wasmedge-mlx/src/main.rs
@@ -24,7 +24,7 @@ fn main() {
     let args: Vec<String> = env::args().collect();
     let model_name: &str = &args[1];
     let graph = GraphBuilder::new(GraphEncoding::Mlx, ExecutionTarget::AUTO)
-        .config(serde_json::to_string(&json!({"model_type": "tiny_llama_1.1B_chat_v1.0", "tokenizer":tokenizer_path, "max_token":100})).expect("Failed to serialize options"))
+        .config(serde_json::to_string(&json!({"is_quantized": false, "group_size": 64, "q_bits": 4, "model_type": "tiny_llama_1.1B_chat_v1.0", "tokenizer":tokenizer_path, "max_token":100})).expect("Failed to serialize options"))
         .build_from_cache(model_name)
         .expect("Failed to build graph");
     let mut context = graph
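
The main.rs change above passes `is_quantized: false`, so the MLX backend quantizes the weights at load time. For contrast, here is a minimal sketch of the opposite case, assuming the weights in the model cache were quantized ahead of time; presumably `is_quantized: true` tells the backend to load them as-is, using the same `group_size` and `q_bits` they were produced with. The tokenizer path and cache name are placeholders, not values from this diff; the API calls mirror those used in main.rs above.

``` rust
use serde_json::json;
use wasmedge_wasi_nn::{ExecutionTarget, GraphBuilder, GraphEncoding};

fn main() {
    let tokenizer_path = "tokenizer.json"; // placeholder path
    let graph = GraphBuilder::new(GraphEncoding::Mlx, ExecutionTarget::AUTO)
        .config(
            serde_json::to_string(&json!({
                "model_type": "tiny_llama_1.1B_chat_v1.0",
                "tokenizer": tokenizer_path,
                "max_token": 100,
                // All three quantization parameters are set together.
                // With is_quantized = true the backend is expected to skip
                // its own quantization pass (assumption based on the README:
                // "If is_quantized is false, the MLX backend will quantize
                // the weights itself").
                "is_quantized": true,
                "group_size": 64,
                "q_bits": 4
            }))
            .expect("Failed to serialize options"),
        )
        .build_from_cache("default") // placeholder cache name
        .expect("Failed to build graph");
    let _context = graph; // inference setup continues as in main.rs
}
```

Keeping the three parameters in one group, as the README now requires, avoids a mismatch where weights quantized with one `group_size`/`q_bits` pair are dequantized with another.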