diff --git a/.gitmodules b/.gitmodules
index 93c0a87..f5a5cb0 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -41,3 +41,12 @@
 [submodule "bench/ORB_SLAM2"]
 	path = bench/ORB_SLAM2
 	url = https://github.com/Multi-V-VM/ORB_SLAM2
+[submodule "bench/crewai"]
+	path = bench/crewai
+	url = https://github.com/Multi-V-VM/crewAI-examples/
+[submodule "bench/llama-wamr"]
+	path = bench/llama-wamr
+	url = https://github.com/Multi-V-VM/llama-wamr
+[submodule "lib/s2n-tls"]
+	path = lib/s2n-tls
+	url = https://github.com/Multi-V-VM/s2n-tls
diff --git a/artifact/compare_local_remote_gpt_tokens.py b/artifact/compare_local_remote_gpt_tokens.py
index 2dbf3eb..25e9197 100644
--- a/artifact/compare_local_remote_gpt_tokens.py
+++ b/artifact/compare_local_remote_gpt_tokens.py
@@ -173,7 +173,7 @@ def plot_graph(results, labels):
         bar.set_color(colors.get(label, 'gray')) # Default to gray if color not found
 
     # Customize the plot
-    ax.set_ylabel('Performance Score')
+    ax.set_ylabel('Tokens/s')
     ax.set_xticks(x)
     ax.set_xticklabels(labels, rotation=45, fontsize=30)
 
diff --git a/artifact/graph2.py b/artifact/graph2.py
index d23c5a5..501e69a 100644
--- a/artifact/graph2.py
+++ b/artifact/graph2.py
@@ -7,11 +7,9 @@ def plot_graph(results, labels):
     plt.rc('font', **font)
     # Define colors for each platform
     colors = {
-        'MVVM-CPU': 'cyan',
+        'MVVM-Vanilla': 'cyan',
+        'MVVM-GPU': 'red',
         'SGLang-GPU': 'blue',
-        'WASI-NN-CPU': 'red',
-        'WASI-NN-GPU': 'brown',
-        'OpenAI': 'purple',
     }
 
     # Create figure and axis
@@ -28,7 +26,7 @@
         bar.set_color(colors.get(label, 'gray')) # Default to gray if color not found
 
     # Customize the plot
-    ax.set_ylabel('Performance Score')
+    ax.set_ylabel('Latency (s)')
     ax.set_xticks(x)
     ax.set_xticklabels(labels, rotation=45, fontsize=30)
 
@@ -36,7 +34,7 @@
     for bar in bars:
         height = bar.get_height()
         ax.text(bar.get_x() + bar.get_width()/2., height,
-                f'{height:.2f}',
+                f'{height:.4f}',
                 ha='center', va='bottom')
 
     # Add grid for better readability
@@ -48,8 +46,8 @@
     return plt
 
 # Example usage:
-results = [2.78, 13.71, 0.75, 0.84, 79.13263037306803] # Example values
-platforms = ['MVVM-CPU', 'SGLang-GPU', 'WASI-NN-CPU', 'WASI-NN-GPU', 'OpenAI']
+results = [0.31, 0.000164, 0.01103125] # Example values
+platforms = ['MVVM-Vanilla', 'MVVM-GPU', 'SGLang-GPU']
 
 # Create and save the plot
 plot = plot_graph(results, platforms)
diff --git a/artifact/graph3.py b/artifact/graph3.py
new file mode 100644
index 0000000..d924c63
--- /dev/null
+++ b/artifact/graph3.py
@@ -0,0 +1,55 @@
+import matplotlib.pyplot as plt
+import numpy as np
+
+
+def plot_graph(results, labels):
+    font = {'size': 40}
+    plt.rc('font', **font)
+    # Define colors for each platform
+    colors = {
+        'FIFO': 'cyan',
+        'Latency Sensitive': 'red',
+        'MVVM': 'blue',
+    }
+
+    # Create figure and axis
+    fig, ax = plt.subplots(figsize=(10, 10))
+
+    # Create bar positions
+    x = np.arange(len(results))
+
+    # Create bars with specified colors
+    bars = ax.bar(x, results, width=0.3)
+
+    # Color each bar according to the platform
+    for bar, label in zip(bars, labels):
+        bar.set_color(colors.get(label, 'gray')) # Default to gray if color not found
+
+    # Customize the plot
+    ax.set_ylabel('Latency (s)')
+    ax.set_xticks(x)
+    ax.set_xticklabels(labels, rotation=45, fontsize=30)
+
+    # Add value labels on top of each bar
+    for bar in bars:
+        height = bar.get_height()
+        ax.text(bar.get_x() + bar.get_width()/2., height,
+                f'{height:.4f}',
+                ha='center', va='bottom')
+
+    # Add grid for better readability
+    ax.grid(True, axis='y', linestyle='--', alpha=0.7)
+
+    # Adjust layout to prevent label cutoff
+    plt.tight_layout()
+
+    return plt
+
+# Example usage:
+results = [898.078, 745.393, 482.713] # Example values
+platforms = ['FIFO', 'Latency Sensitive', 'MVVM']
+
+# Create and save the plot
+plot = plot_graph(results, platforms)
+plot.savefig('fifo_latency_vs_mvvm.pdf')
+plot.close()
\ No newline at end of file
diff --git a/artifact/graph4.py b/artifact/graph4.py
new file mode 100644
index 0000000..8bd34be
--- /dev/null
+++ b/artifact/graph4.py
@@ -0,0 +1,67 @@
+import matplotlib.pyplot as plt
+import numpy as np
+
+
+def plot_graph(results1, results2, labels):
+    font = {'size': 40}
+    plt.rc('font', **font)
+    # Define colors for each platform
+    colors = {
+        'langgraph': 'cyan',
+        'azure_model': 'cyan',
+        'email_auto_responder_flow': 'cyan',
+        'game-builder-crew': 'cyan',
+        'instagram_post': 'cyan',
+        'job-posting': 'cyan',
+    }
+
+    # Create figure and axis
+    fig, ax = plt.subplots(figsize=(10, 10))
+
+    # Create bar positions
+    x = np.arange(len(results1))
+
+    # Create bars with specified colors
+    bars1 = ax.bar(x, results1, width=0.1)
+    bars2 = ax.bar(x+0.1, results2, width=0.1)
+
+    # Color each bar according to its series
+    for bar, label in zip(bars1, labels):
+        bar.set_color('cyan') # first series is drawn in cyan
+    for bar, label in zip(bars2, labels):
+        bar.set_color('purple') # second series is drawn in purple
+    # Customize the plot
+    ax.set_ylabel('Latency (s)')
+    ax.set_xticks(x)
+    ax.set_xticklabels(labels, rotation=45, fontsize=20)
+
+    # Add value labels on top of each bar
+    for bar in bars1:
+        height = bar.get_height()
+        ax.text(bar.get_x() + bar.get_width()/2., height,
+                f'{height:.2f}',
+                ha='center', va='bottom')
+
+    for bar in bars2:
+        height = bar.get_height()
+        ax.text(bar.get_x() + bar.get_width()/2., height,
+                f'{height:.2f}',
+                ha='center', va='bottom')
+
+    # Add grid for better readability
+    ax.grid(True, axis='y', linestyle='--', alpha=0.7)
+
+    # Adjust layout to prevent label cutoff
+    plt.tight_layout()
+
+    return plt
+
+# Example usage:
+results1 = [1.0, 1.0, 1.0] # Example values
+results2 = [8.823, 1.0, 1.0] # Example values
+platforms = ["job-post","long_file_translate","write_seo_blog_humanize"]
+
+# Create and save the plot
+plot = plot_graph(results1, results2, platforms)
+plot.savefig('crewai_vs_openai.pdf')
+plot.close()
\ No newline at end of file
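The modified graph2.py and the new graph3.py/graph4.py repeat the same bar-chart boilerplate, with only the data, y-label, colors, and label format changing. A minimal consolidation sketch, not part of this change (the helper name bar_chart and its keyword arguments are illustrative):

# plot_common.py -- illustrative sketch, not included in this diff
import matplotlib.pyplot as plt
import numpy as np


def bar_chart(results, labels, ylabel, outfile, colors=None, fmt='{:.2f}', tick_fontsize=30):
    """Draw a single-series bar chart in the style shared by graph2/graph3 and save it as a PDF."""
    plt.rc('font', size=40)
    fig, ax = plt.subplots(figsize=(10, 10))
    x = np.arange(len(results))
    bars = ax.bar(x, results, width=0.3)
    for bar, label in zip(bars, labels):
        bar.set_color((colors or {}).get(label, 'gray'))  # default to gray if color not found
    ax.set_ylabel(ylabel)
    ax.set_xticks(x)
    ax.set_xticklabels(labels, rotation=45, fontsize=tick_fontsize)
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width() / 2., height, fmt.format(height),
                ha='center', va='bottom')
    ax.grid(True, axis='y', linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.savefig(outfile)
    plt.close(fig)


# Example: the graph3.py figure expressed through the helper
bar_chart([898.078, 745.393, 482.713],
          ['FIFO', 'Latency Sensitive', 'MVVM'],
          ylabel='Latency (s)', outfile='fifo_latency_vs_mvvm.pdf',
          colors={'FIFO': 'cyan', 'Latency Sensitive': 'red', 'MVVM': 'blue'},
          fmt='{:.4f}')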
diff --git a/artifact/parrot_schedule.py b/artifact/parrot_schedule.py
index 753f4a1..428fd99 100644
--- a/artifact/parrot_schedule.py
+++ b/artifact/parrot_schedule.py
@@ -5,6 +5,7 @@
 import openai
 from dataclasses import dataclass
 import json
+import signal
 import subprocess
 from enum import Enum
 import asyncio
@@ -20,6 +21,53 @@ class ModelResponse:
     processing_time: float
     error: str = None
 
+class LLMScheduler:
+    def __init__(self):
+        self.latency_requirements = {}
+
+    def schedule_requests_with_llm(self, requests: List[str]) -> List[str]:
+        """
+        Use an LLM to prioritize requests and return the ordered list.
+        """
+        try:
+            # Build the prompt
+            system_prompt = """
+            You are a scheduling agent for a latency-sensitive system. Your task is to reorder a list of requests
+            to optimize system performance based on the following criteria:
+
+            1. Short latency tasks (e.g., simple queries or calculations) should be prioritized.
+            2. Medium latency tasks (e.g., summaries or translations) come next.
+            3. High latency tasks (e.g., creative writing or complex analysis) are handled last.
+
+            Here is the list of requests:
+            {requests}
+
+            Return the indices of the requests in the optimal order for scheduling, starting with 1. Example: "1,2,4,3"
+            """
+            formatted_prompt = system_prompt.format(requests="\n".join([f"{i+1}. {req}" for i, req in enumerate(requests)]))
+
+            # Call the LLM
+            response = openai.ChatCompletion.create(
+                model="gpt-4o",
+                messages=[
+                    {"role": "system", "content": formatted_prompt}
+                ],
+                temperature=0,
+                max_tokens=20
+            )
+
+            # Extract the response and convert it to a list of integers
+            decision = response.choices[0].message.content.strip()
+            order = list(map(int, decision.split(',')))
+
+            # Reorder requests based on the LLM's decision
+            sorted_requests = [requests[i - 1] for i in order]
+            return sorted_requests
+
+        except Exception as e:
+            print(f"Scheduling failed: {e}, returning requests in original order.")
+            return requests  # Fall back to the original order
+
 class IntelligentAgent:
     def __init__(self, api_key: str, edge_model_path: str, edge_model_bin: str):
         """
@@ -59,7 +107,7 @@ def _route_request(self, user_request: str) -> ModelType:
 
         try:
             response = openai.ChatCompletion.create(
-                model="gpt-4",
+                model="gpt-4o",
                 messages=messages,
                 temperature=0,
                 max_tokens=10
@@ -71,31 +119,89 @@
             print(f"Routing decision failed: {e}, defaulting to edge model")
             return ModelType.EDGE
 
-    def _call_edge_model(self, prompt: str) -> str:
+    def process_requests(self, requests: List[str]):
+        scheduler = LLMScheduler()
+        sorted_requests = scheduler.schedule_requests_with_llm(requests)
+
+        results = {}
+        for idx, request in enumerate(sorted_requests):
+            # Route each request
+            model_type = self._route_request(request)
+
+            # Process the request
+            if model_type == ModelType.CLOUD:
+                results[request] = f"Processed on Cloud Model (Request {idx + 1})"
+            else:
+                results[request] = f"Processed on Edge Model (Request {idx + 1})"
+
+        return results
+
+    async def _call_edge_model_async(self, prompt: str) -> str:
         """
-        Call the edge model using command line
+        Call the edge model asynchronously using the command line and pass input via stdin.
         """
         try:
             cmd = [
                 self.edge_model_path,
-                "-t", "./llama.aot",
-                "-a", f"{self.edge_model_bin},-i,'{prompt}'"
+                "./wasmedge-ggml-llama.aot",
+                f"{self.edge_model_bin}"
             ]
             print(cmd)
-
-            result = subprocess.run(
-                cmd,
-                capture_output=True,
-                text=True,
-                timeout=30 # set a timeout
+
+            # Create the subprocess
+            process = await asyncio.create_subprocess_exec(
+                *cmd,
+                stdin=asyncio.subprocess.PIPE,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE
             )
-
-            if result.returncode != 0:
-                raise Exception(f"Edge model error: {result.stderr}")
-
-            return result.stdout
-
-        except subprocess.TimeoutExpired:
+            self.process = process  # keep a handle so control_c() can signal the subprocess
+
+            # Send the prompt to stdin
+            stdout, stderr = await process.communicate(input=prompt.encode())
+
+            # Check for errors
+            if process.returncode != 0:
+                raise Exception(f"Edge model error: {stderr.decode()}")
+
+            # Return the stdout as a string
+            return stdout.decode()
+
+        except asyncio.TimeoutError:
+            raise Exception("Edge model timed out")
+        except Exception as e:
+            raise Exception(f"Edge model error: {str(e)}")
+
+
+    async def _run_clangd_async(self, prompt: str) -> str:
+        """
+        Run clangd asynchronously using the command line and pass input via stdin.
+        """
+        try:
+            cmd = [
+                self.edge_model_path,
+                "./clangd.aot",
+            ]
+            print(cmd)
+
+            # Create the subprocess
+            process = await asyncio.create_subprocess_exec(
+                *cmd,
+                stdin=asyncio.subprocess.PIPE,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE
+            )
+
+            # Send the prompt to stdin
+            stdout, stderr = await process.communicate(input=prompt.encode())
+
+            # Check for errors
+            if process.returncode != 0:
+                raise Exception(f"Edge model error: {stderr.decode()}")
+
+            # Return the stdout as a string
+            return stdout.decode()
+
+        except asyncio.TimeoutError:
             raise Exception("Edge model timed out")
         except Exception as e:
             raise Exception(f"Edge model error: {str(e)}")
@@ -117,7 +223,7 @@ def _call_cloud_model(self, prompt: str) -> str:
         except Exception as e:
             raise Exception(f"Cloud model error: {str(e)}")
 
-    def process_request(self, user_request: str) -> ModelResponse:
+    def process_request(self, user_request: str, use_edge: bool = False) -> ModelResponse:
         """
         Process user request using either cloud or edge model
         """
@@ -126,7 +232,7 @@
 
         try:
            # Decide which model to use
-            model_type = self._route_request(user_request)
+            model_type = ModelType.EDGE if use_edge else self._route_request(user_request)
 
            # Call the chosen model based on the routing decision
             if model_type == ModelType.CLOUD:
@@ -149,30 +255,79 @@
                 processing_time=time.time() - start_time,
                 error=str(e)
             )
+    def control_c(self):
+        if self.process and self.process.pid:
+            os.kill(self.process.pid, signal.SIGINT)
+            print(f"Sent Control+C to subprocess (PID: {self.process.pid})")
+        else:
+            print("No subprocess to send Control+C to.")
 
 # Example usage
-if __name__ == "__main__":
+async def main():
    # Initialize the agent
     agent = IntelligentAgent(
         api_key=os.environ["OPENAI_API_KEY"],
-        edge_model_path="./MVVM_checkpoint",
-        edge_model_bin="./llama32_1b.bin"
+        edge_model_path="/mnt/osdi23/MVVM/build/wasm-micro-runtime/product-mini/platforms/linux/build/iwasm",
+        edge_model_bin="./Llama-3.2-1B-Instruct-Q8_0.gguf"
     )
 
    # Test requests of varying complexity
     test_requests = [
-        "What is 2+2?", # simple calculation, fine for the edge model
-        "Write a detailed analysis of the economic impact of climate change, please use scraper to do so", # complex analysis, needs the cloud model
-        "Tell me a joke", # simple generation, fine for the edge model
-        "Explain quantum computing to a 5 year old" # needs a deep explanation, may need the cloud model
+        "What is 2+2?, verify in c++",
+        "Generate code for a simple web server in c++",
+        "Tell me a c++ joke",
+        "Explain quantum computing in c++"
+    ]
+
+    for request in test_requests:
+        print(f"\nProcessing request: {request}")
+        response = agent.process_request(request, use_edge=True)
+        print(f"Used {response.model_used.value} model")
+        print(f"Processing time: {response.processing_time:.2f}s")
+        if response.error:
+            print(f"Error: {response.error}")
+        else:
+            print(f"Response: {response.content[:100]}...") # only show the first 100 characters
+
+    # Test requests of varying complexity
+    test_requests = [
+        "What is 2+2?, verify in c++",
+        "Tell me a c++ joke",
+        "Generate code for a simple web server in c++",
+        "Explain quantum computing in c++"
     ]
 
     for request in test_requests:
         print(f"\nProcessing request: {request}")
-        response = agent.process_request(request)
+        response = agent.process_request(request, use_edge=False)
         print(f"Used {response.model_used.value} model")
         print(f"Processing time: {response.processing_time:.2f}s")
         if response.error:
             print(f"Error: {response.error}")
         else:
-            print(f"Response: {response.content[:100]}...") # only show the first 100 characters
\ No newline at end of file
+            print(f"Response: {response.content[:100]}...") # only show the first 100 characters
+
+    normal_request = "What is 2+2?, verify in c++"
+    test_requests = [
+        "what's 2+2",
+        "generate code for a simple web server",
+        "tell me a joke",
+        "explain quantum computing"
+    ]
+    task = asyncio.create_task(agent._call_edge_model_async(normal_request))
+
+    # Simulate sending Control+C after 5 seconds
+    await asyncio.sleep(5)
+    agent.control_c()
+
+    for request in test_requests:
+        print(f"\nProcessing request: {request}")
+        response = agent.process_request(request, use_edge=False)
+        print(f"Used {response.model_used.value} model")
+        print(f"Processing time: {response.processing_time:.2f}s")
+        if response.error:
+            print(f"Error: {response.error}")
+        else:
+            print(f"Response: {response.content[:100]}...") # only show the first 100 characters
+
+    # Collect the interrupted edge-model task so its exception is not silently dropped
+    try:
+        await task
+    except Exception as e:
+        print(f"Edge model task ended with: {e}")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/bench/crewai b/bench/crewai
new file mode 160000
index 0000000..756034d
--- /dev/null
+++ b/bench/crewai
@@ -0,0 +1 @@
+Subproject commit 756034d346e8aef0a770eb8df188d510249d80a5
diff --git a/bench/llama-wamr b/bench/llama-wamr
new file mode 160000
index 0000000..76bd793
--- /dev/null
+++ b/bench/llama-wamr
@@ -0,0 +1 @@
+Subproject commit 76bd7935cf97d259b55b85b7980235d856f9e8b6
diff --git a/lib/s2n-tls b/lib/s2n-tls
new file mode 160000
index 0000000..ee391c7
--- /dev/null
+++ b/lib/s2n-tls
@@ -0,0 +1 @@
+Subproject commit ee391c72c08ae909de6669d5823ff3dc4a789c36
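The new LLMScheduler class is added above but is not exercised elsewhere in this change. A minimal sketch of how it could drive IntelligentAgent, assuming artifact/parrot_schedule.py is importable as a module named parrot_schedule; the edge runtime path below is a placeholder and the request strings are taken from the test lists in the diff:

# scheduler_demo.py -- illustrative sketch, not included in this diff
import os

from parrot_schedule import IntelligentAgent, LLMScheduler  # module path is an assumption

agent = IntelligentAgent(
    api_key=os.environ["OPENAI_API_KEY"],
    edge_model_path="/path/to/iwasm",                    # placeholder path to the WAMR runtime
    edge_model_bin="./Llama-3.2-1B-Instruct-Q8_0.gguf",
)
scheduler = LLMScheduler()

requests = [
    "what's 2+2",
    "generate code for a simple web server",
    "explain quantum computing",
]

# Let the LLM order the batch (short-latency work first), then run each request through the agent.
for request in scheduler.schedule_requests_with_llm(requests):
    response = agent.process_request(request, use_edge=True)
    print(f"{request!r}: {response.model_used.value}, {response.processing_time:.2f}s")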