diff --git a/.gitmodules b/.gitmodules
index 93c0a87..f5a5cb0 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -41,3 +41,12 @@
 [submodule "bench/ORB_SLAM2"]
 	path = bench/ORB_SLAM2
 	url = https://github.com/Multi-V-VM/ORB_SLAM2
+[submodule "bench/crewai"]
+	path = bench/crewai
+	url = https://github.com/Multi-V-VM/crewAI-examples/
+[submodule "bench/llama-wamr"]
+	path = bench/llama-wamr
+	url = https://github.com/Multi-V-VM/llama-wamr
+[submodule "lib/s2n-tls"]
+	path = lib/s2n-tls
+	url = https://github.com/Multi-V-VM/s2n-tls
diff --git a/artifact/compare_local_remote_gpt_tokens.py b/artifact/compare_local_remote_gpt_tokens.py
index 2dbf3eb..25e9197 100644
--- a/artifact/compare_local_remote_gpt_tokens.py
+++ b/artifact/compare_local_remote_gpt_tokens.py
@@ -173,7 +173,7 @@ def plot_graph(results, labels):
         bar.set_color(colors.get(label, 'gray')) # Default to gray if color not found
 
     # Customize the plot
-    ax.set_ylabel('Performance Score')
+    ax.set_ylabel('Tokens/s')
     ax.set_xticks(x)
     ax.set_xticklabels(labels, rotation=45, fontsize=30)
 
diff --git a/artifact/graph2.py b/artifact/graph2.py
index d23c5a5..501e69a 100644
--- a/artifact/graph2.py
+++ b/artifact/graph2.py
@@ -7,11 +7,9 @@ def plot_graph(results, labels):
     plt.rc('font', **font)
     # Define colors for each platform
     colors = {
-        'MVVM-CPU': 'cyan',
+        'MVVM-Vanilla': 'cyan',
+        'MVVM-GPU': 'red',
         'SGLang-GPU': 'blue',
-        'WASI-NN-CPU': 'red',
-        'WASI-NN-GPU': 'brown',
-        'OpenAI': 'purple',
     }
 
     # Create figure and axis
@@ -28,7 +26,7 @@
         bar.set_color(colors.get(label, 'gray')) # Default to gray if color not found
 
     # Customize the plot
-    ax.set_ylabel('Performance Score')
+    ax.set_ylabel('Latency (s)')
     ax.set_xticks(x)
     ax.set_xticklabels(labels, rotation=45, fontsize=30)
 
@@ -36,7 +34,7 @@
     for bar in bars:
         height = bar.get_height()
         ax.text(bar.get_x() + bar.get_width()/2., height,
-                f'{height:.2f}',
+                f'{height:.4f}',
                 ha='center', va='bottom')
 
     # Add grid for better readability
@@ -48,8 +46,8 @@
     return plt
 
 # Example usage:
-results = [2.78, 13.71, 0.75, 0.84, 79.13263037306803] # Example values
-platforms = ['MVVM-CPU', 'SGLang-GPU', 'WASI-NN-CPU', 'WASI-NN-GPU', 'OpenAI']
+results = [0.31, 0.000164, 0.01103125] # Example values
+platforms = ['MVVM-Vanilla', 'MVVM-GPU', 'SGLang-GPU']
 
 # Create and save the plot
 plot = plot_graph(results, platforms)
diff --git a/artifact/graph3.py b/artifact/graph3.py
new file mode 100644
index 0000000..d924c63
--- /dev/null
+++ b/artifact/graph3.py
@@ -0,0 +1,55 @@
+import matplotlib.pyplot as plt
+import numpy as np
+
+
+def plot_graph(results, labels):
+    font = {'size': 40}
+    plt.rc('font', **font)
+    # Define colors for each platform
+    colors = {
+        'FIFO': 'cyan',
+        'Latency Sensitive': 'red',
+        'MVVM': 'blue',
+    }
+
+    # Create figure and axis
+    fig, ax = plt.subplots(figsize=(10, 10))
+
+    # Create bar positions
+    x = np.arange(len(results))
+
+    # Create bars with specified colors
+    bars = ax.bar(x, results, width=0.3)
+
+    # Color each bar according to the platform
+    for bar, label in zip(bars, labels):
+        bar.set_color(colors.get(label, 'gray')) # Default to gray if color not found
+
+    # Customize the plot
+    ax.set_ylabel('Latency (s)')
+    ax.set_xticks(x)
+    ax.set_xticklabels(labels, rotation=45, fontsize=30)
+
+    # Add value labels on top of each bar
+    for bar in bars:
+        height = bar.get_height()
+        ax.text(bar.get_x() + bar.get_width()/2., height,
+                f'{height:.4f}',
+                ha='center', va='bottom')
+
+    # Add grid for better readability
+    ax.grid(True, axis='y', linestyle='--', alpha=0.7)
+
+    # Adjust layout to prevent label cutoff
+    plt.tight_layout()
+
+    return plt
+
+# Example usage:
+results = [898.078, 745.393, 482.713] # Example values
+platforms = ['FIFO', 'Latency Sensitive', 'MVVM']
+
+# Create and save the plot
+plot = plot_graph(results, platforms)
+plot.savefig('fifo_latency_vs_mvvm.pdf')
+plot.close()
\ No newline at end of file
diff --git a/artifact/graph4.py b/artifact/graph4.py
new file mode 100644
index 0000000..8bd34be
--- /dev/null
+++ b/artifact/graph4.py
@@ -0,0 +1,67 @@
+import matplotlib.pyplot as plt
+import numpy as np
+
+
+def plot_graph(results1, results2, labels):
+    font = {'size': 40}
+    plt.rc('font', **font)
+    # Define colors for each platform
+    colors = {
+        'langgraph': 'cyan',
+        'azure_model': 'cyan',
+        'email_auto_responder_flow': 'cyan',
+        'game-builder-crew': 'cyan',
+        'instagram_post': 'cyan',
+        'job-posting': 'cyan',
+    }
+
+    # Create figure and axis
+    fig, ax = plt.subplots(figsize=(10, 10))
+
+    # Create bar positions
+    x = np.arange(len(results1))
+
+    # Create bars with specified colors
+    bars1 = ax.bar(x, results1, width=0.1)
+    bars2 = ax.bar(x+0.1, results2, width=0.1)
+
+    # Color each bar according to its series
+    for bar, label in zip(bars1, labels):
+        bar.set_color('cyan') # first series is drawn in cyan
+    for bar, label in zip(bars2, labels):
+        bar.set_color('purple') # second series is drawn in purple
+    # Customize the plot
+    ax.set_ylabel('Latency (s)')
+    ax.set_xticks(x)
+    ax.set_xticklabels(labels, rotation=45, fontsize=20)
+
+    # Add value labels on top of each bar
+    for bar in bars1:
+        height = bar.get_height()
+        ax.text(bar.get_x() + bar.get_width()/2., height,
+                f'{height:.2f}',
+                ha='center', va='bottom')
+
+    for bar in bars2:
+        height = bar.get_height()
+        ax.text(bar.get_x() + bar.get_width()/2., height,
+                f'{height:.2f}',
+                ha='center', va='bottom')
+
+    # Add grid for better readability
+    ax.grid(True, axis='y', linestyle='--', alpha=0.7)
+
+    # Adjust layout to prevent label cutoff
+    plt.tight_layout()
+
+    return plt
+
+# Example usage:
+results1 = [1.0, 1.0, 1.0] # Example values
+results2 = [8.823, 1.0, 1.0] # Example values
+platforms = ["job-post","long_file_translate","write_seo_blog_humanize"]
+
+# Create and save the plot
+plot = plot_graph(results1, results2, platforms)
+plot.savefig('crewai_vs_openai.pdf')
+plot.close()
\ No newline at end of file
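The modified graph2.py and the new graph3.py/graph4.py repeat the same bar-chart boilerplate, with only the data, y-label, colors, and label format changing. A minimal consolidation sketch, not part of this change (the helper name bar_chart and its keyword arguments are illustrative):

# plot_common.py -- illustrative sketch, not included in this diff
import matplotlib.pyplot as plt
import numpy as np


def bar_chart(results, labels, ylabel, outfile, colors=None, fmt='{:.2f}', tick_fontsize=30):
    """Draw a single-series bar chart in the style shared by graph2/graph3 and save it as a PDF."""
    plt.rc('font', size=40)
    fig, ax = plt.subplots(figsize=(10, 10))
    x = np.arange(len(results))
    bars = ax.bar(x, results, width=0.3)
    for bar, label in zip(bars, labels):
        bar.set_color((colors or {}).get(label, 'gray'))  # default to gray if color not found
    ax.set_ylabel(ylabel)
    ax.set_xticks(x)
    ax.set_xticklabels(labels, rotation=45, fontsize=tick_fontsize)
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width() / 2., height, fmt.format(height),
                ha='center', va='bottom')
    ax.grid(True, axis='y', linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.savefig(outfile)
    plt.close(fig)


# Example: the graph3.py figure expressed through the helper
bar_chart([898.078, 745.393, 482.713],
          ['FIFO', 'Latency Sensitive', 'MVVM'],
          ylabel='Latency (s)', outfile='fifo_latency_vs_mvvm.pdf',
          colors={'FIFO': 'cyan', 'Latency Sensitive': 'red', 'MVVM': 'blue'},
          fmt='{:.4f}')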
diff --git a/artifact/parrot_schedule.py b/artifact/parrot_schedule.py
index 753f4a1..428fd99 100644
--- a/artifact/parrot_schedule.py
+++ b/artifact/parrot_schedule.py
@@ -5,6 +5,7 @@
 import openai
 from dataclasses import dataclass
 import json
+import signal
 import subprocess
 from enum import Enum
 import asyncio
@@ -20,6 +21,53 @@ class ModelResponse:
     processing_time: float
     error: str = None
 
+class LLMScheduler:
+    def __init__(self):
+        self.latency_requirements = {}
+
+    def schedule_requests_with_llm(self, requests: List[str]) -> List[str]:
+        """
+        Use an LLM to prioritize requests and return the ordered list.
+        """
+        try:
+            # Build the prompt
+            system_prompt = """
+            You are a scheduling agent for a latency-sensitive system. Your task is to reorder a list of requests
+            to optimize system performance based on the following criteria:
+
+            1. Short latency tasks (e.g., simple queries or calculations) should be prioritized.
+            2. Medium latency tasks (e.g., summaries or translations) come next.
+            3. High latency tasks (e.g., creative writing or complex analysis) are handled last.
+
+            Here is the list of requests:
+            {requests}
+
+            Return the indices of the requests in the optimal order for scheduling, starting with 1. Example: "1,2,4,3"
+            """
+            formatted_prompt = system_prompt.format(requests="\n".join([f"{i+1}. {req}" for i, req in enumerate(requests)]))
+
+            # Call the LLM
+            response = openai.ChatCompletion.create(
+                model="gpt-4o",
+                messages=[
+                    {"role": "system", "content": formatted_prompt}
+                ],
+                temperature=0,
+                max_tokens=20
+            )
+
+            # Extract the response and convert it to a list of integers
+            decision = response.choices[0].message.content.strip()
+            order = list(map(int, decision.split(',')))
+
+            # Reorder requests based on the LLM's decision
+            sorted_requests = [requests[i - 1] for i in order]
+            return sorted_requests
+
+        except Exception as e:
+            print(f"Scheduling failed: {e}, returning requests in original order.")
+            return requests  # Fall back to the original order
+
 class IntelligentAgent:
     def __init__(self, api_key: str, edge_model_path: str, edge_model_bin: str):
         """
@@ -59,7 +107,7 @@ def _route_request(self, user_request: str) -> ModelType:
 
         try:
             response = openai.ChatCompletion.create(
-                model="gpt-4",
+                model="gpt-4o",
                 messages=messages,
                 temperature=0,
                 max_tokens=10
@@ -71,31 +119,89 @@
             print(f"Routing decision failed: {e}, defaulting to edge model")
             return ModelType.EDGE
 
-    def _call_edge_model(self, prompt: str) -> str:
+    def process_requests(self, requests: List[str]):
+        scheduler = LLMScheduler()
+        sorted_requests = scheduler.schedule_requests_with_llm(requests)
+
+        results = {}
+        for idx, request in enumerate(sorted_requests):
+            # Route each request
+            model_type = self._route_request(request)
+
+            # Process the request
+            if model_type == ModelType.CLOUD:
+                results[request] = f"Processed on Cloud Model (Request {idx + 1})"
+            else:
+                results[request] = f"Processed on Edge Model (Request {idx + 1})"
+
+        return results
+
+    async def _call_edge_model_async(self, prompt: str) -> str:
         """
-        Call the edge model using command line
+        Call the edge model asynchronously using the command line and pass input via stdin.
         """
         try:
             cmd = [
                 self.edge_model_path,
-                "-t", "./llama.aot",
-                "-a", f"{self.edge_model_bin},-i,'{prompt}'"
+                "./wasmedge-ggml-llama.aot",
+                f"{self.edge_model_bin}"
             ]
             print(cmd)
-
-            result = subprocess.run(
-                cmd,
-                capture_output=True,
-                text=True,
-                timeout=30 # set a timeout
+
+            # Create the subprocess
+            process = await asyncio.create_subprocess_exec(
+                *cmd,
+                stdin=asyncio.subprocess.PIPE,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE
             )
-
-            if result.returncode != 0:
-                raise Exception(f"Edge model error: {result.stderr}")
-
-            return result.stdout
-
-        except subprocess.TimeoutExpired:
+            self.process = process  # keep a handle so control_c() can signal the subprocess
+
+            # Send the prompt to stdin
+            stdout, stderr = await process.communicate(input=prompt.encode())
+
+            # Check for errors
+            if process.returncode != 0:
+                raise Exception(f"Edge model error: {stderr.decode()}")
+
+            # Return the stdout as a string
+            return stdout.decode()
+
+        except asyncio.TimeoutError:
+            raise Exception("Edge model timed out")
+        except Exception as e:
+            raise Exception(f"Edge model error: {str(e)}")
+
+
+    async def _run_clangd_async(self, prompt: str) -> str:
+        """
+        Run clangd asynchronously using the command line and pass input via stdin.
+        """
+        try:
+            cmd = [
+                self.edge_model_path,
+                "./clangd.aot",
+            ]
+            print(cmd)
+
+            # Create the subprocess
+            process = await asyncio.create_subprocess_exec(
+                *cmd,
+                stdin=asyncio.subprocess.PIPE,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE
+            )
+
+            # Send the prompt to stdin
+            stdout, stderr = await process.communicate(input=prompt.encode())
+
+            # Check for errors
+            if process.returncode != 0:
+                raise Exception(f"Edge model error: {stderr.decode()}")
+
+            # Return the stdout as a string
+            return stdout.decode()
+
+        except asyncio.TimeoutError:
             raise Exception("Edge model timed out")
         except Exception as e:
             raise Exception(f"Edge model error: {str(e)}")
@@ -117,7 +223,7 @@ def _call_cloud_model(self, prompt: str) -> str:
         except Exception as e:
             raise Exception(f"Cloud model error: {str(e)}")
 
-    def process_request(self, user_request: str) -> ModelResponse:
+    def process_request(self, user_request: str, use_edge: bool = False) -> ModelResponse:
         """
         Process user request using either cloud or edge model
         """
@@ -126,7 +232,7 @@
 
         try:
            # Decide which model to use
-            model_type = self._route_request(user_request)
+            model_type = ModelType.EDGE if use_edge else self._route_request(user_request)
 
            # Call the chosen model based on the routing decision
             if model_type == ModelType.CLOUD:
@@ -149,30 +255,79 @@
                 processing_time=time.time() - start_time,
                 error=str(e)
             )
+    def control_c(self):
+        if self.process and self.process.pid:
+            os.kill(self.process.pid, signal.SIGINT)
+            print(f"Sent Control+C to subprocess (PID: {self.process.pid})")
+        else:
+            print("No subprocess to send Control+C to.")
 
 # Example usage
-if __name__ == "__main__":
+async def main():
    # Initialize the agent
     agent = IntelligentAgent(
         api_key=os.environ["OPENAI_API_KEY"],
-        edge_model_path="./MVVM_checkpoint",
-        edge_model_bin="./llama32_1b.bin"
+        edge_model_path="/mnt/osdi23/MVVM/build/wasm-micro-runtime/product-mini/platforms/linux/build/iwasm",
+        edge_model_bin="./Llama-3.2-1B-Instruct-Q8_0.gguf"
     )
 
    # Test requests of varying complexity
     test_requests = [
-        "What is 2+2?", # simple calculation, fine for the edge model
-        "Write a detailed analysis of the economic impact of climate change, please use scraper to do so", # complex analysis, needs the cloud model
-        "Tell me a joke", # simple generation, fine for the edge model
-        "Explain quantum computing to a 5 year old" # needs a deep explanation, may need the cloud model
+        "What is 2+2?, verify in c++",
+        "Generate code for a simple web server in c++",
+        "Tell me a c++ joke",
+        "Explain quantum computing in c++"
+    ]
+
+    for request in test_requests:
+        print(f"\nProcessing request: {request}")
+        response = agent.process_request(request, use_edge=True)
+        print(f"Used {response.model_used.value} model")
+        print(f"Processing time: {response.processing_time:.2f}s")
+        if response.error:
+            print(f"Error: {response.error}")
+        else:
+            print(f"Response: {response.content[:100]}...") # only show the first 100 characters
+
+    # Test requests of varying complexity
+    test_requests = [
+        "What is 2+2?, verify in c++",
+        "Tell me a c++ joke",
+        "Generate code for a simple web server in c++",
+        "Explain quantum computing in c++"
     ]
 
     for request in test_requests:
         print(f"\nProcessing request: {request}")
-        response = agent.process_request(request)
+        response = agent.process_request(request, use_edge=False)
         print(f"Used {response.model_used.value} model")
         print(f"Processing time: {response.processing_time:.2f}s")
         if response.error:
             print(f"Error: {response.error}")
         else:
-            print(f"Response: {response.content[:100]}...") # only show the first 100 characters
\ No newline at end of file
+            print(f"Response: {response.content[:100]}...") # only show the first 100 characters
+
+    normal_request = "What is 2+2?, verify in c++"
+    test_requests = [
+        "what's 2+2",
+        "generate code for a simple web server",
+        "tell me a joke",
+        "explain quantum computing"
+    ]
+    task = asyncio.create_task(agent._call_edge_model_async(normal_request))
+
+    # Simulate sending Control+C after 5 seconds
+    await asyncio.sleep(5)
+    agent.control_c()
+
+    for request in test_requests:
+        print(f"\nProcessing request: {request}")
+        response = agent.process_request(request, use_edge=False)
+        print(f"Used {response.model_used.value} model")
+        print(f"Processing time: {response.processing_time:.2f}s")
+        if response.error:
+            print(f"Error: {response.error}")
+        else:
+            print(f"Response: {response.content[:100]}...") # only show the first 100 characters
+
+    # Collect the interrupted edge-model task so its exception is not silently dropped
+    try:
+        await task
+    except Exception as e:
+        print(f"Edge model task ended with: {e}")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/bench/crewai b/bench/crewai
new file mode 160000
index 0000000..756034d
--- /dev/null
+++ b/bench/crewai
@@ -0,0 +1 @@
+Subproject commit 756034d346e8aef0a770eb8df188d510249d80a5
diff --git a/bench/llama-wamr b/bench/llama-wamr
new file mode 160000
index 0000000..76bd793
--- /dev/null
+++ b/bench/llama-wamr
@@ -0,0 +1 @@
+Subproject commit 76bd7935cf97d259b55b85b7980235d856f9e8b6
diff --git a/lib/s2n-tls b/lib/s2n-tls
new file mode 160000
index 0000000..ee391c7
--- /dev/null
+++ b/lib/s2n-tls
@@ -0,0 +1 @@
+Subproject commit ee391c72c08ae909de6669d5823ff3dc4a789c36
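The new LLMScheduler class is added above but is not exercised elsewhere in this change. A minimal sketch of how it could drive IntelligentAgent, assuming artifact/parrot_schedule.py is importable as a module named parrot_schedule; the edge runtime path below is a placeholder and the request strings are taken from the test lists in the diff:

# scheduler_demo.py -- illustrative sketch, not included in this diff
import os

from parrot_schedule import IntelligentAgent, LLMScheduler  # module path is an assumption

agent = IntelligentAgent(
    api_key=os.environ["OPENAI_API_KEY"],
    edge_model_path="/path/to/iwasm",                    # placeholder path to the WAMR runtime
    edge_model_bin="./Llama-3.2-1B-Instruct-Q8_0.gguf",
)
scheduler = LLMScheduler()

requests = [
    "what's 2+2",
    "generate code for a simple web server",
    "explain quantum computing",
]

# Let the LLM order the batch (short-latency work first), then run each request through the agent.
for request in scheduler.schedule_requests_with_llm(requests):
    response = agent.process_request(request, use_edge=True)
    print(f"{request!r}: {response.model_used.value}, {response.processing_time:.2f}s")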