diff --git a/interpreter/computer_use/loop.py b/interpreter/computer_use/loop.py
index 84a7a6cd7..a61adb598 100755
--- a/interpreter/computer_use/loop.py
+++ b/interpreter/computer_use/loop.py
@@ -7,10 +7,19 @@
 import os
 import platform
 import time
+import traceback
 import uuid
 from collections.abc import Callable
 from datetime import datetime
-from enum import StrEnum
+
+try:
+    from enum import StrEnum
+except ImportError:  # Python 3.10 compatibility: StrEnum was added in 3.11
+    from enum import Enum
+
+    class StrEnum(str, Enum):  # keep the str mixin so members still compare as strings
+        pass
+
 from typing import Any, List, cast
 
 import requests
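A note on the compatibility shim above: simply aliasing plain `Enum` would drop the `str` mixin, so enum members would no longer compare equal to their string values; the class-based fallback keeps that behavior. A minimal sanity check of the fallback path (illustrative only, not part of the patch; the member value mirrors `APIProvider.ANTHROPIC` as used later in this file):

```python
from enum import Enum


class StrEnum(str, Enum):  # same shape as the fallback shim above
    pass


class APIProvider(StrEnum):
    ANTHROPIC = "anthropic"  # assumed value, mirroring the class defined in loop.py


assert APIProvider.ANTHROPIC == "anthropic"  # str mixin keeps string equality
assert APIProvider("anthropic") is APIProvider.ANTHROPIC
```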
+ yield {"type": "messages", "messages": messages} + break messages.append({"content": tool_result_content, "role": "user"}) @@ -334,6 +357,95 @@ async def main(): provider = APIProvider.ANTHROPIC system_prompt_suffix = "" + # Check if running in server mode + if "--server" in sys.argv: + app = FastAPI() + + # Start the mouse position checking thread when in server mode + mouse_thread = threading.Thread(target=check_mouse_position) + mouse_thread.daemon = True + mouse_thread.start() + + # Get API key from environment variable + api_key = os.environ.get("ANTHROPIC_API_KEY") + if not api_key: + raise ValueError( + "ANTHROPIC_API_KEY environment variable must be set when running in server mode" + ) + + @app.post("/openai/chat/completions") + async def chat_completion(request: ChatCompletionRequest): + print("BRAND NEW REQUEST") + # Check exit flag before processing request + if exit_flag: + return {"error": "Server shutting down due to mouse in corner"} + + async def stream_response(): + print("is this even happening") + + # Instead of creating converted_messages, append the last message to global messages + global messages + messages.append( + { + "role": request.messages[-1].role, + "content": [ + {"type": "text", "text": request.messages[-1].content} + ], + } + ) + + response_chunks = [] + + async def output_callback(content_block: BetaContentBlock): + chunk = f"data: {json.dumps({'choices': [{'delta': {'content': content_block.text}}]})}\n\n" + response_chunks.append(chunk) + yield chunk + + async def tool_output_callback(result: ToolResult, tool_id: str): + if result.output or result.error: + content = result.output if result.output else result.error + chunk = f"data: {json.dumps({'choices': [{'delta': {'content': content}}]})}\n\n" + response_chunks.append(chunk) + yield chunk + + try: + yield f"data: {json.dumps({'choices': [{'delta': {'role': 'assistant'}}]})}\n\n" + + messages = [m for m in messages if m["content"]] + print(str(messages)[-100:]) + await asyncio.sleep(4) + + async for chunk in sampling_loop( + model=model, + provider=provider, + system_prompt_suffix=system_prompt_suffix, + messages=messages, # Now using global messages + output_callback=output_callback, + tool_output_callback=tool_output_callback, + api_key=api_key, + ): + if chunk["type"] == "chunk": + await asyncio.sleep(0) + yield f"data: {json.dumps({'choices': [{'delta': {'content': chunk['chunk']}}]})}\n\n" + if chunk["type"] == "messages": + messages = chunk["messages"] + + yield f"data: {json.dumps({'choices': [{'delta': {'content': '', 'finish_reason': 'stop'}}]})}\n\n" + + except Exception as e: + print("Error: An exception occurred.") + print(traceback.format_exc()) + pass + # raise + # print(f"Error: {e}") + # yield f"data: {json.dumps({'error': str(e)})}\n\n" + + return StreamingResponse(stream_response(), media_type="text/event-stream") + + # Instead of running uvicorn here, we'll return the app + return app + + # Original CLI code continues here... print() print_markdown("Welcome to **Open Interpreter**.\n") print_markdown("---") @@ -351,20 +463,22 @@ async def main(): import random tips = [ - "You can type `i` in your terminal to use Open Interpreter.", - "Type `wtf` in your terminal to have Open Interpreter fix the last error.", - "You can type prompts after `i` in your terminal, for example, `i want you to install node`. 
(Yes, really.)", + # "You can type `i` in your terminal to use Open Interpreter.", + "**Tip:** Type `wtf` in your terminal to have Open Interpreter fix the last error.", + # "You can type prompts after `i` in your terminal, for example, `i want you to install node`. (Yes, really.)", + "We recommend using our desktop app for the best experience. Type `d` for early access.", + "**Tip:** Reduce display resolution for better performance.", ] random_tip = random.choice(tips) markdown_text = f"""> Model set to `Claude 3.5 Sonnet (New)`, OS control enabled -We recommend using our desktop app for the best experience. Type `d` for early access. +{random_tip} **Warning:** This AI has full system access and can modify files, install software, and execute commands. By continuing, you accept all risks and responsibility. -Move your mouse to any corner of the screen to exit. Reduce display resolution for better performance. +Move your mouse to any corner of the screen to exit. """ print_markdown(markdown_text) @@ -411,7 +525,7 @@ def tool_output_callback(result: ToolResult, tool_id: str): print(f"---\n{result.error}\n---") try: - messages = await sampling_loop( + async for chunk in sampling_loop( model=model, provider=provider, system_prompt_suffix=system_prompt_suffix, @@ -419,15 +533,22 @@ def tool_output_callback(result: ToolResult, tool_id: str): output_callback=output_callback, tool_output_callback=tool_output_callback, api_key=api_key, - ) + ): + if chunk["type"] == "messages": + messages = chunk["messages"] except Exception as e: - print(f"An error occurred: {e}") + raise # The thread will automatically terminate when the main program exits def run_async_main(): - asyncio.run(main()) + if "--server" in sys.argv: + # Start uvicorn server directly without asyncio.run() + app = asyncio.run(main()) + uvicorn.run(app, host="0.0.0.0", port=8000) + else: + asyncio.run(main()) if __name__ == "__main__": @@ -463,3 +584,13 @@ def check_mouse_position(): print("\nMouse moved to corner. Exiting...") os._exit(0) threading.Event().wait(0.1) # Check every 100ms + + +class ChatMessage(BaseModel): + role: str + content: str + + +class ChatCompletionRequest(BaseModel): + messages: List[ChatMessage] + stream: Optional[bool] = False diff --git a/poetry.lock b/poetry.lock index 166c2774d..5638a52a5 100644 --- a/poetry.lock +++ b/poetry.lock @@ -921,7 +921,7 @@ files = [ name = "dnspython" version = "2.7.0" description = "DNS toolkit" -optional = true +optional = false python-versions = ">=3.9" files = [ {file = "dnspython-2.7.0-py3-none-any.whl", hash = "sha256:b4c34b7d10b51bcc3a5071e7b8dee77939f1e878477eeecc965e9835f63c6c86"}, @@ -991,7 +991,7 @@ files = [ name = "email-validator" version = "2.2.0" description = "A robust email address syntax and deliverability validation library." 
diff --git a/poetry.lock b/poetry.lock
index 166c2774d..5638a52a5 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -921,7 +921,7 @@ files = [
 name = "dnspython"
 version = "2.7.0"
 description = "DNS toolkit"
-optional = true
+optional = false
 python-versions = ">=3.9"
 files = [
     {file = "dnspython-2.7.0-py3-none-any.whl", hash = "sha256:b4c34b7d10b51bcc3a5071e7b8dee77939f1e878477eeecc965e9835f63c6c86"},
@@ -991,7 +991,7 @@ files = [
 name = "email-validator"
 version = "2.2.0"
 description = "A robust email address syntax and deliverability validation library."
-optional = true
+optional = false
 python-versions = ">=3.8"
 files = [
     {file = "email_validator-2.2.0-py3-none-any.whl", hash = "sha256:561977c2d73ce3611850a06fa56b414621e0c8faa9d66f2611407d87465da631"},
@@ -1065,7 +1065,7 @@ boltons = ">=20.0.0"
 name = "fastapi"
 version = "0.111.1"
 description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production"
-optional = true
+optional = false
 python-versions = ">=3.8"
 files = [
     {file = "fastapi-0.111.1-py3-none-any.whl", hash = "sha256:4f51cfa25d72f9fbc3280832e84b32494cf186f50158d364a8765aabf22587bf"},
@@ -1090,7 +1090,7 @@ all = ["email_validator (>=2.0.0)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)"
 name = "fastapi-cli"
 version = "0.0.5"
 description = "Run and manage FastAPI apps from the command line with FastAPI CLI. 🚀"
-optional = true
+optional = false
 python-versions = ">=3.8"
 files = [
     {file = "fastapi_cli-0.0.5-py3-none-any.whl", hash = "sha256:e94d847524648c748a5350673546bbf9bcaeb086b33c24f2e82e021436866a46"},
@@ -1699,7 +1699,7 @@ pyparsing = {version = ">=2.4.2,<3.0.0 || >3.0.0,<3.0.1 || >3.0.1,<3.0.2 || >3.0
 name = "httptools"
 version = "0.6.4"
 description = "A collection of framework independent HTTP protocol utils."
-optional = true
+optional = false
 python-versions = ">=3.8.0"
 files = [
     {file = "httptools-0.6.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3c73ce323711a6ffb0d247dcd5a550b8babf0f757e86a52558fe5b86d6fefcc0"},
@@ -2447,13 +2447,13 @@ test = ["pytest (>=7.4)", "pytest-cov (>=4.1)"]
 
 [[package]]
 name = "litellm"
-version = "1.50.2"
+version = "1.50.4"
 description = "Library to easily interface with LLM API providers"
 optional = false
 python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8"
 files = [
-    {file = "litellm-1.50.2-py3-none-any.whl", hash = "sha256:99cac60c78037946ab809b7cfbbadad53507bb2db8ae39391b4be215a0869fdd"},
-    {file = "litellm-1.50.2.tar.gz", hash = "sha256:b244c9a0e069cc626b85fb9f5cc252114aaff1225500da30ce0940f841aef8ea"},
+    {file = "litellm-1.50.4-py3-none-any.whl", hash = "sha256:cc6992275e24a0bbb4a3b377e6842d45a8510fc85d7f255930a64bb872980a36"},
+    {file = "litellm-1.50.4.tar.gz", hash = "sha256:a7e68ef614f631b58969c2c7a5154a565ba5974558d437c8cd6c8623654880ea"},
 ]
 
 [package.dependencies]
@@ -3112,13 +3112,13 @@ files = [
 
 [[package]]
 name = "openai"
-version = "1.52.1"
+version = "1.52.2"
 description = "The official Python library for the openai API"
 optional = false
 python-versions = ">=3.7.1"
 files = [
-    {file = "openai-1.52.1-py3-none-any.whl", hash = "sha256:f23e83df5ba04ee0e82c8562571e8cb596cd88f9a84ab783e6c6259e5ffbfb4a"},
-    {file = "openai-1.52.1.tar.gz", hash = "sha256:383b96c7e937cbec23cad5bf5718085381e4313ca33c5c5896b54f8e1b19d144"},
+    {file = "openai-1.52.2-py3-none-any.whl", hash = "sha256:57e9e37bc407f39bb6ec3a27d7e8fb9728b2779936daa1fcf95df17d3edfaccc"},
+    {file = "openai-1.52.2.tar.gz", hash = "sha256:87b7d0f69d85f5641678d414b7ee3082363647a5c66a462ed7f3ccb59582da0d"},
 ]
 
 [package.dependencies]
@@ -7194,7 +7194,7 @@ cli = ["click (>=5.0)"]
 name = "python-multipart"
 version = "0.0.12"
 description = "A streaming multipart parser for Python"
-optional = true
+optional = false
 python-versions = ">=3.8"
 files = [
     {file = "python_multipart-0.0.12-py3-none-any.whl", hash = "sha256:43dcf96cf65888a9cd3423544dd0d75ac10f7aa0c3c28a175bbcd00c9ce1aebf"},
@@ -9076,7 +9076,7 @@ zstd = ["zstandard (>=0.18.0)"]
 name = "uvicorn"
 version = "0.30.6"
 description = "The lightning-fast ASGI server."
-optional = true
+optional = false
 python-versions = ">=3.8"
 files = [
     {file = "uvicorn-0.30.6-py3-none-any.whl", hash = "sha256:65fd46fe3fda5bdc1b03b94eb634923ff18cd35b2f084813ea79d1f103f711b5"},
@@ -9102,7 +9102,7 @@ standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)",
 name = "uvloop"
 version = "0.21.0"
 description = "Fast implementation of asyncio event loop on top of libuv"
-optional = true
+optional = false
 python-versions = ">=3.8.0"
 files = [
     {file = "uvloop-0.21.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ec7e6b09a6fdded42403182ab6b832b71f4edaf7f37a9a0e371a01db5f0cb45f"},
@@ -9173,7 +9173,7 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess
 name = "watchfiles"
 version = "0.24.0"
 description = "Simple, modern and high performance file watching and code reload in python."
-optional = true
+optional = false
 python-versions = ">=3.8"
 files = [
     {file = "watchfiles-0.24.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:083dc77dbdeef09fa44bb0f4d1df571d2e12d8a8f985dccde71ac3ac9ac067a0"},
@@ -9679,4 +9679,4 @@ server = ["fastapi", "janus", "uvicorn"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.9,<4"
-content-hash = "93ccc7c97fda82ab0c1eaf33d921552a1404e622b90b26614877aeff249e61ea"
+content-hash = "6c5981111c854235c561b8ad1d0164f8f54bca28cc5c9302dee40ca2759ccdbf"
diff --git a/pyproject.toml b/pyproject.toml
index 6d877c3ad..21211017b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ packages = [
     {include = "interpreter"},
     {include = "scripts"},
 ]
-version = "0.4.0" # Use "-rc1", "-rc2", etc. for pre-release versions
+version = "0.4.2" # Use "-rc1", "-rc2", etc. for pre-release versions
 description = "Let language models run code"
 authors = ["Killian Lucas <killian@openinterpreter.com>"]
 readme = "README.md"
@@ -33,8 +33,6 @@ torchvision = { version = "^0.18.0", optional = true }
 easyocr = { version = "^1.7.1", optional = true }
 
 # Optional [server] dependencies
-fastapi = { version = "^0.111.0", optional = true }
-uvicorn = { version = "^0.30.1", optional = true }
 janus = { version = "^1.0.0", optional = true }
 
 # Required dependencies
@@ -71,6 +69,8 @@ webdriver-manager = "^4.0.2"
 anthropic = "^0.37.1"
 pyautogui = "^0.9.54"
 typer = "^0.12.5"
+fastapi = "^0.111.0"
+uvicorn = "^0.30.1"
 
 [tool.poetry.extras]
 os = ["opencv-python", "pyautogui", "plyer", "pywinctl", "pytesseract", "sentence-transformers", "ipywidgets", "timm", "screeninfo"]