diff --git a/interpreter/computer_use/loop.py b/interpreter/computer_use/loop.py
index 84a7a6cd7..a61adb598 100755
--- a/interpreter/computer_use/loop.py
+++ b/interpreter/computer_use/loop.py
@@ -7,10 +7,19 @@
 import os
 import platform
 import time
+import traceback
 import uuid
 from collections.abc import Callable
 from datetime import datetime
-from enum import StrEnum
+
+try:
+    from enum import StrEnum
+except ImportError:  # Python 3.10 compatibility: StrEnum was added in 3.11
+    from enum import Enum
+
+    class StrEnum(str, Enum):  # keep the str mixin so members still compare as strings
+        pass
+
 from typing import Any, List, cast
 
 import requests
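A note on the compatibility shim above: simply aliasing plain `Enum` would drop the `str` mixin, so enum members would no longer compare equal to their string values; the class-based fallback keeps that behavior. A minimal sanity check of the fallback path (illustrative only, not part of the patch; the member value mirrors `APIProvider.ANTHROPIC` as used later in this file):

```python
from enum import Enum


class StrEnum(str, Enum):  # same shape as the fallback shim above
    pass


class APIProvider(StrEnum):
    ANTHROPIC = "anthropic"  # assumed value, mirroring the class defined in loop.py


assert APIProvider.ANTHROPIC == "anthropic"  # str mixin keeps string equality
assert APIProvider("anthropic") is APIProvider.ANTHROPIC
```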
+ yield {"type": "messages", "messages": messages} + break messages.append({"content": tool_result_content, "role": "user"}) @@ -334,6 +357,95 @@ async def main(): provider = APIProvider.ANTHROPIC system_prompt_suffix = "" + # Check if running in server mode + if "--server" in sys.argv: + app = FastAPI() + + # Start the mouse position checking thread when in server mode + mouse_thread = threading.Thread(target=check_mouse_position) + mouse_thread.daemon = True + mouse_thread.start() + + # Get API key from environment variable + api_key = os.environ.get("ANTHROPIC_API_KEY") + if not api_key: + raise ValueError( + "ANTHROPIC_API_KEY environment variable must be set when running in server mode" + ) + + @app.post("/openai/chat/completions") + async def chat_completion(request: ChatCompletionRequest): + print("BRAND NEW REQUEST") + # Check exit flag before processing request + if exit_flag: + return {"error": "Server shutting down due to mouse in corner"} + + async def stream_response(): + print("is this even happening") + + # Instead of creating converted_messages, append the last message to global messages + global messages + messages.append( + { + "role": request.messages[-1].role, + "content": [ + {"type": "text", "text": request.messages[-1].content} + ], + } + ) + + response_chunks = [] + + async def output_callback(content_block: BetaContentBlock): + chunk = f"data: {json.dumps({'choices': [{'delta': {'content': content_block.text}}]})}\n\n" + response_chunks.append(chunk) + yield chunk + + async def tool_output_callback(result: ToolResult, tool_id: str): + if result.output or result.error: + content = result.output if result.output else result.error + chunk = f"data: {json.dumps({'choices': [{'delta': {'content': content}}]})}\n\n" + response_chunks.append(chunk) + yield chunk + + try: + yield f"data: {json.dumps({'choices': [{'delta': {'role': 'assistant'}}]})}\n\n" + + messages = [m for m in messages if m["content"]] + print(str(messages)[-100:]) + await asyncio.sleep(4) + + async for chunk in sampling_loop( + model=model, + provider=provider, + system_prompt_suffix=system_prompt_suffix, + messages=messages, # Now using global messages + output_callback=output_callback, + tool_output_callback=tool_output_callback, + api_key=api_key, + ): + if chunk["type"] == "chunk": + await asyncio.sleep(0) + yield f"data: {json.dumps({'choices': [{'delta': {'content': chunk['chunk']}}]})}\n\n" + if chunk["type"] == "messages": + messages = chunk["messages"] + + yield f"data: {json.dumps({'choices': [{'delta': {'content': '', 'finish_reason': 'stop'}}]})}\n\n" + + except Exception as e: + print("Error: An exception occurred.") + print(traceback.format_exc()) + pass + # raise + # print(f"Error: {e}") + # yield f"data: {json.dumps({'error': str(e)})}\n\n" + + return StreamingResponse(stream_response(), media_type="text/event-stream") + + # Instead of running uvicorn here, we'll return the app + return app + + # Original CLI code continues here... print() print_markdown("Welcome to **Open Interpreter**.\n") print_markdown("---") @@ -351,20 +463,22 @@ async def main(): import random tips = [ - "You can type `i` in your terminal to use Open Interpreter.", - "Type `wtf` in your terminal to have Open Interpreter fix the last error.", - "You can type prompts after `i` in your terminal, for example, `i want you to install node`. 
(Yes, really.)", + # "You can type `i` in your terminal to use Open Interpreter.", + "**Tip:** Type `wtf` in your terminal to have Open Interpreter fix the last error.", + # "You can type prompts after `i` in your terminal, for example, `i want you to install node`. (Yes, really.)", + "We recommend using our desktop app for the best experience. Type `d` for early access.", + "**Tip:** Reduce display resolution for better performance.", ] random_tip = random.choice(tips) markdown_text = f"""> Model set to `Claude 3.5 Sonnet (New)`, OS control enabled -We recommend using our desktop app for the best experience. Type `d` for early access. +{random_tip} **Warning:** This AI has full system access and can modify files, install software, and execute commands. By continuing, you accept all risks and responsibility. -Move your mouse to any corner of the screen to exit. Reduce display resolution for better performance. +Move your mouse to any corner of the screen to exit. """ print_markdown(markdown_text) @@ -411,7 +525,7 @@ def tool_output_callback(result: ToolResult, tool_id: str): print(f"---\n{result.error}\n---") try: - messages = await sampling_loop( + async for chunk in sampling_loop( model=model, provider=provider, system_prompt_suffix=system_prompt_suffix, @@ -419,15 +533,22 @@ def tool_output_callback(result: ToolResult, tool_id: str): output_callback=output_callback, tool_output_callback=tool_output_callback, api_key=api_key, - ) + ): + if chunk["type"] == "messages": + messages = chunk["messages"] except Exception as e: - print(f"An error occurred: {e}") + raise # The thread will automatically terminate when the main program exits def run_async_main(): - asyncio.run(main()) + if "--server" in sys.argv: + # Start uvicorn server directly without asyncio.run() + app = asyncio.run(main()) + uvicorn.run(app, host="0.0.0.0", port=8000) + else: + asyncio.run(main()) if __name__ == "__main__": @@ -463,3 +584,13 @@ def check_mouse_position(): print("\nMouse moved to corner. Exiting...") os._exit(0) threading.Event().wait(0.1) # Check every 100ms + + +class ChatMessage(BaseModel): + role: str + content: str + + +class ChatCompletionRequest(BaseModel): + messages: List[ChatMessage] + stream: Optional[bool] = False diff --git a/poetry.lock b/poetry.lock index 166c2774d..5638a52a5 100644 --- a/poetry.lock +++ b/poetry.lock @@ -921,7 +921,7 @@ files = [ name = "dnspython" version = "2.7.0" description = "DNS toolkit" -optional = true +optional = false python-versions = ">=3.9" files = [ {file = "dnspython-2.7.0-py3-none-any.whl", hash = "sha256:b4c34b7d10b51bcc3a5071e7b8dee77939f1e878477eeecc965e9835f63c6c86"}, @@ -991,7 +991,7 @@ files = [ name = "email-validator" version = "2.2.0" description = "A robust email address syntax and deliverability validation library." 
diff --git a/poetry.lock b/poetry.lock
index 166c2774d..5638a52a5 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -921,7 +921,7 @@ files = [
 name = "dnspython"
 version = "2.7.0"
 description = "DNS toolkit"
-optional = true
+optional = false
 python-versions = ">=3.9"
 files = [
     {file = "dnspython-2.7.0-py3-none-any.whl", hash = "sha256:b4c34b7d10b51bcc3a5071e7b8dee77939f1e878477eeecc965e9835f63c6c86"},
@@ -991,7 +991,7 @@ files = [
 name = "email-validator"
 version = "2.2.0"
 description = "A robust email address syntax and deliverability validation library."
-optional = true
+optional = false
 python-versions = ">=3.8"
 files = [
     {file = "email_validator-2.2.0-py3-none-any.whl", hash = "sha256:561977c2d73ce3611850a06fa56b414621e0c8faa9d66f2611407d87465da631"},
@@ -1065,7 +1065,7 @@ boltons = ">=20.0.0"
 name = "fastapi"
 version = "0.111.1"
 description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production"
-optional = true
+optional = false
 python-versions = ">=3.8"
 files = [
     {file = "fastapi-0.111.1-py3-none-any.whl", hash = "sha256:4f51cfa25d72f9fbc3280832e84b32494cf186f50158d364a8765aabf22587bf"},
@@ -1090,7 +1090,7 @@ all = ["email_validator (>=2.0.0)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)"
 name = "fastapi-cli"
 version = "0.0.5"
 description = "Run and manage FastAPI apps from the command line with FastAPI CLI. 🚀"
-optional = true
+optional = false
 python-versions = ">=3.8"
 files = [
     {file = "fastapi_cli-0.0.5-py3-none-any.whl", hash = "sha256:e94d847524648c748a5350673546bbf9bcaeb086b33c24f2e82e021436866a46"},
@@ -1699,7 +1699,7 @@ pyparsing = {version = ">=2.4.2,<3.0.0 || >3.0.0,<3.0.1 || >3.0.1,<3.0.2 || >3.0
 name = "httptools"
 version = "0.6.4"
 description = "A collection of framework independent HTTP protocol utils."
-optional = true
+optional = false
 python-versions = ">=3.8.0"
 files = [
     {file = "httptools-0.6.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3c73ce323711a6ffb0d247dcd5a550b8babf0f757e86a52558fe5b86d6fefcc0"},
@@ -2447,13 +2447,13 @@ test = ["pytest (>=7.4)", "pytest-cov (>=4.1)"]
 
 [[package]]
 name = "litellm"
-version = "1.50.2"
+version = "1.50.4"
 description = "Library to easily interface with LLM API providers"
 optional = false
 python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8"
 files = [
-    {file = "litellm-1.50.2-py3-none-any.whl", hash = "sha256:99cac60c78037946ab809b7cfbbadad53507bb2db8ae39391b4be215a0869fdd"},
-    {file = "litellm-1.50.2.tar.gz", hash = "sha256:b244c9a0e069cc626b85fb9f5cc252114aaff1225500da30ce0940f841aef8ea"},
+    {file = "litellm-1.50.4-py3-none-any.whl", hash = "sha256:cc6992275e24a0bbb4a3b377e6842d45a8510fc85d7f255930a64bb872980a36"},
+    {file = "litellm-1.50.4.tar.gz", hash = "sha256:a7e68ef614f631b58969c2c7a5154a565ba5974558d437c8cd6c8623654880ea"},
 ]
 
 [package.dependencies]
@@ -3112,13 +3112,13 @@ files = [
 
 [[package]]
 name = "openai"
-version = "1.52.1"
+version = "1.52.2"
 description = "The official Python library for the openai API"
 optional = false
 python-versions = ">=3.7.1"
 files = [
-    {file = "openai-1.52.1-py3-none-any.whl", hash = "sha256:f23e83df5ba04ee0e82c8562571e8cb596cd88f9a84ab783e6c6259e5ffbfb4a"},
-    {file = "openai-1.52.1.tar.gz", hash = "sha256:383b96c7e937cbec23cad5bf5718085381e4313ca33c5c5896b54f8e1b19d144"},
+    {file = "openai-1.52.2-py3-none-any.whl", hash = "sha256:57e9e37bc407f39bb6ec3a27d7e8fb9728b2779936daa1fcf95df17d3edfaccc"},
+    {file = "openai-1.52.2.tar.gz", hash = "sha256:87b7d0f69d85f5641678d414b7ee3082363647a5c66a462ed7f3ccb59582da0d"},
 ]
 
 [package.dependencies]
@@ -7194,7 +7194,7 @@ cli = ["click (>=5.0)"]
 name = "python-multipart"
 version = "0.0.12"
 description = "A streaming multipart parser for Python"
-optional = true
+optional = false
 python-versions = ">=3.8"
 files = [
     {file = "python_multipart-0.0.12-py3-none-any.whl", hash = "sha256:43dcf96cf65888a9cd3423544dd0d75ac10f7aa0c3c28a175bbcd00c9ce1aebf"},
@@ -9076,7 +9076,7 @@ zstd = ["zstandard (>=0.18.0)"]
 name = "uvicorn"
 version = "0.30.6"
 description = "The lightning-fast ASGI server."
-optional = true
+optional = false
 python-versions = ">=3.8"
 files = [
     {file = "uvicorn-0.30.6-py3-none-any.whl", hash = "sha256:65fd46fe3fda5bdc1b03b94eb634923ff18cd35b2f084813ea79d1f103f711b5"},
@@ -9102,7 +9102,7 @@ standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)",
 name = "uvloop"
 version = "0.21.0"
 description = "Fast implementation of asyncio event loop on top of libuv"
-optional = true
+optional = false
 python-versions = ">=3.8.0"
 files = [
     {file = "uvloop-0.21.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ec7e6b09a6fdded42403182ab6b832b71f4edaf7f37a9a0e371a01db5f0cb45f"},
@@ -9173,7 +9173,7 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess
 name = "watchfiles"
 version = "0.24.0"
 description = "Simple, modern and high performance file watching and code reload in python."
-optional = true
+optional = false
 python-versions = ">=3.8"
 files = [
     {file = "watchfiles-0.24.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:083dc77dbdeef09fa44bb0f4d1df571d2e12d8a8f985dccde71ac3ac9ac067a0"},
@@ -9679,4 +9679,4 @@ server = ["fastapi", "janus", "uvicorn"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.9,<4"
-content-hash = "93ccc7c97fda82ab0c1eaf33d921552a1404e622b90b26614877aeff249e61ea"
+content-hash = "6c5981111c854235c561b8ad1d0164f8f54bca28cc5c9302dee40ca2759ccdbf"
diff --git a/pyproject.toml b/pyproject.toml
index 6d877c3ad..21211017b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ packages = [
     {include = "interpreter"},
     {include = "scripts"},
 ]
-version = "0.4.0" # Use "-rc1", "-rc2", etc. for pre-release versions
+version = "0.4.2" # Use "-rc1", "-rc2", etc. for pre-release versions
 description = "Let language models run code"
 authors = ["Killian Lucas <killian@openinterpreter.com>"]
 readme = "README.md"
@@ -33,8 +33,6 @@ torchvision = { version = "^0.18.0", optional = true }
 easyocr = { version = "^1.7.1", optional = true }
 
 # Optional [server] dependencies
-fastapi = { version = "^0.111.0", optional = true }
-uvicorn = { version = "^0.30.1", optional = true }
 janus = { version = "^1.0.0", optional = true }
 
 # Required dependencies
@@ -71,6 +69,8 @@ webdriver-manager = "^4.0.2"
 anthropic = "^0.37.1"
 pyautogui = "^0.9.54"
 typer = "^0.12.5"
+fastapi = "^0.111.0"
+uvicorn = "^0.30.1"
 
 [tool.poetry.extras]
 os = ["opencv-python", "pyautogui", "plyer", "pywinctl", "pytesseract", "sentence-transformers", "ipywidgets", "timm", "screeninfo"]