Merge branch 'main' into openhands-fix-issue-4127

All-Hands-AI · Oct 1, 2024 · 8e832f9 · 8e832f9
2 parents d86f7a7 + ec1a86f
commit 8e832f9
Show file tree

Hide file tree

Showing 11 changed files with 68 additions and 33 deletions.
diff --git a/docs/modules/usage/getting-started.mdx b/docs/modules/usage/getting-started.mdx
@@ -32,8 +32,7 @@ docker run -it --pull=always \
     ghcr.io/all-hands-ai/openhands:0.9
 ```
 
-You can also run OpenHands in a scriptable [headless mode](https://docs.all-hands.dev/modules/usage/how-to/headless-mode),
-or as an [interactive CLI](https://docs.all-hands.dev/modules/usage/how-to/cli-mode).
+You can also run OpenHands in a scriptable [headless mode](https://docs.all-hands.dev/modules/usage/how-to/headless-mode), as an [interactive CLI](https://docs.all-hands.dev/modules/usage/how-to/cli-mode), or using the [OpenHands GitHub Action](https://docs.all-hands.dev/modules/usage/how-to/github-action).
 
 ## Setup
 

diff --git a/docs/modules/usage/how-to/github-action.md b/docs/modules/usage/how-to/github-action.md
@@ -0,0 +1,15 @@
+# Using the OpenHands GitHub Action
+
+This guide explains how to use the OpenHands GitHub Action, both within the OpenHands repository and in your own projects.
+
+## Using the Action in the OpenHands Repository
+
+To use the OpenHands GitHub Action in the OpenHands repository, an OpenHands maintainer can:
+
+1. Create an issue in the repository.
+2. Add the `fix-me` label to the issue.
+3. Wait for the action to trigger automatically and attempt to resolve the issue.
+
+## Installing the Action in a New Repository
+
+To install the OpenHands GitHub Action in your own repository, follow the [directions in the OpenHands Resolver repo](https://github.com/All-Hands-AI/OpenHands-resolver?tab=readme-ov-file#using-the-github-actions-workflow).
diff --git a/docs/sidebars.ts b/docs/sidebars.ts
@@ -72,6 +72,10 @@ const sidebars: SidebarsConfig = {
           type: 'doc',
           id: 'usage/how-to/headless-mode',
         },
+        {
+          type: 'doc',
+          id: 'usage/how-to/github-action',
+        },
         {
           type: 'doc',
           id: 'usage/how-to/custom-sandbox-guide',

diff --git a/evaluation/swe_bench/README.md b/evaluation/swe_bench/README.md
@@ -69,7 +69,7 @@ This is in limited beta. Contact Xingyao over slack if you want to try this out!
 
 ```bash
 # ./evaluation/swe_bench/scripts/run_infer.sh [model_config] [git-version] [agent] [eval_limit] [max_iter] [num_workers] [dataset] [dataset_split]
-ALLHANDS_API_KEY="YOUR-API-KEY" RUNTIME=remote EVAL_DOCKER_IMAGE_PREFIX="us-docker.pkg.dev/evaluation-428620/swe-bench-images" \
+ALLHANDS_API_KEY="YOUR-API-KEY" RUNTIME=remote SANDBOX_REMOTE_RUNTIME_API_URL="https://runtime.eval.all-hands.dev" EVAL_DOCKER_IMAGE_PREFIX="us-central1-docker.pkg.dev/evaluation-092424/swe-bench-images" \
 ./evaluation/swe_bench/scripts/run_infer.sh llm.eval HEAD CodeActAgent 300 30 16 "princeton-nlp/SWE-bench_Lite" test
 # This example runs evaluation on CodeActAgent for 300 instances on "princeton-nlp/SWE-bench_Lite"'s test set, with a maximum of 30 iterations per instance and 16 workers running in parallel
 ```
@@ -163,7 +163,8 @@ This is in limited beta. Contact Xingyao over slack if you want to try this out!
 
 ```bash
 # ./evaluation/swe_bench/scripts/eval_infer_remote.sh [output.jsonl filepath] [num_workers]
-ALLHANDS_API_KEY="YOUR-API-KEY" RUNTIME=remote EVAL_DOCKER_IMAGE_PREFIX="us-docker.pkg.dev/evaluation-428620/swe-bench-images" evaluation/swe_bench/scripts/eval_infer_remote.sh evaluation/outputs/swe_bench_lite/CodeActAgent/Llama-3.1-70B-Instruct-Turbo_maxiter_30_N_v1.9-no-hint/output.jsonl 16 "princeton-nlp/SWE-bench_Lite" "test"
+ALLHANDS_API_KEY="YOUR-API-KEY" RUNTIME=remote SANDBOX_REMOTE_RUNTIME_API_URL="https://runtime.eval.all-hands.dev" EVAL_DOCKER_IMAGE_PREFIX="us-central1-docker.pkg.dev/evaluation-092424/swe-bench-images" \
+evaluation/swe_bench/scripts/eval_infer_remote.sh evaluation/evaluation_outputs/outputs/swe_bench_lite/CodeActAgent/Llama-3.1-70B-Instruct-Turbo_maxiter_30_N_v1.9-no-hint/output.jsonl 16 "princeton-nlp/SWE-bench_Lite" "test"
 # This example evaluates patches generated by CodeActAgent using Llama-3.1-70B-Instruct-Turbo on "princeton-nlp/SWE-bench_Lite"'s test set, with 16 workers running in parallel
 ```
 

diff --git a/evaluation/swe_bench/eval_infer.py b/evaluation/swe_bench/eval_infer.py
@@ -81,6 +81,7 @@ def get_config(instance: pd.Series) -> AppConfig:
             # large enough timeout, since some testcases take very long to run
             timeout=1800,
             api_key=os.environ.get('ALLHANDS_API_KEY', None),
+            remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
         ),
         # do not mount workspace
         workspace_base=None,

diff --git a/evaluation/swe_bench/run_infer.py b/evaluation/swe_bench/run_infer.py
@@ -131,6 +131,7 @@ def get_config(
             # large enough timeout, since some testcases take very long to run
             timeout=300,
             api_key=os.environ.get('ALLHANDS_API_KEY', None),
+            remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
         ),
         # do not mount workspace
         workspace_base=None,

diff --git a/evaluation/swe_bench/scripts/cleanup_remote_runtime.sh b/evaluation/swe_bench/scripts/cleanup_remote_runtime.sh
@@ -2,10 +2,10 @@
 
 
 # API base URL
-BASE_URL="https://api.all-hands.dev/v0"
+BASE_URL="https://runtime.eval.all-hands.dev"
 
 # Get the list of runtimes
-response=$(curl --silent --location --request GET "${BASE_URL}/runtime/list" \
+response=$(curl --silent --location --request GET "${BASE_URL}/list" \
   --header "X-API-Key: ${ALLHANDS_API_KEY}")
 
 n_runtimes=$(echo $response | jq -r '.total')
@@ -16,7 +16,7 @@ runtime_ids=$(echo $response | jq -r '.runtimes | .[].runtime_id')
 counter=1
 for runtime_id in $runtime_ids; do
   echo "Stopping runtime ${counter}/${n_runtimes}: ${runtime_id}"
-  curl --silent --location --request POST "${BASE_URL}/runtime/stop" \
+  curl --silent --location --request POST "${BASE_URL}/stop" \
     --header "X-API-Key: ${ALLHANDS_API_KEY}" \
     --header "Content-Type: application/json" \
     --data-raw "{\"runtime_id\": \"${runtime_id}\"}"

diff --git a/frontend/package-lock.json b/frontend/package-lock.json
diff --git a/frontend/package.json b/frontend/package.json
@@ -65,7 +65,7 @@
     "@testing-library/react": "^16.0.1",
     "@testing-library/user-event": "^14.5.2",
     "@types/node": "^22.7.3",
-    "@types/react": "^18.3.9",
+    "@types/react": "^18.3.10",
     "@types/react-dom": "^18.3.0",
     "@types/react-highlight": "^0.12.8",
     "@types/react-syntax-highlighter": "^15.5.13",

diff --git a/openhands/server/session/agent_session.py b/openhands/server/session/agent_session.py
@@ -1,4 +1,5 @@
 import asyncio
+import concurrent.futures
 from threading import Thread
 from typing import Callable, Optional
 
@@ -75,6 +76,13 @@ async def start(
         self.thread = Thread(target=self._run, daemon=True)
         self.thread.start()
 
+        def coro_callback(task):
+            fut: concurrent.futures.Future = concurrent.futures.Future()
+            try:
+                fut.set_result(task.result())
+            except Exception as e:
+                logger.error(f'Error starting session: {e}')
+
         coro = self._start(
             runtime_name,
             config,
@@ -85,7 +93,9 @@ async def start(
             agent_configs,
             status_message_callback,
         )
-        asyncio.run_coroutine_threadsafe(coro, self.loop)  # type: ignore
+        asyncio.run_coroutine_threadsafe(coro, self.loop).add_done_callback(
+            coro_callback
+        )  # type: ignore
 
     async def _start(
         self,
@@ -172,13 +182,17 @@ def _create_runtime(
         logger.info(f'Initializing runtime `{runtime_name}` now...')
         runtime_cls = get_runtime_cls(runtime_name)
 
-        self.runtime = runtime_cls(
-            config=config,
-            event_stream=self.event_stream,
-            sid=self.sid,
-            plugins=agent.sandbox_plugins,
-            status_message_callback=status_message_callback,
-        )
+        try:
+            self.runtime = runtime_cls(
+                config=config,
+                event_stream=self.event_stream,
+                sid=self.sid,
+                plugins=agent.sandbox_plugins,
+                status_message_callback=status_message_callback,
+            )
+        except Exception as e:
+            logger.error(f'Runtime initialization failed: {e}')
+            raise
 
         if self.runtime is not None:
             logger.debug(

diff --git a/poetry.lock b/poetry.lock