diff --git a/.github/workflows/eval-runner.yml b/.github/workflows/eval-runner.yml index 9b2576a2644e..b1ace09b35de 100644 --- a/.github/workflows/eval-runner.yml +++ b/.github/workflows/eval-runner.yml @@ -1,4 +1,4 @@ -name: Run Evaluation +name: Run SWE-Bench Evaluation on: pull_request: @@ -58,24 +58,6 @@ jobs: echo "api_key = \"$DEEPSEEK_API_KEY\"" >> config.toml echo "temperature = 0.0" >> config.toml - - name: Run integration test evaluation - env: - ALLHANDS_API_KEY: ${{ secrets.ALLHANDS_EVAL_RUNTIME_API_KEY }} - RUNTIME: remote - SANDBOX_REMOTE_RUNTIME_API_URL: https://runtime.eval.all-hands.dev - EVAL_DOCKER_IMAGE_PREFIX: us-central1-docker.pkg.dev/evaluation-092424/swe-bench-images - - run: | - poetry run ./evaluation/integration_tests/scripts/run_infer.sh llm.eval HEAD CodeActAgent '' $N_PROCESSES - - # get evaluation report - REPORT_FILE=$(find evaluation/evaluation_outputs/outputs/integration_tests/CodeActAgent/deepseek-chat_maxiter_10_N* -name "report.md" -type f | head -n 1) - echo "REPORT_FILE: $REPORT_FILE" - echo "INTEGRATION_TEST_REPORT<<EOF" >> $GITHUB_ENV - cat $REPORT_FILE >> $GITHUB_ENV - echo >> $GITHUB_ENV - echo "EOF" >> $GITHUB_ENV - - name: Run SWE-Bench evaluation env: ALLHANDS_API_KEY: ${{ secrets.ALLHANDS_EVAL_RUNTIME_API_KEY }} @@ -143,9 +125,6 @@ jobs: **SWE-Bench Evaluation Report** ${{ env.SWEBENCH_REPORT }} --- - **Integration Tests Evaluation Report** - ${{ env.INTEGRATION_TEST_REPORT }} - --- You can download the full evaluation outputs [here](${{ env.ARTIFACT_URL }}).
- name: Post to a Slack channel diff --git a/.github/workflows/fe-unit-tests.yml b/.github/workflows/fe-unit-tests.yml index 2bced7406e6c..b720bfe34c28 100644 --- a/.github/workflows/fe-unit-tests.yml +++ b/.github/workflows/fe-unit-tests.yml @@ -35,6 +35,9 @@ jobs: - name: Install dependencies working-directory: ./frontend run: npm ci + - name: Run TypeScript compilation + working-directory: ./frontend + run: npm run make-i18n && tsc - name: Run tests and collect coverage working-directory: ./frontend run: npm run test:coverage diff --git a/.github/workflows/integration-runner.yml b/.github/workflows/integration-runner.yml new file mode 100644 index 000000000000..120572aa0cdd --- /dev/null +++ b/.github/workflows/integration-runner.yml @@ -0,0 +1,158 @@ +name: Run Integration Tests + +on: + pull_request: + types: [labeled] + workflow_dispatch: + inputs: + reason: + description: 'Reason for manual trigger' + required: true + default: '' + schedule: + - cron: '30 22 * * *' # Runs at 10:30pm UTC every day + +env: + N_PROCESSES: 10 # Global configuration for number of parallel processes for evaluation + +jobs: + run-integration-tests: + if: github.event.label.name == 'integration-test' || github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' + runs-on: ubuntu-latest + permissions: + contents: "read" + id-token: "write" + pull-requests: "write" + issues: "write" + strategy: + matrix: + python-version: ["3.12"] + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Install poetry via pipx + run: pipx install poetry + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: "poetry" + + - name: Comment on PR if 'integration-test' label is present + if: github.event_name == 'pull_request' && github.event.label.name == 'integration-test' + uses: KeisukeYamashita/create-comment@v1 + with: + unique: false + comment: | + Hi! 
I started running the integration tests on your PR. You will receive a comment with the results shortly. + + - name: Install Python dependencies using Poetry + run: poetry install --without evaluation,llama-index + + - name: Configure config.toml for testing with Haiku + env: + LLM_MODEL: "litellm_proxy/claude-3-5-haiku-20241022" + LLM_API_KEY: ${{ secrets.LLM_API_KEY }} + LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }} + run: | + echo "[llm.eval]" > config.toml + echo "model = \"$LLM_MODEL\"" >> config.toml + echo "api_key = \"$LLM_API_KEY\"" >> config.toml + echo "base_url = \"$LLM_BASE_URL\"" >> config.toml + echo "temperature = 0.0" >> config.toml + + - name: Build environment + run: make build + + - name: Run integration test evaluation for Haiku + env: + SANDBOX_FORCE_REBUILD_RUNTIME: True + run: | + poetry run ./evaluation/integration_tests/scripts/run_infer.sh llm.eval HEAD CodeActAgent '' $N_PROCESSES '' 'haiku_run' + + # get integration tests report + REPORT_FILE_HAIKU=$(find evaluation/evaluation_outputs/outputs/integration_tests/CodeActAgent/*haiku*_maxiter_10_N* -name "report.md" -type f | head -n 1) + echo "REPORT_FILE: $REPORT_FILE_HAIKU" + echo "INTEGRATION_TEST_REPORT_HAIKU<<EOF" >> $GITHUB_ENV + cat $REPORT_FILE_HAIKU >> $GITHUB_ENV + echo >> $GITHUB_ENV + echo "EOF" >> $GITHUB_ENV + + - name: Wait a little bit + run: sleep 10 + + - name: Configure config.toml for testing with DeepSeek + env: + LLM_MODEL: "litellm_proxy/deepseek-chat" + LLM_API_KEY: ${{ secrets.LLM_API_KEY }} + LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }} + run: | + echo "[llm.eval]" > config.toml + echo "model = \"$LLM_MODEL\"" >> config.toml + echo "api_key = \"$LLM_API_KEY\"" >> config.toml + echo "base_url = \"$LLM_BASE_URL\"" >> config.toml + echo "temperature = 0.0" >> config.toml + + - name: Run integration test evaluation for DeepSeek + env: + SANDBOX_FORCE_REBUILD_RUNTIME: True + run: | + poetry run ./evaluation/integration_tests/scripts/run_infer.sh llm.eval HEAD CodeActAgent ''
$N_PROCESSES '' 'deepseek_run' + + # get integration tests report + REPORT_FILE_DEEPSEEK=$(find evaluation/evaluation_outputs/outputs/integration_tests/CodeActAgent/deepseek*_maxiter_10_N* -name "report.md" -type f | head -n 1) + echo "REPORT_FILE: $REPORT_FILE_DEEPSEEK" + echo "INTEGRATION_TEST_REPORT_DEEPSEEK<<EOF" >> $GITHUB_ENV + cat $REPORT_FILE_DEEPSEEK >> $GITHUB_ENV + echo >> $GITHUB_ENV + echo "EOF" >> $GITHUB_ENV + + - name: Create archive of evaluation outputs + run: | + TIMESTAMP=$(date +'%y-%m-%d-%H-%M') + cd evaluation/evaluation_outputs/outputs # Change to the outputs directory + tar -czvf ../../../integration_tests_${TIMESTAMP}.tar.gz integration_tests/CodeActAgent/* # Only include the actual result directories + + - name: Upload evaluation results as artifact + uses: actions/upload-artifact@v4 + id: upload_results_artifact + with: + name: integration-test-outputs-${{ github.run_id }}-${{ github.run_attempt }} + path: integration_tests_*.tar.gz + + - name: Get artifact URLs + run: | + echo "ARTIFACT_URL=${{ steps.upload_results_artifact.outputs.artifact-url }}" >> $GITHUB_ENV + + - name: Set timestamp and trigger reason + run: | + echo "TIMESTAMP=$(date +'%Y-%m-%d-%H-%M')" >> $GITHUB_ENV + if [[ "${{ github.event_name }}" == "pull_request" ]]; then + echo "TRIGGER_REASON=pr-${{ github.event.pull_request.number }}" >> $GITHUB_ENV + elif [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then + echo "TRIGGER_REASON=manual-${{ github.event.inputs.reason }}" >> $GITHUB_ENV + else + echo "TRIGGER_REASON=nightly-scheduled" >> $GITHUB_ENV + fi + + - name: Comment with results and artifact link + id: create_comment + uses: KeisukeYamashita/create-comment@v1 + with: + # if triggered by PR, use PR number, otherwise use 5318 as fallback issue number for manual triggers + number: ${{ github.event_name == 'pull_request' && github.event.pull_request.number || 5318 }} + unique: false + comment: | + Trigger by: ${{ github.event_name == 'pull_request' && format('Pull
Request (integration-test label on PR #{0})', github.event.pull_request.number) || (github.event_name == 'workflow_dispatch' && format('Manual Trigger: {0}', github.event.inputs.reason)) || 'Nightly Scheduled Run' }} + Commit: ${{ github.sha }} + **Integration Tests Report (Haiku)** + Haiku LLM Test Results: + ${{ env.INTEGRATION_TEST_REPORT_HAIKU }} + --- + **Integration Tests Report (DeepSeek)** + DeepSeek LLM Test Results: + ${{ env.INTEGRATION_TEST_REPORT_DEEPSEEK }} + --- + Download testing outputs (includes both Haiku and DeepSeek results): [Download](${{ steps.upload_results_artifact.outputs.artifact-url }}) diff --git a/.github/workflows/lint-fix.yml b/.github/workflows/lint-fix.yml index 9fa97eaaf2f1..dca35010fb80 100644 --- a/.github/workflows/lint-fix.yml +++ b/.github/workflows/lint-fix.yml @@ -5,9 +5,10 @@ on: types: [labeled] jobs: - lint-fix: + # Frontend lint fixes + lint-fix-frontend: if: github.event.label.name == 'lint-fix' - name: Fix linting issues + name: Fix frontend linting issues runs-on: ubuntu-latest permissions: contents: write @@ -20,7 +21,6 @@ jobs: fetch-depth: 0 token: ${{ secrets.GITHUB_TOKEN }} - # Frontend lint fixes - name: Install Node.js 20 uses: actions/setup-node@v4 with: @@ -34,7 +34,36 @@ jobs: cd frontend npm run lint:fix - # Python lint fixes + # Commit and push changes if any + - name: Check for changes + id: git-check + run: | + git diff --quiet || echo "changes=true" >> $GITHUB_OUTPUT + - name: Commit and push if there are changes + if: steps.git-check.outputs.changes == 'true' + run: | + git config --local user.email "openhands@all-hands.dev" + git config --local user.name "OpenHands Bot" + git add -A + git commit -m "🤖 Auto-fix frontend linting issues" + git push + + # Python lint fixes + lint-fix-python: + if: github.event.label.name == 'lint-fix' + name: Fix Python linting issues + runs-on: ubuntu-latest + permissions: + contents: write + pull-requests: write + steps: + - uses: actions/checkout@v4 + with: + ref: 
${{ github.head_ref }} + repository: ${{ github.event.pull_request.head.repo.full_name }} + fetch-depth: 0 + token: ${{ secrets.GITHUB_TOKEN }} + - name: Set up python uses: actions/setup-python@v5 with: @@ -58,5 +87,5 @@ jobs: git config --local user.email "openhands@all-hands.dev" git config --local user.name "OpenHands Bot" git add -A - git commit -m "🤖 Auto-fix linting issues" + git commit -m "🤖 Auto-fix Python linting issues" git push diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index b6a9d327d860..1bfc8c91c6a4 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -30,10 +30,11 @@ jobs: run: | cd frontend npm install --frozen-lockfile - - name: Lint + - name: Lint and TypeScript compilation run: | cd frontend npm run lint + npm run make-i18n && tsc # Run lint on the python code lint-python: diff --git a/.github/workflows/openhands-resolver.yml b/.github/workflows/openhands-resolver.yml index f24a8e90cbfb..2719c3773607 100644 --- a/.github/workflows/openhands-resolver.yml +++ b/.github/workflows/openhands-resolver.yml @@ -16,6 +16,11 @@ on: type: string default: "main" description: "Target branch to pull and create PR against" + base_container_image: + required: false + type: string + default: "" + description: "Custom sandbox env" secrets: LLM_MODEL: required: true @@ -139,6 +144,7 @@ jobs: echo "MAX_ITERATIONS=${{ inputs.max_iterations || 50 }}" >> $GITHUB_ENV echo "SANDBOX_ENV_GITHUB_TOKEN=${{ secrets.GITHUB_TOKEN }}" >> $GITHUB_ENV + echo "SANDBOX_ENV_BASE_CONTAINER_IMAGE=${{ inputs.base_container_image }}" >> $GITHUB_ENV # Set branch variables echo "TARGET_BRANCH=${{ inputs.target_branch }}" >> $GITHUB_ENV diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 847b6c469812..b1914cbd5b29 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -21,14 +21,14 @@ There are many ways that you can contribute: 1. 
**Download and use** OpenHands, and send [issues](https://github.com/All-Hands-AI/OpenHands/issues) when you encounter something that isn't working or a feature that you'd like to see. 2. **Send feedback** after each session by [clicking the thumbs-up thumbs-down buttons](https://docs.all-hands.dev/modules/usage/feedback), so we can see where things are working and failing, and also build an open dataset for training code agents. -3. **Improve the Codebase** by sending PRs (see details below). In particular, we have some [good first issues](https://github.com/All-Hands-AI/OpenHands/labels/good%20first%20issue) that may be ones to start on. +3. **Improve the Codebase** by sending [PRs](#sending-pull-requests-to-openhands) (see details below). In particular, we have some [good first issues](https://github.com/All-Hands-AI/OpenHands/labels/good%20first%20issue) that may be ones to start on. ## What can I build? Here are a few ways you can help improve the codebase. #### UI/UX We're always looking to improve the look and feel of the application. If you've got a small fix -for something that's bugging you, feel free to open up a PR that changes the `./frontend` directory. +for something that's bugging you, feel free to open up a PR that changes the [`./frontend`](./frontend) directory. If you're looking to make a bigger change, add a new UI element, or significantly alter the style of the application, please open an issue first, or better, join the #frontend channel in our Slack @@ -46,7 +46,7 @@ We use the [SWE-bench](https://www.swebench.com/) benchmark to test our agent. Y channel in Slack to learn more. #### Adding a new agent -You may want to experiment with building new types of agents. You can add an agent to `openhands/agenthub` +You may want to experiment with building new types of agents. You can add an agent to [`openhands/agenthub`](./openhands/agenthub) to help expand the capabilities of OpenHands. 
#### Adding a new runtime @@ -57,8 +57,8 @@ If you work for a company that provides a cloud-based runtime, you could help us by implementing the [interface specified here](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/runtime/base.py). #### Testing -When you write code, it is also good to write tests. Please navigate to the `tests` folder to see existing test suites. -At the moment, we have two kinds of tests: `unit` and `integration`. Please refer to the README for each test suite. These tests also run on GitHub's continuous integration to ensure quality of the project. +When you write code, it is also good to write tests. Please navigate to the [`./tests`](./tests) folder to see existing test suites. +At the moment, we have two kinds of tests: [`unit`](./tests/unit) and [`integration`](./evaluation/integration_tests). Please refer to the README for each test suite. These tests also run on GitHub's continuous integration to ensure quality of the project. ## Sending Pull Requests to OpenHands @@ -103,7 +103,7 @@ Further, if you see an issue you like, please leave a "thumbs-up" or a comment, ### Making Pull Requests -We're generally happy to consider all PRs, with the evaluation process varying based on the type of change: +We're generally happy to consider all [PRs](https://github.com/All-Hands-AI/OpenHands/pulls), with the evaluation process varying based on the type of change: #### For Small Improvements diff --git a/docs/modules/usage/configuration-options.md b/docs/modules/usage/configuration-options.md new file mode 100644 index 000000000000..7a2718d7d786 --- /dev/null +++ b/docs/modules/usage/configuration-options.md @@ -0,0 +1,465 @@ +# Configuration Options + +This guide details all configuration options available for OpenHands, helping you customize its behavior and integrate it with other services. 
+ +:::note +If you are running in [GUI Mode](https://docs.all-hands.dev/modules/usage/how-to/gui-mode), the settings available in the Settings UI will always +take precedence. +::: + +--- + +# Table of Contents + +1. [Core Configuration](#core-configuration) + - [API Keys](#api-keys) + - [Workspace](#workspace) + - [Debugging and Logging](#debugging-and-logging) + - [Session Management](#session-management) + - [Trajectories](#trajectories) + - [File Store](#file-store) + - [Task Management](#task-management) + - [Sandbox Configuration](#sandbox-configuration) + - [Miscellaneous](#miscellaneous) +2. [LLM Configuration](#llm-configuration) + - [AWS Credentials](#aws-credentials) + - [API Configuration](#api-configuration) + - [Custom LLM Provider](#custom-llm-provider) + - [Embeddings](#embeddings) + - [Message Handling](#message-handling) + - [Model Selection](#model-selection) + - [Retrying](#retrying) + - [Advanced Options](#advanced-options) +3. [Agent Configuration](#agent-configuration) + - [Microagent Configuration](#microagent-configuration) + - [Memory Configuration](#memory-configuration) + - [LLM Configuration](#llm-configuration-2) + - [ActionSpace Configuration](#actionspace-configuration) + - [Microagent Usage](#microagent-usage) +4. [Sandbox Configuration](#sandbox-configuration-2) + - [Execution](#execution) + - [Container Image](#container-image) + - [Networking](#networking) + - [Linting and Plugins](#linting-and-plugins) + - [Dependencies and Environment](#dependencies-and-environment) + - [Evaluation](#evaluation) +5. [Security Configuration](#security-configuration) + - [Confirmation Mode](#confirmation-mode) + - [Security Analyzer](#security-analyzer) + +--- + +## Core Configuration + +The core configuration options are defined in the `[core]` section of the `config.toml` file. 
+ +**API Keys** +- `e2b_api_key` + - Type: `str` + - Default: `""` + - Description: API key for E2B + +- `modal_api_token_id` + - Type: `str` + - Default: `""` + - Description: API token ID for Modal + +- `modal_api_token_secret` + - Type: `str` + - Default: `""` + - Description: API token secret for Modal + +**Workspace** +- `workspace_base` + - Type: `str` + - Default: `"./workspace"` + - Description: Base path for the workspace + +- `cache_dir` + - Type: `str` + - Default: `"/tmp/cache"` + - Description: Cache directory path + +**Debugging and Logging** +- `debug` + - Type: `bool` + - Default: `false` + - Description: Enable debugging + +- `disable_color` + - Type: `bool` + - Default: `false` + - Description: Disable color in terminal output + +**Trajectories** +- `trajectories_path` + - Type: `str` + - Default: `"./trajectories"` + - Description: Path to store trajectories (can be a folder or a file). If it's a folder, the trajectories will be saved in that folder, in a file named after the session ID with a `.json` extension.
+ +**File Store** +- `file_store_path` + - Type: `str` + - Default: `"/tmp/file_store"` + - Description: File store path + +- `file_store` + - Type: `str` + - Default: `"memory"` + - Description: File store type + +- `file_uploads_allowed_extensions` + - Type: `list of str` + - Default: `[".*"]` + - Description: List of allowed file extensions for uploads + +- `file_uploads_max_file_size_mb` + - Type: `int` + - Default: `0` + - Description: Maximum file size for uploads, in megabytes + +- `file_uploads_restrict_file_types` + - Type: `bool` + - Default: `false` + - Description: Restrict file types for file uploads + +- `file_uploads_allowed_extensions` + - Type: `list of str` + - Default: `[".*"]` + - Description: List of allowed file extensions for uploads + +**Task Management** +- `max_budget_per_task` + - Type: `float` + - Default: `0.0` + - Description: Maximum budget per task (0.0 means no limit) + +- `max_iterations` + - Type: `int` + - Default: `100` + - Description: Maximum number of iterations + +**Sandbox Configuration** +- `workspace_mount_path_in_sandbox` + - Type: `str` + - Default: `"/workspace"` + - Description: Path to mount the workspace in the sandbox + +- `workspace_mount_path` + - Type: `str` + - Default: `""` + - Description: Path to mount the workspace + +- `workspace_mount_rewrite` + - Type: `str` + - Default: `""` + - Description: Path to rewrite the workspace mount path to. You can usually ignore this; it applies only to special cases of running inside another container. + +**Miscellaneous** +- `run_as_openhands` + - Type: `bool` + - Default: `true` + - Description: Run as OpenHands + +- `runtime` + - Type: `str` + - Default: `"eventstream"` + - Description: Runtime environment + +- `default_agent` + - Type: `str` + - Default: `"CodeActAgent"` + - Description: Name of the default agent + +- `jwt_secret` + - Type: `str` + - Default: `uuid.uuid4().hex` + - Description: JWT secret for authentication. Please set it to your own value.
+ +## LLM Configuration + +The LLM (Large Language Model) configuration options are defined in the `[llm]` section of the `config.toml` file. + +To use these with the docker command, pass in `-e LLM_