# Add data challenges #369
# Workflow file for this run
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow name as displayed in the GitHub Actions UI.
name: Benchmark CI

# Run only when files under benchmark/ or this workflow file change.
# The '!benchmark/reports/**' pattern excludes changes under benchmark/reports/.
on:
  push:
    branches: [master, ci-test*]
    paths:
      - 'benchmark/**'
      - .github/workflows/benchmark-ci.yml
      - '!benchmark/reports/**'
  pull_request:
    branches: [stable, master, release-*]
    paths:
      - 'benchmark/**'
      - '!benchmark/reports/**'
      - .github/workflows/benchmark-ci.yml
jobs:
  # Static checks for the code under ./benchmark/: flake8, black, isort and
  # autoflake, all executed through Poetry.
  lint:
    runs-on: ubuntu-latest
    env:
      # Quoted so YAML keeps it a string ('3.10', not the float 3.1).
      min-python-version: '3.10'

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          fetch-depth: 0
          # On push events there is no pull_request payload, so both
          # expressions evaluate to empty strings.
          # NOTE(review): relies on actions/checkout treating empty inputs as
          # "use the default ref/repository" - confirm.
          ref: ${{ github.event.pull_request.head.ref }}
          repository: ${{ github.event.pull_request.head.repo.full_name }}

      - name: Set up Python ${{ env.min-python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.min-python-version }}

      # Exposes today's date as the step output `date`.
      - id: get_date
        name: Get date
        working-directory: ./benchmark/
        run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT

      - name: Install Poetry
        working-directory: ./benchmark/
        run: |
          curl -sSL https://install.python-poetry.org | python -

      - name: Install dependencies
        working-directory: ./benchmark/
        run: |
          export POETRY_VIRTUALENVS_IN_PROJECT=true
          poetry install -vvv

      - name: Lint with flake8
        working-directory: ./benchmark/
        run: poetry run flake8

      # `success() || failure()` keeps the remaining checks running after an
      # earlier check failed (but not after a cancellation), so one run
      # reports every formatting problem.
      - name: Check black formatting
        working-directory: ./benchmark/
        run: poetry run black . --exclude test.py --check
        if: success() || failure()

      - name: Check isort formatting
        working-directory: ./benchmark/
        run: poetry run isort . --check
        if: success() || failure()

      - name: Check for unused imports and pass statements
        working-directory: ./benchmark/
        run: |
          cmd="poetry run autoflake --remove-all-unused-imports --recursive --ignore-init-module-imports --ignore-pass-after-docstring agbenchmark"
          $cmd --check || (echo "You have unused imports or pass statements, please run '${cmd} --in-place'" && exit 1)
        if: success() || failure()

  # Runs agbenchmark against each agent in the matrix, then runs the benchmark
  # package's own pytest suite and fails if that produced uncommitted changes.
  tests-agbenchmark:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        agent-name: [forge]
      fail-fast: false
    timeout-minutes: 20
    env:
      # Job-level env is not shared between jobs: without this declaration the
      # `env.min-python-version` expressions below expand to an empty string
      # and the Python version is left unpinned. Keep in sync with `lint`.
      min-python-version: '3.10'

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          fetch-depth: 0
          ref: ${{ github.event.pull_request.head.ref }}
          repository: ${{ github.event.pull_request.head.repo.full_name }}
          submodules: true

      - name: Set up Python ${{ env.min-python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.min-python-version }}

      - name: Install Poetry
        working-directory: ./autogpts/${{ matrix.agent-name }}/
        run: |
          curl -sSL https://install.python-poetry.org | python -

      - name: Run regression tests
        working-directory: ./autogpts/${{ matrix.agent-name }}/
        run: |
          # Start the agent's `run` script in the background and give it time to come up
          sh run &
          sleep 20
          set +e # Ignore non-zero exit codes and continue execution
          echo "Running the following command: poetry run agbenchmark --maintain --mock"
          poetry run agbenchmark --maintain --mock
          EXIT_CODE=$?
          set -e # Stop ignoring non-zero exit codes
          # Exit code 5 is tolerated; any other non-zero exit code fails the step
          if [ "$EXIT_CODE" -eq 5 ]; then
            echo "regression_tests.json is empty."
          elif [ "$EXIT_CODE" -ne 0 ]; then
            echo "agbenchmark --maintain --mock failed with exit code $EXIT_CODE"
            exit "$EXIT_CODE"
          fi
          echo "Running the following command: poetry run agbenchmark --mock"
          poetry run agbenchmark --mock
          echo "Running the following command: poetry run agbenchmark --mock --category=retrieval"
          poetry run agbenchmark --mock --category=retrieval
          echo "Running the following command: poetry run agbenchmark --mock --category=interface"
          poetry run agbenchmark --mock --category=interface
          echo "Running the following command: poetry run agbenchmark --mock --category=coding"
          poetry run agbenchmark --mock --category=coding
          echo "Running the following command: poetry run agbenchmark --test=WriteFile"
          poetry run agbenchmark --test=WriteFile
          sh run_benchmark serve &
          sleep 10
          cd ../../benchmark
          poetry install
          echo "Adding the BUILD_SKILL_TREE environment variable. This will attempt to add new elements in the skill tree. If new elements are added, the CI fails because they should have been pushed"
          export BUILD_SKILL_TREE=true
          poetry run agbenchmark --mock
          poetry run pytest -vv -s tests
          # grep exits non-zero when nothing matches; `|| echo` keeps that from aborting the step
          # NOTE(review): in this regex `..` matches any two characters - confirm the frontend alternative matches the paths git diff prints
          CHANGED=$(git diff --name-only | grep -E '(agbenchmark/challenges)|(../frontend/assets)') || echo "No diffs"
          if [ ! -z "$CHANGED" ]; then
            echo "There are unstaged changes please run agbenchmark and commit those changes since they are needed."
            echo "$CHANGED"
            exit 1
          else
            echo "No unstaged changes."
          fi
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}