Add data challenges #369

Workflow file for this run

.github/workflows/benchmark-ci.yml at 90ecb30

	# Workflow that lints and tests the code under benchmark/.
	name: Benchmark CI

	# Path filters are evaluated in order; the '!' entries exclude generated
	# reports so that changes under benchmark/reports/ do not match.
	on:
	  push:
	    branches:
	      - master
	      - 'ci-test*'
	    paths:
	      - 'benchmark/**'
	      - '.github/workflows/benchmark-ci.yml'
	      - '!benchmark/reports/**'
	  pull_request:
	    branches:
	      - stable
	      - master
	      - 'release-*'
	    paths:
	      - 'benchmark/**'
	      - '!benchmark/reports/**'
	      - '.github/workflows/benchmark-ci.yml'

	jobs:
	  # Static checks (flake8, black, isort, autoflake) run from ./benchmark/.
	  lint:
	    runs-on: ubuntu-latest
	    env:
	      # Quoted so the version stays the string "3.10" rather than the float 3.1.
	      min-python-version: '3.10'

	    steps:
	      - name: Checkout repository
	        uses: actions/checkout@v3
	        with:
	          fetch-depth: 0
	          ref: ${{ github.event.pull_request.head.ref }}
	          repository: ${{ github.event.pull_request.head.repo.full_name }}

	      - name: Set up Python ${{ env.min-python-version }}
	        uses: actions/setup-python@v2
	        with:
	          python-version: ${{ env.min-python-version }}

	      # Exposes today's date (YYYY-MM-DD) as the step output `date`.
	      - id: get_date
	        name: Get date
	        working-directory: ./benchmark/
	        run: echo "date=$(date +'%Y-%m-%d')" >> "$GITHUB_OUTPUT"

	      - name: Install Poetry
	        working-directory: ./benchmark/
	        run: |
	          curl -sSL https://install.python-poetry.org | python -

	      - name: Install dependencies
	        working-directory: ./benchmark/
	        run: |
	          export POETRY_VIRTUALENVS_IN_PROJECT=true
	          poetry install -vvv

	      - name: Lint with flake8
	        working-directory: ./benchmark/
	        run: poetry run flake8

	      # `success() || failure()` keeps the remaining checks running even when
	      # an earlier check failed, so one run reports every formatting problem.
	      - name: Check black formatting
	        working-directory: ./benchmark/
	        run: poetry run black . --exclude test.py --check
	        if: success() || failure()

	      - name: Check isort formatting
	        working-directory: ./benchmark/
	        run: poetry run isort . --check
	        if: success() || failure()

	      - name: Check for unused imports and pass statements
	        working-directory: ./benchmark/
	        run: |
	          cmd="poetry run autoflake --remove-all-unused-imports --recursive --ignore-init-module-imports --ignore-pass-after-docstring agbenchmark"
	          $cmd --check || (echo "You have unused imports or pass statements, please run '${cmd} --in-place'" && exit 1)
	        if: success() || failure()

	  # Runs agbenchmark (mostly in --mock mode) against each agent in the matrix,
	  # then runs the benchmark's own test suite.
	  tests-agbenchmark:
	    runs-on: ubuntu-latest
	    env:
	      # Job-level `env` is not shared between jobs. Without this entry the
	      # `env.min-python-version` expressions below expand to an empty string.
	      min-python-version: '3.10'
	    strategy:
	      matrix:
	        agent-name: [forge]
	      fail-fast: false
	    timeout-minutes: 20
	    steps:
	      - name: Checkout repository
	        uses: actions/checkout@v3
	        with:
	          fetch-depth: 0
	          ref: ${{ github.event.pull_request.head.ref }}
	          repository: ${{ github.event.pull_request.head.repo.full_name }}
	          submodules: true

	      - name: Set up Python ${{ env.min-python-version }}
	        uses: actions/setup-python@v2
	        with:
	          python-version: ${{ env.min-python-version }}

	      - name: Install Poetry
	        working-directory: ./autogpts/${{ matrix.agent-name }}/
	        run: |
	          curl -sSL https://install.python-poetry.org | python -

	      - name: Run regression tests
	        working-directory: ./autogpts/${{ matrix.agent-name }}/
	        run: |
	          # Start the agent in the background and give it time to come up.
	          sh run &
	          sleep 20
	          set +e # Ignore non-zero exit codes and continue execution
	          echo "Running the following command: poetry run agbenchmark --maintain --mock"

	          poetry run agbenchmark --maintain --mock
	          EXIT_CODE=$?
	          set -e # Stop ignoring non-zero exit codes
	          # The --maintain run never fails this step: its exit code is captured
	          # above and only code 5 is reported, as an empty regression file.
	          # NOTE(review): 5 is presumably pytest's "no tests collected" - confirm.
	          if [ "$EXIT_CODE" -eq 5 ]; then
	            echo "regression_tests.json is empty."
	          fi

	          echo "Running the following command: poetry run agbenchmark --mock"
	          poetry run agbenchmark --mock

	          echo "Running the following command: poetry run agbenchmark --mock --category=retrieval"
	          poetry run agbenchmark --mock --category=retrieval

	          echo "Running the following command: poetry run agbenchmark --mock --category=interface"
	          poetry run agbenchmark --mock --category=interface

	          echo "Running the following command: poetry run agbenchmark --mock --category=coding"
	          poetry run agbenchmark --mock --category=coding

	          echo "Running the following command: poetry run agbenchmark --test=WriteFile"
	          poetry run agbenchmark --test=WriteFile
	          sh run_benchmark serve &
	          sleep 10
	          cd ../../benchmark
	          poetry install
	          echo "Adding the BUILD_SKILL_TREE environment variable. This will attempt to add new elements in the skill tree. If new elements are added, the CI fails because they should have been pushed"
	          export BUILD_SKILL_TREE=true

	          poetry run agbenchmark --mock
	          poetry run pytest -vv -s tests

	          # Fail when the runs above modified tracked challenge or frontend
	          # asset files. grep exits non-zero when nothing matches, so the
	          # `|| echo` keeps `set -e` from aborting in the clean case.
	          CHANGED=$(git diff --name-only | grep -E '(agbenchmark/challenges)|(../frontend/assets)') || echo "No diffs"
	          if [ -n "$CHANGED" ]; then
	            echo "There are unstaged changes please run agbenchmark and commit those changes since they are needed."
	            echo "$CHANGED"
	            exit 1
	          else
	            echo "No unstaged changes."
	          fi
	        env:
	          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Add data challenges #369

Workflow file

Add data challenges #369

Jobs

Run details

Workflow file for this run