From ea6919b6ccf0cbadbeb83998128961c657c85b57 Mon Sep 17 00:00:00 2001
From: Mahmoud Mabrouk
Date: Tue, 28 May 2024 20:25:58 +0200
Subject: [PATCH] improved tests

---
 .../test_variant_evaluators_router.py | 58 ++++++++++++-------
 1 file changed, 38 insertions(+), 20 deletions(-)

diff --git a/agenta-backend/agenta_backend/tests/variants_main_router/test_variant_evaluators_router.py b/agenta-backend/agenta_backend/tests/variants_main_router/test_variant_evaluators_router.py
index 1b709ec391..9ff8ad60b2 100644
--- a/agenta-backend/agenta_backend/tests/variants_main_router/test_variant_evaluators_router.py
+++ b/agenta-backend/agenta_backend/tests/variants_main_router/test_variant_evaluators_router.py
@@ -150,7 +150,31 @@ async def test_get_evaluator_configs():
 
 
 @pytest.mark.asyncio
-async def test_create_evaluation():
+async def test_create_evaluation_auto_exact_match():
+    await create_evaluation_with_evaluator("auto_exact_match_evaluator_config")
+
+
+@pytest.mark.asyncio
+async def test_create_evaluation_auto_similarity_match():
+    await create_evaluation_with_evaluator("auto_similarity_match_evaluator_config")
+
+
+@pytest.mark.asyncio
+async def test_create_evaluation_auto_regex_test():
+    await create_evaluation_with_evaluator("auto_regex_test_evaluator_config")
+
+
+@pytest.mark.asyncio
+async def test_create_evaluation_auto_webhook_test():
+    await create_evaluation_with_evaluator("auto_webhook_test_evaluator_config")
+
+
+@pytest.mark.asyncio
+async def test_create_evaluation_auto_ai_critique():
+    await create_evaluation_with_evaluator("auto_ai_critique_evaluator_config")
+
+
+async def create_evaluation_with_evaluator(evaluator_config_name):
     # Fetch app, app_variant and testset
     app = await AppDB.find_one(AppDB.app_name == APP_NAME)
     app_variant = await AppVariantDB.find_one(AppVariantDB.app.id == app.id)
@@ -179,7 +203,8 @@ async def test_create_evaluation():
     list_of_configs_ids = []
     evaluator_configs = response.json()
     for evaluator_config in evaluator_configs:
-        list_of_configs_ids.append(evaluator_config["id"])
+        if evaluator_config["evaluator_key"] == evaluator_config_name:
+            list_of_configs_ids.append(evaluator_config["id"])
 
     # Update payload with list of configs ids
     payload["evaluators_configs"] = list_of_configs_ids
@@ -201,20 +226,20 @@ async def test_create_evaluation():
     )
     assert response_data is not None
 
+    # Wait for evaluation to finish
+    evaluation_id = response_data["id"]
+    await wait_for_evaluation_to_finish(evaluation_id)
+
+    # Fetch evaluation results
+    await fetch_evaluation_results(evaluation_id)
 
-@pytest.mark.asyncio
-async def test_fetch_evaluation_status():
-    evaluations = (
-        await EvaluationDB.find().to_list()
-    )  # will return only one in this case
-    evaluation = evaluations[0]
 
-    # Prepare and start short-polling request
+async def wait_for_evaluation_to_finish(evaluation_id):
     max_attempts = 12
     intervals = 5  # seconds
     for _ in range(max_attempts):
         response = await test_client.get(
-            f"{BACKEND_API_HOST}/evaluations/{str(evaluation.id)}/status/",
+            f"{BACKEND_API_HOST}/evaluations/{evaluation_id}/status/",
             timeout=timeout,
         )
         response_data = response.json()
@@ -228,21 +253,14 @@
     ), f"Evaluation status did not become '{EvaluationStatusEnum.EVALUATION_FINISHED}' within the specified polling time"
 
 
-@pytest.mark.asyncio
-async def test_fetch_evaluation_results():
-    evaluations = (
-        await EvaluationDB.find().to_list()
-    )  # will return only one in this case
-    evaluation = evaluations[0]
-
+async def fetch_evaluation_results(evaluation_id):
     response = await test_client.get(
-        f"{BACKEND_API_HOST}/evaluations/{str(evaluation.id)}/results/", timeout=timeout
+        f"{BACKEND_API_HOST}/evaluations/{evaluation_id}/results/", timeout=timeout
    )
     response_data = response.json()
 
     assert response.status_code == 200
-    assert response_data["evaluation_id"] == str(evaluation.id)
-    assert len(response_data["results"]) == 7
+    assert response_data["evaluation_id"] == evaluation_id
 
 
 @pytest.mark.asyncio
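
Note (outside the patch itself): the five new test_create_evaluation_auto_* functions share one body and differ only in the evaluator key they pass to create_evaluation_with_evaluator. Below is a minimal sketch of an equivalent, more compact layout using pytest parametrization; it assumes the pytest-asyncio setup and the create_evaluation_with_evaluator helper from the patched test module, and is a design alternative rather than what this patch does.

    import pytest

    # Sketch only: same coverage as the five per-evaluator tests added by the
    # patch, expressed as a single parametrized test. Assumes
    # create_evaluation_with_evaluator is defined in the same test module.
    @pytest.mark.asyncio
    @pytest.mark.parametrize(
        "evaluator_config_name",
        [
            "auto_exact_match_evaluator_config",
            "auto_similarity_match_evaluator_config",
            "auto_regex_test_evaluator_config",
            "auto_webhook_test_evaluator_config",
            "auto_ai_critique_evaluator_config",
        ],
    )
    async def test_create_evaluation(evaluator_config_name):
        await create_evaluation_with_evaluator(evaluator_config_name)

The trade-off: explicit per-evaluator functions, as the patch chose, give each evaluator its own test name in CI reports at the cost of some repetition, while parametrization keeps the module shorter.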