Update - added sleep to wait for llm container to start
aybruhm committed Jan 23, 2024
1 parent 90082bc commit dc7e7f8
Showing 2 changed files with 8 additions and 6 deletions.
First changed file:
@@ -322,8 +322,6 @@ def auto_ai_critique_evaluator_config():
"settings_values": {
"open_ai_key": OPEN_AI_KEY,
"temperature": 0.9,
"evaluation_prompt_template": "We have an LLM App that we want to evaluate its outputs. Based on the prompt and the parameters provided below evaluate the output based on the evaluation strategy below: Evaluation strategy: 0 to 10 0 is very bad and 10 is very good. Prompt: {llm_app_prompt_template} Inputs: country: {country} Correct Answer:{correct_answer} Evaluate this: {variant_output} Answer ONLY with one of the given grading or evaluation options.",
"llm_app_prompt_template": "",
"llm_app_inputs": [{"input_name": "country", "input_value": "tunisia"}],
"prompt_template": "We have an LLM App that we want to evaluate its outputs. Based on the prompt and the parameters provided below evaluate the output based on the evaluation strategy below: Evaluation strategy: 0 to 10 0 is very bad and 10 is very good. Prompt: {llm_app_prompt_template} Inputs: country: {country} Correct Answer:{correct_answer} Evaluate this: {variant_output} Answer ONLY with one of the given grading or evaluation options.",
},
}
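The prompt_template kept above uses Python format-style placeholders. A minimal, purely illustrative rendering of such a template, with the template truncated and every value below invented for demonstration:

# Illustrative only: shows how the placeholders in the prompt_template above get filled.
template = (
    "Prompt: {llm_app_prompt_template} Inputs: country: {country} "
    "Correct Answer: {correct_answer} Evaluate this: {variant_output}"
)
rendered = template.format(
    llm_app_prompt_template="What is the capital of {country}?",  # inserted literally, not re-expanded
    country="tunisia",
    correct_answer="Tunis",
    variant_output="The capital of Tunisia is Tunis.",
)
print(rendered)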
Second changed file:
@@ -6,7 +6,6 @@
from agenta_backend.models.api.evaluation_model import EvaluationStatusEnum
from agenta_backend.models.db_models import (
AppDB,
ConfigDB,
TestSetDB,
AppVariantDB,
EvaluationDB,
@@ -23,6 +22,7 @@
# Set global variables
APP_NAME = "evaluation_in_backend"
ENVIRONMENT = os.environ.get("ENVIRONMENT")
OPEN_AI_KEY = os.environ.get("OPENAI_API_KEY")
if ENVIRONMENT == "development":
BACKEND_API_HOST = "http://host.docker.internal/api"
elif ENVIRONMENT == "github":
@@ -178,6 +178,7 @@ async def test_create_evaluation():
"variant_ids": [str(app_variant.id)],
"evaluators_configs": [],
"testset_id": str(testset.id),
"lm_providers_keys": {"openai": OPEN_AI_KEY},
"rate_limit": {
"batch_size": 10,
"max_retries": 3,
@@ -199,6 +200,9 @@
# Update payload with list of configs ids
payload["evaluators_configs"] = list_of_configs_ids

# Sleep for 10 seconds (to allow the llm app container to start completely)
await asyncio.sleep(10)

# Make request to create evaluation
response = await test_client.post(
f"{BACKEND_API_HOST}/evaluations/", json=payload, timeout=timeout
@@ -220,7 +224,7 @@ async def test_fetch_evaluation_status():

# Prepare and start short-polling request
max_attempts = 10
intervals = 3 # seconds
intervals = 5 # seconds
for _ in range(max_attempts):
response = await test_client.get(
f"{BACKEND_API_HOST}/evaluations/{str(evaluation.id)}/status/",
@@ -251,7 +255,7 @@ async def test_fetch_evaluation_results():

assert response.status_code == 200
assert response_data["evaluation_id"] == str(evaluation.id)
assert len(response_data["results"]) == 5
assert len(response_data["results"]) == 6


@pytest.mark.asyncio