Merge pull request #1256 from Agenta-AI/fix/backend-tests
Enhancement: Resolve failing backend tests
mmabrouk authored Jan 24, 2024
2 parents 808a6e9 + 8a97574 commit d392d1c
Showing 5 changed files with 25 additions and 27 deletions.
4 changes: 4 additions & 0 deletions agenta-backend/agenta_backend/services/deployment_manager.py
@@ -139,3 +139,7 @@ async def validate_image(image: Image) -> bool:
f"Image {image.docker_id} with tags {image.tags} not found"
)
return True


def get_deployment_uri(deployment: DeploymentDB) -> str:
return deployment.uri.replace("http://localhost", "http://host.docker.internal")
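
Note: the new get_deployment_uri helper centralizes a Docker networking workaround. URIs that point at http://localhost on the host are rewritten to http://host.docker.internal so code running inside a container can still reach the deployed app. A minimal standalone sketch of that behavior; the DeploymentDB stand-in below is a stub, the real model lives in agenta_backend.models.db_models:

from dataclasses import dataclass

@dataclass
class DeploymentDB:  # stub standing in for the real database model
    uri: str

def get_deployment_uri(deployment: DeploymentDB) -> str:
    # Rewrite host-local URIs so they resolve from inside a Docker container.
    return deployment.uri.replace("http://localhost", "http://host.docker.internal")

print(get_deployment_uri(DeploymentDB(uri="http://localhost:8000/app")))
# -> http://host.docker.internal:8000/app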
9 changes: 6 additions & 3 deletions agenta-backend/agenta_backend/services/llm_apps_service.py
@@ -1,12 +1,13 @@
import asyncio
import json
import asyncio
import logging
from typing import Any, Dict, List
import traceback
from typing import Any, Dict, List

from agenta_backend.models.api.evaluation_model import AppOutput

import httpx

from agenta_backend.models.api.evaluation_model import AppOutput

# Set logger
logger = logging.getLogger(__name__)
@@ -245,5 +246,7 @@ async def get_parameters_from_openapi(uri: str) -> List[Dict]:
async def _get_openai_json_from_uri(uri):
async with httpx.AsyncClient() as client:
resp = await client.get(uri)
timeout = httpx.Timeout(timeout=5, read=None, write=5)
resp = await client.get(uri, timeout=timeout)
json_data = json.loads(resp.text)
return json_data
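
Note: the schema fetch now passes an explicit httpx.Timeout. With timeout=5, read=None, write=5, httpx applies a 5-second budget to connect, write, and pool acquisition while disabling the read timeout, so a slowly starting LLM app is not cut off while returning its OpenAPI document. A self-contained sketch of the patched helper; the example URI is an assumption:

import asyncio
import json

import httpx

async def get_openapi_json(uri: str) -> dict:
    # 5 s default for connect/write/pool; read=None disables the read timeout.
    timeout = httpx.Timeout(timeout=5, read=None, write=5)
    async with httpx.AsyncClient() as client:
        resp = await client.get(uri, timeout=timeout)
        return json.loads(resp.text)

if __name__ == "__main__":
    # Illustrative call against a locally exposed app.
    data = asyncio.run(get_openapi_json("http://host.docker.internal:8000/openapi.json"))
    print(list(data.get("paths", {})))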
25 changes: 7 additions & 18 deletions agenta-backend/agenta_backend/tasks/evaluations.py
@@ -3,10 +3,9 @@
import os
import re
import traceback
from collections import defaultdict
from typing import Any, Dict, List

from agenta_backend.models.api.evaluation_model import AppOutput, NewEvaluation
from agenta_backend.models.api.evaluation_model import AppOutput
from agenta_backend.models.db_engine import DBEngine
from agenta_backend.models.db_models import (
AggregatedResult,
@@ -16,14 +15,17 @@
EvaluationScenarioResult,
Result,
)
from agenta_backend.services import evaluators_service, llm_apps_service
from agenta_backend.services import (
evaluators_service,
llm_apps_service,
deployment_manager,
)
from agenta_backend.services.db_manager import (
create_new_evaluation_scenario,
fetch_app_by_id,
fetch_app_variant_by_id,
fetch_evaluation_by_id,
fetch_evaluator_config,
fetch_evaluator_config_by_appId,
fetch_testset_by_id,
get_deployment_by_objectid,
update_evaluation,
@@ -83,7 +85,7 @@ def evaluate(
deployment_db = loop.run_until_complete(
get_deployment_by_objectid(app_variant_db.base.deployment)
)
uri = _get_deployment_uri(deployment_db)
uri = deployment_manager.get_deployment_uri(deployment_db)

# 2. Initialize vars
evaluators_aggregated_data = {
@@ -246,19 +248,6 @@ async def aggregate_evaluator_results(
return aggregated_results


def _get_deployment_uri(deployment_db) -> str:
#!NOTE: do not remove! this will be used in github workflow!
backend_environment = os.environ.get(
"ENVIRONMENT"
) # TODO @abram rename the environment variable to something other than environment!!!
if backend_environment is not None and backend_environment == "github":
return f"http://{deployment_db.container_name}" # TODO: @abram Remove this from here. Move it to the deployment manager
else:
return deployment_db.uri.replace(
"http://localhost", "http://host.docker.internal"
)


def get_app_inputs(app_variant_parameters, openapi_parameters) -> List[Dict[str, str]]:
"""
Get a list of application inputs based on the app variant parameters and openapi parameters.
@@ -322,8 +322,6 @@ def auto_ai_critique_evaluator_config():
"settings_values": {
"open_ai_key": OPEN_AI_KEY,
"temperature": 0.9,
"evaluation_prompt_template": "We have an LLM App that we want to evaluate its outputs. Based on the prompt and the parameters provided below evaluate the output based on the evaluation strategy below: Evaluation strategy: 0 to 10 0 is very bad and 10 is very good. Prompt: {llm_app_prompt_template} Inputs: country: {country} Correct Answer:{correct_answer} Evaluate this: {variant_output} Answer ONLY with one of the given grading or evaluation options.",
"llm_app_prompt_template": "",
"llm_app_inputs": [{"input_name": "country", "input_value": "tunisia"}],
"prompt_template": "We have an LLM App that we want to evaluate its outputs. Based on the prompt and the parameters provided below evaluate the output based on the evaluation strategy below: Evaluation strategy: 0 to 10 0 is very bad and 10 is very good. Prompt: {llm_app_prompt_template} Inputs: country: {country} Correct Answer:{correct_answer} Evaluate this: {variant_output} Answer ONLY with one of the given grading or evaluation options.",
},
}
@@ -6,7 +6,6 @@
from agenta_backend.models.api.evaluation_model import EvaluationStatusEnum
from agenta_backend.models.db_models import (
AppDB,
ConfigDB,
TestSetDB,
AppVariantDB,
EvaluationDB,
@@ -23,6 +22,7 @@
# Set global variables
APP_NAME = "evaluation_in_backend"
ENVIRONMENT = os.environ.get("ENVIRONMENT")
OPEN_AI_KEY = os.environ.get("OPENAI_API_KEY")
if ENVIRONMENT == "development":
BACKEND_API_HOST = "http://host.docker.internal/api"
elif ENVIRONMENT == "github":
@@ -178,6 +178,7 @@ async def test_create_evaluation():
"variant_ids": [str(app_variant.id)],
"evaluators_configs": [],
"testset_id": str(testset.id),
"lm_providers_keys": {"openai": OPEN_AI_KEY},
"rate_limit": {
"batch_size": 10,
"max_retries": 3,
@@ -199,6 +200,9 @@ async def test_create_evaluation():
# Update payload with list of configs ids
payload["evaluators_configs"] = list_of_configs_ids

    # Sleep for 10 seconds (to allow the llm app container to start completely)
await asyncio.sleep(10)

# Make request to create evaluation
response = await test_client.post(
f"{BACKEND_API_HOST}/evaluations/", json=payload, timeout=timeout
@@ -220,7 +224,7 @@ async def test_fetch_evaluation_status():

# Prepare and start short-polling request
max_attempts = 10
intervals = 3 # seconds
intervals = 5 # seconds
for _ in range(max_attempts):
response = await test_client.get(
f"{BACKEND_API_HOST}/evaluations/{str(evaluation.id)}/status/",
@@ -251,7 +255,7 @@ async def test_fetch_evaluation_results():

assert response.status_code == 200
assert response_data["evaluation_id"] == str(evaluation.id)
assert len(response_data["results"]) == 5
assert len(response_data["results"]) == 6


@pytest.mark.asyncio
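Note: a hedged sketch of how the updated test assembles the evaluation request after this change. The OpenAI key is read from the OPENAI_API_KEY environment variable and forwarded through the new lm_providers_keys field, and the test sleeps before posting so the llm app container can finish starting. Values marked as placeholders are not visible in the diff:

import asyncio
import os

import httpx

OPEN_AI_KEY = os.environ.get("OPENAI_API_KEY")
BACKEND_API_HOST = "http://host.docker.internal/api"  # development value from the test module

async def create_evaluation(app_id, variant_id, testset_id, config_ids):
    payload = {
        "app_id": app_id,  # placeholder wiring; not shown in the diff
        "variant_ids": [variant_id],
        "evaluators_configs": config_ids,
        "testset_id": testset_id,
        "lm_providers_keys": {"openai": OPEN_AI_KEY},  # field added by this PR
        "rate_limit": {"batch_size": 10, "max_retries": 3},  # remaining keys truncated in the diff
    }
    # Give the llm app container time to start before creating the evaluation.
    await asyncio.sleep(10)
    async with httpx.AsyncClient() as client:
        # The test's actual timeout value is not shown in the diff; 60 s is illustrative.
        resp = await client.post(f"{BACKEND_API_HOST}/evaluations/", json=payload, timeout=60)
        return resp.json()

# Example: asyncio.run(create_evaluation(app_id, variant_id, testset_id, []))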
