Enhancement: Resolve failing backend tests #1256

Merged 3 commits on Jan 24, 2024

4 changes: 4 additions & 0 deletions agenta-backend/agenta_backend/services/deployment_manager.py
@@ -139,3 +139,7 @@ async def validate_image(image: Image) -> bool:
f"Image {image.docker_id} with tags {image.tags} not found"
)
return True


def get_deployment_uri(deployment: DeploymentDB) -> str:
return deployment.uri.replace("http://localhost", "http://host.docker.internal")
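Note: the new get_deployment_uri helper only rewrites a locally exposed URI so that code running inside the backend container reaches the app through Docker's host gateway. A minimal usage sketch (the fake deployment object below is illustrative; the helper only reads the .uri attribute):

    from agenta_backend.services import deployment_manager

    class _FakeDeployment:
        # Stand-in for DeploymentDB; get_deployment_uri only reads .uri
        uri = "http://localhost:8001"

    print(deployment_manager.get_deployment_uri(_FakeDeployment()))
    # -> http://host.docker.internal:8001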
9 changes: 6 additions & 3 deletions agenta-backend/agenta_backend/services/llm_apps_service.py
@@ -1,12 +1,13 @@
import asyncio
import json
import asyncio
import logging
from typing import Any, Dict, List
import traceback
from typing import Any, Dict, List

from agenta_backend.models.api.evaluation_model import AppOutput

import httpx

from agenta_backend.models.api.evaluation_model import AppOutput

# Set logger
logger = logging.getLogger(__name__)
@@ -245,5 +246,7 @@ async def get_parameters_from_openapi(uri: str) -> List[Dict]:
async def _get_openai_json_from_uri(uri):
async with httpx.AsyncClient() as client:
resp = await client.get(uri)
timeout = httpx.Timeout(timeout=5, read=None, write=5)
resp = await client.get(uri, timeout=timeout)
json_data = json.loads(resp.text)
return json_data
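Note: httpx.Timeout(timeout=5, read=None, write=5) applies a 5-second limit to connect, write, and pool operations while disabling the read timeout, so a slow-to-respond LLM app can still serve its OpenAPI schema without failing the fetch. A sketch of the equivalent, fully spelled-out configuration (the function name and values below are illustrative):

    import httpx

    # Same effect as the change above, with every timeout category written out explicitly
    timeout = httpx.Timeout(connect=5.0, read=None, write=5.0, pool=5.0)

    async def fetch_openapi(uri: str) -> dict:
        async with httpx.AsyncClient(timeout=timeout) as client:
            resp = await client.get(uri)
            return resp.json()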
25 changes: 7 additions & 18 deletions agenta-backend/agenta_backend/tasks/evaluations.py
@@ -3,10 +3,9 @@
import os
import re
import traceback
from collections import defaultdict
from typing import Any, Dict, List

from agenta_backend.models.api.evaluation_model import AppOutput, NewEvaluation
from agenta_backend.models.api.evaluation_model import AppOutput
from agenta_backend.models.db_engine import DBEngine
from agenta_backend.models.db_models import (
AggregatedResult,
@@ -16,14 +15,17 @@
EvaluationScenarioResult,
Result,
)
from agenta_backend.services import evaluators_service, llm_apps_service
from agenta_backend.services import (
evaluators_service,
llm_apps_service,
deployment_manager,
)
from agenta_backend.services.db_manager import (
create_new_evaluation_scenario,
fetch_app_by_id,
fetch_app_variant_by_id,
fetch_evaluation_by_id,
fetch_evaluator_config,
fetch_evaluator_config_by_appId,
fetch_testset_by_id,
get_deployment_by_objectid,
update_evaluation,
@@ -83,7 +85,7 @@ def evaluate(
deployment_db = loop.run_until_complete(
get_deployment_by_objectid(app_variant_db.base.deployment)
)
uri = _get_deployment_uri(deployment_db)
uri = deployment_manager.get_deployment_uri(deployment_db)

# 2. Initialize vars
evaluators_aggregated_data = {
@@ -246,19 +248,6 @@ async def aggregate_evaluator_results(
return aggregated_results


def _get_deployment_uri(deployment_db) -> str:
#!NOTE: do not remove! this will be used in github workflow!
backend_environment = os.environ.get(
"ENVIRONMENT"
) # TODO @abram rename the environment variable to something other than environment!!!
if backend_environment is not None and backend_environment == "github":
return f"http://{deployment_db.container_name}" # TODO: @abram Remove this from here. Move it to the deployment manager
else:
return deployment_db.uri.replace(
"http://localhost", "http://host.docker.internal"
)
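Note: the removed helper above special-cased ENVIRONMENT=github by targeting the container name directly, whereas the consolidated deployment_manager.get_deployment_uri in this PR only performs the localhost rewrite. Purely as an illustration (not what this PR ships), an environment-aware version of the shared helper could look like:

    import os

    def get_deployment_uri(deployment) -> str:
        # Hypothetical sketch folding the removed GitHub-workflow branch into the shared helper
        if os.environ.get("ENVIRONMENT") == "github":
            return f"http://{deployment.container_name}"
        return deployment.uri.replace("http://localhost", "http://host.docker.internal")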


def get_app_inputs(app_variant_parameters, openapi_parameters) -> List[Dict[str, str]]:
"""
Get a list of application inputs based on the app variant parameters and openapi parameters.
@@ -322,8 +322,6 @@ def auto_ai_critique_evaluator_config():
"settings_values": {
"open_ai_key": OPEN_AI_KEY,
"temperature": 0.9,
"evaluation_prompt_template": "We have an LLM App that we want to evaluate its outputs. Based on the prompt and the parameters provided below evaluate the output based on the evaluation strategy below: Evaluation strategy: 0 to 10 0 is very bad and 10 is very good. Prompt: {llm_app_prompt_template} Inputs: country: {country} Correct Answer:{correct_answer} Evaluate this: {variant_output} Answer ONLY with one of the given grading or evaluation options.",
"llm_app_prompt_template": "",
"llm_app_inputs": [{"input_name": "country", "input_value": "tunisia"}],
"prompt_template": "We have an LLM App that we want to evaluate its outputs. Based on the prompt and the parameters provided below evaluate the output based on the evaluation strategy below: Evaluation strategy: 0 to 10 0 is very bad and 10 is very good. Prompt: {llm_app_prompt_template} Inputs: country: {country} Correct Answer:{correct_answer} Evaluate this: {variant_output} Answer ONLY with one of the given grading or evaluation options.",
},
}
@@ -6,7 +6,6 @@
from agenta_backend.models.api.evaluation_model import EvaluationStatusEnum
from agenta_backend.models.db_models import (
AppDB,
ConfigDB,
TestSetDB,
AppVariantDB,
EvaluationDB,
@@ -23,6 +22,7 @@
# Set global variables
APP_NAME = "evaluation_in_backend"
ENVIRONMENT = os.environ.get("ENVIRONMENT")
OPEN_AI_KEY = os.environ.get("OPENAI_API_KEY")
if ENVIRONMENT == "development":
BACKEND_API_HOST = "http://host.docker.internal/api"
elif ENVIRONMENT == "github":
@@ -178,6 +178,7 @@ async def test_create_evaluation():
"variant_ids": [str(app_variant.id)],
"evaluators_configs": [],
"testset_id": str(testset.id),
"lm_providers_keys": {"openai": OPEN_AI_KEY},
"rate_limit": {
"batch_size": 10,
"max_retries": 3,
@@ -199,6 +200,9 @@
# Update payload with list of configs ids
payload["evaluators_configs"] = list_of_configs_ids

# Sleep for 10 seconds (to allow the llm app container to start completely)
await asyncio.sleep(10)
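Note: the fixed 10-second pause assumes the freshly started LLM app container is serving by then. A hypothetical readiness check (not part of this PR) could poll the app's OpenAPI endpoint instead of sleeping for a fixed interval:

    import asyncio
    import httpx

    async def wait_until_ready(uri: str, attempts: int = 20, delay: float = 1.0) -> None:
        # Hypothetical helper: poll until the app answers instead of sleeping blindly
        async with httpx.AsyncClient() as client:
            for _ in range(attempts):
                try:
                    resp = await client.get(f"{uri}/openapi.json", timeout=5.0)
                    if resp.status_code == 200:
                        return
                except httpx.HTTPError:
                    pass
                await asyncio.sleep(delay)
        raise TimeoutError(f"App at {uri} did not become ready")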

# Make request to create evaluation
response = await test_client.post(
f"{BACKEND_API_HOST}/evaluations/", json=payload, timeout=timeout
@@ -220,7 +224,7 @@ async def test_fetch_evaluation_status():

# Prepare and start short-polling request
max_attempts = 10
intervals = 3 # seconds
intervals = 5 # seconds
for _ in range(max_attempts):
response = await test_client.get(
f"{BACKEND_API_HOST}/evaluations/{str(evaluation.id)}/status/",
@@ -251,7 +255,7 @@ async def test_fetch_evaluation_results():

assert response.status_code == 200
assert response_data["evaluation_id"] == str(evaluation.id)
assert len(response_data["results"]) == 5
assert len(response_data["results"]) == 6


@pytest.mark.asyncio