
Commit

Merge branch 'oss/rbac' of https://github.com/agenta-ai/agenta into oss/rbac
devgenix committed Jan 23, 2024
2 parents 51a5476 + 2add1fc commit 9f3569b
Showing 12 changed files with 231 additions and 211 deletions.
25 changes: 13 additions & 12 deletions agenta-backend/agenta_backend/main.py
@@ -20,6 +20,7 @@
health_router,
)
from agenta_backend.models.db_engine import DBEngine
from agenta_backend.open_api import open_api_tags_metadata

if os.environ["FEATURE_FLAG"] in ["cloud", "ee"]:
from agenta_backend.commons.services import templates_manager
@@ -57,7 +58,7 @@ async def lifespan(application: FastAPI, cache=True):
yield


app = FastAPI(lifespan=lifespan)
app = FastAPI(lifespan=lifespan, openapi_tags=open_api_tags_metadata)

allow_headers = ["Content-Type"]

@@ -81,17 +82,17 @@ async def lifespan(application: FastAPI, cache=True):

app.include_router(health_router.router, prefix="/health")
app.include_router(user_profile.router, prefix="/profile")
app.include_router(app_router.router, prefix="/apps")
app.include_router(variants_router.router, prefix="/variants")
app.include_router(evaluation_router.router, prefix="/evaluations")
app.include_router(human_evaluation_router.router, prefix="/human-evaluations")
app.include_router(evaluators_router.router, prefix="/evaluators")
app.include_router(testset_router.router, prefix="/testsets")
app.include_router(container_router.router, prefix="/containers")
app.include_router(environment_router.router, prefix="/environments")
app.include_router(observability_router.router, prefix="/observability")
app.include_router(bases_router.router, prefix="/bases")
app.include_router(configs_router.router, prefix="/configs")
app.include_router(app_router.router, prefix="/apps", tags=["Apps"])
app.include_router(variants_router.router, prefix="/variants", tags=["Variants"])
app.include_router(evaluation_router.router, prefix="/evaluations", tags=["Evaluations"])
app.include_router(human_evaluation_router.router, prefix="/human-evaluations", tags=["Human-Evaluations"])
app.include_router(evaluators_router.router, prefix="/evaluators", tags=["Evaluators"])
app.include_router(testset_router.router, prefix="/testsets", tags=["Testsets"])
app.include_router(container_router.router, prefix="/containers", tags=["Containers"])
app.include_router(environment_router.router, prefix="/environments", tags=["Environments"])
app.include_router(observability_router.router, prefix="/observability", tags=["Observability"])
app.include_router(bases_router.router, prefix="/bases", tags=["Bases"])
app.include_router(configs_router.router, prefix="/configs", tags=["Configs"])

if os.environ["FEATURE_FLAG"] in ["cloud", "ee"]:
import agenta_backend.cloud.main as cloud
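For context on what the new tags wire up: FastAPI uses the `openapi_tags` metadata to describe tag groups and the per-router `tags` argument to assign endpoints to them. Below is a minimal, self-contained sketch with hypothetical router and endpoint names, not the project's actual ones:

```python
from fastapi import APIRouter, FastAPI

# Hypothetical tag metadata mirroring the shape of open_api_tags_metadata.
tags_metadata = [
    {"name": "Apps", "description": "Application management endpoints."},
    {"name": "Testsets", "description": "Test set management endpoints."},
]

app = FastAPI(openapi_tags=tags_metadata)

apps_router = APIRouter()


@apps_router.get("/")
async def list_apps() -> list[str]:
    # Placeholder endpoint; it is grouped under the "Apps" tag in /docs.
    return []


# The tags passed here decide which section the routes appear under
# in the generated OpenAPI schema and the Swagger UI.
app.include_router(apps_router, prefix="/apps", tags=["Apps"])
```

The order of entries in `openapi_tags` also controls the order of the tag sections in the docs UI, which is presumably why the new `open_api.py` module centralizes them.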
8 changes: 4 additions & 4 deletions agenta-backend/agenta_backend/models/converters.py
@@ -150,11 +150,11 @@ async def human_evaluation_db_to_pydantic(


def human_evaluation_scenario_db_to_pydantic(
evaluation_scenario_db: HumanEvaluationScenarioDB,
evaluation_scenario_db: HumanEvaluationScenarioDB, evaluation_id: str
) -> HumanEvaluationScenario:
return HumanEvaluationScenario(
id=str(evaluation_scenario_db.id),
evaluation_id=str(evaluation_scenario_db.evaluation.id),
evaluation_id=evaluation_id,
inputs=evaluation_scenario_db.inputs,
outputs=evaluation_scenario_db.outputs,
vote=evaluation_scenario_db.vote,
@@ -196,11 +196,11 @@ def evaluation_scenarios_results_to_pydantic(


def evaluation_scenario_db_to_pydantic(
evaluation_scenario_db: EvaluationScenarioDB,
evaluation_scenario_db: EvaluationScenarioDB, evaluation_id: str
) -> EvaluationScenario:
return EvaluationScenario(
id=str(evaluation_scenario_db.id),
evaluation_id=str(evaluation_scenario_db.evaluation.id),
evaluation_id=evaluation_id,
inputs=[
EvaluationScenarioInput(**scenario_input.dict())
for scenario_input in evaluation_scenario_db.inputs
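The converter change removes the need to dereference the scenario's linked evaluation document: the caller now passes the evaluation id down explicitly. A simplified stand-in using plain Pydantic models (hypothetical field set, not the full schema) illustrates the pattern:

```python
from typing import Optional

from pydantic import BaseModel


class EvaluationScenario(BaseModel):
    id: str
    evaluation_id: str
    vote: Optional[str] = None


class ScenarioRecord(BaseModel):
    # Stand-in for the Beanie document; the real converter receives an
    # EvaluationScenarioDB / HumanEvaluationScenarioDB instance.
    id: str
    vote: Optional[str] = None


def scenario_db_to_pydantic(record: ScenarioRecord, evaluation_id: str) -> EvaluationScenario:
    # evaluation_id is supplied by the caller, which already fetched the
    # evaluation, so the linked document never has to be resolved here.
    return EvaluationScenario(id=record.id, evaluation_id=evaluation_id, vote=record.vote)


print(scenario_db_to_pydantic(ScenarioRecord(id="scenario-1", vote="a"), "evaluation-1"))
```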
14 changes: 14 additions & 0 deletions agenta-backend/agenta_backend/open_api.py
@@ -0,0 +1,14 @@
open_api_tags_metadata = [
{"name": "Variants"},
{"name": "Evaluations"},
{"name": "Evaluators"},
{"name": "Apps"},
{"name": "Human-Evaluations"},
{"name": "Testsets"},
{"name": "Containers"},
{"name": "Environments"},
{"name": "Observability"},
{"name": "Organizations"},
{"name": "Bases"},
{"name": "Configs"},
]
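A quick way to confirm the metadata is picked up, assuming the `agenta_backend` package is importable, is to inspect the schema FastAPI generates (a hypothetical check, not part of the commit):

```python
from fastapi import FastAPI

from agenta_backend.open_api import open_api_tags_metadata

app = FastAPI(openapi_tags=open_api_tags_metadata)

schema = app.openapi()
# The tag metadata is emitted verbatim under the top-level "tags" key.
assert schema["tags"] == open_api_tags_metadata
print([tag["name"] for tag in schema["tags"]])
```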
9 changes: 5 additions & 4 deletions agenta-backend/agenta_backend/services/evaluation_service.py
@@ -251,10 +251,10 @@ async def fetch_evaluation_scenarios_for_evaluation(
"""
evaluation = await db_manager.fetch_evaluation_by_id(evaluation_id)
scenarios = await EvaluationScenarioDB.find(
EvaluationScenarioDB.evaluation.id == ObjectId(evaluation.id), fetch_links=True
EvaluationScenarioDB.evaluation.id == ObjectId(evaluation.id)
).to_list()
eval_scenarios = [
converters.evaluation_scenario_db_to_pydantic(scenario)
converters.evaluation_scenario_db_to_pydantic(scenario, str(evaluation.id))
for scenario in scenarios
]
return eval_scenarios
@@ -280,10 +280,11 @@ async def fetch_human_evaluation_scenarios_for_evaluation(
)
scenarios = await HumanEvaluationScenarioDB.find(
HumanEvaluationScenarioDB.evaluation.id == ObjectId(evaluation.id),
fetch_links=True,
).to_list()
eval_scenarios = [
converters.human_evaluation_scenario_db_to_pydantic(scenario)
converters.human_evaluation_scenario_db_to_pydantic(
scenario, str(evaluation.id)
)
for scenario in scenarios
]
return eval_scenarios
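On the service side, dropping `fetch_links=True` means the scenario's linked evaluation is no longer resolved by the query; instead, the evaluation fetched once at the top of the function supplies the id that is threaded into every converter call. A sketch of that flow with stubbed-out database calls (the real code uses Beanie queries against MongoDB):

```python
import asyncio
from typing import Any


async def fetch_evaluation_by_id(evaluation_id: str) -> dict[str, Any]:
    # Stub for db_manager.fetch_evaluation_by_id; the real call hits MongoDB.
    return {"id": evaluation_id}


async def find_scenarios(evaluation_id: str) -> list[dict[str, Any]]:
    # Stub for the Beanie find(); without fetch_links=True the linked
    # evaluation stays an unresolved reference, which is fine because the
    # converter no longer reads it.
    return [{"id": "scenario-1"}, {"id": "scenario-2"}]


def scenario_to_pydantic(scenario: dict[str, Any], evaluation_id: str) -> dict[str, Any]:
    return {**scenario, "evaluation_id": evaluation_id}


async def fetch_scenarios_for_evaluation(evaluation_id: str) -> list[dict[str, Any]]:
    evaluation = await fetch_evaluation_by_id(evaluation_id)
    scenarios = await find_scenarios(evaluation["id"])
    # The evaluation id fetched once above is reused for every scenario.
    return [scenario_to_pydantic(s, str(evaluation["id"])) for s in scenarios]


print(asyncio.run(fetch_scenarios_for_evaluation("evaluation-1")))
```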
6 changes: 2 additions & 4 deletions agenta-backend/agenta_backend/services/evaluators_service.py
@@ -150,10 +150,8 @@ def auto_ai_critique(
"correct_answer": correct_answer,
}

for input_item in app_params.get("inputs", []):
input_name = input_item.get("name")
if input_name and input_name in inputs:
chain_run_args[input_name] = inputs[input_name]
for key, value in inputs.items():
chain_run_args[key] = value

prompt = PromptTemplate(
input_variables=list(chain_run_args.keys()), # Use the keys from chain_run_args
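The new loop forwards every runtime input into `chain_run_args` instead of filtering against `app_params["inputs"]`, and the prompt's `input_variables` are then derived from whatever keys end up in that dict. A plain-Python sketch of the same idea with made-up data, using `str.format` as a stand-in for LangChain's `PromptTemplate`:

```python
chain_run_args = {
    "llm_app_prompt_template": "Answer the question politely.",
    "variant_output": "42",
    "correct_answer": "42",
}

# Hypothetical runtime inputs for one test case.
inputs = {"country": "France", "question": "What is the capital?"}

# Equivalent of the new loop: every input key is forwarded as-is.
chain_run_args.update(inputs)

# In the real code, input_variables is list(chain_run_args.keys()); here the
# template simply references the same names directly.
template = (
    "App prompt: {llm_app_prompt_template}\n"
    "Inputs: country={country}, question={question}\n"
    "Output: {variant_output}\nExpected: {correct_answer}\n"
)
print(template.format(**chain_run_args))
```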
46 changes: 26 additions & 20 deletions agenta-backend/agenta_backend/tasks/evaluations.py
@@ -92,7 +92,7 @@ def evaluate(
# 2. Initialize vars
evaluators_aggregated_data = {
str(evaluator_config_db.id): {
"evaluator_key": evaluator_config.evaluator_key,
"evaluator_key": evaluator_config_db.evaluator_key,
"results": [],
}
for evaluator_config_db in evaluator_config_dbs
@@ -218,28 +218,34 @@ async def aggregate_evaluator_results(
for config_id, val in evaluators_aggregated_data.items():
evaluator_key = val["evaluator_key"] or ""
results = val["results"] or []
if evaluator_key != "auto_ai_critique":

if not results:
average_value = 0
if evaluator_key == "auto_ai_critique":
numeric_scores = []
for result in results:
# Extract the first number found in the result value
match = re.search(r"\d+", result.value)
if match:
try:
score = int(match.group())
numeric_scores.append(score)
except ValueError:
# Ignore if the extracted value is not an integer
continue

# Calculate the average of numeric scores if any are present
average_value = (
sum([result.value for result in results]) / len(results)
if results
else 0
sum(numeric_scores) / len(numeric_scores) if numeric_scores else None
)
elif evaluator_key == "auto_ai_critique":
try:
average_value = (
sum(
[
int(result.value)
for result in results
if isinstance(int(result.value), int)
]
)
/ len(results)
if results
else 0
)
except TypeError:
else:
# Handle boolean values for auto_regex_test and other evaluators
if all(isinstance(result.value, bool) for result in results):
average_value = sum(result.value for result in results) / len(results)
else:
# Handle other data types or mixed results
average_value = None

evaluator_config = await fetch_evaluator_config(config_id)
aggregated_result = AggregatedResult(
evaluator_config=evaluator_config.id,
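The reworked aggregation treats `auto_ai_critique` results as free-form text, grabbing the first number with a regex, and averages boolean results for the other evaluators. A self-contained sketch of that branching, with hypothetical result objects and sample values:

```python
import re
from dataclasses import dataclass
from typing import Any, Optional


@dataclass
class Result:
    value: Any  # str for auto_ai_critique, bool for e.g. auto_regex_test


def aggregate(evaluator_key: str, results: list[Result]) -> Optional[float]:
    if not results:
        return 0
    if evaluator_key == "auto_ai_critique":
        # Pull the first number out of each free-form critique; skip results without one.
        scores = [int(m.group()) for r in results if (m := re.search(r"\d+", str(r.value)))]
        return sum(scores) / len(scores) if scores else None
    if all(isinstance(r.value, bool) for r in results):
        # True/False values average to a pass rate between 0 and 1.
        return sum(r.value for r in results) / len(results)
    return None  # mixed or unsupported value types


print(aggregate("auto_ai_critique", [Result("Score: 8/10"), Result("7")]))        # 7.5
print(aggregate("auto_regex_test", [Result(True), Result(False), Result(True)]))  # ~0.667
```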
