diff --git a/agenta-backend/agenta_backend/models/api/evaluation_model.py b/agenta-backend/agenta_backend/models/api/evaluation_model.py
index b8716c6d33..70e31cb6a7 100644
--- a/agenta-backend/agenta_backend/models/api/evaluation_model.py
+++ b/agenta-backend/agenta_backend/models/api/evaluation_model.py
@@ -110,6 +110,16 @@ class EvaluationScenarioOutput(BaseModel):
     value: Any
 
 
+class HumanEvaluationScenarioInput(BaseModel):
+    input_name: str
+    input_value: str
+
+
+class HumanEvaluationScenarioOutput(BaseModel):
+    variant_id: str
+    variant_output: str
+
+
 class HumanEvaluation(BaseModel):
     id: str
     app_id: str
@@ -126,14 +136,17 @@ class HumanEvaluation(BaseModel):
     updated_at: datetime
 
 
-class HumanEvaluationScenarioInput(BaseModel):
-    input_name: str
-    input_value: str
-
-
-class HumanEvaluationScenarioOutput(BaseModel):
-    variant_id: str
-    variant_output: str
+class HumanEvaluationScenario(BaseModel):
+    id: Optional[str]
+    evaluation_id: str
+    inputs: List[HumanEvaluationScenarioInput]
+    outputs: List[HumanEvaluationScenarioOutput]
+    vote: Optional[str]
+    score: Optional[Union[str, int]]
+    evaluation: Optional[str]
+    correct_answer: Optional[str]
+    is_pinned: Optional[bool]
+    note: Optional[str]
 
 
 class HumanEvaluationScenarioUpdate(BaseModel):
diff --git a/agenta-backend/agenta_backend/models/converters.py b/agenta-backend/agenta_backend/models/converters.py
index ab41e9ce56..9ccf33ad0e 100644
--- a/agenta-backend/agenta_backend/models/converters.py
+++ b/agenta-backend/agenta_backend/models/converters.py
@@ -9,6 +9,7 @@
     EvaluationScenarioResult,
     EvaluatorConfigDB,
     HumanEvaluationDB,
+    HumanEvaluationScenarioDB,
     ImageDB,
     TemplateDB,
     AppDB,
@@ -41,6 +42,7 @@
 )
 from agenta_backend.models.api.evaluation_model import (
     HumanEvaluation,
+    HumanEvaluationScenario,
     SimpleEvaluationOutput,
     EvaluationScenario,
     Evaluation,
@@ -119,6 +121,22 @@ async def human_evaluation_db_to_pydantic(
     )
 
 
+def human_evaluation_scenario_db_to_pydantic(
+    evaluation_scenario_db: HumanEvaluationScenarioDB,
+) -> HumanEvaluationScenario:
+    return HumanEvaluationScenario(
+        id=str(evaluation_scenario_db.id),
+        evaluation_id=str(evaluation_scenario_db.evaluation.id),
+        inputs=evaluation_scenario_db.inputs,
+        outputs=evaluation_scenario_db.outputs,
+        vote=evaluation_scenario_db.vote,
+        score=evaluation_scenario_db.score,
+        correct_answer=evaluation_scenario_db.correct_answer,
+        is_pinned=evaluation_scenario_db.is_pinned or False,
+        note=evaluation_scenario_db.note or "",
+    )
+
+
 async def aggregated_result_to_pydantic(results: List[AggregatedResult]) -> List[dict]:
     transformed_results = []
     for result in results:
diff --git a/agenta-backend/agenta_backend/routers/human_evaluation_router.py b/agenta-backend/agenta_backend/routers/human_evaluation_router.py
index 6fadff5e3f..7397b9ed12 100644
--- a/agenta-backend/agenta_backend/routers/human_evaluation_router.py
+++ b/agenta-backend/agenta_backend/routers/human_evaluation_router.py
@@ -10,6 +10,7 @@
     DeleteEvaluation,
     EvaluationScenarioScoreUpdate,
     HumanEvaluation,
+    HumanEvaluationScenario,
     HumanEvaluationScenarioUpdate,
     EvaluationType,
     NewHumanEvaluation,
@@ -121,6 +122,37 @@ async def fetch_human_evaluation(
     )
 
 
+@router.get(
+    "/{evaluation_id}/evaluation_scenarios/",
+    response_model=List[HumanEvaluationScenario],
+    operation_id="fetch_evaluation_scenarios",
+)
+async def fetch_evaluation_scenarios(
+    evaluation_id: str,
+    request: Request,
+):
+    """Fetches evaluation scenarios for a given evaluation ID.
+
+    Arguments:
+        evaluation_id (str): The ID of the evaluation for which to fetch scenarios.
+
+    Raises:
+        HTTPException: If the evaluation is not found or access is denied.
+
+    Returns:
+        List[HumanEvaluationScenario]: A list of evaluation scenarios.
+    """
+
+    user_org_data: dict = await get_user_and_org_id(request.state.user_id)
+    eval_scenarios = (
+        await evaluation_service.fetch_human_evaluation_scenarios_for_evaluation(
+            evaluation_id, **user_org_data
+        )
+    )
+
+    return eval_scenarios
+
+
 @router.put(
     "/{evaluation_id}/evaluation_scenario/{evaluation_scenario_id}/{evaluation_type}/"
 )
diff --git a/agenta-backend/agenta_backend/services/evaluation_service.py b/agenta-backend/agenta_backend/services/evaluation_service.py
index ea471a4c1b..507dd1860b 100644
--- a/agenta-backend/agenta_backend/services/evaluation_service.py
+++ b/agenta-backend/agenta_backend/services/evaluation_service.py
@@ -15,6 +15,7 @@
     EvaluationScenarioInput,
     EvaluationType,
     HumanEvaluation,
+    HumanEvaluationScenario,
     NewEvaluation,
     EvaluationScenarioUpdate,
     CreateCustomEvaluation,
@@ -325,6 +326,37 @@
 
 
 
+async def fetch_human_evaluation_scenarios_for_evaluation(
+    evaluation_id: str, **user_org_data: dict
+) -> List[HumanEvaluationScenario]:
+    """
+    Fetch evaluation scenarios for a given evaluation ID.
+
+    Args:
+        evaluation_id (str): The ID of the evaluation.
+        user_org_data (dict): User and organization data.
+
+    Raises:
+        HTTPException: If the evaluation is not found or access is denied.
+
+    Returns:
+        List[HumanEvaluationScenario]: A list of evaluation scenarios.
+    """
+    evaluation = await _fetch_human_evaluation_and_check_access(
+        evaluation_id=evaluation_id,
+        **user_org_data,
+    )
+    scenarios = await engine.find(
+        HumanEvaluationScenarioDB,
+        HumanEvaluationScenarioDB.evaluation == ObjectId(evaluation.id),
+    )
+    eval_scenarios = [
+        converters.human_evaluation_scenario_db_to_pydantic(scenario)
+        for scenario in scenarios
+    ]
+    return eval_scenarios
+
+
 async def update_human_evaluation_scenario(
     evaluation_scenario_id: str,
     evaluation_scenario_data: EvaluationScenarioUpdate,