Skip to content

Commit

Permalink
fetch human evaluation scenarios
Browse files Browse the repository at this point in the history
  • Loading branch information
aakrem committed Jan 2, 2024
1 parent a82e04c commit 6136099
Show file tree
Hide file tree
Showing 4 changed files with 105 additions and 8 deletions.
29 changes: 21 additions & 8 deletions agenta-backend/agenta_backend/models/api/evaluation_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,16 @@ class EvaluationScenarioOutput(BaseModel):
value: Any


class HumanEvaluationScenarioInput(BaseModel):
    """A single named input supplied to a human-evaluation scenario."""

    # Name of the input field (e.g. the prompt variable name).
    input_name: str
    # Raw string value provided for that input.
    input_value: str


class HumanEvaluationScenarioOutput(BaseModel):
    """The output produced by one app variant for a human-evaluation scenario."""

    # ID of the variant that produced this output.
    variant_id: str
    # Raw string output emitted by that variant.
    variant_output: str


class HumanEvaluation(BaseModel):
id: str
app_id: str
Expand All @@ -126,14 +136,17 @@ class HumanEvaluation(BaseModel):
updated_at: datetime


# NOTE(review): this class appears to duplicate the HumanEvaluationScenarioInput
# defined earlier in this file — confirm against the full module and remove one copy.
class HumanEvaluationScenarioInput(BaseModel):
    """A single named input supplied to a human-evaluation scenario."""

    # Name of the input field.
    input_name: str
    # Raw string value provided for that input.
    input_value: str


# NOTE(review): this class appears to duplicate the HumanEvaluationScenarioOutput
# defined earlier in this file — confirm against the full module and remove one copy.
class HumanEvaluationScenarioOutput(BaseModel):
    """The output produced by one app variant for a human-evaluation scenario."""

    # ID of the variant that produced this output.
    variant_id: str
    # Raw string output emitted by that variant.
    variant_output: str
class HumanEvaluationScenario(BaseModel):
    """One scenario (test case) belonging to a human evaluation.

    Pairs the scenario's inputs with the variant outputs shown to the
    human reviewer, plus the reviewer's verdict (vote/score/annotations).
    """

    # Scenario ID; optional because it is unset before persistence.
    id: Optional[str]
    # ID of the parent human evaluation this scenario belongs to.
    evaluation_id: str
    # Inputs fed to the variant(s) for this scenario.
    inputs: List[HumanEvaluationScenarioInput]
    # Outputs produced by each variant for these inputs.
    outputs: List[HumanEvaluationScenarioOutput]
    # Reviewer's vote — presumably a variant ID or verdict label; TODO confirm.
    vote: Optional[str]
    # Reviewer's score; may arrive as a string or an int.
    score: Optional[Union[str, int]]
    # Free-form evaluation text — not populated by the DB converter; TODO confirm usage.
    evaluation: Optional[str]
    # Expected (ground-truth) answer for this scenario, if known.
    correct_answer: Optional[str]
    # Whether the reviewer pinned this scenario in the UI.
    is_pinned: Optional[bool]
    # Reviewer's free-form note on this scenario.
    note: Optional[str]


class HumanEvaluationScenarioUpdate(BaseModel):
Expand Down
18 changes: 18 additions & 0 deletions agenta-backend/agenta_backend/models/converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
EvaluationScenarioResult,
EvaluatorConfigDB,
HumanEvaluationDB,
HumanEvaluationScenarioDB,
ImageDB,
TemplateDB,
AppDB,
Expand Down Expand Up @@ -41,6 +42,7 @@
)
from agenta_backend.models.api.evaluation_model import (
HumanEvaluation,
HumanEvaluationScenario,
SimpleEvaluationOutput,
EvaluationScenario,
Evaluation,
Expand Down Expand Up @@ -119,6 +121,22 @@ async def human_evaluation_db_to_pydantic(
)


def human_evaluation_scenario_db_to_pydantic(
    evaluation_scenario_db: HumanEvaluationScenarioDB,
) -> HumanEvaluationScenario:
    """Convert a HumanEvaluationScenarioDB document into its API model.

    Stringifies the ObjectIds, and normalizes `is_pinned` to False and
    `note` to "" when the DB document has no value for them.
    """
    db_scenario = evaluation_scenario_db
    scenario_id = str(db_scenario.id)
    parent_evaluation_id = str(db_scenario.evaluation.id)
    # Fall back to neutral defaults so the API model never carries None here.
    pinned = bool(db_scenario.is_pinned) if db_scenario.is_pinned else False
    note_text = db_scenario.note if db_scenario.note else ""
    return HumanEvaluationScenario(
        id=scenario_id,
        evaluation_id=parent_evaluation_id,
        inputs=db_scenario.inputs,
        outputs=db_scenario.outputs,
        vote=db_scenario.vote,
        score=db_scenario.score,
        correct_answer=db_scenario.correct_answer,
        is_pinned=pinned,
        note=note_text,
    )


async def aggregated_result_to_pydantic(results: List[AggregatedResult]) -> List[dict]:
transformed_results = []
for result in results:
Expand Down
32 changes: 32 additions & 0 deletions agenta-backend/agenta_backend/routers/human_evaluation_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
DeleteEvaluation,
EvaluationScenarioScoreUpdate,
HumanEvaluation,
HumanEvaluationScenario,
HumanEvaluationScenarioUpdate,
EvaluationType,
NewHumanEvaluation,
Expand Down Expand Up @@ -121,6 +122,37 @@ async def fetch_human_evaluation(
)


@router.get(
    "/{evaluation_id}/evaluation_scenarios/",
    response_model=List[HumanEvaluationScenario],
    operation_id="fetch_evaluation_scenarios",
)
async def fetch_evaluation_scenarios(
    evaluation_id: str,
    request: Request,
):
    """Fetch all scenarios of a human evaluation.

    Args:
        evaluation_id (str): The ID of the evaluation whose scenarios are fetched.
        request (Request): Incoming request; carries the authenticated user ID.

    Raises:
        HTTPException: If the evaluation is not found or access is denied.

    Returns:
        List[HumanEvaluationScenario]: The scenarios belonging to the evaluation.
    """
    user_org_data: dict = await get_user_and_org_id(request.state.user_id)
    return await evaluation_service.fetch_human_evaluation_scenarios_for_evaluation(
        evaluation_id, **user_org_data
    )


@router.put(
"/{evaluation_id}/evaluation_scenario/{evaluation_scenario_id}/{evaluation_type}/"
)
Expand Down
34 changes: 34 additions & 0 deletions agenta-backend/agenta_backend/services/evaluation_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
EvaluationScenarioInput,
EvaluationType,
HumanEvaluation,
HumanEvaluationScenario,
NewEvaluation,
EvaluationScenarioUpdate,
CreateCustomEvaluation,
Expand Down Expand Up @@ -325,6 +326,39 @@ async def fetch_evaluation_scenarios_for_evaluation(
return eval_scenarios


async def fetch_human_evaluation_scenarios_for_evaluation(
    evaluation_id: str, **user_org_data: dict
) -> List[HumanEvaluationScenario]:
    """
    Fetch human-evaluation scenarios for a given evaluation ID.

    Args:
        evaluation_id (str): The ID of the evaluation.
        user_org_data (dict): User and organization data.

    Raises:
        HTTPException: If the evaluation is not found or access is denied.

    Returns:
        List[HumanEvaluationScenario]: A list of human-evaluation scenarios.
    """
    # Verifies the evaluation exists and the caller may access it; raises otherwise.
    evaluation = await _fetch_human_evaluation_and_check_access(
        evaluation_id=evaluation_id,
        **user_org_data,
    )
    scenarios = await engine.find(
        HumanEvaluationScenarioDB,
        HumanEvaluationScenarioDB.evaluation == ObjectId(evaluation.id),
    )
    return [
        converters.human_evaluation_scenario_db_to_pydantic(scenario)
        for scenario in scenarios
    ]


async def update_human_evaluation_scenario(
evaluation_scenario_id: str,
evaluation_scenario_data: EvaluationScenarioUpdate,
Expand Down

0 comments on commit 6136099

Please sign in to comment.