Skip to content

Commit

Permalink
single model evaluation completed
Browse files: browse the repository at this point in the history
  • Loading branch information
MohammedMaaz committed Nov 27, 2023
1 parent 9ec7845 commit af58502
Show file tree
Hide file tree
Showing 19 changed files with 797 additions and 74 deletions.
6 changes: 6 additions & 0 deletions agenta-backend/agenta_backend/routers/evaluation_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,12 @@ async def fetch_results(
)
return {"results_data": results}

elif evaluation.evaluation_type == EvaluationType.single_model_test:
results = await results_service.fetch_results_for_auto_ai_critique(
evaluation_id
)
return {"results_data": results}

elif evaluation.evaluation_type == EvaluationType.auto_ai_critique:
results = await results_service.fetch_results_for_auto_ai_critique(
evaluation_id
Expand Down
2 changes: 2 additions & 0 deletions agenta-backend/agenta_backend/services/evaluation_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,7 @@ async def update_evaluation_scenario(
EvaluationType.auto_regex_test,
EvaluationType.auto_webhook_test,
EvaluationType.auto_ai_critique,
EvaluationType.single_model_test,
]:
new_eval_set["score"] = updated_data["score"]
elif evaluation_type == EvaluationType.human_a_b_testing:
Expand Down Expand Up @@ -550,6 +551,7 @@ def _extend_with_evaluation(evaluation_type: EvaluationType):
or evaluation_type == EvaluationType.auto_similarity_match
or evaluation_type == EvaluationType.auto_regex_test
or evaluation_type == EvaluationType.auto_webhook_test
or evaluation_type == EvaluationType.single_model_test
or EvaluationType.auto_ai_critique
):
evaluation["score"] = ""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import SecondaryButton from "../SecondaryButton/SecondaryButton"
import {useQueryParam} from "@/hooks/useQuery"
import EvaluationCardView from "../Evaluations/EvaluationCardView"
import {Evaluation, EvaluationScenario, KeyValuePair, Variant} from "@/lib/Types"
import {camelToSnake} from "@/lib/helpers/utils"
import {EvaluationTypeLabels, camelToSnake} from "@/lib/helpers/utils"
import {testsetRowToChatMessages} from "@/lib/helpers/testset"

const {Title} = Typography
Expand Down Expand Up @@ -106,7 +106,6 @@ const useStyles = createUseStyles({
const ABTestingEvaluationTable: React.FC<EvaluationTableProps> = ({
evaluation,
evaluationScenarios,
columnsCount,
}) => {
const classes = useStyles()
const router = useRouter()
Expand All @@ -131,7 +130,11 @@ const ABTestingEvaluationTable: React.FC<EvaluationTableProps> = ({

useEffect(() => {
if (evaluationScenarios) {
setRows(evaluationScenarios)
const obj = [...evaluationScenarios]
obj.forEach((item) =>
item.outputs.forEach((op) => (item[op.variant_id] = op.variant_output)),
)
setRows(obj)
}
}, [evaluationScenarios])

Expand Down Expand Up @@ -417,7 +420,7 @@ const ABTestingEvaluationTable: React.FC<EvaluationTableProps> = ({

return (
<div>
<Title level={2}>A/B Testing Evaluation</Title>
<Title level={2}>{EvaluationTypeLabels.human_a_b_testing}</Title>
<div>
<Row align="middle">
<Col span={12}>
Expand All @@ -444,14 +447,18 @@ const ABTestingEvaluationTable: React.FC<EvaluationTableProps> = ({
<Row justify="end">
<Col span={10}>
<Statistic
title={`${evaluation.variants[0].variantName} is better:`}
title={`${
evaluation.variants[0]?.variantName || ""
} is better:`}
value={`${appVariant1} out of ${num_of_rows}`}
className={classes.statCorrect}
/>
</Col>
<Col span={10}>
<Statistic
title={`${evaluation.variants[1].variantName} is better:`}
title={`${
evaluation.variants[1]?.variantName || ""
} is better:`}
value={`${appVariant2} out of ${num_of_rows}`}
className={classes.statCorrect}
/>
Expand Down Expand Up @@ -494,7 +501,7 @@ const ABTestingEvaluationTable: React.FC<EvaluationTableProps> = ({
variants={variants}
evaluationScenarios={rows}
onRun={runEvaluation}
onVote={handleVoteClick}
onVote={(id, vote) => handleVoteClick(id, vote as string)}
onInputChange={handleInputChange}
updateEvaluationScenarioData={updateEvaluationScenarioData}
evaluation={evaluation}
Expand Down
Loading

0 comments on commit af58502

Please sign in to comment.