single model evaluation completed

Agenta-AI · Nov 27, 2023 · af58502 · af58502
1 parent 9ec7845
commit af58502
Show file tree

Hide file tree

Showing 19 changed files with 797 additions and 74 deletions.
diff --git a/agenta-backend/agenta_backend/routers/evaluation_router.py b/agenta-backend/agenta_backend/routers/evaluation_router.py
@@ -380,6 +380,12 @@ async def fetch_results(
         )
         return {"results_data": results}
 
+    elif evaluation.evaluation_type == EvaluationType.single_model_test:
+        results = await results_service.fetch_results_for_auto_ai_critique(
+            evaluation_id
+        )
+        return {"results_data": results}
+
     elif evaluation.evaluation_type == EvaluationType.auto_ai_critique:
         results = await results_service.fetch_results_for_auto_ai_critique(
             evaluation_id

diff --git a/agenta-backend/agenta_backend/services/evaluation_service.py b/agenta-backend/agenta_backend/services/evaluation_service.py
@@ -394,6 +394,7 @@ async def update_evaluation_scenario(
         EvaluationType.auto_regex_test,
         EvaluationType.auto_webhook_test,
         EvaluationType.auto_ai_critique,
+        EvaluationType.single_model_test,
     ]:
         new_eval_set["score"] = updated_data["score"]
     elif evaluation_type == EvaluationType.human_a_b_testing:
@@ -550,6 +551,7 @@ def _extend_with_evaluation(evaluation_type: EvaluationType):
         or evaluation_type == EvaluationType.auto_similarity_match
         or evaluation_type == EvaluationType.auto_regex_test
         or evaluation_type == EvaluationType.auto_webhook_test
+        or evaluation_type == EvaluationType.single_model_test
         or EvaluationType.auto_ai_critique
     ):
         evaluation["score"] = ""

diff --git a/agenta-web/src/components/EvaluationTable/ABTestingEvaluationTable.tsx b/agenta-web/src/components/EvaluationTable/ABTestingEvaluationTable.tsx
@@ -30,7 +30,7 @@ import SecondaryButton from "../SecondaryButton/SecondaryButton"
 import {useQueryParam} from "@/hooks/useQuery"
 import EvaluationCardView from "../Evaluations/EvaluationCardView"
 import {Evaluation, EvaluationScenario, KeyValuePair, Variant} from "@/lib/Types"
-import {camelToSnake} from "@/lib/helpers/utils"
+import {EvaluationTypeLabels, camelToSnake} from "@/lib/helpers/utils"
 import {testsetRowToChatMessages} from "@/lib/helpers/testset"
 
 const {Title} = Typography
@@ -106,7 +106,6 @@ const useStyles = createUseStyles({
 const ABTestingEvaluationTable: React.FC<EvaluationTableProps> = ({
     evaluation,
     evaluationScenarios,
-    columnsCount,
 }) => {
     const classes = useStyles()
     const router = useRouter()
@@ -131,7 +130,11 @@ const ABTestingEvaluationTable: React.FC<EvaluationTableProps> = ({
 
     useEffect(() => {
         if (evaluationScenarios) {
-            setRows(evaluationScenarios)
+            const obj = [...evaluationScenarios]
+            obj.forEach((item) =>
+                item.outputs.forEach((op) => (item[op.variant_id] = op.variant_output)),
+            )
+            setRows(obj)
         }
     }, [evaluationScenarios])
 
@@ -417,7 +420,7 @@ const ABTestingEvaluationTable: React.FC<EvaluationTableProps> = ({
 
     return (
         <div>
-            <Title level={2}>A/B Testing Evaluation</Title>
+            <Title level={2}>{EvaluationTypeLabels.human_a_b_testing}</Title>
             <div>
                 <Row align="middle">
                     <Col span={12}>
@@ -444,14 +447,18 @@ const ABTestingEvaluationTable: React.FC<EvaluationTableProps> = ({
                             <Row justify="end">
                                 <Col span={10}>
                                     <Statistic
-                                        title={`${evaluation.variants[0].variantName} is better:`}
+                                        title={`${
+                                            evaluation.variants[0]?.variantName || ""
+                                        } is better:`}
                                         value={`${appVariant1} out of ${num_of_rows}`}
                                         className={classes.statCorrect}
                                     />
                                 </Col>
                                 <Col span={10}>
                                     <Statistic
-                                        title={`${evaluation.variants[1].variantName} is better:`}
+                                        title={`${
+                                            evaluation.variants[1]?.variantName || ""
+                                        } is better:`}
                                         value={`${appVariant2} out of ${num_of_rows}`}
                                         className={classes.statCorrect}
                                     />
@@ -494,7 +501,7 @@ const ABTestingEvaluationTable: React.FC<EvaluationTableProps> = ({
                     variants={variants}
                     evaluationScenarios={rows}
                     onRun={runEvaluation}
-                    onVote={handleVoteClick}
+                    onVote={(id, vote) => handleVoteClick(id, vote as string)}
                     onInputChange={handleInputChange}
                     updateEvaluationScenarioData={updateEvaluationScenarioData}
                     evaluation={evaluation}