diff --git a/agenta-web/src/components/EvaluationTable/AICritiqueEvaluationTable.tsx b/agenta-web/src/components/EvaluationTable/AICritiqueEvaluationTable.tsx index 3034fb7787..2dbc2aab3b 100644 --- a/agenta-web/src/components/EvaluationTable/AICritiqueEvaluationTable.tsx +++ b/agenta-web/src/components/EvaluationTable/AICritiqueEvaluationTable.tsx @@ -1,6 +1,6 @@ import {useState, useEffect} from "react" import type {ColumnType} from "antd/es/table" -import {LineChartOutlined} from "@ant-design/icons" +import {CaretRightOutlined, LineChartOutlined} from "@ant-design/icons" import { Button, Card, @@ -141,6 +141,15 @@ const useStyles = createUseStyles({ color: "#3f8600", }, }, + inputTestBtn: { + width: "100%", + display: "flex", + justifyContent: "flex-end", + "& button": { + marginLeft: 10, + }, + marginTop: "0.75rem", + }, }) const AICritiqueEvaluationTable: React.FC = ({ @@ -235,35 +244,54 @@ Answer ONLY with one of the given grading or evaluation options. } const runEvaluation = async (rowIndex: number) => { - const inputParamsDict = rows[rowIndex].inputs.reduce((acc: {[key: string]: any}, item) => { - acc[item.input_name] = item.input_value - return acc - }, {}) - - const columnsDataNames = ["columnData0"] - let idx = 0 - for (const columnName of columnsDataNames) { - setRowValue(rowIndex, "evaluationFlow", EvaluationFlow.COMPARISON_RUN_STARTED) - - let result = await callVariant( - inputParamsDict, - variantData[idx].inputParams!, - variantData[idx].optParams!, - appId || "", - variants[idx].baseId || "", - variantData[idx].isChatVariant - ? testsetRowToChatMessages(evaluation.testset.csvdata[rowIndex], false) - : [], + try { + setEvaluationStatus(EvaluationFlow.EVALUATION_STARTED) + + const inputParamsDict = rows[rowIndex].inputs.reduce( + (acc: {[key: string]: any}, item) => { + acc[item.input_name] = item.input_value + return acc + }, + {}, ) - if (variantData[idx].isChatVariant) result = contentToChatMessageString(result) - setRowValue(rowIndex, columnName as any, result) - await evaluate(rowIndex) - setShouldFetchResults(true) - if (rowIndex === rows.length - 1) { - message.success("Evaluation Results Saved") + const columnsDataNames = ["columnData0"] + let idx = 0 + + for (const columnName of columnsDataNames) { + setRowValue(rowIndex, "evaluationFlow", EvaluationFlow.COMPARISON_RUN_STARTED) + + let result = await callVariant( + inputParamsDict, + variantData[idx].inputParams!, + variantData[idx].optParams!, + appId || "", + variants[idx].baseId || "", + variantData[idx].isChatVariant + ? testsetRowToChatMessages(evaluation.testset.csvdata[rowIndex], false) + : [], + ) + + if (variantData[idx].isChatVariant) { + result = contentToChatMessageString(result) + } + + setRowValue(rowIndex, columnName as any, result) + await evaluate(rowIndex) + setShouldFetchResults(true) + + if (rowIndex === rows.length - 1) { + message.success("Evaluation Results Saved") + } + + idx++ } - idx++ + + setEvaluationStatus(EvaluationFlow.EVALUATION_FINISHED) + } catch (error) { + console.error("Error during evaluation:", error) + setEvaluationStatus(EvaluationFlow.EVALUATION_FAILED) + message.error("Failed to run evaluation") } } @@ -391,6 +419,15 @@ Answer ONLY with one of the given grading or evaluation options. } /> )} + +
+ +
), },