diff --git a/agenta-web/src/components/EvaluationTable/ABTestingEvaluationTable.tsx b/agenta-web/src/components/EvaluationTable/ABTestingEvaluationTable.tsx index 8c4ca6fc27..777729b31a 100644 --- a/agenta-web/src/components/EvaluationTable/ABTestingEvaluationTable.tsx +++ b/agenta-web/src/components/EvaluationTable/ABTestingEvaluationTable.tsx @@ -381,7 +381,13 @@ const ABTestingEvaluationTable: React.FC = ({ Run All exportABTestingEvaluationData(evaluation, rows)} + onClick={() => + exportABTestingEvaluationData( + evaluation, + evaluationScenarios, + rows, + ) + } disabled={false} > Export results diff --git a/agenta-web/src/components/EvaluationTable/SingleModelEvaluationTable.tsx b/agenta-web/src/components/EvaluationTable/SingleModelEvaluationTable.tsx index efa7f1efcd..70d12a7dc3 100644 --- a/agenta-web/src/components/EvaluationTable/SingleModelEvaluationTable.tsx +++ b/agenta-web/src/components/EvaluationTable/SingleModelEvaluationTable.tsx @@ -448,7 +448,13 @@ const SingleModelEvaluationTable: React.FC = ({ Run All exportSingleModelEvaluationData(evaluation, rows)} + onClick={() => + exportSingleModelEvaluationData( + evaluation, + evaluationScenarios, + rows, + ) + } disabled={false} > Export results diff --git a/agenta-web/src/lib/helpers/evaluate.ts b/agenta-web/src/lib/helpers/evaluate.ts index 5311b1e7c2..abfac16f40 100644 --- a/agenta-web/src/lib/helpers/evaluate.ts +++ b/agenta-web/src/lib/helpers/evaluate.ts @@ -1,5 +1,5 @@ import {HumanEvaluationListTableDataType} from "@/components/Evaluations/HumanEvaluationResult" -import {Evaluation, GenericObject, Variant} from "../Types" +import {Evaluation, EvaluationScenario, GenericObject, Variant} from "../Types" import {convertToCsv, downloadCsv} from "./fileManipulations" export const exportExactEvaluationData = (evaluation: Evaluation, rows: GenericObject[]) => { @@ -63,12 +63,23 @@ export const exportAICritiqueEvaluationData = (evaluation: Evaluation, rows: Gen downloadCsv(csvData, filename) } -export const exportABTestingEvaluationData = (evaluation: Evaluation, rows: GenericObject[]) => { +export const exportABTestingEvaluationData = ( + evaluation: Evaluation, + scenarios: EvaluationScenario[], + rows: GenericObject[], +) => { const exportRow = rows.map((data, ix) => { + const inputColumns = evaluation.testset.testsetChatColumn + ? {Input: evaluation.testset.csvdata[ix]?.[evaluation.testset.testsetChatColumn]} + : data.inputs.reduce( + (columns: any, input: {input_name: string; input_value: string}) => { + columns[`${input.input_name}`] = input.input_value + return columns + }, + {}, + ) return { - ["Inputs"]: - evaluation.testset.csvdata[ix]?.[evaluation.testset.testsetChatColumn] || - data.inputs[0].input_value, + ...inputColumns, [`App Variant ${evaluation.variants[0].variantName} Output 0`]: data?.columnData0 ? data?.columnData0 : data.outputs[0]?.variant_output, @@ -78,6 +89,9 @@ export const exportABTestingEvaluationData = (evaluation: Evaluation, rows: Gene ["Vote"]: evaluation.variants.find((v: Variant) => v.variantId === data.vote)?.variantName || data.vote, + ["Expected answer"]: + scenarios[ix]?.correctAnswer || evaluation.testset.csvdata[ix].correct_answer, + ["Additional notes"]: scenarios[ix]?.note, } }) const exportCol = Object.keys(exportRow[0]) @@ -87,17 +101,31 @@ export const exportABTestingEvaluationData = (evaluation: Evaluation, rows: Gene downloadCsv(csvData, filename) } -export const exportSingleModelEvaluationData = (evaluation: Evaluation, rows: GenericObject[]) => { +export const exportSingleModelEvaluationData = ( + evaluation: Evaluation, + scenarios: EvaluationScenario[], + rows: GenericObject[], +) => { const exportRow = rows.map((data, ix) => { + const inputColumns = evaluation.testset.testsetChatColumn + ? {Input: evaluation.testset.csvdata[ix]?.[evaluation.testset.testsetChatColumn]} + : data.inputs.reduce( + (columns: any, input: {input_name: string; input_value: string}) => { + columns[`${input.input_name}`] = input.input_value + return columns + }, + {}, + ) const numericScore = parseInt(data.score) return { - ["Inputs"]: - evaluation.testset.csvdata[ix]?.[evaluation.testset.testsetChatColumn] || - data.inputs[0].input_value, + ...inputColumns, [`App Variant ${evaluation.variants[0].variantName} Output 0`]: data?.columnData0 ? data?.columnData0 : data.outputs[0]?.variant_output, ["Score"]: isNaN(numericScore) ? "-" : numericScore, + ["Expected answer"]: + scenarios[ix]?.correctAnswer || evaluation.testset.csvdata[ix].correct_answer, + ["Additional notes"]: scenarios[ix]?.note, } }) const exportCol = Object.keys(exportRow[0])