Skip to content

Commit

Permalink
Merge pull request #950 from Agenta-AI/fix/evaluation-export
Browse files: browse the repository at this point in the history
Fix: Export CSV for Auto Evaluations
  • Loading branch information
mmabrouk authored Nov 28, 2023
2 parents 58c90f2 + 0d579ca commit 1123e8a
Show file tree
Hide file tree
Showing 8 changed files with 53 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ import {createUseStyles} from "react-jss"
import {exportAICritiqueEvaluationData} from "@/lib/helpers/evaluate"
import SecondaryButton from "../SecondaryButton/SecondaryButton"
import {useAppTheme} from "../Layout/ThemeContextProvider"
import {testsetRowToChatMessages} from "@/lib/helpers/testset"
import {contentToChatMessageString, testsetRowToChatMessages} from "@/lib/helpers/testset"

const {Title} = Typography

Expand Down Expand Up @@ -257,6 +257,7 @@ Answer ONLY with one of the given grading or evaluation options.
? testsetRowToChatMessages(evaluation.testset.csvdata[rowIndex], false)
: [],
)
if (variantData[idx].isChatVariant) result = contentToChatMessageString(result)

setRowValue(rowIndex, columnName as any, result)
await evaluate(rowIndex)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ import {createUseStyles} from "react-jss"
import SecondaryButton from "../SecondaryButton/SecondaryButton"
import {exportCustomCodeEvaluationData} from "@/lib/helpers/evaluate"
import CodeBlock from "../DynamicCodeBlock/CodeBlock"
import {testsetRowToChatMessages} from "@/lib/helpers/testset"
import {contentToChatMessageString, testsetRowToChatMessages} from "@/lib/helpers/testset"

const {Title} = Typography

Expand Down Expand Up @@ -251,6 +251,7 @@ const CustomCodeRunEvaluationTable: React.FC<CustomCodeEvaluationTableProps> = (
? testsetRowToChatMessages(evaluation.testset.csvdata[rowIndex], false)
: [],
)
if (variantData[idx].isChatVariant) result = contentToChatMessageString(result)

setRowValue(rowIndex, columnName as any, result)
await evaluate(rowIndex)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ import {evaluateWithExactMatch} from "@/lib/services/evaluations"
import {createUseStyles} from "react-jss"
import {exportExactEvaluationData} from "@/lib/helpers/evaluate"
import SecondaryButton from "../SecondaryButton/SecondaryButton"
import {testsetRowToChatMessages} from "@/lib/helpers/testset"
import {contentToChatMessageString, testsetRowToChatMessages} from "@/lib/helpers/testset"
import {Evaluation} from "@/lib/Types"

const {Title} = Typography
Expand Down Expand Up @@ -198,6 +198,7 @@ const ExactMatchEvaluationTable: React.FC<ExactMatchEvaluationTableProps> = ({
? testsetRowToChatMessages(evaluation.testset.csvdata[rowIndex], false)
: [],
)
if (variantData[idx].isChatVariant) result = contentToChatMessageString(result)

setRowValue(rowIndex, columnName, result)
setRowValue(rowIndex, "evaluationFlow", EvaluationFlow.COMPARISON_RUN_STARTED)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ import {globalErrorHandler} from "@/lib/helpers/errorHandler"
import SecondaryButton from "../SecondaryButton/SecondaryButton"
import {exportRegexEvaluationData} from "@/lib/helpers/evaluate"
import {isValidRegex} from "@/lib/helpers/validators"
import {testsetRowToChatMessages} from "@/lib/helpers/testset"
import {contentToChatMessageString, testsetRowToChatMessages} from "@/lib/helpers/testset"

const {Title} = Typography

Expand Down Expand Up @@ -227,6 +227,7 @@ const RegexEvaluationTable: React.FC<RegexEvaluationTableProps> = ({
? testsetRowToChatMessages(evaluation.testset.csvdata[rowIndex], false)
: [],
)
if (variantData[idx].isChatVariant) result = contentToChatMessageString(result)

const {regexPattern, regexShouldMatch} = form.getFieldsValue()
const isCorrect = evaluateWithRegex(result, regexPattern, regexShouldMatch)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import {Typography} from "antd"
import {createUseStyles} from "react-jss"
import {exportSimilarityEvaluationData} from "@/lib/helpers/evaluate"
import SecondaryButton from "../SecondaryButton/SecondaryButton"
import {testsetRowToChatMessages} from "@/lib/helpers/testset"
import {contentToChatMessageString, testsetRowToChatMessages} from "@/lib/helpers/testset"

const {Title} = Typography

Expand Down Expand Up @@ -223,6 +223,7 @@ const SimilarityMatchEvaluationTable: React.FC<SimilarityMatchEvaluationTablePro
? testsetRowToChatMessages(evaluation.testset.csvdata[rowIndex], false)
: [],
)
if (variantData[idx].isChatVariant) result = contentToChatMessageString(result)

const {similarityThreshold} = form.getFieldsValue()
const similarity = evaluateWithSimilarityMatch(result, rows[rowIndex].correctAnswer)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ import {globalErrorHandler} from "@/lib/helpers/errorHandler"
import {isValidUrl} from "@/lib/helpers/validators"
import SecondaryButton from "../SecondaryButton/SecondaryButton"
import {exportWebhookEvaluationData} from "@/lib/helpers/evaluate"
import {testsetRowToChatMessages} from "@/lib/helpers/testset"
import {contentToChatMessageString, testsetRowToChatMessages} from "@/lib/helpers/testset"

const {Title} = Typography

Expand Down Expand Up @@ -206,6 +206,7 @@ const WebhookEvaluationTable: React.FC<WebhookEvaluationTableProps> = ({
? testsetRowToChatMessages(evaluation.testset.csvdata[rowIndex], false)
: [],
)
if (variantData[idx].isChatVariant) result = contentToChatMessageString(result)

const {webhookUrl} = form.getFieldsValue()
const score = await evaluateWithWebhook(webhookUrl, {
Expand Down
52 changes: 34 additions & 18 deletions agenta-web/src/lib/helpers/evaluate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@ import {HumanEvaluationListTableDataType} from "@/components/Evaluations/HumanEv
import {Evaluation, GenericObject, Variant} from "../Types"
import {convertToCsv, downloadCsv} from "./utils"

export const exportExactEvaluationData = (evaluation: any, rows: any[]) => {
const exportRow = rows.map((data) => {
export const exportExactEvaluationData = (evaluation: Evaluation, rows: GenericObject[]) => {
const exportRow = rows.map((data, ix) => {
return {
["Inputs"]: data.inputs[0].input_value,
["Inputs"]:
evaluation.testset.csvdata[ix]?.[evaluation.testset.testsetChatColumn] ||
data.inputs[0].input_value,
[`App Variant ${evaluation.variants[0].variantName} Output`]: data?.columnData0
? data?.columnData0
: data.outputs[0]?.variant_output,
Expand All @@ -20,10 +22,12 @@ export const exportExactEvaluationData = (evaluation: any, rows: any[]) => {
downloadCsv(csvData, filename)
}

export const exportSimilarityEvaluationData = (evaluation: any, rows: any[]) => {
const exportRow = rows.map((data) => {
export const exportSimilarityEvaluationData = (evaluation: Evaluation, rows: GenericObject[]) => {
const exportRow = rows.map((data, ix) => {
return {
["Inputs"]: data.inputs[0].input_value,
["Inputs"]:
evaluation.testset.csvdata[ix]?.[evaluation.testset.testsetChatColumn] ||
data.inputs[0].input_value,
[`App Variant ${evaluation.variants[0].variantName} Output`]: data?.columnData0
? data?.columnData0
: data.outputs[0]?.variant_output,
Expand All @@ -39,10 +43,12 @@ export const exportSimilarityEvaluationData = (evaluation: any, rows: any[]) =>
downloadCsv(csvData, filename)
}

export const exportAICritiqueEvaluationData = (evaluation: any, rows: any[]) => {
const exportRow = rows.map((data) => {
export const exportAICritiqueEvaluationData = (evaluation: Evaluation, rows: GenericObject[]) => {
const exportRow = rows.map((data, ix) => {
return {
["Inputs"]: data.inputs[0].input_value,
["Inputs"]:
evaluation.testset.csvdata[ix]?.[evaluation.testset.testsetChatColumn] ||
data.inputs[0].input_value,
[`App Variant ${evaluation.variants[0].variantName} Output`]: data?.columnData0
? data?.columnData0
: data.outputs[0]?.variant_output,
Expand Down Expand Up @@ -101,13 +107,19 @@ export const exportSingleModelEvaluationData = (evaluation: Evaluation, rows: Ge
downloadCsv(csvData, filename)
}

export const exportRegexEvaluationData = (evaluation: any, rows: any[], settings: any) => {
const exportRow = rows.map((data) => {
export const exportRegexEvaluationData = (
evaluation: Evaluation,
rows: GenericObject[],
settings: GenericObject,
) => {
const exportRow = rows.map((data, ix) => {
const isCorrect = data.score === "correct"
const isMatch = settings.regexShouldMatch ? isCorrect : !isCorrect

return {
["Inputs"]: data.inputs[0].input_value,
["Inputs"]:
evaluation.testset.csvdata[ix]?.[evaluation.testset.testsetChatColumn] ||
data.inputs[0].input_value,
[`App Variant ${evaluation.variants[0].variantName} Output`]: data?.columnData0
? data?.columnData0
: data.outputs[0]?.variant_output,
Expand All @@ -122,10 +134,12 @@ export const exportRegexEvaluationData = (evaluation: any, rows: any[], settings
downloadCsv(csvData, filename)
}

export const exportWebhookEvaluationData = (evaluation: any, rows: any[]) => {
const exportRow = rows.map((data) => {
export const exportWebhookEvaluationData = (evaluation: Evaluation, rows: GenericObject[]) => {
const exportRow = rows.map((data, ix) => {
return {
["Inputs"]: data.inputs[0].input_value,
["Inputs"]:
evaluation.testset.csvdata[ix]?.[evaluation.testset.testsetChatColumn] ||
data.inputs[0].input_value,
[`App Variant ${evaluation.variants[0].variantName} Output`]: data?.columnData0
? data?.columnData0
: data.outputs[0]?.variant_output,
Expand All @@ -140,10 +154,12 @@ export const exportWebhookEvaluationData = (evaluation: any, rows: any[]) => {
downloadCsv(csvData, filename)
}

export const exportCustomCodeEvaluationData = (evaluation: any, rows: any[]) => {
const exportRow = rows.map((data) => {
export const exportCustomCodeEvaluationData = (evaluation: Evaluation, rows: GenericObject[]) => {
const exportRow = rows.map((data, ix) => {
return {
["Inputs"]: data.inputs[0].input_value,
["Inputs"]:
evaluation.testset.csvdata[ix]?.[evaluation.testset.testsetChatColumn] ||
data.inputs[0].input_value,
[`App Variant ${evaluation.variants[0].variantName} Output`]: data?.columnData0
? data?.columnData0
: data.outputs[0]?.variant_output,
Expand Down
7 changes: 7 additions & 0 deletions agenta-web/src/lib/helpers/testset.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,10 @@ export function testsetRowToChatMessages(rowData: KeyValuePair, includeCorrectAn

return chat
}

/**
 * Wraps a piece of text in a chat-message object and returns it as a JSON string.
 *
 * @param content - Text stored under the message's `content` key.
 * @param role - Value stored under the `role` key; defaults to `ChatRole.Assistant`.
 * @returns The `JSON.stringify` output of the message, with `content` emitted before `role`.
 */
export function contentToChatMessageString(content: string, role: ChatRole = ChatRole.Assistant) {
    // Build the message first so the key order (content, then role) is explicit.
    const message = {content, role}
    return JSON.stringify(message)
}

0 comments on commit 1123e8a

Please sign in to comment.