From 513ecdccbcc9b25217c1a536d418e5858aadc07f Mon Sep 17 00:00:00 2001 From: Akrem Abayed Date: Thu, 28 Mar 2024 09:26:37 +0100 Subject: [PATCH 01/30] add average cost and latency to evaluation schema --- agenta-backend/agenta_backend/models/api/evaluation_model.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/agenta-backend/agenta_backend/models/api/evaluation_model.py b/agenta-backend/agenta_backend/models/api/evaluation_model.py index 33ecc1e134..45f8f76e48 100644 --- a/agenta-backend/agenta_backend/models/api/evaluation_model.py +++ b/agenta-backend/agenta_backend/models/api/evaluation_model.py @@ -71,6 +71,8 @@ class Evaluation(BaseModel): testset_name: Optional[str] status: Result aggregated_results: List[AggregatedResult] + average_cost: Optional[Result] + average_latency: Optional[Result] created_at: datetime updated_at: datetime From f9e826959e8260dfa3ea557ae52c02fb274c3027 Mon Sep 17 00:00:00 2001 From: Akrem Abayed Date: Thu, 28 Mar 2024 09:29:10 +0100 Subject: [PATCH 02/30] add cost and latency to all models and related methods --- agenta-backend/agenta_backend/models/converters.py | 2 ++ agenta-backend/agenta_backend/models/db_models.py | 4 ++++ agenta-web/src/lib/Types.ts | 4 ++++ agenta-web/src/services/evaluations/index.ts | 2 ++ 4 files changed, 12 insertions(+) diff --git a/agenta-backend/agenta_backend/models/converters.py b/agenta-backend/agenta_backend/models/converters.py index a1406ec7e8..ec7c591098 100644 --- a/agenta-backend/agenta_backend/models/converters.py +++ b/agenta-backend/agenta_backend/models/converters.py @@ -145,6 +145,8 @@ async def evaluation_db_to_pydantic( aggregated_results=aggregated_results, created_at=evaluation_db.created_at, updated_at=evaluation_db.updated_at, + average_cost=evaluation_db.average_cost, + average_latency=evaluation_db.average_latency, ) diff --git a/agenta-backend/agenta_backend/models/db_models.py b/agenta-backend/agenta_backend/models/db_models.py index 9c086f7662..a3579c565f 100644 --- a/agenta-backend/agenta_backend/models/db_models.py +++ b/agenta-backend/agenta_backend/models/db_models.py @@ -266,6 +266,8 @@ class EvaluationDB(Document): variant_revision: PydanticObjectId evaluators_configs: List[PydanticObjectId] aggregated_results: List[AggregatedResult] + average_cost: Optional[Result] = None + average_latency: Optional[Result] = None created_at: datetime = Field(default=datetime.now()) updated_at: datetime = Field(default=datetime.now()) @@ -284,6 +286,8 @@ class EvaluationScenarioDB(Document): note: Optional[str] evaluators_configs: List[PydanticObjectId] results: List[EvaluationScenarioResult] + latency: Optional[int] = None + cost: Optional[int] = None created_at: datetime = Field(default=datetime.now()) updated_at: datetime = Field(default=datetime.now()) diff --git a/agenta-web/src/lib/Types.ts b/agenta-web/src/lib/Types.ts index ec0ad81a24..7577fbe428 100644 --- a/agenta-web/src/lib/Types.ts +++ b/agenta-web/src/lib/Types.ts @@ -363,6 +363,8 @@ type ValueTypeOptions = | "regex" | "object" | "error" + | "cost" + | "latency" //evaluation revamp types export interface EvaluationSettingsTemplate { @@ -442,6 +444,8 @@ export interface _Evaluation { updated_at?: string duration?: number revisions: string[] + average_latency?: TypedValue & {error: null | EvaluationError} + average_cost?: TypedValue & {error: null | EvaluationError} variant_revision_ids: string[] } diff --git a/agenta-web/src/services/evaluations/index.ts b/agenta-web/src/services/evaluations/index.ts index ae7b3b506a..7a2089544f 100644 --- a/agenta-web/src/services/evaluations/index.ts +++ b/agenta-web/src/services/evaluations/index.ts @@ -104,6 +104,8 @@ const evaluationTransformer = (item: any) => ({ revisions: item.revisions, variant_revision_ids: item.variant_revision_ids, variant_ids: item.variant_ids, + average_cost: item.average_cost, + average_latency: item.average_latency, }) export const fetchAllEvaluations = async (appId: string) => { const response = await axios.get(`/api/evaluations/`, {params: {app_id: appId}}) From 1082d8b939cfa82b2317f1a215422af045a0fcc2 Mon Sep 17 00:00:00 2001 From: Akrem Abayed Date: Thu, 28 Mar 2024 11:17:45 +0100 Subject: [PATCH 03/30] add aggregate method for the llm response latency & cost --- .../services/aggregation_service.py | 31 +++++++++++++++++-- .../evaluationResults/EvaluationResults.tsx | 16 ++++++++++ 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/agenta-backend/agenta_backend/services/aggregation_service.py b/agenta-backend/agenta_backend/services/aggregation_service.py index d657bdf4de..b5c7cd7e28 100644 --- a/agenta-backend/agenta_backend/services/aggregation_service.py +++ b/agenta-backend/agenta_backend/services/aggregation_service.py @@ -1,8 +1,8 @@ import re import traceback -from typing import List +from typing import List, Optional -from agenta_backend.models.db_models import Result, Error +from agenta_backend.models.db_models import InvokationResult, Result, Error def aggregate_ai_critique(results: List[Result]) -> Result: @@ -73,3 +73,30 @@ def aggregate_float(results: List[Result]) -> Result: value=None, error=Error(message=str(exc), stacktrace=str(traceback.format_exc())), ) + + +def aggregate_float_from_llm_app_response( + invocation_results: List[InvokationResult], key: Optional[str] +) -> Result: + try: + if not key: + raise ValueError("Key is required to aggregate InvokationResult objects.") + + values = [ + inv_result.result.value[key] + for inv_result in invocation_results + if isinstance(inv_result.result.value, dict) + and key in inv_result.result.value + ] + + if not values: + raise ValueError("No valid values found for aggregation.") + + average_value = sum(values) / len(values) + return Result(type=key, value=average_value) + except Exception as exc: + return Result( + type="error", + value=None, + error=Error(message=str(exc), stacktrace=str(traceback.format_exc())), + ) diff --git a/agenta-web/src/components/pages/evaluations/evaluationResults/EvaluationResults.tsx b/agenta-web/src/components/pages/evaluations/evaluationResults/EvaluationResults.tsx index fc9fe3af87..cd7589c521 100644 --- a/agenta-web/src/components/pages/evaluations/evaluationResults/EvaluationResults.tsx +++ b/agenta-web/src/components/pages/evaluations/evaluationResults/EvaluationResults.tsx @@ -246,6 +246,22 @@ const EvaluationResults: React.FC = () => { statusMapper(token)[params.data?.status.value as EvaluationStatus].label, cellRenderer: StatusRenderer, }, + { + flex: 1, + field: "average_latency", + headerName: "Latency", + minWidth: 120, + ...getFilterParams("number"), + valueGetter: (params) => getTypedValue(params?.data.average_latency) + }, + { + flex: 1, + field: "average_cost", + headerName: "Cost", + minWidth: 120, + ...getFilterParams("number"), + valueGetter: (params) => getTypedValue(params?.data.average_cost), + }, { flex: 1, field: "created_at", From 484f0f5b6b2991e1506071b046189580173560db Mon Sep 17 00:00:00 2001 From: Akrem Abayed Date: Thu, 28 Mar 2024 11:18:11 +0100 Subject: [PATCH 04/30] adjust result from llm response to contain cost and latency --- .../agenta_backend/services/llm_apps_service.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/agenta-backend/agenta_backend/services/llm_apps_service.py b/agenta-backend/agenta_backend/services/llm_apps_service.py index d67ca4321e..7e8fd84d73 100644 --- a/agenta-backend/agenta_backend/services/llm_apps_service.py +++ b/agenta-backend/agenta_backend/services/llm_apps_service.py @@ -86,9 +86,17 @@ async def invoke_app( url, json=payload, timeout=httpx.Timeout(timeout=5, read=None, write=5) ) response.raise_for_status() - app_output = response.json() + app_response = response.json() return InvokationResult( - result=Result(type="text", value=app_output["message"], error=None) + result=Result( + type="object", + value={ + "output": app_response["message"], + "latency": app_response["latency"], + "cost": app_response["cost"], + }, + error=None, + ) ) except httpx.HTTPStatusError as e: From 20609b901e5a1dcefba88abbda692c6b8ff888b4 Mon Sep 17 00:00:00 2001 From: Akrem Abayed Date: Thu, 28 Mar 2024 11:19:09 +0100 Subject: [PATCH 05/30] add average cost and latency to the evaluation --- .../agenta_backend/tasks/evaluations.py | 21 ++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/agenta-backend/agenta_backend/tasks/evaluations.py b/agenta-backend/agenta_backend/tasks/evaluations.py index 50ce322437..32ecc22949 100644 --- a/agenta-backend/agenta_backend/tasks/evaluations.py +++ b/agenta-backend/agenta_backend/tasks/evaluations.py @@ -218,9 +218,14 @@ def evaluate( for evaluator_config_db in evaluator_config_dbs: logger.debug(f"Evaluating with evaluator: {evaluator_config_db}") if correct_answer_column in data_point: + output_value = ( + app_output.result.value["output"] + if isinstance(app_output.result.value, dict) + else app_output.result.value + ) result = evaluators_service.evaluate( evaluator_key=evaluator_config_db.evaluator_key, - output=app_output.result.value, + output=output_value, correct_answer=data_point[correct_answer_column], settings_values=evaluator_config_db.settings_values, app_params=app_variant_parameters, @@ -276,6 +281,20 @@ def evaluate( ) ) + # Add average cost and latency + average_latency = aggregation_service.aggregate_float_from_llm_app_response( + app_outputs, "latency" + ) + average_cost = aggregation_service.aggregate_float_from_llm_app_response( + app_outputs, "cost" + ) + loop.run_until_complete( + update_evaluation( + evaluation_id, + {"average_latency": average_latency, "average_cost": average_cost}, + ) + ) + except Exception as e: logger.error(f"An error occurred during evaluation: {e}") traceback.print_exc() From 43eb33321d3967c5826023061388c662d87c941a Mon Sep 17 00:00:00 2001 From: Akrem Abayed Date: Thu, 28 Mar 2024 11:19:40 +0100 Subject: [PATCH 06/30] cost and latency columns --- .../EvaluationScenarios.tsx | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/agenta-web/src/components/pages/evaluations/evaluationScenarios/EvaluationScenarios.tsx b/agenta-web/src/components/pages/evaluations/evaluationScenarios/EvaluationScenarios.tsx index 687ae40f10..63ad400fe7 100644 --- a/agenta-web/src/components/pages/evaluations/evaluationScenarios/EvaluationScenarios.tsx +++ b/agenta-web/src/components/pages/evaluations/evaluationScenarios/EvaluationScenarios.tsx @@ -23,6 +23,7 @@ import {useAtom} from "jotai" import {evaluatorsAtom} from "@/lib/atoms/evaluation" import CompareOutputDiff from "@/components/CompareOutputDiff/CompareOutputDiff" import {useQueryParam} from "@/hooks/useQuery" +import {formatCost, formatLatency} from "@/lib/helpers/utils" const useStyles = createUseStyles((theme: JSSTheme) => ({ infoRow: { @@ -139,6 +140,27 @@ const EvaluationScenarios: React.FC = () => { }, }) }) + colDefs.push({ + flex: 1, + minWidth: 120, + headerName: "Cost", + field: "cost", + ...getFilterParams("text"), + valueGetter: (params) => { + return formatCost(params.data.outputs[0].result.value.cost); + }, + }); + + colDefs.push({ + flex: 1, + minWidth: 120, + headerName: "Latency", + field: "latency", + ...getFilterParams("text"), + valueGetter: (params) => { + return formatLatency(params.data.outputs[0].result.value.latency); + }, + }); return colDefs }, [evalaution, scenarios, showDiff]) From 55d7c0acadd4a241f9d70b6746818b219e15b563 Mon Sep 17 00:00:00 2001 From: Akrem Abayed Date: Thu, 28 Mar 2024 11:20:09 +0100 Subject: [PATCH 07/30] fixes --- .../src/components/CompareOutputDiff/CompareOutputDiff.tsx | 2 +- agenta-web/src/components/Playground/NewVariantModal.tsx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/agenta-web/src/components/CompareOutputDiff/CompareOutputDiff.tsx b/agenta-web/src/components/CompareOutputDiff/CompareOutputDiff.tsx index ea983e860a..7f6a1520f9 100644 --- a/agenta-web/src/components/CompareOutputDiff/CompareOutputDiff.tsx +++ b/agenta-web/src/components/CompareOutputDiff/CompareOutputDiff.tsx @@ -9,7 +9,7 @@ interface CompareOutputDiffProps { const CompareOutputDiff = ({variantOutput, expectedOutput}: CompareOutputDiffProps) => { const {appTheme} = useAppTheme() - const results = diffWords(variantOutput, expectedOutput) + const results = diffWords(variantOutput.output, expectedOutput) const display = results.map((part, index) => { if (part.removed) { diff --git a/agenta-web/src/components/Playground/NewVariantModal.tsx b/agenta-web/src/components/Playground/NewVariantModal.tsx index 34b5c0542c..ed8578c85b 100644 --- a/agenta-web/src/components/Playground/NewVariantModal.tsx +++ b/agenta-web/src/components/Playground/NewVariantModal.tsx @@ -35,7 +35,7 @@ const NewVariantModal: React.FC = ({ const [isInputValid, setIsInputValid] = useState(false) const handleTemplateVariantChange = (value: string) => { - let newValue = value.includes(".") ? value.split(".")[0] : value + let newValue = value.includes(".") ? value.output.split(".")[0] : value setTemplateVariantName(value) setVariantPlaceHolder(`${newValue}`) setIsInputValid(newVariantName.trim().length > 0 && value !== "Source Variant") From 6b5eb60a4bbea821530312be6d8e885e82c0acd4 Mon Sep 17 00:00:00 2001 From: Akrem Abayed Date: Thu, 28 Mar 2024 11:20:45 +0100 Subject: [PATCH 08/30] improve the getTypedValue with new types --- agenta-web/src/lib/helpers/evaluate.ts | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/agenta-web/src/lib/helpers/evaluate.ts b/agenta-web/src/lib/helpers/evaluate.ts index b904f4799a..5e5f64449c 100644 --- a/agenta-web/src/lib/helpers/evaluate.ts +++ b/agenta-web/src/lib/helpers/evaluate.ts @@ -14,6 +14,7 @@ import AlertPopup from "@/components/AlertPopup/AlertPopup" import {capitalize, round} from "lodash" import dayjs from "dayjs" import {runningStatuses} from "@/components/pages/evaluations/cellRenderers/cellRenderers" +import { formatCost, formatLatency } from "./utils" export const exportExactEvaluationData = (evaluation: Evaluation, rows: GenericObject[]) => { const exportRow = rows.map((data, ix) => { @@ -269,11 +270,19 @@ export function getTypedValue(res?: TypedValue) { if (value === undefined) return "-" - return type === "number" - ? round(Number(value), 2) - : ["boolean", "bool"].includes(type as string) - ? capitalize(value?.toString()) - : value?.toString() + switch (type) { + case "number": + return round(Number(value), 2); + case "boolean": + case "bool": + return capitalize(value?.toString()); + case "cost": + return formatCost(Number(value)); + case "latency": + return formatLatency(Number(value)); + default: + return value?.toString(); + } } type CellDataType = "number" | "text" | "date" From 31882df2b0d65ae4eae01c590be9a4dfc461552a Mon Sep 17 00:00:00 2001 From: MohammedMaaz Date: Mon, 18 Mar 2024 19:31:17 +0500 Subject: [PATCH 09/30] formatters utils | evaluators link fixed --- agenta-web/src/lib/helpers/dateTimeHelper.ts | 4 ++++ agenta-web/src/lib/helpers/formatters.ts | 21 ++++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 agenta-web/src/lib/helpers/formatters.ts diff --git a/agenta-web/src/lib/helpers/dateTimeHelper.ts b/agenta-web/src/lib/helpers/dateTimeHelper.ts index b1ac3cfa5b..434badbbf7 100644 --- a/agenta-web/src/lib/helpers/dateTimeHelper.ts +++ b/agenta-web/src/lib/helpers/dateTimeHelper.ts @@ -3,3 +3,7 @@ import dayjs from "dayjs" export const formatDate = (date: dayjs.ConfigType): string => { return dayjs(date).format("DD MMM YYYY | h:m a") } + +export const formatDate24 = (date: dayjs.ConfigType, includeSeconds = false): string => { + return dayjs(date).format("DD MMM YY, HH:mm" + (includeSeconds ? ":ss" : "")) +} diff --git a/agenta-web/src/lib/helpers/formatters.ts b/agenta-web/src/lib/helpers/formatters.ts new file mode 100644 index 0000000000..34caa04736 --- /dev/null +++ b/agenta-web/src/lib/helpers/formatters.ts @@ -0,0 +1,21 @@ +const intlNumber = new Intl.NumberFormat("en-US", { + maximumFractionDigits: 2, +}) + +const intlCurrency = new Intl.NumberFormat("en-US", { + style: "currency", + currency: "USD", + maximumFractionDigits: 4, +}) + +export const formatNumber = (value = 0) => { + return intlNumber.format(value) +} + +export const formatCurrency = (value = 0) => { + return intlCurrency.format(value) +} + +export const formatLatency = (value = 0) => { + return `${intlNumber.format(value / 1000)}s` +} From 87ed91f4ae438414a049ef255999c924dea748b1 Mon Sep 17 00:00:00 2001 From: Akrem Abayed Date: Thu, 28 Mar 2024 11:31:30 +0100 Subject: [PATCH 10/30] use Maaz currency and latency helpers --- .../evaluations/evaluationScenarios/EvaluationScenarios.tsx | 4 ++-- agenta-web/src/lib/helpers/evaluate.ts | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/agenta-web/src/components/pages/evaluations/evaluationScenarios/EvaluationScenarios.tsx b/agenta-web/src/components/pages/evaluations/evaluationScenarios/EvaluationScenarios.tsx index 63ad400fe7..ba0b4857b8 100644 --- a/agenta-web/src/components/pages/evaluations/evaluationScenarios/EvaluationScenarios.tsx +++ b/agenta-web/src/components/pages/evaluations/evaluationScenarios/EvaluationScenarios.tsx @@ -23,7 +23,7 @@ import {useAtom} from "jotai" import {evaluatorsAtom} from "@/lib/atoms/evaluation" import CompareOutputDiff from "@/components/CompareOutputDiff/CompareOutputDiff" import {useQueryParam} from "@/hooks/useQuery" -import {formatCost, formatLatency} from "@/lib/helpers/utils" +import {formatCurrency, formatLatency} from "@/lib/helpers/formatters" const useStyles = createUseStyles((theme: JSSTheme) => ({ infoRow: { @@ -147,7 +147,7 @@ const EvaluationScenarios: React.FC = () => { field: "cost", ...getFilterParams("text"), valueGetter: (params) => { - return formatCost(params.data.outputs[0].result.value.cost); + return formatCurrency(params.data.outputs[0].result.value.cost); }, }); diff --git a/agenta-web/src/lib/helpers/evaluate.ts b/agenta-web/src/lib/helpers/evaluate.ts index 5e5f64449c..c18760d4fa 100644 --- a/agenta-web/src/lib/helpers/evaluate.ts +++ b/agenta-web/src/lib/helpers/evaluate.ts @@ -14,7 +14,7 @@ import AlertPopup from "@/components/AlertPopup/AlertPopup" import {capitalize, round} from "lodash" import dayjs from "dayjs" import {runningStatuses} from "@/components/pages/evaluations/cellRenderers/cellRenderers" -import { formatCost, formatLatency } from "./utils" +import { formatCurrency, formatLatency } from "./formatters" export const exportExactEvaluationData = (evaluation: Evaluation, rows: GenericObject[]) => { const exportRow = rows.map((data, ix) => { @@ -277,7 +277,7 @@ export function getTypedValue(res?: TypedValue) { case "bool": return capitalize(value?.toString()); case "cost": - return formatCost(Number(value)); + return formatCurrency(Number(value)); case "latency": return formatLatency(Number(value)); default: From 3147577d29745b35d6c3e39e6643e2b9202ec566 Mon Sep 17 00:00:00 2001 From: Akrem Abayed Date: Thu, 28 Mar 2024 14:31:23 +0100 Subject: [PATCH 11/30] add latency and cost to models --- agenta-backend/agenta_backend/models/api/evaluation_model.py | 2 ++ agenta-backend/agenta_backend/models/db_models.py | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/agenta-backend/agenta_backend/models/api/evaluation_model.py b/agenta-backend/agenta_backend/models/api/evaluation_model.py index 45f8f76e48..b09e12681e 100644 --- a/agenta-backend/agenta_backend/models/api/evaluation_model.py +++ b/agenta-backend/agenta_backend/models/api/evaluation_model.py @@ -102,6 +102,8 @@ class EvaluationScenarioInput(BaseModel): class EvaluationScenarioOutput(BaseModel): result: Result + cost: Optional[float] + latency: Optional[float] class HumanEvaluationScenarioInput(BaseModel): diff --git a/agenta-backend/agenta_backend/models/db_models.py b/agenta-backend/agenta_backend/models/db_models.py index a3579c565f..4988b6273c 100644 --- a/agenta-backend/agenta_backend/models/db_models.py +++ b/agenta-backend/agenta_backend/models/db_models.py @@ -193,6 +193,8 @@ class Result(BaseModel): class InvokationResult(BaseModel): result: Result + cost: Optional[float] = None + latency: Optional[float] = None class EvaluationScenarioResult(BaseModel): @@ -213,6 +215,8 @@ class EvaluationScenarioInputDB(BaseModel): class EvaluationScenarioOutputDB(BaseModel): result: Result + cost: Optional[float] = None + latency: Optional[float] = None class HumanEvaluationScenarioInput(BaseModel): From 9847da519e6b1f40a82e79cccc2269b8519ee7a5 Mon Sep 17 00:00:00 2001 From: Akrem Abayed Date: Thu, 28 Mar 2024 14:31:33 +0100 Subject: [PATCH 12/30] fix aggregation --- .../agenta_backend/services/aggregation_service.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/agenta-backend/agenta_backend/services/aggregation_service.py b/agenta-backend/agenta_backend/services/aggregation_service.py index b5c7cd7e28..b459766301 100644 --- a/agenta-backend/agenta_backend/services/aggregation_service.py +++ b/agenta-backend/agenta_backend/services/aggregation_service.py @@ -83,14 +83,13 @@ def aggregate_float_from_llm_app_response( raise ValueError("Key is required to aggregate InvokationResult objects.") values = [ - inv_result.result.value[key] + getattr(inv_result, key) for inv_result in invocation_results - if isinstance(inv_result.result.value, dict) - and key in inv_result.result.value + if hasattr(inv_result, key) and getattr(inv_result, key) is not None ] if not values: - raise ValueError("No valid values found for aggregation.") + raise ValueError(f"No valid values found for {key} aggregation.") average_value = sum(values) / len(values) return Result(type=key, value=average_value) From c382959eee256b0217431b6c044b152a2aaadd05 Mon Sep 17 00:00:00 2001 From: Akrem Abayed Date: Thu, 28 Mar 2024 14:31:50 +0100 Subject: [PATCH 13/30] adjust schema --- .../agenta_backend/services/llm_apps_service.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/agenta-backend/agenta_backend/services/llm_apps_service.py b/agenta-backend/agenta_backend/services/llm_apps_service.py index 7e8fd84d73..2d1b6e56ec 100644 --- a/agenta-backend/agenta_backend/services/llm_apps_service.py +++ b/agenta-backend/agenta_backend/services/llm_apps_service.py @@ -89,14 +89,12 @@ async def invoke_app( app_response = response.json() return InvokationResult( result=Result( - type="object", - value={ - "output": app_response["message"], - "latency": app_response["latency"], - "cost": app_response["cost"], - }, + type="text", + value=app_response["message"], error=None, - ) + ), + latency=app_response["latency"], + cost=app_response["cost"], ) except httpx.HTTPStatusError as e: From f236aed60d267ea0cae924f4fd13b6e40ea519c0 Mon Sep 17 00:00:00 2001 From: Akrem Abayed Date: Thu, 28 Mar 2024 14:32:07 +0100 Subject: [PATCH 14/30] adjust EvaluationScenarioOutputDB --- agenta-backend/agenta_backend/tasks/evaluations.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/agenta-backend/agenta_backend/tasks/evaluations.py b/agenta-backend/agenta_backend/tasks/evaluations.py index 32ecc22949..d985c22d99 100644 --- a/agenta-backend/agenta_backend/tasks/evaluations.py +++ b/agenta-backend/agenta_backend/tasks/evaluations.py @@ -272,7 +272,9 @@ def evaluate( correct_answer=correct_answer, outputs=[ EvaluationScenarioOutputDB( - result=Result(type="text", value=app_output.result.value) + result=Result(type="text", value=app_output.result.value), + latency=app_output.latency, + cost=app_output.cost ) ], results=evaluators_results, From da220cb3d4b98b3b606f73e6b2a0f949bd30d51c Mon Sep 17 00:00:00 2001 From: Akrem Abayed Date: Thu, 28 Mar 2024 14:32:21 +0100 Subject: [PATCH 15/30] fixes --- .../src/components/CompareOutputDiff/CompareOutputDiff.tsx | 2 +- .../evaluations/evaluationScenarios/EvaluationScenarios.tsx | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/agenta-web/src/components/CompareOutputDiff/CompareOutputDiff.tsx b/agenta-web/src/components/CompareOutputDiff/CompareOutputDiff.tsx index 7f6a1520f9..ea983e860a 100644 --- a/agenta-web/src/components/CompareOutputDiff/CompareOutputDiff.tsx +++ b/agenta-web/src/components/CompareOutputDiff/CompareOutputDiff.tsx @@ -9,7 +9,7 @@ interface CompareOutputDiffProps { const CompareOutputDiff = ({variantOutput, expectedOutput}: CompareOutputDiffProps) => { const {appTheme} = useAppTheme() - const results = diffWords(variantOutput.output, expectedOutput) + const results = diffWords(variantOutput, expectedOutput) const display = results.map((part, index) => { if (part.removed) { diff --git a/agenta-web/src/components/pages/evaluations/evaluationScenarios/EvaluationScenarios.tsx b/agenta-web/src/components/pages/evaluations/evaluationScenarios/EvaluationScenarios.tsx index ba0b4857b8..bef79b2d5e 100644 --- a/agenta-web/src/components/pages/evaluations/evaluationScenarios/EvaluationScenarios.tsx +++ b/agenta-web/src/components/pages/evaluations/evaluationScenarios/EvaluationScenarios.tsx @@ -147,7 +147,7 @@ const EvaluationScenarios: React.FC = () => { field: "cost", ...getFilterParams("text"), valueGetter: (params) => { - return formatCurrency(params.data.outputs[0].result.value.cost); + return formatCurrency(params.data.outputs[0].cost); }, }); @@ -158,7 +158,7 @@ const EvaluationScenarios: React.FC = () => { field: "latency", ...getFilterParams("text"), valueGetter: (params) => { - return formatLatency(params.data.outputs[0].result.value.latency); + return formatLatency(params.data.outputs[0].latency); }, }); return colDefs From 361b6d704f389e53c38d196b609e2da504f922e3 Mon Sep 17 00:00:00 2001 From: Akrem Abayed Date: Thu, 28 Mar 2024 14:34:01 +0100 Subject: [PATCH 16/30] format --- .../agenta_backend/services/llm_apps_service.py | 2 +- agenta-backend/agenta_backend/tasks/evaluations.py | 2 +- .../evaluationResults/EvaluationResults.tsx | 2 +- .../evaluationScenarios/EvaluationScenarios.tsx | 10 +++++----- agenta-web/src/lib/helpers/evaluate.ts | 12 ++++++------ 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/agenta-backend/agenta_backend/services/llm_apps_service.py b/agenta-backend/agenta_backend/services/llm_apps_service.py index 2d1b6e56ec..7c963caf3c 100644 --- a/agenta-backend/agenta_backend/services/llm_apps_service.py +++ b/agenta-backend/agenta_backend/services/llm_apps_service.py @@ -93,7 +93,7 @@ async def invoke_app( value=app_response["message"], error=None, ), - latency=app_response["latency"], + latency=app_response["latency"], cost=app_response["cost"], ) diff --git a/agenta-backend/agenta_backend/tasks/evaluations.py b/agenta-backend/agenta_backend/tasks/evaluations.py index d985c22d99..162f7bcbb0 100644 --- a/agenta-backend/agenta_backend/tasks/evaluations.py +++ b/agenta-backend/agenta_backend/tasks/evaluations.py @@ -274,7 +274,7 @@ def evaluate( EvaluationScenarioOutputDB( result=Result(type="text", value=app_output.result.value), latency=app_output.latency, - cost=app_output.cost + cost=app_output.cost, ) ], results=evaluators_results, diff --git a/agenta-web/src/components/pages/evaluations/evaluationResults/EvaluationResults.tsx b/agenta-web/src/components/pages/evaluations/evaluationResults/EvaluationResults.tsx index cd7589c521..d218be3970 100644 --- a/agenta-web/src/components/pages/evaluations/evaluationResults/EvaluationResults.tsx +++ b/agenta-web/src/components/pages/evaluations/evaluationResults/EvaluationResults.tsx @@ -252,7 +252,7 @@ const EvaluationResults: React.FC = () => { headerName: "Latency", minWidth: 120, ...getFilterParams("number"), - valueGetter: (params) => getTypedValue(params?.data.average_latency) + valueGetter: (params) => getTypedValue(params?.data.average_latency), }, { flex: 1, diff --git a/agenta-web/src/components/pages/evaluations/evaluationScenarios/EvaluationScenarios.tsx b/agenta-web/src/components/pages/evaluations/evaluationScenarios/EvaluationScenarios.tsx index bef79b2d5e..f3cef4af91 100644 --- a/agenta-web/src/components/pages/evaluations/evaluationScenarios/EvaluationScenarios.tsx +++ b/agenta-web/src/components/pages/evaluations/evaluationScenarios/EvaluationScenarios.tsx @@ -147,10 +147,10 @@ const EvaluationScenarios: React.FC = () => { field: "cost", ...getFilterParams("text"), valueGetter: (params) => { - return formatCurrency(params.data.outputs[0].cost); + return formatCurrency(params.data.outputs[0].cost) }, - }); - + }) + colDefs.push({ flex: 1, minWidth: 120, @@ -158,9 +158,9 @@ const EvaluationScenarios: React.FC = () => { field: "latency", ...getFilterParams("text"), valueGetter: (params) => { - return formatLatency(params.data.outputs[0].latency); + return formatLatency(params.data.outputs[0].latency) }, - }); + }) return colDefs }, [evalaution, scenarios, showDiff]) diff --git a/agenta-web/src/lib/helpers/evaluate.ts b/agenta-web/src/lib/helpers/evaluate.ts index c18760d4fa..55edfe758d 100644 --- a/agenta-web/src/lib/helpers/evaluate.ts +++ b/agenta-web/src/lib/helpers/evaluate.ts @@ -14,7 +14,7 @@ import AlertPopup from "@/components/AlertPopup/AlertPopup" import {capitalize, round} from "lodash" import dayjs from "dayjs" import {runningStatuses} from "@/components/pages/evaluations/cellRenderers/cellRenderers" -import { formatCurrency, formatLatency } from "./formatters" +import {formatCurrency, formatLatency} from "./formatters" export const exportExactEvaluationData = (evaluation: Evaluation, rows: GenericObject[]) => { const exportRow = rows.map((data, ix) => { @@ -272,16 +272,16 @@ export function getTypedValue(res?: TypedValue) { switch (type) { case "number": - return round(Number(value), 2); + return round(Number(value), 2) case "boolean": case "bool": - return capitalize(value?.toString()); + return capitalize(value?.toString()) case "cost": - return formatCurrency(Number(value)); + return formatCurrency(Number(value)) case "latency": - return formatLatency(Number(value)); + return formatLatency(Number(value)) default: - return value?.toString(); + return value?.toString() } } From 5042b9d7dac35d9ecdd05f216fa11cf40336ad9e Mon Sep 17 00:00:00 2001 From: Akrem Abayed Date: Thu, 28 Mar 2024 14:39:48 +0100 Subject: [PATCH 17/30] handle null values for cost and latency --- .../evaluationScenarios/EvaluationScenarios.tsx | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/agenta-web/src/components/pages/evaluations/evaluationScenarios/EvaluationScenarios.tsx b/agenta-web/src/components/pages/evaluations/evaluationScenarios/EvaluationScenarios.tsx index f3cef4af91..f348832ebd 100644 --- a/agenta-web/src/components/pages/evaluations/evaluationScenarios/EvaluationScenarios.tsx +++ b/agenta-web/src/components/pages/evaluations/evaluationScenarios/EvaluationScenarios.tsx @@ -147,7 +147,9 @@ const EvaluationScenarios: React.FC = () => { field: "cost", ...getFilterParams("text"), valueGetter: (params) => { - return formatCurrency(params.data.outputs[0].cost) + return params.data.outputs[0].cost == undefined + ? "-" + : formatCurrency(params.data.outputs[0].cost) }, }) @@ -158,7 +160,9 @@ const EvaluationScenarios: React.FC = () => { field: "latency", ...getFilterParams("text"), valueGetter: (params) => { - return formatLatency(params.data.outputs[0].latency) + return params.data.outputs[0].latency == undefined + ? "-" + : formatLatency(params.data.outputs[0].latency) }, }) return colDefs From 7c285652e253d5458dd50946a0f1d9c9246ee2dc Mon Sep 17 00:00:00 2001 From: Akrem Abayed Date: Thu, 28 Mar 2024 14:45:15 +0100 Subject: [PATCH 18/30] revert change --- agenta-web/src/components/Playground/NewVariantModal.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agenta-web/src/components/Playground/NewVariantModal.tsx b/agenta-web/src/components/Playground/NewVariantModal.tsx index ed8578c85b..34b5c0542c 100644 --- a/agenta-web/src/components/Playground/NewVariantModal.tsx +++ b/agenta-web/src/components/Playground/NewVariantModal.tsx @@ -35,7 +35,7 @@ const NewVariantModal: React.FC = ({ const [isInputValid, setIsInputValid] = useState(false) const handleTemplateVariantChange = (value: string) => { - let newValue = value.includes(".") ? value.output.split(".")[0] : value + let newValue = value.includes(".") ? value.split(".")[0] : value setTemplateVariantName(value) setVariantPlaceHolder(`${newValue}`) setIsInputValid(newVariantName.trim().length > 0 && value !== "Source Variant") From ccda0535d4da9ced0ccd98b37cdce25454cc7570 Mon Sep 17 00:00:00 2001 From: Akrem Abayed Date: Thu, 28 Mar 2024 14:56:07 +0100 Subject: [PATCH 19/30] fix types --- .../evaluations/evaluationResults/EvaluationResults.tsx | 4 ++-- .../evaluations/evaluationScenarios/EvaluationScenarios.tsx | 6 ++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/agenta-web/src/components/pages/evaluations/evaluationResults/EvaluationResults.tsx b/agenta-web/src/components/pages/evaluations/evaluationResults/EvaluationResults.tsx index d218be3970..2548b7a169 100644 --- a/agenta-web/src/components/pages/evaluations/evaluationResults/EvaluationResults.tsx +++ b/agenta-web/src/components/pages/evaluations/evaluationResults/EvaluationResults.tsx @@ -252,7 +252,7 @@ const EvaluationResults: React.FC = () => { headerName: "Latency", minWidth: 120, ...getFilterParams("number"), - valueGetter: (params) => getTypedValue(params?.data.average_latency), + valueGetter: (params) => getTypedValue(params?.data?.average_latency), }, { flex: 1, @@ -260,7 +260,7 @@ const EvaluationResults: React.FC = () => { headerName: "Cost", minWidth: 120, ...getFilterParams("number"), - valueGetter: (params) => getTypedValue(params?.data.average_cost), + valueGetter: (params) => getTypedValue(params?.data?.average_cost), }, { flex: 1, diff --git a/agenta-web/src/components/pages/evaluations/evaluationScenarios/EvaluationScenarios.tsx b/agenta-web/src/components/pages/evaluations/evaluationScenarios/EvaluationScenarios.tsx index f348832ebd..c72d37550f 100644 --- a/agenta-web/src/components/pages/evaluations/evaluationScenarios/EvaluationScenarios.tsx +++ b/agenta-web/src/components/pages/evaluations/evaluationScenarios/EvaluationScenarios.tsx @@ -144,10 +144,9 @@ const EvaluationScenarios: React.FC = () => { flex: 1, minWidth: 120, headerName: "Cost", - field: "cost", ...getFilterParams("text"), valueGetter: (params) => { - return params.data.outputs[0].cost == undefined + return params.data?.outputs[0].cost == undefined ? "-" : formatCurrency(params.data.outputs[0].cost) }, @@ -157,10 +156,9 @@ const EvaluationScenarios: React.FC = () => { flex: 1, minWidth: 120, headerName: "Latency", - field: "latency", ...getFilterParams("text"), valueGetter: (params) => { - return params.data.outputs[0].latency == undefined + return params.data?.outputs[0].latency == undefined ? "-" : formatLatency(params.data.outputs[0].latency) }, From 5b606e48e2709ec23a7bdfd3633a8c12886e4170 Mon Sep 17 00:00:00 2001 From: Akrem Abayed Date: Thu, 28 Mar 2024 15:10:39 +0100 Subject: [PATCH 20/30] add cost and latency in eval scenario --- agenta-web/src/lib/Types.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agenta-web/src/lib/Types.ts b/agenta-web/src/lib/Types.ts index 7577fbe428..0dd3335e37 100644 --- a/agenta-web/src/lib/Types.ts +++ b/agenta-web/src/lib/Types.ts @@ -455,7 +455,7 @@ export interface _EvaluationScenario { evaluation: _Evaluation evaluators_configs: EvaluatorConfig[] inputs: (TypedValue & {name: string})[] - outputs: {result: TypedValue}[] + outputs: {result: TypedValue; cost?: number; latency?: number}[] correct_answer?: string is_pinned?: boolean note?: string From 1a94867a23a6ed67fedddd0383da8a87034fb378 Mon Sep 17 00:00:00 2001 From: Akrem Abayed Date: Fri, 29 Mar 2024 11:25:34 +0100 Subject: [PATCH 21/30] remove old implementation code --- agenta-backend/agenta_backend/tasks/evaluations.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/agenta-backend/agenta_backend/tasks/evaluations.py b/agenta-backend/agenta_backend/tasks/evaluations.py index 162f7bcbb0..7e63f49989 100644 --- a/agenta-backend/agenta_backend/tasks/evaluations.py +++ b/agenta-backend/agenta_backend/tasks/evaluations.py @@ -218,14 +218,9 @@ def evaluate( for evaluator_config_db in evaluator_config_dbs: logger.debug(f"Evaluating with evaluator: {evaluator_config_db}") if correct_answer_column in data_point: - output_value = ( - app_output.result.value["output"] - if isinstance(app_output.result.value, dict) - else app_output.result.value - ) result = evaluators_service.evaluate( evaluator_key=evaluator_config_db.evaluator_key, - output=output_value, + output=app_output.result.value, correct_answer=data_point[correct_answer_column], settings_values=evaluator_config_db.settings_values, app_params=app_variant_parameters, From 52fb90739eb2f61090c4fb6696b9e8eb9bd0035d Mon Sep 17 00:00:00 2001 From: Kaosiso Ezealigo Date: Fri, 29 Mar 2024 13:39:55 +0100 Subject: [PATCH 22/30] fixed failing cypress tests --- agenta-web/cypress/e2e/eval.comparison.cy.ts | 4 ++-- agenta-web/cypress/e2e/eval.evaluations.cy.ts | 2 +- .../pages/evaluations/evaluationResults/EvaluationResults.tsx | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/agenta-web/cypress/e2e/eval.comparison.cy.ts b/agenta-web/cypress/e2e/eval.comparison.cy.ts index 56424f7808..e2101f6c49 100644 --- a/agenta-web/cypress/e2e/eval.comparison.cy.ts +++ b/agenta-web/cypress/e2e/eval.comparison.cy.ts @@ -32,8 +32,8 @@ describe("Evaluation Comparison Test", function () { }) it("Should select 2 evaluations, click on the compare button, and successfully navigate to the comparison page", () => { - cy.get("#ag-33-input").check() - cy.get("#ag-39-input").check() + cy.get("div.ag-selection-checkbox input").eq(0).check() + cy.get("div.ag-selection-checkbox input").eq(1).check() cy.get('[data-cy="evaluation-results-compare-button"]').should("not.be.disabled") cy.get('[data-cy="evaluation-results-compare-button"]').click() cy.location("pathname").should("include", "/evaluations/compare") diff --git a/agenta-web/cypress/e2e/eval.evaluations.cy.ts b/agenta-web/cypress/e2e/eval.evaluations.cy.ts index 492544bf95..2192e9a7e8 100644 --- a/agenta-web/cypress/e2e/eval.evaluations.cy.ts +++ b/agenta-web/cypress/e2e/eval.evaluations.cy.ts @@ -33,7 +33,7 @@ describe("Evaluations CRUD Operations Test", function () { it("Should select evaluation and successfully delete it", () => { cy.get(".ag-root-wrapper").should("exist") - cy.get("#ag-33-input").check() + cy.get("div.ag-selection-checkbox input").eq(0).check() cy.get(":nth-child(1) > .ant-btn > .ant-btn-icon > .anticon > svg").click() cy.get(".ant-modal-confirm-btns > :nth-child(2) > span").click() }) diff --git a/agenta-web/src/components/pages/evaluations/evaluationResults/EvaluationResults.tsx b/agenta-web/src/components/pages/evaluations/evaluationResults/EvaluationResults.tsx index 2548b7a169..7a75ee454a 100644 --- a/agenta-web/src/components/pages/evaluations/evaluationResults/EvaluationResults.tsx +++ b/agenta-web/src/components/pages/evaluations/evaluationResults/EvaluationResults.tsx @@ -250,7 +250,7 @@ const EvaluationResults: React.FC = () => { flex: 1, field: "average_latency", headerName: "Latency", - minWidth: 120, + minWidth: 80, ...getFilterParams("number"), valueGetter: (params) => getTypedValue(params?.data?.average_latency), }, @@ -258,7 +258,7 @@ const EvaluationResults: React.FC = () => { flex: 1, field: "average_cost", headerName: "Cost", - minWidth: 120, + minWidth: 80, ...getFilterParams("number"), valueGetter: (params) => getTypedValue(params?.data?.average_cost), }, From 29e098968011330dd43672efeb018cc89851c27a Mon Sep 17 00:00:00 2001 From: Akrem Abayed Date: Fri, 29 Mar 2024 14:29:07 +0100 Subject: [PATCH 23/30] handle optional latency and cost in app response --- agenta-backend/agenta_backend/services/llm_apps_service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/agenta-backend/agenta_backend/services/llm_apps_service.py b/agenta-backend/agenta_backend/services/llm_apps_service.py index 7c963caf3c..7db418b60d 100644 --- a/agenta-backend/agenta_backend/services/llm_apps_service.py +++ b/agenta-backend/agenta_backend/services/llm_apps_service.py @@ -93,8 +93,8 @@ async def invoke_app( value=app_response["message"], error=None, ), - latency=app_response["latency"], - cost=app_response["cost"], + latency=app_response.get("latency"), + cost=app_response.get("cost"), ) except httpx.HTTPStatusError as e: From f65424a605b31b8799636fd5978abb0d55aa08f0 Mon Sep 17 00:00:00 2001 From: Akrem Abayed Date: Fri, 29 Mar 2024 15:16:06 +0100 Subject: [PATCH 24/30] add latency and cost to comparison view --- .../evaluationCompare/EvaluationCompare.tsx | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/agenta-web/src/components/pages/evaluations/evaluationCompare/EvaluationCompare.tsx b/agenta-web/src/components/pages/evaluations/evaluationCompare/EvaluationCompare.tsx index 547e7644b7..69fd53278c 100644 --- a/agenta-web/src/components/pages/evaluations/evaluationCompare/EvaluationCompare.tsx +++ b/agenta-web/src/components/pages/evaluations/evaluationCompare/EvaluationCompare.tsx @@ -25,6 +25,7 @@ import AgCustomHeader from "@/components/AgCustomHeader/AgCustomHeader" import {useAtom} from "jotai" import {evaluatorsAtom} from "@/lib/atoms/evaluation" import CompareOutputDiff from "@/components/CompareOutputDiff/CompareOutputDiff" +import { formatCurrency, formatLatency } from "@/lib/helpers/formatters" const useStyles = createUseStyles((theme: JSSTheme) => ({ table: { @@ -218,6 +219,50 @@ const EvaluationCompareMode: React.FC = () => { }) }) + variants.forEach((variant, vi) => { + colDefs.push({ + headerComponent: (props: any) => ( + + + Latency + {variant.variantName} + + + ), + minWidth: 120, + flex: 1, + valueGetter: (params) => { + const latency = params.data?.variants.find( + (item) => item.evaluationId === variant.evaluationId, + )?.output?.latency + return latency == "undefined" ? "-" : formatLatency(latency) + }, + ...getFilterParams("text"), + }) + }) + + variants.forEach((variant, vi) => { + colDefs.push({ + headerComponent: (props: any) => ( + + + Cost + {variant.variantName} + + + ), + minWidth: 120, + flex: 1, + valueGetter: (params) => { + const cost = params.data?.variants.find( + (item) => item.evaluationId === variant.evaluationId, + )?.output?.cost + return cost == "undefined" ? "-" : formatCurrency(cost) + }, + ...getFilterParams("text"), + }) + }) + return colDefs }, [rows, showDiff, evalIds]) @@ -364,3 +409,7 @@ const EvaluationCompareMode: React.FC = () => { } export default EvaluationCompareMode +function formatCost(cost: any) { + throw new Error("Function not implemented.") +} + From 21b42bb26f38381007b67e5865363b054775bb01 Mon Sep 17 00:00:00 2001 From: Akrem Abayed Date: Fri, 29 Mar 2024 15:16:06 +0100 Subject: [PATCH 25/30] add latency and cost to comparison view --- .../pages/evaluations/evaluationCompare/EvaluationCompare.tsx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/agenta-web/src/components/pages/evaluations/evaluationCompare/EvaluationCompare.tsx b/agenta-web/src/components/pages/evaluations/evaluationCompare/EvaluationCompare.tsx index 69fd53278c..3724411574 100644 --- a/agenta-web/src/components/pages/evaluations/evaluationCompare/EvaluationCompare.tsx +++ b/agenta-web/src/components/pages/evaluations/evaluationCompare/EvaluationCompare.tsx @@ -25,7 +25,7 @@ import AgCustomHeader from "@/components/AgCustomHeader/AgCustomHeader" import {useAtom} from "jotai" import {evaluatorsAtom} from "@/lib/atoms/evaluation" import CompareOutputDiff from "@/components/CompareOutputDiff/CompareOutputDiff" -import { formatCurrency, formatLatency } from "@/lib/helpers/formatters" +import {formatCurrency, formatLatency} from "@/lib/helpers/formatters" const useStyles = createUseStyles((theme: JSSTheme) => ({ table: { @@ -412,4 +412,3 @@ export default EvaluationCompareMode function formatCost(cost: any) { throw new Error("Function not implemented.") } - From d9661af27e9e3393f37e78ae162a80b72742a954 Mon Sep 17 00:00:00 2001 From: Akrem Abayed Date: Fri, 29 Mar 2024 15:27:47 +0100 Subject: [PATCH 26/30] format --- agenta-web/src/lib/Types.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agenta-web/src/lib/Types.ts b/agenta-web/src/lib/Types.ts index 0dd3335e37..dc853a7534 100644 --- a/agenta-web/src/lib/Types.ts +++ b/agenta-web/src/lib/Types.ts @@ -491,7 +491,7 @@ export type ComparisonResultRow = { variants: { variantId: string variantName: string - output: {result: TypedValue} + output: {result: TypedValue; cost?: number; latency?: number} evaluationId: string evaluatorConfigs: { evaluatorConfig: EvaluatorConfig From cf87d94dc41b056cbd43be3abb6b706446d64b1c Mon Sep 17 00:00:00 2001 From: Akrem Abayed Date: Fri, 29 Mar 2024 15:30:33 +0100 Subject: [PATCH 27/30] another fix --- .../pages/evaluations/evaluationCompare/EvaluationCompare.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/agenta-web/src/components/pages/evaluations/evaluationCompare/EvaluationCompare.tsx b/agenta-web/src/components/pages/evaluations/evaluationCompare/EvaluationCompare.tsx index 3724411574..93cd279fe9 100644 --- a/agenta-web/src/components/pages/evaluations/evaluationCompare/EvaluationCompare.tsx +++ b/agenta-web/src/components/pages/evaluations/evaluationCompare/EvaluationCompare.tsx @@ -235,7 +235,7 @@ const EvaluationCompareMode: React.FC = () => { const latency = params.data?.variants.find( (item) => item.evaluationId === variant.evaluationId, )?.output?.latency - return latency == "undefined" ? "-" : formatLatency(latency) + return latency === "undefined" ? "-" : formatLatency(latency) }, ...getFilterParams("text"), }) @@ -257,7 +257,7 @@ const EvaluationCompareMode: React.FC = () => { const cost = params.data?.variants.find( (item) => item.evaluationId === variant.evaluationId, )?.output?.cost - return cost == "undefined" ? "-" : formatCurrency(cost) + return cost === "undefined" ? "-" : formatCurrency(cost) }, ...getFilterParams("text"), }) From 82ecf5448064296b314a73747cc471751bd71115 Mon Sep 17 00:00:00 2001 From: Akrem Abayed Date: Fri, 29 Mar 2024 15:40:19 +0100 Subject: [PATCH 28/30] fix types --- .../pages/evaluations/evaluationCompare/EvaluationCompare.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/agenta-web/src/components/pages/evaluations/evaluationCompare/EvaluationCompare.tsx b/agenta-web/src/components/pages/evaluations/evaluationCompare/EvaluationCompare.tsx index 93cd279fe9..c7b3c0cef8 100644 --- a/agenta-web/src/components/pages/evaluations/evaluationCompare/EvaluationCompare.tsx +++ b/agenta-web/src/components/pages/evaluations/evaluationCompare/EvaluationCompare.tsx @@ -235,7 +235,7 @@ const EvaluationCompareMode: React.FC = () => { const latency = params.data?.variants.find( (item) => item.evaluationId === variant.evaluationId, )?.output?.latency - return latency === "undefined" ? "-" : formatLatency(latency) + return latency === undefined ? "-" : formatLatency(latency) }, ...getFilterParams("text"), }) @@ -257,7 +257,7 @@ const EvaluationCompareMode: React.FC = () => { const cost = params.data?.variants.find( (item) => item.evaluationId === variant.evaluationId, )?.output?.cost - return cost === "undefined" ? "-" : formatCurrency(cost) + return cost === undefined ? "-" : formatCurrency(cost) }, ...getFilterParams("text"), }) From 947839218b835dd51e155872db643c70a5440a48 Mon Sep 17 00:00:00 2001 From: Akrem Abayed Date: Fri, 29 Mar 2024 16:00:54 +0100 Subject: [PATCH 29/30] fix formatter --- agenta-web/src/lib/helpers/formatters.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agenta-web/src/lib/helpers/formatters.ts b/agenta-web/src/lib/helpers/formatters.ts index 34caa04736..3026288836 100644 --- a/agenta-web/src/lib/helpers/formatters.ts +++ b/agenta-web/src/lib/helpers/formatters.ts @@ -17,5 +17,5 @@ export const formatCurrency = (value = 0) => { } export const formatLatency = (value = 0) => { - return `${intlNumber.format(value / 1000)}s` + return `${intlNumber.format(value)}s` } From e49dfb1877a96f3877b7f1967ae10c0a9c411a4b Mon Sep 17 00:00:00 2001 From: Kaosiso Ezealigo Date: Sun, 31 Mar 2024 14:12:12 +0100 Subject: [PATCH 30/30] bumped ag-grid version --- agenta-web/package-lock.json | 22 +++++++++---------- agenta-web/package.json | 4 ++-- .../evaluationResults/EvaluationResults.tsx | 4 ++-- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/agenta-web/package-lock.json b/agenta-web/package-lock.json index aa2a4e4556..ea6c7e65cc 100644 --- a/agenta-web/package-lock.json +++ b/agenta-web/package-lock.json @@ -1,11 +1,11 @@ { - "name": "dashboard", + "name": "agenta", "version": "0.12.4", "lockfileVersion": 3, "requires": true, "packages": { "": { - "name": "dashboard", + "name": "agenta", "version": "0.12.4", "dependencies": { "@ant-design/colors": "^7.0.0", @@ -28,8 +28,8 @@ "@types/react-highlight-words": "^0.16.4", "@types/react-syntax-highlighter": "^15.5.7", "@types/uuid": "^9.0.7", - "ag-grid-community": "^31.0.1", - "ag-grid-react": "^31.0.1", + "ag-grid-community": "^31.2.0", + "ag-grid-react": "^31.2.0", "antd": "^5.4.7", "autoprefixer": "10.4.14", "axios": "^1.4.0", @@ -1773,16 +1773,16 @@ } }, "node_modules/ag-grid-community": { - "version": "31.0.1", - "resolved": "https://registry.npmjs.org/ag-grid-community/-/ag-grid-community-31.0.1.tgz", - "integrity": "sha512-RZQlW1DTOJHsUR/tnbnTJQKgAnDlHi05YYyTe5AgNor/1TlX1hoYdcqrGsJjvcHQgTjeEgzWOL0yf+KcqXZzxg==" + "version": "31.2.0", + "resolved": "https://registry.npmjs.org/ag-grid-community/-/ag-grid-community-31.2.0.tgz", + "integrity": "sha512-Ija6X171Iq3mFZASZlriQIIdEFqA71rZIsjQD6KHy5lMmxnoseZTX2neThBav1gvr6SA6n5B2PD6eUHdZnrUfw==" }, "node_modules/ag-grid-react": { - "version": "31.0.1", - "resolved": "https://registry.npmjs.org/ag-grid-react/-/ag-grid-react-31.0.1.tgz", - "integrity": "sha512-9nmYPsgH1YUDUDOTiyaFsysoNAx/y72ovFJKuOffZC1V7OrQMadyP6DbqGFWCqzzoLJOY7azOr51dDQzAIXLpw==", + "version": "31.2.0", + "resolved": "https://registry.npmjs.org/ag-grid-react/-/ag-grid-react-31.2.0.tgz", + "integrity": "sha512-ObFdPmF3EC7/xWZX8NjrZjURePyFa72MWjb1ZgUqDP7Wq09OSXXyKBN1qXmfUIT3h4o5+os6tCQEqoo7Op+3ZA==", "dependencies": { - "ag-grid-community": "~31.0.1", + "ag-grid-community": "31.2.0", "prop-types": "^15.8.1" }, "peerDependencies": { diff --git a/agenta-web/package.json b/agenta-web/package.json index 6f25692c17..85ea5a763d 100644 --- a/agenta-web/package.json +++ b/agenta-web/package.json @@ -39,8 +39,8 @@ "@types/react-highlight-words": "^0.16.4", "@types/react-syntax-highlighter": "^15.5.7", "@types/uuid": "^9.0.7", - "ag-grid-community": "^31.0.1", - "ag-grid-react": "^31.0.1", + "ag-grid-community": "^31.2.0", + "ag-grid-react": "^31.2.0", "antd": "^5.4.7", "autoprefixer": "10.4.14", "axios": "^1.4.0", diff --git a/agenta-web/src/components/pages/evaluations/evaluationResults/EvaluationResults.tsx b/agenta-web/src/components/pages/evaluations/evaluationResults/EvaluationResults.tsx index 7a75ee454a..2548b7a169 100644 --- a/agenta-web/src/components/pages/evaluations/evaluationResults/EvaluationResults.tsx +++ b/agenta-web/src/components/pages/evaluations/evaluationResults/EvaluationResults.tsx @@ -250,7 +250,7 @@ const EvaluationResults: React.FC = () => { flex: 1, field: "average_latency", headerName: "Latency", - minWidth: 80, + minWidth: 120, ...getFilterParams("number"), valueGetter: (params) => getTypedValue(params?.data?.average_latency), }, @@ -258,7 +258,7 @@ const EvaluationResults: React.FC = () => { flex: 1, field: "average_cost", headerName: "Cost", - minWidth: 80, + minWidth: 120, ...getFilterParams("number"), valueGetter: (params) => getTypedValue(params?.data?.average_cost), },