diff --git a/agenta-backend/agenta_backend/models/api/evaluation_model.py b/agenta-backend/agenta_backend/models/api/evaluation_model.py index 0f2b1b364a..913f00c500 100644 --- a/agenta-backend/agenta_backend/models/api/evaluation_model.py +++ b/agenta-backend/agenta_backend/models/api/evaluation_model.py @@ -4,6 +4,7 @@ from pydantic import BaseModel, Field, model_validator +from agenta_backend.utils import traces from agenta_backend.models.api.api_models import Result @@ -98,6 +99,15 @@ class EvaluatorMappingInputInterface(BaseModel): inputs: Dict[str, Any] mapping: Dict[str, Any] + @model_validator(mode="before") + def remove_trace_prefix(cls, values: Dict) -> Dict: + mapping = values.get("mapping", {}) + updated_mapping = traces.remove_trace_prefix(mapping_dict=mapping) + + # Set the modified mapping back to the values + values["mapping"] = updated_mapping + return values + class EvaluatorMappingOutputInterface(BaseModel): outputs: Dict[str, Any] diff --git a/agenta-backend/agenta_backend/services/evaluators_service.py b/agenta-backend/agenta_backend/services/evaluators_service.py index 3e5b1cd6f6..ae8bad50e9 100644 --- a/agenta-backend/agenta_backend/services/evaluators_service.py +++ b/agenta-backend/agenta_backend/services/evaluators_service.py @@ -21,6 +21,7 @@ EvaluatorMappingOutputInterface, ) from agenta_backend.utils.traces import ( + remove_trace_prefix, process_distributed_trace_into_trace_tree, get_field_value_from_trace_tree, ) @@ -934,9 +935,10 @@ async def rag_faithfulness( ) # Get required keys for rag evaluator - question_key: Union[str, None] = settings_values.get("question_key", None) - answer_key: Union[str, None] = settings_values.get("answer_key", None) - contexts_key: Union[str, None] = settings_values.get("contexts_key", None) + mapping_keys = remove_trace_prefix(settings_values=settings_values) + question_key: Union[str, None] = mapping_keys.get("question_key", None) + answer_key: Union[str, None] = mapping_keys.get("answer_key", None) + contexts_key: Union[str, None] = mapping_keys.get("contexts_key", None) if None in [question_key, answer_key, contexts_key]: logging.error( @@ -1046,9 +1048,10 @@ async def rag_context_relevancy( ) # Get required keys for rag evaluator - question_key: Union[str, None] = settings_values.get("question_key", None) - answer_key: Union[str, None] = settings_values.get("answer_key", None) - contexts_key: Union[str, None] = settings_values.get("contexts_key", None) + mapping_keys = remove_trace_prefix(settings_values=settings_values) + question_key: Union[str, None] = mapping_keys.get("question_key", None) + answer_key: Union[str, None] = mapping_keys.get("answer_key", None) + contexts_key: Union[str, None] = mapping_keys.get("contexts_key", None) if None in [question_key, answer_key, contexts_key]: logging.error( diff --git a/agenta-backend/agenta_backend/services/llm_apps_service.py b/agenta-backend/agenta_backend/services/llm_apps_service.py index 0cdef1a2d2..d87177ff6e 100644 --- a/agenta-backend/agenta_backend/services/llm_apps_service.py +++ b/agenta-backend/agenta_backend/services/llm_apps_service.py @@ -55,13 +55,10 @@ def extract_result_from_response(response: dict): value["data"] = str(value.get("data")) if "tree" in response: - trace_tree = ( - response["tree"][0] - if isinstance(response.get("tree"), list) - else {} - ) + trace_tree = response.get("tree", {}).get("nodes", [])[0] + latency = ( - get_nested_value(trace_tree, ["time", "span"]) * 1_000_000 + get_nested_value(trace_tree, ["time", "span"]) / 1_000_000 if trace_tree else None ) @@ -108,6 +105,8 @@ def extract_result_from_response(response: dict): value = {"error": f"Unexpected error: {e}"} kind = "error" + print("Cost: ", cost) + print("Latency: ", latency) return value, kind, cost, latency