Agenta-AI · aybruhm · Nov 27, 2024 · Nov 25, 2024 · Nov 25, 2024 · Nov 25, 2024
diff --git a/agenta-backend/agenta_backend/models/api/evaluation_model.py b/agenta-backend/agenta_backend/models/api/evaluation_model.py
@@ -4,6 +4,7 @@
 
 from pydantic import BaseModel, Field, model_validator
 
+from agenta_backend.utils import traces
 from agenta_backend.models.api.api_models import Result
 
 
@@ -98,6 +99,15 @@ class EvaluatorMappingInputInterface(BaseModel):
     inputs: Dict[str, Any]
     mapping: Dict[str, Any]
 
+    @model_validator(mode="before")
+    def remove_trace_prefix(cls, values: Dict) -> Dict:
+        """Strip the 'trace.' prefix from string values of `mapping`."""
+        mapping = values.get("mapping", {}) if isinstance(values, dict) else None
+        if not isinstance(mapping, dict):
+            return values  # leave malformed input for pydantic's own type errors
+        # Build a new payload rather than mutating the caller's dictionary in place
+        return {**values, "mapping": traces.remove_trace_prefix(mapping_dict=mapping)}
+
 
 class EvaluatorMappingOutputInterface(BaseModel):
     outputs: Dict[str, Any]

diff --git a/agenta-backend/agenta_backend/models/shared_models.py b/agenta-backend/agenta_backend/models/shared_models.py
@@ -43,7 +43,7 @@ class CorrectAnswer(BaseModel):
 class EvaluationScenarioInput(BaseModel):
     name: str
     type: str
-    value: str
+    value: Any
 
 
 class EvaluationScenarioOutput(BaseModel):

diff --git a/agenta-backend/agenta_backend/services/aggregation_service.py b/agenta-backend/agenta_backend/services/aggregation_service.py
@@ -125,6 +125,7 @@ def sum_float_from_llm_app_response(
             raise ValueError(f"No valid values found for {key} sum aggregation.")
 
         total_value = sum(values)
+
         return Result(type=key, value=total_value)
     except Exception as exc:
         return Result(

diff --git a/agenta-backend/agenta_backend/services/evaluators_service.py b/agenta-backend/agenta_backend/services/evaluators_service.py
@@ -21,6 +21,7 @@
     EvaluatorMappingOutputInterface,
 )
 from agenta_backend.utils.traces import (
+    remove_trace_prefix,
     process_distributed_trace_into_trace_tree,
     get_field_value_from_trace_tree,
 )
@@ -934,9 +935,10 @@ async def rag_faithfulness(
             )
 
         # Get required keys for rag evaluator
-        question_key: Union[str, None] = settings_values.get("question_key", None)
-        answer_key: Union[str, None] = settings_values.get("answer_key", None)
-        contexts_key: Union[str, None] = settings_values.get("contexts_key", None)
+        mapping_keys = remove_trace_prefix(settings_values=settings_values)
+        question_key: Union[str, None] = mapping_keys.get("question_key", None)
+        answer_key: Union[str, None] = mapping_keys.get("answer_key", None)
+        contexts_key: Union[str, None] = mapping_keys.get("contexts_key", None)
 
         if None in [question_key, answer_key, contexts_key]:
             logging.error(
@@ -947,12 +949,23 @@ async def rag_faithfulness(
             )
 
         # Turn distributed trace into trace tree
-        trace = process_distributed_trace_into_trace_tree(output["trace"])
+        trace = {}
+        version = output.get("version")
+        if version == "3.0":
+            trace = output.get("tree", {})
+        elif version == "2.0":
+            trace = output.get("trace", {})
+
+        trace = process_distributed_trace_into_trace_tree(trace, version)
 
         # Get value of required keys for rag evaluator
-        question_val: Any = get_field_value_from_trace_tree(trace, question_key)
-        answer_val: Any = get_field_value_from_trace_tree(trace, answer_key)
-        contexts_val: Any = get_field_value_from_trace_tree(trace, contexts_key)
+        question_val: Any = get_field_value_from_trace_tree(
+            trace, question_key, version
+        )
+        answer_val: Any = get_field_value_from_trace_tree(trace, answer_key, version)
+        contexts_val: Any = get_field_value_from_trace_tree(
+            trace, contexts_key, version
+        )
 
         if None in [question_val, answer_val, contexts_val]:
             logging.error(
@@ -1035,9 +1048,10 @@ async def rag_context_relevancy(
             )
 
         # Get required keys for rag evaluator
-        question_key: Union[str, None] = settings_values.get("question_key", None)
-        answer_key: Union[str, None] = settings_values.get("answer_key", None)
-        contexts_key: Union[str, None] = settings_values.get("contexts_key", None)
+        mapping_keys = remove_trace_prefix(settings_values=settings_values)
+        question_key: Union[str, None] = mapping_keys.get("question_key", None)
+        answer_key: Union[str, None] = mapping_keys.get("answer_key", None)
+        contexts_key: Union[str, None] = mapping_keys.get("contexts_key", None)
 
         if None in [question_key, answer_key, contexts_key]:
             logging.error(
@@ -1048,12 +1062,23 @@ async def rag_context_relevancy(
             )
 
         # Turn distributed trace into trace tree
-        trace = process_distributed_trace_into_trace_tree(output["trace"])
+        trace = {}
+        version = output.get("version")
+        if version == "3.0":
+            trace = output.get("tree", {})
+        elif version == "2.0":
+            trace = output.get("trace", {})
+
+        trace = process_distributed_trace_into_trace_tree(trace, version)
 
         # Get value of required keys for rag evaluator
-        question_val: Any = get_field_value_from_trace_tree(trace, question_key)
-        answer_val: Any = get_field_value_from_trace_tree(trace, answer_key)
-        contexts_val: Any = get_field_value_from_trace_tree(trace, contexts_key)
+        question_val: Any = get_field_value_from_trace_tree(
+            trace, question_key, version
+        )
+        answer_val: Any = get_field_value_from_trace_tree(trace, answer_key, version)
+        contexts_val: Any = get_field_value_from_trace_tree(
+            trace, contexts_key, version
+        )
 
         if None in [question_val, answer_val, contexts_val]:
             logging.error(

diff --git a/agenta-backend/agenta_backend/services/llm_apps_service.py b/agenta-backend/agenta_backend/services/llm_apps_service.py
@@ -3,6 +3,7 @@
 import asyncio
 import traceback
 import aiohttp
+from datetime import datetime
 from typing import Any, Dict, List, Optional
 
 
@@ -55,15 +56,19 @@ def extract_result_from_response(response: dict):
                 value["data"] = str(value.get("data"))
 
             if "tree" in response:
-                trace_tree = (
-                    response["tree"][0]
-                    if isinstance(response.get("tree"), list)
-                    else {}
-                )
+                # A missing, malformed or empty tree degrades to an empty root node
+                tree = response.get("tree")
+                nodes = tree.get("nodes") if isinstance(tree, dict) else None
+                trace_tree = nodes[0] if isinstance(nodes, list) and nodes else {}
+
+                # The SDK records this metric in milliseconds; latency is in seconds
+                duration_ms = get_nested_value(
+                    trace_tree, ["metrics", "acc", "duration", "total"]
+                )
                 latency = (
-                    get_nested_value(trace_tree, ["time", "span"]) * 1_000_000
-                    if trace_tree
-                    else None
+                    duration_ms / 1000
+                    if isinstance(duration_ms, (int, float))
+                    else None
                 )
                 cost = get_nested_value(
                     trace_tree, ["metrics", "acc", "costs", "total"]

diff --git a/agenta-backend/agenta_backend/utils/traces.py b/agenta-backend/agenta_backend/utils/traces.py
@@ -1,14 +1,59 @@
 import logging
 import traceback
 from copy import deepcopy
-from typing import Any, Dict
 from collections import OrderedDict
+from typing import Any, Dict, Union, Optional
 
 
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.DEBUG)
 
 
+def remove_trace_prefix(
+    mapping_dict: Optional[Dict] = None, settings_values: Optional[Dict] = None
+) -> Dict:
+    """
+    Return a copy of the mapping whose string values have a leading 'trace.' removed.
+
+    Only a prefix is stripped: 'trace.' occurring later in a path (for example
+    'rag.subtrace.outputs') is preserved, and entries whose value is None are dropped.
+
+    Args:
+        mapping_dict (Optional[Dict]): A dictionary containing the mapping values.
+        settings_values (Optional[Dict]): Evaluator settings. When non-empty it takes
+            precedence over `mapping_dict`, and only its "answer_key",
+            "contexts_key" and "question_key" entries are used.
+
+    Returns:
+        Dict: A new dictionary; the input dictionaries are not modified.
+
+    Raises:
+        ValueError: If neither `mapping_dict` nor `settings_values` is provided.
+
+    """
+
+    if mapping_dict is None and settings_values is None:
+        raise ValueError("No mapping dictionary or settings values provided")
+
+    prefix = "trace."
+
+    # Non-empty settings win; otherwise fall back to the mapping (or nothing)
+    if settings_values:
+        rag_keys = ("answer_key", "contexts_key", "question_key")
+        mapping_values = {key: settings_values.get(key) for key in rag_keys}
+    else:
+        mapping_values = mapping_dict or {}
+
+    def _strip(value: Any) -> Any:
+        # str.replace() would also delete 'trace.' from the middle of a path
+        if isinstance(value, str) and value.startswith(prefix):
+            return value[len(prefix) :]
+        return value
+
+    items = mapping_values.items()
+    return {key: _strip(value) for key, value in items if value is not None}
+
+
 def _make_spans_id_tree(trace):
     """
     Creates spans tree (id only) from flat spans list
@@ -63,18 +108,31 @@ def _make_nested_nodes_tree(tree: dict):
 
     ordered_tree = OrderedDict()
 
-    def add_node(node: dict, parent_tree: dict):
+    def add_node(node: Union[dict, list], parent_tree: dict):
         """
         Recursively adds a node and its children to the parent tree.
         """
+        if isinstance(node, list):
+            # If node is a list, process each item as a child node
+            for child_node in node:
+                add_node(child_node, parent_tree)
+            return
 
+        # If the node is a dictionary, proceed with its normal structure
         node_id = node["node"]["id"]
         parent_tree[node_id] = OrderedDict()
 
         # If there are child nodes, recursively add them
-        if "nodes" in node and node["nodes"] is not None:
-            for child_key, child_node in node["nodes"].items():
-                add_node(child_node, parent_tree[node_id])
+        if "nodes" in node and node["nodes"]:
+            child_nodes = node["nodes"]
+            if isinstance(child_nodes, list):
+                # If child nodes are a list, iterate over each one
+                for child_node in child_nodes:
+                    add_node(child_node, parent_tree[node_id])
+            elif isinstance(child_nodes, dict):
+                # If child nodes are a dictionary, add them recursively
+                for child_key, child_node in child_nodes.items():
+                    add_node(child_node, parent_tree[node_id])
 
     # Process the top-level nodes
     for node in tree["nodes"]:
@@ -116,10 +174,24 @@ def gather_nodes(nodes: list):
         stack = nodes[:]
         while stack:
             current = stack.pop()
+            if isinstance(current, list):
+                # If current is a list, process each item as a child node
+                stack.extend(current)  # Add each item of the list to the stack
+                continue  # Skip the rest of the logic for this item since it's a list
+
             node_id = current["node"]["id"]
             result[node_id] = current
             if "nodes" in current and current["nodes"] is not None:
-                stack.extend(current["nodes"].values())
+                # If there are child nodes, add them to the stack for further processing
+                child_nodes = current["nodes"]
+                if isinstance(child_nodes, list):
+                    stack.extend(
+                        child_nodes
+                    )  # If the child nodes are a list, add each to the stack
+                elif isinstance(child_nodes, dict):
+                    stack.extend(
+                        child_nodes.values()
+                    )  # If child nodes are a dict, add the values to the stack
         return result
 
     def extract_node_details(node_id: str, nodes: dict):
@@ -135,14 +207,9 @@ def extract_node_details(node_id: str, nodes: dict):
             "node": node_data.get("node", {}),
             "parent": node_data.get("parent", None),
             "time": node_data.get("time", {}),
-            "status": node_data.get("status"),
-            "exception": node_data.get("exception"),
             "data": node_data.get("data"),
             "metrics": node_data.get("metrics"),
             "meta": node_data.get("meta"),
-            "refs": node_data.get("refs"),
-            "links": node_data.get("links"),
-            "otel": node_data.get("otel"),
         }
 
     def recursive_flatten(current_nodes_id: dict, result: dict, nodes: dict):
@@ -156,12 +223,19 @@ def recursive_flatten(current_nodes_id: dict, result: dict, nodes: dict):
 
             # Recursively process child nodes
             if child_nodes:
-                recursive_flatten(child_nodes, result, nodes)
+                if isinstance(child_nodes, list):
+                    for child_node in child_nodes:
+                        recursive_flatten(
+                            {child_node["node"]["id"]: child_node}, result, nodes
+                        )
+                elif isinstance(child_nodes, dict):
+                    recursive_flatten(child_nodes, result, nodes)
 
     # Initialize the ordered dictionary and start the recursion
     ordered_result = dict()
     nodes = gather_nodes(nodes=tree_nodes)
     recursive_flatten(current_nodes_id=nodes_id, result=ordered_result, nodes=nodes)
+
     return list(ordered_result.values())
 
 

diff --git a/agenta-cli/agenta/sdk/tracing/inline.py b/agenta-cli/agenta/sdk/tracing/inline.py
@@ -41,7 +41,6 @@ class LifecycleDTO(BaseModel):
 class TimeDTO(BaseModel):
     start: datetime
     end: datetime
-    span: int
 
 
 class StatusCode(Enum):
@@ -846,12 +845,9 @@ def parse_from_otel_span_dto(
         else None
     )
 
-    duration = (otel_span_dto.end_time - otel_span_dto.start_time).total_seconds()
-
     time = TimeDTO(
         start=otel_span_dto.start_time,
         end=otel_span_dto.end_time,
-        span=round(duration * 1_000_000),  # microseconds
     )
 
     status = StatusDTO(
@@ -863,6 +859,13 @@ def parse_from_otel_span_dto(
 
     data, metrics, meta, tags, refs = _parse_from_attributes(otel_span_dto)
 
+    duration = (otel_span_dto.end_time - otel_span_dto.start_time).total_seconds()
+
+    if metrics is None:
+        metrics = dict()
+
+    metrics["acc.duration.total"] = round(duration * 1_000, 3)  # milliseconds
+
     root_id = str(tree_id)
     if refs is not None:
         root_id = refs.get("scenario.id", root_id)

diff --git a/agenta-web/src/components/Playground/Views/TestView.tsx b/agenta-web/src/components/Playground/Views/TestView.tsx
@@ -623,9 +623,11 @@ const App: React.FC<TestViewProps> = ({
                         const firstTraceNode = tree.nodes[0]
                         newDataList[index] = {
                             cost: firstTraceNode?.metrics?.acc?.costs?.total ?? null,
-                            latency: firstTraceNode?.time?.span
-                                ? firstTraceNode.time.span / 1_000_000
-                                : null,
+                            // `undefined / 1000` is NaN and `NaN ?? null` stays NaN, so test first
+                            latency:
+                                firstTraceNode?.metrics?.acc?.duration?.total != null
+                                    ? firstTraceNode.metrics.acc.duration.total / 1000
+                                    : null,
                             usage: firstTraceNode?.metrics?.acc?.tokens?.total ?? null,
                         }
                     }