From 0dcf7dc9ae7752a54a368d05afa45dcbe076a694 Mon Sep 17 00:00:00 2001 From: Abram Date: Mon, 25 Nov 2024 17:11:01 +0100 Subject: [PATCH 01/15] refactor (backend): add baseresponse v3 support and backward compatibility to rag evaluators --- .../services/evaluators_service.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/agenta-backend/agenta_backend/services/evaluators_service.py b/agenta-backend/agenta_backend/services/evaluators_service.py index 2c693e84e7..0e0e772427 100644 --- a/agenta-backend/agenta_backend/services/evaluators_service.py +++ b/agenta-backend/agenta_backend/services/evaluators_service.py @@ -947,7 +947,14 @@ async def rag_faithfulness( ) # Turn distributed trace into trace tree - trace = process_distributed_trace_into_trace_tree(output["trace"]) + trace = {} + version = output.get("version") + if version == "3.0": + trace = output.get("tree", {}) + elif version == "2.0": + trace = output.get("trace", {}) + + trace = process_distributed_trace_into_trace_tree(trace, version) # Get value of required keys for rag evaluator question_val: Any = get_field_value_from_trace_tree(trace, question_key) @@ -1048,7 +1055,14 @@ async def rag_context_relevancy( ) # Turn distributed trace into trace tree - trace = process_distributed_trace_into_trace_tree(output["trace"]) + trace = {} + version = output.get("version") + if version == "3.0": + trace = output.get("tree", {}) + elif version == "2.0": + trace = output.get("trace", {}) + + trace = process_distributed_trace_into_trace_tree(trace, version) # Get value of required keys for rag evaluator question_val: Any = get_field_value_from_trace_tree(trace, question_key) From 5ff5956bc8724aa3dae92db5c4fdf792a0f5db59 Mon Sep 17 00:00:00 2001 From: Abram Date: Mon, 25 Nov 2024 17:12:04 +0100 Subject: [PATCH 02/15] refactor (cli:sdk): set tree attribute to be None if not provided in inline_traces and bump pre-release version --- agenta-cli/agenta/sdk/decorators/routing.py | 2 +- agenta-cli/pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/agenta-cli/agenta/sdk/decorators/routing.py b/agenta-cli/agenta/sdk/decorators/routing.py index 73d05a8571..4b331d4e08 100644 --- a/agenta-cli/agenta/sdk/decorators/routing.py +++ b/agenta-cli/agenta/sdk/decorators/routing.py @@ -373,7 +373,7 @@ async def execute_function( async def handle_success(self, result: Any, inline_trace: bool): data = None - trace = dict() + trace = None with suppress(): data = self.patch_result(result) diff --git a/agenta-cli/pyproject.toml b/agenta-cli/pyproject.toml index fd116463ca..5969e0edd5 100644 --- a/agenta-cli/pyproject.toml +++ b/agenta-cli/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "agenta" -version = "0.27.6" +version = "0.27.8a2" description = "The SDK for agenta is an open-source LLMOps platform." readme = "README.md" authors = ["Mahmoud Mabrouk "] From b71bba8be421963a3b68137adfd27a3a80405d08 Mon Sep 17 00:00:00 2001 From: Abram Date: Mon, 25 Nov 2024 18:56:11 +0100 Subject: [PATCH 03/15] refactor (cli:sdk): revert sdk changes due to duplicate fix (https://github.com/Agenta-AI/agenta/pull/2303) --- agenta-cli/agenta/sdk/decorators/routing.py | 2 +- agenta-cli/pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/agenta-cli/agenta/sdk/decorators/routing.py b/agenta-cli/agenta/sdk/decorators/routing.py index 4b331d4e08..73d05a8571 100644 --- a/agenta-cli/agenta/sdk/decorators/routing.py +++ b/agenta-cli/agenta/sdk/decorators/routing.py @@ -373,7 +373,7 @@ async def execute_function( async def handle_success(self, result: Any, inline_trace: bool): data = None - trace = None + trace = dict() with suppress(): data = self.patch_result(result) diff --git a/agenta-cli/pyproject.toml b/agenta-cli/pyproject.toml index 5969e0edd5..fd116463ca 100644 --- a/agenta-cli/pyproject.toml +++ b/agenta-cli/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "agenta" -version = "0.27.8a2" +version = "0.27.6" description = "The SDK for agenta is an open-source LLMOps platform." readme = "README.md" authors = ["Mahmoud Mabrouk "] From 3c63f762a4315de5044b466e2b6c89094990094d Mon Sep 17 00:00:00 2001 From: Abram Date: Mon, 25 Nov 2024 18:58:00 +0100 Subject: [PATCH 04/15] fix (bug): resolve TypeError: get_field_value_from_trace_tree() missing 1 required positional argument: 'version' --- .../services/evaluators_service.py | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/agenta-backend/agenta_backend/services/evaluators_service.py b/agenta-backend/agenta_backend/services/evaluators_service.py index 0e0e772427..3e5b1cd6f6 100644 --- a/agenta-backend/agenta_backend/services/evaluators_service.py +++ b/agenta-backend/agenta_backend/services/evaluators_service.py @@ -957,9 +957,13 @@ async def rag_faithfulness( trace = process_distributed_trace_into_trace_tree(trace, version) # Get value of required keys for rag evaluator - question_val: Any = get_field_value_from_trace_tree(trace, question_key) - answer_val: Any = get_field_value_from_trace_tree(trace, answer_key) - contexts_val: Any = get_field_value_from_trace_tree(trace, contexts_key) + question_val: Any = get_field_value_from_trace_tree( + trace, question_key, version + ) + answer_val: Any = get_field_value_from_trace_tree(trace, answer_key, version) + contexts_val: Any = get_field_value_from_trace_tree( + trace, contexts_key, version + ) if None in [question_val, answer_val, contexts_val]: logging.error( @@ -1065,9 +1069,13 @@ async def rag_context_relevancy( trace = process_distributed_trace_into_trace_tree(trace, version) # Get value of required keys for rag evaluator - question_val: Any = get_field_value_from_trace_tree(trace, question_key) - answer_val: Any = get_field_value_from_trace_tree(trace, answer_key) - contexts_val: Any = get_field_value_from_trace_tree(trace, contexts_key) + question_val: Any = get_field_value_from_trace_tree( + trace, question_key, version + ) + answer_val: Any = get_field_value_from_trace_tree(trace, answer_key, version) + contexts_val: Any = get_field_value_from_trace_tree( + trace, contexts_key, version + ) if None in [question_val, answer_val, contexts_val]: logging.error( From 88a18e61632a9ca89dc5c11f14e0b3f23729a6ce Mon Sep 17 00:00:00 2001 From: Abram Date: Tue, 26 Nov 2024 11:39:59 +0100 Subject: [PATCH 05/15] refactor (backend): add support for nodes that are of type list --- agenta-backend/agenta_backend/utils/traces.py | 53 ++++++++++++++----- 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/agenta-backend/agenta_backend/utils/traces.py b/agenta-backend/agenta_backend/utils/traces.py index ea72929a31..897d313375 100644 --- a/agenta-backend/agenta_backend/utils/traces.py +++ b/agenta-backend/agenta_backend/utils/traces.py @@ -1,7 +1,7 @@ import logging import traceback from copy import deepcopy -from typing import Any, Dict +from typing import Any, Dict, Union from collections import OrderedDict @@ -63,18 +63,31 @@ def _make_nested_nodes_tree(tree: dict): ordered_tree = OrderedDict() - def add_node(node: dict, parent_tree: dict): + def add_node(node: Union[dict, list], parent_tree: dict): """ Recursively adds a node and its children to the parent tree. """ + if isinstance(node, list): + # If node is a list, process each item as a child node + for child_node in node: + add_node(child_node, parent_tree) + return + # If the node is a dictionary, proceed with its normal structure node_id = node["node"]["id"] parent_tree[node_id] = OrderedDict() # If there are child nodes, recursively add them - if "nodes" in node and node["nodes"] is not None: - for child_key, child_node in node["nodes"].items(): - add_node(child_node, parent_tree[node_id]) + if "nodes" in node and node["nodes"]: + child_nodes = node["nodes"] + if isinstance(child_nodes, list): + # If child nodes are a list, iterate over each one + for child_node in child_nodes: + add_node(child_node, parent_tree[node_id]) + elif isinstance(child_nodes, dict): + # If child nodes are a dictionary, add them recursively + for child_key, child_node in child_nodes.items(): + add_node(child_node, parent_tree[node_id]) # Process the top-level nodes for node in tree["nodes"]: @@ -116,10 +129,24 @@ def gather_nodes(nodes: list): stack = nodes[:] while stack: current = stack.pop() + if isinstance(current, list): + # If current is a list, process each item as a child node + stack.extend(current) # Add each item of the list to the stack + continue # Skip the rest of the logic for this item since it's a list + node_id = current["node"]["id"] result[node_id] = current if "nodes" in current and current["nodes"] is not None: - stack.extend(current["nodes"].values()) + # If there are child nodes, add them to the stack for further processing + child_nodes = current["nodes"] + if isinstance(child_nodes, list): + stack.extend( + child_nodes + ) # If the child nodes are a list, add each to the stack + elif isinstance(child_nodes, dict): + stack.extend( + child_nodes.values() + ) # If child nodes are a dict, add the values to the stack return result def extract_node_details(node_id: str, nodes: dict): @@ -135,14 +162,9 @@ def extract_node_details(node_id: str, nodes: dict): "node": node_data.get("node", {}), "parent": node_data.get("parent", None), "time": node_data.get("time", {}), - "status": node_data.get("status"), - "exception": node_data.get("exception"), "data": node_data.get("data"), "metrics": node_data.get("metrics"), "meta": node_data.get("meta"), - "refs": node_data.get("refs"), - "links": node_data.get("links"), - "otel": node_data.get("otel"), } def recursive_flatten(current_nodes_id: dict, result: dict, nodes: dict): @@ -156,12 +178,19 @@ def recursive_flatten(current_nodes_id: dict, result: dict, nodes: dict): # Recursively process child nodes if child_nodes: - recursive_flatten(child_nodes, result, nodes) + if isinstance(child_nodes, list): + for child_node in child_nodes: + recursive_flatten( + {child_node["node"]["id"]: child_node}, result, nodes + ) + elif isinstance(child_nodes, dict): + recursive_flatten(child_nodes, result, nodes) # Initialize the ordered dictionary and start the recursion ordered_result = dict() nodes = gather_nodes(nodes=tree_nodes) recursive_flatten(current_nodes_id=nodes_id, result=ordered_result, nodes=nodes) + return list(ordered_result.values()) From 2f3469cd730befc1a8229ff4f1323d445424cc92 Mon Sep 17 00:00:00 2001 From: Abram Date: Tue, 26 Nov 2024 11:49:52 +0100 Subject: [PATCH 06/15] refactor (backend): add default 0.0 value to resolve TypeError: unsupported operand type(s) for 'float' and 'NoneType' --- .../agenta_backend/services/aggregation_service.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/agenta-backend/agenta_backend/services/aggregation_service.py b/agenta-backend/agenta_backend/services/aggregation_service.py index 9664d5aa3a..81795b327a 100644 --- a/agenta-backend/agenta_backend/services/aggregation_service.py +++ b/agenta-backend/agenta_backend/services/aggregation_service.py @@ -72,7 +72,7 @@ def aggregate_float(results: List[Result]) -> Result: """ try: - average_value = sum(result.value for result in results) / len(results) + average_value = sum(result.value or 0.0 for result in results) / len(results) return Result(type="number", value=average_value) except Exception as exc: return Result( @@ -90,7 +90,7 @@ def aggregate_float_from_llm_app_response( raise ValueError("Key is required to aggregate InvokationResult objects.") values = [ - getattr(inv_result, key) + getattr(inv_result, key) or 0.0 for inv_result in invocation_results if hasattr(inv_result, key) and getattr(inv_result, key) is not None ] @@ -116,7 +116,7 @@ def sum_float_from_llm_app_response( raise ValueError("Key is required to aggregate InvokationResult objects.") values = [ - getattr(inv_result, key) + getattr(inv_result, key) or 0.0 for inv_result in invocation_results if hasattr(inv_result, key) and getattr(inv_result, key) is not None ] @@ -124,7 +124,10 @@ def sum_float_from_llm_app_response( if not values: raise ValueError(f"No valid values found for {key} sum aggregation.") - total_value = sum(values) + total_value = sum( + values + ) # sum([3.453, None]) -> TypeError: unsupported operand type(s) for +: 'float' and 'NoneType' + return Result(type=key, value=total_value) except Exception as exc: return Result( From e08da07b56d5140449c73a528e04bdef7db3de6e Mon Sep 17 00:00:00 2001 From: Abram Date: Tue, 26 Nov 2024 12:20:36 +0100 Subject: [PATCH 07/15] refactor (backend): set value type to be Any --- agenta-backend/agenta_backend/models/shared_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agenta-backend/agenta_backend/models/shared_models.py b/agenta-backend/agenta_backend/models/shared_models.py index d14ea22140..b623b66f11 100644 --- a/agenta-backend/agenta_backend/models/shared_models.py +++ b/agenta-backend/agenta_backend/models/shared_models.py @@ -43,7 +43,7 @@ class CorrectAnswer(BaseModel): class EvaluationScenarioInput(BaseModel): name: str type: str - value: str + value: Any class EvaluationScenarioOutput(BaseModel): From 3e93e11975e06872dd3cc86369e76830a772abec Mon Sep 17 00:00:00 2001 From: Abram Date: Tue, 26 Nov 2024 15:15:15 +0100 Subject: [PATCH 08/15] refactor (backend): fix parameters required for rag evaluators & latency / cost computation --- .../agenta_backend/models/api/evaluation_model.py | 10 ++++++++++ .../agenta_backend/services/evaluators_service.py | 15 +++++++++------ .../agenta_backend/services/llm_apps_service.py | 11 +++++------ 3 files changed, 24 insertions(+), 12 deletions(-) diff --git a/agenta-backend/agenta_backend/models/api/evaluation_model.py b/agenta-backend/agenta_backend/models/api/evaluation_model.py index 0f2b1b364a..913f00c500 100644 --- a/agenta-backend/agenta_backend/models/api/evaluation_model.py +++ b/agenta-backend/agenta_backend/models/api/evaluation_model.py @@ -4,6 +4,7 @@ from pydantic import BaseModel, Field, model_validator +from agenta_backend.utils import traces from agenta_backend.models.api.api_models import Result @@ -98,6 +99,15 @@ class EvaluatorMappingInputInterface(BaseModel): inputs: Dict[str, Any] mapping: Dict[str, Any] + @model_validator(mode="before") + def remove_trace_prefix(cls, values: Dict) -> Dict: + mapping = values.get("mapping", {}) + updated_mapping = traces.remove_trace_prefix(mapping_dict=mapping) + + # Set the modified mapping back to the values + values["mapping"] = updated_mapping + return values + class EvaluatorMappingOutputInterface(BaseModel): outputs: Dict[str, Any] diff --git a/agenta-backend/agenta_backend/services/evaluators_service.py b/agenta-backend/agenta_backend/services/evaluators_service.py index 3e5b1cd6f6..ae8bad50e9 100644 --- a/agenta-backend/agenta_backend/services/evaluators_service.py +++ b/agenta-backend/agenta_backend/services/evaluators_service.py @@ -21,6 +21,7 @@ EvaluatorMappingOutputInterface, ) from agenta_backend.utils.traces import ( + remove_trace_prefix, process_distributed_trace_into_trace_tree, get_field_value_from_trace_tree, ) @@ -934,9 +935,10 @@ async def rag_faithfulness( ) # Get required keys for rag evaluator - question_key: Union[str, None] = settings_values.get("question_key", None) - answer_key: Union[str, None] = settings_values.get("answer_key", None) - contexts_key: Union[str, None] = settings_values.get("contexts_key", None) + mapping_keys = remove_trace_prefix(settings_values=settings_values) + question_key: Union[str, None] = mapping_keys.get("question_key", None) + answer_key: Union[str, None] = mapping_keys.get("answer_key", None) + contexts_key: Union[str, None] = mapping_keys.get("contexts_key", None) if None in [question_key, answer_key, contexts_key]: logging.error( @@ -1046,9 +1048,10 @@ async def rag_context_relevancy( ) # Get required keys for rag evaluator - question_key: Union[str, None] = settings_values.get("question_key", None) - answer_key: Union[str, None] = settings_values.get("answer_key", None) - contexts_key: Union[str, None] = settings_values.get("contexts_key", None) + mapping_keys = remove_trace_prefix(settings_values=settings_values) + question_key: Union[str, None] = mapping_keys.get("question_key", None) + answer_key: Union[str, None] = mapping_keys.get("answer_key", None) + contexts_key: Union[str, None] = mapping_keys.get("contexts_key", None) if None in [question_key, answer_key, contexts_key]: logging.error( diff --git a/agenta-backend/agenta_backend/services/llm_apps_service.py b/agenta-backend/agenta_backend/services/llm_apps_service.py index 0cdef1a2d2..d87177ff6e 100644 --- a/agenta-backend/agenta_backend/services/llm_apps_service.py +++ b/agenta-backend/agenta_backend/services/llm_apps_service.py @@ -55,13 +55,10 @@ def extract_result_from_response(response: dict): value["data"] = str(value.get("data")) if "tree" in response: - trace_tree = ( - response["tree"][0] - if isinstance(response.get("tree"), list) - else {} - ) + trace_tree = response.get("tree", {}).get("nodes", [])[0] + latency = ( - get_nested_value(trace_tree, ["time", "span"]) * 1_000_000 + get_nested_value(trace_tree, ["time", "span"]) / 1_000_000 if trace_tree else None ) @@ -108,6 +105,8 @@ def extract_result_from_response(response: dict): value = {"error": f"Unexpected error: {e}"} kind = "error" + print("Cost: ", cost) + print("Latency: ", latency) return value, kind, cost, latency From 36d311f62223e8e4b4091a0baed3080f0ff9da53 Mon Sep 17 00:00:00 2001 From: Abram Date: Tue, 26 Nov 2024 15:24:16 +0100 Subject: [PATCH 09/15] feat (backend): create remove_trace_prefix utility function --- agenta-backend/agenta_backend/utils/traces.py | 47 ++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/agenta-backend/agenta_backend/utils/traces.py b/agenta-backend/agenta_backend/utils/traces.py index 897d313375..807466550b 100644 --- a/agenta-backend/agenta_backend/utils/traces.py +++ b/agenta-backend/agenta_backend/utils/traces.py @@ -1,14 +1,59 @@ import logging import traceback from copy import deepcopy -from typing import Any, Dict, Union from collections import OrderedDict +from typing import Any, Dict, Union, Optional logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) +def remove_trace_prefix( + mapping_dict: Optional[Dict] = None, settings_values: Optional[Dict] = None +) -> Dict: + """ + Modify the values of the mapping dictionary to remove 'trace.' prefix if it exists. + + Args: + mapping_dict (Optional[Dict]): A dictionary containing the mapping values. + settings_values (Optional[Dict]): A dictionary with keys like "answer_key", + "contexts_key", "question_key" to override + specific mapping values. + + Returns: + Dict: A dictionary with the 'trace.' prefix removed from any string values. + + Raises: + ValueError: If neither `mapping_dict` nor `settings_values` is provided. + + """ + + if mapping_dict is None and settings_values is None: + raise ValueError("No mapping dictionary or settings values provided") + + # Determine which dictionary to use + if settings_values: + mapping_values = { + "answer_key": settings_values.get("answer_key"), + "contexts_key": settings_values.get("contexts_key"), + "question_key": settings_values.get("question_key"), + } + elif mapping_dict: + mapping_values = mapping_dict + else: + mapping_values = {} + + # Update the mapping by removing the 'trace.' prefix from string values + updated_mapping_dict = { + key: value.replace("trace.", "") if isinstance(value, str) else value + for key, value in mapping_values.items() + if value is not None + } + + return updated_mapping_dict + + def _make_spans_id_tree(trace): """ Creates spans tree (id only) from flat spans list From f70121c18a9acd6abe4cc5c7ef0161fbc80ac800 Mon Sep 17 00:00:00 2001 From: Abram Date: Tue, 26 Nov 2024 15:44:46 +0100 Subject: [PATCH 10/15] chore (backend): remove print debug statements --- agenta-backend/agenta_backend/services/llm_apps_service.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/agenta-backend/agenta_backend/services/llm_apps_service.py b/agenta-backend/agenta_backend/services/llm_apps_service.py index d87177ff6e..2b874e7eb1 100644 --- a/agenta-backend/agenta_backend/services/llm_apps_service.py +++ b/agenta-backend/agenta_backend/services/llm_apps_service.py @@ -105,8 +105,6 @@ def extract_result_from_response(response: dict): value = {"error": f"Unexpected error: {e}"} kind = "error" - print("Cost: ", cost) - print("Latency: ", latency) return value, kind, cost, latency From cf3fd611b33d47dd355e95de2ceeb00641a36604 Mon Sep 17 00:00:00 2001 From: Abram Date: Tue, 26 Nov 2024 15:59:00 +0100 Subject: [PATCH 11/15] chore (backend): leave aggregation of NoneType as is --- .../agenta_backend/services/aggregation_service.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/agenta-backend/agenta_backend/services/aggregation_service.py b/agenta-backend/agenta_backend/services/aggregation_service.py index 81795b327a..9f936e9196 100644 --- a/agenta-backend/agenta_backend/services/aggregation_service.py +++ b/agenta-backend/agenta_backend/services/aggregation_service.py @@ -72,7 +72,7 @@ def aggregate_float(results: List[Result]) -> Result: """ try: - average_value = sum(result.value or 0.0 for result in results) / len(results) + average_value = sum(result.value for result in results) / len(results) return Result(type="number", value=average_value) except Exception as exc: return Result( @@ -90,7 +90,7 @@ def aggregate_float_from_llm_app_response( raise ValueError("Key is required to aggregate InvokationResult objects.") values = [ - getattr(inv_result, key) or 0.0 + getattr(inv_result, key) for inv_result in invocation_results if hasattr(inv_result, key) and getattr(inv_result, key) is not None ] @@ -116,7 +116,7 @@ def sum_float_from_llm_app_response( raise ValueError("Key is required to aggregate InvokationResult objects.") values = [ - getattr(inv_result, key) or 0.0 + getattr(inv_result, key) for inv_result in invocation_results if hasattr(inv_result, key) and getattr(inv_result, key) is not None ] @@ -124,9 +124,7 @@ def sum_float_from_llm_app_response( if not values: raise ValueError(f"No valid values found for {key} sum aggregation.") - total_value = sum( - values - ) # sum([3.453, None]) -> TypeError: unsupported operand type(s) for +: 'float' and 'NoneType' + total_value = sum(values) return Result(type=key, value=total_value) except Exception as exc: From 2a7c50ab0107783ad4235b32278f0315f31e080c Mon Sep 17 00:00:00 2001 From: Abram Date: Tue, 26 Nov 2024 20:55:22 +0100 Subject: [PATCH 12/15] refactor (backend): remove deprecated 'span' attribute in 'time' meta and compute latency gotten from invocating app in evaluation --- .../services/llm_apps_service.py | 31 +++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/agenta-backend/agenta_backend/services/llm_apps_service.py b/agenta-backend/agenta_backend/services/llm_apps_service.py index 2b874e7eb1..34c0379265 100644 --- a/agenta-backend/agenta_backend/services/llm_apps_service.py +++ b/agenta-backend/agenta_backend/services/llm_apps_service.py @@ -3,6 +3,7 @@ import asyncio import traceback import aiohttp +from datetime import datetime from typing import Any, Dict, List, Optional @@ -36,6 +37,26 @@ def get_nested_value(d: dict, keys: list, default=None): return default +def compute_latency(start_time_str: str, end_time_str: str) -> float: + try: + # Define the format to parse the time strings + time_format = "%Y-%m-%dT%H:%M:%S.%f" + + # Convert the strings to datetime objects + start_datetime = datetime.strptime(start_time_str, time_format) + end_datetime = datetime.strptime(end_time_str, time_format) + + # Calculate the difference in seconds + time_difference = (end_datetime - start_datetime).total_seconds() + return time_difference + except Exception as e: + print(f"Error computing latency from invoking an app. Defaulting to None.") + print(" ------------------- Exception ------------------------") + print(str(e)) + print(" ------------------- End Exception ------------------------") + return None + + def extract_result_from_response(response: dict): # Initialize default values value = None @@ -57,11 +78,17 @@ def extract_result_from_response(response: dict): if "tree" in response: trace_tree = response.get("tree", {}).get("nodes", [])[0] - latency = ( - get_nested_value(trace_tree, ["time", "span"]) / 1_000_000 + start_time_str = ( + get_nested_value(trace_tree, ["time", "start"]) + if trace_tree + else None + ) + end_time_str = ( + get_nested_value(trace_tree, ["time", "end"]) if trace_tree else None ) + latency = compute_latency(start_time_str, end_time_str) cost = get_nested_value( trace_tree, ["metrics", "acc", "costs", "total"] ) From c9142ba78c299decb46974882dd69575fbfbdf66 Mon Sep 17 00:00:00 2001 From: Abram Date: Tue, 26 Nov 2024 21:08:24 +0100 Subject: [PATCH 13/15] chore (backend): remove overhead in computing latency and make use of metrics.acc.duration.total to get latency (in milliseconds) --- .../services/llm_apps_service.py | 41 ++++--------------- 1 file changed, 8 insertions(+), 33 deletions(-) diff --git a/agenta-backend/agenta_backend/services/llm_apps_service.py b/agenta-backend/agenta_backend/services/llm_apps_service.py index 34c0379265..1a12b9d850 100644 --- a/agenta-backend/agenta_backend/services/llm_apps_service.py +++ b/agenta-backend/agenta_backend/services/llm_apps_service.py @@ -37,26 +37,6 @@ def get_nested_value(d: dict, keys: list, default=None): return default -def compute_latency(start_time_str: str, end_time_str: str) -> float: - try: - # Define the format to parse the time strings - time_format = "%Y-%m-%dT%H:%M:%S.%f" - - # Convert the strings to datetime objects - start_datetime = datetime.strptime(start_time_str, time_format) - end_datetime = datetime.strptime(end_time_str, time_format) - - # Calculate the difference in seconds - time_difference = (end_datetime - start_datetime).total_seconds() - return time_difference - except Exception as e: - print(f"Error computing latency from invoking an app. Defaulting to None.") - print(" ------------------- Exception ------------------------") - print(str(e)) - print(" ------------------- End Exception ------------------------") - return None - - def extract_result_from_response(response: dict): # Initialize default values value = None @@ -78,17 +58,12 @@ def extract_result_from_response(response: dict): if "tree" in response: trace_tree = response.get("tree", {}).get("nodes", [])[0] - start_time_str = ( - get_nested_value(trace_tree, ["time", "start"]) - if trace_tree - else None - ) - end_time_str = ( - get_nested_value(trace_tree, ["time", "end"]) - if trace_tree - else None + latency = ( + get_nested_value( + trace_tree, ["metrics", "acc", "duration", "total"] + ) + / 1000 ) - latency = compute_latency(start_time_str, end_time_str) cost = get_nested_value( trace_tree, ["metrics", "acc", "costs", "total"] ) @@ -384,9 +359,9 @@ async def batch_invoke( "delay_between_batches" ] # Delay between batches (in seconds) - list_of_app_outputs: List[ - InvokationResult - ] = [] # Outputs after running all batches + list_of_app_outputs: List[InvokationResult] = ( + [] + ) # Outputs after running all batches headers = None if isCloudEE(): From 2de3761dfab23707cb932fac8cdf9df683abe09b Mon Sep 17 00:00:00 2001 From: Abram Date: Tue, 26 Nov 2024 21:11:32 +0100 Subject: [PATCH 14/15] style (backend): format backend --- agenta-backend/agenta_backend/services/llm_apps_service.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/agenta-backend/agenta_backend/services/llm_apps_service.py b/agenta-backend/agenta_backend/services/llm_apps_service.py index 1a12b9d850..d8ce93cb5e 100644 --- a/agenta-backend/agenta_backend/services/llm_apps_service.py +++ b/agenta-backend/agenta_backend/services/llm_apps_service.py @@ -359,9 +359,9 @@ async def batch_invoke( "delay_between_batches" ] # Delay between batches (in seconds) - list_of_app_outputs: List[InvokationResult] = ( - [] - ) # Outputs after running all batches + list_of_app_outputs: List[ + InvokationResult + ] = [] # Outputs after running all batches headers = None if isCloudEE(): From fa15ed54c1e4e3a23990674b61661a6f31012b29 Mon Sep 17 00:00:00 2001 From: Abram Date: Wed, 27 Nov 2024 10:00:34 +0100 Subject: [PATCH 15/15] refactor (sdk:web): compute latency/duration in sdk properly and update use of it in the playground --- agenta-cli/agenta/sdk/tracing/inline.py | 11 +++++++---- .../src/components/Playground/Views/TestView.tsx | 4 +--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/agenta-cli/agenta/sdk/tracing/inline.py b/agenta-cli/agenta/sdk/tracing/inline.py index 69b90fb7a9..d8309db99c 100644 --- a/agenta-cli/agenta/sdk/tracing/inline.py +++ b/agenta-cli/agenta/sdk/tracing/inline.py @@ -41,7 +41,6 @@ class LifecycleDTO(BaseModel): class TimeDTO(BaseModel): start: datetime end: datetime - span: int class StatusCode(Enum): @@ -846,12 +845,9 @@ def parse_from_otel_span_dto( else None ) - duration = (otel_span_dto.end_time - otel_span_dto.start_time).total_seconds() - time = TimeDTO( start=otel_span_dto.start_time, end=otel_span_dto.end_time, - span=round(duration * 1_000_000), # microseconds ) status = StatusDTO( @@ -863,6 +859,13 @@ def parse_from_otel_span_dto( data, metrics, meta, tags, refs = _parse_from_attributes(otel_span_dto) + duration = (otel_span_dto.end_time - otel_span_dto.start_time).total_seconds() + + if metrics is None: + metrics = dict() + + metrics["acc.duration.total"] = round(duration * 1_000, 3) # milliseconds + root_id = str(tree_id) if refs is not None: root_id = refs.get("scenario.id", root_id) diff --git a/agenta-web/src/components/Playground/Views/TestView.tsx b/agenta-web/src/components/Playground/Views/TestView.tsx index 2da370ae79..5c725cb15c 100644 --- a/agenta-web/src/components/Playground/Views/TestView.tsx +++ b/agenta-web/src/components/Playground/Views/TestView.tsx @@ -623,9 +623,7 @@ const App: React.FC = ({ const firstTraceNode = tree.nodes[0] newDataList[index] = { cost: firstTraceNode?.metrics?.acc?.costs?.total ?? null, - latency: firstTraceNode?.time?.span - ? firstTraceNode.time.span / 1_000_000 - : null, + latency: firstTraceNode?.metrics?.acc?.duration?.total / 1000 ?? null, usage: firstTraceNode?.metrics?.acc?.tokens?.total ?? null, } }