diff --git a/agenta-backend/agenta_backend/services/evaluators_service.py b/agenta-backend/agenta_backend/services/evaluators_service.py index 93ff69587c..a9637dee5b 100644 --- a/agenta-backend/agenta_backend/services/evaluators_service.py +++ b/agenta-backend/agenta_backend/services/evaluators_service.py @@ -793,9 +793,6 @@ async def auto_json_diff( lm_providers_keys: Dict[str, Any], # pylint: disable=unused-argument ) -> Result: try: - # 1. extract llm app output if app output format is v2+ - output = output.get("data", "") if isinstance(output, dict) else output - # 2. extract ground truth from data point correct_answer = get_correct_answer(data_point, settings_values) @@ -833,14 +830,21 @@ async def json_diff(input: EvaluatorInputInterface) -> EvaluatorOutputInterface: if isinstance(ground_truth, str): ground_truth = json.loads(ground_truth) # if this fails we will return an error + # 1. extract llm app output if app output format is v2+ app_output = input.inputs["prediction"] - assert isinstance(app_output, str), "App output is expected to be a string" - try: - app_output = json.loads(app_output) - except json.JSONDecodeError: - app_output = ( - {} - ) # we will return 0 score for json diff in case we cannot parse the output as json + assert isinstance( + app_output, (str, dict) + ), "App output is expected to be a string or a JSON object" + app_output = ( + app_output.get("data", "") if isinstance(app_output, dict) else app_output + ) + if isinstance(app_output, str): + try: + app_output = json.loads(app_output) + except json.JSONDecodeError: + app_output = ( + {} + ) # we will return 0 score for json diff in case we cannot parse the output as json score = compare_jsons( ground_truth=ground_truth,