diff --git a/agenta-backend/agenta_backend/resources/evaluators/evaluators.py b/agenta-backend/agenta_backend/resources/evaluators/evaluators.py index 484514f073..39b23a58da 100644 --- a/agenta-backend/agenta_backend/resources/evaluators/evaluators.py +++ b/agenta-backend/agenta_backend/resources/evaluators/evaluators.py @@ -105,34 +105,34 @@ "description": "The name of the column in the test data that contains the correct answer", }, }, - "description": "JSON Field Match evaluator compares specific fields within JSON (JavaScript Object Notation) data. This matching can involve finding similarities or correspondences between fields in different JSON objects.", + "description": "Compares one specific field within a JSON to a ground truth in the test set.", }, { "name": "JSON Diff Match", "key": "auto_json_diff", "direct_use": False, - "description": "JSON Diff evaluator compares two JSON objects to identify differences. It highlights discrepancies, additions, deletions, and modifications between the objects, providing a clear report of how they differ.", + "description": "Compares the generated JSON output to a ground truth JSON and returns a normalized score between 0 and 1 based on their differences.", "settings_template": { "compare_schema_only": { "label": "Compare Schema Only", "type": "boolean", "default": False, "advanced": True, - "description": "If set to True, we will compare the keys and the values type. Otherwise, we will compare the keys, the values and the values type.", + "description": "If set to True, only the key names and their types will be compared between prediction and ground truth, ignoring the actual values. If set to False, key names, their types, and their values will all be compared.", }, "predict_keys": { "label": "Include prediction keys", "type": "boolean", "default": False, "advanced": True, - "description": "If set to True, we will check the reference (ground truth) keys. 
Othwerise, we will check both the reference (ground truth) and prediction (app output) keys.", + "description": "If set to True, only keys present in the ground truth will be considered. The result will be 1.0 if a key from the ground truth is correctly predicted, regardless of any additional predicted keys. Otherwise both ground truth and prediction keys will be checked.", }, "case_insensitive_keys": { "label": "Enable Case-sensitive keys", "type": "boolean", "default": False, "advanced": True, - "description": "If set to True, we will treat keys as case-insensitive, meaning 'key', 'Key', and 'KEY' would all be considered equivalent. Otherwise, we will not.", + "description": "If set to True, keys will be treated as case-insensitive, meaning 'key', 'Key', and 'KEY' are considered equivalent. Otherwise, keys will be treated as case-sensitive.", }, "correct_answer_key": { "label": "Expected Answer Column", diff --git a/agenta-backend/agenta_backend/services/evaluators_service.py b/agenta-backend/agenta_backend/services/evaluators_service.py index bba32bf61a..22f7b20a51 100644 --- a/agenta-backend/agenta_backend/services/evaluators_service.py +++ b/agenta-backend/agenta_backend/services/evaluators_service.py @@ -575,9 +575,11 @@ def diff(ground_truth: Any, app_output: Any, compare_schema_only: bool) -> float ) cumulated_score += key_score - - average_score = cumulated_score / no_of_keys - return average_score + try: + average_score = cumulated_score / no_of_keys + return average_score + except ZeroDivisionError: + return 0.0 def auto_json_diff(