Merge pull request #1941 from Agenta-AI/mmabrouk/fix/fix-json-diff-eval

Improve the description of the JSON Diff evaluator and fix a minor issue
Agenta-AI · Aug 9, 2024 · dec77a7 · dec77a7
2 parents feb4b3a + 3313ef3
commit dec77a7
Show file tree

Hide file tree

Showing 2 changed files with 10 additions and 8 deletions.
diff --git a/agenta-backend/agenta_backend/resources/evaluators/evaluators.py b/agenta-backend/agenta_backend/resources/evaluators/evaluators.py
@@ -105,34 +105,34 @@
                 "description": "The name of the column in the test data that contains the correct answer",
             },
         },
-        "description": "JSON Field Match evaluator compares specific fields within JSON (JavaScript Object Notation) data. This matching can involve finding similarities or correspondences between fields in different JSON objects.",
+        "description": "Compares one specific field within a JSON to a ground truth in the test set.",
     },
     {
         "name": "JSON Diff Match",
         "key": "auto_json_diff",
         "direct_use": False,
-        "description": "JSON Diff evaluator compares two JSON objects to identify differences. It highlights discrepancies, additions, deletions, and modifications between the objects, providing a clear report of how they differ.",
+        "description": "Compares the generated JSON output to a ground truth JSON and returns a normalized score between 0 and 1 based on their differences.",
         "settings_template": {
             "compare_schema_only": {
                 "label": "Compare Schema Only",
                 "type": "boolean",
                 "default": False,
                 "advanced": True,
-                "description": "If set to True, we will compare the keys and the values type. Otherwise, we will compare the keys, the values and the values type.",
+                "description": "If set to True, only the key names and their types will be compared between prediction and ground truth, ignoring the actual values. If set to False, key names, their types, and their values will all be compared.",
             },
             "predict_keys": {
                 "label": "Include prediction keys",
                 "type": "boolean",
                 "default": False,
                 "advanced": True,
-                "description": "If set to True, we will check the reference (ground truth) keys. Othwerise, we will check both the reference (ground truth) and prediction (app output) keys.",
+                "description": "If set to True, only keys present in the ground truth will be considered. The result will be 1.0 if a key from the ground truth is correctly predicted, regardless of any additional predicted keys. Otherwise both ground truth and prediction keys will be checked.",
             },
             "case_insensitive_keys": {
                 "label": "Enable Case-sensitive keys",
                 "type": "boolean",
                 "default": False,
                 "advanced": True,
-                "description": "If set to True, we will treat keys as case-insensitive, meaning 'key', 'Key', and 'KEY' would all be considered equivalent. Otherwise, we will not.",
+                "description": "If set to True, keys will be treated as case-insensitive, meaning 'key', 'Key', and 'KEY' are considered equivalent. Otherwise, keys will be treated as case-sensitive.",
             },
             "correct_answer_key": {
                 "label": "Expected Answer Column",

diff --git a/agenta-backend/agenta_backend/services/evaluators_service.py b/agenta-backend/agenta_backend/services/evaluators_service.py
@@ -575,9 +575,11 @@ def diff(ground_truth: Any, app_output: Any, compare_schema_only: bool) -> float
             )
 
         cumulated_score += key_score
-
-    average_score = cumulated_score / no_of_keys
-    return average_score
+    try:
+        average_score = cumulated_score / no_of_keys
+        return average_score
+    except ZeroDivisionError:
+        return 0.0
 
 
 def auto_json_diff(