
Commit 64f6847

Merge pull request #1407 from Agenta-AI/issue-1392/-Improving-the-add-new-evaluation-modal

Improving the add new evaluation modal
aakrem authored Feb 25, 2024
2 parents 56d3d32 + a2f9490 commit 64f6847
Showing 6 changed files with 256 additions and 82 deletions.
@@ -10,6 +10,7 @@ class Evaluator(BaseModel):
     key: str
     direct_use: bool
     settings_template: dict
+    description: Optional[str]


 class EvaluatorConfig(BaseModel):
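
The backend's evaluator schema now carries an optional description, which is what the reworked selection modal surfaces for each evaluator. For orientation, a minimal TypeScript mirror of this shape as the frontend might type it; the interface itself is illustrative, and only the fields visible in the hunk (plus name, which appears in the evaluator templates below) come from this diff:

    // Illustrative mirror of the Pydantic Evaluator model above; the
    // interface name and placement are assumptions, not part of the PR.
    interface Evaluator {
        name: string // present in the evaluator templates in the next file
        key: string
        direct_use: boolean
        settings_template: Record<string, unknown>
        description?: string // the optional field added by this PR
    }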
@@ -20,11 +20,13 @@
                 "description": "The threshold value for similarity comparison",
             }
         },
+        "description": "Similarity Match evaluator checks if the generated answer is similar to the expected answer. You need to provide the similarity threshold. It uses the Jaccard similarity to compare the answers.",
     },
     {
         "name": "Regex Test",
         "key": "auto_regex_test",
         "direct_use": False,
+        "description": "Regex Test evaluator checks if the generated answer matches a regular expression pattern. You need to provide the regex expression and specify whether an answer is correct if it matches or does not match the regex.",
         "settings_template": {
             "regex_pattern": {
                 "label": "Regex Pattern",
@@ -52,6 +54,7 @@
                 "description": "The name of the field in the JSON output that you wish to evaluate",
             }
         },
+        "description": "JSON Field Match evaluator compares specific fields within JSON (JavaScript Object Notation) data. This matching can involve finding similarities or correspondences between fields in different JSON objects.",
     },
     {
         "name": "AI Critique",
@@ -65,6 +68,7 @@
                 "description": "Template for AI critique prompts",
             }
         },
+        "description": "AI Critique evaluator sends the generated answer and the correct_answer to an LLM model and uses it to evaluate the correctness of the answer. You need to provide the evaluation prompt (or use the default prompt).",
     },
     {
         "name": "Code Evaluation",
@@ -78,6 +82,7 @@
                 "description": "Code for evaluating submissions",
             }
         },
+        "description": "Code Evaluation allows you to write your own evaluator in Python. You need to provide the Python code for the evaluator.",
     },
     {
         "name": "Webhook test",
@@ -97,6 +102,7 @@
                 "description": "Request body for webhook URL",
             },
         },
+        "description": "Webhook test evaluator sends the generated answer and the correct_answer to a webhook and expects a response indicating the correctness of the answer. You need to provide the URL of the webhook.",
     },
     {
         "name": "A/B Test",
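
Several of these descriptions name the underlying technique; the Similarity Match entry, for instance, points to Jaccard similarity. As a reference for what that comparison computes, a minimal sketch assuming whitespace tokenization (the actual backend implementation may tokenize differently; this is not code from the PR):

    // Jaccard similarity: |intersection| / |union| over token sets.
    // Illustrative only.
    function jaccardSimilarity(a: string, b: string): number {
        const tokensA = new Set(a.toLowerCase().split(/\s+/).filter(Boolean))
        const tokensB = new Set(b.toLowerCase().split(/\s+/).filter(Boolean))
        const intersection = [...tokensA].filter((t) => tokensB.has(t)).length
        const union = new Set([...tokensA, ...tokensB]).size
        return union === 0 ? 1 : intersection / union
    }

    // With the configured similarity_threshold, an answer passes when
    // jaccardSimilarity(generated, expected) >= threshold, e.g.
    // jaccardSimilarity("the cat sat", "the cat stood") === 0.5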
agenta-web/cypress/e2e/eval.evaluators.cy.ts (12 changes: 7 additions & 5 deletions)

@@ -20,16 +20,18 @@ describe("Evaluators CRUD Operations Test", function () {
     it("Should successfully create an Evaluator", () => {
         cy.get('[data-cy="evaluator-card"]').should("have.length", 1)
         cy.get(".ant-space > :nth-child(2) > .ant-btn").click()
-        cy.get('[data-cy="new-evaluator-modal-input"]').type(newEvalName)
-        cy.get('[data-cy="new-evaluator-modal-button-0"]').click()
-        cy.get(".ant-modal-footer > .ant-btn-primary > :nth-child(2)").click()
+        cy.get('[data-cy="new-evaluator-modal"]').should("exist")
+        cy.get('[data-cy^="select-new-evaluator"]').eq(0).click()
+        cy.get('[data-cy="configure-new-evaluator-modal"]').should("exist")
+        cy.get('[data-cy="configure-new-evaluator-modal-input"]').type(newEvalName)
+        cy.get('[data-cy="configure-new-evaluator-modal-save-btn"]').click()
         cy.get('[data-cy="evaluator-card"]').should("have.length", 2)
     })

     it("Should click on the edit button and successfully edit an evaluator", () => {
         cy.get('[data-cy^="evaluator-card-edit-button"]').eq(0).click()
-        cy.get('[data-cy="new-evaluator-modal-input"]').type("edit")
-        cy.get(".ant-modal-footer > .ant-btn-primary > .ant-btn-icon > .anticon > svg").click()
+        cy.get('[data-cy="configure-new-evaluator-modal-input"]').type("edit")
+        cy.get('[data-cy="configure-new-evaluator-modal-save-btn"]').click()
     })

     it("Should click on the delete button and successfully delete an evaluator", () => {
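
The rewritten steps drop the brittle Ant Design class chains (.ant-modal-footer > .ant-btn-primary > ...) in favor of stable data-cy hooks, and [data-cy^="select-new-evaluator"] is an attribute prefix selector, so .eq(0) picks the first of however many evaluator option cards render. A sketch of the markup this selector implies; the component below is hypothetical, inferred from the test rather than shown in this diff:

    // Hypothetical option list: each card gets an indexed data-cy hook,
    // so the prefix selector [data-cy^="select-new-evaluator"] matches all.
    import React from "react"

    type EvaluatorOption = {key: string; name: string; description?: string}

    const EvaluatorOptions: React.FC<{
        evaluators: EvaluatorOption[]
        onSelect: (evaluator: EvaluatorOption) => void
    }> = ({evaluators, onSelect}) => (
        <>
            {evaluators.map((evaluator, index) => (
                <div
                    key={evaluator.key}
                    data-cy={`select-new-evaluator-${index}`}
                    onClick={() => onSelect(evaluator)}
                >
                    {evaluator.name}
                </div>
            ))}
        </>
    )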
@@ -38,6 +38,7 @@ const Evaluators: React.FC<Props> = () => {
     const appId = useAppId()
     const [evaluatorConfigs, setEvaluatorConfigs] = useAtom(evaluatorConfigsAtom)
     const [newEvalModalOpen, setNewEvalModalOpen] = useState(false)
+    const [newEvalModalConfigOpen, setNewEvalModalConfigOpen] = useState(false)
     const [editIndex, setEditIndex] = useState<number>(-1)
     const [fetching, setFetching] = useState(false)
     const [searchTerm, setSearchTerm] = useState<string>("")
@@ -61,7 +62,8 @@ const Evaluators: React.FC<Props> = () => {
         <div className={classes.root}>
             <Space className={classes.buttonsGroup}>
                 <Input.Search
-                    onSearch={(term) => setSearchTerm(term)}
+                    value={searchTerm}
+                    onChange={(e) => setSearchTerm(e.target.value)}
                     placeholder="Search"
                     allowClear
                     enterButton
@@ -88,7 +90,7 @@
                     evaluatorConfig={item}
                     onEdit={() => {
                         setEditIndex(ix)
-                        setNewEvalModalOpen(true)
+                        setNewEvalModalConfigOpen(true)
                     }}
                     onSuccessDelete={fetcher}
                 />
@@ -99,11 +101,14 @@
             <NewEvaluatorModal
                 open={newEvalModalOpen}
-                onCancel={() => setNewEvalModalOpen(false)}
                 onSuccess={() => {
                     setNewEvalModalOpen(false)
+                    setNewEvalModalConfigOpen(false)
                     fetcher()
                 }}
+                newEvalModalConfigOpen={newEvalModalConfigOpen}
+                setNewEvalModalConfigOpen={setNewEvalModalConfigOpen}
+                setNewEvalModalOpen={setNewEvalModalOpen}
                 editMode={editIndex !== -1}
                 initialValues={evaluatorConfigs[editIndex]}
             />
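
Two things happen at this call site. The Input.Search becomes fully controlled (value plus onChange), so the evaluator list filters on every keystroke instead of only on submit. And NewEvaluatorModal now drives a two-step flow: open gates the evaluator-selection step, newEvalModalConfigOpen gates the configuration step, and the setters are passed down so the modal can move between steps itself (note that onEdit skips straight to the configuration step). A sketch of the props contract this usage implies; the shape is inferred from the call site, since the modal's own file is not part of this excerpt:

    // Props implied by the call site above; inferred, not copied from the PR.
    interface NewEvaluatorModalProps {
        open: boolean // step 1: pick an evaluator type
        newEvalModalConfigOpen: boolean // step 2: name and configure it
        setNewEvalModalOpen: (open: boolean) => void
        setNewEvalModalConfigOpen: (open: boolean) => void
        onSuccess: () => void // parent closes both steps and refetches
        editMode: boolean // true when editIndex !== -1
        initialValues?: EvaluatorConfig // prefill when editing an existing config
    }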
