Merge branch 'main' into issue-1232/-make-inputs-unmodifyable-in-human-evaluation-view
Agenta-AI · Feb 26, 2024 · 5913f95
2 parents 55aa833 + 64f6847
commit 5913f95
Show file tree

Hide file tree

Showing 12 changed files with 289 additions and 101 deletions.
diff --git a/agenta-backend/agenta_backend/models/api/evaluation_model.py b/agenta-backend/agenta_backend/models/api/evaluation_model.py
@@ -10,6 +10,7 @@ class Evaluator(BaseModel):
     key: str
     direct_use: bool
     settings_template: dict
+    description: Optional[str]
 
 
 class EvaluatorConfig(BaseModel):

diff --git a/agenta-backend/agenta_backend/resources/evaluators/evaluators.py b/agenta-backend/agenta_backend/resources/evaluators/evaluators.py
@@ -20,11 +20,13 @@
                 "description": "The threshold value for similarity comparison",
             }
         },
+        "description": "Similarity Match evaluator checks if the generated answer is similar to the expected answer. You need to provide the similarity threshold. It uses the Jaccard similarity to compare the answers.",
     },
     {
         "name": "Regex Test",
         "key": "auto_regex_test",
         "direct_use": False,
+        "description": "Regex Test evaluator checks if the generated answer matches a regular expression pattern. You need to provide the regex expression and specify whether an answer is correct if it matches or does not match the regex.",
         "settings_template": {
             "regex_pattern": {
                 "label": "Regex Pattern",
@@ -52,6 +54,7 @@
                 "description": "The name of the field in the JSON output that you wish to evaluate",
             }
         },
+        "description": "JSON Field Match evaluator compares specific fields within JSON (JavaScript Object Notation) data. This matching can involve finding similarities or correspondences between fields in different JSON objects.",
     },
     {
         "name": "AI Critique",
@@ -65,6 +68,7 @@
                 "description": "Template for AI critique prompts",
             }
         },
+        "description": "AI Critique evaluator sends the generated answer and the correct_answer to an LLM model and uses it to evaluate the correctness of the answer. You need to provide the evaluation prompt (or use the default prompt).",
     },
     {
         "name": "Code Evaluation",
@@ -78,6 +82,7 @@
                 "description": "Code for evaluating submissions",
             }
         },
+        "description": "Code Evaluation allows you to write your own evaluator in Python. You need to provide the Python code for the evaluator.",
     },
     {
         "name": "Webhook test",
@@ -97,6 +102,7 @@
                 "description": "Request body for webhook URL",
             },
         },
+        "description": "Webhook test evaluator sends the generated answer and the correct_answer to a webhook and expects a response indicating the correctness of the answer. You need to provide the URL of the webhook.",
     },
     {
         "name": "A/B Test",

diff --git a/agenta-backend/poetry.lock b/agenta-backend/poetry.lock
diff --git a/agenta-backend/pyproject.toml b/agenta-backend/pyproject.toml
@@ -8,7 +8,7 @@ packages = [{include = "agenta_backend"}]
 
 [tool.poetry.dependencies]
 python = "^3.9"
-fastapi = "^0.95.1"
+fastapi = "^0.109.1"
 pydantic = "^1.10.7"
 docker = "^6.1.1"
 toml = "^0.10.2"

diff --git a/agenta-cli/agenta/cli/main.py b/agenta-cli/agenta/cli/main.py
@@ -162,6 +162,7 @@ def init(app_name: str, backend_host: str):
                     click.echo(click.style(f"Error: {ex}", fg="red"))
                     sys.exit(1)
 
+        filtered_org = None
         if where_question == "On agenta cloud":
             which_organization = questionary.select(
                 "Which organization do you want to create the app for?",

diff --git a/agenta-cli/pyproject.toml b/agenta-cli/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "agenta"
-version = "0.12.1"
+version = "0.12.2"
 description = "The SDK for agenta is an open-source LLMOps platform."
 readme = "README.md"
 authors = ["Mahmoud Mabrouk <[email protected]>"]
@@ -28,6 +28,7 @@ python-multipart = "^0.0.6"
 importlib-metadata = "^6.7.0"
 posthog = "^3.1.0"
 pydantic = "1.10.13"
+httpx = "^0.27.0"
 
 [tool.poetry.dev-dependencies]
 pytest = "^6.2"

diff --git a/agenta-web/cypress/e2e/eval.comparison.cy.ts b/agenta-web/cypress/e2e/eval.comparison.cy.ts
@@ -33,7 +33,7 @@ describe("Evaluation Comparison Test", function () {
 
         it("Should select 2 evaluations, click on the compare button, and successfully navigate to the comparison page", () => {
             cy.get("#ag-33-input").check()
-            cy.get("#ag-35-input").check()
+            cy.get("#ag-39-input").check()
             cy.get('[data-cy="evaluation-results-compare-button"]').should("not.be.disabled")
             cy.get('[data-cy="evaluation-results-compare-button"]').click()
             cy.location("pathname").should("include", "/evaluations/compare")

diff --git a/agenta-web/cypress/e2e/eval.evaluators.cy.ts b/agenta-web/cypress/e2e/eval.evaluators.cy.ts
@@ -20,16 +20,18 @@ describe("Evaluators CRUD Operations Test", function () {
         it("Should successfully create an Evaluator", () => {
             cy.get('[data-cy="evaluator-card"]').should("have.length", 1)
             cy.get(".ant-space > :nth-child(2) > .ant-btn").click()
-            cy.get('[data-cy="new-evaluator-modal-input"]').type(newEvalName)
-            cy.get('[data-cy="new-evaluator-modal-button-0"]').click()
-            cy.get(".ant-modal-footer > .ant-btn-primary > :nth-child(2)").click()
+            cy.get('[data-cy="new-evaluator-modal"]').should("exist")
+            cy.get('[data-cy^="select-new-evaluator"]').eq(0).click()
+            cy.get('[data-cy="configure-new-evaluator-modal"]').should("exist")
+            cy.get('[data-cy="configure-new-evaluator-modal-input"]').type(newEvalName)
+            cy.get('[data-cy="configure-new-evaluator-modal-save-btn"]').click()
             cy.get('[data-cy="evaluator-card"]').should("have.length", 2)
         })
 
         it("Should click on the edit button and successfully edit an evaluator", () => {
             cy.get('[data-cy^="evaluator-card-edit-button"]').eq(0).click()
-            cy.get('[data-cy="new-evaluator-modal-input"]').type("edit")
-            cy.get(".ant-modal-footer > .ant-btn-primary > .ant-btn-icon > .anticon > svg").click()
+            cy.get('[data-cy="configure-new-evaluator-modal-input"]').type("edit")
+            cy.get('[data-cy="configure-new-evaluator-modal-save-btn"]').click()
         })
 
         it("Should click on the delete button and successfully delete an evaluator", () => {

diff --git a/agenta-web/src/components/pages/evaluations/evaluationResults/EvaluationResults.tsx b/agenta-web/src/components/pages/evaluations/evaluationResults/EvaluationResults.tsx
@@ -180,6 +180,12 @@ const EvaluationResults: React.FC<Props> = () => {
                         </Link>
                     )
                 },
+                onCellClicked(params: any) {
+                    const {revisions, variants} = params.data
+                    router.push(
+                        `/apps/${appId}/playground?variant=${variants[0].variantName}&revision=${revisions[0]}`,
+                    )
+                },
                 valueGetter: (params) =>
                     `${params.data?.variants[0].variantName} #${params.data?.revisions[0]}`,
                 headerName: "Variant",
@@ -198,6 +204,9 @@ const EvaluationResults: React.FC<Props> = () => {
                 minWidth: 160,
                 tooltipValueGetter: (params) => params.value,
                 ...getFilterParams("text"),
+                onCellClicked(params) {
+                    router.push(`/apps/${appId}/testsets/${params.data?.testset.id}`)
+                },
             },
             ...evaluatorConfigs.map(
                 (config) =>
@@ -328,6 +337,9 @@ const EvaluationResults: React.FC<Props> = () => {
                                 ref={gridRef as any}
                                 rowData={evaluations}
                                 columnDefs={colDefs}
+                                rowStyle={{
+                                    cursor: "pointer",
+                                }}
                                 getRowId={(params) => params.data.id}
                                 onRowClicked={(params) => {
                                     // ignore clicks on the checkbox col

diff --git a/agenta-web/src/components/pages/evaluations/evaluators/Evaluators.tsx b/agenta-web/src/components/pages/evaluations/evaluators/Evaluators.tsx
@@ -38,6 +38,7 @@ const Evaluators: React.FC<Props> = () => {
     const appId = useAppId()
     const [evaluatorConfigs, setEvaluatorConfigs] = useAtom(evaluatorConfigsAtom)
     const [newEvalModalOpen, setNewEvalModalOpen] = useState(false)
+    const [newEvalModalConfigOpen, setNewEvalModalConfigOpen] = useState(false)
     const [editIndex, setEditIndex] = useState<number>(-1)
     const [fetching, setFetching] = useState(false)
     const [searchTerm, setSearchTerm] = useState<string>("")
@@ -61,7 +62,8 @@ const Evaluators: React.FC<Props> = () => {
         <div className={classes.root}>
             <Space className={classes.buttonsGroup}>
                 <Input.Search
-                    onSearch={(term) => setSearchTerm(term)}
+                    value={searchTerm}
+                    onChange={(e) => setSearchTerm(e.target.value)}
                     placeholder="Search"
                     allowClear
                     enterButton
@@ -88,7 +90,7 @@ const Evaluators: React.FC<Props> = () => {
                                 evaluatorConfig={item}
                                 onEdit={() => {
                                     setEditIndex(ix)
-                                    setNewEvalModalOpen(true)
+                                    setNewEvalModalConfigOpen(true)
                                 }}
                                 onSuccessDelete={fetcher}
                             />
@@ -99,11 +101,14 @@ const Evaluators: React.FC<Props> = () => {
 
             <NewEvaluatorModal
                 open={newEvalModalOpen}
-                onCancel={() => setNewEvalModalOpen(false)}
                 onSuccess={() => {
                     setNewEvalModalOpen(false)
+                    setNewEvalModalConfigOpen(false)
                     fetcher()
                 }}
+                newEvalModalConfigOpen={newEvalModalConfigOpen}
+                setNewEvalModalConfigOpen={setNewEvalModalConfigOpen}
+                setNewEvalModalOpen={setNewEvalModalOpen}
                 editMode={editIndex !== -1}
                 initialValues={evaluatorConfigs[editIndex]}
             />