
Commit 64f6847

Merge pull request #1407 from Agenta-AI/issue-1392/-Improving-the-add-new-evaluation-modal

Improving the add new evaluation modal
aakrem authored Feb 25, 2024
2 parents 56d3d32 + a2f9490 commit 64f6847
Showing 6 changed files with 256 additions and 82 deletions.
@@ -10,6 +10,7 @@ class Evaluator(BaseModel):
     key: str
     direct_use: bool
     settings_template: dict
+    description: Optional[str]


 class EvaluatorConfig(BaseModel):
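
The backend's evaluator schema now carries an optional description, which is what the reworked selection modal surfaces for each evaluator. For orientation, a minimal TypeScript mirror of this shape as the frontend might type it; the interface itself is illustrative, and only the fields visible in the hunk (plus name, which appears in the evaluator templates below) come from this diff:

    // Illustrative mirror of the Pydantic Evaluator model above; the
    // interface name and placement are assumptions, not part of the PR.
    interface Evaluator {
        name: string // present in the evaluator templates in the next file
        key: string
        direct_use: boolean
        settings_template: Record<string, unknown>
        description?: string // the optional field added by this PR
    }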
@@ -20,11 +20,13 @@
                 "description": "The threshold value for similarity comparison",
             }
         },
+        "description": "Similarity Match evaluator checks if the generated answer is similar to the expected answer. You need to provide the similarity threshold. It uses the Jaccard similarity to compare the answers.",
     },
     {
         "name": "Regex Test",
         "key": "auto_regex_test",
         "direct_use": False,
+        "description": "Regex Test evaluator checks if the generated answer matches a regular expression pattern. You need to provide the regex expression and specify whether an answer is correct if it matches or does not match the regex.",
         "settings_template": {
             "regex_pattern": {
                 "label": "Regex Pattern",
@@ -52,6 +54,7 @@
                 "description": "The name of the field in the JSON output that you wish to evaluate",
             }
         },
+        "description": "JSON Field Match evaluator compares specific fields within JSON (JavaScript Object Notation) data. This matching can involve finding similarities or correspondences between fields in different JSON objects.",
     },
     {
         "name": "AI Critique",
@@ -65,6 +68,7 @@
                 "description": "Template for AI critique prompts",
             }
         },
+        "description": "AI Critique evaluator sends the generated answer and the correct_answer to an LLM model and uses it to evaluate the correctness of the answer. You need to provide the evaluation prompt (or use the default prompt).",
     },
     {
         "name": "Code Evaluation",
@@ -78,6 +82,7 @@
                 "description": "Code for evaluating submissions",
             }
         },
+        "description": "Code Evaluation allows you to write your own evaluator in Python. You need to provide the Python code for the evaluator.",
     },
     {
         "name": "Webhook test",
@@ -97,6 +102,7 @@
                 "description": "Request body for webhook URL",
             },
         },
+        "description": "Webhook test evaluator sends the generated answer and the correct_answer to a webhook and expects a response indicating the correctness of the answer. You need to provide the URL of the webhook.",
     },
     {
         "name": "A/B Test",
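
Several of these descriptions name the underlying technique; the Similarity Match entry, for instance, points to Jaccard similarity. As a reference for what that comparison computes, a minimal sketch assuming whitespace tokenization (the actual backend implementation may tokenize differently; this is not code from the PR):

    // Jaccard similarity: |intersection| / |union| over token sets.
    // Illustrative only.
    function jaccardSimilarity(a: string, b: string): number {
        const tokensA = new Set(a.toLowerCase().split(/\s+/).filter(Boolean))
        const tokensB = new Set(b.toLowerCase().split(/\s+/).filter(Boolean))
        const intersection = [...tokensA].filter((t) => tokensB.has(t)).length
        const union = new Set([...tokensA, ...tokensB]).size
        return union === 0 ? 1 : intersection / union
    }

    // With the configured similarity_threshold, an answer passes when
    // jaccardSimilarity(generated, expected) >= threshold, e.g.
    // jaccardSimilarity("the cat sat", "the cat stood") === 0.5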
agenta-web/cypress/e2e/eval.evaluators.cy.ts (12 changes: 7 additions & 5 deletions)

@@ -20,16 +20,18 @@ describe("Evaluators CRUD Operations Test", function () {
     it("Should successfully create an Evaluator", () => {
         cy.get('[data-cy="evaluator-card"]').should("have.length", 1)
         cy.get(".ant-space > :nth-child(2) > .ant-btn").click()
-        cy.get('[data-cy="new-evaluator-modal-input"]').type(newEvalName)
-        cy.get('[data-cy="new-evaluator-modal-button-0"]').click()
-        cy.get(".ant-modal-footer > .ant-btn-primary > :nth-child(2)").click()
+        cy.get('[data-cy="new-evaluator-modal"]').should("exist")
+        cy.get('[data-cy^="select-new-evaluator"]').eq(0).click()
+        cy.get('[data-cy="configure-new-evaluator-modal"]').should("exist")
+        cy.get('[data-cy="configure-new-evaluator-modal-input"]').type(newEvalName)
+        cy.get('[data-cy="configure-new-evaluator-modal-save-btn"]').click()
         cy.get('[data-cy="evaluator-card"]').should("have.length", 2)
     })

     it("Should click on the edit button and successfully edit an evaluator", () => {
         cy.get('[data-cy^="evaluator-card-edit-button"]').eq(0).click()
-        cy.get('[data-cy="new-evaluator-modal-input"]').type("edit")
-        cy.get(".ant-modal-footer > .ant-btn-primary > .ant-btn-icon > .anticon > svg").click()
+        cy.get('[data-cy="configure-new-evaluator-modal-input"]').type("edit")
+        cy.get('[data-cy="configure-new-evaluator-modal-save-btn"]').click()
     })

     it("Should click on the delete button and successfully delete an evaluator", () => {
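
The rewritten steps drop the brittle Ant Design class chains (.ant-modal-footer > .ant-btn-primary > ...) in favor of stable data-cy hooks, and [data-cy^="select-new-evaluator"] is an attribute prefix selector, so .eq(0) picks the first of however many evaluator option cards render. A sketch of the markup this selector implies; the component below is hypothetical, inferred from the test rather than shown in this diff:

    // Hypothetical option list: each card gets an indexed data-cy hook,
    // so the prefix selector [data-cy^="select-new-evaluator"] matches all.
    import React from "react"

    type EvaluatorOption = {key: string; name: string; description?: string}

    const EvaluatorOptions: React.FC<{
        evaluators: EvaluatorOption[]
        onSelect: (evaluator: EvaluatorOption) => void
    }> = ({evaluators, onSelect}) => (
        <>
            {evaluators.map((evaluator, index) => (
                <div
                    key={evaluator.key}
                    data-cy={`select-new-evaluator-${index}`}
                    onClick={() => onSelect(evaluator)}
                >
                    {evaluator.name}
                </div>
            ))}
        </>
    )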
@@ -38,6 +38,7 @@ const Evaluators: React.FC<Props> = () => {
     const appId = useAppId()
     const [evaluatorConfigs, setEvaluatorConfigs] = useAtom(evaluatorConfigsAtom)
     const [newEvalModalOpen, setNewEvalModalOpen] = useState(false)
+    const [newEvalModalConfigOpen, setNewEvalModalConfigOpen] = useState(false)
     const [editIndex, setEditIndex] = useState<number>(-1)
     const [fetching, setFetching] = useState(false)
     const [searchTerm, setSearchTerm] = useState<string>("")
@@ -61,7 +62,8 @@ const Evaluators: React.FC<Props> = () => {
         <div className={classes.root}>
             <Space className={classes.buttonsGroup}>
                 <Input.Search
-                    onSearch={(term) => setSearchTerm(term)}
+                    value={searchTerm}
+                    onChange={(e) => setSearchTerm(e.target.value)}
                     placeholder="Search"
                     allowClear
                     enterButton
@@ -88,7 +90,7 @@
                     evaluatorConfig={item}
                     onEdit={() => {
                         setEditIndex(ix)
-                        setNewEvalModalOpen(true)
+                        setNewEvalModalConfigOpen(true)
                     }}
                     onSuccessDelete={fetcher}
                 />
@@ -99,11 +101,14 @@
             <NewEvaluatorModal
                 open={newEvalModalOpen}
-                onCancel={() => setNewEvalModalOpen(false)}
                 onSuccess={() => {
                     setNewEvalModalOpen(false)
+                    setNewEvalModalConfigOpen(false)
                     fetcher()
                 }}
+                newEvalModalConfigOpen={newEvalModalConfigOpen}
+                setNewEvalModalConfigOpen={setNewEvalModalConfigOpen}
+                setNewEvalModalOpen={setNewEvalModalOpen}
                 editMode={editIndex !== -1}
                 initialValues={evaluatorConfigs[editIndex]}
             />
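
Two things happen at this call site. The Input.Search becomes fully controlled (value plus onChange), so the evaluator list filters on every keystroke instead of only on submit. And NewEvaluatorModal now drives a two-step flow: open gates the evaluator-selection step, newEvalModalConfigOpen gates the configuration step, and the setters are passed down so the modal can move between steps itself (note that onEdit skips straight to the configuration step). A sketch of the props contract this usage implies; the shape is inferred from the call site, since the modal's own file is not part of this excerpt:

    // Props implied by the call site above; inferred, not copied from the PR.
    interface NewEvaluatorModalProps {
        open: boolean // step 1: pick an evaluator type
        newEvalModalConfigOpen: boolean // step 2: name and configure it
        setNewEvalModalOpen: (open: boolean) => void
        setNewEvalModalConfigOpen: (open: boolean) => void
        onSuccess: () => void // parent closes both steps and refetches
        editMode: boolean // true when editIndex !== -1
        initialValues?: EvaluatorConfig // prefill when editing an existing config
    }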
