Skip to content

Commit

Permalink
Merge branch 'main' into issue-1232/-make-inputs-unmodifyable-in-human-evaluation-view
Browse files Browse the repository at this point in the history
  • Loading branch information
bekossy committed Feb 26, 2024
2 parents 55aa833 + 64f6847 commit 5913f95
Show file tree
Hide file tree
Showing 12 changed files with 289 additions and 101 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ class Evaluator(BaseModel):
key: str
direct_use: bool
settings_template: dict
description: Optional[str]


class EvaluatorConfig(BaseModel):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,13 @@
"description": "The threshold value for similarity comparison",
}
},
"description": "Similarity Match evaluator checks if the generated answer is similar to the expected answer. You need to provide the similarity threshold. It uses the Jaccard similarity to compare the answers.",
},
{
"name": "Regex Test",
"key": "auto_regex_test",
"direct_use": False,
"description": "Regex Test evaluator checks if the generated answer matches a regular expression pattern. You need to provide the regex expression and specify whether an answer is correct if it matches or does not match the regex.",
"settings_template": {
"regex_pattern": {
"label": "Regex Pattern",
Expand Down Expand Up @@ -52,6 +54,7 @@
"description": "The name of the field in the JSON output that you wish to evaluate",
}
},
"description": "JSON Field Match evaluator compares specific fields within JSON (JavaScript Object Notation) data. This matching can involve finding similarities or correspondences between fields in different JSON objects.",
},
{
"name": "AI Critique",
Expand All @@ -65,6 +68,7 @@
"description": "Template for AI critique prompts",
}
},
"description": "AI Critique evaluator sends the generated answer and the correct_answer to an LLM model and uses it to evaluate the correctness of the answer. You need to provide the evaluation prompt (or use the default prompt).",
},
{
"name": "Code Evaluation",
Expand All @@ -78,6 +82,7 @@
"description": "Code for evaluating submissions",
}
},
"description": "Code Evaluation allows you to write your own evaluator in Python. You need to provide the Python code for the evaluator.",
},
{
"name": "Webhook test",
Expand All @@ -97,6 +102,7 @@
"description": "Request body for webhook URL",
},
},
"description": "Webhook test evaluator sends the generated answer and the correct_answer to a webhook and expects a response indicating the correctness of the answer. You need to provide the URL of the webhook.",
},
{
"name": "A/B Test",
Expand Down
32 changes: 16 additions & 16 deletions agenta-backend/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion agenta-backend/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ packages = [{include = "agenta_backend"}]

[tool.poetry.dependencies]
python = "^3.9"
fastapi = "^0.95.1"
fastapi = "^0.109.1"
pydantic = "^1.10.7"
docker = "^6.1.1"
toml = "^0.10.2"
Expand Down
1 change: 1 addition & 0 deletions agenta-cli/agenta/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ def init(app_name: str, backend_host: str):
click.echo(click.style(f"Error: {ex}", fg="red"))
sys.exit(1)

filtered_org = None
if where_question == "On agenta cloud":
which_organization = questionary.select(
"Which organization do you want to create the app for?",
Expand Down
3 changes: 2 additions & 1 deletion agenta-cli/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "agenta"
version = "0.12.1"
version = "0.12.2"
description = "The SDK for agenta is an open-source LLMOps platform."
readme = "README.md"
authors = ["Mahmoud Mabrouk <[email protected]>"]
Expand Down Expand Up @@ -28,6 +28,7 @@ python-multipart = "^0.0.6"
importlib-metadata = "^6.7.0"
posthog = "^3.1.0"
pydantic = "1.10.13"
httpx = "^0.27.0"

[tool.poetry.dev-dependencies]
pytest = "^6.2"
Expand Down
2 changes: 1 addition & 1 deletion agenta-web/cypress/e2e/eval.comparison.cy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ describe("Evaluation Comparison Test", function () {

it("Should select 2 evaluations, click on the compare button, and successfully navigate to the comparison page", () => {
cy.get("#ag-33-input").check()
cy.get("#ag-35-input").check()
cy.get("#ag-39-input").check()
cy.get('[data-cy="evaluation-results-compare-button"]').should("not.be.disabled")
cy.get('[data-cy="evaluation-results-compare-button"]').click()
cy.location("pathname").should("include", "/evaluations/compare")
Expand Down
12 changes: 7 additions & 5 deletions agenta-web/cypress/e2e/eval.evaluators.cy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,18 @@ describe("Evaluators CRUD Operations Test", function () {
it("Should successfully create an Evaluator", () => {
cy.get('[data-cy="evaluator-card"]').should("have.length", 1)
cy.get(".ant-space > :nth-child(2) > .ant-btn").click()
cy.get('[data-cy="new-evaluator-modal-input"]').type(newEvalName)
cy.get('[data-cy="new-evaluator-modal-button-0"]').click()
cy.get(".ant-modal-footer > .ant-btn-primary > :nth-child(2)").click()
cy.get('[data-cy="new-evaluator-modal"]').should("exist")
cy.get('[data-cy^="select-new-evaluator"]').eq(0).click()
cy.get('[data-cy="configure-new-evaluator-modal"]').should("exist")
cy.get('[data-cy="configure-new-evaluator-modal-input"]').type(newEvalName)
cy.get('[data-cy="configure-new-evaluator-modal-save-btn"]').click()
cy.get('[data-cy="evaluator-card"]').should("have.length", 2)
})

it("Should click on the edit button and successfully edit an evaluator", () => {
cy.get('[data-cy^="evaluator-card-edit-button"]').eq(0).click()
cy.get('[data-cy="new-evaluator-modal-input"]').type("edit")
cy.get(".ant-modal-footer > .ant-btn-primary > .ant-btn-icon > .anticon > svg").click()
cy.get('[data-cy="configure-new-evaluator-modal-input"]').type("edit")
cy.get('[data-cy="configure-new-evaluator-modal-save-btn"]').click()
})

it("Should click on the delete button and successfully delete an evaluator", () => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,12 @@ const EvaluationResults: React.FC<Props> = () => {
</Link>
)
},
onCellClicked(params: any) {
const {revisions, variants} = params.data
router.push(
`/apps/${appId}/playground?variant=${variants[0].variantName}&revision=${revisions[0]}`,
)
},
valueGetter: (params) =>
`${params.data?.variants[0].variantName} #${params.data?.revisions[0]}`,
headerName: "Variant",
Expand All @@ -198,6 +204,9 @@ const EvaluationResults: React.FC<Props> = () => {
minWidth: 160,
tooltipValueGetter: (params) => params.value,
...getFilterParams("text"),
onCellClicked(params) {
router.push(`/apps/${appId}/testsets/${params.data?.testset.id}`)
},
},
...evaluatorConfigs.map(
(config) =>
Expand Down Expand Up @@ -328,6 +337,9 @@ const EvaluationResults: React.FC<Props> = () => {
ref={gridRef as any}
rowData={evaluations}
columnDefs={colDefs}
rowStyle={{
cursor: "pointer",
}}
getRowId={(params) => params.data.id}
onRowClicked={(params) => {
// ignore clicks on the checkbox col
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ const Evaluators: React.FC<Props> = () => {
const appId = useAppId()
const [evaluatorConfigs, setEvaluatorConfigs] = useAtom(evaluatorConfigsAtom)
const [newEvalModalOpen, setNewEvalModalOpen] = useState(false)
const [newEvalModalConfigOpen, setNewEvalModalConfigOpen] = useState(false)
const [editIndex, setEditIndex] = useState<number>(-1)
const [fetching, setFetching] = useState(false)
const [searchTerm, setSearchTerm] = useState<string>("")
Expand All @@ -61,7 +62,8 @@ const Evaluators: React.FC<Props> = () => {
<div className={classes.root}>
<Space className={classes.buttonsGroup}>
<Input.Search
onSearch={(term) => setSearchTerm(term)}
value={searchTerm}
onChange={(e) => setSearchTerm(e.target.value)}
placeholder="Search"
allowClear
enterButton
Expand All @@ -88,7 +90,7 @@ const Evaluators: React.FC<Props> = () => {
evaluatorConfig={item}
onEdit={() => {
setEditIndex(ix)
setNewEvalModalOpen(true)
setNewEvalModalConfigOpen(true)
}}
onSuccessDelete={fetcher}
/>
Expand All @@ -99,11 +101,14 @@ const Evaluators: React.FC<Props> = () => {

<NewEvaluatorModal
open={newEvalModalOpen}
onCancel={() => setNewEvalModalOpen(false)}
onSuccess={() => {
setNewEvalModalOpen(false)
setNewEvalModalConfigOpen(false)
fetcher()
}}
newEvalModalConfigOpen={newEvalModalConfigOpen}
setNewEvalModalConfigOpen={setNewEvalModalConfigOpen}
setNewEvalModalOpen={setNewEvalModalOpen}
editMode={editIndex !== -1}
initialValues={evaluatorConfigs[editIndex]}
/>
Expand Down
Loading

0 comments on commit 5913f95

Please sign in to comment.