
Evaluation in backend #1036

Closed · wants to merge 11 commits
1 change: 1 addition & 0 deletions .gitignore
@@ -56,3 +56,4 @@ agenta-web/cypress/screenshots/
agenta-web/cypress/videos/
.nextjs_cache/

rabbitmq_data/
23 changes: 23 additions & 0 deletions agenta-backend/agenta_backend/celery_config.py
@@ -0,0 +1,23 @@
import os
from kombu import Exchange, Queue

# Read broker and result-backend settings from environment variables
BROKER_URL = os.getenv('CELERY_BROKER_URL')
CELERY_RESULT_BACKEND = os.getenv('CELERY_RESULT_BACKEND')
CELERY_TASK_SERIALIZER = 'json'
CELERY_ACCEPT_CONTENT = ['json']
CELERY_RESULT_SERIALIZER = 'json'
CELERY_TIMEZONE = 'UTC'
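For context: with the RabbitMQ service implied by the new rabbitmq_data/ ignore entry, CELERY_BROKER_URL would typically be an AMQP URL such as amqp://guest:guest@rabbitmq:5672//, and CELERY_RESULT_BACKEND something like rpc:// or a Redis URL; both example values are assumptions, not taken from this PR.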

# TODO: Can we improve this to be more dynamic?
@aybruhm (Member) commented on Dec 11, 2023:

Yes, we can. To automatically discover tasks, you can use the autodiscover_tasks function from celery and specify the path to the modules containing the tasks. This function will automatically discover tasks by inspecting the installed apps and modules, and you won't have to manually define the queues and exchanges for each task.

celery_app = Celery("your_app_name")

# Set the broker URL and other configurations
....

# Autodiscover tasks
celery_app.autodiscover_tasks(["agenta_backend.tasks.evaluations"])

CELERY_QUEUES = (
    Queue('agenta_backend.tasks.evaluations.auto_exact_match',
          Exchange('agenta_backend.tasks.evaluations.auto_exact_match'),
          routing_key='agenta_backend.tasks.evaluations.auto_exact_match'),
    Queue('agenta_backend.tasks.evaluations.auto_similarity_match',
          Exchange('agenta_backend.tasks.evaluations.auto_similarity_match'),
          routing_key='agenta_backend.tasks.evaluations.auto_similarity_match'),
    Queue('agenta_backend.tasks.evaluations.auto_regex_test',
          Exchange('agenta_backend.tasks.evaluations.auto_regex_test'),
          routing_key='agenta_backend.tasks.evaluations.auto_regex_test'),
)

Member commented on the queue-per-evaluator design:

I don't see the advantage of having multiple tasks, one per eval (see the comment in the Google Doc). I think it would make us rerun the variant many times, with no advantage.
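A minimal sketch of the direction both comments point in — a single generic evaluation task registered through autodiscovery, instead of one queue and exchange per evaluator. The module path, task name, and body below are assumptions for illustration, not code from this PR:

# agenta_backend/tasks/evaluations.py (hypothetical module)
from typing import List

from celery import shared_task


@shared_task(queue="agenta_backend.tasks.evaluations")
def evaluate(evaluation_id: str, evaluation_types: List[str]) -> None:
    # Run every requested evaluator over one evaluation's variant outputs.
    # Invoking the variant once and applying each evaluator to its output
    # avoids rerunning the variant once per evaluation type.
    for evaluation_type in evaluation_types:
        ...  # dispatch to the matching evaluator (exact match, similarity, regex)

With celery_app.autodiscover_tasks(["agenta_backend.tasks.evaluations"]) in place, the CELERY_QUEUES tuple above could shrink to a single default queue.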
4 changes: 4 additions & 0 deletions agenta-backend/agenta_backend/main.py
@@ -1,5 +1,7 @@
import os
from celery import Celery
from contextlib import asynccontextmanager
from agenta_backend import celery_config

from agenta_backend.config import settings
from agenta_backend.routers import (
@@ -32,6 +34,8 @@
"http://0.0.0.0:3001",
]

celery_app = Celery('evaluation_app')
celery_app.config_from_object(celery_config)

@asynccontextmanager
async def lifespan(application: FastAPI, cache=True):
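With celery_app created next to the FastAPI app like this, a worker can be pointed at the same module — e.g. celery -A agenta_backend.main worker --loglevel=info — where the module path is inferred from this diff rather than stated in the PR.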
8 changes: 8 additions & 0 deletions agenta-backend/agenta_backend/models/api/evaluation_model.py
@@ -85,6 +85,7 @@ class EvaluationScenario(BaseModel):
note: Optional[str]



class AICritiqueCreate(BaseModel):
correct_answer: str
llm_app_prompt_template: Optional[str]
@@ -118,6 +119,13 @@ class NewEvaluation(BaseModel):
status: str


class NewBulkEvaluation(BaseModel):
app_id: str
variant_ids: List[str]
evaluation_type: List[EvaluationType]
testset_id: str


class DeleteEvaluation(BaseModel):
evaluations_ids: List[str]

36 changes: 36 additions & 0 deletions agenta-backend/agenta_backend/models/db_models.py
@@ -212,6 +212,23 @@ class EvaluationScenarioOutput(EmbeddedModel):
variant_id: str
variant_output: str

# TODO: This should be removed and replaced with EvaluationDB
# Keeping it for now for backwards compatibility
class BulkEvaluationDB(Model):
app: AppDB = Reference(key_name="app")
organization: OrganizationDB = Reference(key_name="organization")
user: UserDB = Reference(key_name="user")
status: str
evaluation_type: List[str]
evaluation_type_settings: EvaluationTypeSettings
variants: List[ObjectId]
Member commented:

Why are we saving the evaluation of multiple variants in the same object? We want to enable the user to run the eval for multiple variants from the same command, but there is no need to save these results in the same evaluation document. (Disregard this comment if the goal was just human A/B testing.)

testset: TestSetDB = Reference(key_name="testsets")
@aybruhm (Member) commented on Dec 11, 2023:

Since you're setting the key_name of the testset field to "testsets", does this mean the field is going to store multiple testsets? If so, I think it would be advisable to refactor the field to this:

Suggested change:
-    testset: TestSetDB = Reference(key_name="testsets")
+    testsets: List[ObjectId]

Or this:

Suggested change:
-    testset: TestSetDB = Reference(key_name="testsets")
+    testsets: List[TestSetDB]

Otherwise, it is fine to call the Reference object with no default value:

Suggested change:
-    testset: TestSetDB = Reference(key_name="testsets")
+    testset: TestSetDB = Reference()

Member commented:

I think one eval should link to one test set.

Member commented:

We might allow running an eval in one batch on multiple testsets, but there is no reason to save all of these in one document.

    created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
    updated_at: Optional[datetime] = Field(default_factory=datetime.utcnow)

class Config:
collection = "bulk_evaluations"


class EvaluationDB(Model):
app: AppDB = Reference(key_name="app")
@@ -246,6 +263,25 @@ class EvaluationScenarioDB(Model):
class Config:
collection = "evaluation_scenarios"

# TODO: This should be removed and replaced with EvaluationScenarioDB
# Keeping it for now for backwards compatibility
class EvaluationScenarioDBForBulkEvaluationDB(Model):
user: UserDB = Reference(key_name="user")
organization: OrganizationDB = Reference(key_name="organization")
evaluation: BulkEvaluationDB = Reference(key_name="bulk_evaluations")
inputs: List[EvaluationScenarioInput]
outputs: List[EvaluationScenarioOutput]
vote: Optional[str]
score: Optional[Union[str, int]]
correct_answer: Optional[str]
    created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
    updated_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
is_pinned: Optional[bool]
note: Optional[str]

class Config:
collection = "single_evaluation_scenarios"


class CustomEvaluationDB(Model):
evaluation_name: str
35 changes: 35 additions & 0 deletions agenta-backend/agenta_backend/routers/evaluation_router.py
@@ -16,6 +16,7 @@
EvaluationScenarioScoreUpdate,
EvaluationScenarioUpdate,
ExecuteCustomEvaluationCode,
NewBulkEvaluation,
NewEvaluation,
DeleteEvaluation,
EvaluationType,
@@ -26,6 +27,7 @@
)
from agenta_backend.services.evaluation_service import (
UpdateEvaluationScenarioError,
evaluate_in_bulk,
evaluate_with_ai_critique,
fetch_custom_evaluation_names,
fetch_custom_evaluations,
@@ -54,6 +56,39 @@
router = APIRouter()


@router.post("/bulk-evaluate/")
async def create_bulk_evaluation(payload: NewBulkEvaluation, request: Request):
try:
user_org_data: dict = await get_user_and_org_id(request.state.user_id)

access_app = await check_access_to_app(
user_org_data=user_org_data,
app_id=payload.app_id,
check_owner=False,
)

if not access_app:
error_msg = f"You do not have access to this app: {payload.app_id}"
return JSONResponse(
{"detail": error_msg},
status_code=400,
)
app = await db_manager.fetch_app_by_id(app_id=payload.app_id)

if app is None:
raise HTTPException(status_code=404, detail="App not found")

new_evaluation_db = await evaluation_service.create_new_bulk_evaluation(
app,
payload,
**user_org_data
)

await evaluate_in_bulk(new_evaluation_db, **user_org_data)
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Failed to run bulk evaluation: {str(e)}"
        )


@router.post("/", response_model=SimpleEvaluationOutput)
async def create_evaluation(
payload: NewEvaluation,
15 changes: 15 additions & 0 deletions agenta-backend/agenta_backend/services/db_manager.py
@@ -21,6 +21,7 @@
from agenta_backend.models.db_models import (
AppDB,
AppVariantDB,
BulkEvaluationDB,
VariantBaseDB,
ConfigDB,
ConfigVersionDB,
@@ -1277,6 +1278,20 @@ async def fetch_evaluation_by_id(evaluation_id: str) -> Optional[EvaluationDB]:
return evaluation


async def fetch_bulk_evaluation_by_id(evaluation_id: str) -> Optional[BulkEvaluationDB]:
"""Fetches a evaluation by its ID.
Args:
evaluation_id (str): The ID of the evaluation to fetch.
Returns:
EvaluationDB: The fetched evaluation, or None if no evaluation was found.
"""
assert evaluation_id is not None, "evaluation_id cannot be None"
evaluation = await engine.find_one(
BulkEvaluationDB, BulkEvaluationDB.id == ObjectId(evaluation_id)
)
return evaluation


async def fetch_evaluation_scenario_by_id(
evaluation_scenario_id: str,
) -> Optional[EvaluationScenarioDB]: