diff --git a/agenta-cli/agenta/client/backend/__init__.py b/agenta-cli/agenta/client/backend/__init__.py index eb6978b547..a5882f65ea 100644 --- a/agenta-cli/agenta/client/backend/__init__.py +++ b/agenta-cli/agenta/client/backend/__init__.py @@ -2,37 +2,43 @@ from .types import ( AddVariantFromBaseAndConfigResponse, + AggregatedResult, App, AppVariantOutput, BaseOutput, BodyImportTestset, ContainerTemplatesResponse, CreateAppOutput, - CreateCustomEvaluation, - CustomEvaluationDetail, - CustomEvaluationNames, - CustomEvaluationOutput, + DeleteEvaluation, DockerEnvVars, EnvironmentOutput, Evaluation, EvaluationScenario, EvaluationScenarioInput, EvaluationScenarioOutput, - EvaluationScenarioScore, - EvaluationScenarioUpdateScore, + EvaluationScenarioResult, EvaluationStatusEnum, EvaluationType, - EvaluationTypeSettings, EvaluationWebhook, + Evaluator, + EvaluatorConfig, Feedback, GetConfigReponse, HttpValidationError, + HumanEvaluation, + HumanEvaluationScenario, + HumanEvaluationScenarioInput, + HumanEvaluationScenarioOutput, + HumanEvaluationScenarioScore, + HumanEvaluationScenarioUpdateScore, Image, InviteRequest, ListApiKeysOutput, + LlmRunRateLimit, NewTestset, Organization, OrganizationOutput, + Result, SimpleEvaluationOutput, Span, Template, @@ -50,37 +56,43 @@ __all__ = [ "AddVariantFromBaseAndConfigResponse", + "AggregatedResult", "App", "AppVariantOutput", "BaseOutput", "BodyImportTestset", "ContainerTemplatesResponse", "CreateAppOutput", - "CreateCustomEvaluation", - "CustomEvaluationDetail", - "CustomEvaluationNames", - "CustomEvaluationOutput", + "DeleteEvaluation", "DockerEnvVars", "EnvironmentOutput", "Evaluation", "EvaluationScenario", "EvaluationScenarioInput", "EvaluationScenarioOutput", - "EvaluationScenarioScore", - "EvaluationScenarioUpdateScore", + "EvaluationScenarioResult", "EvaluationStatusEnum", "EvaluationType", - "EvaluationTypeSettings", "EvaluationWebhook", + "Evaluator", + "EvaluatorConfig", "Feedback", "GetConfigReponse", "HttpValidationError", + "HumanEvaluation", + "HumanEvaluationScenario", + "HumanEvaluationScenarioInput", + "HumanEvaluationScenarioOutput", + "HumanEvaluationScenarioScore", + "HumanEvaluationScenarioUpdateScore", "Image", "InviteRequest", "ListApiKeysOutput", + "LlmRunRateLimit", "NewTestset", "Organization", "OrganizationOutput", + "Result", "SimpleEvaluationOutput", "Span", "Template", diff --git a/agenta-cli/agenta/client/backend/client.py b/agenta-cli/agenta/client/backend/client.py index 5cd775daa1..f9ab4fdb01 100644 --- a/agenta-cli/agenta/client/backend/client.py +++ b/agenta-cli/agenta/client/backend/client.py @@ -20,24 +20,25 @@ from .types.base_output import BaseOutput from .types.container_templates_response import ContainerTemplatesResponse from .types.create_app_output import CreateAppOutput -from .types.create_custom_evaluation import CreateCustomEvaluation -from .types.custom_evaluation_detail import CustomEvaluationDetail -from .types.custom_evaluation_names import CustomEvaluationNames -from .types.custom_evaluation_output import CustomEvaluationOutput +from .types.delete_evaluation import DeleteEvaluation from .types.docker_env_vars import DockerEnvVars from .types.environment_output import EnvironmentOutput from .types.evaluation import Evaluation -from .types.evaluation_scenario import EvaluationScenario -from .types.evaluation_scenario_input import EvaluationScenarioInput -from .types.evaluation_scenario_output import EvaluationScenarioOutput -from .types.evaluation_scenario_update_score import 
EvaluationScenarioUpdateScore from .types.evaluation_status_enum import EvaluationStatusEnum from .types.evaluation_type import EvaluationType -from .types.evaluation_type_settings import EvaluationTypeSettings from .types.evaluation_webhook import EvaluationWebhook +from .types.evaluator import Evaluator +from .types.evaluator_config import EvaluatorConfig from .types.feedback import Feedback from .types.get_config_reponse import GetConfigReponse from .types.http_validation_error import HttpValidationError +from .types.human_evaluation import HumanEvaluation +from .types.human_evaluation_scenario import HumanEvaluationScenario +from .types.human_evaluation_scenario_input import HumanEvaluationScenarioInput +from .types.human_evaluation_scenario_output import HumanEvaluationScenarioOutput +from .types.human_evaluation_scenario_update_score import ( + HumanEvaluationScenarioUpdateScore, +) from .types.image import Image from .types.invite_request import InviteRequest from .types.list_api_keys_output import ListApiKeysOutput @@ -61,7 +62,7 @@ OMIT = typing.cast(typing.Any, ...) -class AgentaApi: +class AakremApi: def __init__( self, *, base_url: str, api_key: str, timeout: typing.Optional[float] = 60 ): @@ -82,9 +83,9 @@ def list_api_keys(self) -> typing.List[ListApiKeysOutput]: List[ListAPIKeysOutput]: A list of API Keys associated with the user. --- - from agenta.client import AgentaApi + from aakrem.client import AakremApi - client = AgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") client.list_api_keys() """ _response = self._client_wrapper.httpx_client.request( @@ -142,9 +143,9 @@ def delete_api_key(self, key_prefix: str) -> typing.Dict[str, typing.Any]: Parameters: - key_prefix: str. --- - from agenta.client import AgentaApi + from aakrem.client import AakremApi - client = AgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") client.delete_api_key(key_prefix="key-prefix") """ _response = self._client_wrapper.httpx_client.request( @@ -445,9 +446,9 @@ def list_app_variants(self, app_id: str) -> typing.List[AppVariantOutput]: Parameters: - app_id: str. --- - from agenta.client import AgentaApi + from aakrem.client import AakremApi - client = AgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") client.list_app_variants(app_id="app-id") """ _response = self._client_wrapper.httpx_client.request( @@ -534,9 +535,9 @@ def list_apps( - org_id: typing.Optional[str]. --- - from agenta.client import AgentaApi + from aakrem.client import AakremApi - client = AgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") client.list_apps() """ _response = self._client_wrapper.httpx_client.request( @@ -762,9 +763,9 @@ def list_environments(self, app_id: str) -> typing.List[EnvironmentOutput]: Parameters: - app_id: str. 
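For reviewers, a minimal construction sketch using only names that appear in this diff (the API key and base URL are placeholders):

from aakrem.client import AakremApi, AsyncAakremApi

# Constructor arguments are unchanged; only the class and package names move
# from AgentaApi/AsyncAgentaApi under agenta.client to the aakrem namespace.
client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api")
async_client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api")

evaluations = client.fetch_list_evaluations_evaluations_get(app_id="app-id")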
--- - from agenta.client import AgentaApi + from aakrem.client import AakremApi - client = AgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") client.list_environments(app_id="app-id") """ _response = self._client_wrapper.httpx_client.request( @@ -1006,7 +1007,9 @@ def update_variant_image(self, variant_id: str, *, request: Image) -> typing.Any raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def fetch_list_evaluations(self, *, app_id: str) -> typing.List[Evaluation]: + def fetch_list_evaluations_evaluations_get( + self, *, app_id: str + ) -> typing.List[Evaluation]: """ Fetches a list of evaluations, optionally filtered by an app ID. @@ -1019,10 +1022,10 @@ def fetch_list_evaluations(self, *, app_id: str) -> typing.List[Evaluation]: Parameters: - app_id: str. --- - from agenta.client import AgentaApi + from aakrem.client import AakremApi - client = AgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") - client.fetch_list_evaluations(app_id="app-id") + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client.fetch_list_evaluations_evaluations_get(app_id="app-id") """ _response = self._client_wrapper.httpx_client.request( "GET", @@ -1049,7 +1052,6 @@ def create_evaluation( app_id: str, variant_ids: typing.List[str], evaluation_type: EvaluationType, - evaluation_type_settings: typing.Optional[EvaluationTypeSettings] = OMIT, inputs: typing.List[str], testset_id: str, status: str, @@ -1068,30 +1070,27 @@ def create_evaluation( - evaluation_type: EvaluationType. - - evaluation_type_settings: typing.Optional[EvaluationTypeSettings]. - - inputs: typing.List[str]. - testset_id: str. - status: str. """ - _request: typing.Dict[str, typing.Any] = { - "app_id": app_id, - "variant_ids": variant_ids, - "evaluation_type": evaluation_type, - "inputs": inputs, - "testset_id": testset_id, - "status": status, - } - if evaluation_type_settings is not OMIT: - _request["evaluation_type_settings"] = evaluation_type_settings _response = self._client_wrapper.httpx_client.request( "POST", urllib.parse.urljoin( - f"{self._client_wrapper.get_base_url()}/", "evaluations" + f"{self._client_wrapper.get_base_url()}/", "human-evaluations" + ), + json=jsonable_encoder( + { + "app_id": app_id, + "variant_ids": variant_ids, + "evaluation_type": evaluation_type, + "inputs": inputs, + "testset_id": testset_id, + "status": status, + } ), - json=jsonable_encoder(_request), headers=self._client_wrapper.get_headers(), timeout=60, ) @@ -1105,9 +1104,7 @@ def create_evaluation( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def delete_evaluations( - self, *, evaluations_ids: typing.List[str] - ) -> typing.List[str]: + def delete_evaluations(self, *, request: DeleteEvaluation) -> typing.List[str]: """ Delete specific comparison tables based on their unique IDs. @@ -1118,19 +1115,20 @@ def delete_evaluations( A list of the deleted comparison tables' IDs. Parameters: - - evaluations_ids: typing.List[str]. + - request: DeleteEvaluation. 
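A hedged sketch of the reworked create_evaluation call, which now posts to /human-evaluations without evaluation_type_settings; the evaluation_type and status values below are placeholders, since the allowed enum members are not shown in this diff:

from aakrem.client import AakremApi

client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api")

# All arguments are keyword-only; evaluation_type_settings has been removed.
created = client.create_evaluation(
    app_id="app-id",
    variant_ids=["variant-id"],
    evaluation_type="human_a_b_testing",  # placeholder value, not confirmed by this diff
    inputs=["input-name"],
    testset_id="testset-id",
    status="EVALUATION_INITIALIZED",  # placeholder value, not confirmed by this diff
)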
--- - from agenta.client import AgentaApi + from aakrem import DeleteEvaluation + from aakrem.client import AakremApi - client = AgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") - client.delete_evaluations(evaluations_ids=[]) + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client.delete_evaluations(request=DeleteEvaluation(evaluations_ids=[])) """ _response = self._client_wrapper.httpx_client.request( "DELETE", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", "evaluations" ), - json=jsonable_encoder({"evaluations_ids": evaluations_ids}), + json=jsonable_encoder(request), headers=self._client_wrapper.get_headers(), timeout=60, ) @@ -1144,15 +1142,16 @@ def delete_evaluations( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def fetch_evaluation(self, evaluation_id: str) -> Evaluation: + def fetch_evaluation_status(self, evaluation_id: str) -> typing.Any: """ - Fetches a single evaluation based on its ID. + Fetches the status of the evaluation. Args: - evaluation_id (str): The ID of the evaluation to fetch. + evaluation_id (str): the evaluation id + request (Request): the request object Returns: - Evaluation: The fetched evaluation. + (str): the evaluation status Parameters: - evaluation_id: str. @@ -1161,13 +1160,13 @@ def fetch_evaluation(self, evaluation_id: str) -> Evaluation: "GET", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/{evaluation_id}", + f"evaluations/{evaluation_id}/status", ), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(Evaluation, _response.json()) # type: ignore + return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -1176,41 +1175,26 @@ def fetch_evaluation(self, evaluation_id: str) -> Evaluation: raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def update_evaluation( - self, - evaluation_id: str, - *, - status: typing.Optional[EvaluationStatusEnum] = OMIT, - evaluation_type_settings: typing.Optional[EvaluationTypeSettings] = OMIT, - ) -> typing.Any: + def fetch_evaluation_results(self, evaluation_id: str) -> typing.Any: """ - Updates an evaluation's status. + Fetches the results of the evaluation - Raises: - HTTPException: If the columns in the test set do not match with the inputs in the variant. + Args: + evaluation_id (str): the evaluation id + request (Request): the request object Returns: - None: A 204 No Content status code, indicating that the update was successful. + _type_: _description_ Parameters: - evaluation_id: str. - - - status: typing.Optional[EvaluationStatusEnum]. - - - evaluation_type_settings: typing.Optional[EvaluationTypeSettings]. 
""" - _request: typing.Dict[str, typing.Any] = {} - if status is not OMIT: - _request["status"] = status - if evaluation_type_settings is not OMIT: - _request["evaluation_type_settings"] = evaluation_type_settings _response = self._client_wrapper.httpx_client.request( - "PUT", + "GET", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/{evaluation_id}", + f"evaluations/{evaluation_id}/results", ), - json=jsonable_encoder(_request), headers=self._client_wrapper.get_headers(), timeout=60, ) @@ -1226,7 +1210,7 @@ def update_evaluation( def fetch_evaluation_scenarios( self, evaluation_id: str - ) -> typing.List[EvaluationScenario]: + ) -> typing.List[HumanEvaluationScenario]: """ Fetches evaluation scenarios for a given evaluation ID. @@ -1242,22 +1226,22 @@ def fetch_evaluation_scenarios( Parameters: - evaluation_id: str. --- - from agenta.client import AgentaApi + from aakrem.client import AakremApi - client = AgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") client.fetch_evaluation_scenarios(evaluation_id="evaluation-id") """ _response = self._client_wrapper.httpx_client.request( "GET", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/{evaluation_id}/evaluation_scenarios", + f"human-evaluations/{evaluation_id}/evaluation_scenarios", ), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(typing.List[EvaluationScenario], _response.json()) # type: ignore + return pydantic.parse_obj_as(typing.List[HumanEvaluationScenario], _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -1266,35 +1250,30 @@ def fetch_evaluation_scenarios( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def create_evaluation_scenario( - self, evaluation_id: str, *, request: EvaluationScenario - ) -> typing.Any: + def fetch_evaluation(self, evaluation_id: str) -> Evaluation: """ - Create a new evaluation scenario for a given evaluation ID. + Fetches a single evaluation based on its ID. - Raises: - HTTPException: If evaluation not found or access denied. + Args: + evaluation_id (str): The ID of the evaluation to fetch. Returns: - None: 204 No Content status code upon success. + Evaluation: The fetched evaluation. Parameters: - evaluation_id: str. - - - request: EvaluationScenario. 
""" _response = self._client_wrapper.httpx_client.request( - "POST", + "GET", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/{evaluation_id}/evaluation_scenario", + f"evaluations/{evaluation_id}", ), - json=jsonable_encoder(request), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore + return pydantic.parse_obj_as(Evaluation, _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -1303,144 +1282,60 @@ def create_evaluation_scenario( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def update_evaluation_scenario( - self, - evaluation_id: str, - evaluation_scenario_id: str, - evaluation_type: EvaluationType, - *, - vote: typing.Optional[str] = OMIT, - score: typing.Optional[EvaluationScenarioUpdateScore] = OMIT, - correct_answer: typing.Optional[str] = OMIT, - outputs: typing.Optional[typing.List[EvaluationScenarioOutput]] = OMIT, - inputs: typing.Optional[typing.List[EvaluationScenarioInput]] = OMIT, - is_pinned: typing.Optional[bool] = OMIT, - note: typing.Optional[str] = OMIT, - ) -> typing.Any: + def webhook_example_fake(self) -> EvaluationWebhook: """ - Updates an evaluation scenario's vote or score based on its type. - - Raises: - HTTPException: If update fails or unauthorized. + Returns a fake score response for example webhook evaluation Returns: - None: 204 No Content status code upon successful update. - - Parameters: - - evaluation_id: str. - - - evaluation_scenario_id: str. - - - evaluation_type: EvaluationType. - - - vote: typing.Optional[str]. - - - score: typing.Optional[EvaluationScenarioUpdateScore]. - - - correct_answer: typing.Optional[str]. - - - outputs: typing.Optional[typing.List[EvaluationScenarioOutput]]. - - - inputs: typing.Optional[typing.List[EvaluationScenarioInput]]. - - - is_pinned: typing.Optional[bool]. - - - note: typing.Optional[str]. 
+ _description_ """ - _request: typing.Dict[str, typing.Any] = {} - if vote is not OMIT: - _request["vote"] = vote - if score is not OMIT: - _request["score"] = score - if correct_answer is not OMIT: - _request["correct_answer"] = correct_answer - if outputs is not OMIT: - _request["outputs"] = outputs - if inputs is not OMIT: - _request["inputs"] = inputs - if is_pinned is not OMIT: - _request["is_pinned"] = is_pinned - if note is not OMIT: - _request["note"] = note _response = self._client_wrapper.httpx_client.request( - "PUT", + "POST", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/{evaluation_id}/evaluation_scenario/{evaluation_scenario_id}/{evaluation_type}", + "evaluations/webhook_example_fake", ), - json=jsonable_encoder(_request), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore - if _response.status_code == 422: - raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore + return pydantic.parse_obj_as(EvaluationWebhook, _response.json()) # type: ignore try: _response_json = _response.json() except JSONDecodeError: raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def evaluate_ai_critique( - self, - *, - correct_answer: str, - llm_app_prompt_template: typing.Optional[str] = OMIT, - inputs: typing.List[EvaluationScenarioInput], - outputs: typing.List[EvaluationScenarioOutput], - evaluation_prompt_template: typing.Optional[str] = OMIT, - open_ai_key: typing.Optional[str] = OMIT, - ) -> str: + def fetch_evaluation_scenarios_evaluations_evaluation_scenarios_comparison_results_get( + self, *, evaluations_ids: str + ) -> typing.Any: """ - Evaluate AI critique based on the given payload. - - Args: - payload (AICritiqueCreate): The payload containing data for AI critique evaluation. - stoken_session (SessionContainer): The session container verified by `verify_session`. + Fetches evaluation scenarios for a given evaluation ID. - Returns: - str: The output of the AI critique evaluation. + Arguments: + evaluation_id (str): The ID of the evaluation for which to fetch scenarios. Raises: - HTTPException: If any exception occurs during the evaluation. - - Parameters: - - correct_answer: str. - - - llm_app_prompt_template: typing.Optional[str]. - - - inputs: typing.List[EvaluationScenarioInput]. - - - outputs: typing.List[EvaluationScenarioOutput]. + HTTPException: If the evaluation is not found or access is denied. - - evaluation_prompt_template: typing.Optional[str]. + Returns: + List[EvaluationScenario]: A list of evaluation scenarios. - - open_ai_key: typing.Optional[str]. + Parameters: + - evaluations_ids: str. 
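The webhook example and the new comparison-results route side by side; the comma-separated encoding of evaluations_ids is an assumption, since the diff only shows it as a single string query parameter:

from aakrem.client import AakremApi

client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api")

fake_score = client.webhook_example_fake()  # POST /evaluations/webhook_example_fake
comparison = client.fetch_evaluation_scenarios_evaluations_evaluation_scenarios_comparison_results_get(
    evaluations_ids="evaluation-id-1,evaluation-id-2",  # encoding assumed
)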
""" - _request: typing.Dict[str, typing.Any] = { - "correct_answer": correct_answer, - "inputs": inputs, - "outputs": outputs, - } - if llm_app_prompt_template is not OMIT: - _request["llm_app_prompt_template"] = llm_app_prompt_template - if evaluation_prompt_template is not OMIT: - _request["evaluation_prompt_template"] = evaluation_prompt_template - if open_ai_key is not OMIT: - _request["open_ai_key"] = open_ai_key _response = self._client_wrapper.httpx_client.request( - "POST", + "GET", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - "evaluations/evaluation_scenario/ai_critique", + "evaluations/evaluation_scenarios/comparison-results", ), - json=jsonable_encoder(_request), + params=remove_none_from_dict({"evaluations_ids": evaluations_ids}), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(str, _response.json()) # type: ignore + return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -1449,38 +1344,37 @@ def evaluate_ai_critique( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def get_evaluation_scenario_score( - self, evaluation_scenario_id: str - ) -> typing.Dict[str, str]: + def fetch_list_human_evaluations_human_evaluations_get( + self, *, app_id: str + ) -> typing.List[HumanEvaluation]: """ - Fetch the score of a specific evaluation scenario. + Fetches a list of evaluations, optionally filtered by an app ID. Args: - evaluation_scenario_id: The ID of the evaluation scenario to fetch. - stoken_session: Session data, verified by `verify_session`. + app_id (Optional[str]): An optional app ID to filter the evaluations. Returns: - Dictionary containing the scenario ID and its score. + List[HumanEvaluation]: A list of evaluations. Parameters: - - evaluation_scenario_id: str. + - app_id: str. 
--- - from agenta.client import AgentaApi + from aakrem.client import AakremApi - client = AgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") - client.get_evaluation_scenario_score(evaluation_scenario_id="evaluation-scenario-id") + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client.fetch_list_human_evaluations_human_evaluations_get(app_id="app-id") """ _response = self._client_wrapper.httpx_client.request( "GET", urllib.parse.urljoin( - f"{self._client_wrapper.get_base_url()}/", - f"evaluations/evaluation_scenario/{evaluation_scenario_id}/score", + f"{self._client_wrapper.get_base_url()}/", "human-evaluations" ), + params=remove_none_from_dict({"app_id": app_id}), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(typing.Dict[str, str], _response.json()) # type: ignore + return pydantic.parse_obj_as(typing.List[HumanEvaluation], _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -1489,35 +1383,38 @@ def get_evaluation_scenario_score( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def update_evaluation_scenario_score( - self, evaluation_scenario_id: str, *, score: float - ) -> typing.Any: + def delete_evaluations_human_evaluations_delete( + self, *, request: DeleteEvaluation + ) -> typing.List[str]: """ - Updates the score of an evaluation scenario. + Delete specific comparison tables based on their unique IDs. - Raises: - HTTPException: Server error if the evaluation update fails. + Args: + delete_evaluations (List[str]): The unique identifiers of the comparison tables to delete. Returns: - None: 204 No Content status code upon successful update. + A list of the deleted comparison tables' IDs. Parameters: - - evaluation_scenario_id: str. + - request: DeleteEvaluation. + --- + from aakrem import DeleteEvaluation + from aakrem.client import AakremApi - - score: float. 
+ client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client.delete_evaluations_human_evaluations_delete(request=DeleteEvaluation(evaluations_ids=[])) """ _response = self._client_wrapper.httpx_client.request( - "PUT", + "DELETE", urllib.parse.urljoin( - f"{self._client_wrapper.get_base_url()}/", - f"evaluations/evaluation_scenario/{evaluation_scenario_id}/score", + f"{self._client_wrapper.get_base_url()}/", "human-evaluations" ), - json=jsonable_encoder({"score": score}), + json=jsonable_encoder(request), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore + return pydantic.parse_obj_as(typing.List[str], _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -1526,15 +1423,17 @@ def update_evaluation_scenario_score( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def fetch_results(self, evaluation_id: str) -> typing.Any: + def fetch_human_evaluation_human_evaluations_evaluation_id_get( + self, evaluation_id: str + ) -> HumanEvaluation: """ - Fetch all the results for one the comparison table + Fetches a single evaluation based on its ID. - Arguments: - evaluation*id -- \_description* + Args: + evaluation_id (str): The ID of the evaluation to fetch. Returns: - _description_ + HumanEvaluation: The fetched evaluation. Parameters: - evaluation_id: str. @@ -1543,13 +1442,13 @@ def fetch_results(self, evaluation_id: str) -> typing.Any: "GET", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/{evaluation_id}/results", + f"human-evaluations/{evaluation_id}", ), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore + return pydantic.parse_obj_as(HumanEvaluation, _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -1558,26 +1457,36 @@ def fetch_results(self, evaluation_id: str) -> typing.Any: raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def create_custom_evaluation( - self, *, request: CreateCustomEvaluation + def update_human_evaluation( + self, + evaluation_id: str, + *, + status: typing.Optional[EvaluationStatusEnum] = OMIT, ) -> typing.Any: """ - Create evaluation with custom python code. + Updates an evaluation's status. - Args: + Raises: + HTTPException: If the columns in the test set do not match with the inputs in the variant. - custom_evaluation_payload (CreateCustomEvaluation): the required payload + Returns: + None: A 204 No Content status code, indicating that the update was successful. Parameters: - - request: CreateCustomEvaluation. + - evaluation_id: str. + + - status: typing.Optional[EvaluationStatusEnum]. 
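Human-evaluation reads and deletes move under /human-evaluations; a short sketch with the generated method names from this diff:

from aakrem import DeleteEvaluation
from aakrem.client import AakremApi

client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api")

human_evaluations = client.fetch_list_human_evaluations_human_evaluations_get(app_id="app-id")
single = client.fetch_human_evaluation_human_evaluations_evaluation_id_get(evaluation_id="evaluation-id")
deleted_ids = client.delete_evaluations_human_evaluations_delete(
    request=DeleteEvaluation(evaluations_ids=["evaluation-id"])
)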
""" + _request: typing.Dict[str, typing.Any] = {} + if status is not OMIT: + _request["status"] = status _response = self._client_wrapper.httpx_client.request( - "POST", + "PUT", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - "evaluations/custom_evaluation", + f"human-evaluations/{evaluation_id}", ), - json=jsonable_encoder(request), + json=jsonable_encoder(_request), headers=self._client_wrapper.get_headers(), timeout=60, ) @@ -1591,30 +1500,77 @@ def create_custom_evaluation( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def get_custom_evaluation(self, id: str) -> CustomEvaluationDetail: + def update_evaluation_scenario_router_human_evaluations_evaluation_id_evaluation_scenario_evaluation_scenario_id_evaluation_type_put( + self, + evaluation_id: str, + evaluation_scenario_id: str, + evaluation_type: EvaluationType, + *, + vote: typing.Optional[str] = OMIT, + score: typing.Optional[HumanEvaluationScenarioUpdateScore] = OMIT, + correct_answer: typing.Optional[str] = OMIT, + outputs: typing.Optional[typing.List[HumanEvaluationScenarioOutput]] = OMIT, + inputs: typing.Optional[typing.List[HumanEvaluationScenarioInput]] = OMIT, + is_pinned: typing.Optional[bool] = OMIT, + note: typing.Optional[str] = OMIT, + ) -> typing.Any: """ - Get the custom code evaluation detail. + Updates an evaluation scenario's vote or score based on its type. - Args: - id (str): the id of the custom evaluation + Raises: + HTTPException: If update fails or unauthorized. Returns: - CustomEvaluationDetail: Detail of the custom evaluation + None: 204 No Content status code upon successful update. Parameters: - - id: str. + - evaluation_id: str. + + - evaluation_scenario_id: str. + + - evaluation_type: EvaluationType. + + - vote: typing.Optional[str]. + + - score: typing.Optional[HumanEvaluationScenarioUpdateScore]. + + - correct_answer: typing.Optional[str]. + + - outputs: typing.Optional[typing.List[HumanEvaluationScenarioOutput]]. + + - inputs: typing.Optional[typing.List[HumanEvaluationScenarioInput]]. + + - is_pinned: typing.Optional[bool]. + + - note: typing.Optional[str]. 
""" + _request: typing.Dict[str, typing.Any] = {} + if vote is not OMIT: + _request["vote"] = vote + if score is not OMIT: + _request["score"] = score + if correct_answer is not OMIT: + _request["correct_answer"] = correct_answer + if outputs is not OMIT: + _request["outputs"] = outputs + if inputs is not OMIT: + _request["inputs"] = inputs + if is_pinned is not OMIT: + _request["is_pinned"] = is_pinned + if note is not OMIT: + _request["note"] = note _response = self._client_wrapper.httpx_client.request( - "GET", + "PUT", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/custom_evaluation/{id}", + f"human-evaluations/{evaluation_id}/evaluation_scenario/{evaluation_scenario_id}/{evaluation_type}", ), + json=jsonable_encoder(_request), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(CustomEvaluationDetail, _response.json()) # type: ignore + return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -1623,28 +1579,72 @@ def get_custom_evaluation(self, id: str) -> CustomEvaluationDetail: raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def update_custom_evaluation( - self, id: str, *, request: CreateCustomEvaluation - ) -> typing.Any: + def get_evaluation_scenario_score_router_human_evaluations_evaluation_scenario_evaluation_scenario_id_score_get( + self, evaluation_scenario_id: str + ) -> typing.Dict[str, str]: """ - Update a custom code evaluation. + Fetch the score of a specific evaluation scenario. + Args: - id (str): the ID of the custom evaluation to update - updated_data (CreateCustomEvaluation): the payload with updated data - stoken_session (SessionContainer): session container for authentication + evaluation_scenario_id: The ID of the evaluation scenario to fetch. + stoken_session: Session data, verified by `verify_session`. + + Returns: + Dictionary containing the scenario ID and its score. + + Parameters: + - evaluation_scenario_id: str. 
+ --- + from aakrem.client import AakremApi + + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client.get_evaluation_scenario_score_router_human_evaluations_evaluation_scenario_evaluation_scenario_id_score_get( + evaluation_scenario_id="evaluation-scenario-id" + ) + """ + _response = self._client_wrapper.httpx_client.request( + "GET", + urllib.parse.urljoin( + f"{self._client_wrapper.get_base_url()}/", + f"human-evaluations/evaluation_scenario/{evaluation_scenario_id}/score", + ), + headers=self._client_wrapper.get_headers(), + timeout=60, + ) + if 200 <= _response.status_code < 300: + return pydantic.parse_obj_as(typing.Dict[str, str], _response.json()) # type: ignore + if _response.status_code == 422: + raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore + try: + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + def update_evaluation_scenario_score_router_human_evaluations_evaluation_scenario_evaluation_scenario_id_score_put( + self, evaluation_scenario_id: str, *, score: float + ) -> typing.Any: + """ + Updates the score of an evaluation scenario. + + Raises: + HTTPException: Server error if the evaluation update fails. + + Returns: + None: 204 No Content status code upon successful update. Parameters: - - id: str. + - evaluation_scenario_id: str. - - request: CreateCustomEvaluation. + - score: float. """ _response = self._client_wrapper.httpx_client.request( "PUT", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/custom_evaluation/{id}", + f"human-evaluations/evaluation_scenario/{evaluation_scenario_id}/score", ), - json=jsonable_encoder(request), + json=jsonable_encoder({"score": score}), headers=self._client_wrapper.get_headers(), timeout=60, ) @@ -1658,37 +1658,30 @@ def update_custom_evaluation( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def list_custom_evaluations( - self, app_id: str - ) -> typing.List[CustomEvaluationOutput]: + def fetch_results(self, evaluation_id: str) -> typing.Any: """ - List the custom code evaluations for a given app. + Fetch all the results for one the comparison table - Args: - app_id (str): the id of the app + Arguments: + evaluation*id -- \_description* Returns: - List[CustomEvaluationOutput]: a list of custom evaluation + _description_ Parameters: - - app_id: str. - --- - from agenta.client import AgentaApi - - client = AgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") - client.list_custom_evaluations(app_id="app-id") + - evaluation_id: str. 
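Scenario scores keep their request and response shapes but move under /human-evaluations; a minimal read-then-update sketch:

from aakrem.client import AakremApi

client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api")

score = client.get_evaluation_scenario_score_router_human_evaluations_evaluation_scenario_evaluation_scenario_id_score_get(
    evaluation_scenario_id="evaluation-scenario-id"
)
client.update_evaluation_scenario_score_router_human_evaluations_evaluation_scenario_evaluation_scenario_id_score_put(
    evaluation_scenario_id="evaluation-scenario-id", score=1.0
)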
""" _response = self._client_wrapper.httpx_client.request( "GET", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/custom_evaluation/list/{app_id}", + f"human-evaluations/{evaluation_id}/results", ), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(typing.List[CustomEvaluationOutput], _response.json()) # type: ignore + return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -1697,37 +1690,66 @@ def list_custom_evaluations( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def get_custom_evaluation_names( - self, app_name: str - ) -> typing.List[CustomEvaluationNames]: + def get_evaluators_endpoint_evaluators_get(self) -> typing.List[Evaluator]: + """ + Endpoint to fetch a list of evaluators. + + Returns: + List[Evaluator]: A list of evaluator objects. + + --- + from aakrem.client import AakremApi + + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client.get_evaluators_endpoint_evaluators_get() + """ + _response = self._client_wrapper.httpx_client.request( + "GET", + urllib.parse.urljoin( + f"{self._client_wrapper.get_base_url()}/", "evaluators" + ), + headers=self._client_wrapper.get_headers(), + timeout=60, + ) + if 200 <= _response.status_code < 300: + return pydantic.parse_obj_as(typing.List[Evaluator], _response.json()) # type: ignore + try: + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + def get_evaluator_configs_evaluators_configs_get( + self, *, app_id: str + ) -> typing.List[EvaluatorConfig]: """ - Get the names of custom evaluation for a given app. + Endpoint to fetch evaluator configurations for a specific app. Args: - app_name (str): the name of the app the evaluation belongs to + app_id (str): The ID of the app. Returns: - List[CustomEvaluationNames]: the list of name of custom evaluations + List[EvaluatorConfigDB]: A list of evaluator configuration objects. Parameters: - - app_name: str. + - app_id: str. 
--- - from agenta.client import AgentaApi + from aakrem.client import AakremApi - client = AgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") - client.get_custom_evaluation_names(app_name="app-name") + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client.get_evaluator_configs_evaluators_configs_get(app_id="app-id") """ _response = self._client_wrapper.httpx_client.request( "GET", urllib.parse.urljoin( - f"{self._client_wrapper.get_base_url()}/", - f"evaluations/custom_evaluation/{app_name}/names", + f"{self._client_wrapper.get_base_url()}/", "evaluators/configs" ), + params=remove_none_from_dict({"app_id": app_id}), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(typing.List[CustomEvaluationNames], _response.json()) # type: ignore + return pydantic.parse_obj_as(typing.List[EvaluatorConfig], _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -1736,59 +1758,50 @@ def get_custom_evaluation_names( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def execute_custom_evaluation( + def create_new_evaluator_config_evaluators_configs_post( self, - evaluation_id: str, *, - inputs: typing.List[typing.Dict[str, typing.Any]], app_id: str, - variant_id: str, - correct_answer: str, - outputs: typing.List[typing.Dict[str, typing.Any]], - ) -> typing.Any: + name: str, + evaluator_key: str, + settings_values: typing.Dict[str, typing.Any], + ) -> EvaluatorConfig: """ - Execute a custom evaluation code. + Endpoint to fetch evaluator configurations for a specific app. Args: - evaluation_id (str): the custom evaluation id - payload (ExecuteCustomEvaluationCode): the required payload + app_id (str): The ID of the app. Returns: - float: the result of the evaluation custom code + EvaluatorConfigDB: Evaluator configuration api model. Parameters: - - evaluation_id: str. - - - inputs: typing.List[typing.Dict[str, typing.Any]]. - - app_id: str. - - variant_id: str. + - name: str. - - correct_answer: str. + - evaluator_key: str. - - outputs: typing.List[typing.Dict[str, typing.Any]]. + - settings_values: typing.Dict[str, typing.Any]. 
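The evaluator registry reads in one place; a short sketch of the two new list endpoints:

from aakrem.client import AakremApi

client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api")

evaluators = client.get_evaluators_endpoint_evaluators_get()                    # GET /evaluators
configs = client.get_evaluator_configs_evaluators_configs_get(app_id="app-id")  # GET /evaluators/configs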
""" _response = self._client_wrapper.httpx_client.request( "POST", urllib.parse.urljoin( - f"{self._client_wrapper.get_base_url()}/", - f"evaluations/custom_evaluation/execute/{evaluation_id}", + f"{self._client_wrapper.get_base_url()}/", "evaluators/configs" ), json=jsonable_encoder( { - "inputs": inputs, "app_id": app_id, - "variant_id": variant_id, - "correct_answer": correct_answer, - "outputs": outputs, + "name": name, + "evaluator_key": evaluator_key, + "settings_values": settings_values, } ), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore + return pydantic.parse_obj_as(EvaluatorConfig, _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -1797,24 +1810,115 @@ def execute_custom_evaluation( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def webhook_example_fake(self) -> EvaluationWebhook: + def get_evaluator_config_evaluators_configs_evaluator_config_id_get( + self, evaluator_config_id: str + ) -> EvaluatorConfig: """ - Returns a fake score response for example webhook evaluation + Endpoint to fetch evaluator configurations for a specific app. Returns: - _description_ + List[EvaluatorConfigDB]: A list of evaluator configuration objects. + + Parameters: + - evaluator_config_id: str. """ _response = self._client_wrapper.httpx_client.request( - "POST", + "GET", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - "evaluations/webhook_example_fake", + f"evaluators/configs/{evaluator_config_id}", ), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(EvaluationWebhook, _response.json()) # type: ignore + return pydantic.parse_obj_as(EvaluatorConfig, _response.json()) # type: ignore + if _response.status_code == 422: + raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore + try: + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + def update_evaluator_config_evaluators_configs_evaluator_config_id_put( + self, + evaluator_config_id: str, + *, + name: typing.Optional[str] = OMIT, + evaluator_key: typing.Optional[str] = OMIT, + settings_values: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, + ) -> EvaluatorConfig: + """ + Endpoint to update evaluator configurations for a specific app. + + Returns: + List[EvaluatorConfigDB]: A list of evaluator configuration objects. + + Parameters: + - evaluator_config_id: str. + + - name: typing.Optional[str]. + + - evaluator_key: typing.Optional[str]. + + - settings_values: typing.Optional[typing.Dict[str, typing.Any]]. 
+ """ + _request: typing.Dict[str, typing.Any] = {} + if name is not OMIT: + _request["name"] = name + if evaluator_key is not OMIT: + _request["evaluator_key"] = evaluator_key + if settings_values is not OMIT: + _request["settings_values"] = settings_values + _response = self._client_wrapper.httpx_client.request( + "PUT", + urllib.parse.urljoin( + f"{self._client_wrapper.get_base_url()}/", + f"evaluators/configs/{evaluator_config_id}", + ), + json=jsonable_encoder(_request), + headers=self._client_wrapper.get_headers(), + timeout=60, + ) + if 200 <= _response.status_code < 300: + return pydantic.parse_obj_as(EvaluatorConfig, _response.json()) # type: ignore + if _response.status_code == 422: + raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore + try: + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + def delete_evaluator_config_evaluators_configs_evaluator_config_id_delete( + self, evaluator_config_id: str + ) -> bool: + """ + Endpoint to delete a specific evaluator configuration. + + Args: + evaluator_config_id (str): The unique identifier of the evaluator configuration. + + Returns: + bool: True if deletion was successful, False otherwise. + + Parameters: + - evaluator_config_id: str. + """ + _response = self._client_wrapper.httpx_client.request( + "DELETE", + urllib.parse.urljoin( + f"{self._client_wrapper.get_base_url()}/", + f"evaluators/configs/{evaluator_config_id}", + ), + headers=self._client_wrapper.get_headers(), + timeout=60, + ) + if 200 <= _response.status_code < 300: + return pydantic.parse_obj_as(bool, _response.json()) # type: ignore + if _response.status_code == 422: + raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: _response_json = _response.json() except JSONDecodeError: @@ -2018,9 +2122,9 @@ def get_testsets(self, *, app_id: str) -> typing.List[TestSetOutputResponse]: Parameters: - app_id: str. --- - from agenta.client import AgentaApi + from aakrem.client import AakremApi - client = AgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") client.get_testsets(app_id="app-id") """ _response = self._client_wrapper.httpx_client.request( @@ -2053,9 +2157,9 @@ def delete_testsets(self, *, testset_ids: typing.List[str]) -> typing.List[str]: Parameters: - testset_ids: typing.List[str]. --- - from agenta.client import AgentaApi + from aakrem.client import AakremApi - client = AgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") client.delete_testsets(testset_ids=[]) """ _response = self._client_wrapper.httpx_client.request( @@ -2338,9 +2442,9 @@ def get_traces(self, app_id: str, variant_id: str) -> typing.List[Trace]: - variant_id: str. 
--- - from agenta.client import AgentaApi + from aakrem.client import AakremApi - client = AgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") client.get_traces(app_id="app-id", variant_id="variant-id") """ _response = self._client_wrapper.httpx_client.request( @@ -2521,9 +2625,9 @@ def get_spans_of_trace(self, trace_id: str) -> typing.List[Span]: Parameters: - trace_id: str. --- - from agenta.client import AgentaApi + from aakrem.client import AakremApi - client = AgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") client.get_spans_of_trace(trace_id="trace-id") """ _response = self._client_wrapper.httpx_client.request( @@ -2550,9 +2654,9 @@ def get_feedbacks(self, trace_id: str) -> typing.List[Feedback]: Parameters: - trace_id: str. --- - from agenta.client import AgentaApi + from aakrem.client import AakremApi - client = AgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") client.get_feedbacks(trace_id="trace-id") """ _response = self._client_wrapper.httpx_client.request( @@ -2705,9 +2809,9 @@ def list_organizations(self) -> typing.List[Organization]: HTTPException: If there is an error retrieving the organizations from the database. --- - from agenta.client import AgentaApi + from aakrem.client import AakremApi - client = AgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") client.list_organizations() """ _response = self._client_wrapper.httpx_client.request( @@ -2768,9 +2872,9 @@ def list_bases( - base_name: typing.Optional[str]. --- - from agenta.client import AgentaApi + from aakrem.client import AakremApi - client = AgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") client.list_bases() """ _response = self._client_wrapper.httpx_client.request( @@ -2871,7 +2975,7 @@ def save_config( raise ApiError(status_code=_response.status_code, body=_response_json) -class AsyncAgentaApi: +class AsyncAakremApi: def __init__( self, *, base_url: str, api_key: str, timeout: typing.Optional[float] = 60 ): @@ -2892,9 +2996,9 @@ async def list_api_keys(self) -> typing.List[ListApiKeysOutput]: List[ListAPIKeysOutput]: A list of API Keys associated with the user. --- - from agenta.client import AsyncAgentaApi + from aakrem.client import AsyncAakremApi - client = AsyncAgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") await client.list_api_keys() """ _response = await self._client_wrapper.httpx_client.request( @@ -2952,9 +3056,9 @@ async def delete_api_key(self, key_prefix: str) -> typing.Dict[str, typing.Any]: Parameters: - key_prefix: str. 
--- - from agenta.client import AsyncAgentaApi + from aakrem.client import AsyncAakremApi - client = AsyncAgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") await client.delete_api_key(key_prefix="key-prefix") """ _response = await self._client_wrapper.httpx_client.request( @@ -3257,9 +3361,9 @@ async def list_app_variants(self, app_id: str) -> typing.List[AppVariantOutput]: Parameters: - app_id: str. --- - from agenta.client import AsyncAgentaApi + from aakrem.client import AsyncAakremApi - client = AsyncAgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") await client.list_app_variants(app_id="app-id") """ _response = await self._client_wrapper.httpx_client.request( @@ -3348,9 +3452,9 @@ async def list_apps( - org_id: typing.Optional[str]. --- - from agenta.client import AsyncAgentaApi + from aakrem.client import AsyncAakremApi - client = AsyncAgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") await client.list_apps() """ _response = await self._client_wrapper.httpx_client.request( @@ -3576,9 +3680,9 @@ async def list_environments(self, app_id: str) -> typing.List[EnvironmentOutput] Parameters: - app_id: str. --- - from agenta.client import AsyncAgentaApi + from aakrem.client import AsyncAakremApi - client = AsyncAgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") await client.list_environments(app_id="app-id") """ _response = await self._client_wrapper.httpx_client.request( @@ -3822,7 +3926,9 @@ async def update_variant_image( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def fetch_list_evaluations(self, *, app_id: str) -> typing.List[Evaluation]: + async def fetch_list_evaluations_evaluations_get( + self, *, app_id: str + ) -> typing.List[Evaluation]: """ Fetches a list of evaluations, optionally filtered by an app ID. @@ -3835,10 +3941,10 @@ async def fetch_list_evaluations(self, *, app_id: str) -> typing.List[Evaluation Parameters: - app_id: str. --- - from agenta.client import AsyncAgentaApi + from aakrem.client import AsyncAakremApi - client = AsyncAgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") - await client.fetch_list_evaluations(app_id="app-id") + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + await client.fetch_list_evaluations_evaluations_get(app_id="app-id") """ _response = await self._client_wrapper.httpx_client.request( "GET", @@ -3865,7 +3971,6 @@ async def create_evaluation( app_id: str, variant_ids: typing.List[str], evaluation_type: EvaluationType, - evaluation_type_settings: typing.Optional[EvaluationTypeSettings] = OMIT, inputs: typing.List[str], testset_id: str, status: str, @@ -3884,30 +3989,27 @@ async def create_evaluation( - evaluation_type: EvaluationType. - - evaluation_type_settings: typing.Optional[EvaluationTypeSettings]. - - inputs: typing.List[str]. - testset_id: str. - status: str. 
""" - _request: typing.Dict[str, typing.Any] = { - "app_id": app_id, - "variant_ids": variant_ids, - "evaluation_type": evaluation_type, - "inputs": inputs, - "testset_id": testset_id, - "status": status, - } - if evaluation_type_settings is not OMIT: - _request["evaluation_type_settings"] = evaluation_type_settings _response = await self._client_wrapper.httpx_client.request( "POST", urllib.parse.urljoin( - f"{self._client_wrapper.get_base_url()}/", "evaluations" + f"{self._client_wrapper.get_base_url()}/", "human-evaluations" + ), + json=jsonable_encoder( + { + "app_id": app_id, + "variant_ids": variant_ids, + "evaluation_type": evaluation_type, + "inputs": inputs, + "testset_id": testset_id, + "status": status, + } ), - json=jsonable_encoder(_request), headers=self._client_wrapper.get_headers(), timeout=60, ) @@ -3922,7 +4024,7 @@ async def create_evaluation( raise ApiError(status_code=_response.status_code, body=_response_json) async def delete_evaluations( - self, *, evaluations_ids: typing.List[str] + self, *, request: DeleteEvaluation ) -> typing.List[str]: """ Delete specific comparison tables based on their unique IDs. @@ -3934,19 +4036,20 @@ async def delete_evaluations( A list of the deleted comparison tables' IDs. Parameters: - - evaluations_ids: typing.List[str]. + - request: DeleteEvaluation. --- - from agenta.client import AsyncAgentaApi + from aakrem import DeleteEvaluation + from aakrem.client import AsyncAakremApi - client = AsyncAgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") - await client.delete_evaluations(evaluations_ids=[]) + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + await client.delete_evaluations(request=DeleteEvaluation(evaluations_ids=[])) """ _response = await self._client_wrapper.httpx_client.request( "DELETE", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", "evaluations" ), - json=jsonable_encoder({"evaluations_ids": evaluations_ids}), + json=jsonable_encoder(request), headers=self._client_wrapper.get_headers(), timeout=60, ) @@ -3960,15 +4063,16 @@ async def delete_evaluations( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def fetch_evaluation(self, evaluation_id: str) -> Evaluation: + async def fetch_evaluation_status(self, evaluation_id: str) -> typing.Any: """ - Fetches a single evaluation based on its ID. + Fetches the status of the evaluation. Args: - evaluation_id (str): The ID of the evaluation to fetch. + evaluation_id (str): the evaluation id + request (Request): the request object Returns: - Evaluation: The fetched evaluation. + (str): the evaluation status Parameters: - evaluation_id: str. 
@@ -3977,13 +4081,13 @@ async def fetch_evaluation(self, evaluation_id: str) -> Evaluation: "GET", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/{evaluation_id}", + f"evaluations/{evaluation_id}/status", ), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(Evaluation, _response.json()) # type: ignore + return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -3992,41 +4096,26 @@ async def fetch_evaluation(self, evaluation_id: str) -> Evaluation: raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def update_evaluation( - self, - evaluation_id: str, - *, - status: typing.Optional[EvaluationStatusEnum] = OMIT, - evaluation_type_settings: typing.Optional[EvaluationTypeSettings] = OMIT, - ) -> typing.Any: + async def fetch_evaluation_results(self, evaluation_id: str) -> typing.Any: """ - Updates an evaluation's status. + Fetches the results of the evaluation - Raises: - HTTPException: If the columns in the test set do not match with the inputs in the variant. + Args: + evaluation_id (str): the evaluation id + request (Request): the request object Returns: - None: A 204 No Content status code, indicating that the update was successful. + _type_: _description_ Parameters: - evaluation_id: str. - - - status: typing.Optional[EvaluationStatusEnum]. - - - evaluation_type_settings: typing.Optional[EvaluationTypeSettings]. """ - _request: typing.Dict[str, typing.Any] = {} - if status is not OMIT: - _request["status"] = status - if evaluation_type_settings is not OMIT: - _request["evaluation_type_settings"] = evaluation_type_settings _response = await self._client_wrapper.httpx_client.request( - "PUT", + "GET", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/{evaluation_id}", + f"evaluations/{evaluation_id}/results", ), - json=jsonable_encoder(_request), headers=self._client_wrapper.get_headers(), timeout=60, ) @@ -4042,7 +4131,7 @@ async def update_evaluation( async def fetch_evaluation_scenarios( self, evaluation_id: str - ) -> typing.List[EvaluationScenario]: + ) -> typing.List[HumanEvaluationScenario]: """ Fetches evaluation scenarios for a given evaluation ID. @@ -4058,22 +4147,22 @@ async def fetch_evaluation_scenarios( Parameters: - evaluation_id: str. 
--- - from agenta.client import AsyncAgentaApi + from aakrem.client import AsyncAakremApi - client = AsyncAgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") await client.fetch_evaluation_scenarios(evaluation_id="evaluation-id") """ _response = await self._client_wrapper.httpx_client.request( "GET", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/{evaluation_id}/evaluation_scenarios", + f"human-evaluations/{evaluation_id}/evaluation_scenarios", ), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(typing.List[EvaluationScenario], _response.json()) # type: ignore + return pydantic.parse_obj_as(typing.List[HumanEvaluationScenario], _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -4082,35 +4171,30 @@ async def fetch_evaluation_scenarios( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def create_evaluation_scenario( - self, evaluation_id: str, *, request: EvaluationScenario - ) -> typing.Any: + async def fetch_evaluation(self, evaluation_id: str) -> Evaluation: """ - Create a new evaluation scenario for a given evaluation ID. + Fetches a single evaluation based on its ID. - Raises: - HTTPException: If evaluation not found or access denied. + Args: + evaluation_id (str): The ID of the evaluation to fetch. Returns: - None: 204 No Content status code upon success. + Evaluation: The fetched evaluation. Parameters: - evaluation_id: str. - - - request: EvaluationScenario. """ _response = await self._client_wrapper.httpx_client.request( - "POST", + "GET", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/{evaluation_id}/evaluation_scenario", + f"evaluations/{evaluation_id}", ), - json=jsonable_encoder(request), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore + return pydantic.parse_obj_as(Evaluation, _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -4119,144 +4203,99 @@ async def create_evaluation_scenario( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def update_evaluation_scenario( - self, - evaluation_id: str, - evaluation_scenario_id: str, - evaluation_type: EvaluationType, - *, - vote: typing.Optional[str] = OMIT, - score: typing.Optional[EvaluationScenarioUpdateScore] = OMIT, - correct_answer: typing.Optional[str] = OMIT, - outputs: typing.Optional[typing.List[EvaluationScenarioOutput]] = OMIT, - inputs: typing.Optional[typing.List[EvaluationScenarioInput]] = OMIT, - is_pinned: typing.Optional[bool] = OMIT, - note: typing.Optional[str] = OMIT, - ) -> typing.Any: + async def webhook_example_fake(self) -> EvaluationWebhook: """ - Updates an evaluation scenario's vote or score based on its type. - - Raises: - HTTPException: If update fails or unauthorized. 
+ Returns a fake score response for example webhook evaluation Returns: - None: 204 No Content status code upon successful update. - - Parameters: - - evaluation_id: str. - - - evaluation_scenario_id: str. - - - evaluation_type: EvaluationType. - - - vote: typing.Optional[str]. - - - score: typing.Optional[EvaluationScenarioUpdateScore]. - - - correct_answer: typing.Optional[str]. - - - outputs: typing.Optional[typing.List[EvaluationScenarioOutput]]. - - - inputs: typing.Optional[typing.List[EvaluationScenarioInput]]. - - - is_pinned: typing.Optional[bool]. - - - note: typing.Optional[str]. + _description_ """ - _request: typing.Dict[str, typing.Any] = {} - if vote is not OMIT: - _request["vote"] = vote - if score is not OMIT: - _request["score"] = score - if correct_answer is not OMIT: - _request["correct_answer"] = correct_answer - if outputs is not OMIT: - _request["outputs"] = outputs - if inputs is not OMIT: - _request["inputs"] = inputs - if is_pinned is not OMIT: - _request["is_pinned"] = is_pinned - if note is not OMIT: - _request["note"] = note _response = await self._client_wrapper.httpx_client.request( - "PUT", + "POST", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/{evaluation_id}/evaluation_scenario/{evaluation_scenario_id}/{evaluation_type}", + "evaluations/webhook_example_fake", ), - json=jsonable_encoder(_request), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore - if _response.status_code == 422: - raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore + return pydantic.parse_obj_as(EvaluationWebhook, _response.json()) # type: ignore try: _response_json = _response.json() except JSONDecodeError: raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def evaluate_ai_critique( - self, - *, - correct_answer: str, - llm_app_prompt_template: typing.Optional[str] = OMIT, - inputs: typing.List[EvaluationScenarioInput], - outputs: typing.List[EvaluationScenarioOutput], - evaluation_prompt_template: typing.Optional[str] = OMIT, - open_ai_key: typing.Optional[str] = OMIT, - ) -> str: + async def fetch_evaluation_scenarios_evaluations_evaluation_scenarios_comparison_results_get( + self, *, evaluations_ids: str + ) -> typing.Any: """ - Evaluate AI critique based on the given payload. - - Args: - payload (AICritiqueCreate): The payload containing data for AI critique evaluation. - stoken_session (SessionContainer): The session container verified by `verify_session`. + Fetches evaluation scenarios for a given evaluation ID. - Returns: - str: The output of the AI critique evaluation. + Arguments: + evaluation_id (str): The ID of the evaluation for which to fetch scenarios. Raises: - HTTPException: If any exception occurs during the evaluation. + HTTPException: If the evaluation is not found or access is denied. + + Returns: + List[EvaluationScenario]: A list of evaluation scenarios. Parameters: - - correct_answer: str. + - evaluations_ids: str. 
+ """ + _response = await self._client_wrapper.httpx_client.request( + "GET", + urllib.parse.urljoin( + f"{self._client_wrapper.get_base_url()}/", + "evaluations/evaluation_scenarios/comparison-results", + ), + params=remove_none_from_dict({"evaluations_ids": evaluations_ids}), + headers=self._client_wrapper.get_headers(), + timeout=60, + ) + if 200 <= _response.status_code < 300: + return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore + if _response.status_code == 422: + raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore + try: + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) - - llm_app_prompt_template: typing.Optional[str]. + async def fetch_list_human_evaluations_human_evaluations_get( + self, *, app_id: str + ) -> typing.List[HumanEvaluation]: + """ + Fetches a list of evaluations, optionally filtered by an app ID. - - inputs: typing.List[EvaluationScenarioInput]. + Args: + app_id (Optional[str]): An optional app ID to filter the evaluations. - - outputs: typing.List[EvaluationScenarioOutput]. + Returns: + List[HumanEvaluation]: A list of evaluations. - - evaluation_prompt_template: typing.Optional[str]. + Parameters: + - app_id: str. + --- + from aakrem.client import AsyncAakremApi - - open_ai_key: typing.Optional[str]. + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + await client.fetch_list_human_evaluations_human_evaluations_get(app_id="app-id") """ - _request: typing.Dict[str, typing.Any] = { - "correct_answer": correct_answer, - "inputs": inputs, - "outputs": outputs, - } - if llm_app_prompt_template is not OMIT: - _request["llm_app_prompt_template"] = llm_app_prompt_template - if evaluation_prompt_template is not OMIT: - _request["evaluation_prompt_template"] = evaluation_prompt_template - if open_ai_key is not OMIT: - _request["open_ai_key"] = open_ai_key _response = await self._client_wrapper.httpx_client.request( - "POST", + "GET", urllib.parse.urljoin( - f"{self._client_wrapper.get_base_url()}/", - "evaluations/evaluation_scenario/ai_critique", + f"{self._client_wrapper.get_base_url()}/", "human-evaluations" ), - json=jsonable_encoder(_request), + params=remove_none_from_dict({"app_id": app_id}), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(str, _response.json()) # type: ignore + return pydantic.parse_obj_as(typing.List[HumanEvaluation], _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -4265,38 +4304,38 @@ async def evaluate_ai_critique( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def get_evaluation_scenario_score( - self, evaluation_scenario_id: str - ) -> typing.Dict[str, str]: + async def delete_evaluations_human_evaluations_delete( + self, *, request: DeleteEvaluation + ) -> typing.List[str]: """ - Fetch the score of a specific evaluation scenario. + Delete specific comparison tables based on their unique IDs. Args: - evaluation_scenario_id: The ID of the evaluation scenario to fetch. - stoken_session: Session data, verified by `verify_session`. 
+ delete_evaluations (List[str]): The unique identifiers of the comparison tables to delete. Returns: - Dictionary containing the scenario ID and its score. + A list of the deleted comparison tables' IDs. Parameters: - - evaluation_scenario_id: str. + - request: DeleteEvaluation. --- - from agenta.client import AsyncAgentaApi + from aakrem import DeleteEvaluation + from aakrem.client import AsyncAakremApi - client = AsyncAgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") - await client.get_evaluation_scenario_score(evaluation_scenario_id="evaluation-scenario-id") + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + await client.delete_evaluations_human_evaluations_delete(request=DeleteEvaluation(evaluations_ids=[])) """ _response = await self._client_wrapper.httpx_client.request( - "GET", + "DELETE", urllib.parse.urljoin( - f"{self._client_wrapper.get_base_url()}/", - f"evaluations/evaluation_scenario/{evaluation_scenario_id}/score", + f"{self._client_wrapper.get_base_url()}/", "human-evaluations" ), + json=jsonable_encoder(request), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(typing.Dict[str, str], _response.json()) # type: ignore + return pydantic.parse_obj_as(typing.List[str], _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -4305,35 +4344,32 @@ async def get_evaluation_scenario_score( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def update_evaluation_scenario_score( - self, evaluation_scenario_id: str, *, score: float - ) -> typing.Any: + async def fetch_human_evaluation_human_evaluations_evaluation_id_get( + self, evaluation_id: str + ) -> HumanEvaluation: """ - Updates the score of an evaluation scenario. + Fetches a single evaluation based on its ID. - Raises: - HTTPException: Server error if the evaluation update fails. + Args: + evaluation_id (str): The ID of the evaluation to fetch. Returns: - None: 204 No Content status code upon successful update. + HumanEvaluation: The fetched evaluation. Parameters: - - evaluation_scenario_id: str. - - - score: float. + - evaluation_id: str. 
""" _response = await self._client_wrapper.httpx_client.request( - "PUT", + "GET", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/evaluation_scenario/{evaluation_scenario_id}/score", + f"human-evaluations/{evaluation_id}", ), - json=jsonable_encoder({"score": score}), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore + return pydantic.parse_obj_as(HumanEvaluation, _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -4342,25 +4378,36 @@ async def update_evaluation_scenario_score( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def fetch_results(self, evaluation_id: str) -> typing.Any: + async def update_human_evaluation( + self, + evaluation_id: str, + *, + status: typing.Optional[EvaluationStatusEnum] = OMIT, + ) -> typing.Any: """ - Fetch all the results for one the comparison table + Updates an evaluation's status. - Arguments: - evaluation*id -- \_description* + Raises: + HTTPException: If the columns in the test set do not match with the inputs in the variant. Returns: - _description_ + None: A 204 No Content status code, indicating that the update was successful. Parameters: - evaluation_id: str. + + - status: typing.Optional[EvaluationStatusEnum]. """ + _request: typing.Dict[str, typing.Any] = {} + if status is not OMIT: + _request["status"] = status _response = await self._client_wrapper.httpx_client.request( - "GET", + "PUT", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/{evaluation_id}/results", + f"human-evaluations/{evaluation_id}", ), + json=jsonable_encoder(_request), headers=self._client_wrapper.get_headers(), timeout=60, ) @@ -4374,26 +4421,72 @@ async def fetch_results(self, evaluation_id: str) -> typing.Any: raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def create_custom_evaluation( - self, *, request: CreateCustomEvaluation + async def update_evaluation_scenario_router_human_evaluations_evaluation_id_evaluation_scenario_evaluation_scenario_id_evaluation_type_put( + self, + evaluation_id: str, + evaluation_scenario_id: str, + evaluation_type: EvaluationType, + *, + vote: typing.Optional[str] = OMIT, + score: typing.Optional[HumanEvaluationScenarioUpdateScore] = OMIT, + correct_answer: typing.Optional[str] = OMIT, + outputs: typing.Optional[typing.List[HumanEvaluationScenarioOutput]] = OMIT, + inputs: typing.Optional[typing.List[HumanEvaluationScenarioInput]] = OMIT, + is_pinned: typing.Optional[bool] = OMIT, + note: typing.Optional[str] = OMIT, ) -> typing.Any: """ - Create evaluation with custom python code. + Updates an evaluation scenario's vote or score based on its type. - Args: + Raises: + HTTPException: If update fails or unauthorized. - custom_evaluation_payload (CreateCustomEvaluation): the required payload + Returns: + None: 204 No Content status code upon successful update. Parameters: - - request: CreateCustomEvaluation. + - evaluation_id: str. + + - evaluation_scenario_id: str. + + - evaluation_type: EvaluationType. + + - vote: typing.Optional[str]. + + - score: typing.Optional[HumanEvaluationScenarioUpdateScore]. 
+ + - correct_answer: typing.Optional[str]. + + - outputs: typing.Optional[typing.List[HumanEvaluationScenarioOutput]]. + + - inputs: typing.Optional[typing.List[HumanEvaluationScenarioInput]]. + + - is_pinned: typing.Optional[bool]. + + - note: typing.Optional[str]. """ + _request: typing.Dict[str, typing.Any] = {} + if vote is not OMIT: + _request["vote"] = vote + if score is not OMIT: + _request["score"] = score + if correct_answer is not OMIT: + _request["correct_answer"] = correct_answer + if outputs is not OMIT: + _request["outputs"] = outputs + if inputs is not OMIT: + _request["inputs"] = inputs + if is_pinned is not OMIT: + _request["is_pinned"] = is_pinned + if note is not OMIT: + _request["note"] = note _response = await self._client_wrapper.httpx_client.request( - "POST", + "PUT", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - "evaluations/custom_evaluation", + f"human-evaluations/{evaluation_id}/evaluation_scenario/{evaluation_scenario_id}/{evaluation_type}", ), - json=jsonable_encoder(request), + json=jsonable_encoder(_request), headers=self._client_wrapper.get_headers(), timeout=60, ) @@ -4407,30 +4500,40 @@ async def create_custom_evaluation( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def get_custom_evaluation(self, id: str) -> CustomEvaluationDetail: + async def get_evaluation_scenario_score_router_human_evaluations_evaluation_scenario_evaluation_scenario_id_score_get( + self, evaluation_scenario_id: str + ) -> typing.Dict[str, str]: """ - Get the custom code evaluation detail. + Fetch the score of a specific evaluation scenario. Args: - id (str): the id of the custom evaluation + evaluation_scenario_id: The ID of the evaluation scenario to fetch. + stoken_session: Session data, verified by `verify_session`. Returns: - CustomEvaluationDetail: Detail of the custom evaluation + Dictionary containing the scenario ID and its score. Parameters: - - id: str. + - evaluation_scenario_id: str. 
+ --- + from aakrem.client import AsyncAakremApi + + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + await client.get_evaluation_scenario_score_router_human_evaluations_evaluation_scenario_evaluation_scenario_id_score_get( + evaluation_scenario_id="evaluation-scenario-id" + ) """ _response = await self._client_wrapper.httpx_client.request( "GET", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/custom_evaluation/{id}", + f"human-evaluations/evaluation_scenario/{evaluation_scenario_id}/score", ), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(CustomEvaluationDetail, _response.json()) # type: ignore + return pydantic.parse_obj_as(typing.Dict[str, str], _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -4439,28 +4542,30 @@ async def get_custom_evaluation(self, id: str) -> CustomEvaluationDetail: raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def update_custom_evaluation( - self, id: str, *, request: CreateCustomEvaluation + async def update_evaluation_scenario_score_router_human_evaluations_evaluation_scenario_evaluation_scenario_id_score_put( + self, evaluation_scenario_id: str, *, score: float ) -> typing.Any: """ - Update a custom code evaluation. - Args: - id (str): the ID of the custom evaluation to update - updated_data (CreateCustomEvaluation): the payload with updated data - stoken_session (SessionContainer): session container for authentication + Updates the score of an evaluation scenario. + + Raises: + HTTPException: Server error if the evaluation update fails. + + Returns: + None: 204 No Content status code upon successful update. Parameters: - - id: str. + - evaluation_scenario_id: str. - - request: CreateCustomEvaluation. + - score: float. """ _response = await self._client_wrapper.httpx_client.request( "PUT", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/custom_evaluation/{id}", + f"human-evaluations/evaluation_scenario/{evaluation_scenario_id}/score", ), - json=jsonable_encoder(request), + json=jsonable_encoder({"score": score}), headers=self._client_wrapper.get_headers(), timeout=60, ) @@ -4474,37 +4579,30 @@ async def update_custom_evaluation( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def list_custom_evaluations( - self, app_id: str - ) -> typing.List[CustomEvaluationOutput]: + async def fetch_results(self, evaluation_id: str) -> typing.Any: """ - List the custom code evaluations for a given app. + Fetch all the results for one the comparison table - Args: - app_id (str): the id of the app + Arguments: + evaluation*id -- \_description* Returns: - List[CustomEvaluationOutput]: a list of custom evaluation + _description_ Parameters: - - app_id: str. - --- - from agenta.client import AsyncAgentaApi - - client = AsyncAgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") - await client.list_custom_evaluations(app_id="app-id") + - evaluation_id: str. 
""" _response = await self._client_wrapper.httpx_client.request( "GET", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/custom_evaluation/list/{app_id}", + f"human-evaluations/{evaluation_id}/results", ), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(typing.List[CustomEvaluationOutput], _response.json()) # type: ignore + return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -4513,37 +4611,66 @@ async def list_custom_evaluations( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def get_custom_evaluation_names( - self, app_name: str - ) -> typing.List[CustomEvaluationNames]: + async def get_evaluators_endpoint_evaluators_get(self) -> typing.List[Evaluator]: + """ + Endpoint to fetch a list of evaluators. + + Returns: + List[Evaluator]: A list of evaluator objects. + + --- + from aakrem.client import AsyncAakremApi + + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + await client.get_evaluators_endpoint_evaluators_get() + """ + _response = await self._client_wrapper.httpx_client.request( + "GET", + urllib.parse.urljoin( + f"{self._client_wrapper.get_base_url()}/", "evaluators" + ), + headers=self._client_wrapper.get_headers(), + timeout=60, + ) + if 200 <= _response.status_code < 300: + return pydantic.parse_obj_as(typing.List[Evaluator], _response.json()) # type: ignore + try: + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + async def get_evaluator_configs_evaluators_configs_get( + self, *, app_id: str + ) -> typing.List[EvaluatorConfig]: """ - Get the names of custom evaluation for a given app. + Endpoint to fetch evaluator configurations for a specific app. Args: - app_name (str): the name of the app the evaluation belongs to + app_id (str): The ID of the app. Returns: - List[CustomEvaluationNames]: the list of name of custom evaluations + List[EvaluatorConfigDB]: A list of evaluator configuration objects. Parameters: - - app_name: str. + - app_id: str. 
--- - from agenta.client import AsyncAgentaApi + from aakrem.client import AsyncAakremApi - client = AsyncAgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") - await client.get_custom_evaluation_names(app_name="app-name") + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + await client.get_evaluator_configs_evaluators_configs_get(app_id="app-id") """ _response = await self._client_wrapper.httpx_client.request( "GET", urllib.parse.urljoin( - f"{self._client_wrapper.get_base_url()}/", - f"evaluations/custom_evaluation/{app_name}/names", + f"{self._client_wrapper.get_base_url()}/", "evaluators/configs" ), + params=remove_none_from_dict({"app_id": app_id}), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(typing.List[CustomEvaluationNames], _response.json()) # type: ignore + return pydantic.parse_obj_as(typing.List[EvaluatorConfig], _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -4552,59 +4679,50 @@ async def get_custom_evaluation_names( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def execute_custom_evaluation( + async def create_new_evaluator_config_evaluators_configs_post( self, - evaluation_id: str, *, - inputs: typing.List[typing.Dict[str, typing.Any]], app_id: str, - variant_id: str, - correct_answer: str, - outputs: typing.List[typing.Dict[str, typing.Any]], - ) -> typing.Any: + name: str, + evaluator_key: str, + settings_values: typing.Dict[str, typing.Any], + ) -> EvaluatorConfig: """ - Execute a custom evaluation code. + Endpoint to fetch evaluator configurations for a specific app. Args: - evaluation_id (str): the custom evaluation id - payload (ExecuteCustomEvaluationCode): the required payload + app_id (str): The ID of the app. Returns: - float: the result of the evaluation custom code + EvaluatorConfigDB: Evaluator configuration api model. Parameters: - - evaluation_id: str. - - - inputs: typing.List[typing.Dict[str, typing.Any]]. - - app_id: str. - - variant_id: str. + - name: str. - - correct_answer: str. + - evaluator_key: str. - - outputs: typing.List[typing.Dict[str, typing.Any]]. + - settings_values: typing.Dict[str, typing.Any]. 
""" _response = await self._client_wrapper.httpx_client.request( "POST", urllib.parse.urljoin( - f"{self._client_wrapper.get_base_url()}/", - f"evaluations/custom_evaluation/execute/{evaluation_id}", + f"{self._client_wrapper.get_base_url()}/", "evaluators/configs" ), json=jsonable_encoder( { - "inputs": inputs, "app_id": app_id, - "variant_id": variant_id, - "correct_answer": correct_answer, - "outputs": outputs, + "name": name, + "evaluator_key": evaluator_key, + "settings_values": settings_values, } ), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore + return pydantic.parse_obj_as(EvaluatorConfig, _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -4613,24 +4731,115 @@ async def execute_custom_evaluation( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def webhook_example_fake(self) -> EvaluationWebhook: + async def get_evaluator_config_evaluators_configs_evaluator_config_id_get( + self, evaluator_config_id: str + ) -> EvaluatorConfig: """ - Returns a fake score response for example webhook evaluation + Endpoint to fetch evaluator configurations for a specific app. Returns: - _description_ + List[EvaluatorConfigDB]: A list of evaluator configuration objects. + + Parameters: + - evaluator_config_id: str. """ _response = await self._client_wrapper.httpx_client.request( - "POST", + "GET", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - "evaluations/webhook_example_fake", + f"evaluators/configs/{evaluator_config_id}", ), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(EvaluationWebhook, _response.json()) # type: ignore + return pydantic.parse_obj_as(EvaluatorConfig, _response.json()) # type: ignore + if _response.status_code == 422: + raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore + try: + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + async def update_evaluator_config_evaluators_configs_evaluator_config_id_put( + self, + evaluator_config_id: str, + *, + name: typing.Optional[str] = OMIT, + evaluator_key: typing.Optional[str] = OMIT, + settings_values: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, + ) -> EvaluatorConfig: + """ + Endpoint to update evaluator configurations for a specific app. + + Returns: + List[EvaluatorConfigDB]: A list of evaluator configuration objects. + + Parameters: + - evaluator_config_id: str. + + - name: typing.Optional[str]. + + - evaluator_key: typing.Optional[str]. + + - settings_values: typing.Optional[typing.Dict[str, typing.Any]]. 
+ """ + _request: typing.Dict[str, typing.Any] = {} + if name is not OMIT: + _request["name"] = name + if evaluator_key is not OMIT: + _request["evaluator_key"] = evaluator_key + if settings_values is not OMIT: + _request["settings_values"] = settings_values + _response = await self._client_wrapper.httpx_client.request( + "PUT", + urllib.parse.urljoin( + f"{self._client_wrapper.get_base_url()}/", + f"evaluators/configs/{evaluator_config_id}", + ), + json=jsonable_encoder(_request), + headers=self._client_wrapper.get_headers(), + timeout=60, + ) + if 200 <= _response.status_code < 300: + return pydantic.parse_obj_as(EvaluatorConfig, _response.json()) # type: ignore + if _response.status_code == 422: + raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore + try: + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + async def delete_evaluator_config_evaluators_configs_evaluator_config_id_delete( + self, evaluator_config_id: str + ) -> bool: + """ + Endpoint to delete a specific evaluator configuration. + + Args: + evaluator_config_id (str): The unique identifier of the evaluator configuration. + + Returns: + bool: True if deletion was successful, False otherwise. + + Parameters: + - evaluator_config_id: str. + """ + _response = await self._client_wrapper.httpx_client.request( + "DELETE", + urllib.parse.urljoin( + f"{self._client_wrapper.get_base_url()}/", + f"evaluators/configs/{evaluator_config_id}", + ), + headers=self._client_wrapper.get_headers(), + timeout=60, + ) + if 200 <= _response.status_code < 300: + return pydantic.parse_obj_as(bool, _response.json()) # type: ignore + if _response.status_code == 422: + raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: _response_json = _response.json() except JSONDecodeError: @@ -4836,9 +5045,9 @@ async def get_testsets(self, *, app_id: str) -> typing.List[TestSetOutputRespons Parameters: - app_id: str. --- - from agenta.client import AsyncAgentaApi + from aakrem.client import AsyncAakremApi - client = AsyncAgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") await client.get_testsets(app_id="app-id") """ _response = await self._client_wrapper.httpx_client.request( @@ -4873,9 +5082,9 @@ async def delete_testsets( Parameters: - testset_ids: typing.List[str]. --- - from agenta.client import AsyncAgentaApi + from aakrem.client import AsyncAakremApi - client = AsyncAgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") await client.delete_testsets(testset_ids=[]) """ _response = await self._client_wrapper.httpx_client.request( @@ -5162,9 +5371,9 @@ async def get_traces(self, app_id: str, variant_id: str) -> typing.List[Trace]: - variant_id: str. 
--- - from agenta.client import AsyncAgentaApi + from aakrem.client import AsyncAakremApi - client = AsyncAgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") await client.get_traces(app_id="app-id", variant_id="variant-id") """ _response = await self._client_wrapper.httpx_client.request( @@ -5345,9 +5554,9 @@ async def get_spans_of_trace(self, trace_id: str) -> typing.List[Span]: Parameters: - trace_id: str. --- - from agenta.client import AsyncAgentaApi + from aakrem.client import AsyncAakremApi - client = AsyncAgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") await client.get_spans_of_trace(trace_id="trace-id") """ _response = await self._client_wrapper.httpx_client.request( @@ -5374,9 +5583,9 @@ async def get_feedbacks(self, trace_id: str) -> typing.List[Feedback]: Parameters: - trace_id: str. --- - from agenta.client import AsyncAgentaApi + from aakrem.client import AsyncAakremApi - client = AsyncAgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") await client.get_feedbacks(trace_id="trace-id") """ _response = await self._client_wrapper.httpx_client.request( @@ -5529,9 +5738,9 @@ async def list_organizations(self) -> typing.List[Organization]: HTTPException: If there is an error retrieving the organizations from the database. --- - from agenta.client import AsyncAgentaApi + from aakrem.client import AsyncAakremApi - client = AsyncAgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") await client.list_organizations() """ _response = await self._client_wrapper.httpx_client.request( @@ -5592,9 +5801,9 @@ async def list_bases( - base_name: typing.Optional[str]. 
--- - from agenta.client import AsyncAgentaApi + from aakrem.client import AsyncAakremApi - client = AsyncAgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") await client.list_bases() """ _response = await self._client_wrapper.httpx_client.request( diff --git a/agenta-cli/agenta/client/backend/types/__init__.py b/agenta-cli/agenta/client/backend/types/__init__.py index 4be042f7a1..59bde99e4f 100644 --- a/agenta-cli/agenta/client/backend/types/__init__.py +++ b/agenta-cli/agenta/client/backend/types/__init__.py @@ -3,37 +3,43 @@ from .add_variant_from_base_and_config_response import ( AddVariantFromBaseAndConfigResponse, ) +from .aggregated_result import AggregatedResult from .app import App from .app_variant_output import AppVariantOutput from .base_output import BaseOutput from .body_import_testset import BodyImportTestset from .container_templates_response import ContainerTemplatesResponse from .create_app_output import CreateAppOutput -from .create_custom_evaluation import CreateCustomEvaluation -from .custom_evaluation_detail import CustomEvaluationDetail -from .custom_evaluation_names import CustomEvaluationNames -from .custom_evaluation_output import CustomEvaluationOutput +from .delete_evaluation import DeleteEvaluation from .docker_env_vars import DockerEnvVars from .environment_output import EnvironmentOutput from .evaluation import Evaluation from .evaluation_scenario import EvaluationScenario from .evaluation_scenario_input import EvaluationScenarioInput from .evaluation_scenario_output import EvaluationScenarioOutput -from .evaluation_scenario_score import EvaluationScenarioScore -from .evaluation_scenario_update_score import EvaluationScenarioUpdateScore +from .evaluation_scenario_result import EvaluationScenarioResult from .evaluation_status_enum import EvaluationStatusEnum from .evaluation_type import EvaluationType -from .evaluation_type_settings import EvaluationTypeSettings from .evaluation_webhook import EvaluationWebhook +from .evaluator import Evaluator +from .evaluator_config import EvaluatorConfig from .feedback import Feedback from .get_config_reponse import GetConfigReponse from .http_validation_error import HttpValidationError +from .human_evaluation import HumanEvaluation +from .human_evaluation_scenario import HumanEvaluationScenario +from .human_evaluation_scenario_input import HumanEvaluationScenarioInput +from .human_evaluation_scenario_output import HumanEvaluationScenarioOutput +from .human_evaluation_scenario_score import HumanEvaluationScenarioScore +from .human_evaluation_scenario_update_score import HumanEvaluationScenarioUpdateScore from .image import Image from .invite_request import InviteRequest from .list_api_keys_output import ListApiKeysOutput +from .llm_run_rate_limit import LlmRunRateLimit from .new_testset import NewTestset from .organization import Organization from .organization_output import OrganizationOutput +from .result import Result from .simple_evaluation_output import SimpleEvaluationOutput from .span import Span from .template import Template @@ -49,37 +55,43 @@ __all__ = [ "AddVariantFromBaseAndConfigResponse", + "AggregatedResult", "App", "AppVariantOutput", "BaseOutput", "BodyImportTestset", "ContainerTemplatesResponse", "CreateAppOutput", - "CreateCustomEvaluation", - "CustomEvaluationDetail", - "CustomEvaluationNames", - "CustomEvaluationOutput", + "DeleteEvaluation", "DockerEnvVars", "EnvironmentOutput", 
"Evaluation", "EvaluationScenario", "EvaluationScenarioInput", "EvaluationScenarioOutput", - "EvaluationScenarioScore", - "EvaluationScenarioUpdateScore", + "EvaluationScenarioResult", "EvaluationStatusEnum", "EvaluationType", - "EvaluationTypeSettings", "EvaluationWebhook", + "Evaluator", + "EvaluatorConfig", "Feedback", "GetConfigReponse", "HttpValidationError", + "HumanEvaluation", + "HumanEvaluationScenario", + "HumanEvaluationScenarioInput", + "HumanEvaluationScenarioOutput", + "HumanEvaluationScenarioScore", + "HumanEvaluationScenarioUpdateScore", "Image", "InviteRequest", "ListApiKeysOutput", + "LlmRunRateLimit", "NewTestset", "Organization", "OrganizationOutput", + "Result", "SimpleEvaluationOutput", "Span", "Template", diff --git a/agenta-cli/agenta/client/backend/types/aggregated_result.py b/agenta-cli/agenta/client/backend/types/aggregated_result.py new file mode 100644 index 0000000000..ab5cd4ee8b --- /dev/null +++ b/agenta-cli/agenta/client/backend/types/aggregated_result.py @@ -0,0 +1,39 @@ +# This file was auto-generated by Fern from our API Definition. + +import datetime as dt +import typing + +from ..core.datetime_utils import serialize_datetime +from .evaluator_config import EvaluatorConfig +from .result import Result + +try: + import pydantic.v1 as pydantic # type: ignore +except ImportError: + import pydantic # type: ignore + + +class AggregatedResult(pydantic.BaseModel): + evaluator_config: EvaluatorConfig + result: Result + + def json(self, **kwargs: typing.Any) -> str: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().json(**kwargs_with_defaults) + + def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().dict(**kwargs_with_defaults) + + class Config: + frozen = True + smart_union = True + json_encoders = {dt.datetime: serialize_datetime} diff --git a/agenta-cli/agenta/client/backend/types/custom_evaluation_names.py b/agenta-cli/agenta/client/backend/types/delete_evaluation.py similarity index 91% rename from agenta-cli/agenta/client/backend/types/custom_evaluation_names.py rename to agenta-cli/agenta/client/backend/types/delete_evaluation.py index 860cf6b3db..46eea6648a 100644 --- a/agenta-cli/agenta/client/backend/types/custom_evaluation_names.py +++ b/agenta-cli/agenta/client/backend/types/delete_evaluation.py @@ -11,9 +11,8 @@ import pydantic # type: ignore -class CustomEvaluationNames(pydantic.BaseModel): - id: str - evaluation_name: str +class DeleteEvaluation(pydantic.BaseModel): + evaluations_ids: typing.List[str] def json(self, **kwargs: typing.Any) -> str: kwargs_with_defaults: typing.Any = { diff --git a/agenta-cli/agenta/client/backend/types/evaluation.py b/agenta-cli/agenta/client/backend/types/evaluation.py index b0211abd38..dcef94e56f 100644 --- a/agenta-cli/agenta/client/backend/types/evaluation.py +++ b/agenta-cli/agenta/client/backend/types/evaluation.py @@ -4,8 +4,7 @@ import typing from ..core.datetime_utils import serialize_datetime -from .evaluation_type import EvaluationType -from .evaluation_type_settings import EvaluationTypeSettings +from .aggregated_result import AggregatedResult try: import pydantic.v1 as pydantic # type: ignore @@ -18,13 +17,12 @@ class Evaluation(pydantic.BaseModel): app_id: str user_id: str user_username: str - evaluation_type: EvaluationType - evaluation_type_settings: typing.Optional[EvaluationTypeSettings] variant_ids: 
typing.List[str] variant_names: typing.List[str] testset_id: str testset_name: str status: str + aggregated_results: typing.List[AggregatedResult] created_at: dt.datetime updated_at: dt.datetime diff --git a/agenta-cli/agenta/client/backend/types/evaluation_scenario.py b/agenta-cli/agenta/client/backend/types/evaluation_scenario.py index 6e49169ba7..36229c357d 100644 --- a/agenta-cli/agenta/client/backend/types/evaluation_scenario.py +++ b/agenta-cli/agenta/client/backend/types/evaluation_scenario.py @@ -6,7 +6,7 @@ from ..core.datetime_utils import serialize_datetime from .evaluation_scenario_input import EvaluationScenarioInput from .evaluation_scenario_output import EvaluationScenarioOutput -from .evaluation_scenario_score import EvaluationScenarioScore +from .evaluation_scenario_result import EvaluationScenarioResult try: import pydantic.v1 as pydantic # type: ignore @@ -19,12 +19,11 @@ class EvaluationScenario(pydantic.BaseModel): evaluation_id: str inputs: typing.List[EvaluationScenarioInput] outputs: typing.List[EvaluationScenarioOutput] - vote: typing.Optional[str] - score: typing.Optional[EvaluationScenarioScore] evaluation: typing.Optional[str] correct_answer: typing.Optional[str] is_pinned: typing.Optional[bool] note: typing.Optional[str] + results: typing.List[EvaluationScenarioResult] def json(self, **kwargs: typing.Any) -> str: kwargs_with_defaults: typing.Any = { diff --git a/agenta-cli/agenta/client/backend/types/evaluation_scenario_input.py b/agenta-cli/agenta/client/backend/types/evaluation_scenario_input.py index ff78ae974f..700f6372e4 100644 --- a/agenta-cli/agenta/client/backend/types/evaluation_scenario_input.py +++ b/agenta-cli/agenta/client/backend/types/evaluation_scenario_input.py @@ -12,8 +12,9 @@ class EvaluationScenarioInput(pydantic.BaseModel): - input_name: str - input_value: str + name: str + type: str + value: typing.Optional[typing.Any] def json(self, **kwargs: typing.Any) -> str: kwargs_with_defaults: typing.Any = { diff --git a/agenta-cli/agenta/client/backend/types/evaluation_scenario_output.py b/agenta-cli/agenta/client/backend/types/evaluation_scenario_output.py index 17ecf2cc53..e869cb650b 100644 --- a/agenta-cli/agenta/client/backend/types/evaluation_scenario_output.py +++ b/agenta-cli/agenta/client/backend/types/evaluation_scenario_output.py @@ -12,8 +12,8 @@ class EvaluationScenarioOutput(pydantic.BaseModel): - variant_id: str - variant_output: str + type: str + value: typing.Optional[typing.Any] def json(self, **kwargs: typing.Any) -> str: kwargs_with_defaults: typing.Any = { diff --git a/agenta-cli/agenta/client/backend/types/custom_evaluation_output.py b/agenta-cli/agenta/client/backend/types/evaluation_scenario_result.py similarity index 87% rename from agenta-cli/agenta/client/backend/types/custom_evaluation_output.py rename to agenta-cli/agenta/client/backend/types/evaluation_scenario_result.py index 3a9d2a8be0..57fbd6e082 100644 --- a/agenta-cli/agenta/client/backend/types/custom_evaluation_output.py +++ b/agenta-cli/agenta/client/backend/types/evaluation_scenario_result.py @@ -4,6 +4,7 @@ import typing from ..core.datetime_utils import serialize_datetime +from .result import Result try: import pydantic.v1 as pydantic # type: ignore @@ -11,11 +12,9 @@ import pydantic # type: ignore -class CustomEvaluationOutput(pydantic.BaseModel): - id: str - app_id: str - evaluation_name: str - created_at: dt.datetime +class EvaluationScenarioResult(pydantic.BaseModel): + evaluator_config: str + result: Result def json(self, **kwargs: typing.Any) -> str: 
kwargs_with_defaults: typing.Any = { diff --git a/agenta-cli/agenta/client/backend/types/evaluation_status_enum.py b/agenta-cli/agenta/client/backend/types/evaluation_status_enum.py index 159716b2e6..43c2b002d4 100644 --- a/agenta-cli/agenta/client/backend/types/evaluation_status_enum.py +++ b/agenta-cli/agenta/client/backend/types/evaluation_status_enum.py @@ -13,21 +13,21 @@ class EvaluationStatusEnum(str, enum.Enum): EVALUATION_INITIALIZED = "EVALUATION_INITIALIZED" EVALUATION_STARTED = "EVALUATION_STARTED" - COMPARISON_RUN_STARTED = "COMPARISON_RUN_STARTED" EVALUATION_FINISHED = "EVALUATION_FINISHED" + EVALUATION_FAILED = "EVALUATION_FAILED" def visit( self, evaluation_initialized: typing.Callable[[], T_Result], evaluation_started: typing.Callable[[], T_Result], - comparison_run_started: typing.Callable[[], T_Result], evaluation_finished: typing.Callable[[], T_Result], + evaluation_failed: typing.Callable[[], T_Result], ) -> T_Result: if self is EvaluationStatusEnum.EVALUATION_INITIALIZED: return evaluation_initialized() if self is EvaluationStatusEnum.EVALUATION_STARTED: return evaluation_started() - if self is EvaluationStatusEnum.COMPARISON_RUN_STARTED: - return comparison_run_started() if self is EvaluationStatusEnum.EVALUATION_FINISHED: return evaluation_finished() + if self is EvaluationStatusEnum.EVALUATION_FAILED: + return evaluation_failed() diff --git a/agenta-cli/agenta/client/backend/types/evaluation_type.py b/agenta-cli/agenta/client/backend/types/evaluation_type.py index 29990df5e9..4d2b91066d 100644 --- a/agenta-cli/agenta/client/backend/types/evaluation_type.py +++ b/agenta-cli/agenta/client/backend/types/evaluation_type.py @@ -11,43 +11,15 @@ class EvaluationType(str, enum.Enum): An enumeration. """ - AUTO_EXACT_MATCH = "auto_exact_match" - AUTO_SIMILARITY_MATCH = "auto_similarity_match" - AUTO_REGEX_TEST = "auto_regex_test" - AUTO_WEBHOOK_TEST = "auto_webhook_test" - AUTO_AI_CRITIQUE = "auto_ai_critique" HUMAN_A_B_TESTING = "human_a_b_testing" - HUMAN_SCORING = "human_scoring" - CUSTOM_CODE_RUN = "custom_code_run" SINGLE_MODEL_TEST = "single_model_test" def visit( self, - auto_exact_match: typing.Callable[[], T_Result], - auto_similarity_match: typing.Callable[[], T_Result], - auto_regex_test: typing.Callable[[], T_Result], - auto_webhook_test: typing.Callable[[], T_Result], - auto_ai_critique: typing.Callable[[], T_Result], human_a_b_testing: typing.Callable[[], T_Result], - human_scoring: typing.Callable[[], T_Result], - custom_code_run: typing.Callable[[], T_Result], single_model_test: typing.Callable[[], T_Result], ) -> T_Result: - if self is EvaluationType.AUTO_EXACT_MATCH: - return auto_exact_match() - if self is EvaluationType.AUTO_SIMILARITY_MATCH: - return auto_similarity_match() - if self is EvaluationType.AUTO_REGEX_TEST: - return auto_regex_test() - if self is EvaluationType.AUTO_WEBHOOK_TEST: - return auto_webhook_test() - if self is EvaluationType.AUTO_AI_CRITIQUE: - return auto_ai_critique() if self is EvaluationType.HUMAN_A_B_TESTING: return human_a_b_testing() - if self is EvaluationType.HUMAN_SCORING: - return human_scoring() - if self is EvaluationType.CUSTOM_CODE_RUN: - return custom_code_run() if self is EvaluationType.SINGLE_MODEL_TEST: return single_model_test() diff --git a/agenta-cli/agenta/client/backend/types/evaluation_type_settings.py b/agenta-cli/agenta/client/backend/types/evaluation_type_settings.py deleted file mode 100644 index 3b6c1d0691..0000000000 --- a/agenta-cli/agenta/client/backend/types/evaluation_type_settings.py +++ 
/dev/null @@ -1,42 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import datetime as dt -import typing - -from ..core.datetime_utils import serialize_datetime - -try: - import pydantic.v1 as pydantic # type: ignore -except ImportError: - import pydantic # type: ignore - - -class EvaluationTypeSettings(pydantic.BaseModel): - similarity_threshold: typing.Optional[float] - regex_pattern: typing.Optional[str] - regex_should_match: typing.Optional[bool] - webhook_url: typing.Optional[str] - custom_code_evaluation_id: typing.Optional[str] - llm_app_prompt_template: typing.Optional[str] - evaluation_prompt_template: typing.Optional[str] - - def json(self, **kwargs: typing.Any) -> str: - kwargs_with_defaults: typing.Any = { - "by_alias": True, - "exclude_unset": True, - **kwargs, - } - return super().json(**kwargs_with_defaults) - - def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]: - kwargs_with_defaults: typing.Any = { - "by_alias": True, - "exclude_unset": True, - **kwargs, - } - return super().dict(**kwargs_with_defaults) - - class Config: - frozen = True - smart_union = True - json_encoders = {dt.datetime: serialize_datetime} diff --git a/agenta-cli/agenta/client/backend/types/evaluator.py b/agenta-cli/agenta/client/backend/types/evaluator.py new file mode 100644 index 0000000000..70bea7aa58 --- /dev/null +++ b/agenta-cli/agenta/client/backend/types/evaluator.py @@ -0,0 +1,39 @@ +# This file was auto-generated by Fern from our API Definition. + +import datetime as dt +import typing + +from ..core.datetime_utils import serialize_datetime + +try: + import pydantic.v1 as pydantic # type: ignore +except ImportError: + import pydantic # type: ignore + + +class Evaluator(pydantic.BaseModel): + name: str + key: str + direct_use: bool + settings_template: typing.Dict[str, typing.Any] + + def json(self, **kwargs: typing.Any) -> str: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().json(**kwargs_with_defaults) + + def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().dict(**kwargs_with_defaults) + + class Config: + frozen = True + smart_union = True + json_encoders = {dt.datetime: serialize_datetime} diff --git a/agenta-cli/agenta/client/backend/types/custom_evaluation_detail.py b/agenta-cli/agenta/client/backend/types/evaluator_config.py similarity index 86% rename from agenta-cli/agenta/client/backend/types/custom_evaluation_detail.py rename to agenta-cli/agenta/client/backend/types/evaluator_config.py index 2ae2a3f96f..7ca248d882 100644 --- a/agenta-cli/agenta/client/backend/types/custom_evaluation_detail.py +++ b/agenta-cli/agenta/client/backend/types/evaluator_config.py @@ -11,11 +11,11 @@ import pydantic # type: ignore -class CustomEvaluationDetail(pydantic.BaseModel): +class EvaluatorConfig(pydantic.BaseModel): id: str - app_id: str - evaluation_name: str - python_code: str + name: str + evaluator_key: str + settings_values: typing.Optional[typing.Dict[str, typing.Any]] created_at: dt.datetime updated_at: dt.datetime diff --git a/agenta-cli/agenta/client/backend/types/human_evaluation.py b/agenta-cli/agenta/client/backend/types/human_evaluation.py new file mode 100644 index 0000000000..0fcf73d267 --- /dev/null +++ b/agenta-cli/agenta/client/backend/types/human_evaluation.py @@ -0,0 +1,48 @@ +# This file was auto-generated by Fern from our API 
Definition. + +import datetime as dt +import typing + +from ..core.datetime_utils import serialize_datetime +from .evaluation_type import EvaluationType + +try: + import pydantic.v1 as pydantic # type: ignore +except ImportError: + import pydantic # type: ignore + + +class HumanEvaluation(pydantic.BaseModel): + id: str + app_id: str + user_id: str + user_username: str + evaluation_type: EvaluationType + variant_ids: typing.List[str] + variant_names: typing.List[str] + testset_id: str + testset_name: str + status: str + created_at: dt.datetime + updated_at: dt.datetime + + def json(self, **kwargs: typing.Any) -> str: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().json(**kwargs_with_defaults) + + def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().dict(**kwargs_with_defaults) + + class Config: + frozen = True + smart_union = True + json_encoders = {dt.datetime: serialize_datetime} diff --git a/agenta-cli/agenta/client/backend/types/human_evaluation_scenario.py b/agenta-cli/agenta/client/backend/types/human_evaluation_scenario.py new file mode 100644 index 0000000000..cac5dbdd5e --- /dev/null +++ b/agenta-cli/agenta/client/backend/types/human_evaluation_scenario.py @@ -0,0 +1,48 @@ +# This file was auto-generated by Fern from our API Definition. + +import datetime as dt +import typing + +from ..core.datetime_utils import serialize_datetime +from .human_evaluation_scenario_input import HumanEvaluationScenarioInput +from .human_evaluation_scenario_output import HumanEvaluationScenarioOutput +from .human_evaluation_scenario_score import HumanEvaluationScenarioScore + +try: + import pydantic.v1 as pydantic # type: ignore +except ImportError: + import pydantic # type: ignore + + +class HumanEvaluationScenario(pydantic.BaseModel): + id: typing.Optional[str] + evaluation_id: str + inputs: typing.List[HumanEvaluationScenarioInput] + outputs: typing.List[HumanEvaluationScenarioOutput] + vote: typing.Optional[str] + score: typing.Optional[HumanEvaluationScenarioScore] + evaluation: typing.Optional[str] + correct_answer: typing.Optional[str] + is_pinned: typing.Optional[bool] + note: typing.Optional[str] + + def json(self, **kwargs: typing.Any) -> str: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().json(**kwargs_with_defaults) + + def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().dict(**kwargs_with_defaults) + + class Config: + frozen = True + smart_union = True + json_encoders = {dt.datetime: serialize_datetime} diff --git a/agenta-cli/agenta/client/backend/types/create_custom_evaluation.py b/agenta-cli/agenta/client/backend/types/human_evaluation_scenario_input.py similarity index 89% rename from agenta-cli/agenta/client/backend/types/create_custom_evaluation.py rename to agenta-cli/agenta/client/backend/types/human_evaluation_scenario_input.py index 452f784611..d65233935e 100644 --- a/agenta-cli/agenta/client/backend/types/create_custom_evaluation.py +++ b/agenta-cli/agenta/client/backend/types/human_evaluation_scenario_input.py @@ -11,10 +11,9 @@ import pydantic # type: ignore -class CreateCustomEvaluation(pydantic.BaseModel): - evaluation_name: str - python_code: str - app_id: str +class 
HumanEvaluationScenarioInput(pydantic.BaseModel): + input_name: str + input_value: str def json(self, **kwargs: typing.Any) -> str: kwargs_with_defaults: typing.Any = { diff --git a/agenta-cli/agenta/client/backend/types/human_evaluation_scenario_output.py b/agenta-cli/agenta/client/backend/types/human_evaluation_scenario_output.py new file mode 100644 index 0000000000..752606176b --- /dev/null +++ b/agenta-cli/agenta/client/backend/types/human_evaluation_scenario_output.py @@ -0,0 +1,37 @@ +# This file was auto-generated by Fern from our API Definition. + +import datetime as dt +import typing + +from ..core.datetime_utils import serialize_datetime + +try: + import pydantic.v1 as pydantic # type: ignore +except ImportError: + import pydantic # type: ignore + + +class HumanEvaluationScenarioOutput(pydantic.BaseModel): + variant_id: str + variant_output: str + + def json(self, **kwargs: typing.Any) -> str: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().json(**kwargs_with_defaults) + + def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().dict(**kwargs_with_defaults) + + class Config: + frozen = True + smart_union = True + json_encoders = {dt.datetime: serialize_datetime} diff --git a/agenta-cli/agenta/client/backend/types/evaluation_scenario_score.py b/agenta-cli/agenta/client/backend/types/human_evaluation_scenario_score.py similarity index 59% rename from agenta-cli/agenta/client/backend/types/evaluation_scenario_score.py rename to agenta-cli/agenta/client/backend/types/human_evaluation_scenario_score.py index 0dc572cd6d..eb99491278 100644 --- a/agenta-cli/agenta/client/backend/types/evaluation_scenario_score.py +++ b/agenta-cli/agenta/client/backend/types/human_evaluation_scenario_score.py @@ -2,4 +2,4 @@ import typing -EvaluationScenarioScore = typing.Union[int, str] +HumanEvaluationScenarioScore = typing.Union[str, int] diff --git a/agenta-cli/agenta/client/backend/types/evaluation_scenario_update_score.py b/agenta-cli/agenta/client/backend/types/human_evaluation_scenario_update_score.py similarity index 57% rename from agenta-cli/agenta/client/backend/types/evaluation_scenario_update_score.py rename to agenta-cli/agenta/client/backend/types/human_evaluation_scenario_update_score.py index 5c87996489..02d51b079b 100644 --- a/agenta-cli/agenta/client/backend/types/evaluation_scenario_update_score.py +++ b/agenta-cli/agenta/client/backend/types/human_evaluation_scenario_update_score.py @@ -2,4 +2,4 @@ import typing -EvaluationScenarioUpdateScore = typing.Union[int, str] +HumanEvaluationScenarioUpdateScore = typing.Union[str, int] diff --git a/agenta-cli/agenta/client/backend/types/llm_run_rate_limit.py b/agenta-cli/agenta/client/backend/types/llm_run_rate_limit.py new file mode 100644 index 0000000000..24214f4c59 --- /dev/null +++ b/agenta-cli/agenta/client/backend/types/llm_run_rate_limit.py @@ -0,0 +1,39 @@ +# This file was auto-generated by Fern from our API Definition. 
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+
+try:
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class LlmRunRateLimit(pydantic.BaseModel):
+    batch_size: int
+    max_retries: int
+    retry_delay: int
+    delay_between_batches: int
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {
+            "by_alias": True,
+            "exclude_unset": True,
+            **kwargs,
+        }
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {
+            "by_alias": True,
+            "exclude_unset": True,
+            **kwargs,
+        }
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
diff --git a/agenta-cli/agenta/client/backend/types/result.py b/agenta-cli/agenta/client/backend/types/result.py
new file mode 100644
index 0000000000..544336c305
--- /dev/null
+++ b/agenta-cli/agenta/client/backend/types/result.py
@@ -0,0 +1,37 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+
+try:
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class Result(pydantic.BaseModel):
+    type: str
+    value: typing.Optional[typing.Any]
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {
+            "by_alias": True,
+            "exclude_unset": True,
+            **kwargs,
+        }
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {
+            "by_alias": True,
+            "exclude_unset": True,
+            **kwargs,
+        }
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
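
The sketch below is not part of the generated diff; it is a minimal, illustrative walk-through of the reworked client surface, assuming the package is importable as aakrem (as the updated docstring examples suggest). Human evaluations are now served from the human-evaluations routes, bulk deletion takes a DeleteEvaluation request model, and per-app evaluator settings are managed through the evaluators and evaluators/configs endpoints. The API key, base URL, app ID, evaluator key, and settings values are placeholders, not real values.

import asyncio

from aakrem import DeleteEvaluation
from aakrem.client import AsyncAakremApi


async def main() -> None:
    # Placeholder credentials; substitute real values.
    client = AsyncAakremApi(
        api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api"
    )

    # Discover the available evaluators and the configs already attached to an app.
    evaluators = await client.get_evaluators_endpoint_evaluators_get()
    configs = await client.get_evaluator_configs_evaluators_configs_get(app_id="app-id")
    print([e.key for e in evaluators], [c.name for c in configs])

    # Create an evaluator configuration; the key and settings here are illustrative only.
    config = await client.create_new_evaluator_config_evaluators_configs_post(
        app_id="app-id",
        name="my-evaluator-config",
        evaluator_key="some_evaluator_key",
        settings_values={},
    )
    print(config.id)

    # Human evaluations are fetched and deleted through the human-evaluations routes,
    # with deletion wrapped in the new DeleteEvaluation request model.
    human_evals = await client.fetch_list_human_evaluations_human_evaluations_get(
        app_id="app-id"
    )
    await client.delete_evaluations_human_evaluations_delete(
        request=DeleteEvaluation(evaluations_ids=[ev.id for ev in human_evals])
    )


asyncio.run(main())

The split mirrors the type changes earlier in the diff: EvaluationTypeSettings is removed, the human-specific scenario fields move to the HumanEvaluation* models, and automatic evaluations report their outcomes through the new Result and AggregatedResult models instead.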