diff --git a/agenta-cli/agenta/client/backend/__init__.py b/agenta-cli/agenta/client/backend/__init__.py index eb6978b547..a5882f65ea 100644 --- a/agenta-cli/agenta/client/backend/__init__.py +++ b/agenta-cli/agenta/client/backend/__init__.py @@ -2,37 +2,43 @@ from .types import ( AddVariantFromBaseAndConfigResponse, + AggregatedResult, App, AppVariantOutput, BaseOutput, BodyImportTestset, ContainerTemplatesResponse, CreateAppOutput, - CreateCustomEvaluation, - CustomEvaluationDetail, - CustomEvaluationNames, - CustomEvaluationOutput, + DeleteEvaluation, DockerEnvVars, EnvironmentOutput, Evaluation, EvaluationScenario, EvaluationScenarioInput, EvaluationScenarioOutput, - EvaluationScenarioScore, - EvaluationScenarioUpdateScore, + EvaluationScenarioResult, EvaluationStatusEnum, EvaluationType, - EvaluationTypeSettings, EvaluationWebhook, + Evaluator, + EvaluatorConfig, Feedback, GetConfigReponse, HttpValidationError, + HumanEvaluation, + HumanEvaluationScenario, + HumanEvaluationScenarioInput, + HumanEvaluationScenarioOutput, + HumanEvaluationScenarioScore, + HumanEvaluationScenarioUpdateScore, Image, InviteRequest, ListApiKeysOutput, + LlmRunRateLimit, NewTestset, Organization, OrganizationOutput, + Result, SimpleEvaluationOutput, Span, Template, @@ -50,37 +56,43 @@ __all__ = [ "AddVariantFromBaseAndConfigResponse", + "AggregatedResult", "App", "AppVariantOutput", "BaseOutput", "BodyImportTestset", "ContainerTemplatesResponse", "CreateAppOutput", - "CreateCustomEvaluation", - "CustomEvaluationDetail", - "CustomEvaluationNames", - "CustomEvaluationOutput", + "DeleteEvaluation", "DockerEnvVars", "EnvironmentOutput", "Evaluation", "EvaluationScenario", "EvaluationScenarioInput", "EvaluationScenarioOutput", - "EvaluationScenarioScore", - "EvaluationScenarioUpdateScore", + "EvaluationScenarioResult", "EvaluationStatusEnum", "EvaluationType", - "EvaluationTypeSettings", "EvaluationWebhook", + "Evaluator", + "EvaluatorConfig", "Feedback", "GetConfigReponse", "HttpValidationError", + "HumanEvaluation", + "HumanEvaluationScenario", + "HumanEvaluationScenarioInput", + "HumanEvaluationScenarioOutput", + "HumanEvaluationScenarioScore", + "HumanEvaluationScenarioUpdateScore", "Image", "InviteRequest", "ListApiKeysOutput", + "LlmRunRateLimit", "NewTestset", "Organization", "OrganizationOutput", + "Result", "SimpleEvaluationOutput", "Span", "Template", diff --git a/agenta-cli/agenta/client/backend/client.py b/agenta-cli/agenta/client/backend/client.py index 5cd775daa1..f9ab4fdb01 100644 --- a/agenta-cli/agenta/client/backend/client.py +++ b/agenta-cli/agenta/client/backend/client.py @@ -20,24 +20,25 @@ from .types.base_output import BaseOutput from .types.container_templates_response import ContainerTemplatesResponse from .types.create_app_output import CreateAppOutput -from .types.create_custom_evaluation import CreateCustomEvaluation -from .types.custom_evaluation_detail import CustomEvaluationDetail -from .types.custom_evaluation_names import CustomEvaluationNames -from .types.custom_evaluation_output import CustomEvaluationOutput +from .types.delete_evaluation import DeleteEvaluation from .types.docker_env_vars import DockerEnvVars from .types.environment_output import EnvironmentOutput from .types.evaluation import Evaluation -from .types.evaluation_scenario import EvaluationScenario -from .types.evaluation_scenario_input import EvaluationScenarioInput -from .types.evaluation_scenario_output import EvaluationScenarioOutput -from .types.evaluation_scenario_update_score import 
EvaluationScenarioUpdateScore from .types.evaluation_status_enum import EvaluationStatusEnum from .types.evaluation_type import EvaluationType -from .types.evaluation_type_settings import EvaluationTypeSettings from .types.evaluation_webhook import EvaluationWebhook +from .types.evaluator import Evaluator +from .types.evaluator_config import EvaluatorConfig from .types.feedback import Feedback from .types.get_config_reponse import GetConfigReponse from .types.http_validation_error import HttpValidationError +from .types.human_evaluation import HumanEvaluation +from .types.human_evaluation_scenario import HumanEvaluationScenario +from .types.human_evaluation_scenario_input import HumanEvaluationScenarioInput +from .types.human_evaluation_scenario_output import HumanEvaluationScenarioOutput +from .types.human_evaluation_scenario_update_score import ( + HumanEvaluationScenarioUpdateScore, +) from .types.image import Image from .types.invite_request import InviteRequest from .types.list_api_keys_output import ListApiKeysOutput @@ -61,7 +62,7 @@ OMIT = typing.cast(typing.Any, ...) -class AgentaApi: +class AakremApi: def __init__( self, *, base_url: str, api_key: str, timeout: typing.Optional[float] = 60 ): @@ -82,9 +83,9 @@ def list_api_keys(self) -> typing.List[ListApiKeysOutput]: List[ListAPIKeysOutput]: A list of API Keys associated with the user. --- - from agenta.client import AgentaApi + from aakrem.client import AakremApi - client = AgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") client.list_api_keys() """ _response = self._client_wrapper.httpx_client.request( @@ -142,9 +143,9 @@ def delete_api_key(self, key_prefix: str) -> typing.Dict[str, typing.Any]: Parameters: - key_prefix: str. --- - from agenta.client import AgentaApi + from aakrem.client import AakremApi - client = AgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") client.delete_api_key(key_prefix="key-prefix") """ _response = self._client_wrapper.httpx_client.request( @@ -445,9 +446,9 @@ def list_app_variants(self, app_id: str) -> typing.List[AppVariantOutput]: Parameters: - app_id: str. --- - from agenta.client import AgentaApi + from aakrem.client import AakremApi - client = AgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") client.list_app_variants(app_id="app-id") """ _response = self._client_wrapper.httpx_client.request( @@ -534,9 +535,9 @@ def list_apps( - org_id: typing.Optional[str]. --- - from agenta.client import AgentaApi + from aakrem.client import AakremApi - client = AgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") client.list_apps() """ _response = self._client_wrapper.httpx_client.request( @@ -762,9 +763,9 @@ def list_environments(self, app_id: str) -> typing.List[EnvironmentOutput]: Parameters: - app_id: str. 
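For reviewers, a minimal construction sketch using only names that appear in this diff (the API key and base URL are placeholders):

from aakrem.client import AakremApi, AsyncAakremApi

# Constructor arguments are unchanged; only the class and package names move
# from AgentaApi/AsyncAgentaApi under agenta.client to the aakrem namespace.
client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api")
async_client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api")

evaluations = client.fetch_list_evaluations_evaluations_get(app_id="app-id")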
--- - from agenta.client import AgentaApi + from aakrem.client import AakremApi - client = AgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") client.list_environments(app_id="app-id") """ _response = self._client_wrapper.httpx_client.request( @@ -1006,7 +1007,9 @@ def update_variant_image(self, variant_id: str, *, request: Image) -> typing.Any raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def fetch_list_evaluations(self, *, app_id: str) -> typing.List[Evaluation]: + def fetch_list_evaluations_evaluations_get( + self, *, app_id: str + ) -> typing.List[Evaluation]: """ Fetches a list of evaluations, optionally filtered by an app ID. @@ -1019,10 +1022,10 @@ def fetch_list_evaluations(self, *, app_id: str) -> typing.List[Evaluation]: Parameters: - app_id: str. --- - from agenta.client import AgentaApi + from aakrem.client import AakremApi - client = AgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") - client.fetch_list_evaluations(app_id="app-id") + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client.fetch_list_evaluations_evaluations_get(app_id="app-id") """ _response = self._client_wrapper.httpx_client.request( "GET", @@ -1049,7 +1052,6 @@ def create_evaluation( app_id: str, variant_ids: typing.List[str], evaluation_type: EvaluationType, - evaluation_type_settings: typing.Optional[EvaluationTypeSettings] = OMIT, inputs: typing.List[str], testset_id: str, status: str, @@ -1068,30 +1070,27 @@ def create_evaluation( - evaluation_type: EvaluationType. - - evaluation_type_settings: typing.Optional[EvaluationTypeSettings]. - - inputs: typing.List[str]. - testset_id: str. - status: str. """ - _request: typing.Dict[str, typing.Any] = { - "app_id": app_id, - "variant_ids": variant_ids, - "evaluation_type": evaluation_type, - "inputs": inputs, - "testset_id": testset_id, - "status": status, - } - if evaluation_type_settings is not OMIT: - _request["evaluation_type_settings"] = evaluation_type_settings _response = self._client_wrapper.httpx_client.request( "POST", urllib.parse.urljoin( - f"{self._client_wrapper.get_base_url()}/", "evaluations" + f"{self._client_wrapper.get_base_url()}/", "human-evaluations" + ), + json=jsonable_encoder( + { + "app_id": app_id, + "variant_ids": variant_ids, + "evaluation_type": evaluation_type, + "inputs": inputs, + "testset_id": testset_id, + "status": status, + } ), - json=jsonable_encoder(_request), headers=self._client_wrapper.get_headers(), timeout=60, ) @@ -1105,9 +1104,7 @@ def create_evaluation( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def delete_evaluations( - self, *, evaluations_ids: typing.List[str] - ) -> typing.List[str]: + def delete_evaluations(self, *, request: DeleteEvaluation) -> typing.List[str]: """ Delete specific comparison tables based on their unique IDs. @@ -1118,19 +1115,20 @@ def delete_evaluations( A list of the deleted comparison tables' IDs. Parameters: - - evaluations_ids: typing.List[str]. + - request: DeleteEvaluation. 
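A hedged sketch of the reworked create_evaluation call, which now posts to /human-evaluations without evaluation_type_settings; the evaluation_type and status values below are placeholders, since the allowed enum members are not shown in this diff:

from aakrem.client import AakremApi

client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api")

# All arguments are keyword-only; evaluation_type_settings has been removed.
created = client.create_evaluation(
    app_id="app-id",
    variant_ids=["variant-id"],
    evaluation_type="human_a_b_testing",  # placeholder value, not confirmed by this diff
    inputs=["input-name"],
    testset_id="testset-id",
    status="EVALUATION_INITIALIZED",  # placeholder value, not confirmed by this diff
)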
--- - from agenta.client import AgentaApi + from aakrem import DeleteEvaluation + from aakrem.client import AakremApi - client = AgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") - client.delete_evaluations(evaluations_ids=[]) + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client.delete_evaluations(request=DeleteEvaluation(evaluations_ids=[])) """ _response = self._client_wrapper.httpx_client.request( "DELETE", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", "evaluations" ), - json=jsonable_encoder({"evaluations_ids": evaluations_ids}), + json=jsonable_encoder(request), headers=self._client_wrapper.get_headers(), timeout=60, ) @@ -1144,15 +1142,16 @@ def delete_evaluations( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def fetch_evaluation(self, evaluation_id: str) -> Evaluation: + def fetch_evaluation_status(self, evaluation_id: str) -> typing.Any: """ - Fetches a single evaluation based on its ID. + Fetches the status of the evaluation. Args: - evaluation_id (str): The ID of the evaluation to fetch. + evaluation_id (str): the evaluation id + request (Request): the request object Returns: - Evaluation: The fetched evaluation. + (str): the evaluation status Parameters: - evaluation_id: str. @@ -1161,13 +1160,13 @@ def fetch_evaluation(self, evaluation_id: str) -> Evaluation: "GET", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/{evaluation_id}", + f"evaluations/{evaluation_id}/status", ), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(Evaluation, _response.json()) # type: ignore + return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -1176,41 +1175,26 @@ def fetch_evaluation(self, evaluation_id: str) -> Evaluation: raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def update_evaluation( - self, - evaluation_id: str, - *, - status: typing.Optional[EvaluationStatusEnum] = OMIT, - evaluation_type_settings: typing.Optional[EvaluationTypeSettings] = OMIT, - ) -> typing.Any: + def fetch_evaluation_results(self, evaluation_id: str) -> typing.Any: """ - Updates an evaluation's status. + Fetches the results of the evaluation - Raises: - HTTPException: If the columns in the test set do not match with the inputs in the variant. + Args: + evaluation_id (str): the evaluation id + request (Request): the request object Returns: - None: A 204 No Content status code, indicating that the update was successful. + _type_: _description_ Parameters: - evaluation_id: str. - - - status: typing.Optional[EvaluationStatusEnum]. - - - evaluation_type_settings: typing.Optional[EvaluationTypeSettings]. 
""" - _request: typing.Dict[str, typing.Any] = {} - if status is not OMIT: - _request["status"] = status - if evaluation_type_settings is not OMIT: - _request["evaluation_type_settings"] = evaluation_type_settings _response = self._client_wrapper.httpx_client.request( - "PUT", + "GET", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/{evaluation_id}", + f"evaluations/{evaluation_id}/results", ), - json=jsonable_encoder(_request), headers=self._client_wrapper.get_headers(), timeout=60, ) @@ -1226,7 +1210,7 @@ def update_evaluation( def fetch_evaluation_scenarios( self, evaluation_id: str - ) -> typing.List[EvaluationScenario]: + ) -> typing.List[HumanEvaluationScenario]: """ Fetches evaluation scenarios for a given evaluation ID. @@ -1242,22 +1226,22 @@ def fetch_evaluation_scenarios( Parameters: - evaluation_id: str. --- - from agenta.client import AgentaApi + from aakrem.client import AakremApi - client = AgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") client.fetch_evaluation_scenarios(evaluation_id="evaluation-id") """ _response = self._client_wrapper.httpx_client.request( "GET", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/{evaluation_id}/evaluation_scenarios", + f"human-evaluations/{evaluation_id}/evaluation_scenarios", ), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(typing.List[EvaluationScenario], _response.json()) # type: ignore + return pydantic.parse_obj_as(typing.List[HumanEvaluationScenario], _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -1266,35 +1250,30 @@ def fetch_evaluation_scenarios( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def create_evaluation_scenario( - self, evaluation_id: str, *, request: EvaluationScenario - ) -> typing.Any: + def fetch_evaluation(self, evaluation_id: str) -> Evaluation: """ - Create a new evaluation scenario for a given evaluation ID. + Fetches a single evaluation based on its ID. - Raises: - HTTPException: If evaluation not found or access denied. + Args: + evaluation_id (str): The ID of the evaluation to fetch. Returns: - None: 204 No Content status code upon success. + Evaluation: The fetched evaluation. Parameters: - evaluation_id: str. - - - request: EvaluationScenario. 
""" _response = self._client_wrapper.httpx_client.request( - "POST", + "GET", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/{evaluation_id}/evaluation_scenario", + f"evaluations/{evaluation_id}", ), - json=jsonable_encoder(request), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore + return pydantic.parse_obj_as(Evaluation, _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -1303,144 +1282,60 @@ def create_evaluation_scenario( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def update_evaluation_scenario( - self, - evaluation_id: str, - evaluation_scenario_id: str, - evaluation_type: EvaluationType, - *, - vote: typing.Optional[str] = OMIT, - score: typing.Optional[EvaluationScenarioUpdateScore] = OMIT, - correct_answer: typing.Optional[str] = OMIT, - outputs: typing.Optional[typing.List[EvaluationScenarioOutput]] = OMIT, - inputs: typing.Optional[typing.List[EvaluationScenarioInput]] = OMIT, - is_pinned: typing.Optional[bool] = OMIT, - note: typing.Optional[str] = OMIT, - ) -> typing.Any: + def webhook_example_fake(self) -> EvaluationWebhook: """ - Updates an evaluation scenario's vote or score based on its type. - - Raises: - HTTPException: If update fails or unauthorized. + Returns a fake score response for example webhook evaluation Returns: - None: 204 No Content status code upon successful update. - - Parameters: - - evaluation_id: str. - - - evaluation_scenario_id: str. - - - evaluation_type: EvaluationType. - - - vote: typing.Optional[str]. - - - score: typing.Optional[EvaluationScenarioUpdateScore]. - - - correct_answer: typing.Optional[str]. - - - outputs: typing.Optional[typing.List[EvaluationScenarioOutput]]. - - - inputs: typing.Optional[typing.List[EvaluationScenarioInput]]. - - - is_pinned: typing.Optional[bool]. - - - note: typing.Optional[str]. 
+ _description_ """ - _request: typing.Dict[str, typing.Any] = {} - if vote is not OMIT: - _request["vote"] = vote - if score is not OMIT: - _request["score"] = score - if correct_answer is not OMIT: - _request["correct_answer"] = correct_answer - if outputs is not OMIT: - _request["outputs"] = outputs - if inputs is not OMIT: - _request["inputs"] = inputs - if is_pinned is not OMIT: - _request["is_pinned"] = is_pinned - if note is not OMIT: - _request["note"] = note _response = self._client_wrapper.httpx_client.request( - "PUT", + "POST", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/{evaluation_id}/evaluation_scenario/{evaluation_scenario_id}/{evaluation_type}", + "evaluations/webhook_example_fake", ), - json=jsonable_encoder(_request), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore - if _response.status_code == 422: - raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore + return pydantic.parse_obj_as(EvaluationWebhook, _response.json()) # type: ignore try: _response_json = _response.json() except JSONDecodeError: raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def evaluate_ai_critique( - self, - *, - correct_answer: str, - llm_app_prompt_template: typing.Optional[str] = OMIT, - inputs: typing.List[EvaluationScenarioInput], - outputs: typing.List[EvaluationScenarioOutput], - evaluation_prompt_template: typing.Optional[str] = OMIT, - open_ai_key: typing.Optional[str] = OMIT, - ) -> str: + def fetch_evaluation_scenarios_evaluations_evaluation_scenarios_comparison_results_get( + self, *, evaluations_ids: str + ) -> typing.Any: """ - Evaluate AI critique based on the given payload. - - Args: - payload (AICritiqueCreate): The payload containing data for AI critique evaluation. - stoken_session (SessionContainer): The session container verified by `verify_session`. + Fetches evaluation scenarios for a given evaluation ID. - Returns: - str: The output of the AI critique evaluation. + Arguments: + evaluation_id (str): The ID of the evaluation for which to fetch scenarios. Raises: - HTTPException: If any exception occurs during the evaluation. - - Parameters: - - correct_answer: str. - - - llm_app_prompt_template: typing.Optional[str]. - - - inputs: typing.List[EvaluationScenarioInput]. - - - outputs: typing.List[EvaluationScenarioOutput]. + HTTPException: If the evaluation is not found or access is denied. - - evaluation_prompt_template: typing.Optional[str]. + Returns: + List[EvaluationScenario]: A list of evaluation scenarios. - - open_ai_key: typing.Optional[str]. + Parameters: + - evaluations_ids: str. 
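The webhook example and the new comparison-results route side by side; the comma-separated encoding of evaluations_ids is an assumption, since the diff only shows it as a single string query parameter:

from aakrem.client import AakremApi

client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api")

fake_score = client.webhook_example_fake()  # POST /evaluations/webhook_example_fake
comparison = client.fetch_evaluation_scenarios_evaluations_evaluation_scenarios_comparison_results_get(
    evaluations_ids="evaluation-id-1,evaluation-id-2",  # encoding assumed
)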
""" - _request: typing.Dict[str, typing.Any] = { - "correct_answer": correct_answer, - "inputs": inputs, - "outputs": outputs, - } - if llm_app_prompt_template is not OMIT: - _request["llm_app_prompt_template"] = llm_app_prompt_template - if evaluation_prompt_template is not OMIT: - _request["evaluation_prompt_template"] = evaluation_prompt_template - if open_ai_key is not OMIT: - _request["open_ai_key"] = open_ai_key _response = self._client_wrapper.httpx_client.request( - "POST", + "GET", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - "evaluations/evaluation_scenario/ai_critique", + "evaluations/evaluation_scenarios/comparison-results", ), - json=jsonable_encoder(_request), + params=remove_none_from_dict({"evaluations_ids": evaluations_ids}), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(str, _response.json()) # type: ignore + return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -1449,38 +1344,37 @@ def evaluate_ai_critique( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def get_evaluation_scenario_score( - self, evaluation_scenario_id: str - ) -> typing.Dict[str, str]: + def fetch_list_human_evaluations_human_evaluations_get( + self, *, app_id: str + ) -> typing.List[HumanEvaluation]: """ - Fetch the score of a specific evaluation scenario. + Fetches a list of evaluations, optionally filtered by an app ID. Args: - evaluation_scenario_id: The ID of the evaluation scenario to fetch. - stoken_session: Session data, verified by `verify_session`. + app_id (Optional[str]): An optional app ID to filter the evaluations. Returns: - Dictionary containing the scenario ID and its score. + List[HumanEvaluation]: A list of evaluations. Parameters: - - evaluation_scenario_id: str. + - app_id: str. 
--- - from agenta.client import AgentaApi + from aakrem.client import AakremApi - client = AgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") - client.get_evaluation_scenario_score(evaluation_scenario_id="evaluation-scenario-id") + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client.fetch_list_human_evaluations_human_evaluations_get(app_id="app-id") """ _response = self._client_wrapper.httpx_client.request( "GET", urllib.parse.urljoin( - f"{self._client_wrapper.get_base_url()}/", - f"evaluations/evaluation_scenario/{evaluation_scenario_id}/score", + f"{self._client_wrapper.get_base_url()}/", "human-evaluations" ), + params=remove_none_from_dict({"app_id": app_id}), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(typing.Dict[str, str], _response.json()) # type: ignore + return pydantic.parse_obj_as(typing.List[HumanEvaluation], _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -1489,35 +1383,38 @@ def get_evaluation_scenario_score( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def update_evaluation_scenario_score( - self, evaluation_scenario_id: str, *, score: float - ) -> typing.Any: + def delete_evaluations_human_evaluations_delete( + self, *, request: DeleteEvaluation + ) -> typing.List[str]: """ - Updates the score of an evaluation scenario. + Delete specific comparison tables based on their unique IDs. - Raises: - HTTPException: Server error if the evaluation update fails. + Args: + delete_evaluations (List[str]): The unique identifiers of the comparison tables to delete. Returns: - None: 204 No Content status code upon successful update. + A list of the deleted comparison tables' IDs. Parameters: - - evaluation_scenario_id: str. + - request: DeleteEvaluation. + --- + from aakrem import DeleteEvaluation + from aakrem.client import AakremApi - - score: float. 
+ client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client.delete_evaluations_human_evaluations_delete(request=DeleteEvaluation(evaluations_ids=[])) """ _response = self._client_wrapper.httpx_client.request( - "PUT", + "DELETE", urllib.parse.urljoin( - f"{self._client_wrapper.get_base_url()}/", - f"evaluations/evaluation_scenario/{evaluation_scenario_id}/score", + f"{self._client_wrapper.get_base_url()}/", "human-evaluations" ), - json=jsonable_encoder({"score": score}), + json=jsonable_encoder(request), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore + return pydantic.parse_obj_as(typing.List[str], _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -1526,15 +1423,17 @@ def update_evaluation_scenario_score( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def fetch_results(self, evaluation_id: str) -> typing.Any: + def fetch_human_evaluation_human_evaluations_evaluation_id_get( + self, evaluation_id: str + ) -> HumanEvaluation: """ - Fetch all the results for one the comparison table + Fetches a single evaluation based on its ID. - Arguments: - evaluation*id -- \_description* + Args: + evaluation_id (str): The ID of the evaluation to fetch. Returns: - _description_ + HumanEvaluation: The fetched evaluation. Parameters: - evaluation_id: str. @@ -1543,13 +1442,13 @@ def fetch_results(self, evaluation_id: str) -> typing.Any: "GET", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/{evaluation_id}/results", + f"human-evaluations/{evaluation_id}", ), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore + return pydantic.parse_obj_as(HumanEvaluation, _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -1558,26 +1457,36 @@ def fetch_results(self, evaluation_id: str) -> typing.Any: raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def create_custom_evaluation( - self, *, request: CreateCustomEvaluation + def update_human_evaluation( + self, + evaluation_id: str, + *, + status: typing.Optional[EvaluationStatusEnum] = OMIT, ) -> typing.Any: """ - Create evaluation with custom python code. + Updates an evaluation's status. - Args: + Raises: + HTTPException: If the columns in the test set do not match with the inputs in the variant. - custom_evaluation_payload (CreateCustomEvaluation): the required payload + Returns: + None: A 204 No Content status code, indicating that the update was successful. Parameters: - - request: CreateCustomEvaluation. + - evaluation_id: str. + + - status: typing.Optional[EvaluationStatusEnum]. 
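Human-evaluation reads and deletes move under /human-evaluations; a short sketch with the generated method names from this diff:

from aakrem import DeleteEvaluation
from aakrem.client import AakremApi

client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api")

human_evaluations = client.fetch_list_human_evaluations_human_evaluations_get(app_id="app-id")
single = client.fetch_human_evaluation_human_evaluations_evaluation_id_get(evaluation_id="evaluation-id")
deleted_ids = client.delete_evaluations_human_evaluations_delete(
    request=DeleteEvaluation(evaluations_ids=["evaluation-id"])
)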
""" + _request: typing.Dict[str, typing.Any] = {} + if status is not OMIT: + _request["status"] = status _response = self._client_wrapper.httpx_client.request( - "POST", + "PUT", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - "evaluations/custom_evaluation", + f"human-evaluations/{evaluation_id}", ), - json=jsonable_encoder(request), + json=jsonable_encoder(_request), headers=self._client_wrapper.get_headers(), timeout=60, ) @@ -1591,30 +1500,77 @@ def create_custom_evaluation( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def get_custom_evaluation(self, id: str) -> CustomEvaluationDetail: + def update_evaluation_scenario_router_human_evaluations_evaluation_id_evaluation_scenario_evaluation_scenario_id_evaluation_type_put( + self, + evaluation_id: str, + evaluation_scenario_id: str, + evaluation_type: EvaluationType, + *, + vote: typing.Optional[str] = OMIT, + score: typing.Optional[HumanEvaluationScenarioUpdateScore] = OMIT, + correct_answer: typing.Optional[str] = OMIT, + outputs: typing.Optional[typing.List[HumanEvaluationScenarioOutput]] = OMIT, + inputs: typing.Optional[typing.List[HumanEvaluationScenarioInput]] = OMIT, + is_pinned: typing.Optional[bool] = OMIT, + note: typing.Optional[str] = OMIT, + ) -> typing.Any: """ - Get the custom code evaluation detail. + Updates an evaluation scenario's vote or score based on its type. - Args: - id (str): the id of the custom evaluation + Raises: + HTTPException: If update fails or unauthorized. Returns: - CustomEvaluationDetail: Detail of the custom evaluation + None: 204 No Content status code upon successful update. Parameters: - - id: str. + - evaluation_id: str. + + - evaluation_scenario_id: str. + + - evaluation_type: EvaluationType. + + - vote: typing.Optional[str]. + + - score: typing.Optional[HumanEvaluationScenarioUpdateScore]. + + - correct_answer: typing.Optional[str]. + + - outputs: typing.Optional[typing.List[HumanEvaluationScenarioOutput]]. + + - inputs: typing.Optional[typing.List[HumanEvaluationScenarioInput]]. + + - is_pinned: typing.Optional[bool]. + + - note: typing.Optional[str]. 
""" + _request: typing.Dict[str, typing.Any] = {} + if vote is not OMIT: + _request["vote"] = vote + if score is not OMIT: + _request["score"] = score + if correct_answer is not OMIT: + _request["correct_answer"] = correct_answer + if outputs is not OMIT: + _request["outputs"] = outputs + if inputs is not OMIT: + _request["inputs"] = inputs + if is_pinned is not OMIT: + _request["is_pinned"] = is_pinned + if note is not OMIT: + _request["note"] = note _response = self._client_wrapper.httpx_client.request( - "GET", + "PUT", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/custom_evaluation/{id}", + f"human-evaluations/{evaluation_id}/evaluation_scenario/{evaluation_scenario_id}/{evaluation_type}", ), + json=jsonable_encoder(_request), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(CustomEvaluationDetail, _response.json()) # type: ignore + return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -1623,28 +1579,72 @@ def get_custom_evaluation(self, id: str) -> CustomEvaluationDetail: raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def update_custom_evaluation( - self, id: str, *, request: CreateCustomEvaluation - ) -> typing.Any: + def get_evaluation_scenario_score_router_human_evaluations_evaluation_scenario_evaluation_scenario_id_score_get( + self, evaluation_scenario_id: str + ) -> typing.Dict[str, str]: """ - Update a custom code evaluation. + Fetch the score of a specific evaluation scenario. + Args: - id (str): the ID of the custom evaluation to update - updated_data (CreateCustomEvaluation): the payload with updated data - stoken_session (SessionContainer): session container for authentication + evaluation_scenario_id: The ID of the evaluation scenario to fetch. + stoken_session: Session data, verified by `verify_session`. + + Returns: + Dictionary containing the scenario ID and its score. + + Parameters: + - evaluation_scenario_id: str. 
+ --- + from aakrem.client import AakremApi + + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client.get_evaluation_scenario_score_router_human_evaluations_evaluation_scenario_evaluation_scenario_id_score_get( + evaluation_scenario_id="evaluation-scenario-id" + ) + """ + _response = self._client_wrapper.httpx_client.request( + "GET", + urllib.parse.urljoin( + f"{self._client_wrapper.get_base_url()}/", + f"human-evaluations/evaluation_scenario/{evaluation_scenario_id}/score", + ), + headers=self._client_wrapper.get_headers(), + timeout=60, + ) + if 200 <= _response.status_code < 300: + return pydantic.parse_obj_as(typing.Dict[str, str], _response.json()) # type: ignore + if _response.status_code == 422: + raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore + try: + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + def update_evaluation_scenario_score_router_human_evaluations_evaluation_scenario_evaluation_scenario_id_score_put( + self, evaluation_scenario_id: str, *, score: float + ) -> typing.Any: + """ + Updates the score of an evaluation scenario. + + Raises: + HTTPException: Server error if the evaluation update fails. + + Returns: + None: 204 No Content status code upon successful update. Parameters: - - id: str. + - evaluation_scenario_id: str. - - request: CreateCustomEvaluation. + - score: float. """ _response = self._client_wrapper.httpx_client.request( "PUT", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/custom_evaluation/{id}", + f"human-evaluations/evaluation_scenario/{evaluation_scenario_id}/score", ), - json=jsonable_encoder(request), + json=jsonable_encoder({"score": score}), headers=self._client_wrapper.get_headers(), timeout=60, ) @@ -1658,37 +1658,30 @@ def update_custom_evaluation( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def list_custom_evaluations( - self, app_id: str - ) -> typing.List[CustomEvaluationOutput]: + def fetch_results(self, evaluation_id: str) -> typing.Any: """ - List the custom code evaluations for a given app. + Fetch all the results for one the comparison table - Args: - app_id (str): the id of the app + Arguments: + evaluation*id -- \_description* Returns: - List[CustomEvaluationOutput]: a list of custom evaluation + _description_ Parameters: - - app_id: str. - --- - from agenta.client import AgentaApi - - client = AgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") - client.list_custom_evaluations(app_id="app-id") + - evaluation_id: str. 
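Scenario scores keep their request and response shapes but move under /human-evaluations; a minimal read-then-update sketch:

from aakrem.client import AakremApi

client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api")

score = client.get_evaluation_scenario_score_router_human_evaluations_evaluation_scenario_evaluation_scenario_id_score_get(
    evaluation_scenario_id="evaluation-scenario-id"
)
client.update_evaluation_scenario_score_router_human_evaluations_evaluation_scenario_evaluation_scenario_id_score_put(
    evaluation_scenario_id="evaluation-scenario-id", score=1.0
)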
""" _response = self._client_wrapper.httpx_client.request( "GET", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/custom_evaluation/list/{app_id}", + f"human-evaluations/{evaluation_id}/results", ), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(typing.List[CustomEvaluationOutput], _response.json()) # type: ignore + return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -1697,37 +1690,66 @@ def list_custom_evaluations( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def get_custom_evaluation_names( - self, app_name: str - ) -> typing.List[CustomEvaluationNames]: + def get_evaluators_endpoint_evaluators_get(self) -> typing.List[Evaluator]: + """ + Endpoint to fetch a list of evaluators. + + Returns: + List[Evaluator]: A list of evaluator objects. + + --- + from aakrem.client import AakremApi + + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client.get_evaluators_endpoint_evaluators_get() + """ + _response = self._client_wrapper.httpx_client.request( + "GET", + urllib.parse.urljoin( + f"{self._client_wrapper.get_base_url()}/", "evaluators" + ), + headers=self._client_wrapper.get_headers(), + timeout=60, + ) + if 200 <= _response.status_code < 300: + return pydantic.parse_obj_as(typing.List[Evaluator], _response.json()) # type: ignore + try: + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + def get_evaluator_configs_evaluators_configs_get( + self, *, app_id: str + ) -> typing.List[EvaluatorConfig]: """ - Get the names of custom evaluation for a given app. + Endpoint to fetch evaluator configurations for a specific app. Args: - app_name (str): the name of the app the evaluation belongs to + app_id (str): The ID of the app. Returns: - List[CustomEvaluationNames]: the list of name of custom evaluations + List[EvaluatorConfigDB]: A list of evaluator configuration objects. Parameters: - - app_name: str. + - app_id: str. 
--- - from agenta.client import AgentaApi + from aakrem.client import AakremApi - client = AgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") - client.get_custom_evaluation_names(app_name="app-name") + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client.get_evaluator_configs_evaluators_configs_get(app_id="app-id") """ _response = self._client_wrapper.httpx_client.request( "GET", urllib.parse.urljoin( - f"{self._client_wrapper.get_base_url()}/", - f"evaluations/custom_evaluation/{app_name}/names", + f"{self._client_wrapper.get_base_url()}/", "evaluators/configs" ), + params=remove_none_from_dict({"app_id": app_id}), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(typing.List[CustomEvaluationNames], _response.json()) # type: ignore + return pydantic.parse_obj_as(typing.List[EvaluatorConfig], _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -1736,59 +1758,50 @@ def get_custom_evaluation_names( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def execute_custom_evaluation( + def create_new_evaluator_config_evaluators_configs_post( self, - evaluation_id: str, *, - inputs: typing.List[typing.Dict[str, typing.Any]], app_id: str, - variant_id: str, - correct_answer: str, - outputs: typing.List[typing.Dict[str, typing.Any]], - ) -> typing.Any: + name: str, + evaluator_key: str, + settings_values: typing.Dict[str, typing.Any], + ) -> EvaluatorConfig: """ - Execute a custom evaluation code. + Endpoint to fetch evaluator configurations for a specific app. Args: - evaluation_id (str): the custom evaluation id - payload (ExecuteCustomEvaluationCode): the required payload + app_id (str): The ID of the app. Returns: - float: the result of the evaluation custom code + EvaluatorConfigDB: Evaluator configuration api model. Parameters: - - evaluation_id: str. - - - inputs: typing.List[typing.Dict[str, typing.Any]]. - - app_id: str. - - variant_id: str. + - name: str. - - correct_answer: str. + - evaluator_key: str. - - outputs: typing.List[typing.Dict[str, typing.Any]]. + - settings_values: typing.Dict[str, typing.Any]. 
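The evaluator registry reads in one place; a short sketch of the two new list endpoints:

from aakrem.client import AakremApi

client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api")

evaluators = client.get_evaluators_endpoint_evaluators_get()                    # GET /evaluators
configs = client.get_evaluator_configs_evaluators_configs_get(app_id="app-id")  # GET /evaluators/configs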
""" _response = self._client_wrapper.httpx_client.request( "POST", urllib.parse.urljoin( - f"{self._client_wrapper.get_base_url()}/", - f"evaluations/custom_evaluation/execute/{evaluation_id}", + f"{self._client_wrapper.get_base_url()}/", "evaluators/configs" ), json=jsonable_encoder( { - "inputs": inputs, "app_id": app_id, - "variant_id": variant_id, - "correct_answer": correct_answer, - "outputs": outputs, + "name": name, + "evaluator_key": evaluator_key, + "settings_values": settings_values, } ), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore + return pydantic.parse_obj_as(EvaluatorConfig, _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -1797,24 +1810,115 @@ def execute_custom_evaluation( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def webhook_example_fake(self) -> EvaluationWebhook: + def get_evaluator_config_evaluators_configs_evaluator_config_id_get( + self, evaluator_config_id: str + ) -> EvaluatorConfig: """ - Returns a fake score response for example webhook evaluation + Endpoint to fetch evaluator configurations for a specific app. Returns: - _description_ + List[EvaluatorConfigDB]: A list of evaluator configuration objects. + + Parameters: + - evaluator_config_id: str. """ _response = self._client_wrapper.httpx_client.request( - "POST", + "GET", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - "evaluations/webhook_example_fake", + f"evaluators/configs/{evaluator_config_id}", ), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(EvaluationWebhook, _response.json()) # type: ignore + return pydantic.parse_obj_as(EvaluatorConfig, _response.json()) # type: ignore + if _response.status_code == 422: + raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore + try: + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + def update_evaluator_config_evaluators_configs_evaluator_config_id_put( + self, + evaluator_config_id: str, + *, + name: typing.Optional[str] = OMIT, + evaluator_key: typing.Optional[str] = OMIT, + settings_values: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, + ) -> EvaluatorConfig: + """ + Endpoint to update evaluator configurations for a specific app. + + Returns: + List[EvaluatorConfigDB]: A list of evaluator configuration objects. + + Parameters: + - evaluator_config_id: str. + + - name: typing.Optional[str]. + + - evaluator_key: typing.Optional[str]. + + - settings_values: typing.Optional[typing.Dict[str, typing.Any]]. 
+ """ + _request: typing.Dict[str, typing.Any] = {} + if name is not OMIT: + _request["name"] = name + if evaluator_key is not OMIT: + _request["evaluator_key"] = evaluator_key + if settings_values is not OMIT: + _request["settings_values"] = settings_values + _response = self._client_wrapper.httpx_client.request( + "PUT", + urllib.parse.urljoin( + f"{self._client_wrapper.get_base_url()}/", + f"evaluators/configs/{evaluator_config_id}", + ), + json=jsonable_encoder(_request), + headers=self._client_wrapper.get_headers(), + timeout=60, + ) + if 200 <= _response.status_code < 300: + return pydantic.parse_obj_as(EvaluatorConfig, _response.json()) # type: ignore + if _response.status_code == 422: + raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore + try: + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + def delete_evaluator_config_evaluators_configs_evaluator_config_id_delete( + self, evaluator_config_id: str + ) -> bool: + """ + Endpoint to delete a specific evaluator configuration. + + Args: + evaluator_config_id (str): The unique identifier of the evaluator configuration. + + Returns: + bool: True if deletion was successful, False otherwise. + + Parameters: + - evaluator_config_id: str. + """ + _response = self._client_wrapper.httpx_client.request( + "DELETE", + urllib.parse.urljoin( + f"{self._client_wrapper.get_base_url()}/", + f"evaluators/configs/{evaluator_config_id}", + ), + headers=self._client_wrapper.get_headers(), + timeout=60, + ) + if 200 <= _response.status_code < 300: + return pydantic.parse_obj_as(bool, _response.json()) # type: ignore + if _response.status_code == 422: + raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: _response_json = _response.json() except JSONDecodeError: @@ -2018,9 +2122,9 @@ def get_testsets(self, *, app_id: str) -> typing.List[TestSetOutputResponse]: Parameters: - app_id: str. --- - from agenta.client import AgentaApi + from aakrem.client import AakremApi - client = AgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") client.get_testsets(app_id="app-id") """ _response = self._client_wrapper.httpx_client.request( @@ -2053,9 +2157,9 @@ def delete_testsets(self, *, testset_ids: typing.List[str]) -> typing.List[str]: Parameters: - testset_ids: typing.List[str]. --- - from agenta.client import AgentaApi + from aakrem.client import AakremApi - client = AgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") client.delete_testsets(testset_ids=[]) """ _response = self._client_wrapper.httpx_client.request( @@ -2338,9 +2442,9 @@ def get_traces(self, app_id: str, variant_id: str) -> typing.List[Trace]: - variant_id: str. 
--- - from agenta.client import AgentaApi + from aakrem.client import AakremApi - client = AgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") client.get_traces(app_id="app-id", variant_id="variant-id") """ _response = self._client_wrapper.httpx_client.request( @@ -2521,9 +2625,9 @@ def get_spans_of_trace(self, trace_id: str) -> typing.List[Span]: Parameters: - trace_id: str. --- - from agenta.client import AgentaApi + from aakrem.client import AakremApi - client = AgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") client.get_spans_of_trace(trace_id="trace-id") """ _response = self._client_wrapper.httpx_client.request( @@ -2550,9 +2654,9 @@ def get_feedbacks(self, trace_id: str) -> typing.List[Feedback]: Parameters: - trace_id: str. --- - from agenta.client import AgentaApi + from aakrem.client import AakremApi - client = AgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") client.get_feedbacks(trace_id="trace-id") """ _response = self._client_wrapper.httpx_client.request( @@ -2705,9 +2809,9 @@ def list_organizations(self) -> typing.List[Organization]: HTTPException: If there is an error retrieving the organizations from the database. --- - from agenta.client import AgentaApi + from aakrem.client import AakremApi - client = AgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") client.list_organizations() """ _response = self._client_wrapper.httpx_client.request( @@ -2768,9 +2872,9 @@ def list_bases( - base_name: typing.Optional[str]. --- - from agenta.client import AgentaApi + from aakrem.client import AakremApi - client = AgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") client.list_bases() """ _response = self._client_wrapper.httpx_client.request( @@ -2871,7 +2975,7 @@ def save_config( raise ApiError(status_code=_response.status_code, body=_response_json) -class AsyncAgentaApi: +class AsyncAakremApi: def __init__( self, *, base_url: str, api_key: str, timeout: typing.Optional[float] = 60 ): @@ -2892,9 +2996,9 @@ async def list_api_keys(self) -> typing.List[ListApiKeysOutput]: List[ListAPIKeysOutput]: A list of API Keys associated with the user. --- - from agenta.client import AsyncAgentaApi + from aakrem.client import AsyncAakremApi - client = AsyncAgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") await client.list_api_keys() """ _response = await self._client_wrapper.httpx_client.request( @@ -2952,9 +3056,9 @@ async def delete_api_key(self, key_prefix: str) -> typing.Dict[str, typing.Any]: Parameters: - key_prefix: str. 
--- - from agenta.client import AsyncAgentaApi + from aakrem.client import AsyncAakremApi - client = AsyncAgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") await client.delete_api_key(key_prefix="key-prefix") """ _response = await self._client_wrapper.httpx_client.request( @@ -3257,9 +3361,9 @@ async def list_app_variants(self, app_id: str) -> typing.List[AppVariantOutput]: Parameters: - app_id: str. --- - from agenta.client import AsyncAgentaApi + from aakrem.client import AsyncAakremApi - client = AsyncAgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") await client.list_app_variants(app_id="app-id") """ _response = await self._client_wrapper.httpx_client.request( @@ -3348,9 +3452,9 @@ async def list_apps( - org_id: typing.Optional[str]. --- - from agenta.client import AsyncAgentaApi + from aakrem.client import AsyncAakremApi - client = AsyncAgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") await client.list_apps() """ _response = await self._client_wrapper.httpx_client.request( @@ -3576,9 +3680,9 @@ async def list_environments(self, app_id: str) -> typing.List[EnvironmentOutput] Parameters: - app_id: str. --- - from agenta.client import AsyncAgentaApi + from aakrem.client import AsyncAakremApi - client = AsyncAgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") await client.list_environments(app_id="app-id") """ _response = await self._client_wrapper.httpx_client.request( @@ -3822,7 +3926,9 @@ async def update_variant_image( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def fetch_list_evaluations(self, *, app_id: str) -> typing.List[Evaluation]: + async def fetch_list_evaluations_evaluations_get( + self, *, app_id: str + ) -> typing.List[Evaluation]: """ Fetches a list of evaluations, optionally filtered by an app ID. @@ -3835,10 +3941,10 @@ async def fetch_list_evaluations(self, *, app_id: str) -> typing.List[Evaluation Parameters: - app_id: str. --- - from agenta.client import AsyncAgentaApi + from aakrem.client import AsyncAakremApi - client = AsyncAgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") - await client.fetch_list_evaluations(app_id="app-id") + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + await client.fetch_list_evaluations_evaluations_get(app_id="app-id") """ _response = await self._client_wrapper.httpx_client.request( "GET", @@ -3865,7 +3971,6 @@ async def create_evaluation( app_id: str, variant_ids: typing.List[str], evaluation_type: EvaluationType, - evaluation_type_settings: typing.Optional[EvaluationTypeSettings] = OMIT, inputs: typing.List[str], testset_id: str, status: str, @@ -3884,30 +3989,27 @@ async def create_evaluation( - evaluation_type: EvaluationType. - - evaluation_type_settings: typing.Optional[EvaluationTypeSettings]. - - inputs: typing.List[str]. - testset_id: str. - status: str. 
""" - _request: typing.Dict[str, typing.Any] = { - "app_id": app_id, - "variant_ids": variant_ids, - "evaluation_type": evaluation_type, - "inputs": inputs, - "testset_id": testset_id, - "status": status, - } - if evaluation_type_settings is not OMIT: - _request["evaluation_type_settings"] = evaluation_type_settings _response = await self._client_wrapper.httpx_client.request( "POST", urllib.parse.urljoin( - f"{self._client_wrapper.get_base_url()}/", "evaluations" + f"{self._client_wrapper.get_base_url()}/", "human-evaluations" + ), + json=jsonable_encoder( + { + "app_id": app_id, + "variant_ids": variant_ids, + "evaluation_type": evaluation_type, + "inputs": inputs, + "testset_id": testset_id, + "status": status, + } ), - json=jsonable_encoder(_request), headers=self._client_wrapper.get_headers(), timeout=60, ) @@ -3922,7 +4024,7 @@ async def create_evaluation( raise ApiError(status_code=_response.status_code, body=_response_json) async def delete_evaluations( - self, *, evaluations_ids: typing.List[str] + self, *, request: DeleteEvaluation ) -> typing.List[str]: """ Delete specific comparison tables based on their unique IDs. @@ -3934,19 +4036,20 @@ async def delete_evaluations( A list of the deleted comparison tables' IDs. Parameters: - - evaluations_ids: typing.List[str]. + - request: DeleteEvaluation. --- - from agenta.client import AsyncAgentaApi + from aakrem import DeleteEvaluation + from aakrem.client import AsyncAakremApi - client = AsyncAgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") - await client.delete_evaluations(evaluations_ids=[]) + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + await client.delete_evaluations(request=DeleteEvaluation(evaluations_ids=[])) """ _response = await self._client_wrapper.httpx_client.request( "DELETE", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", "evaluations" ), - json=jsonable_encoder({"evaluations_ids": evaluations_ids}), + json=jsonable_encoder(request), headers=self._client_wrapper.get_headers(), timeout=60, ) @@ -3960,15 +4063,16 @@ async def delete_evaluations( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def fetch_evaluation(self, evaluation_id: str) -> Evaluation: + async def fetch_evaluation_status(self, evaluation_id: str) -> typing.Any: """ - Fetches a single evaluation based on its ID. + Fetches the status of the evaluation. Args: - evaluation_id (str): The ID of the evaluation to fetch. + evaluation_id (str): the evaluation id + request (Request): the request object Returns: - Evaluation: The fetched evaluation. + (str): the evaluation status Parameters: - evaluation_id: str. 
@@ -3977,13 +4081,13 @@ async def fetch_evaluation(self, evaluation_id: str) -> Evaluation: "GET", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/{evaluation_id}", + f"evaluations/{evaluation_id}/status", ), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(Evaluation, _response.json()) # type: ignore + return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -3992,41 +4096,26 @@ async def fetch_evaluation(self, evaluation_id: str) -> Evaluation: raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def update_evaluation( - self, - evaluation_id: str, - *, - status: typing.Optional[EvaluationStatusEnum] = OMIT, - evaluation_type_settings: typing.Optional[EvaluationTypeSettings] = OMIT, - ) -> typing.Any: + async def fetch_evaluation_results(self, evaluation_id: str) -> typing.Any: """ - Updates an evaluation's status. + Fetches the results of the evaluation - Raises: - HTTPException: If the columns in the test set do not match with the inputs in the variant. + Args: + evaluation_id (str): the evaluation id + request (Request): the request object Returns: - None: A 204 No Content status code, indicating that the update was successful. + _type_: _description_ Parameters: - evaluation_id: str. - - - status: typing.Optional[EvaluationStatusEnum]. - - - evaluation_type_settings: typing.Optional[EvaluationTypeSettings]. """ - _request: typing.Dict[str, typing.Any] = {} - if status is not OMIT: - _request["status"] = status - if evaluation_type_settings is not OMIT: - _request["evaluation_type_settings"] = evaluation_type_settings _response = await self._client_wrapper.httpx_client.request( - "PUT", + "GET", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/{evaluation_id}", + f"evaluations/{evaluation_id}/results", ), - json=jsonable_encoder(_request), headers=self._client_wrapper.get_headers(), timeout=60, ) @@ -4042,7 +4131,7 @@ async def update_evaluation( async def fetch_evaluation_scenarios( self, evaluation_id: str - ) -> typing.List[EvaluationScenario]: + ) -> typing.List[HumanEvaluationScenario]: """ Fetches evaluation scenarios for a given evaluation ID. @@ -4058,22 +4147,22 @@ async def fetch_evaluation_scenarios( Parameters: - evaluation_id: str. 
--- - from agenta.client import AsyncAgentaApi + from aakrem.client import AsyncAakremApi - client = AsyncAgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") await client.fetch_evaluation_scenarios(evaluation_id="evaluation-id") """ _response = await self._client_wrapper.httpx_client.request( "GET", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/{evaluation_id}/evaluation_scenarios", + f"human-evaluations/{evaluation_id}/evaluation_scenarios", ), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(typing.List[EvaluationScenario], _response.json()) # type: ignore + return pydantic.parse_obj_as(typing.List[HumanEvaluationScenario], _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -4082,35 +4171,30 @@ async def fetch_evaluation_scenarios( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def create_evaluation_scenario( - self, evaluation_id: str, *, request: EvaluationScenario - ) -> typing.Any: + async def fetch_evaluation(self, evaluation_id: str) -> Evaluation: """ - Create a new evaluation scenario for a given evaluation ID. + Fetches a single evaluation based on its ID. - Raises: - HTTPException: If evaluation not found or access denied. + Args: + evaluation_id (str): The ID of the evaluation to fetch. Returns: - None: 204 No Content status code upon success. + Evaluation: The fetched evaluation. Parameters: - evaluation_id: str. - - - request: EvaluationScenario. """ _response = await self._client_wrapper.httpx_client.request( - "POST", + "GET", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/{evaluation_id}/evaluation_scenario", + f"evaluations/{evaluation_id}", ), - json=jsonable_encoder(request), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore + return pydantic.parse_obj_as(Evaluation, _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -4119,144 +4203,99 @@ async def create_evaluation_scenario( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def update_evaluation_scenario( - self, - evaluation_id: str, - evaluation_scenario_id: str, - evaluation_type: EvaluationType, - *, - vote: typing.Optional[str] = OMIT, - score: typing.Optional[EvaluationScenarioUpdateScore] = OMIT, - correct_answer: typing.Optional[str] = OMIT, - outputs: typing.Optional[typing.List[EvaluationScenarioOutput]] = OMIT, - inputs: typing.Optional[typing.List[EvaluationScenarioInput]] = OMIT, - is_pinned: typing.Optional[bool] = OMIT, - note: typing.Optional[str] = OMIT, - ) -> typing.Any: + async def webhook_example_fake(self) -> EvaluationWebhook: """ - Updates an evaluation scenario's vote or score based on its type. - - Raises: - HTTPException: If update fails or unauthorized. 
+ Returns a fake score response for example webhook evaluation Returns: - None: 204 No Content status code upon successful update. - - Parameters: - - evaluation_id: str. - - - evaluation_scenario_id: str. - - - evaluation_type: EvaluationType. - - - vote: typing.Optional[str]. - - - score: typing.Optional[EvaluationScenarioUpdateScore]. - - - correct_answer: typing.Optional[str]. - - - outputs: typing.Optional[typing.List[EvaluationScenarioOutput]]. - - - inputs: typing.Optional[typing.List[EvaluationScenarioInput]]. - - - is_pinned: typing.Optional[bool]. - - - note: typing.Optional[str]. + _description_ """ - _request: typing.Dict[str, typing.Any] = {} - if vote is not OMIT: - _request["vote"] = vote - if score is not OMIT: - _request["score"] = score - if correct_answer is not OMIT: - _request["correct_answer"] = correct_answer - if outputs is not OMIT: - _request["outputs"] = outputs - if inputs is not OMIT: - _request["inputs"] = inputs - if is_pinned is not OMIT: - _request["is_pinned"] = is_pinned - if note is not OMIT: - _request["note"] = note _response = await self._client_wrapper.httpx_client.request( - "PUT", + "POST", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/{evaluation_id}/evaluation_scenario/{evaluation_scenario_id}/{evaluation_type}", + "evaluations/webhook_example_fake", ), - json=jsonable_encoder(_request), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore - if _response.status_code == 422: - raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore + return pydantic.parse_obj_as(EvaluationWebhook, _response.json()) # type: ignore try: _response_json = _response.json() except JSONDecodeError: raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def evaluate_ai_critique( - self, - *, - correct_answer: str, - llm_app_prompt_template: typing.Optional[str] = OMIT, - inputs: typing.List[EvaluationScenarioInput], - outputs: typing.List[EvaluationScenarioOutput], - evaluation_prompt_template: typing.Optional[str] = OMIT, - open_ai_key: typing.Optional[str] = OMIT, - ) -> str: + async def fetch_evaluation_scenarios_evaluations_evaluation_scenarios_comparison_results_get( + self, *, evaluations_ids: str + ) -> typing.Any: """ - Evaluate AI critique based on the given payload. - - Args: - payload (AICritiqueCreate): The payload containing data for AI critique evaluation. - stoken_session (SessionContainer): The session container verified by `verify_session`. + Fetches evaluation scenarios for a given evaluation ID. - Returns: - str: The output of the AI critique evaluation. + Arguments: + evaluation_id (str): The ID of the evaluation for which to fetch scenarios. Raises: - HTTPException: If any exception occurs during the evaluation. + HTTPException: If the evaluation is not found or access is denied. + + Returns: + List[EvaluationScenario]: A list of evaluation scenarios. Parameters: - - correct_answer: str. + - evaluations_ids: str. 
+ """ + _response = await self._client_wrapper.httpx_client.request( + "GET", + urllib.parse.urljoin( + f"{self._client_wrapper.get_base_url()}/", + "evaluations/evaluation_scenarios/comparison-results", + ), + params=remove_none_from_dict({"evaluations_ids": evaluations_ids}), + headers=self._client_wrapper.get_headers(), + timeout=60, + ) + if 200 <= _response.status_code < 300: + return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore + if _response.status_code == 422: + raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore + try: + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) - - llm_app_prompt_template: typing.Optional[str]. + async def fetch_list_human_evaluations_human_evaluations_get( + self, *, app_id: str + ) -> typing.List[HumanEvaluation]: + """ + Fetches a list of evaluations, optionally filtered by an app ID. - - inputs: typing.List[EvaluationScenarioInput]. + Args: + app_id (Optional[str]): An optional app ID to filter the evaluations. - - outputs: typing.List[EvaluationScenarioOutput]. + Returns: + List[HumanEvaluation]: A list of evaluations. - - evaluation_prompt_template: typing.Optional[str]. + Parameters: + - app_id: str. + --- + from aakrem.client import AsyncAakremApi - - open_ai_key: typing.Optional[str]. + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + await client.fetch_list_human_evaluations_human_evaluations_get(app_id="app-id") """ - _request: typing.Dict[str, typing.Any] = { - "correct_answer": correct_answer, - "inputs": inputs, - "outputs": outputs, - } - if llm_app_prompt_template is not OMIT: - _request["llm_app_prompt_template"] = llm_app_prompt_template - if evaluation_prompt_template is not OMIT: - _request["evaluation_prompt_template"] = evaluation_prompt_template - if open_ai_key is not OMIT: - _request["open_ai_key"] = open_ai_key _response = await self._client_wrapper.httpx_client.request( - "POST", + "GET", urllib.parse.urljoin( - f"{self._client_wrapper.get_base_url()}/", - "evaluations/evaluation_scenario/ai_critique", + f"{self._client_wrapper.get_base_url()}/", "human-evaluations" ), - json=jsonable_encoder(_request), + params=remove_none_from_dict({"app_id": app_id}), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(str, _response.json()) # type: ignore + return pydantic.parse_obj_as(typing.List[HumanEvaluation], _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -4265,38 +4304,38 @@ async def evaluate_ai_critique( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def get_evaluation_scenario_score( - self, evaluation_scenario_id: str - ) -> typing.Dict[str, str]: + async def delete_evaluations_human_evaluations_delete( + self, *, request: DeleteEvaluation + ) -> typing.List[str]: """ - Fetch the score of a specific evaluation scenario. + Delete specific comparison tables based on their unique IDs. Args: - evaluation_scenario_id: The ID of the evaluation scenario to fetch. - stoken_session: Session data, verified by `verify_session`. 
+ delete_evaluations (List[str]): The unique identifiers of the comparison tables to delete. Returns: - Dictionary containing the scenario ID and its score. + A list of the deleted comparison tables' IDs. Parameters: - - evaluation_scenario_id: str. + - request: DeleteEvaluation. --- - from agenta.client import AsyncAgentaApi + from aakrem import DeleteEvaluation + from aakrem.client import AsyncAakremApi - client = AsyncAgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") - await client.get_evaluation_scenario_score(evaluation_scenario_id="evaluation-scenario-id") + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + await client.delete_evaluations_human_evaluations_delete(request=DeleteEvaluation(evaluations_ids=[])) """ _response = await self._client_wrapper.httpx_client.request( - "GET", + "DELETE", urllib.parse.urljoin( - f"{self._client_wrapper.get_base_url()}/", - f"evaluations/evaluation_scenario/{evaluation_scenario_id}/score", + f"{self._client_wrapper.get_base_url()}/", "human-evaluations" ), + json=jsonable_encoder(request), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(typing.Dict[str, str], _response.json()) # type: ignore + return pydantic.parse_obj_as(typing.List[str], _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -4305,35 +4344,32 @@ async def get_evaluation_scenario_score( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def update_evaluation_scenario_score( - self, evaluation_scenario_id: str, *, score: float - ) -> typing.Any: + async def fetch_human_evaluation_human_evaluations_evaluation_id_get( + self, evaluation_id: str + ) -> HumanEvaluation: """ - Updates the score of an evaluation scenario. + Fetches a single evaluation based on its ID. - Raises: - HTTPException: Server error if the evaluation update fails. + Args: + evaluation_id (str): The ID of the evaluation to fetch. Returns: - None: 204 No Content status code upon successful update. + HumanEvaluation: The fetched evaluation. Parameters: - - evaluation_scenario_id: str. - - - score: float. + - evaluation_id: str. 
""" _response = await self._client_wrapper.httpx_client.request( - "PUT", + "GET", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/evaluation_scenario/{evaluation_scenario_id}/score", + f"human-evaluations/{evaluation_id}", ), - json=jsonable_encoder({"score": score}), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore + return pydantic.parse_obj_as(HumanEvaluation, _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -4342,25 +4378,36 @@ async def update_evaluation_scenario_score( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def fetch_results(self, evaluation_id: str) -> typing.Any: + async def update_human_evaluation( + self, + evaluation_id: str, + *, + status: typing.Optional[EvaluationStatusEnum] = OMIT, + ) -> typing.Any: """ - Fetch all the results for one the comparison table + Updates an evaluation's status. - Arguments: - evaluation*id -- \_description* + Raises: + HTTPException: If the columns in the test set do not match with the inputs in the variant. Returns: - _description_ + None: A 204 No Content status code, indicating that the update was successful. Parameters: - evaluation_id: str. + + - status: typing.Optional[EvaluationStatusEnum]. """ + _request: typing.Dict[str, typing.Any] = {} + if status is not OMIT: + _request["status"] = status _response = await self._client_wrapper.httpx_client.request( - "GET", + "PUT", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/{evaluation_id}/results", + f"human-evaluations/{evaluation_id}", ), + json=jsonable_encoder(_request), headers=self._client_wrapper.get_headers(), timeout=60, ) @@ -4374,26 +4421,72 @@ async def fetch_results(self, evaluation_id: str) -> typing.Any: raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def create_custom_evaluation( - self, *, request: CreateCustomEvaluation + async def update_evaluation_scenario_router_human_evaluations_evaluation_id_evaluation_scenario_evaluation_scenario_id_evaluation_type_put( + self, + evaluation_id: str, + evaluation_scenario_id: str, + evaluation_type: EvaluationType, + *, + vote: typing.Optional[str] = OMIT, + score: typing.Optional[HumanEvaluationScenarioUpdateScore] = OMIT, + correct_answer: typing.Optional[str] = OMIT, + outputs: typing.Optional[typing.List[HumanEvaluationScenarioOutput]] = OMIT, + inputs: typing.Optional[typing.List[HumanEvaluationScenarioInput]] = OMIT, + is_pinned: typing.Optional[bool] = OMIT, + note: typing.Optional[str] = OMIT, ) -> typing.Any: """ - Create evaluation with custom python code. + Updates an evaluation scenario's vote or score based on its type. - Args: + Raises: + HTTPException: If update fails or unauthorized. - custom_evaluation_payload (CreateCustomEvaluation): the required payload + Returns: + None: 204 No Content status code upon successful update. Parameters: - - request: CreateCustomEvaluation. + - evaluation_id: str. + + - evaluation_scenario_id: str. + + - evaluation_type: EvaluationType. + + - vote: typing.Optional[str]. + + - score: typing.Optional[HumanEvaluationScenarioUpdateScore]. 
+ + - correct_answer: typing.Optional[str]. + + - outputs: typing.Optional[typing.List[HumanEvaluationScenarioOutput]]. + + - inputs: typing.Optional[typing.List[HumanEvaluationScenarioInput]]. + + - is_pinned: typing.Optional[bool]. + + - note: typing.Optional[str]. """ + _request: typing.Dict[str, typing.Any] = {} + if vote is not OMIT: + _request["vote"] = vote + if score is not OMIT: + _request["score"] = score + if correct_answer is not OMIT: + _request["correct_answer"] = correct_answer + if outputs is not OMIT: + _request["outputs"] = outputs + if inputs is not OMIT: + _request["inputs"] = inputs + if is_pinned is not OMIT: + _request["is_pinned"] = is_pinned + if note is not OMIT: + _request["note"] = note _response = await self._client_wrapper.httpx_client.request( - "POST", + "PUT", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - "evaluations/custom_evaluation", + f"human-evaluations/{evaluation_id}/evaluation_scenario/{evaluation_scenario_id}/{evaluation_type}", ), - json=jsonable_encoder(request), + json=jsonable_encoder(_request), headers=self._client_wrapper.get_headers(), timeout=60, ) @@ -4407,30 +4500,40 @@ async def create_custom_evaluation( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def get_custom_evaluation(self, id: str) -> CustomEvaluationDetail: + async def get_evaluation_scenario_score_router_human_evaluations_evaluation_scenario_evaluation_scenario_id_score_get( + self, evaluation_scenario_id: str + ) -> typing.Dict[str, str]: """ - Get the custom code evaluation detail. + Fetch the score of a specific evaluation scenario. Args: - id (str): the id of the custom evaluation + evaluation_scenario_id: The ID of the evaluation scenario to fetch. + stoken_session: Session data, verified by `verify_session`. Returns: - CustomEvaluationDetail: Detail of the custom evaluation + Dictionary containing the scenario ID and its score. Parameters: - - id: str. + - evaluation_scenario_id: str. 
+ --- + from aakrem.client import AsyncAakremApi + + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + await client.get_evaluation_scenario_score_router_human_evaluations_evaluation_scenario_evaluation_scenario_id_score_get( + evaluation_scenario_id="evaluation-scenario-id" + ) """ _response = await self._client_wrapper.httpx_client.request( "GET", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/custom_evaluation/{id}", + f"human-evaluations/evaluation_scenario/{evaluation_scenario_id}/score", ), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(CustomEvaluationDetail, _response.json()) # type: ignore + return pydantic.parse_obj_as(typing.Dict[str, str], _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -4439,28 +4542,30 @@ async def get_custom_evaluation(self, id: str) -> CustomEvaluationDetail: raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def update_custom_evaluation( - self, id: str, *, request: CreateCustomEvaluation + async def update_evaluation_scenario_score_router_human_evaluations_evaluation_scenario_evaluation_scenario_id_score_put( + self, evaluation_scenario_id: str, *, score: float ) -> typing.Any: """ - Update a custom code evaluation. - Args: - id (str): the ID of the custom evaluation to update - updated_data (CreateCustomEvaluation): the payload with updated data - stoken_session (SessionContainer): session container for authentication + Updates the score of an evaluation scenario. + + Raises: + HTTPException: Server error if the evaluation update fails. + + Returns: + None: 204 No Content status code upon successful update. Parameters: - - id: str. + - evaluation_scenario_id: str. - - request: CreateCustomEvaluation. + - score: float. """ _response = await self._client_wrapper.httpx_client.request( "PUT", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/custom_evaluation/{id}", + f"human-evaluations/evaluation_scenario/{evaluation_scenario_id}/score", ), - json=jsonable_encoder(request), + json=jsonable_encoder({"score": score}), headers=self._client_wrapper.get_headers(), timeout=60, ) @@ -4474,37 +4579,30 @@ async def update_custom_evaluation( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def list_custom_evaluations( - self, app_id: str - ) -> typing.List[CustomEvaluationOutput]: + async def fetch_results(self, evaluation_id: str) -> typing.Any: """ - List the custom code evaluations for a given app. + Fetch all the results for one the comparison table - Args: - app_id (str): the id of the app + Arguments: + evaluation*id -- \_description* Returns: - List[CustomEvaluationOutput]: a list of custom evaluation + _description_ Parameters: - - app_id: str. - --- - from agenta.client import AsyncAgentaApi - - client = AsyncAgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") - await client.list_custom_evaluations(app_id="app-id") + - evaluation_id: str. 
""" _response = await self._client_wrapper.httpx_client.request( "GET", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - f"evaluations/custom_evaluation/list/{app_id}", + f"human-evaluations/{evaluation_id}/results", ), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(typing.List[CustomEvaluationOutput], _response.json()) # type: ignore + return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -4513,37 +4611,66 @@ async def list_custom_evaluations( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def get_custom_evaluation_names( - self, app_name: str - ) -> typing.List[CustomEvaluationNames]: + async def get_evaluators_endpoint_evaluators_get(self) -> typing.List[Evaluator]: + """ + Endpoint to fetch a list of evaluators. + + Returns: + List[Evaluator]: A list of evaluator objects. + + --- + from aakrem.client import AsyncAakremApi + + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + await client.get_evaluators_endpoint_evaluators_get() + """ + _response = await self._client_wrapper.httpx_client.request( + "GET", + urllib.parse.urljoin( + f"{self._client_wrapper.get_base_url()}/", "evaluators" + ), + headers=self._client_wrapper.get_headers(), + timeout=60, + ) + if 200 <= _response.status_code < 300: + return pydantic.parse_obj_as(typing.List[Evaluator], _response.json()) # type: ignore + try: + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + async def get_evaluator_configs_evaluators_configs_get( + self, *, app_id: str + ) -> typing.List[EvaluatorConfig]: """ - Get the names of custom evaluation for a given app. + Endpoint to fetch evaluator configurations for a specific app. Args: - app_name (str): the name of the app the evaluation belongs to + app_id (str): The ID of the app. Returns: - List[CustomEvaluationNames]: the list of name of custom evaluations + List[EvaluatorConfigDB]: A list of evaluator configuration objects. Parameters: - - app_name: str. + - app_id: str. 
--- - from agenta.client import AsyncAgentaApi + from aakrem.client import AsyncAakremApi - client = AsyncAgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") - await client.get_custom_evaluation_names(app_name="app-name") + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + await client.get_evaluator_configs_evaluators_configs_get(app_id="app-id") """ _response = await self._client_wrapper.httpx_client.request( "GET", urllib.parse.urljoin( - f"{self._client_wrapper.get_base_url()}/", - f"evaluations/custom_evaluation/{app_name}/names", + f"{self._client_wrapper.get_base_url()}/", "evaluators/configs" ), + params=remove_none_from_dict({"app_id": app_id}), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(typing.List[CustomEvaluationNames], _response.json()) # type: ignore + return pydantic.parse_obj_as(typing.List[EvaluatorConfig], _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -4552,59 +4679,50 @@ async def get_custom_evaluation_names( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def execute_custom_evaluation( + async def create_new_evaluator_config_evaluators_configs_post( self, - evaluation_id: str, *, - inputs: typing.List[typing.Dict[str, typing.Any]], app_id: str, - variant_id: str, - correct_answer: str, - outputs: typing.List[typing.Dict[str, typing.Any]], - ) -> typing.Any: + name: str, + evaluator_key: str, + settings_values: typing.Dict[str, typing.Any], + ) -> EvaluatorConfig: """ - Execute a custom evaluation code. + Endpoint to fetch evaluator configurations for a specific app. Args: - evaluation_id (str): the custom evaluation id - payload (ExecuteCustomEvaluationCode): the required payload + app_id (str): The ID of the app. Returns: - float: the result of the evaluation custom code + EvaluatorConfigDB: Evaluator configuration api model. Parameters: - - evaluation_id: str. - - - inputs: typing.List[typing.Dict[str, typing.Any]]. - - app_id: str. - - variant_id: str. + - name: str. - - correct_answer: str. + - evaluator_key: str. - - outputs: typing.List[typing.Dict[str, typing.Any]]. + - settings_values: typing.Dict[str, typing.Any]. 
""" _response = await self._client_wrapper.httpx_client.request( "POST", urllib.parse.urljoin( - f"{self._client_wrapper.get_base_url()}/", - f"evaluations/custom_evaluation/execute/{evaluation_id}", + f"{self._client_wrapper.get_base_url()}/", "evaluators/configs" ), json=jsonable_encoder( { - "inputs": inputs, "app_id": app_id, - "variant_id": variant_id, - "correct_answer": correct_answer, - "outputs": outputs, + "name": name, + "evaluator_key": evaluator_key, + "settings_values": settings_values, } ), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore + return pydantic.parse_obj_as(EvaluatorConfig, _response.json()) # type: ignore if _response.status_code == 422: raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: @@ -4613,24 +4731,115 @@ async def execute_custom_evaluation( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def webhook_example_fake(self) -> EvaluationWebhook: + async def get_evaluator_config_evaluators_configs_evaluator_config_id_get( + self, evaluator_config_id: str + ) -> EvaluatorConfig: """ - Returns a fake score response for example webhook evaluation + Endpoint to fetch evaluator configurations for a specific app. Returns: - _description_ + List[EvaluatorConfigDB]: A list of evaluator configuration objects. + + Parameters: + - evaluator_config_id: str. """ _response = await self._client_wrapper.httpx_client.request( - "POST", + "GET", urllib.parse.urljoin( f"{self._client_wrapper.get_base_url()}/", - "evaluations/webhook_example_fake", + f"evaluators/configs/{evaluator_config_id}", ), headers=self._client_wrapper.get_headers(), timeout=60, ) if 200 <= _response.status_code < 300: - return pydantic.parse_obj_as(EvaluationWebhook, _response.json()) # type: ignore + return pydantic.parse_obj_as(EvaluatorConfig, _response.json()) # type: ignore + if _response.status_code == 422: + raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore + try: + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + async def update_evaluator_config_evaluators_configs_evaluator_config_id_put( + self, + evaluator_config_id: str, + *, + name: typing.Optional[str] = OMIT, + evaluator_key: typing.Optional[str] = OMIT, + settings_values: typing.Optional[typing.Dict[str, typing.Any]] = OMIT, + ) -> EvaluatorConfig: + """ + Endpoint to update evaluator configurations for a specific app. + + Returns: + List[EvaluatorConfigDB]: A list of evaluator configuration objects. + + Parameters: + - evaluator_config_id: str. + + - name: typing.Optional[str]. + + - evaluator_key: typing.Optional[str]. + + - settings_values: typing.Optional[typing.Dict[str, typing.Any]]. 
+ """ + _request: typing.Dict[str, typing.Any] = {} + if name is not OMIT: + _request["name"] = name + if evaluator_key is not OMIT: + _request["evaluator_key"] = evaluator_key + if settings_values is not OMIT: + _request["settings_values"] = settings_values + _response = await self._client_wrapper.httpx_client.request( + "PUT", + urllib.parse.urljoin( + f"{self._client_wrapper.get_base_url()}/", + f"evaluators/configs/{evaluator_config_id}", + ), + json=jsonable_encoder(_request), + headers=self._client_wrapper.get_headers(), + timeout=60, + ) + if 200 <= _response.status_code < 300: + return pydantic.parse_obj_as(EvaluatorConfig, _response.json()) # type: ignore + if _response.status_code == 422: + raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore + try: + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + async def delete_evaluator_config_evaluators_configs_evaluator_config_id_delete( + self, evaluator_config_id: str + ) -> bool: + """ + Endpoint to delete a specific evaluator configuration. + + Args: + evaluator_config_id (str): The unique identifier of the evaluator configuration. + + Returns: + bool: True if deletion was successful, False otherwise. + + Parameters: + - evaluator_config_id: str. + """ + _response = await self._client_wrapper.httpx_client.request( + "DELETE", + urllib.parse.urljoin( + f"{self._client_wrapper.get_base_url()}/", + f"evaluators/configs/{evaluator_config_id}", + ), + headers=self._client_wrapper.get_headers(), + timeout=60, + ) + if 200 <= _response.status_code < 300: + return pydantic.parse_obj_as(bool, _response.json()) # type: ignore + if _response.status_code == 422: + raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore try: _response_json = _response.json() except JSONDecodeError: @@ -4836,9 +5045,9 @@ async def get_testsets(self, *, app_id: str) -> typing.List[TestSetOutputRespons Parameters: - app_id: str. --- - from agenta.client import AsyncAgentaApi + from aakrem.client import AsyncAakremApi - client = AsyncAgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") await client.get_testsets(app_id="app-id") """ _response = await self._client_wrapper.httpx_client.request( @@ -4873,9 +5082,9 @@ async def delete_testsets( Parameters: - testset_ids: typing.List[str]. --- - from agenta.client import AsyncAgentaApi + from aakrem.client import AsyncAakremApi - client = AsyncAgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") await client.delete_testsets(testset_ids=[]) """ _response = await self._client_wrapper.httpx_client.request( @@ -5162,9 +5371,9 @@ async def get_traces(self, app_id: str, variant_id: str) -> typing.List[Trace]: - variant_id: str. 
--- - from agenta.client import AsyncAgentaApi + from aakrem.client import AsyncAakremApi - client = AsyncAgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") await client.get_traces(app_id="app-id", variant_id="variant-id") """ _response = await self._client_wrapper.httpx_client.request( @@ -5345,9 +5554,9 @@ async def get_spans_of_trace(self, trace_id: str) -> typing.List[Span]: Parameters: - trace_id: str. --- - from agenta.client import AsyncAgentaApi + from aakrem.client import AsyncAakremApi - client = AsyncAgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") await client.get_spans_of_trace(trace_id="trace-id") """ _response = await self._client_wrapper.httpx_client.request( @@ -5374,9 +5583,9 @@ async def get_feedbacks(self, trace_id: str) -> typing.List[Feedback]: Parameters: - trace_id: str. --- - from agenta.client import AsyncAgentaApi + from aakrem.client import AsyncAakremApi - client = AsyncAgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") await client.get_feedbacks(trace_id="trace-id") """ _response = await self._client_wrapper.httpx_client.request( @@ -5529,9 +5738,9 @@ async def list_organizations(self) -> typing.List[Organization]: HTTPException: If there is an error retrieving the organizations from the database. --- - from agenta.client import AsyncAgentaApi + from aakrem.client import AsyncAakremApi - client = AsyncAgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") await client.list_organizations() """ _response = await self._client_wrapper.httpx_client.request( @@ -5592,9 +5801,9 @@ async def list_bases( - base_name: typing.Optional[str]. 
--- - from agenta.client import AsyncAgentaApi + from aakrem.client import AsyncAakremApi - client = AsyncAgentaApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") + client = AsyncAakremApi(api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api") await client.list_bases() """ _response = await self._client_wrapper.httpx_client.request( diff --git a/agenta-cli/agenta/client/backend/types/__init__.py b/agenta-cli/agenta/client/backend/types/__init__.py index 4be042f7a1..59bde99e4f 100644 --- a/agenta-cli/agenta/client/backend/types/__init__.py +++ b/agenta-cli/agenta/client/backend/types/__init__.py @@ -3,37 +3,43 @@ from .add_variant_from_base_and_config_response import ( AddVariantFromBaseAndConfigResponse, ) +from .aggregated_result import AggregatedResult from .app import App from .app_variant_output import AppVariantOutput from .base_output import BaseOutput from .body_import_testset import BodyImportTestset from .container_templates_response import ContainerTemplatesResponse from .create_app_output import CreateAppOutput -from .create_custom_evaluation import CreateCustomEvaluation -from .custom_evaluation_detail import CustomEvaluationDetail -from .custom_evaluation_names import CustomEvaluationNames -from .custom_evaluation_output import CustomEvaluationOutput +from .delete_evaluation import DeleteEvaluation from .docker_env_vars import DockerEnvVars from .environment_output import EnvironmentOutput from .evaluation import Evaluation from .evaluation_scenario import EvaluationScenario from .evaluation_scenario_input import EvaluationScenarioInput from .evaluation_scenario_output import EvaluationScenarioOutput -from .evaluation_scenario_score import EvaluationScenarioScore -from .evaluation_scenario_update_score import EvaluationScenarioUpdateScore +from .evaluation_scenario_result import EvaluationScenarioResult from .evaluation_status_enum import EvaluationStatusEnum from .evaluation_type import EvaluationType -from .evaluation_type_settings import EvaluationTypeSettings from .evaluation_webhook import EvaluationWebhook +from .evaluator import Evaluator +from .evaluator_config import EvaluatorConfig from .feedback import Feedback from .get_config_reponse import GetConfigReponse from .http_validation_error import HttpValidationError +from .human_evaluation import HumanEvaluation +from .human_evaluation_scenario import HumanEvaluationScenario +from .human_evaluation_scenario_input import HumanEvaluationScenarioInput +from .human_evaluation_scenario_output import HumanEvaluationScenarioOutput +from .human_evaluation_scenario_score import HumanEvaluationScenarioScore +from .human_evaluation_scenario_update_score import HumanEvaluationScenarioUpdateScore from .image import Image from .invite_request import InviteRequest from .list_api_keys_output import ListApiKeysOutput +from .llm_run_rate_limit import LlmRunRateLimit from .new_testset import NewTestset from .organization import Organization from .organization_output import OrganizationOutput +from .result import Result from .simple_evaluation_output import SimpleEvaluationOutput from .span import Span from .template import Template @@ -49,37 +55,43 @@ __all__ = [ "AddVariantFromBaseAndConfigResponse", + "AggregatedResult", "App", "AppVariantOutput", "BaseOutput", "BodyImportTestset", "ContainerTemplatesResponse", "CreateAppOutput", - "CreateCustomEvaluation", - "CustomEvaluationDetail", - "CustomEvaluationNames", - "CustomEvaluationOutput", + "DeleteEvaluation", "DockerEnvVars", "EnvironmentOutput", 
"Evaluation", "EvaluationScenario", "EvaluationScenarioInput", "EvaluationScenarioOutput", - "EvaluationScenarioScore", - "EvaluationScenarioUpdateScore", + "EvaluationScenarioResult", "EvaluationStatusEnum", "EvaluationType", - "EvaluationTypeSettings", "EvaluationWebhook", + "Evaluator", + "EvaluatorConfig", "Feedback", "GetConfigReponse", "HttpValidationError", + "HumanEvaluation", + "HumanEvaluationScenario", + "HumanEvaluationScenarioInput", + "HumanEvaluationScenarioOutput", + "HumanEvaluationScenarioScore", + "HumanEvaluationScenarioUpdateScore", "Image", "InviteRequest", "ListApiKeysOutput", + "LlmRunRateLimit", "NewTestset", "Organization", "OrganizationOutput", + "Result", "SimpleEvaluationOutput", "Span", "Template", diff --git a/agenta-cli/agenta/client/backend/types/aggregated_result.py b/agenta-cli/agenta/client/backend/types/aggregated_result.py new file mode 100644 index 0000000000..ab5cd4ee8b --- /dev/null +++ b/agenta-cli/agenta/client/backend/types/aggregated_result.py @@ -0,0 +1,39 @@ +# This file was auto-generated by Fern from our API Definition. + +import datetime as dt +import typing + +from ..core.datetime_utils import serialize_datetime +from .evaluator_config import EvaluatorConfig +from .result import Result + +try: + import pydantic.v1 as pydantic # type: ignore +except ImportError: + import pydantic # type: ignore + + +class AggregatedResult(pydantic.BaseModel): + evaluator_config: EvaluatorConfig + result: Result + + def json(self, **kwargs: typing.Any) -> str: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().json(**kwargs_with_defaults) + + def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().dict(**kwargs_with_defaults) + + class Config: + frozen = True + smart_union = True + json_encoders = {dt.datetime: serialize_datetime} diff --git a/agenta-cli/agenta/client/backend/types/custom_evaluation_names.py b/agenta-cli/agenta/client/backend/types/delete_evaluation.py similarity index 91% rename from agenta-cli/agenta/client/backend/types/custom_evaluation_names.py rename to agenta-cli/agenta/client/backend/types/delete_evaluation.py index 860cf6b3db..46eea6648a 100644 --- a/agenta-cli/agenta/client/backend/types/custom_evaluation_names.py +++ b/agenta-cli/agenta/client/backend/types/delete_evaluation.py @@ -11,9 +11,8 @@ import pydantic # type: ignore -class CustomEvaluationNames(pydantic.BaseModel): - id: str - evaluation_name: str +class DeleteEvaluation(pydantic.BaseModel): + evaluations_ids: typing.List[str] def json(self, **kwargs: typing.Any) -> str: kwargs_with_defaults: typing.Any = { diff --git a/agenta-cli/agenta/client/backend/types/evaluation.py b/agenta-cli/agenta/client/backend/types/evaluation.py index b0211abd38..dcef94e56f 100644 --- a/agenta-cli/agenta/client/backend/types/evaluation.py +++ b/agenta-cli/agenta/client/backend/types/evaluation.py @@ -4,8 +4,7 @@ import typing from ..core.datetime_utils import serialize_datetime -from .evaluation_type import EvaluationType -from .evaluation_type_settings import EvaluationTypeSettings +from .aggregated_result import AggregatedResult try: import pydantic.v1 as pydantic # type: ignore @@ -18,13 +17,12 @@ class Evaluation(pydantic.BaseModel): app_id: str user_id: str user_username: str - evaluation_type: EvaluationType - evaluation_type_settings: typing.Optional[EvaluationTypeSettings] variant_ids: 
typing.List[str] variant_names: typing.List[str] testset_id: str testset_name: str status: str + aggregated_results: typing.List[AggregatedResult] created_at: dt.datetime updated_at: dt.datetime diff --git a/agenta-cli/agenta/client/backend/types/evaluation_scenario.py b/agenta-cli/agenta/client/backend/types/evaluation_scenario.py index 6e49169ba7..36229c357d 100644 --- a/agenta-cli/agenta/client/backend/types/evaluation_scenario.py +++ b/agenta-cli/agenta/client/backend/types/evaluation_scenario.py @@ -6,7 +6,7 @@ from ..core.datetime_utils import serialize_datetime from .evaluation_scenario_input import EvaluationScenarioInput from .evaluation_scenario_output import EvaluationScenarioOutput -from .evaluation_scenario_score import EvaluationScenarioScore +from .evaluation_scenario_result import EvaluationScenarioResult try: import pydantic.v1 as pydantic # type: ignore @@ -19,12 +19,11 @@ class EvaluationScenario(pydantic.BaseModel): evaluation_id: str inputs: typing.List[EvaluationScenarioInput] outputs: typing.List[EvaluationScenarioOutput] - vote: typing.Optional[str] - score: typing.Optional[EvaluationScenarioScore] evaluation: typing.Optional[str] correct_answer: typing.Optional[str] is_pinned: typing.Optional[bool] note: typing.Optional[str] + results: typing.List[EvaluationScenarioResult] def json(self, **kwargs: typing.Any) -> str: kwargs_with_defaults: typing.Any = { diff --git a/agenta-cli/agenta/client/backend/types/evaluation_scenario_input.py b/agenta-cli/agenta/client/backend/types/evaluation_scenario_input.py index ff78ae974f..700f6372e4 100644 --- a/agenta-cli/agenta/client/backend/types/evaluation_scenario_input.py +++ b/agenta-cli/agenta/client/backend/types/evaluation_scenario_input.py @@ -12,8 +12,9 @@ class EvaluationScenarioInput(pydantic.BaseModel): - input_name: str - input_value: str + name: str + type: str + value: typing.Optional[typing.Any] def json(self, **kwargs: typing.Any) -> str: kwargs_with_defaults: typing.Any = { diff --git a/agenta-cli/agenta/client/backend/types/evaluation_scenario_output.py b/agenta-cli/agenta/client/backend/types/evaluation_scenario_output.py index 17ecf2cc53..e869cb650b 100644 --- a/agenta-cli/agenta/client/backend/types/evaluation_scenario_output.py +++ b/agenta-cli/agenta/client/backend/types/evaluation_scenario_output.py @@ -12,8 +12,8 @@ class EvaluationScenarioOutput(pydantic.BaseModel): - variant_id: str - variant_output: str + type: str + value: typing.Optional[typing.Any] def json(self, **kwargs: typing.Any) -> str: kwargs_with_defaults: typing.Any = { diff --git a/agenta-cli/agenta/client/backend/types/custom_evaluation_output.py b/agenta-cli/agenta/client/backend/types/evaluation_scenario_result.py similarity index 87% rename from agenta-cli/agenta/client/backend/types/custom_evaluation_output.py rename to agenta-cli/agenta/client/backend/types/evaluation_scenario_result.py index 3a9d2a8be0..57fbd6e082 100644 --- a/agenta-cli/agenta/client/backend/types/custom_evaluation_output.py +++ b/agenta-cli/agenta/client/backend/types/evaluation_scenario_result.py @@ -4,6 +4,7 @@ import typing from ..core.datetime_utils import serialize_datetime +from .result import Result try: import pydantic.v1 as pydantic # type: ignore @@ -11,11 +12,9 @@ import pydantic # type: ignore -class CustomEvaluationOutput(pydantic.BaseModel): - id: str - app_id: str - evaluation_name: str - created_at: dt.datetime +class EvaluationScenarioResult(pydantic.BaseModel): + evaluator_config: str + result: Result def json(self, **kwargs: typing.Any) -> str: 
kwargs_with_defaults: typing.Any = { diff --git a/agenta-cli/agenta/client/backend/types/evaluation_status_enum.py b/agenta-cli/agenta/client/backend/types/evaluation_status_enum.py index 159716b2e6..43c2b002d4 100644 --- a/agenta-cli/agenta/client/backend/types/evaluation_status_enum.py +++ b/agenta-cli/agenta/client/backend/types/evaluation_status_enum.py @@ -13,21 +13,21 @@ class EvaluationStatusEnum(str, enum.Enum): EVALUATION_INITIALIZED = "EVALUATION_INITIALIZED" EVALUATION_STARTED = "EVALUATION_STARTED" - COMPARISON_RUN_STARTED = "COMPARISON_RUN_STARTED" EVALUATION_FINISHED = "EVALUATION_FINISHED" + EVALUATION_FAILED = "EVALUATION_FAILED" def visit( self, evaluation_initialized: typing.Callable[[], T_Result], evaluation_started: typing.Callable[[], T_Result], - comparison_run_started: typing.Callable[[], T_Result], evaluation_finished: typing.Callable[[], T_Result], + evaluation_failed: typing.Callable[[], T_Result], ) -> T_Result: if self is EvaluationStatusEnum.EVALUATION_INITIALIZED: return evaluation_initialized() if self is EvaluationStatusEnum.EVALUATION_STARTED: return evaluation_started() - if self is EvaluationStatusEnum.COMPARISON_RUN_STARTED: - return comparison_run_started() if self is EvaluationStatusEnum.EVALUATION_FINISHED: return evaluation_finished() + if self is EvaluationStatusEnum.EVALUATION_FAILED: + return evaluation_failed() diff --git a/agenta-cli/agenta/client/backend/types/evaluation_type.py b/agenta-cli/agenta/client/backend/types/evaluation_type.py index 29990df5e9..4d2b91066d 100644 --- a/agenta-cli/agenta/client/backend/types/evaluation_type.py +++ b/agenta-cli/agenta/client/backend/types/evaluation_type.py @@ -11,43 +11,15 @@ class EvaluationType(str, enum.Enum): An enumeration. """ - AUTO_EXACT_MATCH = "auto_exact_match" - AUTO_SIMILARITY_MATCH = "auto_similarity_match" - AUTO_REGEX_TEST = "auto_regex_test" - AUTO_WEBHOOK_TEST = "auto_webhook_test" - AUTO_AI_CRITIQUE = "auto_ai_critique" HUMAN_A_B_TESTING = "human_a_b_testing" - HUMAN_SCORING = "human_scoring" - CUSTOM_CODE_RUN = "custom_code_run" SINGLE_MODEL_TEST = "single_model_test" def visit( self, - auto_exact_match: typing.Callable[[], T_Result], - auto_similarity_match: typing.Callable[[], T_Result], - auto_regex_test: typing.Callable[[], T_Result], - auto_webhook_test: typing.Callable[[], T_Result], - auto_ai_critique: typing.Callable[[], T_Result], human_a_b_testing: typing.Callable[[], T_Result], - human_scoring: typing.Callable[[], T_Result], - custom_code_run: typing.Callable[[], T_Result], single_model_test: typing.Callable[[], T_Result], ) -> T_Result: - if self is EvaluationType.AUTO_EXACT_MATCH: - return auto_exact_match() - if self is EvaluationType.AUTO_SIMILARITY_MATCH: - return auto_similarity_match() - if self is EvaluationType.AUTO_REGEX_TEST: - return auto_regex_test() - if self is EvaluationType.AUTO_WEBHOOK_TEST: - return auto_webhook_test() - if self is EvaluationType.AUTO_AI_CRITIQUE: - return auto_ai_critique() if self is EvaluationType.HUMAN_A_B_TESTING: return human_a_b_testing() - if self is EvaluationType.HUMAN_SCORING: - return human_scoring() - if self is EvaluationType.CUSTOM_CODE_RUN: - return custom_code_run() if self is EvaluationType.SINGLE_MODEL_TEST: return single_model_test() diff --git a/agenta-cli/agenta/client/backend/types/evaluation_type_settings.py b/agenta-cli/agenta/client/backend/types/evaluation_type_settings.py deleted file mode 100644 index 3b6c1d0691..0000000000 --- a/agenta-cli/agenta/client/backend/types/evaluation_type_settings.py +++ 
/dev/null @@ -1,42 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import datetime as dt -import typing - -from ..core.datetime_utils import serialize_datetime - -try: - import pydantic.v1 as pydantic # type: ignore -except ImportError: - import pydantic # type: ignore - - -class EvaluationTypeSettings(pydantic.BaseModel): - similarity_threshold: typing.Optional[float] - regex_pattern: typing.Optional[str] - regex_should_match: typing.Optional[bool] - webhook_url: typing.Optional[str] - custom_code_evaluation_id: typing.Optional[str] - llm_app_prompt_template: typing.Optional[str] - evaluation_prompt_template: typing.Optional[str] - - def json(self, **kwargs: typing.Any) -> str: - kwargs_with_defaults: typing.Any = { - "by_alias": True, - "exclude_unset": True, - **kwargs, - } - return super().json(**kwargs_with_defaults) - - def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]: - kwargs_with_defaults: typing.Any = { - "by_alias": True, - "exclude_unset": True, - **kwargs, - } - return super().dict(**kwargs_with_defaults) - - class Config: - frozen = True - smart_union = True - json_encoders = {dt.datetime: serialize_datetime} diff --git a/agenta-cli/agenta/client/backend/types/evaluator.py b/agenta-cli/agenta/client/backend/types/evaluator.py new file mode 100644 index 0000000000..70bea7aa58 --- /dev/null +++ b/agenta-cli/agenta/client/backend/types/evaluator.py @@ -0,0 +1,39 @@ +# This file was auto-generated by Fern from our API Definition. + +import datetime as dt +import typing + +from ..core.datetime_utils import serialize_datetime + +try: + import pydantic.v1 as pydantic # type: ignore +except ImportError: + import pydantic # type: ignore + + +class Evaluator(pydantic.BaseModel): + name: str + key: str + direct_use: bool + settings_template: typing.Dict[str, typing.Any] + + def json(self, **kwargs: typing.Any) -> str: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().json(**kwargs_with_defaults) + + def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().dict(**kwargs_with_defaults) + + class Config: + frozen = True + smart_union = True + json_encoders = {dt.datetime: serialize_datetime} diff --git a/agenta-cli/agenta/client/backend/types/custom_evaluation_detail.py b/agenta-cli/agenta/client/backend/types/evaluator_config.py similarity index 86% rename from agenta-cli/agenta/client/backend/types/custom_evaluation_detail.py rename to agenta-cli/agenta/client/backend/types/evaluator_config.py index 2ae2a3f96f..7ca248d882 100644 --- a/agenta-cli/agenta/client/backend/types/custom_evaluation_detail.py +++ b/agenta-cli/agenta/client/backend/types/evaluator_config.py @@ -11,11 +11,11 @@ import pydantic # type: ignore -class CustomEvaluationDetail(pydantic.BaseModel): +class EvaluatorConfig(pydantic.BaseModel): id: str - app_id: str - evaluation_name: str - python_code: str + name: str + evaluator_key: str + settings_values: typing.Optional[typing.Dict[str, typing.Any]] created_at: dt.datetime updated_at: dt.datetime diff --git a/agenta-cli/agenta/client/backend/types/human_evaluation.py b/agenta-cli/agenta/client/backend/types/human_evaluation.py new file mode 100644 index 0000000000..0fcf73d267 --- /dev/null +++ b/agenta-cli/agenta/client/backend/types/human_evaluation.py @@ -0,0 +1,48 @@ +# This file was auto-generated by Fern from our API 
Definition. + +import datetime as dt +import typing + +from ..core.datetime_utils import serialize_datetime +from .evaluation_type import EvaluationType + +try: + import pydantic.v1 as pydantic # type: ignore +except ImportError: + import pydantic # type: ignore + + +class HumanEvaluation(pydantic.BaseModel): + id: str + app_id: str + user_id: str + user_username: str + evaluation_type: EvaluationType + variant_ids: typing.List[str] + variant_names: typing.List[str] + testset_id: str + testset_name: str + status: str + created_at: dt.datetime + updated_at: dt.datetime + + def json(self, **kwargs: typing.Any) -> str: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().json(**kwargs_with_defaults) + + def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().dict(**kwargs_with_defaults) + + class Config: + frozen = True + smart_union = True + json_encoders = {dt.datetime: serialize_datetime} diff --git a/agenta-cli/agenta/client/backend/types/human_evaluation_scenario.py b/agenta-cli/agenta/client/backend/types/human_evaluation_scenario.py new file mode 100644 index 0000000000..cac5dbdd5e --- /dev/null +++ b/agenta-cli/agenta/client/backend/types/human_evaluation_scenario.py @@ -0,0 +1,48 @@ +# This file was auto-generated by Fern from our API Definition. + +import datetime as dt +import typing + +from ..core.datetime_utils import serialize_datetime +from .human_evaluation_scenario_input import HumanEvaluationScenarioInput +from .human_evaluation_scenario_output import HumanEvaluationScenarioOutput +from .human_evaluation_scenario_score import HumanEvaluationScenarioScore + +try: + import pydantic.v1 as pydantic # type: ignore +except ImportError: + import pydantic # type: ignore + + +class HumanEvaluationScenario(pydantic.BaseModel): + id: typing.Optional[str] + evaluation_id: str + inputs: typing.List[HumanEvaluationScenarioInput] + outputs: typing.List[HumanEvaluationScenarioOutput] + vote: typing.Optional[str] + score: typing.Optional[HumanEvaluationScenarioScore] + evaluation: typing.Optional[str] + correct_answer: typing.Optional[str] + is_pinned: typing.Optional[bool] + note: typing.Optional[str] + + def json(self, **kwargs: typing.Any) -> str: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().json(**kwargs_with_defaults) + + def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().dict(**kwargs_with_defaults) + + class Config: + frozen = True + smart_union = True + json_encoders = {dt.datetime: serialize_datetime} diff --git a/agenta-cli/agenta/client/backend/types/create_custom_evaluation.py b/agenta-cli/agenta/client/backend/types/human_evaluation_scenario_input.py similarity index 89% rename from agenta-cli/agenta/client/backend/types/create_custom_evaluation.py rename to agenta-cli/agenta/client/backend/types/human_evaluation_scenario_input.py index 452f784611..d65233935e 100644 --- a/agenta-cli/agenta/client/backend/types/create_custom_evaluation.py +++ b/agenta-cli/agenta/client/backend/types/human_evaluation_scenario_input.py @@ -11,10 +11,9 @@ import pydantic # type: ignore -class CreateCustomEvaluation(pydantic.BaseModel): - evaluation_name: str - python_code: str - app_id: str +class 
HumanEvaluationScenarioInput(pydantic.BaseModel): + input_name: str + input_value: str def json(self, **kwargs: typing.Any) -> str: kwargs_with_defaults: typing.Any = { diff --git a/agenta-cli/agenta/client/backend/types/human_evaluation_scenario_output.py b/agenta-cli/agenta/client/backend/types/human_evaluation_scenario_output.py new file mode 100644 index 0000000000..752606176b --- /dev/null +++ b/agenta-cli/agenta/client/backend/types/human_evaluation_scenario_output.py @@ -0,0 +1,37 @@ +# This file was auto-generated by Fern from our API Definition. + +import datetime as dt +import typing + +from ..core.datetime_utils import serialize_datetime + +try: + import pydantic.v1 as pydantic # type: ignore +except ImportError: + import pydantic # type: ignore + + +class HumanEvaluationScenarioOutput(pydantic.BaseModel): + variant_id: str + variant_output: str + + def json(self, **kwargs: typing.Any) -> str: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().json(**kwargs_with_defaults) + + def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().dict(**kwargs_with_defaults) + + class Config: + frozen = True + smart_union = True + json_encoders = {dt.datetime: serialize_datetime} diff --git a/agenta-cli/agenta/client/backend/types/evaluation_scenario_score.py b/agenta-cli/agenta/client/backend/types/human_evaluation_scenario_score.py similarity index 59% rename from agenta-cli/agenta/client/backend/types/evaluation_scenario_score.py rename to agenta-cli/agenta/client/backend/types/human_evaluation_scenario_score.py index 0dc572cd6d..eb99491278 100644 --- a/agenta-cli/agenta/client/backend/types/evaluation_scenario_score.py +++ b/agenta-cli/agenta/client/backend/types/human_evaluation_scenario_score.py @@ -2,4 +2,4 @@ import typing -EvaluationScenarioScore = typing.Union[int, str] +HumanEvaluationScenarioScore = typing.Union[str, int] diff --git a/agenta-cli/agenta/client/backend/types/evaluation_scenario_update_score.py b/agenta-cli/agenta/client/backend/types/human_evaluation_scenario_update_score.py similarity index 57% rename from agenta-cli/agenta/client/backend/types/evaluation_scenario_update_score.py rename to agenta-cli/agenta/client/backend/types/human_evaluation_scenario_update_score.py index 5c87996489..02d51b079b 100644 --- a/agenta-cli/agenta/client/backend/types/evaluation_scenario_update_score.py +++ b/agenta-cli/agenta/client/backend/types/human_evaluation_scenario_update_score.py @@ -2,4 +2,4 @@ import typing -EvaluationScenarioUpdateScore = typing.Union[int, str] +HumanEvaluationScenarioUpdateScore = typing.Union[str, int] diff --git a/agenta-cli/agenta/client/backend/types/llm_run_rate_limit.py b/agenta-cli/agenta/client/backend/types/llm_run_rate_limit.py new file mode 100644 index 0000000000..24214f4c59 --- /dev/null +++ b/agenta-cli/agenta/client/backend/types/llm_run_rate_limit.py @@ -0,0 +1,39 @@ +# This file was auto-generated by Fern from our API Definition. 
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+
+try:
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class LlmRunRateLimit(pydantic.BaseModel):
+    batch_size: int
+    max_retries: int
+    retry_delay: int
+    delay_between_batches: int
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {
+            "by_alias": True,
+            "exclude_unset": True,
+            **kwargs,
+        }
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {
+            "by_alias": True,
+            "exclude_unset": True,
+            **kwargs,
+        }
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
diff --git a/agenta-cli/agenta/client/backend/types/result.py b/agenta-cli/agenta/client/backend/types/result.py
new file mode 100644
index 0000000000..544336c305
--- /dev/null
+++ b/agenta-cli/agenta/client/backend/types/result.py
@@ -0,0 +1,37 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+
+try:
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class Result(pydantic.BaseModel):
+    type: str
+    value: typing.Optional[typing.Any]
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {
+            "by_alias": True,
+            "exclude_unset": True,
+            **kwargs,
+        }
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {
+            "by_alias": True,
+            "exclude_unset": True,
+            **kwargs,
+        }
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
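
The sketch below is not part of the generated diff; it is a minimal, illustrative walk-through of the reworked client surface, assuming the package is importable as aakrem (as the updated docstring examples suggest). Human evaluations are now served from the human-evaluations routes, bulk deletion takes a DeleteEvaluation request model, and per-app evaluator settings are managed through the evaluators and evaluators/configs endpoints. The API key, base URL, app ID, evaluator key, and settings values are placeholders, not real values.

import asyncio

from aakrem import DeleteEvaluation
from aakrem.client import AsyncAakremApi


async def main() -> None:
    # Placeholder credentials; substitute real values.
    client = AsyncAakremApi(
        api_key="YOUR_API_KEY", base_url="https://yourhost.com/path/to/api"
    )

    # Discover the available evaluators and the configs already attached to an app.
    evaluators = await client.get_evaluators_endpoint_evaluators_get()
    configs = await client.get_evaluator_configs_evaluators_configs_get(app_id="app-id")
    print([e.key for e in evaluators], [c.name for c in configs])

    # Create an evaluator configuration; the key and settings here are illustrative only.
    config = await client.create_new_evaluator_config_evaluators_configs_post(
        app_id="app-id",
        name="my-evaluator-config",
        evaluator_key="some_evaluator_key",
        settings_values={},
    )
    print(config.id)

    # Human evaluations are fetched and deleted through the human-evaluations routes,
    # with deletion wrapped in the new DeleteEvaluation request model.
    human_evals = await client.fetch_list_human_evaluations_human_evaluations_get(
        app_id="app-id"
    )
    await client.delete_evaluations_human_evaluations_delete(
        request=DeleteEvaluation(evaluations_ids=[ev.id for ev in human_evals])
    )


asyncio.run(main())

The split mirrors the type changes earlier in the diff: EvaluationTypeSettings is removed, the human-specific scenario fields move to the HumanEvaluation* models, and automatic evaluations report their outcomes through the new Result and AggregatedResult models instead.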