Merge branch 'main' into evaluations-in-backend
aybruhm committed Jan 9, 2024
2 parents a191f3f + 3aa948a commit dc1da36
Showing 47 changed files with 3,601 additions and 129 deletions.
6 changes: 3 additions & 3 deletions README.md
@@ -104,15 +104,15 @@ Agenta allows developers and product teams to collaborate and build robust AI applications

| Using an LLM App Template (For Non-Technical Users) | Starting from Code |
| ------------- | ------------- |
-|1. [Create an application using a pre-built template from our UI](https://cloud.agenta.ai?utm_source=github&utm_medium=readme&utm_campaign=github)<br />2. Access a playground where you can test and compare different prompts and configurations side-by-side.<br /> 3. Systematically evaluate your application using pre-built or custom evaluators.<br /> 4. Deploy the application to production with one click. |1. [Add a few lines to any LLM application code to automatically create a playground for it](https://docs.agenta.ai/tutorials/first-app-with-langchain) <br />2. Experiment with prompts and configurations, and compare them side-by-side in the playground. <br />3. Systematically evaluate your application using pre-built or custom evaluators. <br />4. Deploy the application to production with one click. |
+|1. [Create an application using a pre-built template from our UI](https://cloud.agenta.ai?utm_source=github&utm_medium=readme&utm_campaign=github)<br />2. Access a playground where you can test and compare different prompts and configurations side-by-side.<br /> 3. Systematically evaluate your application using pre-built or custom evaluators.<br /> 4. Deploy the application to production with one click. |1. [Add a few lines to any LLM application code to automatically create a playground for it](https://docs.agenta.ai/developer_guides/tutorials/first-app-with-langchain) <br />2. Experiment with prompts and configurations, and compare them side-by-side in the playground. <br />3. Systematically evaluate your application using pre-built or custom evaluators. <br />4. Deploy the application to production with one click. |

<br /><br />

# Quick Start

### [Try the cloud version](https://cloud.agenta.ai?utm_source=github&utm_medium=readme&utm_campaign=github)
-### [Create your first application in one-minute](https://docs.agenta.ai/quickstart/getting-started-ui)
-### [Create an application using Langchain](https://docs.agenta.ai/tutorials/first-app-with-langchain)
+### [Create your first application in one-minute](https://docs.agenta.ai/getting_started/getting-started-ui)
+### [Create an application using Langchain](https://docs.agenta.ai/developer_guides/tutorials/first-app-with-langchain)
### [Self-host agenta](https://docs.agenta.ai/self-host/host-locally)
### [Read the Documentation](https://docs.agenta.ai)
### [Check the Cookbook](https://docs.agenta.ai/cookbook)
2 changes: 1 addition & 1 deletion agenta-backend/agenta_backend/routers/configs_router.py
@@ -44,7 +44,7 @@ async def save_config(
variant_to_overwrite = variant_db
break
if variant_to_overwrite is not None:
-if payload.overwrite:
+if payload.overwrite or variant_to_overwrite.config.parameters == {}:
print(f"update_variant_parameters ===> {payload.overwrite}")
await app_manager.update_variant_parameters(
app_variant_id=str(variant_to_overwrite.id),
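
With this change, a config save also goes through when the variant's stored parameters are empty — for example, right after an image update resets them (see `app_manager.py` below). A minimal sketch of the resulting decision rule, using plain stand-ins rather than the real database models:

```python
# Sketch of the save_config guard above; `stored_parameters` stands in for
# variant_to_overwrite.config.parameters (an assumed simplification).
def should_save(overwrite_requested: bool, stored_parameters: dict) -> bool:
    # Save when explicitly asked to overwrite, or when the variant has no
    # configuration yet (e.g. it was just reset on redeploy).
    return overwrite_requested or stored_parameters == {}

assert should_save(True, {"temperature": 0.9})       # explicit overwrite
assert should_save(False, {})                        # empty config gets filled
assert not should_save(False, {"temperature": 0.9})  # saved config is kept
```
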
5 changes: 5 additions & 0 deletions agenta-backend/agenta_backend/services/app_manager.py
@@ -139,8 +139,13 @@ async def update_variant_image(
)
# Update base with new image
await db_manager.update_base(app_variant_db.base, image=db_image)
+# Update variant to remove configuration
+await db_manager.update_variant_parameters(
+    app_variant_db=app_variant_db, parameters={}
+)
# Update variant with new image
app_variant_db = await db_manager.update_app_variant(app_variant_db, image=db_image)

# Start variant
await start_variant(app_variant_db, **kwargs)

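
Clearing the parameters here is deliberate: after a redeploy with a new image, the variant's config is empty, so the next `save_config` call repopulates defaults even when `overwrite=False` — the empty-config branch added in `configs_router.py` above picks it up.
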
214 changes: 146 additions & 68 deletions agenta-cli/README.md

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions agenta-cli/agenta/__init__.py
@@ -14,5 +14,6 @@
)
from .sdk.utils.preinit import PreInitObject
from .sdk.agenta_init import Config, init
+from .sdk.utils.helper.openai_cost import calculate_token_usage

config = PreInitObject("agenta.config", Config)
1 change: 1 addition & 0 deletions agenta-cli/agenta/sdk/__init__.py
@@ -15,5 +15,6 @@
BinaryParam,
)
from .agenta_init import Config, init
+from .utils.helper.openai_cost import calculate_token_usage

config = PreInitObject("agenta.config", Config)
22 changes: 16 additions & 6 deletions agenta-cli/agenta/sdk/agenta_decorator.py
@@ -1,17 +1,18 @@
"""The code for the Agenta SDK"""
import os
import sys
+import time
import inspect
import argparse
import traceback
import functools
from pathlib import Path
from tempfile import NamedTemporaryFile
-from typing import Any, Callable, Dict, Optional, Tuple, List
+from typing import Any, Callable, Dict, Optional, Tuple, List, Union

from fastapi import Body, FastAPI, UploadFile
-from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
+from fastapi.middleware.cors import CORSMiddleware

import agenta
from .context import save_context
@@ -26,6 +27,7 @@
TextParam,
MessagesInput,
FileInputURL,
+FuncResponse,
BinaryParam,
)

@@ -91,15 +93,15 @@ async def wrapper_deployed(*args, **kwargs) -> Any:

update_function_signature(wrapper, func_signature, config_params, ingestible_files)
route = f"/{endpoint_name}"
-app.post(route)(wrapper)
+app.post(route, response_model=FuncResponse)(wrapper)

update_deployed_function_signature(
wrapper_deployed,
func_signature,
ingestible_files,
)
route_deployed = f"/{endpoint_name}_deployed"
-app.post(route_deployed)(wrapper_deployed)
+app.post(route_deployed, response_model=FuncResponse)(wrapper_deployed)
override_schema(
openapi_schema=app.openapi(),
func_name=func.__name__,
@@ -149,7 +151,9 @@ def ingest_files(
func_params[name] = ingest_file(func_params[name])


-async def execute_function(func: Callable[..., Any], *args, **func_params) -> Any:
+async def execute_function(
+    func: Callable[..., Any], *args, **func_params
+) -> Union[Dict[str, Any], JSONResponse]:
"""Execute the function and handle any exceptions."""

try:
@@ -159,14 +163,20 @@ async def execute_function(func: Callable[..., Any], *args, **func_params) -> Any:
it awaits their execution.
"""
is_coroutine_function = inspect.iscoroutinefunction(func)
+start_time = time.perf_counter()
if is_coroutine_function:
result = await func(*args, **func_params)
else:
result = func(*args, **func_params)
+end_time = time.perf_counter()
+latency = end_time - start_time

if isinstance(result, Context):
save_context(result)
-return result
+if isinstance(result, Dict):
+    return FuncResponse(**result, latency=round(latency, 4)).dict()
+if isinstance(result, str):
+    return FuncResponse(message=result, latency=round(latency, 4)).dict()
except Exception as e:
return handle_exception(e)

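
Taken together, the decorator now times each call and coerces plain `str` or `dict` return values into a `FuncResponse`. A hedged sketch of what a decorated app returns over HTTP, assuming the SDK's `@ag.entrypoint` decorator; function name and values are illustrative:

```python
import agenta as ag

@ag.entrypoint  # builds the FastAPI route wired up above
def generate(question: str) -> dict:
    # Returning a dict lets the app attach usage and cost itself; a bare
    # string would instead be wrapped as {"message": <str>, "latency": ...}.
    return {
        "message": f"You asked: {question}",
        "usage": {"completion_tokens": 5, "prompt_tokens": 12, "total_tokens": 17},
        "cost": 3.1e-05,
    }

# POST /generate now responds with a FuncResponse-shaped body, e.g.:
# {"message": "You asked: hi", "usage": {...}, "cost": 3.1e-05, "latency": 0.0007}
# (latency is injected by execute_function, not by the app.)
```
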
8 changes: 4 additions & 4 deletions agenta-cli/agenta/sdk/agenta_init.py
@@ -1,3 +1,5 @@
+from agenta.client.exceptions import APIRequestError
+from agenta.client.backend.client import AgentaApi
import os
import logging
from typing import Any, Optional
@@ -7,8 +9,6 @@
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

-from agenta.client.backend.client import AgentaApi
-from agenta.client.exceptions import APIRequestError

BACKEND_URL_SUFFIX = os.environ.get("BACKEND_URL_SUFFIX", "api")
CLIENT_API_KEY = os.environ.get("AGENTA_API_KEY")
@@ -104,11 +104,11 @@ def __init__(self, base_id, host):
else:
self.persist = True

-def register_default(self, overwrite=True, **kwargs):
+def register_default(self, overwrite=False, **kwargs):
"""alias for default"""
return self.default(overwrite=overwrite, **kwargs)

-def default(self, overwrite=True, **kwargs):
+def default(self, overwrite=False, **kwargs):
"""Saves the default parameters to the app_name and base_name in case they are not already saved.
Args:
overwrite: Whether to overwrite the existing configuration or not
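
With `overwrite` now defaulting to `False`, re-running an app's code no longer clobbers parameters saved from the playground; combined with the backend change above, the very first save still goes through because the stored config is empty. A sketch (parameter names and values illustrative):

```python
import agenta as ag

ag.init()
# First run: nothing saved yet, so these defaults are persisted.
# Subsequent runs: overwrite=False leaves playground edits untouched.
ag.config.default(
    temperature=ag.FloatParam(0.9),
    prompt_template=ag.TextParam("Summarize the following text: {text}"),
)
# Resetting from code is now an explicit opt-in:
# ag.config.default(overwrite=True, temperature=ag.FloatParam(0.2))
```
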
15 changes: 14 additions & 1 deletion agenta-cli/agenta/sdk/types.py
@@ -1,5 +1,5 @@
import json
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Extra, HttpUrl, Field

@@ -10,6 +10,19 @@ def __init__(self, file_name: str, file_path: str):
self.file_path = file_path


+class LLMTokenUsage(BaseModel):
+    completion_tokens: int
+    prompt_tokens: int
+    total_tokens: int


+class FuncResponse(BaseModel):
+    message: str
+    usage: Optional[LLMTokenUsage]
+    cost: Optional[float]
+    latency: float


class DictInput(dict):
def __new__(cls, default_keys=None):
instance = super().__new__(cls, default_keys)
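
For clarity, here is how the new response model serializes (pydantic v1 `.dict()`, matching its use in `agenta_decorator.py`; values illustrative):

```python
from agenta.sdk.types import FuncResponse, LLMTokenUsage

resp = FuncResponse(
    message="Paris",
    usage=LLMTokenUsage(completion_tokens=1, prompt_tokens=14, total_tokens=15),
    cost=2.05e-05,
    latency=0.8312,
)
print(resp.dict())
# {'message': 'Paris',
#  'usage': {'completion_tokens': 1, 'prompt_tokens': 14, 'total_tokens': 15},
#  'cost': 2.05e-05, 'latency': 0.8312}
```
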
166 changes: 166 additions & 0 deletions agenta-cli/agenta/sdk/utils/helper/openai_cost.py
@@ -0,0 +1,166 @@
# https://raw.githubusercontent.com/langchain-ai/langchain/23eb480c3866db8693a3a2d63b787c898c54bb35/libs/community/langchain_community/callbacks/openai_info.py
MODEL_COST_PER_1K_TOKENS = {
# GPT-4 input
"gpt-4": 0.03,
"gpt-4-0314": 0.03,
"gpt-4-0613": 0.03,
"gpt-4-32k": 0.06,
"gpt-4-32k-0314": 0.06,
"gpt-4-32k-0613": 0.06,
"gpt-4-vision-preview": 0.01,
"gpt-4-1106-preview": 0.01,
# GPT-4 output
"gpt-4-completion": 0.06,
"gpt-4-0314-completion": 0.06,
"gpt-4-0613-completion": 0.06,
"gpt-4-32k-completion": 0.12,
"gpt-4-32k-0314-completion": 0.12,
"gpt-4-32k-0613-completion": 0.12,
"gpt-4-vision-preview-completion": 0.03,
"gpt-4-1106-preview-completion": 0.03,
# GPT-3.5 input
"gpt-3.5-turbo": 0.0015,
"gpt-3.5-turbo-0301": 0.0015,
"gpt-3.5-turbo-0613": 0.0015,
"gpt-3.5-turbo-1106": 0.001,
"gpt-3.5-turbo-instruct": 0.0015,
"gpt-3.5-turbo-16k": 0.003,
"gpt-3.5-turbo-16k-0613": 0.003,
# GPT-3.5 output
"gpt-3.5-turbo-completion": 0.002,
"gpt-3.5-turbo-0301-completion": 0.002,
"gpt-3.5-turbo-0613-completion": 0.002,
"gpt-3.5-turbo-1106-completion": 0.002,
"gpt-3.5-turbo-instruct-completion": 0.002,
"gpt-3.5-turbo-16k-completion": 0.004,
"gpt-3.5-turbo-16k-0613-completion": 0.004,
# Azure GPT-35 input
"gpt-35-turbo": 0.0015, # Azure OpenAI version of ChatGPT
"gpt-35-turbo-0301": 0.0015, # Azure OpenAI version of ChatGPT
"gpt-35-turbo-0613": 0.0015,
"gpt-35-turbo-instruct": 0.0015,
"gpt-35-turbo-16k": 0.003,
"gpt-35-turbo-16k-0613": 0.003,
# Azure GPT-35 output
"gpt-35-turbo-completion": 0.002, # Azure OpenAI version of ChatGPT
"gpt-35-turbo-0301-completion": 0.002, # Azure OpenAI version of ChatGPT
"gpt-35-turbo-0613-completion": 0.002,
"gpt-35-turbo-instruct-completion": 0.002,
"gpt-35-turbo-16k-completion": 0.004,
"gpt-35-turbo-16k-0613-completion": 0.004,
# Others
"text-ada-001": 0.0004,
"ada": 0.0004,
"text-babbage-001": 0.0005,
"babbage": 0.0005,
"text-curie-001": 0.002,
"curie": 0.002,
"text-davinci-003": 0.02,
"text-davinci-002": 0.02,
"code-davinci-002": 0.02,
# Fine Tuned input
"babbage-002-finetuned": 0.0016,
"davinci-002-finetuned": 0.012,
"gpt-3.5-turbo-0613-finetuned": 0.012,
# Fine Tuned output
"babbage-002-finetuned-completion": 0.0016,
"davinci-002-finetuned-completion": 0.012,
"gpt-3.5-turbo-0613-finetuned-completion": 0.016,
# Azure Fine Tuned input
"babbage-002-azure-finetuned": 0.0004,
"davinci-002-azure-finetuned": 0.002,
"gpt-35-turbo-0613-azure-finetuned": 0.0015,
# Azure Fine Tuned output
"babbage-002-azure-finetuned-completion": 0.0004,
"davinci-002-azure-finetuned-completion": 0.002,
"gpt-35-turbo-0613-azure-finetuned-completion": 0.002,
# Legacy fine-tuned models
"ada-finetuned-legacy": 0.0016,
"babbage-finetuned-legacy": 0.0024,
"curie-finetuned-legacy": 0.012,
"davinci-finetuned-legacy": 0.12,
}


def standardize_model_name(
    model_name: str,
    is_completion: bool = False,
) -> str:
    """
    Standardize the model name to a format that can be used in the OpenAI API.
    Args:
        model_name: Model name to standardize.
        is_completion: Whether the model is used for completion or not.
            Defaults to False.
    Returns:
        Standardized model name.
    """

    model_name = model_name.lower()
    if ".ft-" in model_name:
        model_name = model_name.split(".ft-")[0] + "-azure-finetuned"
    if ":ft-" in model_name:
        model_name = model_name.split(":")[0] + "-finetuned-legacy"
    if "ft:" in model_name:
        model_name = model_name.split(":")[1] + "-finetuned"
    if is_completion and (
        model_name.startswith("gpt-4")
        or model_name.startswith("gpt-3.5")
        or model_name.startswith("gpt-35")
        or ("finetuned" in model_name and "legacy" not in model_name)
    ):
        return model_name + "-completion"
    else:
        return model_name


def get_openai_token_cost_for_model(
    model_name: str, num_tokens: int, is_completion: bool = False
) -> float:
    """
    Get the cost in USD for a given model and number of tokens.
    Args:
        model_name: Name of the model.
        num_tokens: Number of tokens.
        is_completion: Whether the model is used for completion or not.
            Defaults to False.
    Returns:
        Cost in USD.
    """

    model_name = standardize_model_name(model_name, is_completion=is_completion)
    if model_name not in MODEL_COST_PER_1K_TOKENS:
        raise ValueError(
            f"Unknown model: {model_name}. Please provide a valid OpenAI model name. "
            "Known models are: " + ", ".join(MODEL_COST_PER_1K_TOKENS.keys())
        )
    return MODEL_COST_PER_1K_TOKENS[model_name] * (num_tokens / 1000)


def calculate_token_usage(model_name: str, token_usage: dict) -> float:
    """Calculates the total cost of using a language model based on the model name
    and token usage.
    Args:
        model_name: The name of the model used to determine the cost per token.
        token_usage: Contains information about the usage of tokens for a particular model.
    Returns:
        Total cost of using a model.
    """

    completion_tokens = token_usage.get("completion_tokens", 0)
    prompt_tokens = token_usage.get("prompt_tokens", 0)
    model_name = standardize_model_name(model_name)
    if model_name in MODEL_COST_PER_1K_TOKENS:
        completion_cost = get_openai_token_cost_for_model(
            model_name, completion_tokens, is_completion=True
        )
        prompt_cost = get_openai_token_cost_for_model(model_name, prompt_tokens)
        total_cost = prompt_cost + completion_cost
        return total_cost
    return 0
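
A worked example of the helpers above, using the table's gpt-3.5-turbo rates ($0.0015/1K prompt tokens, $0.002/1K completion tokens); the fine-tune ID is made up:

```python
from agenta import calculate_token_usage
from agenta.sdk.utils.helper.openai_cost import standardize_model_name

# Fine-tune IDs collapse onto a "-finetuned" pricing key:
assert (
    standardize_model_name("ft:gpt-3.5-turbo-0613:acme::abc123")
    == "gpt-3.5-turbo-0613-finetuned"
)

# 1,000 prompt tokens + 500 completion tokens on gpt-3.5-turbo:
# 0.0015 * (1000/1000) + 0.002 * (500/1000) = 0.0025 USD
cost = calculate_token_usage(
    "gpt-3.5-turbo", {"prompt_tokens": 1000, "completion_tokens": 500}
)
assert abs(cost - 0.0025) < 1e-9

# Unknown names fall through: calculate_token_usage returns 0, while
# get_openai_token_cost_for_model raises ValueError instead.
assert calculate_token_usage("not-a-model", {"prompt_tokens": 10}) == 0
```
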
@@ -192,7 +192,7 @@ const WriteOwnAppModal: React.FC<Props> = ({...props}) => {
</div>
<span>
Check out{" "}
-<a href="https://docs.agenta.ai/tutorials/your-first-llm-app">
+<a href="https://docs.agenta.ai/advanced_guides/custom_applications">
our tutorial for writing your first LLM app
</a>
</span>
Expand Up @@ -21,6 +21,7 @@ import {testsetRowToChatMessages} from "@/lib/helpers/testset"
import EvaluationVotePanel from "../Evaluations/EvaluationCardView/EvaluationVotePanel"
import VariantAlphabet from "../Evaluations/EvaluationCardView/VariantAlphabet"
import {ParamsFormWithRun} from "./SingleModelEvaluationTable"
+import {PassThrough} from "stream"

const {Title} = Typography

@@ -238,6 +239,9 @@ const ABTestingEvaluationTable: React.FC<EvaluationTableProps> = ({
? testsetRowToChatMessages(evaluation.testset.csvdata[rowIndex], false)
: [],
)
+if (typeof result !== "string") {
+    result = result.message
+}

setRowValue(rowIndex, variant.variantId, result)
;(outputs as KeyValuePair)[variant.variantId] = result
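
Since a variant endpoint may now return the full `FuncResponse` object instead of a bare string, the table unwraps `result.message` before rendering. The payload shape it handles, expressed as a Python literal with illustrative values:

```python
# What the evaluation table now receives from a variant call:
result = {
    "message": "The capital of France is Paris.",
    "usage": {"completion_tokens": 9, "prompt_tokens": 25, "total_tokens": 34},
    "cost": 5.55e-05,
    "latency": 1.2345,
}
# Legacy variants still return a plain string, hence the typeof check above.
output = result if isinstance(result, str) else result["message"]
```
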