Merge pull request #1025 from Agenta-AI/gh/sdk-output-format-change
Enhancement - Improve SDK output format
aakrem authored Dec 21, 2023
2 parents c489c64 + 6b10ce3 commit a0a2005
Showing 7 changed files with 241 additions and 7 deletions.
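In short: entrypoint functions can now return either a plain string or a dict, and the SDK wraps the result in a structured FuncResponse carrying the message plus token usage, cost, and latency. As a rough sketch (field values here are illustrative, not taken from the commit), a variant endpoint now responds with something like:

{
    "message": "Here are three ideas for your startup...",
    "usage": {"completion_tokens": 120, "prompt_tokens": 35, "total_tokens": 155},
    "cost": 0.0002925,
    "latency": 1.2345
}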
1 change: 1 addition & 0 deletions agenta-cli/agenta/__init__.py
@@ -13,5 +13,6 @@
)
from .sdk.utils.preinit import PreInitObject
from .sdk.agenta_init import Config, init
from .sdk.utils.helper.openai_cost import calculate_token_usage

config = PreInitObject("agenta.config", Config)
1 change: 1 addition & 0 deletions agenta-cli/agenta/sdk/__init__.py
@@ -14,5 +14,6 @@
FileInputURL,
)
from .agenta_init import Config, init
from .utils.helper.openai_cost import calculate_token_usage

config = PreInitObject("agenta.config", Config)
22 changes: 16 additions & 6 deletions agenta-cli/agenta/sdk/agenta_decorator.py
@@ -1,17 +1,18 @@
"""The code for the Agenta SDK"""
import os
import sys
import time
import inspect
import argparse
import traceback
import functools
from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import Any, Callable, Dict, Optional, Tuple, List
from typing import Any, Callable, Dict, Optional, Tuple, List, Union

from fastapi import Body, FastAPI, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware

import agenta
from .context import save_context
@@ -26,6 +27,7 @@
TextParam,
MessagesInput,
FileInputURL,
FuncResponse,
)

app = FastAPI()
@@ -90,15 +92,15 @@ async def wrapper_deployed(*args, **kwargs) -> Any:

update_function_signature(wrapper, func_signature, config_params, ingestible_files)
route = f"/{endpoint_name}"
app.post(route)(wrapper)
app.post(route, response_model=FuncResponse)(wrapper)

update_deployed_function_signature(
wrapper_deployed,
func_signature,
ingestible_files,
)
route_deployed = f"/{endpoint_name}_deployed"
app.post(route_deployed)(wrapper_deployed)
app.post(route_deployed, response_model=FuncResponse)(wrapper_deployed)
override_schema(
openapi_schema=app.openapi(),
func_name=func.__name__,
@@ -148,7 +150,9 @@ def ingest_files(
func_params[name] = ingest_file(func_params[name])


async def execute_function(func: Callable[..., Any], *args, **func_params) -> Any:
async def execute_function(
func: Callable[..., Any], *args, **func_params
) -> Union[Dict[str, Any], JSONResponse]:
"""Execute the function and handle any exceptions."""

try:
@@ -158,14 +162,20 @@ async def execute_function(func: Callable[..., Any], *args, **func_params) -> Any
it awaits their execution.
"""
is_coroutine_function = inspect.iscoroutinefunction(func)
start_time = time.perf_counter()
if is_coroutine_function:
result = await func(*args, **func_params)
else:
result = func(*args, **func_params)
end_time = time.perf_counter()
latency = end_time - start_time

if isinstance(result, Context):
save_context(result)
return result
if isinstance(result, Dict):
return FuncResponse(**result, latency=round(latency, 4)).dict()
if isinstance(result, str):
return FuncResponse(message=result, latency=round(latency, 4)).dict()
except Exception as e:
return handle_exception(e)

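Taken together, execute_function now times the call with time.perf_counter() and normalizes the return value: a dict result is expanded into FuncResponse(**result, latency=...), while a bare string becomes FuncResponse(message=..., latency=...). A minimal standalone sketch of that normalization (simplified: Context results, async dispatch, and exception handling are omitted, and run_and_wrap is a hypothetical name, not part of the commit):

import time
from typing import Any, Callable, Dict


def run_and_wrap(func: Callable[..., Any], **params) -> Dict[str, Any]:
    # Time the user's function, then coerce its output into the
    # FuncResponse shape: {"message", "usage", "cost", "latency"}.
    start = time.perf_counter()
    result = func(**params)
    latency = round(time.perf_counter() - start, 4)
    if isinstance(result, dict):
        return {**result, "latency": latency}
    return {"message": result, "latency": latency}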
15 changes: 14 additions & 1 deletion agenta-cli/agenta/sdk/types.py
@@ -1,5 +1,5 @@
import json
from typing import Any, Dict, List
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Extra, HttpUrl

@@ -10,6 +10,19 @@ def __init__(self, file_name: str, file_path: str):
self.file_path = file_path


class LLMTokenUsage(BaseModel):
completion_tokens: int
prompt_tokens: int
total_tokens: int


class FuncResponse(BaseModel):
message: str
usage: Optional[LLMTokenUsage]
cost: Optional[float]
latency: float


class DictInput(dict):
def __new__(cls, default_keys=None):
instance = super().__new__(cls, default_keys)
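With these models in place, a dict returned by an entrypoint validates directly into FuncResponse. A quick illustration (pydantic v1 style, matching the .dict() calls in the decorator; usage and cost are Optional and may be omitted):

from agenta.sdk.types import FuncResponse, LLMTokenUsage

resp = FuncResponse(
    message="It works",
    usage=LLMTokenUsage(completion_tokens=120, prompt_tokens=35, total_tokens=155),
    cost=0.0002925,
    latency=0.42,
)
print(resp.dict())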
166 changes: 166 additions & 0 deletions agenta-cli/agenta/sdk/utils/helper/openai_cost.py
@@ -0,0 +1,166 @@
# https://raw.githubusercontent.com/langchain-ai/langchain/23eb480c3866db8693a3a2d63b787c898c54bb35/libs/community/langchain_community/callbacks/openai_info.py
MODEL_COST_PER_1K_TOKENS = {
# GPT-4 input
"gpt-4": 0.03,
"gpt-4-0314": 0.03,
"gpt-4-0613": 0.03,
"gpt-4-32k": 0.06,
"gpt-4-32k-0314": 0.06,
"gpt-4-32k-0613": 0.06,
"gpt-4-vision-preview": 0.01,
"gpt-4-1106-preview": 0.01,
# GPT-4 output
"gpt-4-completion": 0.06,
"gpt-4-0314-completion": 0.06,
"gpt-4-0613-completion": 0.06,
"gpt-4-32k-completion": 0.12,
"gpt-4-32k-0314-completion": 0.12,
"gpt-4-32k-0613-completion": 0.12,
"gpt-4-vision-preview-completion": 0.03,
"gpt-4-1106-preview-completion": 0.03,
# GPT-3.5 input
"gpt-3.5-turbo": 0.0015,
"gpt-3.5-turbo-0301": 0.0015,
"gpt-3.5-turbo-0613": 0.0015,
"gpt-3.5-turbo-1106": 0.001,
"gpt-3.5-turbo-instruct": 0.0015,
"gpt-3.5-turbo-16k": 0.003,
"gpt-3.5-turbo-16k-0613": 0.003,
# GPT-3.5 output
"gpt-3.5-turbo-completion": 0.002,
"gpt-3.5-turbo-0301-completion": 0.002,
"gpt-3.5-turbo-0613-completion": 0.002,
"gpt-3.5-turbo-1106-completion": 0.002,
"gpt-3.5-turbo-instruct-completion": 0.002,
"gpt-3.5-turbo-16k-completion": 0.004,
"gpt-3.5-turbo-16k-0613-completion": 0.004,
# Azure GPT-35 input
"gpt-35-turbo": 0.0015, # Azure OpenAI version of ChatGPT
"gpt-35-turbo-0301": 0.0015, # Azure OpenAI version of ChatGPT
"gpt-35-turbo-0613": 0.0015,
"gpt-35-turbo-instruct": 0.0015,
"gpt-35-turbo-16k": 0.003,
"gpt-35-turbo-16k-0613": 0.003,
# Azure GPT-35 output
"gpt-35-turbo-completion": 0.002, # Azure OpenAI version of ChatGPT
"gpt-35-turbo-0301-completion": 0.002, # Azure OpenAI version of ChatGPT
"gpt-35-turbo-0613-completion": 0.002,
"gpt-35-turbo-instruct-completion": 0.002,
"gpt-35-turbo-16k-completion": 0.004,
"gpt-35-turbo-16k-0613-completion": 0.004,
# Others
"text-ada-001": 0.0004,
"ada": 0.0004,
"text-babbage-001": 0.0005,
"babbage": 0.0005,
"text-curie-001": 0.002,
"curie": 0.002,
"text-davinci-003": 0.02,
"text-davinci-002": 0.02,
"code-davinci-002": 0.02,
# Fine Tuned input
"babbage-002-finetuned": 0.0016,
"davinci-002-finetuned": 0.012,
"gpt-3.5-turbo-0613-finetuned": 0.012,
# Fine Tuned output
"babbage-002-finetuned-completion": 0.0016,
"davinci-002-finetuned-completion": 0.012,
"gpt-3.5-turbo-0613-finetuned-completion": 0.016,
# Azure Fine Tuned input
"babbage-002-azure-finetuned": 0.0004,
"davinci-002-azure-finetuned": 0.002,
"gpt-35-turbo-0613-azure-finetuned": 0.0015,
# Azure Fine Tuned output
"babbage-002-azure-finetuned-completion": 0.0004,
"davinci-002-azure-finetuned-completion": 0.002,
"gpt-35-turbo-0613-azure-finetuned-completion": 0.002,
# Legacy fine-tuned models
"ada-finetuned-legacy": 0.0016,
"babbage-finetuned-legacy": 0.0024,
"curie-finetuned-legacy": 0.012,
"davinci-finetuned-legacy": 0.12,
}


def standardize_model_name(
model_name: str,
is_completion: bool = False,
) -> str:
"""
Standardize the model name to a format that can be used in the OpenAI API.
Args:
model_name: Model name to standardize.
is_completion: Whether the model is used for completion or not.
Defaults to False.
Returns:
Standardized model name.
"""

model_name = model_name.lower()
if ".ft-" in model_name:
model_name = model_name.split(".ft-")[0] + "-azure-finetuned"
if ":ft-" in model_name:
model_name = model_name.split(":")[0] + "-finetuned-legacy"
if "ft:" in model_name:
model_name = model_name.split(":")[1] + "-finetuned"
if is_completion and (
model_name.startswith("gpt-4")
or model_name.startswith("gpt-3.5")
or model_name.startswith("gpt-35")
or ("finetuned" in model_name and "legacy" not in model_name)
):
return model_name + "-completion"
else:
return model_name


def get_openai_token_cost_for_model(
model_name: str, num_tokens: int, is_completion: bool = False
) -> float:
"""
Get the cost in USD for a given model and number of tokens.
Args:
model_name: Name of the model
num_tokens: Number of tokens.
is_completion: Whether the model is used for completion or not.
Defaults to False.
Returns:
Cost in USD.
"""

model_name = standardize_model_name(model_name, is_completion=is_completion)
if model_name not in MODEL_COST_PER_1K_TOKENS:
raise ValueError(
f"Unknown model: {model_name}. Please provide a valid OpenAI model name."
"Known models are: " + ", ".join(MODEL_COST_PER_1K_TOKENS.keys())
)
return MODEL_COST_PER_1K_TOKENS[model_name] * (num_tokens / 1000)


def calculate_token_usage(model_name: str, token_usage: dict) -> float:
"""Calculates the total cost of using a language model based on the model name and token
usage.
Args:
model_name: The name of the model used to determine the cost per token.
token_usage: Contains information about the usage of tokens for a particular model.
Returns:
Total cost of using a model.
"""

completion_tokens = token_usage.get("completion_tokens", 0)
prompt_tokens = token_usage.get("prompt_tokens", 0)
model_name = standardize_model_name(model_name)
if model_name in MODEL_COST_PER_1K_TOKENS:
completion_cost = get_openai_token_cost_for_model(
model_name, completion_tokens, is_completion=True
)
prompt_cost = get_openai_token_cost_for_model(model_name, prompt_tokens)
total_cost = prompt_cost + completion_cost
return total_cost
return 0
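For example, pricing a gpt-3.5-turbo call with 35 prompt tokens and 120 completion tokens (calculate_token_usage is re-exported from the top-level agenta package by this commit):

from agenta import calculate_token_usage

cost = calculate_token_usage(
    "gpt-3.5-turbo",
    {"prompt_tokens": 35, "completion_tokens": 120},
)
# (35 / 1000) * 0.0015 + (120 / 1000) * 0.002 = 0.0002925
print(cost)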
41 changes: 41 additions & 0 deletions examples/async_chat_sdk_output_format/app.py
@@ -0,0 +1,41 @@
import agenta as ag
from agenta import FloatParam, MessagesInput, MultipleChoiceParam
from openai import AsyncOpenAI


client = AsyncOpenAI()

SYSTEM_PROMPT = "You have expertise in offering technical ideas to startups."
CHAT_LLM_GPT = [
"gpt-3.5-turbo-16k",
"gpt-3.5-turbo-0301",
"gpt-3.5-turbo-0613",
"gpt-3.5-turbo-16k-0613",
"gpt-4",
]

ag.init()
ag.config.default(
temperature=FloatParam(0.2),
model=MultipleChoiceParam("gpt-3.5-turbo", CHAT_LLM_GPT),
max_tokens=ag.IntParam(-1, -1, 4000),
prompt_system=ag.TextParam(SYSTEM_PROMPT),
)


@ag.entrypoint
async def chat(inputs: MessagesInput = MessagesInput()):
messages = [{"role": "system", "content": ag.config.prompt_system}] + inputs
max_tokens = ag.config.max_tokens if ag.config.max_tokens != -1 else None
chat_completion = await client.chat.completions.create(
model=ag.config.model,
messages=messages,
temperature=ag.config.temperature,
max_tokens=max_tokens,
)
token_usage = chat_completion.usage.dict()
return {
"message": chat_completion.choices[0].message.content,
**{"usage": token_usage},
"cost": ag.calculate_token_usage(ag.config.model, token_usage),
}
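Because chat returns a dict with message, usage, and cost keys, the decorator's execute_function fills in latency and the generated POST endpoint responds with the full FuncResponse shape sketched above; returning a plain string would yield only message and latency.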
2 changes: 2 additions & 0 deletions examples/async_chat_sdk_output_format/requirements.txt
@@ -0,0 +1,2 @@
agenta
openai
