Merge pull request #1025 from Agenta-AI/gh/sdk-output-format-change
Enhancement - Improve SDK output format
aakrem authored Dec 21, 2023
2 parents c489c64 + 6b10ce3 commit a0a2005
Showing 7 changed files with 241 additions and 7 deletions.
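In short: entrypoint functions can now return either a plain string or a dict, and the SDK wraps the result in a structured FuncResponse carrying the message plus token usage, cost, and latency. As a rough sketch (field values here are illustrative, not taken from the commit), a variant endpoint now responds with something like:

{
    "message": "Here are three ideas for your startup...",
    "usage": {"completion_tokens": 120, "prompt_tokens": 35, "total_tokens": 155},
    "cost": 0.0002925,
    "latency": 1.2345
}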
1 change: 1 addition & 0 deletions agenta-cli/agenta/__init__.py
@@ -13,5 +13,6 @@
)
from .sdk.utils.preinit import PreInitObject
from .sdk.agenta_init import Config, init
from .sdk.utils.helper.openai_cost import calculate_token_usage

config = PreInitObject("agenta.config", Config)
1 change: 1 addition & 0 deletions agenta-cli/agenta/sdk/__init__.py
@@ -14,5 +14,6 @@
FileInputURL,
)
from .agenta_init import Config, init
from .utils.helper.openai_cost import calculate_token_usage

config = PreInitObject("agenta.config", Config)
22 changes: 16 additions & 6 deletions agenta-cli/agenta/sdk/agenta_decorator.py
@@ -1,17 +1,18 @@
"""The code for the Agenta SDK"""
import os
import sys
import time
import inspect
import argparse
import traceback
import functools
from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import Any, Callable, Dict, Optional, Tuple, List
from typing import Any, Callable, Dict, Optional, Tuple, List, Union

from fastapi import Body, FastAPI, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware

import agenta
from .context import save_context
@@ -26,6 +27,7 @@
TextParam,
MessagesInput,
FileInputURL,
FuncResponse,
)

app = FastAPI()
@@ -90,15 +92,15 @@ async def wrapper_deployed(*args, **kwargs) -> Any:

update_function_signature(wrapper, func_signature, config_params, ingestible_files)
route = f"/{endpoint_name}"
app.post(route)(wrapper)
app.post(route, response_model=FuncResponse)(wrapper)

update_deployed_function_signature(
wrapper_deployed,
func_signature,
ingestible_files,
)
route_deployed = f"/{endpoint_name}_deployed"
app.post(route_deployed)(wrapper_deployed)
app.post(route_deployed, response_model=FuncResponse)(wrapper_deployed)
override_schema(
openapi_schema=app.openapi(),
func_name=func.__name__,
@@ -148,7 +150,9 @@ def ingest_files(
func_params[name] = ingest_file(func_params[name])


async def execute_function(func: Callable[..., Any], *args, **func_params) -> Any:
async def execute_function(
func: Callable[..., Any], *args, **func_params
) -> Union[Dict[str, Any], JSONResponse]:
"""Execute the function and handle any exceptions."""

try:
@@ -158,14 +162,20 @@ async def execute_function(func: Callable[..., Any], *args, **func_params) -> Any
it awaits their execution.
"""
is_coroutine_function = inspect.iscoroutinefunction(func)
start_time = time.perf_counter()
if is_coroutine_function:
result = await func(*args, **func_params)
else:
result = func(*args, **func_params)
end_time = time.perf_counter()
latency = end_time - start_time

if isinstance(result, Context):
save_context(result)
return result
if isinstance(result, Dict):
return FuncResponse(**result, latency=round(latency, 4)).dict()
if isinstance(result, str):
return FuncResponse(message=result, latency=round(latency, 4)).dict()
except Exception as e:
return handle_exception(e)

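Taken together, execute_function now times the call with time.perf_counter() and normalizes the return value: a dict result is expanded into FuncResponse(**result, latency=...), while a bare string becomes FuncResponse(message=..., latency=...). A minimal standalone sketch of that normalization (simplified: Context results, async dispatch, and exception handling are omitted, and run_and_wrap is a hypothetical name, not part of the commit):

import time
from typing import Any, Callable, Dict


def run_and_wrap(func: Callable[..., Any], **params) -> Dict[str, Any]:
    # Time the user's function, then coerce its output into the
    # FuncResponse shape: {"message", "usage", "cost", "latency"}.
    start = time.perf_counter()
    result = func(**params)
    latency = round(time.perf_counter() - start, 4)
    if isinstance(result, dict):
        return {**result, "latency": latency}
    return {"message": result, "latency": latency}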
15 changes: 14 additions & 1 deletion agenta-cli/agenta/sdk/types.py
@@ -1,5 +1,5 @@
import json
from typing import Any, Dict, List
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Extra, HttpUrl

@@ -10,6 +10,19 @@ def __init__(self, file_name: str, file_path: str):
self.file_path = file_path


class LLMTokenUsage(BaseModel):
completion_tokens: int
prompt_tokens: int
total_tokens: int


class FuncResponse(BaseModel):
message: str
usage: Optional[LLMTokenUsage]
cost: Optional[float]
latency: float


class DictInput(dict):
def __new__(cls, default_keys=None):
instance = super().__new__(cls, default_keys)
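With these models in place, a dict returned by an entrypoint validates directly into FuncResponse. A quick illustration (pydantic v1 style, matching the .dict() calls in the decorator; usage and cost are Optional and may be omitted):

from agenta.sdk.types import FuncResponse, LLMTokenUsage

resp = FuncResponse(
    message="It works",
    usage=LLMTokenUsage(completion_tokens=120, prompt_tokens=35, total_tokens=155),
    cost=0.0002925,
    latency=0.42,
)
print(resp.dict())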
166 changes: 166 additions & 0 deletions agenta-cli/agenta/sdk/utils/helper/openai_cost.py
@@ -0,0 +1,166 @@
# https://raw.githubusercontent.com/langchain-ai/langchain/23eb480c3866db8693a3a2d63b787c898c54bb35/libs/community/langchain_community/callbacks/openai_info.py
MODEL_COST_PER_1K_TOKENS = {
# GPT-4 input
"gpt-4": 0.03,
"gpt-4-0314": 0.03,
"gpt-4-0613": 0.03,
"gpt-4-32k": 0.06,
"gpt-4-32k-0314": 0.06,
"gpt-4-32k-0613": 0.06,
"gpt-4-vision-preview": 0.01,
"gpt-4-1106-preview": 0.01,
# GPT-4 output
"gpt-4-completion": 0.06,
"gpt-4-0314-completion": 0.06,
"gpt-4-0613-completion": 0.06,
"gpt-4-32k-completion": 0.12,
"gpt-4-32k-0314-completion": 0.12,
"gpt-4-32k-0613-completion": 0.12,
"gpt-4-vision-preview-completion": 0.03,
"gpt-4-1106-preview-completion": 0.03,
# GPT-3.5 input
"gpt-3.5-turbo": 0.0015,
"gpt-3.5-turbo-0301": 0.0015,
"gpt-3.5-turbo-0613": 0.0015,
"gpt-3.5-turbo-1106": 0.001,
"gpt-3.5-turbo-instruct": 0.0015,
"gpt-3.5-turbo-16k": 0.003,
"gpt-3.5-turbo-16k-0613": 0.003,
# GPT-3.5 output
"gpt-3.5-turbo-completion": 0.002,
"gpt-3.5-turbo-0301-completion": 0.002,
"gpt-3.5-turbo-0613-completion": 0.002,
"gpt-3.5-turbo-1106-completion": 0.002,
"gpt-3.5-turbo-instruct-completion": 0.002,
"gpt-3.5-turbo-16k-completion": 0.004,
"gpt-3.5-turbo-16k-0613-completion": 0.004,
# Azure GPT-35 input
"gpt-35-turbo": 0.0015, # Azure OpenAI version of ChatGPT
"gpt-35-turbo-0301": 0.0015, # Azure OpenAI version of ChatGPT
"gpt-35-turbo-0613": 0.0015,
"gpt-35-turbo-instruct": 0.0015,
"gpt-35-turbo-16k": 0.003,
"gpt-35-turbo-16k-0613": 0.003,
# Azure GPT-35 output
"gpt-35-turbo-completion": 0.002, # Azure OpenAI version of ChatGPT
"gpt-35-turbo-0301-completion": 0.002, # Azure OpenAI version of ChatGPT
"gpt-35-turbo-0613-completion": 0.002,
"gpt-35-turbo-instruct-completion": 0.002,
"gpt-35-turbo-16k-completion": 0.004,
"gpt-35-turbo-16k-0613-completion": 0.004,
# Others
"text-ada-001": 0.0004,
"ada": 0.0004,
"text-babbage-001": 0.0005,
"babbage": 0.0005,
"text-curie-001": 0.002,
"curie": 0.002,
"text-davinci-003": 0.02,
"text-davinci-002": 0.02,
"code-davinci-002": 0.02,
# Fine Tuned input
"babbage-002-finetuned": 0.0016,
"davinci-002-finetuned": 0.012,
"gpt-3.5-turbo-0613-finetuned": 0.012,
# Fine Tuned output
"babbage-002-finetuned-completion": 0.0016,
"davinci-002-finetuned-completion": 0.012,
"gpt-3.5-turbo-0613-finetuned-completion": 0.016,
# Azure Fine Tuned input
"babbage-002-azure-finetuned": 0.0004,
"davinci-002-azure-finetuned": 0.002,
"gpt-35-turbo-0613-azure-finetuned": 0.0015,
# Azure Fine Tuned output
"babbage-002-azure-finetuned-completion": 0.0004,
"davinci-002-azure-finetuned-completion": 0.002,
"gpt-35-turbo-0613-azure-finetuned-completion": 0.002,
# Legacy fine-tuned models
"ada-finetuned-legacy": 0.0016,
"babbage-finetuned-legacy": 0.0024,
"curie-finetuned-legacy": 0.012,
"davinci-finetuned-legacy": 0.12,
}


def standardize_model_name(
model_name: str,
is_completion: bool = False,
) -> str:
"""
Standardize the model name to a format that can be used in the OpenAI API.
Args:
model_name: Model name to standardize.
is_completion: Whether the model is used for completion or not.
Defaults to False.
Returns:
Standardized model name.
"""

model_name = model_name.lower()
if ".ft-" in model_name:
model_name = model_name.split(".ft-")[0] + "-azure-finetuned"
if ":ft-" in model_name:
model_name = model_name.split(":")[0] + "-finetuned-legacy"
if "ft:" in model_name:
model_name = model_name.split(":")[1] + "-finetuned"
if is_completion and (
model_name.startswith("gpt-4")
or model_name.startswith("gpt-3.5")
or model_name.startswith("gpt-35")
or ("finetuned" in model_name and "legacy" not in model_name)
):
return model_name + "-completion"
else:
return model_name


def get_openai_token_cost_for_model(
model_name: str, num_tokens: int, is_completion: bool = False
) -> float:
"""
Get the cost in USD for a given model and number of tokens.
Args:
model_name: Name of the model
num_tokens: Number of tokens.
is_completion: Whether the model is used for completion or not.
Defaults to False.
Returns:
Cost in USD.
"""

model_name = standardize_model_name(model_name, is_completion=is_completion)
if model_name not in MODEL_COST_PER_1K_TOKENS:
raise ValueError(
f"Unknown model: {model_name}. Please provide a valid OpenAI model name."
"Known models are: " + ", ".join(MODEL_COST_PER_1K_TOKENS.keys())
)
return MODEL_COST_PER_1K_TOKENS[model_name] * (num_tokens / 1000)


def calculate_token_usage(model_name: str, token_usage: dict) -> float:
"""Calculates the total cost of using a language model based on the model name and token
usage.
Args:
model_name: The name of the model used to determine the cost per token.
token_usage: Contains information about the usage of tokens for a particular model.
Returns:
Total cost of using a model.
"""

completion_tokens = token_usage.get("completion_tokens", 0)
prompt_tokens = token_usage.get("prompt_tokens", 0)
model_name = standardize_model_name(model_name)
if model_name in MODEL_COST_PER_1K_TOKENS:
completion_cost = get_openai_token_cost_for_model(
model_name, completion_tokens, is_completion=True
)
prompt_cost = get_openai_token_cost_for_model(model_name, prompt_tokens)
total_cost = prompt_cost + completion_cost
return total_cost
return 0
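For example, pricing a gpt-3.5-turbo call with 35 prompt tokens and 120 completion tokens (calculate_token_usage is re-exported from the top-level agenta package by this commit):

from agenta import calculate_token_usage

cost = calculate_token_usage(
    "gpt-3.5-turbo",
    {"prompt_tokens": 35, "completion_tokens": 120},
)
# (35 / 1000) * 0.0015 + (120 / 1000) * 0.002 = 0.0002925
print(cost)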
41 changes: 41 additions & 0 deletions examples/async_chat_sdk_output_format/app.py
@@ -0,0 +1,41 @@
import agenta as ag
from agenta import FloatParam, MessagesInput, MultipleChoiceParam
from openai import AsyncOpenAI


client = AsyncOpenAI()

SYSTEM_PROMPT = "You have expertise in offering technical ideas to startups."
CHAT_LLM_GPT = [
"gpt-3.5-turbo-16k",
"gpt-3.5-turbo-0301",
"gpt-3.5-turbo-0613",
"gpt-3.5-turbo-16k-0613",
"gpt-4",
]

ag.init()
ag.config.default(
temperature=FloatParam(0.2),
model=MultipleChoiceParam("gpt-3.5-turbo", CHAT_LLM_GPT),
max_tokens=ag.IntParam(-1, -1, 4000),
prompt_system=ag.TextParam(SYSTEM_PROMPT),
)


@ag.entrypoint
async def chat(inputs: MessagesInput = MessagesInput()):
messages = [{"role": "system", "content": ag.config.prompt_system}] + inputs
max_tokens = ag.config.max_tokens if ag.config.max_tokens != -1 else None
chat_completion = await client.chat.completions.create(
model=ag.config.model,
messages=messages,
temperature=ag.config.temperature,
max_tokens=max_tokens,
)
token_usage = chat_completion.usage.dict()
return {
"message": chat_completion.choices[0].message.content,
**{"usage": token_usage},
"cost": ag.calculate_token_usage(ag.config.model, token_usage),
}
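Because chat returns a dict with message, usage, and cost keys, the decorator's execute_function fills in latency and the generated POST endpoint responds with the full FuncResponse shape sketched above; returning a plain string would yield only message and latency.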
2 changes: 2 additions & 0 deletions examples/async_chat_sdk_output_format/requirements.txt
@@ -0,0 +1,2 @@
agenta
openai
