Merge branch 'main' into evaluations-in-backend
aybruhm committed Jan 9, 2024
2 parents a191f3f + 3aa948a commit dc1da36
Showing 47 changed files with 3,601 additions and 129 deletions.
6 changes: 3 additions & 3 deletions README.md
@@ -104,15 +104,15 @@ Agenta allows developers and product teams to collaborate and build robust AI applications

| Using an LLM App Template (For Non-Technical Users) | Starting from Code |
| ------------- | ------------- |
-|1. [Create an application using a pre-built template from our UI](https://cloud.agenta.ai?utm_source=github&utm_medium=readme&utm_campaign=github)<br />2. Access a playground where you can test and compare different prompts and configurations side-by-side.<br /> 3. Systematically evaluate your application using pre-built or custom evaluators.<br /> 4. Deploy the application to production with one click. |1. [Add a few lines to any LLM application code to automatically create a playground for it](https://docs.agenta.ai/tutorials/first-app-with-langchain) <br />2. Experiment with prompts and configurations, and compare them side-by-side in the playground. <br />3. Systematically evaluate your application using pre-built or custom evaluators. <br />4. Deploy the application to production with one click. |
+|1. [Create an application using a pre-built template from our UI](https://cloud.agenta.ai?utm_source=github&utm_medium=readme&utm_campaign=github)<br />2. Access a playground where you can test and compare different prompts and configurations side-by-side.<br /> 3. Systematically evaluate your application using pre-built or custom evaluators.<br /> 4. Deploy the application to production with one click. |1. [Add a few lines to any LLM application code to automatically create a playground for it](https://docs.agenta.ai/developer_guides/tutorials/first-app-with-langchain) <br />2. Experiment with prompts and configurations, and compare them side-by-side in the playground. <br />3. Systematically evaluate your application using pre-built or custom evaluators. <br />4. Deploy the application to production with one click. |

<br /><br />

# Quick Start

### [Try the cloud version](https://cloud.agenta.ai?utm_source=github&utm_medium=readme&utm_campaign=github)
-### [Create your first application in one-minute](https://docs.agenta.ai/quickstart/getting-started-ui)
-### [Create an application using Langchain](https://docs.agenta.ai/tutorials/first-app-with-langchain)
+### [Create your first application in one-minute](https://docs.agenta.ai/getting_started/getting-started-ui)
+### [Create an application using Langchain](https://docs.agenta.ai/developer_guides/tutorials/first-app-with-langchain)
### [Self-host agenta](https://docs.agenta.ai/self-host/host-locally)
### [Read the Documentation](https://docs.agenta.ai)
### [Check the Cookbook](https://docs.agenta.ai/cookbook)
2 changes: 1 addition & 1 deletion agenta-backend/agenta_backend/routers/configs_router.py
@@ -44,7 +44,7 @@ async def save_config(
variant_to_overwrite = variant_db
break
if variant_to_overwrite is not None:
-if payload.overwrite:
+if payload.overwrite or variant_to_overwrite.config.parameters == {}:
print(f"update_variant_parameters ===> {payload.overwrite}")
await app_manager.update_variant_parameters(
app_variant_id=str(variant_to_overwrite.id),
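
With this change, a config save also goes through when the variant's stored parameters are empty — for example, right after an image update resets them (see `app_manager.py` below). A minimal sketch of the resulting decision rule, using plain stand-ins rather than the real database models:

```python
# Sketch of the save_config guard above; `stored_parameters` stands in for
# variant_to_overwrite.config.parameters (an assumed simplification).
def should_save(overwrite_requested: bool, stored_parameters: dict) -> bool:
    # Save when explicitly asked to overwrite, or when the variant has no
    # configuration yet (e.g. it was just reset on redeploy).
    return overwrite_requested or stored_parameters == {}

assert should_save(True, {"temperature": 0.9})       # explicit overwrite
assert should_save(False, {})                        # empty config gets filled
assert not should_save(False, {"temperature": 0.9})  # saved config is kept
```
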
5 changes: 5 additions & 0 deletions agenta-backend/agenta_backend/services/app_manager.py
@@ -139,8 +139,13 @@ async def update_variant_image(
)
# Update base with new image
await db_manager.update_base(app_variant_db.base, image=db_image)
+# Update variant to remove configuration
+await db_manager.update_variant_parameters(
+    app_variant_db=app_variant_db, parameters={}
+)
# Update variant with new image
app_variant_db = await db_manager.update_app_variant(app_variant_db, image=db_image)

# Start variant
await start_variant(app_variant_db, **kwargs)

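
Clearing the parameters here is deliberate: after a redeploy with a new image, the variant's config is empty, so the next `save_config` call repopulates defaults even when `overwrite=False` — the empty-config branch added in `configs_router.py` above picks it up.
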
214 changes: 146 additions & 68 deletions agenta-cli/README.md

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions agenta-cli/agenta/__init__.py
@@ -14,5 +14,6 @@
)
from .sdk.utils.preinit import PreInitObject
from .sdk.agenta_init import Config, init
+from .sdk.utils.helper.openai_cost import calculate_token_usage

config = PreInitObject("agenta.config", Config)
1 change: 1 addition & 0 deletions agenta-cli/agenta/sdk/__init__.py
@@ -15,5 +15,6 @@
BinaryParam,
)
from .agenta_init import Config, init
+from .utils.helper.openai_cost import calculate_token_usage

config = PreInitObject("agenta.config", Config)
22 changes: 16 additions & 6 deletions agenta-cli/agenta/sdk/agenta_decorator.py
@@ -1,17 +1,18 @@
"""The code for the Agenta SDK"""
import os
import sys
+import time
import inspect
import argparse
import traceback
import functools
from pathlib import Path
from tempfile import NamedTemporaryFile
-from typing import Any, Callable, Dict, Optional, Tuple, List
+from typing import Any, Callable, Dict, Optional, Tuple, List, Union

from fastapi import Body, FastAPI, UploadFile
-from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
+from fastapi.middleware.cors import CORSMiddleware

import agenta
from .context import save_context
@@ -26,6 +27,7 @@
TextParam,
MessagesInput,
FileInputURL,
+FuncResponse,
BinaryParam,
)

@@ -91,15 +93,15 @@ async def wrapper_deployed(*args, **kwargs) -> Any:

update_function_signature(wrapper, func_signature, config_params, ingestible_files)
route = f"/{endpoint_name}"
-app.post(route)(wrapper)
+app.post(route, response_model=FuncResponse)(wrapper)

update_deployed_function_signature(
wrapper_deployed,
func_signature,
ingestible_files,
)
route_deployed = f"/{endpoint_name}_deployed"
-app.post(route_deployed)(wrapper_deployed)
+app.post(route_deployed, response_model=FuncResponse)(wrapper_deployed)
override_schema(
openapi_schema=app.openapi(),
func_name=func.__name__,
@@ -149,7 +151,9 @@ def ingest_files(
func_params[name] = ingest_file(func_params[name])


-async def execute_function(func: Callable[..., Any], *args, **func_params) -> Any:
+async def execute_function(
+    func: Callable[..., Any], *args, **func_params
+) -> Union[Dict[str, Any], JSONResponse]:
"""Execute the function and handle any exceptions."""

try:
@@ -159,14 +163,20 @@ async def execute_function(func: Callable[..., Any], *args, **func_params) -> Any:
it awaits their execution.
"""
is_coroutine_function = inspect.iscoroutinefunction(func)
+start_time = time.perf_counter()
if is_coroutine_function:
result = await func(*args, **func_params)
else:
result = func(*args, **func_params)
+end_time = time.perf_counter()
+latency = end_time - start_time

if isinstance(result, Context):
save_context(result)
-return result
+if isinstance(result, Dict):
+    return FuncResponse(**result, latency=round(latency, 4)).dict()
+if isinstance(result, str):
+    return FuncResponse(message=result, latency=round(latency, 4)).dict()
except Exception as e:
return handle_exception(e)

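
Taken together, the decorator now times each call and coerces plain `str` or `dict` return values into a `FuncResponse`. A hedged sketch of what a decorated app returns over HTTP, assuming the SDK's `@ag.entrypoint` decorator; function name and values are illustrative:

```python
import agenta as ag

@ag.entrypoint  # builds the FastAPI route wired up above
def generate(question: str) -> dict:
    # Returning a dict lets the app attach usage and cost itself; a bare
    # string would instead be wrapped as {"message": <str>, "latency": ...}.
    return {
        "message": f"You asked: {question}",
        "usage": {"completion_tokens": 5, "prompt_tokens": 12, "total_tokens": 17},
        "cost": 3.1e-05,
    }

# POST /generate now responds with a FuncResponse-shaped body, e.g.:
# {"message": "You asked: hi", "usage": {...}, "cost": 3.1e-05, "latency": 0.0007}
# (latency is injected by execute_function, not by the app.)
```
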
8 changes: 4 additions & 4 deletions agenta-cli/agenta/sdk/agenta_init.py
@@ -1,3 +1,5 @@
+from agenta.client.exceptions import APIRequestError
+from agenta.client.backend.client import AgentaApi
import os
import logging
from typing import Any, Optional
@@ -7,8 +9,6 @@
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

-from agenta.client.backend.client import AgentaApi
-from agenta.client.exceptions import APIRequestError

BACKEND_URL_SUFFIX = os.environ.get("BACKEND_URL_SUFFIX", "api")
CLIENT_API_KEY = os.environ.get("AGENTA_API_KEY")
@@ -104,11 +104,11 @@ def __init__(self, base_id, host):
else:
self.persist = True

-def register_default(self, overwrite=True, **kwargs):
+def register_default(self, overwrite=False, **kwargs):
"""alias for default"""
return self.default(overwrite=overwrite, **kwargs)

-def default(self, overwrite=True, **kwargs):
+def default(self, overwrite=False, **kwargs):
"""Saves the default parameters to the app_name and base_name in case they are not already saved.
Args:
overwrite: Whether to overwrite the existing configuration or not
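
With `overwrite` now defaulting to `False`, re-running an app's code no longer clobbers parameters saved from the playground; combined with the backend change above, the very first save still goes through because the stored config is empty. A sketch (parameter names and values illustrative):

```python
import agenta as ag

ag.init()
# First run: nothing saved yet, so these defaults are persisted.
# Subsequent runs: overwrite=False leaves playground edits untouched.
ag.config.default(
    temperature=ag.FloatParam(0.9),
    prompt_template=ag.TextParam("Summarize the following text: {text}"),
)
# Resetting from code is now an explicit opt-in:
# ag.config.default(overwrite=True, temperature=ag.FloatParam(0.2))
```
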
15 changes: 14 additions & 1 deletion agenta-cli/agenta/sdk/types.py
@@ -1,5 +1,5 @@
import json
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Extra, HttpUrl, Field

@@ -10,6 +10,19 @@ def __init__(self, file_name: str, file_path: str):
self.file_path = file_path


+class LLMTokenUsage(BaseModel):
+    completion_tokens: int
+    prompt_tokens: int
+    total_tokens: int


+class FuncResponse(BaseModel):
+    message: str
+    usage: Optional[LLMTokenUsage]
+    cost: Optional[float]
+    latency: float


class DictInput(dict):
def __new__(cls, default_keys=None):
instance = super().__new__(cls, default_keys)
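
For clarity, here is how the new response model serializes (pydantic v1 `.dict()`, matching its use in `agenta_decorator.py`; values illustrative):

```python
from agenta.sdk.types import FuncResponse, LLMTokenUsage

resp = FuncResponse(
    message="Paris",
    usage=LLMTokenUsage(completion_tokens=1, prompt_tokens=14, total_tokens=15),
    cost=2.05e-05,
    latency=0.8312,
)
print(resp.dict())
# {'message': 'Paris',
#  'usage': {'completion_tokens': 1, 'prompt_tokens': 14, 'total_tokens': 15},
#  'cost': 2.05e-05, 'latency': 0.8312}
```
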
166 changes: 166 additions & 0 deletions agenta-cli/agenta/sdk/utils/helper/openai_cost.py
@@ -0,0 +1,166 @@
# https://raw.githubusercontent.com/langchain-ai/langchain/23eb480c3866db8693a3a2d63b787c898c54bb35/libs/community/langchain_community/callbacks/openai_info.py
MODEL_COST_PER_1K_TOKENS = {
# GPT-4 input
"gpt-4": 0.03,
"gpt-4-0314": 0.03,
"gpt-4-0613": 0.03,
"gpt-4-32k": 0.06,
"gpt-4-32k-0314": 0.06,
"gpt-4-32k-0613": 0.06,
"gpt-4-vision-preview": 0.01,
"gpt-4-1106-preview": 0.01,
# GPT-4 output
"gpt-4-completion": 0.06,
"gpt-4-0314-completion": 0.06,
"gpt-4-0613-completion": 0.06,
"gpt-4-32k-completion": 0.12,
"gpt-4-32k-0314-completion": 0.12,
"gpt-4-32k-0613-completion": 0.12,
"gpt-4-vision-preview-completion": 0.03,
"gpt-4-1106-preview-completion": 0.03,
# GPT-3.5 input
"gpt-3.5-turbo": 0.0015,
"gpt-3.5-turbo-0301": 0.0015,
"gpt-3.5-turbo-0613": 0.0015,
"gpt-3.5-turbo-1106": 0.001,
"gpt-3.5-turbo-instruct": 0.0015,
"gpt-3.5-turbo-16k": 0.003,
"gpt-3.5-turbo-16k-0613": 0.003,
# GPT-3.5 output
"gpt-3.5-turbo-completion": 0.002,
"gpt-3.5-turbo-0301-completion": 0.002,
"gpt-3.5-turbo-0613-completion": 0.002,
"gpt-3.5-turbo-1106-completion": 0.002,
"gpt-3.5-turbo-instruct-completion": 0.002,
"gpt-3.5-turbo-16k-completion": 0.004,
"gpt-3.5-turbo-16k-0613-completion": 0.004,
# Azure GPT-35 input
"gpt-35-turbo": 0.0015, # Azure OpenAI version of ChatGPT
"gpt-35-turbo-0301": 0.0015, # Azure OpenAI version of ChatGPT
"gpt-35-turbo-0613": 0.0015,
"gpt-35-turbo-instruct": 0.0015,
"gpt-35-turbo-16k": 0.003,
"gpt-35-turbo-16k-0613": 0.003,
# Azure GPT-35 output
"gpt-35-turbo-completion": 0.002, # Azure OpenAI version of ChatGPT
"gpt-35-turbo-0301-completion": 0.002, # Azure OpenAI version of ChatGPT
"gpt-35-turbo-0613-completion": 0.002,
"gpt-35-turbo-instruct-completion": 0.002,
"gpt-35-turbo-16k-completion": 0.004,
"gpt-35-turbo-16k-0613-completion": 0.004,
# Others
"text-ada-001": 0.0004,
"ada": 0.0004,
"text-babbage-001": 0.0005,
"babbage": 0.0005,
"text-curie-001": 0.002,
"curie": 0.002,
"text-davinci-003": 0.02,
"text-davinci-002": 0.02,
"code-davinci-002": 0.02,
# Fine Tuned input
"babbage-002-finetuned": 0.0016,
"davinci-002-finetuned": 0.012,
"gpt-3.5-turbo-0613-finetuned": 0.012,
# Fine Tuned output
"babbage-002-finetuned-completion": 0.0016,
"davinci-002-finetuned-completion": 0.012,
"gpt-3.5-turbo-0613-finetuned-completion": 0.016,
# Azure Fine Tuned input
"babbage-002-azure-finetuned": 0.0004,
"davinci-002-azure-finetuned": 0.002,
"gpt-35-turbo-0613-azure-finetuned": 0.0015,
# Azure Fine Tuned output
"babbage-002-azure-finetuned-completion": 0.0004,
"davinci-002-azure-finetuned-completion": 0.002,
"gpt-35-turbo-0613-azure-finetuned-completion": 0.002,
# Legacy fine-tuned models
"ada-finetuned-legacy": 0.0016,
"babbage-finetuned-legacy": 0.0024,
"curie-finetuned-legacy": 0.012,
"davinci-finetuned-legacy": 0.12,
}


def standardize_model_name(
    model_name: str,
    is_completion: bool = False,
) -> str:
    """
    Standardize the model name to a format that can be used in the OpenAI API.
    Args:
        model_name: Model name to standardize.
        is_completion: Whether the model is used for completion or not.
            Defaults to False.
    Returns:
        Standardized model name.
    """

    model_name = model_name.lower()
    if ".ft-" in model_name:
        model_name = model_name.split(".ft-")[0] + "-azure-finetuned"
    if ":ft-" in model_name:
        model_name = model_name.split(":")[0] + "-finetuned-legacy"
    if "ft:" in model_name:
        model_name = model_name.split(":")[1] + "-finetuned"
    if is_completion and (
        model_name.startswith("gpt-4")
        or model_name.startswith("gpt-3.5")
        or model_name.startswith("gpt-35")
        or ("finetuned" in model_name and "legacy" not in model_name)
    ):
        return model_name + "-completion"
    else:
        return model_name


def get_openai_token_cost_for_model(
    model_name: str, num_tokens: int, is_completion: bool = False
) -> float:
    """
    Get the cost in USD for a given model and number of tokens.
    Args:
        model_name: Name of the model.
        num_tokens: Number of tokens.
        is_completion: Whether the model is used for completion or not.
            Defaults to False.
    Returns:
        Cost in USD.
    """

    model_name = standardize_model_name(model_name, is_completion=is_completion)
    if model_name not in MODEL_COST_PER_1K_TOKENS:
        raise ValueError(
            f"Unknown model: {model_name}. Please provide a valid OpenAI model name. "
            "Known models are: " + ", ".join(MODEL_COST_PER_1K_TOKENS.keys())
        )
    return MODEL_COST_PER_1K_TOKENS[model_name] * (num_tokens / 1000)


def calculate_token_usage(model_name: str, token_usage: dict) -> float:
    """Calculates the total cost of using a language model based on the model name
    and token usage.
    Args:
        model_name: The name of the model used to determine the cost per token.
        token_usage: Contains information about the usage of tokens for a particular model.
    Returns:
        Total cost of using a model.
    """

    completion_tokens = token_usage.get("completion_tokens", 0)
    prompt_tokens = token_usage.get("prompt_tokens", 0)
    model_name = standardize_model_name(model_name)
    if model_name in MODEL_COST_PER_1K_TOKENS:
        completion_cost = get_openai_token_cost_for_model(
            model_name, completion_tokens, is_completion=True
        )
        prompt_cost = get_openai_token_cost_for_model(model_name, prompt_tokens)
        total_cost = prompt_cost + completion_cost
        return total_cost
    return 0
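
A worked example of the helpers above, using the table's gpt-3.5-turbo rates ($0.0015/1K prompt tokens, $0.002/1K completion tokens); the fine-tune ID is made up:

```python
from agenta import calculate_token_usage
from agenta.sdk.utils.helper.openai_cost import standardize_model_name

# Fine-tune IDs collapse onto a "-finetuned" pricing key:
assert (
    standardize_model_name("ft:gpt-3.5-turbo-0613:acme::abc123")
    == "gpt-3.5-turbo-0613-finetuned"
)

# 1,000 prompt tokens + 500 completion tokens on gpt-3.5-turbo:
# 0.0015 * (1000/1000) + 0.002 * (500/1000) = 0.0025 USD
cost = calculate_token_usage(
    "gpt-3.5-turbo", {"prompt_tokens": 1000, "completion_tokens": 500}
)
assert abs(cost - 0.0025) < 1e-9

# Unknown names fall through: calculate_token_usage returns 0, while
# get_openai_token_cost_for_model raises ValueError instead.
assert calculate_token_usage("not-a-model", {"prompt_tokens": 10}) == 0
```
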
@@ -192,7 +192,7 @@ const WriteOwnAppModal: React.FC<Props> = ({...props}) => {
</div>
<span>
Check out{" "}
-<a href="https://docs.agenta.ai/tutorials/your-first-llm-app">
+<a href="https://docs.agenta.ai/advanced_guides/custom_applications">
our tutorial for writing your first LLM app
</a>
</span>
Expand Up @@ -21,6 +21,7 @@ import {testsetRowToChatMessages} from "@/lib/helpers/testset"
import EvaluationVotePanel from "../Evaluations/EvaluationCardView/EvaluationVotePanel"
import VariantAlphabet from "../Evaluations/EvaluationCardView/VariantAlphabet"
import {ParamsFormWithRun} from "./SingleModelEvaluationTable"
+import {PassThrough} from "stream"

const {Title} = Typography

@@ -238,6 +239,9 @@ const ABTestingEvaluationTable: React.FC<EvaluationTableProps> = ({
? testsetRowToChatMessages(evaluation.testset.csvdata[rowIndex], false)
: [],
)
+if (typeof result !== "string") {
+    result = result.message
+}

setRowValue(rowIndex, variant.variantId, result)
;(outputs as KeyValuePair)[variant.variantId] = result
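
Since a variant endpoint may now return the full `FuncResponse` object instead of a bare string, the table unwraps `result.message` before rendering. The payload shape it handles, expressed as a Python literal with illustrative values:

```python
# What the evaluation table now receives from a variant call:
result = {
    "message": "The capital of France is Paris.",
    "usage": {"completion_tokens": 9, "prompt_tokens": 25, "total_tokens": 34},
    "cost": 5.55e-05,
    "latency": 1.2345,
}
# Legacy variants still return a plain string, hence the typeof check above.
output = result if isinstance(result, str) else result["message"]
```
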