From f3e04b15c4991bea3de615b4a216b1ef4199bc09 Mon Sep 17 00:00:00 2001
From: Mahmoud Mabrouk
Date: Sat, 14 Dec 2024 21:47:41 +0100
Subject: [PATCH] reorganize and add new services for prompt

---
 docker-compose.yml | 22 ++-
 services/README.md | 33 ++++
 services/chat-live-sdk/docker-compose.yml | 23 ---
 services/chat-new-sdk-prompt.rest | 33 ++++
 .../Dockerfile | 0
 .../Dockerfile.prerelease | 0
 .../_app.py | 0
 .../chat-new-sdk-prompt/docker-compose.yml | 23 +++
 .../entrypoint.sh | 0
 .../main.py | 0
 .../mock_litellm.py | 0
 .../supported_llm_models.py | 0
 services/chat-new-sdk.rest | 29 ++++
 .../Dockerfile | 0
 .../Dockerfile.prerelease | 0
 services/chat-new-sdk/_app.py | 65 +++++++
 services/chat-new-sdk/docker-compose.yml | 23 +++
 .../entrypoint.sh | 0
 .../main.py | 0
 .../mock_litellm.py | 0
 .../supported_llm_models.py | 0
 .../completion-live-sdk/docker-compose.yml | 23 ---
 services/completion-new-sdk-prompt.rest | 37 ++++
 services/completion-new-sdk-prompt/Dockerfile | 18 ++
 .../Dockerfile.prerelease | 16 ++
 services/completion-new-sdk-prompt/_app.py | 159 ++++++++++++++++++
 .../docker-compose.yml | 23 +++
 .../completion-new-sdk-prompt/entrypoint.sh | 9 +
 services/completion-new-sdk-prompt/main.py | 14 ++
 .../completion-new-sdk-prompt/mock_litellm.py | 53 ++++++
 .../supported_llm_models.py | 91 ++++++++++
 services/completion-new-sdk/Dockerfile | 18 ++
 .../completion-new-sdk/Dockerfile.prerelease | 16 ++
 .../_app.py | 0
 .../completion-new-sdk/docker-compose.yml | 23 +++
 services/completion-new-sdk/entrypoint.sh | 9 +
 services/completion-new-sdk/main.py | 14 ++
 services/completion-new-sdk/mock_litellm.py | 53 ++++++
 .../supported_llm_models.py | 91 ++++++++++
 39 files changed, 866 insertions(+), 52 deletions(-)
 create mode 100644 services/README.md
 delete mode 100644 services/chat-live-sdk/docker-compose.yml
 create mode 100644 services/chat-new-sdk-prompt.rest
 rename services/{chat-live-sdk => chat-new-sdk-prompt}/Dockerfile (100%)
 rename services/{chat-live-sdk => chat-new-sdk-prompt}/Dockerfile.prerelease (100%)
 rename services/{chat-live-sdk => chat-new-sdk-prompt}/_app.py (100%)
 create mode 100644 services/chat-new-sdk-prompt/docker-compose.yml
 rename services/{chat-live-sdk => chat-new-sdk-prompt}/entrypoint.sh (100%)
 rename services/{chat-live-sdk => chat-new-sdk-prompt}/main.py (100%)
 rename services/{chat-live-sdk => chat-new-sdk-prompt}/mock_litellm.py (100%)
 rename services/{chat-live-sdk => chat-new-sdk-prompt}/supported_llm_models.py (100%)
 rename services/{completion-live-sdk => chat-new-sdk}/Dockerfile (100%)
 rename services/{completion-live-sdk => chat-new-sdk}/Dockerfile.prerelease (100%)
 create mode 100644 services/chat-new-sdk/_app.py
 create mode 100644 services/chat-new-sdk/docker-compose.yml
 rename services/{completion-live-sdk => chat-new-sdk}/entrypoint.sh (100%)
 rename services/{completion-live-sdk => chat-new-sdk}/main.py (100%)
 rename services/{completion-live-sdk => chat-new-sdk}/mock_litellm.py (100%)
 rename services/{completion-live-sdk => chat-new-sdk}/supported_llm_models.py (100%)
 delete mode 100644 services/completion-live-sdk/docker-compose.yml
 create mode 100644 services/completion-new-sdk-prompt.rest
 create mode 100644 services/completion-new-sdk-prompt/Dockerfile
 create mode 100644 services/completion-new-sdk-prompt/Dockerfile.prerelease
 create mode 100644 services/completion-new-sdk-prompt/_app.py
 create mode 100644 services/completion-new-sdk-prompt/docker-compose.yml
 create mode 100755 services/completion-new-sdk-prompt/entrypoint.sh
 create mode 100644 services/completion-new-sdk-prompt/main.py
 create mode 100644 services/completion-new-sdk-prompt/mock_litellm.py
 create mode 100644 services/completion-new-sdk-prompt/supported_llm_models.py
 create mode 100644 services/completion-new-sdk/Dockerfile
 create mode 100644 services/completion-new-sdk/Dockerfile.prerelease
 rename services/{completion-live-sdk => completion-new-sdk}/_app.py (100%)
 create mode 100644 services/completion-new-sdk/docker-compose.yml
 create mode 100755 services/completion-new-sdk/entrypoint.sh
 create mode 100644 services/completion-new-sdk/main.py
 create mode 100644 services/completion-new-sdk/mock_litellm.py
 create mode 100644 services/completion-new-sdk/supported_llm_models.py

diff --git a/docker-compose.yml b/docker-compose.yml
index 63d2f71730..ca8812de6e 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -214,15 +214,25 @@ services:
       file: ./services/completion-stateless-sdk/docker-compose.yml
       service: completion-stateless-sdk
 
-  completion-live-sdk:
+  completion-new-sdk:
     extends:
-      file: ./services/completion-live-sdk/docker-compose.yml
-      service: completion-live-sdk
+      file: ./services/completion-new-sdk/docker-compose.yml
+      service: completion-new-sdk
 
-  chat-live-sdk:
+  chat-new-sdk:
     extends:
-      file: ./services/chat-live-sdk/docker-compose.yml
-      service: chat-live-sdk
+      file: ./services/chat-new-sdk/docker-compose.yml
+      service: chat-new-sdk
+
+  completion-new-sdk-prompt:
+    extends:
+      file: ./services/completion-new-sdk-prompt/docker-compose.yml
+      service: completion-new-sdk-prompt
+
+  chat-new-sdk-prompt:
+    extends:
+      file: ./services/chat-new-sdk-prompt/docker-compose.yml
+      service: chat-new-sdk-prompt
 
 networks:
   agenta-network:
diff --git a/services/README.md b/services/README.md
new file mode 100644
index 0000000000..5098702964
--- /dev/null
+++ b/services/README.md
@@ -0,0 +1,33 @@
+# Agenta Services
+
+This directory contains various versions of Agenta's LLM services, each offering distinct capabilities and interfaces for language model interactions.
+
+## Service Overview
+
+### Legacy Services
+- **completion-old-sdk**: Original completion service (as in current release)
+- **chat-old-sdk**: Original chat service (as in current release)
+
+### New Services
+All services with "new-sdk" utilize the modified SDK, which includes these changes:
+- Configuration is now nested under `agenta_config` in the request body (no longer flattened)
+- Implements the stateless SDK (no interface changes, but may introduce future issues in cloud deployment due to lack of testing)
+
+We've created two versions of each new service:
+1. Original logic with new SDK:
+   - completion-new-sdk
+   - chat-new-sdk
+2. New prompt object and updated logic:
+   - completion-new-sdk-prompt
+   - chat-new-sdk-prompt
+
+## Service Components
+
+Each service includes:
+- Docker configuration (`docker-compose.yml`)
+- REST API documentation (`.rest` files)
+- Implementation code (`_app.py`)
+
+## Usage
+
+For usage examples and API details, refer to the `.rest` files in each service's directory.
diff --git a/services/chat-live-sdk/docker-compose.yml b/services/chat-live-sdk/docker-compose.yml
deleted file mode 100644
index c69497ca26..0000000000
--- a/services/chat-live-sdk/docker-compose.yml
+++ /dev/null
@@ -1,23 +0,0 @@
-services:
-  chat-live-sdk:
-    build: .
- volumes: - - .:/app - - ../../agenta-cli:/agenta-cli - environment: - - AGENTA_UNAUTHORIZED_EXECUTION_ALLOWED=True - - AGENTA_HOST=http://host.docker.internal - networks: - - agenta-network - labels: - - "traefik.http.routers.chat-live-sdk.rule=PathPrefix(`/chat-live-sdk/`)" - - "traefik.http.routers.chat-live-sdk.entrypoints=web" - - "traefik.http.middlewares.chat-live-sdk-strip.stripprefix.prefixes=/chat-live-sdk" - - "traefik.http.middlewares.chat-live-sdk-strip.stripprefix.forceslash=true" - - "traefik.http.routers.chat-live-sdk.middlewares=chat-live-sdk-strip" - - "traefik.http.services.chat-live-sdk.loadbalancer.server.port=80" - - "traefik.http.routers.chat-live-sdk.service=chat-live-sdk" - -networks: - agenta-network: - external: true diff --git a/services/chat-new-sdk-prompt.rest b/services/chat-new-sdk-prompt.rest new file mode 100644 index 0000000000..462cfa83af --- /dev/null +++ b/services/chat-new-sdk-prompt.rest @@ -0,0 +1,33 @@ +### Test chat-new-sdk-prompt +POST http://localhost/chat-new-sdk-prompt/chat +Content-Type: application/json + +{ + "inputs": { + "message": "What is the capital of France?" + } +} + +### Test chat configuration with prompt +POST http://localhost/chat-new-sdk-prompt/configure +Content-Type: application/json + +{ + "model": "gpt-3.5-turbo", + "temperature": 0.7, + "max_tokens": 100, + "prompt": { + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant specializing in geography." + } + ], + "template_format": "fstring", + "response_format": { + "type": "text" + }, + "tools": null, + "tool_choice": "auto" + } +} diff --git a/services/chat-live-sdk/Dockerfile b/services/chat-new-sdk-prompt/Dockerfile similarity index 100% rename from services/chat-live-sdk/Dockerfile rename to services/chat-new-sdk-prompt/Dockerfile diff --git a/services/chat-live-sdk/Dockerfile.prerelease b/services/chat-new-sdk-prompt/Dockerfile.prerelease similarity index 100% rename from services/chat-live-sdk/Dockerfile.prerelease rename to services/chat-new-sdk-prompt/Dockerfile.prerelease diff --git a/services/chat-live-sdk/_app.py b/services/chat-new-sdk-prompt/_app.py similarity index 100% rename from services/chat-live-sdk/_app.py rename to services/chat-new-sdk-prompt/_app.py diff --git a/services/chat-new-sdk-prompt/docker-compose.yml b/services/chat-new-sdk-prompt/docker-compose.yml new file mode 100644 index 0000000000..a05ab0883b --- /dev/null +++ b/services/chat-new-sdk-prompt/docker-compose.yml @@ -0,0 +1,23 @@ +services: + chat-new-sdk-prompt: + build: . 
+ volumes: + - .:/app + - ../../agenta-cli:/agenta-cli + environment: + - AGENTA_UNAUTHORIZED_EXECUTION_ALLOWED=True + - AGENTA_HOST=http://host.docker.internal + networks: + - agenta-network + labels: + - "traefik.http.routers.chat-new-sdk-prompt.rule=PathPrefix(`/chat-new-sdk-prompt/`)" + - "traefik.http.routers.chat-new-sdk-prompt.entrypoints=web" + - "traefik.http.middlewares.chat-new-sdk-prompt-strip.stripprefix.prefixes=/chat-new-sdk-prompt" + - "traefik.http.middlewares.chat-new-sdk-prompt-strip.stripprefix.forceslash=true" + - "traefik.http.routers.chat-new-sdk-prompt.middlewares=chat-new-sdk-prompt-strip" + - "traefik.http.services.chat-new-sdk-prompt.loadbalancer.server.port=80" + - "traefik.http.routers.chat-new-sdk-prompt.service=chat-new-sdk-prompt" + +networks: + agenta-network: + external: true diff --git a/services/chat-live-sdk/entrypoint.sh b/services/chat-new-sdk-prompt/entrypoint.sh similarity index 100% rename from services/chat-live-sdk/entrypoint.sh rename to services/chat-new-sdk-prompt/entrypoint.sh diff --git a/services/chat-live-sdk/main.py b/services/chat-new-sdk-prompt/main.py similarity index 100% rename from services/chat-live-sdk/main.py rename to services/chat-new-sdk-prompt/main.py diff --git a/services/chat-live-sdk/mock_litellm.py b/services/chat-new-sdk-prompt/mock_litellm.py similarity index 100% rename from services/chat-live-sdk/mock_litellm.py rename to services/chat-new-sdk-prompt/mock_litellm.py diff --git a/services/chat-live-sdk/supported_llm_models.py b/services/chat-new-sdk-prompt/supported_llm_models.py similarity index 100% rename from services/chat-live-sdk/supported_llm_models.py rename to services/chat-new-sdk-prompt/supported_llm_models.py diff --git a/services/chat-new-sdk.rest b/services/chat-new-sdk.rest index 3021247a89..7b21dc0833 100644 --- a/services/chat-new-sdk.rest +++ b/services/chat-new-sdk.rest @@ -8,6 +8,35 @@ ### Health Check GET {{baseUrl}}/{{service}}/health HTTP/1.1 +### Test chat-new-sdk +POST {{baseUrl}}/{{service}}/chat HTTP/1.1 +Content-Type: application/json + +{ + "inputs": { + "message": "What is the capital of France?" + } +} + +### Test chat configuration +POST {{baseUrl}}/{{service}}/configure HTTP/1.1 +Content-Type: application/json + +{ + "model": "gpt-3.5-turbo", + "temperature": 0.7, + "max_tokens": 100, + "prompt": { + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant specializing in geography." 
+ } + ], + "template_format": "fstring" + } +} + ### Generate Chat Response POST {{baseUrl}}/{{service}}/generate HTTP/1.1 Content-Type: application/json diff --git a/services/completion-live-sdk/Dockerfile b/services/chat-new-sdk/Dockerfile similarity index 100% rename from services/completion-live-sdk/Dockerfile rename to services/chat-new-sdk/Dockerfile diff --git a/services/completion-live-sdk/Dockerfile.prerelease b/services/chat-new-sdk/Dockerfile.prerelease similarity index 100% rename from services/completion-live-sdk/Dockerfile.prerelease rename to services/chat-new-sdk/Dockerfile.prerelease diff --git a/services/chat-new-sdk/_app.py b/services/chat-new-sdk/_app.py new file mode 100644 index 0000000000..935118c1f5 --- /dev/null +++ b/services/chat-new-sdk/_app.py @@ -0,0 +1,65 @@ +from typing import Annotated, Any, Dict, List + +import agenta as ag +from agenta.sdk.assets import supported_llm_models +from pydantic import BaseModel, Field +import os +# Import mock if MOCK_LLM environment variable is set +if os.getenv("MOCK_LLM", True): + from mock_litellm import MockLiteLLM + + litellm = MockLiteLLM() +else: + import litellm + + litellm.drop_params = True + litellm.callbacks = [ag.callbacks.litellm_handler()] + +SYSTEM_PROMPT = "You have expertise in offering technical ideas to startups." + +ag.init() + + +class MyConfig(BaseModel): + temperature: float = Field(default=0.2, le=1, ge=0) + model: Annotated[str, ag.MultipleChoice(choices=supported_llm_models)] = Field( + default="gpt-3.5-turbo" + ) + max_tokens: int = Field(default=-1, ge=-1, le=4000) + prompt_system: str = Field(default=SYSTEM_PROMPT) + + +@ag.instrument(spankind="llm") +async def llm_call(messages: List[Dict[str, Any]], maxtokens): + config = ag.ConfigManager.get_from_route(schema=MyConfig) + chat_completion = await litellm.acompletion( + model=config.model, + messages=messages, + temperature=config.temperature, + max_tokens=maxtokens, + ) + token_usage = chat_completion.usage.dict() + return { + "usage": token_usage, + "message": chat_completion.choices[0].message.content, + "cost": litellm.cost_calculator.completion_cost( + completion_response=chat_completion, model=config.model + ), + } + + +@ag.route("/", config_schema=MyConfig) +@ag.instrument() +async def chat(inputs: ag.MessagesInput = ag.MessagesInput()) -> Dict[str, Any]: + config = ag.ConfigManager.get_from_route(schema=MyConfig) + messages = [{"role": "system", "content": config.prompt_system}] + inputs + max_tokens = config.max_tokens if config.max_tokens != -1 else None + response = await llm_call( + messages=messages, + maxtokens=max_tokens, + ) + return { + "message": response["message"], + "usage": response.get("usage", None), + "cost": response.get("cost", None), + } diff --git a/services/chat-new-sdk/docker-compose.yml b/services/chat-new-sdk/docker-compose.yml new file mode 100644 index 0000000000..8b61dc78e2 --- /dev/null +++ b/services/chat-new-sdk/docker-compose.yml @@ -0,0 +1,23 @@ +services: + chat-new-sdk: + build: . 
+ volumes: + - .:/app + - ../../agenta-cli:/agenta-cli + environment: + - AGENTA_UNAUTHORIZED_EXECUTION_ALLOWED=True + - AGENTA_HOST=http://host.docker.internal + networks: + - agenta-network + labels: + - "traefik.http.routers.chat-new-sdk.rule=PathPrefix(`/chat-new-sdk/`)" + - "traefik.http.routers.chat-new-sdk.entrypoints=web" + - "traefik.http.middlewares.chat-new-sdk-strip.stripprefix.prefixes=/chat-new-sdk" + - "traefik.http.middlewares.chat-new-sdk-strip.stripprefix.forceslash=true" + - "traefik.http.routers.chat-new-sdk.middlewares=chat-new-sdk-strip" + - "traefik.http.services.chat-new-sdk.loadbalancer.server.port=80" + - "traefik.http.routers.chat-new-sdk.service=chat-new-sdk" + +networks: + agenta-network: + external: true diff --git a/services/completion-live-sdk/entrypoint.sh b/services/chat-new-sdk/entrypoint.sh similarity index 100% rename from services/completion-live-sdk/entrypoint.sh rename to services/chat-new-sdk/entrypoint.sh diff --git a/services/completion-live-sdk/main.py b/services/chat-new-sdk/main.py similarity index 100% rename from services/completion-live-sdk/main.py rename to services/chat-new-sdk/main.py diff --git a/services/completion-live-sdk/mock_litellm.py b/services/chat-new-sdk/mock_litellm.py similarity index 100% rename from services/completion-live-sdk/mock_litellm.py rename to services/chat-new-sdk/mock_litellm.py diff --git a/services/completion-live-sdk/supported_llm_models.py b/services/chat-new-sdk/supported_llm_models.py similarity index 100% rename from services/completion-live-sdk/supported_llm_models.py rename to services/chat-new-sdk/supported_llm_models.py diff --git a/services/completion-live-sdk/docker-compose.yml b/services/completion-live-sdk/docker-compose.yml deleted file mode 100644 index 2e5466a54b..0000000000 --- a/services/completion-live-sdk/docker-compose.yml +++ /dev/null @@ -1,23 +0,0 @@ -services: - completion-live-sdk: - build: . - volumes: - - .:/app - - ../../agenta-cli:/agenta-cli - environment: - - AGENTA_UNAUTHORIZED_EXECUTION_ALLOWED=True - - AGENTA_HOST=http://host.docker.internal - networks: - - agenta-network - labels: - - "traefik.http.routers.completion-live-sdk.rule=PathPrefix(`/completion-live-sdk/`)" - - "traefik.http.routers.completion-live-sdk.entrypoints=web" - - "traefik.http.middlewares.completion-live-sdk-strip.stripprefix.prefixes=/completion-live-sdk" - - "traefik.http.middlewares.completion-live-sdk-strip.stripprefix.forceslash=true" - - "traefik.http.routers.completion-live-sdk.middlewares=completion-live-sdk-strip" - - "traefik.http.services.completion-live-sdk.loadbalancer.server.port=80" - - "traefik.http.routers.completion-live-sdk.service=completion-live-sdk" - -networks: - agenta-network: - external: true diff --git a/services/completion-new-sdk-prompt.rest b/services/completion-new-sdk-prompt.rest new file mode 100644 index 0000000000..9d09487b28 --- /dev/null +++ b/services/completion-new-sdk-prompt.rest @@ -0,0 +1,37 @@ +### Test completion-new-sdk-prompt +POST http://localhost/completion-new-sdk-prompt/generate +Content-Type: application/json + +{ + "inputs": { + "country": "France" + } +} + +### Test completion configuration with prompt +POST http://localhost/completion-new-sdk-prompt/configure +Content-Type: application/json + +{ + "model": "gpt-3.5-turbo", + "temperature": 0.7, + "max_tokens": 100, + "prompt": { + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant." + }, + { + "role": "user", + "content": "What is the capital of {country}?" 
+ } + ], + "template_format": "fstring", + "response_format": { + "type": "text" + }, + "tools": null, + "tool_choice": "auto" + } +} diff --git a/services/completion-new-sdk-prompt/Dockerfile b/services/completion-new-sdk-prompt/Dockerfile new file mode 100644 index 0000000000..eeda92c313 --- /dev/null +++ b/services/completion-new-sdk-prompt/Dockerfile @@ -0,0 +1,18 @@ +FROM python:3.10-slim + +ARG ROOT_PATH=/ +ENV ROOT_PATH=${ROOT_PATH} + +WORKDIR /app + +COPY . . + +RUN pip install --upgrade pip \ + && pip install --no-cache-dir agenta openai python-dotenv uvicorn "litellm>=1.0,<2.0" google-generativeai + +# Add agenta-cli to PYTHONPATH so it can find the local agenta package +ENV PYTHONPATH=/agenta-cli:$PYTHONPATH + +EXPOSE 80 + +CMD ["./entrypoint.sh"] diff --git a/services/completion-new-sdk-prompt/Dockerfile.prerelease b/services/completion-new-sdk-prompt/Dockerfile.prerelease new file mode 100644 index 0000000000..422c537eaf --- /dev/null +++ b/services/completion-new-sdk-prompt/Dockerfile.prerelease @@ -0,0 +1,16 @@ +FROM python:3.10-slim + +ARG ROOT_PATH=/ +ENV ROOT_PATH=${ROOT_PATH} + +WORKDIR /app + +COPY . . + +RUN pip install --upgrade pip \ + && pip install --no-cache-dir openai python-dotenv uvicorn "litellm>=1.0,<2.0" google-generativeai \ + && pip install --no-cache-dir --pre agenta + +EXPOSE 80 + +CMD ["./entrypoint.sh"] diff --git a/services/completion-new-sdk-prompt/_app.py b/services/completion-new-sdk-prompt/_app.py new file mode 100644 index 0000000000..a316f9442b --- /dev/null +++ b/services/completion-new-sdk-prompt/_app.py @@ -0,0 +1,159 @@ +from typing import Annotated, List, Union, Optional, Dict, Literal +from pydantic import BaseModel, Field, root_validator + +import agenta as ag +from agenta.sdk.assets import supported_llm_models +import os +# Import mock if MOCK_LLM environment variable is set +if os.getenv("MOCK_LLM", True): + from mock_litellm import MockLiteLLM + + litellm = MockLiteLLM() +else: + import litellm + + litellm.drop_params = True + litellm.callbacks = [ag.callbacks.litellm_handler()] + + +prompts = { + "system_prompt": "You are an expert in geography.", + "user_prompt": """What is the capital of {country}?""", +} + +GPT_FORMAT_RESPONSE = ["gpt-3.5-turbo-1106", "gpt-4-1106-preview"] + + +ag.init() + +class ToolCall(BaseModel): + id: str + type: Literal["function"] = "function" + function: Dict[str, str] + +class Message(BaseModel): + role: Literal["system", "user", "assistant", "tool", "function"] + content: Optional[str] = None + name: Optional[str] = None + tool_calls: Optional[List[ToolCall]] = None + tool_call_id: Optional[str] = None + +class ResponseFormat(BaseModel): + type: Literal["text", "json_object"] = "text" + schema: Optional[Dict] = None + +class Prompts(BaseModel): + messages: List[Message] = Field( + default=[ + Message(role="system", content=prompts["system_prompt"]), + Message(role="user", content=prompts["user_prompt"]) + ] + ) + system_prompt: Optional[str] = None + user_prompt: Optional[str] = None + template_format: Literal["fstring", "jinja2", "curly"] = Field( + default="fstring", + description="Format type for template variables: fstring {var}, jinja2 {{ var }}, or curly {{var}}" + ) + response_format: Optional[ResponseFormat] = Field( + default=None, + description="Specify the format of the response (text or JSON)" + ) + tools: Optional[List[Dict]] = Field( + default=None, + description="List of tools/functions the model can use" + ) + tool_choice: Optional[Union[Literal["none", "auto"], Dict]] = Field( + 
default="auto", + description="Control which tool the model should use" + ) + + class Config: + extra = "allow" + schema_extra = { + "x-prompt": True + } + + @root_validator(pre=True) + def init_messages(cls, values): + if "messages" not in values: + messages = [] + if "system_prompt" in values and values["system_prompt"]: + messages.append(Message(role="system", content=values["system_prompt"])) + if "user_prompt" in values and values["user_prompt"]: + messages.append(Message(role="user", content=values["user_prompt"])) + if messages: + values["messages"] = messages + return values + +class MyConfig(BaseModel): + prompt: Prompts = Field(default=Prompts()) + + + +@ag.instrument(spankind="llm") +async def llm_call(prompt_system: str, prompt_user: str): + config = ag.ConfigManager.get_from_route(schema=MyConfig) + response_format = ( + {"type": "json_object"} + if config.force_json and config.model in GPT_FORMAT_RESPONSE + else {"type": "text"} + ) + + max_tokens = config.max_tokens if config.max_tokens != -1 else None + + # Include frequency_penalty and presence_penalty only if supported + completion_params = {} + if config.model in GPT_FORMAT_RESPONSE: + completion_params["frequency_penalty"] = config.frequence_penalty + completion_params["presence_penalty"] = config.presence_penalty + + response = await litellm.acompletion( + **{ + "model": config.model, + "messages": config.prompt.messages, + "temperature": config.temperature, + "max_tokens": max_tokens, + "top_p": config.top_p, + "response_format": response_format, + **completion_params, + } + ) + token_usage = response.usage.dict() + return { + "message": response.choices[0].message.content, + "usage": token_usage, + "cost": litellm.cost_calculator.completion_cost( + completion_response=response, model=config.model + ), + } + + +@ag.route("/", config_schema=MyConfig) +@ag.instrument() +async def generate( + inputs: ag.DictInput = ag.DictInput(default_keys=["country"]), +): + config = ag.ConfigManager.get_from_route(schema=MyConfig) + print("popo", config) + try: + prompt_user = config.prompt_user.format(**inputs) + except Exception as e: + prompt_user = config.prompt_user + try: + prompt_system = config.prompt_system.format(**inputs) + except Exception as e: + prompt_system = config.prompt_system + + # SET MAX TOKENS - via completion() + if config.force_json and config.model not in GPT_FORMAT_RESPONSE: + raise ValueError( + "Model {} does not support JSON response format".format(config.model) + ) + + response = await llm_call(prompt_system=prompt_system, prompt_user=prompt_user) + return { + "message": response["message"], + "usage": response.get("usage", None), + "cost": response.get("cost", None), + } diff --git a/services/completion-new-sdk-prompt/docker-compose.yml b/services/completion-new-sdk-prompt/docker-compose.yml new file mode 100644 index 0000000000..58ecab1047 --- /dev/null +++ b/services/completion-new-sdk-prompt/docker-compose.yml @@ -0,0 +1,23 @@ +services: + completion-new-sdk-prompt: + build: . 
+ volumes: + - .:/app + - ../../agenta-cli:/agenta-cli + environment: + - AGENTA_UNAUTHORIZED_EXECUTION_ALLOWED=True + - AGENTA_HOST=http://host.docker.internal + networks: + - agenta-network + labels: + - "traefik.http.routers.completion-new-sdk-prompt.rule=PathPrefix(`/completion-new-sdk-prompt/`)" + - "traefik.http.routers.completion-new-sdk-prompt.entrypoints=web" + - "traefik.http.middlewares.completion-new-sdk-prompt-strip.stripprefix.prefixes=/completion-new-sdk-prompt" + - "traefik.http.middlewares.completion-new-sdk-prompt-strip.stripprefix.forceslash=true" + - "traefik.http.routers.completion-new-sdk-prompt.middlewares=completion-new-sdk-prompt-strip" + - "traefik.http.services.completion-new-sdk-prompt.loadbalancer.server.port=80" + - "traefik.http.routers.completion-new-sdk-prompt.service=completion-new-sdk-prompt" + +networks: + agenta-network: + external: true diff --git a/services/completion-new-sdk-prompt/entrypoint.sh b/services/completion-new-sdk-prompt/entrypoint.sh new file mode 100755 index 0000000000..e9b7b1d586 --- /dev/null +++ b/services/completion-new-sdk-prompt/entrypoint.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +if [ -f .env ]; then + source .env +fi + +# Run uvicorn with reload watching both app and agenta-cli directories + +exec python main.py diff --git a/services/completion-new-sdk-prompt/main.py b/services/completion-new-sdk-prompt/main.py new file mode 100644 index 0000000000..c17d626bc3 --- /dev/null +++ b/services/completion-new-sdk-prompt/main.py @@ -0,0 +1,14 @@ +from uvicorn import run +import agenta +import _app # This will register the routes with the FastAPI application +import os + + +if __name__ == "__main__": + run( + "agenta:app", + host="0.0.0.0", + port=80, + reload=True, + reload_dirs=[".", "/agenta-cli"], + ) diff --git a/services/completion-new-sdk-prompt/mock_litellm.py b/services/completion-new-sdk-prompt/mock_litellm.py new file mode 100644 index 0000000000..a5b57a68cc --- /dev/null +++ b/services/completion-new-sdk-prompt/mock_litellm.py @@ -0,0 +1,53 @@ +from typing import Dict, Any, List +from dataclasses import dataclass + + +@dataclass +class MockUsage: + prompt_tokens: int = 10 + completion_tokens: int = 20 + total_tokens: int = 30 + + def dict(self): + return { + "prompt_tokens": self.prompt_tokens, + "completion_tokens": self.completion_tokens, + "total_tokens": self.total_tokens, + } + + +@dataclass +class MockMessage: + content: str = "This is a mock response from the LLM." 
+ + +@dataclass +class MockChoice: + message: MockMessage = MockMessage() + + +@dataclass +class MockCompletion: + choices: List[MockChoice] = None + usage: MockUsage = None + + def __init__(self): + self.choices = [MockChoice()] + self.usage = MockUsage() + + +class MockLiteLLM: + async def acompletion( + self, + model: str, + messages: List[Dict[str, Any]], + temperature: float, + max_tokens: int = None, + **kwargs + ) -> MockCompletion: + return MockCompletion() + + class cost_calculator: + @staticmethod + def completion_cost(completion_response, model): + return 0.0001 # Mock cost diff --git a/services/completion-new-sdk-prompt/supported_llm_models.py b/services/completion-new-sdk-prompt/supported_llm_models.py new file mode 100644 index 0000000000..c314be0e37 --- /dev/null +++ b/services/completion-new-sdk-prompt/supported_llm_models.py @@ -0,0 +1,91 @@ +supported_llm_models = { + "Mistral AI": [ + "mistral/mistral-tiny", + "mistral/mistral-small", + "mistral/mistral-medium", + "mistral/mistral-large-latest", + ], + "Open AI": [ + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo", + "gpt-4", + "gpt-4o", + "gpt-4-1106-preview", + ], + "Gemini": [ + "gemini/gemini-1.5-pro-latest", + ], + "Cohere": [ + "cohere/command-light", + "cohere/command-r-plus", + "cohere/command-nightly", + ], + "Anthropic": [ + "anthropic/claude-2.1", + "anthropic/claude-2", + "anthropic/claude-instant-1.2", + "anthropic/claude-instant-1", + ], + "Anyscale": [ + "anyscale/meta-llama/Llama-2-13b-chat-hf", + "anyscale/meta-llama/Llama-2-70b-chat-hf", + ], + "Perplexity AI": [ + "perplexity/pplx-7b-chat", + "perplexity/pplx-70b-chat", + "perplexity/pplx-7b-online", + "perplexity/pplx-70b-online", + ], + "DeepInfra": [ + "deepinfra/meta-llama/Llama-2-70b-chat-hf", + "deepinfra/meta-llama/Llama-2-13b-chat-hf", + "deepinfra/codellama/CodeLlama-34b-Instruct-hf", + "deepinfra/mistralai/Mistral-7B-Instruct-v0.1", + "deepinfra/jondurbin/airoboros-l2-70b-gpt4-1.4.1", + ], + "Together AI": [ + "together_ai/togethercomputer/llama-2-70b-chat", + "together_ai/togethercomputer/llama-2-70b", + "together_ai/togethercomputer/LLaMA-2-7B-32K", + "together_ai/togethercomputer/Llama-2-7B-32K-Instruct", + "together_ai/togethercomputer/llama-2-7b", + "together_ai/togethercomputer/alpaca-7b", + "together_ai/togethercomputer/CodeLlama-34b-Instruct", + "together_ai/togethercomputer/CodeLlama-34b-Python", + "together_ai/WizardLM/WizardCoder-Python-34B-V1.0", + "together_ai/NousResearch/Nous-Hermes-Llama2-13b", + "together_ai/Austism/chronos-hermes-13b", + ], + "Aleph Alpha": [ + "luminous-base", + "luminous-base-control", + "luminous-extended-control", + "luminous-supreme", + ], + "OpenRouter": [ + "openrouter/openai/gpt-3.5-turbo", + "openrouter/openai/gpt-3.5-turbo-16k", + "openrouter/anthropic/claude-instant-v1", + "openrouter/google/palm-2-chat-bison", + "openrouter/google/palm-2-codechat-bison", + "openrouter/meta-llama/llama-2-13b-chat", + "openrouter/meta-llama/llama-2-70b-chat", + ], + "Groq": [ + "groq/llama3-8b-8192", + "groq/llama3-70b-8192", + "groq/llama2-70b-4096", + "groq/mixtral-8x7b-32768", + "groq/gemma-7b-it", + ], +} + + +def get_all_supported_llm_models(): + """ + Returns a list of evaluators + + Returns: + List[dict]: A list of evaluator dictionaries. 
+ """ + return supported_llm_models diff --git a/services/completion-new-sdk/Dockerfile b/services/completion-new-sdk/Dockerfile new file mode 100644 index 0000000000..eeda92c313 --- /dev/null +++ b/services/completion-new-sdk/Dockerfile @@ -0,0 +1,18 @@ +FROM python:3.10-slim + +ARG ROOT_PATH=/ +ENV ROOT_PATH=${ROOT_PATH} + +WORKDIR /app + +COPY . . + +RUN pip install --upgrade pip \ + && pip install --no-cache-dir agenta openai python-dotenv uvicorn "litellm>=1.0,<2.0" google-generativeai + +# Add agenta-cli to PYTHONPATH so it can find the local agenta package +ENV PYTHONPATH=/agenta-cli:$PYTHONPATH + +EXPOSE 80 + +CMD ["./entrypoint.sh"] diff --git a/services/completion-new-sdk/Dockerfile.prerelease b/services/completion-new-sdk/Dockerfile.prerelease new file mode 100644 index 0000000000..422c537eaf --- /dev/null +++ b/services/completion-new-sdk/Dockerfile.prerelease @@ -0,0 +1,16 @@ +FROM python:3.10-slim + +ARG ROOT_PATH=/ +ENV ROOT_PATH=${ROOT_PATH} + +WORKDIR /app + +COPY . . + +RUN pip install --upgrade pip \ + && pip install --no-cache-dir openai python-dotenv uvicorn "litellm>=1.0,<2.0" google-generativeai \ + && pip install --no-cache-dir --pre agenta + +EXPOSE 80 + +CMD ["./entrypoint.sh"] diff --git a/services/completion-live-sdk/_app.py b/services/completion-new-sdk/_app.py similarity index 100% rename from services/completion-live-sdk/_app.py rename to services/completion-new-sdk/_app.py diff --git a/services/completion-new-sdk/docker-compose.yml b/services/completion-new-sdk/docker-compose.yml new file mode 100644 index 0000000000..c116912817 --- /dev/null +++ b/services/completion-new-sdk/docker-compose.yml @@ -0,0 +1,23 @@ +services: + completion-new-sdk: + build: . + volumes: + - .:/app + - ../../agenta-cli:/agenta-cli + environment: + - AGENTA_UNAUTHORIZED_EXECUTION_ALLOWED=True + - AGENTA_HOST=http://host.docker.internal + networks: + - agenta-network + labels: + - "traefik.http.routers.completion-new-sdk.rule=PathPrefix(`/completion-new-sdk/`)" + - "traefik.http.routers.completion-new-sdk.entrypoints=web" + - "traefik.http.middlewares.completion-new-sdk-strip.stripprefix.prefixes=/completion-new-sdk" + - "traefik.http.middlewares.completion-new-sdk-strip.stripprefix.forceslash=true" + - "traefik.http.routers.completion-new-sdk.middlewares=completion-new-sdk-strip" + - "traefik.http.services.completion-new-sdk.loadbalancer.server.port=80" + - "traefik.http.routers.completion-new-sdk.service=completion-new-sdk" + +networks: + agenta-network: + external: true diff --git a/services/completion-new-sdk/entrypoint.sh b/services/completion-new-sdk/entrypoint.sh new file mode 100755 index 0000000000..e9b7b1d586 --- /dev/null +++ b/services/completion-new-sdk/entrypoint.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +if [ -f .env ]; then + source .env +fi + +# Run uvicorn with reload watching both app and agenta-cli directories + +exec python main.py diff --git a/services/completion-new-sdk/main.py b/services/completion-new-sdk/main.py new file mode 100644 index 0000000000..c17d626bc3 --- /dev/null +++ b/services/completion-new-sdk/main.py @@ -0,0 +1,14 @@ +from uvicorn import run +import agenta +import _app # This will register the routes with the FastAPI application +import os + + +if __name__ == "__main__": + run( + "agenta:app", + host="0.0.0.0", + port=80, + reload=True, + reload_dirs=[".", "/agenta-cli"], + ) diff --git a/services/completion-new-sdk/mock_litellm.py b/services/completion-new-sdk/mock_litellm.py new file mode 100644 index 0000000000..a5b57a68cc --- /dev/null +++ 
b/services/completion-new-sdk/mock_litellm.py @@ -0,0 +1,53 @@ +from typing import Dict, Any, List +from dataclasses import dataclass + + +@dataclass +class MockUsage: + prompt_tokens: int = 10 + completion_tokens: int = 20 + total_tokens: int = 30 + + def dict(self): + return { + "prompt_tokens": self.prompt_tokens, + "completion_tokens": self.completion_tokens, + "total_tokens": self.total_tokens, + } + + +@dataclass +class MockMessage: + content: str = "This is a mock response from the LLM." + + +@dataclass +class MockChoice: + message: MockMessage = MockMessage() + + +@dataclass +class MockCompletion: + choices: List[MockChoice] = None + usage: MockUsage = None + + def __init__(self): + self.choices = [MockChoice()] + self.usage = MockUsage() + + +class MockLiteLLM: + async def acompletion( + self, + model: str, + messages: List[Dict[str, Any]], + temperature: float, + max_tokens: int = None, + **kwargs + ) -> MockCompletion: + return MockCompletion() + + class cost_calculator: + @staticmethod + def completion_cost(completion_response, model): + return 0.0001 # Mock cost diff --git a/services/completion-new-sdk/supported_llm_models.py b/services/completion-new-sdk/supported_llm_models.py new file mode 100644 index 0000000000..c314be0e37 --- /dev/null +++ b/services/completion-new-sdk/supported_llm_models.py @@ -0,0 +1,91 @@ +supported_llm_models = { + "Mistral AI": [ + "mistral/mistral-tiny", + "mistral/mistral-small", + "mistral/mistral-medium", + "mistral/mistral-large-latest", + ], + "Open AI": [ + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo", + "gpt-4", + "gpt-4o", + "gpt-4-1106-preview", + ], + "Gemini": [ + "gemini/gemini-1.5-pro-latest", + ], + "Cohere": [ + "cohere/command-light", + "cohere/command-r-plus", + "cohere/command-nightly", + ], + "Anthropic": [ + "anthropic/claude-2.1", + "anthropic/claude-2", + "anthropic/claude-instant-1.2", + "anthropic/claude-instant-1", + ], + "Anyscale": [ + "anyscale/meta-llama/Llama-2-13b-chat-hf", + "anyscale/meta-llama/Llama-2-70b-chat-hf", + ], + "Perplexity AI": [ + "perplexity/pplx-7b-chat", + "perplexity/pplx-70b-chat", + "perplexity/pplx-7b-online", + "perplexity/pplx-70b-online", + ], + "DeepInfra": [ + "deepinfra/meta-llama/Llama-2-70b-chat-hf", + "deepinfra/meta-llama/Llama-2-13b-chat-hf", + "deepinfra/codellama/CodeLlama-34b-Instruct-hf", + "deepinfra/mistralai/Mistral-7B-Instruct-v0.1", + "deepinfra/jondurbin/airoboros-l2-70b-gpt4-1.4.1", + ], + "Together AI": [ + "together_ai/togethercomputer/llama-2-70b-chat", + "together_ai/togethercomputer/llama-2-70b", + "together_ai/togethercomputer/LLaMA-2-7B-32K", + "together_ai/togethercomputer/Llama-2-7B-32K-Instruct", + "together_ai/togethercomputer/llama-2-7b", + "together_ai/togethercomputer/alpaca-7b", + "together_ai/togethercomputer/CodeLlama-34b-Instruct", + "together_ai/togethercomputer/CodeLlama-34b-Python", + "together_ai/WizardLM/WizardCoder-Python-34B-V1.0", + "together_ai/NousResearch/Nous-Hermes-Llama2-13b", + "together_ai/Austism/chronos-hermes-13b", + ], + "Aleph Alpha": [ + "luminous-base", + "luminous-base-control", + "luminous-extended-control", + "luminous-supreme", + ], + "OpenRouter": [ + "openrouter/openai/gpt-3.5-turbo", + "openrouter/openai/gpt-3.5-turbo-16k", + "openrouter/anthropic/claude-instant-v1", + "openrouter/google/palm-2-chat-bison", + "openrouter/google/palm-2-codechat-bison", + "openrouter/meta-llama/llama-2-13b-chat", + "openrouter/meta-llama/llama-2-70b-chat", + ], + "Groq": [ + "groq/llama3-8b-8192", + "groq/llama3-70b-8192", + "groq/llama2-70b-4096", + 
"groq/mixtral-8x7b-32768", + "groq/gemma-7b-it", + ], +} + + +def get_all_supported_llm_models(): + """ + Returns a list of evaluators + + Returns: + List[dict]: A list of evaluator dictionaries. + """ + return supported_llm_models