From f3e04b15c4991bea3de615b4a216b1ef4199bc09 Mon Sep 17 00:00:00 2001
From: Mahmoud Mabrouk
Date: Sat, 14 Dec 2024 21:47:41 +0100
Subject: [PATCH] reorganize and add new services for prompt

---
 docker-compose.yml | 22 ++-
 services/README.md | 33 ++++
 services/chat-live-sdk/docker-compose.yml | 23 ---
 services/chat-new-sdk-prompt.rest | 33 ++++
 .../Dockerfile | 0
 .../Dockerfile.prerelease | 0
 .../_app.py | 0
 .../chat-new-sdk-prompt/docker-compose.yml | 23 +++
 .../entrypoint.sh | 0
 .../main.py | 0
 .../mock_litellm.py | 0
 .../supported_llm_models.py | 0
 services/chat-new-sdk.rest | 29 ++++
 .../Dockerfile | 0
 .../Dockerfile.prerelease | 0
 services/chat-new-sdk/_app.py | 65 +++++++
 services/chat-new-sdk/docker-compose.yml | 23 +++
 .../entrypoint.sh | 0
 .../main.py | 0
 .../mock_litellm.py | 0
 .../supported_llm_models.py | 0
 .../completion-live-sdk/docker-compose.yml | 23 ---
 services/completion-new-sdk-prompt.rest | 37 ++++
 services/completion-new-sdk-prompt/Dockerfile | 18 ++
 .../Dockerfile.prerelease | 16 ++
 services/completion-new-sdk-prompt/_app.py | 159 ++++++++++++++++++
 .../docker-compose.yml | 23 +++
 .../completion-new-sdk-prompt/entrypoint.sh | 9 +
 services/completion-new-sdk-prompt/main.py | 14 ++
 .../completion-new-sdk-prompt/mock_litellm.py | 53 ++++++
 .../supported_llm_models.py | 91 ++++++++++
 services/completion-new-sdk/Dockerfile | 18 ++
 .../completion-new-sdk/Dockerfile.prerelease | 16 ++
 .../_app.py | 0
 .../completion-new-sdk/docker-compose.yml | 23 +++
 services/completion-new-sdk/entrypoint.sh | 9 +
 services/completion-new-sdk/main.py | 14 ++
 services/completion-new-sdk/mock_litellm.py | 53 ++++++
 .../supported_llm_models.py | 91 ++++++++++
 39 files changed, 866 insertions(+), 52 deletions(-)
 create mode 100644 services/README.md
 delete mode 100644 services/chat-live-sdk/docker-compose.yml
 create mode 100644 services/chat-new-sdk-prompt.rest
 rename services/{chat-live-sdk => chat-new-sdk-prompt}/Dockerfile (100%)
 rename services/{chat-live-sdk => chat-new-sdk-prompt}/Dockerfile.prerelease (100%)
 rename services/{chat-live-sdk => chat-new-sdk-prompt}/_app.py (100%)
 create mode 100644 services/chat-new-sdk-prompt/docker-compose.yml
 rename services/{chat-live-sdk => chat-new-sdk-prompt}/entrypoint.sh (100%)
 rename services/{chat-live-sdk => chat-new-sdk-prompt}/main.py (100%)
 rename services/{chat-live-sdk => chat-new-sdk-prompt}/mock_litellm.py (100%)
 rename services/{chat-live-sdk => chat-new-sdk-prompt}/supported_llm_models.py (100%)
 rename services/{completion-live-sdk => chat-new-sdk}/Dockerfile (100%)
 rename services/{completion-live-sdk => chat-new-sdk}/Dockerfile.prerelease (100%)
 create mode 100644 services/chat-new-sdk/_app.py
 create mode 100644 services/chat-new-sdk/docker-compose.yml
 rename services/{completion-live-sdk => chat-new-sdk}/entrypoint.sh (100%)
 rename services/{completion-live-sdk => chat-new-sdk}/main.py (100%)
 rename services/{completion-live-sdk => chat-new-sdk}/mock_litellm.py (100%)
 rename services/{completion-live-sdk => chat-new-sdk}/supported_llm_models.py (100%)
 delete mode 100644 services/completion-live-sdk/docker-compose.yml
 create mode 100644 services/completion-new-sdk-prompt.rest
 create mode 100644 services/completion-new-sdk-prompt/Dockerfile
 create mode 100644 services/completion-new-sdk-prompt/Dockerfile.prerelease
 create mode 100644 services/completion-new-sdk-prompt/_app.py
 create mode 100644 services/completion-new-sdk-prompt/docker-compose.yml
 create mode 100755 services/completion-new-sdk-prompt/entrypoint.sh
 create mode 100644 services/completion-new-sdk-prompt/main.py
 create mode 100644 services/completion-new-sdk-prompt/mock_litellm.py
 create mode 100644 services/completion-new-sdk-prompt/supported_llm_models.py
 create mode 100644 services/completion-new-sdk/Dockerfile
 create mode 100644 services/completion-new-sdk/Dockerfile.prerelease
 rename services/{completion-live-sdk => completion-new-sdk}/_app.py (100%)
 create mode 100644 services/completion-new-sdk/docker-compose.yml
 create mode 100755 services/completion-new-sdk/entrypoint.sh
 create mode 100644 services/completion-new-sdk/main.py
 create mode 100644 services/completion-new-sdk/mock_litellm.py
 create mode 100644 services/completion-new-sdk/supported_llm_models.py

diff --git a/docker-compose.yml b/docker-compose.yml
index 63d2f71730..ca8812de6e 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -214,15 +214,25 @@ services:
       file: ./services/completion-stateless-sdk/docker-compose.yml
       service: completion-stateless-sdk
 
-  completion-live-sdk:
+  completion-new-sdk:
     extends:
-      file: ./services/completion-live-sdk/docker-compose.yml
-      service: completion-live-sdk
+      file: ./services/completion-new-sdk/docker-compose.yml
+      service: completion-new-sdk
 
-  chat-live-sdk:
+  chat-new-sdk:
     extends:
-      file: ./services/chat-live-sdk/docker-compose.yml
-      service: chat-live-sdk
+      file: ./services/chat-new-sdk/docker-compose.yml
+      service: chat-new-sdk
+
+  completion-new-sdk-prompt:
+    extends:
+      file: ./services/completion-new-sdk-prompt/docker-compose.yml
+      service: completion-new-sdk-prompt
+
+  chat-new-sdk-prompt:
+    extends:
+      file: ./services/chat-new-sdk-prompt/docker-compose.yml
+      service: chat-new-sdk-prompt
 
 networks:
   agenta-network:
diff --git a/services/README.md b/services/README.md
new file mode 100644
index 0000000000..5098702964
--- /dev/null
+++ b/services/README.md
@@ -0,0 +1,33 @@
+# Agenta Services
+
+This directory contains various versions of Agenta's LLM services, each offering distinct capabilities and interfaces for language model interactions.
+
+## Service Overview
+
+### Legacy Services
+- **completion-old-sdk**: Original completion service (as in current release)
+- **chat-old-sdk**: Original chat service (as in current release)
+
+### New Services
+All services with "new-sdk" utilize the modified SDK, which includes these changes:
+- Configuration is now nested under `agenta_config` in the request body (no longer flattened)
+- Implements the stateless SDK (no interface changes, but may introduce future issues in cloud deployment due to lack of testing)
+
+We've created two versions of each new service:
+1. Original logic with new SDK:
+   - completion-new-sdk
+   - chat-new-sdk
+2. New prompt object and updated logic:
+   - completion-new-sdk-prompt
+   - chat-new-sdk-prompt
+
+## Service Components
+
+Each service includes:
+- Docker configuration (`docker-compose.yml`)
+- REST API documentation (`.rest` files)
+- Implementation code (`_app.py`)
+
+## Usage
+
+For usage examples and API details, refer to the `.rest` files in each service's directory.
diff --git a/services/chat-live-sdk/docker-compose.yml b/services/chat-live-sdk/docker-compose.yml
deleted file mode 100644
index c69497ca26..0000000000
--- a/services/chat-live-sdk/docker-compose.yml
+++ /dev/null
@@ -1,23 +0,0 @@
-services:
-  chat-live-sdk:
-    build: .
- volumes: - - .:/app - - ../../agenta-cli:/agenta-cli - environment: - - AGENTA_UNAUTHORIZED_EXECUTION_ALLOWED=True - - AGENTA_HOST=http://host.docker.internal - networks: - - agenta-network - labels: - - "traefik.http.routers.chat-live-sdk.rule=PathPrefix(`/chat-live-sdk/`)" - - "traefik.http.routers.chat-live-sdk.entrypoints=web" - - "traefik.http.middlewares.chat-live-sdk-strip.stripprefix.prefixes=/chat-live-sdk" - - "traefik.http.middlewares.chat-live-sdk-strip.stripprefix.forceslash=true" - - "traefik.http.routers.chat-live-sdk.middlewares=chat-live-sdk-strip" - - "traefik.http.services.chat-live-sdk.loadbalancer.server.port=80" - - "traefik.http.routers.chat-live-sdk.service=chat-live-sdk" - -networks: - agenta-network: - external: true diff --git a/services/chat-new-sdk-prompt.rest b/services/chat-new-sdk-prompt.rest new file mode 100644 index 0000000000..462cfa83af --- /dev/null +++ b/services/chat-new-sdk-prompt.rest @@ -0,0 +1,33 @@ +### Test chat-new-sdk-prompt +POST http://localhost/chat-new-sdk-prompt/chat +Content-Type: application/json + +{ + "inputs": { + "message": "What is the capital of France?" + } +} + +### Test chat configuration with prompt +POST http://localhost/chat-new-sdk-prompt/configure +Content-Type: application/json + +{ + "model": "gpt-3.5-turbo", + "temperature": 0.7, + "max_tokens": 100, + "prompt": { + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant specializing in geography." + } + ], + "template_format": "fstring", + "response_format": { + "type": "text" + }, + "tools": null, + "tool_choice": "auto" + } +} diff --git a/services/chat-live-sdk/Dockerfile b/services/chat-new-sdk-prompt/Dockerfile similarity index 100% rename from services/chat-live-sdk/Dockerfile rename to services/chat-new-sdk-prompt/Dockerfile diff --git a/services/chat-live-sdk/Dockerfile.prerelease b/services/chat-new-sdk-prompt/Dockerfile.prerelease similarity index 100% rename from services/chat-live-sdk/Dockerfile.prerelease rename to services/chat-new-sdk-prompt/Dockerfile.prerelease diff --git a/services/chat-live-sdk/_app.py b/services/chat-new-sdk-prompt/_app.py similarity index 100% rename from services/chat-live-sdk/_app.py rename to services/chat-new-sdk-prompt/_app.py diff --git a/services/chat-new-sdk-prompt/docker-compose.yml b/services/chat-new-sdk-prompt/docker-compose.yml new file mode 100644 index 0000000000..a05ab0883b --- /dev/null +++ b/services/chat-new-sdk-prompt/docker-compose.yml @@ -0,0 +1,23 @@ +services: + chat-new-sdk-prompt: + build: . 
+ volumes: + - .:/app + - ../../agenta-cli:/agenta-cli + environment: + - AGENTA_UNAUTHORIZED_EXECUTION_ALLOWED=True + - AGENTA_HOST=http://host.docker.internal + networks: + - agenta-network + labels: + - "traefik.http.routers.chat-new-sdk-prompt.rule=PathPrefix(`/chat-new-sdk-prompt/`)" + - "traefik.http.routers.chat-new-sdk-prompt.entrypoints=web" + - "traefik.http.middlewares.chat-new-sdk-prompt-strip.stripprefix.prefixes=/chat-new-sdk-prompt" + - "traefik.http.middlewares.chat-new-sdk-prompt-strip.stripprefix.forceslash=true" + - "traefik.http.routers.chat-new-sdk-prompt.middlewares=chat-new-sdk-prompt-strip" + - "traefik.http.services.chat-new-sdk-prompt.loadbalancer.server.port=80" + - "traefik.http.routers.chat-new-sdk-prompt.service=chat-new-sdk-prompt" + +networks: + agenta-network: + external: true diff --git a/services/chat-live-sdk/entrypoint.sh b/services/chat-new-sdk-prompt/entrypoint.sh similarity index 100% rename from services/chat-live-sdk/entrypoint.sh rename to services/chat-new-sdk-prompt/entrypoint.sh diff --git a/services/chat-live-sdk/main.py b/services/chat-new-sdk-prompt/main.py similarity index 100% rename from services/chat-live-sdk/main.py rename to services/chat-new-sdk-prompt/main.py diff --git a/services/chat-live-sdk/mock_litellm.py b/services/chat-new-sdk-prompt/mock_litellm.py similarity index 100% rename from services/chat-live-sdk/mock_litellm.py rename to services/chat-new-sdk-prompt/mock_litellm.py diff --git a/services/chat-live-sdk/supported_llm_models.py b/services/chat-new-sdk-prompt/supported_llm_models.py similarity index 100% rename from services/chat-live-sdk/supported_llm_models.py rename to services/chat-new-sdk-prompt/supported_llm_models.py diff --git a/services/chat-new-sdk.rest b/services/chat-new-sdk.rest index 3021247a89..7b21dc0833 100644 --- a/services/chat-new-sdk.rest +++ b/services/chat-new-sdk.rest @@ -8,6 +8,35 @@ ### Health Check GET {{baseUrl}}/{{service}}/health HTTP/1.1 +### Test chat-new-sdk +POST {{baseUrl}}/{{service}}/chat HTTP/1.1 +Content-Type: application/json + +{ + "inputs": { + "message": "What is the capital of France?" + } +} + +### Test chat configuration +POST {{baseUrl}}/{{service}}/configure HTTP/1.1 +Content-Type: application/json + +{ + "model": "gpt-3.5-turbo", + "temperature": 0.7, + "max_tokens": 100, + "prompt": { + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant specializing in geography." 
+ } + ], + "template_format": "fstring" + } +} + ### Generate Chat Response POST {{baseUrl}}/{{service}}/generate HTTP/1.1 Content-Type: application/json diff --git a/services/completion-live-sdk/Dockerfile b/services/chat-new-sdk/Dockerfile similarity index 100% rename from services/completion-live-sdk/Dockerfile rename to services/chat-new-sdk/Dockerfile diff --git a/services/completion-live-sdk/Dockerfile.prerelease b/services/chat-new-sdk/Dockerfile.prerelease similarity index 100% rename from services/completion-live-sdk/Dockerfile.prerelease rename to services/chat-new-sdk/Dockerfile.prerelease diff --git a/services/chat-new-sdk/_app.py b/services/chat-new-sdk/_app.py new file mode 100644 index 0000000000..935118c1f5 --- /dev/null +++ b/services/chat-new-sdk/_app.py @@ -0,0 +1,65 @@ +from typing import Annotated, Any, Dict, List + +import agenta as ag +from agenta.sdk.assets import supported_llm_models +from pydantic import BaseModel, Field +import os +# Import mock if MOCK_LLM environment variable is set +if os.getenv("MOCK_LLM", True): + from mock_litellm import MockLiteLLM + + litellm = MockLiteLLM() +else: + import litellm + + litellm.drop_params = True + litellm.callbacks = [ag.callbacks.litellm_handler()] + +SYSTEM_PROMPT = "You have expertise in offering technical ideas to startups." + +ag.init() + + +class MyConfig(BaseModel): + temperature: float = Field(default=0.2, le=1, ge=0) + model: Annotated[str, ag.MultipleChoice(choices=supported_llm_models)] = Field( + default="gpt-3.5-turbo" + ) + max_tokens: int = Field(default=-1, ge=-1, le=4000) + prompt_system: str = Field(default=SYSTEM_PROMPT) + + +@ag.instrument(spankind="llm") +async def llm_call(messages: List[Dict[str, Any]], maxtokens): + config = ag.ConfigManager.get_from_route(schema=MyConfig) + chat_completion = await litellm.acompletion( + model=config.model, + messages=messages, + temperature=config.temperature, + max_tokens=maxtokens, + ) + token_usage = chat_completion.usage.dict() + return { + "usage": token_usage, + "message": chat_completion.choices[0].message.content, + "cost": litellm.cost_calculator.completion_cost( + completion_response=chat_completion, model=config.model + ), + } + + +@ag.route("/", config_schema=MyConfig) +@ag.instrument() +async def chat(inputs: ag.MessagesInput = ag.MessagesInput()) -> Dict[str, Any]: + config = ag.ConfigManager.get_from_route(schema=MyConfig) + messages = [{"role": "system", "content": config.prompt_system}] + inputs + max_tokens = config.max_tokens if config.max_tokens != -1 else None + response = await llm_call( + messages=messages, + maxtokens=max_tokens, + ) + return { + "message": response["message"], + "usage": response.get("usage", None), + "cost": response.get("cost", None), + } diff --git a/services/chat-new-sdk/docker-compose.yml b/services/chat-new-sdk/docker-compose.yml new file mode 100644 index 0000000000..8b61dc78e2 --- /dev/null +++ b/services/chat-new-sdk/docker-compose.yml @@ -0,0 +1,23 @@ +services: + chat-new-sdk: + build: . 
+ volumes: + - .:/app + - ../../agenta-cli:/agenta-cli + environment: + - AGENTA_UNAUTHORIZED_EXECUTION_ALLOWED=True + - AGENTA_HOST=http://host.docker.internal + networks: + - agenta-network + labels: + - "traefik.http.routers.chat-new-sdk.rule=PathPrefix(`/chat-new-sdk/`)" + - "traefik.http.routers.chat-new-sdk.entrypoints=web" + - "traefik.http.middlewares.chat-new-sdk-strip.stripprefix.prefixes=/chat-new-sdk" + - "traefik.http.middlewares.chat-new-sdk-strip.stripprefix.forceslash=true" + - "traefik.http.routers.chat-new-sdk.middlewares=chat-new-sdk-strip" + - "traefik.http.services.chat-new-sdk.loadbalancer.server.port=80" + - "traefik.http.routers.chat-new-sdk.service=chat-new-sdk" + +networks: + agenta-network: + external: true diff --git a/services/completion-live-sdk/entrypoint.sh b/services/chat-new-sdk/entrypoint.sh similarity index 100% rename from services/completion-live-sdk/entrypoint.sh rename to services/chat-new-sdk/entrypoint.sh diff --git a/services/completion-live-sdk/main.py b/services/chat-new-sdk/main.py similarity index 100% rename from services/completion-live-sdk/main.py rename to services/chat-new-sdk/main.py diff --git a/services/completion-live-sdk/mock_litellm.py b/services/chat-new-sdk/mock_litellm.py similarity index 100% rename from services/completion-live-sdk/mock_litellm.py rename to services/chat-new-sdk/mock_litellm.py diff --git a/services/completion-live-sdk/supported_llm_models.py b/services/chat-new-sdk/supported_llm_models.py similarity index 100% rename from services/completion-live-sdk/supported_llm_models.py rename to services/chat-new-sdk/supported_llm_models.py diff --git a/services/completion-live-sdk/docker-compose.yml b/services/completion-live-sdk/docker-compose.yml deleted file mode 100644 index 2e5466a54b..0000000000 --- a/services/completion-live-sdk/docker-compose.yml +++ /dev/null @@ -1,23 +0,0 @@ -services: - completion-live-sdk: - build: . - volumes: - - .:/app - - ../../agenta-cli:/agenta-cli - environment: - - AGENTA_UNAUTHORIZED_EXECUTION_ALLOWED=True - - AGENTA_HOST=http://host.docker.internal - networks: - - agenta-network - labels: - - "traefik.http.routers.completion-live-sdk.rule=PathPrefix(`/completion-live-sdk/`)" - - "traefik.http.routers.completion-live-sdk.entrypoints=web" - - "traefik.http.middlewares.completion-live-sdk-strip.stripprefix.prefixes=/completion-live-sdk" - - "traefik.http.middlewares.completion-live-sdk-strip.stripprefix.forceslash=true" - - "traefik.http.routers.completion-live-sdk.middlewares=completion-live-sdk-strip" - - "traefik.http.services.completion-live-sdk.loadbalancer.server.port=80" - - "traefik.http.routers.completion-live-sdk.service=completion-live-sdk" - -networks: - agenta-network: - external: true diff --git a/services/completion-new-sdk-prompt.rest b/services/completion-new-sdk-prompt.rest new file mode 100644 index 0000000000..9d09487b28 --- /dev/null +++ b/services/completion-new-sdk-prompt.rest @@ -0,0 +1,37 @@ +### Test completion-new-sdk-prompt +POST http://localhost/completion-new-sdk-prompt/generate +Content-Type: application/json + +{ + "inputs": { + "country": "France" + } +} + +### Test completion configuration with prompt +POST http://localhost/completion-new-sdk-prompt/configure +Content-Type: application/json + +{ + "model": "gpt-3.5-turbo", + "temperature": 0.7, + "max_tokens": 100, + "prompt": { + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant." + }, + { + "role": "user", + "content": "What is the capital of {country}?" 
+ } + ], + "template_format": "fstring", + "response_format": { + "type": "text" + }, + "tools": null, + "tool_choice": "auto" + } +} diff --git a/services/completion-new-sdk-prompt/Dockerfile b/services/completion-new-sdk-prompt/Dockerfile new file mode 100644 index 0000000000..eeda92c313 --- /dev/null +++ b/services/completion-new-sdk-prompt/Dockerfile @@ -0,0 +1,18 @@ +FROM python:3.10-slim + +ARG ROOT_PATH=/ +ENV ROOT_PATH=${ROOT_PATH} + +WORKDIR /app + +COPY . . + +RUN pip install --upgrade pip \ + && pip install --no-cache-dir agenta openai python-dotenv uvicorn "litellm>=1.0,<2.0" google-generativeai + +# Add agenta-cli to PYTHONPATH so it can find the local agenta package +ENV PYTHONPATH=/agenta-cli:$PYTHONPATH + +EXPOSE 80 + +CMD ["./entrypoint.sh"] diff --git a/services/completion-new-sdk-prompt/Dockerfile.prerelease b/services/completion-new-sdk-prompt/Dockerfile.prerelease new file mode 100644 index 0000000000..422c537eaf --- /dev/null +++ b/services/completion-new-sdk-prompt/Dockerfile.prerelease @@ -0,0 +1,16 @@ +FROM python:3.10-slim + +ARG ROOT_PATH=/ +ENV ROOT_PATH=${ROOT_PATH} + +WORKDIR /app + +COPY . . + +RUN pip install --upgrade pip \ + && pip install --no-cache-dir openai python-dotenv uvicorn "litellm>=1.0,<2.0" google-generativeai \ + && pip install --no-cache-dir --pre agenta + +EXPOSE 80 + +CMD ["./entrypoint.sh"] diff --git a/services/completion-new-sdk-prompt/_app.py b/services/completion-new-sdk-prompt/_app.py new file mode 100644 index 0000000000..a316f9442b --- /dev/null +++ b/services/completion-new-sdk-prompt/_app.py @@ -0,0 +1,159 @@ +from typing import Annotated, List, Union, Optional, Dict, Literal +from pydantic import BaseModel, Field, root_validator + +import agenta as ag +from agenta.sdk.assets import supported_llm_models +import os +# Import mock if MOCK_LLM environment variable is set +if os.getenv("MOCK_LLM", True): + from mock_litellm import MockLiteLLM + + litellm = MockLiteLLM() +else: + import litellm + + litellm.drop_params = True + litellm.callbacks = [ag.callbacks.litellm_handler()] + + +prompts = { + "system_prompt": "You are an expert in geography.", + "user_prompt": """What is the capital of {country}?""", +} + +GPT_FORMAT_RESPONSE = ["gpt-3.5-turbo-1106", "gpt-4-1106-preview"] + + +ag.init() + +class ToolCall(BaseModel): + id: str + type: Literal["function"] = "function" + function: Dict[str, str] + +class Message(BaseModel): + role: Literal["system", "user", "assistant", "tool", "function"] + content: Optional[str] = None + name: Optional[str] = None + tool_calls: Optional[List[ToolCall]] = None + tool_call_id: Optional[str] = None + +class ResponseFormat(BaseModel): + type: Literal["text", "json_object"] = "text" + schema: Optional[Dict] = None + +class Prompts(BaseModel): + messages: List[Message] = Field( + default=[ + Message(role="system", content=prompts["system_prompt"]), + Message(role="user", content=prompts["user_prompt"]) + ] + ) + system_prompt: Optional[str] = None + user_prompt: Optional[str] = None + template_format: Literal["fstring", "jinja2", "curly"] = Field( + default="fstring", + description="Format type for template variables: fstring {var}, jinja2 {{ var }}, or curly {{var}}" + ) + response_format: Optional[ResponseFormat] = Field( + default=None, + description="Specify the format of the response (text or JSON)" + ) + tools: Optional[List[Dict]] = Field( + default=None, + description="List of tools/functions the model can use" + ) + tool_choice: Optional[Union[Literal["none", "auto"], Dict]] = Field( + 
default="auto", + description="Control which tool the model should use" + ) + + class Config: + extra = "allow" + schema_extra = { + "x-prompt": True + } + + @root_validator(pre=True) + def init_messages(cls, values): + if "messages" not in values: + messages = [] + if "system_prompt" in values and values["system_prompt"]: + messages.append(Message(role="system", content=values["system_prompt"])) + if "user_prompt" in values and values["user_prompt"]: + messages.append(Message(role="user", content=values["user_prompt"])) + if messages: + values["messages"] = messages + return values + +class MyConfig(BaseModel): + prompt: Prompts = Field(default=Prompts()) + + + +@ag.instrument(spankind="llm") +async def llm_call(prompt_system: str, prompt_user: str): + config = ag.ConfigManager.get_from_route(schema=MyConfig) + response_format = ( + {"type": "json_object"} + if config.force_json and config.model in GPT_FORMAT_RESPONSE + else {"type": "text"} + ) + + max_tokens = config.max_tokens if config.max_tokens != -1 else None + + # Include frequency_penalty and presence_penalty only if supported + completion_params = {} + if config.model in GPT_FORMAT_RESPONSE: + completion_params["frequency_penalty"] = config.frequence_penalty + completion_params["presence_penalty"] = config.presence_penalty + + response = await litellm.acompletion( + **{ + "model": config.model, + "messages": config.prompt.messages, + "temperature": config.temperature, + "max_tokens": max_tokens, + "top_p": config.top_p, + "response_format": response_format, + **completion_params, + } + ) + token_usage = response.usage.dict() + return { + "message": response.choices[0].message.content, + "usage": token_usage, + "cost": litellm.cost_calculator.completion_cost( + completion_response=response, model=config.model + ), + } + + +@ag.route("/", config_schema=MyConfig) +@ag.instrument() +async def generate( + inputs: ag.DictInput = ag.DictInput(default_keys=["country"]), +): + config = ag.ConfigManager.get_from_route(schema=MyConfig) + print("popo", config) + try: + prompt_user = config.prompt_user.format(**inputs) + except Exception as e: + prompt_user = config.prompt_user + try: + prompt_system = config.prompt_system.format(**inputs) + except Exception as e: + prompt_system = config.prompt_system + + # SET MAX TOKENS - via completion() + if config.force_json and config.model not in GPT_FORMAT_RESPONSE: + raise ValueError( + "Model {} does not support JSON response format".format(config.model) + ) + + response = await llm_call(prompt_system=prompt_system, prompt_user=prompt_user) + return { + "message": response["message"], + "usage": response.get("usage", None), + "cost": response.get("cost", None), + } diff --git a/services/completion-new-sdk-prompt/docker-compose.yml b/services/completion-new-sdk-prompt/docker-compose.yml new file mode 100644 index 0000000000..58ecab1047 --- /dev/null +++ b/services/completion-new-sdk-prompt/docker-compose.yml @@ -0,0 +1,23 @@ +services: + completion-new-sdk-prompt: + build: . 
+ volumes: + - .:/app + - ../../agenta-cli:/agenta-cli + environment: + - AGENTA_UNAUTHORIZED_EXECUTION_ALLOWED=True + - AGENTA_HOST=http://host.docker.internal + networks: + - agenta-network + labels: + - "traefik.http.routers.completion-new-sdk-prompt.rule=PathPrefix(`/completion-new-sdk-prompt/`)" + - "traefik.http.routers.completion-new-sdk-prompt.entrypoints=web" + - "traefik.http.middlewares.completion-new-sdk-prompt-strip.stripprefix.prefixes=/completion-new-sdk-prompt" + - "traefik.http.middlewares.completion-new-sdk-prompt-strip.stripprefix.forceslash=true" + - "traefik.http.routers.completion-new-sdk-prompt.middlewares=completion-new-sdk-prompt-strip" + - "traefik.http.services.completion-new-sdk-prompt.loadbalancer.server.port=80" + - "traefik.http.routers.completion-new-sdk-prompt.service=completion-new-sdk-prompt" + +networks: + agenta-network: + external: true diff --git a/services/completion-new-sdk-prompt/entrypoint.sh b/services/completion-new-sdk-prompt/entrypoint.sh new file mode 100755 index 0000000000..e9b7b1d586 --- /dev/null +++ b/services/completion-new-sdk-prompt/entrypoint.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +if [ -f .env ]; then + source .env +fi + +# Run uvicorn with reload watching both app and agenta-cli directories + +exec python main.py diff --git a/services/completion-new-sdk-prompt/main.py b/services/completion-new-sdk-prompt/main.py new file mode 100644 index 0000000000..c17d626bc3 --- /dev/null +++ b/services/completion-new-sdk-prompt/main.py @@ -0,0 +1,14 @@ +from uvicorn import run +import agenta +import _app # This will register the routes with the FastAPI application +import os + + +if __name__ == "__main__": + run( + "agenta:app", + host="0.0.0.0", + port=80, + reload=True, + reload_dirs=[".", "/agenta-cli"], + ) diff --git a/services/completion-new-sdk-prompt/mock_litellm.py b/services/completion-new-sdk-prompt/mock_litellm.py new file mode 100644 index 0000000000..a5b57a68cc --- /dev/null +++ b/services/completion-new-sdk-prompt/mock_litellm.py @@ -0,0 +1,53 @@ +from typing import Dict, Any, List +from dataclasses import dataclass + + +@dataclass +class MockUsage: + prompt_tokens: int = 10 + completion_tokens: int = 20 + total_tokens: int = 30 + + def dict(self): + return { + "prompt_tokens": self.prompt_tokens, + "completion_tokens": self.completion_tokens, + "total_tokens": self.total_tokens, + } + + +@dataclass +class MockMessage: + content: str = "This is a mock response from the LLM." 
+ + +@dataclass +class MockChoice: + message: MockMessage = MockMessage() + + +@dataclass +class MockCompletion: + choices: List[MockChoice] = None + usage: MockUsage = None + + def __init__(self): + self.choices = [MockChoice()] + self.usage = MockUsage() + + +class MockLiteLLM: + async def acompletion( + self, + model: str, + messages: List[Dict[str, Any]], + temperature: float, + max_tokens: int = None, + **kwargs + ) -> MockCompletion: + return MockCompletion() + + class cost_calculator: + @staticmethod + def completion_cost(completion_response, model): + return 0.0001 # Mock cost diff --git a/services/completion-new-sdk-prompt/supported_llm_models.py b/services/completion-new-sdk-prompt/supported_llm_models.py new file mode 100644 index 0000000000..c314be0e37 --- /dev/null +++ b/services/completion-new-sdk-prompt/supported_llm_models.py @@ -0,0 +1,91 @@ +supported_llm_models = { + "Mistral AI": [ + "mistral/mistral-tiny", + "mistral/mistral-small", + "mistral/mistral-medium", + "mistral/mistral-large-latest", + ], + "Open AI": [ + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo", + "gpt-4", + "gpt-4o", + "gpt-4-1106-preview", + ], + "Gemini": [ + "gemini/gemini-1.5-pro-latest", + ], + "Cohere": [ + "cohere/command-light", + "cohere/command-r-plus", + "cohere/command-nightly", + ], + "Anthropic": [ + "anthropic/claude-2.1", + "anthropic/claude-2", + "anthropic/claude-instant-1.2", + "anthropic/claude-instant-1", + ], + "Anyscale": [ + "anyscale/meta-llama/Llama-2-13b-chat-hf", + "anyscale/meta-llama/Llama-2-70b-chat-hf", + ], + "Perplexity AI": [ + "perplexity/pplx-7b-chat", + "perplexity/pplx-70b-chat", + "perplexity/pplx-7b-online", + "perplexity/pplx-70b-online", + ], + "DeepInfra": [ + "deepinfra/meta-llama/Llama-2-70b-chat-hf", + "deepinfra/meta-llama/Llama-2-13b-chat-hf", + "deepinfra/codellama/CodeLlama-34b-Instruct-hf", + "deepinfra/mistralai/Mistral-7B-Instruct-v0.1", + "deepinfra/jondurbin/airoboros-l2-70b-gpt4-1.4.1", + ], + "Together AI": [ + "together_ai/togethercomputer/llama-2-70b-chat", + "together_ai/togethercomputer/llama-2-70b", + "together_ai/togethercomputer/LLaMA-2-7B-32K", + "together_ai/togethercomputer/Llama-2-7B-32K-Instruct", + "together_ai/togethercomputer/llama-2-7b", + "together_ai/togethercomputer/alpaca-7b", + "together_ai/togethercomputer/CodeLlama-34b-Instruct", + "together_ai/togethercomputer/CodeLlama-34b-Python", + "together_ai/WizardLM/WizardCoder-Python-34B-V1.0", + "together_ai/NousResearch/Nous-Hermes-Llama2-13b", + "together_ai/Austism/chronos-hermes-13b", + ], + "Aleph Alpha": [ + "luminous-base", + "luminous-base-control", + "luminous-extended-control", + "luminous-supreme", + ], + "OpenRouter": [ + "openrouter/openai/gpt-3.5-turbo", + "openrouter/openai/gpt-3.5-turbo-16k", + "openrouter/anthropic/claude-instant-v1", + "openrouter/google/palm-2-chat-bison", + "openrouter/google/palm-2-codechat-bison", + "openrouter/meta-llama/llama-2-13b-chat", + "openrouter/meta-llama/llama-2-70b-chat", + ], + "Groq": [ + "groq/llama3-8b-8192", + "groq/llama3-70b-8192", + "groq/llama2-70b-4096", + "groq/mixtral-8x7b-32768", + "groq/gemma-7b-it", + ], +} + + +def get_all_supported_llm_models(): + """ + Returns a list of evaluators + + Returns: + List[dict]: A list of evaluator dictionaries. 
+ """ + return supported_llm_models diff --git a/services/completion-new-sdk/Dockerfile b/services/completion-new-sdk/Dockerfile new file mode 100644 index 0000000000..eeda92c313 --- /dev/null +++ b/services/completion-new-sdk/Dockerfile @@ -0,0 +1,18 @@ +FROM python:3.10-slim + +ARG ROOT_PATH=/ +ENV ROOT_PATH=${ROOT_PATH} + +WORKDIR /app + +COPY . . + +RUN pip install --upgrade pip \ + && pip install --no-cache-dir agenta openai python-dotenv uvicorn "litellm>=1.0,<2.0" google-generativeai + +# Add agenta-cli to PYTHONPATH so it can find the local agenta package +ENV PYTHONPATH=/agenta-cli:$PYTHONPATH + +EXPOSE 80 + +CMD ["./entrypoint.sh"] diff --git a/services/completion-new-sdk/Dockerfile.prerelease b/services/completion-new-sdk/Dockerfile.prerelease new file mode 100644 index 0000000000..422c537eaf --- /dev/null +++ b/services/completion-new-sdk/Dockerfile.prerelease @@ -0,0 +1,16 @@ +FROM python:3.10-slim + +ARG ROOT_PATH=/ +ENV ROOT_PATH=${ROOT_PATH} + +WORKDIR /app + +COPY . . + +RUN pip install --upgrade pip \ + && pip install --no-cache-dir openai python-dotenv uvicorn "litellm>=1.0,<2.0" google-generativeai \ + && pip install --no-cache-dir --pre agenta + +EXPOSE 80 + +CMD ["./entrypoint.sh"] diff --git a/services/completion-live-sdk/_app.py b/services/completion-new-sdk/_app.py similarity index 100% rename from services/completion-live-sdk/_app.py rename to services/completion-new-sdk/_app.py diff --git a/services/completion-new-sdk/docker-compose.yml b/services/completion-new-sdk/docker-compose.yml new file mode 100644 index 0000000000..c116912817 --- /dev/null +++ b/services/completion-new-sdk/docker-compose.yml @@ -0,0 +1,23 @@ +services: + completion-new-sdk: + build: . + volumes: + - .:/app + - ../../agenta-cli:/agenta-cli + environment: + - AGENTA_UNAUTHORIZED_EXECUTION_ALLOWED=True + - AGENTA_HOST=http://host.docker.internal + networks: + - agenta-network + labels: + - "traefik.http.routers.completion-new-sdk.rule=PathPrefix(`/completion-new-sdk/`)" + - "traefik.http.routers.completion-new-sdk.entrypoints=web" + - "traefik.http.middlewares.completion-new-sdk-strip.stripprefix.prefixes=/completion-new-sdk" + - "traefik.http.middlewares.completion-new-sdk-strip.stripprefix.forceslash=true" + - "traefik.http.routers.completion-new-sdk.middlewares=completion-new-sdk-strip" + - "traefik.http.services.completion-new-sdk.loadbalancer.server.port=80" + - "traefik.http.routers.completion-new-sdk.service=completion-new-sdk" + +networks: + agenta-network: + external: true diff --git a/services/completion-new-sdk/entrypoint.sh b/services/completion-new-sdk/entrypoint.sh new file mode 100755 index 0000000000..e9b7b1d586 --- /dev/null +++ b/services/completion-new-sdk/entrypoint.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +if [ -f .env ]; then + source .env +fi + +# Run uvicorn with reload watching both app and agenta-cli directories + +exec python main.py diff --git a/services/completion-new-sdk/main.py b/services/completion-new-sdk/main.py new file mode 100644 index 0000000000..c17d626bc3 --- /dev/null +++ b/services/completion-new-sdk/main.py @@ -0,0 +1,14 @@ +from uvicorn import run +import agenta +import _app # This will register the routes with the FastAPI application +import os + + +if __name__ == "__main__": + run( + "agenta:app", + host="0.0.0.0", + port=80, + reload=True, + reload_dirs=[".", "/agenta-cli"], + ) diff --git a/services/completion-new-sdk/mock_litellm.py b/services/completion-new-sdk/mock_litellm.py new file mode 100644 index 0000000000..a5b57a68cc --- /dev/null +++ 
b/services/completion-new-sdk/mock_litellm.py @@ -0,0 +1,53 @@ +from typing import Dict, Any, List +from dataclasses import dataclass + + +@dataclass +class MockUsage: + prompt_tokens: int = 10 + completion_tokens: int = 20 + total_tokens: int = 30 + + def dict(self): + return { + "prompt_tokens": self.prompt_tokens, + "completion_tokens": self.completion_tokens, + "total_tokens": self.total_tokens, + } + + +@dataclass +class MockMessage: + content: str = "This is a mock response from the LLM." + + +@dataclass +class MockChoice: + message: MockMessage = MockMessage() + + +@dataclass +class MockCompletion: + choices: List[MockChoice] = None + usage: MockUsage = None + + def __init__(self): + self.choices = [MockChoice()] + self.usage = MockUsage() + + +class MockLiteLLM: + async def acompletion( + self, + model: str, + messages: List[Dict[str, Any]], + temperature: float, + max_tokens: int = None, + **kwargs + ) -> MockCompletion: + return MockCompletion() + + class cost_calculator: + @staticmethod + def completion_cost(completion_response, model): + return 0.0001 # Mock cost diff --git a/services/completion-new-sdk/supported_llm_models.py b/services/completion-new-sdk/supported_llm_models.py new file mode 100644 index 0000000000..c314be0e37 --- /dev/null +++ b/services/completion-new-sdk/supported_llm_models.py @@ -0,0 +1,91 @@ +supported_llm_models = { + "Mistral AI": [ + "mistral/mistral-tiny", + "mistral/mistral-small", + "mistral/mistral-medium", + "mistral/mistral-large-latest", + ], + "Open AI": [ + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo", + "gpt-4", + "gpt-4o", + "gpt-4-1106-preview", + ], + "Gemini": [ + "gemini/gemini-1.5-pro-latest", + ], + "Cohere": [ + "cohere/command-light", + "cohere/command-r-plus", + "cohere/command-nightly", + ], + "Anthropic": [ + "anthropic/claude-2.1", + "anthropic/claude-2", + "anthropic/claude-instant-1.2", + "anthropic/claude-instant-1", + ], + "Anyscale": [ + "anyscale/meta-llama/Llama-2-13b-chat-hf", + "anyscale/meta-llama/Llama-2-70b-chat-hf", + ], + "Perplexity AI": [ + "perplexity/pplx-7b-chat", + "perplexity/pplx-70b-chat", + "perplexity/pplx-7b-online", + "perplexity/pplx-70b-online", + ], + "DeepInfra": [ + "deepinfra/meta-llama/Llama-2-70b-chat-hf", + "deepinfra/meta-llama/Llama-2-13b-chat-hf", + "deepinfra/codellama/CodeLlama-34b-Instruct-hf", + "deepinfra/mistralai/Mistral-7B-Instruct-v0.1", + "deepinfra/jondurbin/airoboros-l2-70b-gpt4-1.4.1", + ], + "Together AI": [ + "together_ai/togethercomputer/llama-2-70b-chat", + "together_ai/togethercomputer/llama-2-70b", + "together_ai/togethercomputer/LLaMA-2-7B-32K", + "together_ai/togethercomputer/Llama-2-7B-32K-Instruct", + "together_ai/togethercomputer/llama-2-7b", + "together_ai/togethercomputer/alpaca-7b", + "together_ai/togethercomputer/CodeLlama-34b-Instruct", + "together_ai/togethercomputer/CodeLlama-34b-Python", + "together_ai/WizardLM/WizardCoder-Python-34B-V1.0", + "together_ai/NousResearch/Nous-Hermes-Llama2-13b", + "together_ai/Austism/chronos-hermes-13b", + ], + "Aleph Alpha": [ + "luminous-base", + "luminous-base-control", + "luminous-extended-control", + "luminous-supreme", + ], + "OpenRouter": [ + "openrouter/openai/gpt-3.5-turbo", + "openrouter/openai/gpt-3.5-turbo-16k", + "openrouter/anthropic/claude-instant-v1", + "openrouter/google/palm-2-chat-bison", + "openrouter/google/palm-2-codechat-bison", + "openrouter/meta-llama/llama-2-13b-chat", + "openrouter/meta-llama/llama-2-70b-chat", + ], + "Groq": [ + "groq/llama3-8b-8192", + "groq/llama3-70b-8192", + "groq/llama2-70b-4096", + 
"groq/mixtral-8x7b-32768", + "groq/gemma-7b-it", + ], +} + + +def get_all_supported_llm_models(): + """ + Returns a list of evaluators + + Returns: + List[dict]: A list of evaluator dictionaries. + """ + return supported_llm_models