Merge branch 'main' into agenta-web-enhancements
bekossy committed Nov 10, 2024
2 parents e33dde7 + 4a7557e commit bfaadb3
Showing 109 changed files with 24,477 additions and 7,922 deletions.
6 changes: 0 additions & 6 deletions .github/dependabot.yml

This file was deleted.

16 changes: 8 additions & 8 deletions .github/workflows/run-sdk-tests.yml
@@ -3,31 +3,31 @@ name: Run SDK tests
 on:
   pull_request:
     paths:
-      - 'agenta-cli/**'
-      - 'agenta-cli/pyproject.toml'
-      - 'agenta-cli/poetry.lock'
+      - "agenta-cli/**"
+      - "agenta-cli/pyproject.toml"
+      - "agenta-cli/poetry.lock"
   workflow_dispatch:
 
 jobs:
-  pytest:
+  sdk_tests:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
 
       - name: Set up Python
         uses: actions/setup-python@v4
         with:
-          python-version: '3.9'
+          python-version: "3.9"
 
       - name: Install Poetry
         run: pip install poetry
 
-      - name: Install dependencies and agenta SDK
+      - name: Activate shell, install dependencies and agenta SDK
         run: |
           cd agenta-cli
           poetry install
       - name: Run pytest
         run: |
-          cd agenta-cli
-          poetry run pytest tests
+          cd agenta-cli/agenta/tests
+          poetry run pytest prompt_sdk/
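
Note: the updated "Run pytest" step now executes from agenta-cli/agenta/tests and targets only the prompt_sdk/ suite. A local approximation of that CI step, assuming the repository layout shown in the diff and a Poetry install (illustrative sketch, not part of the commit):

    # Runs the SDK prompt tests the same way the updated workflow does,
    # assuming Poetry is installed and the repository is checked out locally.
    import subprocess

    subprocess.run(
        ["poetry", "run", "pytest", "prompt_sdk/"],
        cwd="agenta-cli/agenta/tests",
        check=True,
    )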
60 changes: 31 additions & 29 deletions agenta-backend/agenta_backend/migrations/postgres/utils.py
@@ -5,16 +5,13 @@
 
 import click
 import asyncpg
-
-from sqlalchemy import inspect, text, Engine
-from sqlalchemy.exc import ProgrammingError
-from sqlalchemy.ext.asyncio import create_async_engine, AsyncEngine
-
 from alembic import command
+from sqlalchemy import Engine
 from alembic.config import Config
+from sqlalchemy import inspect, text
 from alembic.script import ScriptDirectory
-
-from agenta_backend.utils.common import isCloudEE, isCloudDev
+from sqlalchemy.exc import ProgrammingError
+from sqlalchemy.ext.asyncio import create_async_engine, AsyncEngine


 # Initializer logger
@@ -56,15 +53,15 @@ def is_initial_setup(engine) -> bool:
     return not all_tables_exist
 
 
-async def get_applied_migrations(engine: AsyncEngine):
+async def get_current_migration_head_from_db(engine: AsyncEngine):
     """
-    Checks the alembic_version table to get all the migrations that has been applied.
+    Checks the alembic_version table to get the current migration head that has been applied.
 
     Args:
         engine (Engine): The engine that connects to an sqlalchemy pool
 
     Returns:
-        a list of strings
+        the current migration head (where 'head' is the revision stored in the migration script)
     """
 
     async with engine.connect() as connection:
@@ -76,32 +73,37 @@ async def get_applied_migrations(engine: AsyncEngine):
             # to make Alembic start tracking the migration changes.
             # --------------------------------------------------------------------------------------
             # This effect (the exception raising) happens for both users (first-time and returning)
-            return ["alembic_version"]
+            return "alembic_version"
 
-        applied_migrations = [row[0] for row in result.fetchall()]
-        return applied_migrations
+        migration_heads = [row[0] for row in result.fetchall()]
+        assert (
+            len(migration_heads) == 1
+        ), "There can only be one migration head stored in the database."
+        return migration_heads[0]


-async def get_pending_migrations():
+async def get_pending_migration_head():
     """
-    Gets the migrations that have not been applied.
+    Gets the migration head that have not been applied.
 
     Returns:
-        the number of pending migrations
+        the pending migration head
     """
 
     engine = create_async_engine(url=os.environ["POSTGRES_URI"])
     try:
-        applied_migrations = await get_applied_migrations(engine=engine)
-        migration_files = [script.revision for script in script.walk_revisions()]
-        pending_migrations = [m for m in migration_files if m not in applied_migrations]
-
-        if "alembic_version" in applied_migrations:
-            pending_migrations.append("alembic_version")
+        current_migration_script_head = script.get_current_head()
+        migration_head_from_db = await get_current_migration_head_from_db(engine=engine)
+
+        pending_migration_head = []
+        if current_migration_script_head != migration_head_from_db:
+            pending_migration_head.append(current_migration_script_head)
+        if "alembic_version" == migration_head_from_db:
+            pending_migration_head.append("alembic_version")
     finally:
         await engine.dispose()
 
-    return pending_migrations
+    return pending_migration_head


 def run_alembic_migration():
@@ -110,9 +112,9 @@ def run_alembic_migration():
     """
 
     try:
-        pending_migrations = asyncio.run(get_pending_migrations())
+        pending_migration_head = asyncio.run(get_pending_migration_head())
         APPLY_AUTO_MIGRATIONS = os.environ.get("AGENTA_AUTO_MIGRATIONS")
-        FIRST_TIME_USER = True if "alembic_version" in pending_migrations else False
+        FIRST_TIME_USER = True if "alembic_version" in pending_migration_head else False
 
         if FIRST_TIME_USER or APPLY_AUTO_MIGRATIONS == "true":
             command.upgrade(alembic_cfg, "head")
@@ -134,7 +136,7 @@ def run_alembic_migration():
     except Exception as e:
         click.echo(
             click.style(
-                f"\nAn ERROR occured while applying migration: {traceback.format_exc()}\nThe container will now exit.",
+                f"\nAn ERROR occurred while applying migration: {traceback.format_exc()}\nThe container will now exit.",
                 fg="red",
             ),
             color=True,
@@ -147,11 +149,11 @@ async def check_for_new_migrations():
     Checks for new migrations and notify the user.
     """
 
-    pending_migrations = await get_pending_migrations()
-    if len(pending_migrations) >= 1:
+    pending_migration_head = await get_pending_migration_head()
+    if len(pending_migration_head) >= 1 and isinstance(pending_migration_head[0], str):
         click.echo(
             click.style(
-                f"\nWe have detected that there are pending database migrations {pending_migrations} that need to be applied to keep the application up to date. To ensure the application functions correctly with the latest updates, please follow the guide here => https://docs.agenta.ai/self-host/migration/applying-schema-migration\n",
+                f"\nWe have detected that there are pending database migrations {pending_migration_head} that need to be applied to keep the application up to date. To ensure the application functions correctly with the latest updates, please follow the guide here => https://docs.agenta.ai/self-host/migration/applying-schema-migration\n",
                 fg="yellow",
             ),
             color=True,
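
The refactor replaces list-based tracking of all applied revisions with a single-head comparison: the head revision in the migration scripts is checked against the single row Alembic stores in alembic_version. A self-contained sketch of that comparison logic, with illustrative names not taken from the codebase:

    # Mirrors the diff's semantics: the script head is pending when it differs
    # from the DB head, and the sentinel "alembic_version" marks a first-time
    # setup where the alembic_version table did not exist yet.
    def pending_head(script_head: str, db_head: str) -> list:
        pending = []
        if script_head != db_head:
            pending.append(script_head)
        if db_head == "alembic_version":
            pending.append("alembic_version")
        return pending

    # A returning user whose database lags behind the scripts:
    assert pending_head("abc123", "def456") == ["abc123"]
    # A first-time user (no alembic_version table existed yet):
    assert pending_head("abc123", "alembic_version") == ["abc123", "alembic_version"]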
36 changes: 33 additions & 3 deletions agenta-backend/agenta_backend/resources/evaluators/evaluators.py
@@ -193,10 +193,19 @@
         "settings_template": {
             "prompt_template": {
                 "label": "Prompt Template",
-                "type": "text",
-                "default": "We have an LLM App that we want to evaluate its outputs. Based on the prompt and the parameters provided below evaluate the output based on the evaluation strategy below:\nEvaluation strategy: 0 to 10 0 is very bad and 10 is very good.\nPrompt: {llm_app_prompt_template}\nInputs: country: {country}\nExpected Answer Column:{correct_answer}\nEvaluate this: {variant_output}\n\nAnswer ONLY with one of the given grading or evaluation options.",
+                "type": "messages",
                 "description": "Template for AI critique prompts",
                 "required": True,
+                "default": [
+                    {
+                        "role": "system",
+                        "content": "You are an evaluator grading an LLM App.\n You will be given INPUTS, the LLM APP OUTPUT, the CORRECT ANSWER, the PROMPT used in the LLM APP.\n Here is the grade criteria to follow:\n:- Ensure that the LLM APP OUTPUT has the same meaning as the CORRECT ANSWER\n\nSCORE:\n-The score should be between 0 and 10\n-A score of 10 means that the answer is perfect. This is the highest (best) score. \nA score of 0 means that the answer does not any of of the criteria. This is the lowest possible score you can give.\n\nANSWER ONLY THE SCORE. DO NOT USE MARKDOWN. DO NOT PROVIDE ANYTHING OTHER THAN THE NUMBER",
+                    },
+                    {
+                        "role": "user",
+                        "content": "INPUTS:\n country: {country}\nCORRECT ANSWER:{correct_answer}\nLLM APP OUTPUT: {prediction}.",
+                    },
+                ],
             },
             "correct_answer_key": {
                 "label": "Expected Answer Column",
@@ -206,8 +215,29 @@
                 "ground_truth_key": True,  # Tells the frontend that is the name of the column in the test set that should be shown as a ground truth to the user
                 "description": "The name of the column in the test data that contains the correct answer",
             },
+            "model": {
+                "label": "Model",
+                "default": "gpt-3.5-turbo",
+                "type": "multiple_choice",
+                "options": [
+                    "gpt-3.5-turbo",
+                    "gpt-4o",
+                    "claude-3-5-sonnet-20240620",
+                    "claude-3-haiku-20240307",
+                    "claude-3-opus-20240229",
+                ],
+                "advanced": True,  # Tells the frontend that this setting is advanced and should be hidden by default
+                "description": "The LLM model to use for the evaluation",
+            },
+            "version": {
+                "label": "Version",
+                "type": "hidden",
+                "default": "2",
+                "description": "The version of the evaluator",  # ignore by the FE
+                "advanced": False,  # ignore by the FE
+            },
         },
-        "description": "AI Critique evaluator sends the generated answer and the correct_answer to an LLM model and uses it to evaluate the correctness of the answer. You need to provide the evaluation prompt (or use the default prompt).",
+        "description": "LLM-as-a-judge uses a configurable prompt template that takes the output—and optionally inputs or data from the test case such as correct answer—to evaluate the generated output.",
         "oss": True,
         "tags": ["ai_llm", "functional"],
     },
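
The prompt template changes from a single text blob to a chat-style list of messages, with placeholders ({country}, {correct_answer}, {prediction}) filled from the test case. A hedged sketch of how such a template could be rendered before being sent to the judge model (variable names are illustrative, not from the commit):

    # Fill the "messages"-type template with values from one test case.
    # The placeholder keys match those in the new default template above.
    user_template = (
        "INPUTS:\n country: {country}\n"
        "CORRECT ANSWER:{correct_answer}\n"
        "LLM APP OUTPUT: {prediction}."
    )

    test_case = {"country": "France", "correct_answer": "Paris", "prediction": "Paris"}

    rendered = user_template.format(**test_case)
    print(rendered)
    # INPUTS:
    #  country: France
    # CORRECT ANSWER:Paris
    # LLM APP OUTPUT: Paris.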
5 changes: 1 addition & 4 deletions agenta-backend/agenta_backend/routers/app_router.py
@@ -220,11 +220,8 @@ async def create_app(
         Permission.CREATE_APPLICATION,
     )
 
-    workspace_id_from_apikey = await db_manager_ee.get_workspace_id_from_apikey(
-        api_key_from_headers, request.state.user_id
-    )
     if payload.workspace_id is None:
-        payload.workspace_id = workspace_id_from_apikey
+        payload.workspace_id = request.state.workspace_id
 
     try:
         user_org_workspace_data = await get_user_org_and_workspace_id(
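
The handler no longer resolves the workspace from the API key itself; it relies on request.state.workspace_id being populated earlier in the request lifecycle. A hypothetical FastAPI-style middleware showing how such state could be attached (names and the lookup are illustrative, not from the Agenta codebase):

    from fastapi import FastAPI, Request

    app = FastAPI()

    async def resolve_workspace_id(api_key):
        # Stub: a real implementation would look the key up in a database.
        return "ws_demo" if api_key else None

    @app.middleware("http")
    async def attach_workspace(request: Request, call_next):
        # Resolve once per request so downstream handlers (like create_app)
        # can read request.state.workspace_id directly.
        api_key = request.headers.get("Authorization")
        request.state.workspace_id = await resolve_workspace_id(api_key)
        return await call_next(request)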
