diff --git a/.all-contributorsrc b/.all-contributorsrc
index d055b089f1..ecc7093dc3 100644
--- a/.all-contributorsrc
+++ b/.all-contributorsrc
@@ -417,6 +417,16 @@
"contributions": [
"doc"
]
+ },
+ {
+ "login": "LucasTrg",
+ "name": "LucasTrg",
+ "avatar_url": "https://avatars.githubusercontent.com/u/47852577?v=4",
+ "profile": "https://github.com/LucasTrg",
+ "contributions": [
+ "code",
+ "bug"
+ ]
}
],
"contributorsPerLine": 7,
diff --git a/README.md b/README.md
index 13e745b36a..62735b3dd7 100644
--- a/README.md
+++ b/README.md
@@ -169,7 +169,7 @@ Check out our [Contributing Guide](https://docs.agenta.ai/contributing/getting-s
## Contributors ✨
-[![All Contributors](https://img.shields.io/badge/all_contributors-44-orange.svg?style=flat-square)](#contributors-)
+[![All Contributors](https://img.shields.io/badge/all_contributors-45-orange.svg?style=flat-square)](#contributors-)
Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/docs/en/emoji-key)):
@@ -236,6 +236,7 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d
Vishal Vanpariya 💻 |
Youcef Boumar 📖 |
+ LucasTrg 💻 🐛 |
diff --git a/agenta-backend/agenta_backend/services/llm_apps_service.py b/agenta-backend/agenta_backend/services/llm_apps_service.py
index 4e7ab1a9c5..2accbfe509 100644
--- a/agenta-backend/agenta_backend/services/llm_apps_service.py
+++ b/agenta-backend/agenta_backend/services/llm_apps_service.py
@@ -172,6 +172,7 @@ async def run_with_retry(
last_exception = e
logger.info(f"Error processing datapoint: {input_data}. {str(e)}")
logger.info("".join(traceback.format_exception_only(type(e), e)))
+ retries += 1
common.capture_exception_in_sentry(e)
# If max retries is reached or an exception that isn't in the second block,
@@ -186,7 +187,7 @@ async def run_with_retry(
result=Result(
type="error",
value=None,
- error=Error(message=exception_message, stacktrace=last_exception),
+ error=Error(message=exception_message, stacktrace=str(last_exception)),
)
)
diff --git a/agenta-backend/agenta_backend/services/security/sandbox.py b/agenta-backend/agenta_backend/services/security/sandbox.py
index 58a58f0d35..6a6988daa7 100644
--- a/agenta-backend/agenta_backend/services/security/sandbox.py
+++ b/agenta-backend/agenta_backend/services/security/sandbox.py
@@ -91,7 +91,7 @@ def execute_code_safely(
# Call the evaluation function, extract the result if it exists
# and is a float between 0 and 1
- result = environment["evaluate"](app_params, inputs, correct_answer, output)
+ result = environment["evaluate"](app_params, inputs, output, correct_answer)
if isinstance(result, float) and 0 <= result <= 1:
return result
return None
diff --git a/agenta-backend/agenta_backend/tests/unit/test_llm_apps_service.py b/agenta-backend/agenta_backend/tests/unit/test_llm_apps_service.py
new file mode 100644
index 0000000000..3462f7c94c
--- /dev/null
+++ b/agenta-backend/agenta_backend/tests/unit/test_llm_apps_service.py
@@ -0,0 +1,162 @@
+import pytest
+from unittest.mock import patch, AsyncMock
+import asyncio
+import aiohttp
+
+from agenta_backend.services.llm_apps_service import (
+ batch_invoke,
+ InvokationResult,
+ Result,
+ Error,
+)
+
+
+@pytest.mark.asyncio
+async def test_batch_invoke_success():
+ """
+ Test the successful invocation of batch_invoke function.
+
+ This test mocks the get_parameters_from_openapi and invoke_app functions
+ to simulate successful invocations. It verifies that the batch_invoke
+ function correctly returns the expected results for the given test data.
+ """
+ with patch(
+ "agenta_backend.services.llm_apps_service.get_parameters_from_openapi",
+ new_callable=AsyncMock,
+ ) as mock_get_parameters_from_openapi, patch(
+ "agenta_backend.services.llm_apps_service.invoke_app", new_callable=AsyncMock
+ ) as mock_invoke_app, patch(
+ "asyncio.sleep", new_callable=AsyncMock
+ ) as mock_sleep:
+ mock_get_parameters_from_openapi.return_value = [
+ {"name": "param1", "type": "input"},
+ {"name": "param2", "type": "input"},
+ ]
+
+ # Mock the response of invoke_app to always succeed
+ def invoke_app_side_effect(uri, datapoint, parameters, openapi_parameters):
+ return InvokationResult(
+ result=Result(type="text", value="Success", error=None),
+ latency=0.1,
+ cost=0.01,
+ )
+
+ mock_invoke_app.side_effect = invoke_app_side_effect
+
+ uri = "http://example.com"
+ testset_data = [
+ {"id": 1, "param1": "value1", "param2": "value2"},
+ {"id": 2, "param1": "value1", "param2": "value2"},
+ ]
+ parameters = {}
+ rate_limit_config = {
+ "batch_size": 10,
+ "max_retries": 3,
+ "retry_delay": 3,
+ "delay_between_batches": 5,
+ }
+
+ results = await batch_invoke(uri, testset_data, parameters, rate_limit_config)
+
+ assert len(results) == 2
+ assert results[0].result.type == "text"
+ assert results[0].result.value == "Success"
+ assert results[1].result.type == "text"
+ assert results[1].result.value == "Success"
+
+
+@pytest.mark.asyncio
+async def test_batch_invoke_retries_and_failure():
+ """
+ Test the batch_invoke function with retries and eventual failure.
+
+ This test mocks the get_parameters_from_openapi and invoke_app functions
+ to simulate failures that trigger retries. It verifies that the batch_invoke
+ function correctly retries the specified number of times and returns an error
+ result after reaching the maximum retries.
+ """
+ with patch(
+ "agenta_backend.services.llm_apps_service.get_parameters_from_openapi",
+ new_callable=AsyncMock,
+ ) as mock_get_parameters_from_openapi, patch(
+ "agenta_backend.services.llm_apps_service.invoke_app", new_callable=AsyncMock
+ ) as mock_invoke_app, patch(
+ "asyncio.sleep", new_callable=AsyncMock
+ ) as mock_sleep:
+ mock_get_parameters_from_openapi.return_value = [
+ {"name": "param1", "type": "input"},
+ {"name": "param2", "type": "input"},
+ ]
+
+ # Mock the response of invoke_app to always fail
+ def invoke_app_side_effect(uri, datapoint, parameters, openapi_parameters):
+ raise aiohttp.ClientError("Test Error")
+
+ mock_invoke_app.side_effect = invoke_app_side_effect
+
+ uri = "http://example.com"
+ testset_data = [
+ {"id": 1, "param1": "value1", "param2": "value2"},
+ {"id": 2, "param1": "value1", "param2": "value2"},
+ ]
+ parameters = {}
+ rate_limit_config = {
+ "batch_size": 10,
+ "max_retries": 3,
+ "retry_delay": 3,
+ "delay_between_batches": 5,
+ }
+
+ results = await batch_invoke(uri, testset_data, parameters, rate_limit_config)
+
+ assert len(results) == 2
+ assert results[0].result.type == "error"
+ assert results[0].result.error.message == "Max retries reached"
+ assert results[1].result.type == "error"
+ assert results[1].result.error.message == "Max retries reached"
+
+
+@pytest.mark.asyncio
+async def test_batch_invoke_generic_exception():
+ """
+ Test the batch_invoke function with a generic exception.
+
+ This test mocks the get_parameters_from_openapi and invoke_app functions
+ to simulate a generic exception during invocation. It verifies that the
+ batch_invoke function correctly handles the exception and returns an error
+ result with the appropriate error message.
+ """
+ with patch(
+ "agenta_backend.services.llm_apps_service.get_parameters_from_openapi",
+ new_callable=AsyncMock,
+ ) as mock_get_parameters_from_openapi, patch(
+ "agenta_backend.services.llm_apps_service.invoke_app", new_callable=AsyncMock
+ ) as mock_invoke_app, patch(
+ "asyncio.sleep", new_callable=AsyncMock
+ ) as mock_sleep:
+ mock_get_parameters_from_openapi.return_value = [
+ {"name": "param1", "type": "input"},
+ {"name": "param2", "type": "input"},
+ ]
+
+ # Mock the response of invoke_app to raise a generic exception
+ def invoke_app_side_effect(uri, datapoint, parameters, openapi_parameters):
+ raise Exception("Generic Error")
+
+ mock_invoke_app.side_effect = invoke_app_side_effect
+
+ uri = "http://example.com"
+ testset_data = [{"id": 1, "param1": "value1", "param2": "value2"}]
+ parameters = {}
+ rate_limit_config = {
+ "batch_size": 1,
+ "max_retries": 3,
+ "retry_delay": 1,
+ "delay_between_batches": 1,
+ }
+
+ results = await batch_invoke(uri, testset_data, parameters, rate_limit_config)
+
+ assert len(results) == 1
+ assert results[0].result.type == "error"
+ assert results[0].result.error.message == "Max retries reached"
diff --git a/agenta-cli/poetry.lock b/agenta-cli/poetry.lock
index 8319af3a35..ef3818ea17 100644
--- a/agenta-cli/poetry.lock
+++ b/agenta-cli/poetry.lock
@@ -1019,13 +1019,13 @@ zstd = ["zstandard"]
[[package]]
name = "pytest"
-version = "8.2.0"
+version = "8.2.1"
description = "pytest: simple powerful testing with Python"
optional = false
python-versions = ">=3.8"
files = [
- {file = "pytest-8.2.0-py3-none-any.whl", hash = "sha256:1733f0620f6cda4095bbf0d9ff8022486e91892245bb9e7d5542c018f612f233"},
- {file = "pytest-8.2.0.tar.gz", hash = "sha256:d507d4482197eac0ba2bae2e9babf0672eb333017bcedaa5fb1a3d42c1174b3f"},
+ {file = "pytest-8.2.1-py3-none-any.whl", hash = "sha256:faccc5d332b8c3719f40283d0d44aa5cf101cec36f88cde9ed8f2bc0538612b1"},
+ {file = "pytest-8.2.1.tar.gz", hash = "sha256:5046e5b46d8e4cac199c373041f26be56fdb81eb4e67dc11d4e10811fc3408fd"},
]
[package.dependencies]
diff --git a/agenta-web/dev.Dockerfile b/agenta-web/dev.Dockerfile
index c155bc07db..1e5e0c16f5 100644
--- a/agenta-web/dev.Dockerfile
+++ b/agenta-web/dev.Dockerfile
@@ -1,21 +1,17 @@
-FROM node:18-alpine
+FROM node:22-alpine3.18 AS base
WORKDIR /app
# Install dependencies based on the preferred package manager
COPY package.json yarn.lock* package-lock.json* pnpm-lock.yaml* ./
RUN \
- # echo "Standalone: $NEXT_PUBLIC_STANDALONE"; \
- # if [[ ! $NEXT_PUBLIC_STANDALONE == "true" ]]; then \
- if [ -f yarn.lock ]; then yarn --frozen-lockfile; \
- elif [ -f package-lock.json ]; then npm i; \
- elif [ -f pnpm-lock.yaml ]; then yarn global add pnpm && pnpm i; \
- # Allow install without lockfile, so example works even without Node.js installed locally
- else echo "Warning: Lockfile not found. It is recommended to commit lockfiles to version control." && yarn install; \
+ if [ -f yarn.lock ]; then yarn install --frozen-lockfile; \
+ elif [ -f package-lock.json ]; then npm install; \
+ elif [ -f pnpm-lock.yaml ]; then npm install -g pnpm && pnpm install; \
+ else yarn install; \
fi
-# else echo "NEXT_PUBLIC_STANDALONE is set, skipping install"; \
-# fi
+# Copy only the necessary files for development
COPY src ./src
COPY public ./public
COPY next.config.js .
@@ -23,25 +19,31 @@ COPY tsconfig.json .
COPY postcss.config.js .
COPY tailwind.config.ts .
COPY .env .
-RUN if [ -f .env.local ]; then cp .env.local .; fi
-# RUN if [ -f tailwind.config.ts ]; then cp tailwind.config.ts .; fi
-# # used in cloud
COPY sentry.* .
-# Next.js collects completely anonymous telemetry data about general usage. Learn more here: https://nextjs.org/telemetry
-# Uncomment the following line to disable telemetry at run time
-# ENV NEXT_TELEMETRY_DISABLED 1
-# Note: Don't expose ports here, Compose will handle that for us
+# Stage 2: Development Stage
+FROM node:22-alpine3.18 AS dev
+
+WORKDIR /app
+
+# Copy dependencies and application files from the base stage
+COPY --from=base /app /app
+
+# Install development dependencies.
+# This stage starts from a fresh node image and only /app is copied from the
+# base stage, so the pnpm that base installed globally is not available here
+# and has to be installed again before it can be used.
+RUN \
+    if [ -f yarn.lock ]; then yarn install; \
+    elif [ -f package-lock.json ]; then npm install; \
+    elif [ -f pnpm-lock.yaml ]; then npm install -g pnpm && pnpm install; \
+    else yarn install; \
+    fi
+
+# Expose the necessary ports
+EXPOSE 3000
# Start Next.js in development mode based on the preferred package manager
CMD \
- # echo "Standalone: $NEXT_PUBLIC_STANDALONE"; \
- # if [[ ! $NEXT_PUBLIC_STANDALONE == "true" ]]; then \
if [ -f yarn.lock ]; then yarn dev; \
elif [ -f package-lock.json ]; then npm run dev; \
elif [ -f pnpm-lock.yaml ]; then pnpm dev; \
else yarn dev; \
fi
-# else echo "NEXT_PUBLIC_STANDALONE is set, skipping run"; \
-# fi
-
diff --git a/agenta-web/prod.Dockerfile b/agenta-web/prod.Dockerfile
index dbe9b9da3c..a2b8e55c60 100644
--- a/agenta-web/prod.Dockerfile
+++ b/agenta-web/prod.Dockerfile
@@ -1,11 +1,11 @@
-FROM node:18-alpine
+# Stage 1: Build Stage
+FROM node:22-alpine3.18 AS builder
WORKDIR /app
-# Install only production dependencies
+# Install all dependencies (dev dependencies are needed to build the app)
COPY package.json package-lock.json* ./
-RUN npm ci --omit=dev
-
+RUN npm ci
# Copy only necessary files
COPY src ./src
COPY public ./public
@@ -14,10 +14,27 @@ COPY tsconfig.json .
COPY postcss.config.js .
COPY tailwind.config.ts .
COPY .env.production .
-# used in cloud
COPY sentry.* .
+
# Build the Next.js app for production
RUN npm run build
+# Stage 2: Production Stage
+FROM node:22-alpine3.18 AS prod
+
+WORKDIR /app
+
+# Copy only the necessary files from the build stage.
+# When COPY has several sources (or a wildcard), the destination must be a
+# directory and end with a trailing slash.
+COPY --from=builder /app/package.json /app/package-lock.json* /app/
+COPY --from=builder /app/.next /app/.next
+COPY --from=builder /app/public /app/public
+COPY --from=builder /app/next.config.js /app/tsconfig.json /app/postcss.config.js /app/tailwind.config.ts /app/.env.production /app/sentry.* /app/
+
+# Install only production dependencies
+RUN npm ci --omit=dev
+
+# Expose the necessary port
+EXPOSE 3000
+
# Start the production server
CMD ["npm", "start"]
diff --git a/docker-compose.yml b/docker-compose.yml
index 476dede0cd..2832585b27 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -167,3 +167,4 @@ networks:
volumes:
mongodb_data:
redis_data:
+ nextjs_cache:
\ No newline at end of file