diff --git a/.all-contributorsrc b/.all-contributorsrc index d055b089f1..ecc7093dc3 100644 --- a/.all-contributorsrc +++ b/.all-contributorsrc @@ -417,6 +417,16 @@ "contributions": [ "doc" ] + }, + { + "login": "LucasTrg", + "name": "LucasTrg", + "avatar_url": "https://avatars.githubusercontent.com/u/47852577?v=4", + "profile": "https://github.com/LucasTrg", + "contributions": [ + "code", + "bug" + ] } ], "contributorsPerLine": 7, diff --git a/README.md b/README.md index 13e745b36a..62735b3dd7 100644 --- a/README.md +++ b/README.md @@ -169,7 +169,7 @@ Check out our [Contributing Guide](https://docs.agenta.ai/contributing/getting-s ## Contributors ✨ -[![All Contributors](https://img.shields.io/badge/all_contributors-44-orange.svg?style=flat-square)](#contributors-) +[![All Contributors](https://img.shields.io/badge/all_contributors-45-orange.svg?style=flat-square)](#contributors-) Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/docs/en/emoji-key)): @@ -236,6 +236,7 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d Vishal Vanpariya
Vishal Vanpariya

💻 Youcef Boumar
Youcef Boumar

📖 + LucasTrg
LucasTrg

💻 🐛 diff --git a/agenta-backend/agenta_backend/services/llm_apps_service.py b/agenta-backend/agenta_backend/services/llm_apps_service.py index 4e7ab1a9c5..2accbfe509 100644 --- a/agenta-backend/agenta_backend/services/llm_apps_service.py +++ b/agenta-backend/agenta_backend/services/llm_apps_service.py @@ -172,6 +172,7 @@ async def run_with_retry( last_exception = e logger.info(f"Error processing datapoint: {input_data}. {str(e)}") logger.info("".join(traceback.format_exception_only(type(e), e))) + retries += 1 common.capture_exception_in_sentry(e) # If max retries is reached or an exception that isn't in the second block, @@ -186,7 +187,7 @@ async def run_with_retry( result=Result( type="error", value=None, - error=Error(message=exception_message, stacktrace=last_exception), + error=Error(message=exception_message, stacktrace=str(last_exception)), ) ) diff --git a/agenta-backend/agenta_backend/services/security/sandbox.py b/agenta-backend/agenta_backend/services/security/sandbox.py index 58a58f0d35..6a6988daa7 100644 --- a/agenta-backend/agenta_backend/services/security/sandbox.py +++ b/agenta-backend/agenta_backend/services/security/sandbox.py @@ -91,7 +91,7 @@ def execute_code_safely( # Call the evaluation function, extract the result if it exists # and is a float between 0 and 1 - result = environment["evaluate"](app_params, inputs, correct_answer, output) + result = environment["evaluate"](app_params, inputs, output, correct_answer) if isinstance(result, float) and 0 <= result <= 1: return result return None diff --git a/agenta-backend/agenta_backend/tests/unit/test_llm_apps_service.py b/agenta-backend/agenta_backend/tests/unit/test_llm_apps_service.py new file mode 100644 index 0000000000..3462f7c94c --- /dev/null +++ b/agenta-backend/agenta_backend/tests/unit/test_llm_apps_service.py @@ -0,0 +1,162 @@ +import pytest +from unittest.mock import patch, AsyncMock +import asyncio +import aiohttp + +from agenta_backend.services.llm_apps_service import ( + 
batch_invoke, + InvokationResult, + Result, + Error, +) + + +@pytest.mark.asyncio +async def test_batch_invoke_success(): + """ + Test the successful invocation of batch_invoke function. + + This test mocks the get_parameters_from_openapi and invoke_app functions + to simulate successful invocations. It verifies that the batch_invoke + function correctly returns the expected results for the given test data. + """ + with patch( + "agenta_backend.services.llm_apps_service.get_parameters_from_openapi", + new_callable=AsyncMock, + ) as mock_get_parameters_from_openapi, patch( + "agenta_backend.services.llm_apps_service.invoke_app", new_callable=AsyncMock + ) as mock_invoke_app, patch( + "asyncio.sleep", new_callable=AsyncMock + ) as mock_sleep: + mock_get_parameters_from_openapi.return_value = [ + {"name": "param1", "type": "input"}, + {"name": "param2", "type": "input"}, + ] + + # Mock the response of invoke_app to always succeed + def invoke_app_side_effect(uri, datapoint, parameters, openapi_parameters): + return InvokationResult( + result=Result(type="text", value="Success", error=None), + latency=0.1, + cost=0.01, + ) + + mock_invoke_app.side_effect = invoke_app_side_effect + + uri = "http://example.com" + testset_data = [ + {"id": 1, "param1": "value1", "param2": "value2"}, + {"id": 2, "param1": "value1", "param2": "value2"}, + ] + parameters = {} + rate_limit_config = { + "batch_size": 10, + "max_retries": 3, + "retry_delay": 3, + "delay_between_batches": 5, + } + + results = await batch_invoke(uri, testset_data, parameters, rate_limit_config) + + assert len(results) == 2 + assert results[0].result.type == "text" + assert results[0].result.value == "Success" + assert results[1].result.type == "text" + assert results[1].result.value == "Success" + + +@pytest.mark.asyncio +async def test_batch_invoke_retries_and_failure(): + """ + Test the batch_invoke function with retries and eventual failure. 
+ + This test mocks the get_parameters_from_openapi and invoke_app functions + to simulate failures that trigger retries. It verifies that the batch_invoke + function correctly retries the specified number of times and returns an error + result after reaching the maximum retries. + """ + with patch( + "agenta_backend.services.llm_apps_service.get_parameters_from_openapi", + new_callable=AsyncMock, + ) as mock_get_parameters_from_openapi, patch( + "agenta_backend.services.llm_apps_service.invoke_app", new_callable=AsyncMock + ) as mock_invoke_app, patch( + "asyncio.sleep", new_callable=AsyncMock + ) as mock_sleep: + mock_get_parameters_from_openapi.return_value = [ + {"name": "param1", "type": "input"}, + {"name": "param2", "type": "input"}, + ] + + # Mock the response of invoke_app to always fail + def invoke_app_side_effect(uri, datapoint, parameters, openapi_parameters): + raise aiohttp.ClientError("Test Error") + + mock_invoke_app.side_effect = invoke_app_side_effect + + uri = "http://example.com" + testset_data = [ + {"id": 1, "param1": "value1", "param2": "value2"}, + {"id": 2, "param1": "value1", "param2": "value2"}, + ] + parameters = {} + rate_limit_config = { + "batch_size": 10, + "max_retries": 3, + "retry_delay": 3, + "delay_between_batches": 5, + } + + results = await batch_invoke(uri, testset_data, parameters, rate_limit_config) + + assert len(results) == 2 + assert results[0].result.type == "error" + assert results[0].result.error.message == "Max retries reached" + assert results[1].result.type == "error" + assert results[1].result.error.message == "Max retries reached" + + +@pytest.mark.asyncio +async def test_batch_invoke_generic_exception(): + """ + Test the batch_invoke function with a generic exception. + + This test mocks the get_parameters_from_openapi and invoke_app functions + to simulate a generic exception during invocation. 
It verifies that the + batch_invoke function correctly handles the exception and returns an error + result with the appropriate error message. + """ + with patch( + "agenta_backend.services.llm_apps_service.get_parameters_from_openapi", + new_callable=AsyncMock, + ) as mock_get_parameters_from_openapi, patch( + "agenta_backend.services.llm_apps_service.invoke_app", new_callable=AsyncMock + ) as mock_invoke_app, patch( + "asyncio.sleep", new_callable=AsyncMock + ) as mock_sleep: + mock_get_parameters_from_openapi.return_value = [ + {"name": "param1", "type": "input"}, + {"name": "param2", "type": "input"}, + ] + + # Mock the response of invoke_app to raise a generic exception + def invoke_app_side_effect(uri, datapoint, parameters, openapi_parameters): + raise Exception("Generic Error") + + mock_invoke_app.side_effect = invoke_app_side_effect + + uri = "http://example.com" + testset_data = [{"id": 1, "param1": "value1", "param2": "value2"}] + parameters = {} + rate_limit_config = { + "batch_size": 1, + "max_retries": 3, + "retry_delay": 1, + "delay_between_batches": 1, + } + + results = await batch_invoke(uri, testset_data, parameters, rate_limit_config) + + assert len(results) == 1 + assert results[0].result.type == "error" + assert results[0].result.error.message == "Max retries reached" diff --git a/agenta-cli/poetry.lock b/agenta-cli/poetry.lock index 8319af3a35..ef3818ea17 100644 --- a/agenta-cli/poetry.lock +++ b/agenta-cli/poetry.lock @@ -1019,13 +1019,13 @@ zstd = ["zstandard"] [[package]] name = "pytest" -version = "8.2.0" +version = "8.2.1" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.8" files = [ - {file = "pytest-8.2.0-py3-none-any.whl", hash = "sha256:1733f0620f6cda4095bbf0d9ff8022486e91892245bb9e7d5542c018f612f233"}, - {file = "pytest-8.2.0.tar.gz", hash = "sha256:d507d4482197eac0ba2bae2e9babf0672eb333017bcedaa5fb1a3d42c1174b3f"}, + {file = "pytest-8.2.1-py3-none-any.whl", hash = 
"sha256:faccc5d332b8c3719f40283d0d44aa5cf101cec36f88cde9ed8f2bc0538612b1"}, + {file = "pytest-8.2.1.tar.gz", hash = "sha256:5046e5b46d8e4cac199c373041f26be56fdb81eb4e67dc11d4e10811fc3408fd"}, ] [package.dependencies] diff --git a/agenta-web/dev.Dockerfile b/agenta-web/dev.Dockerfile index c155bc07db..1e5e0c16f5 100644 --- a/agenta-web/dev.Dockerfile +++ b/agenta-web/dev.Dockerfile @@ -1,21 +1,17 @@ -FROM node:18-alpine +FROM node:22-alpine3.18 AS base WORKDIR /app # Install dependencies based on the preferred package manager COPY package.json yarn.lock* package-lock.json* pnpm-lock.yaml* ./ RUN \ - # echo "Standalone: $NEXT_PUBLIC_STANDALONE"; \ - # if [[ ! $NEXT_PUBLIC_STANDALONE == "true" ]]; then \ - if [ -f yarn.lock ]; then yarn --frozen-lockfile; \ - elif [ -f package-lock.json ]; then npm i; \ - elif [ -f pnpm-lock.yaml ]; then yarn global add pnpm && pnpm i; \ - # Allow install without lockfile, so example works even without Node.js installed locally - else echo "Warning: Lockfile not found. It is recommended to commit lockfiles to version control." && yarn install; \ + if [ -f yarn.lock ]; then yarn install --frozen-lockfile; \ + elif [ -f package-lock.json ]; then npm install; \ + elif [ -f pnpm-lock.yaml ]; then npm install -g pnpm && pnpm install; \ + else yarn install; \ fi -# else echo "NEXT_PUBLIC_STANDALONE is set, skipping install"; \ -# fi +# Copy only the necessary files for development COPY src ./src COPY public ./public COPY next.config.js . @@ -23,25 +19,31 @@ COPY tsconfig.json . COPY postcss.config.js . COPY tailwind.config.ts . COPY .env . -RUN if [ -f .env.local ]; then cp .env.local .; fi -# RUN if [ -f tailwind.config.ts ]; then cp tailwind.config.ts .; fi -# # used in cloud COPY sentry.* . -# Next.js collects completely anonymous telemetry data about general usage. 
Learn more here: https://nextjs.org/telemetry -# Uncomment the following line to disable telemetry at run time -# ENV NEXT_TELEMETRY_DISABLED 1 -# Note: Don't expose ports here, Compose will handle that for us +# Stage 2: Development Stage +FROM node:22-alpine3.18 AS dev + +WORKDIR /app + +# Copy dependencies and application files from the base stage +COPY --from=base /app /app + +# Install development dependencies (pnpm was installed globally in the base stage only, so install it again here before using it) +RUN \ + if [ -f yarn.lock ]; then yarn install; \ + elif [ -f package-lock.json ]; then npm install; \ + elif [ -f pnpm-lock.yaml ]; then npm install -g pnpm && pnpm install; \ + else yarn install; \ + fi + +# Expose the necessary ports +EXPOSE 3000 # Start Next.js in development mode based on the preferred package manager CMD \ - # echo "Standalone: $NEXT_PUBLIC_STANDALONE"; \ - # if [[ ! $NEXT_PUBLIC_STANDALONE == "true" ]]; then \ if [ -f yarn.lock ]; then yarn dev; \ elif [ -f package-lock.json ]; then npm run dev; \ elif [ -f pnpm-lock.yaml ]; then pnpm dev; \ else yarn dev; \ fi -# else echo "NEXT_PUBLIC_STANDALONE is set, skipping run"; \ -# fi - diff --git a/agenta-web/prod.Dockerfile b/agenta-web/prod.Dockerfile index dbe9b9da3c..a2b8e55c60 100644 --- a/agenta-web/prod.Dockerfile +++ b/agenta-web/prod.Dockerfile @@ -1,11 +1,11 @@ -FROM node:18-alpine +# Stage 1: Build Stage +FROM node:22-alpine3.18 AS builder WORKDIR /app # Install only production dependencies COPY package.json package-lock.json* ./ -RUN npm ci --omit=dev - +RUN npm ci # Copy only necessary files COPY src ./src COPY public ./public @@ -14,10 +14,27 @@ COPY tsconfig.json . COPY postcss.config.js . COPY tailwind.config.ts . COPY .env.production . -# used in cloud COPY sentry.* .
+ # Build the Next.js app for production RUN npm run build +# Stage 2: Production Stage +FROM node:22-alpine3.18 AS prod + +WORKDIR /app + +# Copy only the necessary files from the build stage (a multi-source COPY needs a directory destination ending in /) +COPY --from=builder /app/package.json /app/package-lock.json* /app/ +COPY --from=builder /app/.next /app/.next +COPY --from=builder /app/public /app/public +COPY --from=builder /app/next.config.js /app/tsconfig.json /app/postcss.config.js /app/tailwind.config.ts /app/.env.production /app/sentry.* /app/ + +# Install only production dependencies +RUN npm ci --omit=dev + +# Expose the necessary port +EXPOSE 3000 + # Start the production server CMD ["npm", "start"] diff --git a/docker-compose.yml b/docker-compose.yml index 476dede0cd..2832585b27 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -167,3 +167,4 @@ networks: volumes: mongodb_data: redis_data: + nextjs_cache: \ No newline at end of file