diff --git a/README.md b/README.md index 592eb69be3..b32e8f506e 100644 --- a/README.md +++ b/README.md @@ -1,26 +1,25 @@ -
- -
+

+ - - + + Shows the logo of agenta -

-

- Home Page | - Slack | - Documentation -

+

+

+ Documentation | + Website | + Slack +

-

Collaborate on prompts, evaluate, and deploy LLM applications with confidence

- The open-source LLM developer platform for prompt-engineering, evaluation, human feedback, and deployment of complex LLM apps. +

The Open-source LLMOps Platform

+ Prompt playground, prompt management, evaluation, and observability

MIT license. - + Doc @@ -49,21 +48,25 @@

-
- - - - - - - -
-

-
+

+ - Glamour Shot + + + Try Agenta Live Demo + +

+ +
+
+
+ + + Screenshot Agenta + +

@@ -72,83 +75,58 @@ ---

- Quick Start • - Features • - Documentation • - Enterprise • - Roadmap • - Join Our Slack • - Contributing + Documentation • + Changelog • + Website • + Agenta Cloud +

--- -# ⭐️ Why Agenta? - -Agenta is an end-to-end LLM developer platform. It provides the tools for **prompt engineering and management**, ⚖️ **evaluation**, **human annotation**, and :rocket: **deployment**. All without imposing any restrictions on your choice of framework, library, or model. - -Agenta allows developers and product teams to collaborate in building production-grade LLM-powered applications in less time. - -### With Agenta, you can: - -- [🧪 **Experiment** and **compare** prompts](https://docs.agenta.ai/prompt_management/prompt_engineering) on [any LLM workflow](https://docs.agenta.ai/prompt_management/setting_up/custom_applications) (chain-of-prompts, Retrieval Augmented Generation (RAG), LLM agents...) -- ✍️ Collect and [**annotate golden test sets**](https://docs.agenta.ai/evaluation/test_sets) for evaluation -- 📈 [**Evaluate** your application](https://docs.agenta.ai/evaluation/automatic_evaluation) with pre-existing or [**custom evaluators**](https://docs.agenta.ai/evaluation/custom_evaluator) -- [🔍 **Annotate** and **A/B test**](https://docs.agenta.ai/evaluation/human_evaluation) your applications with **human feedback** -- [🤝 **Collaborate with product teams**](https://docs.agenta.ai/misc/team_management) for prompt engineering and evaluation -- [🚀 **Deploy your application**](https://docs.agenta.ai/prompt_management/deployment) in one-click in the UI, through CLI, or through github workflows. +# What is Agenta? -### Works with any LLM app workflow +Agenta is a platform for building production-grade LLM applications. It helps **engineering and product teams** create reliable LLM apps faster. -Agenta enables prompt engineering and evaluation on any LLM app architecture: -- Chain of prompts -- RAG -- Agents - -It works with any framework such as [Langchain](https://www.langchain.com/), [LlamaIndex](https://www.llamaindex.ai/) and any LLM provider (openAI, Cohere, Mistral). - -# Quick Start - -### [Get started for free](https://cloud.agenta.ai?utm_source=github&utm_medium=readme&utm_campaign=github) - -### [Explore the Docs](https://docs.agenta.ai/) - -### [Create your first application in one-minute](https://docs.agenta.ai/getting_started/quick-start) - -### [Create an application using Langchain](https://docs.agenta.ai/guides/tutorials/first-app-with-langchain) - -### [Self-host agenta](https://docs.agenta.ai/self-host/host-locally) - -### [Check the Cookbook](https://docs.agenta.ai/guides/cookbooks/evaluations_with_sdk) +Agenta provides end-to-end tools for the entire LLMOps workflow: building (**LLM playground**, **evaluation**), deploying (**prompt and configuration management**), and monitoring (**LLM observability and tracing**). # Features - -| Playground | Evaluation | -| ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| Compare and version prompts for any LLM app, from single prompt to agents.
- +
@@ -242,7 +240,7 @@ const ConfigureEvaluator = ({ -
+
{basicSettingsFields.length ? ( - + Parameters diff --git a/agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/EvaluatorsModal.tsx b/agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/EvaluatorsModal.tsx index a9e70174ed..53d99ac647 100644 --- a/agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/EvaluatorsModal.tsx +++ b/agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/EvaluatorsModal.tsx @@ -16,21 +16,31 @@ import {useLocalStorage} from "usehooks-ts" type EvaluatorsModalProps = {} & React.ComponentProps const useStyles = createUseStyles(() => ({ - modalWrapper: { + modalWrapper: ({current, debugEvaluator}: {current: number; debugEvaluator: boolean}) => ({ + height: "95vh", + width: `${current === 2 && !debugEvaluator ? "600px" : "90vw"} !important`, + maxWidth: "1800px", + maxHeight: "1100px", + minWidth: current === 2 && !debugEvaluator ? "600px" : "1000px", + minHeight: "800px", transition: "width 0.3s ease", + "& > div": { + height: "100%", + }, "& .ant-modal-content": { - height: 800, + height: "100%", "& .ant-modal-body": { height: "100%", }, }, - }, + }), })) const EvaluatorsModal = ({...props}: EvaluatorsModalProps) => { - const classes = useStyles() const appId = useAppId() const [current, setCurrent] = useState(0) + const [debugEvaluator, setDebugEvaluator] = useLocalStorage("isDebugSelectionOpen", false) + const classes = useStyles({current, debugEvaluator}) const [evaluators, setEvaluators] = useAtom(evaluatorsAtom) const [evaluatorConfigs, setEvaluatorConfigs] = useAtom(evaluatorConfigsAtom) const [selectedEvaluator, setSelectedEvaluator] = useState(null) @@ -51,7 +61,6 @@ const EvaluatorsModal = ({...props}: EvaluatorsModalProps) => { "list", ) const [selectedEvaluatorCategory, setSelectedEvaluatorCategory] = useState("view_all") - const [debugEvaluator, setDebugEvaluator] = useLocalStorage("isDebugSelectionOpen", false) const [selectedTestset, setSelectedTestset] = useState("") const evalConfigFetcher = () => { @@ -159,7 +168,6 @@ const EvaluatorsModal = ({...props}: EvaluatorsModalProps) => { return ( = ({children}) = : [], ) const [sort, setSort] = useState({} as SortResult) - const [pagination, setPagination] = useState({page: 1, size: 10}) + const [pagination, setPagination] = useState({page: 1, size: 50}) const fetchTraces = async () => { try { diff --git a/agenta-web/src/services/api.ts b/agenta-web/src/services/api.ts index 93cf6e7fc9..0f26e1e144 100644 --- a/agenta-web/src/services/api.ts +++ b/agenta-web/src/services/api.ts @@ -1,4 +1,5 @@ import axios from "@/lib//helpers/axiosConfig" +import Session from "supertokens-auth-react/recipe/session" import {formatDay} from "@/lib/helpers/dateTimeHelper" import { detectChatVariantFromOpenAISchema, @@ -113,17 +114,36 @@ export async function callVariant( } const appContainerURI = await fetchAppContainerURL(appId, undefined, baseId) + const jwt = await getJWT() return axios .post(`${appContainerURI}/generate`, requestBody, { signal, _ignoreError: ignoreAxiosError, + headers: { + Authorization: jwt && `Bearer ${jwt}`, + }, } as any) .then((res) => { return res.data }) } +/** + * Get the JWT from SuperTokens + */ +const getJWT = async () => { + try { + if (await Session.doesSessionExist()) { + let jwt = await Session.getAccessToken() + + return jwt + } + } catch (error) {} + + return undefined +} + /** * Parses the openapi.json from a variant and returns the parameters as an array of objects. 
* @param app @@ -138,7 +158,13 @@ export const fetchVariantParametersFromOpenAPI = async ( ) => { const appContainerURI = await fetchAppContainerURL(appId, variantId, baseId) const url = `${appContainerURI}/openapi.json` - const response = await axios.get(url, {_ignoreError: ignoreAxiosError} as any) + const jwt = await getJWT() + const response = await axios.get(url, { + _ignoreError: ignoreAxiosError, + headers: { + Authorization: jwt && `Bearer ${jwt}`, + }, + } as any) const isChatVariant = detectChatVariantFromOpenAISchema(response.data) let APIParams = openAISchemaToParameters(response.data) diff --git a/docs/docs/observability/03-observability-sdk.mdx b/docs/docs/observability/03-observability-sdk.mdx index 2ebe100ff7..cfeb563a79 100644 --- a/docs/docs/observability/03-observability-sdk.mdx +++ b/docs/docs/observability/03-observability-sdk.mdx @@ -176,14 +176,104 @@ def rag_workflow(query:str): ``` -## Excluding Inputs/Outputs from Capture +## Redacting sensitive data: how to exclude data from capture In some cases, you may want to exclude parts of the inputs or outputs due to privacy concerns or because the data is too large to be stored in the span. -You can achieve this by setting the ignore_inputs and ignore_outputs arguments to True in the instrument decorator. +You can do this by setting the `ignore_inputs` and/or `ignore_outputs` arguments to `True` in the instrument decorator. ```python -@ag.instrument(spankind="workflow", ignore_inputs=True, ignore_outputs=True) +@ag.instrument( + spankind="workflow", + ignore_inputs=True, + ignore_outputs=True +) def rag_workflow(query:str): ... ``` + +If you want more control, you can specify which parts of the inputs and outputs to exclude: + +```python +@ag.instrument( + spankind="workflow", + ignore_inputs=["user_id"], + ignore_outputs=["pii"], +) +def rag_workflow(query:str, user_id:str): + ... + return { + "result": ..., + "pii": ... + } +``` + +For even finer control, you can use a custom `redact()` callback, along with instructions in the case of errors. + +```python +def my_redact(name, field, data): + if name == "rag_workflow": + if field == "inputs": + del data["user_id"] + if field == "outputs": + del data["pii"] + + return data + + +@ag.instrument( + spankind="workflow", + redact=my_redact, + redact_on_error=False, +) +def rag_workflow(query:str, user_id:str): + ... + return { + "result": ..., + "pii": ... + } +``` + +Finally, if you want to set up global rules for redaction, you can provide a global `redact()` callback that applies everywhere. + +```python +def global_redact( + name:str, + field:str, + data: Dict[str, Any] +): + if "pii" in data: + del data["pii"] + + return data + + +ag.init( + redact=global_redact, + redact_on_error=True, +) + +def local_redact( + name:str, + field:str, + data: Dict[str, Any] +): + if name == "rag_workflow": + if field == "inputs": + del data["user_id"] + + return data + + +@ag.instrument( + spankind="workflow", + redact=local_redact, + redact_on_error=False, +) +def rag_workflow(query:str, user_id:str): + ... + return { + "result": ..., + "pii": ... + } +``` diff --git a/docs/docs/prompt-management/integration/02-fetch-prompts.mdx b/docs/docs/prompt-management/integration/02-fetch-prompts.mdx index 067a603d1b..87de178564 100644 --- a/docs/docs/prompt-management/integration/02-fetch-prompts.mdx +++ b/docs/docs/prompt-management/integration/02-fetch-prompts.mdx @@ -3,7 +3,7 @@ title: "Fetch prompts" description: "How to fetch the deployed version of your prompt using the Agenta SDK." 
--- -This guide shows you how to fetch the deployed version of your prompt in your code. You can do this using the Agenta SDK (Python). +This guide shows you how to fetch the deployed version of your prompt in your code using the Agenta SDK. ## Fetching prompts with the Agenta SDK @@ -14,32 +14,34 @@ Make sure to install the latest version of the agenta Python SDK (`pip install -U agenta`). - Set up environment variables: - `AGENTA_API_KEY` for cloud users. - `AGENTA_HOST` set to `http://localhost` if you are self-hosting. - - `AGENTA_PROJECT_ID` set to the project ID. ### Step 2: Fetch the prompt ```python -from agenta import Agenta -agenta = Agenta() -config = agenta.get_config(base_id="xxxxx", environment="production", cache_timeout=200) # Fetches the configuration with caching +import agenta as ag -``` +ag.init() +config = ag.ConfigManager.get_from_registry( + app_slug="my-app", + environment_slug="staging" # choose production, staging, or development +) -The response object is an instance of `GetConfigResponse` from `agenta.client.backend.types.get_config_response`. It contains the following attributes: +print("Fetched configuration from staging:") +print(config) +``` -- `config_name: 'default'` -- `current_version: 1` -- `parameters: This dictionary contains the configuration of the application, for instance: +**Sample Output:** ```python -{'temperature': 1.0, -'model': 'gpt-3.5-turbo', -'max_tokens': -1, -'prompt_system': 'You are an expert in geography.', -'prompt_user': 'What is the capital of {country}?', -'top_p': 1.0, -'frequence_penalty': 0.0, -'presence_penalty': 0.0, -'force_json': 0} - +Fetched configuration from staging: +{ + 'temperature': 0.7, + 'model': 'gpt-3.5-turbo', + 'max_tokens': 150, + 'prompt_system': 'You are an assistant that provides concise answers.', + 'prompt_user': 'Explain {topic} in simple terms.', + 'top_p': 1.0, + 'frequency_penalty': 0.0, + 'presence_penalty': 0.0 +} ```
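As a minimal sketch of how the fetched configuration might be used downstream, assuming the parameter keys shown in the sample output above (`prompt_system`, `prompt_user`, `model`, `temperature`, ...) and the hypothetical `my-app` slug from the example — an illustration, not part of the change set above:

```python
import agenta as ag

ag.init()

# Fetch the configuration currently deployed to production (sketch only)
config = ag.ConfigManager.get_from_registry(
    app_slug="my-app",              # hypothetical slug, as in the example above
    environment_slug="production",
)

# The returned value is a plain dict of parameters, so the prompt templates
# can be filled in with standard string formatting before calling any LLM client.
messages = [
    {"role": "system", "content": config["prompt_system"]},
    # "vector databases" is a made-up value for the {topic} placeholder
    {"role": "user", "content": config["prompt_user"].format(topic="vector databases")},
]

print(config["model"], config["temperature"])
print(messages)
```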