diff --git a/agenta-backend/agenta_backend/resources/evaluators/evaluators.py b/agenta-backend/agenta_backend/resources/evaluators/evaluators.py
index cf00af533d..3faab67016 100644
--- a/agenta-backend/agenta_backend/resources/evaluators/evaluators.py
+++ b/agenta-backend/agenta_backend/resources/evaluators/evaluators.py
@@ -28,6 +28,9 @@
                 "type": "number",
                 "default": 0.5,
                 "description": "The threshold value for similarity comparison",
+                "min": 0,
+                "max": 1,
+                "required": True,
             }
         },
         "description": "Similarity Match evaluator checks if the generated answer is similar to the expected answer. You need to provide the similarity threshold. It uses the Jaccard similarity to compare the answers.",
@@ -43,6 +46,7 @@
                 "type": "regex",
                 "default": "",
                 "description": "Pattern for regex testing (ex: ^this_word\\d{3}$)",
+                "required": True,
             },
             "regex_should_match": {
                 "label": "Match/Mismatch",
@@ -62,6 +66,7 @@
                 "type": "string",
                 "default": "",
                 "description": "The name of the field in the JSON output that you wish to evaluate",
+                "required": True,
             }
         },
         "description": "JSON Field Match evaluator compares specific fields within JSON (JavaScript Object Notation) data. This matching can involve finding similarities or correspondences between fields in different JSON objects.",
@@ -76,6 +81,7 @@
                 "type": "text",
                 "default": "We have an LLM App that we want to evaluate its outputs. Based on the prompt and the parameters provided below evaluate the output based on the evaluation strategy below: Evaluation strategy: 0 to 10 0 is very bad and 10 is very good. Prompt: {llm_app_prompt_template} Inputs: country: {country} Correct Answer:{correct_answer} Evaluate this: {variant_output} Answer ONLY with one of the given grading or evaluation options.",
                 "description": "Template for AI critique prompts",
+                "required": True,
             }
         },
         "description": "AI Critique evaluator sends the generated answer and the correct_answer to an LLM model and uses it to evaluate the correctness of the answer. You need to provide the evaluation prompt (or use the default prompt).",
@@ -90,6 +96,7 @@
                 "type": "code",
                 "default": "from typing import Dict\n\ndef evaluate(\n app_params: Dict[str, str],\n inputs: Dict[str, str],\n output: str,\n correct_answer: str\n) -> float:\n # ...\n return 0.75 # Replace with your calculated score",
                 "description": "Code for evaluating submissions",
+                "required": True,
             }
         },
         "description": "Code Evaluation allows you to write your own evaluator in Python. You need to provide the Python code for the evaluator.",
@@ -103,6 +110,7 @@
                 "label": "Webhook URL",
                 "type": "string",
                 "description": "https://your-webhook-url.com",
+                "required": True,
             },
         },
         "description": "Webhook test evaluator sends the generated answer and the correct_answer to a webhook and expects a response indicating the correctness of the answer. You need to provide the URL of the webhook and the response of the webhook must be between 0 and 1.",
@@ -132,10 +140,7 @@
         "settings_template": {
             "label": "Single Model Testing Settings",
             "description": "Checks if the output starts with the specified prefix.",
-            "prefix": {
-                "label": "prefix",
-                "type": "string",
-            },
+            "prefix": {"label": "prefix", "type": "string", "required": True},
             "case_sensitive": {
                 "label": "Case Sensitive",
                 "type": "boolean",
@@ -161,6 +166,7 @@
                 "label": "suffix",
                 "type": "string",
                 "description": "The string to match at the end of the output.",
+                "required": True,
             },
         },
         "description": "Ends With evaluator checks if the output ends with a specified suffix, considering case sensitivity based on the settings.",
@@ -182,6 +188,7 @@
                 "label": "substring",
                 "type": "string",
                 "description": "The string to check if it is contained in the output.",
+                "required": True,
             },
         },
         "description": "Contains evaluator checks if the output contains a specified substring, considering case sensitivity based on the settings.",
@@ -203,6 +210,7 @@
                 "label": "substrings",
                 "type": "string",
                 "description": "Provide a comma-separated list of strings to check if any is contained in the output.",
+                "required": True,
             },
         },
         "description": "Contains Any evaluator checks if the output contains any of the specified substrings from a comma-separated list, considering case sensitivity based on the settings.",
@@ -224,10 +232,22 @@
                 "label": "substrings",
                 "type": "string",
                 "description": "Provide a comma-separated list of strings to check if all are contained in the output.",
+                "required": True,
             },
         },
         "description": "Contains All evaluator checks if the output contains all of the specified substrings from a comma-separated list, considering case sensitivity based on the settings.",
     },
+    {
+        "name": "Levenshtein Distance",
+        "key": "auto_levenshtein_distance",
+        "direct_use": False,
+        "settings_template": {
+            "label": "Levenshtein Distance Settings",
+            "description": "Evaluates the Levenshtein distance between the output and the correct answer. If a threshold is specified, it checks if the distance is below this threshold and returns a boolean value. If no threshold is specified, it returns the numerical Levenshtein distance.",
+            "threshold": {"label": "Threshold", "type": "number", "required": False},
+        },
+        "description": "This evaluator calculates the Levenshtein distance between the output and the correct answer. If a threshold is provided in the settings, it returns a boolean indicating whether the distance is within the threshold. If no threshold is provided, it returns the actual Levenshtein distance as a numerical value.",
+    },
 ]
diff --git a/agenta-backend/agenta_backend/services/evaluators_service.py b/agenta-backend/agenta_backend/services/evaluators_service.py
index bab421c9c5..6a8219e056 100644
--- a/agenta-backend/agenta_backend/services/evaluators_service.py
+++ b/agenta-backend/agenta_backend/services/evaluators_service.py
@@ -421,6 +421,55 @@ def auto_contains_json(
     )


+def levenshtein_distance(s1, s2):
+    if len(s1) < len(s2):
+        return levenshtein_distance(s2, s1)
+
+    if len(s2) == 0:
+        return len(s1)
+
+    previous_row = range(len(s2) + 1)
+    for i, c1 in enumerate(s1):
+        current_row = [i + 1]
+        for j, c2 in enumerate(s2):
+            insertions = previous_row[j + 1] + 1
+            deletions = current_row[j] + 1
+            substitutions = previous_row[j] + (c1 != c2)
+            current_row.append(min(insertions, deletions, substitutions))
+        previous_row = current_row
+
+    return previous_row[-1]
+
+
+def auto_levenshtein_distance(
+    inputs: Dict[str, Any],
+    output: str,
+    correct_answer: str,
+    app_params: Dict[str, Any],
+    settings_values: Dict[str, Any],
+    lm_providers_keys: Dict[str, Any],
+) -> Result:
+    try:
+        distance = levenshtein_distance(output, correct_answer)
+
+        if "threshold" in settings_values:
+            threshold = settings_values["threshold"]
+            is_within_threshold = distance <= threshold
+            return Result(type="bool", value=is_within_threshold)
+
+        return Result(type="number", value=distance)
+
+    except Exception as e:
+        return Result(
+            type="error",
+            value=None,
+            error=Error(
+                message="Error during Levenshtein threshold evaluation",
+                stacktrace=str(e),
+            ),
+        )
+
+
 def evaluate(
     evaluator_key: str,
     inputs: Dict[str, Any],
diff --git a/agenta-backend/agenta_backend/tasks/evaluations.py b/agenta-backend/agenta_backend/tasks/evaluations.py
index 1882f35564..50ce322437 100644
--- a/agenta-backend/agenta_backend/tasks/evaluations.py
+++ b/agenta-backend/agenta_backend/tasks/evaluations.py
@@ -355,6 +355,7 @@ async def aggregate_evaluator_results(
         "auto_contains_any",
         "auto_contains_all",
         "auto_contains_json",
+        "auto_levenshtein_distance",
     ]:
         result = aggregation_service.aggregate_float(results)
diff --git a/agenta-backend/agenta_backend/tests/unit/test_evaluators.py b/agenta-backend/agenta_backend/tests/unit/test_evaluators.py
index 894233d7d4..f76dd9e6fb 100644
--- a/agenta-backend/agenta_backend/tests/unit/test_evaluators.py
+++ b/agenta-backend/agenta_backend/tests/unit/test_evaluators.py
@@ -1,6 +1,7 @@
 import pytest

 from agenta_backend.services.evaluators_service import (
+    auto_levenshtein_distance,
     auto_starts_with,
     auto_ends_with,
     auto_contains,
@@ -129,3 +130,22 @@ def test_auto_contains_json(output, expected):
     result = auto_contains_json({}, output, "", {}, {}, {})
     assert result.value == expected
+
+
+@pytest.mark.parametrize(
+    "output, correct_answer, threshold, expected",
+    [
+        ("hello world", "hello world", 5, True),
+        ("hello world", "hola mundo", 5, False),
+        ("hello world", "hello world!", 2, True),
+        ("hello world", "hello wor", 10, True),
+        ("hello world", "hello worl", None, 1),
+        ("hello world", "helo world", None, 1),
+    ],
+)
+def test_auto_levenshtein_distance(output, correct_answer, threshold, expected):
+    settings_values = {"threshold": threshold} if threshold is not None else {}
+    result = auto_levenshtein_distance(
+        {}, output, correct_answer, {}, settings_values, {}
+    )
+    assert result.value == expected
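A minimal sketch of the new evaluator's two result modes, mirroring the unit tests above: a boolean Result when a "threshold" setting is present, the numeric distance otherwise. The module path and the Result fields (type, value) are taken from this patch; everything else is illustrative.

# --- illustrative usage sketch (not part of the patch) ---
from agenta_backend.services.evaluators_service import (
    auto_levenshtein_distance,
    levenshtein_distance,
)

# The plain helper returns the raw edit distance.
assert levenshtein_distance("hello world", "helo world") == 1

# With a threshold the evaluator returns a boolean Result.
result = auto_levenshtein_distance(
    {}, "hello world", "hola mundo", {}, {"threshold": 5}, {}
)
assert result.type == "bool" and result.value is False

# Without a threshold it returns the distance itself as a number.
result = auto_levenshtein_distance({}, "hello world", "helo world", {}, {}, {})
assert result.type == "number" and result.value == 1
# --- end sketch ---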
diff --git a/agenta-backend/pyproject.toml b/agenta-backend/pyproject.toml
index 9471926a51..fbd07a5474 100644
--- a/agenta-backend/pyproject.toml
+++ b/agenta-backend/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "agenta_backend"
-version = "0.12.3"
+version = "0.12.4"
 description = ""
 authors = ["Mahmoud Mabrouk "]
 readme = "README.md"
diff --git a/agenta-cli/pyproject.toml b/agenta-cli/pyproject.toml
index e9c50bafd8..5a7c126a1a 100644
--- a/agenta-cli/pyproject.toml
+++ b/agenta-cli/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "agenta"
-version = "0.12.3"
+version = "0.12.4"
 description = "The SDK for agenta is an open-source LLMOps platform."
 readme = "README.md"
 authors = ["Mahmoud Mabrouk "]
diff --git a/agenta-web/package-lock.json b/agenta-web/package-lock.json
index 86a835dbdc..aa2a4e4556 100644
--- a/agenta-web/package-lock.json
+++ b/agenta-web/package-lock.json
@@ -1,12 +1,12 @@
 {
   "name": "dashboard",
-  "version": "0.12.3",
+  "version": "0.12.4",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "dashboard",
-      "version": "0.12.3",
+      "version": "0.12.4",
       "dependencies": {
         "@ant-design/colors": "^7.0.0",
         "@ant-design/icons": "^5.0.1",
diff --git a/agenta-web/package.json b/agenta-web/package.json
index c11f4e7625..6f25692c17 100644
--- a/agenta-web/package.json
+++ b/agenta-web/package.json
@@ -1,6 +1,6 @@
 {
   "name": "agenta",
-  "version": "0.12.3",
+  "version": "0.12.4",
   "private": true,
   "engines": {
     "node": ">=18"
diff --git a/agenta-web/src/components/Evaluations/EvaluationCardView/index.tsx b/agenta-web/src/components/Evaluations/EvaluationCardView/index.tsx
index 45e114047c..f0097bc08e 100644
--- a/agenta-web/src/components/Evaluations/EvaluationCardView/index.tsx
+++ b/agenta-web/src/components/Evaluations/EvaluationCardView/index.tsx
@@ -361,33 +361,38 @@ const EvaluationCardView: React.FC = ({
- - isChat ? onChatChange(value) : onInputChange( {target: {value}} as any, scenarioId, scenario.inputs.findIndex((ip) => ip.input_name === name), ) - } - inputParams={ isChat ? [{name: "chat", value: chat} as any] : variantData[0].inputParams?.map((item) => ({ ...item, value: scenario.inputs.find( (ip) => ip.input_name === item.name, )?.input_value, })) || [] - } - key={scenarioId} - useChatDefaultValue - form={form} - onFinish={() => onRun(scenarioId)} - imageSize="large" - /> +
+ Inputs + + isChat + ? onChatChange(value) + : onInputChange( + {target: {value}} as any, + scenarioId, + scenario.inputs.findIndex( + (ip) => ip.input_name === name, + ), + ) + } + inputParams={ + isChat + ? [{name: "chat", value: chat} as any] + : variantData[0].inputParams?.map((item) => ({ + ...item, + value: scenario.inputs.find( + (ip) => ip.input_name === item.name, + )?.input_value, + })) || [] + } + key={scenarioId} + useChatDefaultValue + form={form} + onFinish={() => onRun(scenarioId)} + imageSize="large" + /> +
@@ -405,11 +410,17 @@ const EvaluationCardView: React.FC = ({
- {!isAbTesting && ( - - Model Response - - )} +
+ {!isAbTesting ? ( + + Model Response + + ) : ( + + Outputs + + )} +
({ }, })) -export function LongTextCellRenderer(params: ICellRendererParams) { +export function LongTextCellRenderer(params: ICellRendererParams, output?: any) { const {value, api, node} = params const [expanded, setExpanded] = useState( node.rowHeight !== api.getSizesForCurrentTheme().rowHeight, @@ -95,11 +95,11 @@ export function LongTextCellRenderer(params: ICellRendererParams) { cellsArr.forEach((cell) => { cell.setAttribute( "style", - "overflow: visible; white-space: pre-wrap; text-overflow: unset;", + "overflow: visible; white-space: pre-wrap; text-overflow: unset; line-height: 2.5em;", ) }) const height = Math.max(...cellsArr.map((cell) => cell.scrollHeight)) - node.setRowHeight(height <= defaultHeight ? defaultHeight * 2 : height) + node.setRowHeight(height <= defaultHeight ? defaultHeight * 2 : height + 10) } else { cellsArr.forEach((cell) => { cell.setAttribute( @@ -121,9 +121,9 @@ export function LongTextCellRenderer(params: ICellRendererParams) { return (
- {value} + {output ? output : value} {expanded ? ( diff --git a/agenta-web/src/components/pages/evaluations/evaluationCompare/EvaluationCompare.tsx b/agenta-web/src/components/pages/evaluations/evaluationCompare/EvaluationCompare.tsx index 52a9b498eb..547e7644b7 100644 --- a/agenta-web/src/components/pages/evaluations/evaluationCompare/EvaluationCompare.tsx +++ b/agenta-web/src/components/pages/evaluations/evaluationCompare/EvaluationCompare.tsx @@ -99,7 +99,7 @@ const EvaluationCompareMode: React.FC = () => { field: `inputs.${ix}.value` as any, ...getFilterParams("text"), pinned: "left", - cellRenderer: LongTextCellRenderer, + cellRenderer: (params: any) => LongTextCellRenderer(params), }) }) @@ -110,7 +110,7 @@ const EvaluationCompareMode: React.FC = () => { field: "correctAnswer", ...getFilterParams("text"), pinned: "left", - cellRenderer: LongTextCellRenderer, + cellRenderer: (params: any) => LongTextCellRenderer(params), }) variants.forEach((variant, vi) => { @@ -133,25 +133,28 @@ const EvaluationCompareMode: React.FC = () => { cellRenderer: (params: any) => { return ( <> - {showDiff === "show" ? ( - - - item.evaluationId === variant.evaluationId, - )?.output?.result, - )} - expectedOutput={params.data?.correctAnswer} - /> - - ) : ( - getTypedValue( - params.data?.variants.find( - (item: any) => item.evaluationId === variant.evaluationId, - )?.output?.result, - ) - )} + {showDiff === "show" + ? LongTextCellRenderer( + params, + + item.evaluationId === variant.evaluationId, + )?.output?.result, + )} + expectedOutput={params.data?.correctAnswer} + />, + ) + : LongTextCellRenderer( + params, + getTypedValue( + params.data?.variants.find( + (item: any) => + item.evaluationId === variant.evaluationId, + )?.output?.result, + ), + )} ) }, diff --git a/agenta-web/src/components/pages/evaluations/evaluationScenarios/EvaluationScenarios.tsx b/agenta-web/src/components/pages/evaluations/evaluationScenarios/EvaluationScenarios.tsx index 04993ce5ba..687ae40f10 100644 --- a/agenta-web/src/components/pages/evaluations/evaluationScenarios/EvaluationScenarios.tsx +++ b/agenta-web/src/components/pages/evaluations/evaluationScenarios/EvaluationScenarios.tsx @@ -71,7 +71,7 @@ const EvaluationScenarios: React.FC = () => { valueGetter: (params) => { return getTypedValue(params.data?.inputs[index]) }, - cellRenderer: LongTextCellRenderer, + cellRenderer: (params: any) => LongTextCellRenderer(params), }) }) colDefs.push({ @@ -83,7 +83,7 @@ const EvaluationScenarios: React.FC = () => { valueGetter: (params) => { return params.data?.correct_answer?.toString() || "" }, - cellRenderer: LongTextCellRenderer, + cellRenderer: (params: any) => LongTextCellRenderer(params), }) evalaution?.variants.forEach((_, index) => { colDefs.push({ @@ -97,14 +97,15 @@ const EvaluationScenarios: React.FC = () => { if (result && result.type == "error") { return `${result?.error?.message}\n${result?.error?.stacktrace}` } - return showDiff === "show" ? ( - - ) : ( - LongTextCellRenderer(params) - ) + return showDiff === "show" + ? 
LongTextCellRenderer( + params, + , + ) + : LongTextCellRenderer(params) }, valueGetter: (params) => { const result = params.data?.outputs[index].result diff --git a/agenta-web/src/components/pages/evaluations/evaluators/NewEvaluatorModal.tsx b/agenta-web/src/components/pages/evaluations/evaluators/NewEvaluatorModal.tsx index 860fd03ca2..bb67065b5b 100644 --- a/agenta-web/src/components/pages/evaluations/evaluators/NewEvaluatorModal.tsx +++ b/agenta-web/src/components/pages/evaluations/evaluators/NewEvaluatorModal.tsx @@ -117,12 +117,15 @@ const DynamicFormField: React.FC = ({ type, default: defaultVal, description, + min, + max, + required, }) => { const {appTheme} = useAppTheme() const classes = useStyles() const {token} = theme.useToken() - const rules: Rule[] = [{required: true, message: "This field is required"}] + const rules: Rule[] = [{required: required ?? true, message: "This field is required"}] if (type === "regex") rules.push({ validator: (_, value) => @@ -167,7 +170,7 @@ const DynamicFormField: React.FC = ({ {type === "string" || type === "regex" ? ( ) : type === "number" ? ( - + ) : type === "boolean" || type === "bool" ? ( ) : type === "text" ? ( @@ -295,25 +298,6 @@ const NewEvaluatorModal: React.FC = ({ ) }, }, - { - title: "Type", - dataIndex: "type", - key: "type", - render(_, record) { - const template = Object.keys(record?.settings_template || {}) - .filter((key) => !!record?.settings_template[key]?.type) - .map((key) => ({ - key, - ...record?.settings_template[key]!, - })) - - return ( - <> - {template[0].type} - - ) - }, - }, { title: "Description", dataIndex: "description", diff --git a/agenta-web/src/lib/Types.ts b/agenta-web/src/lib/Types.ts index d0df2873ea..b3d0632242 100644 --- a/agenta-web/src/lib/Types.ts +++ b/agenta-web/src/lib/Types.ts @@ -382,6 +382,9 @@ export interface EvaluationSettingsTemplate { label: string default?: ValueType description: string + min?: number + max?: number + required?: boolean } export interface Evaluator {
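Because auto_levenshtein_distance is added to the aggregate_float list in tasks/evaluations.py, thresholded runs aggregate boolean results while unthresholded runs aggregate numbers. Assuming aggregate_float averages the per-scenario result values (an assumption, its implementation is not shown in this diff), Python's bool-to-0/1 coercion makes the aggregate the fraction of scenarios within the threshold, or the mean distance when no threshold is set. A quick illustration:

# --- illustrative aggregation sketch (assumes aggregate_float is a plain mean) ---
bool_results = [True, False, True, True]      # per-scenario Results with a threshold
print(sum(map(float, bool_results)) / len(bool_results))    # 0.75 -> share within threshold

numeric_results = [1, 0, 4, 3]                # per-scenario Results without a threshold
print(sum(numeric_results) / len(numeric_results))           # 2.0 -> mean Levenshtein distance
# --- end sketch ---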
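The min, max, and required fields added to EvaluationSettingsTemplate are enforced in this patch on the client: DynamicFormField builds its antd required rule from required ?? true and, presumably, forwards min/max to the number input. A hypothetical server-side counterpart could read the same schema as follows (sketch only; validate_setting is not part of the codebase):

# --- hypothetical settings validation sketch ---
from typing import Any, Dict

def validate_setting(template: Dict[str, Any], value: Any) -> None:
    # Mirror the web form: a setting is required unless the template opts out.
    if value is None or value == "":
        if template.get("required", True):
            raise ValueError(f"{template['label']} is required")
        return
    # Numeric bounds apply only when the template declares them,
    # e.g. the 0-1 bounds added to the Similarity Match threshold.
    if template.get("type") == "number":
        if "min" in template and value < template["min"]:
            raise ValueError(f"{template['label']} must be >= {template['min']}")
        if "max" in template and value > template["max"]:
            raise ValueError(f"{template['label']} must be <= {template['max']}")

# Example: the Similarity Match threshold (min 0, max 1, required).
validate_setting(
    {"label": "Threshold", "type": "number", "min": 0, "max": 1, "required": True},
    0.5,
)
# --- end sketch ---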