Skip to content

Commit

Permalink
Added auto similarity
Browse files Browse the repository at this point in the history
Removed langchain and use openai directly
  • Loading branch information
mmabrouk committed May 28, 2024
1 parent a0d0420 commit 1a6e4ce
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 12 deletions.
11 changes: 10 additions & 1 deletion agenta-backend/agenta_backend/resources/evaluators/evaluators.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,16 @@
"default": "We have an LLM App that we want to evaluate its outputs. Based on the prompt and the parameters provided below evaluate the output based on the evaluation strategy below:\nEvaluation strategy: 0 to 10 0 is very bad and 10 is very good.\nPrompt: {llm_app_prompt_template}\nInputs: country: {country}\nCorrect Answer:{correct_answer}\nEvaluate this: {variant_output}\n\nAnswer ONLY with one of the given grading or evaluation options.",
"description": "Template for AI critique prompts",
"required": True,
}
},
"correct_answer_key": {
"label": "Correct Answer",
"default": "correct_answer",
"type": "string",
"advanced": True, # Tells the frontend that this setting is advanced and should be hidden by default
"ground_truth_key": True, # Tells the frontend that is the name of the column in the test set that should be shown as a ground truth to the user
"description": "The name of the column in the test data that contains the correct answer",
},

},
"description": "AI Critique evaluator sends the generated answer and the correct_answer to an LLM model and uses it to evaluate the correctness of the answer. You need to provide the evaluation prompt (or use the default prompt).",
},
Expand Down
62 changes: 51 additions & 11 deletions agenta-backend/agenta_backend/services/evaluators_service.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
import re
import json
import logging
import re
from typing import Any, Dict, List, Tuple

import httpx
from typing import Any, Dict, Tuple, List
from openai import OpenAI

from agenta_backend.services.security import sandbox
from agenta_backend.models.db_models import Error, Result

from langchain.llms import OpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
import logging
from agenta_backend.services.security import sandbox

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
Expand Down Expand Up @@ -267,11 +265,11 @@ def auto_ai_critique(
{"role": "user", "content": str(chain_run_args)}
]

response = openai.ChatCompletion.create(
client = OpenAI(api_key=openai_api_key)
response = client.chat.completions.create(
model="gpt-3.5-turbo",
messages=messages,
temperature=0.8,
api_key=openai_api_key
temperature=0.8
)

evaluation_output = response.choices[0].message["content"].strip()
Expand Down Expand Up @@ -516,6 +514,47 @@ def auto_levenshtein_distance(
)


def auto_similarity_match(
    inputs: Dict[str, Any],
    output: str,
    data_point: Dict[str, Any],
    app_params: Dict[str, Any],
    settings_values: Dict[str, Any],
    lm_providers_keys: Dict[str, Any],
) -> Result:
    """Evaluate `output` against the ground-truth answer via Jaccard similarity.

    Both strings are split on whitespace into token sets; the similarity is
    |intersection| / |union|. The result is a boolean: True when the
    similarity strictly exceeds `settings_values["similarity_threshold"]`.

    Args:
        inputs: Test-case inputs (unused here, part of the evaluator signature).
        output: The LLM app's generated answer.
        data_point: The test-set row; the ground truth is looked up in it
            via `get_correct_answer` using the configured answer key.
        app_params: App configuration (unused here).
        settings_values: Evaluator settings; must contain
            "similarity_threshold" and the correct-answer key config.
        lm_providers_keys: Provider API keys (unused here).

    Returns:
        Result(type="bool") with the match verdict, or Result(type="error")
        when the ground truth is missing/invalid or an unexpected error occurs.
    """
    try:
        correct_answer = get_correct_answer(data_point, settings_values)
        output_tokens = set(output.split())
        answer_tokens = set(correct_answer.split())

        union = output_tokens | answer_tokens
        if not union:
            # Both strings are empty/whitespace: the token sets are identical,
            # so treat this as a perfect match instead of dividing by zero.
            similarity = 1.0
        else:
            similarity = len(output_tokens & answer_tokens) / len(union)

        # NOTE(review): assumes "similarity_threshold" arrives as a number —
        # confirm the frontend never sends it as a string.
        is_similar = similarity > settings_values["similarity_threshold"]
        return Result(type="bool", value=is_similar)
    except ValueError as e:
        # get_correct_answer raises ValueError when the ground-truth column
        # is missing or misconfigured; surface its message as-is.
        return Result(
            type="error",
            value=None,
            error=Error(
                message=str(e),
            ),
        )
    except Exception as e:  # pylint: disable=broad-except
        return Result(
            type="error",
            value=None,
            error=Error(
                message="Error during Auto Similarity Match evaluation",
                stacktrace=str(e),
            ),
        )


EVALUATOR_FUNCTIONS = {
"auto_exact_match": auto_exact_match,
"auto_regex_test": auto_regex_test,
Expand All @@ -530,6 +569,7 @@ def auto_levenshtein_distance(
"auto_contains_all": auto_contains_all,
"auto_contains_json": auto_contains_json,
"auto_levenshtein_distance": auto_levenshtein_distance,
"auto_similarity_match": auto_similarity_match, # Added here
}


Expand Down

0 comments on commit 1a6e4ce

Please sign in to comment.