Skip to content

Commit

Permalink
Update
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-bklein committed Oct 29, 2024
1 parent 1645378 commit b9a055c
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 15 deletions.
4 changes: 2 additions & 2 deletions framework-evalanche/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,8 @@ To run a reference dataset through your desired LLM pipelines on the data page,
will be passed to the stored procedure as:
```python
{
"QUESTION": "What is the capital of France?",
"QUESTION_TYPE": "Geography"
"TASK": "Tell a joke",
"PERSONA": "Pirate"
}
```
An appropriately crafted stored procedure could look like the one below.
Expand Down
3 changes: 2 additions & 1 deletion framework-evalanche/pages/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,8 @@ def preview_merge_data() -> None:
st.error(f"Error: {e}")
if data is not None:
st.write(f"Limited to {limit} rows.")
st.dataframe(data, hide_index=True, use_container_width=True)
st.dataframe(data, hide_index=True, use_container_width=False)



def data_spec(key_name: str, instructions: str, height=200, join_key=True) -> None:
Expand Down
Binary file modified framework-evalanche/src.zip
Binary file not shown.
27 changes: 15 additions & 12 deletions framework-evalanche/src/custom_metrics.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Python 3.8 type hints
from typing import Union
from typing import Union, Optional

from src.metrics import Metric
from src.prompts import *
Expand All @@ -19,28 +19,30 @@
"""

# Custom Metrics
# class CustomRelevancy(Metric):
# class CustomAnswerRelevancy(Metric):
# def __init__(
# self,
# model: str = "llama3.1-8b"
# ):
# super().__init__(
# name="CustomRelevancy",
# description="Evaluates the correctness, relevance, and helpfulness of a response compared to a reference answer.",
# prompt=Relevance_prompt,
# description="""
# Evaluates the relevance of a response to a user question on a scale of 1-5.
# 5 indicates the scorer strongly agrees that the response is relevant and 1 indicates strong disagreement.""",
# prompt=AnswerRelevancy_prompt,
# required={
# "question": "User question",
# "answer_ref": "Expected answer to the question",
# "ai_response": "LLM-generated response to the question",
# },
# )
# self.model = model

# def get_prompt(
# self, question: str, answer_ref: str, ai_response: str
# self, question: str, ai_response: str
# ) -> Union[str, None]:
# if self.prompt is not None:
# fstrings = {
# "question": question,
# "answer_ref": answer_ref,
# "ai_response": ai_response,
# }
# return self.prompt.format(**fstrings)
Expand All @@ -50,21 +52,22 @@
# def evaluate(
# self,
# question: str,
# answer_ref: str,
# ai_response: str,
# model: str = "llama3.1-8b",
# model: Optional[str] = None,
# ):
# import re

# prompt = self.get_prompt(question, answer_ref, ai_response)
# model_to_use = model if model else self.model

# response = run_async_sql_complete(self.session, model, prompt)
# prompt = self.get_prompt(question, ai_response)

# response = run_async_sql_complete(self.session, model_to_use, prompt)
# values = [str(i) for i in range(1, 11)]
# pattern = f"[{''.join(values)}]"
# match = re.search(pattern, response)

# return int(match.group(0)) if match else None

custom_metrics = [
# CustomRelevancy()
# CustomAnswerRelevancy()
]

0 comments on commit b9a055c

Please sign in to comment.