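# metrics.py
# Scores each model's extracted metadata against the ground-truth columns,
# tallying TP/FP/FN per record, and writes the results to an Excel file.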
import ast

import pandas as pd

df = pd.read_csv('path/to/file.tsv', sep='\t')
# Ground truth
ground_truth_columns = ['Species name', 'Cultivar', 'Genotype', 'Treatment', 'Dev stage', 'Tissue', 'Age']
# Response evaluation
def evaluate_response(model_response, ground_truth):
    metrics = {
        "TP": 0,
        "FP": 0,
        "FN": 0
    }
    # Compare keys and values between the ground truth and the model response
    for key in ground_truth:
        if key in model_response:
            if model_response[key] == ground_truth[key]:
                metrics["TP"] += 1  # True positive: value matches
            else:
                metrics["FP"] += 1  # False positive: value differs
        else:
            metrics["FN"] += 1  # False negative: expected key missing
    # Any extra keys the model returned also count as false positives
    for key in model_response:
        if key not in ground_truth:
            metrics["FP"] += 1
    return metrics
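# Illustrative example (hypothetical values): a response that matches one
# field, mislabels a second, and omits a third scores one of each:
#   evaluate_response(
#       {'Species name': 'Oryza sativa', 'Tissue': 'root'},
#       {'Species name': 'Oryza sativa', 'Tissue': 'leaf', 'Age': '7 days'},
#   )
#   -> {'TP': 1, 'FP': 1, 'FN': 1}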
# Formatting: parse the model's raw string output into a dict
def convert_to_dict(response):
    try:
        return ast.literal_eval(response)
    except (ValueError, SyntaxError, TypeError):
        # Malformed, non-literal, or missing (NaN) responses count as empty
        return {}
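# e.g. convert_to_dict("{'Tissue': 'leaf'}") returns {'Tissue': 'leaf'},
# while convert_to_dict('not a dict') falls back to {}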
# Evaluation
results = []
for index, row in df.iterrows():
    ground_truth = {col: row[col] for col in ground_truth_columns}
    for model in ['Llama', 'Phi3', 'Gemma', 'GPT 3.5', 'GPT 4', 'GPT 4-o']:
        model_response = convert_to_dict(row[model])
        evaluation = evaluate_response(model_response, ground_truth)
        results.append({
            'PMID/SRA': row['PMID/ SRA'],
            'Model': model,
            'TP': evaluation['TP'],
            'FP': evaluation['FP'],
            'FN': evaluation['FN']
        })
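# Note: writing .xlsx output with to_excel requires an Excel engine
# such as openpyxl to be installed.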
results_df = pd.DataFrame(results)
results_df.to_excel('x_results.xlsx', index=False)
print("Evaluation completed and stored in 'x_results.xlsx'")