-
Notifications
You must be signed in to change notification settings - Fork 29
/
code_metrics_test.py
88 lines (77 loc) · 3.02 KB
/
code_metrics_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import pytest
from continuous_eval.metrics.code.python.code_deterministic_metrics import CodeStringMatch, PythonASTSimilarity
from continuous_eval.metrics.code.sql.deterministic import ASTDiffWeightConfig, SQLASTSimilarity, SQLSyntaxMatch
from tests.helpers import example_datum
from tests.helpers.utils import all_close
def test_code_string_match():
    """Check CodeStringMatch against pinned scores for the Python examples.

    Each entry of ``example_datum.PYTHON_CODE_EXAMPLES`` is paired positionally
    with one expected score dict; the metric output is compared to it with the
    ``all_close`` helper.
    """
    expected_results = [
        {"Exact_Match_Score": 0, "Fuzzy_Match_Score": 0.89},
        {"Exact_Match_Score": 0, "Fuzzy_Match_Score": 0.73},
        {"Exact_Match_Score": 0, "Fuzzy_Match_Score": 0.67},
        {"Exact_Match_Score": 0, "Fuzzy_Match_Score": 0.21},
        {"Exact_Match_Score": 0, "Fuzzy_Match_Score": 0.9},
        {"Exact_Match_Score": 0, "Fuzzy_Match_Score": 0.71},
    ]
    metric = CodeStringMatch()
    # NOTE(review): zip() stops at the shorter sequence, so a length mismatch
    # between the examples and the expectations goes unchecked -- confirm that
    # the fixture has exactly one example per expected entry.
    pairs = zip(example_datum.PYTHON_CODE_EXAMPLES, expected_results)
    for index, (datum, expected) in enumerate(pairs):
        actual = metric(
            answer=datum["answer"],
            ground_truth_answers=datum["ground_truths"],
        )
        # Assert per example so a failure names the offending case and shows
        # both values, rather than an opaque ``assert all(<generator>)``.
        assert all_close(actual, expected), (
            f"example {index}: got {actual!r}, expected {expected!r}"
        )
def test_python_ast_similarity():
    """Check PythonASTSimilarity against pinned scores for the Python examples.

    Each entry of ``example_datum.PYTHON_CODE_EXAMPLES`` is paired positionally
    with one expected score dict; the metric output is compared to it with the
    ``all_close`` helper.
    """
    expected_results = [
        {"Python_AST_Similarity": 1.0},
        {"Python_AST_Similarity": 0.0},
        {"Python_AST_Similarity": 0.0224},
        {"Python_AST_Similarity": 0.0},
        # NOTE(review): -1.0 presumably marks an example the metric could not
        # score (e.g. unparsable code) -- confirm against the metric itself.
        {"Python_AST_Similarity": -1.0},
        {"Python_AST_Similarity": 0.0937},
    ]
    metric = PythonASTSimilarity()
    # NOTE(review): zip() stops at the shorter sequence, so a length mismatch
    # between the examples and the expectations goes unchecked -- confirm that
    # the fixture has exactly one example per expected entry.
    pairs = zip(example_datum.PYTHON_CODE_EXAMPLES, expected_results)
    for index, (datum, expected) in enumerate(pairs):
        actual = metric(
            answer=datum["answer"],
            ground_truth_answers=datum["ground_truths"],
        )
        # Assert per example so a failure names the offending case and shows
        # both values, rather than an opaque ``assert all(<generator>)``.
        assert all_close(actual, expected), (
            f"example {index}: got {actual!r}, expected {expected!r}"
        )
def test_sql_syntax_match():
    """Check SQLSyntaxMatch against pinned scores for the SQL examples.

    Each entry of ``example_datum.SQL_CODE_EXAMPLES`` is paired positionally
    with one expected score dict; the metric output is compared to it with the
    ``all_close`` helper.
    """
    expected_results = [
        {'SQL_Syntax_Match': 1.0},
        {'SQL_Syntax_Match': 0},
        {'SQL_Syntax_Match': 0},
    ]
    metric = SQLSyntaxMatch()
    # NOTE(review): zip() stops at the shorter sequence, so a length mismatch
    # between the examples and the expectations goes unchecked -- confirm that
    # the fixture has exactly one example per expected entry.
    pairs = zip(example_datum.SQL_CODE_EXAMPLES, expected_results)
    for index, (datum, expected) in enumerate(pairs):
        actual = metric(
            answer=datum["answer"],
            ground_truth_answers=datum["ground_truths"],
        )
        # Assert per example so a failure names the offending case and shows
        # both values, rather than an opaque ``assert all(<generator>)``.
        assert all_close(actual, expected), (
            f"example {index}: got {actual!r}, expected {expected!r}"
        )
def test_sql_ast_similarity():
    """Check SQLASTSimilarity (default construction) on the SQL examples.

    Each entry of ``example_datum.SQL_CODE_EXAMPLES`` is paired positionally
    with one expected score dict; the metric output is compared to it with the
    ``all_close`` helper.
    """
    expected_results = [
        {'SQL_AST_Similarity': 1.0},
        {'SQL_AST_Similarity': 0.9375},
        {'SQL_AST_Similarity': 0.8},
    ]
    metric = SQLASTSimilarity()
    # NOTE(review): zip() stops at the shorter sequence, so a length mismatch
    # between the examples and the expectations goes unchecked -- confirm that
    # the fixture has exactly one example per expected entry.
    pairs = zip(example_datum.SQL_CODE_EXAMPLES, expected_results)
    for index, (datum, expected) in enumerate(pairs):
        actual = metric(
            answer=datum["answer"],
            ground_truth_answers=datum["ground_truths"],
        )
        # Assert per example so a failure names the offending case and shows
        # both values, rather than an opaque ``assert all(<generator>)``.
        assert all_close(actual, expected), (
            f"example {index}: got {actual!r}, expected {expected!r}"
        )
def test_sql_optimized_ast_similarity():
    """Check SQLASTSimilarity with ``optimize=True`` and custom diff weights.

    The metric is built with an explicit ``ASTDiffWeightConfig`` and run over
    ``example_datum.SQL_CODE_EXAMPLES``; each output is compared to its
    positionally paired expected score dict with the ``all_close`` helper.
    """
    expected_results = [
        {'SQL_AST_Similarity': 1.0},
        {'SQL_AST_Similarity': 1.0},
        {'SQL_AST_Similarity': 0.75},
    ]
    # Only update/insert/remove carry a non-zero weight in this configuration;
    # keep, move and the default are all zero.
    weights = ASTDiffWeightConfig(
        keep=0.0,
        update=2,
        insert=1.0,
        remove=1.5,
        move=0,
        default=0,
    )
    metric = SQLASTSimilarity(optimize=True, diff_weights=weights)
    # NOTE(review): zip() stops at the shorter sequence, so a length mismatch
    # between the examples and the expectations goes unchecked -- confirm that
    # the fixture has exactly one example per expected entry.
    pairs = zip(example_datum.SQL_CODE_EXAMPLES, expected_results)
    for index, (datum, expected) in enumerate(pairs):
        actual = metric(
            answer=datum["answer"],
            ground_truth_answers=datum["ground_truths"],
        )
        # Assert per example so a failure names the offending case and shows
        # both values, rather than an opaque ``assert all(<generator>)``.
        assert all_close(actual, expected), (
            f"example {index}: got {actual!r}, expected {expected!r}"
        )