-
Notifications
You must be signed in to change notification settings - Fork 449
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Fengzhe Zhou
authored
Apr 19, 2024
1 parent
c172401
commit 8c85edd
Showing
95 changed files
with
1,506 additions
and
408 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
from opencompass.openicl.icl_prompt_template import PromptTemplate | ||
from opencompass.openicl.icl_retriever import ZeroRetriever | ||
from opencompass.openicl.icl_inferencer import GenInferencer | ||
from opencompass.datasets import GPQADataset, GPQAEvaluator | ||
from opencompass.utils import first_option_postprocess | ||
|
||
gpqa_reader_cfg = dict( | ||
input_columns=['question', 'A', 'B', 'C', 'D'], | ||
output_column='answer') | ||
|
||
gpqa_infer_cfg = dict( | ||
prompt_template=dict( | ||
type=PromptTemplate, | ||
template=dict( | ||
round=[ | ||
dict(role='HUMAN', prompt='What is the correct answer to this question: {question}\nChoices:\n' | ||
'(A){A}\n' | ||
'(B){B}\n' | ||
'(C){C}\n' | ||
'(D){D}\n' | ||
'Format your response as follows: "The correct answer is (insert answer here)"'), | ||
], )), | ||
retriever=dict(type=ZeroRetriever), | ||
inferencer=dict(type=GenInferencer)) | ||
|
||
gpqa_eval_cfg = dict(evaluator=dict(type=GPQAEvaluator), | ||
pred_postprocessor=dict(type=first_option_postprocess, options='ABCD')) | ||
|
||
gpqa_datasets = [] | ||
gpqa_subsets = { | ||
'extended': 'gpqa_extended.csv', | ||
'main': 'gpqa_main.csv', | ||
'diamond': 'gpqa_diamond.csv' | ||
} | ||
|
||
for split in list(gpqa_subsets.keys()): | ||
gpqa_datasets.append( | ||
dict( | ||
abbr='GPQA_' + split, | ||
type=GPQADataset, | ||
path='./data/gpqa/', | ||
name=gpqa_subsets[split], | ||
reader_cfg=gpqa_reader_cfg, | ||
infer_cfg=gpqa_infer_cfg, | ||
eval_cfg=gpqa_eval_cfg) | ||
) |
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
from mmengine.config import read_base | ||
|
||
with read_base(): | ||
from .mbpp_gen_1e1056 import mbpp_datasets # noqa: F401, F403 | ||
from .mbpp_gen_830460 import mbpp_datasets # noqa: F401, F403 |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
from opencompass.openicl.icl_prompt_template import PromptTemplate | ||
from opencompass.openicl.icl_retriever import ZeroRetriever | ||
from opencompass.openicl.icl_inferencer import GenInferencer | ||
from opencompass.datasets import MBPPDataset_V2, MBPPPassKEvaluator | ||
|
||
mbpp_reader_cfg = dict(input_columns=["text", "test_list"], output_column="test_column") | ||
|
||
mbpp_infer_cfg = dict( | ||
prompt_template=dict( | ||
type=PromptTemplate, | ||
template=dict( | ||
round=[ | ||
dict(role="HUMAN", prompt="You are an expert Python programmer, and here is your task: Write a function to find the similar elements from the given two tuple lists. Your code should pass these tests:\n\n assert similar_elements((3, 4, 5, 6),(5, 7, 4, 10)) == (4, 5)\nassert similar_elements((1, 2, 3, 4),(5, 4, 3, 7)) == (3, 4) \nassert similar_elements((11, 12, 14, 13),(17, 15, 14, 13)) == (13, 14) \n"), | ||
dict(role="BOT", prompt="[BEGIN]\n 'def similar_elements(test_tup1, test_tup2):\r\n res = tuple(set(test_tup1) & set(test_tup2))\r\n return (res)' \n[DONE] \n\n "), | ||
|
||
dict(role="HUMAN", prompt="You are an expert Python programmer, and here is your task: Write a python function to identify non-prime numbers. Your code should pass these tests:\n\n assert is_not_prime(2) == False \nassert is_not_prime(10) == True \nassert is_not_prime(35) == True \n"), | ||
dict(role="BOT", prompt="[BEGIN]\n 'import math\r\ndef is_not_prime(n):\r\n result = False\r\n for i in range(2,int(math.sqrt(n)) + 1):\r\n if n % i == 0:\r\n result = True\r\n return result' \n[DONE] \n\n "), | ||
|
||
dict(role="HUMAN", prompt="You are an expert Python programmer, and here is your task: Write a function to find the largest integers from a given list of numbers using heap queue algorithm. Your code should pass these tests:\n\n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],3)==[85, 75, 65] \nassert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],2)==[85, 75] \nassert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],5)==[85, 75, 65, 58, 35] \n"), | ||
dict(role="BOT", prompt="[BEGIN]\n 'import heapq as hq\r\ndef heap_queue_largest(nums,n):\r\n largest_nums = hq.nlargest(n, nums)\r\n return largest_nums' \n[DONE] \n\n "), | ||
|
||
dict(role="HUMAN", prompt="You are an expert Python programmer, and here is your task: {text} Your code should pass these tests:\n\n {test_list} \n"), | ||
dict(role="BOT", prompt="[BEGIN]\n"), | ||
], | ||
), | ||
), | ||
retriever=dict(type=ZeroRetriever), | ||
inferencer=dict(type=GenInferencer, max_out_len=512), | ||
) | ||
|
||
mbpp_eval_cfg = dict(evaluator=dict(type=MBPPPassKEvaluator), pred_role="BOT") | ||
|
||
mbpp_datasets = [ | ||
dict( | ||
type=MBPPDataset_V2, | ||
abbr="mbpp_passk", | ||
path="./data/mbpp/mbpp.jsonl", | ||
reader_cfg=mbpp_reader_cfg, | ||
infer_cfg=mbpp_infer_cfg, | ||
eval_cfg=mbpp_eval_cfg, | ||
) | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
# This config is used for pass@k evaluation with dataset repetition | ||
# That model cannot generate multiple response for single input | ||
from opencompass.openicl.icl_prompt_template import PromptTemplate | ||
from opencompass.openicl.icl_retriever import ZeroRetriever | ||
from opencompass.openicl.icl_inferencer import GenInferencer | ||
from opencompass.datasets import MBPPDataset_V2, MBPPPassKEvaluator | ||
|
||
mbpp_reader_cfg = dict(input_columns=["text", "test_list"], output_column="test_column") | ||
|
||
mbpp_infer_cfg = dict( | ||
prompt_template=dict( | ||
type=PromptTemplate, | ||
template=dict( | ||
round=[ | ||
dict(role="HUMAN", prompt="You are an expert Python programmer, and here is your task: Write a function to find the similar elements from the given two tuple lists. Your code should pass these tests:\n\n assert similar_elements((3, 4, 5, 6),(5, 7, 4, 10)) == (4, 5)\nassert similar_elements((1, 2, 3, 4),(5, 4, 3, 7)) == (3, 4) \nassert similar_elements((11, 12, 14, 13),(17, 15, 14, 13)) == (13, 14) \n"), | ||
dict(role="BOT", prompt="[BEGIN]\n 'def similar_elements(test_tup1, test_tup2):\r\n res = tuple(set(test_tup1) & set(test_tup2))\r\n return (res)' \n[DONE] \n\n "), | ||
|
||
dict(role="HUMAN", prompt="You are an expert Python programmer, and here is your task: Write a python function to identify non-prime numbers. Your code should pass these tests:\n\n assert is_not_prime(2) == False \nassert is_not_prime(10) == True \nassert is_not_prime(35) == True \n"), | ||
dict(role="BOT", prompt="[BEGIN]\n 'import math\r\ndef is_not_prime(n):\r\n result = False\r\n for i in range(2,int(math.sqrt(n)) + 1):\r\n if n % i == 0:\r\n result = True\r\n return result' \n[DONE] \n\n "), | ||
|
||
dict(role="HUMAN", prompt="You are an expert Python programmer, and here is your task: Write a function to find the largest integers from a given list of numbers using heap queue algorithm. Your code should pass these tests:\n\n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],3)==[85, 75, 65] \nassert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],2)==[85, 75] \nassert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],5)==[85, 75, 65, 58, 35] \n"), | ||
dict(role="BOT", prompt="[BEGIN]\n 'import heapq as hq\r\ndef heap_queue_largest(nums,n):\r\n largest_nums = hq.nlargest(n, nums)\r\n return largest_nums' \n[DONE] \n\n "), | ||
|
||
dict(role="HUMAN", prompt="You are an expert Python programmer, and here is your task: {text} Your code should pass these tests:\n\n {test_list} \n"), | ||
dict(role="BOT", prompt="[BEGIN]\n"), | ||
], | ||
), | ||
), | ||
retriever=dict(type=ZeroRetriever), | ||
inferencer=dict(type=GenInferencer, max_out_len=512), | ||
) | ||
|
||
mbpp_eval_cfg = dict(evaluator=dict(type=MBPPPassKEvaluator), pred_role="BOT") | ||
|
||
mbpp_datasets = [ | ||
dict( | ||
type=MBPPDataset_V2, | ||
abbr="mbpp_repeat10", | ||
path="./data/mbpp/mbpp.jsonl", | ||
num_repeats=10, | ||
reader_cfg=mbpp_reader_cfg, | ||
infer_cfg=mbpp_infer_cfg, | ||
eval_cfg=mbpp_eval_cfg, | ||
) | ||
] |
Oops, something went wrong.