-
Notifications
You must be signed in to change notification settings - Fork 449
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Update] Update MATH dataset with model judge (#1711)
* Update math with llm judge * Update math with llm judge * Update math with llm judge * Update math with llm judge * Update math with llm judge
- Loading branch information
Showing
5 changed files
with
226 additions
and
21 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
from mmengine.config import read_base | ||
|
||
with read_base(): | ||
from opencompass.configs.datasets.math.math_0shot_llm_judge_v2_gen_31d777 import math_datasets | ||
|
||
# Choose a model of interest | ||
from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_72b_instruct import models as qwen2_5_72b_instruct_model | ||
|
||
# Judge / postprocessor model names and API endpoints. The values look like
# placeholders -- presumably to be replaced with the served models; confirm.
eval_model_name = 'eval_model_name'
postprocessor_model_name = 'postprocessor_model_name'
eval_model_urls = ['http://0.0.0.0:23333/v1']
postprocessor_model_urls = ['http://0.0.0.0:23333/v1']

# Concatenate every list bound to a name ending in '_datasets' / '_model'
# that is visible in this module's namespace at this point.
datasets = sum(
    (value for name, value in locals().items() if name.endswith('_datasets')),
    [],
)
models = sum(
    (value for name, value in locals().items() if name.endswith('_model')),
    [],
)
|
||
|
||
# Overwrite each dataset's evaluator settings with the judge and
# postprocessor model names / URLs configured in this file.
for dataset in datasets:
    dataset['eval_cfg']['evaluator'].update(
        model_name=eval_model_name,
        url=eval_model_urls,
        post_url=postprocessor_model_urls,
        post_model_name=postprocessor_model_name,
    )
|
||
|
||
# -------------Inference Stage ---------------------------------------- | ||
|
||
from opencompass.runners import LocalRunner | ||
from opencompass.partitioners import NaivePartitioner, NumWorkerPartitioner | ||
from opencompass.tasks import OpenICLInferTask, OpenICLEvalTask | ||
|
||
# Inference stage: partition with NumWorkerPartitioner (num_worker=8) and
# run OpenICLInferTask through a LocalRunner capped at 8 workers.
infer = {
    'partitioner': {'type': NumWorkerPartitioner, 'num_worker': 8},
    'runner': {
        'type': LocalRunner,
        'max_num_workers': 8,
        'task': {'type': OpenICLInferTask},
    },
}
|
||
# Evaluation stage: partition with NaivePartitioner (n=10) and run
# OpenICLEvalTask through a LocalRunner capped at 256 workers.
# NOTE: the name shadows the builtin `eval`, but it is kept because the name
# itself is the config key.
eval = {
    'partitioner': {'type': NaivePartitioner, 'n': 10},
    'runner': {
        'type': LocalRunner,
        'max_num_workers': 256,
        'task': {'type': OpenICLEvalTask},
    },
}
51 changes: 51 additions & 0 deletions
51
opencompass/configs/datasets/math/math_0shot_llm_judge_v2_gen_31d777.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
from opencompass.openicl.icl_prompt_template import PromptTemplate | ||
from opencompass.openicl.icl_retriever import ZeroRetriever | ||
from opencompass.openicl.icl_inferencer import GenInferencer | ||
from opencompass.datasets import MATHDataset, GaoKaoMATHEvaluator | ||
|
||
# ----------------------------- Model Eval Parameters ----------------------------- | ||
|
||
naive_model_name = 'dlc_model' # replace with your model name | ||
naive_model_url = ['http://0.0.0.0:23333/v1'] # Multiple API endpoints for acceleration | ||
|
||
# ----------------------------- Detailed Config ----------------------------- | ||
|
||
# Reader: the 'problem' column is the input, 'solution' is the reference.
math_reader_cfg = {'input_columns': ['problem'], 'output_column': 'solution'}

# Zero-shot generation: a single HUMAN turn holding the problem followed by
# the step-by-step / \boxed{} instruction; no in-context examples retrieved.
math_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'round': [
                {
                    'role': 'HUMAN',
                    'prompt': '{problem}\nPlease reason step by step, and put your final answer within \\boxed{}.',
                },
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer, 'max_out_len': 2048},
}
|
||
# Judge configuration: the same model name and URL list are used for both
# the evaluator and its postprocessing step.
evaluator = {
    'type': GaoKaoMATHEvaluator,
    'model_name': naive_model_name,
    'url': naive_model_url,
    'language': 'en',
    'with_postprocess': True,
    'post_url': naive_model_url,
    'post_model_name': naive_model_name,
}

math_eval_cfg = {
    'evaluator': evaluator,
}
|
||
# Single MATH dataset entry wiring together the reader, inference and
# evaluation configs defined in this file.
math_datasets = [
    {
        'type': MATHDataset,
        'abbr': 'math',
        'path': 'opencompass/math',
        'reader_cfg': math_reader_cfg,
        'infer_cfg': math_infer_cfg,
        'eval_cfg': math_eval_cfg,
    },
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters