-
Notifications
You must be signed in to change notification settings - Fork 46
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge remote-tracking branch 'origin/dev' into dev
# Conflicts: # examples/smart_coding/smart_coding_learning_bench/comment/benchmarkingjob.yaml # examples/smart_coding/smart_coding_learning_bench/comment/testalgorithms/gen/basemodel.py # examples/smart_coding/smart_coding_learning_bench/comment/testalgorithms/gen/gen_algorithm.yaml # examples/smart_coding/smart_coding_learning_bench/comment/testenv/llm_judgement.py # examples/smart_coding/smart_coding_learning_bench/comment/testenv/testenv.yaml # examples/smart_coding/smart_coding_learning_bench/issue/benchmarkingjob.yaml # examples/smart_coding/smart_coding_learning_bench/issue/testenv/llm_judgement.py # examples/smart_coding/smart_coding_learning_bench/issue/testenv/testenv.yaml
- Loading branch information
Showing
16 changed files
with
13,502 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
import json | ||
import requests | ||
|
||
# Fetch the comments of every issue/PR listed in test_data.json and flatten
# them into one record per issue, written to extracted_data.json.

# Request timeout in seconds; without one a single stalled connection would
# block the whole run indefinitely.
REQUEST_TIMEOUT = 30

# Load the input records. Each record is expected to provide at least the
# 'title' and 'comments_url' keys read below.
with open('test_data.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# One flattened record per issue whose comments were fetched successfully.
all_extracted_data = []

for item in data:
    title = item['title']
    comments_url = item['comments_url']

    # Fetch the comments for this entry. A network-level failure is reported
    # and skipped so that the records collected so far are still saved.
    try:
        response = requests.get(comments_url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print(f"请求异常: {exc},对于标题: {title}")
        continue

    if response.status_code != 200:
        print(f"请求失败,状态码: {response.status_code},对于标题: {title}")
        continue

    comments = response.json()

    extracted_data = {
        "title": title,
    }

    for i, comment in enumerate(comments):
        entry = {
            "user_login": comment["user"]["login"],
            "created_at": comment["created_at"],
            "updated_at": comment["updated_at"],
            "body": comment["body"]
        }

        if i == 0:
            # The first comment's fields are merged into the top-level record.
            extracted_data.update(entry)
        else:
            # Every later comment is stored as a nested "answer_<i>" entry.
            extracted_data[f"answer_{i}"] = entry

    all_extracted_data.append(extracted_data)

# Persist everything that was collected. json.dump keeps its default
# ensure_ascii=True, so the output bytes are unchanged by the explicit encoding.
with open('extracted_data.json', 'w', encoding='utf-8') as f:
    json.dump(all_extracted_data, f, indent=4)

print("所有数据已提取并保存到 extracted_data.json")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
[ | ||
{ | ||
"title": "How to set_epoch with interleave_datasets?", | ||
"html_url": "https://github.com/huggingface/datasets/issues/7051", | ||
"comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7051/comments", | ||
"labels": "[]", | ||
"state": "open", | ||
"pull_request": "NaN", | ||
"is_pull_request": false | ||
}, | ||
{ | ||
"title": "add checkpoint and resume title in docs", | ||
"html_url": "https://github.com/huggingface/datasets/pull/7050", | ||
"comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7050/comments", | ||
"labels": "[]", | ||
"state": "closed", | ||
"pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7050.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7050', 'merged_at': '2024-07-15T15:59:56Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7050.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7050'}", | ||
"is_pull_request": true | ||
}, | ||
{ | ||
"title": "Save nparray as list", | ||
"html_url": "https://github.com/huggingface/datasets/issues/7049", | ||
"comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7049/comments", | ||
"labels": "[]", | ||
"state": "open", | ||
"pull_request": "NaN", | ||
"is_pull_request": false | ||
}, | ||
{ | ||
"title": "ImportError: numpy.core.multiarray when using `filter`", | ||
"html_url": "https://github.com/huggingface/datasets/issues/7048", | ||
"comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7048/comments", | ||
"labels": "[]", | ||
"state": "open", | ||
"pull_request": "NaN", | ||
"is_pull_request": false | ||
} | ||
] |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
# README | ||
|
||
## Simple QA | ||
|
||
### Prepare Data | ||
|
||
The data directory of the simple-qa example is structured as follows: | ||
|
||
``` | ||
. | ||
├── test_data | ||
│ └── data.jsonl | ||
└── train_data | ||
└── data.jsonl | ||
``` | ||
|
||
`train_data/data.jsonl` is empty, and the `test_data/data.jsonl` is as follows: | ||
|
||
``` | ||
{"question": "如果小明有5个苹果,他给了小华3个,那么小明还剩下多少个苹果?\nA. 2个\nB. 3个\nC. 4个\nD. 5个", "answer": "A"} | ||
{"question": "下列哪个数是最小的质数?\nA. 0\nB. 1\nC. 2\nD. 4", "answer": "C"} | ||
{"question": "一个长方形的长是10厘米,宽是5厘米,它的周长是多少厘米?\nA. 20厘米\nB. 30厘米\nC. 40厘米\nD. 50厘米", "answer": "B"} | ||
{"question": "下列哪个分数是最接近1的?\nA. 1/2\nB. 3/4\nC. 4/5\nD. 5/6", "answer": "D"} | ||
{"question": "如果一个数加上10等于30,那么这个数是多少?\nA. 20\nB. 21\nC. 22\nD. 23", "answer": "A"} | ||
{"question": "下列哪个算式的结果最大?\nA. 3 + 4\nB. 5 - 2\nC. 6 * 2\nD. 7 ÷ 2", "answer": "C"} | ||
{"question": "一个班级有24个学生,如果每个学生都带了2本书,那么总共有多少本书?\nA. 48本\nB. 36本\nC. 24本\nD. 12本", "answer": "A"} | ||
{"question": "下列哪个是正确的乘法口诀?\nA. 三三得七\nB. 四四十六\nC. 五五二十五\nD. 六六三十六", "answer": "B"} | ||
{"question": "如果一个数是另一个数的3倍,并且这个数是15,那么另一个数是多少?\nA. 5\nB. 10\nC. 15\nD. 45", "answer": "A"} | ||
{"question": "下列哪个图形的周长最长?\nA. 正方形\nB. 长方形\nC. 圆形\nD. 三角形", "answer": "C"} | ||
``` | ||
|
||
### Prepare Environment | ||
|
||
You need to install the modified sedna package, which adds `JsonlDataParse` to `sedna.datasources`. | ||
|
||
Replace the file in `yourpath/anaconda3/envs/ianvs/lib/python3.x/site-packages/sedna` with `examples/resources/sedna-with-jsonl.zip` | ||
|
||
|
||
### Run Ianvs | ||
|
||
Run the following command: | ||
|
||
`ianvs -f examples/llm/singletask_learning_bench/simple_qa/benchmarkingjob.yaml` | ||
|
||
## OpenCompass Evaluation | ||
|
||
### Prepare Environment | ||
|
||
`pip install examples/resources/opencompass-0.2.5-py3-none-any.whl` | ||
|
||
### Run Evaluation | ||
|
||
`python run_op.py examples/llm/singletask_learning_bench/simple_qa/testalgorithms/gen/op_eval.py` | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
benchmarkingjob: | ||
# job name of benchmarking; string type; | ||
name: "benchmarkingjob" | ||
# the url address of job workspace that will reserve the output of tests; string type; | ||
workspace: "/home/icyfeather/project/ianvs/workspace" | ||
|
||
# the url address of test environment configuration file; string type; | ||
# the file format supports yaml/yml; | ||
testenv: "./examples/llm/singletask_learning_bench/simple_qa/testenv/testenv.yaml" | ||
|
||
# the configuration of test object | ||
test_object: | ||
# test type; string type; | ||
# currently the only supported value is "algorithms"; other values will be added later. | ||
type: "algorithms" | ||
# test algorithm configuration files; list type; | ||
algorithms: | ||
# algorithm name; string type; | ||
- name: "simple_qa_singletask_learning" | ||
# the url address of test algorithm configuration file; string type; | ||
# the file format supports yaml/yml; | ||
url: "./examples/llm/singletask_learning_bench/simple_qa/testalgorithms/gen/gen_algorithm.yaml" | ||
|
||
# the configuration of ranking leaderboard | ||
rank: | ||
# rank leaderboard with metric of test case's evaluation and order ; list type; | ||
# the sorting priority is based on the sequence of metrics in the list from front to back; | ||
sort_by: [ { "acc": "descend" } ] | ||
|
||
# visualization configuration | ||
visualization: | ||
# mode of visualization in the leaderboard; string type; | ||
# There are quite a few possible dataitems in the leaderboard. Not all of them can be shown simultaneously on the screen. | ||
# In the leaderboard, we provide the "selected_only" mode for the user to configure what is shown or is not shown. | ||
mode: "selected_only" | ||
# method of visualization for selected dataitems; string type; | ||
# currently the options of value are as follows: | ||
# 1> "print_table": print selected dataitems; | ||
method: "print_table" | ||
|
||
# selected dataitem configuration | ||
# The user can add his/her interested dataitems in terms of "paradigms", "modules", "hyperparameters" and "metrics", | ||
# so that the selected columns will be shown. | ||
selected_dataitem: | ||
# currently the options of value are as follows: | ||
# 1> "all": select all paradigms in the leaderboard; | ||
# 2> paradigms in the leaderboard, e.g., "singletasklearning" | ||
paradigms: [ "all" ] | ||
# currently the options of value are as follows: | ||
# 1> "all": select all modules in the leaderboard; | ||
# 2> modules in the leaderboard, e.g., "basemodel" | ||
modules: [ "all" ] | ||
# currently the options of value are as follows: | ||
# 1> "all": select all hyperparameters in the leaderboard; | ||
# 2> hyperparameters in the leaderboard, e.g., "momentum" | ||
hyperparameters: [ "all" ] | ||
# currently the options of value are as follows: | ||
# 1> "all": select all metrics in the leaderboard; | ||
# 2> metrics in the leaderboard, e.g., "f1_score" | ||
metrics: [ "acc" ] | ||
|
||
# mode of saving selected and all dataitems in the workspace; string type; | ||
# currently the options of value are as follows: | ||
# 1> "selected_and_all": save selected and all dataitems; | ||
# 2> "selected_only": save selected dataitems; | ||
save_mode: "selected_and_all" | ||
|
||
|
||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
{"question": "如果小明有5个苹果,他给了小华3个,那么小明还剩下多少个苹果?\nA. 2个\nB. 3个\nC. 4个\nD. 5个", "answer": "A"} | ||
{"question": "下列哪个数是最小的质数?\nA. 0\nB. 1\nC. 2\nD. 4", "answer": "C"} | ||
{"question": "一个长方形的长是10厘米,宽是5厘米,它的周长是多少厘米?\nA. 20厘米\nB. 30厘米\nC. 40厘米\nD. 50厘米", "answer": "B"} | ||
{"question": "下列哪个分数是最接近1的?\nA. 1/2\nB. 3/4\nC. 4/5\nD. 5/6", "answer": "D"} | ||
{"question": "如果一个数加上10等于30,那么这个数是多少?\nA. 20\nB. 21\nC. 22\nD. 23", "answer": "A"} | ||
{"question": "下列哪个算式的结果最大?\nA. 3 + 4\nB. 5 - 2\nC. 6 * 2\nD. 7 ÷ 2", "answer": "C"} | ||
{"question": "一个班级有24个学生,如果每个学生都带了2本书,那么总共有多少本书?\nA. 48本\nB. 36本\nC. 24本\nD. 12本", "answer": "A"} | ||
{"question": "下列哪个是正确的乘法口诀?\nA. 三三得七\nB. 四四十六\nC. 五五二十五\nD. 六六三十六", "answer": "B"} | ||
{"question": "如果一个数是另一个数的3倍,并且这个数是15,那么另一个数是多少?\nA. 5\nB. 10\nC. 15\nD. 45", "answer": "A"} | ||
{"question": "下列哪个图形的周长最长?\nA. 正方形\nB. 长方形\nC. 圆形\nD. 三角形", "answer": "C"} | ||
|
||
{"question": "如下是一个Python函数\"def wait(self, wait_time: int) -> list:all_ready = False\n while not all_ready:\n self._instances = self.get_instances()\n if not self._instances:\n self._logger.warning(\n f\\\"No instance found, waiting {wait_time}s ...\\\",\n )\n sleep(wait_time)\n continue\n all_ready = True\n for instance in self._instances:\n if not instance[\\\"health\\\"]:\n self._logger.warning(\n f\\\"Instance {instance['name']} is not ready, waiting {wait_time}s ...\\\",\n )\n sleep(wait_time)\n all_ready = False\n break\n return self._instances\",请问它的作用是什么?\nA. 确保服务或组件的所有实例都可用,然后再继续执行\nB. 函数的作用是在等待指定时间后,立即返回一个包含所有实例的列表,不检查实例的状态\nC. 函数的目的是为每个不健康的实例记录一次警告日志,不进行任何等待或重试\nD. 函数会检查每个实例的健康状态,如果所有实例在首次检查时都健康,就继续等待直到 wait_time 结束,然后返回实例列表", "answer": "A"} | ||
{"question": "如下是一个Python函数\"def _to_instances(self, controller_instance) -> List[dict]:\n instance = {}\n instance[\\\"name\\\"] = controller_instance.name\n instance[\\\"hostname\\\"] = controller_instance.name\n instance[\\\"health\\\"] = controller_instance.status == \\\"running\\\" and controller_instance.attrs[\\\"State\\\"][\\\"Health\\\"][\\\"Status\\\"] == \\\"healthy\\\"\n instance[\\\"env\\\"] = {}\n for env in controller_instance.attrs[\\\"Config\\\"][\\\"Env\\\"]:\n variable = env.split(\\\"=\\\")[0]\n value = env.replace(f\\\"{variable}=\\\", \\\"\\\", 1)\n instance[\\\"env\\\"][variable] = value\n return [instance]\",请问它的作用是什么?\nA. 函数用于修改控制器实例的属性,如名称和主机名\nB. 函数返回一个包含所有控制器实例属性的复杂嵌套结构\nC. 处理单个Docker容器实例并将其信息转换为字典\nD. 函数用于删除控制器实例的环境变量配置", "answer": "C"} | ||
{"question": "如下是一个Python函数\"instance[\\\"health\\\"] = controller_instance.status == \\\"running\\\" and controller_instance.attrs[\\\"State\\\"][\\\"Health\\\"][\\\"Status\\\"] == \\\"healthy\\\"\",请问它的作用是什么?\nA. 代码段会删除controller_instance中的status属性\nB. 多个属性组合起来判断实例的健康状态\nC. 如果controller_instance的状态为running,此代码将instance[\\\"health\\\"]设置为False\nD. 代码段检查controller_instance.attrs[\\\"State\\\"][\\\"Health\\\"][\\\"Status\\\"]的值是否为unhealthy,然后相应地更新instance[\\\"health\\\"]", "answer": "B"} | ||
{"question": "如下是一个Python函数\"result = self.__custom_confs_rx.search(variable)\",请问它的作用是什么?\nA. 这段代码的作用是将字符串 variable 与 self.__custom_confs_rx 进行替换操作\nB. 这段代码会删除 variable 中所有与 self.__custom_confs_rx 匹配的内容\nC. 这段代码用于计算 variable 和 self.__custom_confs_rx 的长度差\nD. 使用正则表达式匹配变量名,判断是否符合特定的配置项格式", "answer": "D"} | ||
{"question": "如下是一个Python函数\"if not self.update_needed(self._instances, self._services, configs=self._configs):\",请问它的作用是什么?\nA. 此函数用于更新类实例中的所有服务和配置\nB. 当 `update_needed` 方法返回 `True` 时,该代码片段将终止程序运行\nC. 调用update_needed方法检查当前配置是否需要更新\nD. 此代码片段用于直接修改 `_instances`、`_services` 和 `_configs` 的值", "answer": "C"} | ||
{"question": "如下是一个Python函数\"def set_value_from_polygon(self, pol_x, pol_y, val, inside=True):\n \\\"\\\"\\\"set_value_from_polygon\n Setting value inside or outside polygon\n :param pol_x: x position list for a polygon\n :param pol_y: y position list for a polygon\n :param val: grid value\n :param inside: setting data inside or outside\n \\\"\\\"\\\"\n # making ring polygon\n if (pol_x[0] != pol_x[-1]) or (pol_y[0] != pol_y[-1]):\n np.append(pol_x, pol_x[0])\n np.append(pol_y, pol_y[0])\n # setting value for all grid\n for x_ind in range(self.width):\n for y_ind in range(self.height):\n x_pos, y_pos = self.calc_grid_central_xy_position_from_xy_index(\n x_ind, y_ind)\n flag = this.check_inside_polygon(x_pos, y_pos, pol_x, pol_y)\n if flag is inside:\n this.set_value_from_xy_index(x_ind, y_ind, val)\",请问它的作用是什么?\nA. 根据多边形的形状设置网格地图中的值\nB. 该函数用于计算多边形的面积\nC. 该函数用于绘制多边形图形\nD. 该函数用于从多边形的顶点坐标生成一个新的多边形对象", "answer": "A"} | ||
{"question": "如下是一个Python函数\"if not check_car_collision(x_list, y_list, yaw_list, ox, oy, kd_tree): return None\",请问它的作用是什么?\nA. 检查生成的路径是否与障碍物冲突。\nB. 这个函数用来检查给定的列表中是否所有元素都相等\nC. 这个函数返回所有在x_list和y_list中的元素的和\nD. 这个函数用于创建一个新的kd树来存储车辆位置数据", "answer": "A"} | ||
{"question": "如下是一个Python函数\"heapq.heappush(pq, (calc_cost(start_node, h_dp, config), calc_index(start_node, config)))\",请问它的作用是什么?\nA. 该函数从堆`pq`中删除一个元素\nB. 将节点添加到优先级队列(使用堆数据结构实现)\nC. 该函数返回堆`pq`中的最大元素\nD. 该函数用于创建一个新的空堆", "answer": "B"} | ||
{"question": "如下是一个Python函数\"def calc_index(node, x_width, x_min, y_min): return (node.y - y_min) * x_width + (node.x - x_min)\",请问它的作用是什么?\nA. 将节点坐标转换为一维索引\nB. 函数用于计算节点在二维网格中的行索引\nC. 函数返回的是从给定节点到最小节点的直线距离\nD. 函数用于计算节点的颜色值在一个色彩数组中的索引", "answer": "A"} | ||
{"question": "如下是一个Python函数\"if use_dynamic_weighting: w = (1 + epsilon - epsilon*depth/upper_bound_depth)\",请问它的作用是什么?\nA. 该函数用于重置 `w` 的值为固定常数\nB. 代码段检查 `depth` 是否大于 `upper_bound_depth`\nC. 调整启发式成本的计算,引入动态权重,优化搜索效率\nD. 该函数将 `w` 的值与 `depth` 成正比增加", "answer": "C"} | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
# Copyright 2022 The KubeEdge Authors. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
from __future__ import absolute_import, division, print_function | ||
|
||
import os | ||
import tempfile | ||
import time | ||
import zipfile | ||
import logging | ||
|
||
import numpy as np | ||
from sedna.common.config import Context | ||
from sedna.common.class_factory import ClassType, ClassFactory | ||
|
||
|
||
from transformers import AutoModelForCausalLM, AutoTokenizer | ||
device = "cuda" # the device to load the model onto

# Disable every logging call of severity WARNING and below, process-wide.
logging.disable(logging.WARNING)

# Only BaseModel is exported by a star-import of this module.
__all__ = ["BaseModel"]

# NOTE(review): presumably read by sedna to select its PyTorch backend — confirm.
os.environ['BACKEND_TYPE'] = 'TORCH'
|
||
|
||
@ClassFactory.register(ClassType.GENERAL, alias="gen")
class BaseModel:
    """Chat-style text generator backed by a local Qwen2-0.5B-Instruct checkpoint.

    The class is registered with the sedna ``ClassFactory`` under the alias
    ``"gen"``. ``train``, ``save``, ``load`` and ``evaluate`` only print a
    message; ``predict`` is the method that runs the model.
    """

    def __init__(self, **kwargs):
        """Load the causal language model and its tokenizer.

        Args:
            **kwargs: Accepted for interface compatibility; not used.
        """
        model_path = "/home/icyfeather/models/Qwen2-0.5B-Instruct"
        self.model = AutoModelForCausalLM.from_pretrained(
            model_path,
            torch_dtype="auto",
            device_map="auto"
        )
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)

    def train(self, train_data, valid_data=None, **kwargs):
        """Do nothing except report that no training is performed."""
        print("BaseModel doesn't need to train")

    def save(self, model_path):
        """Do nothing except report that nothing is saved."""
        print("BaseModel doesn't need to save")

    def predict(self, data, input_shape=None, **kwargs):
        """Generate one response per prompt in ``data``.

        Args:
            data: Iterable of prompts; each item is passed to ``_infer`` as
                the user message.
            input_shape: Unused.
            **kwargs: Unused.

        Returns:
            list: The generated responses, in the same order as ``data``.
        """
        print("BaseModel predict")
        return [self._infer(line) for line in data]

    def load(self, model_url=None):
        """Do nothing except print a message; the model is loaded in ``__init__``."""
        print("BaseModel load")

    def evaluate(self, data, model_path, **kwargs):
        """Do nothing except print a message."""
        print("BaseModel evaluate")

    def _infer(self, prompt, system=None):
        """Run a single chat turn and return the decoded completion.

        Args:
            prompt: Content of the user message.
            system: Optional content of a system message placed before the
                user message; omitted when falsy.

        Returns:
            str: The generated text with special tokens stripped.
        """
        messages = [{"role": "user", "content": prompt}]
        if system:
            messages.insert(0, {"role": "system", "content": system})

        text = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )

        # The model is loaded with device_map="auto", so its placement is only
        # known at runtime. Move the inputs to the model's actual device
        # instead of assuming CUDA, which fails on CPU-only hosts.
        model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device)

        generated_ids = self.model.generate(
            model_inputs.input_ids,
            max_new_tokens=512
        )

        # Drop the leading prompt tokens from each output sequence so that
        # only the newly generated tokens are decoded.
        completion_ids = [
            output_ids[len(input_ids):]
            for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
        ]

        return self.tokenizer.batch_decode(completion_ids, skip_special_tokens=True)[0]
Oops, something went wrong.