Commit

Merge remote-tracking branch 'origin/dev' into dev
# Conflicts:
#	examples/smart_coding/smart_coding_learning_bench/comment/benchmarkingjob.yaml
#	examples/smart_coding/smart_coding_learning_bench/comment/testalgorithms/gen/basemodel.py
#	examples/smart_coding/smart_coding_learning_bench/comment/testalgorithms/gen/gen_algorithm.yaml
#	examples/smart_coding/smart_coding_learning_bench/comment/testenv/llm_judgement.py
#	examples/smart_coding/smart_coding_learning_bench/comment/testenv/testenv.yaml
#	examples/smart_coding/smart_coding_learning_bench/issue/benchmarkingjob.yaml
#	examples/smart_coding/smart_coding_learning_bench/issue/testenv/llm_judgement.py
#	examples/smart_coding/smart_coding_learning_bench/issue/testenv/testenv.yaml
safe-b committed Oct 25, 2024
2 parents e1a0b3d + 6f64867 commit c1af71a
Showing 16 changed files with 13,502 additions and 0 deletions.
2,421 changes: 2,421 additions & 0 deletions examples/data/data_java.json

Large diffs are not rendered by default.

2,343 changes: 2,343 additions & 0 deletions examples/data/data_python.json

Large diffs are not rendered by default.

8,264 changes: 8,264 additions & 0 deletions examples/data/issue_comment.json

Large diffs are not rendered by default.

49 changes: 49 additions & 0 deletions examples/data/request_issue.py
@@ -0,0 +1,49 @@
import json
import requests

# Read the issue list from the JSON file
with open('test_data.json', 'r') as f:
    data = json.load(f)

# Collect all extracted entries
all_extracted_data = []

# Iterate over each issue
for item in data:
    title = item['title']
    comments_url = item['comments_url']

    # Fetch the comment data from the comments URL
    response = requests.get(comments_url)

    if response.status_code == 200:
        comments = response.json()

        # Extract the comment data
        extracted_data = {
            "title": title,
        }

        for i, comment in enumerate(comments):
            entry = {
                "user_login": comment["user"]["login"],
                "created_at": comment["created_at"],
                "updated_at": comment["updated_at"],
                "body": comment["body"]
            }

            if i == 0:
                extracted_data.update(entry)  # merge the first comment into the top-level entry
            else:
                extracted_data[f"answer_{i}"] = entry  # store later comments as answers

        # Append to the overall results
        all_extracted_data.append(extracted_data)
    else:
        print(f"Request failed with status code {response.status_code} for title: {title}")

# Save the extracted data to a new JSON file
with open('extracted_data.json', 'w') as f:
    json.dump(all_extracted_data, f, indent=4)

print("All data has been extracted and saved to extracted_data.json")
38 changes: 38 additions & 0 deletions examples/data/test_data.json
@@ -0,0 +1,38 @@
[
    {
        "title": "How to set_epoch with interleave_datasets?",
        "html_url": "https://github.com/huggingface/datasets/issues/7051",
        "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7051/comments",
        "labels": "[]",
        "state": "open",
        "pull_request": "NaN",
        "is_pull_request": false
    },
    {
        "title": "add checkpoint and resume title in docs",
        "html_url": "https://github.com/huggingface/datasets/pull/7050",
        "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7050/comments",
        "labels": "[]",
        "state": "closed",
        "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7050.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7050', 'merged_at': '2024-07-15T15:59:56Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7050.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7050'}",
        "is_pull_request": true
    },
    {
        "title": "Save nparray as list",
        "html_url": "https://github.com/huggingface/datasets/issues/7049",
        "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7049/comments",
        "labels": "[]",
        "state": "open",
        "pull_request": "NaN",
        "is_pull_request": false
    },
    {
        "title": "ImportError: numpy.core.multiarray when using `filter`",
        "html_url": "https://github.com/huggingface/datasets/issues/7048",
        "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7048/comments",
        "labels": "[]",
        "state": "open",
        "pull_request": "NaN",
        "is_pull_request": false
    }
]
54 changes: 54 additions & 0 deletions examples/llm_simple_qa/README.md
@@ -0,0 +1,54 @@
# README

## Simple QA

### Prepare Data

The directory structure of the simple-qa example data is:

```
.
├── test_data
│   └── data.jsonl
└── train_data
    └── data.jsonl
```

`train_data/data.jsonl` is empty, and `test_data/data.jsonl` is as follows:

```
{"question": "如果小明有5个苹果,他给了小华3个,那么小明还剩下多少个苹果?\nA. 2个\nB. 3个\nC. 4个\nD. 5个", "answer": "A"}
{"question": "下列哪个数是最小的质数?\nA. 0\nB. 1\nC. 2\nD. 4", "answer": "C"}
{"question": "一个长方形的长是10厘米,宽是5厘米,它的周长是多少厘米?\nA. 20厘米\nB. 30厘米\nC. 40厘米\nD. 50厘米", "answer": "B"}
{"question": "下列哪个分数是最接近1的?\nA. 1/2\nB. 3/4\nC. 4/5\nD. 5/6", "answer": "D"}
{"question": "如果一个数加上10等于30,那么这个数是多少?\nA. 20\nB. 21\nC. 22\nD. 23", "answer": "A"}
{"question": "下列哪个算式的结果最大?\nA. 3 + 4\nB. 5 - 2\nC. 6 * 2\nD. 7 ÷ 2", "answer": "C"}
{"question": "一个班级有24个学生,如果每个学生都带了2本书,那么总共有多少本书?\nA. 48本\nB. 36本\nC. 24本\nD. 12本", "answer": "A"}
{"question": "下列哪个是正确的乘法口诀?\nA. 三三得七\nB. 四四十六\nC. 五五二十五\nD. 六六三十六", "answer": "B"}
{"question": "如果一个数是另一个数的3倍,并且这个数是15,那么另一个数是多少?\nA. 5\nB. 10\nC. 15\nD. 45", "answer": "A"}
{"question": "下列哪个图形的周长最长?\nA. 正方形\nB. 长方形\nC. 圆形\nD. 三角形", "answer": "C"}
```
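
Each line is a standalone JSON object with a `question` and an `answer` field. A minimal loading sketch (assuming the `test_data/data.jsonl` path above):

```python
import json

# Read the multiple-choice QA samples line by line (JSON Lines format).
with open("test_data/data.jsonl", "r", encoding="utf-8") as f:
    samples = [json.loads(line) for line in f if line.strip()]

print(samples[0]["question"])
print("expected answer:", samples[0]["answer"])
```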

### Prepare Environment

You need to install the modified sedna package, which adds `JsonlDataParse` to `sedna.datasources`.

Replace the files under `yourpath/anaconda3/envs/ianvs/lib/python3.x/site-packages/sedna` with the contents of `examples/resources/sedna-with-jsonl.zip`.
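
As a quick sanity check (assuming the replacement above succeeded), the new parser should be importable:

```python
# If the stock sedna package is still installed, this import raises ImportError.
from sedna.datasources import JsonlDataParse

print(JsonlDataParse)
```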


### Run Ianvs

Run the following command:

`ianvs -f examples/llm/singletask_learning_bench/simple_qa/benchmarkingjob.yaml`

## OpenCompass Evaluation

### Prepare Environment

`pip install examples/resources/opencompass-0.2.5-py3-none-any.whl`

### Run Evaluation

`python run_op.py examples/llm/singletask_learning_bench/simple_qa/testalgorithms/gen/op_eval.py`

72 changes: 72 additions & 0 deletions examples/llm_simple_qa/benchmarkingjob.yaml
@@ -0,0 +1,72 @@
benchmarkingjob:
  # job name of benchmarking; string type;
  name: "benchmarkingjob"
  # the url address of the job workspace that will store the output of tests; string type;
  workspace: "/home/icyfeather/project/ianvs/workspace"

  # the url address of the test environment configuration file; string type;
  # the file format supports yaml/yml;
  testenv: "./examples/llm/singletask_learning_bench/simple_qa/testenv/testenv.yaml"

  # the configuration of the test object
  test_object:
    # test type; string type;
    # currently the only supported value is "algorithms"; others will be added later.
    type: "algorithms"
    # test algorithm configuration files; list type;
    algorithms:
      # algorithm name; string type;
      - name: "simple_qa_singletask_learning"
        # the url address of the test algorithm configuration file; string type;
        # the file format supports yaml/yml;
        url: "./examples/llm/singletask_learning_bench/simple_qa/testalgorithms/gen/gen_algorithm.yaml"

  # the configuration of the ranking leaderboard
  rank:
    # rank the leaderboard by the evaluation metrics of the test cases and their order; list type;
    # the sorting priority follows the sequence of metrics in the list from front to back;
    sort_by: [ { "acc": "descend" } ]

    # visualization configuration
    visualization:
      # mode of visualization in the leaderboard; string type;
      # There are quite a few possible dataitems in the leaderboard. Not all of them can be shown simultaneously on the screen.
      # In the leaderboard, we provide the "selected_only" mode for the user to configure what is shown or is not shown.
      mode: "selected_only"
      # method of visualization for selected dataitems; string type;
      # currently the options of value are as follows:
      #   1> "print_table": print selected dataitems;
      method: "print_table"

    # selected dataitem configuration
    # The user can add the dataitems of interest in terms of "paradigms", "modules", "hyperparameters" and "metrics",
    # so that the selected columns will be shown.
    selected_dataitem:
      # currently the options of value are as follows:
      #   1> "all": select all paradigms in the leaderboard;
      #   2> paradigms in the leaderboard, e.g., "singletasklearning"
      paradigms: [ "all" ]
      # currently the options of value are as follows:
      #   1> "all": select all modules in the leaderboard;
      #   2> modules in the leaderboard, e.g., "basemodel"
      modules: [ "all" ]
      # currently the options of value are as follows:
      #   1> "all": select all hyperparameters in the leaderboard;
      #   2> hyperparameters in the leaderboard, e.g., "momentum"
      hyperparameters: [ "all" ]
      # currently the options of value are as follows:
      #   1> "all": select all metrics in the leaderboard;
      #   2> metrics in the leaderboard, e.g., "f1_score"
      metrics: [ "acc" ]

    # mode of saving selected and all dataitems in the workspace; string type;
    # currently the options of value are as follows:
    #   1> "selected_and_all": save selected and all dataitems;
    #   2> "selected_only": save selected dataitems;
    save_mode: "selected_and_all"
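
For intuition, a tiny illustrative sketch (not the Ianvs implementation) of what `sort_by: [ { "acc": "descend" } ]` means for ranking the leaderboard:

```python
# Hypothetical leaderboard rows; "descend" means higher acc ranks first.
rows = [
    {"algorithm": "simple_qa_singletask_learning", "acc": 0.62},
    {"algorithm": "some_other_algorithm", "acc": 0.48},
]
rows.sort(key=lambda r: r["acc"], reverse=True)
print(rows)
```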






23 changes: 23 additions & 0 deletions examples/llm_simple_qa/testalgorithms/data.jsonl
@@ -0,0 +1,23 @@
{"question": "如果小明有5个苹果,他给了小华3个,那么小明还剩下多少个苹果?\nA. 2个\nB. 3个\nC. 4个\nD. 5个", "answer": "A"}
{"question": "下列哪个数是最小的质数?\nA. 0\nB. 1\nC. 2\nD. 4", "answer": "C"}
{"question": "一个长方形的长是10厘米,宽是5厘米,它的周长是多少厘米?\nA. 20厘米\nB. 30厘米\nC. 40厘米\nD. 50厘米", "answer": "B"}
{"question": "下列哪个分数是最接近1的?\nA. 1/2\nB. 3/4\nC. 4/5\nD. 5/6", "answer": "D"}
{"question": "如果一个数加上10等于30,那么这个数是多少?\nA. 20\nB. 21\nC. 22\nD. 23", "answer": "A"}
{"question": "下列哪个算式的结果最大?\nA. 3 + 4\nB. 5 - 2\nC. 6 * 2\nD. 7 ÷ 2", "answer": "C"}
{"question": "一个班级有24个学生,如果每个学生都带了2本书,那么总共有多少本书?\nA. 48本\nB. 36本\nC. 24本\nD. 12本", "answer": "A"}
{"question": "下列哪个是正确的乘法口诀?\nA. 三三得七\nB. 四四十六\nC. 五五二十五\nD. 六六三十六", "answer": "B"}
{"question": "如果一个数是另一个数的3倍,并且这个数是15,那么另一个数是多少?\nA. 5\nB. 10\nC. 15\nD. 45", "answer": "A"}
{"question": "下列哪个图形的周长最长?\nA. 正方形\nB. 长方形\nC. 圆形\nD. 三角形", "answer": "C"}

{"question": "如下是一个Python函数\"def wait(self, wait_time: int) -> list:all_ready = False\n while not all_ready:\n self._instances = self.get_instances()\n if not self._instances:\n self._logger.warning(\n f\\\"No instance found, waiting {wait_time}s ...\\\",\n )\n sleep(wait_time)\n continue\n all_ready = True\n for instance in self._instances:\n if not instance[\\\"health\\\"]:\n self._logger.warning(\n f\\\"Instance {instance['name']} is not ready, waiting {wait_time}s ...\\\",\n )\n sleep(wait_time)\n all_ready = False\n break\n return self._instances\",请问它的作用是什么?\nA. 确保服务或组件的所有实例都可用,然后再继续执行\nB. 函数的作用是在等待指定时间后,立即返回一个包含所有实例的列表,不检查实例的状态\nC. 函数的目的是为每个不健康的实例记录一次警告日志,不进行任何等待或重试\nD. 函数会检查每个实例的健康状态,如果所有实例在首次检查时都健康,就继续等待直到 wait_time 结束,然后返回实例列表", "answer": "A"}
{"question": "如下是一个Python函数\"def _to_instances(self, controller_instance) -> List[dict]:\n instance = {}\n instance[\\\"name\\\"] = controller_instance.name\n instance[\\\"hostname\\\"] = controller_instance.name\n instance[\\\"health\\\"] = controller_instance.status == \\\"running\\\" and controller_instance.attrs[\\\"State\\\"][\\\"Health\\\"][\\\"Status\\\"] == \\\"healthy\\\"\n instance[\\\"env\\\"] = {}\n for env in controller_instance.attrs[\\\"Config\\\"][\\\"Env\\\"]:\n variable = env.split(\\\"=\\\")[0]\n value = env.replace(f\\\"{variable}=\\\", \\\"\\\", 1)\n instance[\\\"env\\\"][variable] = value\n return [instance]\",请问它的作用是什么?\nA. 函数用于修改控制器实例的属性,如名称和主机名\nB. 函数返回一个包含所有控制器实例属性的复杂嵌套结构\nC. 处理单个Docker容器实例并将其信息转换为字典\nD. 函数用于删除控制器实例的环境变量配置", "answer": "C"}
{"question": "如下是一个Python函数\"instance[\\\"health\\\"] = controller_instance.status == \\\"running\\\" and controller_instance.attrs[\\\"State\\\"][\\\"Health\\\"][\\\"Status\\\"] == \\\"healthy\\\"\",请问它的作用是什么?\nA. 代码段会删除controller_instance中的status属性\nB. 多个属性组合起来判断实例的健康状态\nC. 如果controller_instance的状态为running,此代码将instance[\\\"health\\\"]设置为False\nD. 代码段检查controller_instance.attrs[\\\"State\\\"][\\\"Health\\\"][\\\"Status\\\"]的值是否为unhealthy,然后相应地更新instance[\\\"health\\\"]", "answer": "B"}
{"question": "如下是一个Python函数\"result = self.__custom_confs_rx.search(variable)\",请问它的作用是什么?\nA. 这段代码的作用是将字符串 variable 与 self.__custom_confs_rx 进行替换操作\nB. 这段代码会删除 variable 中所有与 self.__custom_confs_rx 匹配的内容\nC. 这段代码用于计算 variable 和 self.__custom_confs_rx 的长度差\nD. 使用正则表达式匹配变量名,判断是否符合特定的配置项格式", "answer": "D"}
{"question": "如下是一个Python函数\"if not self.update_needed(self._instances, self._services, configs=self._configs):\",请问它的作用是什么?\nA. 此函数用于更新类实例中的所有服务和配置\nB. 当 `update_needed` 方法返回 `True` 时,该代码片段将终止程序运行\nC. 调用update_needed方法检查当前配置是否需要更新\nD. 此代码片段用于直接修改 `_instances`、`_services` 和 `_configs` 的值", "answer": "C"}
{"question": "如下是一个Python函数\"def set_value_from_polygon(self, pol_x, pol_y, val, inside=True):\n \\\"\\\"\\\"set_value_from_polygon\n Setting value inside or outside polygon\n :param pol_x: x position list for a polygon\n :param pol_y: y position list for a polygon\n :param val: grid value\n :param inside: setting data inside or outside\n \\\"\\\"\\\"\n # making ring polygon\n if (pol_x[0] != pol_x[-1]) or (pol_y[0] != pol_y[-1]):\n np.append(pol_x, pol_x[0])\n np.append(pol_y, pol_y[0])\n # setting value for all grid\n for x_ind in range(self.width):\n for y_ind in range(self.height):\n x_pos, y_pos = self.calc_grid_central_xy_position_from_xy_index(\n x_ind, y_ind)\n flag = this.check_inside_polygon(x_pos, y_pos, pol_x, pol_y)\n if flag is inside:\n this.set_value_from_xy_index(x_ind, y_ind, val)\",请问它的作用是什么?\nA. 根据多边形的形状设置网格地图中的值\nB. 该函数用于计算多边形的面积\nC. 该函数用于绘制多边形图形\nD. 该函数用于从多边形的顶点坐标生成一个新的多边形对象", "answer": "A"}
{"question": "如下是一个Python函数\"if not check_car_collision(x_list, y_list, yaw_list, ox, oy, kd_tree): return None\",请问它的作用是什么?\nA. 检查生成的路径是否与障碍物冲突。\nB. 这个函数用来检查给定的列表中是否所有元素都相等\nC. 这个函数返回所有在x_list和y_list中的元素的和\nD. 这个函数用于创建一个新的kd树来存储车辆位置数据", "answer": "A"}
{"question": "如下是一个Python函数\"heapq.heappush(pq, (calc_cost(start_node, h_dp, config), calc_index(start_node, config)))\",请问它的作用是什么?\nA. 该函数从堆`pq`中删除一个元素\nB. 将节点添加到优先级队列(使用堆数据结构实现)\nC. 该函数返回堆`pq`中的最大元素\nD. 该函数用于创建一个新的空堆", "answer": "B"}
{"question": "如下是一个Python函数\"def calc_index(node, x_width, x_min, y_min): return (node.y - y_min) * x_width + (node.x - x_min)\",请问它的作用是什么?\nA. 将节点坐标转换为一维索引\nB. 函数用于计算节点在二维网格中的行索引\nC. 函数返回的是从给定节点到最小节点的直线距离\nD. 函数用于计算节点的颜色值在一个色彩数组中的索引", "answer": "A"}
{"question": "如下是一个Python函数\"if use_dynamic_weighting: w = (1 + epsilon - epsilon*depth/upper_bound_depth)\",请问它的作用是什么?\nA. 该函数用于重置 `w` 的值为固定常数\nB. 代码段检查 `depth` 是否大于 `upper_bound_depth`\nC. 调整启发式成本的计算,引入动态权重,优化搜索效率\nD. 该函数将 `w` 的值与 `depth` 成正比增加", "answer": "C"}


98 changes: 98 additions & 0 deletions examples/llm_simple_qa/testalgorithms/gen/basemodel.py
@@ -0,0 +1,98 @@
# Copyright 2022 The KubeEdge Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import, division, print_function

import os
import tempfile
import time
import zipfile
import logging

import numpy as np
from sedna.common.config import Context
from sedna.common.class_factory import ClassType, ClassFactory

from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cuda"  # the device to load the model onto


logging.disable(logging.WARNING)

__all__ = ["BaseModel"]

os.environ['BACKEND_TYPE'] = 'TORCH'


@ClassFactory.register(ClassType.GENERAL, alias="gen")
class BaseModel:

    def __init__(self, **kwargs):
        self.model = AutoModelForCausalLM.from_pretrained(
            "/home/icyfeather/models/Qwen2-0.5B-Instruct",
            torch_dtype="auto",
            device_map="auto"
        )
        self.tokenizer = AutoTokenizer.from_pretrained("/home/icyfeather/models/Qwen2-0.5B-Instruct")

    def train(self, train_data, valid_data=None, **kwargs):
        print("BaseModel doesn't need to train")

    def save(self, model_path):
        print("BaseModel doesn't need to save")

    def predict(self, data, input_shape=None, **kwargs):
        print("BaseModel predict")
        answer_list = []
        for line in data:
            response = self._infer(line)
            answer_list.append(response)
        return answer_list

    def load(self, model_url=None):
        print("BaseModel load")

    def evaluate(self, data, model_path, **kwargs):
        print("BaseModel evaluate")

    def _infer(self, prompt, system=None):
        if system:
            messages = [
                {"role": "system", "content": system},
                {"role": "user", "content": prompt}
            ]
        else:
            messages = [
                {"role": "user", "content": prompt}
            ]
        text = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )
        model_inputs = self.tokenizer([text], return_tensors="pt").to(device)

        generated_ids = self.model.generate(
            model_inputs.input_ids,
            max_new_tokens=512
        )
        generated_ids = [
            output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
        ]

        response = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

        return response
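
A minimal usage sketch of the class above (assuming the hard-coded Qwen2-0.5B-Instruct path exists locally and a CUDA device is available; the direct import of `basemodel` outside Ianvs is hypothetical):

```python
# Illustrative direct invocation, just to show the predict() interface.
from basemodel import BaseModel

model = BaseModel()
answers = model.predict(["下列哪个数是最小的质数?\nA. 0\nB. 1\nC. 2\nD. 4"])
print(answers[0])  # expected to contain the correct choice, e.g. "C"
```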
