Merge remote-tracking branch 'origin/dev' into dev

# Conflicts: # examples/smart_coding/smart_coding_learning_bench/comment/benchmarkingjob.yaml # examples/smart_coding/smart_coding_learning_bench/comment/testalgorithms/gen/basemodel.py # examples/smart_coding/smart_coding_learning_bench/comment/testalgorithms/gen/gen_algorithm.yaml # examples/smart_coding/smart_coding_learning_bench/comment/testenv/llm_judgement.py # examples/smart_coding/smart_coding_learning_bench/comment/testenv/testenv.yaml # examples/smart_coding/smart_coding_learning_bench/issue/benchmarkingjob.yaml # examples/smart_coding/smart_coding_learning_bench/issue/testenv/llm_judgement.py # examples/smart_coding/smart_coding_learning_bench/issue/testenv/testenv.yaml
kubeedge · Oct 25, 2024 · c1af71a · c1af71a
2 parents e1a0b3d + 6f64867
commit c1af71a
Show file tree

Hide file tree

Showing 16 changed files with 13,502 additions and 0 deletions.
diff --git a/examples/data/data_java.json b/examples/data/data_java.json
diff --git a/examples/data/data_python.json b/examples/data/data_python.json
diff --git a/examples/data/issue_comment.json b/examples/data/issue_comment.json
diff --git a/examples/data/request_issue.py b/examples/data/request_issue.py
@@ -0,0 +1,49 @@
+import json
+import requests
+
+# 从 JSON 文件读取数据
+with open('test_data.json', 'r') as f:
+    data = json.load(f)
+
+# 用于保存所有提取的数据
+all_extracted_data = []
+
+# 遍历每个条目
+for item in data:
+    title = item['title']
+    comments_url = item['comments_url']
+
+    # 从评论 URL 获取评论数据
+    response = requests.get(comments_url)
+
+    if response.status_code == 200:
+        comments = response.json()
+
+        # 提取评论数据
+        extracted_data = {
+            "title": title,
+        }
+
+        for i, comment in enumerate(comments):
+            entry = {
+                "user_login": comment["user"]["login"],
+                "created_at": comment["created_at"],
+                "updated_at": comment["updated_at"],
+                "body": comment["body"]
+            }
+
+            if i == 0:
+                extracted_data.update(entry)  # 第一条评论直接加入
+            else:
+                extracted_data[f"answer_{i}"] = entry  # 后续评论作为回答
+
+        # 添加到总提取数据中
+        all_extracted_data.append(extracted_data)
+    else:
+        print(f"请求失败，状态码: {response.status_code}，对于标题: {title}")
+
+# 保存提取的数据到新的 JSON 文件
+with open('extracted_data.json', 'w') as f:
+    json.dump(all_extracted_data, f, indent=4)
+
+print("所有数据已提取并保存到 extracted_data.json")
diff --git a/examples/data/test_data.json b/examples/data/test_data.json
@@ -0,0 +1,38 @@
+[
+    {
+        "title": "How to set_epoch with interleave_datasets?",
+        "html_url": "https://github.com/huggingface/datasets/issues/7051",
+        "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7051/comments",
+        "labels": "[]",
+        "state": "open",
+        "pull_request": "NaN",
+        "is_pull_request": false
+    },
+    {
+        "title": "add checkpoint and resume title in docs",
+        "html_url": "https://github.com/huggingface/datasets/pull/7050",
+        "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7050/comments",
+        "labels": "[]",
+        "state": "closed",
+        "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7050.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7050', 'merged_at': '2024-07-15T15:59:56Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7050.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7050'}",
+        "is_pull_request": true
+    },
+    {
+        "title": "Save nparray as list",
+        "html_url": "https://github.com/huggingface/datasets/issues/7049",
+        "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7049/comments",
+        "labels": "[]",
+        "state": "open",
+        "pull_request": "NaN",
+        "is_pull_request": false
+    },
+    {
+        "title": "ImportError: numpy.core.multiarray when using `filter`",
+        "html_url": "https://github.com/huggingface/datasets/issues/7048",
+        "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7048/comments",
+        "labels": "[]",
+        "state": "open",
+        "pull_request": "NaN",
+        "is_pull_request": false
+    }
+]
diff --git a/examples/government/singletask_learning_bench/imgs/img.png b/examples/government/singletask_learning_bench/imgs/img.png
diff --git a/examples/llm_simple_qa/README.md b/examples/llm_simple_qa/README.md
@@ -0,0 +1,54 @@
+# README
+
+## Simple QA
+
+### Prepare Data
+
+The data for the simple-qa example is organized as follows:
+
+```
+.
+├── test_data
+│   └── data.jsonl
+└── train_data
+    └── data.jsonl
+```
+
+`train_data/data.jsonl` is empty, and `test_data/data.jsonl` contains the following:
+
+```
+{"question": "如果小明有5个苹果，他给了小华3个，那么小明还剩下多少个苹果？\nA. 2个\nB. 3个\nC. 4个\nD. 5个", "answer": "A"}
+{"question": "下列哪个数是最小的质数？\nA. 0\nB. 1\nC. 2\nD. 4", "answer": "C"}
+{"question": "一个长方形的长是10厘米，宽是5厘米，它的周长是多少厘米？\nA. 20厘米\nB. 30厘米\nC. 40厘米\nD. 50厘米", "answer": "B"}
+{"question": "下列哪个分数是最接近1的？\nA. 1/2\nB. 3/4\nC. 4/5\nD. 5/6", "answer": "D"}
+{"question": "如果一个数加上10等于30，那么这个数是多少？\nA. 20\nB. 21\nC. 22\nD. 23", "answer": "A"}
+{"question": "下列哪个算式的结果最大？\nA. 3 + 4\nB. 5 - 2\nC. 6 * 2\nD. 7 ÷ 2", "answer": "C"}
+{"question": "一个班级有24个学生，如果每个学生都带了2本书，那么总共有多少本书？\nA. 48本\nB. 36本\nC. 24本\nD. 12本", "answer": "A"}
+{"question": "下列哪个是正确的乘法口诀？\nA. 三三得七\nB. 四四十六\nC. 五五二十五\nD. 六六三十六", "answer": "B"}
+{"question": "如果一个数是另一个数的3倍，并且这个数是15，那么另一个数是多少？\nA. 5\nB. 10\nC. 15\nD. 45", "answer": "A"}
+{"question": "下列哪个图形的周长最长？\nA. 正方形\nB. 长方形\nC. 圆形\nD. 三角形", "answer": "C"}
+```
+
+### Prepare Environment
+
+You need to install the modified sedna package, which adds `JsonlDataParse` to `sedna.datasources`.
+
+Replace the contents of `yourpath/anaconda3/envs/ianvs/lib/python3.x/site-packages/sedna` with the files from `examples/resources/sedna-with-jsonl.zip`.
+
+
+### Run Ianvs
+
+Run the following command:
+
+`ianvs -f examples/llm/singletask_learning_bench/simple_qa/benchmarkingjob.yaml`
+
+## OpenCompass Evaluation
+
+### Prepare Environment
+
+`pip install examples/resources/opencompass-0.2.5-py3-none-any.whl`
+
+### Run Evaluation
+
+`python run_op.py examples/llm/singletask_learning_bench/simple_qa/testalgorithms/gen/op_eval.py`
+
diff --git a/examples/llm_simple_qa/benchmarkingjob.yaml b/examples/llm_simple_qa/benchmarkingjob.yaml
@@ -0,0 +1,72 @@
+benchmarkingjob:
+  # job name of benchmarking; string type;
+  name: "benchmarkingjob"
+  # the url address of job workspace that will reserve the output of tests; string type;
+  workspace: "/home/icyfeather/project/ianvs/workspace"
+
+  # the url address of test environment configuration file; string type;
+  # the file format supports yaml/yml;
+  testenv: "./examples/llm/singletask_learning_bench/simple_qa/testenv/testenv.yaml"
+
+  # the configuration of test object
+  test_object:
+    # test type; string type;
+    # currently the only supported value is "algorithms"; others will be added in succession.
+    type: "algorithms"
+    # test algorithm configuration files; list type;
+    algorithms:
+      # algorithm name; string type;
+      - name: "simple_qa_singletask_learning"
+        # the url address of test algorithm configuration file; string type;
+        # the file format supports yaml/yml;
+        url: "./examples/llm/singletask_learning_bench/simple_qa/testalgorithms/gen/gen_algorithm.yaml"
+
+  # the configuration of ranking leaderboard
+  rank:
+    # rank the leaderboard by test-case evaluation metrics and their sort order; list type;
+    # the sorting priority is based on the sequence of metrics in the list from front to back;
+    sort_by: [ { "acc": "descend" } ]
+
+    # visualization configuration
+    visualization:
+      # mode of visualization in the leaderboard; string type;
+      # There are quite a few possible dataitems in the leaderboard. Not all of them can be shown simultaneously on the screen.
+      # In the leaderboard, we provide the "selected_only" mode for the user to configure what is shown or is not shown.
+      mode: "selected_only"
+      # method of visualization for selected dataitems; string type;
+      # currently the options of value are as follows:
+      #  1> "print_table": print selected dataitems;
+      method: "print_table"
+
+    # selected dataitem configuration
+    # The user can add his/her interested dataitems in terms of "paradigms", "modules", "hyperparameters" and "metrics",
+    # so that the selected columns will be shown.
+    selected_dataitem:
+      # currently the options of value are as follows:
+      #   1> "all": select all paradigms in the leaderboard;
+      #   2> paradigms in the leaderboard, e.g., "singletasklearning"
+      paradigms: [ "all" ]
+      # currently the options of value are as follows:
+      #   1> "all": select all modules in the leaderboard;
+      #   2> modules in the leaderboard, e.g., "basemodel"
+      modules: [ "all" ]
+      # currently the options of value are as follows:
+      #   1> "all": select all hyperparameters in the leaderboard;
+      #   2> hyperparameters in the leaderboard, e.g., "momentum"
+      hyperparameters: [ "all" ]
+      # currently the options of value are as follows:
+      #   1> "all": select all metrics in the leaderboard;
+      #   2> metrics in the leaderboard, e.g., "f1_score"
+      metrics: [ "acc" ]
+
+    # mode for saving selected and all dataitems in the workspace; string type;
+    # currently the options of value are as follows:
+    #  1> "selected_and_all": save selected and all dataitems;
+    #  2> "selected_only": save selected dataitems;
+    save_mode: "selected_and_all"
+
+
+
+
+
+
diff --git a/examples/llm_simple_qa/testalgorithms/data.jsonl b/examples/llm_simple_qa/testalgorithms/data.jsonl
@@ -0,0 +1,23 @@
+{"question": "如果小明有5个苹果，他给了小华3个，那么小明还剩下多少个苹果？\nA. 2个\nB. 3个\nC. 4个\nD. 5个", "answer": "A"}
+{"question": "下列哪个数是最小的质数？\nA. 0\nB. 1\nC. 2\nD. 4", "answer": "C"}
+{"question": "一个长方形的长是10厘米，宽是5厘米，它的周长是多少厘米？\nA. 20厘米\nB. 30厘米\nC. 40厘米\nD. 50厘米", "answer": "B"}
+{"question": "下列哪个分数是最接近1的？\nA. 1/2\nB. 3/4\nC. 4/5\nD. 5/6", "answer": "D"}
+{"question": "如果一个数加上10等于30，那么这个数是多少？\nA. 20\nB. 21\nC. 22\nD. 23", "answer": "A"}
+{"question": "下列哪个算式的结果最大？\nA. 3 + 4\nB. 5 - 2\nC. 6 * 2\nD. 7 ÷ 2", "answer": "C"}
+{"question": "一个班级有24个学生，如果每个学生都带了2本书，那么总共有多少本书？\nA. 48本\nB. 36本\nC. 24本\nD. 12本", "answer": "A"}
+{"question": "下列哪个是正确的乘法口诀？\nA. 三三得七\nB. 四四十六\nC. 五五二十五\nD. 六六三十六", "answer": "B"}
+{"question": "如果一个数是另一个数的3倍，并且这个数是15，那么另一个数是多少？\nA. 5\nB. 10\nC. 15\nD. 45", "answer": "A"}
+{"question": "下列哪个图形的周长最长？\nA. 正方形\nB. 长方形\nC. 圆形\nD. 三角形", "answer": "C"}
+
+{"question": "如下是一个Python函数\"def wait(self, wait_time: int) -> list:all_ready = False\n        while not all_ready:\n            self._instances = self.get_instances()\n            if not self._instances:\n                self._logger.warning(\n                    f\\\"No instance found, waiting {wait_time}s ...\\\",\n                )\n                sleep(wait_time)\n                continue\n            all_ready = True\n            for instance in self._instances:\n                if not instance[\\\"health\\\"]:\n                    self._logger.warning(\n                        f\\\"Instance {instance['name']} is not ready, waiting {wait_time}s ...\\\",\n                    )\n                    sleep(wait_time)\n                    all_ready = False\n                    break\n        return self._instances\"，请问它的作用是什么？\nA. 确保服务或组件的所有实例都可用，然后再继续执行\nB. 函数的作用是在等待指定时间后，立即返回一个包含所有实例的列表，不检查实例的状态\nC. 函数的目的是为每个不健康的实例记录一次警告日志，不进行任何等待或重试\nD. 函数会检查每个实例的健康状态，如果所有实例在首次检查时都健康，就继续等待直到 wait_time 结束，然后返回实例列表", "answer": "A"}
+{"question": "如下是一个Python函数\"def _to_instances(self, controller_instance) -> List[dict]:\n        instance = {}\n        instance[\\\"name\\\"] = controller_instance.name\n        instance[\\\"hostname\\\"] = controller_instance.name\n        instance[\\\"health\\\"] = controller_instance.status == \\\"running\\\" and controller_instance.attrs[\\\"State\\\"][\\\"Health\\\"][\\\"Status\\\"] == \\\"healthy\\\"\n        instance[\\\"env\\\"] = {}\n        for env in controller_instance.attrs[\\\"Config\\\"][\\\"Env\\\"]:\n            variable = env.split(\\\"=\\\")[0]\n            value = env.replace(f\\\"{variable}=\\\", \\\"\\\", 1)\n            instance[\\\"env\\\"][variable] = value\n        return [instance]\"，请问它的作用是什么？\nA. 函数用于修改控制器实例的属性，如名称和主机名\nB. 函数返回一个包含所有控制器实例属性的复杂嵌套结构\nC. 处理单个Docker容器实例并将其信息转换为字典\nD. 函数用于删除控制器实例的环境变量配置", "answer": "C"}
+{"question": "如下是一个Python函数\"instance[\\\"health\\\"] = controller_instance.status == \\\"running\\\" and controller_instance.attrs[\\\"State\\\"][\\\"Health\\\"][\\\"Status\\\"] == \\\"healthy\\\"\"，请问它的作用是什么？\nA. 代码段会删除controller_instance中的status属性\nB. 多个属性组合起来判断实例的健康状态\nC. 如果controller_instance的状态为running，此代码将instance[\\\"health\\\"]设置为False\nD. 代码段检查controller_instance.attrs[\\\"State\\\"][\\\"Health\\\"][\\\"Status\\\"]的值是否为unhealthy，然后相应地更新instance[\\\"health\\\"]", "answer": "B"}
+{"question": "如下是一个Python函数\"result = self.__custom_confs_rx.search(variable)\"，请问它的作用是什么？\nA. 这段代码的作用是将字符串 variable 与 self.__custom_confs_rx 进行替换操作\nB. 这段代码会删除 variable 中所有与 self.__custom_confs_rx 匹配的内容\nC. 这段代码用于计算 variable 和 self.__custom_confs_rx 的长度差\nD. 使用正则表达式匹配变量名，判断是否符合特定的配置项格式", "answer": "D"}
+{"question": "如下是一个Python函数\"if not self.update_needed(self._instances, self._services, configs=self._configs):\"，请问它的作用是什么？\nA. 此函数用于更新类实例中的所有服务和配置\nB. 当 `update_needed` 方法返回 `True` 时，该代码片段将终止程序运行\nC. 调用update_needed方法检查当前配置是否需要更新\nD. 此代码片段用于直接修改 `_instances`、`_services` 和 `_configs` 的值", "answer": "C"}
+{"question": "如下是一个Python函数\"def set_value_from_polygon(self, pol_x, pol_y, val, inside=True):\n        \\\"\\\"\\\"set_value_from_polygon\n        Setting value inside or outside polygon\n        :param pol_x: x position list for a polygon\n        :param pol_y: y position list for a polygon\n        :param val: grid value\n        :param inside: setting data inside or outside\n        \\\"\\\"\\\"\n        # making ring polygon\n        if (pol_x[0] != pol_x[-1]) or (pol_y[0] != pol_y[-1]):\n            np.append(pol_x, pol_x[0])\n            np.append(pol_y, pol_y[0])\n        # setting value for all grid\n        for x_ind in range(self.width):\n            for y_ind in range(self.height):\n                x_pos, y_pos = self.calc_grid_central_xy_position_from_xy_index(\n                    x_ind, y_ind)\n                flag = this.check_inside_polygon(x_pos, y_pos, pol_x, pol_y)\n                if flag is inside:\n                    this.set_value_from_xy_index(x_ind, y_ind, val)\"，请问它的作用是什么？\nA. 根据多边形的形状设置网格地图中的值\nB. 该函数用于计算多边形的面积\nC. 该函数用于绘制多边形图形\nD. 该函数用于从多边形的顶点坐标生成一个新的多边形对象", "answer": "A"}
+{"question": "如下是一个Python函数\"if not check_car_collision(x_list, y_list, yaw_list, ox, oy, kd_tree): return None\"，请问它的作用是什么？\nA. 检查生成的路径是否与障碍物冲突。\nB. 这个函数用来检查给定的列表中是否所有元素都相等\nC. 这个函数返回所有在x_list和y_list中的元素的和\nD. 这个函数用于创建一个新的kd树来存储车辆位置数据", "answer": "A"}
+{"question": "如下是一个Python函数\"heapq.heappush(pq, (calc_cost(start_node, h_dp, config), calc_index(start_node, config)))\"，请问它的作用是什么？\nA. 该函数从堆`pq`中删除一个元素\nB. 将节点添加到优先级队列（使用堆数据结构实现）\nC. 该函数返回堆`pq`中的最大元素\nD. 该函数用于创建一个新的空堆", "answer": "B"}
+{"question": "如下是一个Python函数\"def calc_index(node, x_width, x_min, y_min): return (node.y - y_min) * x_width + (node.x - x_min)\"，请问它的作用是什么？\nA. 将节点坐标转换为一维索引\nB. 函数用于计算节点在二维网格中的行索引\nC. 函数返回的是从给定节点到最小节点的直线距离\nD. 函数用于计算节点的颜色值在一个色彩数组中的索引", "answer": "A"}
+{"question": "如下是一个Python函数\"if use_dynamic_weighting: w = (1 + epsilon - epsilon*depth/upper_bound_depth)\"，请问它的作用是什么？\nA. 该函数用于重置 `w` 的值为固定常数\nB. 代码段检查 `depth` 是否大于 `upper_bound_depth`\nC. 调整启发式成本的计算，引入动态权重，优化搜索效率\nD. 该函数将 `w` 的值与 `depth` 成正比增加", "answer": "C"}
+
+
diff --git a/examples/llm_simple_qa/testalgorithms/gen/basemodel.py b/examples/llm_simple_qa/testalgorithms/gen/basemodel.py
@@ -0,0 +1,98 @@
+# Copyright 2022 The KubeEdge Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import, division, print_function
+
+import os
+import tempfile
+import time
+import zipfile
+import logging
+
+import numpy as np
+from sedna.common.config import Context
+from sedna.common.class_factory import ClassType, ClassFactory
+
+
+from transformers import AutoModelForCausalLM, AutoTokenizer
+device = "cuda" # the device to load the model onto
+
+
+logging.disable(logging.WARNING)
+
+__all__ = ["BaseModel"]
+
+os.environ['BACKEND_TYPE'] = 'TORCH'
+
+
+@ClassFactory.register(ClassType.GENERAL, alias="gen")
+class BaseModel:
+
+    def __init__(self, **kwargs):
+        self.model = AutoModelForCausalLM.from_pretrained(
+            "/home/icyfeather/models/Qwen2-0.5B-Instruct",
+            torch_dtype="auto",
+            device_map="auto"
+        )
+        self.tokenizer = AutoTokenizer.from_pretrained("/home/icyfeather/models/Qwen2-0.5B-Instruct")
+
+    def train(self, train_data, valid_data=None, **kwargs):
+        print("BaseModel doesn't need to train")
+
+
+    def save(self, model_path):
+        print("BaseModel doesn't need to save")
+
+    def predict(self, data, input_shape=None, **kwargs):
+        print("BaseModel predict")
+        answer_list = []
+        for line in data:
+            response = self._infer(line)
+            answer_list.append(response)
+        return answer_list
+
+    def load(self, model_url=None):
+        print("BaseModel load")
+
+    def evaluate(self, data, model_path, **kwargs):
+        print("BaseModel evaluate")
+
+    def _infer(self, prompt, system=None):
+        if system:   
+            messages = [
+                {"role": "system", "content": system},
+                {"role": "user", "content": prompt}
+            ]
+        else:
+            messages = [
+                {"role": "user", "content": prompt}
+            ]
+        text = self.tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,
+            add_generation_prompt=True
+        )
+        model_inputs = self.tokenizer([text], return_tensors="pt").to(device)
+
+        generated_ids = self.model.generate(
+            model_inputs.input_ids,
+            max_new_tokens=512
+        )
+        generated_ids = [
+            output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+        ]
+
+        response = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+
+        return response