LLM_Agent_Benchmark

Signed-off-by: Frank-lilinjie <[email protected]> delete proposal in this branch Signed-off-by: Frank-lilinjie <[email protected]> fix: deprecate outdated interface of pandas Signed-off-by: Yu Fan <[email protected]> fix: pylint R0917 error Signed-off-by: Yu Fan <[email protected]> fix the issuse of Chinese comments Signed-off-by: Frank-lilinjie <[email protected]> parent 5fd70d2 author Frank-lilinjie <[email protected]> 1730356419 +0800 committer Frank-lilinjie <[email protected]> 1730365262 +0800 parent 5fd70d2 author Frank-lilinjie <[email protected]> 1730356419 +0800 committer Frank-lilinjie <[email protected]> 1730365240 +0800 Print replaced with logging Signed-off-by: Frank-lilinjie <[email protected]> add a proposal of Smart Coding benchmark suite Signed-off-by: boX <[email protected]> update and improve the proposal Improve the architecture diagram Signed-off-by: boX <[email protected]> update and improve the proposal Signed-off-by: boX <[email protected]> add Proposal for Large Language Model Edge Benchmark Suite: Implementation on KubeEdge-lanvs Signed-off-by: yexiaochuan <[email protected]> add Proposal for Large Language Model Edge Benchmark Suite: Implementation on KubeEdge-lanvs Signed-off-by: yexiaochuan <[email protected]> llm suite benchmark implement Signed-off-by: yexiaochuan <[email protected]> Revert "llm suite benchmark implement" This reverts commit f341b0f. 
Signed-off-by: yexiaochuan <[email protected]> Fix llm edge benchmark suite description and highlight edge in the name of the documents Signed-off-by: yexiaochuan <[email protected]> fix pylint 3.9 too-many-positional-arguments Signed-off-by: yexiaochuan <[email protected]> Trigger CI Signed-off-by: yexiaochuan <[email protected]> Trigger CI Signed-off-by: yexiaochuan <[email protected]> Fix pylint disable R0913 Signed-off-by: yexiaochuan <[email protected]> Fix pylint disable R0917 Signed-off-by: yexiaochuan <[email protected]> Revert: Remove extra pylint fix changes Signed-off-by: yexiaochuan <[email protected]> chore: Empty commit to trigger CI Signed-off-by: yexiaochuan <[email protected]> add: Heterogeneous Multi-Edge Collaborative Neural Network Inference for High Mobility Scenarios: Base on KubeEdge-Ianvs proposal Signed-off-by: wyoung1 <[email protected]> coding for ospp Signed-off-by: wyoung1 <[email protected]> fix pylint Signed-off-by: wyoung1 <[email protected]> fix pylint issue Signed-off-by: wyoung1 <[email protected]> add: add a simple demo of feadereated learning in ianvs Signed-off-by: Marchons <[email protected]> Signed-off-by: Marchons <[email protected]> Revert "OSPP: Development of Federated Incremental Learning for Label Scarcity: Base on KubeEdge-Ianvs" Revert "Merge pull request kubeedge#160 from kubeedge/revert-143-dev_script" This reverts commit 4f01ee5, reversing changes made to 9553051. 
Signed-off-by: Marchons <[email protected]> add Impl for llm edge benchmark suite Signed-off-by: yexiaochuan <[email protected]> adapt Impl in core for llm edge benchmark suite Signed-off-by: yexiaochuan <[email protected]> Fix impl on singletast_learning with compression Signed-off-by: yexiaochuan <[email protected]> chore: trigger CI Signed-off-by: yexiaochuan <[email protected]> CI: fix pylint warnings Signed-off-by: yexiaochuan <[email protected]> CI: fix pylint warnings Signed-off-by: yexiaochuan <[email protected]> fix: update comments and configuration parameters Signed-off-by: yexiaochuan <[email protected]> fix core of dataset Signed-off-by: Frank-lilinjie <[email protected]>
Frank-lilinjie · Oct 31, 2024 · 7829adf · 7829adf
1 parent 176107b
commit 7829adf
Show file tree

Hide file tree

Showing 15 changed files with 386 additions and 120 deletions.
diff --git a/...ning/Personalized LLM Agent based on KubeEdge-Ianvs Cloud-Edge Collaboration.md b/...ning/Personalized LLM Agent based on KubeEdge-Ianvs Cloud-Edge Collaboration.md
diff --git a/docs/proposals/algorithms/single-task-learning/images/Q2A_task_plan.png b/docs/proposals/algorithms/single-task-learning/images/Q2A_task_plan.png
diff --git a/docs/proposals/algorithms/single-task-learning/images/agent-overview.png b/docs/proposals/algorithms/single-task-learning/images/agent-overview.png
diff --git a/docs/proposals/algorithms/single-task-learning/images/bert_score.png b/docs/proposals/algorithms/single-task-learning/images/bert_score.png
diff --git a/docs/proposals/algorithms/single-task-learning/images/continual_llm_agent.png b/docs/proposals/algorithms/single-task-learning/images/continual_llm_agent.png
diff --git a/docs/proposals/algorithms/single-task-learning/images/llm_agent_ianvs.png b/docs/proposals/algorithms/single-task-learning/images/llm_agent_ianvs.png
diff --git a/docs/proposals/algorithms/single-task-learning/images/personalized_agent.png b/docs/proposals/algorithms/single-task-learning/images/personalized_agent.png
diff --git a/examples/llm-agent/config/config.json b/examples/llm-agent/config/config.json
@@ -0,0 +1,11 @@
+{
+    "tokenizer_dir": "./examples/LLM-Agent-Benchmark/pretrains/Langboat/bloom-1b4-zh",
+    "auth_token": "<YOUR_HUGGINGFACE_TOKEN>",
+    "data_dir" :"./examples/LLM-Agent-Benchmark/dataset/activity_classification.json",
+    "token_factor": 32,
+    "half_model": true,
+    "token_padding": "right",
+    "trust_remote": true,
+    "device": "auto",
+    "output_dir": "./checkpoint"
+  }
diff --git a/examples/llm-agent/config/train_config.json b/examples/llm-agent/config/train_config.json
@@ -0,0 +1,11 @@
+{
+    "per_device_train_batch_size":5,
+    "logging_steps":50,
+    "num_train_epochs":2,
+    "output_dir":"./checkpoint",
+    "half_lora":"True",
+    "learning_rate":2e-4,
+    "weight_decay":0.01,
+    "save_strategy":"epoch",
+    "save_total_limit":10
+}
diff --git a/examples/llm-agent/singletask_learning_bench/README.md b/examples/llm-agent/singletask_learning_bench/README.md
@@ -0,0 +1,115 @@
+# Quick Start about Personalized LLM Agent 
+
+Welcome to Ianvs! Ianvs aims to test the performance of distributed synergy AI solutions following recognized standards, in order to facilitate more efficient and effective development. This quick start helps you test your algorithm on Ianvs with a simple example of a personalized LLM agent. You can reduce manual procedures to just a few steps so that you can build and start your distributed synergy AI solution development within minutes.
+
+Before using Ianvs, you might want to have the device ready:
+
+- One machine is all you need, i.e., a laptop or a virtual machine is sufficient and a cluster is not necessary
+- 2 CPUs or more
+- 4GB+ free memory, depending on the algorithm and simulation settings
+- 10GB+ free disk space
+- Internet connection for GitHub and pip, etc
+- Python 3.6+ installed
+
+In this example, we are using the Linux platform with Python 3.7.1. If you are using Windows, most steps should still apply but a few like commands and package requirements might be different.
+
+The proposal for this demo: [Personalized LLM Agent based on KubeEdge-Ianvs Cloud-Edge Collaboration](https://github.com/Frank-lilinjie/ianvs/blob/main/docs/proposals/algorithms/single-task-learning/Personalized%20LLM%20Agent%20based%20on%20KubeEdge-Ianvs%20Cloud-Edge%20Collaboration.md)
+
+## Step 1. Ianvs Preparation
+
+First, we download the code of Ianvs. Assuming that we are using `/ianvs` as workspace, Ianvs can be cloned with `Git` as:
+
+```shell
+mkdir /ianvs
+cd /ianvs #One might use another path preferred
+
+mkdir project
+cd project
+git clone https://github.com/kubeedge/ianvs.git   
+```
+
+Then, we install third-party dependencies for ianvs.
+
+**Attention**: The project requires updating the sedna.zip in the file to sednaJsonForAgent.zip.
+
+```shell
+sudo apt-get update
+sudo apt-get install libgl1-mesa-glx -y
+python -m pip install --upgrade pip
+
+cd ianvs 
+python -m pip install ./examples/resources/third_party/*
+python -m pip install -r requirements.txt
+```
+
+We are now ready to install Ianvs.
+
+```shell
+python setup.py install 
+```
+
+## Step 2. Dataset and Model Preparation
+
+In this case, we have provided datasets for three different scenarios: human pose detection, environmental sound classification, and facial recognition. These datasets are generated by GPT-4. They adhere to the standard Agent data structure, featuring two distinct roles: User and Assistant. Their content corresponds to the prompt and label, respectively. You can customize your dataset in a similar format.
+
+- Place the dataset at the following path: `./examples/LLM-Agent-Benchmark/dataset/`
+
+- Place the configuration file at the following path: `./examples/LLM-Agent-Benchmark/config/`
+
+- Place the pre-trained model at the following path: `./examples/LLM-Agent-Benchmark/pretrains/`
+
+- Place the Hugging Face `evaluate` library at the following path: `./examples/LLM-Agent-Benchmark/evaluate/`. Its source code can be obtained from [Evaluate](https://github.com/huggingface/evaluate)
+
+The pre-trained model used in this example is [bloom-1b4-zh](https://huggingface.co/Langboat/bloom-1b4-zh)
+
+The file path for the project is as follows:
+
+```
+-ianvs
+	|-....
+	|-examples
+		|-...
+		|-LLM-Agent-Benchmark
+			|-config
+			|-dataset
+			|-evaluate
+			|-pretrains
+			|-singletask_learning_bench
+				|-testalgorithms
+					|-basemodel.py
+					|-test_algorithm.yaml
+       	|-testenv
+       		|-rouge.py
+       		|-testenv.yaml
+       	|-benchmarkingjob.yaml
+       	|-README.md
+```
+
+
+
+## Step 3. Ianvs Execution and Presentation
+
+We are now ready to run the ianvs for benchmarking.
+
+```shell
+cd /ianvs/project
+
+ianvs -f ./examples/LLM-Agent-Benchmark/singletask_learning_bench/benchmarkingjob.yaml
+```
+
+Finally, the user can check the result of benchmarking on the console and also in the output path (e.g. `./workspace`) defined in the benchmarking config file (e.g. `benchmarkingjob.yaml`). In this quick start, we have done all configurations for you, and interested readers can refer to [benchmarkingJob.yaml](https://ianvs.readthedocs.io/en/latest/guides/how-to-test-algorithms.html#step-1-test-environment-preparation) for more details.
+
+| rank | algorithm | rouge1   | rouge2   | rougeL   | paradigm           | basemodel | basemodel-config                                  | basemodel-train_config                                  | time                | url                                                          |
+| ---- | --------- | -------- | -------- | -------- | ------------------ | --------- | ------------------------------------------------- | ------------------------------------------------------- | ------------------- | ------------------------------------------------------------ |
+| 1    | LLM_agent | 0.401155 | 0.310173 | 0.401876 | singletasklearning | LLM_agent | ./examples/LLM-Agent-Benchmark/config/config.json | ./examples/LLM-Agent-Benchmark/config/train_config.json | 2024-09-24 15:08:17 | ./workspace/benchmarkingjob/LLM_agent/adb8baf8-7a43-11ef-960e-b07b25dd6922 |
+
+This ends the quick start experiment.
+
+# What is next
+
+If any problems happen, the user can refer to [the issue page on GitHub](https://github.com/kubeedge/ianvs/issues) for help and is also welcome to raise a new issue.
+
+Enjoy your journey on Ianvs!
+
+
+
diff --git a/examples/llm-agent/singletask_learning_bench/benchmarkingjob.yaml b/examples/llm-agent/singletask_learning_bench/benchmarkingjob.yaml
@@ -0,0 +1,66 @@
+benchmarkingjob:
+  # job name of benchmarking; string type;
+  name: "benchmarkingjob"
+  # the url address of job workspace that will reserve the output of tests; string type;
+  workspace: "./workspace"
+
+  # the url address of test environment configuration file; string type;
+  # the file format supports yaml/yml;
+  testenv: "./examples/LLM-Agent-Benchmark/singletask_learning_bench/testenv/testenv.yaml"
+
+  # the configuration of test object
+  test_object:
+    # test type; string type;
+    # currently the only option of value is "algorithms"; the others will be added in succession.
+    type: "algorithms"
+    # test algorithm configuration files; list type;
+    algorithms:
+      # algorithm name; string type;
+      - name: "LLM_agent"
+        # the url address of test algorithm configuration file; string type;
+        # the file format supports yaml/yml
+        url: "./examples/LLM-Agent-Benchmark/singletask_learning_bench/testalgorithms/test_algorithm.yaml"
+
+  # the configuration of ranking leaderboard
+  rank:
+    # rank leaderboard with metric of test case's evaluation and order; list type;
+    # the sorting priority is based on the sequence of metrics in the list from front to back;
+    sort_by: [{ "rouge1": "descend" }]
+
+    # visualization configuration
+    visualization:
+      # mode of visualization in the leaderboard; string type;
+      # There are quite a few possible dataitems in the leaderboard. Not all of them can be shown simultaneously on the screen.
+      # In the leaderboard, we provide the "selected_only" mode for the user to configure what is shown or is not shown.
+      mode: "selected_only"
+      # method of visualization for selected dataitems; string type;
+      # currently the options of value are as follows:
+      #  1> "print_table": print selected dataitems;
+      method: "print_table"
+
+    # selected dataitem configuration
+    # The user can add his/her interested dataitems in terms of "paradigms", "modules", "hyperparameters" and "metrics",
+    # so that the selected columns will be shown.
+    selected_dataitem:
+      # currently the options of value are as follows:
+      #   1> "all": select all paradigms in the leaderboard;
+      #   2> paradigms in the leaderboard, e.g., "singletasklearning"
+      paradigms: [ "all" ]
+      # currently the options of value are as follows:
+      #   1> "all": select all modules in the leaderboard;
+      #   2> modules in the leaderboard, e.g., "basemodel"
+      modules: [ "all" ]
+      # currently the options of value are as follows:
+      #   1> "all": select all hyperparameters in the leaderboard;
+      #   2> hyperparameters in the leaderboard, e.g., "momentum"
+      hyperparameters: [ "all" ]
+      # currently the options of value are as follows:
+      #   1> "all": select all metrics in the leaderboard;
+      #   2> metrics in the leaderboard, e.g., "F1_SCORE"
+      metrics: ["rouge1","rouge2","rougeL"]
+
+    # mode of saving selected and all dataitems in workspace `./rank`; string type;
+    # currently the options of value are as follows:
+    #  1> "selected_and_all": save selected and all dataitems;
+    #  2> "selected_only": save selected dataitems;
+    save_mode: "selected_and_all"