Skip to content

Commit

Permalink
add AgentBench.old
Browse files Browse the repository at this point in the history
  • Loading branch information
lr-tsinghua11 committed Oct 20, 2023
1 parent 22f7083 commit 01cd2fa
Show file tree
Hide file tree
Showing 366 changed files with 140,234 additions and 0 deletions.
4 changes: 4 additions & 0 deletions AgentBench.old/configs/agents/do_nothing.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
module: "src.agents.DoNothingAgent"
parameters:
name: "Do-Nothing-Agent"
sleep: 0.01
10 changes: 10 additions & 0 deletions AgentBench.old/configs/agents/tgi_clients/AgentLM-13b.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
module: "src.agents.TGIAgent"
parameters:
ip: "http://127.0.0.1"
# You can also serve AgentLM on multiple addresses;
# the TGIAgent will automatically balance the load across them.
# address ∈ [address_from, address_to), i.e. address_to itself is excluded
address_from: 30013
address_to: 30014
model_name: "AgentLM-13b"
max_tokens: 4096
10 changes: 10 additions & 0 deletions AgentBench.old/configs/agents/tgi_clients/AgentLM-70b.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
module: "src.agents.TGIAgent"
parameters:
ip: "http://127.0.0.1"
# You can also serve AgentLM on multiple addresses;
# the TGIAgent will automatically balance the load across them.
# address ∈ [address_from, address_to), i.e. address_to itself is excluded
address_from: 30070
address_to: 30071
model_name: "AgentLM-70b"
max_tokens: 4096
10 changes: 10 additions & 0 deletions AgentBench.old/configs/agents/tgi_clients/AgentLM-7b.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
module: "src.agents.TGIAgent"
parameters:
ip: "http://127.0.0.1"
# You can also serve AgentLM on multiple addresses;
# the TGIAgent will automatically balance the load across them.
# address ∈ [address_from, address_to), i.e. address_to itself is excluded
address_from: 30007
address_to: 30008
model_name: "AgentLM-7b"
max_tokens: 4096
9 changes: 9 additions & 0 deletions AgentBench.old/configs/tasks/alfworld/dev.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
module: "src.tasks.alfworld.ALFWorld"
parameters:
name: "ALFWorld"
data_path: "/AgentBench/data/alfworld" # TODO: replace this with your own data path
config_path: "src/tasks/alfworld/configs/base_config.yaml"
prompts_path: "src/tasks/alfworld/prompts/alfworld_multiturn_react.json"
split: "dev"
max_step: 35

9 changes: 9 additions & 0 deletions AgentBench.old/configs/tasks/alfworld/std.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
module: "src.tasks.alfworld.ALFWorld"
parameters:
name: "ALFWorld"
data_path: "/AgentBench/data/alfworld" # TODO: replace this with your own data path
config_path: "src/tasks/alfworld/configs/base_config.yaml"
prompts_path: "src/tasks/alfworld/prompts/alfworld_multiturn_react.json"
split: "std"
max_step: 35

6 changes: 6 additions & 0 deletions AgentBench.old/configs/tasks/card_game/dev.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
module: "src.tasks.CardGame"

parameters:
name: "CardGame"
port: 12347
test_time: 3
6 changes: 6 additions & 0 deletions AgentBench.old/configs/tasks/card_game/ext.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
module: "src.tasks.CardGame"

parameters:
name: "CardGame"
port: 12349
test_time: 50
6 changes: 6 additions & 0 deletions AgentBench.old/configs/tasks/card_game/std.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
module: "src.tasks.CardGame"

parameters:
name: "CardGame"
port: 12342
test_time: 5
6 changes: 6 additions & 0 deletions AgentBench.old/configs/tasks/dbbench/dev.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
module: src.tasks.DBBench

parameters:
name: "DBBench"
data_file: data/dbbench/dev.jsonl
max_round: 15
6 changes: 6 additions & 0 deletions AgentBench.old/configs/tasks/dbbench/std.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
module: src.tasks.DBBench

parameters:
name: "DBBench"
data_file: data/dbbench/standard.jsonl
max_round: 15
6 changes: 6 additions & 0 deletions AgentBench.old/configs/tasks/knowledgegraph/dev.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
module: "src.tasks.KnowledgeGraph"
parameters:
name: "KnowledgeGraph-dev"
round: 15
data_file: "data/knowledgegraph/dev.json"
sparql_url: "http://164.107.116.56:3093/sparql"
6 changes: 6 additions & 0 deletions AgentBench.old/configs/tasks/knowledgegraph/std.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
module: "src.tasks.KnowledgeGraph"
parameters:
name: "KnowledgeGraph-std"
round: 15
data_file: "data/knowledgegraph/std.json"
sparql_url: "http://164.107.116.56:3093/sparql"
22 changes: 22 additions & 0 deletions AgentBench.old/configs/tasks/mind2web/dev.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
module: "src.tasks.Mind2Web"
parameters:
name: "Mind2Web-dev"
data:
data_path: "."
cache_path: "./data/mind2web/.cache/data"
test_split_files:
test_domain: /root/work/data/data_dev/*.json
score_file: /root/work/data/scores_all_data.pkl
train:
neg_ratio: 0.2
num_candidates: 5
max_context_len: 512
model:
mode: "multichoice"
name: flan-t5-base
model_name_or_path: "google/flan-t5-base"
max_seq_length: 2048
eval:
topk: 10
seed: 123
llm_prompt: data/mind2web/prompt/llm_prompt_cot.json
22 changes: 22 additions & 0 deletions AgentBench.old/configs/tasks/mind2web/std.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
module: "src.tasks.Mind2Web"
parameters:
name: "Mind2Web-std"
data:
data_path: "."
cache_path: "./data/mind2web/.cache/data"
test_split_files:
test_domain: /root/work/data/data_std/*.json
score_file: /root/work/data/scores_all_data.pkl
train:
neg_ratio: 0.2
num_candidates: 5
max_context_len: 512
model:
mode: "multichoice"
name: flan-t5-base
model_name_or_path: "google/flan-t5-base"
max_seq_length: 2048
eval:
topk: 10
seed: 123
llm_prompt: data/mind2web/prompt/llm_prompt_cot.json
21 changes: 21 additions & 0 deletions AgentBench.old/configs/tasks/os_interaction/dev.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
module: "src.tasks.OSInteraction"
parameters:
name: "OS-Interaction"
match_problem: true
check_problem: true
round_limit: 8

docker_config:
localhost: local-os
directory: data/os_interaction/res/dockerfiles

scripts:
directory: data/os_interaction/res/scripts

data_config:
files:
- problem_file: data/os_interaction/data/dev.json
script_dir: data/os_interaction/scripts/dev/

bk: []
ignore: []
33 changes: 33 additions & 0 deletions AgentBench.old/configs/tasks/os_interaction/std.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
module: "src.tasks.OSInteraction"
parameters:
name: "OS-Interaction"
match_problem: true
check_problem: true
round_limit: 8

docker_config:
localhost: local-os
directory: data/os_interaction/res/dockerfiles

scripts:
directory: data/os_interaction/res/scripts

data_config:
files:
- problem_file: data/os_interaction/data/1/*.json
script_dir: data/os_interaction/scripts/1/
- problem_file: data/os_interaction/data/2/*.json
script_dir: data/os_interaction/scripts/2/
- problem_file: data/os_interaction/data/3/*.json
script_dir: data/os_interaction/scripts/3/
- problem_file: data/os_interaction/data/4/*.json
script_dir: data/os_interaction/scripts/4/
- problem_file: data/os_interaction/data/5/*.json
script_dir: data/os_interaction/scripts/5/
- problem_file: data/os_interaction/data/6/*.json
script_dir: data/os_interaction/scripts/6/
- problem_file: data/os_interaction/data/7/*.json
script_dir: data/os_interaction/scripts/7/

bk: []
ignore: []
8 changes: 8 additions & 0 deletions AgentBench.old/configs/tasks/webshop/dev.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
module: src.tasks.WebShop

parameters:
name: "WebShop"
start: 200
end: 280
num_envs: 3
worker_limit: 3
8 changes: 8 additions & 0 deletions AgentBench.old/configs/tasks/webshop/std.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
module: src.tasks.WebShop

parameters:
name: "WebShop"
start: 0
end: 200
num_envs: 3
worker_limit: 3
34 changes: 34 additions & 0 deletions AgentBench.old/data/alfworld/dev.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"pick_and_place": [
"json_2.1.1/valid_unseen/pick_and_place_simple-SoapBottle-None-Toilet-424/trial_T20190907_004404_604165/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_and_place_simple-Pencil-None-Shelf-308/trial_T20190908_122154_042763/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_and_place_simple-SaltShaker-None-Cabinet-10/trial_T20190906_191445_723170/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_and_place_simple-Mug-None-Desk-308/trial_T20190909_210238_431966/game.tw-pddl"
],
"pick_clean_then_place": [
"json_2.1.1/valid_unseen/pick_clean_then_place_in_recep-Cloth-None-Cabinet-424/trial_T20190908_022436_073995/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_clean_then_place_in_recep-SoapBar-None-Cabinet-424/trial_T20190908_215019_162873/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_clean_then_place_in_recep-Pan-None-CounterTop-10/trial_T20190908_032543_712058/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_clean_then_place_in_recep-SoapBar-None-CounterTop-424/trial_T20190907_074106_050405/game.tw-pddl"
],
"pick_heat_then_place": [
"json_2.1.1/valid_unseen/pick_heat_then_place_in_recep-Cup-None-Cabinet-10/trial_T20190907_083346_800823/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_heat_then_place_in_recep-Cup-None-Cabinet-10/trial_T20190907_083507_594820/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_heat_then_place_in_recep-Tomato-None-GarbageCan-10/trial_T20190908_225453_272533/game.tw-pddl"
],
"pick_cool_then_place": [
"json_2.1.1/valid_unseen/pick_cool_then_place_in_recep-Potato-None-Microwave-10/trial_T20190907_033157_424297/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_cool_then_place_in_recep-Pan-None-CounterTop-10/trial_T20190908_114622_738670/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_cool_then_place_in_recep-Bread-None-CounterTop-10/trial_T20190908_091747_866951/game.tw-pddl"
],
"look_at_obj": [
"json_2.1.1/valid_unseen/look_at_obj_in_light-Mug-None-DeskLamp-308/trial_T20190908_161733_213242/game.tw-pddl",
"json_2.1.1/valid_unseen/look_at_obj_in_light-Bowl-None-DeskLamp-308/trial_T20190907_133935_066606/game.tw-pddl",
"json_2.1.1/valid_unseen/look_at_obj_in_light-Pencil-None-DeskLamp-308/trial_T20190908_220656_510400/game.tw-pddl"
],
"pick_two_obj": [
"json_2.1.1/valid_unseen/pick_two_obj_and_place-Pillow-None-Sofa-219/trial_T20190907_163327_486300/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_two_obj_and_place-SoapBar-None-GarbageCan-424/trial_T20190909_064053_839817/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_two_obj_and_place-CD-None-Safe-308/trial_T20190907_051013_060265/game.tw-pddl"
]
}
64 changes: 64 additions & 0 deletions AgentBench.old/data/alfworld/std.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
{
"pick_and_place": [
"json_2.1.1/valid_unseen/pick_and_place_simple-Pencil-None-Shelf-308/trial_T20190908_121952_610012/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_and_place_simple-PepperShaker-None-Drawer-10/trial_T20190918_154424_844749/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_and_place_simple-Watch-None-Safe-219/trial_T20190907_074643_810052/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_and_place_simple-SaltShaker-None-Drawer-10/trial_T20190909_021613_077537/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_and_place_simple-SaltShaker-None-Drawer-10/trial_T20190909_021728_339782/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_and_place_simple-SaltShaker-None-Cabinet-10/trial_T20190906_191501_563086/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_and_place_simple-SoapBottle-None-Toilet-424/trial_T20190907_004351_281384/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_and_place_simple-SoapBottle-None-Toilet-424/trial_T20190907_004321_405868/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_and_place_simple-SaltShaker-None-Cabinet-10/trial_T20190906_191429_743650/game.tw-pddl"
],
"pick_clean_then_place": [
"json_2.1.1/valid_unseen/pick_clean_then_place_in_recep-Mug-None-CoffeeMachine-10/trial_T20190907_221355_558505/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_clean_then_place_in_recep-Egg-None-Microwave-10/trial_T20190909_120712_273910/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_clean_then_place_in_recep-Bowl-None-Cabinet-10/trial_T20190909_061158_110530/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_clean_then_place_in_recep-Plate-None-CounterTop-10/trial_T20190908_213533_897289/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_clean_then_place_in_recep-SoapBar-None-CounterTop-424/trial_T20190907_074045_109439/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_clean_then_place_in_recep-SoapBar-None-CounterTop-424/trial_T20190907_074124_966890/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_clean_then_place_in_recep-Knife-None-CounterTop-10/trial_T20190909_110347_624008/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_clean_then_place_in_recep-Spatula-None-Drawer-10/trial_T20190907_080730_211959/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_clean_then_place_in_recep-Cloth-None-Cabinet-424/trial_T20190908_022321_380927/game.tw-pddl"
],
"pick_heat_then_place": [
"json_2.1.1/valid_unseen/pick_heat_then_place_in_recep-Mug-None-Cabinet-10/trial_T20190909_021247_306737/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_heat_then_place_in_recep-Apple-None-GarbageCan-10/trial_T20190908_145356_918528/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_heat_then_place_in_recep-Egg-None-GarbageCan-10/trial_T20190908_113610_425142/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_heat_then_place_in_recep-Apple-None-GarbageCan-10/trial_T20190908_145143_820541/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_heat_then_place_in_recep-Apple-None-GarbageCan-10/trial_T20190908_145050_918567/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_heat_then_place_in_recep-Tomato-None-GarbageCan-10/trial_T20190908_225359_617900/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_heat_then_place_in_recep-Potato-None-GarbageCan-10/trial_T20190907_161745_664033/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_heat_then_place_in_recep-Mug-None-Cabinet-10/trial_T20190909_021200_669381/game.tw-pddl"
],
"pick_cool_then_place": [
"json_2.1.1/valid_unseen/pick_cool_then_place_in_recep-Mug-None-Cabinet-10/trial_T20190909_121635_622676/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_cool_then_place_in_recep-Lettuce-None-CounterTop-10/trial_T20190909_174807_646433/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_cool_then_place_in_recep-Mug-None-Cabinet-10/trial_T20190909_121710_650938/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_cool_then_place_in_recep-Lettuce-None-CounterTop-10/trial_T20190909_123133_763972/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_cool_then_place_in_recep-Tomato-None-Microwave-10/trial_T20190909_102608_318800/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_cool_then_place_in_recep-Potato-None-Microwave-10/trial_T20190907_033228_194678/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_cool_then_place_in_recep-Tomato-None-Microwave-10/trial_T20190909_102644_926781/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_cool_then_place_in_recep-Mug-None-CoffeeMachine-10/trial_T20190907_183715_299073/game.tw-pddl"
],
"look_at_obj": [
"json_2.1.1/valid_unseen/look_at_obj_in_light-CD-None-DeskLamp-308/trial_T20190908_141942_810052/game.tw-pddl",
"json_2.1.1/valid_unseen/look_at_obj_in_light-Book-None-DeskLamp-308/trial_T20190908_020048_814402/game.tw-pddl",
"json_2.1.1/valid_unseen/look_at_obj_in_light-Mug-None-DeskLamp-308/trial_T20190908_201421_021646/game.tw-pddl",
"json_2.1.1/valid_unseen/look_at_obj_in_light-Pencil-None-DeskLamp-308/trial_T20190908_220604_010430/game.tw-pddl",
"json_2.1.1/valid_unseen/look_at_obj_in_light-Pencil-None-DeskLamp-308/trial_T20190908_220545_153480/game.tw-pddl",
"json_2.1.1/valid_unseen/look_at_obj_in_light-CD-None-DeskLamp-308/trial_T20190908_142046_281296/game.tw-pddl",
"json_2.1.1/valid_unseen/look_at_obj_in_light-Bowl-None-DeskLamp-308/trial_T20190907_133919_856963/game.tw-pddl",
"json_2.1.1/valid_unseen/look_at_obj_in_light-Mug-None-DeskLamp-308/trial_T20190908_201444_037645/game.tw-pddl"
],
"pick_two_obj": [
"json_2.1.1/valid_unseen/pick_two_obj_and_place-KeyChain-None-Safe-219/trial_T20190909_012027_782483/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_two_obj_and_place-PepperShaker-None-Drawer-10/trial_T20190912_221016_460197/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_two_obj_and_place-SoapBar-None-GarbageCan-424/trial_T20190909_064309_357168/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_two_obj_and_place-CD-None-Safe-308/trial_T20190907_051056_585414/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_two_obj_and_place-SoapBar-None-GarbageCan-424/trial_T20190909_064221_368939/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_two_obj_and_place-CD-None-Safe-308/trial_T20190907_050942_897916/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_two_obj_and_place-KeyChain-None-Safe-219/trial_T20190909_011803_423115/game.tw-pddl",
"json_2.1.1/valid_unseen/pick_two_obj_and_place-PepperShaker-None-Drawer-10/trial_T20190908_010306_215435/game.tw-pddl"
]
}
Loading

0 comments on commit 01cd2fa

Please sign in to comment.