From 896198af51dd86dc3cfc2e258c3479948844e283 Mon Sep 17 00:00:00 2001 From: Merwane Hamadi Date: Mon, 3 Jul 2023 15:55:17 -0700 Subject: [PATCH 1/6] Integrate smol developer with agbenchmark --- benchmarks.py | 33 +++++++++++++++++++++++++++++++++ config.json | 5 +++++ regression_tests.json | 7 +++++++ 3 files changed, 45 insertions(+) create mode 100644 benchmarks.py create mode 100644 config.json create mode 100644 regression_tests.json diff --git a/benchmarks.py b/benchmarks.py new file mode 100644 index 000000000..cd1118a21 --- /dev/null +++ b/benchmarks.py @@ -0,0 +1,33 @@ +import os +import glob +import subprocess +import sys +from typing import Tuple + + +def run_specific_agent(task: str) -> Tuple[str, int]: + # Construct the command + command = ['python', 'main_no_modal.py', task] + subprocess.run(command, text=True) + +def execute_generated_files(): + # Navigate to generated directory + os.chdir('generated') + + # Iterate over every .txt file in the directory + for file_name in glob.glob('*.txt'): + with open(file_name, 'r') as file: + python_code = file.read() + python_code = python_code.replace('```python', '') + python_code = python_code.replace('```', '') + # Execute the code in the .txt file + exec(python_code) + + +if __name__ == "__main__": + if len(sys.argv) != 2: + print("Usage: python script.py ") + sys.exit(1) + task = sys.argv[1] + run_specific_agent(task) + execute_generated_files() diff --git a/config.json b/config.json new file mode 100644 index 000000000..cb391de81 --- /dev/null +++ b/config.json @@ -0,0 +1,5 @@ +{ + "workspace": "generated", + "func_path": "benchmarks.py", + "cutoff": 60 +} diff --git a/regression_tests.json b/regression_tests.json new file mode 100644 index 000000000..e3633a2af --- /dev/null +++ b/regression_tests.json @@ -0,0 +1,7 @@ +{ + "TestWriteFile": { + "difficulty": "basic", + "dependencies": [], + "test": "agbenchmark/tests/basic_abilities/write_file/write_file_test.py" + } +} \ No newline at end of file From 5a3ad43103b238b9c8f2a2acceff250888be263e Mon Sep 17 00:00:00 2001 From: Silen Naihin Date: Tue, 4 Jul 2023 00:30:34 -0400 Subject: [PATCH 2/6] update config --- config.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/config.json b/config.json index cb391de81..d0a72bd85 100644 --- a/config.json +++ b/config.json @@ -1,5 +1,6 @@ { "workspace": "generated", - "func_path": "benchmarks.py", + "entry_path": "benchmarks.py", + "home_path": "", "cutoff": 60 } From c52b14b1d5b1b74d886f08d9914e7f43437f609d Mon Sep 17 00:00:00 2001 From: Silen Naihin Date: Mon, 10 Jul 2023 21:36:25 -0400 Subject: [PATCH 3/6] add reports, consolidate, update benchmark files --- benchmarks.py => agbenchmark/benchmarks.py | 13 +++++++------ agbenchmark/config.json | 5 +++++ .../regression_tests.json | 3 ++- agbenchmark/reports/1.json | 18 ++++++++++++++++++ config.json | 6 ------ generated/.gitkeep | 3 --- 6 files changed, 32 insertions(+), 16 deletions(-) rename benchmarks.py => agbenchmark/benchmarks.py (69%) create mode 100644 agbenchmark/config.json rename regression_tests.json => agbenchmark/regression_tests.json (51%) create mode 100644 agbenchmark/reports/1.json delete mode 100644 config.json delete mode 100644 generated/.gitkeep diff --git a/benchmarks.py b/agbenchmark/benchmarks.py similarity index 69% rename from benchmarks.py rename to agbenchmark/benchmarks.py index cd1118a21..a4e6f4c93 100644 --- a/benchmarks.py +++ b/agbenchmark/benchmarks.py @@ -7,19 +7,20 @@ def run_specific_agent(task: str) -> Tuple[str, int]: # Construct the command - command = ['python', 'main_no_modal.py', task] + command = ["python", "main_no_modal.py", task] subprocess.run(command, text=True) + def execute_generated_files(): # Navigate to generated directory - os.chdir('generated') + os.chdir("generated") # Iterate over every .txt file in the directory - for file_name in glob.glob('*.txt'): - with open(file_name, 'r') as file: + for file_name in glob.glob("../*.txt"): + with open(file_name, "r") as file: python_code = file.read() - python_code = python_code.replace('```python', '') - python_code = python_code.replace('```', '') + python_code = python_code.replace("```python", "") + python_code = python_code.replace("```", "") # Execute the code in the .txt file exec(python_code) diff --git a/agbenchmark/config.json b/agbenchmark/config.json new file mode 100644 index 000000000..bc89d5ef4 --- /dev/null +++ b/agbenchmark/config.json @@ -0,0 +1,5 @@ +{ + "workspace": "generated", + "entry_path": "agbenchmark/benchmarks.py", + "cutoff": 60 +} diff --git a/regression_tests.json b/agbenchmark/regression_tests.json similarity index 51% rename from regression_tests.json rename to agbenchmark/regression_tests.json index e3633a2af..c6434ffc0 100644 --- a/regression_tests.json +++ b/agbenchmark/regression_tests.json @@ -2,6 +2,7 @@ "TestWriteFile": { "difficulty": "basic", "dependencies": [], - "test": "agbenchmark/tests/basic_abilities/write_file/write_file_test.py" + "test": "agbenchmark/challenges/interface/write_file", + "success": true } } \ No newline at end of file diff --git a/agbenchmark/reports/1.json b/agbenchmark/reports/1.json new file mode 100644 index 000000000..48a631c5b --- /dev/null +++ b/agbenchmark/reports/1.json @@ -0,0 +1,18 @@ +{ + "command": "agbenchmark start --test TestWriteFile --mock", + "completion_time": "2023-07-10-21:19", + "time_elapsed": "8.34 seconds", + "tests": { + "TestWriteFile": { + "difficulty": "basic", + "dependencies": [], + "test": "agbenchmark/challenges/interface/write_file", + "success": true + } + }, + "config": { + "workspace": "generated", + "entry_path": "agbenchmark/benchmarks.py", + "cutoff": 60 + } +} \ No newline at end of file diff --git a/config.json b/config.json deleted file mode 100644 index d0a72bd85..000000000 --- a/config.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "workspace": "generated", - "entry_path": "benchmarks.py", - "home_path": "", - "cutoff": 60 -} diff --git a/generated/.gitkeep b/generated/.gitkeep deleted file mode 100644 index 4eb9b6ffa..000000000 --- a/generated/.gitkeep +++ /dev/null @@ -1,3 +0,0 @@ -# generated folder - -by default, `main.py` will generate the app in this folder (you can customize with the `--directory=newFolderHere` flag). \ No newline at end of file From aa8233925090c0c9314ceef68397ab37baf17766 Mon Sep 17 00:00:00 2001 From: Merwane Hamadi Date: Tue, 11 Jul 2023 11:41:35 -0700 Subject: [PATCH 4/6] Change entrypath and add __init__.py Signed-off-by: Merwane Hamadi --- agbenchmark/__init__.py | 0 agbenchmark/config.json | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 agbenchmark/__init__.py diff --git a/agbenchmark/__init__.py b/agbenchmark/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/agbenchmark/config.json b/agbenchmark/config.json index bc89d5ef4..729b3e83f 100644 --- a/agbenchmark/config.json +++ b/agbenchmark/config.json @@ -1,5 +1,5 @@ { "workspace": "generated", - "entry_path": "agbenchmark/benchmarks.py", + "entry_path": "agbenchmark.benchmarks", "cutoff": 60 } From f4f4395511ed6ba59ec09100d6596bf81d68a898 Mon Sep 17 00:00:00 2001 From: Merwane Hamadi Date: Fri, 14 Jul 2023 18:13:14 -0700 Subject: [PATCH 5/6] Add more regression tests --- agbenchmark/regression_tests.json | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/agbenchmark/regression_tests.json b/agbenchmark/regression_tests.json index c6434ffc0..b66a16cf3 100644 --- a/agbenchmark/regression_tests.json +++ b/agbenchmark/regression_tests.json @@ -1,8 +1,10 @@ { "TestWriteFile": { - "difficulty": "basic", + "difficulty": "interface", "dependencies": [], - "test": "agbenchmark/challenges/interface/write_file", - "success": true + "data_path": "agbenchmark/challenges/interface/write_file" + }, + "TestBasicCodeGeneration": { + "data_path": "agbenchmark/challenges/code/d4", } -} \ No newline at end of file +} From a23d01369cea976e80b7889fdbf1096619471301 Mon Sep 17 00:00:00 2001 From: Merwane Hamadi Date: Sun, 16 Jul 2023 07:35:24 -0700 Subject: [PATCH 6/6] Remove cutoff --- agbenchmark/config.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/agbenchmark/config.json b/agbenchmark/config.json index 729b3e83f..5fd051ecc 100644 --- a/agbenchmark/config.json +++ b/agbenchmark/config.json @@ -1,5 +1,4 @@ { "workspace": "generated", - "entry_path": "agbenchmark.benchmarks", - "cutoff": 60 + "entry_path": "agbenchmark.benchmarks" }