From 896198af51dd86dc3cfc2e258c3479948844e283 Mon Sep 17 00:00:00 2001
From: Merwane Hamadi <merwanehamadi@gmail.com>
Date: Mon, 3 Jul 2023 15:55:17 -0700
Subject: [PATCH 1/6] Integrate smol developer with agbenchmark

---
 benchmarks.py         | 33 +++++++++++++++++++++++++++++++++
 config.json           |  5 +++++
 regression_tests.json |  7 +++++++
 3 files changed, 45 insertions(+)
 create mode 100644 benchmarks.py
 create mode 100644 config.json
 create mode 100644 regression_tests.json
diff --git a/benchmarks.py b/benchmarks.py
new file mode 100644
index 000000000..cd1118a21
--- /dev/null
+++ b/benchmarks.py
@@ -0,0 +1,33 @@
+import os
+import glob
+import subprocess
+import sys
+from typing import Tuple
+
+
+def run_specific_agent(task: str) -> Tuple[str, int]:
+    # Construct the command
+    command = ['python', 'main_no_modal.py', task]
+    subprocess.run(command, text=True)
+
+def execute_generated_files():
+    # Navigate to generated directory
+    os.chdir('generated')
+
+    # Iterate over every .txt file in the directory
+    for file_name in glob.glob('*.txt'):
+        with open(file_name, 'r') as file:
+            python_code = file.read()
+            python_code = python_code.replace('```python', '')
+            python_code = python_code.replace('```', '')
+            # Execute the code in the .txt file
+            exec(python_code)
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("Usage: python script.py <task>")
+        sys.exit(1)
+    task = sys.argv[1]
+    run_specific_agent(task)
+    execute_generated_files()
diff --git a/config.json b/config.json
new file mode 100644
index 000000000..cb391de81
--- /dev/null
+++ b/config.json
@@ -0,0 +1,5 @@
+{
+  "workspace": "generated",
+  "func_path": "benchmarks.py",
+  "cutoff": 60
+}
diff --git a/regression_tests.json b/regression_tests.json
new file mode 100644
index 000000000..e3633a2af
--- /dev/null
+++ b/regression_tests.json
@@ -0,0 +1,7 @@
+{
+    "TestWriteFile": {
+        "difficulty": "basic",
+        "dependencies": [],
+        "test": "agbenchmark/tests/basic_abilities/write_file/write_file_test.py"
+    }
+}
\ No newline at end of file

From 5a3ad43103b238b9c8f2a2acceff250888be263e Mon Sep 17 00:00:00 2001
From: Silen Naihin <silen.naihin@gmail.com>
Date: Tue, 4 Jul 2023 00:30:34 -0400
Subject: [PATCH 2/6] update config

---
 config.json | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/config.json b/config.json
index cb391de81..d0a72bd85 100644
--- a/config.json
+++ b/config.json
@@ -1,5 +1,6 @@
 {
   "workspace": "generated",
-  "func_path": "benchmarks.py",
+  "entry_path": "benchmarks.py",
+  "home_path": "",
   "cutoff": 60
 }

From c52b14b1d5b1b74d886f08d9914e7f43437f609d Mon Sep 17 00:00:00 2001
From: Silen Naihin <silen.naihin@gmail.com>
Date: Mon, 10 Jul 2023 21:36:25 -0400
Subject: [PATCH 3/6] add reports, consolidate, update benchmark files

---
 benchmarks.py => agbenchmark/benchmarks.py     | 13 +++++++------
 agbenchmark/config.json                        |  5 +++++
 .../regression_tests.json                      |  3 ++-
 agbenchmark/reports/1.json                     | 18 ++++++++++++++++++
 config.json                                    |  6 ------
 generated/.gitkeep                             |  3 ---
 6 files changed, 32 insertions(+), 16 deletions(-)
 rename benchmarks.py => agbenchmark/benchmarks.py (69%)
 create mode 100644 agbenchmark/config.json
 rename regression_tests.json => agbenchmark/regression_tests.json (51%)
 create mode 100644 agbenchmark/reports/1.json
 delete mode 100644 config.json
 delete mode 100644 generated/.gitkeep

diff --git a/benchmarks.py b/agbenchmark/benchmarks.py
similarity index 69%
rename from benchmarks.py
rename to agbenchmark/benchmarks.py
index cd1118a21..a4e6f4c93 100644
--- a/benchmarks.py
+++ b/agbenchmark/benchmarks.py
@@ -7,19 +7,20 @@
 
 def run_specific_agent(task: str) -> Tuple[str, int]:
     # Construct the command
-    command = ['python', 'main_no_modal.py', task]
+    command = ["python", "main_no_modal.py", task]
     subprocess.run(command, text=True)
 
+
 def execute_generated_files():
     # Navigate to generated directory
-    os.chdir('generated')
+    os.chdir("generated")
 
     # Iterate over every .txt file in the directory
-    for file_name in glob.glob('*.txt'):
-        with open(file_name, 'r') as file:
+    for file_name in glob.glob("../*.txt"):
+        with open(file_name, "r") as file:
             python_code = file.read()
-            python_code = python_code.replace('```python', '')
-            python_code = python_code.replace('```', '')
+            python_code = python_code.replace("```python", "")
+            python_code = python_code.replace("```", "")
             # Execute the code in the .txt file
             exec(python_code)
 
diff --git a/agbenchmark/config.json b/agbenchmark/config.json
new file mode 100644
index 000000000..bc89d5ef4
--- /dev/null
+++ b/agbenchmark/config.json
@@ -0,0 +1,5 @@
+{
+  "workspace": "generated",
+  "entry_path": "agbenchmark/benchmarks.py",
+  "cutoff": 60
+}
diff --git a/regression_tests.json b/agbenchmark/regression_tests.json
similarity index 51%
rename from regression_tests.json
rename to agbenchmark/regression_tests.json
index e3633a2af..c6434ffc0 100644
--- a/regression_tests.json
+++ b/agbenchmark/regression_tests.json
@@ -2,6 +2,7 @@
     "TestWriteFile": {
         "difficulty": "basic",
         "dependencies": [],
-        "test": "agbenchmark/tests/basic_abilities/write_file/write_file_test.py"
+        "test": "agbenchmark/challenges/interface/write_file",
+        "success": true
     }
 }
\ No newline at end of file
diff --git a/agbenchmark/reports/1.json b/agbenchmark/reports/1.json
new file mode 100644
index 000000000..48a631c5b
--- /dev/null
+++ b/agbenchmark/reports/1.json
@@ -0,0 +1,18 @@
+{
+    "command": "agbenchmark start --test TestWriteFile --mock",
+    "completion_time": "2023-07-10-21:19",
+    "time_elapsed": "8.34 seconds",
+    "tests": {
+        "TestWriteFile": {
+            "difficulty": "basic",
+            "dependencies": [],
+            "test": "agbenchmark/challenges/interface/write_file",
+            "success": true
+        }
+    },
+    "config": {
+        "workspace": "generated",
+        "entry_path": "agbenchmark/benchmarks.py",
+        "cutoff": 60
+    }
+}
\ No newline at end of file
diff --git a/config.json b/config.json
deleted file mode 100644
index d0a72bd85..000000000
--- a/config.json
+++ /dev/null
@@ -1,6 +0,0 @@
-{
-  "workspace": "generated",
-  "entry_path": "benchmarks.py",
-  "home_path": "",
-  "cutoff": 60
-}
diff --git a/generated/.gitkeep b/generated/.gitkeep
deleted file mode 100644
index 4eb9b6ffa..000000000
--- a/generated/.gitkeep
+++ /dev/null
@@ -1,3 +0,0 @@
-# generated folder
-
-by default, `main.py` will generate the app in this folder (you can customize with the `--directory=newFolderHere` flag).
\ No newline at end of file

From aa8233925090c0c9314ceef68397ab37baf17766 Mon Sep 17 00:00:00 2001
From: Merwane Hamadi <merwanehamadi@gmail.com>
Date: Tue, 11 Jul 2023 11:41:35 -0700
Subject: [PATCH 4/6] Change entrypath and add __init__.py

Signed-off-by: Merwane Hamadi <merwanehamadi@gmail.com>
---
 agbenchmark/__init__.py | 0
 agbenchmark/config.json | 2 +-
 2 files changed, 1 insertion(+), 1 deletion(-)
 create mode 100644 agbenchmark/__init__.py

diff --git a/agbenchmark/__init__.py b/agbenchmark/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/agbenchmark/config.json b/agbenchmark/config.json
index bc89d5ef4..729b3e83f 100644
--- a/agbenchmark/config.json
+++ b/agbenchmark/config.json
@@ -1,5 +1,5 @@
 {
   "workspace": "generated",
-  "entry_path": "agbenchmark/benchmarks.py",
+  "entry_path": "agbenchmark.benchmarks",
   "cutoff": 60
 }

From f4f4395511ed6ba59ec09100d6596bf81d68a898 Mon Sep 17 00:00:00 2001
From: Merwane Hamadi <merwanehamadi@gmail.com>
Date: Fri, 14 Jul 2023 18:13:14 -0700
Subject: [PATCH 5/6] Add more regression tests

---
 agbenchmark/regression_tests.json | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/agbenchmark/regression_tests.json b/agbenchmark/regression_tests.json
index c6434ffc0..b66a16cf3 100644
--- a/agbenchmark/regression_tests.json
+++ b/agbenchmark/regression_tests.json
@@ -1,8 +1,10 @@
 {
     "TestWriteFile": {
-        "difficulty": "basic",
+        "difficulty": "interface",
         "dependencies": [],
-        "test": "agbenchmark/challenges/interface/write_file",
-        "success": true
+        "data_path": "agbenchmark/challenges/interface/write_file"
+    },
+    "TestBasicCodeGeneration": {
+        "data_path": "agbenchmark/challenges/code/d4"
     }
-}
\ No newline at end of file
+}

From a23d01369cea976e80b7889fdbf1096619471301 Mon Sep 17 00:00:00 2001
From: Merwane Hamadi <merwanehamadi@gmail.com>
Date: Sun, 16 Jul 2023 07:35:24 -0700
Subject: [PATCH 6/6] Remove cutoff

---
 agbenchmark/config.json | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/agbenchmark/config.json b/agbenchmark/config.json
index 729b3e83f..5fd051ecc 100644
--- a/agbenchmark/config.json
+++ b/agbenchmark/config.json
@@ -1,5 +1,4 @@
 {
   "workspace": "generated",
-  "entry_path": "agbenchmark.benchmarks",
-  "cutoff": 60
+  "entry_path": "agbenchmark.benchmarks"
 }