Skip to content

Commit

Permalink
Notebook for ingesting all swe bench repos
Browse files Browse the repository at this point in the history
  • Loading branch information
aorwall committed Jun 13, 2024
1 parent d282c0f commit bda099d
Showing 1 changed file with 270 additions and 0 deletions.
270 changes: 270 additions & 0 deletions notebooks/ingest.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,270 @@
{
"cells": [
{
"metadata": {},
"cell_type": "markdown",
"source": [
"# Run ingestion\n",
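"Ingest all SWE-bench Lite instances into a code index, evaluate how well each index retrieves the expected changes, and append the results to `report.jsonl`."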
],
"id": "d2e538dc56ed690c"
},
{
"metadata": {},
"cell_type": "markdown",
"source": "",
"id": "4d7e85ae729b9c1a"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-05-21T08:52:16.128716Z",
"start_time": "2024-05-21T08:51:59.638654Z"
}
},
"cell_type": "code",
"source": [
"from moatless.benchmark.swebench import load_instances\n",
"import os\n",
"import json\n",
"\n",
"instance_by_id = load_instances(\"princeton-nlp/SWE-bench_Lite\", split=\"test\")\n",
"\n",
"evaluation_report = \"report.jsonl\"\n",
"\n",
"# Latest already-reported instance per repo; create_index() loads that\n",
"# instance's persisted index instead of starting from an empty one.\n",
"previous_instances = {}\n",
"\n",
"# Resume support: instances already present in the report are removed from\n",
"# the work list so a restarted notebook continues where it stopped.\n",
"if os.path.exists(evaluation_report):\n",
"    with open(evaluation_report, \"r\") as f:\n",
"        for line in f:\n",
"            if not line.strip():\n",
"                continue  # tolerate blank lines in the JSONL file\n",
"            report = json.loads(line)\n",
"            # pop() with a default: an id that is reported twice, or that is not\n",
"            # part of this dataset split, must not abort the resume with a KeyError.\n",
"            previous_instance = instance_by_id.pop(report[\"instance_id\"], None)\n",
"            if previous_instance is None:\n",
"                continue\n",
"            previous_instances[previous_instance[\"repo\"]] = previous_instance\n",
"\n",
"# Process the remaining instances in ascending created_at order.\n",
"instances = sorted(instance_by_id.values(), key=lambda x: x[\"created_at\"])\n",
"\n",
"print(f\"Number of instances: {len(instances)}\")"
],
"id": "2a935c4beaaa4635",
"execution_count": 1,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-05-21T08:52:16.133527Z",
"start_time": "2024-05-21T08:52:16.130077Z"
}
},
"cell_type": "code",
"source": [
"from moatless.benchmark.swebench import setup_swebench_repo\n",
"\n",
"def next_instance(instances):\n",
"    \"\"\"Remove and return the first queued instance, or None when nothing is queued.\"\"\"\n",
"    if instances:\n",
"        head = instances.pop(0)\n",
"        print(f\"Instance: {head['instance_id']}, {len(instances)} instances left\")\n",
"        return head\n",
"    return None\n",
"\n",
"# Take the first pending instance (None when nothing is left); the following\n",
"# cells process this one instance step by step.\n",
"instance = next_instance(instances)"
],
"id": "c2b3e2c270d2e5c3",
"execution_count": 2,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-05-21T08:52:16.924594Z",
"start_time": "2024-05-21T08:52:16.134279Z"
}
},
"cell_type": "code",
"source": [
"from moatless.index.settings import IndexSettings\n",
"from moatless.index.code_index import CodeIndex\n",
"from dotenv import load_dotenv\n",
"from moatless.benchmark.swebench import get_repo_dir_name\n",
"import os\n",
"\n",
"# Settings passed to CodeIndex when create_index() builds an index from scratch.\n",
"index_settings = IndexSettings(\n",
" embed_model=\"voyage-code-2\"\n",
")\n",
"\n",
"# Load environment variables from the .env file one directory up\n",
"# (presumably the API credentials the index needs; confirm).\n",
"load_dotenv('../.env')\n",
"\n",
"def get_persist_dir(instance, index_store_dir=\"/tmp/index_store\"):\n",
"    \"\"\"Return the directory in which the index for `instance` is persisted.\n",
"\n",
"    index_store_dir is the root of the index store. The default keeps the\n",
"    original /tmp/index_store location; pass another root to store the\n",
"    indexes somewhere that is not cleared with /tmp.\n",
"    \"\"\"\n",
"    return os.path.join(index_store_dir, get_repo_dir_name(instance[\"instance_id\"]))\n",
"\n",
"def create_index(instance):\n",
"    \"\"\"Return the CodeIndex to ingest `instance` into.\n",
"\n",
"    If an instance of the same repo was processed before, its persisted index\n",
"    is loaded; otherwise a new index is created with `index_settings`.\n",
"    \"\"\"\n",
"    previous_instance = previous_instances.get(instance[\"repo\"])\n",
"    if previous_instance:\n",
"        persist_dir = get_persist_dir(previous_instance)\n",
"        # The report can outlive the index store (which defaults to a /tmp path),\n",
"        # so fall back to a fresh index instead of failing on a missing directory.\n",
"        if os.path.exists(persist_dir):\n",
"            return CodeIndex.from_persist_dir(persist_dir)\n",
"        print(f\"Persisted index {persist_dir} not found, creating a new index\")\n",
"    return CodeIndex(settings=index_settings)\n",
"\n",
"# Index for the first instance: loaded from the persisted index of a previously\n",
"# processed instance of the same repo if one is recorded, otherwise new.\n",
"code_index = create_index(instance)"
],
"id": "af25431875f4a923",
"execution_count": 3,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-05-21T09:00:40.704370Z",
"start_time": "2024-05-21T08:52:16.926814Z"
}
},
"cell_type": "code",
"source": [
"def ingest(code_index, instance):\n",
"    \"\"\"Set up the repo for `instance`, ingest it into `code_index` and persist the index.\n",
"\n",
"    Also records `instance` as the latest processed instance of its repo.\n",
"    Returns the (vectors, indexed_tokens) pair reported by run_ingestion.\n",
"    \"\"\"\n",
"    checkout_path = setup_swebench_repo(instance)\n",
"    print(f\"Repo path: {checkout_path}\")\n",
"\n",
"    ingestion_stats = code_index.run_ingestion(repo_path=checkout_path, num_workers=4)\n",
"    vectors, indexed_tokens = ingestion_stats\n",
"    print(f\"Indexed {vectors} vectors and {indexed_tokens} tokens.\")\n",
"\n",
"    target_dir = get_persist_dir(instance)\n",
"    code_index.persist(persist_dir=target_dir)\n",
"    print(f\"Index persisted to {target_dir}\")\n",
"\n",
"    # Later instances of the same repo start from this persisted index.\n",
"    previous_instances[instance[\"repo\"]] = instance\n",
"    return vectors, indexed_tokens\n",
"\n",
"# Ingest the first instance (slow: the recorded run of this cell took about eight minutes).\n",
"vectors, indexed_tokens = ingest(code_index, instance)"
],
"id": "3f749928390182fb",
"execution_count": 4,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-05-21T09:00:41.269230Z",
"start_time": "2024-05-21T09:00:40.705860Z"
}
},
"cell_type": "code",
"source": [
"from moatless.benchmark.utils import calculate_estimated_context_window\n",
"\n",
"def evaluate(code_index, instance):\n",
"    \"\"\"Search the index with the problem statement and report where the expected changes rank.\n",
"\n",
"    Returns (expected_changes, all_matching_context_window, any_matching_context_window).\n",
"    The two windows are the largest and the smallest context_window among the\n",
"    expected changes that were found, or None when none was found. Note that\n",
"    all_matching_context_window is set even if only some expected changes were found.\n",
"    \"\"\"\n",
"    results = code_index.find_code(instance[\"problem_statement\"], top_k=1000)\n",
"\n",
"    expected_changes, sum_tokens = calculate_estimated_context_window(instance, results)\n",
"    all_matching_context_window = None\n",
"    any_matching_context_window = None\n",
"\n",
"    # Context windows of the expected changes that appear in the search results.\n",
"    found_windows = [change[\"context_window\"] for change in expected_changes if change[\"context_window\"] is not None]\n",
"    if found_windows:\n",
"        all_matching_context_window = max(found_windows)\n",
"        any_matching_context_window = min(found_windows)\n",
"\n",
"        if len(found_windows) == len(expected_changes):\n",
"            print(f\"Found all expected changes within a context window of {all_matching_context_window} tokens, first match at context window {any_matching_context_window}\")\n",
"        else:\n",
"            print(f\"Found {len(found_windows)} expected changes within a context window {all_matching_context_window} tokens, first match at context window {any_matching_context_window} max context window {sum_tokens} tokens\")\n",
"    else:\n",
"        print(f\"No expected changes found in context window of {sum_tokens} tokens\")\n",
"\n",
"    for change in expected_changes:\n",
"        location = f\"{change['file_path']} ({change['start_line']}-{change['end_line']})\"\n",
"        if change[\"context_window\"] is None:\n",
"            print(f\"Expected change: {location} not found, closest match: {change.get('closest_match_lines')}\")\n",
"        else:\n",
"            print(f\"Expected change: {location} found at context window {change['context_window']} tokens. Distance: {change['distance']}. Position: {change['position']}\")\n",
"\n",
"    return expected_changes, all_matching_context_window, any_matching_context_window\n",
"\n",
"# Evaluate retrieval for the first instance against its freshly ingested index.\n",
"expected_changes, all_matching_context_window, any_matching_context_window = evaluate(code_index, instance)"
],
"id": "ac7a612e3e7fb834",
"execution_count": 5,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-05-21T09:00:41.273680Z",
"start_time": "2024-05-21T09:00:41.270252Z"
}
},
"cell_type": "code",
"source": [
"import json\n",
"\n",
"def write_report(instance, expected_changes, vectors, indexed_tokens, all_matching_context_window, any_matching_context_window, report_path=None):\n",
"    \"\"\"Append one JSON line with the results for `instance` to the report file.\n",
"\n",
"    report_path defaults to `evaluation_report`, the file the resume logic\n",
"    reads, so that writer and reader cannot drift apart.\n",
"    \"\"\"\n",
"    if report_path is None:\n",
"        report_path = evaluation_report\n",
"    record = {\n",
"        \"instance_id\": instance[\"instance_id\"],\n",
"        \"vectors\": vectors,\n",
"        \"indexed_tokens\": indexed_tokens,\n",
"        \"all_matching_context_window\": all_matching_context_window,\n",
"        \"any_matching_context_window\": any_matching_context_window,\n",
"        \"expected_changes\": expected_changes,\n",
"    }\n",
"    # Append mode: earlier results are kept and each instance adds exactly one line.\n",
"    with open(report_path, \"a\") as f:\n",
"        f.write(json.dumps(record) + \"\\n\")\n",
" \n",
"# Record the first instance in the report so a restarted run skips it.\n",
"write_report(instance, expected_changes, vectors, indexed_tokens, all_matching_context_window, any_matching_context_window)"
],
"id": "7f9a0ee8beabe3d6",
"execution_count": 6,
"outputs": []
},
{
"metadata": {
"jupyter": {
"is_executing": true
},
"ExecuteTime": {
"start_time": "2024-05-21T09:00:41.274579Z"
}
},
"cell_type": "code",
"source": [
"\n",
"def index_next_instance():\n",
"    \"\"\"Ingest, evaluate and report every instance still left in `instances`.\"\"\"\n",
"    while pending := next_instance(instances):\n",
"        index = create_index(pending)\n",
"        vectors, indexed_tokens = ingest(index, pending)\n",
"        expected_changes, all_window, any_window = evaluate(index, pending)\n",
"        write_report(pending, expected_changes, vectors, indexed_tokens, all_window, any_window)\n",
"\n",
"# Process every remaining instance; each result is appended to the report as it completes.\n",
"index_next_instance()"
],
"id": "69a610864e1d85f5",
"execution_count": null,
"outputs": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

0 comments on commit bda099d

Please sign in to comment.