From 439fbbd5414ae10c8a35aa444df0f9b91f0dcd60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Albert=20=C3=96rwall?= Date: Wed, 14 Aug 2024 19:59:53 +0200 Subject: [PATCH] Fix types and add indexing script --- moatless/benchmark/report_v2.py | 2 +- .../benchmark/swebench/index_instances.py | 179 ++++++++++++++++++ moatless/benchmark/utils.py | 2 +- moatless/edit/clarify.py | 2 +- moatless/edit/edit.py | 2 +- moatless/edit/plan.py | 2 +- moatless/edit/plan_lines.py | 2 +- moatless/edit/review.py | 2 +- moatless/file_context.py | 2 +- moatless/find/decide.py | 2 +- moatless/find/identify.py | 2 +- moatless/find/search.py | 2 +- moatless/index/code_index.py | 2 +- moatless/loop.py | 2 +- moatless/{types.py => schema.py} | 0 moatless/state.py | 2 +- moatless/trajectory.py | 2 +- moatless/verify/lint.py | 2 +- moatless/verify/maven.py | 2 +- moatless/verify/verify.py | 2 +- moatless/workspace.py | 2 +- tests/edit/test_clarify.py | 2 +- tests/edit/test_edit.py | 2 +- tests/edit/test_plan.py | 2 +- tests/find/test_decide.py | 2 +- tests/find/test_identify.py | 2 +- tests/find/test_search.py | 2 +- tests/index/test_epic_split.py | 52 +++++ tests/test_file_context.py | 2 +- tests/test_loop.py | 2 +- tests/test_state.py | 2 +- tests/test_transition_rules.py | 2 +- 32 files changed, 260 insertions(+), 29 deletions(-) create mode 100644 moatless/benchmark/swebench/index_instances.py rename moatless/{types.py => schema.py} (100%) create mode 100644 tests/index/test_epic_split.py diff --git a/moatless/benchmark/report_v2.py b/moatless/benchmark/report_v2.py index b8430cce..88c906d9 100644 --- a/moatless/benchmark/report_v2.py +++ b/moatless/benchmark/report_v2.py @@ -26,7 +26,7 @@ from moatless.benchmark.utils import get_missing_files from moatless.file_context import FileContext from moatless.trajectory import Trajectory -from moatless.types import ActionTransaction, Usage, Content +from moatless.schema import ActionTransaction, Usage, Content from moatless.state import AgenticState logger = logging.getLogger(__name__) diff --git a/moatless/benchmark/swebench/index_instances.py b/moatless/benchmark/swebench/index_instances.py new file mode 100644 index 00000000..05b91467 --- /dev/null +++ b/moatless/benchmark/swebench/index_instances.py @@ -0,0 +1,179 @@ +import logging + +from moatless import FileRepository +from moatless.benchmark.swebench import load_instances, setup_swebench_repo +import json + +from moatless.benchmark.utils import calculate_estimated_context_window +from moatless.index.settings import IndexSettings, CommentStrategy +from moatless.index.code_index import CodeIndex +from dotenv import load_dotenv +from moatless.benchmark.swebench import get_repo_dir_name +import os + + +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') +logger = logging.getLogger(__name__) + +index_store_dir = "/home/albert/.moatless/index_stores/20240814-voyage-code-2" + +evaluation_report = "report.jsonl" + + +def create_instance_list(): + #lite_instance_by_id = load_instances("princeton-nlp/SWE-bench_Lite", split="test") + instance_by_id = load_instances("princeton-nlp/SWE-bench_Verified", split="test") + + logger.info( + f"Number of instances: {len(instance_by_id)} from {len(instance_by_id)} SWE-bench_Lite and SWE-bench_Verified") + + instances = list(instance_by_id.values()) + # instances = [instance for instance in instances if instance["instance_id"] in white_list] + instances = sorted(instances, key=lambda x: x["created_at"]) + + logger.info(f"Number of instances: {len(instances)}") + return instances + +#with open("index_eval.csv", "w") as f: +# f.write("instance_id,vectors,indexed_tokens,all_matching_context_window,any_matching_context_window\n") + + +previous_instances = { +} + + + +index_settings = IndexSettings( + embed_model="voyage-code-2", + dimensions=1536, + language="python", + min_chunk_size=200, + chunk_size=750, + hard_token_limit=3000, + max_chunks=200, + comment_strategy=CommentStrategy.ASSOCIATE +) + +load_dotenv('../.env') + + +def get_persist_dir(instance): + return os.path.join(index_store_dir, get_repo_dir_name(instance["instance_id"])) + + +def ingest(code_index, instance): + vectors, indexed_tokens = code_index.run_ingestion(num_workers=1) + logger.info(f"Indexed {vectors} vectors and {indexed_tokens} tokens.") + + persist_dir = get_persist_dir(instance) + code_index.persist(persist_dir=persist_dir) + logger.info(f"Index persisted to {persist_dir}") + return vectors, indexed_tokens + +def evaluate(code_index, instance): + results = code_index._vector_search(instance["problem_statement"], top_k=1000) + + expected_changes, sum_tokens = calculate_estimated_context_window(instance, results) + all_matching_context_window = None + any_matching_context_window = None + + expected_matches = [context for context in expected_changes if context["context_window"] is not None] + if expected_matches: + all_matching_context_window = max(context["context_window"] for context in expected_matches) + any_matching_context_window = min(context["context_window"] for context in expected_matches) + + if len(expected_matches) == len(expected_changes): + logger.info( + f"Found all expected changes within a context window of {all_matching_context_window} tokens, first match at context window {any_matching_context_window}") + else: + any_matching_context_window = min( + context["context_window"] for context in expected_changes if context["context_window"] is not None) + logger.info( + f"Found {len(expected_matches)} expected changes within a context window {all_matching_context_window} tokens, first match at context window {any_matching_context_window} max context window {sum_tokens} tokens") + else: + logger.info(f"No expected changes found in context window of {sum_tokens} tokens") + + for change in expected_changes: + if change["context_window"] is None: + logger.info( + f"Expected change: {change['file_path']} ({change['start_line']}-{change['end_line']}) not fund, closest match: {change.get('closest_match_lines')}") + else: + logger.info( + f"Expected change: {change['file_path']} ({change['start_line']}-{change['end_line']}) found at context window {change['context_window']} tokens. Distance: {change['distance']}. Position: {change['position']}") + + return expected_changes, all_matching_context_window, any_matching_context_window + + +def write_report(instance, expected_changes, vectors, indexed_tokens, all_matching_context_window, + any_matching_context_window): + with open("report.jsonl", "a") as f: + f.write(json.dumps({ + "instance_id": instance["instance_id"], + "vectors": vectors, + "indexed_tokens": indexed_tokens, + "all_matching_context_window": all_matching_context_window, + "any_matching_context_window": any_matching_context_window, + "expected_changes": expected_changes, + }) + "\n") + + with open("index_eval.csv", "a") as f: + f.write(f"{instance['instance_id']},{vectors},{indexed_tokens},{all_matching_context_window},{any_matching_context_window}\n") + +def run_indexing(): + #lite_instance_by_id = load_instances("princeton-nlp/SWE-bench_Lite", split="test") + instance_by_id = load_instances("princeton-nlp/SWE-bench_Verified", split="test") + + #instance_by_id = {**lite_instance_by_id, **verified_instance_by_id} + #logger.info( + # f"Number of instances: {len(instance_by_id)} from {len(lite_instance_by_id)} SWE-bench_Lite and {len(verified_instance_by_id)} SWE-bench_Verified") + + instances = list(instance_by_id.values()) + instances = sorted(instances, key=lambda x: x["created_at"]) + + logger.info(f"Number of instances: {len(instances)}") + + if os.path.exists(evaluation_report): + with open(evaluation_report, "r") as f: + for line in f: + report = json.loads(line) + previous_instance = instance_by_id.get(report["instance_id"]) + if previous_instance: + previous_instances[previous_instance["repo"]] = previous_instance + del instance_by_id[report["instance_id"]] + + for i, instance in enumerate(instances): + logger.info(f"Processing instance {i + 1}/{len(instances)}: {instance['instance_id']} {instance['created_at']}") + + repo_dir = setup_swebench_repo(instance) + repo = FileRepository(repo_dir) + persist_dir = get_persist_dir(instance) + + code_index = None + + if os.path.exists(persist_dir): + logger.info(f"Index exists on {persist_dir}") + #try: + # logger.info(f"Loading index from {persist_dir}") + # code_index = CodeIndex.from_persist_dir(persist_dir, file_repo=repo) + #except Exception as e: + # logger.error(f"Error loading index: {e}") + else: + logger.info(f"No index found at {persist_dir}") + + #if not code_index: + previous_instance = previous_instances.get(instance["repo"]) + if previous_instance: + logger.info(f"Loading previous index from {get_persist_dir(previous_instance)}") + code_index = CodeIndex.from_persist_dir(get_persist_dir(previous_instance), file_repo=repo) + else: + code_index = CodeIndex(settings=index_settings, file_repo=repo) + + vectors, indexed_tokens = ingest(code_index, instance) + expected_changes, all_matching_context_window, any_matching_context_window = evaluate(code_index, instance) + write_report(instance, expected_changes, vectors, indexed_tokens, all_matching_context_window, + any_matching_context_window) + + previous_instances[instance["repo"]] = instance + + +run_indexing() diff --git a/moatless/benchmark/utils.py b/moatless/benchmark/utils.py index 847e775b..7d4a1f44 100644 --- a/moatless/benchmark/utils.py +++ b/moatless/benchmark/utils.py @@ -6,7 +6,7 @@ from moatless.codeblocks.module import Module from moatless.index.types import SearchCodeHit, CodeSnippet from moatless.repository import FileRepository -from moatless.types import FileWithSpans +from moatless.schema import FileWithSpans logger = logging.getLogger(__name__) diff --git a/moatless/edit/clarify.py b/moatless/edit/clarify.py index 9cc3398a..f78c0baa 100644 --- a/moatless/edit/clarify.py +++ b/moatless/edit/clarify.py @@ -8,7 +8,7 @@ from moatless.edit.prompt import CLARIFY_CHANGE_SYSTEM_PROMPT from moatless.repository import CodeFile from moatless.state import ActionResponse, AgenticState -from moatless.types import ( +from moatless.schema import ( ActionRequest, FileWithSpans, Message, diff --git a/moatless/edit/edit.py b/moatless/edit/edit.py index 776eb3e6..75d87970 100644 --- a/moatless/edit/edit.py +++ b/moatless/edit/edit.py @@ -4,7 +4,7 @@ from pydantic import BaseModel, Field, PrivateAttr from moatless.state import AgenticState, Finished -from moatless.types import ( +from moatless.schema import ( ActionRequest, ActionResponse, AssistantMessage, diff --git a/moatless/edit/plan.py b/moatless/edit/plan.py index 93f84e5c..c9733fd9 100644 --- a/moatless/edit/plan.py +++ b/moatless/edit/plan.py @@ -11,7 +11,7 @@ SELECT_SPAN_SYSTEM_PROMPT, ) from moatless.state import AgenticState -from moatless.types import ( +from moatless.schema import ( ActionRequest, ActionResponse, AssistantMessage, diff --git a/moatless/edit/plan_lines.py b/moatless/edit/plan_lines.py index 42e43bb0..6c4d0e08 100644 --- a/moatless/edit/plan_lines.py +++ b/moatless/edit/plan_lines.py @@ -11,7 +11,7 @@ SELECT_LINES_SYSTEM_PROMPT, ) from moatless.state import AgenticState -from moatless.types import ( +from moatless.schema import ( ActionRequest, ActionResponse, AssistantMessage, diff --git a/moatless/edit/review.py b/moatless/edit/review.py index 73b14091..408c539a 100644 --- a/moatless/edit/review.py +++ b/moatless/edit/review.py @@ -11,7 +11,7 @@ CODER_FINAL_SYSTEM_PROMPT, ) from moatless.state import AgenticState -from moatless.types import ( +from moatless.schema import ( ActionRequest, ActionResponse, Message, diff --git a/moatless/file_context.py b/moatless/file_context.py index 09c0e3df..dd73562f 100644 --- a/moatless/file_context.py +++ b/moatless/file_context.py @@ -15,7 +15,7 @@ SpanType, ) from moatless.repository import CodeFile, FileRepository, UpdateResult -from moatless.types import FileWithSpans +from moatless.schema import FileWithSpans logger = logging.getLogger(__name__) diff --git a/moatless/find/decide.py b/moatless/find/decide.py index bd7c1a4a..770e45e2 100644 --- a/moatless/find/decide.py +++ b/moatless/find/decide.py @@ -5,7 +5,7 @@ from moatless.find import SearchCode from moatless.state import AgenticState -from moatless.types import ( +from moatless.schema import ( ActionRequest, ActionResponse, Message, diff --git a/moatless/find/identify.py b/moatless/find/identify.py index 479dbd21..4502697f 100644 --- a/moatless/find/identify.py +++ b/moatless/find/identify.py @@ -6,7 +6,7 @@ from moatless.file_context import RankedFileSpan from moatless.state import AgenticState -from moatless.types import ( +from moatless.schema import ( ActionRequest, ActionResponse, FileWithSpans, diff --git a/moatless/find/search.py b/moatless/find/search.py index 49b019db..3f81d9e8 100644 --- a/moatless/find/search.py +++ b/moatless/find/search.py @@ -8,7 +8,7 @@ from moatless.file_context import RankedFileSpan from moatless.index.types import SearchCodeHit from moatless.state import ActionResponse, AgenticState -from moatless.types import ( +from moatless.schema import ( ActionRequest, AssistantMessage, Message, diff --git a/moatless/index/code_index.py b/moatless/index/code_index.py index cc20cc0e..c7eb4e77 100644 --- a/moatless/index/code_index.py +++ b/moatless/index/code_index.py @@ -33,7 +33,7 @@ SearchCodeResponse, ) from moatless.repository import FileRepository -from moatless.types import FileWithSpans +from moatless.schema import FileWithSpans from moatless.utils.tokenizer import count_tokens logger = logging.getLogger(__name__) diff --git a/moatless/loop.py b/moatless/loop.py index 5d72753c..122481c2 100644 --- a/moatless/loop.py +++ b/moatless/loop.py @@ -27,7 +27,7 @@ ) from moatless.trajectory import Trajectory from moatless.transition_rules import TransitionRule, TransitionRules -from moatless.types import ( +from moatless.schema import ( ActionRequest, AssistantMessage, Content, diff --git a/moatless/types.py b/moatless/schema.py similarity index 100% rename from moatless/types.py rename to moatless/schema.py diff --git a/moatless/state.py b/moatless/state.py index cffa6093..a12f32a0 100644 --- a/moatless/state.py +++ b/moatless/state.py @@ -9,7 +9,7 @@ from moatless.file_context import FileContext from moatless.repository import FileRepository -from moatless.types import ( +from moatless.schema import ( ActionRequest, ActionResponse, ActionTransaction, diff --git a/moatless/trajectory.py b/moatless/trajectory.py index 59ab437b..65448128 100644 --- a/moatless/trajectory.py +++ b/moatless/trajectory.py @@ -9,7 +9,7 @@ from moatless.workspace import Workspace from moatless.transition_rules import TransitionRules from moatless.state import AgenticState, get_state_class -from moatless.types import ActionRequest, ActionTransaction, ActionResponse, Usage, Content +from moatless.schema import ActionRequest, ActionTransaction, ActionResponse, Usage, Content logger = logging.getLogger(__name__) diff --git a/moatless/verify/lint.py b/moatless/verify/lint.py index 7b64d3b1..2a4d7847 100644 --- a/moatless/verify/lint.py +++ b/moatless/verify/lint.py @@ -5,7 +5,7 @@ from pylint.testutils import MinimalTestReporter from moatless.repository import CodeFile -from moatless.types import VerificationError +from moatless.schema import VerificationError from moatless.verify.verify import Verifier logger = logging.getLogger(__name__) diff --git a/moatless/verify/maven.py b/moatless/verify/maven.py index e7cd393f..b1647a63 100644 --- a/moatless/verify/maven.py +++ b/moatless/verify/maven.py @@ -4,7 +4,7 @@ import subprocess from moatless.repository import CodeFile -from moatless.types import VerificationError +from moatless.schema import VerificationError from moatless.verify.verify import Verifier logger = logging.getLogger(__name__) diff --git a/moatless/verify/verify.py b/moatless/verify/verify.py index 6901f08d..6b188fa2 100644 --- a/moatless/verify/verify.py +++ b/moatless/verify/verify.py @@ -1,7 +1,7 @@ from abc import ABC, abstractmethod from moatless.repository import CodeFile -from moatless.types import VerificationError +from moatless.schema import VerificationError class Verifier(ABC): diff --git a/moatless/workspace.py b/moatless/workspace.py index 0dbaae78..fe0c5bb2 100644 --- a/moatless/workspace.py +++ b/moatless/workspace.py @@ -6,7 +6,7 @@ from moatless.index import IndexSettings from moatless.index.code_index import CodeIndex from moatless.repository import CodeFile, FileRepository, GitRepository -from moatless.types import FileWithSpans, VerificationError +from moatless.schema import FileWithSpans, VerificationError from moatless.verify.lint import PylintVerifier from moatless.verify.maven import MavenVerifier diff --git a/tests/edit/test_clarify.py b/tests/edit/test_clarify.py index 8a14b308..19469164 100644 --- a/tests/edit/test_clarify.py +++ b/tests/edit/test_clarify.py @@ -1,7 +1,7 @@ import pytest from unittest.mock import Mock, patch from moatless.edit.clarify import ClarifyCodeChange, LineNumberClarification -from moatless.types import ActionResponse, FileWithSpans +from moatless.schema import ActionResponse, FileWithSpans from moatless.workspace import Workspace from moatless.file_context import FileContext from moatless.repository import CodeFile diff --git a/tests/edit/test_edit.py b/tests/edit/test_edit.py index e608dd41..4dcd4a19 100644 --- a/tests/edit/test_edit.py +++ b/tests/edit/test_edit.py @@ -2,7 +2,7 @@ from unittest.mock import Mock, patch from moatless.edit.edit import EditCode from moatless.repository.file import UpdateResult -from moatless.types import ActionResponse, Content +from moatless.schema import ActionResponse, Content from moatless.workspace import Workspace from moatless.file_context import FileContext from moatless.repository import CodeFile diff --git a/tests/edit/test_plan.py b/tests/edit/test_plan.py index a6d9fe54..6107b7d9 100644 --- a/tests/edit/test_plan.py +++ b/tests/edit/test_plan.py @@ -1,7 +1,7 @@ import pytest from unittest.mock import Mock, patch from moatless.edit.plan import PlanToCode, ApplyChange -from moatless.types import ActionResponse, ActionTransaction +from moatless.schema import ActionResponse, ActionTransaction from moatless.workspace import Workspace from moatless.file_context import FileContext diff --git a/tests/find/test_decide.py b/tests/find/test_decide.py index d588fe43..b0bb3d29 100644 --- a/tests/find/test_decide.py +++ b/tests/find/test_decide.py @@ -1,7 +1,7 @@ import pytest from moatless.find.decide import DecideRelevance, Decision from moatless.find.identify import Identify, IdentifyCode -from moatless.types import ActionResponse, ActionTransaction +from moatless.schema import ActionResponse, ActionTransaction from moatless.workspace import Workspace from moatless.file_context import FileContext from unittest.mock import Mock, MagicMock, patch diff --git a/tests/find/test_identify.py b/tests/find/test_identify.py index 10c2d998..c3591e76 100644 --- a/tests/find/test_identify.py +++ b/tests/find/test_identify.py @@ -3,7 +3,7 @@ from moatless.find.identify import IdentifyCode, Identify, is_test_pattern from moatless.file_context import RankedFileSpan from moatless.repository.file import CodeFile -from moatless.types import FileWithSpans, ActionResponse +from moatless.schema import FileWithSpans, ActionResponse from moatless.workspace import Workspace from unittest.mock import Mock, MagicMock diff --git a/tests/find/test_search.py b/tests/find/test_search.py index e2ff1d18..c8dd2cc5 100644 --- a/tests/find/test_search.py +++ b/tests/find/test_search.py @@ -1,6 +1,6 @@ import pytest from moatless.find.search import SearchCode, Search, SearchRequest -from moatless.types import ActionResponse +from moatless.schema import ActionResponse from moatless.workspace import Workspace from unittest.mock import Mock, MagicMock from pydantic import ValidationError diff --git a/tests/index/test_epic_split.py b/tests/index/test_epic_split.py new file mode 100644 index 00000000..ce020def --- /dev/null +++ b/tests/index/test_epic_split.py @@ -0,0 +1,52 @@ +import cProfile +import pstats + +from llama_index.core import SimpleDirectoryReader + +from moatless.benchmark.swebench import get_moatless_instance, setup_swebench_repo +from moatless.index import IndexSettings +from moatless.index.epic_split import EpicSplitter + + +def test_epic_split(): + instance_id = "django__django-16139" + instance = get_moatless_instance(instance_id) + repo_path = setup_swebench_repo(instance) + + file = "tests/admin_views/tests.py" + input_files = [f"{repo_path}/{file}"] + + settings = IndexSettings() + splitter = EpicSplitter( + language=settings.language, + min_chunk_size=settings.min_chunk_size, + chunk_size=settings.chunk_size, + hard_token_limit=settings.hard_token_limit, + max_chunks=settings.max_chunks, + comment_strategy=settings.comment_strategy, + min_lines_to_parse_block=50, + repo_path=repo_path, + ) + + reader = SimpleDirectoryReader( + input_dir=repo_path, + input_files=input_files, + filename_as_id=True, + recursive=True, + ) + docs = reader.load_data() + print(f"Read {len(docs)} documents") + + # Profile the get_nodes_from_documents method + profiler = cProfile.Profile() + profiler.enable() + prepared_nodes = splitter.get_nodes_from_documents(docs, show_progress=True) + profiler.disable() + + # Print the profiling results + stats = pstats.Stats(profiler).sort_stats(pstats.SortKey.CUMULATIVE) + stats.print_stats(40) + + + for node in prepared_nodes: + print(f"{node.id_} {node.metadata['tokens']} {node.metadata['start_line']}-{node.metadata['end_line']} {node.metadata['span_ids']}") diff --git a/tests/test_file_context.py b/tests/test_file_context.py index 2f55c57a..9d5c7665 100644 --- a/tests/test_file_context.py +++ b/tests/test_file_context.py @@ -1,6 +1,6 @@ from moatless.benchmark.swebench import setup_swebench_repo from moatless.file_context import FileContext -from moatless.types import FileWithSpans +from moatless.schema import FileWithSpans def test_file_context_to_dict(): diff --git a/tests/test_loop.py b/tests/test_loop.py index 0db0d9e7..5b3cbeca 100644 --- a/tests/test_loop.py +++ b/tests/test_loop.py @@ -7,7 +7,7 @@ from moatless.state import AgenticState, Finished, Rejected, Pending from moatless.transition_rules import TransitionRules, TransitionRule from moatless.workspace import Workspace -from moatless.types import ActionRequest, ActionResponse, Content +from moatless.schema import ActionRequest, ActionResponse, Content from moatless.benchmark.swebench import create_workspace, load_instance from moatless.repository import GitRepository diff --git a/tests/test_state.py b/tests/test_state.py index ef45cedd..8e8283a6 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -4,7 +4,7 @@ from moatless.workspace import Workspace from moatless.repository import FileRepository from moatless.file_context import FileContext -from moatless.types import ActionRequest, ActionResponse, Content, FileWithSpans, Usage +from moatless.schema import ActionRequest, ActionResponse, Content, FileWithSpans, Usage class ConcreteAgenticState(AgenticState): diff --git a/tests/test_transition_rules.py b/tests/test_transition_rules.py index b19e7c67..cde3d37e 100644 --- a/tests/test_transition_rules.py +++ b/tests/test_transition_rules.py @@ -4,7 +4,7 @@ from pydantic import BaseModel from moatless.transition_rules import TransitionRules, TransitionRule from moatless.state import AgenticState, Finished, Rejected, Pending -from moatless.types import ActionResponse +from moatless.schema import ActionResponse class MockStateA(AgenticState):