diff --git a/tests/benchmark/test_report_v2.py b/tests/benchmark/test_report_v2.py index c0a77753..be8d02b8 100644 --- a/tests/benchmark/test_report_v2.py +++ b/tests/benchmark/test_report_v2.py @@ -5,6 +5,7 @@ import pytest from moatless.benchmark.report_v2 import EditStats, PlanStats, SearchStats, StateStats, to_result, BenchmarkResult, to_dataframe +from moatless.benchmark.utils import get_moatless_instance from moatless.trajectory import Trajectory @@ -13,6 +14,11 @@ def django_trajectory(): file_path = Path("tests/trajectories/django__django_16379.json") return Trajectory.load(str(file_path)) +@pytest.fixture +def scikit_trajectory(): + file_path = Path("tests/trajectories/scikit-learn__scikit-learn-13779/trajectory.json") + return Trajectory.load(str(file_path)) + @pytest.fixture def dataset(): @@ -22,10 +28,12 @@ def dataset(): @pytest.fixture def django_instance(dataset): - for instance in dataset: - if instance["instance_id"] == "django__django-16379": - return instance - return None + return get_moatless_instance("django__django-16379", split="lite") + + +@pytest.fixture +def scikit_instance(dataset): + return get_moatless_instance("scikit-learn__scikit-learn-13779", split="verified") def test_to_result(django_trajectory, django_instance): @@ -69,6 +77,14 @@ def test_to_result(django_trajectory, django_instance): assert result.expected_files == len(result.expected_spans_details) +def test_scikit_not_edited(scikit_trajectory, scikit_instance): + result = to_result(scikit_instance, scikit_trajectory) + + print(json.dumps(result.model_dump(), indent=2)) + + assert result.edit.status == "expected_files" + + def test_to_result_error_case(django_trajectory, django_instance): # Simulate an error in the trajectory django_trajectory._info["error"] = "Simulated error" diff --git a/tests/edit/test_clarify.py b/tests/edit/test_clarify.py index 1151bf88..0628ffcc 100644 --- a/tests/edit/test_clarify.py +++ b/tests/edit/test_clarify.py @@ -1,7 +1,10 @@ import pytest from unittest.mock import Mock, patch + +from moatless.benchmark.swebench import create_workspace +from moatless.benchmark.utils import get_moatless_instance from moatless.edit.clarify import ClarifyCodeChange, LineNumberClarification -from moatless.schema import StateOutcome, FileWithSpans +from moatless.state import StateOutcome, FileWithSpans from moatless.workspace import Workspace from moatless.file_context import FileContext from moatless.repository import CodeFile @@ -133,3 +136,15 @@ def test_verify_line_numbers_invalid(self, mock_span, mock_file, clarify_code_ch assert result is not None assert "covers the whole code span" in result + +def test_clarify_class(): + instance = get_moatless_instance("django__django-11095", split="verified") + workspace = create_workspace(instance) + workspace.file_context.add_spans_to_context("django/contrib/admin/checks.py", ["ModelAdminChecks", "ModelAdminChecks._check_inlines_item"]) + + clarify_code = ClarifyCodeChange(id=0, _workspace=workspace, initial_message="Test initial message", instructions="update", span_id="ModelAdminChecks", file_path="django/contrib/admin/checks.py") + prompt_message = clarify_code.messages()[-1].content + + assert "class ModelAdminChecks(BaseModelAdminChecks):" in prompt_message + assert "def _check_inlines_item(self, obj, inline, label):" in prompt_message + assert "return inline(obj.model, obj.admin_site).check()" in prompt_message diff --git a/tests/edit/test_edit.py b/tests/edit/test_edit.py index 77dfae9a..accf29b8 100644 --- a/tests/edit/test_edit.py +++ b/tests/edit/test_edit.py @@ -5,7 +5,7 @@ from moatless.benchmark.swebench import load_instance, create_workspace from moatless.edit.edit import EditCode from moatless.repository.file import UpdateResult -from moatless.schema import StateOutcome, Content +from moatless.state import StateOutcome, Content from moatless.settings import Settings from moatless.trajectory import Trajectory from moatless.workspace import Workspace diff --git a/tests/edit/test_plan.py b/tests/edit/test_plan.py index 1f6b7427..280f05ae 100644 --- a/tests/edit/test_plan.py +++ b/tests/edit/test_plan.py @@ -1,7 +1,11 @@ import pytest from unittest.mock import Mock, patch + +from moatless.benchmark.swebench import create_workspace +from moatless.benchmark.utils import get_moatless_instance +from moatless.edit.expand import ExpandContext from moatless.edit.plan import PlanToCode, ApplyChange -from moatless.schema import StateOutcome, ActionTransaction +from moatless.state import StateOutcome, ActionTransaction from moatless.workspace import Workspace from moatless.file_context import FileContext @@ -112,4 +116,46 @@ def test_request_for_change_file_not_found(self, mock_get_spans, mock_get_file, assert response.trigger == "retry" assert "File nonexistent.py is not found in the file context" in response.retry_message - # Add more tests for other scenarios in _request_for_change method \ No newline at end of file +def test_select_class(): + instance = get_moatless_instance("django__django-11095", split="verified") + workspace = create_workspace(instance) + + plan_to_code = PlanToCode(id=0, _workspace=workspace, initial_message="Test initial message") + + workspace.file_context.add_spans_to_context("django/contrib/admin/checks.py", ["ModelAdminChecks", "ModelAdminChecks._check_inlines_item"]) + mocked_action = ApplyChange( + scratch_pad="Applying change", + action="modify", + file_path="django/contrib/admin/checks.py", + span_id="ModelAdminChecks", + instructions="Update function" + ) + + outcome = plan_to_code.execute(mocked_action) + + # Expect clarification + assert outcome.trigger == "edit_code" + assert outcome.output == {'instructions': 'Update function', 'file_path': 'django/contrib/admin/checks.py', 'span_id': 'ModelAdminChecks'} + + +def test_impl_span(): + instance = get_moatless_instance("django__django-12419", split="verified") + workspace = create_workspace(instance) + + plan_to_code = PlanToCode(id=0, _workspace=workspace, initial_message="Test initial message") + + workspace.file_context.add_spans_to_context("django/contrib/admin/checks.py", ["ModelAdminChecks", "ModelAdminChecks._check_inlines_item"]) + mocked_action = ApplyChange( + scratch_pad="Applying change", + action="modify", + file_path="django/contrib/admin/checks.py", + span_id="ModelAdminChecks", + instructions="Update function" + ) + + outcome = plan_to_code.execute(mocked_action) + + # Expect clarification + assert outcome.trigger == "edit_code" + assert outcome.output == {'instructions': 'Update function', 'file_path': 'django/contrib/admin/checks.py', 'span_id': 'ModelAdminChecks'} + diff --git a/tests/find/test_decide.py b/tests/find/test_decide.py index dd4b3978..1739d9e0 100644 --- a/tests/find/test_decide.py +++ b/tests/find/test_decide.py @@ -1,7 +1,7 @@ import pytest from moatless.find.decide import DecideRelevance, Decision from moatless.find.identify import Identify, IdentifyCode -from moatless.schema import StateOutcome, ActionTransaction +from moatless.state import StateOutcome, ActionTransaction from moatless.workspace import Workspace from moatless.file_context import FileContext from unittest.mock import Mock, MagicMock, patch diff --git a/tests/find/test_identify.py b/tests/find/test_identify.py index 791a0cbb..54297dc1 100644 --- a/tests/find/test_identify.py +++ b/tests/find/test_identify.py @@ -3,7 +3,8 @@ from moatless.find.identify import IdentifyCode, Identify, is_test_pattern from moatless.file_context import RankedFileSpan from moatless.repository.file import CodeFile -from moatless.schema import FileWithSpans, StateOutcome +from moatless.schema import FileWithSpans +from moatless.state import StateOutcome from moatless.workspace import Workspace from unittest.mock import Mock, MagicMock diff --git a/tests/find/test_search.py b/tests/find/test_search.py index a371493d..c4244231 100644 --- a/tests/find/test_search.py +++ b/tests/find/test_search.py @@ -1,6 +1,9 @@ import pytest + +from moatless.benchmark.swebench import create_workspace +from moatless.benchmark.utils import get_moatless_instance, get_moatless_instances from moatless.find.search import SearchCode, Search, SearchRequest -from moatless.schema import StateOutcome +from moatless.state import StateOutcome from moatless.workspace import Workspace from unittest.mock import Mock, MagicMock from pydantic import ValidationError @@ -117,4 +120,82 @@ def test_handle_direct_search_attributes(self): assert len(search.search_requests) == 1 assert search.search_requests[0].file_pattern == "*.js" - assert search.search_requests[0].query == "javascript query" \ No newline at end of file + assert search.search_requests[0].query == "javascript query" + + +def test_find_impl_span(): + + instances = get_moatless_instances(split="verified") + + # Filter and sort instances + filtered_instances = { + k: v for k, v in instances.items() + if "django__django-" in k and "12273" <= k.split("-")[-1] <= "12419" + } + sorted_instances = dict(sorted(filtered_instances.items())) + + for instance_id, instance in sorted_instances.items(): + print(f"Instance: {instance_id}") + workspace = create_workspace(instance) + + search_code = SearchCode(id=0, _workspace=workspace, initial_message="Test initial message") + + mocked_action = Search( + scratch_pad="Applying change", + search_requests=[ + SearchRequest(file_pattern="**/global_settings.py", query="SECURE_REFERRER_POLICY setting") + ] + ) + + outcome = search_code.execute(mocked_action) + print(outcome) + + workspace.file_context.add_ranked_spans(outcome.output["ranked_spans"]) + assert "SECURE_REFERRER_POLICY" in workspace.file_context.create_prompt() + +def test_find(): + instance_id = "django__django-12419" # + instance = get_moatless_instance(instance_id, split="verified") + print(f"Instance: {instance_id}") + workspace = create_workspace(instance) + + search_code = SearchCode(id=0, _workspace=workspace, initial_message="Test initial message") + + mocked_action = Search( + scratch_pad="Applying change", + search_requests=[ + SearchRequest(file_pattern="**/global_settings.py", query="SECURE_REFERRER_POLICY setting") + ] + ) + + outcome = search_code.execute(mocked_action) + + for ranked_span in outcome.output["ranked_spans"]: + print(ranked_span) + + workspace.file_context.add_ranked_spans(outcome.output["ranked_spans"]) + print(workspace.file_context.create_prompt(show_span_ids=True)) + assert "SECURE_REFERRER_POLICY = None" in workspace.file_context.create_prompt() + + +def test_find_2(): + instance_id = "django__django-15104" + instance = get_moatless_instance(instance_id, split="verified") + workspace = create_workspace(instance) + + search_code = SearchCode(id=0, _workspace=workspace, initial_message="Test initial message") + + print(instance["expected_spans"]) + + mocked_action = Search( + scratch_pad="Applying change", + max_search_results=250, + search_requests=[ + SearchRequest(file_pattern="**/migrations/*.py", query="MigrationAutodetector class with generate_renamed_models method") + ] + ) + + outcome = search_code.execute(mocked_action) + + for span in outcome.output["ranked_spans"]: + print(span) \ No newline at end of file diff --git a/tests/index/test_code_index.py b/tests/index/test_code_index.py new file mode 100644 index 00000000..68bc1ecc --- /dev/null +++ b/tests/index/test_code_index.py @@ -0,0 +1,32 @@ +from moatless.benchmark.swebench import setup_swebench_repo +from moatless.benchmark.utils import get_moatless_instance +from moatless.index import IndexSettings, CodeIndex +from moatless.index.settings import CommentStrategy +from moatless.repository import GitRepository, FileRepository + + +def test_ingestion(): + index_settings = IndexSettings( + embed_model="voyage-code-2", + dimensions=1536, + language="python", + min_chunk_size=200, + chunk_size=750, + hard_token_limit=3000, + max_chunks=200, + comment_strategy=CommentStrategy.ASSOCIATE, + ) + + instance_id = "django__django-12419" + instance = get_moatless_instance(instance_id, split="verified") + repo_dir = setup_swebench_repo(instance) + print(repo_dir) + repo = FileRepository(repo_dir) + code_index = CodeIndex(settings=index_settings, file_repo=repo) + + vectors, indexed_tokens = code_index.run_ingestion(num_workers=1, input_files=["django/conf/global_settings.py"]) + + results = code_index._vector_search("SECURE_REFERRER_POLICY setting") + + for result in results: + print(result) diff --git a/tests/index/test_epic_split.py b/tests/index/test_epic_split.py index ce020def..2a05aabd 100644 --- a/tests/index/test_epic_split.py +++ b/tests/index/test_epic_split.py @@ -3,7 +3,8 @@ from llama_index.core import SimpleDirectoryReader -from moatless.benchmark.swebench import get_moatless_instance, setup_swebench_repo +from moatless.benchmark.swebench import setup_swebench_repo +from moatless.benchmark.utils import get_moatless_instance from moatless.index import IndexSettings from moatless.index.epic_split import EpicSplitter @@ -48,5 +49,49 @@ def test_epic_split(): stats.print_stats(40) + for node in prepared_nodes: + print(f"{node.id_} {node.metadata['tokens']} {node.metadata['start_line']}-{node.metadata['end_line']} {node.metadata['span_ids']}") + + +def test_impl_spans(): + instance_id = "django__django-12419" + instance = get_moatless_instance(instance_id, split="verified") + repo_path = setup_swebench_repo(instance) + + file = "django/conf/global_settings.py" + input_files = [f"{repo_path}/{file}"] + + settings = IndexSettings() + splitter = EpicSplitter( + language=settings.language, + min_chunk_size=settings.min_chunk_size, + chunk_size=settings.chunk_size, + hard_token_limit=settings.hard_token_limit, + max_chunks=settings.max_chunks, + comment_strategy=settings.comment_strategy, + min_lines_to_parse_block=50, + repo_path=repo_path, + ) + + reader = SimpleDirectoryReader( + input_dir=repo_path, + input_files=input_files, + filename_as_id=True, + recursive=True, + ) + docs = reader.load_data() + print(f"Read {len(docs)} documents") + + # Profile the get_nodes_from_documents method + profiler = cProfile.Profile() + profiler.enable() + prepared_nodes = splitter.get_nodes_from_documents(docs, show_progress=True) + profiler.disable() + + # Print the profiling results + stats = pstats.Stats(profiler).sort_stats(pstats.SortKey.CUMULATIVE) + stats.print_stats(40) + + for node in prepared_nodes: print(f"{node.id_} {node.metadata['tokens']} {node.metadata['start_line']}-{node.metadata['end_line']} {node.metadata['span_ids']}") diff --git a/tests/index/test_simple_faiss.py b/tests/index/test_simple_faiss.py new file mode 100644 index 00000000..55039697 --- /dev/null +++ b/tests/index/test_simple_faiss.py @@ -0,0 +1,19 @@ +import os + +from llama_index.core.vector_stores import VectorStoreQuery + +from moatless.index import SimpleFaissVectorStore + + +def test_search_index(): + index_store_dir = os.getenv("INDEX_STORE_DIR") + vector_store = SimpleFaissVectorStore.from_persist_dir(f"{index_store_dir}/django__django-12419") + + query_bundle = VectorStoreQuery( + query_str="SECURE_REFERRER_POLICY setting", + similarity_top_k=100, + ) + + result = vector_store.query(query_bundle) + for res in result.ids: + print(res) \ No newline at end of file diff --git a/tests/integration_test.py b/tests/integration_test.py index 8e69b350..e3569eba 100644 --- a/tests/integration_test.py +++ b/tests/integration_test.py @@ -7,8 +7,9 @@ from moatless import AgenticLoop from moatless.benchmark.swebench import load_instance, create_workspace -from moatless.benchmark.utils import trace_metadata +from moatless.benchmark.utils import trace_metadata, get_moatless_instance from moatless.edit import EditCode, PlanToCode, ClarifyCodeChange +from moatless.edit.expand import ExpandContext from moatless.find import SearchCode, IdentifyCode, DecideRelevance from moatless.state import Finished, Pending from moatless.transition_rules import TransitionRule, TransitionRules @@ -280,3 +281,83 @@ def test_deepseek_coder_django_12286_edit_code(): assert diff + + +#@pytest.mark.llm_integration +def test_search_and_code_no_class(): + model = "claude-3-5-sonnet-20240620" + edit_model = "azure/gpt-4o" + instance_id = "django__django-12419" + litellm.drop_params = True + + global_params = { + "model": model, + "temperature": 0.5, + "max_tokens": 2000, + "max_prompt_file_tokens": 8000, + } + + state_params = { + SearchCode: { + "model": "claude-3-5-sonnet-20240620", + "temperature": 0.2, + "provide_initial_context": True, + "max_search_results": 75, + "initial_context_tokens": 6000, + "initial_search_results": 100, + "initial_context_spans_per_file": 5, + }, + IdentifyCode: {"model": "azure/gpt-4o", "temperature": 0.2, "expand_context": True}, + DecideRelevance: { + "model": "azure/gpt-4o", + "temperature": 0.2, + "finish_after_relevant_count": 1, + }, + PlanToCode: { + "model": model, + "temperature": 0.2, + "max_tokens_in_edit_prompt": 750, + "write_code_suggestions": False, + "finish_on_review": True, + }, + ExpandContext: { + "expand_to_max_tokens": 8000 + }, + EditCode: { + "model": edit_model, + "temperature": 0.0, + "chain_of_thought": False, + "show_file_context": False, + "max_prompt_file_tokens": 8000, + } + } + + instance = get_moatless_instance(instance_id, split="verified") + workspace = create_workspace(instance) + + datestr = datetime.now().strftime("%Y%m%d-%H%M%S") + dir = f"{moatless_dir}/{datestr}_test_django_12286_deepseek_coder" + trajectory_path = f"{dir}/trajectory.json" + + loop = AgenticLoop( + search_and_code_transitions(global_params=global_params, state_params=state_params), + initial_message=instance["problem_statement"], + workspace=workspace, + trajectory_path=trajectory_path, + prompt_log_dir=dir, + ) + + response = loop.run() + print("Response") + print(response) + print("Response Output") + print(response.output) + + assert response.status == "finished" + + diff = loop.workspace.file_repo.diff() + print("Diff") + print(diff) + assert diff + + diff --git a/tests/test_loop.py b/tests/test_loop.py index 3c55bcba..335407da 100644 --- a/tests/test_loop.py +++ b/tests/test_loop.py @@ -7,7 +7,7 @@ from moatless.state import AgenticState, Finished, Rejected, Pending from moatless.transition_rules import TransitionRules, TransitionRule from moatless.workspace import Workspace -from moatless.schema import ActionRequest, StateOutcome, Content +from moatless.state import ActionRequest, StateOutcome, Content from moatless.benchmark.swebench import create_workspace, load_instance from moatless.repository import GitRepository diff --git a/tests/test_state.py b/tests/test_state.py index 1aef065e..90d3772a 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -1,15 +1,16 @@ import pytest from unittest.mock import MagicMock -from moatless.state import State, AgenticState, NoopState, Finished +from moatless.state import State, AgenticState, NoopState, Finished, ActionRequest, StateOutcome, Content from moatless.workspace import Workspace -from moatless.repository import FileRepository -from moatless.file_context import FileContext -from moatless.schema import ActionRequest, StateOutcome, Completion, Content, FileWithSpans, Usage +from moatless.schema import Completion, FileWithSpans, Usage class ConcreteState(State): def clone(self): return ConcreteState(**self.model_dump()) + def execute(self, mocked_action_request: ActionRequest | None = None) -> StateOutcome: + return StateOutcome(output={"content": "Test response"}) + class ConcreteAgenticState(AgenticState): def _execute_action(self, action: ActionRequest) -> StateOutcome: return StateOutcome(output={"content": "Test response"}) diff --git a/tests/test_transition_rules.py b/tests/test_transition_rules.py index 44af1e73..87cff190 100644 --- a/tests/test_transition_rules.py +++ b/tests/test_transition_rules.py @@ -1,15 +1,16 @@ import json import pytest -from pydantic import BaseModel from moatless.transition_rules import TransitionRules, TransitionRule -from moatless.state import AgenticState, Finished, Rejected, Pending, State -from moatless.schema import StateOutcome +from moatless.state import Finished, Rejected, Pending, State, ActionRequest, StateOutcome class MockStateA(State): value: int = 0 + def execute(self, mocked_action_request: ActionRequest | None = None) -> StateOutcome: + return StateOutcome(output={"value": self.value}) + class MockStateB(State): default_name: str = "" diff --git a/tests/test_types.py b/tests/test_types.py index 887fa4d3..6851280d 100644 --- a/tests/test_types.py +++ b/tests/test_types.py @@ -1,6 +1,4 @@ import anthropic.types -from pydantic import BaseModel -import pytest from moatless.schema import Completion, Usage class TestCompletion: