From d5778441ac973c364fef8c560f91db28e015cd5e Mon Sep 17 00:00:00 2001 From: jonathanmetzman <31354670+jonathanmetzman@users.noreply.github.com> Date: Wed, 24 Jan 2024 12:05:06 -0500 Subject: [PATCH] Remove shared corpus feature (#3654) It was never used. --- configs/test/project.yaml | 4 -- .../bot/tasks/utasks/corpus_pruning_task.py | 7 ---- .../bot/untrusted_runner/environment.py | 1 - .../_internal/cron/project_setup.py | 10 +---- .../handlers/cron/project_setup_test.py | 42 ------------------- .../tasks/utasks/corpus_pruning_task_test.py | 14 ++----- .../tests/core/local/butler/deploy_test.py | 2 - src/local/butler/run_server.py | 2 - 8 files changed, 5 insertions(+), 77 deletions(-) diff --git a/configs/test/project.yaml b/configs/test/project.yaml index 995a42e610..67a2980015 100644 --- a/configs/test/project.yaml +++ b/configs/test/project.yaml @@ -86,10 +86,6 @@ env: # back into the main corpus bucket. QUARANTINE_BUCKET: test-quarantine-bucket - # Default bucket to store shared corpus across all job types. This is planned for future cross - # pollination with other data sources on the web. - SHARED_CORPUS_BUCKET: test-shared-corpus-bucket - # Default bucket to store fuzzing logs from testcase runs. This is different from the fuzzer logs # above which logs the fuzzer run that generates the testcases, whereas this one logs the run of # the testcases against the target application. diff --git a/src/clusterfuzz/_internal/bot/tasks/utasks/corpus_pruning_task.py b/src/clusterfuzz/_internal/bot/tasks/utasks/corpus_pruning_task.py index db7d900d73..83a6661b85 100644 --- a/src/clusterfuzz/_internal/bot/tasks/utasks/corpus_pruning_task.py +++ b/src/clusterfuzz/_internal/bot/tasks/utasks/corpus_pruning_task.py @@ -204,9 +204,6 @@ def __init__(self, fuzz_target, cross_pollinate_fuzzers): self.fuzz_target.project_qualified_name(), quarantine=True) - shared_corpus_bucket = environment.get_value('SHARED_CORPUS_BUCKET') - self.shared_corpus = corpus_manager.GcsCorpus(shared_corpus_bucket) - def restore_quarantined_units(self): """Restore units from the quarantine.""" logs.log('Restoring units from quarantine.') @@ -242,10 +239,6 @@ def sync_to_disk(self): 'Failed to sync quarantine corpus to disk.', fuzz_target=self.fuzz_target) - if not self.shared_corpus.rsync_to_disk(self.shared_corpus_path): - logs.log_error( - 'Failed to sync shared corpus to disk.', fuzz_target=self.fuzz_target) - self._cross_pollinate_other_fuzzer_corpuses() def sync_to_gcs(self): diff --git a/src/clusterfuzz/_internal/bot/untrusted_runner/environment.py b/src/clusterfuzz/_internal/bot/untrusted_runner/environment.py index 5312b8f39f..10a20ac124 100644 --- a/src/clusterfuzz/_internal/bot/untrusted_runner/environment.py +++ b/src/clusterfuzz/_internal/bot/untrusted_runner/environment.py @@ -45,7 +45,6 @@ r'^PATH$', r'^PY_UNITTESTS$', r'^QUARANTINE_BUCKET$', - r'^SHARED_CORPUS_BUCKET$', r'^STRATEGY_SELECTION_DISTRIBUTION$', r'^STRATEGY_SELECTION_METHOD$', r'^TASK_NAME$', diff --git a/src/clusterfuzz/_internal/cron/project_setup.py b/src/clusterfuzz/_internal/cron/project_setup.py index b5f175d6cf..7f917e7335 100644 --- a/src/clusterfuzz/_internal/cron/project_setup.py +++ b/src/clusterfuzz/_internal/cron/project_setup.py @@ -35,7 +35,6 @@ from clusterfuzz._internal.google_cloud_utils import pubsub from clusterfuzz._internal.google_cloud_utils import storage from clusterfuzz._internal.metrics import logs -from clusterfuzz._internal.system import environment from . import service_accounts @@ -657,10 +656,6 @@ def _deployment_bucket_name(self): """Deployment bucket name.""" return f'{utils.get_application_id()}-deployment' - def _shared_corpus_bucket_name(self): - """Shared corpus bucket name.""" - return environment.get_value('SHARED_CORPUS_BUCKET') - def _backup_bucket_name(self, project_name): """Return the backup_bucket_name.""" return project_name + '-backup.' + data_handler.bucket_domain_suffix() @@ -707,12 +702,9 @@ def _create_service_accounts_and_buckets(self, project, info): except Exception as e: logs.log_error(f'Failed to add bucket IAMs for {project}: {e}.') - # Grant the service account read access to deployment, shared corpus and - # mutator plugin buckets. + # Grant the service account read access to deployment bucket. add_service_account_to_bucket(client, self._deployment_bucket_name(), service_account, OBJECT_VIEWER_IAM_ROLE) - add_service_account_to_bucket(client, self._shared_corpus_bucket_name(), - service_account, OBJECT_VIEWER_IAM_ROLE) data_bundles = { fuzzer_entity.get().data_bundle_name for fuzzer_entity in self._fuzzer_entities.values() diff --git a/src/clusterfuzz/_internal/tests/appengine/handlers/cron/project_setup_test.py b/src/clusterfuzz/_internal/tests/appengine/handlers/cron/project_setup_test.py index e91fe30747..5c7b7c346e 100644 --- a/src/clusterfuzz/_internal/tests/appengine/handlers/cron/project_setup_test.py +++ b/src/clusterfuzz/_internal/tests/appengine/handlers/cron/project_setup_test.py @@ -1063,20 +1063,6 @@ def test_execute(self): }] }, bucket='clusterfuzz-external-deployment'), - mock.call( - body={ - 'resourceId': - 'fake', - 'kind': - 'storage#policy', - 'etag': - 'fake', - 'bindings': [{ - 'role': 'roles/storage.objectViewer', - 'members': ['serviceAccount:lib1@serviceaccount.com'] - }] - }, - bucket='test-shared-corpus-bucket'), mock.call( body={ 'resourceId': @@ -1161,20 +1147,6 @@ def test_execute(self): }] }, bucket='clusterfuzz-external-deployment'), - mock.call( - body={ - 'resourceId': - 'fake', - 'kind': - 'storage#policy', - 'etag': - 'fake', - 'bindings': [{ - 'role': 'roles/storage.objectViewer', - 'members': ['serviceAccount:lib2@serviceaccount.com'] - }] - }, - bucket='test-shared-corpus-bucket'), mock.call( body={ 'resourceId': @@ -1313,20 +1285,6 @@ def test_execute(self): }] }, bucket='clusterfuzz-external-deployment'), - mock.call( - body={ - 'resourceId': - 'fake', - 'kind': - 'storage#policy', - 'etag': - 'fake', - 'bindings': [{ - 'role': 'roles/storage.objectViewer', - 'members': ['serviceAccount:lib3@serviceaccount.com'] - }] - }, - bucket='test-shared-corpus-bucket'), mock.call( body={ 'resourceId': diff --git a/src/clusterfuzz/_internal/tests/core/bot/tasks/utasks/corpus_pruning_task_test.py b/src/clusterfuzz/_internal/tests/core/bot/tasks/utasks/corpus_pruning_task_test.py index 14f798021d..e546c6e4fc 100644 --- a/src/clusterfuzz/_internal/tests/core/bot/tasks/utasks/corpus_pruning_task_test.py +++ b/src/clusterfuzz/_internal/tests/core/bot/tasks/utasks/corpus_pruning_task_test.py @@ -43,7 +43,6 @@ os.path.dirname(os.path.realpath(__file__)), 'corpus_pruning_task_data') TEST_GLOBAL_BUCKET = 'clusterfuzz-test-global-bundle' -TEST_SHARED_BUCKET = 'clusterfuzz-test-shared-corpus' TEST2_BACKUP_BUCKET = 'clusterfuzz-test2-backup-bucket' @@ -123,7 +122,6 @@ def mocked_unpack_seed_corpus_if_needed(*args, **kwargs): os.environ['FUZZ_INPUTS_DISK'] = self.fuzz_inputs_disk os.environ['CORPUS_BUCKET'] = 'bucket' os.environ['QUARANTINE_BUCKET'] = 'bucket-quarantine' - os.environ['SHARED_CORPUS_BUCKET'] = 'bucket-shared' os.environ['JOB_NAME'] = 'libfuzzer_asan_job' os.environ['FAIL_RETRIES'] = '1' os.environ['APP_REVISION'] = '1337' @@ -141,8 +139,6 @@ def _mock_rsync_to_disk(self, _, sync_dir, timeout=None, delete=None): """Mock rsync_to_disk.""" if 'quarantine' in sync_dir: corpus_dir = self.quarantine_dir - elif 'shared' in sync_dir: - corpus_dir = self.shared_corpus_dir else: corpus_dir = self.corpus_dir @@ -181,6 +177,7 @@ def setUp(self): ]) self.mock.setup_build.side_effect = self._mock_setup_build self.mock.get_application_id.return_value = 'project' + self.maxDiff = None def test_preprocess_existing_task_running(self): """Preprocess test when another task is running.""" @@ -220,9 +217,8 @@ def test_prune(self): 'crash-7acd6a2b3fe3c5ec97fa37e5a980c106367491fa') corpus = os.listdir(self.corpus_dir) - self.assertEqual(4, len(corpus)) + self.assertEqual(3, len(corpus)) self.assertCountEqual([ - '39e0574a4abfd646565a3e436c548eeb1684fb57', '7d157d7c000ae27db146575c08ce30df893d3a64', '31836aeaab22dc49555a97edb4c753881432e01d', '6fa8c57336628a7d733f684dc9404fbd09020543', @@ -249,9 +245,9 @@ def test_prune(self): 'corpus_location': 'gs://bucket/libFuzzer/test_fuzzer/', 'corpus_size_bytes': - 8, + 6, 'corpus_size_units': - 4, + 3, 'date': today, # Coverage numbers are expected to be None as they come from fuzzer @@ -438,8 +434,6 @@ def setUp(self): job='libfuzzer_asan_job2', last_run=datetime.datetime.now()).put() - environment.set_value('SHARED_CORPUS_BUCKET', TEST_SHARED_BUCKET) - # Set up remote corpora. self.corpus = corpus_manager.FuzzTargetCorpus('libFuzzer', 'test_fuzzer') self.corpus.rsync_from_disk(os.path.join(TEST_DIR, 'corpus'), delete=True) diff --git a/src/clusterfuzz/_internal/tests/core/local/butler/deploy_test.py b/src/clusterfuzz/_internal/tests/core/local/butler/deploy_test.py index 4d12883b86..3542200b87 100644 --- a/src/clusterfuzz/_internal/tests/core/local/butler/deploy_test.py +++ b/src/clusterfuzz/_internal/tests/core/local/butler/deploy_test.py @@ -70,8 +70,6 @@ def _check_env_variables(self, yaml_paths): self.assertEqual('test-corpus-bucket', env_variables['CORPUS_BUCKET']) self.assertEqual('test-quarantine-bucket', env_variables['QUARANTINE_BUCKET']) - self.assertEqual('test-shared-corpus-bucket', - env_variables['SHARED_CORPUS_BUCKET']) def _check_no_env_variables(self, yaml_paths): """Check that environment variables are not written to yaml paths.""" diff --git a/src/local/butler/run_server.py b/src/local/butler/run_server.py index 628bdf7e2a..32785a14d9 100644 --- a/src/local/butler/run_server.py +++ b/src/local/butler/run_server.py @@ -69,8 +69,6 @@ def bootstrap_gcs(storage_path): create_local_bucket(local_gcs_buckets_path, config.get('env.CORPUS_BUCKET')) create_local_bucket(local_gcs_buckets_path, config.get('env.QUARANTINE_BUCKET')) - create_local_bucket(local_gcs_buckets_path, - config.get('env.SHARED_CORPUS_BUCKET')) create_local_bucket(local_gcs_buckets_path, config.get('env.FUZZ_LOGS_BUCKET'))