Skip to content

Commit

Permalink
Merge master into oss-fuzz (#4449)
Browse files Browse the repository at this point in the history
From now on I will commit oss-fuzz work specifically to this branch.

---------

Co-authored-by: Vitor Guidi <[email protected]>
Co-authored-by: Ali HIJAZI <[email protected]>
  • Loading branch information
3 people authored Nov 26, 2024
1 parent c3a3efe commit 0c86007
Show file tree
Hide file tree
Showing 12 changed files with 157 additions and 62 deletions.
2 changes: 1 addition & 1 deletion src/clusterfuzz/_internal/base/concurrency.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def map(self, f, l):
def make_pool(pool_size=POOL_SIZE, cpu_bound=False, max_pool_size=None):
"""Returns a pool that can (usually) execute tasks concurrently."""
if max_pool_size is not None:
pool_size = max(pool_size, max_pool_size)
pool_size = min(pool_size, max_pool_size)

# Don't use processes on Windows and unittests to avoid hangs.
if (environment.get_value('PY_UNITTESTS') or
Expand Down
5 changes: 3 additions & 2 deletions src/clusterfuzz/_internal/bot/tasks/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ class AlreadyRunningError(Error):
def cleanup_task_state():
"""Cleans state before and after a task is executed."""
# Cleanup stale processes.
if environment.is_tworker():
return
process_handler.cleanup_stale_processes()

# Clear build urls, temp and testcase directories.
Expand Down Expand Up @@ -468,7 +470,6 @@ def process_command_impl(task_name, task_argument, job_name, high_end,
return run_command(task_name, task_argument, job_name, uworker_env)
finally:
# Final clean up.
if not environment.is_tworker():
cleanup_task_state()
cleanup_task_state()
if 'CF_TASK_ID' in os.environ:
del os.environ['CF_TASK_ID']
12 changes: 6 additions & 6 deletions src/clusterfuzz/_internal/bot/tasks/utasks/analyze_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,16 +441,15 @@ def utask_main(uworker_input):

test_for_reproducibility(fuzz_target, testcase, testcase_file_path, state,
test_timeout)
one_time_flag = testcase.one_time_crasher_flag

analyze_task_output.one_time_crasher_flag = one_time_flag
analyze_task_output.one_time_crasher_flag = testcase.one_time_crasher_flag

monitoring_metrics.ANALYZE_TASK_REPRODUCIBILITY.increment(
labels={
'fuzzer_name': uworker_input.fuzzer_name,
'job': uworker_input.job_type,
'crashes': True,
'reproducible': not one_time_flag,
'reproducible': not testcase.one_time_crasher_flag,
'platform': environment.platform(),
})

Expand Down Expand Up @@ -559,9 +558,6 @@ def _update_testcase(output):
def utask_postprocess(output):
"""Trusted: Cleans up after a uworker execute_task, writing anything needed to
the db."""
testcase = data_handler.get_testcase_by_id(output.uworker_input.testcase_id)
testcase_upload_metadata = testcase_utils.get_testcase_upload_metadata(
output.uworker_input.testcase_id)
testcase_utils.emit_testcase_triage_duration_metric(
int(output.uworker_input.testcase_id),
testcase_utils.TESTCASE_TRIAGE_DURATION_ANALYZE_COMPLETED_STEP)
Expand All @@ -570,6 +566,10 @@ def utask_postprocess(output):
_ERROR_HANDLER.handle(output)
return

testcase = data_handler.get_testcase_by_id(output.uworker_input.testcase_id)
testcase_upload_metadata = testcase_utils.get_testcase_upload_metadata(
output.uworker_input.testcase_id)

log_message = (f'Testcase crashed in {output.test_timeout} seconds '
f'(r{testcase.crash_revision})')
data_handler.update_testcase_comment(testcase, data_types.TaskState.FINISHED,
Expand Down
49 changes: 37 additions & 12 deletions src/clusterfuzz/_internal/bot/tasks/utasks/fuzz_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -1567,6 +1567,23 @@ def do_engine_fuzzing(self, engine_impl):

return crashes, fuzzer_metadata

def _emit_testcase_generation_time_metric(self, start_time, testcase_count,
                                          fuzzer, job):
  """Records the average wall-clock time spent generating one testcase.

  Args:
    start_time: Epoch seconds when testcase generation began.
    testcase_count: Number of testcases generated since start_time.
    fuzzer: Fuzzer name, used as a metric label.
    job: Job name, used as a metric label.
  """
  elapsed = time.time() - start_time
  # Nothing was generated: skip the metric rather than divide by zero.
  if not testcase_count:
    return
  monitoring_metrics.TESTCASE_GENERATION_AVERAGE_TIME.add(
      elapsed / testcase_count,
      labels={
          'job': job,
          'fuzzer': fuzzer,
          'platform': environment.platform(),
      })

def do_blackbox_fuzzing(self, fuzzer, fuzzer_directory, job_type):
"""Run blackbox fuzzing. Currently also used for engine fuzzing."""
# Set the thread timeout values.
Expand All @@ -1590,11 +1607,15 @@ def do_blackbox_fuzzing(self, fuzzer, fuzzer_directory, job_type):

# Run the fuzzer to generate testcases. If error occurred while trying
# to run the fuzzer, bail out.
testcase_generation_start = time.time()
generate_result = self.generate_blackbox_testcases(
fuzzer, job_type, fuzzer_directory, testcase_count)
if not generate_result.success:
return None, None, None, None

self._emit_testcase_generation_time_metric(
testcase_generation_start, testcase_count, fuzzer.name, job_type)

environment.set_value('FUZZER_NAME', self.fully_qualified_fuzzer_name)

# Initialize a list of crashes.
Expand Down Expand Up @@ -1887,15 +1908,12 @@ def run(self):
self.fuzz_task_output.crash_groups.extend(crash_groups)

fuzzing_session_duration = time.time() - start_time
labels = {
'fuzzer': self.fuzzer_name,
'job': self.job_type,
'platform': environment.platform()
}
logs.info(f'FUZZING_SESSION_DURATION: add {fuzzing_session_duration} '
'for {labels}.')
monitoring_metrics.FUZZING_SESSION_DURATION.add(fuzzing_session_duration,
labels)
monitoring_metrics.FUZZING_SESSION_DURATION.add(
fuzzing_session_duration, {
'fuzzer': self.fuzzer_name,
'job': self.job_type,
'platform': environment.platform()
})

return uworker_msg_pb2.Output(fuzz_task_output=self.fuzz_task_output) # pylint: disable=no-member

Expand Down Expand Up @@ -2007,18 +2025,25 @@ def _pick_fuzz_target():
return build_manager.pick_random_fuzz_target(target_weights)


def _get_or_create_fuzz_target(engine_name, fuzz_target_binary, job_type):
  """Gets or creates a FuzzTarget db entity.

  Looks up the FuzzTarget by its fully qualified name; if no entity exists
  yet, a new one is created and persisted so later lookups succeed.

  Args:
    engine_name: Fuzzing engine name (e.g. the fuzzer name).
    fuzz_target_binary: Binary name of the fuzz target.
    job_type: Job name, used to derive the project.

  Returns:
    The existing or newly created data_types.FuzzTarget entity.
  """
  project = data_handler.get_project_name(job_type)
  qualified_name = data_types.fuzz_target_fully_qualified_name(
      engine_name, project, fuzz_target_binary)
  key = ndb.Key(data_types.FuzzTarget, qualified_name)
  fuzz_target = key.get()
  if fuzz_target:
    return fuzz_target
  # Not in the datastore yet: create and persist it.
  fuzz_target = data_types.FuzzTarget(
      engine=engine_name, binary=fuzz_target_binary, project=project)
  fuzz_target.put()
  return fuzz_target


def _preprocess_get_fuzz_target(fuzzer_name, job_type):
  """Picks a random fuzz target and returns its db entity.

  Args:
    fuzzer_name: Engine/fuzzer name used to qualify the target.
    job_type: Job name.

  Returns:
    A data_types.FuzzTarget entity, or None when no target could be picked
    (e.g. the build defines no fuzz targets).
  """
  fuzz_target_name = _pick_fuzz_target()
  if fuzz_target_name:
    return _get_or_create_fuzz_target(fuzzer_name, fuzz_target_name, job_type)
  return None


Expand Down
30 changes: 19 additions & 11 deletions src/clusterfuzz/_internal/build_management/build_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,17 +300,14 @@ def set_env_var(name, value):


def _emit_job_build_retrieval_metric(start_time, step, build_type):
"""Emits a metrick to track the distribution of build retrieval times."""
elapsed_minutes = (time.time() - start_time) / 60
labels = {
'job': os.getenv('JOB_NAME'),
'platform': environment.platform(),
'step': step,
'build_type': build_type,
}
logs.info(f'JOB_BUILD_RETRIEVAL_TIME: adding {elapsed_minutes} '
f'for labels {labels}.')
monitoring_metrics.JOB_BUILD_RETRIEVAL_TIME.add(elapsed_minutes, labels)
monitoring_metrics.JOB_BUILD_RETRIEVAL_TIME.add(
elapsed_minutes, {
'job': os.getenv('JOB_NAME'),
'platform': environment.platform(),
'step': step,
'build_type': build_type,
})


class BaseBuild:
Expand Down Expand Up @@ -1223,14 +1220,24 @@ def _emit_build_age_metric(gcs_path):
'platform': environment.platform(),
'task': os.getenv('TASK_NAME'),
}
logs.info(f'JOB_BUILD_AGE: adding {elapsed_time_in_hours} for {labels}')
monitoring_metrics.JOB_BUILD_AGE.add(elapsed_time_in_hours, labels)
# This field is expected as a datetime object
# https://cloud.google.com/storage/docs/json_api/v1/objects#resource
except Exception as e:
logs.error(f'Failed to emit build age metric for {gcs_path}: {e}')


def _emit_build_revision_metric(revision):
  """Emits a gauge metric to track the build revision.

  Args:
    revision: Revision number of the build currently being set up.
  """
  labels = {
      'job': os.getenv('JOB_NAME'),
      'platform': environment.platform(),
      'task': os.getenv('TASK_NAME'),
  }
  monitoring_metrics.JOB_BUILD_REVISION.set(revision, labels=labels)


def _get_build_url(bucket_path: Optional[str], revision: int,
job_type: Optional[str]):
"""Returns the GCS url for a build, given a bucket path and revision"""
Expand Down Expand Up @@ -1301,6 +1308,7 @@ def setup_regular_build(revision,

if revision == latest_revision:
_emit_build_age_metric(build_url)
_emit_build_revision_metric(revision)

# build_url points to a GCP bucket, and we're only converting it to its HTTP
# endpoint so that we can use remote unzipping.
Expand Down
15 changes: 5 additions & 10 deletions src/clusterfuzz/_internal/common/testcase_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,17 +61,12 @@ def emit_testcase_triage_duration_metric(testcase_id: int, step: str):
' failed to emit TESTCASE_UPLOAD_TRIAGE_DURATION metric.')
return

labels = {
'job': testcase.job_type,
'step': step,
}

logs.info(
f'TESTCASE_UPLOAD_TRIAGE_DURATION: adding {elapsed_time_since_upload} for {labels}.'
)

monitoring_metrics.TESTCASE_UPLOAD_TRIAGE_DURATION.add(
elapsed_time_since_upload, labels=labels)
elapsed_time_since_upload,
labels={
'job': testcase.job_type,
'step': step,
})


def get_testcase_upload_metadata(
Expand Down
25 changes: 16 additions & 9 deletions src/clusterfuzz/_internal/cron/triage.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,16 @@ def _check_and_update_similar_bug(testcase, issue_tracker):
return False


def _emit_bug_filing_from_testcase_elapsed_time_metric(testcase):
  """Records how old a testcase was (in seconds) when its bug got filed.

  Args:
    testcase: The testcase entity a bug was just filed for.
  """
  labels = {
      'job': testcase.job_type,
      'platform': testcase.platform,
  }
  monitoring_metrics.BUG_FILING_FROM_TESTCASE_ELAPSED_TIME.add(
      testcase.get_age_in_seconds(), labels=labels)


def _file_issue(testcase, issue_tracker, throttler):
"""File an issue for the testcase."""
logs.info(f'_file_issue for {testcase.key.id()}')
Expand Down Expand Up @@ -282,6 +292,7 @@ def _file_issue(testcase, issue_tracker, throttler):
'fuzzer_name': testcase.fuzzer_name,
'status': 'success',
})
_emit_bug_filing_from_testcase_elapsed_time_metric(testcase)
except Exception as e:
file_exception = e

Expand All @@ -306,16 +317,12 @@ def _emit_untriaged_testcase_age_metric(critical_tasks_completed: bool,
if not testcase.timestamp:
return

labels = {
'job': testcase.job_type,
'platform': testcase.platform,
}

logs.info(f'UNTRIAGED_TESTCASE_AGE: adding {testcase.get_age_in_seconds()}'
' for {labels}')

monitoring_metrics.UNTRIAGED_TESTCASE_AGE.add(
testcase.get_age_in_seconds(), labels=labels)
testcase.get_age_in_seconds(),
labels={
'job': testcase.job_type,
'platform': testcase.platform,
})


def main():
Expand Down
3 changes: 3 additions & 0 deletions src/clusterfuzz/_internal/fuzzing/leak_blacklist.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import re

from clusterfuzz._internal.base import errors
from clusterfuzz._internal.base import memoize
from clusterfuzz._internal.datastore import data_handler
from clusterfuzz._internal.datastore import data_types
from clusterfuzz._internal.datastore import ndb_utils
Expand Down Expand Up @@ -65,7 +66,9 @@ def cleanup_global_blacklist():
ndb_utils.delete_multi(blacklists_to_delete)


@memoize.wrap(memoize.Memcache(60 * 10))
def get_global_blacklisted_functions():
"""Gets global blacklisted functions."""
# Copy global blacklist into local blacklist.
global_blacklists = data_types.Blacklist.query(
data_types.Blacklist.tool_name == LSAN_TOOL_NAME)
Expand Down
2 changes: 2 additions & 0 deletions src/clusterfuzz/_internal/google_cloud_utils/credentials.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from google.auth.transport import requests
from google.oauth2 import service_account

from clusterfuzz._internal.base import memoize
from clusterfuzz._internal.base import retry
from clusterfuzz._internal.base import utils
from clusterfuzz._internal.google_cloud_utils import secret_manager
Expand Down Expand Up @@ -59,6 +60,7 @@ def _use_anonymous_credentials():
retries=FAIL_RETRIES,
delay=FAIL_WAIT,
function='google_cloud_utils.credentials.get_default')
@memoize.wrap(memoize.FifoInMemory(1))
def get_default(scopes=None):
"""Get default Google Cloud credentials."""
if _use_anonymous_credentials():
Expand Down
Loading

0 comments on commit 0c86007

Please sign in to comment.