diff --git a/src/clusterfuzz/_internal/bot/tasks/utasks/analyze_task.py b/src/clusterfuzz/_internal/bot/tasks/utasks/analyze_task.py index 785974f735..1f0b7e8f58 100644 --- a/src/clusterfuzz/_internal/bot/tasks/utasks/analyze_task.py +++ b/src/clusterfuzz/_internal/bot/tasks/utasks/analyze_task.py @@ -552,6 +552,8 @@ def _update_testcase(output): if analyze_task_output.platform_id: testcase.platform_id = analyze_task_output.platform_id + testcase.analyze_pending = False + testcase.put() diff --git a/src/clusterfuzz/_internal/common/testcase_utils.py b/src/clusterfuzz/_internal/common/testcase_utils.py index 73fe9edfd4..4314dcd55f 100644 --- a/src/clusterfuzz/_internal/common/testcase_utils.py +++ b/src/clusterfuzz/_internal/common/testcase_utils.py @@ -31,59 +31,39 @@ def emit_testcase_triage_duration_metric(testcase_id: int, step: str): - '''Finds out if a testcase is fuzzer generated or manually uploaded, - and emits the TESTCASE_UPLOAD_TRIAGE_DURATION metric.''' - testcase_upload_metadata = get_testcase_upload_metadata(testcase_id) - if not testcase_upload_metadata: - logs.warning(f'No upload metadata found for testcase {testcase_id},' - ' failed to emit TESTCASE_UPLOAD_TRIAGE_DURATION metric.') - return - if not testcase_upload_metadata.timestamp: - logs.warning( - f'No timestamp for testcase {testcase_upload_metadata.testcase_id},' - ' failed to emit TESTCASE_UPLOAD_TRIAGE_DURATION metric.') - return - assert step in [ - 'analyze_launched', 'analyze_completed', 'minimize_completed', - 'regression_completed', 'impact_completed', 'issue_updated' - ] - + """Finds out if a testcase is fuzzer generated or manually uploaded, + and emits the TESTCASE_TRIAGE_DURATION metric.""" testcase = data_handler.get_testcase_by_id(testcase_id) if not testcase: logs.warning(f'No testcase found with id {testcase_id},' - ' failed to emit TESTCASE_UPLOAD_TRIAGE_DURATION metric.') + ' failed to emit TESTCASE_TRIAGE_DURATION metric.') return if not testcase.job_type: logs.warning(f'No job_type associated to testcase {testcase_id},' - ' failed to emit TESTCASE_UPLOAD_TRIAGE_DURATION metric.') + ' failed to emit TESTCASE_TRIAGE_DURATION metric.') return from_fuzzer = not get_testcase_upload_metadata(testcase_id) - assert step in [ - 'analyze_launched', 'analyze_completed', 'minimize_completed', - 'regression_completed', 'impact_completed', 'issue_updated' - ] - if not testcase.get_age_in_seconds(): logs.warning(f'No timestamp associated to testcase {testcase_id},' - ' failed to emit TESTCASE_UPLOAD_TRIAGE_DURATION metric.') + ' failed to emit TESTCASE_TRIAGE_DURATION metric.') return - testcase_age_in_hours = testcase.get_age_in_seconds() / 3600 + testcase_age_in_hours = testcase.get_age_in_seconds() / (60 * 60) - logs.info('Emiting TESTCASE_UPLOAD_TRIAGE_DURATION metric for testcase ' + logs.info('Emiting TESTCASE_TRIAGE_DURATION metric for testcase ' f'{testcase_id} (age = {testcase_age_in_hours} hours.) ' - 'in step {step}.') + f'in step {step}, from_fuzzer: {from_fuzzer}.') - monitoring_metrics.TESTCASE_UPLOAD_TRIAGE_DURATION.add( + monitoring_metrics.TESTCASE_TRIAGE_DURATION.add( testcase_age_in_hours, labels={ 'job': testcase.job_type, 'step': step, - 'origin': 'fuzzer' if from_fuzzer else 'manually_uploaded' + 'from_fuzzer': from_fuzzer }) diff --git a/src/clusterfuzz/_internal/cron/triage.py b/src/clusterfuzz/_internal/cron/triage.py index 1f3b375922..ccfb03e8ae 100644 --- a/src/clusterfuzz/_internal/cron/triage.py +++ b/src/clusterfuzz/_internal/cron/triage.py @@ -256,6 +256,8 @@ def _check_and_update_similar_bug(testcase, issue_tracker): def _emit_bug_filing_from_testcase_elapsed_time_metric(testcase): testcase_age = testcase.get_age_in_seconds() + if not testcase_age: + return monitoring_metrics.BUG_FILING_FROM_TESTCASE_ELAPSED_TIME.add( testcase_age, labels={ @@ -336,27 +338,30 @@ def _emit_untriaged_testcase_count_metric(): }) -def _emit_untriaged_testcase_age_metric(testcase: data_types.Testcase): +PENDING_ANALYZE = 'pending_analyze' +PENDING_CRITICAL_TASKS = 'pending_critical_tasks' +PENDING_PROGRESSION = 'pending_progression' +PENDING_GROUPING = 'pending_grouping' +PENDING_FILING = 'pending_filing' + + +def _emit_untriaged_testcase_age_metric(testcase: data_types.Testcase, + step: str): """Emmits a metric to track age of untriaged testcases.""" - if not testcase.timestamp: + if not testcase.get_age_in_seconds(): return logs.info(f'Emiting UNTRIAGED_TESTCASE_AGE for testcase {testcase.key.id()} ' - f'(age = {testcase.get_age_in_seconds()})') + f'(age = {testcase.get_age_in_seconds()}), step = {step}') monitoring_metrics.UNTRIAGED_TESTCASE_AGE.add( testcase.get_age_in_seconds() / 3600, labels={ 'job': testcase.job_type, 'platform': testcase.platform, + 'step': step, }) -PENDING_CRITICAL_TASKS = 'pending_critical_tasks' -PENDING_PROGRESSION = 'pending_progression' -PENDING_GROUPING = 'pending_grouping' -PENDING_FILING = 'pending_filing' - - def main(): """Files bugs.""" try: @@ -409,7 +414,7 @@ def main(): if testcase.get_metadata('progression_pending'): _set_testcase_stuck_state(testcase, True) logs.info(f'Skipping testcase {testcase_id}, progression pending') - _emit_untriaged_testcase_age_metric(testcase) + _emit_untriaged_testcase_age_metric(testcase, PENDING_PROGRESSION) _increment_untriaged_testcase_count(testcase.job_type, PENDING_PROGRESSION) continue @@ -432,10 +437,12 @@ def main(): # Require that all tasks like minimizaton, regression testing, etc have # finished. if not critical_tasks_completed: - _emit_untriaged_testcase_age_metric(testcase) + status = PENDING_CRITICAL_TASKS + if testcase.analyze_pending: + status = PENDING_ANALYZE + _emit_untriaged_testcase_age_metric(testcase, status) _set_testcase_stuck_state(testcase, True) - _increment_untriaged_testcase_count(testcase.job_type, - PENDING_CRITICAL_TASKS) + _increment_untriaged_testcase_count(testcase.job_type, status) logs.info( f'Skipping testcase {testcase_id}, critical tasks still pending.') continue @@ -452,7 +459,7 @@ def main(): # metadata works well. if not testcase.group_id and not dates.time_has_expired( testcase.timestamp, hours=data_types.MIN_ELAPSED_TIME_SINCE_REPORT): - _emit_untriaged_testcase_age_metric(testcase) + _emit_untriaged_testcase_age_metric(testcase, PENDING_GROUPING) _set_testcase_stuck_state(testcase, True) _increment_untriaged_testcase_count(testcase.job_type, PENDING_GROUPING) logs.info(f'Skipping testcase {testcase_id}, pending grouping.') @@ -460,7 +467,7 @@ def main(): if not testcase.get_metadata('ran_grouper'): # Testcase should be considered by the grouper first before filing. - _emit_untriaged_testcase_age_metric(testcase) + _emit_untriaged_testcase_age_metric(testcase, PENDING_GROUPING) _set_testcase_stuck_state(testcase, True) _increment_untriaged_testcase_count(testcase.job_type, PENDING_GROUPING) logs.info(f'Skipping testcase {testcase_id}, pending grouping.') @@ -490,16 +497,15 @@ def main(): # Clean up old triage messages that would be not applicable now. testcase.delete_metadata(TRIAGE_MESSAGE_KEY, update_testcase=False) - # A testcase is untriaged, until immediately before a bug is opened - _emit_untriaged_testcase_age_metric(testcase) - _set_testcase_stuck_state(testcase, False) - _increment_untriaged_testcase_count(testcase.job_type, PENDING_FILING) - # File the bug first and then create filed bug metadata. if not _file_issue(testcase, issue_tracker, throttler): + _emit_untriaged_testcase_age_metric(testcase, PENDING_FILING) + _increment_untriaged_testcase_count(testcase.job_type, PENDING_FILING) logs.info(f'Issue filing failed for testcase id {testcase_id}') continue + _set_testcase_stuck_state(testcase, False) + _create_filed_bug_metadata(testcase) issue_filer.notify_issue_update(testcase, 'new') diff --git a/src/clusterfuzz/_internal/datastore/data_handler.py b/src/clusterfuzz/_internal/datastore/data_handler.py index ff9d608cfc..cf1aafb5ff 100644 --- a/src/clusterfuzz/_internal/datastore/data_handler.py +++ b/src/clusterfuzz/_internal/datastore/data_handler.py @@ -921,7 +921,7 @@ def critical_tasks_completed(testcase): return testcase.minimized_keys and testcase.regression return bool(testcase.minimized_keys and testcase.regression and - testcase.is_impact_set_flag) + testcase.is_impact_set_flag and not testcase.analyze_pending) # ------------------------------------------------------------------------------ @@ -1380,6 +1380,7 @@ def create_user_uploaded_testcase(key, testcase.timestamp = utils.utcnow() testcase.created = testcase.timestamp + testcase.analyze_pending = True testcase.uploader_email = uploader_email testcase.put() diff --git a/src/clusterfuzz/_internal/datastore/data_types.py b/src/clusterfuzz/_internal/datastore/data_types.py index 8aa009ad36..cbc63c7f4b 100644 --- a/src/clusterfuzz/_internal/datastore/data_types.py +++ b/src/clusterfuzz/_internal/datastore/data_types.py @@ -583,6 +583,10 @@ class Testcase(Model): # Tracks if a testcase is stuck during triage. stuck_in_triage = ndb.BooleanProperty(default=False) + # Tracks if analyze task is pending. + # Defaults to false, since most testcases are fuzzer produced. + analyze_pending = ndb.BooleanProperty(default=False) + def is_chromium(self): return self.project_name in ('chromium', 'chromium-testing') diff --git a/src/clusterfuzz/_internal/metrics/monitoring_metrics.py b/src/clusterfuzz/_internal/metrics/monitoring_metrics.py index cb54c9da54..dfd92e100e 100644 --- a/src/clusterfuzz/_internal/metrics/monitoring_metrics.py +++ b/src/clusterfuzz/_internal/metrics/monitoring_metrics.py @@ -231,17 +231,17 @@ ], ) -TESTCASE_UPLOAD_TRIAGE_DURATION = monitor.CumulativeDistributionMetric( - 'uploaded_testcase_analysis/triage_duration_secs', +TESTCASE_TRIAGE_DURATION = monitor.CumulativeDistributionMetric( + 'testcase_analysis/triage_duration_hours', description=('Time elapsed between testcase upload and completion' - ' of relevant tasks in the testcase upload lifecycle.' + ' of relevant tasks in the testcase lifecycle.' ' Origin can be either from a fuzzer, or a manual' ' upload. Measured in hours.'), bucketer=monitor.GeometricBucketer(), field_spec=[ monitor.StringField('step'), monitor.StringField('job'), - monitor.StringField('origin'), + monitor.BooleanField('from_fuzzer'), ], ) @@ -365,6 +365,7 @@ field_spec=[ monitor.StringField('job'), monitor.StringField('platform'), + monitor.StringField('step'), ]) UNTRIAGED_TESTCASE_COUNT = monitor.GaugeMetric(