Skip to content

Commit

Permalink
[Monitoring] Remove granularity for stuck testcases metric (#4496)
Browse files Browse the repository at this point in the history
The metric for untriaged testcase age was not considering bugs that were
being filed legitimately, so there was no metric emission at all.

Also, removes granularity in the stuck testcase count metric.
  • Loading branch information
vitorguidi authored Dec 12, 2024
1 parent 1208d14 commit 667338c
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 27 deletions.
35 changes: 12 additions & 23 deletions src/clusterfuzz/_internal/cron/triage.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,22 +309,11 @@ def _file_issue(testcase, issue_tracker, throttler):
return filed


untriaged_testcase_count = {}


def _increment_untriaged_testcase_count(testcase: data_types.Testcase):
identifier = (testcase.job_type, testcase.platform)
if identifier not in untriaged_testcase_count:
untriaged_testcase_count[identifier] = 0
untriaged_testcase_count[identifier] += 1


def _emit_untriaged_testcase_age_metric(testcase: data_types.Testcase):
"""Emmits a metric to track age of untriaged testcases."""
if not testcase.timestamp:
return

_increment_untriaged_testcase_count(testcase)
logs.info(f'Emiting UNTRIAGED_TESTCASE_AGE for testcase {testcase.key.id()} '
f'(age = {testcase.get_age_in_seconds()})')
monitoring_metrics.UNTRIAGED_TESTCASE_AGE.add(
Expand All @@ -335,16 +324,6 @@ def _emit_untriaged_testcase_age_metric(testcase: data_types.Testcase):
})


def _emit_untriaged_testcase_count_metric():
for (job, platform) in untriaged_testcase_count:
monitoring_metrics.UNTRIAGED_TESTCASE_COUNT.set(
untriaged_testcase_count[(job, platform)],
labels={
'job': job,
'platform': platform,
})


def main():
"""Files bugs."""
try:
Expand All @@ -367,6 +346,8 @@ def main():

throttler = Throttler()

untriaged_testcases = 0

for testcase_id in data_handler.get_open_testcase_id_iterator():
logs.info(f'Triaging {testcase_id}')
try:
Expand Down Expand Up @@ -395,6 +376,7 @@ def main():
if testcase.get_metadata('progression_pending'):
logs.info(f'Skipping testcase {testcase_id}, progression pending')
_emit_untriaged_testcase_age_metric(testcase)
untriaged_testcases += 1
continue

# If the testcase has a bug filed already, no triage is needed.
Expand All @@ -414,6 +396,7 @@ def main():
# finished.
if not critical_tasks_completed:
_emit_untriaged_testcase_age_metric(testcase)
untriaged_testcases += 1
logs.info(
f'Skipping testcase {testcase_id}, critical tasks still pending.')
continue
Expand All @@ -431,12 +414,14 @@ def main():
if not testcase.group_id and not dates.time_has_expired(
testcase.timestamp, hours=data_types.MIN_ELAPSED_TIME_SINCE_REPORT):
_emit_untriaged_testcase_age_metric(testcase)
untriaged_testcases += 1
logs.info(f'Skipping testcase {testcase_id}, pending grouping.')
continue

if not testcase.get_metadata('ran_grouper'):
# Testcase should be considered by the grouper first before filing.
_emit_untriaged_testcase_age_metric(testcase)
untriaged_testcases += 1
logs.info(f'Skipping testcase {testcase_id}, pending grouping.')
continue

Expand All @@ -463,10 +448,13 @@ def main():
# Clean up old triage messages that would be not applicable now.
testcase.delete_metadata(TRIAGE_MESSAGE_KEY, update_testcase=False)

# A testcase is untriaged, until immediately before a bug is opened
_emit_untriaged_testcase_age_metric(testcase)
untriaged_testcases += 1

# File the bug first and then create filed bug metadata.
if not _file_issue(testcase, issue_tracker, throttler):
logs.info(f'Issue filing failed for testcase id {testcase_id}')
_emit_untriaged_testcase_age_metric(testcase)
continue

_create_filed_bug_metadata(testcase)
Expand All @@ -475,7 +463,8 @@ def main():
logs.info('Filed new issue %s for testcase %d.' % (testcase.bug_information,
testcase_id))

_emit_untriaged_testcase_count_metric()
monitoring_metrics.UNTRIAGED_TESTCASE_COUNT.set(
untriaged_testcases, labels={})

logs.info('Triage testcases succeeded.')
return True
Expand Down
6 changes: 2 additions & 4 deletions src/clusterfuzz/_internal/metrics/monitoring_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,10 +370,8 @@
description='Number of testcases that were not yet triaged '
'(have not yet completed analyze, regression,'
' minimization, impact task), in hours.',
field_spec=[
monitor.StringField('job'),
monitor.StringField('platform'),
])
field_spec=[],
)

ANALYZE_TASK_REPRODUCIBILITY = monitor.CounterMetric(
'task/analyze/reproducibility',
Expand Down

0 comments on commit 667338c

Please sign in to comment.