From 246122fed21c12f5ba892db31a58ab49e3ba5c86 Mon Sep 17 00:00:00 2001
From: Vitor Guidi <vitorguidi@gmail.com>
Date: Fri, 27 Dec 2024 12:40:16 -0300
Subject: [PATCH] Merge remainder monitoring PRs to oss fuzz (#4563)

This merges #4512, #4516, #4547, #4497, #4560 and #4561
---
 infra/terraform/monitoring.tf                 | 1622 +++++++++++++++++
 .../_internal/bot/tasks/utasks/__init__.py    |   61 +-
 .../bot/tasks/utasks/analyze_task.py          |    2 +
 .../_internal/common/testcase_utils.py        |   40 +-
 src/clusterfuzz/_internal/cron/triage.py      |   46 +-
 .../_internal/datastore/data_handler.py       |    3 +-
 .../_internal/datastore/data_types.py         |    4 +
 .../_internal/metrics/monitoring_metrics.py   |   13 +-
 8 files changed, 1684 insertions(+), 107 deletions(-)
 create mode 100644 infra/terraform/monitoring.tf

diff --git a/infra/terraform/monitoring.tf b/infra/terraform/monitoring.tf
new file mode 100644
index 0000000000..7d9a540d76
--- /dev/null
+++ b/infra/terraform/monitoring.tf
@@ -0,0 +1,1622 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+resource "google_monitoring_dashboard" "clusterfuzz_sli_dashboard" {
+  dashboard_json = <<JSON
+{
+  "displayName": "Clusterfuzz Reliability Metrics",
+  "mosaicLayout": {
+    "columns": 48,
+    "tiles": [
+      {
+        "yPos": 76,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "prometheusQuery": "sum by (task)(rate(custom_googleapis_com:task_outcome\\{monitored_resource=\"gce_instance\",subtask=\"uworker_main\"\\}[$\\{__interval\\}]))\n",
+                  "unitOverride": "",
+                  "outputFullDuration": false
+                },
+                "plotType": "LINE",
+                "legendTemplate": "",
+                "targetAxis": "Y1",
+                "dimensions": [],
+                "measures": [],
+                "breakdowns": []
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR",
+              "showLegend": false,
+              "displayHorizontal": false
+            }
+          },
+          "title": "utask_main execution count",
+          "id": ""
+        }
+      },
+      {
+        "yPos": 108,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "prometheusQuery": "sum by (task)(rate(custom_googleapis_com:task_outcome\\{monitored_resource=\"gce_instance\",subtask=\"preprocess\"\\}[$\\{__interval\\}]))\n",
+                  "unitOverride": "",
+                  "outputFullDuration": false
+                },
+                "plotType": "LINE",
+                "legendTemplate": "",
+                "targetAxis": "Y1",
+                "dimensions": [],
+                "measures": [],
+                "breakdowns": []
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR",
+              "showLegend": false,
+              "displayHorizontal": false
+            }
+          },
+          "title": "preprocess execution count",
+          "id": ""
+        }
+      },
+      {
+        "yPos": 140,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "prometheusQuery": "sum by (task)(rate(custom_googleapis_com:task_outcome\\{monitored_resource=\"gce_instance\",subtask=\"postprocess\"\\}[$\\{__interval\\}]))\n",
+                  "unitOverride": "",
+                  "outputFullDuration": false
+                },
+                "plotType": "LINE",
+                "legendTemplate": "",
+                "targetAxis": "Y1",
+                "dimensions": [],
+                "measures": [],
+                "breakdowns": []
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR",
+              "showLegend": false,
+              "displayHorizontal": false
+            }
+          },
+          "title": "postprocess execution count",
+          "id": ""
+        }
+      },
+      {
+        "width": 48,
+        "height": 4,
+        "widget": {
+          "title": "Business level metrics",
+          "text": {
+            "content": "",
+            "format": "MARKDOWN",
+            "style": {
+              "backgroundColor": "#FFFFFF",
+              "textColor": "#212121",
+              "horizontalAlignment": "H_CENTER",
+              "verticalAlignment": "V_TOP",
+              "padding": "P_EXTRA_SMALL",
+              "fontSize": "FS_LARGE",
+              "pointerLocation": "POINTER_LOCATION_UNSPECIFIED"
+            }
+          },
+          "id": ""
+        }
+      },
+      {
+        "yPos": 72,
+        "width": 48,
+        "height": 4,
+        "widget": {
+          "title": "Task metrics",
+          "text": {
+            "content": "",
+            "format": "MARKDOWN",
+            "style": {
+              "backgroundColor": "#FFFFFF",
+              "textColor": "#212121",
+              "horizontalAlignment": "H_CENTER",
+              "verticalAlignment": "V_TOP",
+              "padding": "P_EXTRA_SMALL",
+              "fontSize": "FS_LARGE",
+              "pointerLocation": "POINTER_LOCATION_UNSPECIFIED"
+            }
+          },
+          "id": ""
+        }
+      },
+      {
+        "yPos": 172,
+        "width": 48,
+        "height": 4,
+        "widget": {
+          "title": "Testcase metrics",
+          "text": {
+            "content": "",
+            "format": "MARKDOWN",
+            "style": {
+              "backgroundColor": "#FFFFFF",
+              "textColor": "#212121",
+              "horizontalAlignment": "H_CENTER",
+              "verticalAlignment": "V_TOP",
+              "padding": "P_EXTRA_SMALL",
+              "fontSize": "FS_LARGE",
+              "pointerLocation": "POINTER_LOCATION_UNSPECIFIED"
+            }
+          },
+          "id": ""
+        }
+      },
+      {
+        "yPos": 4,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "timeSeriesFilter": {
+                    "filter": "metric.type=\"custom.googleapis.com/task/fuzz/job/total_time\" resource.type=\"gce_instance\"",
+                    "aggregation": {
+                      "alignmentPeriod": "60s",
+                      "perSeriesAligner": "ALIGN_RATE",
+                      "crossSeriesReducer": "REDUCE_SUM",
+                      "groupByFields": []
+                    }
+                  },
+                  "unitOverride": "",
+                  "outputFullDuration": false
+                },
+                "plotType": "LINE",
+                "legendTemplate": "",
+                "minAlignmentPeriod": "60s",
+                "targetAxis": "Y1",
+                "dimensions": [],
+                "measures": [],
+                "breakdowns": []
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR",
+              "showLegend": false,
+              "displayHorizontal": false
+            }
+          },
+          "title": "Fuzzing hours",
+          "id": ""
+        }
+      },
+      {
+        "xPos": 16,
+        "yPos": 4,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "prometheusQuery": "topk(10, sum by (job)(rate(custom_googleapis_com:task_fuzz_job_total_time\\{monitored_resource=\"gce_instance\"\\}[$\\{__interval\\}])))",
+                  "unitOverride": "",
+                  "outputFullDuration": false
+                },
+                "plotType": "LINE",
+                "legendTemplate": "",
+                "targetAxis": "Y1",
+                "dimensions": [],
+                "measures": [],
+                "breakdowns": []
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR",
+              "showLegend": false,
+              "displayHorizontal": false
+            }
+          },
+          "title": "Fuzzing hours (top 10 jobs)",
+          "id": ""
+        }
+      },
+      {
+        "xPos": 32,
+        "yPos": 4,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "prometheusQuery": "topk(10, sum by (fuzzer)(rate(custom_googleapis_com:task_fuzz_fuzzer_total_time\\{monitored_resource=\"gce_instance\"\\}[$\\{__interval\\}])))",
+                  "unitOverride": "",
+                  "outputFullDuration": false
+                },
+                "plotType": "LINE",
+                "legendTemplate": "",
+                "targetAxis": "Y1",
+                "dimensions": [],
+                "measures": [],
+                "breakdowns": []
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR",
+              "showLegend": false,
+              "displayHorizontal": false
+            }
+          },
+          "title": "Fuzzing hours (top 10 fuzzers)",
+          "id": ""
+        }
+      },
+      {
+        "yPos": 20,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "prometheusQuery": "sum(increase(custom_googleapis_com:issues_filing{monitored_resource=\"gce_instance\",status=\"success\"}[24h]))",
+                  "unitOverride": "",
+                  "outputFullDuration": false
+                },
+                "plotType": "LINE",
+                "legendTemplate": "",
+                "targetAxis": "Y1",
+                "dimensions": [],
+                "measures": [],
+                "breakdowns": []
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR",
+              "showLegend": false,
+              "displayHorizontal": false
+            }
+          },
+          "title": "Issues filed in a 24h window",
+          "id": ""
+        }
+      },
+      {
+        "xPos": 16,
+        "yPos": 20,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "prometheusQuery": "sum(increase(custom_googleapis_com:issues_closing_success{monitored_resource=\"gce_instance\"}[24h]))",
+                  "unitOverride": "",
+                  "outputFullDuration": false
+                },
+                "plotType": "LINE",
+                "legendTemplate": "",
+                "targetAxis": "Y1",
+                "dimensions": [],
+                "measures": [],
+                "breakdowns": []
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR",
+              "showLegend": false,
+              "displayHorizontal": false
+            }
+          },
+          "title": "Issues closed in a 24h window",
+          "id": ""
+        }
+      },
+      {
+        "yPos": 192,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "title": "Untriaged testcase age (p50 - hours)",
+          "xyChart": {
+            "chartOptions": {
+              "mode": "COLOR"
+            },
+            "dataSets": [
+              {
+                "minAlignmentPeriod": "60s",
+                "plotType": "LINE",
+                "targetAxis": "Y1",
+                "timeSeriesQuery": {
+                  "timeSeriesFilter": {
+                    "aggregation": {
+                      "alignmentPeriod": "60s",
+                      "crossSeriesReducer": "REDUCE_PERCENTILE_50",
+                      "groupByFields": [
+                        "metric.label.\"step\""
+                      ],
+                      "perSeriesAligner": "ALIGN_DELTA"
+                    },
+                    "filter": "metric.type=\"custom.googleapis.com/issues/untriaged_testcase_age\" resource.type=\"gce_instance\""
+                  }
+                }
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            }
+          }
+        }
+      },
+      {
+        "xPos": 16,
+        "yPos": 192,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "title": "Untriaged testcase age (p95 - hours)",
+          "xyChart": {
+            "chartOptions": {
+              "mode": "COLOR"
+            },
+            "dataSets": [
+              {
+                "minAlignmentPeriod": "60s",
+                "plotType": "LINE",
+                "targetAxis": "Y1",
+                "timeSeriesQuery": {
+                  "timeSeriesFilter": {
+                    "aggregation": {
+                      "alignmentPeriod": "60s",
+                      "crossSeriesReducer": "REDUCE_PERCENTILE_95",
+                      "groupByFields": [
+                        "metric.label.\"step\""
+                      ],
+                      "perSeriesAligner": "ALIGN_DELTA"
+                    },
+                    "filter": "metric.type=\"custom.googleapis.com/issues/untriaged_testcase_age\" resource.type=\"gce_instance\""
+                  }
+                }
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            }
+          }
+        }
+      },
+      {
+        "xPos": 32,
+        "yPos": 192,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "title": "Untriaged testcase age (p99 - hours)",
+          "xyChart": {
+            "chartOptions": {
+              "mode": "COLOR"
+            },
+            "dataSets": [
+              {
+                "minAlignmentPeriod": "60s",
+                "plotType": "LINE",
+                "targetAxis": "Y1",
+                "timeSeriesQuery": {
+                  "timeSeriesFilter": {
+                    "aggregation": {
+                      "alignmentPeriod": "60s",
+                      "crossSeriesReducer": "REDUCE_PERCENTILE_99",
+                      "groupByFields": [
+                        "metric.label.\"step\""
+                      ],
+                      "perSeriesAligner": "ALIGN_DELTA"
+                    },
+                    "filter": "metric.type=\"custom.googleapis.com/issues/untriaged_testcase_age\" resource.type=\"gce_instance\""
+                  }
+                }
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            }
+          }
+        }
+      },
+      {
+        "yPos": 36,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "prometheusQuery": "sum(increase(custom_googleapis_com:task_fuzz_fuzzer_known_crash_count{monitored_resource=\"gce_instance\"}[24h]))",
+                  "unitOverride": "",
+                  "outputFullDuration": false
+                },
+                "plotType": "LINE",
+                "legendTemplate": "",
+                "targetAxis": "Y1",
+                "dimensions": [],
+                "measures": [],
+                "breakdowns": []
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR",
+              "showLegend": false,
+              "displayHorizontal": false
+            }
+          },
+          "title": "Known crash counts over the last 24h",
+          "id": ""
+        }
+      },
+      {
+        "xPos": 16,
+        "yPos": 36,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "prometheusQuery": "sum(increase(custom_googleapis_com:task_fuzz_fuzzer_new_crash_count{monitored_resource=\"gce_instance\"}[24h]))",
+                  "unitOverride": "",
+                  "outputFullDuration": false
+                },
+                "plotType": "LINE",
+                "legendTemplate": "",
+                "targetAxis": "Y1",
+                "dimensions": [],
+                "measures": [],
+                "breakdowns": []
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR",
+              "showLegend": false,
+              "displayHorizontal": false
+            }
+          },
+          "title": "New crash counts over the last 24h",
+          "id": ""
+        }
+      },
+      {
+        "yPos": 124,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "prometheusQuery": "histogram_quantile(0.50,\n  sum by (le, task) (\n    increase(custom_googleapis_com:utask_subtask_duration_secs_bucket{\n      monitored_resource=\"gce_instance\",\n      subtask=\"preprocess\",\n    }[1h])\n  )\n)",
+                  "unitOverride": "",
+                  "outputFullDuration": false
+                },
+                "plotType": "LINE",
+                "legendTemplate": "",
+                "targetAxis": "Y1",
+                "dimensions": [],
+                "measures": [],
+                "breakdowns": []
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR",
+              "showLegend": false,
+              "displayHorizontal": false
+            }
+          },
+          "title": "p50 preprocess duration by task (seconds)",
+          "id": ""
+        }
+      },
+      {
+        "yPos": 92,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "prometheusQuery": "histogram_quantile(0.50,\n  sum by (le, task) (\n    increase(custom_googleapis_com:utask_subtask_duration_secs_bucket{\n      monitored_resource=\"gce_instance\",\n      subtask=\"uworker_main\",\n    }[1h])\n  )\n)",
+                  "unitOverride": "",
+                  "outputFullDuration": false
+                },
+                "plotType": "LINE",
+                "legendTemplate": "",
+                "targetAxis": "Y1",
+                "dimensions": [],
+                "measures": [],
+                "breakdowns": []
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR",
+              "showLegend": false,
+              "displayHorizontal": false
+            }
+          },
+          "title": "p50 utask_main duration by task (seconds)",
+          "id": ""
+        }
+      },
+      {
+        "yPos": 156,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "prometheusQuery": "histogram_quantile(0.50,\n  sum by (le, task) (\n    increase(custom_googleapis_com:utask_subtask_duration_secs_bucket{\n      monitored_resource=\"gce_instance\",\n      subtask=\"postprocess\",\n    }[1h])\n  )\n)",
+                  "unitOverride": "",
+                  "outputFullDuration": false
+                },
+                "plotType": "LINE",
+                "legendTemplate": "",
+                "targetAxis": "Y1",
+                "dimensions": [],
+                "measures": [],
+                "breakdowns": []
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR",
+              "showLegend": false,
+              "displayHorizontal": false
+            }
+          },
+          "title": "p50 postprocess duration by task (seconds)",
+          "id": ""
+        }
+      },
+      {
+        "xPos": 16,
+        "yPos": 124,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "prometheusQuery": "histogram_quantile(0.95,\n  sum by (le, task) (\n    increase(custom_googleapis_com:utask_subtask_duration_secs_bucket{\n      monitored_resource=\"gce_instance\",\n      subtask=\"preprocess\",\n    }[1h])\n  )\n)",
+                  "unitOverride": "",
+                  "outputFullDuration": false
+                },
+                "plotType": "LINE",
+                "legendTemplate": "",
+                "targetAxis": "Y1",
+                "dimensions": [],
+                "measures": [],
+                "breakdowns": []
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR",
+              "showLegend": false,
+              "displayHorizontal": false
+            }
+          },
+          "title": "p95 preprocess duration by task (seconds)",
+          "id": ""
+        }
+      },
+      {
+        "xPos": 16,
+        "yPos": 92,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "prometheusQuery": "histogram_quantile(0.95,\n  sum by (le, task) (\n    increase(custom_googleapis_com:utask_subtask_duration_secs_bucket{\n      monitored_resource=\"gce_instance\",\n      subtask=\"uworker_main\",\n    }[1h])\n  )\n)",
+                  "unitOverride": "",
+                  "outputFullDuration": false
+                },
+                "plotType": "LINE",
+                "legendTemplate": "",
+                "targetAxis": "Y1",
+                "dimensions": [],
+                "measures": [],
+                "breakdowns": []
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR",
+              "showLegend": false,
+              "displayHorizontal": false
+            }
+          },
+          "title": "p95 utask_main duration by task (seconds)",
+          "id": ""
+        }
+      },
+      {
+        "xPos": 16,
+        "yPos": 156,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "prometheusQuery": "histogram_quantile(0.95,\n  sum by (le, task) (\n    increase(custom_googleapis_com:utask_subtask_duration_secs_bucket{\n      monitored_resource=\"gce_instance\",\n      subtask=\"postprocess\",\n    }[1h])\n  )\n)",
+                  "unitOverride": "",
+                  "outputFullDuration": false
+                },
+                "plotType": "LINE",
+                "legendTemplate": "",
+                "targetAxis": "Y1",
+                "dimensions": [],
+                "measures": [],
+                "breakdowns": []
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR",
+              "showLegend": false,
+              "displayHorizontal": false
+            }
+          },
+          "title": "p95 postprocess duration by task (seconds)",
+          "id": ""
+        }
+      },
+      {
+        "yPos": 176,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "prometheusQuery": "histogram_quantile(0.50,\n  sum by (le, step) (\n    increase(custom_googleapis_com:testcase_analysis_triage_duration_hours_bucket{\n      monitored_resource=\"gce_instance\",\n    }[1h])\n  )\n)\n\n",
+                  "unitOverride": "",
+                  "outputFullDuration": false
+                },
+                "plotType": "LINE",
+                "legendTemplate": "",
+                "targetAxis": "Y1",
+                "dimensions": [],
+                "measures": [],
+                "breakdowns": []
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR",
+              "showLegend": false,
+              "displayHorizontal": false
+            }
+          },
+          "title": "Testcase triage duration (p50 - hours) - by step",
+          "id": ""
+        }
+      },
+      {
+        "xPos": 16,
+        "yPos": 176,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "prometheusQuery": "histogram_quantile(0.90,\n  sum by (le, step) (\n    increase(custom_googleapis_com:testcase_analysis_triage_duration_hours_bucket{\n      monitored_resource=\"gce_instance\",\n    }[1h])\n  )\n)\n\n",
+                  "unitOverride": "",
+                  "outputFullDuration": false
+                },
+                "plotType": "LINE",
+                "legendTemplate": "",
+                "targetAxis": "Y1",
+                "dimensions": [],
+                "measures": [],
+                "breakdowns": []
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR",
+              "showLegend": false,
+              "displayHorizontal": false
+            }
+          },
+          "title": "Testcase triage duration (p90 - hours) - by step",
+          "id": ""
+        }
+      },
+      {
+        "xPos": 32,
+        "yPos": 176,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "prometheusQuery": "histogram_quantile(0.95,\n  sum by (le, step) (\n    increase(custom_googleapis_com:testcase_analysis_triage_duration_hours_bucket{\n      monitored_resource=\"gce_instance\",\n    }[1h])\n  )\n)\n",
+                  "unitOverride": "",
+                  "outputFullDuration": false
+                },
+                "plotType": "LINE",
+                "legendTemplate": "",
+                "targetAxis": "Y1",
+                "dimensions": [],
+                "measures": [],
+                "breakdowns": []
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR",
+              "showLegend": false,
+              "displayHorizontal": false
+            }
+          },
+          "title": "Testcase triage duration (p95 - hours) - by step",
+          "id": ""
+        }
+      },
+      {
+        "yPos": 224,
+        "width": 48,
+        "height": 4,
+        "widget": {
+          "title": "GCS metrics",
+          "text": {
+            "content": "",
+            "format": "MARKDOWN",
+            "style": {
+              "backgroundColor": "#FFFFFF",
+              "textColor": "#212121",
+              "horizontalAlignment": "H_CENTER",
+              "verticalAlignment": "V_TOP",
+              "padding": "P_EXTRA_SMALL",
+              "fontSize": "FS_LARGE",
+              "pointerLocation": "POINTER_LOCATION_UNSPECIFIED"
+            }
+          },
+          "id": ""
+        }
+      },
+      {
+        "yPos": 228,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "timeSeriesFilter": {
+                    "filter": "metric.type=\"storage.googleapis.com/api/request_count\" resource.type=\"gcs_bucket\"",
+                    "aggregation": {
+                      "alignmentPeriod": "60s",
+                      "perSeriesAligner": "ALIGN_RATE",
+                      "crossSeriesReducer": "REDUCE_SUM",
+                      "groupByFields": [
+                        "metric.label.\"method\""
+                      ]
+                    }
+                  },
+                  "unitOverride": "",
+                  "outputFullDuration": false
+                },
+                "plotType": "LINE",
+                "legendTemplate": "",
+                "minAlignmentPeriod": "60s",
+                "targetAxis": "Y1",
+                "dimensions": [],
+                "measures": [],
+                "breakdowns": []
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR",
+              "showLegend": false,
+              "displayHorizontal": false
+            }
+          },
+          "title": "GCS - Request count by method",
+          "id": ""
+        }
+      },
+      {
+        "xPos": 16,
+        "yPos": 228,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "prometheusQuery": "100 * (sum by (method)(rate(storage_googleapis_com:api_request_count\\{monitored_resource=\"gcs_bucket\", response_code!=\"OK\"\\}[$\\{__interval\\}])) \n/\nsum by (method)(rate(storage_googleapis_com:api_request_count\\{monitored_resource=\"gcs_bucket\"\\}[$\\{__interval\\}])))",
+                  "unitOverride": "%",
+                  "outputFullDuration": false
+                },
+                "plotType": "LINE",
+                "legendTemplate": "",
+                "targetAxis": "Y1",
+                "dimensions": [],
+                "measures": [],
+                "breakdowns": []
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR",
+              "showLegend": false,
+              "displayHorizontal": false
+            }
+          },
+          "title": "GCS - Error rate by method",
+          "id": ""
+        }
+      },
+      {
+        "xPos": 32,
+        "yPos": 228,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "timeSeriesFilter": {
+                    "filter": "metric.type=\"storage.googleapis.com/api/request_count\" resource.type=\"gcs_bucket\"",
+                    "aggregation": {
+                      "alignmentPeriod": "60s",
+                      "perSeriesAligner": "ALIGN_RATE",
+                      "crossSeriesReducer": "REDUCE_SUM",
+                      "groupByFields": [
+                        "metric.label.\"response_code\""
+                      ]
+                    }
+                  },
+                  "unitOverride": "",
+                  "outputFullDuration": false
+                },
+                "plotType": "LINE",
+                "legendTemplate": "",
+                "minAlignmentPeriod": "60s",
+                "targetAxis": "Y1",
+                "dimensions": [],
+                "measures": [],
+                "breakdowns": []
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR",
+              "showLegend": false,
+              "displayHorizontal": false
+            }
+          },
+          "title": "GCS - Request count by response code",
+          "id": ""
+        }
+      },
+      {
+        "yPos": 248,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "timeSeriesFilter": {
+                    "filter": "metric.type=\"datastore.googleapis.com/api/request_count\" resource.type=\"datastore_request\"",
+                    "aggregation": {
+                      "alignmentPeriod": "60s",
+                      "perSeriesAligner": "ALIGN_RATE",
+                      "crossSeriesReducer": "REDUCE_SUM",
+                      "groupByFields": [
+                        "metric.label.\"api_method\""
+                      ]
+                    }
+                  },
+                  "unitOverride": "",
+                  "outputFullDuration": false
+                },
+                "plotType": "LINE",
+                "legendTemplate": "",
+                "minAlignmentPeriod": "60s",
+                "targetAxis": "Y1",
+                "dimensions": [],
+                "measures": [],
+                "breakdowns": []
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR",
+              "showLegend": false,
+              "displayHorizontal": false
+            }
+          },
+          "title": "Datastore - Request count by method",
+          "id": ""
+        }
+      },
+      {
+        "xPos": 16,
+        "yPos": 248,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "prometheusQuery": " 100 * sum by (api_method)(rate(datastore_googleapis_com:api_request_count\\{monitored_resource=\"datastore_request\", response_code!='OK'\\}[$\\{__interval\\}])) /\nsum by (api_method)(rate(datastore_googleapis_com:api_request_count\\{monitored_resource=\"datastore_request\"\\}[$\\{__interval\\}]))",
+                  "unitOverride": "%",
+                  "outputFullDuration": false
+                },
+                "plotType": "LINE",
+                "legendTemplate": "",
+                "targetAxis": "Y1",
+                "dimensions": [],
+                "measures": [],
+                "breakdowns": []
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR",
+              "showLegend": false,
+              "displayHorizontal": false
+            }
+          },
+          "title": "Datastore - Error rate by method",
+          "id": ""
+        }
+      },
+      {
+        "yPos": 264,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "prometheusQuery": "histogram_quantile(0.50,sum by (api_method,le)(increase(firestore_googleapis_com:api_request_latencies_bucket\\{monitored_resource=\"firestore.googleapis.com/Database\"\\}[$\\{__interval\\}])))",
+                  "unitOverride": "",
+                  "outputFullDuration": false
+                },
+                "plotType": "LINE",
+                "legendTemplate": "",
+                "targetAxis": "Y1",
+                "dimensions": [],
+                "measures": [],
+                "breakdowns": []
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR",
+              "showLegend": false,
+              "displayHorizontal": false
+            }
+          },
+          "title": "Datastore - Latency by method (p50)",
+          "id": ""
+        }
+      },
+      {
+        "xPos": 32,
+        "yPos": 248,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "timeSeriesFilter": {
+                    "filter": "metric.type=\"datastore.googleapis.com/api/request_count\" resource.type=\"datastore_request\"",
+                    "aggregation": {
+                      "alignmentPeriod": "60s",
+                      "perSeriesAligner": "ALIGN_RATE",
+                      "crossSeriesReducer": "REDUCE_SUM",
+                      "groupByFields": [
+                        "metric.label.\"response_code\""
+                      ]
+                    }
+                  },
+                  "unitOverride": "",
+                  "outputFullDuration": false
+                },
+                "plotType": "LINE",
+                "legendTemplate": "",
+                "minAlignmentPeriod": "60s",
+                "targetAxis": "Y1",
+                "dimensions": [],
+                "measures": [],
+                "breakdowns": []
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR",
+              "showLegend": false,
+              "displayHorizontal": false
+            }
+          },
+          "title": "Datastore - Request count by response code",
+          "id": ""
+        }
+      },
+      {
+        "xPos": 16,
+        "yPos": 264,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "prometheusQuery": "histogram_quantile(0.90,sum by (api_method,le)(increase(firestore_googleapis_com:api_request_latencies_bucket\\{monitored_resource=\"firestore.googleapis.com/Database\"\\}[$\\{__interval\\}])))",
+                  "unitOverride": "",
+                  "outputFullDuration": false
+                },
+                "plotType": "LINE",
+                "legendTemplate": "",
+                "targetAxis": "Y1",
+                "dimensions": [],
+                "measures": [],
+                "breakdowns": []
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR",
+              "showLegend": false,
+              "displayHorizontal": false
+            }
+          },
+          "title": "Datastore - Latency by method (p90)",
+          "id": ""
+        }
+      },
+      {
+        "xPos": 32,
+        "yPos": 264,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "prometheusQuery": "histogram_quantile(0.95,sum by (api_method,le)(increase(firestore_googleapis_com:api_request_latencies_bucket\\{monitored_resource=\"firestore.googleapis.com/Database\"\\}[$\\{__interval\\}])))",
+                  "unitOverride": "",
+                  "outputFullDuration": false
+                },
+                "plotType": "LINE",
+                "legendTemplate": "",
+                "targetAxis": "Y1",
+                "dimensions": [],
+                "measures": [],
+                "breakdowns": []
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR",
+              "showLegend": false,
+              "displayHorizontal": false
+            }
+          },
+          "title": "Datastore - Latency by method (p95)",
+          "id": ""
+        }
+      },
+      {
+        "yPos": 244,
+        "width": 48,
+        "height": 4,
+        "widget": {
+          "title": "Datastore metrics",
+          "text": {
+            "content": "",
+            "format": "MARKDOWN",
+            "style": {
+              "backgroundColor": "#FFFFFF",
+              "textColor": "#212121",
+              "horizontalAlignment": "H_CENTER",
+              "verticalAlignment": "V_TOP",
+              "padding": "P_EXTRA_SMALL",
+              "fontSize": "FS_LARGE",
+              "pointerLocation": "POINTER_LOCATION_UNSPECIFIED"
+            }
+          },
+          "id": ""
+        }
+      },
+      {
+        "yPos": 52,
+        "width": 48,
+        "height": 4,
+        "widget": {
+          "title": "PubSub metrics",
+          "text": {
+            "content": "",
+            "format": "MARKDOWN",
+            "style": {
+              "backgroundColor": "#FFFFFF",
+              "textColor": "#212121",
+              "horizontalAlignment": "H_CENTER",
+              "verticalAlignment": "V_TOP",
+              "padding": "P_EXTRA_SMALL",
+              "fontSize": "FS_LARGE",
+              "pointerLocation": "POINTER_LOCATION_UNSPECIFIED"
+            }
+          },
+          "id": ""
+        }
+      },
+      {
+        "yPos": 56,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "timeSeriesFilter": {
+                    "filter": "metric.type=\"pubsub.googleapis.com/subscription/num_undelivered_messages\" resource.type=\"pubsub_subscription\"",
+                    "aggregation": {
+                      "alignmentPeriod": "60s",
+                      "perSeriesAligner": "ALIGN_MEAN",
+                      "crossSeriesReducer": "REDUCE_SUM",
+                      "groupByFields": [
+                        "metadata.system_labels.\"topic_id\""
+                      ]
+                    }
+                  },
+                  "unitOverride": "",
+                  "outputFullDuration": false
+                },
+                "plotType": "LINE",
+                "legendTemplate": "",
+                "minAlignmentPeriod": "60s",
+                "targetAxis": "Y1",
+                "dimensions": [],
+                "measures": [],
+                "breakdowns": []
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR",
+              "showLegend": false,
+              "displayHorizontal": false
+            }
+          },
+          "title": "PubSub - Unacked messages (per topic)",
+          "id": ""
+        }
+      },
+      {
+        "xPos": 16,
+        "yPos": 56,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "timeSeriesFilter": {
+                    "filter": "metric.type=\"pubsub.googleapis.com/topic/send_request_count\" resource.type=\"pubsub_topic\"",
+                    "aggregation": {
+                      "alignmentPeriod": "60s",
+                      "perSeriesAligner": "ALIGN_RATE",
+                      "crossSeriesReducer": "REDUCE_SUM",
+                      "groupByFields": [
+                        "resource.label.\"topic_id\""
+                      ]
+                    }
+                  },
+                  "unitOverride": "",
+                  "outputFullDuration": false
+                },
+                "plotType": "LINE",
+                "legendTemplate": "",
+                "minAlignmentPeriod": "60s",
+                "targetAxis": "Y1",
+                "dimensions": [],
+                "measures": [],
+                "breakdowns": []
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR",
+              "showLegend": false,
+              "displayHorizontal": false
+            }
+          },
+          "title": "PubSub - Sent messages (per topic)",
+          "id": ""
+        }
+      },
+      {
+        "xPos": 16,
+        "yPos": 108,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "prometheusQuery": "100 * sum by (task)(rate(custom_googleapis_com:task_outcome_by_error_type\\{monitored_resource=\"gce_instance\",subtask=\"preprocess\", error_condition=\"UNHANDLED_EXCEPTION\"\\}[$\\{__interval\\}]))\n/ sum by (task)(rate(custom_googleapis_com:task_outcome_by_error_type\\{monitored_resource=\"gce_instance\",subtask=\"preprocess\"\\}[$\\{__interval\\}]))",
+                  "unitOverride": "%",
+                  "outputFullDuration": false
+                },
+                "plotType": "LINE",
+                "legendTemplate": "",
+                "targetAxis": "Y1",
+                "dimensions": [],
+                "measures": [],
+                "breakdowns": []
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR",
+              "showLegend": false,
+              "displayHorizontal": false
+            }
+          },
+          "title": "preprocess unhandled exception rate (by task)",
+          "id": ""
+        }
+      },
+      {
+        "xPos": 32,
+        "yPos": 76,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "prometheusQuery": "100 * sum by (task)(rate(custom_googleapis_com:task_outcome_by_error_type\\{monitored_resource=\"gce_instance\",subtask=\"uworker_main\", error_condition=\"UNHANDLED_EXCEPTION\"\\}[$\\{__interval\\}]))\n/ sum by (task)(rate(custom_googleapis_com:task_outcome_by_error_type\\{monitored_resource=\"gce_instance\",subtask=\"uworker_main\"\\}[$\\{__interval\\}]))",
+                  "unitOverride": "%",
+                  "outputFullDuration": false
+                },
+                "plotType": "LINE",
+                "legendTemplate": "",
+                "targetAxis": "Y1",
+                "dimensions": [],
+                "measures": [],
+                "breakdowns": []
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR",
+              "showLegend": false,
+              "displayHorizontal": false
+            }
+          },
+          "title": "uworker_main unhandled exception rate (by task)",
+          "id": ""
+        }
+      },
+      {
+        "xPos": 16,
+        "yPos": 140,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "prometheusQuery": "100 * sum by (task)(rate(custom_googleapis_com:task_outcome_by_error_type\\{monitored_resource=\"gce_instance\", subtask=\"postprocess\", error_condition=\"UNHANDLED_EXCEPTION\"\\}[$\\{__interval\\}]))\n/ sum by (task)(rate(custom_googleapis_com:task_outcome_by_error_type\\{monitored_resource=\"gce_instance\", subtask=\"postprocess\"\\}[$\\{__interval\\}]))",
+                  "unitOverride": "%",
+                  "outputFullDuration": false
+                },
+                "plotType": "LINE",
+                "legendTemplate": "",
+                "targetAxis": "Y1",
+                "dimensions": [],
+                "measures": [],
+                "breakdowns": []
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR",
+              "showLegend": false,
+              "displayHorizontal": false
+            }
+          },
+          "title": "postprocess unhandled exception rate (by task)",
+          "id": ""
+        }
+      },
+      {
+        "yPos": 208,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "title": "Untriaged testcase count (by status)",
+          "xyChart": {
+            "chartOptions": {
+              "mode": "COLOR"
+            },
+            "dataSets": [
+              {
+                "plotType": "LINE",
+                "targetAxis": "Y1",
+                "timeSeriesQuery": {
+                  "prometheusQuery": "sum by (status)(last_over_time((custom_googleapis_com:issues_untriaged_testcase_count{monitored_resource=\"gce_instance\"}[1h])))\n",
+                  "unitOverride": ""
+                }
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            }
+          }
+        }
+      },
+      {
+        "xPos": 16,
+        "yPos": 76,
+        "width": 16,
+        "height": 16,
+        "widget": {
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "prometheusQuery": "100 * sum by (task)(rate(custom_googleapis_com:task_outcome\\{monitored_resource=\"gce_instance\",subtask=\"uworker_main\", task_succeeded=\"false\"\\}[$\\{__interval\\}]))\n/ sum by (task)(rate(custom_googleapis_com:task_outcome\\{monitored_resource=\"gce_instance\",subtask=\"uworker_main\"\\}[$\\{__interval\\}]))",
+                  "unitOverride": "%",
+                  "outputFullDuration": false
+                },
+                "plotType": "LINE",
+                "legendTemplate": "",
+                "targetAxis": "Y1",
+                "dimensions": [],
+                "measures": [],
+                "breakdowns": []
+              }
+            ],
+            "thresholds": [],
+            "yAxis": {
+              "label": "",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR",
+              "showLegend": false,
+              "displayHorizontal": false
+            }
+          },
+          "title": "uworker_main overall failure rate (by task) - CAN BE DRILLED BY JOB",
+          "id": ""
+        }
+      }
+    ]
+  },
+  "dashboardFilters": [],
+  "labels": {}
+}
+JSON
+}
\ No newline at end of file
diff --git a/src/clusterfuzz/_internal/bot/tasks/utasks/__init__.py b/src/clusterfuzz/_internal/bot/tasks/utasks/__init__.py
index 1dea6b20ed..b8d8f0f663 100644
--- a/src/clusterfuzz/_internal/bot/tasks/utasks/__init__.py
+++ b/src/clusterfuzz/_internal/bot/tasks/utasks/__init__.py
@@ -85,49 +85,16 @@ def __init__(self, subtask: _Subtask):
     self._labels = None
     self.utask_main_failure = None
     self._utask_success_conditions = [
+        None,  # This can be a successful return value in, e.g., fuzz task
         uworker_msg_pb2.ErrorType.NO_ERROR,  # pylint: disable=no-member
         uworker_msg_pb2.ErrorType.ANALYZE_NO_CRASH,  # pylint: disable=no-member
         uworker_msg_pb2.ErrorType.PROGRESSION_BAD_STATE_MIN_MAX,  # pylint: disable=no-member
         uworker_msg_pb2.ErrorType.REGRESSION_NO_CRASH,  # pylint: disable=no-member
         uworker_msg_pb2.ErrorType.REGRESSION_LOW_CONFIDENCE_IN_REGRESSION_RANGE,  # pylint: disable=no-member
-        uworker_msg_pb2.ErrorType.MINIMIZE_UNREPRODUCIBLE_CRASH,  # pylint: disable=no-member
         uworker_msg_pb2.ErrorType.MINIMIZE_CRASH_TOO_FLAKY,  # pylint: disable=no-member
         uworker_msg_pb2.ErrorType.LIBFUZZER_MINIMIZATION_UNREPRODUCIBLE,  # pylint: disable=no-member
         uworker_msg_pb2.ErrorType.ANALYZE_CLOSE_INVALID_UPLOADED,  # pylint: disable=no-member
     ]
-    self._utask_maybe_retry_conditions = [
-        uworker_msg_pb2.ErrorType.ANALYZE_BUILD_SETUP,  # pylint: disable=no-member
-        uworker_msg_pb2.ErrorType.ANALYZE_NO_REVISIONS_LIST,  # pylint: disable=no-member
-        uworker_msg_pb2.ErrorType.TESTCASE_SETUP,  # pylint: disable=no-member
-        uworker_msg_pb2.ErrorType.MINIMIZE_SETUP,  # pylint: disable=no-member
-        uworker_msg_pb2.ErrorType.FUZZ_DATA_BUNDLE_SETUP_FAILURE,  # pylint: disable=no-member
-        uworker_msg_pb2.ErrorType.FUZZ_NO_FUZZ_TARGET_SELECTED,  # pylint: disable=no-member
-        uworker_msg_pb2.ErrorType.PROGRESSION_NO_CRASH,  # pylint: disable=no-member
-        uworker_msg_pb2.ErrorType.PROGRESSION_TIMEOUT,  # pylint: disable=no-member
-        uworker_msg_pb2.ErrorType.PROGRESSION_BUILD_SETUP_ERROR,  # pylint: disable=no-member
-        uworker_msg_pb2.ErrorType.REGRESSION_BUILD_SETUP_ERROR,  # pylint: disable=no-member
-        uworker_msg_pb2.ErrorType.REGRESSION_TIMEOUT_ERROR,  # pylint: disable=no-member
-        uworker_msg_pb2.ErrorType.SYMBOLIZE_BUILD_SETUP_ERROR,  # pylint: disable=no-member
-        uworker_msg_pb2.ErrorType.MINIMIZE_DEADLINE_EXCEEDED,  # pylint: disable=no-member
-        uworker_msg_pb2.ErrorType.MINIMIZE_DEADLINE_EXCEEDED_IN_MAIN_FILE_PHASE,  # pylint: disable=no-member
-    ]
-    self._utask_failure_conditions = [
-        uworker_msg_pb2.ErrorType.ANALYZE_NO_REVISION_INDEX,  # pylint: disable=no-member
-        uworker_msg_pb2.ErrorType.UNHANDLED,  # pylint: disable=no-member
-        uworker_msg_pb2.ErrorType.VARIANT_BUILD_SETUP,  # pylint: disable=no-member
-        uworker_msg_pb2.ErrorType.FUZZ_BUILD_SETUP_FAILURE,  # pylint: disable=no-member
-        uworker_msg_pb2.ErrorType.FUZZ_NO_FUZZER,  # pylint: disable=no-member
-        uworker_msg_pb2.ErrorType.PROGRESSION_REVISION_LIST_ERROR,  # pylint: disable=no-member
-        uworker_msg_pb2.ErrorType.PROGRESSION_BUILD_NOT_FOUND,  # pylint: disable=no-member
-        uworker_msg_pb2.ErrorType.PROGRESSION_BAD_BUILD,  # pylint: disable=no-member
-        uworker_msg_pb2.ErrorType.REGRESSION_REVISION_LIST_ERROR,  # pylint: disable=no-member
-        uworker_msg_pb2.ErrorType.REGRESSION_BUILD_NOT_FOUND,  # pylint: disable=no-member
-        uworker_msg_pb2.ErrorType.REGRESSION_BAD_BUILD_ERROR,  # pylint: disable=no-member
-        uworker_msg_pb2.ErrorType.LIBFUZZER_MINIMIZATION_FAILED,  # pylint: disable=no-member
-        uworker_msg_pb2.ErrorType.CORPUS_PRUNING_FUZZER_SETUP_FAILED,  # pylint: disable=no-member
-        uworker_msg_pb2.ErrorType.CORPUS_PRUNING_ERROR,  # pylint: disable=no-member
-        uworker_msg_pb2.ErrorType.FUZZ_BAD_BUILD,  # pylint: disable=no-member
-    ]
 
     if subtask == _Subtask.PREPROCESS:
       self._preprocess_start_time_ns = self.start_time_ns
@@ -169,17 +136,11 @@ def set_task_details(self,
       # Ensure we always have a value after this method returns.
       assert self._preprocess_start_time_ns is not None
 
-  def _infer_uworker_main_outcome(self, exc_type, uworker_error):
-    '''Infers, on a best effort basis, whether an uworker output implies
-      success or failure. If an unequivocal response is not possible,
-      classifies as maybe_retry.'''
-    if exc_type or uworker_error in self._utask_failure_conditions:
-      outcome = 'error'
-    elif uworker_error in self._utask_maybe_retry_conditions:
-      outcome = 'maybe_retry'
-    else:
-      outcome = 'success'
-    return outcome
+  def _infer_uworker_main_outcome(self, exc_type, uworker_error) -> bool:
+    """Returns True if task succeeded, False otherwise."""
+    if exc_type or uworker_error not in self._utask_success_conditions:
+      return False
+    return True
 
   def __exit__(self, _exc_type, _exc_value, _traceback):
     # Ignore exception details, let Python continue unwinding the stack.
@@ -201,12 +162,12 @@ def __exit__(self, _exc_type, _exc_value, _traceback):
     # The only case where a task might fail without throwing, is in
     # utask_main, by returning an ErrorType proto which indicates
     # failure.
-    outcome = self._infer_uworker_main_outcome(_exc_type,
-                                               self.utask_main_failure)
+    task_succeeded = self._infer_uworker_main_outcome(_exc_type,
+                                                      self.utask_main_failure)
     monitoring_metrics.TASK_OUTCOME_COUNT.increment({
-        **self._labels, 'outcome': outcome
+        **self._labels, 'task_succeeded': task_succeeded
     })
-    if outcome == "success":
+    if task_succeeded:
       error_condition = 'N/A'
     elif _exc_type:
       error_condition = 'UNHANDLED_EXCEPTION'
@@ -218,7 +179,7 @@ def __exit__(self, _exc_type, _exc_value, _traceback):
     # labels limit recommended by gcp.
     trimmed_labels = self._labels
     del trimmed_labels['job']
-    trimmed_labels['outcome'] = outcome
+    trimmed_labels['task_succeeded'] = task_succeeded
     trimmed_labels['error_condition'] = error_condition
     monitoring_metrics.TASK_OUTCOME_COUNT_BY_ERROR_TYPE.increment(
         trimmed_labels)
diff --git a/src/clusterfuzz/_internal/bot/tasks/utasks/analyze_task.py b/src/clusterfuzz/_internal/bot/tasks/utasks/analyze_task.py
index 785974f735..1f0b7e8f58 100644
--- a/src/clusterfuzz/_internal/bot/tasks/utasks/analyze_task.py
+++ b/src/clusterfuzz/_internal/bot/tasks/utasks/analyze_task.py
@@ -552,6 +552,8 @@ def _update_testcase(output):
   if analyze_task_output.platform_id:
     testcase.platform_id = analyze_task_output.platform_id
 
+  testcase.analyze_pending = False
+
   testcase.put()
 
 
diff --git a/src/clusterfuzz/_internal/common/testcase_utils.py b/src/clusterfuzz/_internal/common/testcase_utils.py
index 73fe9edfd4..4314dcd55f 100644
--- a/src/clusterfuzz/_internal/common/testcase_utils.py
+++ b/src/clusterfuzz/_internal/common/testcase_utils.py
@@ -31,59 +31,39 @@
 
 
 def emit_testcase_triage_duration_metric(testcase_id: int, step: str):
-  '''Finds out if a testcase is fuzzer generated or manually uploaded,
-      and emits the TESTCASE_UPLOAD_TRIAGE_DURATION metric.'''
-  testcase_upload_metadata = get_testcase_upload_metadata(testcase_id)
-  if not testcase_upload_metadata:
-    logs.warning(f'No upload metadata found for testcase {testcase_id},'
-                 ' failed to emit TESTCASE_UPLOAD_TRIAGE_DURATION metric.')
-    return
-  if not testcase_upload_metadata.timestamp:
-    logs.warning(
-        f'No timestamp for testcase {testcase_upload_metadata.testcase_id},'
-        ' failed to emit TESTCASE_UPLOAD_TRIAGE_DURATION metric.')
-    return
-  assert step in [
-      'analyze_launched', 'analyze_completed', 'minimize_completed',
-      'regression_completed', 'impact_completed', 'issue_updated'
-  ]
-
+  """Finds out if a testcase is fuzzer generated or manually uploaded,
+      and emits the TESTCASE_TRIAGE_DURATION metric."""
   testcase = data_handler.get_testcase_by_id(testcase_id)
 
   if not testcase:
     logs.warning(f'No testcase found with id {testcase_id},'
-                 ' failed to emit TESTCASE_UPLOAD_TRIAGE_DURATION metric.')
+                 ' failed to emit TESTCASE_TRIAGE_DURATION metric.')
     return
 
   if not testcase.job_type:
     logs.warning(f'No job_type associated to testcase {testcase_id},'
-                 ' failed to emit TESTCASE_UPLOAD_TRIAGE_DURATION metric.')
+                 ' failed to emit TESTCASE_TRIAGE_DURATION metric.')
     return
 
   from_fuzzer = not get_testcase_upload_metadata(testcase_id)
 
-  assert step in [
-      'analyze_launched', 'analyze_completed', 'minimize_completed',
-      'regression_completed', 'impact_completed', 'issue_updated'
-  ]
-
   if not testcase.get_age_in_seconds():
     logs.warning(f'No timestamp associated to testcase {testcase_id},'
-                 ' failed to emit TESTCASE_UPLOAD_TRIAGE_DURATION metric.')
+                 ' failed to emit TESTCASE_TRIAGE_DURATION metric.')
     return
 
-  testcase_age_in_hours = testcase.get_age_in_seconds() / 3600
+  testcase_age_in_hours = testcase.get_age_in_seconds() / (60 * 60)
 
-  logs.info('Emiting TESTCASE_UPLOAD_TRIAGE_DURATION metric for testcase '
+  logs.info('Emiting TESTCASE_TRIAGE_DURATION metric for testcase '
             f'{testcase_id} (age = {testcase_age_in_hours} hours.) '
-            'in step {step}.')
+            f'in step {step}, from_fuzzer: {from_fuzzer}.')
 
-  monitoring_metrics.TESTCASE_UPLOAD_TRIAGE_DURATION.add(
+  monitoring_metrics.TESTCASE_TRIAGE_DURATION.add(
       testcase_age_in_hours,
       labels={
           'job': testcase.job_type,
           'step': step,
-          'origin': 'fuzzer' if from_fuzzer else 'manually_uploaded'
+          'from_fuzzer': from_fuzzer
       })
 
 
diff --git a/src/clusterfuzz/_internal/cron/triage.py b/src/clusterfuzz/_internal/cron/triage.py
index 1f3b375922..ccfb03e8ae 100644
--- a/src/clusterfuzz/_internal/cron/triage.py
+++ b/src/clusterfuzz/_internal/cron/triage.py
@@ -256,6 +256,8 @@ def _check_and_update_similar_bug(testcase, issue_tracker):
 
 def _emit_bug_filing_from_testcase_elapsed_time_metric(testcase):
   testcase_age = testcase.get_age_in_seconds()
+  if not testcase_age:
+    return
   monitoring_metrics.BUG_FILING_FROM_TESTCASE_ELAPSED_TIME.add(
       testcase_age,
       labels={
@@ -336,27 +338,30 @@ def _emit_untriaged_testcase_count_metric():
         })
 
 
-def _emit_untriaged_testcase_age_metric(testcase: data_types.Testcase):
+PENDING_ANALYZE = 'pending_analyze'
+PENDING_CRITICAL_TASKS = 'pending_critical_tasks'
+PENDING_PROGRESSION = 'pending_progression'
+PENDING_GROUPING = 'pending_grouping'
+PENDING_FILING = 'pending_filing'
+
+
+def _emit_untriaged_testcase_age_metric(testcase: data_types.Testcase,
+                                        step: str):
   """Emmits a metric to track age of untriaged testcases."""
-  if not testcase.timestamp:
+  if not testcase.get_age_in_seconds():
     return
 
   logs.info(f'Emiting UNTRIAGED_TESTCASE_AGE for testcase {testcase.key.id()} '
-            f'(age = {testcase.get_age_in_seconds()})')
+            f'(age = {testcase.get_age_in_seconds()}), step = {step}')
   monitoring_metrics.UNTRIAGED_TESTCASE_AGE.add(
       testcase.get_age_in_seconds() / 3600,
       labels={
           'job': testcase.job_type,
           'platform': testcase.platform,
+          'step': step,
       })
 
 
-PENDING_CRITICAL_TASKS = 'pending_critical_tasks'
-PENDING_PROGRESSION = 'pending_progression'
-PENDING_GROUPING = 'pending_grouping'
-PENDING_FILING = 'pending_filing'
-
-
 def main():
   """Files bugs."""
   try:
@@ -409,7 +414,7 @@ def main():
     if testcase.get_metadata('progression_pending'):
       _set_testcase_stuck_state(testcase, True)
       logs.info(f'Skipping testcase {testcase_id}, progression pending')
-      _emit_untriaged_testcase_age_metric(testcase)
+      _emit_untriaged_testcase_age_metric(testcase, PENDING_PROGRESSION)
       _increment_untriaged_testcase_count(testcase.job_type,
                                           PENDING_PROGRESSION)
       continue
@@ -432,10 +437,12 @@ def main():
     # Require that all tasks like minimizaton, regression testing, etc have
     # finished.
     if not critical_tasks_completed:
-      _emit_untriaged_testcase_age_metric(testcase)
+      status = PENDING_CRITICAL_TASKS
+      if testcase.analyze_pending:
+        status = PENDING_ANALYZE
+      _emit_untriaged_testcase_age_metric(testcase, status)
       _set_testcase_stuck_state(testcase, True)
-      _increment_untriaged_testcase_count(testcase.job_type,
-                                          PENDING_CRITICAL_TASKS)
+      _increment_untriaged_testcase_count(testcase.job_type, status)
       logs.info(
           f'Skipping testcase {testcase_id}, critical tasks still pending.')
       continue
@@ -452,7 +459,7 @@ def main():
     # metadata works well.
     if not testcase.group_id and not dates.time_has_expired(
         testcase.timestamp, hours=data_types.MIN_ELAPSED_TIME_SINCE_REPORT):
-      _emit_untriaged_testcase_age_metric(testcase)
+      _emit_untriaged_testcase_age_metric(testcase, PENDING_GROUPING)
       _set_testcase_stuck_state(testcase, True)
       _increment_untriaged_testcase_count(testcase.job_type, PENDING_GROUPING)
       logs.info(f'Skipping testcase {testcase_id}, pending grouping.')
@@ -460,7 +467,7 @@ def main():
 
     if not testcase.get_metadata('ran_grouper'):
       # Testcase should be considered by the grouper first before filing.
-      _emit_untriaged_testcase_age_metric(testcase)
+      _emit_untriaged_testcase_age_metric(testcase, PENDING_GROUPING)
       _set_testcase_stuck_state(testcase, True)
       _increment_untriaged_testcase_count(testcase.job_type, PENDING_GROUPING)
       logs.info(f'Skipping testcase {testcase_id}, pending grouping.')
@@ -490,16 +497,15 @@ def main():
     # Clean up old triage messages that would be not applicable now.
     testcase.delete_metadata(TRIAGE_MESSAGE_KEY, update_testcase=False)
 
-    # A testcase is untriaged, until immediately before a bug is opened
-    _emit_untriaged_testcase_age_metric(testcase)
-    _set_testcase_stuck_state(testcase, False)
-    _increment_untriaged_testcase_count(testcase.job_type, PENDING_FILING)
-
     # File the bug first and then create filed bug metadata.
     if not _file_issue(testcase, issue_tracker, throttler):
+      _emit_untriaged_testcase_age_metric(testcase, PENDING_FILING)
+      _increment_untriaged_testcase_count(testcase.job_type, PENDING_FILING)
       logs.info(f'Issue filing failed for testcase id {testcase_id}')
       continue
 
+    _set_testcase_stuck_state(testcase, False)
+
     _create_filed_bug_metadata(testcase)
     issue_filer.notify_issue_update(testcase, 'new')
 
diff --git a/src/clusterfuzz/_internal/datastore/data_handler.py b/src/clusterfuzz/_internal/datastore/data_handler.py
index abe8d5c677..6b378609c4 100644
--- a/src/clusterfuzz/_internal/datastore/data_handler.py
+++ b/src/clusterfuzz/_internal/datastore/data_handler.py
@@ -921,7 +921,7 @@ def critical_tasks_completed(testcase):
     return testcase.minimized_keys and testcase.regression
 
   return bool(testcase.minimized_keys and testcase.regression and
-              testcase.is_impact_set_flag)
+              testcase.is_impact_set_flag and not testcase.analyze_pending)
 
 
 # ------------------------------------------------------------------------------
@@ -1379,6 +1379,7 @@ def create_user_uploaded_testcase(key,
 
   testcase.timestamp = utils.utcnow()
   testcase.created = testcase.timestamp
+  testcase.analyze_pending = True
   testcase.uploader_email = uploader_email
   testcase.put()
 
diff --git a/src/clusterfuzz/_internal/datastore/data_types.py b/src/clusterfuzz/_internal/datastore/data_types.py
index 8aa009ad36..cbc63c7f4b 100644
--- a/src/clusterfuzz/_internal/datastore/data_types.py
+++ b/src/clusterfuzz/_internal/datastore/data_types.py
@@ -583,6 +583,10 @@ class Testcase(Model):
   # Tracks if a testcase is stuck during triage.
   stuck_in_triage = ndb.BooleanProperty(default=False)
 
+  # Tracks whether the analyze task is still pending for this testcase.
+  # Defaults to False, since most testcases are fuzzer-produced.
+  analyze_pending = ndb.BooleanProperty(default=False)
+
   def is_chromium(self):
     return self.project_name in ('chromium', 'chromium-testing')
 
diff --git a/src/clusterfuzz/_internal/metrics/monitoring_metrics.py b/src/clusterfuzz/_internal/metrics/monitoring_metrics.py
index cac65fd4ec..dfd92e100e 100644
--- a/src/clusterfuzz/_internal/metrics/monitoring_metrics.py
+++ b/src/clusterfuzz/_internal/metrics/monitoring_metrics.py
@@ -231,17 +231,17 @@
     ],
 )
 
-TESTCASE_UPLOAD_TRIAGE_DURATION = monitor.CumulativeDistributionMetric(
-    'uploaded_testcase_analysis/triage_duration_secs',
+TESTCASE_TRIAGE_DURATION = monitor.CumulativeDistributionMetric(
+    'testcase_analysis/triage_duration_hours',
     description=('Time elapsed between testcase upload and completion'
-                 ' of relevant tasks in the testcase upload lifecycle.'
+                 ' of relevant tasks in the testcase lifecycle.'
                  ' Origin can be either from a fuzzer, or a manual'
                  ' upload. Measured in hours.'),
     bucketer=monitor.GeometricBucketer(),
     field_spec=[
         monitor.StringField('step'),
         monitor.StringField('job'),
-        monitor.StringField('origin'),
+        monitor.BooleanField('from_fuzzer'),
     ],
 )
 
@@ -263,7 +263,7 @@
         monitor.StringField('subtask'),
         monitor.StringField('mode'),
         monitor.StringField('platform'),
-        monitor.StringField('outcome'),
+        monitor.BooleanField('task_succeeded'),
     ])
 
 TASK_OUTCOME_COUNT_BY_ERROR_TYPE = monitor.CounterMetric(
@@ -274,7 +274,7 @@
         monitor.StringField('subtask'),
         monitor.StringField('mode'),
         monitor.StringField('platform'),
-        monitor.StringField('outcome'),
+        monitor.BooleanField('task_succeeded'),
         monitor.StringField('error_condition'),
     ])
 
@@ -365,6 +365,7 @@
     field_spec=[
         monitor.StringField('job'),
         monitor.StringField('platform'),
+        monitor.StringField('step'),
     ])
 
 UNTRIAGED_TESTCASE_COUNT = monitor.GaugeMetric(