From a20705c20d1e89bdd3db59664d73913674ed4a8f Mon Sep 17 00:00:00 2001 From: Andrey Devyatkin Date: Thu, 29 Feb 2024 21:53:46 +0100 Subject: [PATCH] Fix issue creation for grafana alerts (#30424) * fix issue creation for grafana alerts * fix issue creation for grafana alerts * fix issue creation for grafana alerts * fix dashboard category issues * addressed the comments * code cleanup * restrict number of runs to consider --- .github/workflows/flaky_test_detection.yml | 7 +- .../GA-Post-Commits_status_dashboard.json | 28 ++-- .../provisioning/alerts/flaky_test.yaml | 130 +++++------------- .../github_runs_prefetcher/code/main.py | 109 +++++++++++---- .test-infra/tools/flaky_test_detection.py | 36 +++-- .test-infra/tools/requirements.txt | 17 +++ 6 files changed, 170 insertions(+), 157 deletions(-) create mode 100644 .test-infra/tools/requirements.txt diff --git a/.github/workflows/flaky_test_detection.yml b/.github/workflows/flaky_test_detection.yml index ca23cf11d15d..c8505ff584ef 100644 --- a/.github/workflows/flaky_test_detection.yml +++ b/.github/workflows/flaky_test_detection.yml @@ -21,16 +21,19 @@ permissions: issues: write on: - workflow_dispatch: + schedule: + - cron: '30 */3 * * *' + workflow_dispatch: inputs: READ_ONLY: description: 'Run in read-only mode' required: false default: 'true' - + env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} READ_ONLY: ${{ inputs.READ_ONLY }} + jobs: flaky-test-detection: runs-on: [self-hosted, ubuntu-20.04, main] diff --git a/.test-infra/metrics/grafana/dashboards/GA-Post-Commits_status_dashboard.json b/.test-infra/metrics/grafana/dashboards/GA-Post-Commits_status_dashboard.json index 30999373eddd..6607bfde701e 100644 --- a/.test-infra/metrics/grafana/dashboards/GA-Post-Commits_status_dashboard.json +++ b/.test-infra/metrics/grafana/dashboards/GA-Post-Commits_status_dashboard.json @@ -3865,7 +3865,7 @@ { "matcher": { "id": "byName", - "options": "job_name" + "options": "workflow_name" }, "properties": [ { @@ -6605,7 +6605,7 
@@ "hide": false, "metricColumn": "none", "rawQuery": true, - "rawSql": "with workflows as (\n\twith temp as (SELECT t1.workflow_id, t1.job_name, t1.job_yml_filename, t1.dashboard_category, t1.run1, t2.run1id ,\nt1.run2, t2.run2id ,\nt1.run3, t2.run3id ,\nt1.run4, t2.run4id ,\nt1.run5, t2.run5id ,\nt1.run6, t2.run6id ,\nt1.run7, t2.run7id ,\nt1.run8, t2.run8id ,\nt1.run9, t2.run9id ,\nt1.run10, t2.run10id \nFROM (SELECT * FROM crosstab('SELECT github_workflows.workflow_id, name, filename, dashboard_category, run_number, status \n FROM github_workflow_runs\nINNER JOIN github_workflows ON github_workflow_runs.workflow_id = github_workflows.workflow_id\nORDER BY 1,5 DESC\n','SELECT m from generate_series(1,10) m')\nAS c1(workflow_id text, job_name text, job_yml_filename text, dashboard_category text, run1 text, run2 text,run3 text, \n run4 text ,run5 text,run6 text, run7 text, \n run8 text ,run9 text,run10 text) \n\t ) AS t1\n JOIN (SELECT * FROM crosstab('SELECT name, status, github_workflow_runs.url AS run_url\nFROM github_workflow_runs\nINNER JOIN github_workflows ON github_workflow_runs.workflow_id = github_workflows.workflow_id\nORDER BY 1,3 DESC\n\n')\nAS c2(job_name text, run1id text, run2id text,run3id text, \n run4id text ,run5id text,run6id text, run7id text, \n run8id text ,run9id text,run10id text) \n\t\t\t ) AS t2\n ON t1.job_name = t2.job_name\n\t\t )\n select\n *,\n case when run1 like 'success' then 1 when run1 like 'in_progress' then 2 when run1 like 'queued' then 2 when run1 like 'waiting' then 2 when run1 like 'cancelled' then 3 when run1 like 'failure' then 0 else 4 end as run_1,\n case when run2 like 'success' then 1 when run2 like 'in_progress' then 2 when run2 like 'queued' then 2 when run2 like 'waiting' then 2 when run2 like 'cancelled' then 3 when run2 like 'failure' then 0 else 4 end as run_2,\n case when run3 like 'success' then 1 when run3 like 'in_progress' then 2 when run3 like 'queued' then 2 when run3 like 'waiting' then 2 when run3 
like 'cancelled' then 3 when run3 like 'failure' then 0 else 4 end as run_3,\n case when run4 like 'success' then 1 when run4 like 'in_progress' then 2 when run4 like 'queued' then 2 when run4 like 'waiting' then 2 when run4 like 'cancelled' then 3 when run4 like 'failure' then 0 else 4 end as run_4,\n case when run5 like 'success' then 1 when run5 like 'in_progress' then 2 when run5 like 'queued' then 2 when run5 like 'waiting' then 2 when run5 like 'cancelled' then 3 when run5 like 'failure' then 0 else 4 end as run_5,\n case when run6 like 'success' then 1 when run6 like 'in_progress' then 2 when run6 like 'queued' then 2 when run6 like 'waiting' then 2 when run6 like 'cancelled' then 3 when run6 like 'failure' then 0 else 4 end as run_6,\n case when run7 like 'success' then 1 when run7 like 'in_progress' then 2 when run7 like 'queued' then 2 when run7 like 'waiting' then 2 when run7 like 'cancelled' then 3 when run7 like 'failure' then 0 else 4 end as run_7,\n case when run8 like 'success' then 1 when run8 like 'in_progress' then 2 when run8 like 'queued' then 2 when run8 like 'waiting' then 2 when run8 like 'cancelled' then 3 when run8 like 'failure' then 0 else 4 end as run_8,\n case when run9 like 'success' then 1 when run9 like 'in_progress' then 2 when run9 like 'queued' then 2 when run9 like 'waiting' then 2 when run9 like 'cancelled' then 3 when run9 like 'failure' then 0 else 4 end as run_9,\n case when run10 like 'success' then 1 when run10 like 'in_progress' then 2 when run10 like 'queued' then 2 when run10 like 'waiting' then 2 when run10 like 'cancelled' then 3 when run10 like 'failure' then 0 else 4 end as run_10\n from\n temp\n where\n dashboard_category = 'runners_python'\n)\nselect\n job_name,\n job_yml_filename,\n run_1,\n run1Id,\n run_2,\n run2Id,\n run_3,\n run3Id,\n run_4,\n run4Id,\n run_5,\n run5Id,\n run_6,\n run6Id,\n run_7,\n run7Id,\n run_8,\n run8Id,\n run_9,\n run9Id,\n run_10,\n run10Id\nfrom\n workflows;", + "rawSql": "with 
workflows as (\n\twith temp as (SELECT t1.workflow_id, t1.workflow_name, t1.workflow_filename, t1.dashboard_category, t1.run1, t2.run1id ,\nt1.run2, t2.run2id ,\nt1.run3, t2.run3id ,\nt1.run4, t2.run4id ,\nt1.run5, t2.run5id ,\nt1.run6, t2.run6id ,\nt1.run7, t2.run7id ,\nt1.run8, t2.run8id ,\nt1.run9, t2.run9id ,\nt1.run10, t2.run10id \nFROM (SELECT * FROM crosstab('SELECT github_workflows.workflow_id, name as workflow_name, filename AS workflow_filename, dashboard_category, run_number, status \n FROM github_workflow_runs\nINNER JOIN github_workflows ON github_workflow_runs.workflow_id = github_workflows.workflow_id\nORDER BY 1,5 DESC\n','SELECT m from generate_series(1,10) m')\nAS c1(workflow_id text, workflow_name text, workflow_filename text, dashboard_category text, run1 text, run2 text,run3 text, \n run4 text ,run5 text,run6 text, run7 text, \n run8 text ,run9 text,run10 text) \n\t ) AS t1\n JOIN (SELECT * FROM crosstab('SELECT name, status, github_workflow_runs.url AS run_url\nFROM github_workflow_runs\nINNER JOIN github_workflows ON github_workflow_runs.workflow_id = github_workflows.workflow_id\nORDER BY 1,3 DESC\n\n')\nAS c2(workflow_name text, run1id text, run2id text,run3id text, \n run4id text ,run5id text,run6id text, run7id text, \n run8id text ,run9id text,run10id text) \n\t\t\t ) AS t2\n ON t1.workflow_name = t2.workflow_name\n\t\t )\n select\n *,\n case when run1 like 'success' then 1 when run1 like 'in_progress' then 2 when run1 like 'queued' then 2 when run1 like 'waiting' then 2 when run1 like 'cancelled' then 3 when run1 like 'failure' then 0 else 4 end as run_1,\n case when run2 like 'success' then 1 when run2 like 'in_progress' then 2 when run2 like 'queued' then 2 when run2 like 'waiting' then 2 when run2 like 'cancelled' then 3 when run2 like 'failure' then 0 else 4 end as run_2,\n case when run3 like 'success' then 1 when run3 like 'in_progress' then 2 when run3 like 'queued' then 2 when run3 like 'waiting' then 2 when run3 
then 3 when run3 like 'failure' then 0 else 4 end as run_3,\n case when run4 like 'success' then 1 when run4 like 'in_progress' then 2 when run4 like 'queued' then 2 when run4 like 'waiting' then 2 when run4 like 'cancelled' then 3 when run4 like 'failure' then 0 else 4 end as run_4,\n case when run5 like 'success' then 1 when run5 like 'in_progress' then 2 when run5 like 'queued' then 2 when run5 like 'waiting' then 2 when run5 like 'cancelled' then 3 when run5 like 'failure' then 0 else 4 end as run_5,\n case when run6 like 'success' then 1 when run6 like 'in_progress' then 2 when run6 like 'queued' then 2 when run6 like 'waiting' then 2 when run6 like 'cancelled' then 3 when run6 like 'failure' then 0 else 4 end as run_6,\n case when run7 like 'success' then 1 when run7 like 'in_progress' then 2 when run7 like 'queued' then 2 when run7 like 'waiting' then 2 when run7 like 'cancelled' then 3 when run7 like 'failure' then 0 else 4 end as run_7,\n case when run8 like 'success' then 1 when run8 like 'in_progress' then 2 when run8 like 'queued' then 2 when run8 like 'waiting' then 2 when run8 like 'cancelled' then 3 when run8 like 'failure' then 0 else 4 end as run_8,\n case when run9 like 'success' then 1 when run9 like 'in_progress' then 2 when run9 like 'queued' then 2 when run9 like 'waiting' then 2 when run9 like 'cancelled' then 3 when run9 like 'failure' then 0 else 4 end as run_9,\n case when run10 like 'success' then 1 when run10 like 'in_progress' then 2 when run10 like 'queued' then 2 when run10 like 'waiting' then 2 when run10 like 'cancelled' then 3 when run10 like 'failure' then 0 else 4 end as run_10\n from\n temp\n where\n dashboard_category = 'runners_python'\n)\nselect\n workflow_name,\n workflow_filename,\n run_1,\n run1Id,\n run_2,\n run2Id,\n run_3,\n run3Id,\n run_4,\n run4Id,\n run_5,\n run5Id,\n run_6,\n run6Id,\n run_7,\n run7Id,\n run_8,\n run8Id,\n run_9,\n run9Id,\n run_10,\n run10Id\nfrom\n workflows;", "refId": "A", "select": [ [ @@ 
-6700,7 +6700,7 @@ { "matcher": { "id": "byName", - "options": "job_name" + "options": "workflow_name" }, "properties": [ { @@ -6717,7 +6717,7 @@ { "targetBlank": true, "title": "", - "url": "https://github.com/apache/beam/actions/${__data.fields.job_yml_filename}" + "url": "https://github.com/apache/beam/actions/${__data.fields.workflow_filename}" } ] }, @@ -6738,7 +6738,7 @@ { "matcher": { "id": "byName", - "options": "job_yml_filename" + "options": "workflow_filename" }, "properties": [ { @@ -7550,7 +7550,7 @@ "hide": false, "metricColumn": "none", "rawQuery": true, - "rawSql": "with workflows as (\n\twith temp as (SELECT t1.workflow_id, t1.job_name, t1.job_yml_filename, t1.dashboard_category, t1.run1, t2.run1id ,\nt1.run2, t2.run2id ,\nt1.run3, t2.run3id ,\nt1.run4, t2.run4id ,\nt1.run5, t2.run5id ,\nt1.run6, t2.run6id ,\nt1.run7, t2.run7id ,\nt1.run8, t2.run8id ,\nt1.run9, t2.run9id ,\nt1.run10, t2.run10id \nFROM (SELECT * FROM crosstab('SELECT github_workflows.workflow_id, name, filename, dashboard_category, run_number, status \n FROM github_workflow_runs\nINNER JOIN github_workflows ON github_workflow_runs.workflow_id = github_workflows.workflow_id\nORDER BY 1,5 DESC\n','SELECT m from generate_series(1,10) m')\nAS c1(workflow_id text, job_name text, job_yml_filename text, dashboard_category text, run1 text, run2 text,run3 text, \n run4 text ,run5 text,run6 text, run7 text, \n run8 text ,run9 text,run10 text) \n\t ) AS t1\n JOIN (SELECT * FROM crosstab('SELECT name, status, github_workflow_runs.url AS run_url\nFROM github_workflow_runs\nINNER JOIN github_workflows ON github_workflow_runs.workflow_id = github_workflows.workflow_id\nORDER BY 1,3 DESC\n\n')\nAS c2(job_name text, run1id text, run2id text,run3id text, \n run4id text ,run5id text,run6id text, run7id text, \n run8id text ,run9id text,run10id text) \n\t\t\t ) AS t2\n ON t1.job_name = t2.job_name\n\t\t )\n select\n *,\n case when run1 like 'success' then 1 when run1 like 'in_progress' then 2 when run1 
like 'queued' then 2 when run1 like 'waiting' then 2 when run1 like 'cancelled' then 3 when run1 like 'failure' then 0 else 4 end as run_1,\n case when run2 like 'success' then 1 when run2 like 'in_progress' then 2 when run2 like 'queued' then 2 when run2 like 'waiting' then 2 when run2 like 'cancelled' then 3 when run2 like 'failure' then 0 else 4 end as run_2,\n case when run3 like 'success' then 1 when run3 like 'in_progress' then 2 when run3 like 'queued' then 2 when run3 like 'waiting' then 2 when run3 like 'cancelled' then 3 when run3 like 'failure' then 0 else 4 end as run_3,\n case when run4 like 'success' then 1 when run4 like 'in_progress' then 2 when run4 like 'queued' then 2 when run4 like 'waiting' then 2 when run4 like 'cancelled' then 3 when run4 like 'failure' then 0 else 4 end as run_4,\n case when run5 like 'success' then 1 when run5 like 'in_progress' then 2 when run5 like 'queued' then 2 when run5 like 'waiting' then 2 when run5 like 'cancelled' then 3 when run5 like 'failure' then 0 else 4 end as run_5,\n case when run6 like 'success' then 1 when run6 like 'in_progress' then 2 when run6 like 'queued' then 2 when run6 like 'waiting' then 2 when run6 like 'cancelled' then 3 when run6 like 'failure' then 0 else 4 end as run_6,\n case when run7 like 'success' then 1 when run7 like 'in_progress' then 2 when run7 like 'queued' then 2 when run7 like 'waiting' then 2 when run7 like 'cancelled' then 3 when run7 like 'failure' then 0 else 4 end as run_7,\n case when run8 like 'success' then 1 when run8 like 'in_progress' then 2 when run8 like 'queued' then 2 when run8 like 'waiting' then 2 when run8 like 'cancelled' then 3 when run8 like 'failure' then 0 else 4 end as run_8,\n case when run9 like 'success' then 1 when run9 like 'in_progress' then 2 when run9 like 'queued' then 2 when run9 like 'waiting' then 2 when run9 like 'cancelled' then 3 when run9 like 'failure' then 0 else 4 end as run_9,\n case when run10 like 'success' then 1 when run10 like 
'in_progress' then 2 when run10 like 'queued' then 2 when run10 like 'waiting' then 2 when run10 like 'cancelled' then 3 when run10 like 'failure' then 0 else 4 end as run_10\n from\n temp\n where\n dashboard_category = 'load_perf_python'\n)\nselect\n job_name,\n job_yml_filename,\n run_1,\n run1Id,\n run_2,\n run2Id,\n run_3,\n run3Id,\n run_4,\n run4Id,\n run_5,\n run5Id,\n run_6,\n run6Id,\n run_7,\n run7Id,\n run_8,\n run8Id,\n run_9,\n run9Id,\n run_10,\n run10Id\nfrom\n workflows;", + "rawSql": "with workflows as (\n\twith temp as (SELECT t1.workflow_id, t1.workflow_name, t1.workflow_filename, t1.dashboard_category, t1.run1, t2.run1id ,\nt1.run2, t2.run2id ,\nt1.run3, t2.run3id ,\nt1.run4, t2.run4id ,\nt1.run5, t2.run5id ,\nt1.run6, t2.run6id ,\nt1.run7, t2.run7id ,\nt1.run8, t2.run8id ,\nt1.run9, t2.run9id ,\nt1.run10, t2.run10id \nFROM (SELECT * FROM crosstab('SELECT github_workflows.workflow_id, name as workflow_name, filename AS workflow_filename, dashboard_category, run_number, status \n FROM github_workflow_runs\nINNER JOIN github_workflows ON github_workflow_runs.workflow_id = github_workflows.workflow_id\nORDER BY 1,5 DESC\n','SELECT m from generate_series(1,10) m')\nAS c1(workflow_id text, workflow_name text, workflow_filename text, dashboard_category text, run1 text, run2 text,run3 text, \n run4 text ,run5 text,run6 text, run7 text, \n run8 text ,run9 text,run10 text) \n\t ) AS t1\n JOIN (SELECT * FROM crosstab('SELECT name, status, github_workflow_runs.url AS run_url\nFROM github_workflow_runs\nINNER JOIN github_workflows ON github_workflow_runs.workflow_id = github_workflows.workflow_id\nORDER BY 1,3 DESC\n\n')\nAS c2(workflow_name text, run1id text, run2id text,run3id text, \n run4id text ,run5id text,run6id text, run7id text, \n run8id text ,run9id text,run10id text) \n\t\t\t ) AS t2\n ON t1.workflow_name = t2.workflow_name\n\t\t )\n select\n *,\n case when run1 like 'success' then 1 when run1 like 'in_progress' then 2 when run1 like 'queued' then 2 
when run1 like 'waiting' then 2 when run1 like 'cancelled' then 3 when run1 like 'failure' then 0 else 4 end as run_1,\n case when run2 like 'success' then 1 when run2 like 'in_progress' then 2 when run2 like 'queued' then 2 when run2 like 'waiting' then 2 when run2 like 'cancelled' then 3 when run2 like 'failure' then 0 else 4 end as run_2,\n case when run3 like 'success' then 1 when run3 like 'in_progress' then 2 when run3 like 'queued' then 2 when run3 like 'waiting' then 2 when run3 like 'cancelled' then 3 when run3 like 'failure' then 0 else 4 end as run_3,\n case when run4 like 'success' then 1 when run4 like 'in_progress' then 2 when run4 like 'queued' then 2 when run4 like 'waiting' then 2 when run4 like 'cancelled' then 3 when run4 like 'failure' then 0 else 4 end as run_4,\n case when run5 like 'success' then 1 when run5 like 'in_progress' then 2 when run5 like 'queued' then 2 when run5 like 'waiting' then 2 when run5 like 'cancelled' then 3 when run5 like 'failure' then 0 else 4 end as run_5,\n case when run6 like 'success' then 1 when run6 like 'in_progress' then 2 when run6 like 'queued' then 2 when run6 like 'waiting' then 2 when run6 like 'cancelled' then 3 when run6 like 'failure' then 0 else 4 end as run_6,\n case when run7 like 'success' then 1 when run7 like 'in_progress' then 2 when run7 like 'queued' then 2 when run7 like 'waiting' then 2 when run7 like 'cancelled' then 3 when run7 like 'failure' then 0 else 4 end as run_7,\n case when run8 like 'success' then 1 when run8 like 'in_progress' then 2 when run8 like 'queued' then 2 when run8 like 'waiting' then 2 when run8 like 'cancelled' then 3 when run8 like 'failure' then 0 else 4 end as run_8,\n case when run9 like 'success' then 1 when run9 like 'in_progress' then 2 when run9 like 'queued' then 2 when run9 like 'waiting' then 2 when run9 like 'cancelled' then 3 when run9 like 'failure' then 0 else 4 end as run_9,\n case when run10 like 'success' then 1 when run10 like 'in_progress' then 2 
when run10 like 'queued' then 2 when run10 like 'waiting' then 2 when run10 like 'cancelled' then 3 when run10 like 'failure' then 0 else 4 end as run_10\n from\n temp\n where\n dashboard_category = 'load_perf_python'\n)\nselect\n workflow_name,\n workflow_filename,\n run_1,\n run1Id,\n run_2,\n run2Id,\n run_3,\n run3Id,\n run_4,\n run4Id,\n run_5,\n run5Id,\n run_6,\n run6Id,\n run_7,\n run7Id,\n run_8,\n run8Id,\n run_9,\n run9Id,\n run_10,\n run10Id\nfrom\n workflows;", "refId": "A", "select": [ [ @@ -7645,7 +7645,7 @@ { "matcher": { "id": "byName", - "options": "job_name" + "options": "workflow_name" }, "properties": [ { @@ -7662,7 +7662,7 @@ { "targetBlank": true, "title": "", - "url": "https://github.com/apache/beam/actions/${__data.fields.job_yml_filename}" + "url": "https://github.com/apache/beam/actions/${__data.fields.workflow_filename}" } ] }, @@ -7683,7 +7683,7 @@ { "matcher": { "id": "byName", - "options": "job_yml_filename" + "options": "workflow_filename" }, "properties": [ { @@ -8495,7 +8495,7 @@ "hide": false, "metricColumn": "none", "rawQuery": true, - "rawSql": "with workflows as (\n\twith temp as (SELECT t1.workflow_id, t1.job_name, t1.job_yml_filename, t1.dashboard_category, t1.run1, t2.run1id ,\nt1.run2, t2.run2id ,\nt1.run3, t2.run3id ,\nt1.run4, t2.run4id ,\nt1.run5, t2.run5id ,\nt1.run6, t2.run6id ,\nt1.run7, t2.run7id ,\nt1.run8, t2.run8id ,\nt1.run9, t2.run9id ,\nt1.run10, t2.run10id \nFROM (SELECT * FROM crosstab('SELECT github_workflows.workflow_id, name, filename, dashboard_category, run_number, status \n FROM github_workflow_runs\nINNER JOIN github_workflows ON github_workflow_runs.workflow_id = github_workflows.workflow_id\nORDER BY 1,5 DESC\n','SELECT m from generate_series(1,10) m')\nAS c1(workflow_id text, job_name text, job_yml_filename text, dashboard_category text, run1 text, run2 text,run3 text, \n run4 text ,run5 text,run6 text, run7 text, \n run8 text ,run9 text,run10 text) \n\t ) AS t1\n JOIN (SELECT * FROM 
crosstab('SELECT name, status, github_workflow_runs.url AS run_url\nFROM github_workflow_runs\nINNER JOIN github_workflows ON github_workflow_runs.workflow_id = github_workflows.workflow_id\nORDER BY 1,3 DESC\n\n')\nAS c2(job_name text, run1id text, run2id text,run3id text, \n run4id text ,run5id text,run6id text, run7id text, \n run8id text ,run9id text,run10id text) \n\t\t\t ) AS t2\n ON t1.job_name = t2.job_name\n\t\t )\n select\n *,\n case when run1 like 'success' then 1 when run1 like 'in_progress' then 2 when run1 like 'queued' then 2 when run1 like 'waiting' then 2 when run1 like 'cancelled' then 3 when run1 like 'failure' then 0 else 4 end as run_1,\n case when run2 like 'success' then 1 when run2 like 'in_progress' then 2 when run2 like 'queued' then 2 when run2 like 'waiting' then 2 when run2 like 'cancelled' then 3 when run2 like 'failure' then 0 else 4 end as run_2,\n case when run3 like 'success' then 1 when run3 like 'in_progress' then 2 when run3 like 'queued' then 2 when run3 like 'waiting' then 2 when run3 like 'cancelled' then 3 when run3 like 'failure' then 0 else 4 end as run_3,\n case when run4 like 'success' then 1 when run4 like 'in_progress' then 2 when run4 like 'queued' then 2 when run4 like 'waiting' then 2 when run4 like 'cancelled' then 3 when run4 like 'failure' then 0 else 4 end as run_4,\n case when run5 like 'success' then 1 when run5 like 'in_progress' then 2 when run5 like 'queued' then 2 when run5 like 'waiting' then 2 when run5 like 'cancelled' then 3 when run5 like 'failure' then 0 else 4 end as run_5,\n case when run6 like 'success' then 1 when run6 like 'in_progress' then 2 when run6 like 'queued' then 2 when run6 like 'waiting' then 2 when run6 like 'cancelled' then 3 when run6 like 'failure' then 0 else 4 end as run_6,\n case when run7 like 'success' then 1 when run7 like 'in_progress' then 2 when run7 like 'queued' then 2 when run7 like 'waiting' then 2 when run7 like 'cancelled' then 3 when run7 like 'failure' then 0 else 
4 end as run_7,\n case when run8 like 'success' then 1 when run8 like 'in_progress' then 2 when run8 like 'queued' then 2 when run8 like 'waiting' then 2 when run8 like 'cancelled' then 3 when run8 like 'failure' then 0 else 4 end as run_8,\n case when run9 like 'success' then 1 when run9 like 'in_progress' then 2 when run9 like 'queued' then 2 when run9 like 'waiting' then 2 when run9 like 'cancelled' then 3 when run9 like 'failure' then 0 else 4 end as run_9,\n case when run10 like 'success' then 1 when run10 like 'in_progress' then 2 when run10 like 'queued' then 2 when run10 like 'waiting' then 2 when run10 like 'cancelled' then 3 when run10 like 'failure' then 0 else 4 end as run_10\n from\n temp\n where\n dashboard_category = 'go'\n)\nselect\n job_name,\n job_yml_filename,\n run_1,\n run1Id,\n run_2,\n run2Id,\n run_3,\n run3Id,\n run_4,\n run4Id,\n run_5,\n run5Id,\n run_6,\n run6Id,\n run_7,\n run7Id,\n run_8,\n run8Id,\n run_9,\n run9Id,\n run_10,\n run10Id\nfrom\n workflows;", + "rawSql": "with workflows as (\n\twith temp as (SELECT t1.workflow_id, t1.workflow_name, t1.workflow_filename, t1.dashboard_category, t1.run1, t2.run1id ,\nt1.run2, t2.run2id ,\nt1.run3, t2.run3id ,\nt1.run4, t2.run4id ,\nt1.run5, t2.run5id ,\nt1.run6, t2.run6id ,\nt1.run7, t2.run7id ,\nt1.run8, t2.run8id ,\nt1.run9, t2.run9id ,\nt1.run10, t2.run10id \nFROM (SELECT * FROM crosstab('SELECT github_workflows.workflow_id, name as workflow_name, filename AS workflow_filename, dashboard_category, run_number, status \n FROM github_workflow_runs\nINNER JOIN github_workflows ON github_workflow_runs.workflow_id = github_workflows.workflow_id\nORDER BY 1,5 DESC\n','SELECT m from generate_series(1,10) m')\nAS c1(workflow_id text, workflow_name text, workflow_filename text, dashboard_category text, run1 text, run2 text,run3 text, \n run4 text ,run5 text,run6 text, run7 text, \n run8 text ,run9 text,run10 text) \n\t ) AS t1\n JOIN (SELECT * FROM crosstab('SELECT name, status, 
github_workflow_runs.url AS run_url\nFROM github_workflow_runs\nINNER JOIN github_workflows ON github_workflow_runs.workflow_id = github_workflows.workflow_id\nORDER BY 1,3 DESC\n\n')\nAS c2(workflow_name text, run1id text, run2id text,run3id text, \n run4id text ,run5id text,run6id text, run7id text, \n run8id text ,run9id text,run10id text) \n\t\t\t ) AS t2\n ON t1.workflow_name = t2.workflow_name\n\t\t )\n select\n *,\n case when run1 like 'success' then 1 when run1 like 'in_progress' then 2 when run1 like 'queued' then 2 when run1 like 'waiting' then 2 when run1 like 'cancelled' then 3 when run1 like 'failure' then 0 else 4 end as run_1,\n case when run2 like 'success' then 1 when run2 like 'in_progress' then 2 when run2 like 'queued' then 2 when run2 like 'waiting' then 2 when run2 like 'cancelled' then 3 when run2 like 'failure' then 0 else 4 end as run_2,\n case when run3 like 'success' then 1 when run3 like 'in_progress' then 2 when run3 like 'queued' then 2 when run3 like 'waiting' then 2 when run3 like 'cancelled' then 3 when run3 like 'failure' then 0 else 4 end as run_3,\n case when run4 like 'success' then 1 when run4 like 'in_progress' then 2 when run4 like 'queued' then 2 when run4 like 'waiting' then 2 when run4 like 'cancelled' then 3 when run4 like 'failure' then 0 else 4 end as run_4,\n case when run5 like 'success' then 1 when run5 like 'in_progress' then 2 when run5 like 'queued' then 2 when run5 like 'waiting' then 2 when run5 like 'cancelled' then 3 when run5 like 'failure' then 0 else 4 end as run_5,\n case when run6 like 'success' then 1 when run6 like 'in_progress' then 2 when run6 like 'queued' then 2 when run6 like 'waiting' then 2 when run6 like 'cancelled' then 3 when run6 like 'failure' then 0 else 4 end as run_6,\n case when run7 like 'success' then 1 when run7 like 'in_progress' then 2 when run7 like 'queued' then 2 when run7 like 'waiting' then 2 when run7 like 'cancelled' then 3 when run7 like 'failure' then 0 else 4 end as 
run_7,\n case when run8 like 'success' then 1 when run8 like 'in_progress' then 2 when run8 like 'queued' then 2 when run8 like 'waiting' then 2 when run8 like 'cancelled' then 3 when run8 like 'failure' then 0 else 4 end as run_8,\n case when run9 like 'success' then 1 when run9 like 'in_progress' then 2 when run9 like 'queued' then 2 when run9 like 'waiting' then 2 when run9 like 'cancelled' then 3 when run9 like 'failure' then 0 else 4 end as run_9,\n case when run10 like 'success' then 1 when run10 like 'in_progress' then 2 when run10 like 'queued' then 2 when run10 like 'waiting' then 2 when run10 like 'cancelled' then 3 when run10 like 'failure' then 0 else 4 end as run_10\n from\n temp\n where\n dashboard_category = 'go'\n)\nselect\n workflow_name,\n workflow_filename,\n run_1,\n run1Id,\n run_2,\n run2Id,\n run_3,\n run3Id,\n run_4,\n run4Id,\n run_5,\n run5Id,\n run_6,\n run6Id,\n run_7,\n run7Id,\n run_8,\n run8Id,\n run_9,\n run9Id,\n run_10,\n run10Id\nfrom\n workflows;", "refId": "A", "select": [ [ @@ -8590,7 +8590,7 @@ { "matcher": { "id": "byName", - "options": "job_name" + "options": "workflow_name" }, "properties": [ { @@ -8607,7 +8607,7 @@ { "targetBlank": true, "title": "", - "url": "https://github.com/apache/beam/actions/${__data.fields.job_yml_filename}" + "url": "https://github.com/apache/beam/actions/${__data.fields.workflow_filename}" } ] }, @@ -8628,7 +8628,7 @@ { "matcher": { "id": "byName", - "options": "job_yml_filename" + "options": "workflow_filename" }, "properties": [ { @@ -9440,7 +9440,7 @@ "hide": false, "metricColumn": "none", "rawQuery": true, - "rawSql": "with workflows as (\n\twith temp as (SELECT t1.workflow_id, t1.job_name, t1.job_yml_filename, t1.dashboard_category, t1.run1, t2.run1id ,\nt1.run2, t2.run2id ,\nt1.run3, t2.run3id ,\nt1.run4, t2.run4id ,\nt1.run5, t2.run5id ,\nt1.run6, t2.run6id ,\nt1.run7, t2.run7id ,\nt1.run8, t2.run8id ,\nt1.run9, t2.run9id ,\nt1.run10, t2.run10id \nFROM (SELECT * FROM crosstab('SELECT 
github_workflows.workflow_id, name, filename, dashboard_category, run_number, status \n FROM github_workflow_runs\nINNER JOIN github_workflows ON github_workflow_runs.workflow_id = github_workflows.workflow_id\nORDER BY 1,5 DESC\n','SELECT m from generate_series(1,10) m')\nAS c1(workflow_id text, job_name text, job_yml_filename text, dashboard_category text, run1 text, run2 text,run3 text, \n run4 text ,run5 text,run6 text, run7 text, \n run8 text ,run9 text,run10 text) \n\t ) AS t1\n JOIN (SELECT * FROM crosstab('SELECT name, status, github_workflow_runs.url AS run_url\nFROM github_workflow_runs\nINNER JOIN github_workflows ON github_workflow_runs.workflow_id = github_workflows.workflow_id\nORDER BY 1,3 DESC\n\n')\nAS c2(job_name text, run1id text, run2id text,run3id text, \n run4id text ,run5id text,run6id text, run7id text, \n run8id text ,run9id text,run10id text) \n\t\t\t ) AS t2\n ON t1.job_name = t2.job_name\n\t\t )\n select\n *,\n case when run1 like 'success' then 1 when run1 like 'in_progress' then 2 when run1 like 'queued' then 2 when run1 like 'waiting' then 2 when run1 like 'cancelled' then 3 when run1 like 'failure' then 0 else 4 end as run_1,\n case when run2 like 'success' then 1 when run2 like 'in_progress' then 2 when run2 like 'queued' then 2 when run2 like 'waiting' then 2 when run2 like 'cancelled' then 3 when run2 like 'failure' then 0 else 4 end as run_2,\n case when run3 like 'success' then 1 when run3 like 'in_progress' then 2 when run3 like 'queued' then 2 when run3 like 'waiting' then 2 when run3 like 'cancelled' then 3 when run3 like 'failure' then 0 else 4 end as run_3,\n case when run4 like 'success' then 1 when run4 like 'in_progress' then 2 when run4 like 'queued' then 2 when run4 like 'waiting' then 2 when run4 like 'cancelled' then 3 when run4 like 'failure' then 0 else 4 end as run_4,\n case when run5 like 'success' then 1 when run5 like 'in_progress' then 2 when run5 like 'queued' then 2 when run5 like 'waiting' then 2 when run5 
like 'cancelled' then 3 when run5 like 'failure' then 0 else 4 end as run_5,\n case when run6 like 'success' then 1 when run6 like 'in_progress' then 2 when run6 like 'queued' then 2 when run6 like 'waiting' then 2 when run6 like 'cancelled' then 3 when run6 like 'failure' then 0 else 4 end as run_6,\n case when run7 like 'success' then 1 when run7 like 'in_progress' then 2 when run7 like 'queued' then 2 when run7 like 'waiting' then 2 when run7 like 'cancelled' then 3 when run7 like 'failure' then 0 else 4 end as run_7,\n case when run8 like 'success' then 1 when run8 like 'in_progress' then 2 when run8 like 'queued' then 2 when run8 like 'waiting' then 2 when run8 like 'cancelled' then 3 when run8 like 'failure' then 0 else 4 end as run_8,\n case when run9 like 'success' then 1 when run9 like 'in_progress' then 2 when run9 like 'queued' then 2 when run9 like 'waiting' then 2 when run9 like 'cancelled' then 3 when run9 like 'failure' then 0 else 4 end as run_9,\n case when run10 like 'success' then 1 when run10 like 'in_progress' then 2 when run10 like 'queued' then 2 when run10 like 'waiting' then 2 when run10 like 'cancelled' then 3 when run10 like 'failure' then 0 else 4 end as run_10\n from\n temp\n where\n dashboard_category = 'misc'\n)\nselect\n job_name,\n job_yml_filename,\n run_1,\n run1Id,\n run_2,\n run2Id,\n run_3,\n run3Id,\n run_4,\n run4Id,\n run_5,\n run5Id,\n run_6,\n run6Id,\n run_7,\n run7Id,\n run_8,\n run8Id,\n run_9,\n run9Id,\n run_10,\n run10Id\nfrom\n workflows;", + "rawSql": "with workflows as (\n\twith temp as (SELECT t1.workflow_id, t1.workflow_name, t1.workflow_filename, t1.dashboard_category, t1.run1, t2.run1id ,\nt1.run2, t2.run2id ,\nt1.run3, t2.run3id ,\nt1.run4, t2.run4id ,\nt1.run5, t2.run5id ,\nt1.run6, t2.run6id ,\nt1.run7, t2.run7id ,\nt1.run8, t2.run8id ,\nt1.run9, t2.run9id ,\nt1.run10, t2.run10id \nFROM (SELECT * FROM crosstab('SELECT github_workflows.workflow_id, name as workflow_name, filename AS workflow_filename, 
dashboard_category, run_number, status \n FROM github_workflow_runs\nINNER JOIN github_workflows ON github_workflow_runs.workflow_id = github_workflows.workflow_id\nORDER BY 1,5 DESC\n','SELECT m from generate_series(1,10) m')\nAS c1(workflow_id text, workflow_name text, workflow_filename text, dashboard_category text, run1 text, run2 text,run3 text, \n run4 text ,run5 text,run6 text, run7 text, \n run8 text ,run9 text,run10 text) \n\t ) AS t1\n JOIN (SELECT * FROM crosstab('SELECT name, status, github_workflow_runs.url AS run_url\nFROM github_workflow_runs\nINNER JOIN github_workflows ON github_workflow_runs.workflow_id = github_workflows.workflow_id\nORDER BY 1,3 DESC\n\n')\nAS c2(workflow_name text, run1id text, run2id text,run3id text, \n run4id text ,run5id text,run6id text, run7id text, \n run8id text ,run9id text,run10id text) \n\t\t\t ) AS t2\n ON t1.workflow_name = t2.workflow_name\n\t\t )\n select\n *,\n case when run1 like 'success' then 1 when run1 like 'in_progress' then 2 when run1 like 'queued' then 2 when run1 like 'waiting' then 2 when run1 like 'cancelled' then 3 when run1 like 'failure' then 0 else 4 end as run_1,\n case when run2 like 'success' then 1 when run2 like 'in_progress' then 2 when run2 like 'queued' then 2 when run2 like 'waiting' then 2 when run2 like 'cancelled' then 3 when run2 like 'failure' then 0 else 4 end as run_2,\n case when run3 like 'success' then 1 when run3 like 'in_progress' then 2 when run3 like 'queued' then 2 when run3 like 'waiting' then 2 when run3 like 'cancelled' then 3 when run3 like 'failure' then 0 else 4 end as run_3,\n case when run4 like 'success' then 1 when run4 like 'in_progress' then 2 when run4 like 'queued' then 2 when run4 like 'waiting' then 2 when run4 like 'cancelled' then 3 when run4 like 'failure' then 0 else 4 end as run_4,\n case when run5 like 'success' then 1 when run5 like 'in_progress' then 2 when run5 like 'queued' then 2 when run5 like 'waiting' then 2 when run5 like 'cancelled' then 3 
when run5 like 'failure' then 0 else 4 end as run_5,\n case when run6 like 'success' then 1 when run6 like 'in_progress' then 2 when run6 like 'queued' then 2 when run6 like 'waiting' then 2 when run6 like 'cancelled' then 3 when run6 like 'failure' then 0 else 4 end as run_6,\n case when run7 like 'success' then 1 when run7 like 'in_progress' then 2 when run7 like 'queued' then 2 when run7 like 'waiting' then 2 when run7 like 'cancelled' then 3 when run7 like 'failure' then 0 else 4 end as run_7,\n case when run8 like 'success' then 1 when run8 like 'in_progress' then 2 when run8 like 'queued' then 2 when run8 like 'waiting' then 2 when run8 like 'cancelled' then 3 when run8 like 'failure' then 0 else 4 end as run_8,\n case when run9 like 'success' then 1 when run9 like 'in_progress' then 2 when run9 like 'queued' then 2 when run9 like 'waiting' then 2 when run9 like 'cancelled' then 3 when run9 like 'failure' then 0 else 4 end as run_9,\n case when run10 like 'success' then 1 when run10 like 'in_progress' then 2 when run10 like 'queued' then 2 when run10 like 'waiting' then 2 when run10 like 'cancelled' then 3 when run10 like 'failure' then 0 else 4 end as run_10\n from\n temp\n where\n dashboard_category = 'misc'\n)\nselect\n workflow_name,\n workflow_filename,\n run_1,\n run1Id,\n run_2,\n run2Id,\n run_3,\n run3Id,\n run_4,\n run4Id,\n run_5,\n run5Id,\n run_6,\n run6Id,\n run_7,\n run7Id,\n run_8,\n run8Id,\n run_9,\n run9Id,\n run_10,\n run10Id\nfrom\n workflows;", "refId": "A", "select": [ [ diff --git a/.test-infra/metrics/grafana/provisioning/alerts/flaky_test.yaml b/.test-infra/metrics/grafana/provisioning/alerts/flaky_test.yaml index 66bcc46f1e13..4a04ddb57490 100644 --- a/.test-infra/metrics/grafana/provisioning/alerts/flaky_test.yaml +++ b/.test-infra/metrics/grafana/provisioning/alerts/flaky_test.yaml @@ -21,7 +21,7 @@ groups: rules: - uid: flaky_test title: flaky_test - condition: E + condition: B data: - refId: A relativeTimeRange: @@ -35,113 
+35,49 @@ groups: intervalMs: 1000 maxDataPoints: 43200 rawQuery: true - rawSql: "SELECT COUNT(run_id), CAST (github_workflows.workflow_id AS TEXT), name AS workflow_name , filename AS workflow_filename , dashboard_category , github_workflows.url AS workflow_url, CAST(threshold AS TEXT) AS workflow_treshold \nFROM github_workflow_runs INNER JOIN github_workflows\nON github_workflow_runs.workflow_id = github_workflows.workflow_id \nWHERE status = 'failure' \nGROUP BY github_workflows.workflow_id, workflow_name, workflow_filename" + rawSql: |- + SELECT COUNT(workflow_id), CAST(workflow_id AS TEXT), name AS workflow_name, filename AS workflow_filename, url AS workflow_url, CAST(threshold AS TEXT) AS workflow_threshold + FROM github_workflows + WHERE is_flaky = true + GROUP BY workflow_id refId: A + sql: + columns: + - parameters: [ ] + type: function + groupBy: + - property: + type: string + type: groupBy + limit: 50 - refId: B - relativeTimeRange: - from: 600 - to: 0 - datasourceUid: beampsql - model: - datasource: - type: postgres - uid: beampsql - editorMode: code - format: table - hide: false - intervalMs: 1000 - maxDataPoints: 43200 - rawQuery: true - rawSql: SELECT MAX(threshold), CAST(workflow_id AS TEXT) FROM github_workflows GROUP BY workflow_id - refId: B - - refId: C - relativeTimeRange: - from: 600 - to: 0 - datasourceUid: __expr__ - model: - conditions: - - evaluator: - params: - - 0 - - 0 - type: gt - operator: - type: and - query: - params: [] - reducer: - params: [] - type: avg - type: query - datasource: - name: Expression - type: __expr__ - uid: __expr__ - expression: 30 * $B - hide: false - intervalMs: 1000 - maxDataPoints: 43200 - refId: C - type: math - - refId: D relativeTimeRange: from: 600 to: 0 datasourceUid: __expr__ model: conditions: - - evaluator: - params: - - 0 - - 0 - type: gt - operator: - type: and - query: - params: [] - reducer: - params: [] - type: avg - type: query + - evaluator: + params: + - 0 + - 0 + type: gt + operator: + 
type: and + query: + params: [ ] + reducer: + params: [ ] + type: avg + type: query datasource: - name: Expression - type: __expr__ - uid: __expr__ - expression: $C - $A - hide: false + name: Expression + type: __expr__ + uid: __expr__ + expression: A intervalMs: 1000 maxDataPoints: 43200 - refId: D - type: math - - refId: E - relativeTimeRange: - from: 600 - to: 0 - datasourceUid: __expr__ - model: - conditions: - - evaluator: - params: - - 0 - - 0 - type: lt - operator: - type: and - query: - params: [] - reducer: - params: [] - type: avg - type: query - datasource: - name: Expression - type: __expr__ - uid: __expr__ - expression: D - intervalMs: 1000 - maxDataPoints: 43200 - refId: E + refId: B type: threshold noDataState: NoData execErrState: Error diff --git a/.test-infra/metrics/sync/github/github_runs_prefetcher/code/main.py b/.test-infra/metrics/sync/github/github_runs_prefetcher/code/main.py index 12a7f65eeba6..a66052647f57 100644 --- a/.test-infra/metrics/sync/github/github_runs_prefetcher/code/main.py +++ b/.test-infra/metrics/sync/github/github_runs_prefetcher/code/main.py @@ -16,13 +16,11 @@ """ This module queries GitHub API to collect Beam-related workflows metrics and put them in PostgreSQL. 
-This script is running every 3 hours as a cloud function -"github_actions_workflows_dashboard_sync" in apache-beam-testing project: -https://console.cloud.google.com/functions/details/us-central1/github_actions_workflows_dashboard_sync?env=gen1&project=apache-beam-testing -This cloud function is triggered by a pubsub topic: -https://console.cloud.google.com/cloudpubsub/topic/detail/github_actions_workflows_sync?project=apache-beam-testing -Cron Job: -https://console.cloud.google.com/cloudscheduler/jobs/edit/us-central1/github_actions_workflows_dashboard_sync?project=apache-beam-testing +This script is running every 3 hours as a http cloud function +"github_workflow_prefetcher" in apache-beam-testing project: +https://console.cloud.google.com/functions/details/us-central1/github_workflow_prefetcher?env=gen1&project=apache-beam-testing +This cloud function is triggered by a scheduler: +https://console.cloud.google.com/cloudscheduler/jobs/edit/us-central1/github_workflow_prefetcher-scheduler?project=apache-beam-testing """ import asyncio @@ -34,9 +32,11 @@ import time import re import psycopg2 +import uuid from psycopg2 import extras from ruamel.yaml import YAML from github import GithubIntegration +from datetime import datetime DB_HOST = os.environ["DB_HOST"] DB_PORT = os.environ["DB_PORT"] @@ -47,20 +47,21 @@ GH_APP_INSTALLATION_ID = os.environ["GH_APP_INSTALLATION_ID"] GH_PEM_KEY = os.environ["GH_PEM_KEY"] GH_NUMBER_OF_WORKFLOW_RUNS_TO_FETCH = os.environ["GH_NUMBER_OF_WORKFLOW_RUNS_TO_FETCH"] -GIT_REPO = "beam" +GIT_ORG = "apache" GIT_PATH = ".github/workflows" GIT_FILESYSTEM_PATH = "/tmp/git" class Workflow: - def __init__(self, id, name, filename, url, category=None, threshold=0.5): + def __init__(self, id, name, filename, url, category=None, threshold=0.5, is_flaky=False): self.id = id self.name = name self.filename = filename self.url = url - self.runs = [] self.category = category self.threshold = threshold + self.is_flaky = is_flaky + self.runs = [] class 
WorkflowRun: @@ -77,7 +78,7 @@ def clone_git_beam_repo(dest_path): if not os.path.exists(filesystem_path): os.mkdir(filesystem_path) os.chdir(filesystem_path) - os.system(f"git clone --filter=blob:none --sparse https://github.com/apache/beam") + os.system(f"git clone --filter=blob:none --sparse https://github.com/{GIT_ORG}/beam") os.chdir("beam") os.system("git sparse-checkout init --cone") os.system(f"git sparse-checkout set {dest_path}") @@ -134,7 +135,7 @@ def enhance_workflow(workflow): workflow_filename = workflow.filename.replace("workflows/", "") try: workflow_yaml = get_yaml( - f"{GIT_FILESYSTEM_PATH}/{GIT_REPO}/{GIT_PATH}/{workflow_filename}" + f"{GIT_FILESYSTEM_PATH}/beam/{GIT_PATH}/{workflow_filename}" ) if "env" in workflow_yaml: if "ALERT_THRESHOLD" in workflow_yaml["env"]: @@ -143,6 +144,45 @@ def enhance_workflow(workflow): print(f"No yaml file found for workflow: {workflow.name}") +async def check_workflow_flakiness(workflow): + def filter_workflow_runs(run, issue): + started_at = datetime.strptime(run.started_at, "%Y-%m-%dT%H:%M:%SZ") + closed_at = datetime.strptime(issue["closed_at"], "%Y-%m-%dT%H:%M:%SZ") + if started_at > closed_at: + return True + return False + + if not len(workflow.runs): + return False + + url = f"https://api.github.com/repos/{GIT_ORG}/beam/issues" + headers = {"Authorization": get_token()} + semaphore = asyncio.Semaphore(5) + workflow_runs = workflow.runs + params = { + "state": "closed", + "labels": f"flaky_test,workflow_id: {workflow.id}", + } + response = await fetch(url, semaphore, params, headers) + if len(response): + print(f"Found a recently closed issue for the {workflow.name} workflow") + workflow_runs = [run for run in workflow_runs if filter_workflow_runs(run, response[0])] + + print(f"Number of workflow runs to consider: {len(workflow_runs)}") + if len(workflow_runs) < 3: + print(f"Insufficient number of runs for consideration") + return False + + success_rate = 1.0 + if len(workflow_runs): + failed_runs = 
list(filter(lambda r: r.status == "failure", workflow_runs)) + print(f"Number of failed workflow runs: {len(failed_runs)}") + success_rate -= (len(failed_runs) / len(workflow_runs)) + + print(f"Success rate: {success_rate}") + return True if success_rate < workflow.threshold else False + + def github_workflows_dashboard_sync(request): # Entry point for cloud function, don't change signature return asyncio.run(sync_workflow_runs()) @@ -165,6 +205,13 @@ async def sync_workflow_runs(): for workflow in workflows: enhance_workflow(workflow) + for workflow in workflows: + print(f"Checking if the {workflow.name} workflow is flaky...") + is_flaky = await check_workflow_flakiness(workflow) + if is_flaky: + workflow.is_flaky = True + print(f"Workflow {workflow.name} is flaky!") + save_workflows(workflows) print("Done") @@ -242,7 +289,7 @@ def append_workflow_runs(workflow, runs): ) ) - url = "https://api.github.com/repos/apache/beam/actions/workflows" + url = f"https://api.github.com/repos/{GIT_ORG}/beam/actions/workflows" headers = {"Authorization": get_token()} page = 1 number_of_entries_per_page = 100 # The number of results per page (max 100) @@ -341,10 +388,16 @@ def append_workflow_runs(workflow, runs): number_of_runs_to_add = int(GH_NUMBER_OF_WORKFLOW_RUNS_TO_FETCH) - len( workflow.runs ) - workflow.runs.extend( - [WorkflowRun(0, "None", "None", workflow.id, "None")] - * number_of_runs_to_add - ) + for _ in range(number_of_runs_to_add): + workflow.runs.append( + WorkflowRun( + uuid.uuid4().int, + "None", + "None", + workflow.id, + "0001-01-01T00:00:00Z" + ) + ) if len(workflow.runs) >= int(GH_NUMBER_OF_WORKFLOW_RUNS_TO_FETCH): workflow_ids_to_fetch_extra_runs.pop(workflow_id, None) print(f"Successfully fetched extra workflow runs for: {workflow.filename}") @@ -353,7 +406,7 @@ def append_workflow_runs(workflow, runs): for workflow in list(workflows.values()): runs = sorted(workflow.runs, key=lambda r: r.id, reverse=True) - workflow.runs = runs[: 
int(GH_NUMBER_OF_WORKFLOW_RUNS_TO_FETCH)] + workflow.runs = runs[:int(GH_NUMBER_OF_WORKFLOW_RUNS_TO_FETCH)] return list(workflows.values()) @@ -370,25 +423,26 @@ def save_workflows(workflows): cursor.execute(f"DROP TABLE IF EXISTS {workflows_table_name};") create_workflows_table_query = f""" CREATE TABLE IF NOT EXISTS {workflows_table_name} ( - workflow_id integer NOT NULL PRIMARY KEY, + workflow_id text NOT NULL PRIMARY KEY, name text NOT NULL, filename text NOT NULL, url text NOT NULL, dashboard_category text NOT NULL, - threshold real NOT NULL)\n""" + threshold real NOT NULL, + is_flaky boolean NOT NULL)\n""" create_workflow_runs_table_query = f""" CREATE TABLE IF NOT EXISTS {workflow_runs_table_name} ( run_id text NOT NULL PRIMARY KEY, run_number integer NOT NULL, status text NOT NULL, url text NOT NULL, - workflow_id integer NOT NULL, + workflow_id text NOT NULL, started_at timestamp with time zone NOT NULL, CONSTRAINT fk_workflow FOREIGN KEY(workflow_id) REFERENCES {workflows_table_name}(workflow_id))\n""" cursor.execute(create_workflows_table_query) cursor.execute(create_workflow_runs_table_query) insert_workflows_query = f""" - INSERT INTO {workflows_table_name} (workflow_id, name, filename, url, dashboard_category, threshold) + INSERT INTO {workflows_table_name} (workflow_id, name, filename, url, dashboard_category, threshold, is_flaky) VALUES %s""" insert_workflow_runs_query = f""" INSERT INTO {workflow_runs_table_name} (run_id, run_number, status, url, workflow_id, started_at) @@ -404,16 +458,11 @@ def save_workflows(workflows): workflow.url, workflow.category, workflow.threshold, + workflow.is_flaky, ) ) - run_number = 1 - for run in workflow.runs: - if run.id != 0: - started_at = run.started_at.replace("T", " ") - insert_workflow_runs.append( - (run.id, run_number, run.status, run.url, workflow.id, started_at) - ) - run_number += 1 + for idx, run in enumerate(workflow.runs): + insert_workflow_runs.append((run.id, idx+1, run.status, run.url, 
run.workflow_id, run.started_at)) psycopg2.extras.execute_values(cursor, insert_workflows_query, insert_workflows) psycopg2.extras.execute_values( cursor, insert_workflow_runs_query, insert_workflow_runs diff --git a/.test-infra/tools/flaky_test_detection.py b/.test-infra/tools/flaky_test_detection.py index ec21e44bc1de..dc69aa5982e5 100644 --- a/.test-infra/tools/flaky_test_detection.py +++ b/.test-infra/tools/flaky_test_detection.py @@ -20,10 +20,9 @@ from github import Auth +ALERT_NAME = "flaky_test" GIT_ORG = "apache" -GIT_REPO = "beam" GRAFANA_URL = "https://metrics.beam.apache.org" -ALERT_NAME = "flaky_test" READ_ONLY = os.environ.get("READ_ONLY", "false") @@ -39,24 +38,24 @@ def __init__( self.workflow_id = workflow_id self.workflow_url = workflow_url self.workflow_name = workflow_name - self.workflow_file_name = workflow_filename + self.workflow_filename = workflow_filename self.workflow_threshold = round(float(workflow_threshold), 2) -def extract_workflow_id_from_issue_label(issues): - label_ids = [] +def get_workflow_issues(issues): + workflows = {} for issue in issues: for label in issue.get_labels(): match = re.search(r"workflow_id:\s*(\d+)", str(label.name)) if match: - label_id = match.group(1) - label_ids.append(label_id) + workflow_id = match.group(1) + workflows[workflow_id] = issue - return label_ids + return workflows def create_github_issue(repo, alert): - failing_runs_url = f"https://github.com/{GIT_ORG}/beam/actions/{alert.workflow_file_name}?query=is%3Afailure+branch%3Amaster" + failing_runs_url = f"https://github.com/{GIT_ORG}/beam/actions/{alert.workflow_filename}?query=is%3Afailure+branch%3Amaster" title = f"The {alert.workflow_name} job is flaky" body = f"The {alert.workflow_name } is failing over {int(alert.workflow_threshold * 100)}% of the time \nPlease visit {failing_runs_url} to see the logs." 
labels = ["flaky_test", f"workflow_id: {alert.workflow_id}", "bug", "P1"] @@ -103,17 +102,26 @@ def main(): token = os.environ["GITHUB_TOKEN"] auth = Auth.Token(token) g = Github(auth=auth) - repo = g.get_repo(f"{GIT_ORG}/{GIT_REPO}") + repo = g.get_repo(f"{GIT_ORG}/beam") alerts = get_grafana_alerts() open_issues = repo.get_issues(state="open", labels=["flaky_test"]) - workflow_ids = extract_workflow_id_from_issue_label(open_issues) + closed_issues = repo.get_issues(state="closed", labels=["flaky_test"]) + workflow_open_issues = get_workflow_issues(open_issues) + workflow_closed_issues = get_workflow_issues(closed_issues) for alert in alerts: - if alert.workflow_id not in workflow_ids: + if alert.workflow_id in workflow_closed_issues.keys(): + issue = workflow_closed_issues[alert.workflow_id] + if READ_ONLY == "true": + print("READ_ONLY is true, not reopening issue") + else: + issue.edit(state="open") + issue.create_comment(body="Reopening since the workflow is still flaky") + print(f"The issue for the workflow {alert.workflow_id} has been reopened") + elif alert.workflow_id not in workflow_open_issues.keys(): create_github_issue(repo, alert) - workflow_ids.append(alert.workflow_id) else: - print("Issue already exists, skipping") + print("Issue is already open, skipping") g.close() diff --git a/.test-infra/tools/requirements.txt b/.test-infra/tools/requirements.txt new file mode 100644 index 000000000000..70d4f0a32715 --- /dev/null +++ b/.test-infra/tools/requirements.txt @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +requests +PyGithub \ No newline at end of file