From d20964395d896d6aea6550a45db8d169c1e9d3cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9r=C3=A9nice=20Batut?= Date: Fri, 6 Sep 2024 15:49:02 +0200 Subject: [PATCH] Reorganize GitHub action workflows - Merge weekly workflows into 1 workflow that will open a PR with the changes - Update other workflows to the new structure --- .github/workflows/fetch_all_tools.yaml | 100 ---------------- .github/workflows/fetch_all_tutorials.yaml | 51 -------- .github/workflows/fetch_filter_resources.yaml | 75 ++++++++++++ .github/workflows/filter_communities.yaml | 110 +++--------------- .github/workflows/run_tests.yaml | 24 +--- .github/workflows/utilities.yml | 46 -------- 6 files changed, 96 insertions(+), 310 deletions(-) delete mode 100644 .github/workflows/fetch_all_tools.yaml delete mode 100644 .github/workflows/fetch_all_tutorials.yaml create mode 100644 .github/workflows/fetch_filter_resources.yaml delete mode 100644 .github/workflows/utilities.yml diff --git a/.github/workflows/fetch_all_tools.yaml b/.github/workflows/fetch_all_tools.yaml deleted file mode 100644 index 4bbfc5e8..00000000 --- a/.github/workflows/fetch_all_tools.yaml +++ /dev/null @@ -1,100 +0,0 @@ -name: Fetch all tools - -on: - workflow_dispatch: - schedule: - #Every Sunday at 8:00 am - - cron: "0 8 * * 0" - -# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. -# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 
-concurrency: - group: "tools" - cancel-in-progress: false - -permissions: - contents: write - -jobs: - - fetch-all-tools-stepwise: - runs-on: ubuntu-20.04 - environment: fetch-tools - name: Fetch all tool stepwise - strategy: - max-parallel: 1 #need to run one after another, since otherwise there is a chance, that mulitple jobs want to push to the results branch at the same time (which fails due to merge) - matrix: - python-version: [3.11] - subset: - - repositories01.list - - repositories02.list - - repositories03.list - - repositories04.list - steps: - - name: Checkout main - uses: actions/checkout@v4 - with: - ref: main #pull latest code produced by job 1, not the revision that started the workflow (https://github.com/actions/checkout/issues/439) - - name: Checkout results - uses: actions/checkout@v4 - with: - ref: results - path: results - - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install requirement - run: python -m pip install -r requirements.txt - - name: Run script #needs PAT to access other repos - run: | - bash bin/extract_all_tools.sh "${{ matrix.subset }}" - env: - GITHUB_API_KEY: ${{ secrets.GH_API_TOKEN }} - - name: Commit all tools - uses: s0/git-publish-subdir-action@develop - env: - BRANCH: results - FOLDER: results - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - REPO: self - SKIP_EMPTY_COMMITS: true - MESSAGE: "Update results step: fetch all tool stepwise - commmit {sha}" - - fetch-all-tools-merge: - runs-on: ubuntu-20.04 - needs: fetch-all-tools-stepwise - name: Fetch all tools merge - steps: - - name: Checkout main - uses: actions/checkout@v4 - with: - ref: main #pull latest code produced by job 1, not the revision that started the workflow (https://github.com/actions/checkout/issues/439) - - name: Checkout results - uses: actions/checkout@v4 - with: - ref: results - path: results - - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - name: Install requirement - run: | - 
python -m pip install -r requirements.txt - sudo apt-get install jq - - name: Merge all tools - run: | #merge files with only one header -> https://stackoverflow.com/questions/16890582/unixmerge-multiple-csv-files-with-same-header-by-keeping-the-header-of-the-firs; map(.[]) -> https://stackoverflow.com/questions/42011086/merge-arrays-of-json (get flat array, one tool per entry) - awk 'FNR==1 && NR!=1{next;}{print}' results/repositories*.list_tools.tsv > results/all_tools.tsv - jq -s 'map(.[])' results/repositories*.list_tools.json > results/all_tools.json - - name: Wordcloud and interactive table - run: | - bash bin/format_tools.sh - - name: Commit all tools - # add or commit any changes in results if there was a change, merge with main and push as bot - uses: s0/git-publish-subdir-action@develop - env: - BRANCH: results - FOLDER: results - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - REPO: self - SKIP_EMPTY_COMMITS: true - MESSAGE: "Update results step: fetch all tools merge - commmit {sha}" \ No newline at end of file diff --git a/.github/workflows/fetch_all_tutorials.yaml b/.github/workflows/fetch_all_tutorials.yaml deleted file mode 100644 index 85412d40..00000000 --- a/.github/workflows/fetch_all_tutorials.yaml +++ /dev/null @@ -1,51 +0,0 @@ -name: Fetch all tutorials - -on: - workflow_dispatch: - schedule: - #Every Sunday at 8:00 am - - cron: "0 8 * * 0" - -# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. -# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 
-concurrency: - group: "tutorials" - cancel-in-progress: false - -permissions: - contents: write - -jobs: - fetch-all-tutorials: - runs-on: ubuntu-20.04 - environment: fetch-tutorials - name: Fetch all tutorials - steps: - - name: Checkout main - uses: actions/checkout@v4 - with: - ref: main #pull latest code produced by job 1, not the revision that started the workflow (https://github.com/actions/checkout/issues/439) - - name: Checkout results - uses: actions/checkout@v4 - with: - ref: results - path: results - - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - name: Install requirement - run: python -m pip install -r requirements.txt - - name: Run script #needs PAT to access other repos - run: | - bash bin/extract_all_tutorials.sh - env: - PLAUSIBLE_API_KEY: ${{ secrets.PLAUSIBLE_API_TOKEN }} - - name: Commit all tools - uses: s0/git-publish-subdir-action@develop - env: - BRANCH: results - FOLDER: results - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - REPO: self - SKIP_EMPTY_COMMITS: true - MESSAGE: "Update results step: fetch all tutorials - commmit {sha}" diff --git a/.github/workflows/fetch_filter_resources.yaml b/.github/workflows/fetch_filter_resources.yaml new file mode 100644 index 00000000..a46f60ef --- /dev/null +++ b/.github/workflows/fetch_filter_resources.yaml @@ -0,0 +1,75 @@ +name: Weekly resource fetching and community filtering + +on: + workflow_dispatch: + schedule: + #Every Sunday at 8:00 am + - cron: "0 8 * * 0" + +# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. +# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 
+concurrency: + group: "tools" + cancel-in-progress: false + +jobs: + fetch-filter: + runs-on: ubuntu-20.04 + name: Fetch tool stepwise and merge, fetch tutorials and filter the resources for communities + strategy: + max-parallel: 1 #need to run one after another, since otherwise there is a chance, that multiple jobs want to push to the results branch at the same time (which fails due to merge) + matrix: + python-version: [3.11] + subset: + - repositories01.list + - repositories02.list + - repositories03.list + - repositories04.list + steps: + - name: Checkout main + uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install requirement + run: | + python -m pip install -r requirements.txt + sudo apt-get install jq + - name: Fetch list of all available servers + run: | + python sources/bin/get_public_galaxy_servers.py -o sources/data/available_public_servers.csv + - name: Fetch all tool stepwise + run: | + bash sources/bin/extract_all_tools.sh "${{ matrix.subset }}" + env: + GITHUB_API_KEY: ${{ secrets.GH_API_TOKEN }} + - name: Merge all tools + run: | #merge files with only one header -> https://stackoverflow.com/questions/16890582/unixmerge-multiple-csv-files-with-same-header-by-keeping-the-header-of-the-firs; map(.[]) -> https://stackoverflow.com/questions/42011086/merge-arrays-of-json (get flat array, one tool per entry) + awk 'FNR==1 && NR!=1{next;}{print}' communities/all/resources/repositories*.list_tools.tsv > communities/all/resources/tools.tsv + jq -s 'map(.[])' communities/all/resources/repositories*.list_tools.json > communities/all/resources/all_tools.json + - name: Generate wordcloud and interactive table + run: | + bash sources/bin/format_tools.sh + - name: Fetch all tutorials + run: | + bash bin/extract_all_tutorials.sh + env: + PLAUSIBLE_API_KEY: ${{ secrets.PLAUSIBLE_API_TOKEN }} + - name: Filter tutorials for communities + run: | + bash bin/get_community_tutorials.sh + - 
name: Update tool to keep and exclude for communities + run: | + bash bin/update_tools_to_keep_exclude.sh + - name: Filter tools for communities + run: | + bash bin/get_community_tools.sh + - name: Create Pull Request + uses: peter-evans/create-pull-request@v4 + with: + commit-message: Update resources + title: Automatic resources update + body: Automatic resource update done via GitHub Action once a week + base: main + branch: resource-update + delete-branch: true \ No newline at end of file diff --git a/.github/workflows/filter_communities.yaml b/.github/workflows/filter_communities.yaml index 48455f90..e81e0fd5 100644 --- a/.github/workflows/filter_communities.yaml +++ b/.github/workflows/filter_communities.yaml @@ -1,18 +1,11 @@ -name: Filter community tools +name: Filter community resources on: workflow_dispatch: - - # the workflow it triggered when all tools are fetched - workflow_run: - workflows: ["Fetch all tools"] - types: - - completed - # the workflow it also triggered when the community definitions are changed push: paths: - - 'data/communities**' + - 'data/communities/*/metadata/*' branches: ["main"] # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. 
@@ -21,104 +14,33 @@ concurrency: group: "filter" cancel-in-progress: false -permissions: - contents: write - jobs: - filter-all-tutorials: - name: Filter tutorials + filter-resources: + name: Filter resources for communities runs-on: ubuntu-20.04 steps: - name: Checkout main uses: actions/checkout@v4 - with: - ref: main #pull latest code produced by job 1, not the revision that started the workflow (https://github.com/actions/checkout/issues/439) - - name: Checkout results - uses: actions/checkout@v4 - with: - ref: results - path: results - uses: actions/setup-python@v5 with: python-version: '3.11' - name: Install requirement run: python -m pip install -r requirements.txt - - name: Run script + - name: Filter tutorials for communities run: | bash bin/get_community_tutorials.sh - - name: Commit results - # commit the new filtered data, only if stuff was changed - uses: s0/git-publish-subdir-action@develop - env: - BRANCH: results - FOLDER: results - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - REPO: self - SKIP_EMPTY_COMMITS: true - MESSAGE: "Update results step: filter tutorials - commmit {sha}" - - update-tools-to-keep-exclude: - name: Update tool to keep and exclude - runs-on: ubuntu-20.04 - steps: - - name: Checkout main - uses: actions/checkout@v4 - with: - ref: main #pull latest code produced by job 1, not the revision that started the workflow (https://github.com/actions/checkout/issues/439) - - name: Checkout results - uses: actions/checkout@v4 - with: - ref: results - path: results - - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - name: Install requirement - run: python -m pip install -r requirements.txt - - name: Run script + - name: Update tool to keep and exclude for communities run: | bash bin/update_tools_to_keep_exclude.sh - - name: Commit results - # commit the new filtered data, only if stuff was changed - uses: s0/git-publish-subdir-action@develop - env: - BRANCH: results - FOLDER: results - GITHUB_TOKEN: ${{ 
secrets.GITHUB_TOKEN }} - REPO: self - SKIP_EMPTY_COMMITS: true - MESSAGE: "Update results step: update tool to keep and exclude - commmit {sha}" - - filter-all-tools: - name: Filter all tools - runs-on: ubuntu-20.04 - steps: - - name: Checkout main - uses: actions/checkout@v4 - with: - ref: main #pull latest code produced by job 1, not the revision that started the workflow (https://github.com/actions/checkout/issues/439) - - name: Checkout results - uses: actions/checkout@v4 - with: - ref: results - path: results - - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - name: Install requirement - run: python -m pip install -r requirements.txt - - name: Run script + - name: Filter tools for communities run: | bash bin/get_community_tools.sh - - name: Commit results - # commit the new filtered data, only if stuff was changed - uses: s0/git-publish-subdir-action@develop - env: - BRANCH: results - FOLDER: results - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - REPO: self - SKIP_EMPTY_COMMITS: true - MESSAGE: "Update results step: filter all tools - commmit {sha}" - - \ No newline at end of file + - name: Create Pull Request + uses: peter-evans/create-pull-request@v4 + with: + commit-message: Update resources + title: Resource filtering + body: Resource filtering done via GitHub Action when the community definitions are changed + base: main + branch: resource-filtering + delete-branch: true \ No newline at end of file diff --git a/.github/workflows/run_tests.yaml b/.github/workflows/run_tests.yaml index 83fff590..28431a56 100644 --- a/.github/workflows/run_tests.yaml +++ b/.github/workflows/run_tests.yaml @@ -8,13 +8,6 @@ jobs: steps: - name: Checkout main uses: actions/checkout@v4 - with: - ref: main #pull latest code produced by job 1, not the revision that started the workflow (https://github.com/actions/checkout/issues/439) - - name: Checkout results - uses: actions/checkout@v4 - with: - ref: results - path: results - uses: actions/setup-python@v5 with: 
python-version: '3.11' @@ -22,27 +15,20 @@ jobs: run: python -m pip install -r requirements.txt - name: Tool extraction run: | - bash bin/extract_all_tools.sh test + bash sources/bin/extract_all_tools.sh test env: GITHUB_API_KEY: ${{ secrets.GH_API_TOKEN }} - name: Tool filter run: | - bash bin/get_community_tools.sh test + bash sources/bin/get_community_tools.sh test - name: Create interactive table and wordcloud run: | - bash bin/format_tools.sh + bash sources/bin/format_tools.sh test-tutorials: runs-on: ubuntu-20.04 steps: - name: Checkout main uses: actions/checkout@v4 - with: - ref: main #pull latest code produced by job 1, not the revision that started the workflow (https://github.com/actions/checkout/issues/439) - - name: Checkout results - uses: actions/checkout@v4 - with: - ref: results - path: results - uses: actions/setup-python@v5 with: python-version: '3.11' @@ -50,9 +36,9 @@ jobs: run: python -m pip install -r requirements.txt - name: Tutorial extraction run: | - bash bin/extract_all_tutorials.sh test + bash sources/bin/extract_all_tutorials.sh test env: PLAUSIBLE_API_KEY: ${{ secrets.PLAUSIBLE_API_TOKEN }} - name: Tutorial filtering run: | - bash bin/get_community_tutorials.sh test \ No newline at end of file + bash sources/bin/get_community_tutorials.sh test \ No newline at end of file diff --git a/.github/workflows/utilities.yml b/.github/workflows/utilities.yml deleted file mode 100644 index 0e42d3fd..00000000 --- a/.github/workflows/utilities.yml +++ /dev/null @@ -1,46 +0,0 @@ -name: Utilities -on: - workflow_dispatch: - schedule: - #Every Sunday at 7:00 am - - cron: "0 7 * * 0" - -# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. -# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 
-concurrency: - group: "tools" - cancel-in-progress: false - -permissions: - contents: write - -jobs: - fetch-available-servers: - runs-on: ubuntu-20.04 - name: Fetch list of all available servers - steps: - - name: Checkout main - uses: actions/checkout@v4 - with: - ref: main #pull latest code produced by job 1, not the revision that started the workflow (https://github.com/actions/checkout/issues/439) - - name: Checkout results - uses: actions/checkout@v4 - with: - ref: results - path: results - - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - name: Install requirement - run: python -m pip install -r requirements.txt - - name: Run script - run: | - python bin/get_public_galaxy_servers.py -o results/available_public_servers.csv - - name: Commit servers - uses: s0/git-publish-subdir-action@develop - env: - BRANCH: results - FOLDER: results - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - REPO: self - SKIP_EMPTY_COMMITS: true \ No newline at end of file