Fetch all tools #69
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Fetch all tools

# Triggers: manual start from the Actions tab, plus a weekly scheduled run.
on:
  schedule:
    # Sundays at 08:00 (GitHub evaluates cron schedules in UTC).
    - cron: '0 8 * * 0'
  workflow_dispatch:

# The jobs push commits to a branch of this repository with GITHUB_TOKEN,
# so the token needs write access to repository contents.
permissions:
  contents: write

# Only one run of the `tools` group executes at a time. Runs queued between
# the one in progress and the newest queued run are skipped, but a run that
# is already in progress is never cancelled and is allowed to complete.
concurrency:
  group: 'tools'
  cancel-in-progress: false
jobs:
  # Job 1: run the extraction script once per repository subset and publish
  # each partial result to the `results` branch.
  fetch-all-tools-stepwise:
    # The ubuntu-20.04 hosted runner image has been retired by GitHub, so a
    # job requesting it can no longer be scheduled; use a supported image.
    runs-on: ubuntu-22.04
    environment: fetch-tools
    name: Fetch all tool stepwise
    strategy:
      # The matrix entries must run one after another: otherwise several jobs
      # may try to push to the results branch at the same time, which fails
      # due to merge conflicts.
      max-parallel: 1
      matrix:
        # Quoted so the version stays a string (an unquoted 3.10 would be
        # parsed as the float 3.1).
        python-version: ["3.11"]
        subset:
          - repositories01.list
          - repositories02.list
          - repositories03.list
          - repositories04.list
    steps:
      - name: Checkout main
        uses: actions/checkout@v4
        with:
          # Check out the current tip of main, not the revision that started
          # the workflow (https://github.com/actions/checkout/issues/439).
          ref: main
      # The results branch is checked out into ./results so new output is
      # written on top of what earlier matrix entries already published.
      - name: Checkout results
        uses: actions/checkout@v4
        with:
          ref: results
          path: results
      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install requirement
        run: python -m pip install -r requirements.txt
      # Needs a personal access token (PAT) to access other repositories.
      - name: Run script
        run: |
          bash bin/extract_all_tools.sh "${{ matrix.subset }}"
        env:
          GITHUB_API_KEY: ${{ secrets.GH_API_TOKEN }}
      # Publish the ./results folder to the `results` branch of this repo;
      # no commit is created when nothing changed.
      # NOTE(review): the action is pinned to the mutable `develop` branch;
      # consider pinning to a release tag or commit SHA.
      - name: Commit all tools
        uses: s0/git-publish-subdir-action@develop
        env:
          BRANCH: results
          FOLDER: results
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          REPO: self
          SKIP_EMPTY_COMMITS: true
          MESSAGE: "Update results step: fetch all tool stepwise - commmit {sha}"

  # Job 2: once every subset has been processed, merge the per-subset files
  # into single TSV/JSON files, build the derived outputs and publish them.
  fetch-all-tools-merge:
    # See job 1: ubuntu-20.04 is no longer available on hosted runners.
    runs-on: ubuntu-22.04
    needs: fetch-all-tools-stepwise
    name: Fetch all tools merge
    steps:
      - name: Checkout main
        uses: actions/checkout@v4
        with:
          # Check out the current tip of main, not the revision that started
          # the workflow (https://github.com/actions/checkout/issues/439).
          ref: main
      - name: Checkout results
        uses: actions/checkout@v4
        with:
          ref: results
          path: results
      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      - name: Install requirement
        # `-y` keeps apt-get non-interactive; without it the install aborts
        # on a runner whenever apt asks for confirmation.
        run: |
          python -m pip install -r requirements.txt
          sudo apt-get install -y jq
      # TSV: concatenate the per-subset files keeping only the header of the
      # first one -> https://stackoverflow.com/questions/16890582/unixmerge-multiple-csv-files-with-same-header-by-keeping-the-header-of-the-firs
      # JSON: map(.[]) flattens the per-subset arrays into one flat array,
      # one tool per entry -> https://stackoverflow.com/questions/42011086/merge-arrays-of-json
      - name: Merge all tools
        run: |
          awk 'FNR==1 && NR!=1{next;}{print}' results/repositories*.list_tools.tsv > results/all_tools.tsv
          jq -s 'map(.[])' results/repositories*.list_tools.json > results/all_tools.json
      - name: Wordcloud and interactive table
        run: |
          bash bin/format_tools.sh
      # Publish the ./results folder to the `results` branch of this repo;
      # no commit is created when nothing changed.
      # NOTE(review): the action is pinned to the mutable `develop` branch;
      # consider pinning to a release tag or commit SHA.
      - name: Commit all tools
        uses: s0/git-publish-subdir-action@develop
        env:
          BRANCH: results
          FOLDER: results
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          REPO: self
          SKIP_EMPTY_COMMITS: true
          MESSAGE: "Update results step: fetch all tools merge - commmit {sha}"