Skip to content

Commit

Permalink
fetch tools stepwise with CI
Browse files Browse the repository at this point in the history
  • Loading branch information
paulzierep committed Nov 23, 2023
1 parent 0f523f9 commit 071f01b
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 8 deletions.
35 changes: 32 additions & 3 deletions .github/workflows/fetch_all_tools.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,17 @@ permissions:
contents: write

jobs:
fetch-all-tools:
fetch-all-tools-stepwise:
runs-on: ubuntu-latest
name: Fetch all tool stepwise
strategy:
matrix:
python-version: [3.8]
subset:
- repositories01.list
- repositories02.list
- repositories03.list
- repositories04.list
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
Expand All @@ -26,8 +35,7 @@ jobs:
- name: Run script
  # Each matrix job extracts the tools of one planemo-monitor repository list.
  # The command must NOT be wrapped in quotes as a whole: a fully quoted line
  # is looked up by the shell as a single program name and fails with
  # "command not found". Only the matrix value is quoted.
  run: |
    bash ./bin/extract_all_tools_stepwise.sh "${{ matrix.subset }}"
  env:
    GITHUB_API_KEY: ${{ secrets.GITHUB_TOKEN }}
- name: Commit all tools
Expand All @@ -37,3 +45,24 @@ jobs:
git add results
git commit -m "fetch all tools bot"
git push
fetch-all-tools-merge:
  # Merges the per-subset tables written by the stepwise matrix jobs and
  # rebuilds the interactive table, so it must wait for all of them.
  needs: fetch-all-tools-stepwise
  runs-on: ubuntu-latest
  name: Fetch all tools merge
  steps:
    # NOTE(review): actions/checkout defaults to the commit that triggered the
    # run; results pushed by the stepwise jobs during this run may therefore be
    # missing from the checkout - confirm, and pass an explicit `ref` if so.
    - uses: actions/checkout@v3
    - uses: actions/setup-python@v4
    - name: Install requirement
      run: python -m pip install -r requirements.txt
    - name: Run script
      # One command per line, unquoted: a fully quoted line would be treated
      # by the shell as a single (non-existent) program name.
      run: |
        bash ./bin/extract_all_tools_merge.sh
        bash ./bin/extract_all_tools_downstream.sh
      env:
        GITHUB_API_KEY: ${{ secrets.GITHUB_TOKEN }}
    - name: Commit all tools
      run: |
        git config user.name github-actions
        git config user.email github-actions@github.com
        git add results
        git commit -m "fetch all tools bot"
        git push
8 changes: 8 additions & 0 deletions bin/extract_all_tools_downstream.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/usr/bin/env bash
# Downstream step: run bin/create_interactive_table.py on the merged tool
# table, passing the HTML template and the output page location.

# Make sure the output directory is there before the Python script writes to it.
results_dir='results/'
mkdir -p "$results_dir"

# Arguments are collected in an array so each option/value pair stays together.
table_args=(
    --table "results/all_tools.tsv"
    --template "data/interactive_table_template.html"
    --output "results/index.html"
)

python bin/create_interactive_table.py "${table_args[@]}"
4 changes: 4 additions & 0 deletions bin/extract_all_tools_merge.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/usr/bin/env bash
# Concatenate every per-subset table results/*_tools.tsv into
# results/all_tools.tsv.
#
# NOTE(review): files are concatenated verbatim. If each per-subset table
# starts with a header row, the header is repeated in the merged file -
# confirm what the consumer of all_tools.tsv expects.

set -euo pipefail

merged='results/all_tools.tsv'

# The pattern has to stay outside quotes, otherwise the shell passes the
# literal string "results/*_tools.tsv" to cat instead of the matching files.
# nullglob makes an unmatched pattern expand to nothing rather than to itself.
shopt -s nullglob
parts=()
for table in results/*_tools.tsv; do
    # The merged file matches the same pattern; never feed it back into itself.
    if [[ "$table" != "$merged" ]]; then
        parts+=("$table")
    fi
done

if (( ${#parts[@]} == 0 )); then
    echo "No per-subset tables (results/*_tools.tsv) found to merge" >&2
    exit 1
fi

cat -- "${parts[@]}" > "$merged"

12 changes: 12 additions & 0 deletions bin/extract_all_tools_stepwise.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/usr/bin/env bash
# Extract the tools of a single planemo-monitor repository list.
#
# Usage: extract_all_tools_stepwise.sh <repository-list>
#   <repository-list>  value forwarded to --planemorepository,
#                      e.g. repositories01.list
#
# The GitHub token is read from the GITHUB_API_KEY environment variable and
# the table is written to results/<repository-list>_tools.tsv.

# Stop with a clear message instead of handing empty values to the Python CLI.
subset="${1:?usage: $0 <repository-list>}"
: "${GITHUB_API_KEY:?GITHUB_API_KEY must be set}"

mkdir -p 'results/'

output="results/${subset}_tools.tsv"

# All expansions are quoted so values are passed as exactly one argument each.
python bin/extract_galaxy_tools.py \
    extractools \
    --api "$GITHUB_API_KEY" \
    --all_tools "$output" \
    --planemorepository "$subset"

23 changes: 18 additions & 5 deletions bin/extract_galaxy_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,18 +50,27 @@ def get_string_content(cf: ContentFile) -> str:
return base64.b64decode(cf.content).decode("utf-8")


def get_tool_github_repositories(g: Github, RepoSelection: Optional[str] = None) -> List[str]:
    """
    Get list of tool GitHub repositories to parse

    Reads the ``repositories01.list`` ... ``repositories04.list`` files from the
    ``galaxyproject/planemo-monitor`` repository and returns their non-blank
    lines.

    :param g: GitHub instance
    :param RepoSelection: Name of the single list file to read (needed to split
        the process for CI jobs). When ``None`` or empty, all four list files
        are read. A name that matches none of the four files yields an empty
        list.
    :return: Lines collected from the selected list file(s), in file order
    """
    repo = g.get_user("galaxyproject").get_repo("planemo-monitor")
    repo_list: List[str] = []
    for i in range(1, 5):
        repo_selection = f"repositories0{i}.list"
        # With a selection given, every other list file is skipped.
        if RepoSelection and RepoSelection != repo_selection:
            continue
        repo_f = repo.get_contents(repo_selection)
        # splitlines() copes with CRLF endings; blank lines are not repositories.
        repo_list.extend(line for line in get_string_content(repo_f).splitlines() if line.strip())
    return repo_list


Expand Down Expand Up @@ -390,6 +399,10 @@ def filter_tools(tools: List[Dict], ts_cat: List[str], excluded_tools: List[str]
extractools = subparser.add_parser("extractools", help="Extract tools")
extractools.add_argument("--api", "-a", required=True, help="GitHub access token")
extractools.add_argument("--all_tools", "-o", required=True, help="Filepath to TSV with all extracted tools")
extractools.add_argument(
"--planemorepository", "-pr", required=False, help="Repository list to use from the planemo-monitor repository"
)

# Filter tools
filtertools = subparser.add_parser("filtertools", help="Filter tools")
filtertools.add_argument(
Expand All @@ -412,7 +425,7 @@ def filter_tools(tools: List[Dict], ts_cat: List[str], excluded_tools: List[str]
# connect to GitHub
g = Github(args.api)
# get list of GitHub repositories to parse
repo_list = get_tool_github_repositories(g)
repo_list = get_tool_github_repositories(g, args.planemorepository)
# parse tools in GitHub repositories to extract metadata, filter by TS categories and export to output file
tools: List[Dict] = []
for r in repo_list:
Expand Down

0 comments on commit 071f01b

Please sign in to comment.