Skip to content

Commit

Permalink
Merge branch 'main' of github.com:galaxyproject/galaxy_tool_metadata_…
Browse files Browse the repository at this point in the history
…extractor into merge_keep_exclude_list
  • Loading branch information
bebatut committed Jun 3, 2024
2 parents 17d99f5 + 4ef5182 commit 3c6371d
Show file tree
Hide file tree
Showing 14 changed files with 254 additions and 101 deletions.
8 changes: 8 additions & 0 deletions CODE_OF_CONDUCT.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Code of Conduct
===============

As part of the Galaxy Community, this project is committed to providing a
welcoming and harassment-free experience for everyone. We therefore expect
participants to abide by our Code of Conduct, which can be found at:

https://galaxyproject.org/community/coc/
2 changes: 1 addition & 1 deletion bin/extract_all_tools.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ mkdir -p 'results/'
python bin/extract_galaxy_tools.py \
extractools \
--api $GITHUB_API_KEY \
--all_tools 'results/all_tools.tsv'
--all-tools 'results/all_tools.tsv'

python bin/create_interactive_table.py \
--table "results/all_tools.tsv" \
Expand Down
17 changes: 14 additions & 3 deletions bin/extract_all_tools_stepwise.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,20 @@ mkdir -p 'results/'

output="results/${1}_tools.tsv"

python bin/extract_galaxy_tools.py \
if [[ $1 =~ "01" ]]; then
python bin/extract_galaxy_tools.py \
extractools \
--api $GITHUB_API_KEY \
--all_tools $output \
--planemorepository $1
--all-tools $output \
--planemo-repository-list $1
else
python bin/extract_galaxy_tools.py \
extractools \
--api $GITHUB_API_KEY \
--all-tools $output \
--planemo-repository-list $1 \
--avoid-extra-repositories
fi



4 changes: 2 additions & 2 deletions bin/extract_all_tools_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ output="results/${1}_tools.tsv"
python bin/extract_galaxy_tools.py \
extractools \
--api $GITHUB_API_KEY \
--all_tools $output \
--planemorepository $1 \
--all-tools $output \
--planemo-repository-list $1 \
--test

37 changes: 26 additions & 11 deletions bin/extract_galaxy_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,14 +123,14 @@ def get_string_content(cf: ContentFile) -> str:


def get_tool_github_repositories(
g: Github, RepoSelection: Optional[str], run_test: bool, add_extra_repositories: bool = True
g: Github, repository_list: Optional[str], run_test: bool, add_extra_repositories: bool = True
) -> List[str]:
"""
Get list of tool GitHub repositories to parse
:param g: GitHub instance
:param RepoSelection: The selection to use from the repository (needed to split the process for CI jobs)
:run_test: for testing only parse the repository
:param repository_list: The selection to use from the repository (needed to split the process for CI jobs)
:param run_test: for testing only parse the repository
"""

if run_test:
Expand All @@ -140,8 +140,8 @@ def get_tool_github_repositories(
repo_list: List[str] = []
for i in range(1, 5):
repo_selection = f"repositories0{i}.list"
if RepoSelection: # only get these repositories
if RepoSelection == repo_selection:
if repository_list: # only get these repositories
if repository_list == repo_selection:
repo_f = repo.get_contents(repo_selection)
repo_l = get_string_content(repo_f).rstrip()
repo_list.extend(repo_l.split("\n"))
Expand Down Expand Up @@ -616,11 +616,21 @@ def filter_tools(
# Extract tools
extractools = subparser.add_parser("extractools", help="Extract tools")
extractools.add_argument("--api", "-a", required=True, help="GitHub access token")
extractools.add_argument("--all_tools", "-o", required=True, help="Filepath to TSV with all extracted tools")
extractools.add_argument("--all-tools", "-o", required=True, help="Filepath to TSV with all extracted tools")
extractools.add_argument(
"--planemorepository", "-pr", required=False, help="Repository list to use from the planemo-monitor repository"
"--planemo-repository-list",
"-pr",
required=False,
help="Repository list to use from the planemo-monitor repository",
)
extractools.add_argument(
"--avoid-extra-repositories",
"-e",
action="store_true",
default=False,
required=False,
help="Do not parse extra repositories in conf file",
)

extractools.add_argument(
"--test",
"-t",
Expand All @@ -639,13 +649,13 @@ def filter_tools(
help="Filepath to TSV with all extracted tools, generated by extractools command",
)
filtertools.add_argument(
"--ts_filtered_tools",
"--ts-filtered-tools",
"-t",
required=True,
help="Filepath to TSV with tools filtered based on ToolShed category",
)
filtertools.add_argument(
"--filtered_tools",
"--filtered-tools",
"-f",
required=True,
help="Filepath to TSV with tools filtered based on ToolShed category and manual curation",
Expand All @@ -666,7 +676,12 @@ def filter_tools(
# connect to GitHub
g = Github(args.api)
# get list of GitHub repositories to parse
repo_list = get_tool_github_repositories(g, args.planemorepository, args.test)
repo_list = get_tool_github_repositories(
g=g,
repository_list=args.planemo_repository_list,
run_test=args.test,
add_extra_repositories=not args.avoid_extra_repositories,
)
# parse tools in GitHub repositories to extract metadata, filter by TS categories and export to output file
tools: List[Dict] = []
for r in repo_list:
Expand Down
4 changes: 2 additions & 2 deletions bin/get_community_tools.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ for com_data_fp in data/communities/* ; do
python bin/extract_galaxy_tools.py \
filtertools \
--tools "results/all_tools.tsv" \
--ts_filtered_tools "results/$community/tools_filtered_by_ts_categories.tsv" \
--filtered_tools "results/$community/tools.tsv" \
--ts-filtered-tools "results/$community/tools_filtered_by_ts_categories.tsv" \
--filtered-tools "results/$community/tools.tsv" \
--categories "data/communities/$community/categories" \
--status "data/communities/$community/tool_status.tsv"

Expand Down
1 change: 0 additions & 1 deletion data/conf.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
extra-repositories:
- https://github.com/qiime2/galaxy-tools
- https://github.com/geraldinepascal/FROGS-wrappers
Loading

0 comments on commit 3c6371d

Please sign in to comment.