diff --git a/.github/workflows/fetch_all_tools.yaml b/.github/workflows/fetch_all_tools.yaml index 891eec25..4446a057 100644 --- a/.github/workflows/fetch_all_tools.yaml +++ b/.github/workflows/fetch_all_tools.yaml @@ -26,7 +26,7 @@ jobs: run: python -m pip install -r requirements.txt - name: Run script run: | - python ./bin/get_public_galaxy_servers.py -o data/available_public_servers.csv + python bin/get_public_galaxy_servers.py -o data/available_public_servers.csv - name: Commit servers # add or commit any changes in results if there was a change, merge with main, and push as bot run: | @@ -59,7 +59,7 @@ jobs: run: python -m pip install -r requirements.txt - name: Run script #needs PAT to access other repos run: | - bash ./bin/extract_all_tools_stepwise.sh "${{ matrix.subset }}" + bash bin/extract_all_tools.sh "${{ matrix.subset }}" env: GITHUB_API_KEY: ${{ secrets.GH_API_TOKEN }} - name: Commit all tools @@ -92,7 +92,7 @@ jobs: jq -s 'map(.[])' results/repositories*.list_tools.json > results/all_tools.json - name: Wordcloud and interactive table run: | - bash ./bin/extract_all_tools_downstream.sh + bash bin/format_tools.sh - name: Commit all tools # add or commit any changes in results if there was a change, merge with main and push as bot run: | diff --git a/.github/workflows/fetch_all_tutorials.yaml b/.github/workflows/fetch_all_tutorials.yaml index c79343b1..e120f1d2 100644 --- a/.github/workflows/fetch_all_tutorials.yaml +++ b/.github/workflows/fetch_all_tutorials.yaml @@ -29,7 +29,7 @@ jobs: run: python -m pip install -r requirements.txt - name: Run script #needs PAT to access other repos run: | - bash ./bin/extract_all_tutorials.sh + bash bin/extract_all_tutorials.sh env: PLAUSIBLE_API_KEY: ${{ secrets.PLAUSIBLE_API_TOKEN }} - name: Commit all tools diff --git a/.github/workflows/filter_communities.yaml b/.github/workflows/filter_communities.yaml index b73d1976..cffaae36 100644 --- a/.github/workflows/filter_communities.yaml +++ 
b/.github/workflows/filter_communities.yaml @@ -36,7 +36,7 @@ jobs: run: python -m pip install -r requirements.txt - name: Run script run: | - bash ./bin/get_community_tutorials.sh + bash bin/get_community_tutorials.sh - name: Commit results # commit the new filtered data, only if stuff was changed run: | @@ -59,7 +59,7 @@ jobs: run: python -m pip install -r requirements.txt - name: Run script run: | - bash ./bin/update_tools_to_keep_exclude.sh + bash bin/update_tools_to_keep_exclude.sh - name: Commit results # commit the new filtered data, only if stuff was changed run: | @@ -82,7 +82,7 @@ jobs: run: python -m pip install -r requirements.txt - name: Run script run: | - bash ./bin/get_community_tools.sh + bash bin/get_community_tools.sh - name: Commit results # commit the new filtered data, only if stuff was changed run: | diff --git a/.github/workflows/run_tests.yaml b/.github/workflows/run_tests.yaml index 0d520f28..6bef8c26 100644 --- a/.github/workflows/run_tests.yaml +++ b/.github/workflows/run_tests.yaml @@ -13,39 +13,16 @@ jobs: - name: Install requirement run: python -m pip install -r requirements.txt - name: Tool extraction - # run: bash bin/extract_all_tools.sh run: | - python bin/extract_galaxy_tools.py \ - extractools \ - --api $GITHUB_API_KEY \ - --all-tools "results/test_tools.tsv" \ - --all-tools-json "results/test_tools.json" \ - --planemo-repository-list "test.list" \ - --test + bash bin/extract_all_tools.sh test env: GITHUB_API_KEY: ${{ secrets.GH_API_TOKEN }} - name: Tool filter run: | - python bin/extract_galaxy_tools.py \ - filtertools \ - --tools "results/all_tools.json" \ - --ts-filtered-tools "results/microgalaxy/tools_filtered_by_ts_categories.tsv" \ - --filtered-tools "results/microgalaxy/tools.tsv" \ - --categories "data/communities/microgalaxy/categories" \ - --status "data/communities/microgalaxy/tool_status.tsv" - - name: Create interactive table + bash bin/get_community_tools.sh test + - name: Create interactive table and wordcloud 
run: | - python bin/create_interactive_table.py \ - --table "results/microgalaxy/tools.tsv" \ - --template "data/interactive_table_template.html" \ - --output "results/microgalaxy/index.html" - - name: Create wordcloud - run: | - python bin/create_wordcloud.py \ - --table "results/microgalaxy/tools.tsv" \ - --wordcloud_mask "data/usage_stats/wordcloud_mask.png" \ - --output "results/microgalaxy/tools_wordcloud.png" \ - --stats_column "No. of tool users (2022-2023) (usegalaxy.eu)" + bash bin/format_tools.sh test-tutorials: runs-on: ubuntu-20.04 steps: @@ -57,18 +34,9 @@ jobs: run: python -m pip install -r requirements.txt - name: Tutorial extraction run: | - python bin/extract_gtn_tutorials.py \ - extracttutorials \ - --all_tutorials "results/test_tutorials.json" \ - --tools "results/all_tools.json" \ - --api $PLAUSIBLE_API_KEY \ - --test + bash bin/extract_all_tutorials.sh test env: PLAUSIBLE_API_KEY: ${{ secrets.PLAUSIBLE_API_TOKEN }} - name: Tutorial filtering run: | - python bin/extract_gtn_tutorials.py \ - filtertutorials \ - --all_tutorials "results/test_tutorials.json" \ - --filtered_tutorials "results/microgalaxy/test_tutorials.tsv" \ - --tags "data/communities/microgalaxy/tutorial_tags" \ No newline at end of file + bash bin/get_community_tutorials.sh test \ No newline at end of file diff --git a/README.md b/README.md index a84187cf..d54dd8db 100644 --- a/README.md +++ b/README.md @@ -113,9 +113,10 @@ The script will generate a TSV file with each tool found in the list of GitHub r ``` $ python bin/extract_galaxy_tools.py \ - --tools \ - --ts-filtered-tools - --filtered-tools \ + filter \ + --all \ + --ts-filtered \ + --filtered \ [--categories ] \ [--status ] ``` @@ -145,18 +146,49 @@ The script will generate a TSV file with each tool found in the list of GitHub r ``` $ python bin/extract_gtn_tutorials.py \ - filtertutorials \ - --all_tutorials "results/all_tutorials.json" \ - --filtered_tutorials "results//tutorials.tsv" \ + filter \ + --all
"results/all_tutorials.json" \ + --filtered "results//tutorials.tsv" \ --tags "data/communities//tutorial_tags" ``` ## Development +### Tools + To make a test run of the tool to check its functionalities follow [Usage](#Usage) to set-up the environnement and the API key, then run -```bash -bash ./bin/extract_all_tools_test.sh test.list -``` +1. Tool extraction + + ```bash + $ bash bin/extract_all_tools.sh test + ``` + + This runs the tool, but only parses the test repository [Galaxy-Tool-Metadata-Extractor-Test-Wrapper](https://github.com/paulzierep/Galaxy-Tool-Metadata-Extractor-Test-Wrapper) + +2. Tool filter + + ```bash + $ bash bin/get_community_tools.sh test + ``` + +3. Create interactive table and wordcloud + + ```bash + $ bash bin/format_tools.sh + ``` + +### Tutorials + +1. Tutorial extraction + + ```bash + $ bash bin/extract_all_tutorials.sh test + ``` + +2. Tutorial filtering + + ```bash + $ bash bin/get_community_tutorials.sh test + ``` -This runs the tool, but only parses the test repository [Galaxy-Tool-Metadata-Extractor-Test-Wrapper](https://github.com/paulzierep/Galaxy-Tool-Metadata-Extractor-Test-Wrapper) diff --git a/bin/extract_all_tools.sh b/bin/extract_all_tools.sh index d8efcd01..65610f66 100755 --- a/bin/extract_all_tools.sh +++ b/bin/extract_all_tools.sh @@ -2,13 +2,45 @@ mkdir -p 'results/' -python bin/extract_galaxy_tools.py \ - extractools \ - --api $GITHUB_API_KEY \ - --all-tools 'results/all_tools.tsv' \ - --all-tools-json 'results/all_tools.json' - -python bin/create_interactive_table.py \ - --table "results/all_tools.tsv" \ - --template "data/interactive_table_template.html" \ - --output "results/index.html" \ No newline at end of file +if [ ! 
-z "$1" ] +then + if [ "$1" = "test" ] + then + echo "Test tool extraction" + python bin/extract_galaxy_tools.py \ + extract \ + --api $GITHUB_API_KEY \ + --all-tsv "results/test_tools.tsv" \ + --all "results/test_tools.json" \ + --planemo-repository-list "test.list" \ + --test + else + tsv_output="results/${1}_tools.tsv" + json_output="results/${1}_tools.json" + + if [[ $1 =~ "01" ]]; then + python bin/extract_galaxy_tools.py \ + extract \ + --api $GITHUB_API_KEY \ + --all-tsv $tsv_output \ + --all $json_output \ + --planemo-repository-list $1 + else + python bin/extract_galaxy_tools.py \ + extract \ + --api $GITHUB_API_KEY \ + --all-tsv $tsv_output \ + --all $json_output \ + --planemo-repository-list $1 \ + --avoid-extra-repositories + fi + fi +else + echo "Tool extraction"; + python bin/extract_galaxy_tools.py \ + extract \ + --api $GITHUB_API_KEY \ + --all-tsv 'results/all_tools.tsv' \ + --all 'results/all_tools.json' +fi + diff --git a/bin/extract_all_tools_stepwise.sh b/bin/extract_all_tools_stepwise.sh deleted file mode 100755 index 8545fd43..00000000 --- a/bin/extract_all_tools_stepwise.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env bash - -mkdir -p 'results/' - -tsv_output="results/${1}_tools.tsv" -json_output="results/${1}_tools.json" - -if [[ $1 =~ "01" ]]; then - python bin/extract_galaxy_tools.py \ - extractools \ - --api $GITHUB_API_KEY \ - --all-tools $tsv_output \ - --all-tools-json $json_output \ - --planemo-repository-list $1 -else - python bin/extract_galaxy_tools.py \ - extractools \ - --api $GITHUB_API_KEY \ - --all-tools $tsv_output \ - --all-tools-json $json_output \ - --planemo-repository-list $1 \ - --avoid-extra-repositories -fi - - - diff --git a/bin/extract_all_tutorials.sh b/bin/extract_all_tutorials.sh index 74f6c272..48592c0b 100644 --- a/bin/extract_all_tutorials.sh +++ b/bin/extract_all_tutorials.sh @@ -1,7 +1,17 @@ #!/usr/bin/env bash -python bin/extract_gtn_tutorials.py \ - extracttutorials \ - --all_tutorials
"results/all_tutorials.json" \ - --tools "results/all_tools.json" \ - --api $PLAUSIBLE_API_KEY \ No newline at end of file +if [ ! -z $1 ] +then + python bin/extract_gtn_tutorials.py \ + extract \ + --all "results/test_tutorials.json" \ + --tools "results/all_tools.json" \ + --api $PLAUSIBLE_API_KEY \ + --test +else + python bin/extract_gtn_tutorials.py \ + extract \ + --all "results/all_tutorials.json" \ + --tools "results/all_tools.json" \ + --api $PLAUSIBLE_API_KEY +fi \ No newline at end of file diff --git a/bin/extract_galaxy_tools.py b/bin/extract_galaxy_tools.py index e65be847..04a2afc2 100644 --- a/bin/extract_galaxy_tools.py +++ b/bin/extract_galaxy_tools.py @@ -26,7 +26,6 @@ # Config variables BIOTOOLS_API_URL = "https://bio.tools" -# BIOTOOLS_API_URL = "https://130.226.25.21" USEGALAXY_SERVER_URLS = { "UseGalaxy.org (Main)": "https://usegalaxy.org", @@ -614,23 +613,76 @@ def reduce_ontology_terms(terms: List, ontology: Any) -> List: return new_terms +def get_tools(repo_list: list, edam_ontology: dict) -> List[Dict]: + """ + Parse tools in GitHub repositories to extract metadata, + filter by TS categories, additional information + """ + tools: List[Dict] = [] + for r in repo_list: + print("Parsing tools from:", (r)) + if "github" not in r: + continue + try: + repo = get_github_repo(r, g) + tools.extend(parse_tools(repo)) + except Exception as e: + print( + f"Error while extracting tools from repo {r}: {e}", + file=sys.stderr, + ) + print(traceback.format_exc()) + + # add additional information to tools + for tool in tools: + # add EDAM terms without superclass + tool["EDAM operation (no superclasses)"] = reduce_ontology_terms(tool["EDAM operation"], ontology=edam_ontology) + tool["EDAM topic (no superclasses)"] = reduce_ontology_terms(tool["EDAM topic"], ontology=edam_ontology) + + # add availability for UseGalaxy servers + for name, url in USEGALAXY_SERVER_URLS.items(): + tool[f"Available on {name}"] = check_tools_on_servers(tool["Galaxy tool ids"], url) 
+ # add availability for all UseGalaxy servers + for name, url in USEGALAXY_SERVER_URLS.items(): + tool[f"Tools available on {name}"] = check_tools_on_servers(tool["Galaxy tool ids"], url) + + # add all other available servers + public_servers_df = pd.read_csv(public_servers, sep="\t") + for _index, row in public_servers_df.iterrows(): + name = row["name"] + + if name.lower() not in [ + n.lower() for n in USEGALAXY_SERVER_URLS.keys() + ]: # do not query UseGalaxy servers again + + url = row["url"] + tool[f"Tools available on {name}"] = check_tools_on_servers(tool["Galaxy tool ids"], url) + + # add tool stats + for name, path in GALAXY_TOOL_STATS.items(): + tool_stats_df = pd.read_csv(path) + tool[name] = get_tool_stats_from_stats_file(tool_stats_df, tool["Galaxy tool ids"]) + + return tools + + if __name__ == "__main__": parser = argparse.ArgumentParser( description="Extract Galaxy tools from GitHub repositories together with biotools and conda metadata" ) subparser = parser.add_subparsers(dest="command") # Extract tools - extractools = subparser.add_parser("extractools", help="Extract tools") - extractools.add_argument("--api", "-a", required=True, help="GitHub access token") - extractools.add_argument("--all-tools-json", "-j", required=True, help="Filepath to JSON with all extracted tools") - extractools.add_argument("--all-tools", "-o", required=True, help="Filepath to TSV with all extracted tools") - extractools.add_argument( + extract = subparser.add_parser("extract", help="Extract tools") + extract.add_argument("--api", "-a", required=True, help="GitHub access token") + extract.add_argument("--all", "-o", required=True, help="Filepath to JSON with all extracted tools") + extract.add_argument("--all-tsv", "-j", required=True, help="Filepath to TSV with all extracted tools") + extract.add_argument( "--planemo-repository-list", "-pr", required=False, help="Repository list to use from the planemo-monitor repository", ) - extractools.add_argument( + 
extract.add_argument( "--avoid-extra-repositories", "-e", action="store_true", @@ -638,7 +690,7 @@ def reduce_ontology_terms(terms: List, ontology: Any) -> List: required=False, help="Do not parse extra repositories in conf file", ) - extractools.add_argument( + extract.add_argument( "--test", "-t", action="store_true", @@ -648,21 +700,21 @@ def reduce_ontology_terms(terms: List, ontology: Any) -> List: ) # Filter tools - filtertools = subparser.add_parser("filtertools", help="Filter tools") + filtertools = subparser.add_parser("filter", help="Filter tools") filtertools.add_argument( - "--tools", - "-i", + "--all", + "-a", required=True, help="Filepath to JSON with all extracted tools, generated by extractools command", ) filtertools.add_argument( - "--ts-filtered-tools", + "--ts-filtered", "-t", required=True, help="Filepath to TSV with tools filtered based on ToolShed category", ) filtertools.add_argument( - "--filtered-tools", + "--filtered", "-f", required=True, help="Filepath to TSV with tools filtered based on ToolShed category and manual curation", @@ -679,7 +731,7 @@ def reduce_ontology_terms(terms: List, ontology: Any) -> List: ) args = parser.parse_args() - if args.command == "extractools": + if args.command == "extract": # connect to GitHub g = Github(args.api) # get list of GitHub repositories to parse @@ -690,64 +742,13 @@ def reduce_ontology_terms(terms: List, ontology: Any) -> List: add_extra_repositories=not args.avoid_extra_repositories, ) # parse tools in GitHub repositories to extract metadata, filter by TS categories and export to output file - tools: List[Dict] = [] - for r in repo_list: - print("Parsing tools from:", (r)) - if "github" not in r: - continue - try: - repo = get_github_repo(r, g) - tools.extend(parse_tools(repo)) - except Exception as e: - print( - f"Error while extracting tools from repo {r}: {e}", - file=sys.stderr, - ) - print(traceback.format_exc()) - - ####################################################### - # add 
additional information to the List[Dict] object - ####################################################### - edam_ontology = get_ontology("https://edamontology.org/EDAM_1.25.owl").load() + tools = get_tools(repo_list, edam_ontology) + export_tools_to_json(tools, args.all) + export_tools_to_tsv(tools, args.all_tsv, format_list_col=True, add_usage_stats=True) - for tool in tools: - - # add EDAM terms without superclass - tool["EDAM operation (no superclasses)"] = reduce_ontology_terms( - tool["EDAM operation"], ontology=edam_ontology - ) - tool["EDAM topic (no superclasses)"] = reduce_ontology_terms(tool["EDAM topic"], ontology=edam_ontology) - - # add availability for UseGalaxy servers - for name, url in USEGALAXY_SERVER_URLS.items(): - tool[f"Available on {name}"] = check_tools_on_servers(tool["Galaxy tool ids"], url) - # add availability for all UseGalaxy servers - for name, url in USEGALAXY_SERVER_URLS.items(): - tool[f"Tools available on {name}"] = check_tools_on_servers(tool["Galaxy tool ids"], url) - - # add all other available servers - public_servers_df = pd.read_csv(public_servers, sep="\t") - for _index, row in public_servers_df.iterrows(): - name = row["name"] - - if name.lower() not in [ - n.lower() for n in USEGALAXY_SERVER_URLS.keys() - ]: # do not query UseGalaxy servers again - - url = row["url"] - tool[f"Tools available on {name}"] = check_tools_on_servers(tool["Galaxy tool ids"], url) - - # add tool stats - for name, path in GALAXY_TOOL_STATS.items(): - tool_stats_df = pd.read_csv(path) - tool[name] = get_tool_stats_from_stats_file(tool_stats_df, tool["Galaxy tool ids"]) - - export_tools_to_json(tools, args.all_tools_json) - export_tools_to_tsv(tools, args.all_tools, format_list_col=True, add_usage_stats=True) - - elif args.command == "filtertools": - with Path(args.tools).open() as f: + elif args.command == "filter": + with Path(args.all).open() as f: tools = json.load(f) # get categories and tools to exclude categories = 
shared.read_file(args.categories) @@ -760,11 +761,9 @@ def reduce_ontology_terms(terms: List, ontology: Any) -> List: # filter tool lists ts_filtered_tools, filtered_tools = filter_tools(tools, categories, status) - - export_tools_to_tsv(ts_filtered_tools, args.ts_filtered_tools, format_list_col=True) - + export_tools_to_tsv(ts_filtered_tools, args.ts_filtered, format_list_col=True) # if there are no filtered tools return the ts filtered tools if filtered_tools: - export_tools_to_tsv(filtered_tools, args.filtered_tools, format_list_col=True) + export_tools_to_tsv(filtered_tools, args.filtered, format_list_col=True) else: - export_tools_to_tsv(ts_filtered_tools, args.filtered_tools, format_list_col=True) + export_tools_to_tsv(ts_filtered_tools, args.filtered, format_list_col=True) diff --git a/bin/extract_gtn_tutorials.py b/bin/extract_gtn_tutorials.py index e92320e5..5ddee89f 100644 --- a/bin/extract_gtn_tutorials.py +++ b/bin/extract_gtn_tutorials.py @@ -275,18 +275,16 @@ def export_tutorials_to_tsv(tutorials: list, output_fp: str) -> None: ) subparser = parser.add_subparsers(dest="command") # Extract tutorials - extracttutorials = subparser.add_parser("extracttutorials", help="Extract all training materials") - extracttutorials.add_argument( - "--all_tutorials", "-o", required=True, help="Filepath to JSON with all extracted training materials" - ) - extracttutorials.add_argument( + extract = subparser.add_parser("extract", help="Extract all training materials") + extract.add_argument("--all", "-o", required=True, help="Filepath to JSON with all extracted training materials") + extract.add_argument( "--tools", "-t", required=True, help="Filepath to JSON with all extracted tools, generated by extractools command", ) - extracttutorials.add_argument("--api", "-a", required=True, help="Plausible access token") - extracttutorials.add_argument( + extract.add_argument("--api", "-a", required=True, help="Plausible access token") + extract.add_argument( "--test", 
action="store_true", default=False, @@ -295,35 +293,35 @@ def export_tutorials_to_tsv(tutorials: list, output_fp: str) -> None: ) # Filter tutorials - filtertutorials = subparser.add_parser("filtertutorials", help="Filter training materials based on their tags") - filtertutorials.add_argument( - "--all_tutorials", - "-t", + filtertuto = subparser.add_parser("filter", help="Filter training materials based on their tags") + filtertuto.add_argument( + "--all", + "-a", required=True, help="Filepath to JSON with all extracted tutorials, generated by extracttutorials command", ) - filtertutorials.add_argument( - "--filtered_tutorials", + filtertuto.add_argument( + "--filtered", "-f", required=True, help="Filepath to TSV with filtered tutorials", ) - filtertutorials.add_argument( + filtertuto.add_argument( "--tags", - "-c", + "-t", help="Path to a file with tags to keep in the extraction (one per line)", ) args = parser.parse_args() - if args.command == "extracttutorials": + if args.command == "extract": tutorials = get_tutorials(args.tools, args.api, args.test) - shared.export_to_json(tutorials, args.all_tutorials) + shared.export_to_json(tutorials, args.all) - elif args.command == "filtertutorials": - all_tutorials = shared.load_json(args.all_tutorials) + elif args.command == "filter": + all_tutorials = shared.load_json(args.all) # get categories and training to exclude tags = shared.read_file(args.tags) # filter training lists filtered_tutorials = filter_tutorials(all_tutorials, tags) - export_tutorials_to_tsv(filtered_tutorials, args.filtered_tutorials) + export_tutorials_to_tsv(filtered_tutorials, args.filtered) diff --git a/bin/extract_all_tools_downstream.sh b/bin/format_tools.sh similarity index 100% rename from bin/extract_all_tools_downstream.sh rename to bin/format_tools.sh diff --git a/bin/get_community_tools.sh b/bin/get_community_tools.sh index efb03f9c..335a71cd 100755 --- a/bin/get_community_tools.sh +++ b/bin/get_community_tools.sh @@ -3,32 +3,46 @@ # 
stop on error set -e -for com_data_fp in data/communities/* ; do - if [[ -d "$com_data_fp" && ! -L "$com_data_fp" ]]; then - community=`basename "$com_data_fp"` - - echo "$community"; - - mkdir -p "results/$community" - - python bin/extract_galaxy_tools.py \ - filtertools \ - --tools "results/all_tools.json" \ - --ts-filtered-tools "results/$community/tools_filtered_by_ts_categories.tsv" \ - --filtered-tools "results/$community/tools.tsv" \ - --categories "data/communities/$community/categories" \ - --status "data/communities/$community/tool_status.tsv" - - python bin/create_interactive_table.py \ - --table "results/$community/tools.tsv" \ - --template "data/interactive_table_template.html" \ - --output "results/$community/index.html" - - python bin/create_wordcloud.py \ - --table "results/$community/tools.tsv" \ - --wordcloud_mask "data/usage_stats/wordcloud_mask.png" \ - --output "results/$community/tools_wordcloud.png" \ - --stats_column "No. of tool users (2022-2023) (usegalaxy.eu)" - - fi; -done +if [ ! -z $1 ] +then + python bin/extract_galaxy_tools.py \ + filter \ + --all "results/all_tools.json" \ + --ts-filtered "results/microgalaxy/tools_filtered_by_ts_categories.tsv" \ + --filtered "results/microgalaxy/tools.tsv" \ + --categories "data/communities/microgalaxy/categories" \ + --status "data/communities/microgalaxy/tool_status.tsv" + +else + for com_data_fp in data/communities/* ; do + if [[ -d "$com_data_fp" && ! 
-L "$com_data_fp" ]]; then + community=`basename "$com_data_fp"` + + echo "$community"; + + mkdir -p "results/$community" + + python bin/extract_galaxy_tools.py \ + filter \ + --all "results/all_tools.json" \ + --ts-filtered "results/$community/tools_filtered_by_ts_categories.tsv" \ + --filtered "results/$community/tools.tsv" \ + --categories "data/communities/$community/categories" \ + --status "data/communities/$community/tool_status.tsv" + + python bin/create_interactive_table.py \ + --table "results/$community/tools.tsv" \ + --template "data/interactive_table_template.html" \ + --output "results/$community/index.html" + + python bin/create_wordcloud.py \ + --table "results/$community/tools.tsv" \ + --wordcloud_mask "data/usage_stats/wordcloud_mask.png" \ + --output "results/$community/tools_wordcloud.png" \ + --stats_column "No. of tool users (2022-2023) (usegalaxy.eu)" + + fi; + done +fi + + diff --git a/bin/get_community_tutorials.sh b/bin/get_community_tutorials.sh index c4f8d3e4..fc8637d5 100644 --- a/bin/get_community_tutorials.sh +++ b/bin/get_community_tutorials.sh @@ -1,18 +1,28 @@ #!/usr/bin/env bash -for com_data_fp in data/communities/* ; do - if [[ -d "$com_data_fp" && ! -L "$com_data_fp" ]]; then - community=`basename "$com_data_fp"` +if [ ! -z $1 ] +then + python bin/extract_gtn_tutorials.py \ + filter \ + --all "results/test_tutorials.json" \ + --filtered "results/microgalaxy/test_tutorials.tsv" \ + --tags "data/communities/microgalaxy/tutorial_tags" - echo "$community"; +else + for com_data_fp in data/communities/* ; do + if [[ -d "$com_data_fp" && ! 
-L "$com_data_fp" ]]; then + community=`basename "$com_data_fp"` - if [[ -f "data/communities/$community/tutorial_tags" && -f "results/$community/tutorials.tsv" ]]; then + echo "$community"; - python bin/extract_gtn_tutorials.py \ - filtertutorials \ - --all_tutorials "results/all_tutorials.json" \ - --filtered_tutorials "results/$community/tutorials.tsv" \ - --tags "data/communities/$community/tutorial_tags" + if [[ -f "data/communities/$community/tutorial_tags" && -f "results/$community/tutorials.tsv" ]]; then + + python bin/extract_gtn_tutorials.py \ + filter \ + --all "results/all_tutorials.json" \ + --filtered "results/$community/tutorials.tsv" \ + --tags "data/communities/$community/tutorial_tags" + fi; fi; - fi; -done + done +fi