diff --git a/bin/extract_galaxy_tools.py b/bin/extract_galaxy_tools.py index d3a2c308..971a9fd4 100644 --- a/bin/extract_galaxy_tools.py +++ b/bin/extract_galaxy_tools.py @@ -28,16 +28,16 @@ # BIOTOOLS_API_URL = "https://130.226.25.21" USEGALAXY_STAR_SERVER_URLS = { - "UseGalaxy.org": "https://usegalaxy.org", + "UseGalaxy.org (Main)": "https://usegalaxy.org", "UseGalaxy.org.au": "https://usegalaxy.org.au", "UseGalaxy.eu": "https://usegalaxy.eu", - "UseGalaxy.org.fr": "https://usegalaxy.fr", + "UseGalaxy.fr": "https://usegalaxy.fr", } project_path = Path(__file__).resolve().parent.parent # galaxy_tool_extractor folder usage_stats_path = project_path.joinpath("data", "usage_stats") conf_path = project_path.joinpath("data", "conf.yml") -public_servers = project_path.joinpath("data","available_public_servers.csv") +public_servers = project_path.joinpath("data", "available_public_servers.csv") GALAXY_TOOL_STATS = { "No. of tool users (2022-2023) (usegalaxy.eu)": usage_stats_path.joinpath("tool_usage_per_user_2022_23_EU.csv"), @@ -721,7 +721,10 @@ def reduce_ontology_terms(terms: List, ontology: Any) -> List: file=sys.stderr, ) + ####################################################### # add additional information to the List[Dict] object + ####################################################### + edam_ontology = get_ontology("https://edamontology.org/EDAM_1.25.owl").load() for tool in tools: @@ -740,9 +743,13 @@ def reduce_ontology_terms(terms: List, ontology: Any) -> List: public_servers_df = pd.read_csv(public_servers) for index, row in public_servers_df.iterrows(): name = row["name"] - url = row["url"] - tool[f"Tools available on {name}"] = check_tools_on_servers(tool["Galaxy tool ids"], url) + if not name.lower() in [ + n.lower() for n in USEGALAXY_STAR_SERVER_URLS.keys() + ]: # do not query UseGalaxy servers again + + url = row["url"] + tool[f"Tools available on {name}"] = check_tools_on_servers(tool["Galaxy tool ids"], url) export_tools_to_json(tools, args.all_tools_json) export_tools_to_tsv(tools, args.all_tools, format_list_col=True, add_usage_stats=True)