diff --git a/.github/workflows/fetch_all_tools.yaml b/.github/workflows/fetch_all_tools.yaml
index b99ea875..22483873 100644
--- a/.github/workflows/fetch_all_tools.yaml
+++ b/.github/workflows/fetch_all_tools.yaml
@@ -16,6 +16,19 @@ permissions:
   contents: write
 
 jobs:
+  fetch-available-servers:
+    runs-on: ubuntu-20.04
+    name: Fetch list of all available servers
+    steps:
+      # the runner starts with an empty workspace: requirements.txt and bin/ only exist after a checkout
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Install requirement
+        run: python -m pip install -r requirements.txt
+      - name: Run script
+        run: |
+          python ./bin/get_public_galaxy_servers.py -o data/available_public_servers.csv
+
   fetch-all-tools-stepwise:
     runs-on: ubuntu-20.04
     environment: fetch-tools
diff --git a/bin/extract_galaxy_tools.py b/bin/extract_galaxy_tools.py
index 24a28f38..b0e42de0 100644
--- a/bin/extract_galaxy_tools.py
+++ b/bin/extract_galaxy_tools.py
@@ -28,15 +28,16 @@
 # BIOTOOLS_API_URL = "https://130.226.25.21"
 
 USEGALAXY_SERVER_URLS = {
-    "UseGalaxy.org": "https://usegalaxy.org",
+    "UseGalaxy.org (Main)": "https://usegalaxy.org",
     "UseGalaxy.org.au": "https://usegalaxy.org.au",
     "UseGalaxy.eu": "https://usegalaxy.eu",
-    "UseGalaxy.org.fr": "https://usegalaxy.fr",
+    "UseGalaxy.fr": "https://usegalaxy.fr",
 }
 
 project_path = Path(__file__).resolve().parent.parent  # galaxy_tool_extractor folder
 usage_stats_path = project_path.joinpath("data", "usage_stats")
 conf_path = project_path.joinpath("data", "conf.yml")
+public_servers = project_path.joinpath("data", "available_public_servers.csv")
 
 GALAXY_TOOL_STATS = {
     "No. of tool users (2022-2023) (usegalaxy.eu)": usage_stats_path.joinpath("tool_usage_per_user_2022_23_EU.csv"),
@@ -720,7 +721,15 @@ def reduce_ontology_terms(terms: List, ontology: Any) -> List:
                     file=sys.stderr,
                 )
 
+        #######################################################
         # add additional information to the List[Dict] object
+        #######################################################
+
         edam_ontology = get_ontology("https://edamontology.org/EDAM_1.25.owl").load()
 
+        # read the public server list once: it is the same for every tool
+        public_servers_df = pd.read_csv(public_servers, sep="\t")
+        # UseGalaxy servers are handled separately below, so they are skipped in the public list
+        usegalaxy_names = {name.lower() for name in USEGALAXY_SERVER_URLS}
+
         for tool in tools:
@@ -734,6 +743,17 @@ def reduce_ontology_terms(terms: List, ontology: Any) -> List:
             # add availability for UseGalaxy servers
             for name, url in USEGALAXY_SERVER_URLS.items():
                 tool[f"Available on {name}"] = check_tools_on_servers(tool["Galaxy tool ids"], url)
+            # add availability for all UseGalaxy servers: reuse the result fetched above
+            # instead of sending the identical queries a second time
+            for name in USEGALAXY_SERVER_URLS:
+                tool[f"Tools available on {name}"] = tool[f"Available on {name}"]
+
+            # add all other available servers
+            for _index, row in public_servers_df.iterrows():
+                name = row["name"]
+                if name.lower() in usegalaxy_names:
+                    continue  # do not query UseGalaxy servers again
+                tool[f"Tools available on {name}"] = check_tools_on_servers(tool["Galaxy tool ids"], row["url"])
 
         export_tools_to_json(tools, args.all_tools_json)
         export_tools_to_tsv(tools, args.all_tools, format_list_col=True, add_usage_stats=True)
diff --git a/bin/get_public_galaxy_servers.py b/bin/get_public_galaxy_servers.py
new file mode 100644
index 00000000..a8c66df8
--- /dev/null
+++ b/bin/get_public_galaxy_servers.py
@@ -0,0 +1,61 @@
+import argparse
+
+import pandas as pd
+import requests
+
+# Servers whose URL contains one of these fragments are skipped: subdomains of
+# usegalaxy.eu / .fr / .org.au are the same as their top level domain and just
+# pollute the supported instances list, and test servers are not permitted.
+EXCLUDED_URL_FRAGMENTS = (".usegalaxy.eu", ".usegalaxy.fr", ".usegalaxy.org.au", "test.")
+REQUEST_TIMEOUT = 30  # seconds, applied to every HTTP request
+
+
+def get_public_galaxy_servers(output: str) -> None:
+    """
+    Get public galaxy servers that can be queried for tools using their API
+
+    :param output: path to output the server list tsv
+    """
+
+    to_process = {}
+    # a timeout keeps the CI job from hanging if the feed is unreachable
+    feed = requests.get("https://galaxyproject.org/use/feed.json", timeout=REQUEST_TIMEOUT)
+    feed.raise_for_status()
+    for server in feed.json():
+
+        print(server["title"])
+        if any(fragment in server["url"] for fragment in EXCLUDED_URL_FRAGMENTS):
+            continue
+
+        galaxy_url = server["url"].rstrip("/")
+        base_url = f"{galaxy_url}/api"
+
+        try:
+            r = requests.get(f"{base_url}/tools", params={"in_panel": False}, timeout=REQUEST_TIMEOUT)
+            r.raise_for_status()
+            r.json()
+        except Exception as ex:
+            # best effort: a server that cannot be queried is reported and skipped
+            print(f"Exception:\n{ex} \nfor server {galaxy_url}!")
+            continue
+
+        to_process[server["title"]] = server["url"]
+
+    s = pd.Series(to_process)
+    s.index.name = "name"
+    s.name = "url"
+    s.to_csv(output, sep="\t")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Create list of public available galaxy servers")
+
+    parser.add_argument(
+        "--output",
+        "-o",
+        required=True,
+        help="Path to output TSV file with the servers",
+    )
+
+    args = parser.parse_args()
+    get_public_galaxy_servers(args.output)
diff --git a/data/available_public_servers.csv b/data/available_public_servers.csv
new file mode 100644
index 00000000..a4947360
--- /dev/null
+++ b/data/available_public_servers.csv
@@ -0,0 +1,39 @@
+name	url
+ANASTASIA	http://motherbox.chemeng.ntua.gr/anastasia_dev/
+APOSTL	http://apostl.moffitt.org/
+ARGs-OAP	http://smile.hku.hk/SARGs
+BF2I-MAP	http://bf2i-galaxy.insa-lyon.fr:8080/
+BioBix	http://galaxy.ugent.be/
+CIRM-CFBP	https://iris.angers.inra.fr/galaxypub-cfbp
+Center for Phage Technology (CPT)	https://cpt.tamu.edu/galaxy-public/
+ChemFlow	https://vm-chemflow-francegrille.eu/
+Coloc-stats	https://hyperbrowser.uio.no/coloc-stats
+CorGAT	http://corgat.cloud.ba.infn.it/galaxy
+CropGalaxy	http://cropgalaxy.excellenceinbreeding.org/
+Dintor	http://dintor.eurac.edu/
+FreeBioinfo	http://www.freebioinfo.org/
+GASLINI	http://igg.cloud.ba.infn.it/galaxy
+Galaxy@AuBi	https://galaxy.mesocentre.uca.fr
+Galaxy@Pasteur	https://galaxy.pasteur.fr/
+GalaxyTrakr	https://galaxytrakr.org/
+Genomic Hyperbrowser	http://hyperbrowser.uio.no/hb/
+GigaGalaxy	http://gigagalaxy.net/
+HyPhy HIV NGS Tools	https://galaxy.hyphy.org/
+IPK Galaxy Blast Suite	https://galaxy-web.ipk-gatersleben.de
+ImmPort Galaxy	https://www.immportgalaxy.org/
+InteractoMIX	http://galaxy.interactomix.com/
+MISSISSIPPI	https://mississippi.sorbonne-universite.fr
+Mandoiu Lab	https://neo.engr.uconn.edu/
+MiModD NacreousMap	http://mapping-by-sequencing.vm.uni-freiburg.de:8080/
+Oqtans	http://galaxy.inf.ethz.ch
+Palfinder	https://palfinder.ls.manchester.ac.uk/
+PepSimili	http://pepsimili.e-nios.com:8080/
+PhagePromotor	https://galaxy.bio.di.uminho.pt/
+Protologger	http://protologger.de/
+UseGalaxy.be	https://usegalaxy.be/
+UseGalaxy.cz	https://usegalaxy.cz/
+UseGalaxy.eu	https://usegalaxy.eu
+UseGalaxy.fr	https://usegalaxy.fr/
+UseGalaxy.no	https://usegalaxy.no/
+UseGalaxy.org (Main)	https://usegalaxy.org
+UseGalaxy.org.au	https://usegalaxy.org.au