From f0c8d05bfccf174753d6d7010c07a4492acff442 Mon Sep 17 00:00:00 2001
From: Nicola Soranzo
Date: Wed, 1 Nov 2023 22:32:40 +0000
Subject: [PATCH] Add function to check if tools are available on a list of servers

First step of https://github.com/bebatut/galaxy_tool_extractor/issues/11 .
---
 bin/extract_galaxy_tools.py | 55 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/bin/extract_galaxy_tools.py b/bin/extract_galaxy_tools.py
index 54bfbcea..fdd10c66 100644
--- a/bin/extract_galaxy_tools.py
+++ b/bin/extract_galaxy_tools.py
@@ -24,6 +24,12 @@
 BIOTOOLS_API_URL = "https://bio.tools"
 # BIOTOOLS_API_URL = "https://130.226.25.21"
 
+GALAXY_SERVER_URLS = [
+    "https://usegalaxy.org",
+    "https://usegalaxy.org.au",
+    "https://usegalaxy.eu",
+]
+
 
 def read_file(filepath: Optional[str]) -> List[str]:
     """
@@ -333,6 +339,55 @@ def parse_tools(repo: Repository) -> List[Dict[str, Any]]:
     return tools
 
 
+def get_all_installed_tool_ids(galaxy_url: str) -> List[str]:
+    """
+    Get the ids of all the tools installed on a Galaxy server
+
+    :param galaxy_url: URL of the Galaxy server (trailing "/" are stripped)
+    :return: the "id" value of every entry returned by the server's tools API
+    """
+    galaxy_url = galaxy_url.rstrip("/")
+    base_url = f"{galaxy_url}/api"
+    # Set a timeout so that an unresponsive server cannot hang the script forever
+    r = requests.get(f"{base_url}/tools", params={"in_panel": False}, timeout=60)
+    r.raise_for_status()
+    tool_dict_list = r.json()
+    return [tool_dict["id"] for tool_dict in tool_dict_list]
+
+
+def _get_short_tool_id(tool_id: str) -> str:
+    """
+    Get the short id of a tool from the id reported by a Galaxy server
+
+    :param tool_id: tool id, possibly made of "/"-separated parts
+    :return: the 5th "/"-separated part if there is one, else the id unchanged
+    """
+    # Ids with fewer than 5 parts are kept whole instead of raising IndexError
+    parts = tool_id.split("/")
+    return parts[4] if len(parts) > 4 else tool_id
+
+
+def check_tools_on_servers(tool_ids: List[str]) -> pd.DataFrame:
+    """
+    Check which tools are installed on the servers in GALAXY_SERVER_URLS
+
+    :param tool_ids: short tool ids, i.e. not containing any "/"
+    :return: dataframe of booleans with a row per server URL and a column per tool id
+    :raises ValueError: if one of the tool ids contains a "/"
+    """
+    # Raise explicitly: an assert would be skipped when Python runs with -O
+    if any("/" in tool_id for tool_id in tool_ids):
+        raise ValueError("This function only works on short tool ids")
+    data: List[Dict[str, bool]] = []
+    for galaxy_url in GALAXY_SERVER_URLS:
+        # Use a set for constant-time membership tests
+        installed_tool_short_ids = {
+            _get_short_tool_id(tool_id) for tool_id in get_all_installed_tool_ids(galaxy_url)
+        }
+        data.append({tool_id: tool_id in installed_tool_short_ids for tool_id in tool_ids})
+    return pd.DataFrame(data, index=GALAXY_SERVER_URLS)
+
+
 def format_list_column(col: pd.Series) -> pd.Series:
     """
     Format a column that could be a list before exporting