diff --git a/sources/README.md b/sources/README.md index 52fd7657..6d353ea7 100644 --- a/sources/README.md +++ b/sources/README.md @@ -245,13 +245,13 @@ To make a test run of the tool to check its functionalities follow [Usage](#Usag 1. Workflow extraction ```bash - $ bash bin/extract_all_workflows.sh test + $ bash sources/bin/extract_all_workflows.sh test ``` 2. Workflow filtering ```bash - $ bash bin/get_community_workflowss.sh test + $ bash sources/bin/get_community_workflows.sh test ``` # Galaxy Labs framework diff --git a/sources/bin/extract_galaxy_workflows.py b/sources/bin/extract_galaxy_workflows.py index 4ff03f75..cbde094f 100644 --- a/sources/bin/extract_galaxy_workflows.py +++ b/sources/bin/extract_galaxy_workflows.py @@ -156,9 +156,10 @@ def add_workflows_from_workflowhub(self) -> None: f"https://workflowhub.eu{wf['links']['self']}", header, ) - wf = Workflow() - wf.init_from_search(wf=wfhub_wf, source="WorkflowHub", tools=self.tools) - self.workflows.append(wf) + if wfhub_wf: + wf = Workflow() + wf.init_from_search(wf=wfhub_wf, source="WorkflowHub", tools=self.tools) + self.workflows.append(wf) print(len(self.workflows)) def add_workflows_from_a_server(self, server: str) -> None: @@ -170,6 +171,12 @@ def add_workflows_from_a_server(self, server: str) -> None: f"{server}/api/workflows/", header, ) + + # test max 50 wfs + if self.test: + if len(server_wfs) > 50: + server_wfs = server_wfs[:50] + count = 0 for wf in server_wfs: if wf["published"] and wf["importable"] and not wf["deleted"] and not wf["hidden"]: diff --git a/sources/bin/shared.py b/sources/bin/shared.py index 0097bcd8..611436bc 100644 --- a/sources/bin/shared.py +++ b/sources/bin/shared.py @@ -1,6 +1,7 @@ #!/usr/bin/env python import json +import time from datetime import datetime from pathlib import Path from typing import ( @@ -13,6 +14,7 @@ import requests from github.ContentFile import ContentFile from github.Repository import Repository +from requests.exceptions import ConnectionError 
def get_first_commit_for_folder(tool: ContentFile, repo: Repository) -> str: @@ -90,15 +92,42 @@ def read_suite_per_tool_id(tool_fp: str) -> Dict: return tools -def get_request_json(url: str, headers: dict) -> dict: - """ - Return JSON output using request - - :param url: galaxy tool id - """ - r = requests.get(url, auth=None, headers=headers) - r.raise_for_status() - return r.json() +def get_request_json(url: str, headers: dict, retries: int = 3, delay: float = 2.0) -> dict: + """ + Perform a GET request to retrieve JSON output from a specified URL, with retry on ConnectionError. + + :param url: URL to send the GET request to. + :param headers: Headers to include in the GET request. + :param retries: Maximum number of attempts made when a ConnectionError occurs (default is 3). + :param delay: Delay in seconds between retries (default is 2.0 seconds). + :return: JSON response as a dictionary (an empty dictionary only if retries < 1, i.e. no attempt is made). + :raises ConnectionError: If all retry attempts fail due to a connection error. + :raises SystemExit: For any other request-related errors. + """ + attempt = 0 # Track the number of attempts + + while attempt < retries: + try: + r = requests.get(url, auth=None, headers=headers) + r.raise_for_status() # Raises an HTTPError for unsuccessful status codes + return r.json() # Return JSON response if successful + except ConnectionError as e: + attempt += 1 + if attempt == retries: + raise ConnectionError( + "Connection aborted after multiple retries: Remote end closed connection without response" + ) from e + print(f"Connection error on attempt {attempt}/{retries}. 
Retrying in {delay} seconds...") + time.sleep(delay) # Wait before retrying + except requests.exceptions.RequestException as e: + # Handles all other exceptions from the requests library + raise SystemExit(f"Request failed: {e}") + except ValueError as e: + # Handles cases where the response isn't valid JSON + raise ValueError("Response content is not valid JSON") from e + + # Only reached when retries < 1 (the loop never runs): return an empty dict + return {} def format_date(date: str) -> str: