diff --git a/galaxy_ng/app/api/v1/tasks.py b/galaxy_ng/app/api/v1/tasks.py
index ff5c0c4548..03be9fde4c 100644
--- a/galaxy_ng/app/api/v1/tasks.py
+++ b/galaxy_ng/app/api/v1/tasks.py
@@ -200,7 +200,8 @@ def legacy_sync_from_upstream(
     github_user=None,
     role_name=None,
     role_version=None,
-    limit=None
+    limit=None,
+    start_page=None,
 ):
     """
     Sync legacy roles from a remote v1 api.
@@ -240,7 +241,8 @@ def legacy_sync_from_upstream(
         'baseurl': baseurl,
         'github_user': github_user,
         'role_name': role_name,
-        'limit': limit
+        'limit': limit,
+        'start_page': start_page,
     }
 
     for ns_data, rdata, rversions in upstream_role_iterator(**iterator_kwargs):
diff --git a/galaxy_ng/app/management/commands/sync-galaxy-roles.py b/galaxy_ng/app/management/commands/sync-galaxy-roles.py
index f07aa1863b..37b9256f4f 100644
--- a/galaxy_ng/app/management/commands/sync-galaxy-roles.py
+++ b/galaxy_ng/app/management/commands/sync-galaxy-roles.py
@@ -19,6 +19,7 @@ def add_arguments(self, parser):
         parser.add_argument("--github_user", help="find and sync only this namespace name")
         parser.add_argument("--role_name", help="find and sync only this role name")
         parser.add_argument("--limit", type=int)
+        parser.add_argument("--start_page", type=int)
 
     def echo(self, message, style=None):
         style = style or self.style.SUCCESS
@@ -31,4 +32,5 @@ def handle(self, *args, **options):
             github_user=options['github_user'],
             role_name=options['role_name'],
             limit=options['limit'],
+            start_page=options['start_page'],
         )
diff --git a/galaxy_ng/app/utils/galaxy.py b/galaxy_ng/app/utils/galaxy.py
index f9e546d465..0aaee1e648 100644
--- a/galaxy_ng/app/utils/galaxy.py
+++ b/galaxy_ng/app/utils/galaxy.py
@@ -1,5 +1,6 @@
 import logging
 import requests
+import time
 
 from urllib.parse import urlparse
 
@@ -28,6 +29,24 @@ def int_to_uuid(num):
     return uuid
 
 
+def safe_fetch(url):
+    rr = None
+    counter = 0
+    while True:
+        counter += 1
+        logger.info(f'fetch {url}')
+        rr = requests.get(url)
+        if rr.status_code < 500:
+            return rr
+
+        if counter >= 10:
+            return rr
+
+        time.sleep(5)
+
+    return rr
+
+
 def paginated_results(next_url):
     """Iterate through a paginated query and combine the results."""
     parsed = urlparse(next_url)
@@ -35,7 +54,7 @@
     results = []
     while next_url:
         logger.info(f'fetch {next_url}')
-        rr = requests.get(next_url)
+        rr = safe_fetch(next_url)
 
         try:
             ds = rr.json()
@@ -75,7 +94,7 @@ def find_namespace(baseurl=None, name=None, id=None):
 
     if name:
         qurl = baseurl + f'/?name={name}'
-        rr = requests.get(qurl)
+        rr = safe_fetch(qurl)
         ds = rr.json()
         ns_info = ds['results'][0]
 
@@ -83,7 +102,7 @@
 
     elif id:
         qurl = baseurl + f'/{id}/'
-        rr = requests.get(qurl)
+        rr = safe_fetch(qurl)
         ds = rr.json()
         ns_name = ds['name']
 
@@ -94,7 +113,7 @@
     owners = []
     next_owners_url = _baseurl + f'/api/v1/namespaces/{ns_id}/owners/'
     while next_owners_url:
-        o_data = requests.get(next_owners_url).json()
+        o_data = safe_fetch(next_owners_url).json()
         for owner in o_data['results']:
             owners.append(owner)
         if not o_data.get('next'):
@@ -111,7 +130,7 @@ def get_namespace_owners_details(baseurl, ns_id):
     owners = []
     next_owners_url = baseurl + f'/api/v1/namespaces/{ns_id}/owners/'
    while next_owners_url:
-        o_data = requests.get(next_owners_url).json()
+        o_data = safe_fetch(next_owners_url).json()
         for owner in o_data['results']:
             owners.append(owner)
         if not o_data.get('next'):
@@ -152,7 +171,7 @@
 
     while next_url:
         logger.info(f'fetch {pagenum} {next_url}')
-        page = requests.get(next_url)
+        page = safe_fetch(next_url)
 
         # Some upstream pages return ISEs for whatever reason.
         if page.status_code >= 500:
@@ -244,7 +263,7 @@
 
     while next_url:
         logger.info(f'fetch {pagenum} {next_url}')
-        page = requests.get(next_url)
+        page = safe_fetch(next_url)
 
         # Some upstream pages return ISEs for whatever reason.
         if page.status_code >= 500:
@@ -264,7 +283,7 @@
             if ns_id not in namespace_cache:
                 logger.info(_baseurl + f'/api/v1/namespaces/{ns_id}/')
                 ns_url = _baseurl + f'/api/v1/namespaces/{ns_id}/'
-                namespace_data = requests.get(ns_url).json()
+                namespace_data = safe_fetch(ns_url).json()
                 # logger.info(namespace_data)
                 namespace_cache[ns_id] = namespace_data
 
@@ -319,6 +338,7 @@
     github_repo=None,
     role_name=None,
     get_versions=True,
+    start_page=None,
 ):
     """Abstracts the pagination of v1 roles into a generator with error handling."""
     logger.info(f'baseurl1: {baseurl}')
@@ -342,6 +362,12 @@
     else:
         next_url = _baseurl + '/api/v1/roles/'
 
+    if start_page:
+        if '?' in next_url:
+            next_url += f'&page={start_page}'
+        else:
+            next_url += f'?page={start_page}'
+
     namespace_cache = {}
     pagenum = 0
 
@@ -349,7 +375,7 @@
 
     while next_url:
         logger.info(f'fetch {pagenum} {next_url} role-count:{role_count}')
-        page = requests.get(next_url)
+        page = safe_fetch(next_url)
 
         # Some upstream pages return ISEs for whatever reason.
         if page.status_code >= 500:
@@ -369,7 +395,7 @@
             role_upstream_url = _baseurl + f'/api/v1/roles/{remote_id}/'
             logger.info(f'fetch {role_upstream_url}')
 
-            role_page = requests.get(role_upstream_url)
+            role_page = safe_fetch(role_upstream_url)
             if role_page.status_code == 404:
                 continue
 
@@ -386,7 +412,7 @@
             if ns_id not in namespace_cache:
                 logger.info(_baseurl + f'/api/v1/namespaces/{ns_id}/')
                 ns_url = _baseurl + f'/api/v1/namespaces/{ns_id}/'
-                namespace_data = requests.get(ns_url).json()
+                namespace_data = safe_fetch(ns_url).json()
                 # logger.info(namespace_data)
                 namespace_cache[ns_id] = namespace_data
 