Commit d671843
More resiliency with 500ISEs
No-Issue

Signed-off-by: James Tanner <[email protected]>
jctanner committed Sep 19, 2023
1 parent 6030349 commit d671843
Showing 3 changed files with 44 additions and 13 deletions.
galaxy_ng/app/api/v1/tasks.py (4 additions, 2 deletions)
@@ -200,7 +200,8 @@ def legacy_sync_from_upstream(
     github_user=None,
     role_name=None,
     role_version=None,
-    limit=None
+    limit=None,
+    start_page=None,
 ):
     """
     Sync legacy roles from a remote v1 api.
@@ -240,7 +241,8 @@ def legacy_sync_from_upstream(
         'baseurl': baseurl,
         'github_user': github_user,
         'role_name': role_name,
-        'limit': limit
+        'limit': limit,
+        'start_page': start_page,
     }
     for ns_data, rdata, rversions in upstream_role_iterator(**iterator_kwargs):

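With both hunks applied, start_page flows from the task entrypoint into upstream_role_iterator. A minimal sketch of a direct call (the base URL and numbers are illustrative placeholders, not values from this commit):

    from galaxy_ng.app.api.v1.tasks import legacy_sync_from_upstream

    # Resume an interrupted sync at upstream page 50, capped at 1000 roles.
    legacy_sync_from_upstream(
        baseurl='https://galaxy.ansible.com',
        limit=1000,
        start_page=50,
    )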
galaxy_ng/app/management/commands/sync-galaxy-roles.py (2 additions, 0 deletions)
@@ -19,6 +19,7 @@ def add_arguments(self, parser):
         parser.add_argument("--github_user", help="find and sync only this namespace name")
         parser.add_argument("--role_name", help="find and sync only this role name")
         parser.add_argument("--limit", type=int)
+        parser.add_argument("--start_page", type=int)

     def echo(self, message, style=None):
         style = style or self.style.SUCCESS
@@ -31,4 +32,5 @@ def handle(self, *args, **options):
             github_user=options['github_user'],
             role_name=options['role_name'],
             limit=options['limit'],
+            start_page=options['start_page'],
         )
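For reference, the new flag is passed like any other command option; a sketch of an invocation (assuming a configured galaxy_ng Django environment):

    from django.core.management import call_command

    # Equivalent to: django-admin sync-galaxy-roles --limit 1000 --start_page 50
    call_command('sync-galaxy-roles', limit=1000, start_page=50)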
galaxy_ng/app/utils/galaxy.py (38 additions, 11 deletions)
@@ -1,5 +1,6 @@
 import logging
 import requests
+import time
 from urllib.parse import urlparse


@@ -28,14 +29,32 @@ def int_to_uuid(num):
     return uuid


+def safe_fetch(url):
+    rr = None
+    counter = 0
+    while True:
+        counter += 1
+        logger.info(f'fetch {url}')
+        rr = requests.get(url)
+        if rr.status_code < 500:
+            return rr
+
+        if counter >= 10:
+            return rr
+
+        time.sleep(5)
+
+    return rr
+
+
 def paginated_results(next_url):
     """Iterate through a paginated query and combine the results."""
     parsed = urlparse(next_url)
     _baseurl = parsed.scheme + '://' + parsed.netloc
     results = []
     while next_url:
         logger.info(f'fetch {next_url}')
-        rr = requests.get(next_url)
+        rr = safe_fetch(next_url)

         try:
             ds = rr.json()
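safe_fetch retries a URL up to 10 times, sleeping 5 seconds between attempts, and hands back the last response once the status drops below 500 or the attempts run out, so a persistently failing URL costs at most about 45 seconds of sleep. For comparison only (not part of this commit), requests can delegate a similar policy to urllib3's Retry:

    import requests
    from requests.adapters import HTTPAdapter
    from urllib3.util.retry import Retry

    # Retry GETs up to 10 times on 5xx responses; unlike safe_fetch, this
    # raises once retries are exhausted instead of returning the response.
    session = requests.Session()
    retry = Retry(total=10, status_forcelist=[500, 502, 503, 504], backoff_factor=1)
    session.mount('https://', HTTPAdapter(max_retries=retry))
    rr = session.get('https://galaxy.ansible.com/api/v1/roles/')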
@@ -75,15 +94,15 @@ def find_namespace(baseurl=None, name=None, id=None):

     if name:
         qurl = baseurl + f'/?name={name}'
-        rr = requests.get(qurl)
+        rr = safe_fetch(qurl)
         ds = rr.json()

         ns_info = ds['results'][0]
         ns_name = ns_info['name']

     elif id:
         qurl = baseurl + f'/{id}/'
-        rr = requests.get(qurl)
+        rr = safe_fetch(qurl)
         ds = rr.json()

         ns_name = ds['name']
@@ -94,7 +113,7 @@ def find_namespace(baseurl=None, name=None, id=None):
     owners = []
     next_owners_url = _baseurl + f'/api/v1/namespaces/{ns_id}/owners/'
     while next_owners_url:
-        o_data = requests.get(next_owners_url).json()
+        o_data = safe_fetch(next_owners_url).json()
         for owner in o_data['results']:
             owners.append(owner)
         if not o_data.get('next'):
@@ -111,7 +130,7 @@ def get_namespace_owners_details(baseurl, ns_id):
     owners = []
     next_owners_url = baseurl + f'/api/v1/namespaces/{ns_id}/owners/'
     while next_owners_url:
-        o_data = requests.get(next_owners_url).json()
+        o_data = safe_fetch(next_owners_url).json()
         for owner in o_data['results']:
             owners.append(owner)
         if not o_data.get('next'):
@@ -152,7 +171,7 @@ def upstream_namespace_iterator(
     while next_url:
         logger.info(f'fetch {pagenum} {next_url}')

-        page = requests.get(next_url)
+        page = safe_fetch(next_url)

         # Some upstream pages return ISEs for whatever reason.
         if page.status_code >= 500:
@@ -244,7 +263,7 @@ def upstream_collection_iterator(
     while next_url:
         logger.info(f'fetch {pagenum} {next_url}')

-        page = requests.get(next_url)
+        page = safe_fetch(next_url)

         # Some upstream pages return ISEs for whatever reason.
         if page.status_code >= 500:
@@ -264,7 +283,7 @@ def upstream_collection_iterator(
             if ns_id not in namespace_cache:
                 logger.info(_baseurl + f'/api/v1/namespaces/{ns_id}/')
                 ns_url = _baseurl + f'/api/v1/namespaces/{ns_id}/'
-                namespace_data = requests.get(ns_url).json()
+                namespace_data = safe_fetch(ns_url).json()
                 # logger.info(namespace_data)
                 namespace_cache[ns_id] = namespace_data

@@ -319,6 +338,7 @@ def upstream_role_iterator(
     github_repo=None,
     role_name=None,
     get_versions=True,
+    start_page=None,
 ):
     """Abstracts the pagination of v1 roles into a generator with error handling."""
     logger.info(f'baseurl1: {baseurl}')
@@ -342,14 +362,20 @@ def upstream_role_iterator(
     else:
         next_url = _baseurl + '/api/v1/roles/'

+    if start_page:
+        if '?' in next_url:
+            next_url += f'&page={start_page}'
+        else:
+            next_url += f'/?page={start_page}'
+
     namespace_cache = {}

     pagenum = 0
     role_count = 0
     while next_url:
         logger.info(f'fetch {pagenum} {next_url} role-count:{role_count}')

-        page = requests.get(next_url)
+        page = safe_fetch(next_url)

         # Some upstream pages return ISEs for whatever reason.
         if page.status_code >= 500:
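To make the page-offset logic above concrete (an illustration, not code from the commit; the host is a placeholder), start_page=5 against the bare roles endpoint takes the else branch:

    next_url = 'https://galaxy.ansible.com/api/v1/roles/'
    start_page = 5
    if '?' in next_url:
        next_url += f'&page={start_page}'
    else:
        next_url += f'/?page={start_page}'
    # next_url == 'https://galaxy.ansible.com/api/v1/roles//?page=5'
    # (a next_url that already contains '?' would get '&page=' appended
    # instead; the doubled slash comes from the f'/?page=' suffix landing
    # after a trailing '/')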
@@ -369,7 +395,8 @@ def upstream_role_iterator(
             role_upstream_url = _baseurl + f'/api/v1/roles/{remote_id}/'
             logger.info(f'fetch {role_upstream_url}')

-            role_page = requests.get(role_upstream_url)
+            # role_page = requests.get(role_upstream_url)
+            role_page = safe_fetch(role_upstream_url)
             if role_page.status_code == 404:
                 continue

@@ -386,7 +413,7 @@ def upstream_role_iterator(
             if ns_id not in namespace_cache:
                 logger.info(_baseurl + f'/api/v1/namespaces/{ns_id}/')
                 ns_url = _baseurl + f'/api/v1/namespaces/{ns_id}/'
-                namespace_data = requests.get(ns_url).json()
+                namespace_data = safe_fetch(ns_url).json()
                 # logger.info(namespace_data)
                 namespace_cache[ns_id] = namespace_data

Expand Down
