
Commit

continue from where it left off.
hrshdhgd committed Sep 23, 2024
1 parent: c535015 · commit: e8218c7
Showing 1 changed file with 38 additions and 34 deletions.
src/uniprot2s3/main.py (72 changes: 38 additions & 34 deletions)
@@ -159,43 +159,47 @@ def fetch_uniprot_data(organism_id):
     max_retries = 3
     backoff_factor = 1
     file_path = UNIPROT_S3_DIR / f"{organism_id}.{UNIPROT_DESIRED_FORMAT}"
-    organism_query = TAXONOMY_ID_UNIPROT_PREFIX + organism_id
-
-    url = construct_query_url(
-        UNIPROT_BASE_URL, UNIPROT_DESIRED_FORMAT, organism_query, UNIPROT_FIELDS, UNIPROT_SIZE, UNIPROT_KEYWORDS
-    )
+    if not file_path.is_file():
+        organism_query = TAXONOMY_ID_UNIPROT_PREFIX + organism_id
 
-    try:
-        # Make the HTTP request to Uniprot
-        response = requests.get(url, timeout=30)
-        response.raise_for_status()
-        _write_file(file_path, response, organism_id, "w")
-
-        while "next" in response.links:
-            next_url = response.links["next"]["url"]
-            retries = 0
-            while retries < max_retries:
-                try:
-                    response = requests.get(next_url, timeout=30)
-                    response.raise_for_status()
-                    _write_file(file_path, response, organism_id, "a")
-                    break
-                except requests.exceptions.RequestException as e:
-                    retries += 1
-                    if retries >= max_retries:
-                        print(f"Failed to fetch {next_url} after {max_retries} attempts.")
-                        raise e
-                    else:
-                        wait_time = backoff_factor * (2 ** (retries - 1))
-                        print(f"Retrying {next_url} in {wait_time} seconds...")
-                        time.sleep(wait_time)
+        url = construct_query_url(
+            UNIPROT_BASE_URL, UNIPROT_DESIRED_FORMAT, organism_query, UNIPROT_FIELDS, UNIPROT_SIZE, UNIPROT_KEYWORDS
+        )
 
-    except requests.exceptions.HTTPError:
-        print(f"Bad request for organism {organism_id} - {response.status_code}")
-    except requests.exceptions.Timeout:
-        print("The request timed out")
-    except requests.exceptions.RequestException as e:
-        print(f"An error occurred: {e}")
+        try:
+            # Make the HTTP request to Uniprot
+            response = requests.get(url, timeout=30)
+            response.raise_for_status()
+            _write_file(file_path, response, organism_id, "w")
+
+            while "next" in response.links:
+                next_url = response.links["next"]["url"]
+                retries = 0
+                while retries < max_retries:
+                    try:
+                        response = requests.get(next_url, timeout=30)
+                        response.raise_for_status()
+                        _write_file(file_path, response, organism_id, "a")
+                        break
+                    except requests.exceptions.RequestException as e:
+                        retries += 1
+                        if retries >= max_retries:
+                            print(f"Failed to fetch {next_url} after {max_retries} attempts.")
+                            raise e
+                        else:
+                            wait_time = backoff_factor * (2 ** (retries - 1))
+                            print(f"Retrying {next_url} in {wait_time} seconds...")
+                            time.sleep(wait_time)
+
+        except requests.exceptions.HTTPError:
+            print(f"Bad request for organism {organism_id} - {response.status_code}")
+        except requests.exceptions.Timeout:
+            print("The request timed out")
+        except requests.exceptions.RequestException as e:
+            print(f"An error occurred: {e}")
+    else:
+        print(f"File {file_path} was already downloaded.")
 
 
 def fetch_uniprot_reference_proteome_data() -> list:
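
For readers skimming the diff, the point of the change is resumability: moving the query construction and request logic under `if not file_path.is_file():` (with an `else` branch reporting the file as already downloaded) lets an interrupted batch be restarted without re-fetching finished organisms. Below is a minimal, hypothetical sketch of a driver loop illustrating that behavior. `download_all`, `fake_fetch`, and the constant values are illustrative stand-ins, not part of the repository; `fetch_uniprot_data` and the `UNIPROT_S3_DIR / f"{organism_id}.{UNIPROT_DESIRED_FORMAT}"` path scheme come from the diff above.

from pathlib import Path

# Hypothetical stand-ins for constants used in uniprot2s3/main.py; the real
# UNIPROT_S3_DIR and UNIPROT_DESIRED_FORMAT are defined elsewhere in the package.
UNIPROT_S3_DIR = Path("s3_downloads")
UNIPROT_DESIRED_FORMAT = "tsv"


def download_all(organism_ids, fetch):
    """Call `fetch` (e.g. fetch_uniprot_data) for each organism ID.

    Because the fetch function now checks file_path.is_file() before querying
    UniProt, re-running this loop after an interruption only downloads the
    organisms that are still missing.
    """
    for organism_id in organism_ids:
        out_file = UNIPROT_S3_DIR / f"{organism_id}.{UNIPROT_DESIRED_FORMAT}"
        if out_file.is_file():
            print(f"{out_file} already present; fetch will skip it.")
        fetch(organism_id)


if __name__ == "__main__":
    # Toy fetch function so the sketch runs standalone; swap in
    # fetch_uniprot_data for real use.
    def fake_fetch(organism_id):
        UNIPROT_S3_DIR.mkdir(parents=True, exist_ok=True)
        path = UNIPROT_S3_DIR / f"{organism_id}.{UNIPROT_DESIRED_FORMAT}"
        if not path.is_file():
            path.write_text(f"data for {organism_id}\n")

    download_all(["9606", "10090"], fake_fetch)  # first run downloads both
    download_all(["9606", "10090"], fake_fetch)  # second run finds both files and skips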
