Skip to content

Commit

Permalink
Added retries and more descriptive logging for dnstwist scan
Browse files Browse the repository at this point in the history
  • Loading branch information
arng4108 committed Jan 7, 2025
1 parent 694dc68 commit ad65e58
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 6 deletions.
27 changes: 27 additions & 0 deletions src/pe_source/data/pe_db/db_query_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def connect():
conn = psycopg2.connect(**CONN_PARAMS_DIC)
except OperationalError as err:
show_psycopg2_exception(err)
LOGGER.error("Except condition reached for connect()")
conn = None
return conn

Expand Down Expand Up @@ -102,14 +103,19 @@ def get_orgs():
row["state_fips"] = Decimal(row.get("state_fips"))
return result
except requests.exceptions.HTTPError as errh:
LOGGER.error("Except condition (HTTPError) reached for get_orgs()")
LOGGER.error(errh)
except requests.exceptions.ConnectionError as errc:
LOGGER.error("Except condition (ConnectionError) reached for get_orgs()")
LOGGER.error(errc)
except requests.exceptions.Timeout as errt:
LOGGER.error("Except condition (Timeout) reached for get_orgs()")
LOGGER.error(errt)
except requests.exceptions.RequestException as err:
LOGGER.error("Except condition (RequestException) reached for get_orgs()")
LOGGER.error(err)
except json.decoder.JSONDecodeError as err:
LOGGER.error("Except condition (JSONDecodeError) reached for get_orgs()")
LOGGER.error(err)


Expand Down Expand Up @@ -371,14 +377,19 @@ def get_data_source_uid(source):
tup_result = [tuple(row.values()) for row in result]
return tup_result[0][0]
except requests.exceptions.HTTPError as errh:
LOGGER.error("Except condition (HTTPError) reached for get_data_source_uid()")
LOGGER.error(errh)
except requests.exceptions.ConnectionError as errc:
LOGGER.error("Except condition (ConnectionError) reached for get_data_source_uid()")
LOGGER.error(errc)
except requests.exceptions.Timeout as errt:
LOGGER.error("Except condition (Timeout) reached for get_data_source_uid()")
LOGGER.error(errt)
except requests.exceptions.RequestException as err:
LOGGER.error("Except condition (RequestException) reached for get_data_source_uid()")
LOGGER.error(err)
except json.decoder.JSONDecodeError as err:
LOGGER.error("Except condition (JSONDecodeError) reached for get_data_source_uid()")
LOGGER.error(err)


Expand Down Expand Up @@ -575,14 +586,19 @@ def org_root_domains(org_uid):
result_dict_list = result_df.to_dict("records")
return result_dict_list
except requests.exceptions.HTTPError as errh:
LOGGER.error("Except condition (HTTPError) reached for org_root_domains()")
LOGGER.error(errh)
except requests.exceptions.ConnectionError as errc:
LOGGER.error("Except condition (ConnectionError) reached for org_root_domains()")
LOGGER.error(errc)
except requests.exceptions.Timeout as errt:
LOGGER.error("Except condition (Timeout) reached for org_root_domains()")
LOGGER.error(errt)
except requests.exceptions.RequestException as err:
LOGGER.error("Except condition (RequestException) reached for org_root_domains()")
LOGGER.error(err)
except json.decoder.JSONDecodeError as err:
LOGGER.error("Except condition (JSONDecodeError) reached for org_root_domains()")
LOGGER.error(err)

def get_root_domains_api(org_uid):
Expand Down Expand Up @@ -766,6 +782,7 @@ def getSubdomain(domain):
return sub[0][0]
except (Exception, psycopg2.DatabaseError):
print("Adding domain to the sub-domain table")
LOGGER.info("Except condition reached for getSubdomain(), adding new domain to subdomain table")
finally:
if conn is not None:
close(conn)
Expand Down Expand Up @@ -1181,14 +1198,19 @@ def execute_dnstwist_data(df):
result = requests.put(endpoint_url, headers=headers, data=data)
return result.json()
except requests.exceptions.HTTPError as errh:
LOGGER.error("Except condition (HTTPError) reached for execute_dnstwist_data()")
LOGGER.error(errh)
except requests.exceptions.ConnectionError as errc:
LOGGER.error("Except condition (ConnectionError) reached for execute_dnstwist_data()")
LOGGER.error(errc)
except requests.exceptions.Timeout as errt:
LOGGER.error("Except condition (Timeout) reached for execute_dnstwist_data()")
LOGGER.error(errt)
except requests.exceptions.RequestException as err:
LOGGER.error("Except condition (RequestException) reached for execute_dnstwist_data()")
LOGGER.error(err)
except json.decoder.JSONDecodeError as err:
LOGGER.error("Except condition (JSONDecodeError) reached for execute_dnstwist_data()")
LOGGER.error(err)


Expand Down Expand Up @@ -1302,14 +1324,19 @@ def addSubdomain(domain, pe_org_uid, root):
# Process data and return
LOGGER.info(result)
except requests.exceptions.HTTPError as errh:
LOGGER.error("Except condition (HTTPError) reached for addSubdomain()")
LOGGER.error(errh)
except requests.exceptions.ConnectionError as errc:
LOGGER.error("Except condition (ConnectionError) reached for addSubdomain()")
LOGGER.error(errc)
except requests.exceptions.Timeout as errt:
LOGGER.error("Except condition (Timeout) reached for addSubdomain()")
LOGGER.error(errt)
except requests.exceptions.RequestException as err:
LOGGER.error("Except condition (RequestException) reached for addSubdomain()")
LOGGER.error(err)
except json.decoder.JSONDecodeError as err:
LOGGER.error("Except condition (JSONDecodeError) reached for addSubdomain()")
LOGGER.error(err)


Expand Down
30 changes: 24 additions & 6 deletions src/pe_source/dnstwistscript.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import json
import logging
import pathlib
import time
import traceback


Expand Down Expand Up @@ -43,9 +44,16 @@ def checkBlocklist(dom, sub_domain_uid, source_uid, pe_org_uid, perm_list):
return None, perm_list

# Check IP in Blocklist API
response = requests.get(
"http://api.blocklist.de/api.php?ip=" + str(dom["dns_a"][0])
).content
blocklist_url = "http://api.blocklist.de/api.php?ip=" + str(dom["dns_a"][0])
response = requests.get(blocklist_url)
# Retry clause
retry_count, max_retries, time_delay = 1, 10, 5
while response.status_code != 200 and retry_count <= max_retries:
LOGGER.warning(f"Retrying Blocklist.de API endpoint (code {response.status_code}), attempt {retry_count} of {max_retries} (url: {blocklist_url})")
time.sleep(time_delay)
response = requests.get(blocklist_url)
retry_count += 1
response = response.content

if str(response) != "b'attacks: 0<br />reports: 0<br />'":
try:
Expand Down Expand Up @@ -78,9 +86,17 @@ def checkBlocklist(dom, sub_domain_uid, source_uid, pe_org_uid, perm_list):
dom["dns_aaaa"] = [""]
else:
# Check IP in Blocklist API
response = requests.get(
"http://api.blocklist.de/api.php?ip=" + str(dom["dns_aaaa"][0])
).content
blocklist_url = "http://api.blocklist.de/api.php?ip=" + str(dom["dns_aaaa"][0])
response = requests.get(blocklist_url)
# Retry clause
retry_count, max_retries, time_delay = 1, 10, 5
while response.status_code != 200 and retry_count <= max_retries:
LOGGER.warning(f"Retrying Blocklist.de API endpoint (code {response.status_code}), attempt {retry_count} of {max_retries} (url: {blocklist_url})")
time.sleep(time_delay)
response = requests.get(blocklist_url)
retry_count += 1
response = response.content

if str(response) != "b'attacks: 0<br />reports: 0<br />'":
try:
malicious = True
Expand All @@ -90,6 +106,8 @@ def checkBlocklist(dom, sub_domain_uid, source_uid, pe_org_uid, perm_list):
malicious = False
dshield_attacks = 0
dshield_count = 0

# Check IP in DShield API
try:
results = dshield.ip(str(dom["dns_aaaa"][0]), return_format=dshield.JSON)
results = json.loads(results)
Expand Down

0 comments on commit ad65e58

Please sign in to comment.