Skip to content

Commit

Permalink
regex changes
Browse files Browse the repository at this point in the history
  • Loading branch information
SandhraSokhal committed Oct 4, 2023
1 parent da798f1 commit 9c40d5e
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 3 deletions.
13 changes: 10 additions & 3 deletions src/scripts/glue_jobs/process_access_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,9 +120,16 @@ def get_normalized_method_signature(requesturl):
elif "/schema/type/" in requesturl:
result = "/schema/type/#"
else:
result = re.sub(r'\bsyn\d+(\.\d+)?\b|\b\d+(\.\d+)?\b', '#', requesturl)
result = re.sub(r';[^/]+', '', result)
result = re.sub(r'[\'!@$%^&*()_+{}\[\]:;<>,.?~\\|]+', '', result)
# find and remove substring in url starting from ';' until '/' if present.
result = re.sub(r';[^/]+', '', requesturl)
# find and remove special characters in url
result = re.sub(r'[\'!@$%^&*()_+{}\[\]:;<>,.?~\\|=]+', '', result)
# find and replace substrings with length >=2 in url containing ids with '#'. ID can start with 'syn',
# 'fh' or digits.
result = re.sub(r'\b(syn|fh)\d+(\.\d+)?\b|\b\d+(\w+)?[^/]\b', '#', result)
# The regex above only matches IDs of at least two characters, so single-digit IDs are missed.
# Replace any remaining run of digits that directly follows a '/' with '#'.
result = re.sub(r'/\d+', '/#', result)
return result

def decode_url(encoded_url):
Expand Down
42 changes: 42 additions & 0 deletions tests/test_process_access_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,12 +141,54 @@ def test_normalized_signature_for_drs_object(self):
real_output = process_access_record.get_normalized_method_signature("/ga4gh/drs/v1/objects/syn35423183.1")
self.assertEqual(expected_output, real_output)

def test_normalized_signature_for_drs_object_for_fileHandleId(self):
    # A DRS object URL whose ID carries the 'fh' prefix is expected to
    # normalize to '/objects/#'.
    request_url = "/ga4gh/drs/v1/objects/fh123"
    normalized = process_access_record.get_normalized_method_signature(request_url)
    self.assertEqual("/objects/#", normalized)

def test_normalized_signature_for_schema_type(self):
    # Everything after '/schema/type/' is expected to collapse into a
    # single '#'.
    request_url = (
        "/repo/v1/schema/type/registered/"
        "a245ac37480fc40739836ce61801d19f1-my.schema-0.36652.1"
    )
    normalized = process_access_record.get_normalized_method_signature(request_url)
    self.assertEqual("/schema/type/#", normalized)

def test_normalized_signature_for_evaluation_submission_with_string_id(self):
    # A submission ID that mixes digits, underscores and letters is expected
    # to be replaced by '#' while the trailing '/status' segment is kept.
    request_url = "/repo/v1/evaluation/submission/9720221_curl_168/status"
    normalized = process_access_record.get_normalized_method_signature(request_url)
    self.assertEqual("/evaluation/submission/#/status", normalized)

def test_normalized_signature_for_evaluation_submission_with_file_handle_id(self):
    # Both the mixed-character submission ID and the numeric ID after
    # '/file/' are expected to become '#'.
    request_url = "/repo/v1/evaluation/submission/9720221_curl_168/file/123"
    normalized = process_access_record.get_normalized_method_signature(request_url)
    self.assertEqual("/evaluation/submission/#/file/#", normalized)

def test_normalized_signature_for_data_access_submission_id_with_vr(self):
    # An ID made of digits followed by the letters 'vr' is expected to be
    # replaced by '#' as a whole.
    request_url = "/repo/v1/dataAccessSubmission/7416vr"
    normalized = process_access_record.get_normalized_method_signature(request_url)
    self.assertEqual("/dataaccesssubmission/#", normalized)

def test_normalized_signature_for_entity_with_version_in_end(self):
    # The 'syn' entity ID and the version number that ends the URL are both
    # expected to become '#'.
    request_url = "/repo/v1/entity/syn9692796/version/98"
    normalized = process_access_record.get_normalized_method_signature(request_url)
    self.assertEqual("/entity/#/version/#", normalized)

def test_normalized_signature_for_entity_with_vesion_in_middle(self):
    # A single-digit version number in the middle of the URL is expected to
    # become '#', with the trailing '/json' segment kept.
    request_url = "/repo/v1/entity/syn25830585/version/1/json"
    normalized = process_access_record.get_normalized_method_signature(request_url)
    self.assertEqual("/entity/#/version/#/json", normalized)


def test_normalized_signature_for_team_member_with_query_parameters(self):
    # The '&limit=...&offset=...' text appended to the numeric ID is expected
    # to disappear, leaving a single '#'.
    request_url = "/repo/v1/teamMembers/3431460&limit=50&offset=0"
    normalized = process_access_record.get_normalized_method_signature(request_url)
    self.assertEqual("/teammembers/#", normalized)

def test_normalized_signature_for_team_with_singleQuotes(self):
expected_output = "/team/#"
real_output = process_access_record.get_normalized_method_signature("/repo/v1/team/3409011'")
Expand Down

0 comments on commit 9c40d5e

Please sign in to comment.