diff --git a/src/scripts/glue_jobs/process_access_record.py b/src/scripts/glue_jobs/process_access_record.py index 42b9ecf..1aefcbc 100644 --- a/src/scripts/glue_jobs/process_access_record.py +++ b/src/scripts/glue_jobs/process_access_record.py @@ -120,9 +120,16 @@ def get_normalized_method_signature(requesturl): elif "/schema/type/" in requesturl: result = "/schema/type/#" else: - result = re.sub(r'\bsyn\d+(\.\d+)?\b|\b\d+(\.\d+)?\b', '#', requesturl) - result = re.sub(r';[^/]+', '', result) - result = re.sub(r'[\'!@$%^&*()_+{}\[\]:;<>,.?~\\|]+', '', result) + # Remove every ';' together with the characters that follow it, up to (not including) the next '/'. + result = re.sub(r';[^/]+', '', requesturl) + # Remove special (punctuation) characters such as quotes, '.', '_', '&' and '=' from the url. + result = re.sub(r'[\'!@$%^&*()_+{}\[\]:;<>,.?~\\|=]+', '', result) + # Replace id-like substrings with '#'. An id starts with 'syn', 'fh' or a digit; a digit-led id + # must be at least two characters long to match here and may contain word characters (e.g. '7416vr'). + result = re.sub(r'\b(syn|fh)\d+(\.\d+)?\b|\b\d+(\w+)?[^/]\b', '#', result) + # The pattern above cannot match a single-digit id because it needs one more character after the digit. + # Replace any remaining run of digits that directly follows a '/' with '#'. 
+ result = re.sub(r'/\d+', '/#', result) return result def decode_url(encoded_url): diff --git a/tests/test_process_access_record.py b/tests/test_process_access_record.py index 7e6ffac..b5bccd2 100644 --- a/tests/test_process_access_record.py +++ b/tests/test_process_access_record.py @@ -141,12 +141,54 @@ def test_normalized_signature_for_drs_object(self): real_output = process_access_record.get_normalized_method_signature("/ga4gh/drs/v1/objects/syn35423183.1") self.assertEqual(expected_output, real_output) + def test_normalized_signature_for_drs_object_for_fileHandleId(self): + expected_output = "/objects/#" + real_output = process_access_record.get_normalized_method_signature("/ga4gh/drs/v1/objects/fh123") + self.assertEqual(expected_output, real_output) + def test_normalized_signature_for_schema_type(self): expected_output = "/schema/type/#" real_output = process_access_record.get_normalized_method_signature( "/repo/v1/schema/type/registered/a245ac37480fc40739836ce61801d19f1-my.schema-0.36652.1") self.assertEqual(expected_output, real_output) + def test_normalized_signature_for_evaluation_submission_with_string_id(self): + expected_output = "/evaluation/submission/#/status" + real_output = process_access_record.get_normalized_method_signature( + "/repo/v1/evaluation/submission/9720221_curl_168/status") + self.assertEqual(expected_output, real_output) + + def test_normalized_signature_for_evaluation_submission_with_file_handle_id(self): + expected_output = "/evaluation/submission/#/file/#" + real_output = process_access_record.get_normalized_method_signature( + "/repo/v1/evaluation/submission/9720221_curl_168/file/123") + self.assertEqual(expected_output, real_output) + + def test_normalized_signature_for_data_access_submission_id_with_vr(self): + expected_output = "/dataaccesssubmission/#" + real_output = process_access_record.get_normalized_method_signature( + "/repo/v1/dataAccessSubmission/7416vr") + self.assertEqual(expected_output, real_output) + + def 
test_normalized_signature_for_entity_with_version_in_end(self): + expected_output = "/entity/#/version/#" + real_output = process_access_record.get_normalized_method_signature( + "/repo/v1/entity/syn9692796/version/98") + self.assertEqual(expected_output, real_output) + + def test_normalized_signature_for_entity_with_version_in_middle(self): + expected_output = "/entity/#/version/#/json" + real_output = process_access_record.get_normalized_method_signature( + "/repo/v1/entity/syn25830585/version/1/json") + self.assertEqual(expected_output, real_output) + + + def test_normalized_signature_for_team_member_with_query_parameters(self): + expected_output = "/teammembers/#" + real_output = process_access_record.get_normalized_method_signature( + "/repo/v1/teamMembers/3431460&limit=50&offset=0") + self.assertEqual(expected_output, real_output) + def test_normalized_signature_for_team_with_singleQuotes(self): expected_output = "/team/#" real_output = process_access_record.get_normalized_method_signature("/repo/v1/team/3409011'")