From 69e0b4c63728f7da7747e10c9b7dfab0c383ec2a Mon Sep 17 00:00:00 2001 From: Joe Ziminski <55797454+JoeZiminski@users.noreply.github.com> Date: Mon, 1 Jul 2024 15:37:23 +0100 Subject: [PATCH] Suggested num respect name template (#405) * Have get next sub or ses funcs take name template argument. * Add tests for num_digits from name templates. * Handle 'get_num_value_digits_from_regexp' False case. * Move assert back to correct place. --- datashuttle/datashuttle.py | 12 +++ datashuttle/utils/getters.py | 96 ++++++++++++++++--- .../tests_integration/test_create_folders.py | 46 +++++++++ 3 files changed, 143 insertions(+), 11 deletions(-) diff --git a/datashuttle/datashuttle.py b/datashuttle/datashuttle.py index a37d88df7..3bac1f1e6 100644 --- a/datashuttle/datashuttle.py +++ b/datashuttle/datashuttle.py @@ -1080,6 +1080,11 @@ def get_next_sub( If `True, only get names from `local_path`, otherwise from `local_path` and `central_path`. """ + name_template = self.get_name_templates() + name_template_regexp = ( + name_template["sub"] if name_template["on"] else None + ) + return getters.get_next_sub_or_ses( self.cfg, top_level_folder, @@ -1087,6 +1092,7 @@ def get_next_sub( local_only=local_only, return_with_prefix=return_with_prefix, search_str="sub-*", + name_template_regexp=name_template_regexp, ) @check_configs_set @@ -1117,6 +1123,11 @@ def get_next_ses( If `True, only get names from `local_path`, otherwise from `local_path` and `central_path`. 
""" + name_template = self.get_name_templates() + name_template_regexp = ( + name_template["ses"] if name_template["on"] else None + ) + return getters.get_next_sub_or_ses( self.cfg, top_level_folder, @@ -1124,6 +1135,7 @@ def get_next_ses( local_only=local_only, return_with_prefix=return_with_prefix, search_str="ses-*", + name_template_regexp=name_template_regexp, ) # Name Templates diff --git a/datashuttle/utils/getters.py b/datashuttle/utils/getters.py index 262ace1eb..c5d60dbb4 100644 --- a/datashuttle/utils/getters.py +++ b/datashuttle/utils/getters.py @@ -5,8 +5,10 @@ TYPE_CHECKING, Dict, List, + Literal, Optional, Tuple, + Union, ) if TYPE_CHECKING: @@ -33,6 +35,7 @@ def get_next_sub_or_ses( local_only: bool = False, return_with_prefix: bool = True, default_num_value_digits: int = 3, + name_template_regexp: Optional[str] = None, ) -> str: """ Suggest the next available subject or session number. This function will @@ -93,7 +96,10 @@ def get_next_sub_or_ses( max_existing_num, num_value_digits, ) = get_max_sub_or_ses_num_and_value_length( - all_folders, prefix, default_num_value_digits + all_folders, + prefix, + default_num_value_digits, + name_template_regexp, ) # calculate next sub number @@ -110,6 +116,7 @@ def get_max_sub_or_ses_num_and_value_length( all_folders: List[str], prefix: Prefix, default_num_value_digits: Optional[int] = None, + name_template_regexp: Optional[str] = None, ) -> Tuple[int, int]: """ Given a list of BIDS-style folder names, find the maximum subject or @@ -141,12 +148,23 @@ def get_max_sub_or_ses_num_and_value_length( """ if len(all_folders) == 0: - max_existing_num = 0 + assert isinstance( default_num_value_digits, int ), "`default_num_value_digits` must be int`" - num_value_digits = default_num_value_digits + max_existing_num = 0 + + # Try and get the num digits from a name template, otherwise use default. 
+ if name_template_regexp is not None: + num_value_digits = get_num_value_digits_from_regexp( + prefix, name_template_regexp + ) + if num_value_digits is False: + num_value_digits = default_num_value_digits + else: + num_value_digits = default_num_value_digits + else: all_values_str = utils.get_values_from_bids_formatted_name( all_folders, @@ -155,16 +173,22 @@ def get_max_sub_or_ses_num_and_value_length( ) # First get the length of bids-key value across the project - # (e.g. sub-003 has three values). - all_num_value_digits = [len(value) for value in all_values_str] + # or name template if it exists (e.g. sub-003 has three values). + # If a name template exists but the length can't be determined from it, + # default back to the project. + if name_template_regexp is not None: + num_value_digits = get_num_value_digits_from_regexp( + prefix, name_template_regexp + ) - if len(set(all_num_value_digits)) != 1: - utils.log_and_raise_error( - f"The number of value digits for the {prefix} level are not " - f"consistent. Cannot suggest a {prefix} number.", - NeuroBlueprintError, + if num_value_digits is False: + num_value_digits = get_num_value_digits_from_project( + all_values_str, prefix + ) + else: + num_value_digits = get_num_value_digits_from_project( + all_values_str, prefix ) - num_value_digits = all_num_value_digits[0] # Then get the latest existing sub or ses number in the project. all_value_nums = sorted( @@ -182,6 +206,56 @@ def get_max_sub_or_ses_num_and_value_length( return max_existing_num, num_value_digits +def get_num_value_digits_from_project( + all_values_str: List[str], prefix: Prefix +) -> int: + """ + Find the number of digits for the sub or ses key within the project. + `all_values_str` is a list of all the sub or ses values from within + the project. 
+ """ + all_num_value_digits = [len(value) for value in all_values_str] + + if len(set(all_num_value_digits)) != 1: + utils.log_and_raise_error( + f"The number of value digits for the {prefix} level are not " + f"consistent. Cannot suggest a {prefix} number.", + NeuroBlueprintError, + ) + num_value_digits = all_num_value_digits[0] + + return num_value_digits + + +def get_num_value_digits_from_regexp( + prefix: Prefix, name_template_regexp: str +) -> Union[Literal[False], int]: + """ + Given a name template regexp, find the number of values for the + sub or ses key. These will be fixed with "\\d" (digit) or ".?" (wildcard). + If there is a length-unspecific wildcard (.*) in the sub key, then skip. + In practice, there should never really be a .* in the sub or ses + key of a name template, but handle it just in case. + """ + all_values_str = utils.get_values_from_bids_formatted_name( + [name_template_regexp], prefix, return_as_int=False + )[0] + + if "*" in all_values_str: + return False + else: + num_digits = len( + [char for char in all_values_str if char in ["d", "?"]] + ) + + if num_digits == 0: + # breaks assumption there is some usable regexp here, + # better to use project instead. + return False + + return num_digits + + +def get_existing_project_paths() -> List[Path]: """ Return full path and names of datashuttle projects on diff --git a/tests/tests_integration/test_create_folders.py b/tests/tests_integration/test_create_folders.py index cb7737ef9..75a977aa5 100644 --- a/tests/tests_integration/test_create_folders.py +++ b/tests/tests_integration/test_create_folders.py @@ -423,6 +423,52 @@ def test_get_next_ses(self, project, return_with_prefix, top_level_folder): ) assert new_num == "ses-006" if return_with_prefix else "006" + def test_get_next_sub_and_ses_name_template(self, project): + """ + In the case where a name template exists, these getters should use the + number of digits on the template (even if these are different + within the project!).
+ """ + project.create_folders("rawdata", "sub-001", "ses-001") + + name_templates = { + "on": True, + "sub": r"sub-\d.?.?.?\d_key-value", # 5 digits + "ses": r"ses-\d_@DATE@", # 1 digit + } + project.set_name_templates(name_templates) + + new_num = project.get_next_sub( + "rawdata", return_with_prefix=False, local_only=True + ) + assert new_num == "00002" + + new_num = project.get_next_ses( + "rawdata", "sub-001", return_with_prefix=False, local_only=True + ) + assert new_num == "2" + + # Quick test on two cases that should not use name template. + # Test sub only as underlying code is the same. If name templates + # is off, use the num_digits from the project, same if the sub + # key value takes a length-unspecific wildcard (should never really happen). + name_templates["on"] = False + project.set_name_templates(name_templates) + + new_num = project.get_next_sub( + "rawdata", return_with_prefix=False, local_only=True + ) + assert new_num == "002" + + name_templates["on"] = True + name_templates["sub"] = "sub-.*" + project.set_name_templates(name_templates) + + new_num = project.get_next_sub( + "rawdata", return_with_prefix=False, local_only=True + ) + assert new_num == "002" + # ---------------------------------------------------------------------------------- # Test Helpers # ----------------------------------------------------------------------------------