Skip to content

Commit

Permalink
Suggested num respect name template (#405)
Browse files Browse the repository at this point in the history
* Have get next sub or ses funcs take name template argument.

* Add tests for num_digits from name templates.

* Handle 'get_num_value_digits_from_regexp' False case.

* Move assert back to correct place.
  • Loading branch information
JoeZiminski authored Jul 1, 2024
1 parent af87077 commit 69e0b4c
Show file tree
Hide file tree
Showing 3 changed files with 143 additions and 11 deletions.
12 changes: 12 additions & 0 deletions datashuttle/datashuttle.py
Original file line number Diff line number Diff line change
Expand Up @@ -1080,13 +1080,19 @@ def get_next_sub(
If `True, only get names from `local_path`, otherwise from
`local_path` and `central_path`.
"""
name_template = self.get_name_templates()
name_template_regexp = (
name_template["sub"] if name_template["on"] else None
)

return getters.get_next_sub_or_ses(
self.cfg,
top_level_folder,
sub=None,
local_only=local_only,
return_with_prefix=return_with_prefix,
search_str="sub-*",
name_template_regexp=name_template_regexp,
)

@check_configs_set
Expand Down Expand Up @@ -1117,13 +1123,19 @@ def get_next_ses(
If `True, only get names from `local_path`, otherwise from
`local_path` and `central_path`.
"""
name_template = self.get_name_templates()
name_template_regexp = (
name_template["ses"] if name_template["on"] else None
)

return getters.get_next_sub_or_ses(
self.cfg,
top_level_folder,
sub=sub,
local_only=local_only,
return_with_prefix=return_with_prefix,
search_str="ses-*",
name_template_regexp=name_template_regexp,
)

# Name Templates
Expand Down
96 changes: 85 additions & 11 deletions datashuttle/utils/getters.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
TYPE_CHECKING,
Dict,
List,
Literal,
Optional,
Tuple,
Union,
)

if TYPE_CHECKING:
Expand All @@ -33,6 +35,7 @@ def get_next_sub_or_ses(
local_only: bool = False,
return_with_prefix: bool = True,
default_num_value_digits: int = 3,
name_template_regexp: Optional[str] = None,
) -> str:
"""
Suggest the next available subject or session number. This function will
Expand Down Expand Up @@ -93,7 +96,10 @@ def get_next_sub_or_ses(
max_existing_num,
num_value_digits,
) = get_max_sub_or_ses_num_and_value_length(
all_folders, prefix, default_num_value_digits
all_folders,
prefix,
default_num_value_digits,
name_template_regexp,
)

# calculate next sub number
Expand All @@ -110,6 +116,7 @@ def get_max_sub_or_ses_num_and_value_length(
all_folders: List[str],
prefix: Prefix,
default_num_value_digits: Optional[int] = None,
name_template_regexp: Optional[str] = None,
) -> Tuple[int, int]:
"""
Given a list of BIDS-style folder names, find the maximum subject or
Expand Down Expand Up @@ -141,12 +148,23 @@ def get_max_sub_or_ses_num_and_value_length(
"""
if len(all_folders) == 0:
max_existing_num = 0

assert isinstance(
default_num_value_digits, int
), "`default_num_value_digits` must be int`"

num_value_digits = default_num_value_digits
max_existing_num = 0

# Try and get the num digits from a name template, otherwise use default.
if name_template_regexp is not None:
num_value_digits = get_num_value_digits_from_regexp(
prefix, name_template_regexp
)
if num_value_digits is False:
num_value_digits = default_num_value_digits
else:
num_value_digits = default_num_value_digits

else:
all_values_str = utils.get_values_from_bids_formatted_name(
all_folders,
Expand All @@ -155,16 +173,22 @@ def get_max_sub_or_ses_num_and_value_length(
)

# First get the length of bids-key value across the project
# (e.g. sub-003 has three values).
all_num_value_digits = [len(value) for value in all_values_str]
# or name template if it exists (e.g. sub-003 has three values).
# If a name template exists but the length can't be determined from it,
# default back to the project.
if name_template_regexp is not None:
num_value_digits = get_num_value_digits_from_regexp(
prefix, name_template_regexp
)

if len(set(all_num_value_digits)) != 1:
utils.log_and_raise_error(
f"The number of value digits for the {prefix} level are not "
f"consistent. Cannot suggest a {prefix} number.",
NeuroBlueprintError,
if num_value_digits is False:
num_value_digits = get_num_value_digits_from_project(
all_values_str, prefix
)
else:
num_value_digits = get_num_value_digits_from_project(
all_values_str, prefix
)
num_value_digits = all_num_value_digits[0]

# Then get the latest existing sub or ses number in the project.
all_value_nums = sorted(
Expand All @@ -182,6 +206,56 @@ def get_max_sub_or_ses_num_and_value_length(
return max_existing_num, num_value_digits


def get_num_value_digits_from_project(
    all_values_str: List[str], prefix: Prefix
) -> int:
    """
    Return the character length shared by every sub or ses value
    found in the project.

    Parameters
    ----------
    all_values_str
        The sub or ses values (as strings, e.g. "001") collected
        from the project.
    prefix
        The level being inspected; only used in the error message.

    Returns
    -------
    The length of the first value. When the values do not all share
    one length, `utils.log_and_raise_error` is called with
    `NeuroBlueprintError` before the return is reached.
    """
    value_lengths = [len(value_str) for value_str in all_values_str]
    distinct_lengths = set(value_lengths)

    # Exactly one distinct length means the project is consistent.
    if len(distinct_lengths) != 1:
        utils.log_and_raise_error(
            f"The number of value digits for the {prefix} level are not "
            f"consistent. Cannot suggest a {prefix} number.",
            NeuroBlueprintError,
        )

    return value_lengths[0]


def get_num_value_digits_from_regexp(
    prefix: Prefix, name_template_regexp: str
) -> Union[Literal[False], int]:
    r"""
    Given a name template regexp, find the number of value characters
    for the sub or ses key.

    Each value character is expected to be written as "\d" (digit) or
    ".?" (wildcard), so the count is the number of "d" and "?"
    characters in the value part of the template.

    If the value contains a quantifier whose length cannot be read off
    by counting these markers ("*", "+" or a "{...}" repetition), the
    template is not used. In practice there should never really be
    such a quantifier in the sub or ses key of a name template, but
    handle it just in case.

    Parameters
    ----------
    prefix
        The key ("sub" or "ses") whose value is extracted from the
        template.
    name_template_regexp
        The name template regular expression for this level.

    Returns
    -------
    The number of value characters, or `False` when it cannot be
    determined from the template. Callers must test the result with
    `is False` because a count is always greater than zero.
    """
    value_regexp = utils.get_values_from_bids_formatted_name(
        [name_template_regexp], prefix, return_as_int=False
    )[0]

    # "*" and "+" are open-ended and "{n}" repeats the previous token,
    # so counting "d" / "?" markers would give the wrong length for
    # all three (e.g. "\d+" or "\d{3}" would both be counted as 1).
    if any(quantifier in value_regexp for quantifier in "*+{"):
        return False

    num_digits = sum(1 for char in value_regexp if char in ("d", "?"))

    if num_digits == 0:
        # breaks assumption there is some usable regexp here,
        # better to use project instead.
        return False

    return num_digits


def get_existing_project_paths() -> List[Path]:
"""
Return full path and names of datashuttle projects on
Expand Down
46 changes: 46 additions & 0 deletions tests/tests_integration/test_create_folders.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,52 @@ def test_get_next_ses(self, project, return_with_prefix, top_level_folder):
)
assert new_num == "ses-006" if return_with_prefix else "006"

def test_get_next_sub_and_ses_name_template(self, project):
    """
    When a name template is switched on, the suggested next sub / ses
    number should take its number of digits from the template, even
    when that differs from what already exists in the project.
    """
    project.create_folders("rawdata", "sub-001", "ses-001")

    templates = dict(
        on=True,
        sub=r"sub-\d.?.?.?\d_key-value",  # five value characters
        ses=r"ses-\d_@DATE@",  # one value character
    )
    project.set_name_templates(templates)

    next_sub = project.get_next_sub(
        "rawdata", return_with_prefix=False, local_only=True
    )
    assert next_sub == "00002"

    next_ses = project.get_next_ses(
        "rawdata", "sub-001", return_with_prefix=False, local_only=True
    )
    assert next_ses == "2"

    # Two quick cases in which the template must NOT be used. Only sub
    # is checked because the underlying code is shared with ses.

    # Case 1: name templates are switched off, so the number of digits
    # comes from the project.
    templates["on"] = False
    project.set_name_templates(templates)

    next_sub = project.get_next_sub(
        "rawdata", return_with_prefix=False, local_only=True
    )
    assert next_sub == "002"

    # Case 2: the sub value is a length-unspecific wildcard (should
    # never really happen), so again the project is used.
    templates.update(on=True, sub="sub-.*")
    project.set_name_templates(templates)

    next_sub = project.get_next_sub(
        "rawdata", return_with_prefix=False, local_only=True
    )
    assert next_sub == "002"

# ----------------------------------------------------------------------------------
# Test Helpers
# ----------------------------------------------------------------------------------
Expand Down

0 comments on commit 69e0b4c

Please sign in to comment.