Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Suggested num respect name template #405

Merged
merged 4 commits into from
Jul 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions datashuttle/datashuttle.py
Original file line number Diff line number Diff line change
Expand Up @@ -1080,13 +1080,19 @@ def get_next_sub(
If `True`, only get names from `local_path`, otherwise from
`local_path` and `central_path`.
"""
name_template = self.get_name_templates()
name_template_regexp = (
name_template["sub"] if name_template["on"] else None
)

return getters.get_next_sub_or_ses(
self.cfg,
top_level_folder,
sub=None,
local_only=local_only,
return_with_prefix=return_with_prefix,
search_str="sub-*",
name_template_regexp=name_template_regexp,
)

@check_configs_set
Expand Down Expand Up @@ -1117,13 +1123,19 @@ def get_next_ses(
If `True`, only get names from `local_path`, otherwise from
`local_path` and `central_path`.
"""
name_template = self.get_name_templates()
name_template_regexp = (
name_template["ses"] if name_template["on"] else None
)

return getters.get_next_sub_or_ses(
self.cfg,
top_level_folder,
sub=sub,
local_only=local_only,
return_with_prefix=return_with_prefix,
search_str="ses-*",
name_template_regexp=name_template_regexp,
)

# Name Templates
Expand Down
96 changes: 85 additions & 11 deletions datashuttle/utils/getters.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
TYPE_CHECKING,
Dict,
List,
Literal,
Optional,
Tuple,
Union,
)

if TYPE_CHECKING:
Expand All @@ -33,6 +35,7 @@ def get_next_sub_or_ses(
local_only: bool = False,
return_with_prefix: bool = True,
default_num_value_digits: int = 3,
name_template_regexp: Optional[str] = None,
) -> str:
"""
Suggest the next available subject or session number. This function will
Expand Down Expand Up @@ -93,7 +96,10 @@ def get_next_sub_or_ses(
max_existing_num,
num_value_digits,
) = get_max_sub_or_ses_num_and_value_length(
all_folders, prefix, default_num_value_digits
all_folders,
prefix,
default_num_value_digits,
name_template_regexp,
)

# calculate next sub number
Expand All @@ -110,6 +116,7 @@ def get_max_sub_or_ses_num_and_value_length(
all_folders: List[str],
prefix: Prefix,
default_num_value_digits: Optional[int] = None,
name_template_regexp: Optional[str] = None,
) -> Tuple[int, int]:
"""
Given a list of BIDS-style folder names, find the maximum subject or
Expand Down Expand Up @@ -141,12 +148,23 @@ def get_max_sub_or_ses_num_and_value_length(

"""
if len(all_folders) == 0:
max_existing_num = 0

assert isinstance(
default_num_value_digits, int
), "`default_num_value_digits` must be int`"

num_value_digits = default_num_value_digits
max_existing_num = 0

# Try and get the num digits from a name template, otherwise use default.
if name_template_regexp is not None:
num_value_digits = get_num_value_digits_from_regexp(
prefix, name_template_regexp
)
if num_value_digits is False:
num_value_digits = default_num_value_digits
else:
num_value_digits = default_num_value_digits

else:
all_values_str = utils.get_values_from_bids_formatted_name(
all_folders,
Expand All @@ -155,16 +173,22 @@ def get_max_sub_or_ses_num_and_value_length(
)

# First get the length of bids-key value across the project
# (e.g. sub-003 has three values).
all_num_value_digits = [len(value) for value in all_values_str]
# or name template if it exists (e.g. sub-003 has three values).
# If a name template exists but the length can't be determined from it,
# default back to the project.
if name_template_regexp is not None:
num_value_digits = get_num_value_digits_from_regexp(
prefix, name_template_regexp
)

if len(set(all_num_value_digits)) != 1:
utils.log_and_raise_error(
f"The number of value digits for the {prefix} level are not "
f"consistent. Cannot suggest a {prefix} number.",
NeuroBlueprintError,
if num_value_digits is False:
num_value_digits = get_num_value_digits_from_project(
all_values_str, prefix
)
else:
num_value_digits = get_num_value_digits_from_project(
all_values_str, prefix
)
num_value_digits = all_num_value_digits[0]

# Then get the latest existing sub or ses number in the project.
all_value_nums = sorted(
Expand All @@ -182,6 +206,56 @@ def get_max_sub_or_ses_num_and_value_length(
return max_existing_num, num_value_digits


def get_num_value_digits_from_project(
    all_values_str: List[str], prefix: Prefix
) -> int:
    """
    Return the number of digits shared by the sub or ses values
    found within the project.

    `all_values_str` is the list of sub or ses values (as strings)
    taken from the project. If the values do not all have the same
    length, an error is reported through `utils.log_and_raise_error`
    because no single digit count can be suggested.
    """
    value_lengths = list(map(len, all_values_str))
    lengths_are_consistent = len(set(value_lengths)) == 1

    if not lengths_are_consistent:
        utils.log_and_raise_error(
            f"The number of value digits for the {prefix} level are not "
            f"consistent. Cannot suggest a {prefix} number.",
            NeuroBlueprintError,
        )

    # All lengths are equal at this point, so the first is representative.
    return value_lengths[0]


def get_num_value_digits_from_regexp(
    prefix: Prefix, name_template_regexp: str
) -> Union[Literal[False], int]:
    r"""
    Given a name template regexp, find the number of values for the
    sub or ses key. These will be fixed with "\d" (digit) or ".?" (wildcard).

    The value for `prefix` is extracted from the template with
    `utils.get_values_from_bids_formatted_name`, and every "d" or "?"
    character in it is counted as one value position.

    Returns
    -------
    The number of value positions found, or `False` when it cannot be
    determined from the template: either the value contains a
    length-unspecific wildcard (.*), or no "d" / "?" characters were
    found. In practice, there should never really be a .* in the sub
    or ses key of a name template, but it is handled just in case.
    """
    # The docstring above is a raw string because it contains "\d",
    # which is an invalid escape sequence in a normal string literal.
    value_regexp = utils.get_values_from_bids_formatted_name(
        [name_template_regexp], prefix, return_as_int=False
    )[0]

    if "*" in value_regexp:
        return False

    num_digits = sum(char in ("d", "?") for char in value_regexp)

    if num_digits == 0:
        # Breaks the assumption that there is some usable regexp here,
        # better for the caller to use the project instead.
        return False

    return num_digits


def get_existing_project_paths() -> List[Path]:
"""
Return full path and names of datashuttle projects on
Expand Down
46 changes: 46 additions & 0 deletions tests/tests_integration/test_create_folders.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,52 @@ def test_get_next_ses(self, project, return_with_prefix, top_level_folder):
)
assert new_num == "ses-006" if return_with_prefix else "006"

def test_get_next_sub_and_ses_name_template(self, project):
    """
    When a name template is switched on, the next-sub / next-ses getters
    should take the number of digits from the template (even when that
    differs from what already exists within the project!).
    """
    project.create_folders("rawdata", "sub-001", "ses-001")

    # Every getter call in this test uses the same options.
    getter_kwargs = {"return_with_prefix": False, "local_only": True}

    name_templates = {
        "on": True,
        "sub": r"sub-\d.?.?.?\d_key-value",  # 5 digits
        "ses": r"ses-\d_@DATE@",  # 1 digit
    }
    project.set_name_templates(name_templates)

    next_sub = project.get_next_sub("rawdata", **getter_kwargs)
    assert next_sub == "00002"

    next_ses = project.get_next_ses("rawdata", "sub-001", **getter_kwargs)
    assert next_ses == "2"

    # Quick check of two cases that should not use the name template.
    # Only sub is tested as the underlying code is the same.
    # Case 1: name templates are off, so num_digits comes from the project.
    name_templates["on"] = False
    project.set_name_templates(name_templates)

    next_sub = project.get_next_sub("rawdata", **getter_kwargs)
    assert next_sub == "002"

    # Case 2: the sub key value takes a length-unspecific wildcard
    # (should never really happen), so the project is used again.
    name_templates["on"] = True
    name_templates["sub"] = "sub-.*"
    project.set_name_templates(name_templates)

    next_sub = project.get_next_sub("rawdata", **getter_kwargs)
    assert next_sub == "002"

# ----------------------------------------------------------------------------------
# Test Helpers
# ----------------------------------------------------------------------------------
Expand Down