From fe054598f591c7dfd33d61884195d774b05f3ef0 Mon Sep 17 00:00:00 2001
From: Joe Ziminski <55797454+JoeZiminski@users.noreply.github.com>
Date: Thu, 5 Oct 2023 17:25:08 +0100
Subject: [PATCH] Change `data-type` to `datatype` (#196)

* Change data-type to datatype in the CLI argument.
* Change data-type to datatype in docs.
* Make other small changes in code documentation.
* Fix tests.
* Replace data_type with datatype everywhere.
* Fix the space-separated "data type" case to "datatype".
* Fix missed case in README.md.
---
 README.md                                     |  2 +-
 datashuttle/command_line_interface.py         | 13 ++--
 datashuttle/configs/canonical_configs.py      | 16 ++--
 datashuttle/configs/canonical_folders.py      | 14 ++--
 datashuttle/configs/config_class.py           | 48 ++++++------
 datashuttle/datashuttle.py                    | 28 +++----
 datashuttle/utils/data_transfer.py            | 50 ++++++-------
 datashuttle/utils/folders.py                  | 72 +++++++++---------
 datashuttle/utils/formatting.py               | 20 +++--
 docs/source/pages/documentation.md            | 56 +++++++-------
 tests/test_utils.py                           | 74 +++++++++----------
 .../test_command_line_interface.py            | 24 +++---
 .../test_file_conflicts_pathtable.py          | 10 +--
 .../test_filesystem_transfer.py               | 34 ++++-----
 tests/tests_integration/test_logging.py       | 12 +--
 tests/tests_integration/test_make_folders.py  | 16 ++--
 .../test_ssh_file_transfer.py                 | 46 ++++++------
 17 files changed, 263 insertions(+), 272 deletions(-)

diff --git a/README.md b/README.md
index a6aefcfa..3b6442e9 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@ Datashuttle includes tools for automated generation and transfer of neuroscience
 * Manage files across multiple data-collection computers by synchronising all data with a centrally stored project.
-* Simplify data transfers by selecting only a sub-set of data to move (e.g. specific subjects, sessions or data types)
+* Simplify data transfers by selecting only a sub-set of data to move (e.g. specific subjects, sessions or datatypes)

 See the [DataShuttle Documentation](https://datashuttle.neuroinformatics.dev) to get started or join the [Zulip chat](https://neuroinformatics.zulipchat.com/#narrow/stream/405999-DataShuttle) to discuss any questions, comments or feedback.
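For orientation before the code changes below: the long CLI flag is renamed from `--data-type` / `--data_type` to `--datatype`, while the `-dt` short form is kept. A rough sketch of the new spelling in use (the project name is hypothetical; the invocation style mirrors the docs examples updated later in this patch):

```
datashuttle \
my_first_project \
upload \
-sub 001@TO@003 \
-ses all \
--datatype behav
```
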
diff --git a/datashuttle/command_line_interface.py b/datashuttle/command_line_interface.py index c1b68e24..520689ce 100644 --- a/datashuttle/command_line_interface.py +++ b/datashuttle/command_line_interface.py @@ -613,11 +613,10 @@ def construct_parser(): nargs="+", type=str, required=False, - help="Optional: (str, single or multiple) (selection of data types, or 'all')", + help="Optional: (str, single or multiple) (selection of datatypes, or 'all')", ) make_sub_folders_parser.add_argument( - "--data-type", - "--data_type", + "--datatype", "-dt", type=str, nargs="+", @@ -659,13 +658,12 @@ def construct_parser(): help=help("required_str_single_or_multiple_or_all"), ) upload_parser.add_argument( - "--data-type", - "--data_type", + "--datatype", "-dt", type=str, nargs="+", required=False, - help="Optional: (str, single or multiple) (selection of data types, or 'all') (default 'all')", + help="Optional: (str, single or multiple) (selection of datatypes, or 'all') (default 'all')", ) upload_parser.add_argument( "--dry-run", @@ -732,8 +730,7 @@ def construct_parser(): help=help("required_str_single_or_multiple_or_all"), ) download_parser.add_argument( - "--data-type", - "--data_type", + "--datatype", "-dt", type=str, nargs="+", diff --git a/datashuttle/configs/canonical_configs.py b/datashuttle/configs/canonical_configs.py index dd5e387e..57107c48 100644 --- a/datashuttle/configs/canonical_configs.py +++ b/datashuttle/configs/canonical_configs.py @@ -38,15 +38,15 @@ def get_canonical_config_dict() -> dict: "show_transfer_progress": None, } - data_type_configs = get_data_types(as_dict=True) - config_dict.update(data_type_configs) + datatype_configs = get_datatypes(as_dict=True) + config_dict.update(datatype_configs) return config_dict -def get_data_types(as_dict: bool = False): +def get_datatypes(as_dict: bool = False): """ - Canonical list of data_type flags. This is used + Canonical list of datatype flags. This is used to define get_canonical_config_dict() as well as in testing. """ @@ -63,7 +63,7 @@ def get_flags() -> List[str]: Return all configs that are bool flags. This is used in testing and type checking config inputs. """ - return get_data_types() + [ + return get_datatypes() + [ "overwrite_old_files", "show_transfer_progress", ] @@ -151,10 +151,10 @@ def check_dict_values_raise_on_fail(config_dict: Configs) -> None: "with no ~ syntax." ) - if not any([config_dict[key] for key in get_data_types()]): + if not any([config_dict[key] for key in get_datatypes()]): utils.log_and_raise_error( - f"At least one data type must be True in " - f"configs, from: {' '.join(get_data_types())}." + f"At least one datatype must be True in " + f"configs, from: {' '.join(get_datatypes())}." ) # Check SSH settings diff --git a/datashuttle/configs/canonical_folders.py b/datashuttle/configs/canonical_folders.py index 57ebb953..ea67a432 100644 --- a/datashuttle/configs/canonical_folders.py +++ b/datashuttle/configs/canonical_folders.py @@ -8,7 +8,7 @@ from datashuttle.utils.folder_class import Folder -def get_data_type_folders(cfg: Configs) -> dict: +def get_datatype_folders(cfg: Configs) -> dict: """ This function holds the canonical folders managed by datashuttle. @@ -23,12 +23,12 @@ def get_data_type_folders(cfg: Configs) -> dict: When adding a new folder, the key should be the canonical key used to refer - to the data_type in datashuttle and SWC-BIDs. + to the datatype in datashuttle and SWC-BIDs. 
The value is a Folder() class instance with the required fields - name : The display name for the data_type, that will + name : The display name for the datatype, that will be used for making and transferring files in practice. This should always match the canonical name, but left as an option for rare cases in which advanced users want to change it. @@ -80,8 +80,8 @@ def get_non_sub_names(): return [ "all_ses", "all_non_ses", - "all_data_type", - "all_ses_level_non_data_type", + "all_datatype", + "all_ses_level_non_datatype", ] @@ -93,8 +93,8 @@ def get_non_ses_names(): return [ "all_sub", "all_non_sub", - "all_data_type", - "all_ses_level_non_data_type", + "all_datatype", + "all_ses_level_non_datatype", ] diff --git a/datashuttle/configs/config_class.py b/datashuttle/configs/config_class.py index 8291ecc1..5c0bfc65 100644 --- a/datashuttle/configs/config_class.py +++ b/datashuttle/configs/config_class.py @@ -49,7 +49,7 @@ def __init__( self.top_level_folder: str - self.data_type_folders: dict + self.datatype_folders: dict self.logging_path: Path self.hostkeys_path: Path self.ssh_key_path: Path @@ -300,62 +300,62 @@ def make_and_get_logging_path(self) -> Path: folders.make_folders(logging_path) return logging_path - def init_data_type_folders(self): + def init_datatype_folders(self): """""" - self.data_type_folders = canonical_folders.get_data_type_folders(self) + self.datatype_folders = canonical_folders.get_datatype_folders(self) - def get_data_type_items( - self, data_type: Union[str, list] + def get_datatype_items( + self, datatype: Union[str, list] ) -> Union[ItemsView, zip]: """ - Get the .items() structure of the data type, either all of - them (stored in self.data_type_folders) or as a single item. + Get the .items() structure of the datatype, either all of + them (stored in self.datatype_folders) or as a single item. """ - if isinstance(data_type, str): - data_type = [data_type] + if isinstance(datatype, str): + datatype = [datatype] items: Union[ItemsView, zip] - if "all" in data_type: - items = self.data_type_folders.items() + if "all" in datatype: + items = self.datatype_folders.items() else: items = zip( - data_type, - [self.data_type_folders[key] for key in data_type], + datatype, + [self.datatype_folders[key] for key in datatype], ) return items - def items_from_data_type_input( + def items_from_datatype_input( self, local_or_central: str, - data_type: Union[list, str], + datatype: Union[list, str], sub: str, ses: Optional[str] = None, ) -> Union[ItemsView, zip]: """ - Get the list of data_types to transfer, either + Get the list of datatypes to transfer, either directly from user input, or by searching what is available if "all" is passed. Parameters ---------- - see _transfer_data_type() for parameters. + see _transfer_datatype() for parameters. 
""" base_folder = self.get_base_folder(local_or_central) - if data_type not in [ + if datatype not in [ "all", ["all"], - "all_data_type", - ["all_data_type"], + "all_datatype", + ["all_datatype"], ]: - data_type_items = self.get_data_type_items( - data_type, + datatype_items = self.get_datatype_items( + datatype, ) else: - data_type_items = folders.search_data_folders_sub_or_ses_level( + datatype_items = folders.search_data_folders_sub_or_ses_level( self, base_folder, local_or_central, @@ -363,4 +363,4 @@ def items_from_data_type_input( ses, ) - return data_type_items + return datatype_items diff --git a/datashuttle/datashuttle.py b/datashuttle/datashuttle.py index 75bea915..1d63bc38 100644 --- a/datashuttle/datashuttle.py +++ b/datashuttle/datashuttle.py @@ -119,7 +119,7 @@ def _set_attributes_after_config_load(self) -> None: self._make_project_metadata_if_does_not_exist() - self.cfg.init_data_type_folders() + self.cfg.init_datatype_folders() # ------------------------------------------------------------------------- # Public Folder Makers @@ -159,7 +159,7 @@ def make_sub_folders( self, sub_names: Union[str, list], ses_names: Optional[Union[str, list]] = None, - data_type: str = "all", + datatype: str = "all", ) -> None: """ Create a subject / session folder tree in the project @@ -178,12 +178,12 @@ def make_sub_folders( (if not already, these will be prefixed with "ses-"). If no session is provided, no session-level folders are made. - data_type : - The data_type to make in the sub / ses folders. - (e.g. "ephys", "behav", "histology"). Only data_types + datatype : + The datatype to make in the sub / ses folders. + (e.g. "ephys", "behav", "histology"). Only datatypes that are enabled in the configs (e.g. use_behav) will be created. If "all" is selected, folders will be created - for all data_type enabled in config. Use empty string "" for + for all datatype enabled in config. Use empty string "" for none. Notes @@ -204,7 +204,7 @@ def make_sub_folders( Examples -------- - project.make_sub_folders("sub-001", data_type="all") + project.make_sub_folders("sub-001", datatype="all") project.make_sub_folders("sub-002@TO@005", ["ses-001", "ses-002"], @@ -251,7 +251,7 @@ def make_sub_folders( self.cfg, sub_names, ses_names, - data_type, + datatype, log=True, ) @@ -294,7 +294,7 @@ def upload( self, sub_names: Union[str, list], ses_names: Union[str, list], - data_type: str = "all", + datatype: str = "all", dry_run: bool = False, init_log: bool = True, ) -> None: @@ -313,7 +313,7 @@ def upload( be prefixed with "sub-", or the prefix will be automatically added. "@*@" can be used as a wildcard. "all" will search for all sub-folders in the - data type folder to upload. + datatype folder to upload. ses_names : a session name / list of session names, similar to sub_names but requiring a "ses-" prefix. @@ -321,7 +321,7 @@ def upload( perform a dry-run of upload. This will output as if file transfer was taking place, but no files will be moved. Useful to check which files will be moved on data transfer. 
- data_type : + datatype : see make_sub_folders() init_log : @@ -357,7 +357,7 @@ def upload( "upload", sub_names, ses_names, - data_type, + datatype, dry_run, log=True, ) @@ -368,7 +368,7 @@ def download( self, sub_names: Union[str, list], ses_names: Union[str, list], - data_type: str = "all", + datatype: str = "all", dry_run: bool = False, init_log: bool = True, ) -> None: @@ -393,7 +393,7 @@ def download( "download", sub_names, ses_names, - data_type, + datatype, dry_run, log=True, ) diff --git a/datashuttle/utils/data_transfer.py b/datashuttle/utils/data_transfer.py index 2b2c66e1..840f36c0 100644 --- a/datashuttle/utils/data_transfer.py +++ b/datashuttle/utils/data_transfer.py @@ -13,7 +13,7 @@ def __init__( upload_or_download: str, sub_names: Union[str, List[str]], ses_names: Union[str, List[str]], - data_type: Union[str, List[str]], + datatype: Union[str, List[str]], dry_run: bool, log: bool, ): @@ -26,7 +26,7 @@ def __init__( self.sub_names = self.to_list(sub_names) self.ses_names = self.to_list(ses_names) - self.data_type = self.to_list(data_type) + self.datatype = self.to_list(datatype) self.check_input_arguments() @@ -70,7 +70,7 @@ def build_a_list_of_all_files_and_folders_to_transfer(self) -> List[str]: self.update_list_with_dtype_paths( sub_ses_dtype_include, - self.data_type, + self.datatype, sub, ) @@ -88,14 +88,14 @@ def build_a_list_of_all_files_and_folders_to_transfer(self) -> List[str]: # Datatype (sub and ses level) -------------------------------- - if self.transfer_non_data_type(self.data_type): + if self.transfer_non_datatype(self.datatype): self.update_list_with_non_dtype_ses_level_folders( extra_folder_names, extra_filenames, sub, ses ) self.update_list_with_dtype_paths( sub_ses_dtype_include, - self.data_type, + self.datatype, sub, ses, ) @@ -161,7 +161,7 @@ def update_list_with_non_ses_sub_level_folders( ) sub_level_dtype = [ dtype.name - for dtype in self.cfg.data_type_folders.values() + for dtype in self.cfg.datatype_folders.values() if dtype.level == "sub" ] @@ -192,7 +192,7 @@ def update_list_with_non_dtype_ses_level_folders( ses_level_dtype = [ dtype.name - for dtype in self.cfg.data_type_folders.values() + for dtype in self.cfg.datatype_folders.values() if dtype.level == "ses" ] filt_ses_level_folders = filter( @@ -206,33 +206,33 @@ def update_list_with_non_dtype_ses_level_folders( ] # ------------------------------------------------------------------------- - # Update list with path to sub and ses level data_type folders + # Update list with path to sub and ses level datatype folders # ------------------------------------------------------------------------- def update_list_with_dtype_paths( self, sub_ses_dtype_include, - data_type: List[str], + datatype: List[str], sub: str, ses: Optional[str] = None, ) -> None: """ """ - data_type = list( - filter(lambda x: x != "all_ses_level_non_data_type", data_type) + datatype = list( + filter(lambda x: x != "all_ses_level_non_datatype", datatype) ) - data_type_items = self.cfg.items_from_data_type_input( - self.local_or_central, data_type, sub, ses + datatype_items = self.cfg.items_from_datatype_input( + self.local_or_central, datatype, sub, ses ) level = "ses" if ses else "sub" - for data_type_key, data_type_folder in data_type_items: # type: ignore - if data_type_folder.level == level: + for datatype_key, datatype_folder in datatype_items: # type: ignore + if datatype_folder.level == level: if ses: - filepath = Path(sub) / ses / data_type_folder.name + filepath = Path(sub) / ses / datatype_folder.name else: - 
filepath = Path(sub) / data_type_folder.name + filepath = Path(sub) / datatype_folder.name sub_ses_dtype_include.append(filepath.as_posix()) @@ -251,7 +251,7 @@ def check_input_arguments( """ Check the sub / session names passed. The checking here is stricter than for make_sub_folderss / formatting.check_and_format_names - because we want to ensure that a) non-data-type arguments are not + because we want to ensure that a) non-datatype arguments are not passed at the wrong input (e.g. all_non_ses as a subject name). We also want to limit the possible combinations of inputs, such @@ -278,11 +278,11 @@ def check_input_arguments( "'ses_names' must only include 'all' or 'all_ses' if these options are used." ) - if len(self.data_type) > 1 and any( - [name in ["all", "all_data_type"] for name in self.data_type] + if len(self.datatype) > 1 and any( + [name in ["all", "all_datatype"] for name in self.datatype] ): utils.log_and_raise_error( - "'data_type' must only include 'all' or 'all_data_type' if these options are used." + "'datatype' must only include 'all' or 'all_datatype' if these options are used." ) # ----------------------------------------------------------------------------- @@ -348,14 +348,14 @@ def get_processed_names( return processed_names - def transfer_non_data_type(self, data_type_checked: List[str]) -> bool: + def transfer_non_datatype(self, datatype_checked: List[str]) -> bool: """ - Convenience function, bool if all non-data-type folders + Convenience function, bool if all non-datatype folders are to be transferred """ return any( [ - name in ["all_ses_level_non_data_type", "all"] - for name in data_type_checked + name in ["all_ses_level_non_datatype", "all"] + for name in datatype_checked ] ) diff --git a/datashuttle/utils/folders.py b/datashuttle/utils/folders.py index 38fc8b69..b965026e 100644 --- a/datashuttle/utils/folders.py +++ b/datashuttle/utils/folders.py @@ -25,13 +25,13 @@ def make_folder_trees( cfg: Configs, sub_names: Union[str, list], ses_names: Union[str, list], - data_type: str, + datatype: str, log: bool = True, ) -> None: """ Entry method to make a full folder tree. It will iterate through all passed subjects, then sessions, then - subfolders within a data_type folder. This + subfolders within a datatype folder. This permits flexible creation of folders (e.g. to make subject only, do not pass session name. @@ -41,15 +41,15 @@ def make_folder_trees( Parameters ---------- - sub_names, ses_names, data_type : see make_sub_folders() + sub_names, ses_names, datatype : see make_sub_folders() log : whether to log or not. If True, logging must already be initialised. 
""" - data_type_passed = data_type not in [[""], ""] + datatype_passed = datatype not in [[""], ""] - if data_type_passed: - formatting.check_data_type_is_valid(cfg, data_type, error_on_fail=True) + if datatype_passed: + formatting.check_datatype_is_valid(cfg, datatype, error_on_fail=True) for sub in sub_names: sub_path = cfg.make_path( @@ -59,8 +59,8 @@ def make_folder_trees( make_folders(sub_path, log) - if data_type_passed: - make_data_type_folders(cfg, data_type, sub_path, "sub") + if datatype_passed: + make_datatype_folders(cfg, datatype, sub_path, "sub") for ses in ses_names: ses_path = cfg.make_path( @@ -70,27 +70,25 @@ def make_folder_trees( make_folders(ses_path, log) - if data_type_passed: - make_data_type_folders( - cfg, data_type, ses_path, "ses", log=log - ) + if datatype_passed: + make_datatype_folders(cfg, datatype, ses_path, "ses", log=log) -def make_data_type_folders( +def make_datatype_folders( cfg: Configs, - data_type: Union[list, str], + datatype: Union[list, str], sub_or_ses_level_path: Path, level: str, log: bool = True, ) -> None: """ - Make data_type folder (e.g. behav) at the sub or ses + Make datatype folder (e.g. behav) at the sub or ses level. Checks folder_class.Folders attributes, - whether the data_type is used and at the current level. + whether the datatype is used and at the current level. Parameters ---------- - data_type : data type (e.g. "behav", "all") to use. Use + datatype : datatype (e.g. "behav", "all") to use. Use empty string ("") for none. sub_or_ses_level_path : Full path to the subject @@ -103,15 +101,15 @@ def make_data_type_folders( log : whether to log on or not (if True, logging must already be initialised). """ - data_type_items = cfg.get_data_type_items(data_type) + datatype_items = cfg.get_datatype_items(datatype) - for data_type_key, data_type_folder in data_type_items: # type: ignore - if data_type_folder.used and data_type_folder.level == level: - data_type_path = sub_or_ses_level_path / data_type_folder.name + for datatype_key, datatype_folder in datatype_items: # type: ignore + if datatype_folder.used and datatype_folder.level == level: + datatype_path = sub_or_ses_level_path / datatype_folder.name - make_folders(data_type_path, log) + make_folders(datatype_path, log) - make_datashuttle_metadata_folder(data_type_path, log) + make_datashuttle_metadata_folder(datatype_path, log) # Make Folders Helpers -------------------------------------------------------- @@ -296,25 +294,25 @@ def search_data_folders_sub_or_ses_level( ) -> zip: """ Search a subject or session folder specifically - for data_types. First searches for all folders / files + for datatypes. First searches for all folders / files in the folder, and then returns any folders that - match data_type name. + match datatype name. see folders.search_sub_or_ses_level() for full parameters list. 
Returns ------- - Find the data_type files and return in + Find the datatype files and return in a format that mirrors dict.items() """ search_results = search_sub_or_ses_level( cfg, base_folder, local_or_central, sub, ses )[0] - data_folders = process_glob_to_find_data_type_folders( + data_folders = process_glob_to_find_datatype_folders( search_results, - cfg.data_type_folders, + cfg.datatype_folders, ) return data_folders @@ -384,13 +382,13 @@ def search_for_wildcards( return new_all_names -# Search Data Types +# Search Datatypes # ----------------------------------------------------------------------------- -def process_glob_to_find_data_type_folders( +def process_glob_to_find_datatype_folders( folder_names: list, - data_type_folders: dict, + datatype_folders: dict, ) -> zip: """ Process the results of glob on a sub or session level, @@ -400,21 +398,21 @@ def process_glob_to_find_data_type_folders( Returns ------- - Find the data_type files and return in + Find the datatype files and return in a format that mirrors dict.items() """ ses_folder_keys = [] ses_folder_values = [] for name in folder_names: - data_type_key = [ + datatype_key = [ key - for key, value in data_type_folders.items() + for key, value in datatype_folders.items() if value.name == name ] - if data_type_key: - ses_folder_keys.append(data_type_key[0]) - ses_folder_values.append(data_type_folders[data_type_key[0]]) + if datatype_key: + ses_folder_keys.append(datatype_key[0]) + ses_folder_values.append(datatype_folders[datatype_key[0]]) return zip(ses_folder_keys, ses_folder_values) diff --git a/datashuttle/utils/formatting.py b/datashuttle/utils/formatting.py index cc653f0d..a753ae2d 100644 --- a/datashuttle/utils/formatting.py +++ b/datashuttle/utils/formatting.py @@ -290,26 +290,24 @@ def ensure_prefixes_on_list_of_names( return new_names -def check_data_type_is_valid( - cfg: Configs, data_type: str, error_on_fail: bool +def check_datatype_is_valid( + cfg: Configs, datatype: str, error_on_fail: bool ) -> bool: """ - Check the passed data_type is valid (must + Check the passed datatype is valid (must be a key on self.ses_folders e.g. "behav", or "all") """ - if isinstance(data_type, list): - valid_keys = list(cfg.data_type_folders.keys()) + ["all"] - is_valid = all([type in valid_keys for type in data_type]) + if isinstance(datatype, list): + valid_keys = list(cfg.datatype_folders.keys()) + ["all"] + is_valid = all([type in valid_keys for type in datatype]) else: - is_valid = ( - data_type in cfg.data_type_folders.keys() or data_type == "all" - ) + is_valid = datatype in cfg.datatype_folders.keys() or datatype == "all" if error_on_fail and not is_valid: utils.log_and_raise_error( - f"data_type: '{data_type}' " + f"datatype: '{datatype}' " f"is not valid. Must be one of" - f" {list(cfg.data_type_folders.keys())}. or 'all'" + f" {list(cfg.datatype_folders.keys())}. or 'all'" f" No folders were made." ) diff --git a/docs/source/pages/documentation.md b/docs/source/pages/documentation.md index 4187831f..29cf7d30 100644 --- a/docs/source/pages/documentation.md +++ b/docs/source/pages/documentation.md @@ -16,7 +16,7 @@ DataShuttle aims to integrate seamlessly into existing neuroscience data collect datashuttle central and local machines -DataShuttle requires a [one-time setup](#initial-setup-with-configurations) of project name and configurations. Next, subjects, session and data-type folder trees can be [created](#creating-subject-and-session-folders) during experimental acquisition. 
+DataShuttle requires a [one-time setup](#initial-setup-with-configurations) of project name and configurations. Next, subjects, session and datatype folder trees can be [created](#creating-subject-and-session-folders) during experimental acquisition. Once acquisition is complete, data can be easily [transferred](#data-transfer) from acquisition computers to a central storage machine. @@ -58,7 +58,7 @@ The _configurations_ tell DataShuttle: - The paths to the *local* and *central* folders that contain the project. - How to connect to the _central_ machine. - The settings that specify how data is transferred. -- The *data-types* that will be used in the project, e.g. *behaviour* (`behav`) or *electrophysiology* (`ephys`). +- The *datatypes* that will be used in the project, e.g. *behaviour* (`behav`) or *electrophysiology* (`ephys`). The command `make-config-file` is used for the initial setup of the project. The **required arguments** are: @@ -68,7 +68,7 @@ The command `make-config-file` is used for the initial setup of the project. The `connection_method`: `local_filesystem` or `ssh`. Local filesystem can be used if the *central* storage is mounted to the local machine. Otherwise `ssh` can be used. -Finally, the *data-type* flags `--use_ephys`, `--use_funcimg`, `--use_histology`, `--use_behav` set the types of data required for the project on the local machine. While individual flags are optional, at least one must be chosen when initialising the project. +Finally, the *datatype* flags `--use_ephys`, `--use_funcimg`, `--use_histology`, `--use_behav` set the types of data required for the project on the local machine. While individual flags are optional, at least one must be chosen when initialising the project. ### Optional Arguments @@ -137,7 +137,7 @@ my_first_project \ make-sub-folders -sub 001@TO@003 -ses 010_@TIME@ -dt all ``` -When the `all` argument is used for `--data_type` (`-dt`), the folders created depend on the *data-types* specified during *configuration* setup. For example, if +When the `all` argument is used for `--datatype` (`-dt`), the folders created depend on the *datatypes* specified during *configuration* setup. For example, if `--use_behav`, `--use_funcimg`, `--use_histology` were set during *configuration* setup, the folder tree from the above command (assuming the time is `4.02.48 PM`), would look like: ``` @@ -159,9 +159,9 @@ When the `all` argument is used for `--data_type` (`-dt`), the folders created d ``` -### Data Types Folders +### Datatype Folders -In [SWC-Blueprint](https://swc-blueprint.neuroinformatics.dev/specification.html), *data-types* specify where acquired experimental data of currently supported types (`behav`, `ephys`, `funcimg` and `histology`) is stored. See the [*data-types* section of the SWC-Blueprint for more details](https://swc-blueprint.neuroinformatics.dev/specification.html#datatype). +In [SWC-Blueprint](https://swc-blueprint.neuroinformatics.dev/specification.html), *datatypes* specify where acquired experimental data of currently supported types (`behav`, `ephys`, `funcimg` and `histology`) is stored. See the [*datatypes* section of the SWC-Blueprint for more details](https://swc-blueprint.neuroinformatics.dev/specification.html#datatype). At present, `histology` is saved to the `sub-` level, as it is assumed `histology` is conducted *ex vivo* and so session will be possible. Please don't hesitate to get into contact if you have an alternative use case. 
@@ -182,7 +182,7 @@ upload \ Will *upload* (from *local* to *central* ) _behavioural_ _sessions_ 5 and 6, collected at any date, for _subjects_ 1 to 3. -The keyword `all` can be input in place of a `-sub`, `-ses` or _data-type_ argument `dt` to transfer all available subject, sessions or data types available. For example: +The keyword `all` can be input in place of a `-sub`, `-ses` or _datatype_ argument `-dt` to transfer all available subject, sessions or datatypes available. For example: ``` datashuttle \ @@ -249,9 +249,9 @@ run on the folder tree: will transfer all data in both the `rawdata` and `derivatives` folders from the *local* machine to the *central* machine. -### Transferring files that are not within data-type folders +### Transferring files that are not within datatype folders -In some cases, files related to metadata may be stored outside of *data-type* folders. When the `all` flag is used, files outside of folders at the *top-level folder* (for `-sub`), *subject* level (for `-ses`) and *session* level (`for -dt`) will also be transferred. However, if specific subject, session or data-type are selected, files outside of these will not be transferred. +In some cases, files related to metadata may be stored outside of *datatype* folders. When the `all` flag is used, files outside of folders at the *top-level folder* (for `-sub`), *subject* level (for `-ses`) and *session* level (`for -dt`) will also be transferred. However, if specific subject, session or datatype are selected, files outside of these will not be transferred. The example below exemplifies how the `all` argument works during data transfer. For example, given the project folder: @@ -281,13 +281,13 @@ upload \ will move: -- The file `a_project_file.json` (and any other files at this level) and search all *subjects* for the specified *sessions* */ data-types*. +- The file `a_project_file.json` (and any other files at this level) and search all *subjects* for the specified *sessions* */ datatypes*. - Only *sessions* called `001`, but not any other files or folders at this level (i.e. `sub-001_ses-001_extrafile-ses.json` will not be transferred). -- All *data-types* and non-*data-types* at the session level. For example, `behav` and `sub-001_ses-001_extrafile-dtype.json` (that reside in *session* folders called `ses-001`) will be transferred. +- All *datatypes* and non-*datatypes* at the session level. For example, `behav` and `sub-001_ses-001_extrafile-dtype.json` (that reside in *session* folders called `ses-001`) will be transferred. -For convenience, it is suggested to keep all files within *data-type* level folders. However, the `all` argument, as well as the additional available arguments: `all_sub` and `all_non_sub` (for `-sub`), `all_ses` and `all_non_ses` (for `-ses`) and `-all_ses_level_non_data_type` are available, as [detailed below](#flexible-transfers-with-keyword-arguments) +For convenience, it is suggested to keep all files within *datatype* level folders. However, the `all` argument, as well as the additional available arguments: `all_sub` and `all_non_sub` (for `-sub`), `all_ses` and `all_non_ses` (for `-ses`) and `-all_ses_level_non_datatype` are available, as [detailed below](#flexible-transfers-with-keyword-arguments) ### Transferring a specific file or folder @@ -336,13 +336,13 @@ project.make_config_file( ) ``` -and methods for making subject folders and transferring data accessed similarly. 
Note that the shortcut arguments `-sub`, `-ses`, `-dt` are not available through the Python API, and the full argument names (`sub_names`, `ses_names`, `data_type`) must be used. +and methods for making subject folders and transferring data accessed similarly. Note that the shortcut arguments `-sub`, `-ses`, `-dt` are not available through the Python API, and the full argument names (`sub_names`, `ses_names`, `datatype`) must be used. ``` project.make_sub_folders( sub_names="sub-001@TO@002", ses_names="ses-001_@DATE@", - data_type="all" + datatype="all" ) ``` @@ -350,7 +350,7 @@ project.make_sub_folders( project.upload( sub_names="001@TO@003", ses_names=["005_date-@*@", "006_date-@*@"], - data_type="behav" + datatype="behav" ) ``` @@ -492,7 +492,7 @@ To change this behaviour, the configuration `overwrite_old_files` can be set to ### Flexible transfers with keyword arguments -DataShuttle provides a number of keyword arguments to allow separate handling of files that are not found in *data-type* folders. +DataShuttle provides a number of keyword arguments to allow separate handling of files that are not found in *datatype* folders. #### For use with the `-sub` / `--sub-names` flag @@ -510,14 +510,14 @@ DataShuttle provides a number of keyword arguments to allow separate handling of `all_non_ses` : All files and folders that are not prefixed with `-sub` will be transferred. Any folders prefixed with `-ses` will not be transferred. -#### For use with the `-dt` / `--data-type` flag +#### For use with the `-dt` / `--datatype` flag -`all` : All *data-type* folders at the *subject* or *session* folder level will be transferred, as well as all files and folders within selected *session* folders. +`all` : All *datatype* folders at the *subject* or *session* folder level will be transferred, as well as all files and folders within selected *session* folders. -`all_data_type` : All *data-type* folders (i.e. folders with the pre-determined name: `behav`, `ephys`, `funcimg`, `histology`) residing at either the *subject* or *session* level will be -transferred. Non-*data-type* folders at the *session* level will not be transferred +`all_datatype` : All *datatype* folders (i.e. folders with the pre-determined name: `behav`, `ephys`, `funcimg`, `histology`) residing at either the *subject* or *session* level will be +transferred. Non-*datatype* folders at the *session* level will not be transferred -`all_ses_level_non_data_type` : Non *-data-type* folders at the *session* level will not be transferred +`all_ses_level_non_datatype` : Non *datatype* folders at the *session* level will not be transferred Below, a number of examples are given to exemplify how these arguments effect data transfer. Given the *local* project folder: @@ -544,7 +544,7 @@ Below, a number of examples are given to exemplify how these arguments effect da └── ... ``` -1) The first example indicates the effect of selectively transferring non-*data-type* sessions. The command: +1) The first example indicates the effect of selectively transferring non-*datatype* sessions. The command: ``` datashuttle \ @@ -552,17 +552,17 @@ my_first_project \ upload -sub all -ses all --dt all_ses_level_non_data_type +-dt all_ses_level_non_datatype ``` Would upload: - All non-*subject* files in the *top-level* folder (`rawdata`) - The `sub-001_extra_file.json` and `sub-002_extra_file.json` -- For `sub-001`, the file `ses-001_extra_file.json`. For `sub-002`, no other files are transferred because there is no non-*data-type* files at the *session* level. 
+- For `sub-001`, the file `ses-001_extra_file.json`. For `sub-002`, no other files are transferred because there is no non-*datatype* files at the *session* level. -2) The next two examples show the effect of selecting `-dt all` vs. `-dt all_data_type`. The command: +2) The next two examples show the effect of selecting `-dt all` vs. `-dt all_datatype`. The command: ``` datashuttle \ @@ -577,7 +577,7 @@ Would upload: - Contents residing in the `sub-001` folder only. - The file `sub-001_extra-file.json` -- All *data-type* folder contents (`histology`, `behav`, `ephys`) +- All *datatype* folder contents (`histology`, `behav`, `ephys`) The command: @@ -587,13 +587,13 @@ my_first_project \ upload -sub 001 -ses all_sub --dt all_data_type +-dt all_datatype ``` Would upload: - Contents residing in the `sub-001` folder only. -- All *data-type* folder contents (`histology`, `behav`, `ephys`) +- All *datatype* folder contents (`histology`, `behav`, `ephys`) 3) The final example shows the effect of transferring `all_non_sub` files only. The command: diff --git a/tests/test_utils.py b/tests/test_utils.py index c0412605..28be2a8a 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -25,13 +25,13 @@ def setup_project_default_configs( tmp_path, local_path=False, central_path=False, - all_data_type_on=True, + all_datatype_on=True, ): """ Set up a fresh project to test on local_path / central_path: provide the config paths to set - all_data_type_on: by default, all data_type flags are False. + all_datatype_on: by default, all datatype flags are False. for testing, it is preferable to have all True so set this if this argument is True. """ @@ -45,8 +45,8 @@ def setup_project_default_configs( tmp_path, set_as_defaults=True ) - if all_data_type_on: - default_configs.update(get_all_data_types_on("kwargs")) + if all_datatype_on: + default_configs.update(get_all_datatypes_on("kwargs")) project.make_config_file(**default_configs) @@ -269,13 +269,13 @@ def check_folder_tree_is_correct( Automated test that folders are made based on the structure specified on project itself. - Cycle through all data_types (defined in - project.cfg.data_type_folders()), sub, sessions and check that + Cycle through all datatypes (defined in + project.cfg.datatype_folders()), sub, sessions and check that the expected file exists. For subfolders, recursively check all exist. Folders in which folder_used[key] (where key - is the canonical dict key in project.cfg.data_type_folders()) + is the canonical dict key in project.cfg.datatype_folders()) is not used are expected not to be made, and this is checked. @@ -291,7 +291,7 @@ def check_folder_tree_is_correct( path_to_ses_folder = join(base_folder, sub, ses) check_and_cd_folder(path_to_ses_folder) - for key, folder in project.cfg.data_type_folders.items(): + for key, folder in project.cfg.datatype_folders.items(): assert key in folder_used.keys(), ( "Key not found in folder_used. 
" "Update folder used and hard-coded tests: " @@ -302,13 +302,13 @@ def check_folder_tree_is_correct( base_folder, folder, folder_used, key, sub, ses ): if folder.level == "sub": - data_type_path = join(path_to_sub_folder, folder.name) + datatype_path = join(path_to_sub_folder, folder.name) elif folder.level == "ses": - data_type_path = join(path_to_ses_folder, folder.name) + datatype_path = join(path_to_ses_folder, folder.name) - check_and_cd_folder(data_type_path) + check_and_cd_folder(datatype_path) check_and_cd_folder( - join(data_type_path, ".datashuttle_meta") + join(datatype_path, ".datashuttle_meta") ) @@ -344,16 +344,16 @@ def check_and_cd_folder(path_): os.chdir(path_) -def check_data_type_sub_ses_uploaded_correctly( +def check_datatype_sub_ses_uploaded_correctly( base_path_to_check, - data_type_to_transfer, + datatype_to_transfer, subs_to_upload=None, ses_to_upload=None, ): """ - Iterate through the project (data_type > ses > sub) and + Iterate through the project (datatype > ses > sub) and check that the folders at each level match those that are - expected (passed in data_type / sub / ses to upload). Folders + expected (passed in datatype / sub / ses to upload). Folders are searched with wildcard glob. Note: might be easier to flatten entire path with glob(**) @@ -373,56 +373,56 @@ def check_data_type_sub_ses_uploaded_correctly( "*", ) ) - if data_type_to_transfer == ["histology"]: + if datatype_to_transfer == ["histology"]: assert ses_names == ["histology"] return # handle the case in which histology # only is transferred, # and there are no sessions to transfer. - copy_data_type_to_transfer = ( + copy_datatype_to_transfer = ( check_and_strip_within_sub_data_folders( - ses_names, data_type_to_transfer + ses_names, datatype_to_transfer ) ) assert ses_names == sorted(ses_to_upload) - # check data_type folders in session folder - if copy_data_type_to_transfer: + # check datatype folders in session folder + if copy_datatype_to_transfer: for ses in ses_names: data_names = glob_basenames( join(base_path_to_check, sub, ses, "*") ) - assert data_names == sorted(copy_data_type_to_transfer) + assert data_names == sorted(copy_datatype_to_transfer) -def check_and_strip_within_sub_data_folders(ses_names, data_type_to_transfer): +def check_and_strip_within_sub_data_folders(ses_names, datatype_to_transfer): """ - Check if data_type folders at the sub level are picked + Check if datatype folders at the sub level are picked up when sessions are searched for with wildcard. Remove so that sessions can be explicitly tested next. """ - if "histology" in data_type_to_transfer: + if "histology" in datatype_to_transfer: assert "histology" in ses_names ses_names.remove("histology") - copy_ = copy.deepcopy(data_type_to_transfer) + copy_ = copy.deepcopy(datatype_to_transfer) copy_.remove("histology") return copy_ - return data_type_to_transfer + return datatype_to_transfer def make_and_check_local_project_folders( - project, subs, sessions, data_type, folder_name="rawdata" + project, subs, sessions, datatype, folder_name="rawdata" ): """ - Make a local project folder tree with the specified data_type, + Make a local project folder tree with the specified datatype, subs, sessions and check it is made successfully. Since empty folders are not transferred, it is necessary to write a placeholder file in all bottom-level directories so ensure they are transferred. 
""" - make_local_folders_with_files_in(project, subs, sessions, data_type) + make_local_folders_with_files_in(project, subs, sessions, datatype) check_folder_tree_is_correct( project, @@ -434,9 +434,9 @@ def make_and_check_local_project_folders( def make_local_folders_with_files_in( - project, subs, sessions=None, data_type="all" + project, subs, sessions=None, datatype="all" ): - project.make_sub_folders(subs, sessions, data_type) + project.make_sub_folders(subs, sessions, datatype) for root, dirs, files in os.walk(project.cfg["local_path"]): if not dirs: path_ = Path(root) / "placeholder_file.txt" @@ -613,17 +613,17 @@ def run_cli(command, project_name=None): return stdout.decode("utf8"), stderr.decode("utf8") -def get_all_data_types_on(kwargs_or_flags): +def get_all_datatypes_on(kwargs_or_flags): """ - Get all data_types (e.g. --use_behav) in on form, + Get all datatypes (e.g. --use_behav) in on form, either as kwargs for API or str of flags for CLI. """ - data_types = canonical_configs.get_data_types() + datatypes = canonical_configs.get_datatypes() if kwargs_or_flags == "flags": - return f"{' '.join(['--' + flag for flag in data_types])}" + return f"{' '.join(['--' + flag for flag in datatypes])}" else: - return dict(zip(data_types, [True] * len(data_types))) + return dict(zip(datatypes, [True] * len(datatypes))) def move_some_keys_to_end_of_dict(config): diff --git a/tests/tests_integration/test_command_line_interface.py b/tests/tests_integration/test_command_line_interface.py index 4180a764..8ca6ca16 100644 --- a/tests/tests_integration/test_command_line_interface.py +++ b/tests/tests_integration/test_command_line_interface.py @@ -174,7 +174,7 @@ def test_make_config_file_non_default_variables(self, tmp_path): def test_make_sub_folders_variable(self, sep): stdout, _ = test_utils.run_cli( f" make{sep}sub{sep}folders " - f"--data_type all " + f"--datatype all " f"--sub_names 001 " f"--ses_names 002 " ) @@ -182,7 +182,7 @@ def test_make_sub_folders_variable(self, sep): args_, kwargs_ = self.decode(stdout) assert args_ == [] - assert kwargs_["data_type"] == ["all"] + assert kwargs_["datatype"] == ["all"] assert kwargs_["sub_names"] == ["001"] assert kwargs_["ses_names"] == ["002"] @@ -195,7 +195,7 @@ def test_upload_download_variables(self, upload_or_download, sep): """ stdout, _ = test_utils.run_cli( f" {upload_or_download} " - f"--data{sep}type all " + f"--datatype all " f"--sub{sep}names one " f"--ses{sep}names two" ) @@ -205,7 +205,7 @@ def test_upload_download_variables(self, upload_or_download, sep): stdout, _ = test_utils.run_cli( f" {upload_or_download} " - f"--data{sep}type all " + f"--datatype all " f"--sub{sep}names one " f"--ses{sep}names two " f"--dry{sep}run" @@ -282,14 +282,14 @@ def test_multiple_inputs(self, command): """ stdout, stderr = test_utils.run_cli( f"{command} " - f"--data_type all " + f"--datatype all " f"--sub_names one two 3 sub-004 sub-w23@ " f"--ses_names 5 06 007" ) _, kwargs_ = self.decode(stdout) - assert kwargs_["data_type"] == ["all"] + assert kwargs_["datatype"] == ["all"] assert kwargs_["sub_names"] == [ "one", "two", @@ -400,7 +400,7 @@ def test_make_sub_folders(self, setup_project): ses = ["ses-123", "ses-999"] test_utils.run_cli( - f"make_sub_folders --data_type all --sub_names {self.to_cli_input(subs)} --ses_names {self.to_cli_input(ses)} ", # noqa + f"make_sub_folders --datatype all --sub_names {self.to_cli_input(subs)} --ses_names {self.to_cli_input(ses)} ", setup_project.project_name, ) @@ -448,7 +448,7 @@ def test_upload_and_download( elif 
transfer_method == "standard": test_utils.run_cli( f"{upload_or_download} " - f"--data_type all " + f"--datatype all " f"--sub_names all " f"--ses_names all", setup_project.project_name, @@ -459,13 +459,13 @@ def test_upload_and_download( setup_project.project_name, ) - test_utils.check_data_type_sub_ses_uploaded_correctly( + test_utils.check_datatype_sub_ses_uploaded_correctly( base_path_to_check=os.path.join( base_path_to_check, setup_project.cfg.top_level_folder ), - data_type_to_transfer=[ + datatype_to_transfer=[ flag.split("use_")[1] - for flag in canonical_configs.get_data_types() + for flag in canonical_configs.get_datatypes() ], subs_to_upload=subs, ses_to_upload=sessions, @@ -647,7 +647,7 @@ def check_kwargs(self, required_options, kwargs_): assert kwargs_ == {} def check_upload_download_args(self, args_, kwargs_, dry_run_is): - assert kwargs_["data_type"] == ["all"] + assert kwargs_["datatype"] == ["all"] assert kwargs_["sub_names"] == ["one"] assert kwargs_["ses_names"] == ["two"] assert kwargs_["dry_run"] is dry_run_is diff --git a/tests/tests_integration/test_file_conflicts_pathtable.py b/tests/tests_integration/test_file_conflicts_pathtable.py index 17ae0652..3227e8b4 100644 --- a/tests/tests_integration/test_file_conflicts_pathtable.py +++ b/tests/tests_integration/test_file_conflicts_pathtable.py @@ -8,21 +8,21 @@ def get_pathtable(base_folder): - columns = ["base_folder", "path", "is_non_sub", "is_non_ses", "is_ses_level_non_data_type", "parent_sub", "parent_ses", "parent_data_type"] + columns = ["base_folder", "path", "is_non_sub", "is_non_ses", "is_ses_level_non_datatype", "parent_sub", "parent_ses", "parent_datatype"] -# base_folder path is_non_sub is_non_ses is_ses_level_non_data_type parent_sub parent_ses parent_data_type +# base_folder path is_non_sub is_non_ses is_ses_level_non_datatype parent_sub parent_ses parent_datatype data = [[base_folder, Path("rawdata") / "sub-001" / "ses-001" / "sub-001_ses-001_data-file", False, False, True, "sub-001", "ses-001", None], [base_folder, Path("rawdata") / "sub-001" / "ses-002_random-key" / "random-key-file.mp4", False, False, True, "sub-001", "ses-002_random-key", None], [base_folder, Path("rawdata") / "sub-001" / "ses-003_date-20231901" / "behav" / "behav.csv", False, False, False, "sub-001", "ses-003_date-20231901", "behav"], [base_folder, Path("rawdata") / "sub-001" / "ses-003_date-20231901" / "ephys" / "ephys.bin", False, False, False, "sub-001", "ses-003_date-20231901", "ephys"], [base_folder, Path("rawdata") / "sub-001" / "ses-003_date-20231901" / "non_data" / "non_data.mp4", False, False, True, "sub-001", "ses-003_date-20231901", None], - [base_folder, Path("rawdata") / "sub-001" / "ses-003_date-20231901" / "nondata_type_level_file.csv", False, False, True, "sub-001", "ses-003_date-20231901", None], + [base_folder, Path("rawdata") / "sub-001" / "ses-003_date-20231901" / "nondatatype_level_file.csv", False, False, True, "sub-001", "ses-003_date-20231901", None], [base_folder, Path("rawdata") / "sub-001" / "random-ses_level_file.mp4", False, True, False, "sub-001", None, None], [base_folder, Path("rawdata") / "sub-001" / "histology" / "sub-001_histology.file", False, False, False, "sub-001", None, "histology"], [base_folder, Path("rawdata") / "sub-002_random-value" / "sub-002_random-value.file", False, True, False, "sub-002_random-value", None, None], - [base_folder, Path("rawdata") / "sub-002_random-value" / "ses-001" / "non_data_type_level_folder" / "file.csv", False, False, True, "sub-002_random-value", "ses-001", 
None], + [base_folder, Path("rawdata") / "sub-002_random-value" / "ses-001" / "non_datatype_level_folder" / "file.csv", False, False, True, "sub-002_random-value", "ses-001", None], [base_folder, Path("rawdata") / "sub-003_date-20231901" / "ses-001" / "funcimg" / ".myfile.xlsx", False, False, False, "sub-003_date-20231901", "ses-001", "funcimg"], - [base_folder, Path("rawdata") / "sub-003_date-20231901" / "ses-003_date-20231901" / "nondata_type_level_file.csv", False, False, True, "sub-003_date-20231901", "ses-003_date-20231901", None], + [base_folder, Path("rawdata") / "sub-003_date-20231901" / "ses-003_date-20231901" / "nondatatype_level_file.csv", False, False, True, "sub-003_date-20231901", "ses-003_date-20231901", None], [base_folder, Path("rawdata") / "sub-003_date-20231901" / "ses-003_date-20231901" / "funcimg" / "funcimg.nii", False, False, False, "sub-003_date-20231901", "ses-003_date-20231901", "funcimg"], [base_folder, Path("rawdata") / "sub-003_date-20231901" / "seslevel_non-prefix_folder" / "nonlevel.mat", False, True, False, "sub-003_date-20231901", "seslevel_non-prefix_folder", None], [base_folder, Path("rawdata") / "sub-003_date-20231901" / "sub-ses-level_file.txt", False, True, False, "sub-003_date-20231901", None, None], diff --git a/tests/tests_integration/test_filesystem_transfer.py b/tests/tests_integration/test_filesystem_transfer.py index 9685886c..deb05f2f 100644 --- a/tests/tests_integration/test_filesystem_transfer.py +++ b/tests/tests_integration/test_filesystem_transfer.py @@ -155,7 +155,7 @@ def test_transfer_all_top_level_folders(self, project, upload_or_download): ) @pytest.mark.parametrize( - "data_type_to_transfer", + "datatype_to_transfer", [ ["behav"], ["ephys"], @@ -170,11 +170,11 @@ def test_transfer_all_top_level_folders(self, project, upload_or_download): ) @pytest.mark.parametrize("upload_or_download", ["upload", "download"]) def test_transfer_empty_folder_specific_dataal_data( - self, project, upload_or_download, data_type_to_transfer + self, project, upload_or_download, datatype_to_transfer ): """ - For the combination of data_type folders, make a folder - tree with all data_type folders then upload select ones, + For the combination of datatype folders, make a folder + tree with all datatype folders then upload select ones, checking only the selected ones are uploaded. 
""" subs, sessions = test_utils.get_default_sub_sessions_to_test() @@ -187,11 +187,11 @@ def test_transfer_empty_folder_specific_dataal_data( base_path_to_check, ) = test_utils.handle_upload_or_download(project, upload_or_download) - transfer_function(subs, sessions, data_type_to_transfer) + transfer_function(subs, sessions, datatype_to_transfer) - test_utils.check_data_type_sub_ses_uploaded_correctly( + test_utils.check_datatype_sub_ses_uploaded_correctly( os.path.join(base_path_to_check, project.cfg.top_level_folder), - data_type_to_transfer, + datatype_to_transfer, subs, sessions, ) @@ -200,7 +200,7 @@ def test_transfer_empty_folder_specific_dataal_data( "sub_idx_to_upload", [[0], [1], [2], [0, 1], [1, 2], [0, 2], [0, 1, 2]] ) @pytest.mark.parametrize( - "data_type_to_transfer", + "datatype_to_transfer", [ ["histology"], ["behav", "ephys"], @@ -213,7 +213,7 @@ def test_transfer_empty_folder_specific_subs( self, project, upload_or_download, - data_type_to_transfer, + datatype_to_transfer, sub_idx_to_upload, ): """ @@ -232,11 +232,11 @@ def test_transfer_empty_folder_specific_subs( ) = test_utils.handle_upload_or_download(project, upload_or_download) subs_to_upload = [subs[i] for i in sub_idx_to_upload] - transfer_function(subs_to_upload, sessions, data_type_to_transfer) + transfer_function(subs_to_upload, sessions, datatype_to_transfer) - test_utils.check_data_type_sub_ses_uploaded_correctly( + test_utils.check_datatype_sub_ses_uploaded_correctly( os.path.join(base_path_to_check, project.cfg.top_level_folder), - data_type_to_transfer, + datatype_to_transfer, subs_to_upload, ) @@ -245,7 +245,7 @@ def test_transfer_empty_folder_specific_subs( ) @pytest.mark.parametrize("sub_idx_to_upload", [[0], [1, 2], [0, 1, 2]]) @pytest.mark.parametrize( - "data_type_to_transfer", + "datatype_to_transfer", [["ephys"], ["funcimg", "histology", "behav"]], ) @pytest.mark.parametrize("upload_or_download", ["upload", "download"]) @@ -253,7 +253,7 @@ def test_transfer_empty_folder_specific_ses( self, project, upload_or_download, - data_type_to_transfer, + datatype_to_transfer, sub_idx_to_upload, ses_idx_to_upload, ): @@ -274,11 +274,11 @@ def test_transfer_empty_folder_specific_ses( subs_to_upload = [subs[i] for i in sub_idx_to_upload] ses_to_upload = [sessions[i] for i in ses_idx_to_upload] - transfer_function(subs_to_upload, ses_to_upload, data_type_to_transfer) + transfer_function(subs_to_upload, ses_to_upload, datatype_to_transfer) - test_utils.check_data_type_sub_ses_uploaded_correctly( + test_utils.check_datatype_sub_ses_uploaded_correctly( os.path.join(base_path_to_check, project.cfg.top_level_folder), - data_type_to_transfer, + datatype_to_transfer, subs_to_upload, ses_to_upload, ) diff --git a/tests/tests_integration/test_logging.py b/tests/tests_integration/test_logging.py index 61532823..bd77de60 100644 --- a/tests/tests_integration/test_logging.py +++ b/tests/tests_integration/test_logging.py @@ -138,7 +138,7 @@ def test_make_sub_folders(self, setup_project): subs = ["sub-11", f"sub-002{tags('to')}004"] ses = ["ses-123", "ses-101"] - setup_project.make_sub_folders(subs, ses, data_type="all") + setup_project.make_sub_folders(subs, ses, datatype="all") log = self.read_log_file(setup_project.cfg.logging_path) @@ -147,7 +147,7 @@ def test_make_sub_folders(self, setup_project): assert ( "\n\nVariablesState:\nlocals: {'sub_names': ['sub-11', " "'sub-002@TO@004'], 'ses_names': ['ses-123', 'ses-101'], " - "'data_type': 'all'}\ncfg: {'local_path':" in log + "'datatype': 'all'}\ncfg: {'local_path':" in log 
) assert f"sub_names: ['sub-11', 'sub-002{tags('to')}004']" in log @@ -254,7 +254,7 @@ def test_logs_upload_and_download( ) else: assert ( - "VariablesState:\nlocals: {'sub_names': 'all', 'ses_names': 'all', 'data_type': 'all', 'dry_run': False, 'init_log': True}\ncfg: {'local_path': " + "VariablesState:\nlocals: {'sub_names': 'all', 'ses_names': 'all', 'datatype': 'all', 'dry_run': False, 'init_log': True}\ncfg: {'local_path': " in log ) @@ -279,7 +279,7 @@ def test_logs_upload_and_download_folder_or_file( setup_project, subs=["sub-001"], sessions=["ses-001"], - data_type="all", + datatype="all", ) setup_project.update_config("show_transfer_progress", False) @@ -334,11 +334,11 @@ def test_logs_check_update_config_error(self, setup_project): def test_logs_bad_make_sub_folders_error(self, setup_project): """""" - setup_project.make_sub_folders("sub-001", data_type="all") + setup_project.make_sub_folders("sub-001", datatype="all") self.delete_log_files(setup_project.cfg.logging_path) with pytest.raises(BaseException): - setup_project.make_sub_folders("sub-001", data_type="all") + setup_project.make_sub_folders("sub-001", datatype="all") log = self.read_log_file(setup_project.cfg.logging_path) diff --git a/tests/tests_integration/test_make_folders.py b/tests/tests_integration/test_make_folders.py index 9247adce..8c3a7b09 100644 --- a/tests/tests_integration/test_make_folders.py +++ b/tests/tests_integration/test_make_folders.py @@ -173,7 +173,7 @@ def test_explicitly_session_list(self, project): def test_turn_off_specific_folder_used(self, project, folder_key): """ Whether or not a folder is made is held in the .used key of the - folder class (stored in project.cfg.data_type_folders). + folder class (stored in project.cfg.datatype_folders). """ # Overwrite configs to make specified folder not used. @@ -201,10 +201,10 @@ def test_custom_folder_names(self, project): ensure they are made correctly. """ # Change folder names to custom names - project.cfg.data_type_folders["ephys"].name = "change_ephys" - project.cfg.data_type_folders["behav"].name = "change_behav" - project.cfg.data_type_folders["histology"].name = "change_histology" - project.cfg.data_type_folders["funcimg"].name = "change_funcimg" + project.cfg.datatype_folders["ephys"].name = "change_ephys" + project.cfg.datatype_folders["behav"].name = "change_behav" + project.cfg.datatype_folders["histology"].name = "change_histology" + project.cfg.datatype_folders["funcimg"].name = "change_funcimg" # Make the folders sub = "sub-001" @@ -243,10 +243,10 @@ def test_custom_folder_names(self, project): ["funcimg"], ], ) - def test_data_types_subsection(self, project, files_to_test): + def test_datatypes_subsection(self, project, files_to_test): """ - Check that combinations of data_types passed to make file folder - make the correct combination of data types. + Check that combinations of datatypes passed to make file folder + make the correct combination of datatypes. Note this will fail when new top level folders are added, and should be updated. 
diff --git a/tests/tests_integration/test_ssh_file_transfer.py b/tests/tests_integration/test_ssh_file_transfer.py index 5ea08f5a..a03360a7 100644 --- a/tests/tests_integration/test_ssh_file_transfer.py +++ b/tests/tests_integration/test_ssh_file_transfer.py @@ -144,16 +144,16 @@ def central_from_local(self, path_): ], ) @pytest.mark.parametrize( - "data_type", + "datatype", [ ["all"], - ["all_ses_level_non_data_type"], - ["all_data_type"], + ["all_ses_level_non_datatype"], + ["all_datatype"], ["behav"], ["ephys"], ["histology"], ["funcimg"], - ["histology", "behav", "all_ses_level_non_data_type"], + ["histology", "behav", "all_ses_level_non_datatype"], ], ) @pytest.mark.parametrize("upload_or_download", ["upload", "download"]) @@ -162,14 +162,14 @@ def test_all_data_transfer_options( pathtable_and_project, sub_names, ses_names, - data_type, + datatype, upload_or_download, ): """ Parse the arguments to filter the pathtable, getting the files expected to be transferred passed on the arguments Note files in sub/ses/datatype folders must be handled - separately to those in non-sub, non-ses, non-data-type folders + separately to those in non-sub, non-ses, non-datatype folders see test_utils.swap_local_and_central_paths() for the logic on setting up and swapping local / central paths for @@ -183,7 +183,7 @@ def test_all_data_transfer_options( swap_last_folder_only=project.testing_ssh, )[0] - transfer_function(sub_names, ses_names, data_type, init_log=False) + transfer_function(sub_names, ses_names, datatype, init_log=False) if upload_or_download == "download": test_utils.swap_local_and_central_paths( @@ -192,21 +192,19 @@ def test_all_data_transfer_options( sub_names = self.parse_arguments(pathtable, sub_names, "sub") ses_names = self.parse_arguments(pathtable, ses_names, "ses") - data_type = self.parse_arguments(pathtable, data_type, "data_type") + datatype = self.parse_arguments(pathtable, datatype, "datatype") # Filter pathtable to get files that were expected # to be transferred ( sub_ses_dtype_arguments, extra_arguments, - ) = self.make_pathtable_search_filter(sub_names, ses_names, data_type) + ) = self.make_pathtable_search_filter(sub_names, ses_names, datatype) - data_type_folders = self.query_table( - pathtable, sub_ses_dtype_arguments - ) + datatype_folders = self.query_table(pathtable, sub_ses_dtype_arguments) extra_folders = self.query_table(pathtable, extra_arguments) - expected_paths = pd.concat([data_type_folders, extra_folders]) + expected_paths = pd.concat([datatype_folders, extra_folders]) expected_paths = expected_paths.drop_duplicates(subset="path") central_base_paths = expected_paths.base_folder.map( @@ -268,8 +266,8 @@ def parse_arguments(self, pathtable, list_of_names, field): if list_of_names == ["all"]: entries += ( [f"all_non_{field}"] - if field != "data_type" - else ["all_ses_level_non_data_type"] + if field != "datatype" + else ["all_ses_level_non_datatype"] ) list_of_names = entries return list_of_names @@ -281,13 +279,13 @@ def create_all_pathtable_files(self, pathtable): filepath.parents[0].mkdir(parents=True, exist_ok=True) test_utils.write_file(filepath, contents="test_entry") - def make_pathtable_search_filter(self, sub_names, ses_names, data_type): + def make_pathtable_search_filter(self, sub_names, ses_names, datatype): """ Create a string of arguments to pass to pd.query() that will - create the table of only transferred sub, ses and data_type. + create the table of only transferred sub, ses and datatype. 
Two arguments must be created, one of all sub / ses / datatypes - and the other of all non sub/ non ses / non data type + and the other of all non sub/ non ses / non datatype folders. These must be handled separately as they are mutually exclusive. """ @@ -298,9 +296,9 @@ def make_pathtable_search_filter(self, sub_names, ses_names, data_type): if sub == "all_non_sub": extra_arguments += ["is_non_sub == True"] else: - if "histology" in data_type: + if "histology" in datatype: sub_ses_dtype_arguments += [ - f"(parent_sub == '{sub}' & (parent_data_type == 'histology' | parent_data_type == 'histology'))" + f"(parent_sub == '{sub}' & (parent_datatype == 'histology' | parent_datatype == 'histology'))" ] for ses in ses_names: @@ -309,14 +307,14 @@ def make_pathtable_search_filter(self, sub_names, ses_names, data_type): f"(parent_sub == '{sub}' & is_non_ses == True)" ] else: - for dtype in data_type: - if dtype == "all_ses_level_non_data_type": + for dtype in datatype: + if dtype == "all_ses_level_non_datatype": extra_arguments += [ - f"(parent_sub == '{sub}' & parent_ses == '{ses}' & is_ses_level_non_data_type == True)" + f"(parent_sub == '{sub}' & parent_ses == '{ses}' & is_ses_level_non_datatype == True)" ] else: sub_ses_dtype_arguments += [ - f"(parent_sub == '{sub}' & parent_ses == '{ses}' & (parent_data_type == '{dtype}' | parent_data_type == '{dtype}'))" + f"(parent_sub == '{sub}' & parent_ses == '{ses}' & (parent_datatype == '{dtype}' | parent_datatype == '{dtype}'))" ] return sub_ses_dtype_arguments, extra_arguments
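
The same rename applies to the Python API: `make_sub_folders()`, `upload()` and `download()` now take `datatype` instead of `data_type`, and the keyword values become `all_datatype` and `all_ses_level_non_datatype`. A minimal usage sketch, assuming the usual `DataShuttle` entry point and a hypothetical project whose configs have already been set up; the two calls mirror the examples in the documentation changes above:

```python
from datashuttle import DataShuttle

# Hypothetical project name; make_config_file() is assumed to have been
# run already, as described in the documentation changes above.
project = DataShuttle("my_first_project")

# Make subject/session trees containing every enabled datatype folder.
project.make_sub_folders(
    sub_names="sub-001@TO@002",
    ses_names="ses-001_@DATE@",
    datatype="all",
)

# Upload only the behavioural data for the matching subjects/sessions.
project.upload(
    sub_names="001@TO@003",
    ses_names=["005_date-@*@", "006_date-@*@"],
    datatype="behav",
)
```

Only the spelling changes here; the accepted values (`all`, individual datatypes such as `behav` or `ephys`, `all_datatype`, `all_ses_level_non_datatype`) and the `-dt` short flag behave as before.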