From 971d15d04d2f2eb89ed5d7989dd2634a6bc6e141 Mon Sep 17 00:00:00 2001 From: Joe Ziminski <55797454+JoeZiminski@users.noreply.github.com> Date: Wed, 10 Apr 2024 18:58:48 +0100 Subject: [PATCH] Rename transfer methods. (#360) * Rename 'upload_all' and 'download_all' to new convenience functions. * Fix top-level-folder type hint issues. * Update documentation for new API. * Remove 'use_all_alias' function. * Rename 'upload()' / 'download()' to 'upload_custom()' / 'download_custom()'. * Update docs/source/pages/how_tos/transfer-data.md Co-authored-by: Niko Sirmpilatze * Remove redundant ValueError raise after assertion. --------- Co-authored-by: Niko Sirmpilatze --- datashuttle/configs/canonical_folders.py | 7 ++- datashuttle/datashuttle.py | 58 ++++++++++++------- datashuttle/tui/interface.py | 25 ++++++-- datashuttle/tui/tabs/transfer_status_tree.py | 3 +- datashuttle/utils/rclone.py | 15 +---- docs/source/pages/documentation.md | 10 ++-- docs/source/pages/how_tos/top-level-folder.md | 2 +- docs/source/pages/how_tos/transfer-data.md | 16 ++--- .../source/pages/tutorials/getting_started.md | 8 +-- tests/test_utils.py | 18 +++--- .../tests_integration/test_create_folders.py | 12 +++- .../test_filesystem_transfer.py | 48 ++++++++++----- tests/tests_integration/test_logging.py | 24 ++++---- .../tests_integration/test_transfer_checks.py | 6 +- 14 files changed, 151 insertions(+), 101 deletions(-) diff --git a/datashuttle/configs/canonical_folders.py b/datashuttle/configs/canonical_folders.py index f567019d..df5637eb 100644 --- a/datashuttle/configs/canonical_folders.py +++ b/datashuttle/configs/canonical_folders.py @@ -1,7 +1,10 @@ from __future__ import annotations from pathlib import Path -from typing import List, Tuple +from typing import TYPE_CHECKING, List, Tuple + +if TYPE_CHECKING: + from datashuttle.utils.custom_types import TopLevelFolder from datashuttle.utils.folder_class import Folder @@ -87,7 +90,7 @@ def canonical_reserved_keywords() -> List[str]: return get_non_sub_names() + get_non_ses_names() -def get_top_level_folders() -> List[str]: +def get_top_level_folders() -> List[TopLevelFolder]: return ["rawdata", "derivatives"] diff --git a/datashuttle/datashuttle.py b/datashuttle/datashuttle.py index 9c21e89d..adbfe9ec 100644 --- a/datashuttle/datashuttle.py +++ b/datashuttle/datashuttle.py @@ -292,7 +292,7 @@ def _format_and_validate_names( # ------------------------------------------------------------------------- @check_configs_set - def upload( + def upload_custom( self, top_level_folder: TopLevelFolder, sub_names: Union[str, list], @@ -375,7 +375,7 @@ def upload( ds_logger.close_log_filehandler() @check_configs_set - def download( + def download_custom( self, top_level_folder: TopLevelFolder, sub_names: Union[str, list], @@ -391,8 +391,8 @@ def download( not be overwritten even if the central file is an older version. - This function is identical to upload() but with the direction - of data transfer reversed. Please see upload() for arguments. + This function is identical to upload_custom() but with the direction + of data transfer reversed. Please see upload_custom() for arguments. "all" arguments will search the central project for sub / ses to download. """ @@ -413,8 +413,28 @@ def download( if init_log: ds_logger.close_log_filehandler() + # Specific top-level folder + # ---------------------------------------------------------------------------------- + # A set of convenience functions are provided to abstract + # away the 'top_level_folder' concept. + + @check_configs_set + def upload_rawdata(self, dry_run: bool = False): + self._upload_top_level_folder("rawdata", dry_run=dry_run) + @check_configs_set - def upload_all( + def upload_derivatives(self, dry_run: bool = False): + self._upload_top_level_folder("derivatives", dry_run=dry_run) + + @check_configs_set + def download_rawdata(self, dry_run: bool = False): + self._download_top_level_folder("rawdata", dry_run=dry_run) + + @check_configs_set + def download_derivatives(self, dry_run: bool = False): + self._download_top_level_folder("derivatives", dry_run=dry_run) + + def _upload_top_level_folder( self, top_level_folder: TopLevelFolder, dry_run: bool = False, @@ -424,12 +444,12 @@ def upload_all( Convenience function to upload all data. Alias for: - project.upload("all", "all", "all") + project.upload_custom("all", "all", "all") """ if init_log: - self._start_log("upload-all", local_vars=locals()) + self._start_log(f"upload-{top_level_folder}", local_vars=locals()) - self.upload( + self.upload_custom( top_level_folder, "all", "all", @@ -441,8 +461,7 @@ def upload_all( if init_log: ds_logger.close_log_filehandler() - @check_configs_set - def download_all( + def _download_top_level_folder( self, top_level_folder: TopLevelFolder, dry_run: bool = False, @@ -451,12 +470,14 @@ def download_all( """ Convenience function to download all data. - Alias for : project.download("all", "all", "all") + Alias for : project.download_custom("all", "all", "all") """ if init_log: - self._start_log("download-all", local_vars=locals()) + self._start_log( + f"download-{top_level_folder}", local_vars=locals() + ) - self.download( + self.download_custom( top_level_folder, "all", "all", @@ -1009,7 +1030,7 @@ def validate_project( def check_name_formatting(names: Union[str, list], prefix: Prefix) -> None: """ Pass list of names to check how these will be auto-formatted, - for example as when passed to create_folders() or upload() + for example as when passed to create_folders() or upload_custom() or download() Useful for checking tags e.g. @TO@, @DATE@, @DATETIME@, @DATE@. @@ -1047,11 +1068,6 @@ def _transfer_entire_project( Transfer (i.e. upload or download) the entire project (i.e. every 'top level folder' (e.g. 'rawdata', 'derivatives'). - This function leverages the upload_all or download_all - methods while switching the top level folder as defined in - self.cfg that these methods use to determine the top-level - folder to transfer. - Parameters ---------- @@ -1059,7 +1075,9 @@ def _transfer_entire_project( local to central) or "download" (from central to local). """ transfer_all_func = ( - self.upload_all if direction == "upload" else self.download_all + self._upload_top_level_folder + if direction == "upload" + else self._download_top_level_folder ) for top_level_folder in canonical_folders.get_top_level_folders(): diff --git a/datashuttle/tui/interface.py b/datashuttle/tui/interface.py index 336d255a..4c21d102 100644 --- a/datashuttle/tui/interface.py +++ b/datashuttle/tui/interface.py @@ -224,11 +224,24 @@ def transfer_top_level_only( from central to remote. """ + assert selected_top_level_folder in ["rawdata", "derivatives"] + try: - if upload: - self.project.upload_all(selected_top_level_folder) - else: - self.project.download_all(selected_top_level_folder) + if selected_top_level_folder == "rawdata": + transfer_func = ( + self.project.upload_rawdata + if upload + else self.project.download_rawdata + ) + elif selected_top_level_folder == "derivatives": + transfer_func = ( + self.project.upload_derivatives + if upload + else self.project.download_derivatives + ) + + transfer_func() + return True, None except BaseException as e: @@ -266,14 +279,14 @@ def transfer_custom_selection( """ try: if upload: - self.project.upload( + self.project.upload_custom( selected_top_level_folder, sub_names=sub_names, ses_names=ses_names, datatype=datatype, ) else: - self.project.download( + self.project.download_custom( selected_top_level_folder, sub_names=sub_names, ses_names=ses_names, diff --git a/datashuttle/tui/tabs/transfer_status_tree.py b/datashuttle/tui/tabs/transfer_status_tree.py index 8849a487..dbcaa205 100644 --- a/datashuttle/tui/tabs/transfer_status_tree.py +++ b/datashuttle/tui/tabs/transfer_status_tree.py @@ -92,7 +92,8 @@ def update_transfer_diffs(self) -> None: Updates the transfer diffs used to style the DirectoryTree. """ self.transfer_diffs = get_local_and_central_file_differences( - self.interface.get_configs(), top_level_folders_to_check=["all"] + self.interface.get_configs(), + top_level_folders_to_check=["rawdata", "derivatives"], ) # Overridden Methods diff --git a/datashuttle/utils/rclone.py b/datashuttle/utils/rclone.py index 37db17fd..d0c4500e 100644 --- a/datashuttle/utils/rclone.py +++ b/datashuttle/utils/rclone.py @@ -3,7 +3,6 @@ from subprocess import CompletedProcess from typing import Dict, List, Literal -from datashuttle.configs import canonical_folders from datashuttle.configs.config_class import Configs from datashuttle.utils import utils from datashuttle.utils.custom_types import TopLevelFolder @@ -207,7 +206,7 @@ def transfer_data( def get_local_and_central_file_differences( cfg: Configs, - top_level_folders_to_check: List[str], + top_level_folders_to_check: List[TopLevelFolder], ) -> Dict: """ Convert the output of rclone's check (with `--combine`) flag @@ -221,7 +220,7 @@ def get_local_and_central_file_differences( ---------- top_level_folders_to_check : - Either 'all' for all top-level folder or list of top-level folders. + List of top-level folders to check. Returns ------- @@ -243,15 +242,7 @@ def get_local_and_central_file_differences( parsed_output: Dict[str, List] parsed_output = {val: [] for val in convert_symbols.values()} - if "all" in top_level_folders_to_check: - assert ( - len(top_level_folders_to_check) == 1 - ), "If using `all` can be only list entry." - permitted_top_level_folders = canonical_folders.get_top_level_folders() - else: - permitted_top_level_folders = top_level_folders_to_check - - for top_level_folder in permitted_top_level_folders: + for top_level_folder in top_level_folders_to_check: rclone_output = perform_rclone_check(cfg, top_level_folder) # type: ignore split_rclone_output = rclone_output.split("\n") diff --git a/docs/source/pages/documentation.md b/docs/source/pages/documentation.md index aeb3fafe..231a95cc 100644 --- a/docs/source/pages/documentation.md +++ b/docs/source/pages/documentation.md @@ -498,7 +498,7 @@ For example, the call: :::{tab-item} Python API ```{code-block} python -project.upload( +project.upload_custom( sub_names="001@TO@003", ses_names=["005_date-@*@", "006_date-@*@"], datatype="behav" @@ -634,7 +634,7 @@ the below command transfers only the first 25 subjects: :::{tab-item} Python API ```{code-block} python -project.upload( +project.upload_custom( sub_names="001@TO@025", ses_names="all", datatype="all", @@ -698,7 +698,7 @@ everything that comes after the `date` key: :::{tab-item} Python API ```{code-block} python -project.upload( +project.upload_custom( sub_names=["001", "002"], ses_names="005_condition-test_date-@*@", datatype="behav", @@ -851,7 +851,7 @@ Given our example *local* project folder above: :::{tab-item} Python API ```{code-block} console -project.upload("all", "all", "all_non_datatype") +project.upload_custom("all", "all", "all_non_datatype") ``` ::: @@ -895,7 +895,7 @@ For `sub-002`, no other files are transferred because there is no non-*datatype* :::{tab-item} Python API ```{code-block} console -project.upload("sub-001", "all", "all") +project.upload_custom("sub-001", "all", "all") ``` ::: diff --git a/docs/source/pages/how_tos/top-level-folder.md b/docs/source/pages/how_tos/top-level-folder.md index c66e76de..df53ca7f 100644 --- a/docs/source/pages/how_tos/top-level-folder.md +++ b/docs/source/pages/how_tos/top-level-folder.md @@ -53,7 +53,7 @@ project on the local machine you are using. When making folders, `create_folders` will only create folders in the working top-level folder. -Transferring folders (e.g. with `upload()` or `download()`) will +Transferring folders (e.g. with `upload_custom()` or `download_custom()`) will only transfer folders in the working top-level folder (unless `upload_entire_project()` or `download_entire_project()` is used). diff --git a/docs/source/pages/how_tos/transfer-data.md b/docs/source/pages/how_tos/transfer-data.md index 1dcd19c7..4fddc62f 100644 --- a/docs/source/pages/how_tos/transfer-data.md +++ b/docs/source/pages/how_tos/transfer-data.md @@ -33,7 +33,7 @@ There are three main methods to transfer data in **datashuttle**. These allow transfer between: 1) The entire project (all files in both `rawdata` and `derivatives`) -2) A specific top-level-folder (e.g. all files in `rawdata`) +2) Only the `rawdata` or `derivatives` folder. 3) A custom subset of subjects / sessions / datatypes. Below we will explore each method in turn, as well as consider @@ -110,7 +110,7 @@ project.download_entire_project() :::: (transfer-top-level-folder)= -## Transfer the top-level folder +## Transfer only `rawdata` or `derivatives` This mode acts almost identically to [transferring the entire project](transfer-entire-project) @@ -151,16 +151,16 @@ and press `Transfer` to begin. :::{tab-item} Python API :sync: python -The `upload_all()` or `download_all()` methods can be used with the argument `top_level_folder` to specify -the top-level folder to transfer within. +The `upload_rawdata()`, `upload_derivatives()` or for downloading, `download_rawdata()`, `download_derivatives()` +to specify the top-level folder to transfer. In the next example, we will upload `rawdata` downloading `derivatives`. ```python -project.upload_all("rawdata") +project.upload_rawdata() -project.download_all("derivatives") +project.download_derivatives() ``` ::: @@ -240,11 +240,11 @@ and press `Transfer` to begin. :::{tab-item} Python API :sync: python -The `upload()` and `download()` methods can be used for custom +The `upload_custom()` and `download_custom()` methods can be used for custom data transfers. For example, to perform a custom upload: ```python -project.upload( +project.upload_custom( top_level_folder="rawdata", sub_names="all_sub", ses_names="ses-001_@*@", diff --git a/docs/source/pages/tutorials/getting_started.md b/docs/source/pages/tutorials/getting_started.md index 4c9d6f4a..c2877021 100644 --- a/docs/source/pages/tutorials/getting_started.md +++ b/docs/source/pages/tutorials/getting_started.md @@ -538,7 +538,7 @@ and all files will be uploaded from the local version of the project to central Navigating to the `central_path` in your systems file browser, the newly transferred data will have appeared, simulating transfer to a separate data storage machine. -Other methods (`upload_all()` and `upload()`) provide refined +Other methods (e.g. `upload_rawdata()` and `upload_custom()`) provide refined data transfers (and every `upload` method has an equivalent `download` method). For more information see the [How to Transfer Data](how-to-transfer-data) page @@ -686,14 +686,14 @@ of files will now be available in the _local path_ folder. :::{tab-item} Python API :sync: python -We can use the `download()` method (the download equivalent method of -the `upload()`). +We can use the `download_custom()` method (the download equivalent method of +the `upload_custom()`). We will download only the behavioural data from the first session, using a few shortcuts available for custom transfers ```python -project.download( +project.download_custom( top_level_folder="rawdata", sub_names="all", ses_names="ses-001_@*@", diff --git a/tests/test_utils.py b/tests/test_utils.py index ac29d3be..48fd9ce1 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -434,7 +434,7 @@ def get_top_level_folder_path( def handle_upload_or_download( project, upload_or_download, - use_all_alias=False, + specific_top_level_folder=False, transfer_entire_project=False, swap_last_folder_only=False, ): @@ -451,19 +451,23 @@ def handle_upload_or_download( if transfer_entire_project: transfer_function = project.download_entire_project - elif use_all_alias: - transfer_function = project.download_all + elif specific_top_level_folder == "rawdata": + transfer_function = project.download_rawdata + elif specific_top_level_folder == "derivatives": + transfer_function = project.download_derivatives else: - transfer_function = project.download + transfer_function = project.download_custom else: central_path = project.cfg["central_path"] if transfer_entire_project: transfer_function = project.upload_entire_project - elif use_all_alias: - transfer_function = project.upload_all + elif specific_top_level_folder == "rawdata": + transfer_function = project.upload_rawdata + elif specific_top_level_folder == "derivatives": + transfer_function = project.upload_derivatives else: - transfer_function = project.upload + transfer_function = project.upload_custom return transfer_function, central_path diff --git a/tests/tests_integration/test_create_folders.py b/tests/tests_integration/test_create_folders.py index f57b7005..5c2e94a6 100644 --- a/tests/tests_integration/test_create_folders.py +++ b/tests/tests_integration/test_create_folders.py @@ -300,7 +300,11 @@ def test_get_next_sub(self, project, return_with_prefix, top_level_folder): assert new_num == "sub-004" if return_with_prefix else "004" # Upload to central, now local and central folders match - project.upload_all(top_level_folder) + ( + project.upload_rawdata() + if top_level_folder == "rawdata" + else project.upload_derivatives() + ) shutil.rmtree(project.cfg["local_path"] / top_level_folder) @@ -353,7 +357,11 @@ def test_get_next_ses(self, project, return_with_prefix, top_level_folder): # Now upload the data, delete locally, and check the # suggested values are correct based on the `central` path. - project.upload_all(top_level_folder) + ( + project.upload_rawdata() + if top_level_folder == "rawdata" + else project.upload_derivatives() + ) shutil.rmtree(project.cfg["local_path"] / top_level_folder) diff --git a/tests/tests_integration/test_filesystem_transfer.py b/tests/tests_integration/test_filesystem_transfer.py index 1f3d8b08..8c19cdfe 100644 --- a/tests/tests_integration/test_filesystem_transfer.py +++ b/tests/tests_integration/test_filesystem_transfer.py @@ -14,13 +14,13 @@ class TestFileTransfer(BaseTest): @pytest.mark.parametrize("top_level_folder", ["rawdata", "derivatives"]) @pytest.mark.parametrize("upload_or_download", ["upload", "download"]) - @pytest.mark.parametrize("use_all_alias", [True, False]) + @pytest.mark.parametrize("use_top_level_folder_func", [True, False]) def test_transfer_empty_folder_structure( self, project, top_level_folder, upload_or_download, - use_all_alias, + use_top_level_folder_func, ): """ First make a project (folders only) locally. @@ -37,11 +37,15 @@ def test_transfer_empty_folder_structure( transfer_function, base_path_to_check, ) = test_utils.handle_upload_or_download( - project, upload_or_download, use_all_alias=use_all_alias + project, + upload_or_download, + specific_top_level_folder=( + top_level_folder if use_top_level_folder_func else False + ), ) - if use_all_alias: - transfer_function(top_level_folder) + if use_top_level_folder_func: + transfer_function() else: transfer_function(top_level_folder, "all", "all", "all") @@ -54,7 +58,7 @@ def test_transfer_empty_folder_structure( def test_empty_folder_is_not_transferred(self, project): project.create_folders("rawdata", "sub-001") - project.upload_all("rawdata") + project.upload_rawdata() assert not ( project.cfg["central_path"] / "rawdata" / "sub-001" ).is_dir() @@ -64,19 +68,19 @@ def test_empty_folder_is_not_transferred(self, project): canonical_folders.get_top_level_folders(), ) @pytest.mark.parametrize("upload_or_download", ["upload", "download"]) - @pytest.mark.parametrize("use_all_alias", [True, False]) + @pytest.mark.parametrize("use_top_level_folder_func", [True, False]) def test_transfer_across_top_level_folders( self, project, top_level_folder_to_transfer, upload_or_download, - use_all_alias, + use_top_level_folder_func, ): """ For each possible top level folder (e.g. rawdata, derivatives) (parametrized) create a folder tree in every top-level folder, then transfer using upload / download and - upload_all / download_all that only the working top-level folder + upload_rawdata() / download_rawdata() that only the working top-level folder is transferred. """ subs, sessions = test_utils.get_default_sub_sessions_to_test() @@ -94,11 +98,17 @@ def test_transfer_across_top_level_folders( transfer_function, base_path_to_check, ) = test_utils.handle_upload_or_download( - project, upload_or_download, use_all_alias=use_all_alias + project, + upload_or_download, + specific_top_level_folder=( + top_level_folder_to_transfer + if use_top_level_folder_func + else False + ), ) - if use_all_alias: - transfer_function(top_level_folder_to_transfer) + if use_top_level_folder_func: + transfer_function() else: transfer_function( top_level_folder_to_transfer, "all", "all", "all" @@ -420,7 +430,7 @@ def test_rclone_options( ) test_utils.clear_capsys(capsys) - project.upload_all("rawdata", dry_run=dry_run) + project.upload_rawdata(dry_run=dry_run) log = capsys.readouterr().out @@ -450,7 +460,7 @@ def test_rclone_transfer_verbosity( project.update_config_file(transfer_verbosity=transfer_verbosity) test_utils.clear_capsys(capsys) - project.upload_all("rawdata") + project.upload_rawdata() log = capsys.readouterr().out @@ -501,7 +511,13 @@ def test_rclone_overwrite_modified_file( if overwrite_existing_files: project.update_config_file(overwrite_existing_files=True) - project.upload_all(top_level_folder) + upload_func = ( + project.upload_rawdata + if top_level_folder == "rawdata" + else project.upload_derivatives + ) + + upload_func() # Update the file and transfer and transfer again test_utils.write_file( @@ -510,7 +526,7 @@ def test_rclone_overwrite_modified_file( assert time_written < os.path.getatime(local_test_file_path) - project.upload_all(top_level_folder) + upload_func() central_contents = test_utils.read_file(central_test_file_path) diff --git a/tests/tests_integration/test_logging.py b/tests/tests_integration/test_logging.py index e054d4d6..2d2d2294 100644 --- a/tests/tests_integration/test_logging.py +++ b/tests/tests_integration/test_logging.py @@ -187,9 +187,9 @@ def test_create_folders(self, project): ) @pytest.mark.parametrize("upload_or_download", ["upload", "download"]) - @pytest.mark.parametrize("use_all_alias", [True, False]) + @pytest.mark.parametrize("use_top_level_folder_func", [True, False]) def test_logs_upload_and_download( - self, project, upload_or_download, use_all_alias + self, project, upload_or_download, use_top_level_folder_func ): """ Set transfer verbosity and progress settings so @@ -215,25 +215,25 @@ def test_logs_upload_and_download( ) = test_utils.handle_upload_or_download( project, upload_or_download, - use_all_alias, + specific_top_level_folder=( + "rawdata" if use_top_level_folder_func else False + ), ) self.delete_log_files(project.cfg.logging_path) ( - transfer_function("rawdata") - if use_all_alias + transfer_function() + if use_top_level_folder_func else transfer_function("rawdata", "all", "all", "all") ) log = self.read_log_file(project.cfg.logging_path) - suffix = "-all" if use_all_alias else "" - - assert ( - f"Starting logging for command {upload_or_download}{suffix}" in log - ) - - if use_all_alias: + if use_top_level_folder_func: + assert ( + f"Starting logging for command {upload_or_download}-rawdata" + in log + ) assert ( "VariablesState:\nlocals: {'top_level_folder': 'rawdata', 'dry_run': False" in log diff --git a/tests/tests_integration/test_transfer_checks.py b/tests/tests_integration/test_transfer_checks.py index 970a9a3e..342ed640 100644 --- a/tests/tests_integration/test_transfer_checks.py +++ b/tests/tests_integration/test_transfer_checks.py @@ -74,11 +74,7 @@ def test_rclone_check(self, project, top_level_folders): results = get_local_and_central_file_differences( project.cfg, - top_level_folders_to_check=( - ["all"] - if len(top_level_folders) == 2 - else [top_level_folders[0]] - ), + top_level_folders_to_check=(top_level_folders), ) # Check the results are sorted into cases correctly.