diff --git a/.buildinfo b/.buildinfo index 2b2a51aa..00891052 100644 --- a/.buildinfo +++ b/.buildinfo @@ -1,4 +1,4 @@ # Sphinx build info version 1 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. -config: ada322124a69aad90c9ce7ef269cedbe +config: 559e47c374702c5cd145111ce6cf3376 tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/.doctrees/environment.pickle b/.doctrees/environment.pickle index 096a84f0..3ba792b4 100644 Binary files a/.doctrees/environment.pickle and b/.doctrees/environment.pickle differ diff --git a/.doctrees/index.doctree b/.doctrees/index.doctree index a144443d..b08aa7e2 100644 Binary files a/.doctrees/index.doctree and b/.doctrees/index.doctree differ diff --git a/.doctrees/pages/api_index.doctree b/.doctrees/pages/api_index.doctree index 32b776f1..d7acca49 100644 Binary files a/.doctrees/pages/api_index.doctree and b/.doctrees/pages/api_index.doctree differ diff --git a/.doctrees/pages/cli_index.doctree b/.doctrees/pages/cli_index.doctree deleted file mode 100644 index 55c400bf..00000000 Binary files a/.doctrees/pages/cli_index.doctree and /dev/null differ diff --git a/.doctrees/pages/documentation.doctree b/.doctrees/pages/documentation.doctree deleted file mode 100644 index 35c5d64b..00000000 Binary files a/.doctrees/pages/documentation.doctree and /dev/null differ diff --git a/.doctrees/pages/how_tos.doctree b/.doctrees/pages/how_tos.doctree index 176285f4..7facab83 100644 Binary files a/.doctrees/pages/how_tos.doctree and b/.doctrees/pages/how_tos.doctree differ diff --git a/.doctrees/pages/how_tos/choose-a-terminal.doctree b/.doctrees/pages/how_tos/choose-a-terminal.doctree index f310f115..29b835ee 100644 Binary files a/.doctrees/pages/how_tos/choose-a-terminal.doctree and b/.doctrees/pages/how_tos/choose-a-terminal.doctree differ diff --git a/.doctrees/pages/how_tos/create-folders.doctree b/.doctrees/pages/how_tos/create-folders.doctree index 5ee3900c..80b6bb38 
100644 Binary files a/.doctrees/pages/how_tos/create-folders.doctree and b/.doctrees/pages/how_tos/create-folders.doctree differ diff --git a/.doctrees/pages/how_tos/install.doctree b/.doctrees/pages/how_tos/install.doctree index e8d9f37c..54ed854e 100644 Binary files a/.doctrees/pages/how_tos/install.doctree and b/.doctrees/pages/how_tos/install.doctree differ diff --git a/.doctrees/pages/how_tos/make-a-new-project.doctree b/.doctrees/pages/how_tos/make-a-new-project.doctree index 7ac84a1c..fe147d50 100644 Binary files a/.doctrees/pages/how_tos/make-a-new-project.doctree and b/.doctrees/pages/how_tos/make-a-new-project.doctree differ diff --git a/.doctrees/pages/how_tos/read-logs.doctree b/.doctrees/pages/how_tos/read-logs.doctree index 416f922e..b34e7dc2 100644 Binary files a/.doctrees/pages/how_tos/read-logs.doctree and b/.doctrees/pages/how_tos/read-logs.doctree differ diff --git a/.doctrees/pages/how_tos/top-level-folder.doctree b/.doctrees/pages/how_tos/top-level-folder.doctree deleted file mode 100644 index b03eacf8..00000000 Binary files a/.doctrees/pages/how_tos/top-level-folder.doctree and /dev/null differ diff --git a/.doctrees/pages/how_tos/transfer-data.doctree b/.doctrees/pages/how_tos/transfer-data.doctree index 85cf013f..3782f905 100644 Binary files a/.doctrees/pages/how_tos/transfer-data.doctree and b/.doctrees/pages/how_tos/transfer-data.doctree differ diff --git a/.doctrees/pages/how_tos/update-configs.doctree b/.doctrees/pages/how_tos/update-configs.doctree index a1bf0311..b45b12d8 100644 Binary files a/.doctrees/pages/how_tos/update-configs.doctree and b/.doctrees/pages/how_tos/update-configs.doctree differ diff --git a/.doctrees/pages/tutorials.doctree b/.doctrees/pages/tutorials.doctree index 44cbe599..7b8fffc8 100644 Binary files a/.doctrees/pages/tutorials.doctree and b/.doctrees/pages/tutorials.doctree differ diff --git a/.doctrees/pages/tutorials/getting_started.doctree b/.doctrees/pages/tutorials/getting_started.doctree index 
b1bd97a8..eb47bf0e 100644 Binary files a/.doctrees/pages/tutorials/getting_started.doctree and b/.doctrees/pages/tutorials/getting_started.doctree differ diff --git a/_images/how-to-transfer-all-dark.png b/_images/how-to-transfer-all-dark.png index 52f1bb54..9f66a1e7 100644 Binary files a/_images/how-to-transfer-all-dark.png and b/_images/how-to-transfer-all-dark.png differ diff --git a/_images/how-to-transfer-all-light.png b/_images/how-to-transfer-all-light.png index dec9b504..888eaea4 100644 Binary files a/_images/how-to-transfer-all-light.png and b/_images/how-to-transfer-all-light.png differ diff --git a/_images/how-to-transfer-custom-dark.png b/_images/how-to-transfer-custom-dark.png index eba64596..17db9014 100644 Binary files a/_images/how-to-transfer-custom-dark.png and b/_images/how-to-transfer-custom-dark.png differ diff --git a/_images/how-to-transfer-custom-light.png b/_images/how-to-transfer-custom-light.png index 6fc2d7db..385a4757 100644 Binary files a/_images/how-to-transfer-custom-light.png and b/_images/how-to-transfer-custom-light.png differ diff --git a/_images/how-to-transfer-toplevel-dark.png b/_images/how-to-transfer-toplevel-dark.png index 9e1a41a3..9d13e5f3 100644 Binary files a/_images/how-to-transfer-toplevel-dark.png and b/_images/how-to-transfer-toplevel-dark.png differ diff --git a/_images/how-to-transfer-toplevel-light.png b/_images/how-to-transfer-toplevel-light.png index 0b81b8c5..fcd2c5a1 100644 Binary files a/_images/how-to-transfer-toplevel-light.png and b/_images/how-to-transfer-toplevel-light.png differ diff --git a/_images/tutorial-1-transfer-screen-custom-switch-dark.png b/_images/tutorial-1-transfer-screen-custom-switch-dark.png index 03db573e..02cdec0d 100644 Binary files a/_images/tutorial-1-transfer-screen-custom-switch-dark.png and b/_images/tutorial-1-transfer-screen-custom-switch-dark.png differ diff --git a/_images/tutorial-1-transfer-screen-custom-switch-light.png 
b/_images/tutorial-1-transfer-screen-custom-switch-light.png index a22420d7..e93bd135 100644 Binary files a/_images/tutorial-1-transfer-screen-custom-switch-light.png and b/_images/tutorial-1-transfer-screen-custom-switch-light.png differ diff --git a/_images/tutorial-1-transfer-screen-upload-dark.png b/_images/tutorial-1-transfer-screen-upload-dark.png index 58a13aed..34b02db0 100644 Binary files a/_images/tutorial-1-transfer-screen-upload-dark.png and b/_images/tutorial-1-transfer-screen-upload-dark.png differ diff --git a/_images/tutorial-1-transfer-screen-upload-light.png b/_images/tutorial-1-transfer-screen-upload-light.png index 2ab03bb9..124853d9 100644 Binary files a/_images/tutorial-1-transfer-screen-upload-light.png and b/_images/tutorial-1-transfer-screen-upload-light.png differ diff --git a/_modules/datashuttle/datashuttle.html b/_modules/datashuttle/datashuttle.html index ea0853ea..0b1f058e 100644 --- a/_modules/datashuttle/datashuttle.html +++ b/_modules/datashuttle/datashuttle.html @@ -36,7 +36,7 @@ - + @@ -126,7 +126,7 @@ -

Datashuttle v0.3.0

+

datashuttle v0.4.0

@@ -414,7 +414,11 @@

Source code for datashuttle.datashuttle

 )
 
 if TYPE_CHECKING:
-    from datashuttle.utils.custom_types import Prefix, TopLevelFolder
+    from datashuttle.utils.custom_types import (
+        OverwriteExistingFiles,
+        Prefix,
+        TopLevelFolder,
+    )
 
 import paramiko
 import yaml
@@ -472,7 +476,7 @@ 

Source code for datashuttle.datashuttle

     cfgs.make_and_get_logging_path().
 
     For transferring data between a central data storage
-    with SSH, use setup setup_ssh_connection_to_central_server().
+    with SSH, use setup setup_ssh_connection().
     This will allow you to check the server key, add host key to
     profile if accepted, and setup ssh key pair.
 
@@ -687,13 +691,14 @@ 

Source code for datashuttle.datashuttle

     # Public File Transfer
     # -------------------------------------------------------------------------
 
-
[docs] @check_configs_set - def upload( +
[docs] @check_configs_set + def upload_custom( self, top_level_folder: TopLevelFolder, sub_names: Union[str, list], ses_names: Union[str, list], datatype: Union[List[str], str] = "all", + overwrite_existing_files: OverwriteExistingFiles = "never", dry_run: bool = False, init_log: bool = True, ) -> None: @@ -717,44 +722,32 @@

Source code for datashuttle.datashuttle

             automatically added. "@*@" can be used as a wildcard.
             "all" will search for all sub-folders in the
             datatype folder to upload.
+
         ses_names :
             a session name / list of session names, similar to
             sub_names but requiring a "ses-" prefix.
+
+        datatype :
+            see create_folders()
+
+        overwrite_existing_files :
+            If `False`, files on central will never be overwritten
+            by files transferred from local. If `True`, central files
+            will be overwritten if there is any difference (date, size)
+            between central and local files.
+
         dry_run :
-            perform a dry-run of upload. This will output as if file
+            perform a dry-run of transfer. This will output as if file
             transfer was taking place, but no files will be moved. Useful
             to check which files will be moved on data transfer.
-        datatype :
-            see create_folders()
 
         init_log :
-            (Optional). Whether handle logging. This should
+            (Optional). Whether to handle logging. This should
             always be True, unless logger is handled elsewhere
             (e.g. in a calling function).
-
-        Notes
-        -----
-
-        The configs "overwrite_existing_files", "transfer_verbosity"
-        and "show_transfer_progress" pertain to data-transfer settings.
-        See make_config_file() for more information.
-
-        sub_names or ses_names may contain certain formatting tags:
-
-        @*@: wildcard search for subject names. e.g. ses-001_date-@*@
-             will transfer all session 001 collected on all dates.
-        @TO@: used to transfer a range of sub/ses.
-              Number must be either side of the tag
-              e.g. sub-001@TO@003 will generate
-              ["sub-001", "sub-002", "sub-003"]
-        @DATE@, @TIME@ @DATETIME@: will add date-<value>, time-<value> or
-              date-<value>_time-<value> keys respectively. Only one per-name
-              is permitted.
-              e.g. sub-001_@DATE@ will generate sub-001_date-20220101
-              (on the 1st january, 2022).
         """
         if init_log:
-            self._start_log("upload", local_vars=locals())
+            self._start_log("upload-custom", local_vars=locals())
 
         TransferData(
             self.cfg,
@@ -763,6 +756,7 @@ 

Source code for datashuttle.datashuttle

             sub_names,
             ses_names,
             datatype,
+            overwrite_existing_files,
             dry_run,
             log=True,
         )
@@ -770,30 +764,61 @@ 

Source code for datashuttle.datashuttle

         if init_log:
             ds_logger.close_log_filehandler()
-
[docs] @check_configs_set - def download( +
[docs] @check_configs_set + def download_custom( self, top_level_folder: TopLevelFolder, sub_names: Union[str, list], ses_names: Union[str, list], datatype: Union[List[str], str] = "all", + overwrite_existing_files: OverwriteExistingFiles = "never", dry_run: bool = False, init_log: bool = True, ) -> None: """ Download data from the central project folder to the - local project folder. In the case that a file / folder - exists on the central and local, the local will - not be overwritten even if the central file is an - older version. - - This function is identical to upload() but with the direction - of data transfer reversed. Please see upload() for arguments. - "all" arguments will search the central - project for sub / ses to download. + local project folder. + + Parameters + ---------- + + top_level_folder : + The top-level folder (e.g. `rawdata`) to transfer files + and folders within. + + sub_names : + a subject name / list of subject names. These must + be prefixed with "sub-", or the prefix will be + automatically added. "@*@" can be used as a wildcard. + "all" will search for all sub-folders in the + datatype folder to upload. + + ses_names : + a session name / list of session names, similar to + sub_names but requiring a "ses-" prefix. + + datatype : + see create_folders() + + overwrite_existing_files : + If "never" files on target will never be overwritten by source. + If "always" files on target will be overwritten by source if + there is any difference in date or size. + If "if_source_newer" files on target will only be overwritten + by files on source with newer creation / modification datetime. + + dry_run : + perform a dry-run of transfer. This will output as if file + transfer was taking place, but no files will be moved. Useful + to check which files will be moved on data transfer. + + init_log : + (Optional). Whether to handle logging. This should + always be True, unless logger is handled elsewhere + (e.g. in a calling function). 
""" if init_log: - self._start_log("download", local_vars=locals()) + self._start_log("download-custom", local_vars=locals()) TransferData( self.cfg, @@ -802,6 +827,7 @@

Source code for datashuttle.datashuttle

             sub_names,
             ses_names,
             datatype,
+            overwrite_existing_files,
             dry_run,
             log=True,
         )
@@ -809,86 +835,205 @@ 

Source code for datashuttle.datashuttle

         if init_log:
             ds_logger.close_log_filehandler()
-
[docs] @check_configs_set - def upload_all( + # Specific top-level folder + # ---------------------------------------------------------------------------------- + # A set of convenience functions are provided to abstract + # away the 'top_level_folder' concept. + +
[docs] @check_configs_set + def upload_rawdata( self, - top_level_folder: TopLevelFolder, + overwrite_existing_files: OverwriteExistingFiles = "never", dry_run: bool = False, - init_log: bool = True, - ) -> None: + ): """ - Convenience function to upload all data. + Upload files in the `rawdata` top level folder. - Alias for: - project.upload("all", "all", "all") - """ - if init_log: - self._start_log("upload-all", local_vars=locals()) + Parameters + ---------- - self.upload( - top_level_folder, - "all", - "all", - "all", - dry_run=dry_run, - init_log=False, - ) + overwrite_existing_files : + If "never" files on target will never be overwritten by source. + If "always" files on target will be overwritten by source if + there is any difference in date or size. + If "if_source_newer" files on target will only be overwritten + by files on source with newer creation / modification datetime. - if init_log: - ds_logger.close_log_filehandler()
+ dry_run : + perform a dry-run of transfer. This will output as if file + transfer was taking place, but no files will be moved. Useful + to check which files will be moved on data transfer. + """ + self._transfer_top_level_folder( + "upload", + "rawdata", + overwrite_existing_files=overwrite_existing_files, + dry_run=dry_run, + )
-
[docs] @check_configs_set - def download_all( +
[docs] @check_configs_set + def upload_derivatives( self, - top_level_folder: TopLevelFolder, + overwrite_existing_files: OverwriteExistingFiles = "never", dry_run: bool = False, - init_log: bool = True, - ) -> None: + ): """ - Convenience function to download all data. + Upload files in the `derivatives` top level folder. + + Parameters + ---------- - Alias for : project.download("all", "all", "all") + overwrite_existing_files : + If "never" files on target will never be overwritten by source. + If "always" files on target will be overwritten by source if + there is any difference in date or size. + If "if_source_newer" files on target will only be overwritten + by files on source with newer creation / modification datetime. + + dry_run : + perform a dry-run of transfer. This will output as if file + transfer was taking place, but no files will be moved. Useful + to check which files will be moved on data transfer. """ - if init_log: - self._start_log("download-all", local_vars=locals()) + self._transfer_top_level_folder( + "upload", + "derivatives", + overwrite_existing_files=overwrite_existing_files, + dry_run=dry_run, + )
- self.download( - top_level_folder, - "all", - "all", - "all", +
[docs] @check_configs_set + def download_rawdata( + self, + overwrite_existing_files: OverwriteExistingFiles = "never", + dry_run: bool = False, + ): + """ + Download files in the `rawdata` top level folder. + + Parameters + ---------- + + overwrite_existing_files : + If "never" files on target will never be overwritten by source. + If "always" files on target will be overwritten by source if + there is any difference in date or size. + If "if_source_newer" files on target will only be overwritten + by files on source with newer creation / modification datetime. + + dry_run : + perform a dry-run of transfer. This will output as if file + transfer was taking place, but no files will be moved. Useful + to check which files will be moved on data transfer. + """ + self._transfer_top_level_folder( + "download", + "rawdata", + overwrite_existing_files=overwrite_existing_files, dry_run=dry_run, - init_log=False, - ) + )
- if init_log: - ds_logger.close_log_filehandler()
+
[docs] @check_configs_set + def download_derivatives( + self, + overwrite_existing_files: OverwriteExistingFiles = "never", + dry_run: bool = False, + ): + """ + Download files in the `derivatives` top level folder. + + Parameters + ---------- + + overwrite_existing_files : + If "never" files on target will never be overwritten by source. + If "always" files on target will be overwritten by source if + there is any difference in date or size. + If "if_source_newer" files on target will only be overwritten + by files on source with newer creation / modification datetime. + + dry_run : + perform a dry-run of transfer. This will output as if file + transfer was taking place, but no files will be moved. Useful + to check which files will be moved on data transfer. + """ + self._transfer_top_level_folder( + "download", + "derivatives", + overwrite_existing_files=overwrite_existing_files, + dry_run=dry_run, + )
[docs] @check_configs_set - def upload_entire_project(self) -> None: + def upload_entire_project( + self, + overwrite_existing_files: OverwriteExistingFiles = "never", + dry_run: bool = False, + ) -> None: """ Upload the entire project (from 'local' to 'central'), i.e. including every top level folder (e.g. 'rawdata', 'derivatives', 'code', 'analysis'). + + Parameters + ---------- + + overwrite_existing_files : + If "never" files on target will never be overwritten by source. + If "always" files on target will be overwritten by source if + there is any difference in date or size. + If "if_source_newer" files on target will only be overwritten + by files on source with newer creation / modification datetime. + + dry_run : + perform a dry-run of transfer. This will output as if file + transfer was taking place, but no files will be moved. Useful + to check which files will be moved on data transfer. """ - self._start_log("transfer-entire-project", local_vars=locals()) - self._transfer_entire_project("upload") + self._start_log("upload-entire-project", local_vars=locals()) + self._transfer_entire_project( + "upload", overwrite_existing_files, dry_run + ) ds_logger.close_log_filehandler()
[docs] @check_configs_set - def download_entire_project(self) -> None: + def download_entire_project( + self, + overwrite_existing_files: OverwriteExistingFiles = "never", + dry_run: bool = False, + ) -> None: """ Download the entire project (from 'central' to 'local'), i.e. including every top level folder (e.g. 'rawdata', 'derivatives', 'code', 'analysis'). + + Parameters + ---------- + + overwrite_existing_files : + If "never" files on target will never be overwritten by source. + If "always" files on target will be overwritten by source if + there is any difference in date or size. + If "if_source_newer" files on target will only be overwritten + by files on source with newer creation / modification datetime. + + dry_run : + perform a dry-run of transfer. This will output as if file + transfer was taking place, but no files will be moved. Useful + to check which files will be moved on data transfer. """ - self._start_log("transfer-entire-project", local_vars=locals()) - self._transfer_entire_project("download") + self._start_log("download-entire-project", local_vars=locals()) + self._transfer_entire_project( + "download", overwrite_existing_files, dry_run + ) ds_logger.close_log_filehandler()
[docs] @check_configs_set def upload_specific_folder_or_file( - self, filepath: Union[str, Path], dry_run: bool = False + self, + filepath: Union[str, Path], + overwrite_existing_files: OverwriteExistingFiles = "never", + dry_run: bool = False, ) -> None: """ Upload a specific file or folder. If transferring @@ -903,20 +1048,33 @@

Source code for datashuttle.datashuttle

 
         filepath :
             a string containing the full filepath.
+
+        overwrite_existing_files :
+            If "never" files on target will never be overwritten by source.
+            If "always" files on target will be overwritten by source if
+            there is any difference in date or size.
+            If "if_source_newer" files on target will only be overwritten
+            by files on source with newer creation / modification datetime.
+
         dry_run :
-            perform a dry-run of upload. This will output as if file
+            perform a dry-run of transfer. This will output as if file
             transfer was taking place, but no files will be moved. Useful
             to check which files will be moved on data transfer.
         """
         self._start_log("upload-specific-folder-or-file", local_vars=locals())
 
-        self._transfer_specific_file_or_folder("upload", filepath, dry_run)
+        self._transfer_specific_file_or_folder(
+            "upload", filepath, overwrite_existing_files, dry_run
+        )
 
         ds_logger.close_log_filehandler()
[docs] @check_configs_set def download_specific_folder_or_file( - self, filepath: Union[str, Path], dry_run: bool = False + self, + filepath: Union[str, Path], + overwrite_existing_files: OverwriteExistingFiles = "never", + dry_run: bool = False, ) -> None: """ Download a specific file or folder. If transferring @@ -931,8 +1089,16 @@

Source code for datashuttle.datashuttle

 
         filepath :
             a string containing the full filepath.
+
+        overwrite_existing_files :
+            If "never" files on target will never be overwritten by source.
+            If "always" files on target will be overwritten by source if
+            there is any difference in date or size.
+            If "if_source_newer" files on target will only be overwritten
+            by files on source with newer creation / modification datetime.
+
         dry_run :
-            perform a dry-run of upload. This will output as if file
+            perform a dry-run of transfer. This will output as if file
             transfer was taking place, but no files will be moved. Useful
             to check which files will be moved on data transfer.
         """
@@ -940,14 +1106,54 @@ 

Source code for datashuttle.datashuttle

             "download-specific-folder-or-file", local_vars=locals()
         )
 
-        self._transfer_specific_file_or_folder("download", filepath, dry_run)
+        self._transfer_specific_file_or_folder(
+            "download", filepath, overwrite_existing_files, dry_run
+        )
 
         ds_logger.close_log_filehandler()
+ def _transfer_top_level_folder( + self, + upload_or_download: Literal["upload", "download"], + top_level_folder: TopLevelFolder, + overwrite_existing_files: OverwriteExistingFiles = "never", + dry_run: bool = False, + init_log: bool = True, + ): + """ + Core function to upload / download files within a + particular top-level-folder. e.g. `upload_rawdata().` + """ + if init_log: + self._start_log( + f"{upload_or_download}-{top_level_folder}", local_vars=locals() + ) + + transfer_func = ( + self.upload_custom + if upload_or_download == "upload" + else self.download_custom + ) + + transfer_func( + top_level_folder, + "all", + "all", + "all", + overwrite_existing_files=overwrite_existing_files, + dry_run=dry_run, + init_log=False, + ) + + if init_log: + ds_logger.close_log_filehandler() + def _transfer_specific_file_or_folder( - self, upload_or_download, filepath, dry_run + self, upload_or_download, filepath, overwrite_existing_files, dry_run ): - """""" + """ + Core function for upload/download_specific_folder_or_file(). + """ if isinstance(filepath, str): filepath = Path(filepath) @@ -974,7 +1180,9 @@

Source code for datashuttle.datashuttle

             upload_or_download,
             top_level_folder,
             include_list,
-            self.cfg.make_rclone_transfer_options(dry_run),
+            self.cfg.make_rclone_transfer_options(
+                overwrite_existing_files, dry_run
+            ),
         )
 
         utils.log(output.stderr.decode("utf-8"))
@@ -983,8 +1191,8 @@ 

Source code for datashuttle.datashuttle

     # SSH
     # -------------------------------------------------------------------------
 
-
[docs] @requires_ssh_configs - def setup_ssh_connection_to_central_server(self) -> None: +
[docs] @requires_ssh_configs + def setup_ssh_connection(self) -> None: """ Setup a connection to the central server using SSH. Assumes the central_host_id and central_host_username @@ -1048,9 +1256,6 @@

Source code for datashuttle.datashuttle

         connection_method: str,
         central_host_id: Optional[str] = None,
         central_host_username: Optional[str] = None,
-        overwrite_existing_files: bool = False,
-        transfer_verbosity: str = "v",
-        show_transfer_progress: bool = False,
     ) -> None:
         """
         Initialise the configurations for datashuttle to use on the
@@ -1092,24 +1297,6 @@ 

Source code for datashuttle.datashuttle

         central_host_username :
             username for which to log in to central host.
             e.g. "jziminski"
-
-        overwrite_existing_files :
-            If True, when copying data (upload or download) files
-            will be overwritten if the timestamp of the copied
-            version is later than the target folder version
-            of the file i.e. edits made to a file in the source
-            machine will be copied to the target machine. If False,
-            a file will be copied if it does not exist on the target
-            folder, otherwise it will never be copied, even if
-            the source version of the file has a later timestamp.
-
-        transfer_verbosity :
-            "v" will tell you about each file that is transferred and
-            significant events, "vv" will be very verbose and inform
-            on all events.
-
-        show_transfer_progress :
-            If true, the real-time progress of file transfers will be printed.
         """
         self._start_log(
             "make-config-file",
@@ -1133,9 +1320,6 @@ 

Source code for datashuttle.datashuttle

                 "connection_method": connection_method,
                 "central_host_id": central_host_id,
                 "central_host_username": central_host_username,
-                "overwrite_existing_files": overwrite_existing_files,
-                "transfer_verbosity": transfer_verbosity,
-                "show_transfer_progress": show_transfer_progress,
             },
         )
 
@@ -1239,15 +1423,15 @@ 

Source code for datashuttle.datashuttle

         """
         return getters.get_existing_project_paths()
-
[docs] @check_configs_set - def get_next_sub_number( +
[docs] @check_configs_set + def get_next_sub( self, top_level_folder: TopLevelFolder, return_with_prefix: bool = True, local_only: bool = False, ) -> str: """ - Convenience function for get_next_sub_or_ses_number + Convenience function for get_next_sub_or_ses to find the next subject number. Parameters @@ -1260,7 +1444,7 @@

Source code for datashuttle.datashuttle

             If `True, only get names from `local_path`, otherwise from
             `local_path` and `central_path`.
         """
-        return getters.get_next_sub_or_ses_number(
+        return getters.get_next_sub_or_ses(
             self.cfg,
             top_level_folder,
             sub=None,
@@ -1269,8 +1453,8 @@ 

Source code for datashuttle.datashuttle

             search_str="sub-*",
         )
-
[docs] @check_configs_set - def get_next_ses_number( +
[docs] @check_configs_set + def get_next_ses( self, top_level_folder: TopLevelFolder, sub: str, @@ -1278,7 +1462,7 @@

Source code for datashuttle.datashuttle

         local_only: bool = False,
     ) -> str:
         """
-        Convenience function for get_next_sub_or_ses_number
+        Convenience function for get_next_sub_or_ses
         to find the next session number.
 
         Parameters
@@ -1297,7 +1481,7 @@ 

Source code for datashuttle.datashuttle

             If `True, only get names from `local_path`, otherwise from
             `local_path` and `central_path`.
         """
-        return getters.get_next_sub_or_ses_number(
+        return getters.get_next_sub_or_ses(
             self.cfg,
             top_level_folder,
             sub=sub,
@@ -1405,7 +1589,7 @@ 

Source code for datashuttle.datashuttle

     def check_name_formatting(names: Union[str, list], prefix: Prefix) -> None:
         """
         Pass list of names to check how these will be auto-formatted,
-        for example as when passed to create_folders() or upload()
+        for example as when passed to create_folders() or upload_custom()
         or download()
 
         Useful for checking tags e.g. @TO@, @DATE@, @DATETIME@, @DATE@.
@@ -1437,30 +1621,32 @@ 

Source code for datashuttle.datashuttle

     # -------------------------------------------------------------------------
 
     def _transfer_entire_project(
-        self, direction: Literal["upload", "download"]
+        self,
+        upload_or_download: Literal["upload", "download"],
+        overwrite_existing_files: OverwriteExistingFiles,
+        dry_run: bool,
     ) -> None:
         """
         Transfer (i.e. upload or download) the entire project (i.e.
         every 'top level folder' (e.g. 'rawdata', 'derivatives').
 
-        This function leverages the upload_all or download_all
-        methods while switching the top level folder as defined in
-        self.cfg that these methods use to determine the top-level
-        folder to transfer.
-
         Parameters
         ----------
 
-        direction : direction to transfer the data, either "upload" (from
+        upload_or_download : direction to transfer the data, either "upload" (from
                     local to central) or "download" (from central to local).
         """
-        transfer_all_func = (
-            self.upload_all if direction == "upload" else self.download_all
-        )
-
         for top_level_folder in canonical_folders.get_top_level_folders():
+
             utils.log_and_message(f"Transferring `{top_level_folder}`")
-            transfer_all_func(top_level_folder, init_log=False)
+
+            self._transfer_top_level_folder(
+                upload_or_download,
+                top_level_folder,
+                overwrite_existing_files=overwrite_existing_files,
+                dry_run=dry_run,
+                init_log=False,
+            )
 
     def _start_log(
         self,
@@ -1641,17 +1827,26 @@ 

Source code for datashuttle.datashuttle

 
     def _update_settings_with_new_canonical_keys(self, settings: Dict):
         """
-        Perform a check on the top-level keys within persistent settings.
+        Perform a check on the keys within persistent settings.
         If they do not exist, persistent settings is from older version
         and the new keys need adding.
         If changing keys within the top level (e.g. a dict entry in
         "tui") this method will need to be extended.
+
+        Added keys:
+            v0.4.0: tui "overwrite_existing_files" and "dry_run"
         """
         if "name_templates" not in settings:
             settings.update(canonical_configs.get_name_templates_defaults())
 
+        canonical_tui_configs = canonical_configs.get_tui_config_defaults()
+
         if "tui" not in settings:
-            settings.update(canonical_configs.get_tui_config_defaults())
+ settings.update(canonical_tui_configs) + + for key in ["overwrite_existing_files", "dry_run"]: + if key not in settings["tui"]: + settings["tui"][key] = canonical_tui_configs["tui"][key]
diff --git a/_modules/index.html b/_modules/index.html index 38886009..f611b795 100644 --- a/_modules/index.html +++ b/_modules/index.html @@ -36,7 +36,7 @@ - + @@ -126,7 +126,7 @@ -

Datashuttle v0.3.0

+

datashuttle v0.4.0

diff --git a/_sources/index.md.txt b/_sources/index.md.txt index ab728977..e83c6d8b 100644 --- a/_sources/index.md.txt +++ b/_sources/index.md.txt @@ -3,7 +3,7 @@ {.center} # **datashuttle** -

The tool to automate neuroscience project folder creation and transfer.

+

The tool to automate neuroscience project folder creation, validation and transfer.

```{image} _static/datashuttle-overview-light.png :alt: My Logo @@ -40,17 +40,18 @@ Short guides on specific actions. :link: pages/api_index :link-type: doc -Full Python API reference. +Full Python reference. ::: :::: -Datashuttle creates and validates projects standardised to the +**datashuttle** creates and validates projects standardised to the [NeuroBlueprint](https://neuroblueprint.neuroinformatics.dev) specification. Dive right into **datashuttle** with our -[Getting Started Tutorial]((tutorial-getting-started)=) -or targeted [How-To Guides](how-tos). +[Getting Started Tutorial](tutorial-getting-started) +or targeted [How-To Guides](how-tos). \ +It can be used through a graphical interface or Python API. Don't hesitate to get in contact through our [GitHub Issues](https://github.com/neuroinformatics-unit/datashuttle/issues) diff --git a/_sources/pages/cli_index.rst.txt b/_sources/pages/cli_index.rst.txt deleted file mode 100644 index c9b9f928..00000000 --- a/_sources/pages/cli_index.rst.txt +++ /dev/null @@ -1,9 +0,0 @@ -.. _CLI_Reference: - -CLI Reference ------------------ - -.. argparse:: - :module: datashuttle.command_line_interface - :func: construct_parser - :prog: datashuttle diff --git a/_sources/pages/documentation.md.txt b/_sources/pages/documentation.md.txt deleted file mode 100644 index d818ce38..00000000 --- a/_sources/pages/documentation.md.txt +++ /dev/null @@ -1,1044 +0,0 @@ -:tocdepth: 2 - -# User Guide - -Datashuttle is a tool to help standardise neuroscience project folders. - -Datashuttle's goal is to alleviate the burden of maintaining -sharable experimental project folders by: - -- Automating the creation of standardised project folders. -- Allowing convenient transfer of data between machines. -- Eliminating the requirement to manually combine data collected across -different acquisition machines. - -Datashuttle aims to integrate seamlessly into existing neuroscience data -collection and analysis workflows. 
- -## Datashuttle's place in neuroscience pipelines - -A typical setup in systems neuroscience is that multiple acquisition -machines collect experimental data (e.g. separate machines for acquiring -behaviour and electrophysiology). - -The data from these separate machines are then combined -in a central storage machine - this may be a particular computer -in the lab or a high-performance computing (HPC) system. - -Following data collection, the entire project or subsets of the data are downloaded -to other machines (e.g. a researcher's laptop) for analysis. - -datashuttle central and local machines - -Datashuttle facilitates the creation of standardised project folders and data transfer between -acquisition, central storage and analysis machines. - -Datashuttle manages datasets that are formatted according to the -[NeuroBlueprint](https://neuroblueprint.neuroinformatics.dev/) specification. - -::: {dropdown} Data specifications for neuroscience -:color: info -:icon: info - -A data specification details the folder structure and naming scheme for a project. -The most widely used specification for data-sharing in neuroscience is the [BIDS](https://bids.neuroimaging.io/) specification. -First developed for human imaging, it has since been extended to other methodologies used in -human experimental neuroscience (e.g. EEG, MEG) . - -Extensions to systems neuroscience datatypes are currently in progress -(e.g. [microscopy](https://bids-specification.readthedocs.io/en/stable/04-modality-specific-files/10-microscopy.html), -[electrophysiology BEP](https://bep032tools.readthedocs.io/en/latest/)). - -While BIDS is an excellent, extensive formal specification, the detailed requirements necessary for data-sharing -are difficult to maintain during data-acquisition. It is also yet to be fully extended to -systems neuroscience. 
- -Therefore, we have introduced [NeuroBlueprint](https://neuroblueprint.neuroinformatics.dev/), -a lightweight specification heavily inspired by BIDS for use during data acquisition. Organising -data according to [NeuroBlueprint](https://neuroblueprint.neuroinformatics.dev/) during acquisition -will facilitate conversion to full BIDS for data sharing if required. - -::: -
- -# Installation - - -We recommend you install Datashuttle inside a [conda](https://docs.conda.io/en/latest/) -or [mamba](https://mamba.readthedocs.io/en/latest/index.html) environment. - -In the following we assume you have `conda` installed, -but the same commands will also work with `mamba`/`micromamba`. - -First, create and activate an environment. -You can call your environment whatever you like, we've used "datashuttle-env". - -```sh -conda create -n datashuttle-env -c conda-forge python=3.10 rclone -conda activate datashuttle-env -``` - -Next install the `datashuttle` package: - -::::{tab-set} - -:::{tab-item} Users -To get the latest release from PyPI: - -```sh -pip install datashuttle -``` -If you have an older version of `datashuttle` installed in the same environment, -you can update to the latest version with: - -```sh -pip install --upgrade datashuttle -``` -::: - -:::{tab-item} Developers -To get the latest development version, clone the -[GitHub repository](https://github.com/neuroinformatics-unit/datashuttle/) -and then run from inside the repository: - -```sh -pip install -e .[dev] # works on most shells -pip install -e '.[dev]' # works on zsh (the default shell on macOS) -``` - -This will install the package in editable mode, including all `dev` dependencies. -::: - -:::: - - -# Setup - -The first thing to do when starting with Datashuttle is to setup a new project on a *local* machine. - -First, we need to tell Datashuttle the path to the project folder on our *local* machine -and how we want to connect to the *central* storage machine. - - -## *local* machines and the *central* machine - -Datashuttle makes the distinction between *local* machines and a single *central* machine. -There may be multiple *local* machines, but only one *central* machine. - -*local* machines are typically acquisition and analysis machines, whereas the -*central* machine is used for data storage. 
- -Datashuttle needs to be setup once for each *local* machine, but requires no -setup on the *central* machine. - -::: {dropdown} Example: Datashuttle use in a neuroscience project -:color: info -:icon: info - -Imagine an experiment in which two different types of data, behavioural and -electrophysiological, are collected on separate acquisition PCs. -These data are sent to a central server where they are combined -and stored. - -Later, a subset of the data is transferred to a third machine for analysis. It is -usually necessary to only download a subset of the data for particular analyses -(e.g. "I want to transfer subjects 1-5, sessions 5, behavioural data -to my laptop".) - -In this case, the behavioural and electrophysiological acquisition machine and -analysis machines are *local*; the central storage machine is the *central* machine. - -::: - -## Setting up Datashuttle - -A one-time setup on each *local* machine used is required, specifying the -`project_name` and configs (short for 'configurations'). - -To interact with Datashuttle, a cross-platform command line interface (CLI) -and a Python API are available (see [API](API_Reference) and [CLI](CLI_Reference) -for reference documentation). - -To set up, we can use the `make-config-file` command to tell Datashuttle our project details. - -::: {dropdown} Updating an existing configuration file. -:color: info -:icon: info - -`make-config-file` should be used when first setting up a project's configs. To update -an existing config file, use `update-config-file` with the arguments to be updated. - -Using `make-config-file` will completely overwrite any existing configurations, including -setting any optional arguments that are not passed to factory default values. - -::: - -We need to tell Datashuttle: - -- The paths to the *local* and *central* folders that contain the project. -- How to connect to the *central* machine. -- The settings that specify how data is transferred. 
-- The *[datatypes](#datatype-folders)* that will be used in the project, e.g. behaviour (`behav`) or electrophysiology (`ephys`). - -::::{tab-set} - -:::{tab-item} Python API -```{code-block} python -from datashuttle import DataShuttle - -project = DataShuttle("my_first_project") - -project.make_config_file( - local_path="/path/to/my_projects/my_first_project", - central_path="/central/live/username/my_projects/my_first_project", - connection_method="local_filesystem", -) -``` -::: - -:::{tab-item} CLI (macOS / Linux) - -macOS and Linux, ``\`` allows the command to continue on a new line. - -```{code-block} console -datashuttle \ -my_first_project \ -make-config-file \ -/path/to/my_projects/my_first_project \ -/central/live/username/my_projects/my_first_project \ -local_filesystem -``` -::: - -:::{tab-item} CLI (Windows) - -On Windows, the `^` character allows the command to continue on a new line. - -```{code-block} console -datashuttle ^ -my_first_project ^ -make-config-file ^ -C:\path\to\my_projects\my_first_project ^ -/central/live/username/my_projects/my_first_project ^ -local_filesystem -``` -::: - -:::: - -### Required Arguments - -**local_path**: The full file path to the project folder on the *local* machine, -including the project folder. The project folder must have the same name as the -Datashuttle project. For example, if your project name is `my_first_project`, -and the project folder resides in `C:\User\my_projects`, the `local_path` -should be `C:\User\my_projects\my_first_project`. - -**central_path**: The path on the *central* machine to the project folder. Similar to the -`local_path`, the path must point to the project folder that has the same name as -the project in Datashuttle. For example, if your project is called `my_first_project`, -connecting to a remote Linux server, the `central_path` may be -`/hpc/home/user/my_projects/my_first_project`. - -**connection_method**: `local_filesystem` or `ssh`. 
Local filesystem can be used -if the *central* storage is mounted to the local machine. Otherwise `ssh` can be used. -See [setting up the connection to central](#setting-up-the-connection-to-central) for -more information. - - -### Optional Arguments - -If connection method is `ssh`, the **central_host_id** and **central_host_username** -must be set. See the [SSH section](#ssh) for details. - -The optional arguments **overwrite_existing_files**, **transfer_verbosity** and -**show_transfer_progress** determine how data transfer is performed -(see the [Data Transfer](#data-transfer) section for details). - -Custom config files can be supplied using the `supply-config` -command (this simplifies setting up projects across multiple *local* machines). - - -## Setting up the connection to *central* - -### Local Filesystem - -Local filesystem transfers allow transfer of files and folders across the -file system available to the machine. This is used when the *central* machine is -setup as a mounted drive. This is a common form of communication between -client machines and a server, such as a high-performance computer -(HPC, also often called a *cluster*). - -When a *central* machine is mounted to the *local* machine, the folders of the -*central* machine are available as if they were part of the *local* filesystem. -In this case, the `central_path` configuration (see `make-config-file`) -can simply be set to the path directed to the mounted drive. - -With the `connection_method` set to `local_filesystem`, data transfer will -proceed between the *local* machine filesystem and mounted drive. - -::: {dropdown} Local Filesystem Example -:color: info -:icon: info - -Imagine your *central* data store is a HPC cluster. Your projects are stored in your -home drive, with the project folder at `/system/home/username/my_projects/my_first_project`. - -You have mounted your home drive, `/system/home/username` to your local filesystem, -at the path `X:\username`. 
- -In this case, you can set the `central_path` to `X:\username\my_projects\my_first_project` -and set `connection_method` to `local_filesystem` to transfer data from -*local* to *central*. - -::: - -### SSH - -An alternative method of connecting to the *central* machine is the Secure Shell (SSH). -To use the SSH connection protocol, additional configs must be provided that -tell Datashuttle how to connect. - -**central_host_id:** This is the address of the server you want to connect to. - -**central_host_username:** This is your profile name on the server you want to -connect to. - - -In Datashuttle, the -`connection_method` configuration must be set to `"ssh"` -to use the SSH protocol for data transfers. - -Prior to using the SSH protocol, the host ID must be accepted and your -user account password entered. This is only required once, following this -SSH key-pairs will be used to connect via SSH. The -command `setup-ssh-connection-to-central-server` can be used to -setup an SSH connection to the *central* machine. 
- -:::::{dropdown} SSH Example -:color: info -:icon: info - -When setting up a project for SSH connection, the `central_host_id` -and `central_host_username` must be provided: - - -::::{tab-set} - -:::{tab-item} Python API -```{code-block} python -project.make_config_file( - local_path="/path/to/my_projects/my_first_project", - central_path="/central/live/username/my_projects/my_first_project", - connection_method="ssh", - central_host_id="ssh.swc.ucl.ac.uk", - central_host_username="username", -) -``` -::: - -:::{tab-item} CLI (macOS / Linux) - -```{code-block} console -datashuttle \ -my_first_project \ -make-config-file \ -/path/to/my_projects/my_first_project \ -/central/live/username/my_projects/my_first_project \ -ssh \ ---central_host_id ssh.swc.ucl.ac.uk \ ---central_host_username username -``` -::: - -:::{tab-item} CLI (Windows) - -```{code-block} console -datashuttle ^ -my_first_project ^ -make-config-file ^ -C:\path\to\my_projects\my_first_project ^ -/central/live/username/my_projects/my_first_project ^ -ssh ^ ---central_host_id ssh.swc.ucl.ac.uk ^ ---central_host_username username -``` -::: - -:::: - -Next, a one-time command to setup the SSH connection must be run: - -::::{tab-set} - -:::{tab-item} Python API -```{code-block} python -project.setup_ssh_connection_to_central_server() -``` -::: - -:::{tab-item} CLI (macOS / Linux / Windows) -```{code-block} -datashuttle my_new_project setup-ssh-connection-to-central-server -``` -::: - -:::: - -Running `setup-ssh-connection-to-central-server` will require verification -that the SSH server connected to is correct (pressing `y` to proceed). - -Next, your password to the *central* machine will be requested. -This command sets up SSH key pairs between *local* and *central* machines. - -Password-less SSH communication is setup and no further configuration should be -necessary for SSH transfer. - -::::: - -Next, we can start setting up the project by automatically creating standardised -project folder trees. 
- -# Data Transfer - -Datashuttle offers a convenient way of transferring entire project folders or -subsets of the data. - -The main data transfer commands are: `upload`, `download`, `upload-all`, -`download-all`, `upload-entire-project`, `download-entire-project`. The term *upload* refers to transfer from -*local* to the *remote* machine, while *download* transfers in the opposite direction. - -These commands act differently in regard to the *top-level-folder*. In Datashuttle, the current working -*top-level-folder* is by default *rawdata*. The working *top-level-folder* determines where folders -are created (e.g. `create_folders`) and how commands transfer data. - - -:::{dropdown} *top-level-folders* -:color: info -:icon: info - -The top-level-folders are the folders immediately under the -project root (i.e. the folders within the folder that has the name of the project). - -[NeuroBlueprint](https://neuroblueprint.neuroinformatics.dev/) defines two main *top-level-folders*, *rawdata* and *derivatives*. -The purpose of *rawdata* is to store data directly as acquired. -The *derivatives* folder is used to store the results of processing the *rawdata*. -This distinction ensures that *rawdata* is not overwritten during processing, and -makes sharing of *rawdata* simpler. - -``` -└── my_first_project/ - ├── rawdata/ - │ └── ... - └── derivatives/ - └── ... -``` - -To change the *top-level-folder*, the command `set-top-level-folder` can be used. e.g. - -```{code-block} console -datashuttle my_first_project set-top-level-folder derivatives -``` - -The *top-level-folder* setting will remain across Datashuttle sessions. - -To see the current *top-level-folder*, the command `get-top-level-folder` can be used. - -::: - - -To quickly transfer the entire project (i.e. everything in all _top-level-folders_), `upload-entire-project` and `download-entire-project` can be used. 
- -For example, the command: - -::::{tab-set} - -:::{tab-item} Python API -```{code-block} python -project.upload_entire_project() -``` -::: - -:::{tab-item} CLI (macOS / Linux / Windows) -`datashuttle my_first_project upload-entire-project` -::: - -:::: - -when run on the folder tree: - -``` -. -└── my_first_project/ - ├── rawdata/ - │ └── sub-001/ - │ └── ses-001/ - │ └── my_tracking_video.mp4 - └── derivatives/ - └── sub-001/ - └── tracking_video_results.csv -``` - -will transfer all files and folders in both the *rawdata* and *derivatives* folders from the -*local* machine to the *central* machine. - -In contrast, `upload-all` and `download-all` will transfer the entire *top-level-folder*. -For example, if *rawdata* is the current *top-level-folder*, `upload-all` will transfer all contents of -*rawdata* from *local* to *central*. - - -## Selective transfer of data with `upload` and `download` - -Subsets of *subjects*, *sessions* and *datatypes* can be transferred with -the `upload` and `download` commands. - -For example, the call: - -::::{tab-set} - -:::{tab-item} Python API -```{code-block} python -project.upload( - sub_names="001@TO@003", - ses_names=["005_date-@*@", "006_date-@*@"], - datatype="behav" -) -``` -::: - -:::{tab-item} CLI (macOS / Linux) -```{code-block} console -datashuttle \ -my_first_project \ -upload \ --sub 001@TO@003 \ --ses 005_date-@*@ 006_date-@*@* \ --dt behav -``` -::: - -:::{tab-item} CLI (Windows) -```{code-block} console -datashuttle ^ -my_first_project ^ -upload ^ --sub 001@TO@003 ^ --ses 005_date-@*@ 006_date-@*@* ^ --dt behav -``` -::: - -:::: -will *upload* -behavioural sessions 5 and 6, collected on any date, for subjects 1 to 3. - -The keyword `all` can be input in place of a `-sub`, `-ses` or _datatype_ -argument `-dt` to transfer all available subject, sessions or datatypes available. - -Often additional files or folders may be stored outside of *datatype* -folders. 
The `all` argument will transfer all files and folders at the -specified level. Datashuttle offers a flexible argument syntax for -selecting precise data subsets, see [Data Transfer Options](#data-transfer-options) -for details. - -## Transferring a specific file or folder - -The functions `upload-specific-folder-or-file` or `download-specific-folder-or-file` -can be used to transfer an individual file or folder. - -The path to the file or folder (either full or relative to the working *top-level-folder*) -should be input. - - -# Advanced Usage - -## Convenience tags - -Datashuttle provides convenience tags that can be included in -*subject* or *session* names during folder creation or transfer. These -tags help automate repetitive routines and add flexibility to -data transfers. - -### Automatically include *date*, *time* or *datetime* -*Used when making subject or session folders* - -When creating subject or session folders, it is often desirable to include the -*date*, *time*, or *datetime* as a key-value pair in the folder name. For example: - -`ses-001_date-20230516` - -Datashuttle provides convenience tags to automatically format a key-value pair -with the current date or time (as determined from the machine *datetime*). 
- -For example, the command: - -::::{tab-set} - -:::{tab-item} Python API -```{code-block} python -project.create_folders( - top_level_folder="rawdata", - sub_names="sub-001", - ses_names=["001_@DATETIME@", "002_@DATETIME@"], - datatype="behav", -) -``` -::: - -:::{tab-item} CLI (macOS / Linux) -```{code-block} console -datashuttle \ -my_first_project \ -create_folders \ --sub sub-001 \ --ses 001_@DATETIME@ 002_@DATETIME@ \ --dt behav -``` -::: - -:::{tab-item} CLI (Windows) -```{code-block} console -datashuttle ^ -my_first_project ^ -create_folders ^ --sub sub-001 ^ --ses 001_@DATETIME@ 002_@DATETIME@ ^ --dt behav -``` -::: - -:::: - -creates the folder tree (assuming the *top-level-folder* is _rawdata_): - -``` -└── rawdata/ - └── sub-001/ - ├── ses-001_datetime-20230606T202701/ - │ └── behav - └── ses-002_datetime-20230606T202701/ - └── behav -``` - - -### Specify ranges with the `@TO@` flag -*When making subject or session folders and transferring data* - -Often it is desirable to specify a range of subject or session names for -folder creation or data transfer. - -For example, in a project with 50 subjects (`sub-001`, `sub-002`, `...`, `sub-050`), -the below command transfers only the first 25 subjects: - -::::{tab-set} - -:::{tab-item} Python API -```{code-block} python -project.upload( - sub_names="001@TO@025", - ses_names="all", - datatype="all", -) -``` -::: - -:::{tab-item} CLI (macOS / Linux) -```{code-block} console -datashuttle \ -my_first_project \ -upload \ --sub 001@TO@025 \ --ses all \ --dt all -``` -::: - -:::{tab-item} CLI (Windows) -```{code-block} console -datashuttle ^ -my_first_project ^ -upload ^ --sub 001@TO@025 ^ --ses all ^ --dt all -``` -::: - -:::: - -When making folders with the `@TO@` tag, the maximum number of leading zeros -found either side of the tag will be used for folder creation. For example, -setting `-sub` to `0001@TO@02` will create the subject folders `sub-0001` and `sub-0002`. 
- -### The wildcard flag `@*@` -*Used when transferring data* - -When selected subjects and sessions for data transfer, it is often -necessary to match only part of the folder name. In this case, wildcards -can be included in the search term. - -For example, we may want to transfer the 5th session for all subjects -in the project folder below: - -``` -└── rawdata/ - ├── sub-001 / - │ ├── ... - │ └── ses-005_condition-test_date-20230428/ - │ └── behav - └── sub-002/ - └── ses-005_condition-test_date-20230431/ - └── behav -``` - -We can use the wildcard tag in the *session* name to match -everything that comes after the `date` key: - -::::{tab-set} - -:::{tab-item} Python API -```{code-block} python -project.upload( - sub_names=["001", "002"], - ses_names="005_condition-test_date-@*@", - datatype="behav", -) -``` -::: - -:::{tab-item} CLI (macOS / Linux) -```{code-block} console -datashuttle \ -my_first_project \ -upload \ --sub 001 002 \ --ses 005_condition-test_date-@*@ \ --dt behav -``` -::: - -:::{tab-item} CLI (Windows) -```{code-block} console -datashuttle ^ -my_first_project ^ -upload ^ --sub 001 002 ^ --ses 005_condition-test_date-@*@ ^ --dt behav -``` -::: - -:::: - -This command would transfer session 5 from subject 001 and 002. - -::: {warning} - -If using the z-shell (zsh) - which is the default shell on macOS - -text including the `@*@` tag must be wrapped in quotation marks. -e.g. `--ses "005_condition-test_date-@*@"`) -::: - -## Data Transfer Options - -A number of [Rclone](https://rclone.org/) options are exposed in Datashuttle to facilitate flexible data transfer. - -### Overwriting existing files - -`overwrite_existing_files` determines whether folders and files are overwritten -during transfer. By default, Datashuttle does not overwrite any existing -folder during data transfer. 
- -For example, if the file `sub-001_ses-001_measure-trajectories.csv` exists on -the *central* repository, it will never be over-written during upload -from *local* to *central*, even if the version on *local* is newer. - -To change this behaviour, the configuration `overwrite_existing_files` can be set to `True`. -In this case, files in which the timestamp of the target directory (e.g. *central* -in our example) will be overwritten if their timestamp is -older than the corresponding file in the source directory. - -The configuration can be changed with the `update-config-file` command. - -### Additional Transfer Configurations - -`transfer_verbosity` : set to `"vv"` for additional detail on the transfer operation. -Set to `"v"` to only see each file that is transferred as well as significant events that occur during transfer. - -`show_transfer_progress` : When `True`, real-time transfer statistics will be reported and logged. - -### Flexible transfers with keyword arguments - -Often additional files or folders may be stored outside *datatype* -folders. The `all` argument will transfer all files and folders at the -specified level. - -For example, consider the project below. This project has files -stored within *datatype* folders, but additional files outside *datatype* -folders at the *subject* and *session* levels. -``` -. -└── rawdata/ - ├── a_project_related_file.json - ├── sub-001/ - │ ├── sub-001_extra-file.json - │ └── ses-001/ - │ ├── ses-001_extra-file.json - │ ├── behav/ - │ │ └── ... - │ └── ephys/ - │ └── ... - └── sub-002/ - ├── sub-002_extra-file.json - └── ses-001/ - ├── behav/ - │ └── ... - ├── ephys/ - │ └── ... - └── anat/ - └── ... -``` - -Datashuttle provides a number of keyword arguments to allow separate -handling of files that are not found in *datatype* folders. - -These are: -`all_sub` and `all_non_sub` (for `-sub`), `all_ses` and `all_non_ses` (for `-ses`) and `all_non_datatype` (for `-dt`). 
- - -#### For use with the `-sub` / `--sub-names` flag - -`all` - All *subject* and non-*subject* files and folders within the *top-level-folder* -(e.g. _rawdata_) will be transferred. - -`all_sub` - *Subject* folders only (i.e. prefixed with `sub`) and everything -within them will be transferred. - -`all_non_sub` - All files and folders that are not prefixed with `sub`, -within the *top-level-folder*, will be transferred. -Any folders prefixed with `sub` at this level will not be transferred. - -#### For use with the `-ses` / `--ses-names` flag - -`all` : All *session* and non-*session* files and folders within a *subject* level folder -(e.g. `sub-001`) will be transferred. - -`all_ses` : *Session* folders only (i.e. prefixed with `ses`) and everything within -them will be transferred. - -`all_non_ses` : All files and folders that are not prefixed with `ses`, within a *subject* folder, -will be transferred. Any folders prefixed with `ses` will not be transferred. - -#### For use with the `-dt` / `--datatype` flag - -`all` : All *datatype* folders at the *subject* or *session* folder level will be transferred, -as well as all files and folders within selected *session* folders. - -`all_datatype` : All *datatype* folders (i.e. folders with the pre-determined name: -`behav`, `ephys`, `funcimg`, `anat`) within a *session* folder will be -transferred. Non-*datatype* folders at the *session* level will not be transferred - -`all_non_datatype` : Non-*datatype* folders within *session* folders only will be transferred - -Below, a number of examples are given to exemplify how these arguments effect data transfer. -Given our example *local* project folder above: - -1) The first example indicates the effect of selectively transferring non-*datatype* sessions. 
- -2) The command: - -::::{tab-set} - -:::{tab-item} Python API -```{code-block} console -project.upload("all", "all", "all_non_datatype") -``` -::: - - -:::{tab-item} CLI (macOS / Linux) -```{code-block} console -datashuttle \ -my_first_project \ -upload \ --sub all \ --ses all \ --dt all_non_datatype -``` -::: - -:::{tab-item} CLI (Windows) -```{code-block} console -datashuttle ^ -my_first_project ^ -upload ^ --sub all ^ --ses all ^ --dt all_non_datatype -``` -::: - -:::: - - -Would upload: - -- All non-*subject* files in the *top-level* folder (i.e. `a_project_related_file.json`.) -- The `sub-001_extra_file.json` and `sub-002_extra_file.json` -- For `sub-001`, the file `ses-001_extra_file.json`. -For `sub-002`, no other files are transferred because there is no non-*datatype* files at the *session* level. - - -2) The next two examples show the effect of selecting `-dt all` vs. `-dt all_datatype`. The command: - -::::{tab-set} - -:::{tab-item} Python API -```{code-block} console -project.upload("sub-001", "all", "all") -``` -::: - -:::{tab-item} CLI (macOS / Linux) -```{code-block} console -datashuttle \ -my_first_project \ -upload \ --sub 001 \ --ses all \ --dt all -``` -::: - -:::{tab-item} CLI (Windows) -```{code-block} console -datashuttle ^ -my_first_project ^ -upload ^ --sub 001 ^ --ses all ^ --dt all -``` -::: - -:::: - -Would upload: - -- Contents residing in the `sub-001` folder only. -- The file `sub-001_extra-file.json` and *session* folders. -- All *datatype* folder contents (`behav`, `ephys`) and non-*datatype* files (`ses-001_extra-file.json`). 
- -The command: - -::::{tab-set} - -:::{tab-item} Python API -```{code-block} python -project.create_folders( - top_level_folder="rawdata", - sub_names="001", - ses_names="all", - datatype="all_datatype" -) -``` -::: - -:::{tab-item} CLI (macOS / Linux) -```{code-block} console -datashuttle \ -my_first_project \ -upload \ --sub 001 \ --ses all \ --dt all_datatype -``` -::: - -:::{tab-item} CLI (Windows) -```{code-block} console -datashuttle ^ -my_first_project ^ -upload ^ --sub 001 ^ --ses all ^ --dt all_datatype -``` -::: - -:::: - - -Would upload: - -- Contents residing in the `sub-001` folder only. -- The *session* folder and all *datatype* folder contents (`behav`, `ephys`) -but not the non-*datatype* file `ses-001_extra-file.json`. - -3) The final example shows the effect of transferring `all_non_sub` files only. The command: - - -::::{tab-set} - -:::{tab-item} Python API -```{code-block} python -project.create_folders( - top_level_folder="rawdata", - sub_names="all_non_sub", - ses_names="all", - datatype="all" -) -``` -::: - -:::{tab-item} CLI (macOS / Linux) -```{code-block} console -datashuttle \ -my_first_project \ -upload \ --sub all_non_sub \ --ses all \ --dt all -``` -::: - -:::{tab-item} CLI (Windows) -```{code-block} console -datashuttle ^ -my_first_project ^ -upload ^ --sub all_non_sub ^ --ses all ^ --dt all -``` -::: - -:::: - -Would upload: - -- the file `a_project_related_file.json` only. - -## Query Datashuttle for current settings - -A number of commands exist to query Datashuttle's current configs. -For example the `show-local-path` command will print the currently set *local* path to the terminal. -The command `show-configs` will print all currently set configs. - -For a full list of available commands, see the [API reference](API_Reference) or [CLI reference](CLI_Reference). 
- - -## Logging - -Detailed logs of all configuration changes, folder creation and data transfers are stored -to the `.datashuttle` folder that is created in the *local* project folder. - -The log itself contains relevant information pertaining to that command. -For example, if the commands `create_folders`, `upload`, `download` were run sequentially, -the logs output folder would look like: - -``` -20230608T095514_create-folders.log -20230608T095545_upload-data.log -20230608T095621_download-data.log -``` diff --git a/_sources/pages/how_tos.md.txt b/_sources/pages/how_tos.md.txt index e6565b78..e7750277 100644 --- a/_sources/pages/how_tos.md.txt +++ b/_sources/pages/how_tos.md.txt @@ -1,4 +1,5 @@ :html_theme.sidebar_secondary.remove: + (how-tos)= # How To @@ -70,3 +71,19 @@ A handy feature for template subject and session names. ::: :::: + +```{toctree} +:maxdepth: 2 +:caption: how to +:hidden: + +how_tos/choose-a-terminal +how_tos/create-folders +how_tos/install +how_tos/make-a-new-project +how_tos/transfer-data +how_tos/read-logs +how_tos/update-configs +how_tos/use-name-templates + +``` diff --git a/_sources/pages/how_tos/choose-a-terminal.md.txt b/_sources/pages/how_tos/choose-a-terminal.md.txt index ddca77c8..72051183 100644 --- a/_sources/pages/how_tos/choose-a-terminal.md.txt +++ b/_sources/pages/how_tos/choose-a-terminal.md.txt @@ -4,43 +4,44 @@ **datashuttle**'s graphical interface can run in almost any terminal—but native Windows and macOS terminals may display with rendering errors. +See below for recommended terminals for these operating systems. If you are using Linux, the native terminal will work without issue. 
::::{tab-set} -:::{tab-item} Bad Rendering +:::{tab-item} Good Rendering -```{image} /_static/screenshots/how-to-choose-a-terminal-bad-dark.png +```{image} /_static/screenshots/how-to-choose-a-terminal-good-dark.png :align: center :class: only-dark :width: 900px ``` -```{image} /_static/screenshots/how-to-choose-a-terminal-bad-light.png +```{image} /_static/screenshots/how-to-choose-a-terminal-good-light.png :align: center :class: only-light :width: 900px ```
+ ::: -:::{tab-item} Good Rendering +:::{tab-item} Bad Rendering -```{image} /_static/screenshots/how-to-choose-a-terminal-good-dark.png +```{image} /_static/screenshots/how-to-choose-a-terminal-bad-dark.png :align: center :class: only-dark :width: 900px ``` -```{image} /_static/screenshots/how-to-choose-a-terminal-good-light.png +```{image} /_static/screenshots/how-to-choose-a-terminal-bad-light.png :align: center :class: only-light :width: 900px ```
- ::: :::: @@ -52,8 +53,8 @@ If you are using Linux, the native terminal will work without issue. :::{tab-item} Windows For Windows, we recommend using -[Terminal](https://apps.microsoft.com/detail/9n0dx20hk701?rtc=1&hl=en-gb&gl=GB), -Window's own high-powered terminal available for free from the +[Terminal](https://apps.microsoft.com/detail/9n0dx20hk701?rtc=1&hl=en-gb&gl=GB)—Window's +own high-powered terminal available for free on the Microsoft Store. **datashuttle** will run in Command Prompt (`cmd.exe`), Powershell (`ps.exe`) @@ -64,7 +65,7 @@ but may display with rendering errors. :::{tab-item} macOS -On macOS, we recommend using next-generation terminal emulators such +On macOS we recommend using next-generation terminal emulators such as [Warp](https://warp.dev/) or [Wezterm](https://wezfurlong.org/wezterm/index.html). Both come with easy-to-use installers. diff --git a/_sources/pages/how_tos/create-folders.md.txt b/_sources/pages/how_tos/create-folders.md.txt index 3b374766..204755ce 100644 --- a/_sources/pages/how_tos/create-folders.md.txt +++ b/_sources/pages/how_tos/create-folders.md.txt @@ -1,18 +1,13 @@ (how-to-create-folders)= # How to Create Folders -**datashuttle** creates project folders -according to the [NeuroBlueprint](https://neuroblueprint.neuroinformatics.dev/) -specification. +**datashuttle** automates project folder creation and validation +according to the [NeuroBlueprint](https://neuroblueprint.neuroinformatics.dev/). -Before jumping into the folder-creation process, we'll quickly -review the key features of the -[specification](https://neuroblueprint.neuroinformatics.dev/specification.html)) -that are created folders must conform to. +Before starting with folder creation, we'll briefly introduce the +[NeuroBlueprint specification](https://neuroblueprint.neuroinformatics.dev/specification.html). 
-In [NeuroBlueprint](https://neuroblueprint.neuroinformatics.dev/) for each -subject and session there are datatype folders in which acquired -data is saved: +An example [NeuroBlueprint](https://neuroblueprint.neuroinformatics.dev/) project: ```{image} /_static/NeuroBlueprint_project_tree_dark.png :align: center @@ -26,33 +21,38 @@ data is saved: ```
+Some key features: -The subject and session folder names must begin with `sub-` and `ses-` -respectively—other key-value pairs are optional. All acquired data must go -in a datatype folder with a -[standard name](https://neuroblueprint.neuroinformatics.dev/specification.html). +* The `rawdata` top-level-folder contains acquired data. Following acquisition +this data is never edited. -No strong requirements are made on filenames of acquired data, but it is recommended -to include the subject and session number if possible. +* The `derivatives` top-level folder contains all processing and analysis outputs. There are +no fixed requirements on its organisation. -Now the specification has been introduced, let's dive in to folder creation! +* Subject and session folders are formatted as key-value pairs. +* Only the `sub-` and `ses-` key-value pairs are required (additional pairs are optional). -## Creating project folders +* Each session contains datatype folders, in which acquired data is stored. + +Now, let's get started with folder creation! -In the below example, folders will be created in the `rawdata` folder, -within the `my_first_project` project folder. +## Creating project folders -The project folder is located at the **local path** +The project-name folder is located at the **local path** specified when [setting up the project](make-a-new-project). +We will now create subject, session and +datatype folders within a `rawdata` top-level folder. + + We will create datatype folders `behav` and `funcimg` -within a `ses-001_` for both `sub-001` and `sub-002`. +within a `ses-001_` for both a `sub-001` and `sub-002`. The below example uses the `@DATE@` convenience tag to automate -creation of today's date. See the section below for more -information on +creation of today's date. See the [convenience tags](create-folders-convenience-tags). +section for more information on these tags. 
::::{tab-set} @@ -60,6 +60,7 @@ information on :::{tab-item} Graphical Interface :sync: gui +Folders are created in the `Create` tab on the `Project Manager` page. ```{image} /_static/screenshots/how-to-create-folders-example-dark.png :align: center @@ -74,29 +75,28 @@ information on
-Folders are created in the `Create` tab on the `Project Manager` page. - -We can fill in the subject and session names and select datatype -folders to create. +We can enter the subject and session names into the input boxes, +and select datatype folders to create. Clicking `Create Folders` +will create the folders within the project. -Note that the `sub-` or `ses-` prefix is not actually required and will -be automatically added. +A number of useful shortcuts to streamline this process are described below. -### `Create` tab shortcuts +### `Create` shortcuts The `Create` tab has a lot of useful shortcuts. -First, **double-clicking the subject or session input boxes** will suggest +First, **double-clicking subject or session input boxes** will suggest the next subject or session to create, based on the local project. If a [Name Template](how-to-use-name-templates) is set, the suggested name will also include the template. -Holding `CTRL` while clicking will add the `sub-` +Holding `CTRL` while clicking will enter the `sub-` or `ses-` prefix only. Next, the **Directory Tree** has a number of useful shortcuts. These are -activated by hovering the mouse and pressing one of the below combination -of keys (you may need to click the `Directory Tree`) first: +activated by hovering the mouse of a file or folder and pressing +one of the below key combinations +(you may need to click the `Directory Tree` first): Fill an input : `CTRL+F` will fill the subject or session input with the name @@ -104,20 +104,21 @@ of the folder (prefixed with `sub-` or `ses-`) that is hovered over. Append to an input : `CTRL+A` is similar to 'fill' above, but will instead append the name -to those already in the input. This allows creation of lists. +to those already in the input. This allows creation of multiple +subjects or sessions at once. Open folder in system filebrowser -: `CTRL+O` will open (any) folder in the system filebrowser. +: `CTRL+O` will open a folder in the system filebrowser. 
Copy the full filepath. : `CTRL+Q` will copy the entire filepath of the file or -folder that is hovered over. +folder. -### `Create` tab Settings +### `Create` Settings -Clicking the `Settings` button on the `Create` tab will give access -allow setting the top-level folder, and bypass validation. +Click the `Settings` button on the `Create` tab to set +the top-level folder, and bypass validation. ```{image} /_static/screenshots/how-to-create-folders-settings-dark.png :align: center @@ -131,16 +132,16 @@ allow setting the top-level folder, and bypass validation. ```
-Top-level folder -: This dropdown box will set whether folderes are created in the -`rawdata` or `derivatives` top-level folder +Top level folder +: This dropdown box will set whether folders are created in the +`rawdata` or `derivatives` top-level folder. Bypass validation -: If on, this setting will allow folder creation even if the names -are not valid (e.g. break with +: This setting will allow folder creation even if the names +are not valid (i.e. they break with [NeuroBlueprint](https://neuroblueprint.neuroinformatics.dev/)). -This screen is also used to set validation against +This screen is also used to validate and autofill with [Name Templates](how-to-use-name-templates). ::: @@ -148,7 +149,8 @@ This screen is also used to set validation against :::{tab-item} Python API :sync: python -Creating folders can be done with the `create_folders()` method in the Python API. +The `create_folders()` method is used for folder creation. + We simply need to provide the subject, session and datatypes to create: ```python @@ -164,36 +166,26 @@ created_folders = project.create_folders( ) ``` -We provides **datashuttle** with a list of subject, session and -datatype folders to create. +The method outputs `created_folders`, which contains a list of all +`Path`s to all created datatype folders. See the below section for +details on the `@DATE@` and other convenience tags. -Note that the `sub-` or `ses-` prefix is not actually required and will -be automatically added. +By default, an error will be raised if the folder names break +with [Neuroblueprint](https://neuroblueprint.neuroinformatics.dev/) +and folders will not be created. +The `bypass_validation` argument can be used to bypass this feature. -The method outputs `created_folders`, which contains a list of all -`Path`s to all created datatype folders. 
::: :::: -:::{admonition} Folder Validation -:class: note - -The names of the folders to be created are validated on the fly against -[NeuroBlueprint](https://neuroblueprint.neuroinformatics.dev/). -If the folder names will break with the specification, an error will -be raised and the folders will not be created. - -Validation can be extended by defining custom templates for subject -or session names—if folders don't match the template an error will be raised. -See [How to use Name Templates](how-to-use-name-templates) for more information. - -::: (create-folders-convenience-tags)= ## Convenience Tags There are four convenience tags that can be used in subject or session -names when creating folders. They automate the inclusion of: +names when creating folders. + +They automate the inclusion of: Today's Date : The `@DATE@` tag will include today's date in the format `YYYYMMDD`. \ @@ -202,7 +194,7 @@ create the folder `ses-001_date-20241605`. Current Time : The `@TIME@` tag will include the current time in the format `HHMMSS`. \ - *e.g.* If the current time is `15:10:05` (i.e. 10 minutes and 5 seconds past 3 pm.), + *e.g.* If the current time is `15:10:05` (i.e. 10 minutes and 5 seconds past 3 p.m.), the name `"ses-001_@TIME@"` will create the folder `ses-001_time-151005`. Current Datetime diff --git a/_sources/pages/how_tos/install.md.txt b/_sources/pages/how_tos/install.md.txt index 5e84d45a..5a83d906 100644 --- a/_sources/pages/how_tos/install.md.txt +++ b/_sources/pages/how_tos/install.md.txt @@ -1,10 +1,18 @@ (how-to-install)= # How to Install -**datashuttle** requires Python and a number of other dependencies to run. +**datashuttle** requires +[Python](https://www.python.org/) +to run. -The easiest way to install **datashuttle** is through [conda](https://docs.conda.io/en/latest/), -but installation via `pip` and for developers is also supported. 
+The easiest way to install **datashuttle** is through the Python package manager +[conda](https://docs.conda.io/en/latest/). However, +installation via `pip` is also supported. + +:::{warning} +**datashuttle** is currently in the [beta](https://en.wikipedia.org/wiki/Software_release_life_cycle#Beta) release phase. Please +get in contact if you experience any bugs or unexpected behaviour. +::: ## Installation instructions @@ -15,17 +23,17 @@ but installation via `pip` and for developers is also supported. If you do not already have `conda` on your system, first [download and install conda](https://docs.anaconda.com/free/miniconda/miniconda-install/). -If you are on Windows, the easiest way to use `conda` is through the [Anaconda Prompt](https://docs.anaconda.com/free/anaconda/getting-started/index.html) +If you are on Windows, the easiest way to use `conda` is through the [Anaconda Prompt](https://docs.anaconda.com/free/anaconda/getting-started/index.html). Next, create and activate an environment. You can call your environment whatever you like, -we've used `datashuttle-env`. +we've used `datashuttle-env`: ```sh conda create -n datashuttle-env python=3.10 conda activate datashuttle-env ``` -then install **datashuttle** and all dependencies with +Next, install **datashuttle** and all dependencies with: ```sh conda install -c conda-forge datashuttle @@ -39,7 +47,7 @@ conda install -c conda-forge datashuttle [Rclone must be installed separately](https://rclone.org/downloads/). 
Once Rclone is installed, **datashuttle** and all other dependencies can be -installed in a `pipenv` or `virtualenv` environment with +installed in a `pipenv` or `virtualenv` environment with: ```shell pip install datashuttle @@ -65,7 +73,7 @@ This will install the package in editable mode, including all `dev` dependencies ## Check the installation To check **datashuttle** has successfully installed, launch the -graphical interface with +graphical interface with: ```shell datashuttle launch diff --git a/_sources/pages/how_tos/make-a-new-project.md.txt b/_sources/pages/how_tos/make-a-new-project.md.txt index 4596b4da..b4b14185 100644 --- a/_sources/pages/how_tos/make-a-new-project.md.txt +++ b/_sources/pages/how_tos/make-a-new-project.md.txt @@ -2,7 +2,7 @@ # How to Make a New Project -This guide will cover all we need to know for setting up a new project +This guide will cover all you need to know for setting up a new project in **datashuttle**. First, make sure you have @@ -12,8 +12,8 @@ Next, we set up **datashuttle** on a new machine we must tell it three things: 1) **project name**: The name of the project (must be the same for all local machines tied to a project). -2) **local path**: location of the project our local machine, where we will save acquired data. -3) **central path**: location of the central data storage, where we will upload the acquired data. +2) **local path**: location of the project our local machine. +3) **central path**: location of the project on the central data storage machine. ```{image} /_static/datashuttle-overview-dark.png :align: center @@ -39,9 +39,11 @@ or IT department. (new-project-mounted-drive)= ## When central storage is a mounted drive -When the central storage machine is mounted as a mounted drive, we -simply need to set the **central path** as the path to -the central project as it appears on your local machine's filesystem. +In this case, the central storage machine is mounted as a drive +on the local machine. 
+ +We simply need to set the **central path** as the path to +the central project as it appears on the local machine's filesystem. :::{dropdown} Local Filesystem Example :color: info @@ -52,25 +54,23 @@ your machine at `X:\username`. You want your project folder to be located at `X:\username\my_projects`. In this case, you can set the **central_path** to `X:\username\my_projects` -and with **connection_method** to `local_filesystem`. +and with **connection_method** to **local filesystem**. -You may pass the local or central path without the **project name**, -it will be automatically included. The project folder will be located +The project folder will be located at `X:\username\my_projects\my_project_name`. +You may pass the local or central path without the **project name**, +(it will be automatically included). ::: -In addition, we need to tell **datashuttle** the project name and -local path where we want to put our project and hold data on -our local machine. - ::::{tab-set} :::{tab-item} Graphical Interface :sync: gui -From the launch page, click `Make New Project` and you will -be taken to the page where project details must be entered +First, click the `Make New Project` button from the launch page. + +The `Make New Project` screen will be displayed: ```{image} /_static/screenshots/tutorial-1-make-screen-dark.png :align: center @@ -86,14 +86,11 @@ be taken to the page where project details must be entered (general-tui-datashuttle-setup)= Setting up **datashuttle** is as simple as entering the `Project name`, -`Local Path` and `Central Path` into the relevant input boxes. The paths -do not need to end in the project name - this will be automatically added. - -You can paste a path into the input boxes with `CTRL+V`, copy the input path -with `CTRL+Q` or open the path in your systems filebrowser with `CTRL+O`. +`Local Path` and `Central Path` into the relevant input boxes. 
-Use the `Select` feature to navigate to the local and central paths -on your local filesystem. +The paths do not need to end in the project name—it will be automatically added. +You can paste a path into the input boxes with `CTRL+V` or use `Select` +to navigate to paths on your local filesystem. By default, the `Connection Method` is set to `Local Filesystem`, so this does not need to be changed. @@ -101,13 +98,19 @@ this does not need to be changed. Once all information is input, click `Save` to set up the project. You can then navigate to the `Project Manager` screen by clicking the ``Go To Project Screen`` that appears. + +```{note} +The contents of the input boxes can be copied +with `CTRL+Q`, or opened in the system filebrowser with `CTRL+O`. +``` + ::: :::{tab-item} Python API :sync: python -We will first import and initialise the `DataShuttle` class with our -`project_name`. +We will first import the `DataShuttle` class and initialise +it with the `project_name`: ```{code-block} python @@ -117,7 +120,7 @@ project = DataShuttle("my_first_project") ``` -Next, we can use the `make_config_file()` method to set up a new +Next, the `make_config_file()` method can be used to set up a new project with the desired **local path**, **central path** and **connection method**. @@ -129,9 +132,6 @@ project.make_config_file( ) ``` -Now the project is set up! See the later section for -[optional arguments that control data transfers](make-project-extra-arguments). - ::: :::: @@ -142,16 +142,14 @@ Another common method of connecting to a central storage machine is via [SSH](https://www.ssh.com/academy/ssh/protocol). To set up SSH connection -we need to give **datashuttle** the address of the machine to connect to, -and now the **central path** will be relative to the machine -we are connecting to. +we need to provide: -**central_host_id:** This is the address of the server you want to connect to. 
+1) **central_host_id:** This is the address of the server you want to connect to. -**central_host_username:** This is your profile name on the server you want to +2) **central_host_username:** This is your profile username on the server you want to connect to. -**central path**: This is the path to the project *on the server*. +3) **central path**: This is the path to the project *on the server*. :::{dropdown} SSH Example :color: info @@ -161,10 +159,10 @@ Let's say the central project was stored on a remote server with address `ssh.swc.ucl.ac.uk`, and your account username on the server is `myusername`. -Finally, we want to store the project at the location (on the server) +We want to store the project at the location (on the server) `/ceph/my_lab/my_name/my_projects/project_name/`. -Then the input to **datashuttle** would be +Then the settings would be: **central host id**: `ssh.swc.ucl.ac.uk` @@ -175,6 +173,8 @@ Then the input to **datashuttle** would be You may pass the **local path** and **central path** without the **project name**, it will be automatically included. +Note that Linux-based shortcuts (e.g. `~` for home directory) are not permitted. + :::: ::::{tab-set} @@ -202,18 +202,16 @@ Next, input the `Central Host ID`, `Central Host Username` and `Central Path` as described above. Clicking `Save` will save these project configs. A button -`Setup SSH Connection` will appear. Click this to -confirm the server and enter your password to the server -(you will only need to do this once) -`` +`Setup SSH Connection` will appear. Click to +confirm the server ID and enter your password +(you will only need to do this once). ::: :::{tab-item} Python API :sync: python -In Datashuttle, the -`connection_method` configuration must be set to `"ssh"` +The `connection_method` configuration must be set to `"ssh"` to use the SSH protocol for data transfers. 
Enter the `central_path`, `central_host_id` and @@ -232,46 +230,11 @@ project.make_config_file( Next, a one-time command to set up the SSH connection must be run: ```{code-block} python -project.setup_ssh_connection_to_central_server() +project.setup_ssh_connection() ``` -Running `setup-ssh-connection-to-central-server` will require verification +Running `setup_ssh_connection()` will require verification that the SSH server connected to is correct (pressing `y` to proceed). Finally, your password to the central server will be requested (you will only need to do this once). - -::: -:::: - -(make-project-extra-arguments)= -## Extra arguments (Python API) - -A number of settings that control the behaviour of transfers -can be set with the `make_config_file()` method. - -These configs are not relevant for the graphical interface, with the exception of -`overwrite_existing_folders` which set directly on the -graphical interface's `Transfer` screen. - -(overwrite-existing-files-config)= -overwrite_existing_files -: Determines whether folders and files are overwritten -during transfer. By default, Datashuttle does not overwrite any existing -folder during data transfer.

- *e.g.* if the file `sub-001_ses-001_measure-trajectories.csv` exists on -the central project, it will never be over-written during upload -from the local to central project, even if the local version is newer.

-To change this behaviour, the configuration `overwrite_existing_files` can be set to `True`. -If **overwrite_existing_files** is `True`, files in which the timestamp of the -target directory will be overwritten if their -timestamp is older than the corresponding file in the source directory. - -transfer_verbosity -: Set to `"vv"` for additional detail on the -transfer operation. Set to `"v"` to only see each file that is transferred -as well as significant events that occur during transfer. - - -show_transfer_progress -: When `True`, real-time transfer statistics will be reported and logged. diff --git a/_sources/pages/how_tos/read-logs.md.txt b/_sources/pages/how_tos/read-logs.md.txt index c92d01a8..7426491d 100644 --- a/_sources/pages/how_tos/read-logs.md.txt +++ b/_sources/pages/how_tos/read-logs.md.txt @@ -2,13 +2,12 @@ # How to Read the Logs -When commands that create folders, change project configs -or transfer data are run, **datashuttle** stored detailed -logs of these actions. +**datashuttle** stores detailed logs when commands that +create folders, change project configs or perform data transfers are run. These logs can be accessed and read directly in the -Graphical Interface, or located on your filesysetm -and opened in your favourite text file reader. +graphical interface, or located on your filesystem +and opened in your favourite text editor. Logs are stored as [ISO8601](https://en.wikipedia.org/wiki/ISO_8601)-prefixed filenames that includes the relevant **datashuttle** method performed. @@ -44,27 +43,28 @@ Clicking `Open Most Recent` will open the most recently saved logs. 
:::{tab-item} Python API :sync: python -The path where logs are stored can be accessed by running the -`get_logging_path()` method: +The path where logs are stored can be accessed by running +`get_logging_path()`: ```python logs_path = project.get_logging_path() print(logs_path) +# Path('C:/Users/Joe/data/local/my_first_project/.datashuttle/logs') ``` You can then navigate to this path in your system filebrowser -and open the logs in your favourite text editor. +and open the logs. ```{image} /_static/screenshots/how-to-logs-filesbrowser-dark.png :align: center :class: only-dark - :width: 400px + :width: 500px ``` ```{image} /_static/screenshots/how-to-logs-filesbrowser-light.png :align: center :class: only-light - :width: 400px + :width: 500px ```
diff --git a/_sources/pages/how_tos/top-level-folder.md.txt b/_sources/pages/how_tos/top-level-folder.md.txt deleted file mode 100644 index c66e76de..00000000 --- a/_sources/pages/how_tos/top-level-folder.md.txt +++ /dev/null @@ -1,84 +0,0 @@ -(how-to-set-top-level-folder)= - -# How to Set the Top-level Folder - - [NeuroBlueprint](https://neuroblueprint.neuroinformatics.dev/) specifies -the top-level folder inside the project folder must be `rawdata` or `derivatives`. - -```{image} /_static/NeuroBlueprint_project_tree_dark.png - :align: center - :class: only-dark - :width: 650px -``` -```{image} /_static/NeuroBlueprint_project_tree_light.png - :align: center - :class: only-light - :width: 650px -``` -
- -In **datashuttle**, the top level folder is relevant when: -1) creating folders (i.e. in `rawdata` or `derivatives`) -2) transferring data with the top-level method or custom. - -Using the Graphical interface, the top-level folder is -set by a drop-down menu on the relevant tab (`Create` or `Transfer`). -^^ TODO: link to sections!!! TODO TODO TODO - - -However, in the Python API methods act in `rawdata` or `derivatives` -according to a stored top-level folder setting. - -## Setting the level folder in the Python API - -In the Python API the *working* top level folder -is stored as a persistent property, accessible with -the `get_top_level_folder()` and `set_top_level_folder()` methods. - -This is to avoid continuously inputting the top-level folder -for every method call. - -:::{info} -:class: info - -Top-level folder is persistent across sessions on a machine. If you -change the working top-level folder with `set_top_level_folder()` then -close-and-reopen python, the change is remembered. - -Changing the working top-level folder only affects the -project on the local machine you are using. - -::: - -When making folders, `create_folders` will only create folders in the -working top-level folder. - -Transferring folders (e.g. with `upload()` or `download()`) will -only transfer folders in the working top-level folder -(unless `upload_entire_project()` or `download_entire_project()` is used). - -In the below example we will create and transfer folders in `rawdata`. 
-Then, the top-level folder is switched to `derivatives` and the actions repeated.# - -```python -project.set_top_level_folder("rawdata") - -# make folders in `rawdata` -project.create_folders(sub="sub-002") - -# transfer files in `rawdata` -project.upload_data(sub_names="all", ses_names="all", datatype="all") - -# set working top-level folder to `derivatives` -project.set_top_level_folder("derivatives") - -print(project.get_top_level_folder()) -# "derivatives" - -# create folders in derivatives -project.create_folders("sub-002") - -# transfer folders in derivatives -project.download_data() - -``` diff --git a/_sources/pages/how_tos/transfer-data.md.txt b/_sources/pages/how_tos/transfer-data.md.txt index 1dcd19c7..06f4e870 100644 --- a/_sources/pages/how_tos/transfer-data.md.txt +++ b/_sources/pages/how_tos/transfer-data.md.txt @@ -1,12 +1,12 @@ (how-to-transfer-data)= # How to Transfer Data -Transferring data between the local project and the project located -on central storage is a key feature of **datashuttle**. It allows: +**datashuttle** facilitates convenient transfer of data between +local and central storage machines. -- Transfer of data from an acquisition machine to the central project. -- Convenient integration of data collected from multiple acquisition. -- Pulling subsets of data from central storage to analysis machines. +This includes: +- 'Uploading' data from an acquisition machine to central data storage. +- 'Downloading' subsets of data from central storage to analysis machines. ```{image} /_static/datashuttle-overview-light.png :align: center @@ -23,47 +23,41 @@ on central storage is a key feature of **datashuttle**. It allows: :class: note -In **datashuttle**, the term *upload* refers to transfer -from the local machine to central storage. -*Download* refers to transfer from central storage to -a local machine. +In **datashuttle**, the *upload* refers to transfer +from a local to the central machine. 
+*Download* refers to transfer from the central machine to a local machine. ::: -There are three main methods to transfer data in **datashuttle**. These -allow transfer between: +There are three main methods to transfer data. These +allow transfer across: -1) The entire project (all files in both `rawdata` and `derivatives`) -2) A specific top-level-folder (e.g. all files in `rawdata`) -3) A custom subset of subjects / sessions / datatypes. +1) the **entire project** (all files in both `rawdata` and `derivatives`) +2) only the `rawdata` or `derivatives` **top level folder**. +3) a **custom** subset of subjects / sessions / datatypes. -Below we will explore each method in turn, as well as consider -[configuring transfer](configuring-transfer) including the important -**overwrite existing files** option. ```{warning} -The -[`Overwrite Existing Files`](overwrite-existing-files-config) -setting is very important. - -By default it is turned off and a transfer will never overwrite a -file that already exists, even if the source version is newer. +The **overwrite existing files** setting is very important. +It takes on the options **never**, **always** or **if source newer**. +See the [transfer options](transfer-options) section for full details on +this and other transfer settings. ``` (transfer-entire-project)= ## Transfer the entire project -The first option is to transfer the entire project, -that is all files in the `rawdata` and `derivatives` +The first option is to transfer the entire project—all +files in the `rawdata` and `derivatives` [top-level-folders](https://neuroblueprint.neuroinformatics.dev/specification.html#basic-principles). This includes all files inside or outside a subject, session or datatype folder. This mode is useful for data acquisition when **overwrite existing files** -is off. Any new files (i.e. newly acquired data) will be transferred, -to central storage, while any existing files will be ignored. +is set to **never**. Any new files (i.e. 
newly acquired data) will be transferred +to central storage while existing files will be ignored. ::::{tab-set} @@ -82,24 +76,23 @@ to central storage, while any existing files will be ignored. ```
-To transfer the entire project navitgate to the `Transfer tab. The -`All` button indicates to transfer the entire project. +To transfer the entire project navigate to the `Transfer` tab. The +`All` button is selected to transfer the entire project. -Use the `Upload / Download` switch to control transfer direction, -and press `Transfer` to begin. +Click `Transfer` to begin. ::: :::{tab-item} Python API :sync: python -The command to upload the entire project is +The method to upload the entire project is: ```python project.upload_entire_project() ``` -while the command to download the entire project is +while the method to download the entire project is: ```python project.download_entire_project() @@ -110,17 +103,12 @@ project.download_entire_project() :::: (transfer-top-level-folder)= -## Transfer the top-level folder +## Transfer only `rawdata` or `derivatives` -This mode acts almost identically to +This acts almost identically to [transferring the entire project](transfer-entire-project) -however it will only transfer files within a -particular top-level folder (`rawdata` or `derivatives`). - -This mode is also useful for quickly uploading new files -during data acquisition (`rawdata`) or analysis (`derivatves`), when -**overwrite existing files** is off—any newly acquired or generated files -will be transfer, ignoring any previously existing files. +but will only transfer files within a +single top-level folder (`rawdata` or `derivatives`). ::::{tab-set} @@ -140,27 +128,26 @@ will be transfer, ignoring any previously existing files. ```
-Selecting the `Top-Level` button on the `Transfer` tab will +Selecting the `Top Level` button on the `Transfer` tab will allow selection of `rawdata` or `derivatives` to transfer. -Use the `Upload / Download` switch to control transfer direction, -and press `Transfer` to begin. +Click `Transfer` to begin. ::: :::{tab-item} Python API :sync: python -The `upload_all()` or `download_all()` methods can be used with the argument `top_level_folder` to specify -the top-level folder to transfer within. +The `upload_rawdata()`, `upload_derivatives()` and `download_rawdata()`, `download_derivatives()` +methods target transfer to a particular top-level folder. -In the next example, we will upload `rawdata` downloading `derivatives`. +The below example will upload `rawdata` then download `derivatives`. ```python -project.upload_all("rawdata") +project.upload_rawdata() -project.download_all("derivatives") +project.download_derivatives() ``` ::: @@ -170,30 +157,19 @@ project.download_all("derivatives") ## Custom transfers -Custom transfers permit full customisation of the files inside -or outside of subject, session and datatype folders. - -Custom transfers are particularly useful during data analysis, in -which a subset of data can be downloaded from central storage. -For example, you want to only transfer behavioural data from -test sessions—custom transfers allow you to do this with ease. - -See below for how to run custom transfers, as well as -certain keywords and convenience tags to fully customise data transfer. +Custom transfers permit full customisation of data transfer. -For example, `all_sub` in the below examples tells datashuttle -to consider only files and folders within subject folders for transfer. -Files or folders within `rawdata` that are not `sub-` -folders will not be transferred. - -See below for full details on custom transfer keywords and -convenience tags. +Custom transfers can transfer select subsets of data. 
+For example, you may only want to download behavioural data from +test sessions for a particular data analysis. ::::{tab-set} :::{tab-item} Graphical Interface :sync: gui +Select `Custom` on the `Transfer` tab to open the custom transfer settings. + ```{image} /_static/screenshots/how-to-transfer-custom-dark.png :align: center :class: only-dark @@ -206,13 +182,11 @@ convenience tags. ```
-Select `Custom` on the `Transfer` tab to select custom transfers. - The top-level folder can be set by the first dropdown menu. Next, subject and session keywords can be added to customise -files to transfer. In this example, data from all *subject* -folders, all first session behavioral data will be transferred. +files to transfer. In this example, the first behavioural session for +all subjects will be transferred. Subject and sessions can be added to the input boxes automatically by hovering over `sub-` or `ses-` folders on the `DirectoryTree`. @@ -220,9 +194,6 @@ Pressing `CTRL+F` will 'fill' the input with the foldername, while `CTRL+A` will 'append' the foldername, creating a list of subjects or sessions to transfer. -Use the `Upload / Download` switch to control transfer direction, -and press `Transfer` to begin. - ```{image} /_static/screenshots/how-to-transfer-datatypes-dark.png :align: center :class: only-dark @@ -235,16 +206,18 @@ and press `Transfer` to begin. ```
+Finally, click `Transfer` to begin. + ::: :::{tab-item} Python API :sync: python -The `upload()` and `download()` methods can be used for custom +The `upload_custom()` and `download_custom()` methods can be used for custom data transfers. For example, to perform a custom upload: ```python -project.upload( +project.upload_custom( top_level_folder="rawdata", sub_names="all_sub", ses_names="ses-001_@*@", @@ -252,8 +225,8 @@ project.upload( ) ``` -In this example, data from all *subject* -folders, all first session behavioral data will be uploaded. +In this example, the first behavioural session for +all subjects will be transferred. ::: :::: @@ -263,39 +236,48 @@ folders, all first session behavioral data will be uploaded. Custom transfer keywords determine how files and folders outside of subject, session and datatype folders are handled. -Ideally, all data will be stored in datatype folders. However, this +Ideally, all data will be stored in datatype folders—however this is not always feasible. -In this case, custom transfer keywords allows flexible handling of +In such cases custom transfer keywords allows flexible handling of the transfer of non `sub-`, `ses-` prefixed or datatype folders at the subject, session and datatype level. +Note that the [dry run argument](dry-run-argument) can be used +to perform a dry-run transfer to check transfers proceed as expected. Subject level -: * `all` - All subject (i.e. prefixed with `sub-`) folders and non-subject files within the -top-level folder will be transferred. - * `all_sub` - Subject folders only and them will be transferred. - * `all_non_sub` - All files and folders that are not prefixed with `sub-`, -within the top-level folder, will be transferred. -Any folders prefixed with `sub-` at this level will not be transferred. + +: For files and folders within top-level folders: + +: * `all` - All files and non-subject folders will be transferred. +All subject (i.e. 
prefixed with `sub-`) folders will be considered for transfer. + * `all_sub` - All subject folders will be considered for transfer. + * `all_non_sub` - All files and non-subject folders will be transferred. +Subject folders will not be transferred. Session Level -: * `all` : All session and non-session files and folders within a subject level folder -(e.g. `sub-001`) will be transferred. - * `all_ses` : Session* folders only (i.e. prefixed with `ses-`) and everything within -them will be transferred. - * `all_non_ses` : All files and folders that are not prefixed with `ses-`, within a subject folder, -will be transferred. Any folders prefixed with `ses-` will not be transferred. + +: For sessions within subjects considered for transfer: + +: * `all` : All files and non-session folders will be transferred. +All session (i.e. prefixed with `ses-`) folders will be considered for transfer. + * `all_ses` : All session folders will be considered for transfer. + * `all_non_ses` : All files and non-session folders will be transferred. +Session folders will not be transferred. Datatype Level: -: * `all` : All datatype folders at the subject or session folder level will be transferred, -as well as all files and folders within selected session folders. - * `all_datatype` : All datatype folders (e.g. `behav`, `ephys`, `funcimg`, `anat`) within a session folder will be -transferred. Non-*datatype* folders at the session level will not be transferred - * `all_non_datatype` : Non-datatype folders within session folders only will be transferred +: For datatype folders (e.g. `behav`, `ephys`, `funcimg`, `anat`) +within sessions considered for transfer: + +: * `all` : All files, datatype folders and non-datatype folders will be transferred. + * `all_datatype` : All datatype folders will be transferred. +Files and non-datatype folders will not be transferred. + * `all_non_datatype` : Files and non-datatype folders will be transferred. +Datatype folders will not be transferred. 
-### Convenience Tags +### Custom transfer convenience tags These tags can be included in subject or session names to allow further customisation of data transfer. @@ -303,15 +285,42 @@ allow further customisation of data transfer. (transfer-the-wildcard-tag)= Wildcard : The `@*@` tag can be used to match any portion of a subject or session name. -*e.g.* `ses-001_date-@*@` will transfer all first sessions, matching all possibles date. +*e.g.* `ses-001_date-@*@` will transfer all first sessions matching all possible dates. Transfer a range : The `@TO@` tag can be used to target a range of subjects for transfer. *e.g.* `sub-001@TO@025` will transfer the 1st to up to and including the 25th subject. -(configuring-transfer)= -## Configuring data transfer - -!! overview - -!! link to configs +## Transfer Options + +(transfer-options)= + +overwrite existing files +: By default this option is set to **never**—a transfer will never overwrite a +file that already exists, even if the source and destination modification datetimes +or sizes are different. + +: If **always**, when there are differences in datetime or size +between the source and destination file the destination file will be overwritten. +This includes when the source file is older or smaller than the destination. + +: Finally, **if source newer** ensures data is only overwritten +when the +[source file has a more recent modification time](https://rclone.org/docs/#u-update) +than the destination. +If modification datetimes are equal, the destination will be overwritten if the +sizes or checksums are different. + +: Under the hood, transfers are made with calls to +[Rclone](https://rclone.org/). Using **never** +calls +[Rclone's copy](https://rclone.org/commands/rclone_copy/) +function with the flag `--ignore-existing`. Using +**always** copies without this flag (using Rclone's default overwrite behaviour). +Using **if source newer** calls copy with the `--update` flag. 
+ +(dry-run-argument)= +dry run +: Performs a dry-run transfer in which no data is transferred but logs +are saved as if a transfer had taken place. +This is a useful way to test if a transfer will run as expected. diff --git a/_sources/pages/how_tos/update-configs.md.txt b/_sources/pages/how_tos/update-configs.md.txt index 6d7fb247..6a7dd921 100644 --- a/_sources/pages/how_tos/update-configs.md.txt +++ b/_sources/pages/how_tos/update-configs.md.txt @@ -1,7 +1,7 @@ # How to Update Configs -Once a project has been created, the configs can be updated at any time. +The project configs can be updated at any time following the initial setup. ::::{tab-set} @@ -20,15 +20,14 @@ Once a project has been created, the configs can be updated at any time. ```
-On the `Project Manager` page, clicking the `Configs` tab will display +Clicking the `Configs` tab on the `Project Manager` page will display the current configs. Changing any config and clicking `Save` will update the project configs on the local machine. -If `SSH` configs are [reconfigured](new-project-ssh), -the connection to the server will need -to be reset with `Setup SSH Connection`. +If SSH configs are changed the connection may need to be +[setup again](new-project-ssh). ::: @@ -47,11 +46,11 @@ project.update_config_file( ) ``` -If changing `SSH` configs, the connection may need to be -[reconfigured](new-project-ssh) with: +If SSH configs are changed the connection may need to be +[setup again](new-project-ssh) with: ```python -project.setup_ssh_connection_to_central_server() +project.setup_ssh_connection() ``` ::: diff --git a/_sources/pages/tutorials.md.txt b/_sources/pages/tutorials.md.txt index bdabe800..ca421758 100644 --- a/_sources/pages/tutorials.md.txt +++ b/_sources/pages/tutorials.md.txt @@ -14,3 +14,11 @@ Walk-through starting a new experiment with **datashuttle**. ::: :::: + +```{toctree} +:maxdepth: 2 +:caption: tutorials +:hidden: + +tutorials/getting_started +``` diff --git a/_sources/pages/tutorials/getting_started.md.txt b/_sources/pages/tutorials/getting_started.md.txt index 4fd3489c..a7fc0f97 100644 --- a/_sources/pages/tutorials/getting_started.md.txt +++ b/_sources/pages/tutorials/getting_started.md.txt @@ -1,13 +1,12 @@ (tutorial-getting-started)= -# Getting Started -## Introduction +# Getting Started This tutorial will give a full introduction to starting a neuroscience project with **datashuttle**. -We will get an overview of **datashuttle**'s key features by creating -and transferring a 'mock' experiment, standardised to the +We will highlight **datashuttle**'s key features by creating +a 'mock' experiment, standardised to the [NeuroBlueprint](https://neuroblueprint.neuroinformatics.dev/) style. 
@@ -23,14 +22,15 @@ and transferring a 'mock' experiment, standardised to the ```
-We will create standardised folders then upload mock 'acquired' data (empty text files) -to a central data storage, as you would do in a real data acquisition session. -Then we will download a subset of data (e.g. test sessions only) from the central -storage, as you would do during analysis. +We will upload data to a central data storage machine, +as you would do at the end of a real acquisition session. + +Finally we will download data from the central +storage to a local machine, as you would do during analysis. ## Installing **datashuttle** -The first step is to install **datashuttle**, by following the instructions +The first step is to install **datashuttle** by following the instructions on the [How to Install](how-to-install) page. @@ -39,8 +39,8 @@ on the [How to Install](how-to-install) page. :::{tab-item} Graphical Interface :sync: gui -Once **datashuttle** is installed, typing `datashuttle launch` will -launch the application in your terminal +Entering `datashuttle launch` after installation +will launch the application in your terminal: ```{image} /_static/screenshots/tutorial-1-landing-screen-dark.png :align: center @@ -58,7 +58,7 @@ launch the application in your terminal :sync: python We can check **datashuttle** has installed correctly by -by importing it into Python without error +by importing it into Python without error: ```python from datashuttle import DataShuttle @@ -72,25 +72,26 @@ from datashuttle import DataShuttle The first thing to do when using **datashuttle** on a new machine is to set up your project. -We need to tell **datashuttle** the: +We need to set the: 1) project name -2) location of the project our local machine, where we will save acquired data -3) location of the central data storage, where we will upload the acquired data +2) location of the project our local machine (where the acquired data will be saved). +3) location of the project on the central data storage (where we will upload the acquired data). 
-**datashuttle** supports central data storage either mounted as a drive -on the local machine or through an SHH connection. -See [How to Make a New Project](make-a-new-project) for detailed instructions for -connecting a mounted drive or SSH connection. +**datashuttle** supports connecting to the central storage machine +either as a mounted drive or through SHH. \ +See [How to Make a New Project](make-a-new-project) +for detailed instructions for +connecting a mounted drive or by using SSH. In this walkthrough, we will set our central storage as a -folder on our machine for simplicity. +folder on our local machine for simplicity. ::::{tab-set} :::{tab-item} Graphical Interface :sync: gui -Now we will set up a new project. Click `Make New Project` and you +Click `Make New Project` and you will be taken to the project setup page. ```{image} /_static/screenshots/tutorial-1-make-screen-dark.png @@ -105,8 +106,8 @@ will be taken to the project setup page. ```
-We'll call our project `my_first_project`, and can type this into -the first input box on the page. +We'll call our project `my_first_project` and can type this into +the first input box on the page: ```{image} /_static/screenshots/tutorial-1-make-project-name-dark.png :align: center @@ -120,11 +121,12 @@ the first input box on the page. ```
-Next we need to specify the _local path_, the location on our machine where -we will save our acquired data. Choose any directory that is -convenient, and then add `local` to the end of the filepath. -The filepath can be typed into the input, copied in with `CTRL+V` -or selected from a directory tree using the `Select` button. +Next we need to specify the **local path**, the location on our machine where +acquired data will be saved. Choose any directory that is +convenient. + +In this example we will add the folder `"local"` +to the end of the filepath for clarity: ```{image} /_static/screenshots/tutorial-1-make-local-path-dark.png :align: center @@ -138,13 +140,17 @@ or selected from a directory tree using the `Select` button. ```
-Finally, we need to select the _central path_. Usually this would be +The filepath can be typed into the input, copied in with `CTRL+V` +or selected from a directory tree using the `Select` button. + +Finally, we need to select the **central path**. Usually this would be a path to a mounted central storage drive or relative to the server path -if connecting via SSH. In this tutorial, we will -set this next to the _local path_ for convenience: +if connecting via SSH. + +In this tutorial, we will set this next to the local path for convenience. 1) Copy the contents of the _local path_ input by clicking it, hovering over it and pressing `CTRL+Q` to copy. -2) Paste it into the _central path_ input with `CTRL+V` and change 'local' to 'central'. +2) Paste it into the _central path_ input with `CTRL+V` and change "local" to "central". ```{image} /_static/screenshots/tutorial-1-make-central-path-dark.png :align: center @@ -158,42 +164,39 @@ set this next to the _local path_ for convenience: ```
-You can now click `Save` to set up the project. Once the project -is created, the `Go to Project Screen` button will appear. -Click to move on to the `Create Project` page. +You can now click `Save` to set up the project. + +Once the project is created, the `Go to Project Screen` button +will appear. Click to move on to the `Create Project` page. ::: :::{tab-item} Python API :sync: python -First, we can initialise **datashuttle** -with our chosen `project_name`. In this tutorial we will call -our project `my_first_project` and instantiate the `DataShuttle` -class with this name +First, we must initialise the `DataShuttle` object +with our chosen `project_name`. + +We will call our project `"my_first_project"`: ```python +from datashuttle import DataShuttle + project = DataShuttle("my_first_project") ``` -The created `project` object has many methods for interacting with -**datashuttle**. Next, we will use the `make_config_file()` method to set up the configurations -('configs') for our project. +Next, we will use the `make_config_file()` method set the +configurations ('configs') for our project. + -While full details of all available configs can be found on the -[How to Make a New Project](make-a-new-project) -page, here we will focus on the main three: the `local_path`, `central_path` -and the `connection_method`. +First, we need to specify the `local_path` as the location on our machine +where the projact (and acquired data) will be located. -We will specify the `local_path` as the location on our machine where we -will place our project and save the acquired data. +Next, we set the `central_path` to the project location on the central storage machine. -Usually `central_path` would be the path to a mounted -central storage drive or a relative server path -if connecting via SSH. In this tutorial, we will -set this next to the _local path_ for convenience. +In this tutorial, we will set this next to the `local_path` for convenience. 
Finally, we will set the `connection_method` to `"local_filesystem"` -as we are not using SSH. +as we are not using SSH in this example. ```python project.make_config_file( @@ -203,7 +206,7 @@ project.make_config_file( ) ``` -If you want to change any config in the future, use the `update_config_file` method +If you want to change any config in the future, use the `update_config_file()` method ```python project.update_config_file( @@ -211,15 +214,16 @@ project.update_config_file( ) ``` -Now the project is set up, we are ready to create our standardised project folders! +We are now ready to create our standardised project folders. ::: :::: ## Creating folders Let's imagine today is our first day of data collection, and we are acquiring behaviour (`behav`) and electrophysiology (`ephys`) data. + We will create standardised subject, session and datatype folders -to put the acquired data into. +in which to store the acquired data. ::::{tab-set} :::{tab-item} Graphical Interface @@ -240,18 +244,17 @@ We will create standardised project folders using the `Create` tab.
Following the [NeuroBlueprint](https://neuroblueprint.neuroinformatics.dev/) -style we will call the first subject `sub-001`. Additional key-value pairs in -the subject name could be included if desired (see the +style we will call the first subject `sub-001`. Additional key-value pairs +could be included if desired (see the [NeuroBlueprint specification](https://neuroblueprint.neuroinformatics.dev/specification.html) -for more details). +for details). -In the session name we can include today's date, -so our first session will be `ses-001_date-`. +In the session name we will include today's date as an extra key-value pair. +Our first session will be `ses-001_date-`. We could start by typing `sub-001` into the subject input box, but it is more convenient to simply double-left-click it. This will suggest -the next subject number based on the current subjects in the project. -As currently this project is empty, the suggested next subject is `sub-001`. +the next subject number based on the current subjects in the project: ```{image} /_static/screenshots/tutorial-1-create-subject-dark.png :align: center @@ -275,9 +278,7 @@ to see the nature of the error. Next, we can input the session name. Double-left-click on the session input to automatically fill with `ses-001`. We can then add -today's date with the `@DATE@` convenience tag. - -When the session folder is created, today's date will be automatically added. +today's date with the `@DATE@` convenience tag: ```{image} /_static/screenshots/tutorial-1-create-session-dark.png :align: center @@ -291,8 +292,11 @@ When the session folder is created, today's date will be automatically added. ```
-Next, uncheck the `funcimg` and `anat` datatype boxes, to ensure -we only create `behav` and `ephys` folders in our session folder. +Today's date will be automatically added when the session folder is created. + +The datatype folders to create can be set with the `Datatype(s)` checkboxes. +Uncheck the `funcimg` and `anat` datatype boxes to ensure +we only create `behav` and `ephys` folders. ```{image} /_static/screenshots/tutorial-1-create-datatype-dark.png :align: center @@ -306,24 +310,23 @@ we only create `behav` and `ephys` folders in our session folder. ```
-Finally, click `Create Folders` to create the folder structure in the project! - +Finally, click `Create Folders` to create the project folders. ::: :::{tab-item} Python API :sync: python -We will create standardised project folders with the `create_folders()` method. +We will create project folders with the `create_folders()` method. Following the [NeuroBlueprint](https://neuroblueprint.neuroinformatics.dev/) -style we will call the first subject `sub-001`. Additional key-value pairs in -the subject name could be included if desired (see the +style we will call the first subject `sub-001`. Additional key-value pairs +could be included if desired (see the [NeuroBlueprint specification](https://neuroblueprint.neuroinformatics.dev/specification.html) -for more details). +for details). -In the session name we can include today’s date, so our first session will -be `ses-001_date-`. We can use the `@DATE@` convenience tag. +In the session name we will include today's date as an extra key-value pair. +Our first session will be `ses-001_date-`. Finally, we will tell **datashuttle** to create `behav` and `ephys` datatype -folders only. +folders only: ```python project.create_folders( @@ -334,27 +337,25 @@ project.create_folders( ) ``` -Navigating to the `central_path` in your system filebrowser, you will -see the transferred files. +Navigate to the `local_path` in your system filebrowser to see the created folders. ```{note} The names of the folders to be created are validated on the fly against [NeuroBlueprint](https://neuroblueprint.neuroinformatics.dev/specification.html). -If the folder names will break with the specification, an error will be -raised and the folders will not be created. +An error will be raised if names break with the specification and +the folders will not be created. ``` -Two useful methods to automate folder creation are `get_next_sub_number()` and -`get_next_ses_number()`. 
These can be used to automatically get the next subject +Two useful methods to automate folder creation are `get_next_sub()` and +`get_next_ses()`. These can be used to automatically get the next subject and session names in a project. -For example, to get the next subject -in this project (`sub-002`) and the next session for that subject (in this case, -as it is the first session for `sub-002`, it will be `ses-001`) we can run +To get the next subject in this project (`sub-002`) and the next +session for that subject (`ses-001`) we can run: ```python -next_sub = project.get_next_sub_number("rawdata", local_only=True) # returns "sub-001" -next_ses = project.get_next_ses_number("rawdata", sub=next_sub, local_only=True) # returns "ses-001" +next_sub = project.get_next_sub("rawdata", local_only=True) # returns "sub-001" +next_ses = project.get_next_ses("rawdata", sub=next_sub, local_only=True) # returns "ses-001" project.create_folders( "rawdata", @@ -364,33 +365,30 @@ project.create_folders( ) ``` -This will create the folders, with today's date included in the session folder name. The `local_only` argument restricts the search for the next subject and session -to the local project folder only. To also consider subjects and sessions in -the central storage, set this to `False`. +to the local project folder only. Set this to `False` to consider subjects +and sessions in the central storage. ::: :::: -This was a quick overview of the creating folders—see [How to use Name Templates](how-to-use-name-templates) -and [How to use Create Folder Tags](how-to-create-folders2) for more detail on validation and convenience tags. +This was a quick overview of creating folders—see +and [How to use Create Folder Tags](how-to-create-folders) for full details +including additional customisation with [Name Templates](how-to-use-name-templates). 
## Exploring folders -In our imagined experiment, we will next want to save data from +In our imagined experiment, we will now want to save data from acquisition software into our newly created, standardised folders. +**datashuttle** provides some quick methods to pass the created +folder paths to acquisition software. ::::{tab-set} :::{tab-item} Graphical Interface :sync: gui -When folders are created, the `Directory Tree` on the left-hand side -will update to display the new folders. -By hovering over a folder on the `Directory Tree` we can quickly -copy the full path to the folder (`CTRL+Q)`) (you may need to click -the `Directory Tree` first). - -Alternatively, pressing `CTRL+O` will open the folder in your file browser. +When folders are created the `Directory Tree` on the left-hand side +will update to display the new folders: ```{image} /_static/screenshots/tutorial-1-explore-folders-dark.png :align: center @@ -404,19 +402,23 @@ Alternatively, pressing `CTRL+O` will open the folder in your file browser. ```
-These shortcuts aim to make it simple to direct your acquisition software -to the datatype folders. Hover over the `DirectoryTree` -for a tooltip indicating all possible shortcuts. +By hovering over a folder with the mouse we can quickly +copy the full path to the folder by pressing `CTRL+Q)` +(you may need to click the `Directory Tree` first). + +Alternatively, pressing `CTRL+O` will open the folder in your file browser. + +Hover the mouse over the `DirectoryTree` for a tooltip indicating all possible shortcuts. ```{admonition} Creating mock data for the tutorial -To continue with our experiment, we will need to create 'mock' +To continue with our experiment we will need to create 'mock' acquired data to transfer to central storage. These will take the form of simple text files with their extensions changed. You can download these files from [this link](https://gin.g-node.org/joe-ziminski/datashuttle/src/master/docs/tutorial-mock-data-files), -by right-clicking each file and selecting 'Download (or) Save Link As..'. +by right-clicking each file and selecting "Download (or) Save Link As...". Alternatively you can create them in your favourite text editor. Next, hover over the `behav` folder the `Directory Tree` with your @@ -436,7 +438,8 @@ Finally, hover the mouse over the `Directory Tree` and press `CTRL+R` to refresh :sync: python `create_folders()` returns the full filepaths of created datatype folders. -These can be used to save data to these folders in acquisition scripts + +These can be used in acquisition scripts to save data to these folders: ```python folder_path_list = project.create_folders( @@ -454,18 +457,18 @@ print([path_ for path_ in folder_path_list if path_.name == "behav"]) ```{admonition} Creating mock data for the tutorial -To continue with our experiment, we will need to create 'mock' +To continue with our experiment we will need to create 'mock' acquired data to transfer to central storage. 
These will take the form of simple text files with their extensions changed. You can download these files from [this link](https://gin.g-node.org/joe-ziminski/datashuttle/src/master/docs/tutorial-mock-data-files), -by right-clicking each file and selecting 'Download (or) Save Link As..'. +by right-clicking each file and selecting "Download (or) Save Link As...". Alternatively you can create them in your favourite text editor. Move the mock behavioural data file (`sub-001_ses-001_camera-top.mp4`) into the `behav` datatype folder and the remaining -electrophysiology file to the `ephys` folder. +electrophysiology files to the `ephys` folder. ``` ::: @@ -476,16 +479,23 @@ electrophysiology file to the `ephys` folder. We have now 'acquired' `behav` and `ephys` data onto our local machine. The next step is to upload it to central data storage. -Typically, this would be an external machine or server, connected through a mounted -drive or via SSH. In this walkthrough, we set the central storage on our -local machine for convenience. +In this walkthrough we set the central storage on our +local machine for convenience. Typically, this would be an external +central storage machine connected as a mounted drive or through SSH. + +```{warning} +The **overwrite existing files** setting is very important. +It takes on the options **never**, **always** or **if source newer**. + +See the [transfer options](transfer-options) section for full details. +``` ::::{tab-set} :::{tab-item} Graphical Interface :sync: gui -First, switch to the `Transfer` tab, where on the left we will again -see a `Directory Tree` displaying the local version of the project. +Switch to the `Transfer` tab. On the left we again +see a `Directory Tree` displaying the local version of the project: ```{image} /_static/screenshots/tutorial-1-transfer-screen-upload-dark.png :align: center @@ -500,102 +510,78 @@ see a `Directory Tree` displaying the local version of the project.
The first page on the `Transfer` tab allows us to upload the entire project, -both the `rawdata` and `derivatives` top-level folders -(the `derivatives` folder is used for outputs of processing `rawdata`, see the -[NeuroBlueprint specification](https://neuroblueprint.neuroinformatics.dev/specification.html)). +both the `rawdata` and `derivatives`—see the +[NeuroBlueprint specification](https://neuroblueprint.neuroinformatics.dev/specification.html) +for details. + +We only have acquired data in the `rawdata` folder. +We can simply click `Transfer` to upload everything to central storage. -As we only have a `rawdata` folder, we can simply click `Transfer` to -upload everything to central storage. +The data from local will now appear in the "central" folder +(an easy way to navigate to the folder to check +is to go to the `Config` tab and press `CTRL+O` on the **central path** input box). -Navigating to the _central path_ in the file browser, -the newly transferred data will have appeared, simulating transfer -to a separate data storage machine. (An easy way to navigate to the *central path* -is to go to the `Config` tab and press `CTRL+O` on the _central path_ input box). +See the [How to Transfer Data](how-to-transfer-data) page for full details +on transfer options, as well as `Top Level Folder` and `Custom` transfers. -We can also click the `Top Level` -or `Custom` buttons for refined transfers (for example, if we also had a -`derivatives` folder we did not want to upload). For more information -see the [How to Transfer Data](how-to-transfer-data) page as well as -and the next tutorial section for `Custom` transfers. +Next, we will use `Custom` transfers to move only a subset of the dataset. ::: :::{tab-item} Python API :sync: python `upload_entire_project()` is a high level method that uploads all files -in the project. 
This includes both `rawdata` and `derivatives` top-level folders -(the `derivatives` folder is used for outputs of processing `rawdata`, see the -[NeuroBlueprint specification](https://neuroblueprint.neuroinformatics.dev/specification.html)). +in the project. +This includes both the `rawdata` and `derivatives` top-level folders—see the +[NeuroBlueprint specification](https://neuroblueprint.neuroinformatics.dev/specification.html) +for details. -As we only have a `rawdata` folder, we can simply run +As we only have a `rawdata` folder we can simply run: ```python project.upload_entire_project() ``` -and all files will be uploaded from the local version of the project to central storage. +All files will be uploaded from the local version of the project to central storage. Navigating to the `central_path` in your systems file browser, the newly transferred data -will have appeared, simulating transfer to a separate data storage machine. +will have appeared. -Other methods (`upload_all()` and `upload()`) provide refined -data transfers (and every `upload` method has an equivalent `download` method). -For more information see the -[How to Transfer Data](how-to-transfer-data) page -as well as the next tutorial section for customisable transfers. +Other methods (e.g. `upload_rawdata()` and `upload_custom()`) provide customisable +transfers (and every `upload` method has an equivalent `download` method). -Note that the `overwrite_existing_files` config controls whether -transferred data will overwrite data on the target machine. This config -can be set initially with `make_config_file` or updated with with -`update_config_file` +See the [How to Transfer Data](how-to-transfer-data) page for full details +on transfer methods and [arguments](transfer-options). -```python -project.update_config_file( - overwrite_existing_files=True, -) -``` +Next, we will use `Custom` transfers to move only a subset of the dataset. 
::: :::: -```{warning} -The `Overwrite Existing Files` setting is very important. -By default it is turned off and a transfer will never overwrite a -file that already exists, even if the source version is newer. - -For example, if we upload the first session's behavioural data—and there -is already a file on central storage with the same name -in the same folder—the file will not be uploaded. - -If `Overwrite Existing Files` is on, then any existing files -will be overwritten by newer versions of the file during transfer. -``` - -With the data safely on our central storage, -our experimental acquisition session is complete! - - ## Downloading from central storage -Next let's imagine we are on a different, analysis machine and want to -download a subset of data for further processing. +Next let's imagine we are now using an analysis machine on which +there is no data. We want to download a subset of data central storage +data for further processing. In this example we will download the behavioural data only from the first session. + In practice **datashuttle**'s custom data transfers work well when there -are many subjects and sessions (for example, downloading only the behavioural -'test' sessions from a specific range of subjects). +are many subjects and sessions. For example, we may want to download +only the behavioural 'test' sessions from a specific range of subjects. ```{admonition} Replicating a fresh machine for the tutorial -To replicate starting on a new local machine, delete the `rawdata` folder from -your _local_ path. +To replicate starting on a new local machine, delete the `rawdata` folder +from your **local path**. + +We will next download data from the **central path** to our now-empty local project. -We will next download data from the central project to our now-empty local project. In practice when setting up **datashuttle** on a new machine, you would again [Make a new project](make-a-new-project). 
``` We will look at a small subset of possible -options here, but see [How to make Custom Transfers](making-custom-transfers) for more possibilities. +options here—see [How to make Custom Transfers](making-custom-transfers) for all possibilities. ::::{tab-set} :::{tab-item} Graphical Interface @@ -632,7 +618,7 @@ In the subject input, we can simply type `all` (in this case, we only have one s Next, let's specify what session to download. We can use the [wildcard tag](transfer-the-wildcard-tag) -to avoid typing the exact date—`ses-001_@*@`. +to avoid typing the exact date—`ses-001_@*@`: ```{image} /_static/screenshots/tutorial-1-transfer-screen-custom-sessions-dark.png :align: center @@ -646,6 +632,8 @@ to avoid typing the exact date—`ses-001_@*@`. ```
+This is useful if you want to download many sessions, all with different dates. + Then, select only the `behav` datatype from the datatype checkboxes. ```{image} /_static/screenshots/tutorial-1-transfer-screen-custom-datatypes-dark.png @@ -663,37 +651,32 @@ Then, select only the `behav` datatype from the datatype checkboxes. Finally, we can select `Download` from the upload / download switch, and click `Transfer`. -Note that the `Overwrite Existing Files` setting affects both upload -and downloads—any local versions of a file will be overwritten -by newer versions downloaded from central storage when it is turned on. - ```{image} /_static/screenshots/tutorial-1-transfer-screen-custom-switch-dark.png :align: center :class: only-dark - :width: 400px + :width: 580px ``` ```{image} /_static/screenshots/tutorial-1-transfer-screen-custom-switch-light.png :align: center :class: only-light - :width: 400px + :width: 580px ```
The transfer will complete, and the custom selection -of files will now be available in the _local path_ folder. +of files will now be available in the **local path**. ::: :::{tab-item} Python API :sync: python -We can use the `download()` method (the download equivalent method of -the `upload()`). +We will use the `download_custom()` method (the download equivalent method of +the `upload_custom()`). -We will download only the behavioural data from the first -session, using a few shortcuts available for custom transfers +Convenience tags can be used to make downloading subsets of data easier: ```python -project.download( +project.download_custom( top_level_folder="rawdata", sub_names="all", ses_names="ses-001_@*@", @@ -706,7 +689,9 @@ we only have one subject anyway). The `@*@` [wildcard tag](transfer-the-wildcard-tag) can be used to match any part of a subject or session name—in this case we use it to avoid -typing out the date. +typing out the date. This is also useful if you want to download many +sessions, all with different dates. + Finally, we chose to download only the `behav` data for the session. ::: @@ -718,7 +703,7 @@ Visit [How to Read the Logs](how-to-read-the-logs) for more information. ``` The transfer will complete, and the custom selection -of files will now be available in the _local path_ folder! +of files will now be available in the **local path**. 
## Summary diff --git a/_static/documentation_options.js b/_static/documentation_options.js index 44e14882..c00afe73 100644 --- a/_static/documentation_options.js +++ b/_static/documentation_options.js @@ -1,6 +1,6 @@ var DOCUMENTATION_OPTIONS = { URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'), - VERSION: '0.3.0', + VERSION: '0.4.0', LANGUAGE: 'en', COLLAPSE_INDEX: false, BUILDER: 'html', diff --git a/_static/screenshots/how-to-transfer-all-dark.png b/_static/screenshots/how-to-transfer-all-dark.png index 52f1bb54..9f66a1e7 100644 Binary files a/_static/screenshots/how-to-transfer-all-dark.png and b/_static/screenshots/how-to-transfer-all-dark.png differ diff --git a/_static/screenshots/how-to-transfer-all-light.png b/_static/screenshots/how-to-transfer-all-light.png index dec9b504..888eaea4 100644 Binary files a/_static/screenshots/how-to-transfer-all-light.png and b/_static/screenshots/how-to-transfer-all-light.png differ diff --git a/_static/screenshots/how-to-transfer-custom-dark.png b/_static/screenshots/how-to-transfer-custom-dark.png index eba64596..17db9014 100644 Binary files a/_static/screenshots/how-to-transfer-custom-dark.png and b/_static/screenshots/how-to-transfer-custom-dark.png differ diff --git a/_static/screenshots/how-to-transfer-custom-light.png b/_static/screenshots/how-to-transfer-custom-light.png index 6fc2d7db..385a4757 100644 Binary files a/_static/screenshots/how-to-transfer-custom-light.png and b/_static/screenshots/how-to-transfer-custom-light.png differ diff --git a/_static/screenshots/how-to-transfer-toplevel-dark.png b/_static/screenshots/how-to-transfer-toplevel-dark.png index 9e1a41a3..9d13e5f3 100644 Binary files a/_static/screenshots/how-to-transfer-toplevel-dark.png and b/_static/screenshots/how-to-transfer-toplevel-dark.png differ diff --git a/_static/screenshots/how-to-transfer-toplevel-light.png b/_static/screenshots/how-to-transfer-toplevel-light.png index 0b81b8c5..fcd2c5a1 100644 
Binary files a/_static/screenshots/how-to-transfer-toplevel-light.png and b/_static/screenshots/how-to-transfer-toplevel-light.png differ diff --git a/_static/screenshots/tutorial-1-transfer-screen-custom-switch-dark.png b/_static/screenshots/tutorial-1-transfer-screen-custom-switch-dark.png index 03db573e..02cdec0d 100644 Binary files a/_static/screenshots/tutorial-1-transfer-screen-custom-switch-dark.png and b/_static/screenshots/tutorial-1-transfer-screen-custom-switch-dark.png differ diff --git a/_static/screenshots/tutorial-1-transfer-screen-custom-switch-light.png b/_static/screenshots/tutorial-1-transfer-screen-custom-switch-light.png index a22420d7..e93bd135 100644 Binary files a/_static/screenshots/tutorial-1-transfer-screen-custom-switch-light.png and b/_static/screenshots/tutorial-1-transfer-screen-custom-switch-light.png differ diff --git a/_static/screenshots/tutorial-1-transfer-screen-upload-dark.png b/_static/screenshots/tutorial-1-transfer-screen-upload-dark.png index 58a13aed..34b02db0 100644 Binary files a/_static/screenshots/tutorial-1-transfer-screen-upload-dark.png and b/_static/screenshots/tutorial-1-transfer-screen-upload-dark.png differ diff --git a/_static/screenshots/tutorial-1-transfer-screen-upload-light.png b/_static/screenshots/tutorial-1-transfer-screen-upload-light.png index 2ab03bb9..124853d9 100644 Binary files a/_static/screenshots/tutorial-1-transfer-screen-upload-light.png and b/_static/screenshots/tutorial-1-transfer-screen-upload-light.png differ diff --git a/genindex.html b/genindex.html index 5cd2311f..f8ae2521 100644 --- a/genindex.html +++ b/genindex.html @@ -36,7 +36,7 @@ - + @@ -126,7 +126,7 @@ -

Datashuttle v0.3.0

+

datashuttle v0.4.0

@@ -395,13 +395,15 @@

D

@@ -451,7 +453,7 @@

S

@@ -368,7 +368,7 @@

datashuttle#

-

The tool to automate neuroscience project folder creation and transfer.

+

The tool to automate neuroscience project folder creation, validation and transfer.


@@ -397,18 +397,19 @@

datashuttle
Python API
-

Full Python API reference.

+

Full Python reference.

-

Datashuttle creates and validates projects standardised to the +

datashuttle creates and validates projects standardised to the NeuroBlueprint specification.

Dive right into datashuttle with our -Getting Started Tutorial -or targeted How-To Guides.

+Getting Started Tutorial +or targeted How-To Guides.
+It can be used through a graphical interface or Python API.

Don’t hesitate to get in contact through our GitHub Issues or diff --git a/objects.inv b/objects.inv index c99e969a..dd26d1a2 100644 Binary files a/objects.inv and b/objects.inv differ diff --git a/output.json b/output.json index e42ef49f..68ba8ce1 100644 --- a/output.json +++ b/output.json @@ -1,39 +1,37 @@ {"filename": "index.md", "lineno": 25, "status": "unchecked", "code": 0, "uri": "", "info": ""} -{"filename": "pages/documentation.md", "lineno": 158, "status": "unchecked", "code": 0, "uri": "#api-reference", "info": ""} -{"filename": "pages/documentation.md", "lineno": 158, "status": "unchecked", "code": 0, "uri": "#cli-reference", "info": ""} -{"filename": "pages/how_tos/install.md", "lineno": 74, "status": "unchecked", "code": 0, "uri": "#how-to-choose-a-terminal", "info": ""} +{"filename": "pages/how_tos/install.md", "lineno": 82, "status": "unchecked", "code": 0, "uri": "#how-to-choose-a-terminal", "info": ""} +{"filename": "pages/tutorials/getting_started.md", "lineno": 375, "status": "unchecked", "code": 0, "uri": "#how-to-create-folders", "info": ""} {"filename": "pages/how_tos/make-a-new-project.md", "lineno": 8, "status": "unchecked", "code": 0, "uri": "#how-to-install", "info": ""} -{"filename": "pages/tutorials/getting_started.md", "lineno": 716, "status": "unchecked", "code": 0, "uri": "#how-to-read-the-logs", "info": ""} -{"filename": "pages/tutorials/getting_started.md", "lineno": 517, "status": "unchecked", "code": 0, "uri": "#how-to-transfer-data", "info": ""} -{"filename": "pages/how_tos/create-folders.md", "lineno": 90, "status": "unchecked", "code": 0, "uri": "#how-to-use-name-templates", "info": ""} +{"filename": "pages/tutorials/getting_started.md", "lineno": 701, "status": "unchecked", "code": 0, "uri": "#how-to-read-the-logs", "info": ""} +{"filename": "pages/tutorials/getting_started.md", "lineno": 526, "status": "unchecked", "code": 0, "uri": "#how-to-transfer-data", "info": ""} +{"filename": "pages/how_tos/create-folders.md", 
"lineno": 89, "status": "unchecked", "code": 0, "uri": "#how-to-use-name-templates", "info": ""} {"filename": "index.md", "lineno": 51, "status": "unchecked", "code": 0, "uri": "#how-tos", "info": ""} -{"filename": "pages/how_tos/create-folders.md", "lineno": 46, "status": "unchecked", "code": 0, "uri": "#make-a-new-project", "info": ""} -{"filename": "pages/tutorials/getting_started.md", "lineno": 597, "status": "unchecked", "code": 0, "uri": "#making-custom-transfers", "info": ""} +{"filename": "pages/tutorials/getting_started.md", "lineno": 490, "status": "unchecked", "code": 0, "uri": "#id2", "info": ""} +{"filename": "pages/how_tos/create-folders.md", "lineno": 42, "status": "unchecked", "code": 0, "uri": "#make-a-new-project", "info": ""} +{"filename": "pages/tutorials/getting_started.md", "lineno": 583, "status": "unchecked", "code": 0, "uri": "#making-custom-transfers", "info": ""} {"filename": "pages/how_tos/update-configs.md", "lineno": 30, "status": "unchecked", "code": 0, "uri": "#new-project-ssh", "info": ""} -{"filename": "pages/how_tos/transfer-data.md", "lineno": 44, "status": "unchecked", "code": 0, "uri": "#overwrite-existing-files-config", "info": ""} -{"filename": "pages/tutorials/getting_started.md", "lineno": 635, "status": "unchecked", "code": 0, "uri": "#transfer-the-wildcard-tag", "info": ""} -{"filename": "pages/how_tos/choose-a-terminal.md", "lineno": 81, "status": "unchecked", "code": 0, "uri": "#tutorial-getting-started", "info": ""} -{"filename": "pages/how_tos/choose-a-terminal.md", "lineno": 60, "status": "working", "code": 0, "uri": "https://docs.anaconda.com/free/anaconda/getting-started/index.html", "info": ""} -{"filename": "pages/documentation.md", "lineno": 49, "status": "working", "code": 0, "uri": "https://bids-specification.readthedocs.io/en/stable/04-modality-specific-files/10-microscopy.html", "info": ""} -{"filename": "pages/documentation.md", "lineno": 49, "status": "working", "code": 0, "uri": 
"https://bep032tools.readthedocs.io/en/latest/", "info": ""} -{"filename": "pages/how_tos/install.md", "lineno": 16, "status": "working", "code": 0, "uri": "https://docs.anaconda.com/free/miniconda/miniconda-install/", "info": ""} -{"filename": "pages/documentation.md", "lineno": 44, "status": "working", "code": 0, "uri": "https://bids.neuroimaging.io/", "info": ""} -{"filename": "pages/how_tos/create-folders.md", "lineno": 209, "status": "working", "code": 0, "uri": "https://en.wikipedia.org/wiki/ISO_8601", "info": ""} -{"filename": "pages/documentation.md", "lineno": 67, "status": "working", "code": 0, "uri": "https://docs.conda.io/en/latest/", "info": ""} -{"filename": "pages/how_tos/choose-a-terminal.md", "lineno": 55, "status": "redirected", "code": 302, "uri": "https://apps.microsoft.com/detail/9n0dx20hk701?rtc=1&hl=en-gb&gl=GB", "info": "https://apps.microsoft.com/detail/9n0dx20hk701?rtc=1&%3Bhl=en-gb&%3Bgl=GB&hl=en-us&gl=US"} -{"filename": "pages/tutorials/getting_started.md", "lineno": 733, "status": "working", "code": 0, "uri": "https://github.com/neuroinformatics-unit/datashuttle", "info": ""} -{"filename": "pages/documentation.md", "lineno": 101, "status": "working", "code": 0, "uri": "https://github.com/neuroinformatics-unit/datashuttle/", "info": ""} -{"filename": "pages/documentation.md", "lineno": 31, "status": "redirected", "code": 302, "uri": "https://github.com/neuroinformatics-unit/datashuttle/assets/29216006/51b65a6d-492a-4047-ae7b-16273b58e258", "info": "https://github-production-user-asset-6210df.s3.amazonaws.com/29216006/244708175-51b65a6d-492a-4047-ae7b-16273b58e258.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAVCODYLSA53PQK4ZA%2F20240408%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20240408T203542Z&X-Amz-Expires=300&X-Amz-Signature=fa6efe054603d02e748bfa547936c20c746132305dc9945257612a320d352824&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=531897428"} -{"filename": "pages/tutorials/getting_started.md", "lineno": 419, 
"status": "working", "code": 0, "uri": "https://gin.g-node.org/joe-ziminski/datashuttle/src/master/docs/tutorial-mock-data-files", "info": ""} -{"filename": "index.md", "lineno": 55, "status": "working", "code": 0, "uri": "https://github.com/neuroinformatics-unit/datashuttle/issues", "info": ""} -{"filename": "pages/documentation.md", "lineno": 36, "status": "working", "code": 0, "uri": "https://neuroblueprint.neuroinformatics.dev/", "info": ""} +{"filename": "pages/tutorials/getting_started.md", "lineno": 621, "status": "unchecked", "code": 0, "uri": "#transfer-the-wildcard-tag", "info": ""} +{"filename": "index.md", "lineno": 51, "status": "unchecked", "code": 0, "uri": "#tutorial-getting-started", "info": ""} +{"filename": "pages/how_tos/create-folders.md", "lineno": 201, "status": "working", "code": 0, "uri": "https://en.wikipedia.org/wiki/ISO_8601", "info": ""} +{"filename": "pages/how_tos/install.md", "lineno": 13, "status": "working", "code": 0, "uri": "https://en.wikipedia.org/wiki/Software_release_life_cycle#Beta", "info": ""} +{"filename": "pages/how_tos/install.md", "lineno": 24, "status": "working", "code": 0, "uri": "https://docs.anaconda.com/free/miniconda/miniconda-install/", "info": ""} +{"filename": "pages/how_tos/choose-a-terminal.md", "lineno": 56, "status": "redirected", "code": 302, "uri": "https://apps.microsoft.com/detail/9n0dx20hk701?rtc=1&hl=en-gb&gl=GB", "info": "https://apps.microsoft.com/detail/9n0dx20hk701?rtc=1&%3Bhl=en-gb&%3Bgl=GB&hl=en-us&gl=US"} +{"filename": "pages/how_tos/install.md", "lineno": 8, "status": "working", "code": 0, "uri": "https://docs.conda.io/en/latest/", "info": ""} +{"filename": "pages/how_tos/choose-a-terminal.md", "lineno": 61, "status": "working", "code": 0, "uri": "https://docs.anaconda.com/free/anaconda/getting-started/index.html", "info": ""} +{"filename": "index.md", "lineno": 56, "status": "working", "code": 0, "uri": "https://github.com/neuroinformatics-unit/datashuttle/issues", "info": ""} {"filename": 
"index.md", "lineno": 47, "status": "working", "code": 0, "uri": "https://neuroblueprint.neuroinformatics.dev", "info": ""} -{"filename": "pages/how_tos/create-folders.md", "lineno": 8, "status": "working", "code": 0, "uri": "https://neuroblueprint.neuroinformatics.dev/specification.html", "info": ""} -{"filename": "pages/how_tos/transfer-data.md", "lineno": 57, "status": "working", "code": 0, "uri": "https://neuroblueprint.neuroinformatics.dev/specification.html#basic-principles", "info": ""} -{"filename": "pages/documentation.md", "lineno": 67, "status": "working", "code": 0, "uri": "https://mamba.readthedocs.io/en/latest/index.html", "info": ""} -{"filename": "index.md", "lineno": 55, "status": "working", "code": 0, "uri": "https://neuroinformatics.zulipchat.com/#narrow/stream/405999-DataShuttle", "info": ""} -{"filename": "pages/how_tos/choose-a-terminal.md", "lineno": 68, "status": "working", "code": 0, "uri": "https://wezfurlong.org/wezterm/index.html", "info": ""} -{"filename": "pages/how_tos/choose-a-terminal.md", "lineno": 68, "status": "redirected", "code": 301, "uri": "https://warp.dev/", "info": "https://www.warp.dev/"} +{"filename": "pages/how_tos/create-folders.md", "lineno": 4, "status": "working", "code": 0, "uri": "https://neuroblueprint.neuroinformatics.dev/", "info": ""} +{"filename": "pages/tutorials/getting_started.md", "lineno": 718, "status": "working", "code": 0, "uri": "https://github.com/neuroinformatics-unit/datashuttle", "info": ""} +{"filename": "pages/how_tos/create-folders.md", "lineno": 7, "status": "working", "code": 0, "uri": "https://neuroblueprint.neuroinformatics.dev/specification.html", "info": ""} +{"filename": "pages/how_tos/transfer-data.md", "lineno": 51, "status": "working", "code": 0, "uri": "https://neuroblueprint.neuroinformatics.dev/specification.html#basic-principles", "info": ""} +{"filename": "pages/how_tos/install.md", "lineno": 60, "status": "working", "code": 0, "uri": 
"https://github.com/neuroinformatics-unit/datashuttle/", "info": ""} +{"filename": "index.md", "lineno": 56, "status": "working", "code": 0, "uri": "https://neuroinformatics.zulipchat.com/#narrow/stream/405999-DataShuttle", "info": ""} +{"filename": "pages/tutorials/getting_started.md", "lineno": 421, "status": "working", "code": 0, "uri": "https://gin.g-node.org/joe-ziminski/datashuttle/src/master/docs/tutorial-mock-data-files", "info": ""} +{"filename": "pages/how_tos/choose-a-terminal.md", "lineno": 69, "status": "redirected", "code": 301, "uri": "https://warp.dev/", "info": "https://www.warp.dev/"} +{"filename": "pages/how_tos/choose-a-terminal.md", "lineno": 69, "status": "working", "code": 0, "uri": "https://wezfurlong.org/wezterm/index.html", "info": ""} +{"filename": "pages/how_tos/install.md", "lineno": 4, "status": "working", "code": 0, "uri": "https://www.python.org/", "info": ""} +{"filename": "pages/how_tos/transfer-data.md", "lineno": 314, "status": "working", "code": 0, "uri": "https://rclone.org/commands/rclone_copy/", "info": ""} +{"filename": "pages/how_tos/install.md", "lineno": 47, "status": "working", "code": 0, "uri": "https://rclone.org/", "info": ""} +{"filename": "pages/how_tos/install.md", "lineno": 47, "status": "working", "code": 0, "uri": "https://rclone.org/downloads/", "info": ""} {"filename": "pages/how_tos/make-a-new-project.md", "lineno": 141, "status": "working", "code": 0, "uri": "https://www.ssh.com/academy/ssh/protocol", "info": ""} -{"filename": "pages/how_tos/install.md", "lineno": 39, "status": "working", "code": 0, "uri": "https://rclone.org/downloads/", "info": ""} -{"filename": "pages/documentation.md", "lineno": 744, "status": "working", "code": 0, "uri": "https://rclone.org/", "info": ""} +{"filename": "pages/how_tos/transfer-data.md", "lineno": 307, "status": "working", "code": 0, "uri": "https://rclone.org/docs/#u-update", "info": ""} diff --git a/output.txt b/output.txt index 74af3997..bfdb60ae 100644 --- 
a/output.txt +++ b/output.txt @@ -1,3 +1,2 @@ -pages/how_tos/choose-a-terminal.md:55: [redirected with Found] https://apps.microsoft.com/detail/9n0dx20hk701?rtc=1&hl=en-gb&gl=GB to https://apps.microsoft.com/detail/9n0dx20hk701?rtc=1&%3Bhl=en-gb&%3Bgl=GB&hl=en-us&gl=US -pages/documentation.md:31: [redirected with Found] https://github.com/neuroinformatics-unit/datashuttle/assets/29216006/51b65a6d-492a-4047-ae7b-16273b58e258 to https://github-production-user-asset-6210df.s3.amazonaws.com/29216006/244708175-51b65a6d-492a-4047-ae7b-16273b58e258.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAVCODYLSA53PQK4ZA%2F20240408%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20240408T203542Z&X-Amz-Expires=300&X-Amz-Signature=fa6efe054603d02e748bfa547936c20c746132305dc9945257612a320d352824&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=531897428 -pages/how_tos/choose-a-terminal.md:68: [redirected permanently] https://warp.dev/ to https://www.warp.dev/ +pages/how_tos/choose-a-terminal.md:56: [redirected with Found] https://apps.microsoft.com/detail/9n0dx20hk701?rtc=1&hl=en-gb&gl=GB to https://apps.microsoft.com/detail/9n0dx20hk701?rtc=1&%3Bhl=en-gb&%3Bgl=GB&hl=en-us&gl=US +pages/how_tos/choose-a-terminal.md:69: [redirected permanently] https://warp.dev/ to https://www.warp.dev/ diff --git a/pages/api_index.html b/pages/api_index.html index 9c826fda..2b1b3a11 100644 --- a/pages/api_index.html +++ b/pages/api_index.html @@ -37,7 +37,7 @@ - + @@ -47,7 +47,7 @@ - + @@ -128,7 +128,7 @@ -

Datashuttle v0.3.0

+

datashuttle v0.4.0

@@ -259,7 +259,7 @@ -
+
@@ -345,15 +345,6 @@
- - @@ -424,7 +415,7 @@ To get the path to datashuttle logs, use cfgs.make_and_get_logging_path().

For transferring data between a central data storage -with SSH, use setup setup_ssh_connection_to_central_server(). +with SSH, use setup setup_ssh_connection(). This will allow you to check the server key, add host key to profile if accepted, and setup ssh key pair.

@@ -445,71 +436,77 @@ - + - + - - + + - + - + + + + - + - + - + - + - + - + - + - - + + - - + + - + - + - + - + - + - - + + - + + + + @@ -596,8 +593,8 @@
-
-upload(top_level_folder, sub_names, ses_names, datatype='all', dry_run=False, init_log=True)[source]#
+
+upload_custom(top_level_folder, sub_names, ses_names, datatype='all', overwrite_existing_files='never', dry_run=False, init_log=True)[source]#

Upload data from a local project to the central project folder. In the case that a file / folder exists on the central and local, the central will not be overwritten @@ -615,11 +612,15 @@ datatype folder to upload.

  • ses_names (Union[str, list]) – a session name / list of session names, similar to sub_names but requiring a “ses-” prefix.

  • -
  • dry_run (bool) – perform a dry-run of upload. This will output as if file +

  • datatype (Union[List[str], str]) – see create_folders()

  • +
  • overwrite_existing_files (Literal['never', 'always', 'if_source_newer']) – If False, files on central will never be overwritten +by files transferred from local. If True, central files +will be overwritten if there is any difference (date, size) +between central and local files.

  • +
  • dry_run (bool) – perform a dry-run of transfer. This will output as if file transfer was taking place, but no files will be moved. Useful to check which files will be moved on data transfer.

  • -
  • datatype (Union[List[str], str]) – see create_folders()

  • -
  • init_log (bool) – (Optional). Whether handle logging. This should +

  • init_log (bool) – (Optional). Whether to handle logging. This should always be True, unless logger is handled elsewhere (e.g. in a calling function).

  • @@ -628,101 +629,178 @@

    None

    -

    Notes

    -

    The configs “overwrite_existing_files”, “transfer_verbosity” -and “show_transfer_progress” pertain to data-transfer settings. -See make_config_file() for more information.

    -

    sub_names or ses_names may contain certain formatting tags:

    -
    -
    @*@: wildcard search for subject names. e.g. ses-001_date-@*@

    will transfer all session 001 collected on all dates.

    -
    -
    @TO@: used to transfer a range of sub/ses.

    Number must be either side of the tag -e.g. sub-001@TO@003 will generate -[“sub-001”, “sub-002”, “sub-003”]

    +
    + +
    +
    +download_custom(top_level_folder, sub_names, ses_names, datatype='all', overwrite_existing_files='never', dry_run=False, init_log=True)[source]#
    +

    Download data from the central project folder to the +local project folder.

    +
    +
    Parameters:
    +
      +
    • top_level_folder (Literal['rawdata', 'derivatives']) – The top-level folder (e.g. rawdata) to transfer files +and folders within.

    • +
    • sub_names (Union[str, list]) – a subject name / list of subject names. These must +be prefixed with “sub-”, or the prefix will be +automatically added. “@*@” can be used as a wildcard. +“all” will search for all sub-folders in the +datatype folder to upload.

    • +
    • ses_names (Union[str, list]) – a session name / list of session names, similar to +sub_names but requiring a “ses-” prefix.

    • +
    • datatype (Union[List[str], str]) – see create_folders()

    • +
    • overwrite_existing_files (Literal['never', 'always', 'if_source_newer']) – If “never” files on target will never be overwritten by source. +If “always” files on target will be overwritten by source if +there is any difference in date or size. +If “if_source_newer” files on target will only be overwritten +by files on source with newer creation / modification datetime.

    • +
    • dry_run (bool) – perform a dry-run of transfer. This will output as if file +transfer was taking place, but no files will be moved. Useful +to check which files will be moved on data transfer.

    • +
    • init_log (bool) – (Optional). Whether to handle logging. This should +always be True, unless logger is handled elsewhere +(e.g. in a calling function).

    • +
    -
    @DATE@, @TIME@ @DATETIME@: will add date-<value>, time-<value> or

    date-<value>_time-<value> keys respectively. Only one per-name -is permitted. -e.g. sub-001_@DATE@ will generate sub-001_date-20220101 -(on the 1st january, 2022).

    +
    Return type:
    +

    None

    -
    -download(top_level_folder, sub_names, ses_names, datatype='all', dry_run=False, init_log=True)[source]#
    -

    Download data from the central project folder to the -local project folder. In the case that a file / folder -exists on the central and local, the local will -not be overwritten even if the central file is an -older version.

    -

    This function is identical to upload() but with the direction -of data transfer reversed. Please see upload() for arguments. -“all” arguments will search the central -project for sub / ses to download.

    +
    +upload_rawdata(overwrite_existing_files='never', dry_run=False)[source]#
    +

    Upload files in the rawdata top level folder.

    -
    Return type:
    -

    None

    +
    Parameters:
    +
      +
    • overwrite_existing_files (Literal['never', 'always', 'if_source_newer']) – If “never” files on target will never be overwritten by source. +If “always” files on target will be overwritten by source if +there is any difference in date or size. +If “if_source_newer” files on target will only be overwritten +by files on source with newer creation / modification datetime.

    • +
    • dry_run (bool) – perform a dry-run of transfer. This will output as if file +transfer was taking place, but no files will be moved. Useful +to check which files will be moved on data transfer.

    • +
    -
    -upload_all(top_level_folder, dry_run=False, init_log=True)[source]#
    -

    Convenience function to upload all data.

    +
    +upload_derivatives(overwrite_existing_files='never', dry_run=False)[source]#
    +

    Upload files in the derivatives top level folder.

    -
    Return type:
    -

    None

    +
    Parameters:
    +
      +
    • overwrite_existing_files (Literal['never', 'always', 'if_source_newer']) – If “never” files on target will never be overwritten by source. +If “always” files on target will be overwritten by source if +there is any difference in date or size. +If “if_source_newer” files on target will only be overwritten +by files on source with newer creation / modification datetime.

    • +
    • dry_run (bool) – perform a dry-run of transfer. This will output as if file +transfer was taking place, but no files will be moved. Useful +to check which files will be moved on data transfer.

    • +
    -
    -
    Alias for:

    project.upload(“all”, “all”, “all”)

    +
    + +
    +
    +download_rawdata(overwrite_existing_files='never', dry_run=False)[source]#
    +

    Download files in the rawdata top level folder.

    +
    +
    Parameters:
    +
      +
    • overwrite_existing_files (Literal['never', 'always', 'if_source_newer']) – If “never” files on target will never be overwritten by source. +If “always” files on target will be overwritten by source if +there is any difference in date or size. +If “if_source_newer” files on target will only be overwritten +by files on source with newer creation / modification datetime.

    • +
    • dry_run (bool) – perform a dry-run of transfer. This will output as if file +transfer was taking place, but no files will be moved. Useful +to check which files will be moved on data transfer.

    • +
    -
    -download_all(top_level_folder, dry_run=False, init_log=True)[source]#
    -

    Convenience function to download all data.

    -

    Alias for : project.download(“all”, “all”, “all”)

    +
    +download_derivatives(overwrite_existing_files='never', dry_run=False)[source]#
    +

    Download files in the derivatives top level folder.

    -
    Return type:
    -

    None

    +
    Parameters:
    +
      +
    • overwrite_existing_files (Literal['never', 'always', 'if_source_newer']) – If “never” files on target will never be overwritten by source. +If “always” files on target will be overwritten by source if +there is any difference in date or size. +If “if_source_newer” files on target will only be overwritten +by files on source with newer creation / modification datetime.

    • +
    • dry_run (bool) – perform a dry-run of transfer. This will output as if file +transfer was taking place, but no files will be moved. Useful +to check which files will be moved on data transfer.

    • +
    -upload_entire_project()[source]#
    +upload_entire_project(overwrite_existing_files='never', dry_run=False)[source]#

    Upload the entire project (from ‘local’ to ‘central’), i.e. including every top level folder (e.g. ‘rawdata’, ‘derivatives’, ‘code’, ‘analysis’).

    -
    Return type:
    -

    None

    +
    Parameters:
    +
      +
    • overwrite_existing_files (Literal['never', 'always', 'if_source_newer']) – If “never” files on target will never be overwritten by source. +If “always” files on target will be overwritten by source if +there is any difference in date or size. +If “if_source_newer” files on target will only be overwritten +by files on source with newer creation / modification datetime.

    • +
    • dry_run (bool) – perform a dry-run of transfer. This will output as if file +transfer was taking place, but no files will be moved. Useful +to check which files will be moved on data transfer.

    • +
    +
    +
    Return type:
    +

    None

    -download_entire_project()[source]#
    +download_entire_project(overwrite_existing_files='never', dry_run=False)[source]#

    Download the entire project (from ‘central’ to ‘local’), i.e. including every top level folder (e.g. ‘rawdata’, ‘derivatives’, ‘code’, ‘analysis’).

    -
    Return type:
    -

    None

    +
    Parameters:
    +
      +
    • overwrite_existing_files (Literal['never', 'always', 'if_source_newer']) – If “never” files on target will never be overwritten by source. +If “always” files on target will be overwritten by source if +there is any difference in date or size. +If “if_source_newer” files on target will only be overwritten +by files on source with newer creation / modification datetime.

    • +
    • dry_run (bool) – perform a dry-run of transfer. This will output as if file +transfer was taking place, but no files will be moved. Useful +to check which files will be moved on data transfer.

    • +
    +
    +
    Return type:
    +

    None

    -upload_specific_folder_or_file(filepath, dry_run=False)[source]#
    +upload_specific_folder_or_file(filepath, overwrite_existing_files='never', dry_run=False)[source]#

    Upload a specific file or folder. If transferring a single file, the path including the filename is required (see ‘filepath’ input). If a folder, @@ -733,7 +811,12 @@

    Parameters:
    • filepath (Union[str, Path]) – a string containing the full filepath.

    • -
    • dry_run (bool) – perform a dry-run of upload. This will output as if file +

    • overwrite_existing_files (Literal['never', 'always', 'if_source_newer']) – If “never” files on target will never be overwritten by source. +If “always” files on target will be overwritten by source if +there is any difference in date or size. +If “if_source_newer” files on target will only be overwritten +by files on source with newer creation / modification datetime.

    • +
    • dry_run (bool) – perform a dry-run of transfer. This will output as if file transfer was taking place, but no files will be moved. Useful to check which files will be moved on data transfer.

    @@ -746,7 +829,7 @@
    -download_specific_folder_or_file(filepath, dry_run=False)[source]#
    +download_specific_folder_or_file(filepath, overwrite_existing_files='never', dry_run=False)[source]#

    Download a specific file or folder. If transferring a single file, the path including the filename is required (see ‘filepath’ input). If a folder, @@ -757,7 +840,12 @@

    Parameters:
    • filepath (Union[str, Path]) – a string containing the full filepath.

    • -
    • dry_run (bool) – perform a dry-run of upload. This will output as if file +

    • overwrite_existing_files (Literal['never', 'always', 'if_source_newer']) – If “never” files on target will never be overwritten by source. +If “always” files on target will be overwritten by source if +there is any difference in date or size. +If “if_source_newer” files on target will only be overwritten +by files on source with newer creation / modification datetime.

    • +
    • dry_run (bool) – perform a dry-run of transfer. This will output as if file transfer was taking place, but no files will be moved. Useful to check which files will be moved on data transfer.

    @@ -769,8 +857,8 @@
    -
    -setup_ssh_connection_to_central_server()[source]#
    +
    +setup_ssh_connection()[source]#

    Setup a connection to the central server using SSH. Assumes the central_host_id and central_host_username are set in configs (see make_config_file() and update_config_file())

    @@ -806,7 +894,7 @@
    -make_config_file(local_path, central_path, connection_method, central_host_id=None, central_host_username=None, overwrite_existing_files=False, transfer_verbosity='v', show_transfer_progress=False)[source]#
    +make_config_file(local_path, central_path, connection_method, central_host_id=None, central_host_username=None)[source]#

    Initialise the configurations for datashuttle to use on the local machine. Once initialised, these settings will be used each time the datashuttle is opened. This method @@ -834,18 +922,6 @@ e.g. “ssh.swc.ucl.ac.uk”

  • central_host_username (Optional[str]) – username for which to log in to central host. e.g. “jziminski”

  • -
  • overwrite_existing_files (bool) – If True, when copying data (upload or download) files -will be overwritten if the timestamp of the copied -version is later than the target folder version -of the file i.e. edits made to a file in the source -machine will be copied to the target machine. If False, -a file will be copied if it does not exist on the target -folder, otherwise it will never be copied, even if -the source version of the file has a later timestamp.

  • -
  • transfer_verbosity (str) – “v” will tell you about each file that is transferred and -significant events, “vv” will be very verbose and inform -on all events.

  • -
  • show_transfer_progress (bool) – If true, the real-time progress of file transfers will be printed.

  • Return type:
    @@ -945,9 +1021,9 @@
    -
    -get_next_sub_number(top_level_folder, return_with_prefix=True, local_only=False)[source]#
    -

    Convenience function for get_next_sub_or_ses_number +

    +get_next_sub(top_level_folder, return_with_prefix=True, local_only=False)[source]#
    +

    Convenience function for get_next_sub_or_ses to find the next subject number.

    Parameters:
    @@ -964,9 +1040,9 @@
    -
    -get_next_ses_number(top_level_folder, sub, return_with_prefix=True, local_only=False)[source]#
    -

    Convenience function for get_next_sub_or_ses_number +

    +get_next_ses(top_level_folder, sub, return_with_prefix=True, local_only=False)[source]#
    +

    Convenience function for get_next_sub_or_ses to find the next session number.

    Parameters:
    @@ -1068,7 +1144,7 @@
    static check_name_formatting(names, prefix)[source]#

    Pass list of names to check how these will be auto-formatted, -for example as when passed to create_folders() or upload() +for example as when passed to create_folders() or upload_custom() or download()

    Useful for checking tags e.g. @TO@, @DATE@, @DATETIME@, @DATE@. This method will print the formatted list of names,

    @@ -1114,15 +1190,17 @@ @@ -397,97 +400,98 @@

    How to Create Folders#

    -

    datashuttle creates project folders -according to the NeuroBlueprint -specification.

    -

    Before jumping into the folder-creation process, we’ll quickly -review the key features of the -specification) -that are created folders must conform to.

    -

    In NeuroBlueprint for each -subject and session there are datatype folders in which acquired -data is saved:

    +

    datashuttle automates project folder creation and validation +according to the NeuroBlueprint.

    +

    Before starting with folder creation, we’ll briefly introduce the +NeuroBlueprint specification.

    +

    An example NeuroBlueprint project:

    ../../_images/NeuroBlueprint_project_tree_dark.png ../../_images/NeuroBlueprint_project_tree_light.png
    -

    The subject and session folder names must begin with sub- and ses- -respectively—other key-value pairs are optional. All acquired data must go -in a datatype folder with a -standard name.

    -

    No strong requirements are made on filenames of acquired data, but it is recommended -to include the subject and session number if possible.

    -

    Now the specification has been introduced, let’s dive in to folder creation!

    +

    Some key features:

    +
      +
    • The rawdata top-level-folder contains acquired data. Following acquisition +this data is never edited.

    • +
    • The derivatives top-level folder contains all processing and analysis outputs. There are +no fixed requirements on its organisation.

    • +
    • Subject and session folders are formatted as key-value pairs.

    • +
    • Only the sub- and ses- key-value pairs are required (additional pairs are optional).

    • +
    • Each session contains datatype folders, in which acquired data is stored.

    • +
    +

    Now, let’s get started with folder creation!

    Creating project folders#

    -

    In the below example, folders will be created in the rawdata folder, -within the my_first_project project folder.

    -

    The project folder is located at the local path +

    The project-name folder is located at the local path specified when setting up the project.

    +

    We will now create subject, session and +datatype folders within a rawdata top-level folder.

    We will create datatype folders behav and funcimg -within a ses-001_<todays_date> for both sub-001 and sub-002.

+within a ses-001_<todays_date> for both sub-001 and sub-002.

    The below example uses the @DATE@ convenience tag to automate -creation of today’s date. See the section below for more -information on -convenience tags.

    +creation of today’s date. See the +convenience tags. +section for more information on these tags.

    +

    Folders are created in the Create tab on the Project Manager page.

    ../../_images/how-to-create-folders-example-dark.png ../../_images/how-to-create-folders-example-light.png
    -

    Folders are created in the Create tab on the Project Manager page.

    -

    We can fill in the subject and session names and select datatype -folders to create.

    -

    Note that the sub- or ses- prefix is not actually required and will -be automatically added.

    -

    Create tab shortcuts

    +

    We can enter the subject and session names into the input boxes, +and select datatype folders to create. Clicking Create Folders +will create the folders within the project.

    +

    A number of useful shortcuts to streamline this process are described below.

    +

    Create shortcuts

    The Create tab has a lot of useful shortcuts.

    -

    First, double-clicking the subject or session input boxes will suggest +

    First, double-clicking subject or session input boxes will suggest the next subject or session to create, based on the local project. If a Name Template is set, the suggested name will also include the template.

    -

    Holding CTRL while clicking will add the sub- +

    Holding CTRL while clicking will enter the sub- or ses- prefix only.

    Next, the Directory Tree has a number of useful shortcuts. These are -activated by hovering the mouse and pressing one of the below combination -of keys (you may need to click the Directory Tree) first:

+activated by hovering the mouse over a file or folder and pressing +one of the below key combinations +(you may need to click the Directory Tree first):

    Fill an input

    CTRL+F will fill the subject or session input with the name of the folder (prefixed with sub- or ses-) that is hovered over.

    Append to an input

    CTRL+A is similar to ‘fill’ above, but will instead append the name -to those already in the input. This allows creation of lists.

    +to those already in the input. This allows creation of multiple +subjects or sessions at once.

    -
    Open folder in system filebrowser

    CTRL+O will open (any) folder in the system filebrowser.

    +
    Open folder in system filebrowser

    CTRL+O will open a folder in the system filebrowser.

    Copy the full filepath.

    CTRL+Q will copy the entire filepath of the file or -folder that is hovered over.

    +folder.

    -

    Create tab Settings

    -

    Clicking the Settings button on the Create tab will give access -allow setting the top-level folder, and bypass validation.

    +

    Create Settings

    +

    Click the Settings button on the Create tab to set +the top-level folder, and bypass validation.

    ../../_images/how-to-create-folders-settings-dark.png ../../_images/how-to-create-folders-settings-light.png
    -
    Top-level folder

    This dropdown box will set whether folderes are created in the -rawdata or derivatives top-level folder

    +
    Top level folder

    This dropdown box will set whether folders are created in the +rawdata or derivatives top-level folder.

    -
    Bypass validation

    If on, this setting will allow folder creation even if the names -are not valid (e.g. break with +

    Bypass validation

    This setting will allow folder creation even if the names +are not valid (i.e. they break with NeuroBlueprint).

    -

    This screen is also used to set validation against +

    This screen is also used to validate and autofill with Name Templates.

    -

    Creating folders can be done with the create_folders() method in the Python API. -We simply need to provide the subject, session and datatypes to create:

    +

    The create_folders() method is used for folder creation.

    +

    We simply need to provide the subject, session and datatypes to create:

    from datashuttle import DataShuttle
     
     project = DataShuttle("my_first_project")
    @@ -500,36 +504,28 @@ 

    )

    -

    We provides datashuttle with a list of subject, session and -datatype folders to create.

    -

    Note that the sub- or ses- prefix is not actually required and will -be automatically added.

    The method outputs created_folders, which contains a list of all -Paths to all created datatype folders.

    -
    +Paths to all created datatype folders. See the below section for +details on the @DATE@ and other convenience tags.

    +

By default, an error will be raised if the folder names break +with NeuroBlueprint +and folders will not be created. +The bypass_validation argument can be used to bypass this feature.

    -
    -

    Folder Validation

    -

    The names of the folders to be created are validated on the fly against -NeuroBlueprint. -If the folder names will break with the specification, an error will -be raised and the folders will not be created.

    -

    Validation can be extended by defining custom templates for subject -or session names—if folders don’t match the template an error will be raised. -See How to use Name Templates for more information.

    Convenience Tags#

    There are four convenience tags that can be used in subject or session -names when creating folders. They automate the inclusion of:

    +names when creating folders.

    +

    They automate the inclusion of:

    Today’s Date

    The @DATE@ tag will include today’s date in the format YYYYMMDD.
e.g. If today’s date is 16th May 2024, the name "ses-001_@DATE@" will create the folder ses-001_date-20240516.

    Current Time

    The @TIME@ tag will include the current time in the format HHMMSS.
    -e.g. If the current time is 15:10:05 (i.e. 10 minutes and 5 seconds past 3 pm.), +e.g. If the current time is 15:10:05 (i.e. 10 minutes and 5 seconds past 3 p.m.), the name "ses-001_@TIME@" will create the folder ses-001_time-151005.

    Current Datetime

    The @DATETIME@ tag will add the diff --git a/pages/how_tos/install.html b/pages/how_tos/install.html index 5f2232d5..fe96a80a 100644 --- a/pages/how_tos/install.html +++ b/pages/how_tos/install.html @@ -37,7 +37,7 @@ - + @@ -47,6 +47,8 @@ + + @@ -127,7 +129,7 @@ -

    Datashuttle v0.3.0

    +

    datashuttle v0.4.0

    @@ -148,7 +150,7 @@ - - + + + @@ -397,9 +400,17 @@

    How to Install#

    -

    datashuttle requires Python and a number of other dependencies to run.

    -

    The easiest way to install datashuttle is through conda, -but installation via pip and for developers is also supported.

    +

    datashuttle requires +Python +to run.

    +

    The easiest way to install datashuttle is through the Python package manager +conda. However, +installation via pip is also supported.

    +
    +

    Warning

    +

    datashuttle is currently in the beta release phase. Please +get in contact if you experience any bugs or unexpected behaviour.

    +

    Installation instructions#

    @@ -408,14 +419,14 @@

    Installation instructions

    If you do not already have conda on your system, first download and install conda.

    -

    If you are on Windows, the easiest way to use conda is through the Anaconda Prompt

    +

    If you are on Windows, the easiest way to use conda is through the Anaconda Prompt.

    Next, create and activate an environment. You can call your environment whatever you like, -we’ve used datashuttle-env.

    +we’ve used datashuttle-env:

    conda create -n datashuttle-env python=3.10
     conda activate datashuttle-env
     
    -

    then install datashuttle and all dependencies with

    +

    Next, install datashuttle and all dependencies with:

    conda install -c conda-forge datashuttle
     
    @@ -426,7 +437,7 @@

    Installation instructionsRClone, which is not available through pip. Rclone must be installed separately.

    Once Rclone is installed, datashuttle and all other dependencies can be -installed in a pipenv or virtualenv environment with

    +installed in a pipenv or virtualenv environment with:

    pip install datashuttle
     
    @@ -448,7 +459,7 @@

    Installation instructions

    Check the installation#

    To check datashuttle has successfully installed, launch the -graphical interface with

    +graphical interface with:

    datashuttle launch
     
    diff --git a/pages/how_tos/make-a-new-project.html b/pages/how_tos/make-a-new-project.html index 367d5ff5..a60f4d44 100644 --- a/pages/how_tos/make-a-new-project.html +++ b/pages/how_tos/make-a-new-project.html @@ -37,7 +37,7 @@ - + @@ -47,6 +47,8 @@ + + @@ -127,7 +129,7 @@ -

    Datashuttle v0.3.0

    +

    datashuttle v0.4.0

    @@ -148,7 +150,7 @@ - - + + + @@ -397,7 +400,7 @@

    How to Make a New Project#

    -

    This guide will cover all we need to know for setting up a new project +

    This guide will cover all you need to know for setting up a new project in datashuttle.

    First, make sure you have installed and launched datashuttle.

    @@ -405,8 +408,8 @@
    1. project name: The name of the project (must be the same for all local machines tied to a project).

    2. -
    3. local path: location of the project our local machine, where we will save acquired data.

    4. -
    5. central path: location of the central data storage, where we will upload the acquired data.

    6. +
7. local path: location of the project on our local machine.

    8. +
    9. central path: location of the project on the central data storage machine.

    ../../_images/datashuttle-overview-dark.png ../../_images/datashuttle-overview-light.png @@ -420,9 +423,10 @@ or IT department.

    When central storage is a mounted drive#

    -

    When the central storage machine is mounted as a mounted drive, we -simply need to set the central path as the path to -the central project as it appears on your local machine’s filesystem.

    +

    In this case, the central storage machine is mounted as a drive +on the local machine.

    +

    We simply need to set the central path as the path to +the central project as it appears on the local machine’s filesystem.

    Local Filesystem Example
    @@ -434,47 +438,48 @@ your machine at X:\username. You want your project folder to be located at X:\username\my_projects.

    In this case, you can set the central_path to X:\username\my_projects -and with connection_method to local_filesystem.

    -

    You may pass the local or central path without the project name, -it will be automatically included. The project folder will be located -at X:\username\my_projects\my_project_name.

    +and with connection_method to local filesystem.

    +

    The project folder will be located +at X:\username\my_projects\my_project_name. +You may pass the local or central path without the project name, +(it will be automatically included).

    -

    In addition, we need to tell datashuttle the project name and -local path where we want to put our project and hold data on -our local machine.

    -
    +
    -

    From the launch page, click Make New Project and you will -be taken to the page where project details must be entered

    +

    First, click the Make New Project button from the launch page.

    +

    The Make New Project screen will be displayed:

    ../../_images/tutorial-1-make-screen-dark.png ../../_images/tutorial-1-make-screen-light.png

    Setting up datashuttle is as simple as entering the Project name, -Local Path and Central Path into the relevant input boxes. The paths -do not need to end in the project name - this will be automatically added.

    -

    You can paste a path into the input boxes with CTRL+V, copy the input path -with CTRL+Q or open the path in your systems filebrowser with CTRL+O.

    -

    Use the Select feature to navigate to the local and central paths -on your local filesystem.

    +Local Path and Central Path into the relevant input boxes.

    +

The paths do not need to end in the project name—it will be automatically added. +You can paste a path into the input boxes with CTRL+V or use Select +to navigate to paths on your local filesystem.

    By default, the Connection Method is set to Local Filesystem, so this does not need to be changed.

    Once all information is input, click Save to set up the project. You can then navigate to the Project Manager screen by clicking the Go To Project Screen that appears.

    +
    +

    Note

    +

The contents of the input boxes can be copied +with CTRL+Q, or opened in the system filebrowser with CTRL+O.

    +
    -

    We will first import and initialise the DataShuttle class with our -project_name.

    +

    We will first import the DataShuttle class and initialise +it with the project_name:

    from datashuttle import DataShuttle
     
     project = DataShuttle("my_first_project")
     
    -

    Next, we can use the make_config_file() method to set up a new +

    Next, the make_config_file() method can be used to set up a new project with the desired local path, central path and connection method.

    project.make_config_file(
    @@ -484,8 +489,6 @@
     )
     
    -

    Now the project is set up! See the later section for -optional arguments that control data transfers.

    @@ -494,13 +497,13 @@

    Another common method of connecting to a central storage machine is via SSH.

    To set up SSH connection -we need to give datashuttle the address of the machine to connect to, -and now the central path will be relative to the machine -we are connecting to.

    -

    central_host_id: This is the address of the server you want to connect to.

    -

    central_host_username: This is your profile name on the server you want to -connect to.

    -

    central path: This is the path to the project on the server.

    +we need to provide:

    +
      +
    1. central_host_id: This is the address of the server you want to connect to.

    2. +
    3. central_host_username: This is your profile username on the server you want to +connect to.

    4. +
    5. central path: This is the path to the project on the server.

    6. +
    SSH Example
    @@ -511,14 +514,15 @@

    Let’s say the central project was stored on a remote server with address ssh.swc.ucl.ac.uk, and your account username on the server is myusername.

    -

    Finally, we want to store the project at the location (on the server) +

    We want to store the project at the location (on the server) /ceph/my_lab/my_name/my_projects/project_name/.

    -

    Then the input to datashuttle would be

    +

    Then the settings would be:

    central host id: ssh.swc.ucl.ac.uk

    central host username: myusername

    central path: /ceph/my_lab/my_name/my_projects/project_name/

    You may pass the local path and central path without the project name, it will be automatically included.

    +

    Note that Linux-based shortcuts (e.g. ~ for home directory) are not permitted.

    @@ -533,16 +537,14 @@

    Next, input the Central Host ID, Central Host Username and Central Path as described above.

    Clicking Save will save these project configs. A button -Setup SSH Connection will appear. Click this to -confirm the server and enter your password to the server -(you will only need to do this once) -``

    +Setup SSH Connection will appear. Click to +confirm the server ID and enter your password +(you will only need to do this once).

    -

    In Datashuttle, the -connection_method configuration must be set to "ssh" +

    The connection_method configuration must be set to "ssh" to use the SSH protocol for data transfers.

    Enter the central_path, central_host_id and central_host_username as described above.

    @@ -556,43 +558,16 @@

    Next, a one-time command to set up the SSH connection must be run:

    -
    project.setup_ssh_connection_to_central_server()
    +
    project.setup_ssh_connection()
     
    -

    Running setup-ssh-connection-to-central-server will require verification +

    Running setup_ssh_connection() will require verification that the SSH server connected to is correct (pressing y to proceed).

    Finally, your password to the central server will be requested (you will only need to do this once).

    -
    -

    Extra arguments (Python API)#

    -

    A number of settings that control the behaviour of transfers -can be set with the make_config_file() method.

    -

    These configs are not relevant for the graphical interface, with the exception of -overwrite_existing_folders which set directly on the -graphical interface’s Transfer screen.

    -
    -
    overwrite_existing_files

    Determines whether folders and files are overwritten -during transfer. By default, Datashuttle does not overwrite any existing -folder during data transfer.

    -e.g. if the file sub-001_ses-001_measure-trajectories.csv exists on -the central project, it will never be over-written during upload -from the local to central project, even if the local version is newer.

    -To change this behaviour, the configuration overwrite_existing_files can be set to True. -If overwrite_existing_files is True, files in which the timestamp of the -target directory will be overwritten if their -timestamp is older than the corresponding file in the source directory.

    -
    -
    transfer_verbosity

    Set to "vv" for additional detail on the -transfer operation. Set to "v" to only see each file that is transferred -as well as significant events that occur during transfer.

    -
    -
    show_transfer_progress

    When True, real-time transfer statistics will be reported and logged.

    -
    -
    -
    @@ -619,7 +594,6 @@ diff --git a/pages/how_tos/read-logs.html b/pages/how_tos/read-logs.html index 5c9c0b45..f88311f4 100644 --- a/pages/how_tos/read-logs.html +++ b/pages/how_tos/read-logs.html @@ -37,7 +37,7 @@ - + @@ -47,6 +47,8 @@ + + @@ -127,7 +129,7 @@ -

    Datashuttle v0.3.0

    +

    datashuttle v0.4.0

    @@ -148,7 +150,7 @@ - - + + + @@ -397,12 +400,11 @@

    How to Read the Logs#

    -

    When commands that create folders, change project configs -or transfer data are run, datashuttle stored detailed -logs of these actions.

    +

    datashuttle stores detailed logs when commands that +create folders, change project configs or perform data transfers are run.

    These logs can be accessed and read directly in the -Graphical Interface, or located on your filesysetm -and opened in your favourite text file reader.

+graphical interface, or located on your filesystem +and opened in your favourite text editor.

    Logs are stored as ISO8601-prefixed filenames that includes the relevant datashuttle method performed.

    @@ -423,17 +425,18 @@

    Find and read the logs diff --git a/pages/how_tos/top-level-folder.html b/pages/how_tos/top-level-folder.html deleted file mode 100644 index 923fa033..00000000 --- a/pages/how_tos/top-level-folder.html +++ /dev/null @@ -1,538 +0,0 @@ - - - - - - - - - - - How to Set the Top-level Folder — datashuttle - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    - - - - - - - - - - -
    -
    -
    - - - - Ctrl+K -
    -
    - - - - -
    -
    - - - - - -
    - - - - - - - - - - - -
    - -
    - - -
    -
    - -
    -
    - -
    - -
    - - - - -
    - -
    - - -
    -
    - - - - - -
    - -
    -

    How to Set the Top-level Folder#

    -

    NeuroBlueprint specifies -the top-level folder inside the project folder must be rawdata or derivatives.

    -../../_images/NeuroBlueprint_project_tree_dark.png -../../_images/NeuroBlueprint_project_tree_light.png -
    -

    In datashuttle, the top level folder is relevant when:

    -
      -
    1. creating folders (i.e. in rawdata or derivatives)

    2. -
    3. transferring data with the top-level method or custom.

    4. -
    -

    Using the Graphical interface, the top-level folder is -set by a drop-down menu on the relevant tab (Create or Transfer). -^^ TODO: link to sections!!! TODO TODO TODO

    -

    However, in the Python API methods act in rawdata or derivatives -according to a stored top-level folder setting.

    -
    -

    Setting the level folder in the Python API#

    -

    In the Python API the working top level folder -is stored as a persistent property, accessible with -the get_top_level_folder() and set_top_level_folder() methods.

    -

    This is to avoid continuously inputting the top-level folder -for every method call.

    -

    When making folders, create_folders will only create folders in the -working top-level folder.

    -

    Transferring folders (e.g. with upload() or download()) will -only transfer folders in the working top-level folder -(unless upload_entire_project() or download_entire_project() is used).

    -

    In the below example we will create and transfer folders in rawdata. -Then, the top-level folder is switched to derivatives and the actions repeated.#

    -
    project.set_top_level_folder("rawdata")
    -
    -# make folders in `rawdata`
    -project.create_folders(sub="sub-002")
    -
    -# transfer files in `rawdata`
    -project.upload_data(sub_names="all", ses_names="all", datatype="all")
    -
    -# set working top-level folder to `derivatives`
    -project.set_top_level_folder("derivatives")
    -
    -print(project.get_top_level_folder())
    -# "derivatives"
    -
    -# create folders in derivatives
    -project.create_folders("sub-002")
    -
    -# transfer folders in derivatives
    -project.download_data()
    -
    -
    -
    -
    -
    - - -
    - - - - - -
    - - - -
    - - -
    -
    - -
    - -
    -
    -
    - - - - - - - - \ No newline at end of file diff --git a/pages/how_tos/transfer-data.html b/pages/how_tos/transfer-data.html index 36c482c6..66526b70 100644 --- a/pages/how_tos/transfer-data.html +++ b/pages/how_tos/transfer-data.html @@ -37,7 +37,7 @@ - + @@ -47,6 +47,8 @@ + + @@ -127,7 +129,7 @@ -

    Datashuttle v0.3.0

    +

    datashuttle v0.4.0

    @@ -148,7 +150,7 @@ - - + + + @@ -397,50 +400,45 @@

    How to Transfer Data#

    -

    Transferring data between the local project and the project located -on central storage is a key feature of datashuttle. It allows:

    +

    datashuttle facilitates convenient transfer of data between +local and central storage machines.

    +

    This includes:

      -
    • Transfer of data from an acquisition machine to the central project.

    • -
    • Convenient integration of data collected from multiple acquisition.

    • -
    • Pulling subsets of data from central storage to analysis machines.

    • +
    • ‘Uploading’ data from an acquisition machine to central data storage.

    • +
    • ‘Downloading’ subsets of data from central storage to analysis machines.

    ../../_images/datashuttle-overview-light.png ../../_images/datashuttle-overview-dark.png

    Transfer Direction

    -

    In datashuttle, the term upload refers to transfer -from the local machine to central storage. -Download refers to transfer from central storage to -a local machine.

    +

    In datashuttle, the upload refers to transfer +from a local to the central machine. +Download refers to transfer from the central machine to a local machine.

    -

    There are three main methods to transfer data in datashuttle. These -allow transfer between:

    +

    There are three main methods to transfer data. These +allow transfer across:

      -
    1. The entire project (all files in both rawdata and derivatives)

    2. -
    3. A specific top-level-folder (e.g. all files in rawdata)

    4. -
    5. A custom subset of subjects / sessions / datatypes.

    6. +
    7. the entire project (all files in both rawdata and derivatives)

    8. +
    9. only the rawdata or derivatives top level folder.

    10. +
    11. a custom subset of subjects / sessions / datatypes.

    -

    Below we will explore each method in turn, as well as consider -configuring transfer including the important -overwrite existing files option.

    Warning

    -

    The -Overwrite Existing Files -setting is very important.

    -

    By default it is turned off and a transfer will never overwrite a -file that already exists, even if the source version is newer.

    +

    The overwrite existing files setting is very important. +It takes on the options never, always or if source newer.

    +

    See the transfer options section for full details on +this and other transfer settings.

    Transfer the entire project#

    -

    The first option is to transfer the entire project, -that is all files in the rawdata and derivatives +

    The first option is to transfer the entire project—all +files in the rawdata and derivatives top-level-folders.

    This includes all files inside or outside a subject, session or datatype folder.

    This mode is useful for data acquisition when overwrite existing files -is off. Any new files (i.e. newly acquired data) will be transferred, -to central storage, while any existing files will be ignored.

    +is set to never. Any new files (i.e. newly acquired data) will be transferred +to central storage while existing files will be ignored.

    -

    The command to upload the entire project is

    +

    The method to upload the entire project is:

    project.upload_entire_project()
     
    -

    while the command to download the entire project is

    +

    while the method to download the entire project is:

    project.download_entire_project()
     
    -
    -

    Transfer the top-level folder#

    -

    This mode acts almost identically to +

    +

    Transfer only rawdata or derivatives#

    +

    This acts almost identically to transferring the entire project -however it will only transfer files within a -particular top-level folder (rawdata or derivatives).

    -

    This mode is also useful for quickly uploading new files -during data acquisition (rawdata) or analysis (derivatves), when -overwrite existing files is off—any newly acquired or generated files -will be transfer, ignoring any previously existing files.

    +but will only transfer files within a +single top-level folder (rawdata or derivatives).

    -

    The upload_all() or download_all() methods can be used with the argument top_level_folder to specify -the top-level folder to transfer within.

    -

    In the next example, we will upload rawdata downloading derivatives.

    -
    project.upload_all("rawdata")
    +

    The upload_rawdata(), upload_derivatives() and download_rawdata(), download_derivatives() +methods target transfer to a particular top-level folder.

    +

    The below example will upload rawdata then download derivatives.

    +
    project.upload_rawdata()
     
    -project.download_all("derivatives")
    +project.download_derivatives()
     
    @@ -504,49 +497,38 @@

    Custom transfers#

    -

    Custom transfers permit full customisation of the files inside -or outside of subject, session and datatype folders.

    -

    Custom transfers are particularly useful during data analysis, in -which a subset of data can be downloaded from central storage. -For example, you want to only transfer behavioural data from -test sessions—custom transfers allow you to do this with ease.

    -

    See below for how to run custom transfers, as well as -certain keywords and convenience tags to fully customise data transfer.

    -

    For example, all_sub in the below examples tells datashuttle -to consider only files and folders within subject folders for transfer. -Files or folders within rawdata that are not sub- -folders will not be transferred.

    -

    See below for full details on custom transfer keywords and -convenience tags.

    +

    Custom transfers permit full customisation of data transfer.

    +

Custom transfers can transfer select subsets of data. +For example, you may only want to download behavioural data from +test sessions for a particular data analysis.

    +

    Select Custom on the Transfer tab to open the custom transfer settings.

    ../../_images/how-to-transfer-custom-dark.png ../../_images/how-to-transfer-custom-light.png
    -

    Select Custom on the Transfer tab to select custom transfers.

    The top-level folder can be set by the first dropdown menu.

    Next, subject and session keywords can be added to customise -files to transfer. In this example, data from all subject -folders, all first session behavioral data will be transferred.

    +files to transfer. In this example, the first behavioural session for +all subjects will be transferred.

    Subject and sessions can be added to the input boxes automatically by hovering over sub- or ses- folders on the DirectoryTree. Pressing CTRL+F will ‘fill’ the input with the foldername, while CTRL+A will ‘append’ the foldername, creating a list of subjects or sessions to transfer.

    -

    Use the Upload / Download switch to control transfer direction, -and press Transfer to begin.

    ../../_images/how-to-transfer-datatypes-dark.png ../../_images/how-to-transfer-datatypes-light.png
    +

    Finally, click Transfer to begin.

    -

    The upload() and download() methods can be used for custom +

    The upload_custom() and download_custom() methods can be used for custom data transfers. For example, to perform a custom upload:

    -
    project.upload(
    +
    project.upload_custom(
         top_level_folder="rawdata",
         sub_names="all_sub",
         ses_names="ses-001_@*@",
    @@ -554,55 +536,62 @@
     )
     
    -

    In this example, data from all subject -folders, all first session behavioral data will be uploaded.

    +

    In this example, the first behavioural session for +all subjects will be transferred.

    Custom transfer keywords#

    Custom transfer keywords determine how files and folders outside of subject, session and datatype folders are handled.

    -

    Ideally, all data will be stored in datatype folders. However, this +

    Ideally, all data will be stored in datatype folders—however this is not always feasible.

    -

    In this case, custom transfer keywords allows flexible handling of +

    In such cases custom transfer keywords allows flexible handling of the transfer of non sub-, ses- prefixed or datatype folders at the subject, session and datatype level.

    +

    Note that the dry run argument can be used +to perform a dry-run transfer to check transfers proceed as expected.

    -
    Subject level
      -
    • all - All subject (i.e. prefixed with sub-) folders and non-subject files within the -top-level folder will be transferred.

    • -
    • all_sub - Subject folders only and them will be transferred.

    • -
    • all_non_sub - All files and folders that are not prefixed with sub-, -within the top-level folder, will be transferred. -Any folders prefixed with sub- at this level will not be transferred.

    • +
      Subject level

      For files and folders within top-level folders:

      +
      +
        +
      • all - All files and non-subject folders will be transferred. +All subject (i.e. prefixed with sub-) folders will be considered for transfer.

      • +
      • all_sub - All subject folders will be considered for transfer.

      • +
      • all_non_sub - All files and non-subject folders will be transferred. +Subject folders will not be transferred.

      -
      Session Level
        -
      • all : All session and non-session files and folders within a subject level folder -(e.g. sub-001) will be transferred.

      • -
      • all_ses : Session folders only (i.e. prefixed with ses-) and everything within them will be transferred.

      • -
      • all_non_ses : All files and folders that are not prefixed with ses-, within a subject folder, -will be transferred. Any folders prefixed with ses- will not be transferred.

      • +
        Session Level

        For sessions within subjects considered for transfer:

        +
        +
          +
        • all : All files and non-session folders will be transferred. +All session (i.e. prefixed with ses-) folders will be considered for transfer.

        • +
        • all_ses : All session folders will be considered for transfer.

        • +
        • all_non_ses : All files and non-session folders will be transferred. +Session folders will not be transferred.

        -
        Datatype Level:
          -
        • all : All datatype folders at the subject or session folder level will be transferred, -as well as all files and folders within selected session folders.

        • -
        • all_datatype : All datatype folders (e.g. behav, ephys, funcimg, anat) within a session folder will be -transferred. Non-datatype folders at the session level will not be transferred

        • -
        • all_non_datatype : Non-datatype folders within session folders only will be transferred

        • +
          Datatype Level:

          For datatype folders (e.g. behav, ephys, funcimg, anat) +within sessions considered for transfer:

          +
          +
            +
          • all : All files, datatype folders and non-datatype folders will be transferred.

          • +
          • all_datatype : All datatype folders will be transferred. +Files and non-datatype folders will not be transferred.

          • +
          • all_non_datatype : Files and non-datatype folders will be transferred. +Datatype folders will not be transferred.

    -
    -

    Convenience Tags#

    +
    +

    Custom transfer convenience tags#

    These tags can be included in subject or session names to allow further customisation of data transfer.

    Wildcard

    The @*@ tag can be used to match any portion of a subject or session name. e.g. ses-001_date-@*@ will transfer all first sessions, matching all possible dates.

    e.g. ses-001_date-@*@ will transfer all first sessions, matching all possible dates.

    Transfer a range

    The @TO@ tag can be used to target a range of subjects for transfer. e.g. sub-001@TO@025 will transfer the 1st up to and including the 25th subject.

    @@ -610,10 +599,39 @@

    Convenience Tags -

    Configuring data transfer#

    -

    !! overview

    -

    !! link to configs

    +
    +

    Transfer Options#

    +
    +
    overwrite existing files

    By default, this option is set to never — a transfer will never overwrite a file that already exists, even if the source and destination modification datetimes or sizes are different.

    +
    +

    If always, when there are differences in datetime or size between the source and destination file, the destination file will be overwritten. This includes when the source file is older or smaller than the destination.

    +
    +

    Finally, using if source newer ensures data is only overwritten when the source file has a more recent modification time than the destination. If modification datetimes are equal, the destination will be overwritten if the sizes or checksums are different.

    +
    +

    Under the hood, transfers are made with calls to Rclone. Using never calls Rclone’s copy function with the flag --ignore-existing. Using always copies without this flag (using Rclone’s default overwrite behaviour). Using if source newer calls copy with the --update flag.

    +
    +
    +
    +
    dry run

    Performs a dry-run transfer in which no data is transferred but logs +are saved as if a transfer had taken place. +This is a useful way to test if a transfer will run as expected.

    +
    +

    @@ -640,13 +658,13 @@

    Convenience Tags

    diff --git a/pages/how_tos/update-configs.html b/pages/how_tos/update-configs.html index 68aa68e2..7918ffc1 100644 --- a/pages/how_tos/update-configs.html +++ b/pages/how_tos/update-configs.html @@ -37,7 +37,7 @@ - + @@ -47,6 +47,8 @@ + + @@ -127,7 +129,7 @@ -

    Datashuttle v0.3.0

    +

    datashuttle v0.4.0

    @@ -148,7 +150,7 @@ - - + + + @@ -393,7 +396,7 @@

    How to Update Configs#

    -

    Once a project has been created, the configs can be updated at any time.

    +

    The project configs can be updated at any time following the initial setup.

    diff --git a/pages/tutorials/getting_started.html b/pages/tutorials/getting_started.html index e250ee7d..f35629f1 100644 --- a/pages/tutorials/getting_started.html +++ b/pages/tutorials/getting_started.html @@ -37,7 +37,7 @@ - + @@ -47,6 +47,8 @@ + + @@ -127,7 +129,7 @@ -

    Datashuttle v0.3.0

    +

    datashuttle v0.4.0

    @@ -141,7 +143,7 @@

    check_name_formatting(names, prefix)

    Pass list of names to check how these will be auto-formatted, for example as when passed to create_folders() or upload() or download()

    Pass a list of names to check how these will be auto-formatted, for example when passed to create_folders(), upload_custom() or download_custom()

    create_folders(top_level_folder, sub_names)

    Create a subject / session folder tree in the project folder.

    download(top_level_folder, sub_names, ses_names)

    download_custom(top_level_folder, sub_names, ...)

    Download data from the central project folder to the local project folder.

    download_all(top_level_folder[, dry_run, ...])

    Convenience function to download all data.

    download_derivatives([...])

    Download files in the derivatives top level folder.

    download_entire_project()

    download_entire_project([...])

    Download the entire project (from 'central' to 'local'), i.e. including every top level folder (e.g.

    download_specific_folder_or_file(filepath[, ...])

    download_rawdata([overwrite_existing_files, ...])

    Download files in the rawdata top level folder.

    download_specific_folder_or_file(filepath[, ...])

    Download a specific file or folder.

    get_central_path()

    get_central_path()

    Get the project central path.

    get_config_path()

    get_config_path()

    Get the full path to the DataShuttle config file.

    get_datashuttle_path()

    get_datashuttle_path()

    Get the path to the local datashuttle folder where configs and other datashuttle files are stored.

    get_existing_projects()

    get_existing_projects()

    Get a list of existing project names found on the local machine.

    get_local_path()

    get_local_path()

    Get the projects local path.

    get_logging_path()

    get_logging_path()

    Get the path where datashuttle logs are written.

    get_name_templates()

    get_name_templates()

    Get the regexp templates used for validation.

    get_next_ses_number(top_level_folder, sub[, ...])

    Convenience function for get_next_sub_or_ses_number to find the next session number.

    get_next_ses(top_level_folder, sub[, ...])

    Convenience function for get_next_sub_or_ses to find the next session number.

    get_next_sub_number(top_level_folder[, ...])

    Convenience function for get_next_sub_or_ses_number to find the next subject number.

    get_next_sub(top_level_folder[, ...])

    Convenience function for get_next_sub_or_ses to find the next subject number.

    make_config_file(local_path, central_path, ...)

    make_config_file(local_path, central_path, ...)

    Initialise the configurations for datashuttle to use on the local machine.

    set_name_templates(new_name_templates)

    set_name_templates(new_name_templates)

    Update the persistent settings with new name templates.

    setup_ssh_connection_to_central_server()

    setup_ssh_connection()

    Setup a connection to the central server using SSH.

    show_configs()

    show_configs()

    Print the current configs to the terminal.

    upload(top_level_folder, sub_names, ses_names)

    upload_custom(top_level_folder, sub_names, ...)

    Upload data from a local project to the central project folder.

    upload_all(top_level_folder[, dry_run, init_log])

    Convenience function to upload all data.

    upload_derivatives([...])

    Upload files in the derivatives top level folder.

    upload_entire_project()

    upload_entire_project([...])

    Upload the entire project (from 'local' to 'central'), i.e. including every top level folder (e.g.

    upload_rawdata([overwrite_existing_files, ...])

    Upload files in the rawdata top level folder.

    upload_specific_folder_or_file(filepath[, ...])

    Upload a specific file or folder.