Accept external atlases by searching BIDS-Atlas datasets (PennLINC#1265)
tsalo authored Oct 4, 2024
1 parent 68caadf commit 9dea9cb
Showing 68 changed files with 973 additions and 741 deletions.
30 changes: 30 additions & 0 deletions .circleci/config.yml
@@ -72,6 +72,23 @@ jobs:
paths:
- /src/xcp_d/.circleci/data/ds001419-aroma

download_data_schaefer100:
<<: *dockersetup
steps:
- checkout
- restore_cache:
key: schaefer100-02
- run: *runinstall
- run:
name: Download BIDS-Atlas dataset
command: |
cd /src/xcp_d/.circleci
python get_data.py $PWD/data schaefer100
- save_cache:
key: schaefer100-02
paths:
- /src/xcp_d/.circleci/data/schaefer100

download_data_pnc:
<<: *dockersetup
steps:
@@ -161,6 +178,8 @@ jobs:
fi
- restore_cache:
key: fmriprepwithoutfreesurfer-03
- restore_cache:
key: schaefer100-02
- run: *runinstall
- run:
name: Run full xcp_d on nifti without freesurfer
@@ -505,6 +524,8 @@ jobs:
key: fmriprepwithoutfreesurfer-03
- restore_cache:
key: nibabies-04
- restore_cache:
key: schaefer100-02
- run: *runinstall
- run:
name: Run pytest on the tests directory
@@ -648,6 +669,13 @@ workflows:
tags:
only: /.*/

- download_data_schaefer100:
requires:
- build
filters:
tags:
only: /.*/

- pnc_cifti:
requires:
- download_data_pnc
@@ -718,6 +746,7 @@ workflows:
- nifti_without_freesurfer:
requires:
- download_data_fmriprepwithoutfreesurfer
- download_data_schaefer100
filters:
branches:
ignore:
@@ -743,6 +772,7 @@
- download_data_fmriprepwithoutfreesurfer
- download_data_ds001419
- download_data_nibabies
- download_data_schaefer100
filters:
branches:
ignore:
2 changes: 0 additions & 2 deletions .vscode/settings.json
@@ -6,7 +6,5 @@
"[python]": {
"editor.rulers": [99]
},
"python.linting.flake8Enabled": true,
"python.linting.enabled": true,
"python.analysis.typeCheckingMode": "off",
}
2 changes: 1 addition & 1 deletion README.rst
@@ -88,7 +88,7 @@ and other postprocessing/analysis tools are better suited for many types of data
XCP-D derivatives are not particularly useful for task-dependent functional connectivity analyses,
such as psychophysiological interactions (PPIs) or beta series analyses.
It is also not suitable for general task-based analyses, such as standard task GLMs,
as we recommend included nuisance regressors in the GLM step,
as we recommend including nuisance regressors in the GLM step,
rather than denoising data prior to the GLM.


169 changes: 110 additions & 59 deletions docs/usage.rst
@@ -469,42 +469,42 @@ plot_design_matrix.html#create-design-matrices>`_.
Then, create a confounds config file to include derivatives from ``custom_confounds``.
Something like this should work:

```yaml
name: my_custom_confounds
description: |
Nuisance regressors were task regressors convolved with an HRF and motion parameters.
confounds:
motion:
dataset: preprocessed
query:
space: null
cohort: null
res: null
den: null
desc: confounds
extension: .tsv
suffix: timeseries
columns:
- trans_x
- trans_y
- trans_z
- rot_x
- rot_y
- rot_z
task:
dataset: custom
query:
space: null
cohort: null
res: null
den: null
desc: confounds
extension: .tsv
suffix: timeseries
columns:
- condition1
- condition2
```
.. code-block:: yaml
name: my_custom_confounds
description: |
Nuisance regressors were task regressors convolved with an HRF and motion parameters.
confounds:
motion:
dataset: preprocessed
query:
space: null
cohort: null
res: null
den: null
desc: confounds
extension: .tsv
suffix: timeseries
columns:
- trans_x
- trans_y
- trans_z
- rot_x
- rot_y
- rot_z
task:
dataset: custom
query:
space: null
cohort: null
res: null
den: null
desc: confounds
extension: .tsv
suffix: timeseries
columns:
- condition1
- condition2
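
The config above expects the task regressors to already exist as a BIDS-style
``desc-confounds_timeseries.tsv`` file in the ``custom`` dataset.
Here is a minimal sketch of how such a file might be built with Nilearn
(the ``sub-X``/``task-Y`` entities, timings, and output path are placeholders,
not XCP-D conventions):

.. code-block:: python

    import numpy as np
    import pandas as pd
    from nilearn.glm.first_level import make_first_level_design_matrix

    # Placeholder acquisition parameters -- replace with your own.
    t_r, n_vols = 2.0, 300
    frame_times = np.arange(n_vols) * t_r

    # Task events for the two conditions named in the config above.
    events = pd.DataFrame({
        "onset": [10, 70, 130, 40, 100, 160],
        "duration": [15.0] * 6,
        "trial_type": ["condition1"] * 3 + ["condition2"] * 3,
    })

    # Convolve the events with a canonical HRF and keep only the task columns.
    design = make_first_level_design_matrix(frame_times, events, hrf_model="glover")
    design = design[["condition1", "condition2"]]

    # Save with entities matching the BOLD file so the "custom" dataset query finds it.
    design.to_csv(
        "custom_confounds/sub-X_task-Y_desc-confounds_timeseries.tsv",
        sep="\t", index=False, na_rep="n/a",
    )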
Command Line XCP-D with Custom Confounds
@@ -523,38 +523,89 @@ Last, run XCP-D with your custom configuration file and the path to the custom d
--nuisance-regressors /mnt/custom_config.yaml
********************
Custom Parcellations
********************
****************
External Atlases
****************

While XCP-D comes with many built in parcellations, we understand that many users will want to use
custom parcellations.
If you use the ``-cifti`` option, you can use the Human Connectome Project's ``wb_command`` to
generate the time series:
While XCP-D comes with many built-in parcellations,
we understand that many users will want to use different ones.

.. code-block:: bash
As long as the parcellation is organized in a BIDS-Atlas dataset and is in
fsLR-32k space (for CIFTI processing) or
MNIInfant, MNI152NLin6Asym, or MNI152NLin2009cAsym space (for NIfTI processing),
you can use it with XCP-D.

.. warning::
BIDS Extension Proposal 38 (Atlas Specification) has not been integrated into BIDS yet,
so the organization and naming for atlas datasets may change in the future.

We have attempted to follow the proposed structure in XCP-D,
but we cannot guarantee that this will not change.

.. tip::
The main elements of a BIDS-Atlas dataset that XCP-D relies on are:

1. There must be a ``dataset_description.json`` file with ``DatasetType`` set to ``"atlas"``.
2. The atlas metadata files must have the same entities as the atlas image files,
as PyBIDS does not support the inheritance principle when querying BIDS-Atlas datasets (yet).
3. There must be a TSV file for the atlas, with "index" and "label" columns
(a minimal sketch of points 1 and 3 follows this list).
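
A minimal sketch of points 1 and 3 (the dataset root, atlas name, and labels below
are hypothetical):

.. code-block:: python

    import json
    from pathlib import Path

    import pandas as pd

    root = Path("/data/atlases/myatlas")  # hypothetical dataset root
    (root / "atlas-MyAtlas").mkdir(parents=True, exist_ok=True)

    # 1. dataset_description.json with DatasetType set to "atlas".
    (root / "dataset_description.json").write_text(
        json.dumps(
            {"Name": "My custom atlas", "BIDSVersion": "1.9.0", "DatasetType": "atlas"},
            indent=2,
        )
    )

    # 3. A TSV describing the parcels, with "index" and "label" columns.
    pd.DataFrame({"index": [1, 2], "label": ["LeftRegion", "RightRegion"]}).to_csv(
        root / "atlas-MyAtlas" / "atlas-MyAtlas_dseg.tsv", sep="\t", index=False
    )

    # The atlas image (e.g., atlas-MyAtlas_space-fsLR_den-32k_dseg.dlabel.nii) and
    # its JSON sidecar (point 2) would sit beside the TSV, sharing its entities.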

wb_command \
-cifti-parcellate \
{SUB}_ses-{SESSION}_task-{TASK}_run-{RUN}_space-fsLR_den-91k_desc-residual_bold.dtseries.nii \
your_parcels.dlabel \
{SUB}_ses-{SESSION}_task-{TASK}_run-{RUN}_space-fsLR_den-91k_desc-residual_timeseries.ptseries.nii
To do this, use the ``--datasets`` and ``--atlases`` parameters.
The ``--datasets`` parameter should point to the directory containing the BIDS-Atlas dataset,
and the ``--atlases`` parameter should include the names of the atlases in the dataset to use.

After this, if one wishes to have a connectivity matrix:
For example, consider a scenario where you have two BIDS-Atlas datasets, one containing all of the
Schaefer 2018 resolutions and one containing the AAL atlas.
These datasets are in ``/data/atlases/schaefer`` and ``/data/atlases/aal``, respectively.
The file structure for these two datasets might look like this:

.. code-block::
/data/atlases/
schaefer/
dataset_description.json
atlas-Schaefer100/
atlas-Schaefer100_dseg.tsv
atlas-Schaefer100_space-fsLR_den-32k_dseg.dlabel.nii
atlas-Schaefer100_space-fsLR_den-32k_dseg.json
atlas-Schaefer200/
atlas-Schaefer200_dseg.tsv
atlas-Schaefer200_space-fsLR_den-32k_dseg.dlabel.nii
atlas-Schaefer200_space-fsLR_den-32k_dseg.json
...
atlas-Schaefer1000/
atlas-Schaefer1000_dseg.tsv
atlas-Schaefer1000_space-fsLR_den-32k_dseg.dlabel.nii
atlas-Schaefer1000_space-fsLR_den-32k_dseg.json
aal/
dataset_description.json
atlas-AAL/
atlas-AAL_dseg.tsv
atlas-AAL_space-fsLR_den-32k_dseg.dlabel.nii
atlas-AAL_space-fsLR_den-32k_dseg.json
You may want to only apply the Schaefer100 atlas from the ``schaefer`` dataset and the AAL atlas
from the ``aal`` dataset, along with one of XCP-D's built-in atlases (``4S156Parcels``).
Here's what the XCP-D call might look like:

.. code-block:: bash
wb_command \
-cifti-correlation \
{SUB}_ses-{SESSION}_task-{TASK}_run-{RUN}_space-fsLR_den-91k_desc-residual_timeseries.ptseries.nii \
{SUB}_ses-{SESSION}_task-{TASK}_run-{RUN}_space-fsLR_den-91k_desc-residual_boldmap.pconn.nii
apptainer run --cleanenv -B /data:/data xcpd_latest.sif \
/data/dataset/derivatives/fmriprep \
/data/dataset/derivatives/xcp_d \
participant \
--mode linc \
--datasets schaefer=/data/atlases/schaefer aal=/data/atlases/aal \
--atlases Schaefer100 AAL 4S156Parcels
XCP-D will search for ``atlas-Schaefer100``, ``atlas-AAL``, and ``atlas-4S156Parcels`` across the
``schaefer``, ``aal``, and XCP-D's built-in atlas datasets.
If the atlases are found, then they will be used for parcellation.
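
As a rough illustration of that lookup (this follows the naming convention above;
it is not XCP-D's internal code, which queries the datasets with PyBIDS):

.. code-block:: python

    from pathlib import Path

    # Hypothetical paths matching the example above.
    datasets = {
        "schaefer": Path("/data/atlases/schaefer"),
        "aal": Path("/data/atlases/aal"),
    }

    for atlas in ["Schaefer100", "AAL"]:
        hits = [
            match
            for root in datasets.values()
            for match in root.glob(f"atlas-{atlas}/atlas-{atlas}_*_dseg.dlabel.nii")
        ]
        if len(hits) > 1:
            # Mirrors the uniqueness rule noted in the box below.
            raise ValueError(f"Atlas '{atlas}' found in multiple datasets: {hits}")
        print(atlas, "->", hits[0] if hits else "not found")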

More information can be found at the HCP
`documentation <https://www.humanconnectome.org/software/workbench-command>`_.
.. important::

If you use the default NIFTI processing pipeline, you can use Nilearn's
`NiftiLabelsMasker <https://nilearn.github.io/stable/auto_examples/06_manipulating_images/\
plot_nifti_labels_simple.html#extracting-signals-from-brain-regions-using-the-niftilabelsmasker>`_
Atlas names must be unique across BIDS-Atlas datasets.
If two atlases have the same name, XCP-D will raise an error.


*********************
2 changes: 1 addition & 1 deletion xcp_d/cli/combineqc.py
@@ -47,4 +47,4 @@ def main(args=None):


if __name__ == "__main__":
raise RuntimeError("this should be run after xcp_d;\nrun XCP-D first")
raise RuntimeError("this should be run after XCP-D;\nrun XCP-D first")
18 changes: 15 additions & 3 deletions xcp_d/cli/parser.py
@@ -134,7 +134,7 @@ def _build_parser():
type=str,
nargs="+",
help=(
"Search PATH(s) for pre-computed derivatives. "
"Search PATH(s) for derivatives or atlas datasets. "
"These may be provided as named folders "
"(e.g., `--datasets smriprep=/path/to/smriprep`)."
),
@@ -503,10 +503,12 @@ def _build_parser():
action="store",
nargs="+",
metavar="ATLAS",
choices=all_atlases,
default=all_atlases,
dest="atlases",
help="Selection of atlases to apply to the data. All are used by default.",
help=(
"Selection of atlases to apply to the data. "
"All of XCP-D's built-in atlases are used by default."
),
)
g_atlases.add_argument(
"--skip-parcellation",
@@ -934,6 +936,16 @@ def _validate_parameters(opts, build_log, parser):
assert opts.output_type in ("censored", "interpolated", "auto")
assert opts.process_surfaces in (True, False, "auto")

# Add internal atlas datasets to the list of datasets
opts.datasets = opts.datasets or {}
if opts.atlases:
if "xcpdatlases" not in opts.datasets:
opts.datasets["xcpdatlases"] = load_data("atlases")

if any(atlas.startswith("4S") for atlas in opts.atlases):
if "xcpd4s" not in opts.datasets:
opts.datasets["xcpd4s"] = Path("/AtlasPack")

# Check parameters based on the mode
if opts.mode == "abcd":
opts.abcc_qc = True if (opts.abcc_qc == "auto") else opts.abcc_qc
8 changes: 5 additions & 3 deletions xcp_d/cli/parser_utils.py
@@ -140,7 +140,9 @@ class YesNoAction(Action):
def __call__(self, parser, namespace, values, option_string=None): # noqa: U100
"""Call the argument."""
lookup = {"y": True, "n": False, None: True, "auto": "auto"}
assert values in lookup.keys(), f"Invalid value '{values}' for {self.dest}"
if values not in lookup:
raise parser.error(f"Invalid value '{values}' for {self.dest}")

setattr(namespace, self.dest, lookup[values])


@@ -159,9 +161,9 @@ def __call__(self, parser, namespace, values, option_string=None):  # noqa: U100
name = loc.name

if name in d:
raise ValueError(f"Received duplicate derivative name: {name}")
raise parser.error(f"Received duplicate derivative name: {name}")
elif name == "preprocessed":
raise ValueError("The 'preprocessed' derivative is reserved for internal use.")
raise parser.error("The 'preprocessed' derivative is reserved for internal use.")

d[name] = loc
setattr(namespace, self.dest, d)
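
This hunk shows only the tail of the argparse action that builds the datasets dict.
For orientation, here is a self-contained sketch of a ``name=/path``-style action
(class and variable names are illustrative, not XCP-D's actual implementation):

```python
from argparse import Action, ArgumentParser
from pathlib import Path


class NamedPathAction(Action):
    """Collect NAME=PATH (or bare PATH) arguments into a dict -- minimal sketch."""

    def __call__(self, parser, namespace, values, option_string=None):
        d = {}
        for spec in values:
            name, _, loc = spec.rpartition("=")
            loc = Path(loc)
            name = name or loc.name  # bare path: fall back to the folder name
            if name in d:
                parser.error(f"Received duplicate derivative name: {name}")
            elif name == "preprocessed":
                parser.error("The 'preprocessed' derivative is reserved for internal use.")
            d[name] = loc
        setattr(namespace, self.dest, d)


parser = ArgumentParser()
parser.add_argument("--datasets", nargs="+", action=NamedPathAction, metavar="NAME=PATH")
opts = parser.parse_args(["--datasets", "schaefer=/data/atlases/schaefer", "/data/atlases/aal"])
print(opts.datasets)  # {'schaefer': PosixPath('/data/atlases/schaefer'), 'aal': PosixPath('/data/atlases/aal')}
```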
2 changes: 1 addition & 1 deletion xcp_d/cli/workflow.py
@@ -93,7 +93,7 @@ def build_workflow(config_file, retval):
]

if config.execution.datasets:
init_msg += [f"Searching for derivatives: {config.execution.datasets}."]
init_msg += [f"Searching for derivatives and atlases: {config.execution.datasets}."]

build_log.log(25, f"\n{' ' * 11}* ".join(init_msg))

2 changes: 1 addition & 1 deletion xcp_d/config.py
@@ -380,7 +380,7 @@ class execution(_Config):
fmri_dir = None
"""An existing path to the preprocessing derivatives dataset, which must be BIDS-compliant."""
datasets = {}
"""Path(s) to search for pre-computed derivatives"""
"""Path(s) to search for other datasets (either derivatives or atlases)."""
aggr_ses_reports = None
"""Maximum number of sessions aggregated in one subject's visual report."""
bids_database_dir = None