diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 7c9b357..4b17a5e 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -33,10 +33,10 @@ jobs:
         python-version: ${{ matrix.python-version }}
 
     - name: Update build tools
-      run: python -m pip install --upgrade pip flit_scm
+      run: python -m pip install --upgrade pip
 
     - name: Install Arcana
-      run: python -m pip install .[test]
+      run: python -m pip install .[test] fileformats-testing
 
     - name: Pytest
       run: pytest -vvs --cov arcana.bids --cov-config .coveragerc --cov-report xml
diff --git a/README.rst b/README.rst
index 69d53fa..b7a527e 100644
--- a/README.rst
+++ b/README.rst
@@ -9,10 +9,10 @@ Arcana Extension - bids
    :alt: Python versions
 .. image:: https://img.shields.io/pypi/v/arcana-bids.svg
    :target: https://pypi.python.org/pypi/arcana-bids/
-   :alt: Latest Version
-.. image:: https://github.com/ArcanaFramework/arcana/actions/workflows/docs.yml/badge.svg
-   :target: http://arcana.readthedocs.io/en/latest/?badge=latest
-   :alt: Docs
+   :alt: Latest Version
+.. image:: https://readthedocs.org/projects/arcana/badge/?version=latest
+   :target: https://arcanaframework.github.io/arcana
+   :alt: Documentation Status
 
 An extension of the Arcana framework to work with Brain Imaging Data Structure (BIDS)
diff --git a/arcana/bids/cli.py b/arcana/bids/cli.py
index 132ea28..59dfa4f 100644
--- a/arcana/bids/cli.py
+++ b/arcana/bids/cli.py
@@ -15,8 +15,7 @@ def bids_group():
 @bids_group.command(
     name="app-entrypoint",
     help="""Loads a dataset, or creates one it is not already present, then applies and
-launches a pipeline in a single command. To be used within the command configuration
-of an XNAT Container Service ready Docker image.
+launches a pipeline in a single command. To be used inside BidsApp images.
 
 DATASET_LOCATOR string containing the nickname of the data store, the ID of the dataset (e.g.
 XNAT project ID or file-system directory) and the dataset's name
diff --git a/arcana/bids/data.py b/arcana/bids/data.py
index 39563b3..faea8c2 100644
--- a/arcana/bids/data.py
+++ b/arcana/bids/data.py
@@ -4,7 +4,6 @@
 import re
 import logging
 from operator import itemgetter
-from copy import copy
 import attrs
 import jq
 from pathlib import Path
@@ -15,7 +14,7 @@
 from arcana.core.exceptions import ArcanaUsageError
 from arcana.core.data.tree import DataTree
 from arcana.core.data.set import Dataset
-from arcana.core.data.space import Clinical
+from arcana.stdlib import Clinical
 from arcana.core.data.entry import DataEntry
 from arcana.core.data.row import DataRow
 
@@ -69,11 +68,19 @@ class Bids(LocalStore):
     name: str = "bids"
 
     BIDS_VERSION = "1.0.1"
+    DEFAULT_SPACE = Clinical
 
     PROV_SUFFIX = ".provenance"
     FIELDS_FNAME = "__fields__"
     FIELDS_PROV_FNAME = "__fields_provenance__"
 
+    VALID_HIERARCHIES = (
+        ["subject", "timepoint"],
+        ["session"],
+        ["group", "subject", "timepoint"],
+        ["group", "session"],
+    )
+
     #################################
     # Abstract-method implementations
     #################################
@@ -89,11 +96,10 @@ def populate_tree(self, tree: DataTree):
             The dataset to construct the tree dimensions for
         """
         root_dir = Path(tree.dataset.id)
-        participants_fspath = root_dir / "participants.tsv"
-        participants = {}
-        if participants_fspath.exists():
-            with open(participants_fspath) as f:
+        if "group" in tree.dataset.hierarchy:
+            with open(root_dir / "participants.tsv") as f:
                 lines = f.read().splitlines()
+            participants = {}
             if lines:
                 participant_keys = lines[0].split("\t")
                 for line in lines[1:]:
@@ -103,18 +109,17 @@ def populate_tree(self, tree: DataTree):
             if not subject_dir.name.startswith("sub-"):
                 continue
             subject_id = subject_dir.name[len("sub-") :]
-            try:
-                additional_ids = {"group": participants[subject_id]["group"]}
-            except KeyError:
-                additional_ids = {}
+            if "group" in tree.dataset.hierarchy:
+                tree_path = [participants[subject_id]["group"]]
+            else:
+                tree_path = []
+            tree_path.append(subject_id)
             if any(d.name.startswith("ses-") for d in subject_dir.iterdir()):
                 for sess_dir in subject_dir.iterdir():
                     timepoint_id = sess_dir.name[len("ses-") :]
-                    sess_add_ids = copy(additional_ids)
-                    sess_add_ids["session"] = f"sub-{subject_id}_ses-{timepoint_id}"
-                    tree.add_leaf([subject_id, timepoint_id], additional_ids=sess_add_ids)
+                    tree.add_leaf(tree_path + [timepoint_id])
             else:
-                tree.add_leaf([subject_id], additional_ids=additional_ids)
+                tree.add_leaf([subject_id])
 
     def populate_row(self, row: DataRow):
         root_dir = row.dataset.root_dir
@@ -167,7 +172,7 @@ def fileset_uri(self, path: str, datatype: type, row: DataRow) -> str:
         if dataset_name is None:
             base_uri = ""
         elif not dataset_name:
-            base_uri = f"derivatives/{Dataset.EMPTY_NAME}"
+            base_uri = f"derivatives/{self.EMPTY_DATASET_NAME}"
         else:
             base_uri = f"derivatives/{dataset_name}"
         return base_uri + str(
@@ -188,7 +193,7 @@ def field_uri(self, path: str, datatype: type, row: DataRow) -> str:
         if dataset_name is None:
             base_uri = ""
         elif not dataset_name:
-            base_uri = f"derivatives/{Dataset.EMPTY_NAME}"
+            base_uri = f"derivatives/{self.EMPTY_DATASET_NAME}"
         else:
             base_uri = f"derivatives/{dataset_name}"
         try:
@@ -268,51 +273,49 @@ def create_data_tree(
         id: str,
         leaves: list[tuple[str, ...]],
         hierarchy: list[str],
-        id_composition: dict[str, str] = None,
         **kwargs
     ):
+        if hierarchy not in self.VALID_HIERARCHIES:
+            raise ArcanaUsageError(
+                f"Invalid hierarchy {hierarchy} provided to create a new data tree "
+                f"needs to be one of the following:\n"
+                + "\n".join(str(h) for h in self.VALID_HIERARCHIES)
+            )
         root_dir = Path(id)
         root_dir.mkdir(parents=True)
         # Create sub-directories corresponding to rows of the dataset
         group_ids = set()
-        subject_group_ids = {}
+        subjects_group_id = {}
         for ids_tuple in leaves:
             ids = dict(zip(hierarchy, ids_tuple))
             # Add in composed IDs
-            ids.update(Dataset.decompose_ids(ids, id_composition))
-            if "session" in hierarchy:
-                subject_id = ids["session"]
-                timepoint_id = None
-                assert "subject" not in ids
-                assert "timepoint" not in ids
-            else:
+            try:
                 subject_id = ids["subject"]
-                timepoint_id = ids["timepoint"]
-                assert "session" not in ids
+            except KeyError:
+                subject_id = ids["session"]
+            timepoint_id = ids.get("timepoint")
             group_id = ids.get("group")
             if group_id:
                 group_ids.add(group_id)
-                subject_group_ids[subject_id] = group_id
+                subjects_group_id[subject_id] = group_id
             sess_dir_fspath = root_dir / self._entry2fs_path(
                 entry_path=None, subject_id=subject_id, timepoint_id=timepoint_id
             )
-            sess_dir_fspath.mkdir(parents=True)
+            sess_dir_fspath.mkdir(parents=True, exist_ok=True)
         # Add participants.tsv to define the groups if present
         if group_ids:
             with open(root_dir / "participants.tsv", "w") as f:
                 f.write("participant_id\tgroup\n")
-                for subject_id, group_id in subject_group_ids.items():
+                for subject_id, group_id in subjects_group_id.items():
                     f.write(f"sub-{subject_id}\t{group_id}\n")
 
     ####################
     # Overrides of API #
     ####################
 
-    def save_dataset(
-        self, dataset: Dataset, name: str = None, overwrite_bids_metadata: bool = False
-    ):
+    def save_dataset(self, dataset: Dataset, name: str = None):
         super().save_dataset(dataset, name=name)
-        self._save_metadata(dataset, overwrite_bids_metadata=overwrite_bids_metadata)
+        self._save_metadata(dataset)
 
     def create_dataset(
         self,
@@ -347,81 +350,57 @@ def create_dataset(
         dataset = super().create_dataset(
             id=id, leaves=leaves, hierarchy=hierarchy, space=space, name=name, **kwargs
         )
-        self._save_metadata(dataset, overwrite_bids_metadata=True)
+        self._save_metadata(dataset)
         return dataset
 
     ################
     # Helper methods
     ################
 
-    def _save_metadata(self, dataset: Dataset, overwrite_bids_metadata: bool = False):
+    def _save_metadata(self, dataset: Dataset):
         root_dir = Path(dataset.id)
         dataset_description_fspath = root_dir / "dataset_description.json"
-        if dataset_description_fspath.exists() and not overwrite_bids_metadata:
-            logger.warning(
-                "Not attempting to overwrite existing BIDS dataset description at "
-                "'%s, use 'overwrite_bids_metadata' to "
-                "force.",
-                str(dataset_description_fspath),
-            )
-        else:
-            dataset_description = map_to_bids_names(
-                attrs.asdict(dataset.metadata, recurse=True)
-            )
-            dataset_description["BIDSVersion"] = self.BIDS_VERSION
-            with open(dataset_description_fspath, "w") as f:
-                json.dump(dataset_description, f, indent=" ")
+        dataset_description = map_to_bids_names(
+            attrs.asdict(dataset.metadata, recurse=True)
+        )
+        dataset_description["BIDSVersion"] = self.BIDS_VERSION
+        with open(dataset_description_fspath, "w") as f:
+            json.dump(dataset_description, f, indent=" ")
         if dataset.metadata.description is not None:
             readme_path = root_dir / "README"
-            if readme_path.exists() and not overwrite_bids_metadata:
-                logger.warning(
-                    "Not attempting to overwrite existing BIDS dataset description at "
-                    "%s, use 'overwrite_bids_metadata' to "
-                    "force.",
-                    str(readme_path),
-                )
-            else:
-                with open(readme_path, "w") as f:
-                    f.write(dataset.metadata.description)
-        participants_tsv_fspath = dataset.root_dir / "participants.tsv"
+            with open(readme_path, "w") as f:
+                f.write(dataset.metadata.description)
         columns = list(dataset.metadata.row_metadata)
         group_ids = [i for i in dataset.row_ids("group") if i is not None]
         if group_ids or columns:
-            if participants_tsv_fspath.exists() and not overwrite_bids_metadata:
-                logger.warning(
-                    "Not attempting to overwrite existing BIDS participants TSV at "
-                    "%s, use 'overwrite_bids_metadata' to "
-                    "force.",
-                    str(participants_tsv_fspath),
-                )
-            else:
-                with open(dataset.root_dir / "participants.tsv", "w") as f:
-                    f.write("participant_id")
+            subject_rows = dataset.rows("subject")
+            with open(dataset.root_dir / "participants.tsv", "w") as f:
+                f.write("participant_id")
+                if group_ids:
+                    f.write("\tgroup")
+                if columns:
+                    f.write("\t" + "\t".join(columns))
+                f.write("\n")
+                for row in subject_rows:
+                    f.write(
+                        f"sub-{row.id}"
+                    )
                     if group_ids:
-                        f.write("\tgroup")
+                        f.write("\t" + row.frequency_id('group'))
                     if columns:
-                        f.write("\t" + "\t".join(columns))
+                        f.write("\t" + "\t".join(row.metadata[k] for k in columns))
                     f.write("\n")
-                    for row in dataset.rows("subject"):
-                        f.write(
-                            f"sub-{row.id}"
-                        )
-                        if group_ids:
-                            f.write("\t" + row.frequency_id('group'))
-                        if columns:
-                            f.write("\t" + "\t".join(row.metadata[k] for k in columns))
-                        f.write("\n")
-                participants_desc = {}
-                if group_ids:
-                    participants_desc["group"] = {
-                        "Description": "the group the participant belonged to",
-                        "Levels": {g: f"{g} group" for g in dataset.row_ids("group")},
-                    }
-                for name, desc in dataset.metadata.row_metadata.items():
-                    participants_desc[name] = {"Description": desc}
-                with open(dataset.root_dir / "participants.json", "w") as f:
-                    json.dump(participants_desc, f)
+            participants_desc = {}
+            if group_ids:
+                participants_desc["group"] = {
+                    "Description": "the group the participant belonged to",
+                    "Levels": {g: f"{g} group" for g in dataset.row_ids("group")},
+                }
+            for name, desc in dataset.metadata.row_metadata.items():
+                participants_desc[name] = {"Description": desc}
+            with open(dataset.root_dir / "participants.json", "w") as f:
+                json.dump(participants_desc, f)
 
     def _fileset_fspath(self, entry: DataEntry) -> Path:
         return Path(entry.row.dataset.id) / entry.uri
diff --git a/arcana/bids/tasks.py b/arcana/bids/tasks.py
index 6092e2c..4335c48 100644
--- a/arcana/bids/tasks.py
+++ b/arcana/bids/tasks.py
@@ -19,7 +19,7 @@
 from arcana.core import __version__
 from arcana.core.data.set import Dataset
 from fileformats.core import FileSet
-from arcana.core.data.space import Clinical
+from arcana.stdlib import Clinical
 from arcana.bids.data import JsonEdit
 from arcana.core.exceptions import ArcanaUsageError
 from arcana.core.utils.serialize import (
@@ -82,8 +82,7 @@ def bids_app(
     outputs : list[ty.Union[AppField, dict[str, str]]]
         The outputs to be extracted from the derivatives directory. Should be a list of
         tuples consisting of the the path the file/directory is saved by the app within a BIDS subject/session,
-        e.g. freesurfer/recon-all, and the DataFormat class it is stored in, e.g.
-        arcana.dirtree.data.Directory.
+        e.g. freesurfer/recon-all, and the DataFormat class it is stored in,
     executable : str, optional
         Name of the executable within the image to run (i.e. the entrypoint of the
         image). Required when extending the base image and launching Arcana within it.
         Defaults to
diff --git a/arcana/bids/tests/test_cli.py b/arcana/bids/tests/test_cli.py
index 93af4a1..0f449c0 100644
--- a/arcana/bids/tests/test_cli.py
+++ b/arcana/bids/tests/test_cli.py
@@ -5,7 +5,7 @@
 from arcana.testing.data.blueprint import (
     TestDatasetBlueprint, FileSetEntryBlueprint as FileBP
 )
-from arcana.core.data.space import Clinical
+from arcana.stdlib import Clinical
 from fileformats.medimage import NiftiGzX
 from arcana.bids.cli import app_entrypoint
 from arcana.core.utils.serialize import ClassResolver
diff --git a/arcana/bids/tests/test_data.py b/arcana/bids/tests/test_data.py
index 8f0b0c5..6113af5 100644
--- a/arcana/bids/tests/test_data.py
+++ b/arcana/bids/tests/test_data.py
@@ -12,7 +12,7 @@
 import docker
 from fileformats.medimage import NiftiX
 from arcana.core import __version__
-from arcana.core.data.space import Clinical
+from arcana.stdlib import Clinical
 from arcana.bids.data import Bids
 
 
@@ -27,18 +27,21 @@ def test_bids_roundtrip(bids_validator_docker, bids_success_str, work_dir):
     dataset_name = "adataset"
     shutil.rmtree(path, ignore_errors=True)
 
-    group_ids = ["test", "control"]
-    member_ids = [str(i) for i in range(1, 4)]
-    subject_ids = ["{}{}".format(*t) for t in itertools.product(group_ids, member_ids)]
-    timepoint_ids = [str(i) for i in range(1, 3)]
     dataset = Bids().create_dataset(
         id=path,
         name=dataset_name,
        space=Clinical,
-        hierarchy=["subject", "timepoint"],
-        leaves=itertools.product(subject_ids, timepoint_ids),
-        id_composition={
-            "subject": r"(?P<group>\w+)(?P<member>\d+)"
+        hierarchy=["group", "subject", "timepoint"],
+        leaves=[
+            (group, f"{group}{member}", timepoint)
+            for group, member, timepoint in itertools.product(
+                ["test", "control"],
+                [str(i) for i in range(1, 4)],
+                [str(i) for i in range(1, 3)],
+            )
+        ],
+        id_patterns={
+            "member": r"subject::\w+(\d+)",
         },
         metadata={
             "description": MOCK_README,
@@ -50,7 +53,7 @@ def test_bids_roundtrip(bids_validator_docker, bids_success_str, work_dir):
                     "description": "Dataset was created programmatically from scratch",
                     "code_url": "http://arcana.readthedocs.io",
                 }
-            ]
+            ],
         },
     )
 
@@ -97,7 +100,9 @@ def test_bids_roundtrip(bids_validator_docker, bids_success_str, work_dir):
     assert bids_success_str in result
 
     reloaded = Bids().load_dataset(id=path, name=dataset_name)
-    reloaded.add_sink("t1w", datatype=NiftiX, path="anat/T1w")  # add sink to reloaded so it matches
+    reloaded.add_sink(
+        "t1w", datatype=NiftiX, path="anat/T1w"
+    )  # add sink to reloaded so it matches
     reloaded.name = ""  # remove saved name so it matches
 
     assert dataset == reloaded
@@ -185,9 +190,7 @@ def test_bids_json_edit(json_edit_blueprint: JsonEditBlueprint, work_dir: Path):
     name = "bids-dataset"
     shutil.rmtree(path, ignore_errors=True)
 
-    dataset = Bids(
-        json_edits=[(bp.path_re, bp.jq_script)],
-    ).create_dataset(
+    dataset = Bids(json_edits=[(bp.path_re, bp.jq_script)],).create_dataset(
         id=path,
         name=name,
         leaves=[("1",)],
@@ -201,9 +204,8 @@ def test_bids_json_edit(json_edit_blueprint: JsonEditBlueprint, work_dir: Path):
                     "description": "Dataset was created programmatically from scratch",
                     "code_url": "http://arcana.readthedocs.io",
                 }
-            ]
+            ],
         },
-
     )
 
     for sf_name, sf_bp in bp.source_niftis.items():
diff --git a/pyproject.toml b/pyproject.toml
index c70e10d..601eb9b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -52,6 +52,7 @@ doc = [
 ]
 test = [
     "codecov",
+    "fileformats-testing",
     "medimages4tests >=0.3",
     "pytest>=5.4.3",
     "pytest-cov>=2.12.1",