From c0fd67d5b5286d80944d37b02cd3d83aec2d539d Mon Sep 17 00:00:00 2001
From: Tom Close
Date: Mon, 27 Mar 2023 15:16:58 +1100
Subject: [PATCH 01/13] fixed up doc string

---
 arcana/bids/cli.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arcana/bids/cli.py b/arcana/bids/cli.py
index 132ea28..59dfa4f 100644
--- a/arcana/bids/cli.py
+++ b/arcana/bids/cli.py
@@ -15,8 +15,7 @@ def bids_group():
 @bids_group.command(
     name="app-entrypoint",
     help="""Loads a dataset, or creates one it is not already present, then applies and
-launches a pipeline in a single command. To be used within the command configuration
-of an XNAT Container Service ready Docker image.
+launches a pipeline in a single command. To be used inside BidsApp images.
 
 DATASET_LOCATOR string containing the nickname of the data store, the ID of the
 dataset (e.g. XNAT project ID or file-system directory) and the dataset's name

From b188121081a61d236918566dd8f1b9666728929c Mon Sep 17 00:00:00 2001
From: Tom Close
Date: Mon, 27 Mar 2023 15:16:58 +1100
Subject: [PATCH 02/13] updated to use empty dataset name in store

---
 arcana/bids/data.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arcana/bids/data.py b/arcana/bids/data.py
index 39563b3..511c9e2 100644
--- a/arcana/bids/data.py
+++ b/arcana/bids/data.py
@@ -167,7 +167,7 @@ def fileset_uri(self, path: str, datatype: type, row: DataRow) -> str:
         if dataset_name is None:
             base_uri = ""
         elif not dataset_name:
-            base_uri = f"derivatives/{Dataset.EMPTY_NAME}"
+            base_uri = f"derivatives/{self.EMPTY_DATASET_NAME}"
         else:
             base_uri = f"derivatives/{dataset_name}"
         return base_uri + str(
@@ -188,7 +188,7 @@ def field_uri(self, path: str, datatype: type, row: DataRow) -> str:
         if dataset_name is None:
             base_uri = ""
         elif not dataset_name:
-            base_uri = f"derivatives/{Dataset.EMPTY_NAME}"
+            base_uri = f"derivatives/{self.EMPTY_DATASET_NAME}"
         else:
             base_uri = f"derivatives/{dataset_name}"
         try:

From 74753516f928078474beb1398cf933e1e5184b02 Mon Sep 17 00:00:00 2001
From: Tom Close
Date: Mon, 27 Mar 2023 15:16:58 +1100
Subject: [PATCH 03/13] added default space

---
 arcana/bids/data.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arcana/bids/data.py b/arcana/bids/data.py
index 511c9e2..10c7f35 100644
--- a/arcana/bids/data.py
+++ b/arcana/bids/data.py
@@ -69,6 +69,7 @@ class Bids(LocalStore):
     name: str = "bids"
 
     BIDS_VERSION = "1.0.1"
+    DEFAULT_SPACE = Clinical
 
     PROV_SUFFIX = ".provenance"
     FIELDS_FNAME = "__fields__"
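Editor's note: PATCH 02 above swaps the placeholder used when a derivatives
dataset has an empty name. A minimal standalone sketch of the prefix branching
in fileset_uri/field_uri, assuming a placeholder value of "EMPTY" for the
store's EMPTY_DATASET_NAME constant (the real value lives in the Arcana
source, not here):

    EMPTY_DATASET_NAME = "EMPTY"  # assumed value, for illustration only

    def base_uri(dataset_name):
        if dataset_name is None:
            return ""  # primary dataset: entries sit at the BIDS root
        if not dataset_name:
            return f"derivatives/{EMPTY_DATASET_NAME}"  # unnamed derivatives
        return f"derivatives/{dataset_name}"  # named derivatives dataset

    assert base_uri(None) == ""
    assert base_uri("") == "derivatives/EMPTY"
    assert base_uri("adataset") == "derivatives/adataset"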
From 9152d5024d1978f63b887b982c6a444b0668a3be Mon Sep 17 00:00:00 2001
From: Tom Close
Date: Mon, 27 Mar 2023 15:17:49 +1100
Subject: [PATCH 04/13] updated docs link in readme

---
 README.rst | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/README.rst b/README.rst
index 69d53fa..b7a527e 100644
--- a/README.rst
+++ b/README.rst
@@ -9,10 +9,10 @@ Arcana Extension - bids
     :alt: Python versions
 .. image:: https://img.shields.io/pypi/v/arcana-bids.svg
     :target: https://pypi.python.org/pypi/arcana-bids/
-    :alt: Latest Version
-.. image:: https://github.com/ArcanaFramework/arcana/actions/workflows/docs.yml/badge.svg
-    :target: http://arcana.readthedocs.io/en/latest/?badge=latest
-    :alt: Docs
+    :alt: Latest Version
+.. image:: https://readthedocs.org/projects/arcana/badge/?version=latest
+    :target: https://arcanaframework.github.io/arcana
+    :alt: Documentation Status
 
 
 An extension of the Arcana framework to work with Brain Imaging Data Structure (BIDS)

From 7b0658c782d1accfd8f5a8f494c38156c8766748 Mon Sep 17 00:00:00 2001
From: Tom Close
Date: Mon, 27 Mar 2023 15:17:49 +1100
Subject: [PATCH 05/13] added fileformats-testing to test deps

---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index c70e10d..601eb9b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -52,6 +52,7 @@ doc = [
 ]
 test = [
     "codecov",
+    "fileformats-testing",
     "medimages4tests >=0.3",
     "pytest>=5.4.3",
     "pytest-cov>=2.12.1",

From fcbed87a565c864a1e227f7a343d1b5f1f148bea Mon Sep 17 00:00:00 2001
From: Tom Close
Date: Mon, 27 Mar 2023 15:17:49 +1100
Subject: [PATCH 06/13] updated id-patterns

---
 arcana/bids/tests/test_data.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arcana/bids/tests/test_data.py b/arcana/bids/tests/test_data.py
index 8f0b0c5..f16bf85 100644
--- a/arcana/bids/tests/test_data.py
+++ b/arcana/bids/tests/test_data.py
@@ -37,8 +37,9 @@ def test_bids_roundtrip(bids_validator_docker, bids_success_str, work_dir):
         space=Clinical,
         hierarchy=["subject", "timepoint"],
         leaves=itertools.product(subject_ids, timepoint_ids),
-        id_composition={
-            "subject": r"(?P<group>\w+)(?P<member>\d+)"
+        id_patterns={
+            "group": r"subject::(\w+)\d+",
+            "member": r"subject::\w+(\d+)",
         },
         metadata={
             "description": MOCK_README,
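Editor's note: the id_patterns argument introduced in PATCH 06 appears to use
a "<source-id>::<regex>" syntax, with the first capture group becoming the
derived ID. A rough sketch of that assumed semantics in plain re -- not the
actual Arcana implementation:

    import re

    id_patterns = {
        "group": r"subject::(\w+)\d+",
        "member": r"subject::\w+(\d+)",
    }

    def derive(ids, patterns):
        derived = dict(ids)
        for key, pattern in patterns.items():
            source, regex = pattern.split("::", 1)
            derived[key] = re.fullmatch(regex, ids[source]).group(1)
        return derived

    print(derive({"subject": "test1"}, id_patterns))
    # {'subject': 'test1', 'group': 'test', 'member': '1'}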
From 2d36c7c9e9671c3410986ee499e9899ea4bb02d7 Mon Sep 17 00:00:00 2001
From: Tom Close
Date: Mon, 27 Mar 2023 15:17:49 +1100
Subject: [PATCH 07/13] debugged unittests after refactor of id-patterns

---
 arcana/bids/data.py | 42 +++++++++++++++++-------------------------
 1 file changed, 17 insertions(+), 25 deletions(-)

diff --git a/arcana/bids/data.py b/arcana/bids/data.py
index 10c7f35..2d6a568 100644
--- a/arcana/bids/data.py
+++ b/arcana/bids/data.py
@@ -90,11 +90,10 @@ def populate_tree(self, tree: DataTree):
             The dataset to construct the tree dimensions for
         """
         root_dir = Path(tree.dataset.id)
-        participants_fspath = root_dir / "participants.tsv"
-        participants = {}
-        if participants_fspath.exists():
-            with open(participants_fspath) as f:
+        if "group" in tree.dataset.hierarchy:
+            with open(root_dir / "participants.tsv") as f:
                 lines = f.read().splitlines()
+            participants = {}
             if lines:
                 participant_keys = lines[0].split("\t")
                 for line in lines[1:]:
@@ -104,18 +103,17 @@ def populate_tree(self, tree: DataTree):
             if not subject_dir.name.startswith("sub-"):
                 continue
             subject_id = subject_dir.name[len("sub-") :]
-            try:
-                additional_ids = {"group": participants[subject_id]["group"]}
-            except KeyError:
-                additional_ids = {}
+            if "group" in tree.dataset.hierarchy:
+                tree_path = participants[subject_id]["group"]
+            else:
+                tree_path = []
+            tree_path.append(subject_id)
             if any(d.name.startswith("ses-") for d in subject_dir.iterdir()):
                 for sess_dir in subject_dir.iterdir():
                     timepoint_id = sess_dir.name[len("ses-") :]
-                    sess_add_ids = copy(additional_ids)
-                    sess_add_ids["session"] = f"sub-{subject_id}_ses-{timepoint_id}"
-                    tree.add_leaf([subject_id, timepoint_id], additional_ids=sess_add_ids)
+                    tree.add_leaf(tree_path + [timepoint_id])
             else:
-                tree.add_leaf([subject_id], additional_ids=additional_ids)
+                tree.add_leaf([subject_id])
 
     def populate_row(self, row: DataRow):
         root_dir = row.dataset.root_dir
@@ -269,42 +267,36 @@ class Bids(LocalStore):
     def create_data_tree(
         self,
         id: str,
         leaves: list[tuple[str, ...]],
         hierarchy: list[str],
-        id_composition: dict[str, str] = None,
         **kwargs
     ):
         root_dir = Path(id)
         root_dir.mkdir(parents=True)
         # Create sub-directories corresponding to rows of the dataset
         group_ids = set()
-        subject_group_ids = {}
+        subjects_group_id = {}
         for ids_tuple in leaves:
             ids = dict(zip(hierarchy, ids_tuple))
             # Add in composed IDs
-            ids.update(Dataset.decompose_ids(ids, id_composition))
-            if "session" in hierarchy:
-                subject_id = ids["session"]
-                timepoint_id = None
-                assert "subject" not in ids
-                assert "timepoint" not in ids
-            else:
+            try:
                 subject_id = ids["subject"]
-                timepoint_id = ids["timepoint"]
-                assert "session" not in ids
+            except KeyError:
+                subject_id = ids["session"]
+            timepoint_id = ids.get("timepoint")
             group_id = ids.get("group")
             if group_id:
                 group_ids.add(group_id)
-                subject_group_ids[subject_id] = group_id
+                subjects_group_id[subject_id] = group_id
             sess_dir_fspath = root_dir / self._entry2fs_path(
                 entry_path=None, subject_id=subject_id, timepoint_id=timepoint_id
             )
             sess_dir_fspath.mkdir(parents=True)
         # Add participants.tsv to define the groups if present
         if group_ids:
             with open(root_dir / "participants.tsv", "w") as f:
                 f.write("participant_id\tgroup\n")
-                for subject_id, group_id in subject_group_ids.items():
+                for subject_id, group_id in subjects_group_id.items():
                     f.write(f"sub-{subject_id}\t{group_id}\n")
 
     ####################

From ac12c286cd824bf8438e4e4f735636f83495a418 Mon Sep 17 00:00:00 2001
From: Tom Close
Date: Mon, 27 Mar 2023 15:17:49 +1100
Subject: [PATCH 08/13] removed unnecessary import

---
 arcana/bids/data.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arcana/bids/data.py b/arcana/bids/data.py
index 2d6a568..2400344 100644
--- a/arcana/bids/data.py
+++ b/arcana/bids/data.py
@@ -4,7 +4,6 @@
 import re
 import logging
 from operator import itemgetter
-from copy import copy
 import attrs
 import jq
 from pathlib import Path

From 338c7662bd8a64b18af711df599e48e65a034252 Mon Sep 17 00:00:00 2001
From: Tom Close
Date: Mon, 27 Mar 2023 15:25:24 +1100
Subject: [PATCH 09/13] explicitly add in fileformats-testing to workflow build

---
 .github/workflows/tests.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 7c9b357..4b17a5e 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -33,10 +33,10 @@ jobs:
           python-version: ${{ matrix.python-version }}
 
       - name: Update build tools
-        run: python -m pip install --upgrade pip flit_scm
+        run: python -m pip install --upgrade pip
 
       - name: Install Arcana
-        run: python -m pip install .[test]
+        run: python -m pip install .[test] fileformats-testing
 
       - name: Pytest
         run: pytest -vvs --cov arcana.bids --cov-config .coveragerc --cov-report xml
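Editor's note: to make the populate_tree refactor in PATCH 07 concrete, here
is a toy trace of the leaf paths it now builds for a grouped hierarchy
(hypothetical IDs; the real method reads participants.tsv and scans the
sub-*/ses-* directories). Note that as committed, tree_path is assigned the
bare group string rather than a one-element list -- PATCH 10 below fixes
that, and this sketch uses the fixed form:

    hierarchy = ["group", "subject", "timepoint"]
    participants = {"test1": {"group": "test"}}

    subject_id, timepoint_id = "test1", "1"
    if "group" in hierarchy:
        tree_path = [participants[subject_id]["group"]]  # list, per PATCH 10
    else:
        tree_path = []
    tree_path.append(subject_id)
    print(tree_path + [timepoint_id])  # ['test', 'test1', '1']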
From 83e2a3e5ad33bc59882dcedf5eb7d47d08a2cb78 Mon Sep 17 00:00:00 2001
From: Tom Close
Date: Tue, 28 Mar 2023 11:45:16 +1100
Subject: [PATCH 10/13] added check on valid hierarchies

---
 arcana/bids/data.py | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/arcana/bids/data.py b/arcana/bids/data.py
index 2400344..231cb18 100644
--- a/arcana/bids/data.py
+++ b/arcana/bids/data.py
@@ -74,6 +74,13 @@ class Bids(LocalStore):
     FIELDS_FNAME = "__fields__"
     FIELDS_PROV_FNAME = "__fields_provenance__"
 
+    VALID_HIERARCHIES = (
+        ["subject", "timepoint"],
+        ["session"],
+        ["group", "subject", "timepoint"],
+        ["group", "session"],
+    )
+
     #################################
     # Abstract-method implementations
     #################################
@@ -103,7 +110,7 @@ def populate_tree(self, tree: DataTree):
                 continue
             subject_id = subject_dir.name[len("sub-") :]
             if "group" in tree.dataset.hierarchy:
-                tree_path = participants[subject_id]["group"]
+                tree_path = [participants[subject_id]["group"]]
             else:
                 tree_path = []
             tree_path.append(subject_id)
@@ -268,6 +275,12 @@ def create_data_tree(
         hierarchy: list[str],
         **kwargs
     ):
+        if hierarchy not in self.VALID_HIERARCHIES:
+            raise ArcanaUsageError(
+                f"Invalid hierarchy {hierarchy} provided to create a new data tree "
+                f"needs to be one of the following:\n"
+                + "\n".join(str(h) for h in self.VALID_HIERARCHIES)
+            )
         root_dir = Path(id)
         root_dir.mkdir(parents=True)
         # Create sub-directories corresponding to rows of the dataset

From 35e87e518009f1963f6e73a21f4472a326590244 Mon Sep 17 00:00:00 2001
From: Tom Close
Date: Wed, 5 Apr 2023 21:01:40 +1000
Subject: [PATCH 11/13] updated doc string

---
 arcana/bids/tasks.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arcana/bids/tasks.py b/arcana/bids/tasks.py
index 6092e2c..831c006 100644
--- a/arcana/bids/tasks.py
+++ b/arcana/bids/tasks.py
@@ -82,8 +82,7 @@ def bids_app(
     outputs : list[ty.Union[AppField, dict[str, str]]]
         The outputs to be extracted from the derivatives directory. Should be a list of tuples
         consisting of the the path the file/directory is saved by the app within a BIDS subject/session,
-        e.g. freesurfer/recon-all, and the DataFormat class it is stored in, e.g.
-        arcana.dirtree.data.Directory.
+        e.g. freesurfer/recon-all, and the DataFormat class it is stored in,
     executable : str, optional
         Name of the executable within the image to run (i.e. the entrypoint of the image).
         Required when extending the base image and launching Arcana within it. Defaults to
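Editor's note: the guard added in PATCH 10 is a plain membership test against
a tuple of lists, so hierarchy order matters as well as content. A quick
standalone check of what it accepts and rejects:

    VALID_HIERARCHIES = (
        ["subject", "timepoint"],
        ["session"],
        ["group", "subject", "timepoint"],
        ["group", "session"],
    )

    for hierarchy in (["group", "session"], ["timepoint", "subject"]):
        if hierarchy not in VALID_HIERARCHIES:
            print("rejected:", hierarchy)  # wrong order or combination
        else:
            print("accepted:", hierarchy)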
From 28de1b99a0f88c63e289cf3d295187e195cb554c Mon Sep 17 00:00:00 2001
From: Tom Close
Date: Thu, 6 Apr 2023 18:29:56 +1000
Subject: [PATCH 12/13] updated stdlib imports

---
 arcana/bids/data.py            |  4 ++--
 arcana/bids/tasks.py           |  2 +-
 arcana/bids/tests/test_cli.py  |  2 +-
 arcana/bids/tests/test_data.py | 31 ++++++++++++++++---------------
 4 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/arcana/bids/data.py b/arcana/bids/data.py
index 231cb18..e31ee8c 100644
--- a/arcana/bids/data.py
+++ b/arcana/bids/data.py
@@ -14,7 +14,7 @@
 from arcana.core.exceptions import ArcanaUsageError
 from arcana.core.data.tree import DataTree
 from arcana.core.data.set import Dataset
-from arcana.core.data.space import Clinical
+from arcana.stdlib import Clinical
 from arcana.core.data.entry import DataEntry
 from arcana.core.data.row import DataRow
@@ -301,7 +301,7 @@ def create_data_tree(
             sess_dir_fspath = root_dir / self._entry2fs_path(
                 entry_path=None, subject_id=subject_id, timepoint_id=timepoint_id
             )
-            sess_dir_fspath.mkdir(parents=True)
+            sess_dir_fspath.mkdir(parents=True, exist_ok=True)
         # Add participants.tsv to define the groups if present
         if group_ids:

diff --git a/arcana/bids/tasks.py b/arcana/bids/tasks.py
index 831c006..4335c48 100644
--- a/arcana/bids/tasks.py
+++ b/arcana/bids/tasks.py
@@ -19,7 +19,7 @@
 from arcana.core import __version__
 from arcana.core.data.set import Dataset
 from fileformats.core import FileSet
-from arcana.core.data.space import Clinical
+from arcana.stdlib import Clinical
 from arcana.bids.data import JsonEdit
 from arcana.core.exceptions import ArcanaUsageError
 from arcana.core.utils.serialize import (

diff --git a/arcana/bids/tests/test_cli.py b/arcana/bids/tests/test_cli.py
index 93af4a1..0f449c0 100644
--- a/arcana/bids/tests/test_cli.py
+++ b/arcana/bids/tests/test_cli.py
@@ -5,7 +5,7 @@ from arcana.testing.data.blueprint import (
     TestDatasetBlueprint,
     FileSetEntryBlueprint as FileBP
 )
-from arcana.core.data.space import Clinical
+from arcana.stdlib import Clinical
 from fileformats.medimage import NiftiGzX
 from arcana.bids.cli import app_entrypoint
 from arcana.core.utils.serialize import ClassResolver

diff --git a/arcana/bids/tests/test_data.py b/arcana/bids/tests/test_data.py
index f16bf85..6113af5 100644
--- a/arcana/bids/tests/test_data.py
+++ b/arcana/bids/tests/test_data.py
@@ -12,7 +12,7 @@
 import docker
 from fileformats.medimage import NiftiX
 from arcana.core import __version__
-from arcana.core.data.space import Clinical
+from arcana.stdlib import Clinical
 from arcana.bids.data import Bids
@@ -27,18 +27,20 @@ def test_bids_roundtrip(bids_validator_docker, bids_success_str, work_dir):
     dataset_name = "adataset"
     shutil.rmtree(path, ignore_errors=True)
-    group_ids = ["test", "control"]
-    member_ids = [str(i) for i in range(1, 4)]
-    subject_ids = ["{}{}".format(*t) for t in itertools.product(group_ids, member_ids)]
-    timepoint_ids = [str(i) for i in range(1, 3)]
     dataset = Bids().create_dataset(
         id=path,
         name=dataset_name,
         space=Clinical,
-        hierarchy=["subject", "timepoint"],
-        leaves=itertools.product(subject_ids, timepoint_ids),
+        hierarchy=["group", "subject", "timepoint"],
+        leaves=[
+            (group, f"{group}{member}", timepoint)
+            for group, member, timepoint in itertools.product(
+                ["test", "control"],
+                [str(i) for i in range(1, 4)],
+                [str(i) for i in range(1, 3)],
+            )
+        ],
         id_patterns={
-            "group": r"subject::(\w+)\d+",
             "member": r"subject::\w+(\d+)",
         },
         metadata={
             "description": MOCK_README,
@@ -51,7 +53,7 @@ def test_bids_roundtrip(bids_validator_docker, bids_success_str, work_dir):
                 "description": "Dataset was created programmatically from scratch",
                 "code_url": "http://arcana.readthedocs.io",
             }
-        ]
+        ],
         },
     )
@@ -98,7 +100,9 @@ def test_bids_roundtrip(bids_validator_docker, bids_success_str, work_dir):
     assert bids_success_str in result
 
     reloaded = Bids().load_dataset(id=path, name=dataset_name)
-    reloaded.add_sink("t1w", datatype=NiftiX, path="anat/T1w")  # add sink to reloaded so it matches
+    reloaded.add_sink(
+        "t1w", datatype=NiftiX, path="anat/T1w"
+    )  # add sink to reloaded so it matches
     reloaded.name = ""  # remove saved name so it matches
     assert dataset == reloaded
@@ -186,9 +190,7 @@ def test_bids_json_edit(json_edit_blueprint: JsonEditBlueprint, work_dir: Path):
     name = "bids-dataset"
 
     shutil.rmtree(path, ignore_errors=True)
-    dataset = Bids(
-        json_edits=[(bp.path_re, bp.jq_script)],
-    ).create_dataset(
+    dataset = Bids(json_edits=[(bp.path_re, bp.jq_script)],).create_dataset(
         id=path,
         name=name,
         leaves=[("1",)],
@@ -202,9 +204,8 @@ def test_bids_json_edit(json_edit_blueprint: JsonEditBlueprint, work_dir: Path):
                 "description": "Dataset was created programmatically from scratch",
                 "code_url": "http://arcana.readthedocs.io",
             }
-        ]
+        ],
         },
-
     )
 
     for sf_name, sf_bp in bp.source_niftis.items():
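Editor's note: the reshaped leaves argument in PATCH 12 expands to
(group, subject, timepoint) tuples matching the new three-layer hierarchy.
Running the comprehension from the test on its own shows the shape:

    import itertools

    leaves = [
        (group, f"{group}{member}", timepoint)
        for group, member, timepoint in itertools.product(
            ["test", "control"],
            [str(i) for i in range(1, 4)],
            [str(i) for i in range(1, 3)],
        )
    ]
    print(len(leaves))  # 12 leaves: 2 groups x 3 members x 2 timepoints
    print(leaves[0])    # ('test', 'test1', '1')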
From cb7a334e68325c735682c5e00e19acd1cccc7c99 Mon Sep 17 00:00:00 2001
From: Tom Close
Date: Tue, 11 Apr 2023 13:31:39 +1000
Subject: [PATCH 13/13] fixed bug in writing participants TSV

---
 arcana/bids/data.py | 98 +++++++++++++++++----------------------------
 1 file changed, 36 insertions(+), 62 deletions(-)

diff --git a/arcana/bids/data.py b/arcana/bids/data.py
index e31ee8c..faea8c2 100644
--- a/arcana/bids/data.py
+++ b/arcana/bids/data.py
@@ -313,11 +313,9 @@ def create_data_tree(
     ####################
     # Overrides of API #
     ####################
-    def save_dataset(
-        self, dataset: Dataset, name: str = None, overwrite_bids_metadata: bool = False
-    ):
+    def save_dataset(self, dataset: Dataset, name: str = None):
         super().save_dataset(dataset, name=name)
-        self._save_metadata(dataset, overwrite_bids_metadata=overwrite_bids_metadata)
+        self._save_metadata(dataset)
 
     def create_dataset(
         self,
@@ -352,81 +350,57 @@ def create_dataset(
         dataset = super().create_dataset(
             id=id, leaves=leaves, hierarchy=hierarchy, space=space, name=name, **kwargs
         )
-        self._save_metadata(dataset, overwrite_bids_metadata=True)
+        self._save_metadata(dataset)
         return dataset
 
     ################
     # Helper methods
     ################
 
-    def _save_metadata(self, dataset: Dataset, overwrite_bids_metadata: bool = False):
+    def _save_metadata(self, dataset: Dataset):
         root_dir = Path(dataset.id)
         dataset_description_fspath = root_dir / "dataset_description.json"
-        if dataset_description_fspath.exists() and not overwrite_bids_metadata:
-            logger.warning(
-                "Not attempting to overwrite existing BIDS dataset description at "
-                "'%s, use 'overwrite_bids_metadata' to "
-                "force.",
-                str(dataset_description_fspath),
-            )
-        else:
-            dataset_description = map_to_bids_names(
-                attrs.asdict(dataset.metadata, recurse=True)
-            )
-            dataset_description["BIDSVersion"] = self.BIDS_VERSION
-            with open(dataset_description_fspath, "w") as f:
-                json.dump(dataset_description, f, indent="    ")
+        dataset_description = map_to_bids_names(
+            attrs.asdict(dataset.metadata, recurse=True)
+        )
+        dataset_description["BIDSVersion"] = self.BIDS_VERSION
+        with open(dataset_description_fspath, "w") as f:
+            json.dump(dataset_description, f, indent="    ")
         if dataset.metadata.description is not None:
             readme_path = root_dir / "README"
-            if readme_path.exists() and not overwrite_bids_metadata:
-                logger.warning(
-                    "Not attempting to overwrite existing BIDS dataset description at "
-                    "%s, use 'overwrite_bids_metadata' to "
-                    "force.",
-                    str(readme_path),
-                )
-            else:
-                with open(readme_path, "w") as f:
-                    f.write(dataset.metadata.description)
-        participants_tsv_fspath = dataset.root_dir / "participants.tsv"
+            with open(readme_path, "w") as f:
+                f.write(dataset.metadata.description)
         columns = list(dataset.metadata.row_metadata)
         group_ids = [i for i in dataset.row_ids("group") if i is not None]
         if group_ids or columns:
-            if participants_tsv_fspath.exists() and not overwrite_bids_metadata:
-                logger.warning(
-                    "Not attempting to overwrite existing BIDS participants TSV at "
-                    "%s, use 'overwrite_bids_metadata' to "
-                    "force.",
-                    str(participants_tsv_fspath),
-                )
-            else:
-                with open(dataset.root_dir / "participants.tsv", "w") as f:
-                    f.write("participant_id")
-                    if group_ids:
-                        f.write("\tgroup")
-                    if columns:
-                        f.write("\t" + "\t".join(columns))
-                    f.write("\n")
-                    for row in dataset.rows("subject"):
-                        f.write(
-                            f"sub-{row.id}"
-                        )
-                        if group_ids:
-                            f.write("\t" + row.frequency_id('group'))
-                        if columns:
-                            f.write("\t" + "\t".join(row.metadata[k] for k in columns))
-                        f.write("\n")
-                participants_desc = {}
-                if group_ids:
-                    participants_desc["group"] = {
-                        "Description": "the group the participant belonged to",
-                        "Levels": {g: f"{g} group" for g in dataset.row_ids("group")},
-                    }
-                for name, desc in dataset.metadata.row_metadata.items():
-                    participants_desc[name] = {"Description": desc}
-                with open(dataset.root_dir / "participants.json", "w") as f:
-                    json.dump(participants_desc, f)
+            subject_rows = dataset.rows("subject")
+            with open(dataset.root_dir / "participants.tsv", "w") as f:
+                f.write("participant_id")
+                if group_ids:
+                    f.write("\tgroup")
+                if columns:
+                    f.write("\t" + "\t".join(columns))
+                f.write("\n")
+                for row in subject_rows:
+                    f.write(
+                        f"sub-{row.id}"
+                    )
+                    if group_ids:
+                        f.write("\t" + row.frequency_id('group'))
+                    if columns:
+                        f.write("\t" + "\t".join(row.metadata[k] for k in columns))
+                    f.write("\n")
+        participants_desc = {}
+        if group_ids:
+            participants_desc["group"] = {
+                "Description": "the group the participant belonged to",
+                "Levels": {g: f"{g} group" for g in dataset.row_ids("group")},
+            }
+        for name, desc in dataset.metadata.row_metadata.items():
+            participants_desc[name] = {"Description": desc}
+        with open(dataset.root_dir / "participants.json", "w") as f:
+            json.dump(participants_desc, f)
 
     def _fileset_fspath(self, entry: DataEntry) -> Path:
         return Path(entry.row.dataset.id) / entry.uri
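Editor's note: for the roundtrip test dataset earlier in the series, the
participants.tsv written by the fixed code in PATCH 13 should look roughly
like this (a synthetic reproduction with no extra metadata columns, not
output captured from the test suite):

    header = "participant_id\tgroup"
    rows = [f"sub-{g}{m}\t{g}" for g in ("test", "control") for m in (1, 2, 3)]
    print("\n".join([header] + rows))
    # participant_id    group
    # sub-test1         test
    # sub-test2         test
    # ...
    # sub-control3      control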