Commit 74bc1e1

Merge pull request #5 from ArcanaFramework/id-patterns

Id patterns

tclose authored Apr 11, 2023
2 parents 39cc599 + cb7a334

Showing 8 changed files with 100 additions and 120 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/tests.yml
@@ -33,10 +33,10 @@ jobs:
           python-version: ${{ matrix.python-version }}
 
       - name: Update build tools
-        run: python -m pip install --upgrade pip flit_scm
+        run: python -m pip install --upgrade pip
 
       - name: Install Arcana
-        run: python -m pip install .[test]
+        run: python -m pip install .[test] fileformats-testing
 
       - name: Pytest
         run: pytest -vvs --cov arcana.bids --cov-config .coveragerc --cov-report xml
8 changes: 4 additions & 4 deletions README.rst
@@ -9,10 +9,10 @@ Arcana Extension - bids
    :alt: Python versions
 .. image:: https://img.shields.io/pypi/v/arcana-bids.svg
    :target: https://pypi.python.org/pypi/arcana-bids/
-   :alt: Latest Version
-.. image:: https://github.com/ArcanaFramework/arcana/actions/workflows/docs.yml/badge.svg
-   :target: http://arcana.readthedocs.io/en/latest/?badge=latest
-   :alt: Docs
+   :alt: Latest Version
+.. image:: https://readthedocs.org/projects/arcana/badge/?version=latest
+   :target: https://arcanaframework.github.io/arcana
+   :alt: Documentation Status
 
 
 An extension of the Arcana framework to work with Brain Imaging Data Structure (BIDS)
3 changes: 1 addition & 2 deletions arcana/bids/cli.py
@@ -15,8 +15,7 @@ def bids_group():
 @bids_group.command(
     name="app-entrypoint",
     help="""Loads a dataset, or creates one if it is not already present, then applies and
-launches a pipeline in a single command. To be used within the command configuration
-of an XNAT Container Service ready Docker image.
+launches a pipeline in a single command. To be used inside BidsApp images.
 
 DATASET_LOCATOR string containing the nickname of the data store, the ID of the
 dataset (e.g. XNAT project ID or file-system directory) and the dataset's name
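For reference, the locator can be sketched in Python; the "store//id@name" layout and the "bids" store nickname below are assumptions inferred from this help text, not confirmed by the diff:

    # Hypothetical locator composition; the "store//id@name" layout is an assumption.
    store_nickname = "bids"            # assumed nickname of the data store
    dataset_id = "/data/my-dataset"    # file-system directory used as the dataset ID
    dataset_name = "main"              # assumed dataset name
    print(f"{store_nickname}//{dataset_id}@{dataset_name}")  # bids///data/my-dataset@main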
163 changes: 71 additions & 92 deletions arcana/bids/data.py
@@ -4,7 +4,6 @@
 import re
 import logging
 from operator import itemgetter
-from copy import copy
 import attrs
 import jq
 from pathlib import Path
@@ -15,7 +14,7 @@
 from arcana.core.exceptions import ArcanaUsageError
 from arcana.core.data.tree import DataTree
 from arcana.core.data.set import Dataset
-from arcana.core.data.space import Clinical
+from arcana.stdlib import Clinical
 from arcana.core.data.entry import DataEntry
 from arcana.core.data.row import DataRow

@@ -69,11 +68,19 @@ class Bids(LocalStore):
     name: str = "bids"
 
     BIDS_VERSION = "1.0.1"
+    DEFAULT_SPACE = Clinical
 
     PROV_SUFFIX = ".provenance"
     FIELDS_FNAME = "__fields__"
     FIELDS_PROV_FNAME = "__fields_provenance__"
 
+    VALID_HIERARCHIES = (
+        ["subject", "timepoint"],
+        ["session"],
+        ["group", "subject", "timepoint"],
+        ["group", "session"],
+    )
+
     #################################
     # Abstract-method implementations
     #################################
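The new VALID_HIERARCHIES constant drives the guard added to create_data_tree further down in this file. A minimal standalone sketch of that membership check, with the constant copied from the diff and a hypothetical wrapper function:

    VALID_HIERARCHIES = (
        ["subject", "timepoint"],
        ["session"],
        ["group", "subject", "timepoint"],
        ["group", "session"],
    )

    def check_hierarchy(hierarchy: list) -> None:
        # membership in a tuple of lists uses list equality, so order matters
        if hierarchy not in VALID_HIERARCHIES:
            raise ValueError(
                f"Invalid hierarchy {hierarchy}, needs to be one of:\n"
                + "\n".join(str(h) for h in VALID_HIERARCHIES)
            )

    check_hierarchy(["group", "session"])  # passes silently
    # check_hierarchy(["timepoint"])       # would raise ValueError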
@@ -89,11 +96,10 @@ def populate_tree(self, tree: DataTree):
             The dataset to construct the tree dimensions for
         """
         root_dir = Path(tree.dataset.id)
-        participants_fspath = root_dir / "participants.tsv"
-        participants = {}
-        if participants_fspath.exists():
-            with open(participants_fspath) as f:
+        if "group" in tree.dataset.hierarchy:
+            with open(root_dir / "participants.tsv") as f:
                 lines = f.read().splitlines()
+            participants = {}
             if lines:
                 participant_keys = lines[0].split("\t")
                 for line in lines[1:]:
@@ -103,18 +109,17 @@
             if not subject_dir.name.startswith("sub-"):
                 continue
             subject_id = subject_dir.name[len("sub-") :]
-            try:
-                additional_ids = {"group": participants[subject_id]["group"]}
-            except KeyError:
-                additional_ids = {}
+            if "group" in tree.dataset.hierarchy:
+                tree_path = [participants[subject_id]["group"]]
+            else:
+                tree_path = []
+            tree_path.append(subject_id)
             if any(d.name.startswith("ses-") for d in subject_dir.iterdir()):
                 for sess_dir in subject_dir.iterdir():
                     timepoint_id = sess_dir.name[len("ses-") :]
-                    sess_add_ids = copy(additional_ids)
-                    sess_add_ids["session"] = f"sub-{subject_id}_ses-{timepoint_id}"
-                    tree.add_leaf([subject_id, timepoint_id], additional_ids=sess_add_ids)
+                    tree.add_leaf(tree_path + [timepoint_id])
             else:
-                tree.add_leaf([subject_id], additional_ids=additional_ids)
+                tree.add_leaf([subject_id])
 
     def populate_row(self, row: DataRow):
         root_dir = row.dataset.root_dir
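As context for the populate_tree change, a minimal sketch of how participants.tsv can be parsed into the participants mapping the group lookup relies on (the helper name and exact file layout are assumptions):

    from pathlib import Path

    def read_participants(root_dir: Path) -> dict:
        # assumes a tab-separated file whose header starts with "participant_id"
        lines = (root_dir / "participants.tsv").read_text().splitlines()
        keys = lines[0].split("\t")  # e.g. ["participant_id", "group"]
        participants = {}
        for line in lines[1:]:
            values = line.split("\t")
            # strip the "sub-" prefix so keys match bare subject IDs
            participants[values[0][len("sub-"):]] = dict(zip(keys[1:], values[1:]))
        return participants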
@@ -167,7 +172,7 @@ def fileset_uri(self, path: str, datatype: type, row: DataRow) -> str:
         if dataset_name is None:
             base_uri = ""
         elif not dataset_name:
-            base_uri = f"derivatives/{Dataset.EMPTY_NAME}"
+            base_uri = f"derivatives/{self.EMPTY_DATASET_NAME}"
         else:
             base_uri = f"derivatives/{dataset_name}"
         return base_uri + str(
@@ -188,7 +193,7 @@ def field_uri(self, path: str, datatype: type, row: DataRow) -> str:
         if dataset_name is None:
             base_uri = ""
         elif not dataset_name:
-            base_uri = f"derivatives/{Dataset.EMPTY_NAME}"
+            base_uri = f"derivatives/{self.EMPTY_DATASET_NAME}"
         else:
             base_uri = f"derivatives/{dataset_name}"
         try:
@@ -268,51 +273,49 @@ def create_data_tree(
         id: str,
         leaves: list[tuple[str, ...]],
         hierarchy: list[str],
-        id_composition: dict[str, str] = None,
         **kwargs
     ):
+        if hierarchy not in self.VALID_HIERARCHIES:
+            raise ArcanaUsageError(
+                f"Invalid hierarchy {hierarchy} provided to create a new data tree "
+                f"needs to be one of the following:\n"
+                + "\n".join(str(h) for h in self.VALID_HIERARCHIES)
+            )
         root_dir = Path(id)
         root_dir.mkdir(parents=True)
         # Create sub-directories corresponding to rows of the dataset
         group_ids = set()
-        subject_group_ids = {}
+        subjects_group_id = {}
         for ids_tuple in leaves:
             ids = dict(zip(hierarchy, ids_tuple))
-            # Add in composed IDs
-            ids.update(Dataset.decompose_ids(ids, id_composition))
             if "session" in hierarchy:
                 subject_id = ids["session"]
                 timepoint_id = None
                 assert "subject" not in ids
                 assert "timepoint" not in ids
             else:
-                try:
-                    subject_id = ids["subject"]
-                    timepoint_id = ids["timepoint"]
-                    assert "session" not in ids
-                except KeyError:
-                    subject_id = ids["session"]
+                subject_id = ids["subject"]
+                timepoint_id = ids.get("timepoint")
             group_id = ids.get("group")
             if group_id:
                 group_ids.add(group_id)
-                subject_group_ids[subject_id] = group_id
+                subjects_group_id[subject_id] = group_id
             sess_dir_fspath = root_dir / self._entry2fs_path(
                 entry_path=None, subject_id=subject_id, timepoint_id=timepoint_id
             )
-            sess_dir_fspath.mkdir(parents=True)
+            sess_dir_fspath.mkdir(parents=True, exist_ok=True)
         # Add participants.tsv to define the groups if present
         if group_ids:
             with open(root_dir / "participants.tsv", "w") as f:
                 f.write("participant_id\tgroup\n")
-                for subject_id, group_id in subject_group_ids.items():
+                for subject_id, group_id in subjects_group_id.items():
                     f.write(f"sub-{subject_id}\t{group_id}\n")
 
     ####################
     # Overrides of API #
     ####################
 
-    def save_dataset(
-        self, dataset: Dataset, name: str = None, overwrite_bids_metadata: bool = False
-    ):
+    def save_dataset(self, dataset: Dataset, name: str = None):
         super().save_dataset(dataset, name=name)
-        self._save_metadata(dataset, overwrite_bids_metadata=overwrite_bids_metadata)
+        self._save_metadata(dataset)
 
     def create_dataset(
         self,
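A usage sketch of what the rewritten create_data_tree produces for the ["group", "session"] hierarchy; this is a simplified, hypothetical re-implementation for illustration, with invented paths and IDs:

    from pathlib import Path

    def sketch_create_tree(root: Path, leaves, hierarchy):
        # simplified stand-in for Bids.create_data_tree, for illustration only
        subjects_group_id = {}
        for ids_tuple in leaves:
            ids = dict(zip(hierarchy, ids_tuple))
            subject_id = ids["session"] if "session" in hierarchy else ids["subject"]
            if ids.get("group"):
                subjects_group_id[subject_id] = ids["group"]
            (root / f"sub-{subject_id}").mkdir(parents=True, exist_ok=True)
        if subjects_group_id:
            with open(root / "participants.tsv", "w") as f:
                f.write("participant_id\tgroup\n")
                for subject_id, group_id in subjects_group_id.items():
                    f.write(f"sub-{subject_id}\t{group_id}\n")

    # creates sub-01/, sub-02/ and a two-row participants.tsv under /tmp/bids-demo
    sketch_create_tree(Path("/tmp/bids-demo"), [("control", "01"), ("patient", "02")], ["group", "session"])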
@@ -347,81 +350,57 @@ def create_dataset(
         dataset = super().create_dataset(
             id=id, leaves=leaves, hierarchy=hierarchy, space=space, name=name, **kwargs
         )
-        self._save_metadata(dataset, overwrite_bids_metadata=True)
+        self._save_metadata(dataset)
         return dataset
 
     ################
     # Helper methods
     ################
 
-    def _save_metadata(self, dataset: Dataset, overwrite_bids_metadata: bool = False):
+    def _save_metadata(self, dataset: Dataset):
         root_dir = Path(dataset.id)
         dataset_description_fspath = root_dir / "dataset_description.json"
-        if dataset_description_fspath.exists() and not overwrite_bids_metadata:
-            logger.warning(
-                "Not attempting to overwrite existing BIDS dataset description at "
-                "'%s, use 'overwrite_bids_metadata' to "
-                "force.",
-                str(dataset_description_fspath),
-            )
-        else:
-            dataset_description = map_to_bids_names(
-                attrs.asdict(dataset.metadata, recurse=True)
-            )
-            dataset_description["BIDSVersion"] = self.BIDS_VERSION
-            with open(dataset_description_fspath, "w") as f:
-                json.dump(dataset_description, f, indent=" ")
+        dataset_description = map_to_bids_names(
+            attrs.asdict(dataset.metadata, recurse=True)
+        )
+        dataset_description["BIDSVersion"] = self.BIDS_VERSION
+        with open(dataset_description_fspath, "w") as f:
+            json.dump(dataset_description, f, indent=" ")
 
         if dataset.metadata.description is not None:
             readme_path = root_dir / "README"
-            if readme_path.exists() and not overwrite_bids_metadata:
-                logger.warning(
-                    "Not attempting to overwrite existing BIDS dataset description at "
-                    "%s, use 'overwrite_bids_metadata' to "
-                    "force.",
-                    str(readme_path),
-                )
-            else:
-                with open(readme_path, "w") as f:
-                    f.write(dataset.metadata.description)
-        participants_tsv_fspath = dataset.root_dir / "participants.tsv"
+            with open(readme_path, "w") as f:
+                f.write(dataset.metadata.description)
         columns = list(dataset.metadata.row_metadata)
         group_ids = [i for i in dataset.row_ids("group") if i is not None]
         if group_ids or columns:
-            if participants_tsv_fspath.exists() and not overwrite_bids_metadata:
-                logger.warning(
-                    "Not attempting to overwrite existing BIDS participants TSV at "
-                    "%s, use 'overwrite_bids_metadata' to "
-                    "force.",
-                    str(participants_tsv_fspath),
-                )
-            else:
-                with open(dataset.root_dir / "participants.tsv", "w") as f:
-                    f.write("participant_id")
-                    subject_rows = dataset.rows("subject")
+            with open(dataset.root_dir / "participants.tsv", "w") as f:
+                f.write("participant_id")
                 if group_ids:
                     f.write("\tgroup")
                 if columns:
                     f.write("\t" + "\t".join(columns))
                 f.write("\n")
-                    for row in subject_rows:
-                        f.write(
-                            f"sub-{row.id}"
-                        )
-                        if group_ids:
-                            f.write("\tgroup")
-                            f.write("\t" + row.frequency_id('group'))
-                        if columns:
-                            f.write("\t" + "\t".join(columns))
-                            f.write("\t" + "\t".join(row.metadata[k] for k in columns))
-                        f.write("\n")
+                for row in dataset.rows("subject"):
+                    f.write(
+                        f"sub-{row.id}"
+                    )
+                    if group_ids:
+                        f.write("\t" + row.frequency_id('group'))
+                    if columns:
+                        f.write("\t" + "\t".join(row.metadata[k] for k in columns))
+                    f.write("\n")
-                participants_desc = {}
-                if group_ids:
-                    participants_desc["group"] = {
-                        "Description": "the group the participant belonged to",
-                        "Levels": {g: f"{g} group" for g in dataset.row_ids("group")},
-                    }
-                for name, desc in dataset.metadata.row_metadata.items():
-                    participants_desc[name] = {"Description": desc}
-                with open(dataset.root_dir / "participants.json", "w") as f:
-                    json.dump(participants_desc, f)
+            participants_desc = {}
+            if group_ids:
+                participants_desc["group"] = {
+                    "Description": "the group the participant belonged to",
+                    "Levels": {g: f"{g} group" for g in dataset.row_ids("group")},
+                }
+            for name, desc in dataset.metadata.row_metadata.items():
+                participants_desc[name] = {"Description": desc}
+            with open(dataset.root_dir / "participants.json", "w") as f:
+                json.dump(participants_desc, f)
 
     def _fileset_fspath(self, entry: DataEntry) -> Path:
         return Path(entry.row.dataset.id) / entry.uri
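To show what the rewritten _save_metadata emits, a small sketch of the two participant sidecars; the subject, group, and "age" column values are invented for illustration:

    import json

    # invented example data; layout mirrors the writing loops in the diff above
    rows = {"01": {"group": "control", "age": "34"}, "02": {"group": "patient", "age": "29"}}
    columns = ["age"]  # hypothetical row-metadata column

    tsv = ["participant_id\tgroup\t" + "\t".join(columns)]
    for sub_id, meta in rows.items():
        tsv.append(f"sub-{sub_id}\t{meta['group']}\t" + "\t".join(meta[k] for k in columns))
    print("\n".join(tsv))  # participants.tsv contents

    participants_desc = {
        "group": {
            "Description": "the group the participant belonged to",
            "Levels": {g: f"{g} group" for g in ("control", "patient")},
        },
        "age": {"Description": "age of the participant at scan time"},
    }
    print(json.dumps(participants_desc, indent="  "))  # participants.json contents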
5 changes: 2 additions & 3 deletions arcana/bids/tasks.py
@@ -19,7 +19,7 @@
 from arcana.core import __version__
 from arcana.core.data.set import Dataset
 from fileformats.core import FileSet
-from arcana.core.data.space import Clinical
+from arcana.stdlib import Clinical
 from arcana.bids.data import JsonEdit
 from arcana.core.exceptions import ArcanaUsageError
 from arcana.core.utils.serialize import (
@@ -82,8 +82,7 @@ def bids_app(
     outputs : list[ty.Union[AppField, dict[str, str]]]
         The outputs to be extracted from the derivatives directory. Should be a list of tuples
         consisting of the path the file/directory is saved by the app within a BIDS subject/session,
-        e.g. freesurfer/recon-all, and the DataFormat class it is stored in, e.g.
-        arcana.dirtree.data.Directory.
+        e.g. freesurfer/recon-all, and the DataFormat class it is stored in,
     executable : str, optional
         Name of the executable within the image to run (i.e. the entrypoint of the image).
         Required when extending the base image and launching Arcana within it. Defaults to
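A hedged example of an outputs entry as described in this docstring; the fileformats.generic.Directory import stands in for the removed arcana.dirtree.data.Directory reference and is an assumption, not taken from this diff:

    from fileformats.generic import Directory

    outputs = [
        # path the app saves under the BIDS derivatives tree, and its format class
        ("freesurfer/recon-all", Directory),
    ]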
2 changes: 1 addition & 1 deletion arcana/bids/tests/test_cli.py
@@ -5,7 +5,7 @@
 from arcana.testing.data.blueprint import (
     TestDatasetBlueprint, FileSetEntryBlueprint as FileBP
 )
-from arcana.core.data.space import Clinical
+from arcana.stdlib import Clinical
 from fileformats.medimage import NiftiGzX
 from arcana.bids.cli import app_entrypoint
 from arcana.core.utils.serialize import ClassResolver