diff --git a/.codespellrc b/.codespellrc index 22cd48ca96..8eae2b3993 100644 --- a/.codespellrc +++ b/.codespellrc @@ -1,5 +1,7 @@ [codespell] skip = *.js,*.svg,*.eps,.git,node_modules,env,venv,.mypy_cache,package-lock.json,CITATION.cff,tools/new_contributors.tsv,./tools/schemacode/docs/build +# Ignore diff/git format-patch headings which could include truncated lines +ignore-regex = ^@@ -[0-9].*@@.* ignore-words-list = fo,te,als,Acknowledgements,acknowledgements,weill,bu,winn,manuel builtin = clear,rare,en-GB_to_en-US # this overloads default dictionaries and I have not yet figured out diff --git a/.github/workflows/schemacode_ci.yml b/.github/workflows/schemacode_ci.yml index 6cf4dbcd4c..cf4640578a 100644 --- a/.github/workflows/schemacode_ci.yml +++ b/.github/workflows/schemacode_ci.yml @@ -27,6 +27,15 @@ jobs: python-version: ["3.11"] steps: - uses: actions/checkout@v4 + + - name: "Apply patches for BIDS-2.0" + run: tools/bids-2.0/apply_all + + - name: "Show differences after patching" + run: | + git add . + git diff --cached + - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} diff --git a/.github/workflows/validate_bids-examples.yml b/.github/workflows/validate_bids-examples.yml new file mode 100644 index 0000000000..d7f3fbe476 --- /dev/null +++ b/.github/workflows/validate_bids-examples.yml @@ -0,0 +1,144 @@ +name: validate_datasets + +on: + push: + branches: ['master'] + pull_request: + branches: ['**'] +# create: +# branches: [master] +# tags: ['**'] +# schedule: +# - cron: "0 4 * * 1" + +concurrency: + group: ${{ github.ref }} + cancel-in-progress: true + +jobs: + build: + strategy: + fail-fast: false + matrix: + platform: [ubuntu-latest] # , macos-latest, windows-latest] + bids-validator: [master-deno] + python-version: ["3.11"] + + runs-on: ${{ matrix.platform }} + + env: + TZ: Europe/Berlin + FORCE_COLOR: 1 + + steps: + - uses: actions/checkout@v4 + + - name: "Apply patches for BIDS-2.0" + run: | + set -o pipefail + tools/bids-2.0/apply_all 2>&1 | tee /tmp/patch.log + + - name: "Show differences after patching" + run: | + git add . + git diff --cached + + - name: "Commit and push patched version online for possible introspection" + # Run only on a non-merge commit for the PR + if: > + github.repository == 'bids-standard/bids-specification' && + github.event.pull_request.head.ref == 'bids-2.0' && + github.event.pull_request.merge_commit_sha != github.sha + run: | + set -x + commit=$(git rev-parse HEAD) + branch=${GITHUB_HEAD_REF}-patched + git config --global user.email "github-ci@example.com" + git config --global user.name "BIDS 2.0 GitHub CI" + git checkout -b "$branch" + { + echo -e "Applied patches for ${GITHUB_HEAD_REF} to ${commit}\n"; + cat /tmp/patch.log; + } | git commit -F - + git push -f origin "$branch" + + # Setup Python with bst + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: "Install build dependencies" + run: pip install --upgrade build twine + - name: "Build source distribution and wheel" + run: python -m build tools/schemacode + - name: "Check distribution metadata" + run: twine check tools/schemacode/dist/* + - name: "Install bst tools from the build" + run: pip install $( ls tools/schemacode/dist/*.whl )[all] + - name: "Produce dump of the schema as schema.json" + run: bst -v export --output src/schema.json + + - uses: denoland/setup-deno@v1.1.2 + if: "matrix.bids-validator == 'master-deno'" + with: + deno-version: v1.x + + - name: Install BIDS validator (master deno build) + if: "matrix.bids-validator == 'master-deno'" + run: | + pushd .. + # Let's use specific commit for now + # TODO: progress it once in a while + commit=a7b291b882a8c6184219ccb84faae255ba96203a + git clone --depth 1 https://github.com/bids-standard/bids-validator + cd bids-validator + git fetch --depth 1 origin $commit; + echo -e '#!/bin/sh\n'"$PWD/bids-validator/bids-validator-deno \"\$@\"" >| /usr/local/bin/bids-validator + chmod a+x /usr/local/bin/bids-validator + which -a bids-validator + bids-validator --help + popd + + - name: Display versions and environment information + run: | + echo $TZ + date + echo -n "npm: "; npm --version + echo -n "node: "; node --version + echo -n "bids-validator: "; bids-validator --version + echo -n "python: "; python --version + + # Checkout bids-examples + - uses: actions/checkout@v4 + with: + # repository: bids-standard/bids-examples + # For now use the forked repository with support for deno validator + # from https://github.com/bids-standard/bids-examples/pull/435 + repository: yarikoptic/bids-examples + ref: deno-validator + path: bids-examples + + - name: Mark known not yet to be deno-legit BIDS datasets + run: touch {ds000117,ds000246,ds000247,ds000248,eeg_ds003645s_hed_demo,ieeg_motorMiller2007,ieeg_visual}/.SKIP_VALIDATION + shell: bash + working-directory: bids-examples + + - name: Validate using bids-validator without migration + run: ./run_tests.sh + working-directory: bids-examples + + - name: Migrate all BIDS datasets + run: /bin/ls */dataset_description.json | sed -e 's,/.*,,g' | xargs bst migrate-datasets + shell: bash + working-directory: bids-examples + + - name: Show migrated datasets diff + run: git diff HEAD + working-directory: bids-examples + + # TODO: commit as a merge from current state of bids-examples + # and prior bids-2.0 branch there, but overloading with new updated + # state and recording commit hash of bids-specification used. + + - name: Validate all BIDS datasets using bids-validator after migration + run: VALIDATOR_ARGS="--schema file://$PWD/../src/schema.json" bash ./run_tests.sh + working-directory: bids-examples diff --git a/CONTRIBUTING-BIDS2.0.md b/CONTRIBUTING-BIDS2.0.md new file mode 100644 index 0000000000..5e8eae9681 --- /dev/null +++ b/CONTRIBUTING-BIDS2.0.md @@ -0,0 +1,33 @@ +# HOWTO for BIDS 2.0 development branch + +## References + +- Current PR: https://github.com/bids-standard/bids-specification/pull/1775 +- BIDS 2.0 Project: https://github.com/orgs/bids-standard/projects/10 +- Full list of issues to consider: https://github.com/bids-standard/bids-2-devel/issues +- https://github.com/bids-standard/bids-2-devel/issues/57 + +## Instructions + +- **Minimization of "persistent" comments: Do not comment in the main thread of this PR to avoid flooding it** + - Ideally: initiate PR from a feature branch with desired changes so we could concentrate discussion on the topic there + - If needed: start discussion as a comment attached to pertinent location in [diff view](https://github.com/bids-standard/bids-specification/pull/1775/files) so we could eventually resolve it to collapse +- **To contribute to this PR -- submit a PR against the `bids-2.0` branch** + - If you host that feature branch in this repository, it is RECOMMENDED to name it with `bids-2.0-` prefix. + - **WARNING**: this PR can rebase or otherwise modify its set of commits (squash etc), so it is recommended to keep your feature branch also succinct to just few commits so it would be obvious what to rebase on top of rebased [bids-2.0]. + - When changes accepted they would be incorporated without Merge commit and might undergo squashing into just few commits to reflect that change. + - Where relevant do not forget to reference or close an issue from [bids-2-devel/issues](https://github.com/bids-standard/bids-2-devel/issues) +- **Minimization of commits/diff**. To make it manageable to review, diff in this PR should avoid mass changes which could be scripted. + - For scripted changes there would be scripts and "fix up patches" collected (in this PR) under `tools/bids-2.0/` directory. See [tools/bids-2.0/README.md](https://github.com/bids-standard/bids-specification/pull/1775/files) and files under the [tools/bids-2.0/patches/] for more details and example(s). + - Those scripts will be applied on CI and changes pushed to [bids-2.0-patched](https://github.com/bids-standard/bids-specification/tree/bids-2.0) branch. + - you can review [diff bids-2.0..bids-2.0-patched](https://github.com/bids-standard/bids-specification/compare/bids-2.0..bids-2.0-patched) to see if scripts or fixup patches need to be adjusted + - that [bids-2.0-patched] is pushed by `validate-datasets` workflow + - rendered: https://bids-specification.readthedocs.io/en/bids-2.0-patched/ + - if fixes spotted needed on top of [bids-2.0-patched] - they should be committed on top and `git format-patch` and added to `tools/bids-2.0/patches/` or absorbed into already existing patches there in. + +[tools/bids-2.0/README.md]: https://github.com/bids-standard/bids-specification/pull/1775/files +[bids-2.0]: https://github.com/bids-standard/bids-specification/tree/bids-2.0 +[bids-2.0-patched]: https://github.com/bids-standard/bids-specification/tree/bids-2.0-patched +[tools/bids-2.0/]: https://github.com/bids-standard/bids-specification/tree/bids-2.0/tools/bids-2.0/ +[tools/bids-2.0/README.md]: https://github.com/bids-standard/bids-specification/tree/bids-2.0/tools/bids-2.0/README.md +[tools/bids-2.0/patches/]: https://github.com/bids-standard/bids-specification/tree/bids-2.0/tools/bids-2.0/patches diff --git a/tools/bids-2.0/README.md b/tools/bids-2.0/README.md new file mode 100644 index 0000000000..c787bb7c7d --- /dev/null +++ b/tools/bids-2.0/README.md @@ -0,0 +1,20 @@ +This directory to contain various little helpers which would script desired +automated changes (where possible) for migrating specification (not datasets) +to BIDS 2.0. Ideally scripts should have some header pointing to the +underlying issue they are addressing. + +## `apply_all` + +`apply_all` script goes through `patches/` in a (numeric) sorted order +and applies those "patches". + +"patches" could be of two types: + +- an executable -- a script to be executed which introduces the changes. +- `.patch` - a regular patch which needs to be applied using `patch -p1` + +Typically for the same leading index (e.g. `01`) there would be a script and +then a patch to possibly manually tune up remaining changes. + +`apply_all` could also take an index -- then it would stop applying patches +having applied patches up to that index. diff --git a/tools/bids-2.0/apply_all b/tools/bids-2.0/apply_all new file mode 100755 index 0000000000..b486317cab --- /dev/null +++ b/tools/bids-2.0/apply_all @@ -0,0 +1,29 @@ +#!/bin/bash + +set -eu + +# hardcoding for now - relative path from the top +rpath=tools/bids-2.0 + +path="$(dirname "$0")" +cd "$path/../.." # go to the top of bids-specification + +apply_until=${1:-} + +# harmonize appearance +if [ -n "$apply_until" ] ; then + apply_until=$(printf "%02d" "$apply_until") +fi + +/bin/ls "$rpath"/patches/[0-9]* | sort -n | while read p; do + if [ "${p##*.}" == "patch" ]; then + echo "I: apply $p" + patch -p1 < $p + elif [ -x "$p" ] ; then + echo "I: run $p" + $p + else + echo "E: Do not know how to handle patch $p" >&2 + exit 1 + fi +done diff --git a/tools/bids-2.0/patches/01-01-rename_participants_to_subjects b/tools/bids-2.0/patches/01-01-rename_participants_to_subjects new file mode 100755 index 0000000000..7a868fad22 --- /dev/null +++ b/tools/bids-2.0/patches/01-01-rename_participants_to_subjects @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# +# Description: rename all mentionings of participants to subjects +# +# Reference issues: +# - https://github.com/bids-standard/bids-2-devel/issues/14 + +set -eu -o pipefail + +pathspec=( ':(exclude)src/CHANGES.md' ':(exclude)src/appendices/licenses.md' ':(exclude)CODE_OF_CONDUCT.md' ':(exclude)tools/bids-2.0/*' ':(exclude)tools/schemacode/bidsschematools/migrations.py' ) + +# Plural -- no need really for a separate invocation since plural is just singular + s +# git grep -l participants -- "${pathspec[@]}" | xargs sed -i -e 's,participants,subjects,g' + +# Singular -- tricky since we want to exclude some. According to chatgpt no built in negative back lookup +# so suggested to replace with PLACEHOLDER +if git grep -q -l PLACEHOLDER -- "${pathspec[@]}"; then + echo "Assertion error -- PLACEHOLDER already used somehow" >&2 + exit 1 +fi + +git grep -l '[Pp]articipant' -- "${pathspec[@]}" | \ + xargs sed -i -e 's/\(Experiment-\|as reported by the \)participant/PLACEHOLDER#\1#/g' \ + -e 's/participant/subject/g' \ + -e 's/Participant/Subject/g' \ + -e 's/PLACEHOLDER#\([^#]*\)#/\1participant/g' diff --git a/tools/bids-2.0/patches/01-02-rename_participants_to_subjects-fixup.patch b/tools/bids-2.0/patches/01-02-rename_participants_to_subjects-fixup.patch new file mode 100644 index 0000000000..3c90141eef --- /dev/null +++ b/tools/bids-2.0/patches/01-02-rename_participants_to_subjects-fixup.patch @@ -0,0 +1,53 @@ +From 087982854fd000f6cb9d321faf25c68e4a2f1b1c Mon Sep 17 00:00:00 2001 +From: Yaroslav Halchenko +Date: Fri, 12 Apr 2024 15:54:31 -0400 +Subject: [PATCH] Post fixes to apply_all to fix some indentations due to + participant -> subject + +--- + src/appendices/coordinate-systems.md | 6 +++--- + src/schema/README.md | 4 ++-- + 2 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/src/appendices/coordinate-systems.md b/src/appendices/coordinate-systems.md +index 9eb57359..9cb031b0 100644 +--- a/src/appendices/coordinate-systems.md ++++ b/src/appendices/coordinate-systems.md +@@ -233,10 +233,10 @@ described in [Common file level metadata fields][common file level metadata fiel + + In the case of multiple study templates, additional names may need to be defined. + +-| **Coordinate System** | **Description** | +-| --------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ++| **Coordinate System** | **Description** | ++| --------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | + | individual | Subject specific anatomical space (for example derived from T1w and/or T2w images). This coordinate system requires specifying an additional, subject-specific file to be fully defined. In context of surfaces this space has been referred to as `fsnative`. | +-| study | Custom space defined using a group/study-specific template. This coordinate system requires specifying an additional file to be fully defined. | ++| study | Custom space defined using a group/study-specific template. This coordinate system requires specifying an additional file to be fully defined. | + + ### Non-template coordinate system identifiers + +diff --git a/src/schema/README.md b/src/schema/README.md +index 0a6d7a3c..7587f01d 100644 +--- a/src/schema/README.md ++++ b/src/schema/README.md +@@ -243,7 +243,7 @@ and provide the *namespace* in which expressions are evaluated. + The following operators should be defined by an interpreter: + + | Operator | Definition | Example | +-| ----------- | ------------------------------------------------------------- | --------------------------------------------- | ++| ----------- | ------------------------------------------------------------- |-----------------------------------------------| + | `==` | equality | `suffix == "T1w"` | + | `!=` | inequality | `entities.task != "rest"` | + | `<`/`>` | less-than / greater-than | `sidecar.EchoTime < 0.5` | +@@ -253,7 +253,7 @@ The following operators should be defined by an interpreter: + | `&&` | conjunction, true if both RHS and LHS are true | `"Units" in sidecar && sidecar.Units == "mm"` | + | `\|\|` | disjunction, true if either RHS or LHS is true | `a < mn \|\| a > mx` | + | `.` | object query, returns value of subfield | `sidecar.Units` | +-| `[]` | array/string index, returns value of Nth element (0-indexed) | `columns.subject_label[0]` | ++| `[]` | array/string index, returns value of Nth element (0-indexed) | `columns.subject_label[0]` | + | `+` | numeric addition / string concatenation | `x + 1`, `stem + "suffix"` | + | `-`/`*`/`/` | numeric operators (division coerces to float) | `length(array) - 2`, `x * 2`, `1 / 2 == 0.5` | + +-- +2.43.0 diff --git a/tools/schemacode/bidsschematools/__main__.py b/tools/schemacode/bidsschematools/__main__.py index c8d554cf0a..87cd4387ab 100644 --- a/tools/schemacode/bidsschematools/__main__.py +++ b/tools/schemacode/bidsschematools/__main__.py @@ -3,6 +3,7 @@ import click +from .migrations import migrate_dataset as migrate_dataset_ from .schema import export_schema, load_schema @@ -32,5 +33,22 @@ def export(ctx, schema, output): fobj.write(text) +@cli.command() +@click.argument("dataset_paths", type=click.Path(dir_okay=True, file_okay=False), nargs=-1) +@click.pass_context +def migrate_datasets(ctx, dataset_paths): + """Migrate BIDS 1.x dataset to BIDS 2.0 + + Example of running in bids-examples to apply to all and review the diff + + \b + git clean -dfx && git reset --hard \\ + && { /bin/ls */dataset_description.json | sed -e 's,/.*,,g' | xargs bst migrate-datasets } \\ + && git add . && git diff --cached + """ + for dataset_path in dataset_paths: + migrate_dataset_(dataset_path) + + if __name__ == "__main__": cli() diff --git a/tools/schemacode/bidsschematools/conftest.py b/tools/schemacode/bidsschematools/conftest.py index 3154e3e5cf..9ed4ec7a11 100644 --- a/tools/schemacode/bidsschematools/conftest.py +++ b/tools/schemacode/bidsschematools/conftest.py @@ -1,5 +1,6 @@ import logging import tempfile +from pathlib import Path from subprocess import run try: @@ -95,10 +96,27 @@ def schema_obj(): return schema.load_schema() -bids_examples = get_gitrepo_fixture( +bids_examples_pristine = get_gitrepo_fixture( "https://github.com/bids-standard/bids-examples", whitelist=BIDS_SELECTION, ) + + +@pytest.fixture(scope="session") +def bids_examples(bids_examples_pristine): + """Migrates examples to BIDS 2.0 before giving it back to tests""" + + from bidsschematools.migrations import migrate_dataset + + # TODO: potentially make it recursive in finding derivatives, + # as e.g. we have in ./ieeg_epilepsy_ecog/derivatives/freesurfer/participants.tsv + for item in Path(bids_examples_pristine).iterdir(): + if item.is_dir() and (item / "dataset_description.json").exists(): + migrate_dataset(item) + + return bids_examples_pristine + + bids_error_examples = get_gitrepo_fixture( "https://github.com/bids-standard/bids-error-examples", whitelist=BIDS_ERROR_SELECTION, diff --git a/tools/schemacode/bidsschematools/migrations.py b/tools/schemacode/bidsschematools/migrations.py new file mode 100644 index 0000000000..1cf0441cf1 --- /dev/null +++ b/tools/schemacode/bidsschematools/migrations.py @@ -0,0 +1,108 @@ +import json +import os +import re +import subprocess +from functools import lru_cache +from itertools import chain +from pathlib import Path +from typing import Optional + +import bidsschematools as bst +import bidsschematools.utils + +lgr = bst.utils.get_logger() + +TARGET_VERSION = "2.0.0" + + +class NotBIDSDatasetError(Exception): + pass + + +def get_bids_version(dataset_path: Path) -> str: + dataset_description = dataset_path / "dataset_description.json" + if not dataset_description.exists(): + raise NotBIDSDatasetError(f"dataset_description.json not found in {dataset_path}") + return json.loads(dataset_description.read_text())["BIDSVersion"] + + +def migrate_version(dataset_path: Path): + """TODO: modify BIDSVersion in dataset_description.json + + Should do as a string manipulation not json to minimize + the diff""" + dataset_description = dataset_path / "dataset_description.json" + # Read/write as bytes so we do not change Windows line endings + content = dataset_description.read_bytes().decode() + old_version = json.loads(content)["BIDSVersion"] + migrated = re.sub(rf'("BIDSVersion":\s*)"{old_version}', r'\1"' + TARGET_VERSION, content) + assert json.loads(migrated)["BIDSVersion"] == TARGET_VERSION + dataset_description.write_bytes(migrated.encode()) + + +def migrate_participants(dataset_path: Path): + extensions = [".tsv", ".json"] + + for ext in extensions: + old_file = dataset_path / f"participants{ext}" + new_file = dataset_path / f"subjects{ext}" + if old_file.exists(): + rename_path(old_file, new_file) + lgr.info(f" - renamed {old_file} to {new_file}") + if ext == ".tsv": + # Do manual .decode() and .encode() to avoid changing line endings + migrated = ( + new_file.read_bytes().decode().replace("participant_id", "subject_id", 1) + ) + new_file.write_bytes(migrated.encode()) + lgr.info(f" - migrated content in {new_file}") + + +def migrate_dataset(dataset_path): + lgr.info(f"Migrating dataset at {dataset_path}") + dataset_path = Path(dataset_path) + try: + if get_bids_version(dataset_path) == TARGET_VERSION: + lgr.info(f"Dataset already at version {TARGET_VERSION}") + return + except NotBIDSDatasetError: + lgr.warning("%s not a BIDS dataset, skipping", dataset_path) + return + # TODO: possibly add a check for BIDS version in dataset_description.json + # and skip if already 2.0, although ideally transformations + # should also be indepotent + for migration in [ + migrate_participants, + migrate_version, + ]: + lgr.info(f" - applying migration {migration.__name__}") + migration(dataset_path) + + +@lru_cache +def path_has_git(path: Path) -> bool: + return (path / ".git").exists() + + +def git_topdir(path: Path) -> Optional[Path]: + """Return top-level directory of a git repository containing path, + or None if not under git.""" + path = path.absolute() + for p in chain([path] if path.is_dir() else [], path.parents): + if path_has_git(p): + return p + return None + + +def rename_path(old_path: Path, new_path: Path): + """git aware rename. If under git, use git mv, otherwise just os.rename.""" + # if under git, use git mv but ensure that on border + # crossing (should just use DataLad and `mv` and it would do the right thing!) + if (old_git_top := git_topdir(old_path)) != (new_git_top := git_topdir(new_path)): + raise NotImplementedError( + f"Did not implement moving across git repo boundaries {old_git_top} -> {new_git_top}" + ) + if old_git_top: + subprocess.run(["git", "mv", str(old_path), str(new_path)], check=True, cwd=old_git_top) + else: + os.rename(old_path, new_path)