Skip to content

Commit

Permalink
Merge branch 'main' into pre-commit-ci-update-config
Browse files Browse the repository at this point in the history
  • Loading branch information
sappelhoff authored Jan 1, 2025
2 parents e1bd799 + ad67a04 commit a8b8f65
Show file tree
Hide file tree
Showing 12 changed files with 66 additions and 27 deletions.
5 changes: 5 additions & 0 deletions doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,11 @@
# These patterns also affect html_static_path and html_extra_path
exclude_patterns = ["auto_examples/index.rst", "_build", "Thumbs.db", ".DS_Store"]

nitpick_ignore_regex = [
# needs https://github.com/sphinx-doc/sphinx/issues/13178
("py:class", r".*pathlib\._local\.Path"),
]

# HTML options (e.g., theme)
html_show_sourcelink = False
html_copy_source = False
Expand Down
3 changes: 3 additions & 0 deletions doc/whats_new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ The following authors had contributed before. Thank you for sticking around!

* `Stefan Appelhoff`_
* `Daniel McCloy`_
* `Scott Huberty`_

Detailed list of changes
~~~~~~~~~~~~~~~~~~~~~~~~
Expand All @@ -47,6 +48,8 @@ Detailed list of changes
^^^^^^^^^^^^

- :func:`mne_bids.read_raw_bids` can optionally return an ``event_id`` dictionary suitable for use with :func:`mne.events_from_annotations`, and if a ``values`` column is present in ``events.tsv`` it will be used as the source of the integer event ID codes, by `Daniel McCloy`_ (:gh:`1349`)
- BIDS dictates that the recording entity should be displayed as "_recording-" in the filename. This PR makes :class:`mne_bids.BIDSPath` correctly display "_recording-" (instead of "_rec-") in BIDSPath.fpath. By `Scott Huberty`_ (:gh:`1348`)
- :func:`mne_bids.make_dataset_description` now correctly encodes the dataset description as UTF-8 on disk, by `Scott Huberty`_ (:gh:`1357`)

⚕️ Code health
^^^^^^^^^^^^^^
Expand Down
4 changes: 2 additions & 2 deletions examples/convert_eeg_to_bids.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
# to the "eyes closed" task.
subject = 1
run = 2
eegbci.load_data(subject=subject, runs=run, update_path=True)
eegbci.load_data(subjects=subject, runs=run, update_path=True)

# %%
# Let's see whether the data has been downloaded using a quick visualization
Expand Down Expand Up @@ -94,7 +94,7 @@
# It prevents the data from being loaded and modified when converting to BIDS.

# Load the data from "2 minutes eyes closed rest"
edf_path = eegbci.load_data(subject=subject, runs=run)[0]
edf_path = eegbci.load_data(subjects=subject, runs=run)[0]
raw = mne.io.read_raw_edf(edf_path, preload=False)
raw.info["line_freq"] = 50 # specify power line frequency as required by BIDS

Expand Down
4 changes: 2 additions & 2 deletions examples/convert_group_studies.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
run_map = dict(zip(runs, range(1, 4)))

for subject_id in subject_ids:
eegbci.load_data(subject=subject_id, runs=runs, update_path=True)
eegbci.load_data(subjects=subject_id, runs=runs, update_path=True)

# get path to MNE directory with the downloaded example data
mne_data_dir = mne.get_config("MNE_DATASETS_EEGBCI_PATH")
Expand Down Expand Up @@ -81,7 +81,7 @@
bids_list = list()
for subject_id in subject_ids:
for run in runs:
raw_fname = eegbci.load_data(subject=subject_id, runs=run)[0]
raw_fname = eegbci.load_data(subjects=subject_id, runs=run)[0]
raw = mne.io.read_raw_edf(raw_fname)
raw.info["line_freq"] = 50 # specify power line frequency
raw_list.append(raw)
Expand Down
2 changes: 1 addition & 1 deletion mne_bids/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@
"run": "run",
"proc": "processing",
"space": "space",
"rec": "recording",
"recording": "recording",
"split": "split",
"desc": "description",
}
Expand Down
8 changes: 6 additions & 2 deletions mne_bids/path.py
Original file line number Diff line number Diff line change
Expand Up @@ -2088,7 +2088,7 @@ def get_entity_vals(
):
continue
if ignore_recordings and any(
[f"_rec-{a}_" in filename.stem for a in ignore_recordings]
[f"_recording-{a}_" in filename.stem for a in ignore_recordings]
):
continue
if ignore_splits and any(
Expand Down Expand Up @@ -2280,7 +2280,11 @@ def _filter_fnames(
r"_proc-(" + "|".join(processing) + ")" if processing else r"(|_proc-([^_]+))"
)
space_str = r"_space-(" + "|".join(space) + ")" if space else r"(|_space-([^_]+))"
rec_str = r"_rec-(" + "|".join(recording) + ")" if recording else r"(|_rec-([^_]+))"
rec_str = (
r"_recording-(" + "|".join(recording) + ")"
if recording
else r"(|_recording-([^_]+))"
)
split_str = r"_split-(" + "|".join(split) + ")" if split else r"(|_split-([^_]+))"
desc_str = (
r"_desc-(" + "|".join(description) + ")" if description else r"(|_desc-([^_]+))"
Expand Down
38 changes: 24 additions & 14 deletions mne_bids/read.py
Original file line number Diff line number Diff line change
Expand Up @@ -531,7 +531,8 @@ def _handle_events_reading(events_fname, raw):
logger.info(f"Reading events from {events_fname}.")
events_dict = _from_tsv(events_fname)

# drop events where onset is n/a
# drop events where onset is n/a; we can't annotate them and thus don't need entries
# for them in event_id either
events_dict = _drop(events_dict, "n/a", "onset")

# Get event descriptions. Use `trial_type` column if available.
Expand All @@ -547,9 +548,11 @@ def _handle_events_reading(events_fname, raw):
# If we lack proper event descriptions, perhaps we have at least an event value?
elif "value" in events_dict:
trial_type_col_name = "value"
# Worst case: all events will become `n/a` and all values will be `1`
# Worst case: all events become `n/a` and all values become `1`
else:
trial_type_col_name = None
descrs = np.full(len(events_dict["onset"]), "n/a")
event_id = {descrs[0]: 1}

if trial_type_col_name is not None:
# Drop events unrelated to a trial type
Expand All @@ -569,26 +572,33 @@ def _handle_events_reading(events_fname, raw):
"Creating hierarchical event names."
)
for ii in idx:
value = values[ii]
value = "na" if value == "n/a" else value
# strip `/` from `n/a` before incorporating into trial type name
value = values[ii] if values[ii] != "n/a" else "na"
new_name = f"{trial_type}/{value}"
logger.info(f" Renaming event: {trial_type} -> {new_name}")
trial_types[ii] = new_name
# drop rows where `value` is `n/a` & convert remaining `value` to int (only
# when making our `event_id` dict; `value = n/a` doesn't prevent annotation)
# make a copy with rows dropped where `value` is `n/a` (only for making our
# `event_id` dict; `value = n/a` doesn't prevent making annotations).
culled = _drop(events_dict, "n/a", "value")
event_id = dict(
zip(culled[trial_type_col_name], np.asarray(culled["value"], dtype=int))
)
# Often (but not always!) the `value` column was written by MNE-BIDS and
# represents integer event IDs (as would be found in MNE-Python events
# arrays / event_id dicts). But in case not, let's be defensive:
culled_vals = culled["value"]
try:
culled_vals = np.asarray(culled_vals, dtype=float)
except ValueError: # contained strings or complex numbers
pass
else:
try:
culled_vals = culled_vals.astype(int)
except ValueError: # numeric, but has some non-integer values
pass
event_id = dict(zip(culled[trial_type_col_name], culled_vals))
else:
event_id = dict(zip(trial_types, np.arange(len(trial_types))))
descrs = np.asarray(trial_types, dtype=str)

# Worst case: all events become `n/a` and all values become `1`
else:
descrs = np.full(len(events_dict["onset"]), "n/a")
event_id = {descrs[0]: 1}
# Deal with "n/a" strings before converting to float
# convert onsets & durations to floats ("n/a" onsets were already dropped)
ons = np.asarray(events_dict["onset"], dtype=float)
durs = np.array(
[0 if du == "n/a" else du for du in events_dict["duration"]], dtype=float
Expand Down
4 changes: 2 additions & 2 deletions mne_bids/tests/test_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -857,7 +857,7 @@ def test_make_filenames():
datatype="ieeg",
)
expected_str = (
"sub-one_ses-two_task-three_acq-four_run-1_proc-six_rec-seven_ieeg.json"
"sub-one_ses-two_task-three_acq-four_run-1_proc-six_recording-seven_ieeg.json"
)
assert BIDSPath(**prefix_data).basename == expected_str
assert (
Expand Down Expand Up @@ -896,7 +896,7 @@ def test_make_filenames():
basename = BIDSPath(**prefix_data, check=False)
assert (
basename.basename
== "sub-one_ses-two_task-three_acq-four_run-1_proc-six_rec-seven_ieeg.h5"
== "sub-one_ses-two_task-three_acq-four_run-1_proc-six_recording-seven_ieeg.h5"
)

# what happens with scans.tsv file
Expand Down
10 changes: 10 additions & 0 deletions mne_bids/tests/test_read.py
Original file line number Diff line number Diff line change
Expand Up @@ -579,6 +579,16 @@ def test_handle_events_reading(tmp_path):
ev_arr, ev_dict = mne.events_from_annotations(raw)
assert event_id == ev_dict == {"n/a": 1} # fallback behavior

# Test with only a (non-numeric) `value` column
events = {"onset": [10, 15], "duration": [1, 1], "value": ["A", "B"]}
events_fname = tmp_path / "bids6" / "sub-01_task-test_events.tsv"
events_fname.parent.mkdir()
_to_tsv(events, events_fname)
raw, event_id = _handle_events_reading(events_fname, raw)
# don't pass event_id to mne.events_from_annotations; its values are strings
assert event_id == {"A": "A", "B": "B"}
assert raw.annotations.description.tolist() == ["A", "B"]


@pytest.mark.filterwarnings(warning_str["channel_unit_changed"])
@testing.requires_testing_data
Expand Down
11 changes: 9 additions & 2 deletions mne_bids/tests/test_write.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ def test_make_dataset_description(tmp_path, monkeypatch):
make_dataset_description(
path=tmp_path,
name="tst2",
authors="MNE B., MNE P.",
authors="MNE B., MNE P., MNE Ł.",
funding="GSOC2019, GSOC2021",
references_and_links="https://doi.org/10.21105/joss.01896",
dataset_type="derivative",
Expand All @@ -386,7 +386,14 @@ def test_make_dataset_description(tmp_path, monkeypatch):

with open(op.join(tmp_path, "dataset_description.json"), encoding="utf-8") as fid:
dataset_description_json = json.load(fid)
assert dataset_description_json["Authors"] == ["MNE B.", "MNE P."]
assert dataset_description_json["Authors"] == ["MNE B.", "MNE P.", "MNE Ł."]
# If the text on disk is unicode, json.load will convert it. So let's test that
# the text was encoded correctly on disk.
fid.seek(0)
# don't use json.load here, as it will convert unicode to str
dataset_description_string = fid.read()
# Check that U+0141 was correctly encoded as Ł on disk
assert "MNE Ł." in dataset_description_string

# Check we raise warnings and errors where appropriate
with pytest.raises(
Expand Down
2 changes: 1 addition & 1 deletion mne_bids/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ def _write_json(fname, dictionary, overwrite=False):
f'"{fname}" already exists. Please set overwrite to True.'
)

json_output = json.dumps(dictionary, indent=4)
json_output = json.dumps(dictionary, indent=4, ensure_ascii=False)
with open(fname, "w", encoding="utf-8") as fid:
fid.write(json_output)
fid.write("\n")
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[build-system]
build-backend = "hatchling.build"
requires = ["hatch-vcs", "hatchling"]
requires = ["hatch-vcs", "hatchling==1.26.3"]

[project]
authors = [{name = "The MNE-BIDS developers"}]
Expand Down

0 comments on commit a8b8f65

Please sign in to comment.