catalystneuro · weiglszonja · Nov 15, 2024 · Nov 5, 2024 · Nov 5, 2024 · Nov 5, 2024
diff --git a/...ple_lab_to_nwb/schierek_embargo_2024/extractors/schierek_embargo_2024_sortingextractor.py b/...ple_lab_to_nwb/schierek_embargo_2024/extractors/schierek_embargo_2024_sortingextractor.py
@@ -51,6 +51,11 @@ def __init__(self, file_path: FilePath, sampling_frequency: float):
         # Rename to 'ch' to match Phy output
         self.set_property(key="ch", values=channel_ids)
 
+        # add channel_depth
+        if "channel_depth" in units_data["SU"][0]:
+            channel_depths = [units_data["SU"][i]["channel_depth"] for i in range(num_units)]
+            self.set_property(key="channel_depth_um", values=channel_depths)
+
         electrode_properties_mapping = dict(
             channel_depth="channel_depth_um",
             location="brain_area",

diff --git a/..._nwb/schierek_embargo_2024/interfaces/schierek_embargo_2024_processedbehaviorinterface.py b/..._nwb/schierek_embargo_2024/interfaces/schierek_embargo_2024_processedbehaviorinterface.py
@@ -4,9 +4,10 @@
 from typing import Optional, Union
 
 import numpy as np
-from warnings import warn
 from ndx_structured_behavior.utils import loadmat
 from neuroconv import BaseDataInterface
+from neuroconv.utils import get_base_schema
+from pynwb.epoch import TimeIntervals
 from pynwb.file import NWBFile
 
 
@@ -27,10 +28,13 @@ def __init__(
         file_path: Union[str, Path]
             Path to the .mat file containing the processed behavior data.
         default_struct_name: str, optional
-            The struct name to load from the .mat file, default is "A".
+            The struct name to load from the .mat file, default is "S".
         """
 
         self.default_struct_name = default_struct_name
+        self._center_port_column_name = "Cled"
+        self._side_name_mapping = {"L": "Left", "R": "Right"}
+        self._block_name_mapping = {1: "Mixed", 2: "High", 3: "Low"}
         super().__init__(file_path=file_path, verbose=verbose)
 
     def _read_file(self, file_path: Union[str, Path]) -> dict:
@@ -40,42 +44,117 @@ def _read_file(self, file_path: Union[str, Path]) -> dict:
 
         return behavior_data[self.default_struct_name]
 
+    def get_metadata_schema(self) -> dict:
+        """
+        Return the metadata schema extended with a 'Behavior' section.
+
+        The 'Behavior' section starts from the base schema tagged "Behavior" and requires a
+        'TimeIntervals' object whose 'name' and 'description' properties are strings.
+        """
+        metadata_schema = super().get_metadata_schema()
+
+        # Schema of the table that holds the processed trials.
+        time_intervals_schema = {
+            "type": "object",
+            "properties": {"name": {"type": "string"}, "description": {"type": "string"}},
+        }
+
+        behavior_schema = get_base_schema(tag="Behavior")
+        behavior_schema["required"] = ["TimeIntervals"]
+        behavior_schema["properties"] = {"TimeIntervals": time_intervals_schema}
+
+        metadata_schema["properties"]["Behavior"] = behavior_schema
+        return metadata_schema
+
+    def get_metadata(self) -> dict:
+        """Return the parent metadata with the default name and description of the processed trials table."""
+        time_intervals_metadata = {
+            "name": "processed_trials",
+            "description": "Contains the processed Bpod trials.",
+        }
+
+        metadata = super().get_metadata()
+        metadata["Behavior"] = {"TimeIntervals": time_intervals_metadata}
+        return metadata
+
+    def _get_aligned_center_port_times(self):
+        """
+        Return the first center port time of every trial from the processed behavior data.
+
+        Raises
+        ------
+        ValueError
+            If the center port column is not present in the processed behavior data.
+        """
+        processed_data = self._read_file(file_path=self.source_data["file_path"])
+        if self._center_port_column_name not in processed_data:
+            raise ValueError(f"'{self._center_port_column_name}' column not found in processed behavior data.")
+
+        # Each entry holds the times of one trial, only the first one is kept.
+        return [trial_times[0] for trial_times in processed_data[self._center_port_column_name]]
+
     def add_to_nwbfile(
         self,
         nwbfile: NWBFile,
         metadata: dict,
         column_name_mapping: Optional[dict] = None,
         column_descriptions: Optional[dict] = None,
+        trial_start_times: Optional[list] = None,
+        trial_stop_times: Optional[list] = None,
     ) -> None:
+        """
+        Add the processed behavior data to the NWB file as a TimeIntervals table.
+
+        The table is created from ``metadata["Behavior"]["TimeIntervals"]`` and gets one row per trial.
+        When the NWB file already has a trials table, its start and stop times are reused (any provided
+        'trial_start_times' and 'trial_stop_times' are ignored); otherwise both must be provided.
+
+        Parameters
+        ----------
+        nwbfile: NWBFile
+            The NWB file to which the processed trials table is added.
+        metadata: dict
+            The metadata, must contain the 'TimeIntervals' entry under 'Behavior'.
+        column_name_mapping: dict, optional
+            Maps the field names in the processed data to the column names in the table.
+            When provided, only the mapped fields that are present in the data are added,
+            otherwise all fields in the data are added.
+        column_descriptions: dict, optional
+            Maps the field names in the processed data to the column descriptions,
+            "no description" is used for fields without a description.
+        trial_start_times: list, optional
+            The start time of each trial, required when the NWB file has no trials table.
+        trial_stop_times: list, optional
+            The stop time of each trial, required when the NWB file has no trials table.
+        """
         data = self._read_file(file_path=self.source_data["file_path"])
 
+        time_intervals_metadata = metadata["Behavior"]["TimeIntervals"]
+        trials_table = TimeIntervals(**time_intervals_metadata)
+
         if "RewardedSide" in data:
-            side_mapping = {"L": "Left", "R": "Right"}
-            data["RewardedSide"] = [side_mapping[side] for side in data["RewardedSide"]]
+            data["RewardedSide"] = [self._side_name_mapping[side] for side in data["RewardedSide"]]
+
+        if "Block" in data:
+            data["Block"] = [self._block_name_mapping[block] for block in data["Block"]]
+
+        # The number of trials is taken from the 'NoseInCenter' field, which must be present.
+        num_trials = len(data["NoseInCenter"])
+        if "wait_thresh" in data:
+            # wait_thresh is a scalar, convert it to a list
+            data["wait_thresh"] = [data["wait_thresh"]] * num_trials
 
         columns_with_boolean = ["hits", "vios", "optout"]
         for column in columns_with_boolean:
             if column in data:
                 data[column] = list(np.array(data[column]).astype(bool))
 
-        columns_to_add = column_name_mapping.keys() if column_name_mapping is not None else data.keys()
+        # Materialize the keys as a list: a 'dict_keys' view has no 'remove' method, which is
+        # needed below to drop the center port column once it is split into two columns.
+        columns_to_add = list(data.keys())
+        if column_name_mapping is not None:
+            columns_to_add = [column for column in column_name_mapping.keys() if column in data.keys()]
+
+        if nwbfile.trials is None:
+            assert trial_start_times is not None, "'trial_start_times' must be provided if trials table is not added."
+            assert trial_stop_times is not None, "'trial_stop_times' must be provided if trials table is not added."
+            assert (
+                len(trial_start_times) == num_trials
+            ), f"Length of 'trial_start_times' ({len(trial_start_times)}) must match the number of trials ({num_trials})."
+            assert (
+                len(trial_stop_times) == num_trials
+            ), f"Length of 'trial_stop_times' ({len(trial_stop_times)}) must match the number of trials ({num_trials})."
+        else:
+            # Reuse the times of the trials table that is already in the NWB file.
+            trial_start_times = nwbfile.trials["start_time"][:]
+            trial_stop_times = nwbfile.trials["stop_time"][:]
 
-        trials = nwbfile.trials
-        if trials is None:
-            raise ValueError("Trials table not found in NWB file.")
+        for start_time, stop_time in zip(trial_start_times, trial_stop_times):
+            trials_table.add_row(
+                start_time=start_time,
+                stop_time=stop_time,
+                check_ragged=False,
+            )
+
+        # Split the center port column into separate onset and offset time columns
+        # (first and second element of each trial's entry).
+        if self._center_port_column_name in columns_to_add:
+            columns_to_add.remove(self._center_port_column_name)
+            trials_table.add_column(
+                name="center_poke_onset_time",
+                description="The time of center port LED on for each trial.",
+                data=[center_poke_times[0] for center_poke_times in data[self._center_port_column_name]],
+            )
+            trials_table.add_column(
+                name="center_poke_offset_time",
+                description="The time of center port LED off for each trial.",
+                data=[center_poke_times[1] for center_poke_times in data[self._center_port_column_name]],
+            )
 
         for column_name in columns_to_add:
-            if column_name not in data:
-                warn(f"Column '{column_name}' not found in processed behavior data.", UserWarning)
-                continue
             name = column_name_mapping.get(column_name, column_name) if column_name_mapping is not None else column_name
             description = (
                 column_descriptions.get(column_name, "no description")
                 if column_descriptions is not None
                 else "no description"
             )
-            trials.add_column(
+            trials_table.add_column(
                 name=name,
                 description=description,
                 data=data[column_name],
             )
+
+        nwbfile.add_time_intervals(trials_table)
diff --git a/...le_lab_to_nwb/schierek_embargo_2024/metadata/schierek_embargo_2024_behavior_metadata.yaml b/...le_lab_to_nwb/schierek_embargo_2024/metadata/schierek_embargo_2024_behavior_metadata.yaml
@@ -1,6 +1,11 @@
 Behavior:
+  TimeIntervals:
+    name: processed_trials
+    description: Contains the processed trials.
+  # The metadata for the raw Bpod trials.
   TrialsTable:
     description: |
+      Contains the raw Bpod trials.
       LED illumination from the center port indicated that the animal could initiate a trial by poking its nose in that
       port - upon trial initiation the center LED turned off. While in the center port, rats needed to maintain center
       fixation for a duration drawn uniformly from [0.8, 1.2] seconds. During the fixation period, a tone played from

diff --git a/src/constantinople_lab_to_nwb/schierek_embargo_2024/schierek_embargo_2024_convert_session.py b/src/constantinople_lab_to_nwb/schierek_embargo_2024/schierek_embargo_2024_convert_session.py
@@ -109,8 +109,8 @@ def session_to_nwb(
     ]
     conversion_options.update(dict(RawBehavior=dict(task_arguments_to_exclude=task_arguments_to_exclude)))
 
-    recording_folder_name = recording_folder_path.stem
-    subject_id, session_id = recording_folder_name.split("_", maxsplit=1)
+    subject_id, session_id = Path(raw_behavior_file_path).stem.split("_", maxsplit=1)
+    protocol = session_id.split("_")[0]
 
     converter_kwargs = dict(source_data=source_data)
 
@@ -132,7 +132,7 @@ def session_to_nwb(
     metadata["NWBFile"].update(
         session_start_time=session_start_time.replace(tzinfo=tzinfo),
         session_id=session_id,
-        # TODO: add protocol name for behavior task
+        protocol=protocol,
     )
 
     # Update default metadata with the editable in the corresponding yaml file
@@ -168,6 +168,15 @@ def session_to_nwb(
 
     # The column name mapping is used to rename the columns in the processed data to more descriptive column names. (optional)
     column_name_mapping = dict(
+        NoseInCenter="nose_in_center",
+        TrainingStage="training_stage",
+        Block="block_type",
+        BlockLengthAd="num_trials_in_adaptation_blocks",
+        BlockLengthTest="num_trials_in_test_blocks",
+        ProbCatch="catch_percentage",
+        RewardDelay="reward_delay",
+        RewardAmount="reward_volume_ul",
+        WaitForPoke="wait_for_center_poke",
         hits="is_rewarded",
         vios="is_violation",
         optout="is_opt_out",
@@ -187,9 +196,18 @@ def session_to_nwb(
     )
     # The column descriptions are used to add descriptions to the columns in the processed data. (optional)
     column_descriptions = dict(
+        NoseInCenter="The time in seconds when the animal is required to maintain center port to initiate the trial (uniformly drawn from 0.8 - 1.2 seconds).",
+        TrainingStage="The stage of the training.",
+        Block="The block type (High, Low or Test). High and Low blocks are high reward (20, 40, or 80μL) or low reward (5, 10, or 20μL) blocks. Test blocks are mixed blocks.",
+        BlockLengthAd="The number of trials in each high reward (20, 40, or 80μL) or low reward (5, 10, or 20μL) blocks.",
+        BlockLengthTest="The number of trials in each mixed blocks.",
+        ProbCatch="The percentage of catch trials.",
+        RewardDelay="The delay in seconds to receive reward, drawn from exponential distribution with mean = 2.5 seconds.",
+        RewardAmount="The volume of reward in microliters.",
         hits="Whether the subject received reward for each trial.",
         vios="Whether the subject violated the trial by not maintaining center poke for the time required by 'nose_in_center'.",
         optout="Whether the subject opted out for each trial.",
+        WaitForPoke="The time (s) between side port poke and center poke.",
         wait_time="The wait time for the subject for for each trial in seconds, after removing outliers."
         " For hit trials (when reward was delivered) the wait time is equal to the reward delay."
         " For opt-out trials, the wait time is equal to the time waited from trial start to opting out.",

diff --git a/src/constantinople_lab_to_nwb/schierek_embargo_2024/schierek_embargo_2024_notes.md b/src/constantinople_lab_to_nwb/schierek_embargo_2024/schierek_embargo_2024_notes.md
@@ -30,3 +30,107 @@ The "SU" struct is a cell array of all individual cells simultaneously recorded
   - `umDistFromL1` – distance from L1 in microns
   - `AP` – anterior/posterior neuropixels probe location relative to Bregma
   - `ML` – medial/lateral neuropixels probe location relative to Bregma
+
+### Processed behavior data
+
+The processed behavior data is stored in custom .mat files (e.g. `J076_2023-12-12.mat`) with the following fields:
+
+- `S`
+  - `NoseInCenter` – time (s) rat was required to maintain center port to initiate the trial, uniformly drawn from [0.8 - 1.2] s (1 x ntrials). (same as `nose_in_center` in the trials table)
+  - `TrainingStage` - vector for the training stage for each trial included. Value of 9 corresponds to stage 8 described in methods (1 x ntrials)
+  - `Block` – block on that trial. 1: Mix block, 2: High block, 3: Low block (1 x ntrials).
+  - `BlockLengthAd` - number of trials in each high or low blocks. Uniformly 40. (1 x ntrials). Same as adapt_block in A_Structs
+  - `BlockLengthTest` - number of trials in each mixed blocks. Uniformly 40. (1 x ntrials). Same as test_block in A_Structs
+  - `ProbCatch` - catch probability for that trial (1 x ntrials). Same as prob-catch in the A_Struct
+  - `RewardDelay` - delay (s) on that trial to receive reward. Set to 100 for catch trials. Drawn from exponential distribution with mean = 2.5 s (1 x ntrials). Same as reward_delay in the A_struct.
+  - `RewardAmount` - reward offered (uL) on that trial. [5 10 20 40 80] for males and some females, [4 8 16 32 64] for some females (1 x ntrials). Same as reward in A_struct.
+  - `RewardedSide` – side of the offered reward (1 x ntrials)
+  - `Hits` - logical vector for whether the rat received reward on that trial. True = reward was delivered. False = catch trials or violation trials (1 x ntrials). Same as hits in the A_struct.
+  - `ReactionTime` - The reaction time in seconds
+  - `Vios` - logical vector for whether the rat violated on that trial - did not maintain center poke for the time required by `NoseInCenter`. (1 x ntrials). Same as vios in the A_struct.
+  - `Optout` – logical vector for whether the rat opted out of that trial. May be catch trial or optional opt outs (ntrials x 1). Same as optout in A_Struct
+  - `WaitForPoke` - The time (s) between side port poke and center poke.
+  - `wait_time` - wait time for the rat on that trial, after removing outliers (set by wait_thresh). For hit trials (reward was delivered), wait_time = reward_delay. For opt-out trials, wait_time = time waited from trial start to opting out (1 x ntrials)
+  - `iti` - time to initiate trial (s). Time between the end of the consummatory period and the time to initiate the next trial (1 x ntrials). Same as ITI in A_struct.
+  - `Cled` – Time of center light on/off for each trial (2 x ntrials)
+  - `Lled` – Time of left light on/off for each trial (n x ntrials)
+  - `l_opt` – Time of left port entered/exited for each trial (n x ntrials)
+  - `Rled` – Time of right light on/off for each trial (n x ntrials)
+  - `r_opt` – Time of right port entered/exited for each trial (n x ntrials)
+  - `recordingLength` – time duration of the entire recording
+  - `wait_thresh` – time threshold for wait times of engagement for this session.
+
+#### TimeIntervals
+
+We are adding the processed trials data to the NWB file as [TimeIntervals](https://pynwb.readthedocs.io/en/stable/pynwb.epoch.html).
+The `processed_trials` table will be stored in the `intervals` group in the NWB file.
+
+The `schierek_embargo_2024.session_to_nwb()` function uses the `column_name_mapping` and `column_descriptions` dictionaries
+to map the processed data to the NWB file. The `column_name_mapping` is used to rename the columns in the processed data
+to more descriptive column names. The `column_descriptions` are used to provide a description of each column in the processed data.
+
+```python
+# The column name mapping is used to rename the columns in the processed data to more descriptive column names. (optional)
+column_name_mapping = dict(
+    NoseInCenter="nose_in_center",
+    TrainingStage="training_stage",
+    Block="block_type",
+    BlockLengthAd="num_trials_in_adaptation_blocks",
+    BlockLengthTest="num_trials_in_test_blocks",
+    ProbCatch="catch_percentage",
+    RewardDelay="reward_delay",
+    RewardAmount="reward_volume_ul",
+    WaitForPoke="wait_for_center_poke",
+    hits="is_rewarded",
+    vios="is_violation",
+    optout="is_opt_out",
+    wait_time="wait_time",
+    wait_thresh="wait_time_threshold",
+    wait_for_cpoke="wait_for_center_poke",
+    zwait_for_cpoke="z_scored_wait_for_center_poke",
+    RewardedSide="rewarded_port",
+    Cled="center_poke_times",
+    Lled="left_poke_times",
+    Rled="right_poke_times",
+    l_opt="left_opt_out_times",
+    r_opt="right_opt_out_times",
+    ReactionTime="reaction_time",
+    slrt="short_latency_reaction_time",
+    iti="inter_trial_interval",
+)
+
+column_descriptions = dict(
+    NoseInCenter="The time in seconds when the animal is required to maintain center port to initiate the trial (uniformly drawn from 0.8 - 1.2 seconds).",
+    TrainingStage="The stage of the training.",
+    Block="The block type (High, Low or Test). High and Low blocks are high reward (20, 40, or 80μL) or low reward (5, 10, or 20μL) blocks. Test blocks are mixed blocks.",
+    BlockLengthAd="The number of trials in each high reward (20, 40, or 80μL) or low reward (5, 10, or 20μL) blocks.",
+    BlockLengthTest="The number of trials in each mixed blocks.",
+    ProbCatch="The percentage of catch trials.",
+    RewardDelay="The delay in seconds to receive reward, drawn from exponential distribution with mean = 2.5 seconds.",
+    RewardAmount="The volume of reward in microliters.",
+    hits="Whether the subject received reward for each trial.",
+    vios="Whether the subject violated the trial by not maintaining center poke for the time required by 'nose_in_center'.",
+    optout="Whether the subject opted out for each trial.",
+    WaitForPoke="The time (s) between side port poke and center poke.",
+    wait_time="The wait time for the subject for each trial in seconds, after removing outliers."
+        " For hit trials (when reward was delivered) the wait time is equal to the reward delay."
+        " For opt-out trials, the wait time is equal to the time waited from trial start to opting out.",
+    wait_for_cpoke="The time between side port poke and center poke in seconds, includes the time when the subject is consuming the reward.",
+    zwait_for_cpoke="The z-scored wait_for_cpoke using all trials.",
+    RewardedSide="The rewarded port (Left or Right) for each trial.",
+    Cled="The time of center port LED on/off for each trial (2 x ntrials).",
+    Lled="The time of left port LED on/off for each trial (2 x ntrials).",
+    Rled="The time of right port LED on/off for each trial (2 x ntrials).",
+    l_opt="The time of left port entered/exited for each trial (2 x ntrials).",
+    r_opt="The time of right port entered/exited for each trial (2 x ntrials).",
+    ReactionTime="The reaction time in seconds.",
+    slrt="The short-latency reaction time in seconds.",
+    iti="The time to initiate trial in seconds (the time between the end of the consummatory period and the time to initiate the next trial).",
+    wait_thresh="The threshold in seconds to remove wait-times (mean + 1*std of all cumulative wait-times).",
+)
+```
+
+### Mapping to NWB
+
+The following UML diagram shows the mapping of source data to NWB.
+
+![nwb mapping](schierek_embargo_2024_uml.png)
diff --git a/src/constantinople_lab_to_nwb/schierek_embargo_2024/schierek_embargo_2024_uml.png b/src/constantinople_lab_to_nwb/schierek_embargo_2024/schierek_embargo_2024_uml.png