From 3b45e76a34c69ef7f1a3f1ca7adf7a0131c9369b Mon Sep 17 00:00:00 2001 From: CodyCBakerPhD Date: Thu, 14 Apr 2022 18:06:46 +0000 Subject: [PATCH 01/40] setup --- .../convert_tingley_long_term.py | 134 +++++++++++++ .../tingley_long_term_environment.yaml | 11 + .../tingley_long_term_metadata.yml | 11 + .../tingley_long_term_requirements.txt | 4 + .../tingley_long_term_utils.py | 17 ++ .../tingleylongtermbehaviorinterface.py | 189 ++++++++++++++++++ .../tingleylongtermnwbconverter.py | 169 ++++++++++++++++ 7 files changed, 535 insertions(+) create mode 100644 buzsaki_lab_to_nwb/tingley_long_term/convert_tingley_long_term.py create mode 100644 buzsaki_lab_to_nwb/tingley_long_term/tingley_long_term_environment.yaml create mode 100644 buzsaki_lab_to_nwb/tingley_long_term/tingley_long_term_metadata.yml create mode 100644 buzsaki_lab_to_nwb/tingley_long_term/tingley_long_term_requirements.txt create mode 100644 buzsaki_lab_to_nwb/tingley_long_term/tingley_long_term_utils.py create mode 100644 buzsaki_lab_to_nwb/tingley_long_term/tingleylongtermbehaviorinterface.py create mode 100644 buzsaki_lab_to_nwb/tingley_long_term/tingleylongtermnwbconverter.py diff --git a/buzsaki_lab_to_nwb/tingley_long_term/convert_tingley_long_term.py b/buzsaki_lab_to_nwb/tingley_long_term/convert_tingley_long_term.py new file mode 100644 index 0000000..49d59fc --- /dev/null +++ b/buzsaki_lab_to_nwb/tingley_long_term/convert_tingley_long_term.py @@ -0,0 +1,134 @@ +from pathlib import Path +import sys +import warnings + +from nwb_conversion_tools.utils.json_schema import load_dict_from_file +from nwb_conversion_tools.utils.json_schema import dict_deep_update + +from buzsaki_lab_to_nwb import TingleySeptalNWBConverter +from joblib import Parallel, delayed + +n_jobs = 20 +stub_test = False +ripple_paper = True +conversion_factor = 0.195 # Intan + +data_path = Path("/shared/catalystneuro/Buzsaki/TingleyD/") +home_path = Path("/home/jovyan/") + +if ripple_paper: + metadata_path = Path("./buzsaki_lab_to_nwb/tingley_code_septal/metadata_ripples.yml") + valid_sessions_path = Path("./buzsaki_lab_to_nwb/tingley_code_septal/valid_sessions_ripples.yml") +else: + metadata_path = Path("./buzsaki_lab_to_nwb/tingley_code_septal/metadata.yml") + valid_sessions_path = Path("./buzsaki_lab_to_nwb/tingley_code_septal/valid_sessions.yml") + +if stub_test: + nwb_output_path = home_path / Path("nwb_stub") +else: + # nwb_output_path = home_path / Path("nwb") + nwb_output_path = Path("/shared/catalystneuro/Buzsaki/TingleyD/nwb") +nwb_output_path.mkdir(exist_ok=True) + +valid_session_dic = load_dict_from_file(valid_sessions_path) +valid_sessions_list = [] +subject_list = [] + +for subject, valid_sessions_for_subject in valid_session_dic.items(): + subject_list.append(subject) + valid_sessions_list += valid_sessions_for_subject + +session_path_list = [ + session + for subject in data_path.iterdir() + if subject.is_dir() and subject.name in subject_list + for session in subject.iterdir() + if session.is_dir() and session.name in valid_sessions_list +] + +if stub_test: + # Number here is to reference in discussion + nwbfile_list = [ + nwb_output_path / f"{n:03d}_{session.parent.stem}_{session.stem}_stub.nwb" + for n, session in enumerate(session_path_list) + ] +else: + nwbfile_list = [ + nwb_output_path / f"{session.parent.stem}_{session.stem}.nwb" for n, session in enumerate(session_path_list) + ] + + +def convert_session(session_path, nwbfile_path): + print("----------------") + print(session_path) + print(nwbfile_path) + + session_id = 
session_path.name + lfp_file_path = session_path / f"{session_path.name}.lfp" + raw_file_path = session_path / f"{session_id}.dat" + xml_file_path = session_path / f"{session_id}.xml" + spikes_matfile_path = session_path / f"{session_id}.spikes.cellinfo.mat" + behavior_matfile_path = session_path / f"{session_id}.behavior.mat" + + print("raw file available", raw_file_path.is_file()) + print("lfp file available", lfp_file_path.is_file()) + print("behavior / position mat file available", behavior_matfile_path.is_file()) + source_data = dict() + conversion_options = dict() + + source_data = dict( + NeuroscopeLFP=dict(file_path=str(lfp_file_path), gain=conversion_factor, xml_file_path=str(xml_file_path)), + ) + conversion_options.update(NeuroscopeLFP=dict(stub_test=stub_test)) + # conversion_options.update(NeuroscopeLFP=dict(stub_test=stub_test, es_key="lfp")) + + if raw_file_path.is_file(): + source_data.update( + NeuroscopeRecording=dict( + file_path=str(raw_file_path), gain=conversion_factor, xml_file_path=str(xml_file_path) + ) + ) + conversion_options.update(NeuroscopeRecording=dict(stub_test=stub_test, es_key="ElectricalSeries_raw")) + + clu_matches_in_session = len(list(session_path.glob("*.clu*"))) + res_matches_in_session = len(list(session_path.glob("*.res*"))) + + if spikes_matfile_path.is_file(): + print("cell explorer spiking data is used") + source_data.update(CellExplorerSorting=dict(file_path=str(spikes_matfile_path))) + else: + if clu_matches_in_session > 0 and res_matches_in_session > 0: + print("neuroscope spiking data is used") + source_data.update( + NeuroscopeSorting=dict( + folder_path=str(session_path), keep_mua_units=False, xml_file_path=str(xml_file_path) + ) + ) + conversion_options.update(NeuroscopeSorting=dict(stub_test=stub_test)) + else: + print("not spiking data available") + + if behavior_matfile_path.is_file(): + source_data.update(TingleySeptalBehavior=dict(folder_path=str(session_path))) + + converter = TingleySeptalNWBConverter(source_data) + + metadata = None + metadata = converter.get_metadata() + metadata_from_yaml = load_dict_from_file(metadata_path) + metadata = dict_deep_update(metadata, metadata_from_yaml) + + converter.run_conversion( + nwbfile_path=str(nwbfile_path), + metadata=metadata, + conversion_options=conversion_options, + overwrite=True, + ) + print("Done with conversion") + sys.stdout.flush() # Needed for verbosity in Parallel + + +Parallel(n_jobs=n_jobs)( + delayed(convert_session)(session_path=session_path, nwbfile_path=nwbfile_path) + for session_path, nwbfile_path in zip(session_path_list, nwbfile_list) +) diff --git a/buzsaki_lab_to_nwb/tingley_long_term/tingley_long_term_environment.yaml b/buzsaki_lab_to_nwb/tingley_long_term/tingley_long_term_environment.yaml new file mode 100644 index 0000000..61be5cf --- /dev/null +++ b/buzsaki_lab_to_nwb/tingley_long_term/tingley_long_term_environment.yaml @@ -0,0 +1,11 @@ +name: buzsaki_tingley_long_term +channels: +- defaults +- anaconda +- conda-forge +dependencies: +- python==3.9 +- pip +- pip: + - -e . + - -r tingley_long_term_requirements.txt diff --git a/buzsaki_lab_to_nwb/tingley_long_term/tingley_long_term_metadata.yml b/buzsaki_lab_to_nwb/tingley_long_term/tingley_long_term_metadata.yml new file mode 100644 index 0000000..4266b40 --- /dev/null +++ b/buzsaki_lab_to_nwb/tingley_long_term/tingley_long_term_metadata.yml @@ -0,0 +1,11 @@ +NWBFile: + related_publications: + Tingley, David, and György Buzsáki. "Transformation of a spatial map across the hippocampal-lateral septal circuit." 
Neuron 98.6 (2018) 1229-1242. + session_description: + The hippocampus constructs a map of the environment. How this “cognitive map” is utilized by other brain regions to guide behavior remains unexplored. To examine how neuronal firing patterns in the hippocampus are transmitted and transformed, we recorded neurons in its principal subcortical target, the lateral septum (LS). We observed that LS neurons carry reliable spatial information in the phase of action potentials, relative to hippocampal theta oscillations, while the firing rates of LS neurons remained uninformative. Furthermore, this spatial phase code had an anatomical microstructure within the LS and was bound to the hippocampal spatial code by synchronous gamma frequency cell assemblies. Using a data-driven model, we show that rate-independent spatial tuning arises through the dynamic weighting of CA1 and CA3 cell assemblies. Our findings demonstrate that transformation of the hippocampal spatial map depends on higher-order theta-dependent neuronal sequences. + institution: NYU + lab: Buzsaki + experimenter: + - David Tingley +Subject: + species: Rattus norvegicus diff --git a/buzsaki_lab_to_nwb/tingley_long_term/tingley_long_term_requirements.txt b/buzsaki_lab_to_nwb/tingley_long_term/tingley_long_term_requirements.txt new file mode 100644 index 0000000..e9414af --- /dev/null +++ b/buzsaki_lab_to_nwb/tingley_long_term/tingley_long_term_requirements.txt @@ -0,0 +1,4 @@ +mat4py==0.5.0 +mat73==0.52 +hdf5storage>=0.1.18 +nwb-conversion-tools @ git+https://github.com/catalystneuro/nwb-conversion-tools@5e39ca55266b8f7be48380c67471100a98413277 diff --git a/buzsaki_lab_to_nwb/tingley_long_term/tingley_long_term_utils.py b/buzsaki_lab_to_nwb/tingley_long_term/tingley_long_term_utils.py new file mode 100644 index 0000000..e31caf1 --- /dev/null +++ b/buzsaki_lab_to_nwb/tingley_long_term/tingley_long_term_utils.py @@ -0,0 +1,17 @@ +"""Authors: Heberto Mayorquin and Cody Baker.""" +from mat73 import loadmat as loadmat_mat73 +from mat4py import loadmat as loadmat_mat4py +from scipy.io import loadmat as loadmat_scipy + + +def read_matlab_file(file_path): + file_path = str(file_path) + + try: + mat_file = loadmat_mat4py(str(file_path)) + except: + try: + mat_file = loadmat_mat73(file_path) + except: + mat_file = loadmat_scipy(file_path) + return mat_file diff --git a/buzsaki_lab_to_nwb/tingley_long_term/tingleylongtermbehaviorinterface.py b/buzsaki_lab_to_nwb/tingley_long_term/tingleylongtermbehaviorinterface.py new file mode 100644 index 0000000..67930e6 --- /dev/null +++ b/buzsaki_lab_to_nwb/tingley_long_term/tingleylongtermbehaviorinterface.py @@ -0,0 +1,189 @@ +"""Authors: Heberto Mayorquin and Cody Baker.""" +from pathlib import Path +import warnings + +import numpy as np +from hdmf.backends.hdf5.h5_utils import H5DataIO + +from pynwb.file import NWBFile, TimeIntervals +from pynwb.behavior import SpatialSeries, Position, CompassDirection +from nwb_conversion_tools.basedatainterface import BaseDataInterface +from nwb_conversion_tools.utils.conversion_tools import get_module +from nwb_conversion_tools.utils.json_schema import FolderPathType +from spikeextractors import NeuroscopeRecordingExtractor + +from .tingleyseptal_utils import read_matlab_file + + +class TingleySeptalBehaviorInterface(BaseDataInterface): + """Behavior data interface for the Tingley Septal project.""" + + def __init__(self, folder_path: FolderPathType): + super().__init__(folder_path=folder_path) + + def run_conversion(self, nwbfile: NWBFile, metadata: dict): + 
session_path = Path(self.source_data["folder_path"]) + session_id = session_path.stem + + # Load the file with behavioral data + behavior_file_path = Path(session_path) / f"{session_id}.behavior.mat" + behavior_mat = read_matlab_file(str(behavior_file_path))["behavior"] + + # Add trials + events = behavior_mat["events"] + trial_interval_list = events["trialIntervals"] + + data = [] + for start_time, stop_time in trial_interval_list: + data.append( + dict( + start_time=float(start_time), + stop_time=float(stop_time), + ) + ) + [nwbfile.add_trial(**row) for row in sorted(data, key=lambda x: x["start_time"])] + + trial_list = events["trials"] + direction_list = [trial.get("direction", "") for trial in trial_list] + trial_type_list = [trial.get("type", "") for trial in trial_list] + + if not all([direction == "" for direction in direction_list]): + nwbfile.add_trial_column(name="direction", description="direction of the trial", data=direction_list) + + if not all([trial_type == "" for trial_type in trial_type_list]): + nwbfile.add_trial_column(name="trial_type", description="type of trial", data=trial_type_list) + + # Position + module_name = "behavior" + module_description = "Contains behavioral data concerning position." + processing_module = get_module(nwbfile=nwbfile, name=module_name, description=module_description) + + timestamps = np.array(behavior_mat["timestamps"])[..., 0] + + position = behavior_mat["position"] + pos_data = [[x, y, z] for (x, y, z) in zip(position["x"], position["y"], position["y"])] + pos_data = np.array(pos_data)[..., 0] + + unit = behavior_mat.get("units", "") + + if unit == ["m", "meter", "meters"]: + conversion = 1.0 + else: + warnings.warn(f"Spatial units {unit} not listed in meters; " "setting conversion to nan.") + conversion = np.nan + + description = behavior_mat.get("description", "generic_position_tracking").replace("/", "-") + rotation_type = behavior_mat.get("rotationType", "non_specified") + + pos_obj = Position(name=f"{description}_task".replace(" ", "_")) + + spatial_series_object = SpatialSeries( + name="position", + description="(x,y,z) coordinates tracking subject movement.", + data=H5DataIO(pos_data, compression="gzip"), + reference_frame="unknown", + unit=unit, + conversion=conversion, + timestamps=timestamps, + resolution=np.nan, + ) + + pos_obj.add_spatial_series(spatial_series_object) + + # Add error if available + errorPerMarker = behavior_mat.get("errorPerMarker", None) + if errorPerMarker: + error_data = np.array([error for error in errorPerMarker])[..., 0] + + spatial_series_object = SpatialSeries( + name="error_per_marker", + description="Estimated error for marker tracking from optitrack system.", + data=H5DataIO(error_data, compression="gzip"), + reference_frame="unknown", + conversion=conversion, + timestamps=timestamps, + resolution=np.nan, + ) + pos_obj.add_spatial_series(spatial_series_object) + + processing_module.add_data_interface(pos_obj) + + # Compass + try: + orientation = behavior_mat["orientation"] + orientation_data = [ + [x, y, z, w] + for (x, y, z, w) in zip(orientation["x"], orientation["y"], orientation["z"], orientation["w"]) + ] + orientation_data = np.array(orientation_data)[..., 0] + + compass_obj = CompassDirection(name=f"allocentric_frame_tracking") + + spatial_series_object = SpatialSeries( + name="orientation", + description=f"(x, y, z, w) orientation coordinates, orientation type: {rotation_type}", + data=H5DataIO(orientation_data, compression="gzip"), + reference_frame="unknown", + conversion=conversion, + 
timestamps=timestamps, + resolution=np.nan, + ) + compass_obj.add_spatial_series(spatial_series_object) + processing_module.add_data_interface(compass_obj) + + except KeyError: + warnings.warn(f"Orientation data not found") + + # States + module_name = "ecephys" + module_description = "Contains behavioral data concerning classified states." + processing_module = get_module(nwbfile=nwbfile, name=module_name, description=module_description) + + # Sleep states + sleep_file_path = session_path / f"{session_id}.SleepState.states.mat" + if Path(sleep_file_path).exists(): + mat_file = read_matlab_file(sleep_file_path) + + state_label_names = dict(WAKEstate="Awake", NREMstate="Non-REM", REMstate="REM", MAstate="MA") + sleep_state_dic = mat_file["SleepState"]["ints"] + table = TimeIntervals(name="sleep_states", description="Sleep state of the animal.") + table.add_column(name="label", description="Sleep state.") + + data = [] + for sleep_state in state_label_names: + values = sleep_state_dic[sleep_state] + if len(values) != 0 and isinstance(values[0], int): + values = [values] + for start_time, stop_time in values: + data.append( + dict( + start_time=float(start_time), + stop_time=float(stop_time), + label=state_label_names[sleep_state], + ) + ) + [table.add_row(**row) for row in sorted(data, key=lambda x: x["start_time"])] + processing_module.add(table) + + # Add epochs + lfp_file_path = session_path / f"{session_path.name}.lfp" + raw_file_path = session_path / f"{session_id}.dat" + xml_file_path = session_path / f"{session_id}.xml" + + if raw_file_path.is_file(): + recorder = NeuroscopeRecordingExtractor(file_path=raw_file_path, xml_file_path=xml_file_path) + else: + recorder = NeuroscopeRecordingExtractor(file_path=lfp_file_path, xml_file_path=xml_file_path) + + num_frames = recorder.get_num_frames() + sampling_frequency = recorder.get_sampling_frequency() + end_of_the_session = num_frames / sampling_frequency + + session_start = 0.0 + start_trials_time = min([interval[0] for interval in trial_interval_list]) + end_trials_time = max([interval[1] for interval in trial_interval_list]) + end_of_the_session = end_of_the_session + + nwbfile.add_epoch(start_time=session_start, stop_time=start_trials_time, tags="before trials") + nwbfile.add_epoch(start_time=start_trials_time, stop_time=end_trials_time, tags="during trials") + nwbfile.add_epoch(start_time=end_trials_time, stop_time=end_of_the_session, tags="after trials") diff --git a/buzsaki_lab_to_nwb/tingley_long_term/tingleylongtermnwbconverter.py b/buzsaki_lab_to_nwb/tingley_long_term/tingleylongtermnwbconverter.py new file mode 100644 index 0000000..d9fc250 --- /dev/null +++ b/buzsaki_lab_to_nwb/tingley_long_term/tingleylongtermnwbconverter.py @@ -0,0 +1,169 @@ +"""Authors: Heberto Mayorquin and Cody Baker.""" +import dateutil +from pathlib import Path +from collections import Counter +from datetime import datetime + +from nwb_conversion_tools import ( + NWBConverter, + NeuroscopeRecordingInterface, + NeuroscopeLFPInterface, + NeuroscopeSortingInterface, + CellExplorerSortingInterface, +) + +from .tingleyseptalbehaviorinterface import TingleySeptalBehaviorInterface +from .tingleyseptal_utils import read_matlab_file + + +DEVICE_INFO = dict( + cambridge=dict( + name="Cambridge prob (1 x 64)", + description=( + "Silicon probe from Cambridge Neurotech. Electrophysiological data were " + "acquired using an Intan RHD2000 system (Intan Technologies LLC) digitized with20 kHz rate." 
+ ), + ), + neuronexus_4_8=dict( + name="Neuronexus probe (4 x 8)", + description=( + "A 4 (shanks) x 8 (electrodes) silicon probe from Neuronexus. Electrophysiological data were " + "acquired using an Intan RHD2000 system (Intan Technologies LLC) digitized with 20 kHz rate." + ), + ), + neuronexus_5_12=dict( + name="Neuronexus probe (5 x 12)", + description=( + "A 5 (shanks) x 12 (electrodes) silicon probe from Neuronexus. Electrophysiological data were " + "acquired using an Intan RHD2000 system (Intan Technologies LLC) digitized with 20 kHz rate." + ), + ), + neuronexus_6_10=dict( + name="Neuronexus probe (6 x 10)", + description=( + "A 6 (shanks) x 10 (electrodes) silicon probe from Neuronexus. Electrophysiological data were " + "acquired using an Intan RHD2000 system (Intan Technologies LLC) digitized with 20 kHz rate." + ), + ), + neuronexus_8_8=dict( + name="Neuronexus probe (8 x 8)", + description=( + "A 8 (shanks) x 8 (electrodes) silicon probe from Neuronexus. Electrophysiological data were " + "acquired using an Intan RHD2000 system (Intan Technologies LLC) digitized with 20 kHz rate." + ), + ), + old_neuronexus_probe=dict( + name="Neuronexus probe (4 x 1)", + description=( + "according to author thse are reference sites a few millimeters dorsal from the rest" + "recorded from an older neuronexus probe" + ), + ), +) + + +class TingleySeptalNWBConverter(NWBConverter): + """Primary conversion class for the Tingley Septal data project""" + + data_interface_classes = dict( + NeuroscopeRecording=NeuroscopeRecordingInterface, + NeuroscopeLFP=NeuroscopeLFPInterface, + NeuroscopeSorting=NeuroscopeSortingInterface, + CellExplorerSorting=CellExplorerSortingInterface, + TingleySeptalBehavior=TingleySeptalBehaviorInterface, + ) + + def __init__(self, source_data: dict): + super().__init__(source_data=source_data) + + lfp_file_path = Path(self.data_interface_objects["NeuroscopeLFP"].source_data["file_path"]) + session_path = lfp_file_path.parent + session_id = session_path.stem + + # Add region + session_info_matfile_path = session_path / f"{session_id}.sessionInfo.mat" + + if session_info_matfile_path.is_file(): + session_info_matfile = read_matlab_file(session_info_matfile_path)["sessionInfo"] + channel_region_list = session_info_matfile.get("region", None) + recording_extractor = self.data_interface_objects["NeuroscopeLFP"].recording_extractor + recording_extractor.set_property(key="brain_area", values=channel_region_list) + + if "NeuroscopeRecording" in self.data_interface_objects: + recording_extractor = self.data_interface_objects["NeuroscopeRecording"].recording_extractor + recording_extractor.set_property(key="brain_area", values=channel_region_list) + + def get_metadata(self): + lfp_file_path = Path(self.data_interface_objects["NeuroscopeLFP"].source_data["file_path"]) + + session_path = lfp_file_path.parent + subject = str(session_path.parent.stem) + session_id = session_path.stem + subject_id = session_path.parent.name + + # See the names in the valid session for this logic + split = session_id.split("_") + + if split[0] == "DT1": + date = split[2] + elif split[0] == "DT2": + date = split[5] + else: + date = split[0] + + if date == "20170229": + date = "20170228" # 2017 is not a leap year (?!) 
+ + if split[-1] == "merge" or split[0] == "DT1": + datetime_string = date + session_start = datetime.strptime(datetime_string, "%Y%m%d") + else: + time = split[-1] + datetime_string = date + time + session_start = datetime.strptime(datetime_string, "%Y%m%d%H%M%S") + + session_start = session_start.replace(tzinfo=dateutil.tz.gettz("US/Eastern")).isoformat() + metadata = super().get_metadata() + + metadata["NWBFile"].update(session_start_time=session_start, session_id=session_id) + metadata.update(Subject=dict(subject_id=subject_id)) + + # Group mapping + extractor = self.data_interface_objects["NeuroscopeLFP"].recording_extractor + channel_groups = extractor.get_channel_groups() + counts = Counter(channel_groups) # group_id : number_of_channels relationship + + inference_dic = { + 64: "cambridge", + 8: "neuronexus_4_8", + 12: "neuronexus_5_12", + 88: "neuronexus_8_8", # Can disambiguate between 4x8 and 8x8 with available info. + 10: "neuronexus_6_10", + 4: "old_neuronexus_probe", + 3: "neuronexus_4_8", + } + + if subject == "DT9": # This subject can be disambiguated by the number of channels per group + inferred_devices = {i: inference_dic[8] for i in range(1, 5)} + inferred_devices.update({i: inference_dic[88] for i in range(5, 5 + 8)}) + else: + inferred_devices = {key: inference_dic[value] for key, value in counts.items()} + + unique_inferred_devices = set(inferred_devices.values()) + metadata["Ecephys"]["Device"] = [DEVICE_INFO[inferred_device] for inferred_device in unique_inferred_devices] + for group_idx, inferred_device in inferred_devices.items(): + metadata["Ecephys"]["ElectrodeGroup"][group_idx - 1].update(device=DEVICE_INFO[inferred_device]["name"]) + + # Add region to groups + session_info_matfile_path = session_path / f"{session_id}.sessionInfo.mat" + if session_info_matfile_path.is_file(): + session_info_matfile = read_matlab_file(session_info_matfile_path)["sessionInfo"] + channel_region_list = session_info_matfile.get("region", None) + if channel_region_list: + channel_group_to_region = { + group: region for (group, region) in zip(channel_groups, channel_region_list) + } + for group_idx, region in channel_group_to_region.items(): + metadata["Ecephys"]["ElectrodeGroup"][group_idx - 1].update(location=region) + + return metadata From 1b3810093c69423915afd701ee2073a9029b9e16 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 15 Apr 2022 01:23:44 -0400 Subject: [PATCH 02/40] initial push --- .../convert_tingley_long_term.py | 134 ------------- .../tingley_long_term_metadata.yml | 11 - .../tingley_long_term_utils.py | 17 -- .../tingleylongtermbehaviorinterface.py | 189 ------------------ .../tingleylongtermnwbconverter.py | 169 ---------------- .../convert_tingley_metabolic.py | 100 +++++++++ .../tingley_metabolic_metadata.yml | 9 + .../tingley_metabolic_requirements.txt} | 1 + .../tingley_metabolic_subject_info.yml | 45 +++++ .../tingleymetabolicauxextractor.py | 53 +++++ .../tingleymetabolicauxinterface.py | 14 ++ .../tingleymetabolicnwbconverter.py | 57 ++++++ make_env.yml | 13 -- ....yaml => tingley_metabolic_environment.yml | 3 +- 14 files changed, 281 insertions(+), 534 deletions(-) delete mode 100644 buzsaki_lab_to_nwb/tingley_long_term/convert_tingley_long_term.py delete mode 100644 buzsaki_lab_to_nwb/tingley_long_term/tingley_long_term_metadata.yml delete mode 100644 buzsaki_lab_to_nwb/tingley_long_term/tingley_long_term_utils.py delete mode 100644 buzsaki_lab_to_nwb/tingley_long_term/tingleylongtermbehaviorinterface.py delete mode 100644 
buzsaki_lab_to_nwb/tingley_long_term/tingleylongtermnwbconverter.py create mode 100644 buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py create mode 100644 buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_metadata.yml rename buzsaki_lab_to_nwb/{tingley_long_term/tingley_long_term_requirements.txt => tingley_metabolic/tingley_metabolic_requirements.txt} (91%) create mode 100644 buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_subject_info.yml create mode 100644 buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicauxextractor.py create mode 100644 buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicauxinterface.py create mode 100644 buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicnwbconverter.py delete mode 100644 make_env.yml rename buzsaki_lab_to_nwb/tingley_long_term/tingley_long_term_environment.yaml => tingley_metabolic_environment.yml (60%) diff --git a/buzsaki_lab_to_nwb/tingley_long_term/convert_tingley_long_term.py b/buzsaki_lab_to_nwb/tingley_long_term/convert_tingley_long_term.py deleted file mode 100644 index 49d59fc..0000000 --- a/buzsaki_lab_to_nwb/tingley_long_term/convert_tingley_long_term.py +++ /dev/null @@ -1,134 +0,0 @@ -from pathlib import Path -import sys -import warnings - -from nwb_conversion_tools.utils.json_schema import load_dict_from_file -from nwb_conversion_tools.utils.json_schema import dict_deep_update - -from buzsaki_lab_to_nwb import TingleySeptalNWBConverter -from joblib import Parallel, delayed - -n_jobs = 20 -stub_test = False -ripple_paper = True -conversion_factor = 0.195 # Intan - -data_path = Path("/shared/catalystneuro/Buzsaki/TingleyD/") -home_path = Path("/home/jovyan/") - -if ripple_paper: - metadata_path = Path("./buzsaki_lab_to_nwb/tingley_code_septal/metadata_ripples.yml") - valid_sessions_path = Path("./buzsaki_lab_to_nwb/tingley_code_septal/valid_sessions_ripples.yml") -else: - metadata_path = Path("./buzsaki_lab_to_nwb/tingley_code_septal/metadata.yml") - valid_sessions_path = Path("./buzsaki_lab_to_nwb/tingley_code_septal/valid_sessions.yml") - -if stub_test: - nwb_output_path = home_path / Path("nwb_stub") -else: - # nwb_output_path = home_path / Path("nwb") - nwb_output_path = Path("/shared/catalystneuro/Buzsaki/TingleyD/nwb") -nwb_output_path.mkdir(exist_ok=True) - -valid_session_dic = load_dict_from_file(valid_sessions_path) -valid_sessions_list = [] -subject_list = [] - -for subject, valid_sessions_for_subject in valid_session_dic.items(): - subject_list.append(subject) - valid_sessions_list += valid_sessions_for_subject - -session_path_list = [ - session - for subject in data_path.iterdir() - if subject.is_dir() and subject.name in subject_list - for session in subject.iterdir() - if session.is_dir() and session.name in valid_sessions_list -] - -if stub_test: - # Number here is to reference in discussion - nwbfile_list = [ - nwb_output_path / f"{n:03d}_{session.parent.stem}_{session.stem}_stub.nwb" - for n, session in enumerate(session_path_list) - ] -else: - nwbfile_list = [ - nwb_output_path / f"{session.parent.stem}_{session.stem}.nwb" for n, session in enumerate(session_path_list) - ] - - -def convert_session(session_path, nwbfile_path): - print("----------------") - print(session_path) - print(nwbfile_path) - - session_id = session_path.name - lfp_file_path = session_path / f"{session_path.name}.lfp" - raw_file_path = session_path / f"{session_id}.dat" - xml_file_path = session_path / f"{session_id}.xml" - spikes_matfile_path = session_path / f"{session_id}.spikes.cellinfo.mat" - 
behavior_matfile_path = session_path / f"{session_id}.behavior.mat" - - print("raw file available", raw_file_path.is_file()) - print("lfp file available", lfp_file_path.is_file()) - print("behavior / position mat file available", behavior_matfile_path.is_file()) - source_data = dict() - conversion_options = dict() - - source_data = dict( - NeuroscopeLFP=dict(file_path=str(lfp_file_path), gain=conversion_factor, xml_file_path=str(xml_file_path)), - ) - conversion_options.update(NeuroscopeLFP=dict(stub_test=stub_test)) - # conversion_options.update(NeuroscopeLFP=dict(stub_test=stub_test, es_key="lfp")) - - if raw_file_path.is_file(): - source_data.update( - NeuroscopeRecording=dict( - file_path=str(raw_file_path), gain=conversion_factor, xml_file_path=str(xml_file_path) - ) - ) - conversion_options.update(NeuroscopeRecording=dict(stub_test=stub_test, es_key="ElectricalSeries_raw")) - - clu_matches_in_session = len(list(session_path.glob("*.clu*"))) - res_matches_in_session = len(list(session_path.glob("*.res*"))) - - if spikes_matfile_path.is_file(): - print("cell explorer spiking data is used") - source_data.update(CellExplorerSorting=dict(file_path=str(spikes_matfile_path))) - else: - if clu_matches_in_session > 0 and res_matches_in_session > 0: - print("neuroscope spiking data is used") - source_data.update( - NeuroscopeSorting=dict( - folder_path=str(session_path), keep_mua_units=False, xml_file_path=str(xml_file_path) - ) - ) - conversion_options.update(NeuroscopeSorting=dict(stub_test=stub_test)) - else: - print("not spiking data available") - - if behavior_matfile_path.is_file(): - source_data.update(TingleySeptalBehavior=dict(folder_path=str(session_path))) - - converter = TingleySeptalNWBConverter(source_data) - - metadata = None - metadata = converter.get_metadata() - metadata_from_yaml = load_dict_from_file(metadata_path) - metadata = dict_deep_update(metadata, metadata_from_yaml) - - converter.run_conversion( - nwbfile_path=str(nwbfile_path), - metadata=metadata, - conversion_options=conversion_options, - overwrite=True, - ) - print("Done with conversion") - sys.stdout.flush() # Needed for verbosity in Parallel - - -Parallel(n_jobs=n_jobs)( - delayed(convert_session)(session_path=session_path, nwbfile_path=nwbfile_path) - for session_path, nwbfile_path in zip(session_path_list, nwbfile_list) -) diff --git a/buzsaki_lab_to_nwb/tingley_long_term/tingley_long_term_metadata.yml b/buzsaki_lab_to_nwb/tingley_long_term/tingley_long_term_metadata.yml deleted file mode 100644 index 4266b40..0000000 --- a/buzsaki_lab_to_nwb/tingley_long_term/tingley_long_term_metadata.yml +++ /dev/null @@ -1,11 +0,0 @@ -NWBFile: - related_publications: - Tingley, David, and György Buzsáki. "Transformation of a spatial map across the hippocampal-lateral septal circuit." Neuron 98.6 (2018) 1229-1242. - session_description: - The hippocampus constructs a map of the environment. How this “cognitive map” is utilized by other brain regions to guide behavior remains unexplored. To examine how neuronal firing patterns in the hippocampus are transmitted and transformed, we recorded neurons in its principal subcortical target, the lateral septum (LS). We observed that LS neurons carry reliable spatial information in the phase of action potentials, relative to hippocampal theta oscillations, while the firing rates of LS neurons remained uninformative. 
Furthermore, this spatial phase code had an anatomical microstructure within the LS and was bound to the hippocampal spatial code by synchronous gamma frequency cell assemblies. Using a data-driven model, we show that rate-independent spatial tuning arises through the dynamic weighting of CA1 and CA3 cell assemblies. Our findings demonstrate that transformation of the hippocampal spatial map depends on higher-order theta-dependent neuronal sequences. - institution: NYU - lab: Buzsaki - experimenter: - - David Tingley -Subject: - species: Rattus norvegicus diff --git a/buzsaki_lab_to_nwb/tingley_long_term/tingley_long_term_utils.py b/buzsaki_lab_to_nwb/tingley_long_term/tingley_long_term_utils.py deleted file mode 100644 index e31caf1..0000000 --- a/buzsaki_lab_to_nwb/tingley_long_term/tingley_long_term_utils.py +++ /dev/null @@ -1,17 +0,0 @@ -"""Authors: Heberto Mayorquin and Cody Baker.""" -from mat73 import loadmat as loadmat_mat73 -from mat4py import loadmat as loadmat_mat4py -from scipy.io import loadmat as loadmat_scipy - - -def read_matlab_file(file_path): - file_path = str(file_path) - - try: - mat_file = loadmat_mat4py(str(file_path)) - except: - try: - mat_file = loadmat_mat73(file_path) - except: - mat_file = loadmat_scipy(file_path) - return mat_file diff --git a/buzsaki_lab_to_nwb/tingley_long_term/tingleylongtermbehaviorinterface.py b/buzsaki_lab_to_nwb/tingley_long_term/tingleylongtermbehaviorinterface.py deleted file mode 100644 index 67930e6..0000000 --- a/buzsaki_lab_to_nwb/tingley_long_term/tingleylongtermbehaviorinterface.py +++ /dev/null @@ -1,189 +0,0 @@ -"""Authors: Heberto Mayorquin and Cody Baker.""" -from pathlib import Path -import warnings - -import numpy as np -from hdmf.backends.hdf5.h5_utils import H5DataIO - -from pynwb.file import NWBFile, TimeIntervals -from pynwb.behavior import SpatialSeries, Position, CompassDirection -from nwb_conversion_tools.basedatainterface import BaseDataInterface -from nwb_conversion_tools.utils.conversion_tools import get_module -from nwb_conversion_tools.utils.json_schema import FolderPathType -from spikeextractors import NeuroscopeRecordingExtractor - -from .tingleyseptal_utils import read_matlab_file - - -class TingleySeptalBehaviorInterface(BaseDataInterface): - """Behavior data interface for the Tingley Septal project.""" - - def __init__(self, folder_path: FolderPathType): - super().__init__(folder_path=folder_path) - - def run_conversion(self, nwbfile: NWBFile, metadata: dict): - session_path = Path(self.source_data["folder_path"]) - session_id = session_path.stem - - # Load the file with behavioral data - behavior_file_path = Path(session_path) / f"{session_id}.behavior.mat" - behavior_mat = read_matlab_file(str(behavior_file_path))["behavior"] - - # Add trials - events = behavior_mat["events"] - trial_interval_list = events["trialIntervals"] - - data = [] - for start_time, stop_time in trial_interval_list: - data.append( - dict( - start_time=float(start_time), - stop_time=float(stop_time), - ) - ) - [nwbfile.add_trial(**row) for row in sorted(data, key=lambda x: x["start_time"])] - - trial_list = events["trials"] - direction_list = [trial.get("direction", "") for trial in trial_list] - trial_type_list = [trial.get("type", "") for trial in trial_list] - - if not all([direction == "" for direction in direction_list]): - nwbfile.add_trial_column(name="direction", description="direction of the trial", data=direction_list) - - if not all([trial_type == "" for trial_type in trial_type_list]): - 
nwbfile.add_trial_column(name="trial_type", description="type of trial", data=trial_type_list) - - # Position - module_name = "behavior" - module_description = "Contains behavioral data concerning position." - processing_module = get_module(nwbfile=nwbfile, name=module_name, description=module_description) - - timestamps = np.array(behavior_mat["timestamps"])[..., 0] - - position = behavior_mat["position"] - pos_data = [[x, y, z] for (x, y, z) in zip(position["x"], position["y"], position["y"])] - pos_data = np.array(pos_data)[..., 0] - - unit = behavior_mat.get("units", "") - - if unit == ["m", "meter", "meters"]: - conversion = 1.0 - else: - warnings.warn(f"Spatial units {unit} not listed in meters; " "setting conversion to nan.") - conversion = np.nan - - description = behavior_mat.get("description", "generic_position_tracking").replace("/", "-") - rotation_type = behavior_mat.get("rotationType", "non_specified") - - pos_obj = Position(name=f"{description}_task".replace(" ", "_")) - - spatial_series_object = SpatialSeries( - name="position", - description="(x,y,z) coordinates tracking subject movement.", - data=H5DataIO(pos_data, compression="gzip"), - reference_frame="unknown", - unit=unit, - conversion=conversion, - timestamps=timestamps, - resolution=np.nan, - ) - - pos_obj.add_spatial_series(spatial_series_object) - - # Add error if available - errorPerMarker = behavior_mat.get("errorPerMarker", None) - if errorPerMarker: - error_data = np.array([error for error in errorPerMarker])[..., 0] - - spatial_series_object = SpatialSeries( - name="error_per_marker", - description="Estimated error for marker tracking from optitrack system.", - data=H5DataIO(error_data, compression="gzip"), - reference_frame="unknown", - conversion=conversion, - timestamps=timestamps, - resolution=np.nan, - ) - pos_obj.add_spatial_series(spatial_series_object) - - processing_module.add_data_interface(pos_obj) - - # Compass - try: - orientation = behavior_mat["orientation"] - orientation_data = [ - [x, y, z, w] - for (x, y, z, w) in zip(orientation["x"], orientation["y"], orientation["z"], orientation["w"]) - ] - orientation_data = np.array(orientation_data)[..., 0] - - compass_obj = CompassDirection(name=f"allocentric_frame_tracking") - - spatial_series_object = SpatialSeries( - name="orientation", - description=f"(x, y, z, w) orientation coordinates, orientation type: {rotation_type}", - data=H5DataIO(orientation_data, compression="gzip"), - reference_frame="unknown", - conversion=conversion, - timestamps=timestamps, - resolution=np.nan, - ) - compass_obj.add_spatial_series(spatial_series_object) - processing_module.add_data_interface(compass_obj) - - except KeyError: - warnings.warn(f"Orientation data not found") - - # States - module_name = "ecephys" - module_description = "Contains behavioral data concerning classified states." 
- processing_module = get_module(nwbfile=nwbfile, name=module_name, description=module_description) - - # Sleep states - sleep_file_path = session_path / f"{session_id}.SleepState.states.mat" - if Path(sleep_file_path).exists(): - mat_file = read_matlab_file(sleep_file_path) - - state_label_names = dict(WAKEstate="Awake", NREMstate="Non-REM", REMstate="REM", MAstate="MA") - sleep_state_dic = mat_file["SleepState"]["ints"] - table = TimeIntervals(name="sleep_states", description="Sleep state of the animal.") - table.add_column(name="label", description="Sleep state.") - - data = [] - for sleep_state in state_label_names: - values = sleep_state_dic[sleep_state] - if len(values) != 0 and isinstance(values[0], int): - values = [values] - for start_time, stop_time in values: - data.append( - dict( - start_time=float(start_time), - stop_time=float(stop_time), - label=state_label_names[sleep_state], - ) - ) - [table.add_row(**row) for row in sorted(data, key=lambda x: x["start_time"])] - processing_module.add(table) - - # Add epochs - lfp_file_path = session_path / f"{session_path.name}.lfp" - raw_file_path = session_path / f"{session_id}.dat" - xml_file_path = session_path / f"{session_id}.xml" - - if raw_file_path.is_file(): - recorder = NeuroscopeRecordingExtractor(file_path=raw_file_path, xml_file_path=xml_file_path) - else: - recorder = NeuroscopeRecordingExtractor(file_path=lfp_file_path, xml_file_path=xml_file_path) - - num_frames = recorder.get_num_frames() - sampling_frequency = recorder.get_sampling_frequency() - end_of_the_session = num_frames / sampling_frequency - - session_start = 0.0 - start_trials_time = min([interval[0] for interval in trial_interval_list]) - end_trials_time = max([interval[1] for interval in trial_interval_list]) - end_of_the_session = end_of_the_session - - nwbfile.add_epoch(start_time=session_start, stop_time=start_trials_time, tags="before trials") - nwbfile.add_epoch(start_time=start_trials_time, stop_time=end_trials_time, tags="during trials") - nwbfile.add_epoch(start_time=end_trials_time, stop_time=end_of_the_session, tags="after trials") diff --git a/buzsaki_lab_to_nwb/tingley_long_term/tingleylongtermnwbconverter.py b/buzsaki_lab_to_nwb/tingley_long_term/tingleylongtermnwbconverter.py deleted file mode 100644 index d9fc250..0000000 --- a/buzsaki_lab_to_nwb/tingley_long_term/tingleylongtermnwbconverter.py +++ /dev/null @@ -1,169 +0,0 @@ -"""Authors: Heberto Mayorquin and Cody Baker.""" -import dateutil -from pathlib import Path -from collections import Counter -from datetime import datetime - -from nwb_conversion_tools import ( - NWBConverter, - NeuroscopeRecordingInterface, - NeuroscopeLFPInterface, - NeuroscopeSortingInterface, - CellExplorerSortingInterface, -) - -from .tingleyseptalbehaviorinterface import TingleySeptalBehaviorInterface -from .tingleyseptal_utils import read_matlab_file - - -DEVICE_INFO = dict( - cambridge=dict( - name="Cambridge prob (1 x 64)", - description=( - "Silicon probe from Cambridge Neurotech. Electrophysiological data were " - "acquired using an Intan RHD2000 system (Intan Technologies LLC) digitized with20 kHz rate." - ), - ), - neuronexus_4_8=dict( - name="Neuronexus probe (4 x 8)", - description=( - "A 4 (shanks) x 8 (electrodes) silicon probe from Neuronexus. Electrophysiological data were " - "acquired using an Intan RHD2000 system (Intan Technologies LLC) digitized with 20 kHz rate." 
- ), - ), - neuronexus_5_12=dict( - name="Neuronexus probe (5 x 12)", - description=( - "A 5 (shanks) x 12 (electrodes) silicon probe from Neuronexus. Electrophysiological data were " - "acquired using an Intan RHD2000 system (Intan Technologies LLC) digitized with 20 kHz rate." - ), - ), - neuronexus_6_10=dict( - name="Neuronexus probe (6 x 10)", - description=( - "A 6 (shanks) x 10 (electrodes) silicon probe from Neuronexus. Electrophysiological data were " - "acquired using an Intan RHD2000 system (Intan Technologies LLC) digitized with 20 kHz rate." - ), - ), - neuronexus_8_8=dict( - name="Neuronexus probe (8 x 8)", - description=( - "A 8 (shanks) x 8 (electrodes) silicon probe from Neuronexus. Electrophysiological data were " - "acquired using an Intan RHD2000 system (Intan Technologies LLC) digitized with 20 kHz rate." - ), - ), - old_neuronexus_probe=dict( - name="Neuronexus probe (4 x 1)", - description=( - "according to author thse are reference sites a few millimeters dorsal from the rest" - "recorded from an older neuronexus probe" - ), - ), -) - - -class TingleySeptalNWBConverter(NWBConverter): - """Primary conversion class for the Tingley Septal data project""" - - data_interface_classes = dict( - NeuroscopeRecording=NeuroscopeRecordingInterface, - NeuroscopeLFP=NeuroscopeLFPInterface, - NeuroscopeSorting=NeuroscopeSortingInterface, - CellExplorerSorting=CellExplorerSortingInterface, - TingleySeptalBehavior=TingleySeptalBehaviorInterface, - ) - - def __init__(self, source_data: dict): - super().__init__(source_data=source_data) - - lfp_file_path = Path(self.data_interface_objects["NeuroscopeLFP"].source_data["file_path"]) - session_path = lfp_file_path.parent - session_id = session_path.stem - - # Add region - session_info_matfile_path = session_path / f"{session_id}.sessionInfo.mat" - - if session_info_matfile_path.is_file(): - session_info_matfile = read_matlab_file(session_info_matfile_path)["sessionInfo"] - channel_region_list = session_info_matfile.get("region", None) - recording_extractor = self.data_interface_objects["NeuroscopeLFP"].recording_extractor - recording_extractor.set_property(key="brain_area", values=channel_region_list) - - if "NeuroscopeRecording" in self.data_interface_objects: - recording_extractor = self.data_interface_objects["NeuroscopeRecording"].recording_extractor - recording_extractor.set_property(key="brain_area", values=channel_region_list) - - def get_metadata(self): - lfp_file_path = Path(self.data_interface_objects["NeuroscopeLFP"].source_data["file_path"]) - - session_path = lfp_file_path.parent - subject = str(session_path.parent.stem) - session_id = session_path.stem - subject_id = session_path.parent.name - - # See the names in the valid session for this logic - split = session_id.split("_") - - if split[0] == "DT1": - date = split[2] - elif split[0] == "DT2": - date = split[5] - else: - date = split[0] - - if date == "20170229": - date = "20170228" # 2017 is not a leap year (?!) 
- - if split[-1] == "merge" or split[0] == "DT1": - datetime_string = date - session_start = datetime.strptime(datetime_string, "%Y%m%d") - else: - time = split[-1] - datetime_string = date + time - session_start = datetime.strptime(datetime_string, "%Y%m%d%H%M%S") - - session_start = session_start.replace(tzinfo=dateutil.tz.gettz("US/Eastern")).isoformat() - metadata = super().get_metadata() - - metadata["NWBFile"].update(session_start_time=session_start, session_id=session_id) - metadata.update(Subject=dict(subject_id=subject_id)) - - # Group mapping - extractor = self.data_interface_objects["NeuroscopeLFP"].recording_extractor - channel_groups = extractor.get_channel_groups() - counts = Counter(channel_groups) # group_id : number_of_channels relationship - - inference_dic = { - 64: "cambridge", - 8: "neuronexus_4_8", - 12: "neuronexus_5_12", - 88: "neuronexus_8_8", # Can disambiguate between 4x8 and 8x8 with available info. - 10: "neuronexus_6_10", - 4: "old_neuronexus_probe", - 3: "neuronexus_4_8", - } - - if subject == "DT9": # This subject can be disambiguated by the number of channels per group - inferred_devices = {i: inference_dic[8] for i in range(1, 5)} - inferred_devices.update({i: inference_dic[88] for i in range(5, 5 + 8)}) - else: - inferred_devices = {key: inference_dic[value] for key, value in counts.items()} - - unique_inferred_devices = set(inferred_devices.values()) - metadata["Ecephys"]["Device"] = [DEVICE_INFO[inferred_device] for inferred_device in unique_inferred_devices] - for group_idx, inferred_device in inferred_devices.items(): - metadata["Ecephys"]["ElectrodeGroup"][group_idx - 1].update(device=DEVICE_INFO[inferred_device]["name"]) - - # Add region to groups - session_info_matfile_path = session_path / f"{session_id}.sessionInfo.mat" - if session_info_matfile_path.is_file(): - session_info_matfile = read_matlab_file(session_info_matfile_path)["sessionInfo"] - channel_region_list = session_info_matfile.get("region", None) - if channel_region_list: - channel_group_to_region = { - group: region for (group, region) in zip(channel_groups, channel_region_list) - } - for group_idx, region in channel_group_to_region.items(): - metadata["Ecephys"]["ElectrodeGroup"][group_idx - 1].update(location=region) - - return metadata diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py b/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py new file mode 100644 index 0000000..a61ac95 --- /dev/null +++ b/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py @@ -0,0 +1,100 @@ +"""Run entire conversion.""" +from pathlib import Path +from concurrent.futures import ProcessPoolExecutor, as_completed + +from tqdm import tqdm +from nwb_conversion_tools.utils.json_schema import load_dict_from_file +from nwb_conversion_tools.utils.json_schema import dict_deep_update + +from buzsaki_lab_to_nwb.tingley_metabolic.tingleymetabolicnwbconverter import TingleyMetabolicConverter + +n_jobs = 20 +stub_test = True +conversion_factor = 0.195 # Intan + +data_path = Path("/shared/catalystneuro/Buzsaki/TingleyD/") +home_path = Path("/home/jovyan/") + +metadata_path = Path(__file__) / "tingley_metabolic_metadata.yml" +subject_info_path = Path(__file__) / "tingley_metabolic_subject_info.yml" + +if stub_test: + nwb_output_path = home_path / Path("nwb_stub") +else: + nwb_output_path = Path("/shared/catalystneuro/Buzsaki/TingleyD/nwb") +nwb_output_path.mkdir(exist_ok=True) + + +session_path_list = [subject_path.iterdir() for subject_path in (data_path / 
"metadata_metabolic.yml").iterdir()] +if stub_test: + nwbfile_list = [nwb_output_path / f"{session.parent.stem}_{session.stem}_stub.nwb" for session in session_path_list] +else: + nwbfile_list = [nwb_output_path / f"{session.parent.stem}_{session.stem}.nwb" for session in session_path_list] + +global_metadata = load_dict_from_file(metadata_path) +subject_info_table = load_dict_from_file(subject_info_path) + + +def convert_session(session_path, nwbfile_path): + """Run coonversion.""" + print("----------------") + print(session_path) + print(nwbfile_path) + + session_id = session_path.name + lfp_file_path = session_path / f"{session_path.name}.lfp" + raw_file_path = session_path / f"{session_id}.dat" + aux_file_path = session_path / "auxiliary.dat" + rhd_file_path = session_path / f"{session_id}.rhd" + xml_file_path = session_path / f"{session_id}.xml" + + print("raw file available...", raw_file_path.is_file()) + print("lfp file available...", lfp_file_path.is_file()) + source_data = dict() + conversion_options = dict() + + source_data = dict( + NeuroscopeLFP=dict(file_path=str(lfp_file_path), gain=conversion_factor, xml_file_path=str(xml_file_path)), + ) + conversion_options.update(NeuroscopeLFP=dict(stub_test=stub_test)) + + if raw_file_path.is_file(): + source_data.update( + NeuroscopeRecording=dict( + file_path=str(raw_file_path), gain=conversion_factor, xml_file_path=str(xml_file_path) + ) + ) + conversion_options.update(NeuroscopeRecording=dict(stub_test=stub_test)) + + if aux_file_path.is_file() and rhd_file_path.is_file(): + source_data.update( + TingleySeptalBehavior=dict(dat_file_path=str(aux_file_path), rhd_file_path=str(rhd_file_path)) + ) + + converter = TingleyMetabolicConverter(source_data=source_data) + + metadata = converter.get_metadata() + metadata = dict_deep_update(metadata, global_metadata) + metadata["NWBFile"].update( + session_description=subject_info_table.get( + metadata["NWBFile"]["Subject"]["subject_id"], + "Consult Supplementary Table 1 from the publication for more information about this session.", + ) + ) + + converter.run_conversion( + nwbfile_path=str(nwbfile_path), + metadata=metadata, + conversion_options=conversion_options, + overwrite=True, + ) + print("Done with conversion!") + + +with ProcessPoolExecutor(max_workers=n_jobs) as executor: + futures = [] + for session_path, nwbfile_path in zip(session_path_list, nwbfile_list): + futures.append(executor.submit(convert_session, session_path=session_path, nwbfile_path=nwbfile_path)) + completed_futures = tqdm(as_completed(futures), desc="Running conversion...", position=0, leave=False) + for future in completed_futures: + pass diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_metadata.yml b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_metadata.yml new file mode 100644 index 0000000..9df97de --- /dev/null +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_metadata.yml @@ -0,0 +1,9 @@ +NWBFile: + related_publications: + "Transformation of a spatial map across the hippocampal-lateral septal circuit." Neuron 98.6 (2018) 1229-1242. 
+  lab: "Buzsáki"
+  experimenter:
+    - "Author: Tingley, David"
+    - "Author: Buzsáki, György"
+Subject:
+  species: Rattus norvegicus
diff --git a/buzsaki_lab_to_nwb/tingley_long_term/tingley_long_term_requirements.txt b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_requirements.txt
similarity index 91%
rename from buzsaki_lab_to_nwb/tingley_long_term/tingley_long_term_requirements.txt
rename to buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_requirements.txt
index e9414af..7372044 100644
--- a/buzsaki_lab_to_nwb/tingley_long_term/tingley_long_term_requirements.txt
+++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_requirements.txt
@@ -1,4 +1,5 @@
 mat4py==0.5.0
 mat73==0.52
 hdf5storage>=0.1.18
+pyintan>=0.3.0
 nwb-conversion-tools @ git+https://github.com/catalystneuro/nwb-conversion-tools@5e39ca55266b8f7be48380c67471100a98413277
diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_subject_info.yml b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_subject_info.yml
new file mode 100644
index 0000000..370ca78
--- /dev/null
+++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_subject_info.yml
@@ -0,0 +1,45 @@
+CGM1: "Experiment condition 'ripple/glucose recording' with surgery condition 'rCA1'."
+CGM2: "Experiment condition 'ripple/glucose recording' with surgery condition 'rCA1'."
+CGM3: "Experiment condition 'ripple/glucose recording' with surgery condition 'rCA1'."
+CGM4: "Experiment condition 'ripple/glucose recording' with surgery condition 'rCA1'."
+CGM5: "Experiment condition 'ripple/glucose recording' with surgery condition 'rCA1'."
+CGM6: "Experiment condition 'ripple/glucose recording' with surgery condition 'bilat rCA1'."
+CGM7: "Experiment condition 'ripple/glucose recording' with surgery condition 'rHypothalamus & rCA1'."
+CGM8: "Experiment condition 'ripple/glucose recording' with surgery condition 'rHypothalamus & rCA1'."
+CGM9: "Experiment condition 'ripple/glucose recording' with surgery condition 'flex probe in rCA1'."
+CGM10: "Experiment condition 'Opto stim' with surgery condition 'bilat CA3 CaMKII-ChR2'."
+CGM11: "Experiment condition 'Opto stim' with surgery condition 'bilat CA3 CaMKII-ChR2'."
+CGM12: "Experiment condition 'Opto stim' with surgery condition 'bilat CA3 CaMKII-ChR2'."
+CGM13: "Experiment condition 'Opto stim' with surgery condition 'bilat CA3 CaMKII-ChR2'."
+CGM14: "Experiment condition 'Opto stim' with surgery condition 'bilat CA3 CaMKII-ChR2'."
+CGM15: "Experiment condition 'Opto stim' with surgery condition 'bilat CA3 CaMKII-ChR2'."
+CGM16: "Experiment condition 'Opto stim' with surgery condition 'bilat CA3 CaMKII-ChR2'."
+CGM17: "Experiment condition 'dorsal/ventral' with no surgery condition."
+CGM18: "Experiment condition 'DREADDS' with surgery condition 'LS injected'."
+CGM19: "Experiment condition 'DREADDS' with surgery condition 'LS injected'."
+CGM20: "Experiment condition 'DREADDS' with surgery condition 'LS injected'."
+CGM21: "Experiment condition 'PPC opto stim' with surgery condition 'PPC injected'."
+CGM22: "Experiment condition 'PPC opto stim' with surgery condition 'PPC injected'."
+CGM23: "Experiment condition 'dorsal/ventral' with surgery condition 'dorsal/ventral probe implant'."
+CGM24: "Experiment condition 'DREADDS' with surgery condition 'LS injected'."
+CGM25: "Experiment condition 'DREADDS' with surgery condition 'LS injected'."
+CGM26: "Experiment condition 'DREADDS' with surgery condition 'LS injected'."
+CGM27: "Experiment condition 'DREADDS' with surgery condition 'LS injected'."
+CGM28: "Experiment condition 'PPC opto stim' with surgery condition 'PPC injected'."
+CGM29: "Experiment condition 'PPC opto stim' with surgery condition 'PPC injected'."
+CGM30: "Experiment condition 'DREADDS' with surgery condition 'MS injected'."
+CGM31: "Experiment condition 'dorsal/ventral' with surgery condition 'dorsal/ventral probe implant'."
+CGM32: "Experiment condition 'DREADDS' with surgery condition 'MS injected'."
+CGM33: "Experiment condition 'dorsal/ventral' with surgery condition 'dorsal/ventral probe implant'."
+CGM34: "Experiment condition 'DREADDS' with surgery condition 'PPC injected'."
+CGM35: "Experiment condition 'DREADDS' with surgery condition 'PPC injected'."
+CGM36: "Experiment condition 'DREADDS' with surgery condition 'PPC injected'."
+CGM37: "Experiment condition 'DREADDS' with surgery condition 'PPC injected'."
+CGM38: "Experiment condition 'PPC opto stim' with surgery condition 'PPC injected'."
+CGM39: "Experiment condition 'PPC opto stim' with surgery condition 'PPC injected'."
+CGM40: "Experiment condition 'DREADDS' with surgery condition 'MS injected'."
+CGM41: "Experiment condition 'DREADDS' with surgery condition 'PPC injected'."
+CGM42: "Experiment condition 'DREADDS' with surgery condition 'PPC injected'."
+CGM43: "Experiment condition 'DREADDS' with surgery condition 'PPC injected'."
+CGM44: "Experiment condition 'DREADDS' with surgery condition 'PPC injected'."
+CGM45: "Experiment condition 'ripple/glucose recording' with surgery condition 'PPC injected'."
diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicauxextractor.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicauxextractor.py
new file mode 100644
index 0000000..5fe409f
--- /dev/null
+++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicauxextractor.py
@@ -0,0 +1,49 @@
+"""Author: Cody Baker."""
+from pathlib import Path
+
+from spikeextractors import RecordingExtractor, BinDatRecordingExtractor
+from nwb_conversion_tools.utils import FilePathType
+from pyintan import read_rhd
+
+
+class TingleyMetabolicAuxExtractor(BinDatRecordingExtractor):
+    """Aux data extractor for the Tingley metabolic project."""
+
+    extractor_name = "TingleyMetabolicAuxExtractor"
+    has_default_locations = False
+    has_unscaled = True
+    is_writable = True
+    mode = "file"
+
+    def __init__(self, dat_file_path: FilePathType, rhd_file_path: FilePathType):
+        dat_file_path = Path(dat_file_path)
+        rhd_file_path = Path(rhd_file_path)
+
+        RecordingExtractor.__init__(self)
+        rhd_info = read_rhd(filename=str(rhd_file_path))
+        first_aux_entry = next(
+            header_info_entry
+            for header_info_entry in rhd_info[1]
+            if header_info_entry["native_channel_name"] == "A-AUX1"
+        )
+        first_aux_sub_entry = next(
+            header_info_entry for header_info_entry in rhd_info[2] if header_info_entry[0] == "A-AUX1"
+        )
+
+        # Manually confirmed that all aux channels have same properties
+        gain = first_aux_entry["gain"]  # offset confirmed to be 0, units confirmed to be Volts
+        sampling_frequency = first_aux_entry["sampling_rate"]
+        dtype = first_aux_sub_entry[1]
+        numchan = sum("AUX" in header_info_entry["native_channel_name"] for header_info_entry in rhd_info[1])
+
+        BinDatRecordingExtractor.__init__(
+            self,
+            file_path=dat_file_path,
+            sampling_frequency=sampling_frequency,
+            dtype=dtype,
+            numchan=numchan,
+            gain=gain,
+        )
+        self._kwargs = dict(
+            dat_file_path=str(Path(dat_file_path).absolute()), rhd_file_path=str(Path(rhd_file_path).absolute())
+        )
diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicauxinterface.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicauxinterface.py
new file mode 100644
index 0000000..9187914
--- /dev/null
+++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicauxinterface.py
@@ -0,0 +1,14 @@
+"""Authors: Heberto Mayorquin and Cody Baker."""
+from nwb_conversion_tools.basedatainterface import BaseRecordingDataInterface
+from nwb_conversion_tools.utils import FilePathType
+
+from .tingleymetabolicauxextractor import TingleyMetabolicAuxExtractor
+
+
+class TingleyMetabolicAuxInterface(BaseRecordingDataInterface):
+    """Aux data interface for the Tingley metabolic project."""
+
+    RX = TingleyMetabolicAuxExtractor
+
+    def __init__(self, dat_file_path: FilePathType, rhd_file_path: FilePathType):
+        super().__init__(dat_file_path=dat_file_path, rhd_file_path=rhd_file_path)
diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicnwbconverter.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicnwbconverter.py
new file mode 100644
index 0000000..9b0a583
--- /dev/null
+++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicnwbconverter.py
@@ -0,0 +1,57 @@
+"""Authors: Heberto Mayorquin and Cody Baker."""
+import dateutil
+from pathlib import Path
+from datetime import datetime
+
+from nwb_conversion_tools import (
+    NWBConverter,
+    NeuroscopeRecordingInterface,
+    NeuroscopeLFPInterface,
+)
+
+from .tingleymetabolicauxinterface import TingleyMetabolicAuxInterface
+
+
+DEVICE_INFO = dict(
+    cambridge=dict(
+        name="Cambridge probe (1 x 64)",
+        description=(
+            "Silicon probe from Cambridge Neurotech. Electrophysiological data were "
+            "acquired using an Intan RHD2000 system (Intan Technologies LLC) digitized with 20 kHz rate."
+        ),
+    ),
+    neuronexus_4_8=dict(
+        name="Neuronexus probe (1 x 32)",
+        description=(
+            "Silicon probe from Neuronexus. Electrophysiological data were "
+            "acquired using an Intan RHD2000 system (Intan Technologies LLC) digitized with 20 kHz rate."
+ ), + ), +) + + +class TingleyMetabolicConverter(NWBConverter): + """Primary conversion class for the Tingley Metabolic data project.""" + + data_interface_classes = dict( + NeuroscopeRecording=NeuroscopeRecordingInterface, + NeuroscopeLFP=NeuroscopeLFPInterface, + TingleyMetabolicAux=TingleyMetabolicAuxInterface, + ) + + def get_metadata(self): + lfp_file_path = Path(self.data_interface_objects["NeuroscopeLFP"].source_data["file_path"]) + + session_path = lfp_file_path.parent + session_id = session_path.stem + + session_id_split = session_id.split("_")[:-2] + subject_id = session_id_split[0] + date_string = session_id_split[-2:] + session_start_time = datetime.strptime(date_string=date_string, format="%Y%m%d%H%M%S") + session_start_time = session_start_time.replace(tzinfo=dateutil.tz.gettz("US/Eastern")).isoformat() + + metadata = super().get_metadata() + metadata["NWBFile"].update(session_start_time=session_start_time, session_id=session_id) + metadata.update(Subject=dict(subject_id=subject_id)) + return metadata diff --git a/make_env.yml b/make_env.yml deleted file mode 100644 index c7ed8d8..0000000 --- a/make_env.yml +++ /dev/null @@ -1,13 +0,0 @@ -name: buzsaki-lab-to-nwb-env -channels: -- defaults -- anaconda -- conda-forge -dependencies: -- python>=3.7 -- ipython -- pandas>=1.2.3 -- pip -- pip: - - -r requirements.txt - - -e . diff --git a/buzsaki_lab_to_nwb/tingley_long_term/tingley_long_term_environment.yaml b/tingley_metabolic_environment.yml similarity index 60% rename from buzsaki_lab_to_nwb/tingley_long_term/tingley_long_term_environment.yaml rename to tingley_metabolic_environment.yml index 61be5cf..97a64dc 100644 --- a/buzsaki_lab_to_nwb/tingley_long_term/tingley_long_term_environment.yaml +++ b/tingley_metabolic_environment.yml @@ -6,6 +6,7 @@ channels: dependencies: - python==3.9 - pip +- git - pip: - -e . 
- - -r tingley_long_term_requirements.txt + - -r buzsaki_lab_to_nwb/tingley_long_term/tingley_long_term_requirements.txt From f8e0ce8e960677a57be2a357e59cdf4aedbce9a1 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 15 Apr 2022 05:40:45 +0000 Subject: [PATCH 03/40] Automated changes --- .../fujisawa/convert_fujisawa.py | 30 +- .../fujisawa/fujisawamiscdatainterface.py | 17 +- .../fujisawa/fujisawanwbconverter.py | 18 +- .../girardeau/convert_girardeau.py | 11 +- .../girardeau/girardeaumiscdatainterface.py | 28 +- .../girardeau/girardeaunwbconverter.py | 20 +- buzsaki_lab_to_nwb/mpgdatainterface.py | 8 +- .../peyrache/convert_peyrache.py | 27 +- .../peyrache/peyrachemiscdatainterface.py | 30 +- .../peyrache/peyrachenwbconverter.py | 39 +-- buzsaki_lab_to_nwb/watson/convert_watson.py | 95 ++++-- .../watson/watsonbehaviordatainterface.py | 95 +++--- .../watson/watsonlfpdatainterface.py | 90 ++--- .../watson/watsonnorecording.py | 16 +- .../watson/watsonnwbconverter.py | 317 ++++++++++-------- .../watson/watsonsortinginterface.py | 6 +- 16 files changed, 436 insertions(+), 411 deletions(-) diff --git a/buzsaki_lab_to_nwb/fujisawa/convert_fujisawa.py b/buzsaki_lab_to_nwb/fujisawa/convert_fujisawa.py index 05f3de2..ec522c2 100644 --- a/buzsaki_lab_to_nwb/fujisawa/convert_fujisawa.py +++ b/buzsaki_lab_to_nwb/fujisawa/convert_fujisawa.py @@ -7,8 +7,11 @@ base_path = Path("E:/BuzsakiData/FujisawaS") convert_sessions = [ - subsession for mouse in base_path.iterdir() if mouse.is_dir() - for session in mouse.iterdir() for subsession in session.iterdir() + subsession + for mouse in base_path.iterdir() + if mouse.is_dir() + for session in mouse.iterdir() + for subsession in session.iterdir() ] experimenter = "Shigeyoshi Fujisawa" @@ -46,16 +49,14 @@ subject_name = session_path.parent.parent.name session_id = session_path.name print(f"Converting session {session_id}...") - + lfp_file_path = str(session_path / f"{session_id}.eeg") raw_data_file_path = lfp_file_path.replace("eeg", "dat") mat_file_path = session_path / f"{session_id}_Behavior.mat" - + source_data = dict( NeuroscopeLFP=dict( - file_path=lfp_file_path, - gain=conversion_factor, - xml_file_path=str(session_path / f"{session_id}.xml") + file_path=lfp_file_path, gain=conversion_factor, xml_file_path=str(session_path / f"{session_id}.xml") ) ) conversion_options = dict(NeuroscopeLFP=dict(stub_test=stub_test)) @@ -67,24 +68,19 @@ conversion_options.update(NeuroscopeRecording=dict(stub_test=stub_test)) if mat_file_path.is_file(): source_data.update(Misc=dict(mat_file_path=str(mat_file_path))) - + converter = FujisawaNWBConverter(source_data) metadata = converter.get_metadata() - + # Specific info metadata["NWBFile"].update( - experimenter=experimenter, - session_description=paper_descr, - related_publications=paper_info + experimenter=experimenter, session_description=paper_descr, related_publications=paper_info ) metadata["Ecephys"]["Device"][0].update(description=device_descr) - + nwbfile_path = str(base_path / f"{session_id}_stub.nwb") converter.run_conversion( - nwbfile_path=nwbfile_path, - metadata=metadata, - conversion_options=conversion_options, - overwrite=True + nwbfile_path=nwbfile_path, metadata=metadata, conversion_options=conversion_options, overwrite=True ) except Exception as e: print(f"Unable to convert session {session_path} due to {e}!") diff --git a/buzsaki_lab_to_nwb/fujisawa/fujisawamiscdatainterface.py b/buzsaki_lab_to_nwb/fujisawa/fujisawamiscdatainterface.py index 6d0e458..b961c58 100644 --- 
a/buzsaki_lab_to_nwb/fujisawa/fujisawamiscdatainterface.py +++ b/buzsaki_lab_to_nwb/fujisawa/fujisawamiscdatainterface.py @@ -33,17 +33,14 @@ def run_conversion(self, nwbfile: NWBFile, metadata: dict): l_r_dict = {1: "Right", 2: "Left"} for trial in trial_info: nwbfile.add_trial( - start_time=trial[0], - stop_time=trial[1], - reward_time=trial[2], - left_or_right=l_r_dict[int(trial[3])] + start_time=trial[0], stop_time=trial[1], reward_time=trial[2], left_or_right=l_r_dict[int(trial[3])] ) # Position pos_info = mat_file["whlrl"] pos_data = [pos_info[:, 0:1], pos_info[:, 2:3]] - starting_time = 0. - rate = 20000/512 # from CRCNS info + starting_time = 0.0 + rate = 20000 / 512 # from CRCNS info conversion = np.nan # whl are arbitrary units pos_obj = Position(name="Position") for j in range(2): @@ -55,13 +52,11 @@ def run_conversion(self, nwbfile: NWBFile, metadata: dict): conversion=conversion, starting_time=starting_time, rate=rate, - resolution=np.nan + resolution=np.nan, ) pos_obj.add_spatial_series(spatial_series_object) get_module( - nwbfile=nwbfile, - name="behavior", - description="Contains processed behavioral data." + nwbfile=nwbfile, name="behavior", description="Contains processed behavioral data." ).add_data_interface(pos_obj) linearized_pos = mat_file["whlrld"][:, 6] @@ -78,7 +73,7 @@ def run_conversion(self, nwbfile: NWBFile, metadata: dict): conversion=conversion, starting_time=starting_time, rate=rate, - resolution=np.nan + resolution=np.nan, ) lin_pos_obj.add_spatial_series(lin_spatial_series_object) get_module(nwbfile=nwbfile, name="behavior").add_data_interface(lin_pos_obj) diff --git a/buzsaki_lab_to_nwb/fujisawa/fujisawanwbconverter.py b/buzsaki_lab_to_nwb/fujisawa/fujisawanwbconverter.py index b7fab3f..2a832e5 100644 --- a/buzsaki_lab_to_nwb/fujisawa/fujisawanwbconverter.py +++ b/buzsaki_lab_to_nwb/fujisawa/fujisawanwbconverter.py @@ -3,8 +3,11 @@ from datetime import datetime from nwb_conversion_tools import NWBConverter -from nwb_conversion_tools.datainterfaces.neuroscopedatainterface import NeuroscopeRecordingInterface, \ - NeuroscopeLFPInterface, NeuroscopeSortingInterface +from nwb_conversion_tools.datainterfaces.neuroscopedatainterface import ( + NeuroscopeRecordingInterface, + NeuroscopeLFPInterface, + NeuroscopeSortingInterface, +) from .fujisawamiscdatainterface import FujisawaMiscInterface @@ -16,29 +19,26 @@ class FujisawaNWBConverter(NWBConverter): NeuroscopeRecording=NeuroscopeRecordingInterface, NeuroscopeLFP=NeuroscopeLFPInterface, NeuroscopeSorting=NeuroscopeSortingInterface, - Misc=FujisawaMiscInterface + Misc=FujisawaMiscInterface, ) def get_metadata(self): lfp_file_path = Path(self.data_interface_objects["NeuroscopeLFP"].source_data["file_path"]) session_id = lfp_file_path.parent.name - subject_id, _ = lfp_file_path.stem.split('.') + subject_id, _ = lfp_file_path.stem.split(".") datetime_string = "2008" + lfp_file_path.parent.parent.name[2:6] session_start = datetime.strptime(datetime_string, "%Y%m%d") metadata = super().get_metadata() metadata["NWBFile"].update( - session_start_time=session_start.astimezone(), - session_id=session_id, - institution="NYU", - lab="Buzsaki" + session_start_time=session_start.astimezone(), session_id=session_id, institution="NYU", lab="Buzsaki" ) metadata.update( Subject=dict( subject_id=lfp_file_path.parent.parent.parent.name, species="Rattus norvegicus domestica", sex="Male", - age="3-5 months" + age="3-5 months", ) ) diff --git a/buzsaki_lab_to_nwb/girardeau/convert_girardeau.py 
b/buzsaki_lab_to_nwb/girardeau/convert_girardeau.py index c0a0d95..2da39b1 100644 --- a/buzsaki_lab_to_nwb/girardeau/convert_girardeau.py +++ b/buzsaki_lab_to_nwb/girardeau/convert_girardeau.py @@ -26,24 +26,21 @@ NeuroscopeLFP=dict(file_path=eeg_file_path, gain=conversion_factor), CellExplorerSorting=dict(spikes_matfile_path=spikes_matfile_path), GirardeauMisc=dict(folder_path=folder_path), - MPG=dict(file_paths=mpg_file_paths) + MPG=dict(file_paths=mpg_file_paths), ) conversion_options = dict( CellExplorerSorting=dict(stub_test=stub_test), NeuroscopeLFP=dict(stub_test=stub_test), - MPG=dict(stub_test=stub_test) + MPG=dict(stub_test=stub_test), ) if raw_data_file_path.is_dir(): source_data.update(NeuroscopeRecording=dict(file_path=str(raw_data_file_path), gain=conversion_factor)) conversion_options.update(NeuroscopeRecording=dict(stub_test=stub_test, buffer_mb=2000)) else: - conversion_options['CellExplorerSorting'].update(write_ecephys_metadata=True) + conversion_options["CellExplorerSorting"].update(write_ecephys_metadata=True) converter = GirardeauNWBConverter(source_data) metadata = converter.get_metadata() converter.run_conversion( - nwbfile_path=nwbfile_path, - metadata=metadata, - conversion_options=conversion_options, - overwrite=True + nwbfile_path=nwbfile_path, metadata=metadata, conversion_options=conversion_options, overwrite=True ) diff --git a/buzsaki_lab_to_nwb/girardeau/girardeaumiscdatainterface.py b/buzsaki_lab_to_nwb/girardeau/girardeaumiscdatainterface.py index fc34859..29bc96b 100644 --- a/buzsaki_lab_to_nwb/girardeau/girardeaumiscdatainterface.py +++ b/buzsaki_lab_to_nwb/girardeau/girardeaumiscdatainterface.py @@ -17,6 +17,7 @@ # LapType mat files seem to have some info on the air puffs and mouse track runs, but it's hard to decipher and # not much documentation on it + class GirardeauMiscInterface(BaseDataInterface): """Primary data interface for miscellaneous aspects of the GirardeauG dataset.""" @@ -29,8 +30,8 @@ def run_conversion( nwbfile: NWBFile, metadata: dict, stub_test: bool = False, - ): - session_path = Path(self.source_data['folder_path']) + ): + session_path = Path(self.source_data["folder_path"]) session_id = session_path.name # Stimuli @@ -41,23 +42,19 @@ def run_conversion( # Epochs df = pd.read_csv( - session_path / f"{session_id}.cat.evt", - sep=" ", - names=("time", "begin_or_end", "of", "epoch_name") + session_path / f"{session_id}.cat.evt", sep=" ", names=("time", "begin_or_end", "of", "epoch_name") ) epoch_starts = [] - for j in range(int(len(df)/2)): - epoch_starts.append(df['time'][2 * j]) + for j in range(int(len(df) / 2)): + epoch_starts.append(df["time"][2 * j]) nwbfile.add_epoch( - start_time=epoch_starts[j], - stop_time=df['time'][2 * j + 1], - tags=[df['epoch_name'][2 * j][18:]] + start_time=epoch_starts[j], stop_time=df["time"][2 * j + 1], tags=[df["epoch_name"][2 * j][18:]] ) # Trials trialdata_path = session_path / f"{session_id}-TrackRunTimes.mat" if trialdata_path.is_file(): - trials_data = loadmat(trialdata_path)['trackruntimes'] + trials_data = loadmat(trialdata_path)["trackruntimes"] for trial_data in trials_data: nwbfile.add_trial(start_time=trial_data[0], stop_time=trial_data[1]) @@ -65,10 +62,7 @@ def run_conversion( whl_files = [] for whl_file in whl_files: add_position_data( - nwbfile=nwbfile, - session_path=session_path, - whl_file_path=whl_file, - starting_time=epoch_starts[j] + nwbfile=nwbfile, session_path=session_path, whl_file_path=whl_file, starting_time=epoch_starts[j] ) # States @@ -76,7 +70,7 @@ def 
run_conversion( # label renaming state_label_names = dict(WAKEstate="Awake", NREMstate="Non-REM", REMstate="REM") if sleep_state_fpath.is_file(): - matin = loadmat(sleep_state_fpath)['SleepState']['ints'][0][0] + matin = loadmat(sleep_state_fpath)["SleepState"]["ints"][0][0] table = TimeIntervals(name="states", description="Sleep states of animal.") table.add_column(name="label", description="Sleep state.") @@ -85,5 +79,5 @@ def run_conversion( for name in matin.dtype.names: for row in matin[name][0][0]: data.append(dict(start_time=row[0], stop_time=row[1], label=state_label_names[name])) - [table.add_row(**row) for row in sorted(data, key=lambda x: x['start_time'])] + [table.add_row(**row) for row in sorted(data, key=lambda x: x["start_time"])] check_module(nwbfile, "behavior", "Contains behavioral data.").add(table) diff --git a/buzsaki_lab_to_nwb/girardeau/girardeaunwbconverter.py b/buzsaki_lab_to_nwb/girardeau/girardeaunwbconverter.py index 80b1af6..5900571 100644 --- a/buzsaki_lab_to_nwb/girardeau/girardeaunwbconverter.py +++ b/buzsaki_lab_to_nwb/girardeau/girardeaunwbconverter.py @@ -3,8 +3,10 @@ from pathlib import Path from nwb_conversion_tools import NWBConverter -from nwb_conversion_tools.datainterfaces.neuroscopedatainterface import NeuroscopeRecordingInterface, \ - NeuroscopeLFPInterface +from nwb_conversion_tools.datainterfaces.neuroscopedatainterface import ( + NeuroscopeRecordingInterface, + NeuroscopeLFPInterface, +) from nwb_conversion_tools.datainterfaces.cellexplorerdatainterface import CellExplorerSortingInterface from ..mpgdatainterface import MPGInterface @@ -19,11 +21,11 @@ class GirardeauNWBConverter(NWBConverter): NeuroscopeLFP=NeuroscopeLFPInterface, CellExplorerSorting=CellExplorerSortingInterface, GirardeauMisc=GirardeauMiscInterface, - MPG=MPGInterface + MPG=MPGInterface, ) def get_metadata(self): - lfp_file_path = Path(self.data_interface_objects['NeuroscopeLFP'].source_data['file_path']) + lfp_file_path = Path(self.data_interface_objects["NeuroscopeLFP"].source_data["file_path"]) session_id = lfp_file_path.stem session_start = datetime.strptime(session_id[6:], "%Y%m%d") @@ -53,14 +55,14 @@ def get_metadata(self): ) metadata = super().get_metadata() - metadata['NWBFile'].update( + metadata["NWBFile"].update( session_start_time=session_start.astimezone(), session_id=session_id, institution="NYU", lab="Buzsaki", experimenter="Gabrielle Girardeau", session_description=paper_descr, - related_publications=paper_info + related_publications=paper_info, ) metadata.update( Subject=dict( @@ -68,14 +70,14 @@ def get_metadata(self): sex="Male", genotype="Wild type", weight="300g", - age="3 months" + age="3 months", ) ) - if 'Ecephys' not in metadata: # If NeuroscopeRecording was not in source_data + if "Ecephys" not in metadata: # If NeuroscopeRecording was not in source_data session_path = lfp_file_path.parent xml_file_path = str(session_path / f"{session_id}.xml") metadata.update(NeuroscopeRecordingInterface.get_ecephys_metadata(xml_file_path=xml_file_path)) - metadata['Ecephys']['Device'][0].update(description=device_descr) + metadata["Ecephys"]["Device"][0].update(description=device_descr) return metadata diff --git a/buzsaki_lab_to_nwb/mpgdatainterface.py b/buzsaki_lab_to_nwb/mpgdatainterface.py index e22398d..5f17310 100644 --- a/buzsaki_lab_to_nwb/mpgdatainterface.py +++ b/buzsaki_lab_to_nwb/mpgdatainterface.py @@ -26,14 +26,14 @@ def run_conversion( nwbfile: NWBFile, metadata: dict, stub_test: bool = False, - ): + ): if stub_test: count_max = 10 else: count_max = 
np.inf - (major_ver, minor_ver, subminor_ver) = (cv2.__version__).split('.') - file_paths = self.source_data['file_paths'] + (major_ver, minor_ver, subminor_ver) = (cv2.__version__).split(".") + file_paths = self.source_data["file_paths"] for file in file_paths: cap = cv2.VideoCapture(file) if int(major_ver) < 3: @@ -55,6 +55,6 @@ def run_conversion( name=f"Video: {Path(file).name}", description="Video recorded by camera.", data=H5DataIO(mov, compression="gzip"), - rate=fps + rate=fps, ) nwbfile.add_acquisition(video) diff --git a/buzsaki_lab_to_nwb/peyrache/convert_peyrache.py b/buzsaki_lab_to_nwb/peyrache/convert_peyrache.py index eea3cd5..5eba1e0 100644 --- a/buzsaki_lab_to_nwb/peyrache/convert_peyrache.py +++ b/buzsaki_lab_to_nwb/peyrache/convert_peyrache.py @@ -58,40 +58,33 @@ source_data = dict( NeuroscopeSorting=dict(folder_path=folder_path, load_waveforms=True), NeuroscopeLFP=dict(file_path=eeg_file_path, gain=conversion_factor), - PeyracheMisc=dict(folder_path=folder_path) + PeyracheMisc=dict(folder_path=folder_path), ) conversion_options = dict( - NeuroscopeSorting=dict(stub_test=stub_test, write_waveforms=True), - NeuroscopeLFP=dict(stub_test=stub_test) + NeuroscopeSorting=dict(stub_test=stub_test, write_waveforms=True), NeuroscopeLFP=dict(stub_test=stub_test) ) if raw_data_folder_path.is_dir(): folder_path = str(raw_data_folder_path) - source_data.update( - NeuroscopeRecording=dict(folder_path=folder_path, gain=conversion_factor) - ) - conversion_options.update( - NeuroscopeRecording=dict(stub_test=stub_test) - ) + source_data.update(NeuroscopeRecording=dict(folder_path=folder_path, gain=conversion_factor)) + conversion_options.update(NeuroscopeRecording=dict(stub_test=stub_test)) else: - conversion_options['NeuroscopeSorting'].update(write_ecephys_metadata=True) + conversion_options["NeuroscopeSorting"].update(write_ecephys_metadata=True) peyrache_converter = PeyracheNWBConverter(source_data) metadata = peyrache_converter.get_metadata() # Specific info - metadata['NWBFile'].update( - experimenter=experimenter, - session_description=paper_descr, - related_publications=paper_info + metadata["NWBFile"].update( + experimenter=experimenter, session_description=paper_descr, related_publications=paper_info ) - metadata['Subject'].update( + metadata["Subject"].update( subject_id=session_path.parent.name, ) - metadata['Ecephys']['Device'][0].update(description=device_descr) + metadata["Ecephys"]["Device"][0].update(description=device_descr) peyrache_converter.run_conversion( nwbfile_path=str(nwbfile_path), metadata=metadata, conversion_options=conversion_options, - overwrite=overwrite + overwrite=overwrite, ) diff --git a/buzsaki_lab_to_nwb/peyrache/peyrachemiscdatainterface.py b/buzsaki_lab_to_nwb/peyrache/peyrachemiscdatainterface.py index 4dba6f1..d986913 100644 --- a/buzsaki_lab_to_nwb/peyrache/peyrachemiscdatainterface.py +++ b/buzsaki_lab_to_nwb/peyrache/peyrachemiscdatainterface.py @@ -23,9 +23,9 @@ def peyrache_spatial_series(name: str, description: str, data: np.array, convers data=H5DataIO(data, compression="gzip"), conversion=conversion, reference_frame="Unknown", - starting_time=0., + starting_time=0.0, rate=pos_sf, - resolution=np.nan + resolution=np.nan, ) @@ -37,7 +37,7 @@ def get_source_schema(cls): return dict(properties=dict(folder_path=dict(type="string"))) def run_conversion(self, nwbfile: NWBFile, metadata_dict: dict, stub_test: bool = False): - session_path = Path(self.source_data['folder_path']) + session_path = Path(self.source_data["folder_path"]) session_id = 
session_path.stem # Stimuli @@ -48,20 +48,20 @@ def run_conversion(self, nwbfile: NWBFile, metadata_dict: dict, stub_test: bool # label renaming specific to Peyrache state_label_names = dict(WAKEstate="Awake", NREMstate="Non-REM", REMstate="REM") if sleep_state_fpath.is_file(): - matin = loadmat(sleep_state_fpath)['SleepState']['ints'][0][0] + matin = loadmat(sleep_state_fpath)["SleepState"]["ints"][0][0] - table = TimeIntervals(name='states', description="Sleep states of animal.") - table.add_column(name='label', description="Sleep state.") + table = TimeIntervals(name="states", description="Sleep states of animal.") + table.add_column(name="label", description="Sleep state.") data = [] for name in matin.dtype.names: for row in matin[name][0][0]: data.append(dict(start_time=row[0], stop_time=row[1], label=state_label_names[name])) - [table.add_row(**row) for row in sorted(data, key=lambda x: x['start_time'])] - check_module(nwbfile, 'behavior', "Contains behavioral data.").add(table) + [table.add_row(**row) for row in sorted(data, key=lambda x: x["start_time"])] + check_module(nwbfile, "behavior", "Contains behavioral data.").add(table) # Position - pos_names = ['RedLED', 'BlueLED'] + pos_names = ["RedLED", "BlueLED"] pos_idx_from = [0, 2] pos_idx_to = [2, 4] @@ -74,7 +74,7 @@ def run_conversion(self, nwbfile: NWBFile, metadata_dict: dict, stub_test: bool name=name, description="Raw sensor data. Values of -1 indicate that LED detection failed.", data=whl_data[:, idx_from:idx_to], - conversion=np.nan # whl file is in arbitrary grid units + conversion=np.nan, # whl file is in arbitrary grid units ) ) @@ -83,7 +83,7 @@ def run_conversion(self, nwbfile: NWBFile, metadata_dict: dict, stub_test: bool if posfile_path.is_file(): # at least Mouse32-140820 was missing a .pos file try: pos_data = np.loadtxt(posfile_path) - pos_obj = Position(name='SubjectPosition') + pos_obj = Position(name="SubjectPosition") for name, idx_from, idx_to in zip(pos_names, pos_idx_from, pos_idx_to): pos_obj.add_spatial_series( peyrache_spatial_series( @@ -93,17 +93,17 @@ def run_conversion(self, nwbfile: NWBFile, metadata_dict: dict, stub_test: bool "Values of -1 indicate that LED detection failed." 
), data=pos_data[:, idx_from:idx_to], - conversion=1e-2 # from cm to m + conversion=1e-2, # from cm to m ) ) - check_module(nwbfile, 'behavior', "Contains behavioral data.").add(pos_obj) + check_module(nwbfile, "behavior", "Contains behavioral data.").add(pos_obj) except ValueError: # data issue present in at least Mouse17-170201 warn(f"Skipping .pos file for session {session_id}!") # Epochs - only available for sessions with raw data epoch_file = session_path / "raw" / f"{session_id}-raw-info" / f"{session_id}-behaviors.txt" if epoch_file.is_file(): - epoch_data = pd.read_csv(epoch_file, header=1)[f'{session_id}:'] + epoch_data = pd.read_csv(epoch_file, header=1)[f"{session_id}:"] epoch_dat_inds = [] epoch_names = [] for epochs in epoch_data: @@ -119,7 +119,7 @@ def run_conversion(self, nwbfile: NWBFile, metadata_dict: dict, stub_test: bool info_extractor = NeuroscopeRecordingExtractor(recording_file) dat_end_time = info_extractor.get_num_frames() / info_extractor.get_sampling_frequency() # seconds exp_end_times.extend([dat_end_time]) - epoch_windows.extend([epoch_windows[-1] + sum(exp_end_times)]*2) + epoch_windows.extend([epoch_windows[-1] + sum(exp_end_times)] * 2) epoch_windows = np.array(epoch_windows[:-1]).reshape(-1, 2) for j, epoch_name in enumerate(epoch_names): diff --git a/buzsaki_lab_to_nwb/peyrache/peyrachenwbconverter.py b/buzsaki_lab_to_nwb/peyrache/peyrachenwbconverter.py index 65f750e..da8698a 100644 --- a/buzsaki_lab_to_nwb/peyrache/peyrachenwbconverter.py +++ b/buzsaki_lab_to_nwb/peyrache/peyrachenwbconverter.py @@ -3,8 +3,12 @@ from pathlib import Path from nwb_conversion_tools import NWBConverter -from nwb_conversion_tools.datainterfaces.neuroscopedatainterface import NeuroscopeMultiRecordingTimeInterface, \ - NeuroscopeLFPInterface, NeuroscopeRecordingInterface, NeuroscopeSortingInterface +from nwb_conversion_tools.datainterfaces.neuroscopedatainterface import ( + NeuroscopeMultiRecordingTimeInterface, + NeuroscopeLFPInterface, + NeuroscopeRecordingInterface, + NeuroscopeSortingInterface, +) from nwb_conversion_tools.datainterfaces.cellexplorerdatainterface import CellExplorerSortingInterface from .peyrachemiscdatainterface import PeyracheMiscInterface @@ -17,45 +21,36 @@ class PeyracheNWBConverter(NWBConverter): NeuroscopeRecording=NeuroscopeMultiRecordingTimeInterface, NeuroscopeSorting=NeuroscopeSortingInterface, NeuroscopeLFP=NeuroscopeLFPInterface, - PeyracheMisc=PeyracheMiscInterface + PeyracheMisc=PeyracheMiscInterface, ) def get_metadata(self): """Auto-fill all relevant metadata used in run_conversion.""" - lfp_file_path = Path(self.data_interface_objects['NeuroscopeLFP'].source_data['file_path']) + lfp_file_path = Path(self.data_interface_objects["NeuroscopeLFP"].source_data["file_path"]) session_path = lfp_file_path.parent session_id = lfp_file_path.stem - if '-' in session_id: - subject_id, date_text = session_id.split('-') + if "-" in session_id: + subject_id, date_text = session_id.split("-") session_start = dateparse(date_text[-4:] + date_text[:-4]) metadata = super().get_metadata() - metadata['NWBFile'].update( - session_start_time=session_start.astimezone(), - session_id=session_id, - institution="NYU", - lab="Buzsaki" - ) - metadata.update( - Subject=dict( - species="Mus musculus", - genotype="Wild type", - weight="27-50g" - ) + metadata["NWBFile"].update( + session_start_time=session_start.astimezone(), session_id=session_id, institution="NYU", lab="Buzsaki" ) + metadata.update(Subject=dict(species="Mus musculus", genotype="Wild type", 
weight="27-50g")) # Unit metadata retrieved from CellExplorer format # But still using Neuroscope for waveforms spikes_matfile_path = session_path / f"{session_id}.spikes.cellinfo.mat" if spikes_matfile_path.is_file(): sorting_metadata_interface = CellExplorerSortingInterface(spikes_matfile_path=str(spikes_matfile_path)) - sorting_metadata = sorting_metadata_interface.get_metadata()['UnitProperties'] - n_units = len(self.data_interface_objects['NeuroscopeSorting'].sorting_extractor.get_unit_ids()) + sorting_metadata = sorting_metadata_interface.get_metadata()["UnitProperties"] + n_units = len(self.data_interface_objects["NeuroscopeSorting"].sorting_extractor.get_unit_ids()) if len(sorting_metadata[0]) == n_units: print(f"Updating UnitProperties for session {session_id}!") - metadata['UnitProperties'] = sorting_metadata_interface['UnitProperties'] + metadata["UnitProperties"] = sorting_metadata_interface["UnitProperties"] - if 'Ecephys' not in metadata: # If NeuroscopeRecording was not in source_data + if "Ecephys" not in metadata: # If NeuroscopeRecording was not in source_data xml_file_path = str(session_path / f"{session_id}.xml") metadata.update(NeuroscopeRecordingInterface.get_ecephys_metadata(xml_file_path=xml_file_path)) diff --git a/buzsaki_lab_to_nwb/watson/convert_watson.py b/buzsaki_lab_to_nwb/watson/convert_watson.py index 6b04ddb..d83bad2 100644 --- a/buzsaki_lab_to_nwb/watson/convert_watson.py +++ b/buzsaki_lab_to_nwb/watson/convert_watson.py @@ -1,32 +1,48 @@ """Authors: Cody Baker and Ben Dichter.""" from buzsaki_lab_to_nwb import WatsonNWBConverter + # TODO: add pathlib import os # List of folder paths to iterate over base_path = "D:/BuzsakiData/WatsonBO" -convert_sessions = ["BWRat17/BWRat17_121712", "BWRat17/BWRat17_121912", "BWRat18/BWRat18_020513", - "BWRat19/BWRat19_032513", "BWRat19/BWRat19_032413", "BWRat20/BWRat20_101013", - "BWRat20/BWRat20_101513", "BWRat21/BWRat21_121113", "BWRat21/BWRat21_121613", - "BWRat21/BWRat21_121813", "Bogey/Bogey_012615", - "Dino/Dino_061814", - "Dino/Dino_061914", "Dino/Dino_062014", # incorrect # of channels for full lfp reshaping... - "Dino/Dino_072114"#, # missing clu files... - "Dino/Dino_072314", "Dino/Dino_072414", "Rizzo/Rizzo_022615", - "Rizzo/Rizzo_022715", - "Splinter/Splinter_020515", "Splinter/Splinter_020915", - "Templeton/Templeton_032415" - ] +convert_sessions = [ + "BWRat17/BWRat17_121712", + "BWRat17/BWRat17_121912", + "BWRat18/BWRat18_020513", + "BWRat19/BWRat19_032513", + "BWRat19/BWRat19_032413", + "BWRat20/BWRat20_101013", + "BWRat20/BWRat20_101513", + "BWRat21/BWRat21_121113", + "BWRat21/BWRat21_121613", + "BWRat21/BWRat21_121813", + "Bogey/Bogey_012615", + "Dino/Dino_061814", + "Dino/Dino_061914", + "Dino/Dino_062014", # incorrect # of channels for full lfp reshaping... + "Dino/Dino_072114" "Dino/Dino_072314", # , # missing clu files... + "Dino/Dino_072414", + "Rizzo/Rizzo_022615", + "Rizzo/Rizzo_022715", + "Splinter/Splinter_020515", + "Splinter/Splinter_020915", + "Templeton/Templeton_032415", +] experimenter = "Brendon Watson" -paper_descr = "Data was recorded using silicon probe electrodes in the frontal cortices of male Long " \ - "Evans rats between 4-7 months of age. The design was to have no specific behavior, " \ - "task or stimulus, rather the animal was left alone in it’s home cage (which it lives in at all " \ - "times)." -paper_info = "Network Homeostasis and State Dynamics of Neocortical Sleep" \ - "Watson BO, Levenstein D, Greene JP, Gelinas JN, Buzsáki G." \ - "Neuron. 2016 Apr 27. 
pii: S0896-6273(16)30056-3." \ - "doi: 10.1016/j.neuron.2016.03.036" +paper_descr = ( + "Data was recorded using silicon probe electrodes in the frontal cortices of male Long " + "Evans rats between 4-7 months of age. The design was to have no specific behavior, " + "task or stimulus, rather the animal was left alone in it’s home cage (which it lives in at all " + "times)." +) +paper_info = ( + "Network Homeostasis and State Dynamics of Neocortical Sleep" + "Watson BO, Levenstein D, Greene JP, Gelinas JN, Buzsáki G." + "Neuron. 2016 Apr 27. pii: S0896-6273(16)30056-3." + "doi: 10.1016/j.neuron.2016.03.036" +) for session in convert_sessions: print("Converting session {}...".format(session)) @@ -39,18 +55,22 @@ # construct input_args dict according to input schema input_args = { - 'NeuroscopeRecording': {'file_path': os.path.join(folder_path, session_id) + ".dat"}, - 'WatsonLFP': {'folder_path': folder_path}, - 'WatsonBehavior': {'folder_path': folder_path} + "NeuroscopeRecording": {"file_path": os.path.join(folder_path, session_id) + ".dat"}, + "WatsonLFP": {"folder_path": folder_path}, + "WatsonBehavior": {"folder_path": folder_path}, } # Very special case if session == "Dino/Dino_072114": - input_args.update({'CellExplorerSorting': {'spikes_file_path': os.path.join(folder_path, session_id) - + ".spikes.cellinfo.mat"}}) + input_args.update( + { + "CellExplorerSorting": { + "spikes_file_path": os.path.join(folder_path, session_id) + ".spikes.cellinfo.mat" + } + } + ) else: - input_args.update({'NeuroscopeSorting': {'folder_path': folder_path, - 'keep_mua_units': False}}) + input_args.update({"NeuroscopeSorting": {"folder_path": folder_path, "keep_mua_units": False}}) watson_converter = WatsonNWBConverter(**input_args) @@ -60,18 +80,19 @@ metadata = watson_converter.get_metadata() # Yuta specific info - metadata['NWBFile'].update({'experimenter': experimenter}) - metadata['NWBFile'].update({'session_description': paper_descr}) - metadata['NWBFile'].update({'related_publications': paper_info}) + metadata["NWBFile"].update({"experimenter": experimenter}) + metadata["NWBFile"].update({"session_description": paper_descr}) + metadata["NWBFile"].update({"related_publications": paper_info}) - metadata['Subject'].update({'species': 'Rattus norvegicus domestica - Long Evans'}) - metadata['Subject'].update({'genotype': 'Wild type'}) - metadata['Subject'].update({'age': '3-7 months'}) # No age data avilable per subject without contacting lab - metadata['Subject'].update({'weight': '250-500g'}) + metadata["Subject"].update({"species": "Rattus norvegicus domestica - Long Evans"}) + metadata["Subject"].update({"genotype": "Wild type"}) + metadata["Subject"].update({"age": "3-7 months"}) # No age data avilable per subject without contacting lab + metadata["Subject"].update({"weight": "250-500g"}) - device_descr = "silicon probe electrodes; see {}.xml or {}.sessionInfo.mat for more information".format(session_id, - session_id) - metadata[watson_converter.get_recording_type()]['Ecephys']['Device'][0].update({'description': device_descr}) + device_descr = "silicon probe electrodes; see {}.xml or {}.sessionInfo.mat for more information".format( + session_id, session_id + ) + metadata[watson_converter.get_recording_type()]["Ecephys"]["Device"][0].update({"description": device_descr}) nwbfile_path = os.path.join(folder_path, "{}_stub.nwb".format(session_id)) watson_converter.run_conversion(nwbfile_path, metadata, stub_test=True) diff --git a/buzsaki_lab_to_nwb/watson/watsonbehaviordatainterface.py 
b/buzsaki_lab_to_nwb/watson/watsonbehaviordatainterface.py index d8e6f0c..be94fd9 100644 --- a/buzsaki_lab_to_nwb/watson/watsonbehaviordatainterface.py +++ b/buzsaki_lab_to_nwb/watson/watsonbehaviordatainterface.py @@ -12,7 +12,6 @@ class WatsonBehaviorInterface(BaseDataInterface): - @classmethod def get_input_schema(cls): return {} @@ -24,89 +23,101 @@ def get_metadata_schema(self): metadata_schema = get_base_schema() return metadata_schema - def convert_data(self, nwbfile: NWBFile, metadata_dict: dict, - stub_test: bool = False, include_spike_waveforms: bool = False): - session_path = self.input_args['folder_path'] + def convert_data( + self, nwbfile: NWBFile, metadata_dict: dict, stub_test: bool = False, include_spike_waveforms: bool = False + ): + session_path = self.input_args["folder_path"] # TODO: check/enforce format? - task_types = metadata_dict.get('task_types', []) + task_types = metadata_dict.get("task_types", []) subject_path, session_id = os.path.split(session_path) fpath_base = os.path.split(subject_path)[0] [nwbfile.add_stimulus(x) for x in get_events(session_path)] - exist_pos_data = any(os.path.isfile(os.path.join(session_path, - '{}__{}.mat'.format(session_id, task_type['name']))) - for task_type in task_types) + exist_pos_data = any( + os.path.isfile(os.path.join(session_path, "{}__{}.mat".format(session_id, task_type["name"]))) + for task_type in task_types + ) if exist_pos_data: - nwbfile.add_epoch_column('label', 'name of epoch') + nwbfile.add_epoch_column("label", "name of epoch") for task_type in task_types: - label = task_type['name'] + label = task_type["name"] - file = os.path.join(session_path, session_id + '__' + label + '.mat') + file = os.path.join(session_path, session_id + "__" + label + ".mat") if os.path.isfile(file): - pos_obj = Position(name=label + '_position') + pos_obj = Position(name=label + "_position") matin = loadmat(file) - tt = matin['twhl_norm'][:, 0] + tt = matin["twhl_norm"][:, 0] exp_times = find_discontinuities(tt) - if 'conversion' in task_type: - conversion = task_type['conversion'] + if "conversion" in task_type: + conversion = task_type["conversion"] else: conversion = np.nan - for pos_type in ('twhl_norm', 'twhl_linearized'): + for pos_type in ("twhl_norm", "twhl_linearized"): if pos_type in matin: pos_data_norm = matin[pos_type][:, 1:] spatial_series_object = SpatialSeries( - name=label + '_{}_spatial_series'.format(pos_type), - data=H5DataIO(pos_data_norm, compression='gzip'), - reference_frame='unknown', conversion=conversion, + name=label + "_{}_spatial_series".format(pos_type), + data=H5DataIO(pos_data_norm, compression="gzip"), + reference_frame="unknown", + conversion=conversion, resolution=np.nan, - timestamps=H5DataIO(tt, compression='gzip')) + timestamps=H5DataIO(tt, compression="gzip"), + ) pos_obj.add_spatial_series(spatial_series_object) - check_module(nwbfile, 'behavior', 'contains processed behavioral data').add_data_interface(pos_obj) + check_module(nwbfile, "behavior", "contains processed behavioral data").add_data_interface(pos_obj) for i, window in enumerate(exp_times): - nwbfile.add_epoch(start_time=window[0], stop_time=window[1], - label=label + '_' + str(i)) + nwbfile.add_epoch(start_time=window[0], stop_time=window[1], label=label + "_" + str(i)) - trialdata_path = os.path.join(session_path, session_id + '__EightMazeRun.mat') + trialdata_path = os.path.join(session_path, session_id + "__EightMazeRun.mat") if os.path.isfile(trialdata_path): - trials_data = loadmat(trialdata_path)['EightMazeRun'] + trials_data = 
loadmat(trialdata_path)["EightMazeRun"] - trialdatainfo_path = os.path.join(fpath_base, 'EightMazeRunInfo.mat') - trialdatainfo = [x[0] for x in loadmat(trialdatainfo_path)['EightMazeRunInfo'][0]] + trialdatainfo_path = os.path.join(fpath_base, "EightMazeRunInfo.mat") + trialdatainfo = [x[0] for x in loadmat(trialdatainfo_path)["EightMazeRunInfo"][0]] features = trialdatainfo[:7] - features[:2] = 'start_time', 'stop_time', - [nwbfile.add_trial_column(x, 'description') for x in features[4:] + ['condition']] + features[:2] = ( + "start_time", + "stop_time", + ) + [nwbfile.add_trial_column(x, "description") for x in features[4:] + ["condition"]] for trial_data in trials_data: if trial_data[3]: - cond = 'run_left' + cond = "run_left" else: - cond = 'run_right' - nwbfile.add_trial(start_time=trial_data[0], stop_time=trial_data[1], condition=cond, - error_run=trial_data[4], stim_run=trial_data[5], both_visit=trial_data[6]) - - sleep_state_fpath = os.path.join(session_path, '{}.SleepState.states.mat'.format(session_id)) + cond = "run_right" + nwbfile.add_trial( + start_time=trial_data[0], + stop_time=trial_data[1], + condition=cond, + error_run=trial_data[4], + stim_run=trial_data[5], + both_visit=trial_data[6], + ) + + sleep_state_fpath = os.path.join(session_path, "{}.SleepState.states.mat".format(session_id)) # label renaming specific to Watson - state_label_names = {'WAKEstate': "Awake", 'NREMstate': "Non-REM", 'REMstate': "REM"} + state_label_names = {"WAKEstate": "Awake", "NREMstate": "Non-REM", "REMstate": "REM"} if os.path.isfile(sleep_state_fpath): - matin = loadmat(sleep_state_fpath)['SleepState']['ints'][0][0] + matin = loadmat(sleep_state_fpath)["SleepState"]["ints"][0][0] - table = TimeIntervals(name='states', description="Sleep states of animal.") - table.add_column(name='label', description="Sleep state.") + table = TimeIntervals(name="states", description="Sleep states of animal.") + table.add_column(name="label", description="Sleep state.") data = [] for name in matin.dtype.names: for row in matin[name][0][0]: - data.append({'start_time': row[0], 'stop_time': row[1], 'label': state_label_names[name]}) - [table.add_row(**row) for row in sorted(data, key=lambda x: x['start_time'])] + data.append({"start_time": row[0], "stop_time": row[1], "label": state_label_names[name]}) + [table.add_row(**row) for row in sorted(data, key=lambda x: x["start_time"])] - check_module(nwbfile, 'behavior', 'contains behavioral data').add_data_interface(table) + check_module(nwbfile, "behavior", "contains behavioral data").add_data_interface(table) diff --git a/buzsaki_lab_to_nwb/watson/watsonlfpdatainterface.py b/buzsaki_lab_to_nwb/watson/watsonlfpdatainterface.py index 209b81a..c20da88 100644 --- a/buzsaki_lab_to_nwb/watson/watsonlfpdatainterface.py +++ b/buzsaki_lab_to_nwb/watson/watsonlfpdatainterface.py @@ -11,12 +11,10 @@ class WatsonLFPInterface(BaseDataInterface): - @classmethod def get_input_schema(cls): return {} - def __init__(self, **input_args): super().__init__(**input_args) @@ -24,24 +22,24 @@ def get_metadata_schema(self): metadata_schema = get_base_schema() # ideally most of this be automatically determined from pynwb docvals - metadata_schema['properties']['TimeSeries'] = get_schema_from_hdmf_class(TimeSeries) - metadata_schema['properties']['DecompositionSeries'] = get_schema_from_hdmf_class(DecompositionSeries) - required_fields = ['TimeSeries', 'DecompositionSeries'] + metadata_schema["properties"]["TimeSeries"] = get_schema_from_hdmf_class(TimeSeries) + 
metadata_schema["properties"]["DecompositionSeries"] = get_schema_from_hdmf_class(DecompositionSeries) + required_fields = ["TimeSeries", "DecompositionSeries"] for field in required_fields: - metadata_schema['required'].append(field) + metadata_schema["required"].append(field) return metadata_schema def convert_data(self, nwbfile: NWBFile, metadata: dict, stub_test: bool = False): - session_path = self.input_args['folder_path'] + session_path = self.input_args["folder_path"] # TODO: check/enforce format? - all_shank_channels = metadata['all_shank_channels'] - special_electrode_dict = metadata.get('special_electrodes', []) - lfp_channels = metadata['lfp_channels'] - lfp_sampling_rate = metadata['lfp_sampling_rate'] - spikes_nsamples = metadata['spikes_nsamples'] - shank_channels = metadata['shank_channels'] - n_total_channels = metadata['n_total_channels'] + all_shank_channels = metadata["all_shank_channels"] + special_electrode_dict = metadata.get("special_electrodes", []) + lfp_channels = metadata["lfp_channels"] + lfp_sampling_rate = metadata["lfp_sampling_rate"] + spikes_nsamples = metadata["spikes_nsamples"] + shank_channels = metadata["shank_channels"] + n_total_channels = metadata["n_total_channels"] subject_path, session_id = os.path.split(session_path) @@ -50,46 +48,58 @@ def convert_data(self, nwbfile: NWBFile, metadata: dict, stub_test: bool = False lfp_data = all_channels_lfp_data[:, all_shank_channels] except IndexError: lfp_data = all_channels_lfp_data - lfp_ts = write_lfp(nwbfile, lfp_data, lfp_sampling_rate, - name=metadata['lfp']['name'], - description=metadata['lfp']['description'], - electrode_inds=None) + lfp_ts = write_lfp( + nwbfile, + lfp_data, + lfp_sampling_rate, + name=metadata["lfp"]["name"], + description=metadata["lfp"]["description"], + electrode_inds=None, + ) # TODO: error checking on format? for special_electrode in special_electrode_dict: - ts = TimeSeries(name=special_electrode['name'], - description=special_electrode['description'], - data=all_channels_lfp_data[:, special_electrode['channel']], - rate=lfp_sampling_rate, unit='V', resolution=np.nan) + ts = TimeSeries( + name=special_electrode["name"], + description=special_electrode["description"], + data=all_channels_lfp_data[:, special_electrode["channel"]], + rate=lfp_sampling_rate, + unit="V", + resolution=np.nan, + ) nwbfile.add_acquisition(ts) for ref_name, lfp_channel in lfp_channels.items(): try: all_lfp_phases = [] - for passband in ('theta', 'gamma'): - lfp_fft = filter_lfp(lfp_data[:, all_shank_channels == lfp_channel].ravel(), - lfp_sampling_rate, - passband=passband) + for passband in ("theta", "gamma"): + lfp_fft = filter_lfp( + lfp_data[:, all_shank_channels == lfp_channel].ravel(), lfp_sampling_rate, passband=passband + ) lfp_phase, _ = hilbert_lfp(lfp_fft) all_lfp_phases.append(lfp_phase[:, np.newaxis]) decomp_series_data = np.dstack(all_lfp_phases) # TODO: should units or metrics be metadata? 
- decomp_series = DecompositionSeries(name=metadata['lfp_decomposition'][ref_name]['name'], - description=metadata['lfp_decomposition'][ref_name]['description'], - data=decomp_series_data, - rate=lfp_sampling_rate, - source_timeseries=lfp_ts, - metric='phase', unit='radians') + decomp_series = DecompositionSeries( + name=metadata["lfp_decomposition"][ref_name]["name"], + description=metadata["lfp_decomposition"][ref_name]["description"], + data=decomp_series_data, + rate=lfp_sampling_rate, + source_timeseries=lfp_ts, + metric="phase", + unit="radians", + ) # TODO: the band limits should be extracted from parse_passband in band_analysis? - decomp_series.add_band(band_name='theta', band_limits=(4, 10)) - decomp_series.add_band(band_name='gamma', band_limits=(30, 80)) + decomp_series.add_band(band_name="theta", band_limits=(4, 10)) + decomp_series.add_band(band_name="gamma", band_limits=(30, 80)) - check_module(nwbfile, 'ecephys', - 'contains processed extracellular electrophysiology data' - ).add_data_interface(decomp_series) + check_module( + nwbfile, "ecephys", "contains processed extracellular electrophysiology data" + ).add_data_interface(decomp_series) except IndexError: - print('Unable to index lfp data for decomposition series - skipping') + print("Unable to index lfp data for decomposition series - skipping") - write_spike_waveforms(nwbfile, session_path, spikes_nsamples=spikes_nsamples, shank_channels=shank_channels, - stub_test=stub_test) + write_spike_waveforms( + nwbfile, session_path, spikes_nsamples=spikes_nsamples, shank_channels=shank_channels, stub_test=stub_test + ) diff --git a/buzsaki_lab_to_nwb/watson/watsonnorecording.py b/buzsaki_lab_to_nwb/watson/watsonnorecording.py index f2964b6..4822277 100644 --- a/buzsaki_lab_to_nwb/watson/watsonnorecording.py +++ b/buzsaki_lab_to_nwb/watson/watsonnorecording.py @@ -7,20 +7,10 @@ class WatsonNoRecording(BaseRecordingExtractorInterface): RX = se.NumpyRecordingExtractor def convert_data(self, nwbfile, metadata, stub_test=False): - se.NwbRecordingExtractor.add_devices( - recording=self.recording_extractor, - nwbfile=nwbfile, - metadata=metadata - ) + se.NwbRecordingExtractor.add_devices(recording=self.recording_extractor, nwbfile=nwbfile, metadata=metadata) se.NwbRecordingExtractor.add_electrode_groups( - recording=self.recording_extractor, - nwbfile=nwbfile, - metadata=metadata + recording=self.recording_extractor, nwbfile=nwbfile, metadata=metadata ) - se.NwbRecordingExtractor.add_electrodes( - recording=self.recording_extractor, - nwbfile=nwbfile, - metadata=metadata - ) + se.NwbRecordingExtractor.add_electrodes(recording=self.recording_extractor, nwbfile=nwbfile, metadata=metadata) diff --git a/buzsaki_lab_to_nwb/watson/watsonnwbconverter.py b/buzsaki_lab_to_nwb/watson/watsonnwbconverter.py index cd50133..a125f5d 100644 --- a/buzsaki_lab_to_nwb/watson/watsonnwbconverter.py +++ b/buzsaki_lab_to_nwb/watson/watsonnwbconverter.py @@ -17,248 +17,269 @@ class WatsonNWBConverter(NWBConverter): # The order of this dictionary matter significantly, but python dictionaries are supposed to be unordered # This is compensated for the time being, but should this conceptually be a list instead? 
- data_interface_classes = {'NeuroscopeRecording': neuroscopedatainterface.NeuroscopeRecordingInterface, - 'NeuroscopeSorting': neuroscopedatainterface.NeuroscopeSortingInterface, - 'WatsonLFP': WatsonLFPInterface, - 'WatsonBehavior': WatsonBehaviorInterface} + data_interface_classes = { + "NeuroscopeRecording": neuroscopedatainterface.NeuroscopeRecordingInterface, + "NeuroscopeSorting": neuroscopedatainterface.NeuroscopeSortingInterface, + "WatsonLFP": WatsonLFPInterface, + "WatsonBehavior": WatsonBehaviorInterface, + } def __init__(self, **input_args): - dat_filepath = input_args.get('NeuroscopeRecording', {}).get('file_path', None) + dat_filepath = input_args.get("NeuroscopeRecording", {}).get("file_path", None) if not os.path.isfile(dat_filepath): new_data_interface_classes = {} - new_data_interface_classes.update({'WatsonNoRecording': WatsonNoRecording}) + new_data_interface_classes.update({"WatsonNoRecording": WatsonNoRecording}) for name, val in self.data_interface_classes.items(): new_data_interface_classes.update({name: val}) - new_data_interface_classes.pop('NeuroscopeRecording') + new_data_interface_classes.pop("NeuroscopeRecording") - session_id = os.path.split(input_args['WatsonLFP']['folder_path'])[1] - xml_filepath = os.path.join(input_args['WatsonLFP']['folder_path'], session_id + '.xml') + session_id = os.path.split(input_args["WatsonLFP"]["folder_path"])[1] + xml_filepath = os.path.join(input_args["WatsonLFP"]["folder_path"], session_id + ".xml") root = et.parse(xml_filepath).getroot() - n_channels = len([int(channel.text) - for group in root.find('spikeDetection').find('channelGroups').findall('group') - for channel in group.find('channels')]) + n_channels = len( + [ + int(channel.text) + for group in root.find("spikeDetection").find("channelGroups").findall("group") + for channel in group.find("channels") + ] + ) # The only information needed for this is .get_channel_ids() which is set by the shape of the input series - input_args.update({'WatsonNoRecording': {'timeseries': np.array(range(n_channels)), - 'sampling_frequency': 1}}) - input_args.pop('NeuroscopeRecording') + input_args.update( + {"WatsonNoRecording": {"timeseries": np.array(range(n_channels)), "sampling_frequency": 1}} + ) + input_args.pop("NeuroscopeRecording") self.data_interface_classes = new_data_interface_classes - self._recording_type = 'WatsonNoRecording' + self._recording_type = "WatsonNoRecording" else: - self._recording_type = 'NeuroscopeRecording' - + self._recording_type = "NeuroscopeRecording" + # Very special case for only one session - special_sorting = input_args.get('CellExplorerSorting', None) + special_sorting = input_args.get("CellExplorerSorting", None) if special_sorting is not None: - self.data_interface_classes.pop('NeuroscopeSorting') - self.data_interface_classes.update({'CellExplorerSorting': WatsonSortingInterface}) - self._sorting_type = 'CellExplorerSorting' + self.data_interface_classes.pop("NeuroscopeSorting") + self.data_interface_classes.update({"CellExplorerSorting": WatsonSortingInterface}) + self._sorting_type = "CellExplorerSorting" else: - self._sorting_type = 'NeuroscopeSorting' + self._sorting_type = "NeuroscopeSorting" super().__init__(**input_args) def get_recording_type(self): return self._recording_type - + def get_sorting_type(self): return self._sorting_type def get_metadata(self): # TODO: could be vastly improved with pathlib - session_path = self.data_interface_objects['WatsonLFP'].input_args['folder_path'] + session_path = 
self.data_interface_objects["WatsonLFP"].input_args["folder_path"] subject_path, session_id = os.path.split(session_path) - if '_' in session_id: - subject_id, date_text = session_id.split('_') + if "_" in session_id: + subject_id, date_text = session_id.split("_") session_start = dateparse(date_text) # TODO: add error checking on file existence xml_filepath = os.path.join(session_path, "{}.xml".format(session_id)) root = et.parse(xml_filepath).getroot() - shank_channels = [[int(channel.text) - for channel in group.find('channels')] - for group in root.find('spikeDetection').find('channelGroups').findall('group')] + shank_channels = [ + [int(channel.text) for channel in group.find("channels")] + for group in root.find("spikeDetection").find("channelGroups").findall("group") + ] all_shank_channels = np.concatenate(shank_channels) all_shank_channels.sort() - spikes_nsamples = int(root.find('neuroscope').find('spikes').find('nSamples').text) - lfp_sampling_rate = float(root.find('fieldPotentials').find('lfpSamplingRate').text) + spikes_nsamples = int(root.find("neuroscope").find("spikes").find("nSamples").text) + lfp_sampling_rate = float(root.find("fieldPotentials").find("lfpSamplingRate").text) session_info_filepath = os.path.join(session_path, "{}.sessionInfo.mat".format(session_id)) if os.path.isfile(session_info_filepath): - n_total_channels = loadmat(session_info_filepath)['sessionInfo']['nChannels'][0][0][0][0] + n_total_channels = loadmat(session_info_filepath)["sessionInfo"]["nChannels"][0][0][0][0] basic_metadata_filepath = os.path.join(session_path, "{}_BasicMetaData.mat".format(session_id)) if os.path.isfile(basic_metadata_filepath): - matin = loadmat(basic_metadata_filepath)['bmd'] - up_reference = matin['UPstatechannel'][0][0][0][0] - spindle_reference = matin['Spindlechannel'][0][0][0][0] - theta_reference = matin['Thetachannel'][0][0][0][0] + matin = loadmat(basic_metadata_filepath)["bmd"] + up_reference = matin["UPstatechannel"][0][0][0][0] + spindle_reference = matin["Spindlechannel"][0][0][0][0] + theta_reference = matin["Thetachannel"][0][0][0][0] shank_electrode_number = [x for channels in shank_channels for x, _ in enumerate(channels)] - shank_group_name = ["shank{}".format(n+1) for n, channels in enumerate(shank_channels) for _ in channels] + shank_group_name = ["shank{}".format(n + 1) for n, channels in enumerate(shank_channels) for _ in channels] cell_filepath = os.path.join(session_path, "{}.spikes.cellinfo.mat".format(session_id)) if os.path.isfile(cell_filepath): - cell_info = loadmat(cell_filepath)['spikes'] + cell_info = loadmat(cell_filepath)["spikes"] - celltype_mapping = {'pE': "excitatory", 'pI': "inhibitory"} + celltype_mapping = {"pE": "excitatory", "pI": "inhibitory"} celltype_filepath = os.path.join(session_path, "{}.CellClass.cellinfo.mat".format(session_id)) if os.path.isfile(celltype_filepath): - celltype_info = [str(celltype_mapping[x[0]]) - for x in loadmat(celltype_filepath)['CellClass']['label'][0][0][0]] + celltype_info = [ + str(celltype_mapping[x[0]]) for x in loadmat(celltype_filepath)["CellClass"]["label"][0][0][0] + ] device_name = "implant" if os.path.isfile(basic_metadata_filepath): - cortex_region = loadmat(basic_metadata_filepath)['CortexRegion'][0][0][0] + cortex_region = loadmat(basic_metadata_filepath)["CortexRegion"][0][0][0] else: cortex_region = "unknown" metadata = { - 'NWBFile': { - 'identifier': session_id, - 'session_start_time': session_start.astimezone(), - 'file_create_date': datetime.now().astimezone(), - 'session_id': 
session_id, - 'institution': 'NYU', - 'lab': 'Buzsaki' + "NWBFile": { + "identifier": session_id, + "session_start_time": session_start.astimezone(), + "file_create_date": datetime.now().astimezone(), + "session_id": session_id, + "institution": "NYU", + "lab": "Buzsaki", }, - 'Subject': { - 'subject_id': subject_id, + "Subject": { + "subject_id": subject_id, }, self.get_recording_type(): { - 'Ecephys': { - 'subset_channels': all_shank_channels, - 'Device': [{ - 'name': device_name - }], - 'ElectrodeGroup': [{ - 'name': f'shank{n+1}', - 'description': f'shank{n+1} electrodes', - 'location': cortex_region, - 'device_name': device_name - } for n, _ in enumerate(shank_channels)], - 'Electrodes': [ + "Ecephys": { + "subset_channels": all_shank_channels, + "Device": [{"name": device_name}], + "ElectrodeGroup": [ + { + "name": f"shank{n+1}", + "description": f"shank{n+1} electrodes", + "location": cortex_region, + "device_name": device_name, + } + for n, _ in enumerate(shank_channels) + ], + "Electrodes": [ { - 'name': 'shank_electrode_number', - 'description': '0-indexed channel within a shank', - 'data': shank_electrode_number + "name": "shank_electrode_number", + "description": "0-indexed channel within a shank", + "data": shank_electrode_number, }, { - 'name': 'group', - 'description': 'a reference to the ElectrodeGroup this electrode is a part of', - 'data': shank_group_name + "name": "group", + "description": "a reference to the ElectrodeGroup this electrode is a part of", + "data": shank_group_name, }, { - 'name': 'group_name', - 'description': 'the name of the ElectrodeGroup this electrode is a part of', - 'data': shank_group_name - } + "name": "group_name", + "description": "the name of the ElectrodeGroup this electrode is a part of", + "data": shank_group_name, + }, ], - 'ElectricalSeries': { - 'name': 'ElectricalSeries', - 'description': 'raw acquisition traces' - } + "ElectricalSeries": {"name": "ElectricalSeries", "description": "raw acquisition traces"}, } }, self.get_sorting_type(): { - 'UnitProperties': [ + "UnitProperties": [ + {"name": "cell_type", "description": "name of cell type", "data": celltype_info}, { - 'name': 'cell_type', - 'description': 'name of cell type', - 'data': celltype_info + "name": "global_id", + "description": "global id for cell for entire experiment", + "data": [int(x) for x in cell_info["UID"][0][0][0]], }, { - 'name': 'global_id', - 'description': 'global id for cell for entire experiment', - 'data': [int(x) for x in cell_info['UID'][0][0][0]] - }, - { - 'name': 'shank_id', - 'description': '0-indexed id of cluster from shank', + "name": "shank_id", + "description": "0-indexed id of cluster from shank", # - 2 b/c the 0 and 1 IDs from each shank have been removed - 'data': [int(x - 2) for x in cell_info['cluID'][0][0][0]] + "data": [int(x - 2) for x in cell_info["cluID"][0][0][0]], }, { - 'name': 'electrode_group', - 'description': 'the electrode group that each spike unit came from', - 'data': ["shank" + str(x) for x in cell_info['shankID'][0][0][0]] + "name": "electrode_group", + "description": "the electrode group that each spike unit came from", + "data": ["shank" + str(x) for x in cell_info["shankID"][0][0][0]], }, { - 'name': 'region', - 'description': 'brain region where unit was detected', - 'data': [str(x[0]) for x in cell_info['region'][0][0][0]] - } - ] + "name": "region", + "description": "brain region where unit was detected", + "data": [str(x[0]) for x in cell_info["region"][0][0][0]], + }, + ] }, - 'WatsonLFP': { - 'all_shank_channels': 
all_shank_channels, - 'lfp_channels': {}, - 'lfp_sampling_rate': lfp_sampling_rate, - 'lfp': {'name': 'lfp', - 'description': 'lfp signal for all shank electrodes'}, - 'lfp_decomposition': {}, - 'spikes_nsamples': spikes_nsamples, - 'shank_channels': shank_channels, - 'n_total_channels': n_total_channels + "WatsonLFP": { + "all_shank_channels": all_shank_channels, + "lfp_channels": {}, + "lfp_sampling_rate": lfp_sampling_rate, + "lfp": {"name": "lfp", "description": "lfp signal for all shank electrodes"}, + "lfp_decomposition": {}, + "spikes_nsamples": spikes_nsamples, + "shank_channels": shank_channels, + "n_total_channels": n_total_channels, }, - 'WatsonBehavior': { - } + "WatsonBehavior": {}, } # If reference channels are auto-detected for a given session, add them to the various metadata fields test_list = list(all_shank_channels == up_reference) if any(test_list): - metadata[self.get_recording_type()]['Ecephys']['Electrodes'].append({ - 'name': 'up_reference', - 'description': 'this electrode was used to calculate UP-states', - 'data': test_list - }) - metadata['WatsonLFP']['lfp_channels'].update({'up_reference': up_reference}) - metadata['WatsonLFP']['lfp_decomposition'].update({ - 'up_reference': {'name': 'UPDecompositionSeries', - 'description': 'Theta and Gamma phase for up-reference LFP'} - }) + metadata[self.get_recording_type()]["Ecephys"]["Electrodes"].append( + { + "name": "up_reference", + "description": "this electrode was used to calculate UP-states", + "data": test_list, + } + ) + metadata["WatsonLFP"]["lfp_channels"].update({"up_reference": up_reference}) + metadata["WatsonLFP"]["lfp_decomposition"].update( + { + "up_reference": { + "name": "UPDecompositionSeries", + "description": "Theta and Gamma phase for up-reference LFP", + } + } + ) test_list = list(all_shank_channels == spindle_reference) if any(test_list): - metadata[self.get_recording_type()]['Ecephys']['Electrodes'].append({ - 'name': 'spindle_reference', - 'description': 'this electrode was used to calculate slow-wave sleep', - 'data': test_list - }) - metadata['WatsonLFP']['lfp_channels'].update({'spindle_reference': spindle_reference}) - metadata['WatsonLFP']['lfp_decomposition'].update({ - 'spindle_reference': {'name': 'SpindleDecompositionSeries', - 'description': 'Theta and Gamma phase for spindle-reference LFP'} - }) + metadata[self.get_recording_type()]["Ecephys"]["Electrodes"].append( + { + "name": "spindle_reference", + "description": "this electrode was used to calculate slow-wave sleep", + "data": test_list, + } + ) + metadata["WatsonLFP"]["lfp_channels"].update({"spindle_reference": spindle_reference}) + metadata["WatsonLFP"]["lfp_decomposition"].update( + { + "spindle_reference": { + "name": "SpindleDecompositionSeries", + "description": "Theta and Gamma phase for spindle-reference LFP", + } + } + ) test_list = list(all_shank_channels == theta_reference) if any(test_list): - metadata[self.get_recording_type()]['Ecephys']['Electrodes'].append({ - 'name': 'theta_reference', - 'description': 'this electrode was used to calculate theta canonical bands', - 'data': test_list - }) - metadata['WatsonLFP']['lfp_channels'].update({'theta_reference': theta_reference}) - metadata['WatsonLFP']['lfp_decomposition'].update({ - 'theta_reference': {'name': 'ThetaDecompositionSeries', - 'description': 'Theta and Gamma phase for theta-reference LFP'} - }) + metadata[self.get_recording_type()]["Ecephys"]["Electrodes"].append( + { + "name": "theta_reference", + "description": "this electrode was used to calculate 
theta canonical bands", + "data": test_list, + } + ) + metadata["WatsonLFP"]["lfp_channels"].update({"theta_reference": theta_reference}) + metadata["WatsonLFP"]["lfp_decomposition"].update( + { + "theta_reference": { + "name": "ThetaDecompositionSeries", + "description": "Theta and Gamma phase for theta-reference LFP", + } + } + ) # If there is missing auto-detected metadata for unit properties, truncate those units from the extractor se_ids = set(self.data_interface_objects[self.get_sorting_type()].sorting_extractor.get_unit_ids()) if len(celltype_info) < len(se_ids): - defaults = {'cell_type': "unknown", 'region': "unknown"} + defaults = {"cell_type": "unknown", "region": "unknown"} missing_ids = se_ids - set(np.arange(len(celltype_info))) unit_map = self.data_interface_objects[self.get_sorting_type()].sorting_extractor._unit_map for missing_id in missing_ids: - metadata[self.get_sorting_type()]['UnitProperties'][0]['data'].append(defaults['cell_type']) - metadata[self.get_sorting_type()]['UnitProperties'][1]['data'].append(int(missing_id)) - metadata[self.get_sorting_type()]['UnitProperties'][2]['data'].append(int(unit_map[missing_id]['unit_id'] - - 1)) - metadata[self.get_sorting_type()]['UnitProperties'][3]['data'].append( - "shank{}".format(unit_map[missing_id]['sorting_id'])) - metadata[self.get_sorting_type()]['UnitProperties'][4]['data'].append(defaults['region']) + metadata[self.get_sorting_type()]["UnitProperties"][0]["data"].append(defaults["cell_type"]) + metadata[self.get_sorting_type()]["UnitProperties"][1]["data"].append(int(missing_id)) + metadata[self.get_sorting_type()]["UnitProperties"][2]["data"].append( + int(unit_map[missing_id]["unit_id"] - 1) + ) + metadata[self.get_sorting_type()]["UnitProperties"][3]["data"].append( + "shank{}".format(unit_map[missing_id]["sorting_id"]) + ) + metadata[self.get_sorting_type()]["UnitProperties"][4]["data"].append(defaults["region"]) return metadata diff --git a/buzsaki_lab_to_nwb/watson/watsonsortinginterface.py b/buzsaki_lab_to_nwb/watson/watsonsortinginterface.py index f79b3e7..dd55e92 100644 --- a/buzsaki_lab_to_nwb/watson/watsonsortinginterface.py +++ b/buzsaki_lab_to_nwb/watson/watsonsortinginterface.py @@ -10,9 +10,9 @@ class WatsonSortingInterface(BaseSortingExtractorInterface): def __init__(self, **input_args): self.sorting_extractor = self.SX() # Numpy doesn't require any arguments passed - - spikes_mat = loadmat(input_args['spikes_file_path']) - for j, times in enumerate(spikes_mat['spikes']['times'][0][0][0]): + + spikes_mat = loadmat(input_args["spikes_file_path"]) + for j, times in enumerate(spikes_mat["spikes"]["times"][0][0][0]): self.sorting_extractor.add_unit(unit_id=j, times=concatenate(times)) # dislike how this is hard-coded, but it is only for a single error-prone session self.sorting_extractor.set_sampling_frequency(20000) From 11d32eba56d89c5b51acdd888f8a32575cf8471e Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 16 Apr 2022 12:35:24 -0400 Subject: [PATCH 04/40] adding ordering utils --- .../tingley_metabolic/tingley_metabolic_utils.py | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_utils.py diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_utils.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_utils.py new file mode 100644 index 0000000..a1109f9 --- /dev/null +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_utils.py @@ -0,0 +1,7 @@ +"""Author: Cody Baker.""" +from typing import List + + +def 
order_sessions_by_start_time(session_names: List[str]): + """Given the list of {subject_id}_{start_time} strings, return a list ordered by the start_time.""" + pass From 8b7faba54797b88a429f0a135dadf244fdb6cc9d Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 16 Apr 2022 14:40:33 -0400 Subject: [PATCH 05/40] simplify accelerometer --- .../tingley_metabolic/__init__.py | 2 + .../convert_tingley_metabolic.py | 18 +++-- .../tingley_metabolic_requirements.txt | 1 + .../tingley_metabolic_utils.py | 49 ++++++++++++-- .../tingleymetabolicaccelerometerinterface.py | 66 +++++++++++++++++++ .../tingleymetabolicauxextractor.py | 53 --------------- .../tingleymetabolicauxinterface.py | 14 ---- .../tingleymetabolicnwbconverter.py | 4 +- 8 files changed, 130 insertions(+), 77 deletions(-) create mode 100644 buzsaki_lab_to_nwb/tingley_metabolic/__init__.py create mode 100644 buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicaccelerometerinterface.py delete mode 100644 buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicauxextractor.py delete mode 100644 buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicauxinterface.py diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/__init__.py b/buzsaki_lab_to_nwb/tingley_metabolic/__init__.py new file mode 100644 index 0000000..027350a --- /dev/null +++ b/buzsaki_lab_to_nwb/tingley_metabolic/__init__.py @@ -0,0 +1,2 @@ +from .tingleymetabolicnwbconverter import TingleyMetabolicConverter +from .tingley_metabolic_utils import load_subject_glucose_series, get_subject_ecephys_session_start_times diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py b/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py index a61ac95..6bdfd6f 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py @@ -6,7 +6,11 @@ from nwb_conversion_tools.utils.json_schema import load_dict_from_file from nwb_conversion_tools.utils.json_schema import dict_deep_update -from buzsaki_lab_to_nwb.tingley_metabolic.tingleymetabolicnwbconverter import TingleyMetabolicConverter +from buzsaki_lab_to_nwb.tingley_metabolic import ( + TingleyMetabolicConverter, + load_subject_glucose_series, + get_subject_ecephys_session_start_times, +) n_jobs = 20 stub_test = True @@ -48,6 +52,14 @@ def convert_session(session_path, nwbfile_path): rhd_file_path = session_path / f"{session_id}.rhd" xml_file_path = session_path / f"{session_id}.xml" + subject_id = session_id.split("_")[0] + subject_glucose_data = load_subject_glucose_series(session_path=session_path) + subject_ecephys_session_start_times = get_subject_ecephys_session_start_times(session_path=session_path) + # segment the ecephys against the glucose, return sub-series of glucose + # if sub-series is non-empty, include GlucoseInterface(series=sub_series) + # and increment the starting_times of .dat and .lfp interfaces + # else do not include glucose and just write ecephys with default start times + print("raw file available...", raw_file_path.is_file()) print("lfp file available...", lfp_file_path.is_file()) source_data = dict() @@ -67,9 +79,7 @@ def convert_session(session_path, nwbfile_path): conversion_options.update(NeuroscopeRecording=dict(stub_test=stub_test)) if aux_file_path.is_file() and rhd_file_path.is_file(): - source_data.update( - TingleySeptalBehavior=dict(dat_file_path=str(aux_file_path), rhd_file_path=str(rhd_file_path)) - ) + source_data.update(Accelerometer=dict(dat_file_path=str(aux_file_path), 
rhd_file_path=str(rhd_file_path))) converter = TingleyMetabolicConverter(source_data=source_data) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_requirements.txt b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_requirements.txt index 7372044..da61085 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_requirements.txt +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_requirements.txt @@ -2,4 +2,5 @@ mat4py==0.5.0 mat73==0.52 hdf5storage>=0.1.18 pyintan>=0.3.0 +pandas>=1.4.2 nwb-conversion-tools @ git+https://github.com/catalystneuro/nwb-conversion-tools@5e39ca55266b8f7be48380c67471100a98413277 diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_utils.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_utils.py index a1109f9..fe451bb 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_utils.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_utils.py @@ -1,7 +1,48 @@ """Author: Cody Baker.""" -from typing import List +from typing import List, Dict +from pathlib import Path +from datetime import datetime +import numpy as np +from pandas import read_csv -def order_sessions_by_start_time(session_names: List[str]): - """Given the list of {subject_id}_{start_time} strings, return a list ordered by the start_time.""" - pass + +def load_subject_glucose_series(session_path: Path): + """Given the subject_id string and the ecephys session_path, load all glucose series data for further parsing.""" + subject_path = session_path.parent + all_csv = [x for x in subject_path.iterdir if ".csv" in x.suffixes] + + all_glucose_data = dict() + for file_path in all_csv: + all_glucose_data.update(read_glucose_csv(file_path=file_path)) + + all_timestamps = np.array(list(all_glucose_data.keys())) + all_isig = np.array(list(all_glucose_data.values())) + sort_indices = np.argsort(all_timestamps) + ordered_glucose_data = {k: v for k, v in zip(all_timestamps[sort_indices], all_isig[sort_indices])} + + return ordered_glucose_data + + +def read_glucose_csv(file_path: Path) -> Dict[datetime, float]: + """Parse a single glucose data file.""" + all_data = read_csv(filepath_or_buffer="C:/Users/Raven/Documents/TingleyD/CGM31.csv", skiprows=11) + excluded = all_data["Excluded"].astype(bool) + timestamps = [datetime.strptime(x, "%d/%m/%y %H:%M:%S") for x in all_data["Timestamp"][excluded]] + isig_signal = all_data["ISIG Value"][excluded] + + valid_timestamp_to_isig = { + timestamp: isig for timestamp, isig in zip(timestamps, isig_signal) if not np.isnan(isig) and isig != -9999 + } + + return valid_timestamp_to_isig + + +def get_subject_ecephys_session_start_times(session_path: Path) -> List[datetime]: + """Return all the start times for the ecephys sessions for this subject.""" + subject_path = session_path.parent + all_session_names = [x.name for x in subject_path.iterdir if x.isdir()] + all_datetime_strings = [session_name.strip("_")[:-2] for session_name in all_session_names] + + all_timestamps = np.sort(all_datetime_strings) + return all_timestamps diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicaccelerometerinterface.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicaccelerometerinterface.py new file mode 100644 index 0000000..234dd7f --- /dev/null +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicaccelerometerinterface.py @@ -0,0 +1,66 @@ +"""Authors: Heberto Mayorquin and Cody Baker.""" +from nwb_conversion_tools.basedatainterface import BaseDataInterface +from 
nwb_conversion_tools.utils import FilePathType +from pynwb import TimeSeries, H5DataIO +from spikeextractors.extraction_tools import read_binary +from pyintan.intan import read_rhd + + +class TingleyMetabolicAccelerometerInterface(BaseDataInterface): + """Aux data interface for the Tingley metabolic project.""" + + def __init__(self, dat_file_path: FilePathType, rhd_file_path: FilePathType): + """ + Process accelerometer data stored unique ad-hoc format for accelerometer data stored in an 'auxiliary.dat' file. + + The data is stored in Neuroscope .dat binary blob format, but no accompanying .xml header. + Instead, the header is the original intan .rhd format. + + A few details to note: + i) Regardless of how many AUX channels are plugged in (which is read from the .rhd file), only the first 3 + have any actual data (the values for all other channels for all other time is -1). + ii) Even though the .rhd specifies the accelerometer data is acquired at 5kHz, the .dat has it stored at + 20kHz by duplicating the data value at every 4th index. I can only assume this was done for easier + side-by-side analysis of the raw data (which was acquired at 20kHz). + + Parameters + ---------- + dat_file_path : FilePathType + DESCRIPTION. + rhd_file_path : FilePathType + DESCRIPTION. + + Returns + ------- + None. + + """ + rhd_info = read_rhd(filename=rhd_file_path) + first_aux_entry = next( + header_info_entry + for header_info_entry in rhd_info[1] + if header_info_entry["native_channel_name"] == "A-AUX1" + ) + first_aux_sub_entry = next( + header_info_entry for header_info_entry in rhd_info[2] if header_info_entry[0] == "A-AUX1" + ) + + # Manually confirmed that all aux channels have same properties + self.conversion = first_aux_entry["gain"] # offset confirmed to be 0, units confirmed to be Volts + self.sampling_frequency = first_aux_entry["sampling_rate"] + dtype = first_aux_sub_entry[1] + numchan = sum("AUX" in header_info_entry["native_channel_name"] for header_info_entry in rhd_info[1]) + + # Manually confirmed result is still memmap after slicing + self.memmap = read_binary(file=dat_file_path, numchan=numchan, dtype=dtype)[:3, ::4] + + def run_conversion(self, nwbfile): + nwbfile.add_acquisition( + TimeSeries( + name="Accelerometer", + units="Volts", + data=H5DataIO(self.memmap), # should not need iterative write + conversion=self.conversion, + rate=self.sampling_frequency, + ), + ) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicauxextractor.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicauxextractor.py deleted file mode 100644 index 5fe409f..0000000 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicauxextractor.py +++ /dev/null @@ -1,53 +0,0 @@ -"""Author: Cody Baker.""" -from pathlib import Path - -from spikeextractors import RecordingExtractor, BinDatRecordingExtractor -from nwb_conversion_tools.utils import FilePathType -from pyintan import read_rhd - -from .tingleyauxextractor import TingleyAuxExtractor - - -class TingleyMetabolicAuxExtractor(BinDatRecordingExtractor): - """Aux data interface for the Tingley metabolic project.""" - - RX = TingleyAuxExtractor - - extractor_name = "TingleyMetabolicAuxExtractor" - has_default_locations = False - has_unscaled = True - is_writable = True - mode = "file" - - def __init__(self, dat_file_path: FilePathType, rhd_file_path: FilePathType): - dat_file_path = Path(dat_file_path) - rhd_file_path = Path(rhd_file_path) - - RecordingExtractor.__init__(self) - rhd_info = read_rhd(filename=self.source_data["rhd_file_path"]) 
- first_aux_entry = next( - header_info_entry - for header_info_entry in rhd_info[1] - if header_info_entry["native_channel_name"] == "A-AUX1" - ) - first_aux_sub_entry = next( - header_info_entry for header_info_entry in rhd_info[2] if header_info_entry[0] == "A-AUX1" - ) - - # Manually confirmed that all aux channels have same properties - gain = first_aux_entry["gain"] # offset confirmed to be 0, units confirmed to be Volts - sampling_frequency = first_aux_entry["sampling_rate"] - dtype = first_aux_sub_entry[1] - numchan = sum("AUX" in header_info_entry["native_channel_name"] for header_info_entry in rhd_info[1]) - - BinDatRecordingExtractor.__init__( - self, - file_path=dat_file_path, - sampling_frequency=sampling_frequency, - dtype=dtype, - numchan=numchan, - gain=gain, - ) - self._kwargs = dict( - dat_file_path=str(Path(dat_file_path).absolute()), rhd_file_path=str(Path(rhd_file_path).absolute()) - ) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicauxinterface.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicauxinterface.py deleted file mode 100644 index 9187914..0000000 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicauxinterface.py +++ /dev/null @@ -1,14 +0,0 @@ -"""Authors: Heberto Mayorquin and Cody Baker.""" -from nwb_conversion_tools.basedatainterface import BaseRecordingDataInterface -from nwb_conversion_tools.utils import FilePathType - -from .tingleyauxextractor import TingleyAuxExtractor - - -class TingleyMetabolicAuxInterface(BaseRecordingDataInterface): - """Aux data interface for the Tingley metabolic project.""" - - RX = TingleyAuxExtractor - - def __init__(self, dat_file_path: FilePathType, rhd_file_path: FilePathType): - super().__init__(dat_file_path=dat_file_path, rhd_file_path=rhd_file_path) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicnwbconverter.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicnwbconverter.py index 9b0a583..9b551cc 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicnwbconverter.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicnwbconverter.py @@ -9,7 +9,7 @@ NeuroscopeLFPInterface, ) -from .tingleymetabolicauxinterface import TingleyMetabolicAuxInterface +from .tingleymetabolicaccelerometerinterface import TingleyMetabolicAccelerometerInterface DEVICE_INFO = dict( @@ -36,7 +36,7 @@ class TingleyMetabolicConverter(NWBConverter): data_interface_classes = dict( NeuroscopeRecording=NeuroscopeRecordingInterface, NeuroscopeLFP=NeuroscopeLFPInterface, - TingleyMetabolicAux=TingleyMetabolicAuxInterface, + Accelerometer=TingleyMetabolicAccelerometerInterface, ) def get_metadata(self): From cb2942012e8e4e869495ea193c98aa01dc691e0c Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 16 Apr 2022 17:09:09 -0400 Subject: [PATCH 06/40] saving temporary state of session segmenting --- .../convert_tingley_metabolic.py | 19 ++++-- .../tingley_metabolic_utils.py | 63 ++++++++++++++----- 2 files changed, 64 insertions(+), 18 deletions(-) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py b/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py index 6bdfd6f..aa59435 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py @@ -10,6 +10,8 @@ TingleyMetabolicConverter, load_subject_glucose_series, get_subject_ecephys_session_start_times, + segment_glucose_series, + get_session_datetime, ) n_jobs = 20 @@ -46,15 +48,24 @@ def 
convert_session(session_path, nwbfile_path): print(nwbfile_path) session_id = session_path.name - lfp_file_path = session_path / f"{session_path.name}.lfp" - raw_file_path = session_path / f"{session_id}.dat" aux_file_path = session_path / "auxiliary.dat" rhd_file_path = session_path / f"{session_id}.rhd" xml_file_path = session_path / f"{session_id}.xml" - subject_id = session_id.split("_")[0] - subject_glucose_data = load_subject_glucose_series(session_path=session_path) + raw_file_path = session_path / f"{session_id}.dat" + lfp_file_path = session_path / f"{session_id}.lfp" + # if not raw_file_path.is_file() and (session_path / f"{session_id}.dat_orig").is_file: + # raw_file_path = session_path / f"{session_id}.dat_orig" + + # raw_file_path = session_path / f"{session_id}.dat" if (session_path / f"{session_id}.dat").is_file() else + + subject_glucose_series = load_subject_glucose_series(session_path=session_path) subject_ecephys_session_start_times = get_subject_ecephys_session_start_times(session_path=session_path) + session_glucose_series = segment_glucose_series( + session_start_time=get_session_datetime(session_id=session_id), + glucose_series=subject_glucose_series, + ecephys_start_times=subject_ecephys_session_start_times, + ) # segment the ecephys against the glucose, return sub-series of glucose # if sub-series is non-empty, include GlucoseInterface(series=sub_series) # and increment the starting_times of .dat and .lfp interfaces diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_utils.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_utils.py index fe451bb..ccb9570 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_utils.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_utils.py @@ -4,13 +4,13 @@ from datetime import datetime import numpy as np -from pandas import read_csv +from pandas import read_csv, to_datetime -def load_subject_glucose_series(session_path: Path): +def load_subject_glucose_series(session_path: Path) -> Dict[datetime, float]: """Given the subject_id string and the ecephys session_path, load all glucose series data for further parsing.""" subject_path = session_path.parent - all_csv = [x for x in subject_path.iterdir if ".csv" in x.suffixes] + all_csv = [x for x in subject_path.iterdir() if ".csv" in x.suffixes] all_glucose_data = dict() for file_path in all_csv: @@ -18,21 +18,23 @@ def load_subject_glucose_series(session_path: Path): all_timestamps = np.array(list(all_glucose_data.keys())) all_isig = np.array(list(all_glucose_data.values())) - sort_indices = np.argsort(all_timestamps) - ordered_glucose_data = {k: v for k, v in zip(all_timestamps[sort_indices], all_isig[sort_indices])} + sorted_indices = np.argsort(all_timestamps) + glucose_series = {k: v for k, v in zip(all_timestamps[sorted_indices], all_isig[sorted_indices])} - return ordered_glucose_data + return glucose_series def read_glucose_csv(file_path: Path) -> Dict[datetime, float]: """Parse a single glucose data file.""" - all_data = read_csv(filepath_or_buffer="C:/Users/Raven/Documents/TingleyD/CGM31.csv", skiprows=11) + all_data = read_csv(filepath_or_buffer=file_path, skiprows=11) excluded = all_data["Excluded"].astype(bool) - timestamps = [datetime.strptime(x, "%d/%m/%y %H:%M:%S") for x in all_data["Timestamp"][excluded]] - isig_signal = all_data["ISIG Value"][excluded] valid_timestamp_to_isig = { - timestamp: isig for timestamp, isig in zip(timestamps, isig_signal) if not np.isnan(isig) and isig != -9999 + datetime_timestamp: 
isig_value + for datetime_timestamp, isig_value in zip( + to_datetime(all_data["Timestamp"][excluded], infer_datetime_format=True), all_data["ISIG Value"][excluded] + ) + if not np.isnan(isig_value) and isig_value != -9999 } return valid_timestamp_to_isig @@ -41,8 +43,41 @@ def read_glucose_csv(file_path: Path) -> Dict[datetime, float]: def get_subject_ecephys_session_start_times(session_path: Path) -> List[datetime]: """Return all the start times for the ecephys sessions for this subject.""" subject_path = session_path.parent - all_session_names = [x.name for x in subject_path.iterdir if x.isdir()] - all_datetime_strings = [session_name.strip("_")[:-2] for session_name in all_session_names] + subject_session_ids = [x.name for x in subject_path.iterdir() if x.is_dir()] + return sorted([get_session_datetime(session_id) for session_id in subject_session_ids]) - all_timestamps = np.sort(all_datetime_strings) - return all_timestamps + +def get_session_datetime(session_id: str): + """Auxiliary function for parsing the datetime part of a sesion ID.""" + return datetime.strptime("_".join(session_id.split("_")[-2:]), "%y%m%d_%H%M%S") + + +def segment_glucose_series( + this_ecephys_start_time: datetime, + glucose_series: Dict[datetime, float], + ecephys_start_times: List[datetime], + ecephys_end_times: List[datetime], +) -> (Dict[datetime, float], datetime): + """1.""" + glucose_timestamps = list(glucose_series.keys()) + + # If glucose recording ended before this ecephys session + if this_ecephys_start_time > glucose_timestamps[-1]: + return None, this_ecephys_start_time + + segments = dict() + ecephys_start_times_to_segment_number = dict() + # Calculate segments + if ecephys_start_times[0] > glucose_timestamps[0]: # if first ecephys session started before glucose recording + segments.update({0: (ecephys_start_times[0], ecephys_end_times[0])}) + else: + if ecephys_start_times[0] > glucose_timestamps[-1]: # if glucose recording ended before + segments.update({0: (glucose_timestamps[0], ecephys_end_times[0])}) + + glucose_series_per_segment = dict() + for segment_number, (start, stop) in segments.items(): + glucose_series_per_segment.update({segment_number: {k: v for k, v in glucose_series if start <= k <= stop}}) + + # Get the segment for this session + this_session_segment_number = ecephys_start_times_to_segment_number[this_ecephys_start_time] + return glucose_series_per_segment[this_session_segment_number], segments[this_session_segment_number][0] From 88de914417c8d550745fcb4d566621e82f998efe Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sun, 17 Apr 2022 16:34:45 -0400 Subject: [PATCH 07/40] saving progress; only ripples and sleep state left to do --- .../convert_tingley_metabolic.py | 69 ++++++++------ .../tingley_metabolic_utils.py | 94 +++++++++---------- .../tingleymetabolicaccelerometerinterface.py | 5 +- .../tingleymetabolicglucoseinterface.py | 24 +++++ .../tingleymetabolicnwbconverter.py | 4 +- 5 files changed, 115 insertions(+), 81 deletions(-) create mode 100644 buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicglucoseinterface.py diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py b/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py index aa59435..0283059 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py @@ -1,20 +1,22 @@ """Run entire conversion.""" from pathlib import Path from concurrent.futures import ProcessPoolExecutor, as_completed 
+from datetime import timedelta from tqdm import tqdm from nwb_conversion_tools.utils.json_schema import load_dict_from_file from nwb_conversion_tools.utils.json_schema import dict_deep_update +from spikeextractors import NeuroscopeRecordingExtractor from buzsaki_lab_to_nwb.tingley_metabolic import ( TingleyMetabolicConverter, load_subject_glucose_series, - get_subject_ecephys_session_start_times, segment_glucose_series, get_session_datetime, ) n_jobs = 20 +progress_bar_options = dict(desc="Running conversion...", position=0, leave=False) stub_test = True conversion_factor = 0.195 # Intan @@ -31,11 +33,20 @@ nwb_output_path.mkdir(exist_ok=True) -session_path_list = [subject_path.iterdir() for subject_path in (data_path / "metadata_metabolic.yml").iterdir()] +subject_list = ["CGM1", "CGM2"] # This list will change based on what has finished transfering to the Hub +session_path_list = [subject_path for subject_path in data_path.iterdir() if subject_path.stem in subject_list] if stub_test: - nwbfile_list = [nwb_output_path / f"{session.parent.stem}_{session.stem}_stub.nwb" for session in session_path_list] + nwbfile_list = [ + nwb_output_path / f"{subject_path.stem}_{session.stem}_stub.nwb" + for subject_path in session_path_list + for session in subject_path.iterdir() + ] else: - nwbfile_list = [nwb_output_path / f"{session.parent.stem}_{session.stem}.nwb" for session in session_path_list] + nwbfile_list = [ + nwb_output_path / f"{subject_path.stem}_{session.stem}.nwb" + for subject_path in session_path_list + for session in subject_path.iterdir() + ] global_metadata = load_dict_from_file(metadata_path) subject_info_table = load_dict_from_file(subject_info_path) @@ -47,6 +58,7 @@ def convert_session(session_path, nwbfile_path): print(session_path) print(nwbfile_path) + conversion_options = dict() session_id = session_path.name aux_file_path = session_path / "auxiliary.dat" rhd_file_path = session_path / f"{session_id}.rhd" @@ -54,27 +66,27 @@ def convert_session(session_path, nwbfile_path): raw_file_path = session_path / f"{session_id}.dat" lfp_file_path = session_path / f"{session_id}.lfp" + + print("raw file available...", raw_file_path.is_file()) + print("lfp file available...", lfp_file_path.is_file()) + + # I know I'll need this for other sessions, just not yet # if not raw_file_path.is_file() and (session_path / f"{session_id}.dat_orig").is_file: # raw_file_path = session_path / f"{session_id}.dat_orig" # raw_file_path = session_path / f"{session_id}.dat" if (session_path / f"{session_id}.dat").is_file() else subject_glucose_series = load_subject_glucose_series(session_path=session_path) - subject_ecephys_session_start_times = get_subject_ecephys_session_start_times(session_path=session_path) - session_glucose_series = segment_glucose_series( - session_start_time=get_session_datetime(session_id=session_id), + this_ecephys_start_time = get_session_datetime(session_id=session_id) + this_ecephys_stop_time = this_ecephys_start_time + timedelta( + seconds=NeuroscopeRecordingExtractor(file_path=lfp_file_path).get_num_frames() / 1250.0 + ) + session_glucose_series, session_start_time = segment_glucose_series( + this_ecephys_start_time=this_ecephys_start_time, + this_ecephys_stop_time=this_ecephys_stop_time, glucose_series=subject_glucose_series, - ecephys_start_times=subject_ecephys_session_start_times, ) - # segment the ecephys against the glucose, return sub-series of glucose - # if sub-series is non-empty, include GlucoseInterface(series=sub_series) - # and increment the starting_times of 
.dat and .lfp interfaces - # else do not include glucose and just write ecephys with default start times - - print("raw file available...", raw_file_path.is_file()) - print("lfp file available...", lfp_file_path.is_file()) - source_data = dict() - conversion_options = dict() + source_data = dict(Glucose=dict(glucose_series=session_glucose_series)) source_data = dict( NeuroscopeLFP=dict(file_path=str(lfp_file_path), gain=conversion_factor, xml_file_path=str(xml_file_path)), @@ -93,16 +105,15 @@ def convert_session(session_path, nwbfile_path): source_data.update(Accelerometer=dict(dat_file_path=str(aux_file_path), rhd_file_path=str(rhd_file_path))) converter = TingleyMetabolicConverter(source_data=source_data) - metadata = converter.get_metadata() metadata = dict_deep_update(metadata, global_metadata) metadata["NWBFile"].update( session_description=subject_info_table.get( metadata["NWBFile"]["Subject"]["subject_id"], "Consult Supplementary Table 1 from the publication for more information about this session.", - ) + ), + session_start_time=session_start_time, ) - converter.run_conversion( nwbfile_path=str(nwbfile_path), metadata=metadata, @@ -112,10 +123,14 @@ def convert_session(session_path, nwbfile_path): print("Done with conversion!") -with ProcessPoolExecutor(max_workers=n_jobs) as executor: - futures = [] - for session_path, nwbfile_path in zip(session_path_list, nwbfile_list): - futures.append(executor.submit(convert_session, session_path=session_path, nwbfile_path=nwbfile_path)) - completed_futures = tqdm(as_completed(futures), desc="Running conversion...", position=0, leave=False) - for future in completed_futures: - pass +if n_jobs == 1: + for session_path, nwbfile_path in tqdm(zip(session_path_list, nwbfile_list), **progress_bar_options): + convert_session(session_path=session_path, nwbfile_path=nwbfile_path) +else: + with ProcessPoolExecutor(max_workers=n_jobs) as executor: + futures = [] + for session_path, nwbfile_path in zip(session_path_list, nwbfile_list): + futures.append(executor.submit(convert_session, session_path=session_path, nwbfile_path=nwbfile_path)) + completed_futures = tqdm(as_completed(futures), total=len(session_path_list), **progress_bar_options) + for future in completed_futures: + pass diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_utils.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_utils.py index ccb9570..0926349 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_utils.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_utils.py @@ -1,50 +1,58 @@ """Author: Cody Baker.""" -from typing import List, Dict +from typing import List, Optional from pathlib import Path from datetime import datetime +from collections import namedtuple import numpy as np from pandas import read_csv, to_datetime +BaseGlucoseSeries = namedtuple("GlucoseSeries", "timestamps isig") -def load_subject_glucose_series(session_path: Path) -> Dict[datetime, float]: + +class GlucoseSeries(BaseGlucoseSeries): + def __init__(self, timestamps: Optional[List[datetime]] = None, isig: Optional[List[float]] = None): + timestamps = [] if timestamps is None else timestamps + isig = [] if isig is None else isig + self.order() + + def __add__(self, glucose_series: BaseGlucoseSeries): + self.timestamps.extend(glucose_series.timestamps) + self.isig.extend(glucose_series.isig) + self.order() + + def order(self): + sorted_indices = np.argsort(self.timestamps) + self.timestamps = list(np.array(self.timestamps)[sorted_indices]) + self.isig = 
list(np.array(self.timestamps)[sorted_indices]) + + def subset(self, timestamp: datetime): + cutoff_idx = next(idx for idx, series_timestamp in enumerate(self.timestamps) if timestamp >= series_timestamp) + self.timestamps = self.timestamps[:cutoff_idx] + self.isig = self.isig[:cutoff_idx] + + +def load_subject_glucose_series(session_path: Path) -> GlucoseSeries: """Given the subject_id string and the ecephys session_path, load all glucose series data for further parsing.""" subject_path = session_path.parent all_csv = [x for x in subject_path.iterdir() if ".csv" in x.suffixes] - all_glucose_data = dict() + glucose_series = GlucoseSeries(timestamps=[], isig=[]) for file_path in all_csv: - all_glucose_data.update(read_glucose_csv(file_path=file_path)) - - all_timestamps = np.array(list(all_glucose_data.keys())) - all_isig = np.array(list(all_glucose_data.values())) - sorted_indices = np.argsort(all_timestamps) - glucose_series = {k: v for k, v in zip(all_timestamps[sorted_indices], all_isig[sorted_indices])} - + glucose_series += read_glucose_csv(file_path=file_path) return glucose_series -def read_glucose_csv(file_path: Path) -> Dict[datetime, float]: +def read_glucose_csv(file_path: Path) -> GlucoseSeries: """Parse a single glucose data file.""" all_data = read_csv(filepath_or_buffer=file_path, skiprows=11) - excluded = all_data["Excluded"].astype(bool) - - valid_timestamp_to_isig = { - datetime_timestamp: isig_value - for datetime_timestamp, isig_value in zip( - to_datetime(all_data["Timestamp"][excluded], infer_datetime_format=True), all_data["ISIG Value"][excluded] - ) - if not np.isnan(isig_value) and isig_value != -9999 - } - return valid_timestamp_to_isig + timestamps = all_data["ISIG Value"] + isig = to_datetime(all_data["Timestamp"], infer_datetime_format=True) + exclude = all_data["Excluded"].astype(bool) + np.isnan(isig) + (isig == -9999) -def get_subject_ecephys_session_start_times(session_path: Path) -> List[datetime]: - """Return all the start times for the ecephys sessions for this subject.""" - subject_path = session_path.parent - subject_session_ids = [x.name for x in subject_path.iterdir() if x.is_dir()] - return sorted([get_session_datetime(session_id) for session_id in subject_session_ids]) + return GlucoseSeries(timestamps=timestamps[exclude], isig=isig[exclude]) def get_session_datetime(session_id: str): @@ -53,31 +61,15 @@ def get_session_datetime(session_id: str): def segment_glucose_series( - this_ecephys_start_time: datetime, - glucose_series: Dict[datetime, float], - ecephys_start_times: List[datetime], - ecephys_end_times: List[datetime], -) -> (Dict[datetime, float], datetime): - """1.""" - glucose_timestamps = list(glucose_series.keys()) + this_ecephys_start_time: datetime, this_ecephys_stop_time: datetime, glucose_series: GlucoseSeries +) -> (GlucoseSeries, datetime): + """ + Return either the entire glucose history or the subset leading to the end of this ecephys session. + Also returns the NWB session start time. 
+ """ # If glucose recording ended before this ecephys session - if this_ecephys_start_time > glucose_timestamps[-1]: - return None, this_ecephys_start_time - - segments = dict() - ecephys_start_times_to_segment_number = dict() - # Calculate segments - if ecephys_start_times[0] > glucose_timestamps[0]: # if first ecephys session started before glucose recording - segments.update({0: (ecephys_start_times[0], ecephys_end_times[0])}) + if this_ecephys_start_time > glucose_series[-1]: + return glucose_series, this_ecephys_start_time else: - if ecephys_start_times[0] > glucose_timestamps[-1]: # if glucose recording ended before - segments.update({0: (glucose_timestamps[0], ecephys_end_times[0])}) - - glucose_series_per_segment = dict() - for segment_number, (start, stop) in segments.items(): - glucose_series_per_segment.update({segment_number: {k: v for k, v in glucose_series if start <= k <= stop}}) - - # Get the segment for this session - this_session_segment_number = ecephys_start_times_to_segment_number[this_ecephys_start_time] - return glucose_series_per_segment[this_session_segment_number], segments[this_session_segment_number][0] + return glucose_series.subset(timestamp=this_ecephys_stop_time), glucose_series.timestamps[0] diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicaccelerometerinterface.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicaccelerometerinterface.py index 234dd7f..d66e462 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicaccelerometerinterface.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicaccelerometerinterface.py @@ -1,4 +1,4 @@ -"""Authors: Heberto Mayorquin and Cody Baker.""" +"""Authors: Cody Baker.""" from nwb_conversion_tools.basedatainterface import BaseDataInterface from nwb_conversion_tools.utils import FilePathType from pynwb import TimeSeries, H5DataIO @@ -58,8 +58,9 @@ def run_conversion(self, nwbfile): nwbfile.add_acquisition( TimeSeries( name="Accelerometer", + description="Raw data from accelerometer sensors.", units="Volts", - data=H5DataIO(self.memmap), # should not need iterative write + data=H5DataIO(self.memmap.T), # should not need iterative write conversion=self.conversion, rate=self.sampling_frequency, ), diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicglucoseinterface.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicglucoseinterface.py new file mode 100644 index 0000000..4bd5f6b --- /dev/null +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicglucoseinterface.py @@ -0,0 +1,24 @@ +"""Authors: Cody Baker.""" +from nwb_conversion_tools.basedatainterface import BaseDataInterface +from pynwb import TimeSeries, H5DataIO + +from .tingley_metabolic_utils import GlucoseSeries + + +class TingleyMetabolicGlucoseInterface(BaseDataInterface): + """Glucose data interface for the Tingley metabolic project.""" + + def __init__(self, glucose_series: GlucoseSeries): + self.glucose_series = glucose_series + + def run_conversion(self, nwbfile): + nwbfile.add_acquisition( + TimeSeries( + name="GlucoseLevel", + description="Raw current from Medtronic iPro2 ISIG tracking.", + units="nA", + data=H5DataIO(self.glucose_series.isig), # should not need iterative write + conversion=1.0, + timestamps=H5DataIO(self.glucose_series.timestamps), + ), + ) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicnwbconverter.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicnwbconverter.py index 9b551cc..12c2be4 100644 --- 
a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicnwbconverter.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicnwbconverter.py @@ -1,4 +1,4 @@ -"""Authors: Heberto Mayorquin and Cody Baker.""" +"""Authors: Cody Baker.""" import dateutil from pathlib import Path from datetime import datetime @@ -10,6 +10,7 @@ ) from .tingleymetabolicaccelerometerinterface import TingleyMetabolicAccelerometerInterface +from .tingleymetabolicglucoseinterface import TingleyMetabolicGlucoseInterface DEVICE_INFO = dict( @@ -37,6 +38,7 @@ class TingleyMetabolicConverter(NWBConverter): NeuroscopeRecording=NeuroscopeRecordingInterface, NeuroscopeLFP=NeuroscopeLFPInterface, Accelerometer=TingleyMetabolicAccelerometerInterface, + Glucose=TingleyMetabolicGlucoseInterface, ) def get_metadata(self): From 76dd9088f5c9c097a3b49db0100f305ad31e90dc Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Mon, 18 Apr 2022 11:02:21 -0400 Subject: [PATCH 08/40] added sleep states --- .../mpgdatainterface.py | 0 .../common_interfaces/sleepstatesinterface.py | 44 +++++++++++++++++++ .../convert_tingley_metabolic.py | 12 ++--- .../tingleymetabolicaccelerometerinterface.py | 12 ----- .../tingleymetabolicnwbconverter.py | 2 + 5 files changed, 53 insertions(+), 17 deletions(-) rename buzsaki_lab_to_nwb/{ => common_interfaces}/mpgdatainterface.py (100%) create mode 100644 buzsaki_lab_to_nwb/common_interfaces/sleepstatesinterface.py diff --git a/buzsaki_lab_to_nwb/mpgdatainterface.py b/buzsaki_lab_to_nwb/common_interfaces/mpgdatainterface.py similarity index 100% rename from buzsaki_lab_to_nwb/mpgdatainterface.py rename to buzsaki_lab_to_nwb/common_interfaces/mpgdatainterface.py diff --git a/buzsaki_lab_to_nwb/common_interfaces/sleepstatesinterface.py b/buzsaki_lab_to_nwb/common_interfaces/sleepstatesinterface.py new file mode 100644 index 0000000..3614ada --- /dev/null +++ b/buzsaki_lab_to_nwb/common_interfaces/sleepstatesinterface.py @@ -0,0 +1,44 @@ +"""Authors: Heberto Mayorquin and Cody Baker.""" +from pathlib import Path + +from scipy.io import loadmat +from pynwb import NWBFile +from pynwb.file import TimeIntervals +from nwb_conversion_tools.basedatainterface import BaseDataInterface +from nwb_conversion_tools.utils import FilePathType, get_module + + +class SleepStateInterface(BaseDataInterface): + """Data interface for handling sleepStates.mat files found across multiple projects.""" + + def __init__(self, mat_file_path: FilePathType): + super().__init__(mat_file_path=mat_file_path) + + def run_conversion(self, nwbfile: NWBFile): + processing_module = get_module( + nwbfile=nwbfile, name="ecephys", description="Contains behavioral data concerning classified states." 
+ ) + + if Path(self.source_data["mat_file_path"]).exists(): + mat_file = loadmat(self.source_data["mat_file_path"]) + + state_label_names = dict(WAKEstate="Awake", NREMstate="Non-REM", REMstate="REM", MAstate="MA") + sleep_state_dic = mat_file["SleepState"]["ints"] + table = TimeIntervals(name="sleep_states", description="Sleep state of the animal.") + table.add_column(name="label", description="Sleep state.") + + data = [] + for sleep_state in state_label_names: + values = sleep_state_dic[sleep_state] + if len(values) != 0 and isinstance(values[0], int): + values = [values] + for start_time, stop_time in values: + data.append( + dict( + start_time=float(start_time), + stop_time=float(stop_time), + label=state_label_names[sleep_state], + ) + ) + [table.add_row(**row) for row in sorted(data, key=lambda x: x["start_time"])] + processing_module.add(table) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py b/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py index 0283059..9b6e1ca 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py @@ -60,15 +60,14 @@ def convert_session(session_path, nwbfile_path): conversion_options = dict() session_id = session_path.name - aux_file_path = session_path / "auxiliary.dat" - rhd_file_path = session_path / f"{session_id}.rhd" - xml_file_path = session_path / f"{session_id}.xml" + xml_file_path = session_path / f"{session_id}.xml" raw_file_path = session_path / f"{session_id}.dat" lfp_file_path = session_path / f"{session_id}.lfp" - print("raw file available...", raw_file_path.is_file()) - print("lfp file available...", lfp_file_path.is_file()) + aux_file_path = session_path / "auxiliary.dat" + rhd_file_path = session_path / f"{session_id}.rhd" + sleep_mat_file_path = session_path / f"{session_id}.SleepState.states.mat" # I know I'll need this for other sessions, just not yet # if not raw_file_path.is_file() and (session_path / f"{session_id}.dat_orig").is_file: @@ -104,6 +103,9 @@ def convert_session(session_path, nwbfile_path): if aux_file_path.is_file() and rhd_file_path.is_file(): source_data.update(Accelerometer=dict(dat_file_path=str(aux_file_path), rhd_file_path=str(rhd_file_path))) + if sleep_mat_file_path.is_file(): + source_data.update(SleepStates=dict(mat_file_path=str(sleep_mat_file_path))) + converter = TingleyMetabolicConverter(source_data=source_data) metadata = converter.get_metadata() metadata = dict_deep_update(metadata, global_metadata) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicaccelerometerinterface.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicaccelerometerinterface.py index d66e462..cf0a229 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicaccelerometerinterface.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicaccelerometerinterface.py @@ -22,18 +22,6 @@ def __init__(self, dat_file_path: FilePathType, rhd_file_path: FilePathType): ii) Even though the .rhd specifies the accelerometer data is acquired at 5kHz, the .dat has it stored at 20kHz by duplicating the data value at every 4th index. I can only assume this was done for easier side-by-side analysis of the raw data (which was acquired at 20kHz). - - Parameters - ---------- - dat_file_path : FilePathType - DESCRIPTION. - rhd_file_path : FilePathType - DESCRIPTION. - - Returns - ------- - None. 
- """ rhd_info = read_rhd(filename=rhd_file_path) first_aux_entry = next( diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicnwbconverter.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicnwbconverter.py index 12c2be4..c7c4806 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicnwbconverter.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicnwbconverter.py @@ -11,6 +11,7 @@ from .tingleymetabolicaccelerometerinterface import TingleyMetabolicAccelerometerInterface from .tingleymetabolicglucoseinterface import TingleyMetabolicGlucoseInterface +from ..common_interfaces.sleepstatesinterface import SleepStatesInterface DEVICE_INFO = dict( @@ -39,6 +40,7 @@ class TingleyMetabolicConverter(NWBConverter): NeuroscopeLFP=NeuroscopeLFPInterface, Accelerometer=TingleyMetabolicAccelerometerInterface, Glucose=TingleyMetabolicGlucoseInterface, + SleepStates=SleepStatesInterface, ) def get_metadata(self): From fdae8f517f6b5fe08d7d2e6823eaec50650a88b9 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Mon, 18 Apr 2022 11:56:38 -0400 Subject: [PATCH 09/40] added ripples --- .../common_interfaces/sleepstatesinterface.py | 2 +- .../tingleymetabolicripplesinterface.py | 68 +++++++++++++++++++ 2 files changed, 69 insertions(+), 1 deletion(-) create mode 100644 buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py diff --git a/buzsaki_lab_to_nwb/common_interfaces/sleepstatesinterface.py b/buzsaki_lab_to_nwb/common_interfaces/sleepstatesinterface.py index 3614ada..1903f0d 100644 --- a/buzsaki_lab_to_nwb/common_interfaces/sleepstatesinterface.py +++ b/buzsaki_lab_to_nwb/common_interfaces/sleepstatesinterface.py @@ -16,7 +16,7 @@ def __init__(self, mat_file_path: FilePathType): def run_conversion(self, nwbfile: NWBFile): processing_module = get_module( - nwbfile=nwbfile, name="ecephys", description="Contains behavioral data concerning classified states." + nwbfile=nwbfile, name="behavior", description="Contains behavioral data concerning classified states." 
) if Path(self.source_data["mat_file_path"]).exists(): diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py new file mode 100644 index 0000000..fd32ccb --- /dev/null +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py @@ -0,0 +1,68 @@ +"""Authors: Heberto Mayorquin and Cody Baker.""" +from pathlib import Path + +from scipy.io import loadmat +from pynwb import NWBFile, H5DataIO +from pynwb.file import TimeIntervals +from nwb_conversion_tools.basedatainterface import BaseDataInterface +from nwb_conversion_tools.tools.nwb_helpers import get_module +from nwb_conversion_tools.utils import FilePathType + + +class SleepStateInterface(BaseDataInterface): + """Data interface for handling ripples.mat files for the Tingley metabolic project.""" + + def __init__(self, mat_file_path: FilePathType): + super().__init__(mat_file_path=mat_file_path) + + def run_conversion(self, nwbfile: NWBFile): + processing_module = get_module( + nwbfile=nwbfile, + name="ecephys", + description="Intermediate data from extracellular electrophysiology recordings, e.g., LFP.", + ) + + if Path(self.source_data["mat_file_path"]).exists(): + mat_file = loadmat(self.source_data["mat_file_path"]) + + mat_data = mat_file["ripples"] + start_and_stop_times = mat_data["timestamps"][0][0] + durations = [x[0] for x in mat_data["data"][0][0]["duration"][0][0]] + peaks = [x[0] for x in mat_data["peaks"][0][0]] + peak_normed_powers = [x[0] for x in mat_data["peakNormedPower"][0][0]] + peak_frequencies = [x[0] for x in mat_data["data"][0][0]["peakFrequency"][0][0]] + peak_amplitudes = [x[0] for x in mat_data["data"][0][0]["peakAmplitude"][0][0]] + ripples = mat_data["maps"][0][0]["ripples"][0][0] + frequencies = mat_data["maps"][0][0]["frequency"][0][0] + phases = mat_data["maps"][0][0]["phase"][0][0] + amplitudes = mat_data["maps"][0][0]["amplitude"][0][0] + + descriptions = dict( + duration="Duration of the ripple event.", + peak="Peak of the ripple.", + peak_normed_power="Normed power of the peak.", + peak_frequency="Peak frequency of the ripple.", + peak_amplitude="Peak amplitude of the ripple.", + ) + indexed_descriptions = dict( + ripple="Extracted ripple data.", + frequency="Frequency of each point on the ripple.", + phase="Phase of each point on the ripple.", + amplitude="Amplitude of each point on the ripple.", + ) + + table = TimeIntervals(name="Ripples", description="Identified ripple events and their metrics.") + for start_time, stop_time in start_and_stop_times: + table.add_row(start_time=start_time, stop_time=stop_time) + for column_name, column_data in zip( + list(descriptions), [durations, peaks, peak_normed_powers, peak_frequencies, peak_amplitudes] + ): + table.add_column(name=column_name, description=descriptions[column_name], data=H5DataIO(column_data)) + for column_name, column_data in zip(list(indexed_descriptions), [ripples, frequencies, phases, amplitudes]): + table.add_column( + name=column_name, + description=indexed_descriptions[column_name], + index=list(range(column_data.shape[0])), + data=H5DataIO(column_data), + ) + processing_module.add(table) From 2f343f814caf4732890f0a325774140295f8d930 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Mon, 18 Apr 2022 12:09:43 -0400 Subject: [PATCH 10/40] integrated --- .../convert_tingley_metabolic.py | 4 ++++ .../tingleymetabolicnwbconverter.py | 2 ++ .../tingleymetabolicripplesinterface.py | 15 ++++++++------- 3 files changed, 14 
insertions(+), 7 deletions(-) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py b/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py index 9b6e1ca..8251a98 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py @@ -68,6 +68,7 @@ def convert_session(session_path, nwbfile_path): aux_file_path = session_path / "auxiliary.dat" rhd_file_path = session_path / f"{session_id}.rhd" sleep_mat_file_path = session_path / f"{session_id}.SleepState.states.mat" + ripple_mat_file_paths = [x for x in session_path.iterdir() for suffix in x.suffixes if "ripples" in suffix.lower()] # I know I'll need this for other sessions, just not yet # if not raw_file_path.is_file() and (session_path / f"{session_id}.dat_orig").is_file: @@ -106,6 +107,9 @@ def convert_session(session_path, nwbfile_path): if sleep_mat_file_path.is_file(): source_data.update(SleepStates=dict(mat_file_path=str(sleep_mat_file_path))) + if any(ripple_mat_file_paths): + source_data.update(Ripples=dict(mat_file_paths=ripple_mat_file_paths)) + converter = TingleyMetabolicConverter(source_data=source_data) metadata = converter.get_metadata() metadata = dict_deep_update(metadata, global_metadata) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicnwbconverter.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicnwbconverter.py index c7c4806..0db74a1 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicnwbconverter.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicnwbconverter.py @@ -11,6 +11,7 @@ from .tingleymetabolicaccelerometerinterface import TingleyMetabolicAccelerometerInterface from .tingleymetabolicglucoseinterface import TingleyMetabolicGlucoseInterface +from .tingleymetabolicripplesinterface import TingleyMetabolicRipplesInterface from ..common_interfaces.sleepstatesinterface import SleepStatesInterface @@ -41,6 +42,7 @@ class TingleyMetabolicConverter(NWBConverter): Accelerometer=TingleyMetabolicAccelerometerInterface, Glucose=TingleyMetabolicGlucoseInterface, SleepStates=SleepStatesInterface, + Ripples=TingleyMetabolicRipplesInterface, ) def get_metadata(self): diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py index fd32ccb..b1ab934 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py @@ -1,5 +1,5 @@ """Authors: Heberto Mayorquin and Cody Baker.""" -from pathlib import Path +from typing import Optional, List from scipy.io import loadmat from pynwb import NWBFile, H5DataIO @@ -9,11 +9,11 @@ from nwb_conversion_tools.utils import FilePathType -class SleepStateInterface(BaseDataInterface): +class TingleyMetabolicRipplesInterface(BaseDataInterface): """Data interface for handling ripples.mat files for the Tingley metabolic project.""" - def __init__(self, mat_file_path: FilePathType): - super().__init__(mat_file_path=mat_file_path) + def __init__(self, mat_file_paths: FilePathType): + super().__init__(mat_file_paths=mat_file_paths) def run_conversion(self, nwbfile: NWBFile): processing_module = get_module( @@ -22,8 +22,9 @@ def run_conversion(self, nwbfile: NWBFile): description="Intermediate data from extracellular electrophysiology recordings, e.g., LFP.", ) - if Path(self.source_data["mat_file_path"]).exists(): - mat_file = 
loadmat(self.source_data["mat_file_path"]) + for mat_file_path in self.source_data["mat_file_paths"]: + table_name = mat_file_path.suffixes[-3].lstrip(".").title() + mat_file = loadmat(mat_file_path) mat_data = mat_file["ripples"] start_and_stop_times = mat_data["timestamps"][0][0] @@ -51,7 +52,7 @@ def run_conversion(self, nwbfile: NWBFile): amplitude="Amplitude of each point on the ripple.", ) - table = TimeIntervals(name="Ripples", description="Identified ripple events and their metrics.") + table = TimeIntervals(name=table_name, description="Identified ripple events and their metrics.") for start_time, stop_time in start_and_stop_times: table.add_row(start_time=start_time, stop_time=stop_time) for column_name, column_data in zip( From ec957318f04295736add6afef086d83621dd3cd6 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Mon, 18 Apr 2022 12:12:02 -0400 Subject: [PATCH 11/40] Update buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py Co-authored-by: Ben Dichter --- .../tingley_metabolic/tingleymetabolicripplesinterface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py index b1ab934..c9766bd 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py @@ -52,7 +52,7 @@ def run_conversion(self, nwbfile: NWBFile): amplitude="Amplitude of each point on the ripple.", ) - table = TimeIntervals(name=table_name, description="Identified ripple events and their metrics.") + table = TimeIntervals(name=table_name, description=f"Identified {table_name} events and their metrics.") for start_time, stop_time in start_and_stop_times: table.add_row(start_time=start_time, stop_time=stop_time) for column_name, column_data in zip( From 579074f93ae3e28275d5a7bd7b03ea63ad5ac320 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Mon, 18 Apr 2022 14:59:42 -0400 Subject: [PATCH 12/40] saving state --- .../common_interfaces/sleepstatesinterface.py | 7 +- .../tingley_metabolic/__init__.py | 2 +- .../convert_tingley_metabolic.py | 67 ++++++++------- .../tingley_metabolic_metadata.yml | 3 +- .../tingley_metabolic_subject_info.yml | 82 +++++++++---------- .../tingley_metabolic_utils.py | 54 +++++++----- .../tingleymetabolicaccelerometerinterface.py | 2 +- .../tingleymetabolicglucoseinterface.py | 21 +++-- .../tingleymetabolicnwbconverter.py | 5 +- .../tingleymetabolicripplesinterface.py | 4 +- 10 files changed, 135 insertions(+), 112 deletions(-) diff --git a/buzsaki_lab_to_nwb/common_interfaces/sleepstatesinterface.py b/buzsaki_lab_to_nwb/common_interfaces/sleepstatesinterface.py index 1903f0d..b070fe6 100644 --- a/buzsaki_lab_to_nwb/common_interfaces/sleepstatesinterface.py +++ b/buzsaki_lab_to_nwb/common_interfaces/sleepstatesinterface.py @@ -5,16 +5,17 @@ from pynwb import NWBFile from pynwb.file import TimeIntervals from nwb_conversion_tools.basedatainterface import BaseDataInterface -from nwb_conversion_tools.utils import FilePathType, get_module +from nwb_conversion_tools.utils import FilePathType +from nwb_conversion_tools.tools.nwb_helpers import get_module -class SleepStateInterface(BaseDataInterface): +class SleepStatesInterface(BaseDataInterface): """Data interface for handling sleepStates.mat files found across multiple projects.""" def __init__(self, mat_file_path: 
FilePathType): super().__init__(mat_file_path=mat_file_path) - def run_conversion(self, nwbfile: NWBFile): + def run_conversion(self, nwbfile: NWBFile, metadata): processing_module = get_module( nwbfile=nwbfile, name="behavior", description="Contains behavioral data concerning classified states." ) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/__init__.py b/buzsaki_lab_to_nwb/tingley_metabolic/__init__.py index 027350a..f7fbc3c 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/__init__.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/__init__.py @@ -1,2 +1,2 @@ from .tingleymetabolicnwbconverter import TingleyMetabolicConverter -from .tingley_metabolic_utils import load_subject_glucose_series, get_subject_ecephys_session_start_times +from .tingley_metabolic_utils import get_session_datetime diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py b/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py index 8251a98..51d0a89 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py @@ -4,27 +4,24 @@ from datetime import timedelta from tqdm import tqdm -from nwb_conversion_tools.utils.json_schema import load_dict_from_file -from nwb_conversion_tools.utils.json_schema import dict_deep_update +from nwb_conversion_tools.utils import load_dict_from_file, dict_deep_update from spikeextractors import NeuroscopeRecordingExtractor -from buzsaki_lab_to_nwb.tingley_metabolic import ( - TingleyMetabolicConverter, - load_subject_glucose_series, - segment_glucose_series, - get_session_datetime, -) +from buzsaki_lab_to_nwb.tingley_metabolic import TingleyMetabolicConverter, get_session_datetime -n_jobs = 20 +n_jobs = 1 progress_bar_options = dict(desc="Running conversion...", position=0, leave=False) stub_test = True conversion_factor = 0.195 # Intan -data_path = Path("/shared/catalystneuro/Buzsaki/TingleyD/") -home_path = Path("/home/jovyan/") +# data_path = Path("/shared/catalystneuro/Buzsaki/TingleyD/") +# home_path = Path("/home/jovyan/") -metadata_path = Path(__file__) / "tingley_metabolic_metadata.yml" -subject_info_path = Path(__file__) / "tingley_metabolic_subject_info.yml" +data_path = Path("E:/BuzsakiData/TingleyD") +home_path = Path("E:/BuzsakiData/TingleyD/") + +metadata_path = Path(__file__).parent / "tingley_metabolic_metadata.yml" +subject_info_path = Path(__file__).parent / "tingley_metabolic_subject_info.yml" if stub_test: nwb_output_path = home_path / Path("nwb_stub") @@ -33,8 +30,14 @@ nwb_output_path.mkdir(exist_ok=True) -subject_list = ["CGM1", "CGM2"] # This list will change based on what has finished transfering to the Hub -session_path_list = [subject_path for subject_path in data_path.iterdir() if subject_path.stem in subject_list] +subject_list = ["CGM1"] # This list will change based on what has finished transfering to the Hub +session_path_list = [ + session_path + for subject_path in data_path.iterdir() + if subject_path.is_dir() + for session_path in subject_path.iterdir() + if subject_path.stem in subject_list and session_path.is_dir() +] if stub_test: nwbfile_list = [ nwb_output_path / f"{subject_path.stem}_{session.stem}_stub.nwb" @@ -75,28 +78,32 @@ def convert_session(session_path, nwbfile_path): # raw_file_path = session_path / f"{session_id}.dat_orig" # raw_file_path = session_path / f"{session_id}.dat" if (session_path / f"{session_id}.dat").is_file() else - - subject_glucose_series = load_subject_glucose_series(session_path=session_path) 
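The get_session_datetime helper imported above, and called on the line that follows, recovers the ecephys start time from the trailing yymmdd_HHMMSS stamp of the session folder name. A small sketch of that parsing, using a hypothetical session ID:

    from datetime import datetime

    def get_session_datetime(session_id: str) -> datetime:
        # Session folder names end in a yymmdd_HHMMSS stamp, e.g. "CGM1_190910_120000" (hypothetical).
        return datetime.strptime("_".join(session_id.split("_")[-2:]), "%y%m%d_%H%M%S")

    get_session_datetime("CGM1_190910_120000")  # -> datetime(2019, 9, 10, 12, 0)
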
this_ecephys_start_time = get_session_datetime(session_id=session_id) this_ecephys_stop_time = this_ecephys_start_time + timedelta( seconds=NeuroscopeRecordingExtractor(file_path=lfp_file_path).get_num_frames() / 1250.0 ) - session_glucose_series, session_start_time = segment_glucose_series( - this_ecephys_start_time=this_ecephys_start_time, - this_ecephys_stop_time=this_ecephys_stop_time, - glucose_series=subject_glucose_series, - ) - source_data = dict(Glucose=dict(glucose_series=session_glucose_series)) - source_data = dict( - NeuroscopeLFP=dict(file_path=str(lfp_file_path), gain=conversion_factor, xml_file_path=str(xml_file_path)), + Glucose=dict( + session_path=str(session_path), + ecephys_start_time=str(this_ecephys_start_time), + ecephys_stop_time=str(this_ecephys_stop_time), + ), + NeuroscopeLFP=dict( + file_path=str(lfp_file_path), + gain=conversion_factor, + xml_file_path=str(xml_file_path), + spikeextractors_backend=True, + ), ) conversion_options.update(NeuroscopeLFP=dict(stub_test=stub_test)) if raw_file_path.is_file(): source_data.update( NeuroscopeRecording=dict( - file_path=str(raw_file_path), gain=conversion_factor, xml_file_path=str(xml_file_path) + file_path=str(raw_file_path), + gain=conversion_factor, + xml_file_path=str(xml_file_path), + spikeextractors_backend=True, ) ) conversion_options.update(NeuroscopeRecording=dict(stub_test=stub_test)) @@ -104,8 +111,8 @@ def convert_session(session_path, nwbfile_path): if aux_file_path.is_file() and rhd_file_path.is_file(): source_data.update(Accelerometer=dict(dat_file_path=str(aux_file_path), rhd_file_path=str(rhd_file_path))) - if sleep_mat_file_path.is_file(): - source_data.update(SleepStates=dict(mat_file_path=str(sleep_mat_file_path))) + # if sleep_mat_file_path.is_file(): + # source_data.update(SleepStates=dict(mat_file_path=str(sleep_mat_file_path))) if any(ripple_mat_file_paths): source_data.update(Ripples=dict(mat_file_paths=ripple_mat_file_paths)) @@ -115,10 +122,10 @@ def convert_session(session_path, nwbfile_path): metadata = dict_deep_update(metadata, global_metadata) metadata["NWBFile"].update( session_description=subject_info_table.get( - metadata["NWBFile"]["Subject"]["subject_id"], + metadata["Subject"]["subject_id"], "Consult Supplementary Table 1 from the publication for more information about this session.", ), - session_start_time=session_start_time, + session_start_time=str(converter.data_interface_objects["Glucose"].session_start_time), ) converter.run_conversion( nwbfile_path=str(nwbfile_path), diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_metadata.yml b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_metadata.yml index 9df97de..4ceffdf 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_metadata.yml +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_metadata.yml @@ -1,6 +1,5 @@ NWBFile: - related_publications: - "Transformation of a spatial map across the hippocampal-lateral septal circuit." Neuron 98.6 (2018) 1229-1242. + related_publications: "Transformation of a spatial map across the hippocampal-lateral septal circuit. Neuron 98.6 (2018) 1229-1242." 
lab: "Buzsáki" experimenter: - "Author: Tingley, David" diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_subject_info.yml b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_subject_info.yml index 370ca78..74f6c84 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_subject_info.yml +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_subject_info.yml @@ -1,45 +1,45 @@ CGM1: "Experiment condition 'ripple/glucose recording' with surgery condition 'rCA1'." -CGM2: "Experiment condition 'ripple/glucose recording' with surgery condition 'rCA1'."" -CGM3: "Experiment condition 'ripple/glucose recording' with surgery condition 'rCA1'."" -CGM4: "Experiment condition 'ripple/glucose recording' with surgery condition 'rCA1'."" -CGM5: "Experiment condition 'ripple/glucose recording' with surgery condition 'rCA1'."" -CGM6: "Experiment condition 'ripple/glucose recording' with surgery condition 'bilat rCA1'."" +CGM2: "Experiment condition 'ripple/glucose recording' with surgery condition 'rCA1'." +CGM3: "Experiment condition 'ripple/glucose recording' with surgery condition 'rCA1'." +CGM4: "Experiment condition 'ripple/glucose recording' with surgery condition 'rCA1'." +CGM5: "Experiment condition 'ripple/glucose recording' with surgery condition 'rCA1'." +CGM6: "Experiment condition 'ripple/glucose recording' with surgery condition 'bilat rCA1'." CGM7: "Experiment condition 'ripple/glucose recording' with surgery condition 'rHypothalamus & rCA1'." CGM8: "Experiment condition 'ripple/glucose recording' with surgery condition 'rHypothalamus & rCA1'." -CGM9: "Experiment condition 'ripple/glucose recording' with surgery condition 'flex probe in rCA1'."" -CGM10: "Experiment condition 'Opto stim' with surgery condition 'bilat CA3 CaMKII-ChR2'."" -CGM11: "Experiment condition 'Opto stim' with surgery condition 'bilat CA3 CaMKII-ChR2'."" -CGM12: "Experiment condition 'Opto stim' with surgery condition 'bilat CA3 CaMKII-ChR2'."" -CGM13: "Experiment condition 'Opto stim' with surgery condition 'bilat CA3 CaMKII-ChR2'."" -CGM14: "Experiment condition 'Opto stim' with surgery condition 'bilat CA3 CaMKII-ChR2'."" -CGM15: "Experiment condition 'Opto stim' with surgery condition 'bilat CA3 CaMKII-ChR2'."" -CGM16: "Experiment condition 'Opto stim' with surgery condition 'bilat CA3 CaMKII-ChR2'."" +CGM9: "Experiment condition 'ripple/glucose recording' with surgery condition 'flex probe in rCA1'." +CGM10: "Experiment condition 'Opto stim' with surgery condition 'bilat CA3 CaMKII-ChR2'." +CGM11: "Experiment condition 'Opto stim' with surgery condition 'bilat CA3 CaMKII-ChR2'." +CGM12: "Experiment condition 'Opto stim' with surgery condition 'bilat CA3 CaMKII-ChR2'." +CGM13: "Experiment condition 'Opto stim' with surgery condition 'bilat CA3 CaMKII-ChR2'." +CGM14: "Experiment condition 'Opto stim' with surgery condition 'bilat CA3 CaMKII-ChR2'." +CGM15: "Experiment condition 'Opto stim' with surgery condition 'bilat CA3 CaMKII-ChR2'." +CGM16: "Experiment condition 'Opto stim' with surgery condition 'bilat CA3 CaMKII-ChR2'." CGM17: "Experiment condition 'dorsal/ventral' with no surgery condition." 
-CGM18: "Experiment condition 'DREADDS' with surgery condition 'LS injected'."" -CGM19: "Experiment condition 'DREADDS' with surgery condition 'LS injected'."" -CGM20: "Experiment condition 'DREADDS' with surgery condition 'LS injected'."" -CGM21: "Experiment condition 'PPC opto stim' with surgery condition 'PPC injected'."" -CGM22: "Experiment condition 'PPC opto stim' with surgery condition 'PPC injected'."" -CGM23: "Experiment condition 'dorsal/ventral' with surgery condition 'dorsal/ventral probe implant'."" -CGM24: "Experiment condition 'DREADDS' with surgery condition 'LS injected'."" -CGM25: "Experiment condition 'DREADDS' with surgery condition 'LS injected'."" -CGM26: "Experiment condition 'DREADDS' with surgery condition 'LS injected'."" -CGM27: "Experiment condition 'DREADDS' with surgery condition 'LS injected'."" -CGM28: "Experiment condition 'PPC opto stim' with surgery condition 'PPC injected'."" -CGM29: "Experiment condition 'PPC opto stim' surgery condition 'PPC injected'."" -CGM30: "Experiment condition 'DREADDS' surgery condition 'MS injected'."" -CGM31: "Experiment condition 'dorsal/ventral' surgery condition 'dorsal/ventral probe implant'."" -CGM32: "Experiment condition 'DREADDS' surgery condition 'MS injected'."" -CGM33: "Experiment condition 'dorsal/ventral' surgery condition 'dorsal/ventral probe implant'."" -CGM34: "Experiment condition 'DREADDS' surgery condition 'PPC injected'."" -CGM35: "Experiment condition 'DREADDS' surgery condition 'PPC injected'."" -CGM36: "Experiment condition 'DREADDS' surgery condition 'PPC injected'."" -CGM37: "Experiment condition 'DREADDS' surgery condition 'PPC injected'."" -CGM38: "Experiment condition 'PPC opto stim' surgery condition 'PPC injected'."" -CGM39: "Experiment condition 'PPC opto stim' surgery condition 'PPC injected'."" -CGM40: "Experiment condition 'DREADDS' surgery condition 'MS injected'."" -CGM41: "Experiment condition 'DREADDS' surgery condition 'PPC injected'."" -CGM42: "Experiment condition 'DREADDS' with surgery condition 'PPC injected'."" -CGM43: "Experiment condition 'DREADDS' with surgery condition 'PPC injected'."" -CGM44: "Experiment condition 'DREADDS' with surgery condition 'PPC injected'."" -CGM45: "Experiment condition 'ripple/glucose recording' with surgery condition 'PPC injected'."" +CGM18: "Experiment condition 'DREADDS' with surgery condition 'LS injected'." +CGM19: "Experiment condition 'DREADDS' with surgery condition 'LS injected'." +CGM20: "Experiment condition 'DREADDS' with surgery condition 'LS injected'." +CGM21: "Experiment condition 'PPC opto stim' with surgery condition 'PPC injected'." +CGM22: "Experiment condition 'PPC opto stim' with surgery condition 'PPC injected'." +CGM23: "Experiment condition 'dorsal/ventral' with surgery condition 'dorsal/ventral probe implant'." +CGM24: "Experiment condition 'DREADDS' with surgery condition 'LS injected'." +CGM25: "Experiment condition 'DREADDS' with surgery condition 'LS injected'." +CGM26: "Experiment condition 'DREADDS' with surgery condition 'LS injected'." +CGM27: "Experiment condition 'DREADDS' with surgery condition 'LS injected'." +CGM28: "Experiment condition 'PPC opto stim' with surgery condition 'PPC injected'." +CGM29: "Experiment condition 'PPC opto stim' surgery condition 'PPC injected'." +CGM30: "Experiment condition 'DREADDS' surgery condition 'MS injected'." +CGM31: "Experiment condition 'dorsal/ventral' surgery condition 'dorsal/ventral probe implant'." 
+CGM32: "Experiment condition 'DREADDS' surgery condition 'MS injected'." +CGM33: "Experiment condition 'dorsal/ventral' surgery condition 'dorsal/ventral probe implant'." +CGM34: "Experiment condition 'DREADDS' surgery condition 'PPC injected'." +CGM35: "Experiment condition 'DREADDS' surgery condition 'PPC injected'." +CGM36: "Experiment condition 'DREADDS' surgery condition 'PPC injected'." +CGM37: "Experiment condition 'DREADDS' surgery condition 'PPC injected'." +CGM38: "Experiment condition 'PPC opto stim' surgery condition 'PPC injected'." +CGM39: "Experiment condition 'PPC opto stim' surgery condition 'PPC injected'." +CGM40: "Experiment condition 'DREADDS' surgery condition 'MS injected'." +CGM41: "Experiment condition 'DREADDS' surgery condition 'PPC injected'." +CGM42: "Experiment condition 'DREADDS' with surgery condition 'PPC injected'." +CGM43: "Experiment condition 'DREADDS' with surgery condition 'PPC injected'." +CGM44: "Experiment condition 'DREADDS' with surgery condition 'PPC injected'." +CGM45: "Experiment condition 'ripple/glucose recording' with surgery condition 'PPC injected'." diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_utils.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_utils.py index 0926349..eb78efe 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_utils.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_utils.py @@ -2,42 +2,50 @@ from typing import List, Optional from pathlib import Path from datetime import datetime -from collections import namedtuple +from dataclasses import dataclass import numpy as np from pandas import read_csv, to_datetime -BaseGlucoseSeries = namedtuple("GlucoseSeries", "timestamps isig") +@dataclass +class GlucoseSeries: + # timestamps: Optional[List[datetime]] = None + # isig: Optional[List[float]] = None -class GlucoseSeries(BaseGlucoseSeries): def __init__(self, timestamps: Optional[List[datetime]] = None, isig: Optional[List[float]] = None): - timestamps = [] if timestamps is None else timestamps - isig = [] if isig is None else isig + super().__init__() + self.timestamps = [] if timestamps is None else timestamps + self.isig = [] if isig is None else isig self.order() - def __add__(self, glucose_series: BaseGlucoseSeries): + def __add__(self, glucose_series): self.timestamps.extend(glucose_series.timestamps) self.isig.extend(glucose_series.isig) self.order() + return self def order(self): sorted_indices = np.argsort(self.timestamps) - self.timestamps = list(np.array(self.timestamps)[sorted_indices]) - self.isig = list(np.array(self.timestamps)[sorted_indices]) + # self.timestamps = list(np.array(self.timestamps)[sorted_indices]) + unsorted_timestamps = list(self.timestamps) + self.timestamps = [unsorted_timestamps[idx] for idx in sorted_indices] + self.isig = list(np.array(self.isig)[sorted_indices]) def subset(self, timestamp: datetime): cutoff_idx = next(idx for idx, series_timestamp in enumerate(self.timestamps) if timestamp >= series_timestamp) - self.timestamps = self.timestamps[:cutoff_idx] - self.isig = self.isig[:cutoff_idx] + print(cutoff_idx) + timestamps = self.timestamps[:cutoff_idx] + isig = self.isig[:cutoff_idx] + return GlucoseSeries(timestamps=timestamps, isig=isig) -def load_subject_glucose_series(session_path: Path) -> GlucoseSeries: +def load_subject_glucose_series(session_path) -> GlucoseSeries: """Given the subject_id string and the ecephys session_path, load all glucose series data for further parsing.""" - subject_path = 
session_path.parent + subject_path = Path(session_path).parent all_csv = [x for x in subject_path.iterdir() if ".csv" in x.suffixes] - glucose_series = GlucoseSeries(timestamps=[], isig=[]) + glucose_series = GlucoseSeries() for file_path in all_csv: glucose_series += read_glucose_csv(file_path=file_path) return glucose_series @@ -47,12 +55,14 @@ def read_glucose_csv(file_path: Path) -> GlucoseSeries: """Parse a single glucose data file.""" all_data = read_csv(filepath_or_buffer=file_path, skiprows=11) - timestamps = all_data["ISIG Value"] - isig = to_datetime(all_data["Timestamp"], infer_datetime_format=True) + isig = all_data["ISIG Value"] + exclude = all_data["Excluded"].astype(bool) + (1 - np.isnan(isig)) + (isig == -9999) + valid_isig = isig[exclude] + valid_timestamps = [ + x.to_pydatetime() for x in to_datetime(all_data["Timestamp"][exclude], infer_datetime_format=True) + ] - exclude = all_data["Excluded"].astype(bool) + np.isnan(isig) + (isig == -9999) - - return GlucoseSeries(timestamps=timestamps[exclude], isig=isig[exclude]) + return GlucoseSeries(timestamps=valid_timestamps, isig=valid_isig) def get_session_datetime(session_id: str): @@ -61,7 +71,7 @@ def get_session_datetime(session_id: str): def segment_glucose_series( - this_ecephys_start_time: datetime, this_ecephys_stop_time: datetime, glucose_series: GlucoseSeries + ecephys_start_time: datetime, ecephys_stop_time: datetime, glucose_series: GlucoseSeries ) -> (GlucoseSeries, datetime): """ Return either the entire glucose history or the subset leading to the end of this ecephys session. @@ -69,7 +79,7 @@ def segment_glucose_series( Also returns the NWB session start time. """ # If glucose recording ended before this ecephys session - if this_ecephys_start_time > glucose_series[-1]: - return glucose_series, this_ecephys_start_time + if ecephys_start_time > glucose_series.timestamps[-1]: + return glucose_series, ecephys_start_time else: - return glucose_series.subset(timestamp=this_ecephys_stop_time), glucose_series.timestamps[0] + return glucose_series.subset(timestamp=ecephys_stop_time), glucose_series.timestamps[0] diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicaccelerometerinterface.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicaccelerometerinterface.py index cf0a229..00a4fec 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicaccelerometerinterface.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicaccelerometerinterface.py @@ -42,7 +42,7 @@ def __init__(self, dat_file_path: FilePathType, rhd_file_path: FilePathType): # Manually confirmed result is still memmap after slicing self.memmap = read_binary(file=dat_file_path, numchan=numchan, dtype=dtype)[:3, ::4] - def run_conversion(self, nwbfile): + def run_conversion(self, nwbfile, metadata): nwbfile.add_acquisition( TimeSeries( name="Accelerometer", diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicglucoseinterface.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicglucoseinterface.py index 4bd5f6b..441ab43 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicglucoseinterface.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicglucoseinterface.py @@ -1,22 +1,33 @@ """Authors: Cody Baker.""" +from datetime import datetime + from nwb_conversion_tools.basedatainterface import BaseDataInterface +from nwb_conversion_tools.utils import FilePathType from pynwb import TimeSeries, H5DataIO -from .tingley_metabolic_utils import GlucoseSeries +from .tingley_metabolic_utils import 
load_subject_glucose_series, segment_glucose_series class TingleyMetabolicGlucoseInterface(BaseDataInterface): """Glucose data interface for the Tingley metabolic project.""" - def __init__(self, glucose_series: GlucoseSeries): - self.glucose_series = glucose_series + def __init__(self, session_path: FilePathType, ecephys_start_time: str, ecephys_stop_time: str): + subject_glucose_series = load_subject_glucose_series(session_path=session_path) + session_glucose_series, session_start_time = segment_glucose_series( + ecephys_start_time=datetime.fromisoformat(ecephys_start_time), + ecephys_stop_time=datetime.fromisoformat(ecephys_stop_time), + glucose_series=subject_glucose_series, + ) + self.session_start_time = session_start_time + self.glucose_series = session_glucose_series - def run_conversion(self, nwbfile): + def run_conversion(self, nwbfile, metadata): + print(self.glucose_series.isig) nwbfile.add_acquisition( TimeSeries( name="GlucoseLevel", description="Raw current from Medtronic iPro2 ISIG tracking.", - units="nA", + unit="nA", data=H5DataIO(self.glucose_series.isig), # should not need iterative write conversion=1.0, timestamps=H5DataIO(self.glucose_series.timestamps), diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicnwbconverter.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicnwbconverter.py index 0db74a1..1508ced 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicnwbconverter.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicnwbconverter.py @@ -53,11 +53,8 @@ def get_metadata(self): session_id_split = session_id.split("_")[:-2] subject_id = session_id_split[0] - date_string = session_id_split[-2:] - session_start_time = datetime.strptime(date_string=date_string, format="%Y%m%d%H%M%S") - session_start_time = session_start_time.replace(tzinfo=dateutil.tz.gettz("US/Eastern")).isoformat() metadata = super().get_metadata() - metadata["NWBFile"].update(session_start_time=session_start_time, session_id=session_id) + metadata["NWBFile"].update(session_id=session_id) metadata.update(Subject=dict(subject_id=subject_id)) return metadata diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py index b1ab934..c085358 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py @@ -1,6 +1,4 @@ """Authors: Heberto Mayorquin and Cody Baker.""" -from typing import Optional, List - from scipy.io import loadmat from pynwb import NWBFile, H5DataIO from pynwb.file import TimeIntervals @@ -15,7 +13,7 @@ class TingleyMetabolicRipplesInterface(BaseDataInterface): def __init__(self, mat_file_paths: FilePathType): super().__init__(mat_file_paths=mat_file_paths) - def run_conversion(self, nwbfile: NWBFile): + def run_conversion(self, nwbfile: NWBFile, metadata): processing_module = get_module( nwbfile=nwbfile, name="ecephys", From e9f884c2084916955f5a0cfd1ab203860bdf099d Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Wed, 20 Apr 2022 12:29:34 -0400 Subject: [PATCH 13/40] final stages of debugging; falling back to simpler glucose representation --- .../tingley_metabolic_utils.py | 94 ++++++++++--------- .../tingleymetabolicglucoseinterface.py | 27 +++--- 2 files changed, 64 insertions(+), 57 deletions(-) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_utils.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_utils.py 
index eb78efe..b7d29c3 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_utils.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_utils.py @@ -2,56 +2,60 @@ from typing import List, Optional from pathlib import Path from datetime import datetime -from dataclasses import dataclass + +# from dataclasses import dataclass import numpy as np from pandas import read_csv, to_datetime -@dataclass -class GlucoseSeries: - # timestamps: Optional[List[datetime]] = None - # isig: Optional[List[float]] = None +# @dataclass +# class GlucoseSeries: +# # timestamps: Optional[List[datetime]] = None +# # isig: Optional[List[float]] = None - def __init__(self, timestamps: Optional[List[datetime]] = None, isig: Optional[List[float]] = None): - super().__init__() - self.timestamps = [] if timestamps is None else timestamps - self.isig = [] if isig is None else isig - self.order() +# def __init__(self, timestamps: Optional[List[datetime]] = None, isig: Optional[List[float]] = None): +# super().__init__() +# self.timestamps = [] if timestamps is None else timestamps +# self.isig = [] if isig is None else isig +# self.order() - def __add__(self, glucose_series): - self.timestamps.extend(glucose_series.timestamps) - self.isig.extend(glucose_series.isig) - self.order() - return self +# def __add__(self, glucose_series): +# self.timestamps.extend(glucose_series.timestamps) +# self.isig.extend(glucose_series.isig) +# self.order() +# return self - def order(self): - sorted_indices = np.argsort(self.timestamps) - # self.timestamps = list(np.array(self.timestamps)[sorted_indices]) - unsorted_timestamps = list(self.timestamps) - self.timestamps = [unsorted_timestamps[idx] for idx in sorted_indices] - self.isig = list(np.array(self.isig)[sorted_indices]) +# def order(self): +# sorted_indices = np.argsort(self.timestamps) +# # self.timestamps = list(np.array(self.timestamps)[sorted_indices]) +# unsorted_timestamps = list(self.timestamps) +# self.timestamps = [unsorted_timestamps[idx] for idx in sorted_indices] +# self.isig = list(np.array(self.isig)[sorted_indices]) - def subset(self, timestamp: datetime): - cutoff_idx = next(idx for idx, series_timestamp in enumerate(self.timestamps) if timestamp >= series_timestamp) - print(cutoff_idx) - timestamps = self.timestamps[:cutoff_idx] - isig = self.isig[:cutoff_idx] - return GlucoseSeries(timestamps=timestamps, isig=isig) +# def subset(self, timestamp: datetime): +# cutoff_idx = next(idx for idx, series_timestamp in enumerate(self.timestamps) if timestamp >= series_timestamp) +# print(cutoff_idx) +# timestamps = self.timestamps[:cutoff_idx] +# isig = self.isig[:cutoff_idx] +# return GlucoseSeries(timestamps=timestamps, isig=isig) -def load_subject_glucose_series(session_path) -> GlucoseSeries: +def load_subject_glucose_series(session_path) -> (List[datetime], List[float]): """Given the subject_id string and the ecephys session_path, load all glucose series data for further parsing.""" subject_path = Path(session_path).parent all_csv = [x for x in subject_path.iterdir() if ".csv" in x.suffixes] - glucose_series = GlucoseSeries() + timestamps = [] + isig = [] for file_path in all_csv: - glucose_series += read_glucose_csv(file_path=file_path) - return glucose_series + sub_timestamps, sub_isig = read_glucose_csv(file_path=file_path) + timestamps.extend(sub_timestamps) + isig.extend(sub_isig) + return timestamps, isig -def read_glucose_csv(file_path: Path) -> GlucoseSeries: +def read_glucose_csv(file_path: Path) -> (List[datetime], List[float]): 
"""Parse a single glucose data file.""" all_data = read_csv(filepath_or_buffer=file_path, skiprows=11) @@ -62,7 +66,7 @@ def read_glucose_csv(file_path: Path) -> GlucoseSeries: x.to_pydatetime() for x in to_datetime(all_data["Timestamp"][exclude], infer_datetime_format=True) ] - return GlucoseSeries(timestamps=valid_timestamps, isig=valid_isig) + return valid_timestamps, list(valid_isig) def get_session_datetime(session_id: str): @@ -70,16 +74,16 @@ def get_session_datetime(session_id: str): return datetime.strptime("_".join(session_id.split("_")[-2:]), "%y%m%d_%H%M%S") -def segment_glucose_series( - ecephys_start_time: datetime, ecephys_stop_time: datetime, glucose_series: GlucoseSeries -) -> (GlucoseSeries, datetime): - """ - Return either the entire glucose history or the subset leading to the end of this ecephys session. +# def segment_glucose_series( +# ecephys_start_time: datetime, ecephys_stop_time: datetime, glucose_series: GlucoseSeries +# ) -> (GlucoseSeries, datetime): +# """ +# Return either the entire glucose history or the subset leading to the end of this ecephys session. - Also returns the NWB session start time. - """ - # If glucose recording ended before this ecephys session - if ecephys_start_time > glucose_series.timestamps[-1]: - return glucose_series, ecephys_start_time - else: - return glucose_series.subset(timestamp=ecephys_stop_time), glucose_series.timestamps[0] +# Also returns the NWB session start time. +# """ +# # If glucose recording ended before this ecephys session +# if ecephys_start_time > glucose_series.timestamps[-1]: +# return glucose_series, ecephys_start_time +# else: +# return glucose_series.subset(timestamp=ecephys_stop_time), glucose_series.timestamps[0] diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicglucoseinterface.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicglucoseinterface.py index 441ab43..9da4c70 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicglucoseinterface.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicglucoseinterface.py @@ -5,31 +5,34 @@ from nwb_conversion_tools.utils import FilePathType from pynwb import TimeSeries, H5DataIO -from .tingley_metabolic_utils import load_subject_glucose_series, segment_glucose_series +from .tingley_metabolic_utils import load_subject_glucose_series # , segment_glucose_series class TingleyMetabolicGlucoseInterface(BaseDataInterface): """Glucose data interface for the Tingley metabolic project.""" def __init__(self, session_path: FilePathType, ecephys_start_time: str, ecephys_stop_time: str): - subject_glucose_series = load_subject_glucose_series(session_path=session_path) - session_glucose_series, session_start_time = segment_glucose_series( - ecephys_start_time=datetime.fromisoformat(ecephys_start_time), - ecephys_stop_time=datetime.fromisoformat(ecephys_stop_time), - glucose_series=subject_glucose_series, - ) - self.session_start_time = session_start_time - self.glucose_series = session_glucose_series + glucose_timestamps, glucose_isig = load_subject_glucose_series(session_path=session_path) + # session_glucose_series, session_start_time = segment_glucose_series( + # ecephys_start_time=datetime.fromisoformat(ecephys_start_time), + # ecephys_stop_time=datetime.fromisoformat(ecephys_stop_time), + # glucose_series=subject_glucose_series, + # ) + self.session_start_time = glucose_timestamps[0] + glucose_timestamps_floats_from_datetime = [ + (glucose_timestamp - self.session_start_time).total_seconds() for glucose_timestamp in 
glucose_timestamps + ] + self.glucose_timestamps = glucose_timestamps_floats_from_datetime + self.glucose_isig = glucose_isig def run_conversion(self, nwbfile, metadata): - print(self.glucose_series.isig) nwbfile.add_acquisition( TimeSeries( name="GlucoseLevel", description="Raw current from Medtronic iPro2 ISIG tracking.", unit="nA", - data=H5DataIO(self.glucose_series.isig), # should not need iterative write + data=H5DataIO(self.glucose_isig), # should not need iterative write conversion=1.0, - timestamps=H5DataIO(self.glucose_series.timestamps), + timestamps=H5DataIO(self.glucose_timestamps), ), ) From 3f70e10f8bb9029103bed0ae5623fd5cf4925304 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Wed, 20 Apr 2022 14:22:43 -0400 Subject: [PATCH 14/40] final debugging --- .../convert_tingley_metabolic.py | 24 +++++++- .../tingley_metabolic_metadata.yml | 38 ++++++++++-- .../tingley_metabolic_utils.py | 59 ++----------------- .../tingleymetabolicglucoseinterface.py | 5 -- .../tingleymetabolicnwbconverter.py | 5 +- 5 files changed, 65 insertions(+), 66 deletions(-) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py b/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py index 51d0a89..59b06f6 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py @@ -95,7 +95,6 @@ def convert_session(session_path, nwbfile_path): spikeextractors_backend=True, ), ) - conversion_options.update(NeuroscopeLFP=dict(stub_test=stub_test)) if raw_file_path.is_file(): source_data.update( @@ -121,12 +120,31 @@ def convert_session(session_path, nwbfile_path): metadata = converter.get_metadata() metadata = dict_deep_update(metadata, global_metadata) metadata["NWBFile"].update( - session_description=subject_info_table.get( + # session_description=subject_info_table.get( + # metadata["Subject"]["subject_id"], + # "Consult Supplementary Table 1 from the publication for more information about this session.", + # ), + experiment_description=subject_info_table.get( metadata["Subject"]["subject_id"], "Consult Supplementary Table 1 from the publication for more information about this session.", ), - session_start_time=str(converter.data_interface_objects["Glucose"].session_start_time), ) + if metadata["Ecephys"]["Device"][0]["name"] == "Device_ecephys": + del metadata["Ecephys"]["Device"][0] + for electrode_group_metadata in metadata["Ecephys"]["ElectrodeGroup"]: + electrode_group_metadata.update(device=metadata["Ecephys"]["Device"][0]["name"]) + + ecephys_start_increment = ( + this_ecephys_start_time - converter.data_interface_objects["Glucose"].session_start_time + ).total_seconds() + conversion_options.update(NeuroscopeLFP=dict(stub_test=stub_test, starting_time=ecephys_start_increment)) + if raw_file_path.is_file(): + conversion_options.update( + NeuroscopeRecording=dict( + stub_test=stub_test, starting_time=ecephys_start_increment, es_key="ElectricalSeries_raw" + ) + ) + converter.run_conversion( nwbfile_path=str(nwbfile_path), metadata=metadata, diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_metadata.yml b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_metadata.yml index 4ceffdf..160cd83 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_metadata.yml +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_metadata.yml @@ -1,8 +1,38 @@ NWBFile: - related_publications: "Transformation of a spatial map across the hippocampal-lateral septal circuit. 
Neuron 98.6 (2018) 1229-1242." - lab: "Buzsáki" + related_publications: "https://doi.org/10.1038/s41586-021-03811-w" + lab: Buzsáki + institution: NYU experimenter: - - "Author: Tingley, David" - - "Author: Buzsáki, György" + - Tingley, David + - McClain, Kathryn + - Kaya, Ekin + - Carpenter, Jordan + - Buzsáki, György + keywords: + - glucose + - ecephys + - pharmacology Subject: + description: > + A total of 45 adult Long Evans rats were used in this study. Five were used for the hippocampal and lateral septal + in vivo recordings26. Ten rats were used for chronic electrophysiological recordings paired with glucose + monitoring. Eight rats were used for optogenetic induction of SPW-Rs and glucose monitoring. Six rats were used for + the DREADD experiment, and seven rats were used as controls for this experiment. Three additional rats were used + for simultaneous dorsal and ventral hippocampus recordings. Six additional rats were used for the posterior + parietal cortex (PPC) optogenetic stimulation control experiments. Sample sizes were selected to match cohort + sizes where applicable. species: Rattus norvegicus + strain: Long Evans + sex: U +Ecephys: + Device: + - name: IntanDevice + description: > + Recordings were conducted using the Intan RHD2000 interface board, sampled at 20 kHz. Amplification and + digitization were done on the head stage. Data were visualized with Neurosuite software. All + local field potential (LFP) analyses (ripple detection, state scoring and so on) were conducted on + the 1,250-Hz down-sampled signal. + ElectricalSeries_raw: + name: ElectricalSeriesRaw + ElectricalSeries_lfp: + name: ElectricalSeriesLFP \ No newline at end of file diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_utils.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_utils.py index b7d29c3..915ce43 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_utils.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_utils.py @@ -1,46 +1,12 @@ """Author: Cody Baker.""" -from typing import List, Optional +from typing import List from pathlib import Path from datetime import datetime -# from dataclasses import dataclass - import numpy as np from pandas import read_csv, to_datetime -# @dataclass -# class GlucoseSeries: -# # timestamps: Optional[List[datetime]] = None -# # isig: Optional[List[float]] = None - -# def __init__(self, timestamps: Optional[List[datetime]] = None, isig: Optional[List[float]] = None): -# super().__init__() -# self.timestamps = [] if timestamps is None else timestamps -# self.isig = [] if isig is None else isig -# self.order() - -# def __add__(self, glucose_series): -# self.timestamps.extend(glucose_series.timestamps) -# self.isig.extend(glucose_series.isig) -# self.order() -# return self - -# def order(self): -# sorted_indices = np.argsort(self.timestamps) -# # self.timestamps = list(np.array(self.timestamps)[sorted_indices]) -# unsorted_timestamps = list(self.timestamps) -# self.timestamps = [unsorted_timestamps[idx] for idx in sorted_indices] -# self.isig = list(np.array(self.isig)[sorted_indices]) - -# def subset(self, timestamp: datetime): -# cutoff_idx = next(idx for idx, series_timestamp in enumerate(self.timestamps) if timestamp >= series_timestamp) -# print(cutoff_idx) -# timestamps = self.timestamps[:cutoff_idx] -# isig = self.isig[:cutoff_idx] -# return GlucoseSeries(timestamps=timestamps, isig=isig) - - def load_subject_glucose_series(session_path) -> (List[datetime], List[float]): """Given the subject_id string 
and the ecephys session_path, load all glucose series data for further parsing.""" subject_path = Path(session_path).parent @@ -60,10 +26,12 @@ def read_glucose_csv(file_path: Path) -> (List[datetime], List[float]): all_data = read_csv(filepath_or_buffer=file_path, skiprows=11) isig = all_data["ISIG Value"] - exclude = all_data["Excluded"].astype(bool) + (1 - np.isnan(isig)) + (isig == -9999) - valid_isig = isig[exclude] + exclude_col = all_data["Excluded"] + exclude_col.fillna(False, inplace=True) + exclude = (exclude_col.astype(bool) + np.isnan(isig) + (isig == -9999)).astype(bool) + valid_isig = isig[exclude == 0] valid_timestamps = [ - x.to_pydatetime() for x in to_datetime(all_data["Timestamp"][exclude], infer_datetime_format=True) + x.to_pydatetime() for x in to_datetime(all_data["Timestamp"][exclude == 0], infer_datetime_format=True) ] return valid_timestamps, list(valid_isig) @@ -72,18 +40,3 @@ def read_glucose_csv(file_path: Path) -> (List[datetime], List[float]): def get_session_datetime(session_id: str): """Auxiliary function for parsing the datetime part of a sesion ID.""" return datetime.strptime("_".join(session_id.split("_")[-2:]), "%y%m%d_%H%M%S") - - -# def segment_glucose_series( -# ecephys_start_time: datetime, ecephys_stop_time: datetime, glucose_series: GlucoseSeries -# ) -> (GlucoseSeries, datetime): -# """ -# Return either the entire glucose history or the subset leading to the end of this ecephys session. - -# Also returns the NWB session start time. -# """ -# # If glucose recording ended before this ecephys session -# if ecephys_start_time > glucose_series.timestamps[-1]: -# return glucose_series, ecephys_start_time -# else: -# return glucose_series.subset(timestamp=ecephys_stop_time), glucose_series.timestamps[0] diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicglucoseinterface.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicglucoseinterface.py index 9da4c70..dfbc8b6 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicglucoseinterface.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicglucoseinterface.py @@ -13,11 +13,6 @@ class TingleyMetabolicGlucoseInterface(BaseDataInterface): def __init__(self, session_path: FilePathType, ecephys_start_time: str, ecephys_stop_time: str): glucose_timestamps, glucose_isig = load_subject_glucose_series(session_path=session_path) - # session_glucose_series, session_start_time = segment_glucose_series( - # ecephys_start_time=datetime.fromisoformat(ecephys_start_time), - # ecephys_stop_time=datetime.fromisoformat(ecephys_stop_time), - # glucose_series=subject_glucose_series, - # ) self.session_start_time = glucose_timestamps[0] glucose_timestamps_floats_from_datetime = [ (glucose_timestamp - self.session_start_time).total_seconds() for glucose_timestamp in glucose_timestamps diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicnwbconverter.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicnwbconverter.py index 1508ced..3ff7428 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicnwbconverter.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicnwbconverter.py @@ -55,6 +55,9 @@ def get_metadata(self): subject_id = session_id_split[0] metadata = super().get_metadata() - metadata["NWBFile"].update(session_id=session_id) + metadata["NWBFile"].update( + session_id=session_id, + session_start_time=str(self.data_interface_objects["Glucose"].session_start_time), + ) metadata.update(Subject=dict(subject_id=subject_id)) return metadata From 
acd12b543cfbd6a7889f1ec578386ee5a1846146 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Thu, 21 Apr 2022 18:20:27 -0400 Subject: [PATCH 15/40] final debugging for all data types --- .../common_interfaces/sleepstatesinterface.py | 14 ++--- .../convert_tingley_metabolic.py | 57 +++++++++---------- .../tingley_metabolic_metadata.yml | 4 +- .../tingleymetabolicaccelerometerinterface.py | 14 ++--- .../tingleymetabolicglucoseinterface.py | 4 +- .../tingleymetabolicnwbconverter.py | 2 + .../tingleymetabolicripplesinterface.py | 38 +++++++------ 7 files changed, 68 insertions(+), 65 deletions(-) diff --git a/buzsaki_lab_to_nwb/common_interfaces/sleepstatesinterface.py b/buzsaki_lab_to_nwb/common_interfaces/sleepstatesinterface.py index b070fe6..3ed132b 100644 --- a/buzsaki_lab_to_nwb/common_interfaces/sleepstatesinterface.py +++ b/buzsaki_lab_to_nwb/common_interfaces/sleepstatesinterface.py @@ -15,29 +15,29 @@ class SleepStatesInterface(BaseDataInterface): def __init__(self, mat_file_path: FilePathType): super().__init__(mat_file_path=mat_file_path) - def run_conversion(self, nwbfile: NWBFile, metadata): + def run_conversion(self, nwbfile: NWBFile, metadata, ecephys_start_time: float = 0.0): processing_module = get_module( nwbfile=nwbfile, name="behavior", description="Contains behavioral data concerning classified states." ) if Path(self.source_data["mat_file_path"]).exists(): - mat_file = loadmat(self.source_data["mat_file_path"]) + mat_file = loadmat(file_name=self.source_data["mat_file_path"]) + sleep_state_dic = mat_file["SleepState"]["ints"][0][0] state_label_names = dict(WAKEstate="Awake", NREMstate="Non-REM", REMstate="REM", MAstate="MA") - sleep_state_dic = mat_file["SleepState"]["ints"] table = TimeIntervals(name="sleep_states", description="Sleep state of the animal.") table.add_column(name="label", description="Sleep state.") data = [] - for sleep_state in state_label_names: - values = sleep_state_dic[sleep_state] + for sleep_state in set(mat_file["SleepState"]["ints"][0][0].dtype.names): + values = sleep_state_dic[sleep_state][0][0] if len(values) != 0 and isinstance(values[0], int): values = [values] for start_time, stop_time in values: data.append( dict( - start_time=float(start_time), - stop_time=float(stop_time), + start_time=ecephys_start_time + float(start_time), + stop_time=ecephys_start_time + float(stop_time), label=state_label_names[sleep_state], ) ) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py b/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py index 59b06f6..aed9dc1 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py @@ -2,6 +2,7 @@ from pathlib import Path from concurrent.futures import ProcessPoolExecutor, as_completed from datetime import timedelta +from warnings import simplefilter from tqdm import tqdm from nwb_conversion_tools.utils import load_dict_from_file, dict_deep_update @@ -30,26 +31,18 @@ nwb_output_path.mkdir(exist_ok=True) -subject_list = ["CGM1"] # This list will change based on what has finished transfering to the Hub +subject_list = ["CGM1", "CGM3"] # This list will change based on what has finished transfering to the Hub session_path_list = [ session_path for subject_path in data_path.iterdir() - if subject_path.is_dir() + if subject_path.is_dir() and subject_path.stem in subject_list for session_path in subject_path.iterdir() - if subject_path.stem in subject_list and session_path.is_dir() + if 
session_path.is_dir() ] if stub_test: - nwbfile_list = [ - nwb_output_path / f"{subject_path.stem}_{session.stem}_stub.nwb" - for subject_path in session_path_list - for session in subject_path.iterdir() - ] + nwbfile_list = [nwb_output_path / f"{session.stem}_stub.nwb" for session in session_path_list] else: - nwbfile_list = [ - nwb_output_path / f"{subject_path.stem}_{session.stem}.nwb" - for subject_path in session_path_list - for session in subject_path.iterdir() - ] + nwbfile_list = [nwb_output_path / f"{session.stem}.nwb" for session in session_path_list] global_metadata = load_dict_from_file(metadata_path) subject_info_table = load_dict_from_file(subject_info_path) @@ -57,10 +50,6 @@ def convert_session(session_path, nwbfile_path): """Run coonversion.""" - print("----------------") - print(session_path) - print(nwbfile_path) - conversion_options = dict() session_id = session_path.name @@ -69,7 +58,7 @@ def convert_session(session_path, nwbfile_path): lfp_file_path = session_path / f"{session_id}.lfp" aux_file_path = session_path / "auxiliary.dat" - rhd_file_path = session_path / f"{session_id}.rhd" + rhd_file_path = session_path / "info.rhd" sleep_mat_file_path = session_path / f"{session_id}.SleepState.states.mat" ripple_mat_file_paths = [x for x in session_path.iterdir() for suffix in x.suffixes if "ripples" in suffix.lower()] @@ -78,15 +67,15 @@ def convert_session(session_path, nwbfile_path): # raw_file_path = session_path / f"{session_id}.dat_orig" # raw_file_path = session_path / f"{session_id}.dat" if (session_path / f"{session_id}.dat").is_file() else - this_ecephys_start_time = get_session_datetime(session_id=session_id) - this_ecephys_stop_time = this_ecephys_start_time + timedelta( + ecephys_start_time = get_session_datetime(session_id=session_id) + ecephys_stop_time = ecephys_start_time + timedelta( seconds=NeuroscopeRecordingExtractor(file_path=lfp_file_path).get_num_frames() / 1250.0 ) source_data = dict( Glucose=dict( session_path=str(session_path), - ecephys_start_time=str(this_ecephys_start_time), - ecephys_stop_time=str(this_ecephys_stop_time), + ecephys_start_time=str(ecephys_start_time), + ecephys_stop_time=str(ecephys_stop_time), ), NeuroscopeLFP=dict( file_path=str(lfp_file_path), @@ -110,8 +99,8 @@ def convert_session(session_path, nwbfile_path): if aux_file_path.is_file() and rhd_file_path.is_file(): source_data.update(Accelerometer=dict(dat_file_path=str(aux_file_path), rhd_file_path=str(rhd_file_path))) - # if sleep_mat_file_path.is_file(): - # source_data.update(SleepStates=dict(mat_file_path=str(sleep_mat_file_path))) + if sleep_mat_file_path.is_file(): + source_data.update(SleepStates=dict(mat_file_path=str(sleep_mat_file_path))) if any(ripple_mat_file_paths): source_data.update(Ripples=dict(mat_file_paths=ripple_mat_file_paths)) @@ -134,16 +123,24 @@ def convert_session(session_path, nwbfile_path): for electrode_group_metadata in metadata["Ecephys"]["ElectrodeGroup"]: electrode_group_metadata.update(device=metadata["Ecephys"]["Device"][0]["name"]) - ecephys_start_increment = ( - this_ecephys_start_time - converter.data_interface_objects["Glucose"].session_start_time + ecephys_start_time_increment = ( + ecephys_start_time - converter.data_interface_objects["Glucose"].session_start_time ).total_seconds() - conversion_options.update(NeuroscopeLFP=dict(stub_test=stub_test, starting_time=ecephys_start_increment)) + conversion_options.update(NeuroscopeLFP=dict(stub_test=stub_test, starting_time=ecephys_start_time_increment)) if raw_file_path.is_file(): 
conversion_options.update( NeuroscopeRecording=dict( - stub_test=stub_test, starting_time=ecephys_start_increment, es_key="ElectricalSeries_raw" + stub_test=stub_test, starting_time=ecephys_start_time_increment, es_key="ElectricalSeries_raw" ) ) + if aux_file_path.is_file() and rhd_file_path.is_file(): + conversion_options.update( + Accelerometer=dict(stub_test=stub_test, ecephys_start_time=ecephys_start_time_increment) + ) + if sleep_mat_file_path.is_file(): + conversion_options.update(SleepStates=dict(ecephys_start_time=ecephys_start_time_increment)) + if any(ripple_mat_file_paths): + conversion_options.update(Ripples=dict(stub_test=stub_test, ecephys_start_time=ecephys_start_time_increment)) converter.run_conversion( nwbfile_path=str(nwbfile_path), @@ -151,11 +148,11 @@ def convert_session(session_path, nwbfile_path): conversion_options=conversion_options, overwrite=True, ) - print("Done with conversion!") if n_jobs == 1: for session_path, nwbfile_path in tqdm(zip(session_path_list, nwbfile_list), **progress_bar_options): + simplefilter("ignore") convert_session(session_path=session_path, nwbfile_path=nwbfile_path) else: with ProcessPoolExecutor(max_workers=n_jobs) as executor: @@ -164,4 +161,4 @@ def convert_session(session_path, nwbfile_path): futures.append(executor.submit(convert_session, session_path=session_path, nwbfile_path=nwbfile_path)) completed_futures = tqdm(as_completed(futures), total=len(session_path_list), **progress_bar_options) for future in completed_futures: - pass + pass # To get tqdm to show diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_metadata.yml b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_metadata.yml index 160cd83..78a8392 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_metadata.yml +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_metadata.yml @@ -32,7 +32,7 @@ Ecephys: digitization were done on the head stage. Data were visualized with Neurosuite software. All local field potential (LFP) analyses (ripple detection, state scoring and so on) were conducted on the 1,250-Hz down-sampled signal. - ElectricalSeries_raw: - name: ElectricalSeriesRaw + - name: Medtronic iPro2 CGM + description: Continuous Glucose Monitoring (CGM) system used to track subject glucose levels. 
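A note on the starting_time bookkeeping added to convert_session above: the NWB session clock is anchored to the first glucose sample, so every ecephys-derived object is offset by the gap between the glucose start and the ecephys start. A minimal sketch of the offset computation, with placeholder datetimes:

    from datetime import datetime

    glucose_session_start = datetime(2019, 9, 10, 11, 30, 0)  # placeholder: first glucose timestamp
    ecephys_start_time = datetime(2019, 9, 10, 12, 0, 0)      # placeholder: parsed from the session ID

    # Seconds added to every ecephys timestamp so it lines up with the glucose clock.
    ecephys_start_time_increment = (ecephys_start_time - glucose_session_start).total_seconds()
    assert ecephys_start_time_increment == 1800.0
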
ElectricalSeries_lfp: name: ElectricalSeriesLFP \ No newline at end of file diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicaccelerometerinterface.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicaccelerometerinterface.py index 00a4fec..6a3423c 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicaccelerometerinterface.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicaccelerometerinterface.py @@ -25,12 +25,10 @@ def __init__(self, dat_file_path: FilePathType, rhd_file_path: FilePathType): """ rhd_info = read_rhd(filename=rhd_file_path) first_aux_entry = next( - header_info_entry - for header_info_entry in rhd_info[1] - if header_info_entry["native_channel_name"] == "A-AUX1" + header_info_entry for header_info_entry in rhd_info[1] if "AUX1" in header_info_entry["native_channel_name"] ) first_aux_sub_entry = next( - header_info_entry for header_info_entry in rhd_info[2] if header_info_entry[0] == "A-AUX1" + header_info_entry for header_info_entry in rhd_info[2] if "AUX1" in header_info_entry[0] ) # Manually confirmed that all aux channels have same properties @@ -42,14 +40,16 @@ def __init__(self, dat_file_path: FilePathType, rhd_file_path: FilePathType): # Manually confirmed result is still memmap after slicing self.memmap = read_binary(file=dat_file_path, numchan=numchan, dtype=dtype)[:3, ::4] - def run_conversion(self, nwbfile, metadata): + def run_conversion(self, nwbfile, metadata, stub_test: bool = False, ecephys_start_time: float = 0.0): + stub_frames = 200 if stub_test else None nwbfile.add_acquisition( TimeSeries( name="Accelerometer", description="Raw data from accelerometer sensors.", - units="Volts", - data=H5DataIO(self.memmap.T), # should not need iterative write + unit="Volts", + data=H5DataIO(self.memmap.T[:stub_frames, :], compression="gzip"), # should not need iterative write conversion=self.conversion, rate=self.sampling_frequency, + starting_time=ecephys_start_time, ), ) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicglucoseinterface.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicglucoseinterface.py index dfbc8b6..f32ed49 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicglucoseinterface.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicglucoseinterface.py @@ -26,8 +26,8 @@ def run_conversion(self, nwbfile, metadata): name="GlucoseLevel", description="Raw current from Medtronic iPro2 ISIG tracking.", unit="nA", - data=H5DataIO(self.glucose_isig), # should not need iterative write + data=H5DataIO(self.glucose_isig, compression="gzip"), # should not need iterative write conversion=1.0, - timestamps=H5DataIO(self.glucose_timestamps), + timestamps=H5DataIO(self.glucose_timestamps, compression="gzip"), ), ) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicnwbconverter.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicnwbconverter.py index 3ff7428..9553ccf 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicnwbconverter.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicnwbconverter.py @@ -60,4 +60,6 @@ def get_metadata(self): session_start_time=str(self.data_interface_objects["Glucose"].session_start_time), ) metadata.update(Subject=dict(subject_id=subject_id)) + if "NeuroscopeRecording" in self.data_interface_objects: + metadata["Ecephys"].update(ElectricalSeries_raw=dict(name="ElectricalSeriesRaw")) return metadata diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py 
b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py index f74a8fe..891d729 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py @@ -4,16 +4,16 @@ from pynwb.file import TimeIntervals from nwb_conversion_tools.basedatainterface import BaseDataInterface from nwb_conversion_tools.tools.nwb_helpers import get_module -from nwb_conversion_tools.utils import FilePathType class TingleyMetabolicRipplesInterface(BaseDataInterface): """Data interface for handling ripples.mat files for the Tingley metabolic project.""" - def __init__(self, mat_file_paths: FilePathType): + def __init__(self, mat_file_paths: list): super().__init__(mat_file_paths=mat_file_paths) - def run_conversion(self, nwbfile: NWBFile, metadata): + def run_conversion(self, nwbfile: NWBFile, metadata, stub_test: bool = False, ecephys_start_time: float = 0.0): + stub_events = 5 if stub_test else None processing_module = get_module( nwbfile=nwbfile, name="ecephys", @@ -22,19 +22,19 @@ def run_conversion(self, nwbfile: NWBFile, metadata): for mat_file_path in self.source_data["mat_file_paths"]: table_name = mat_file_path.suffixes[-3].lstrip(".").title() - mat_file = loadmat(mat_file_path) + mat_file = loadmat(file_name=mat_file_path) mat_data = mat_file["ripples"] - start_and_stop_times = mat_data["timestamps"][0][0] - durations = [x[0] for x in mat_data["data"][0][0]["duration"][0][0]] - peaks = [x[0] for x in mat_data["peaks"][0][0]] - peak_normed_powers = [x[0] for x in mat_data["peakNormedPower"][0][0]] - peak_frequencies = [x[0] for x in mat_data["data"][0][0]["peakFrequency"][0][0]] - peak_amplitudes = [x[0] for x in mat_data["data"][0][0]["peakAmplitude"][0][0]] - ripples = mat_data["maps"][0][0]["ripples"][0][0] - frequencies = mat_data["maps"][0][0]["frequency"][0][0] - phases = mat_data["maps"][0][0]["phase"][0][0] - amplitudes = mat_data["maps"][0][0]["amplitude"][0][0] + start_and_stop_times = mat_data["timestamps"][0][0][:stub_events] + durations = [x[0] for x in mat_data["data"][0][0]["duration"][0][0]][:stub_events] + peaks = [x[0] for x in mat_data["peaks"][0][0]][:stub_events] + peak_normed_powers = [x[0] for x in mat_data["peakNormedPower"][0][0]][:stub_events] + peak_frequencies = [x[0] for x in mat_data["data"][0][0]["peakFrequency"][0][0]][:stub_events] + peak_amplitudes = [x[0] for x in mat_data["data"][0][0]["peakAmplitude"][0][0]][:stub_events] + ripples = mat_data["maps"][0][0]["ripples"][0][0][:stub_events] + frequencies = mat_data["maps"][0][0]["frequency"][0][0][:stub_events] + phases = mat_data["maps"][0][0]["phase"][0][0][:stub_events] + amplitudes = mat_data["maps"][0][0]["amplitude"][0][0][:stub_events] descriptions = dict( duration="Duration of the ripple event.", @@ -52,16 +52,20 @@ def run_conversion(self, nwbfile: NWBFile, metadata): table = TimeIntervals(name=table_name, description=f"Identified {table_name} events and their metrics.") for start_time, stop_time in start_and_stop_times: - table.add_row(start_time=start_time, stop_time=stop_time) + table.add_row(start_time=ecephys_start_time + start_time, stop_time=ecephys_start_time + stop_time) for column_name, column_data in zip( list(descriptions), [durations, peaks, peak_normed_powers, peak_frequencies, peak_amplitudes] ): - table.add_column(name=column_name, description=descriptions[column_name], data=H5DataIO(column_data)) + table.add_column( + name=column_name, + description=descriptions[column_name], + 
data=H5DataIO(column_data, compression="gzip"), + ) for column_name, column_data in zip(list(indexed_descriptions), [ripples, frequencies, phases, amplitudes]): table.add_column( name=column_name, description=indexed_descriptions[column_name], index=list(range(column_data.shape[0])), - data=H5DataIO(column_data), + data=H5DataIO(column_data, compression="gzip"), ) processing_module.add(table) From 3fd99be1d16b6b67e13a784e3a2857cf7895f8bc Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sun, 24 Apr 2022 10:49:25 -0400 Subject: [PATCH 16/40] hub level debugs --- .../common_interfaces/sleepstatesinterface.py | 47 +++++----- .../convert_tingley_metabolic.py | 62 +++++++++---- .../tingleymetabolicripplesinterface.py | 93 ++++++++++--------- 3 files changed, 118 insertions(+), 84 deletions(-) diff --git a/buzsaki_lab_to_nwb/common_interfaces/sleepstatesinterface.py b/buzsaki_lab_to_nwb/common_interfaces/sleepstatesinterface.py index 3ed132b..b7f6ff0 100644 --- a/buzsaki_lab_to_nwb/common_interfaces/sleepstatesinterface.py +++ b/buzsaki_lab_to_nwb/common_interfaces/sleepstatesinterface.py @@ -21,25 +21,30 @@ def run_conversion(self, nwbfile: NWBFile, metadata, ecephys_start_time: float = ) if Path(self.source_data["mat_file_path"]).exists(): - mat_file = loadmat(file_name=self.source_data["mat_file_path"]) - - sleep_state_dic = mat_file["SleepState"]["ints"][0][0] - state_label_names = dict(WAKEstate="Awake", NREMstate="Non-REM", REMstate="REM", MAstate="MA") - table = TimeIntervals(name="sleep_states", description="Sleep state of the animal.") - table.add_column(name="label", description="Sleep state.") - - data = [] - for sleep_state in set(mat_file["SleepState"]["ints"][0][0].dtype.names): - values = sleep_state_dic[sleep_state][0][0] - if len(values) != 0 and isinstance(values[0], int): - values = [values] - for start_time, stop_time in values: - data.append( - dict( - start_time=ecephys_start_time + float(start_time), - stop_time=ecephys_start_time + float(stop_time), - label=state_label_names[sleep_state], + try: + mat_file = loadmat(file_name=self.source_data["mat_file_path"]) + mat_file_is_scipy_readable = True + except NotImplementedError: + mat_file_is_scipy_readable = False + + if mat_file_is_scipy_readable: # To-Do, re-do indexing for an hdfstorage reader + sleep_state_dic = mat_file["SleepState"]["ints"][0][0] + state_label_names = dict(WAKEstate="Awake", NREMstate="Non-REM", REMstate="REM", MAstate="MA") + table = TimeIntervals(name="sleep_states", description="Sleep state of the animal.") + table.add_column(name="label", description="Sleep state.") + + data = [] + for sleep_state in set(mat_file["SleepState"]["ints"][0][0].dtype.names): + values = sleep_state_dic[sleep_state][0][0] + if len(values) != 0 and isinstance(values[0], int): + values = [values] + for start_time, stop_time in values: + data.append( + dict( + start_time=ecephys_start_time + float(start_time), + stop_time=ecephys_start_time + float(stop_time), + label=state_label_names[sleep_state], + ) ) - ) - [table.add_row(**row) for row in sorted(data, key=lambda x: x["start_time"])] - processing_module.add(table) + [table.add_row(**row) for row in sorted(data, key=lambda x: x["start_time"])] + processing_module.add(table) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py b/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py index aed9dc1..ffad29b 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py +++ 
b/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py @@ -10,28 +10,27 @@ from buzsaki_lab_to_nwb.tingley_metabolic import TingleyMetabolicConverter, get_session_datetime -n_jobs = 1 +n_jobs = 10 progress_bar_options = dict(desc="Running conversion...", position=0, leave=False) -stub_test = True +stub_test = False conversion_factor = 0.195 # Intan +buffer_gb = 10 +# note that on DANDIHub, max number of actual I/O operations on processes seems limited to 8-10, +# so total mem isn't technically buffer_gb * n_jobs -# data_path = Path("/shared/catalystneuro/Buzsaki/TingleyD/") -# home_path = Path("/home/jovyan/") +data_path = Path("/shared/catalystneuro/TingleyD/") +home_path = Path("/home/jovyan/") -data_path = Path("E:/BuzsakiData/TingleyD") -home_path = Path("E:/BuzsakiData/TingleyD/") +# data_path = Path("E:/BuzsakiData/TingleyD") +# home_path = Path("E:/BuzsakiData/TingleyD/") metadata_path = Path(__file__).parent / "tingley_metabolic_metadata.yml" subject_info_path = Path(__file__).parent / "tingley_metabolic_subject_info.yml" -if stub_test: - nwb_output_path = home_path / Path("nwb_stub") -else: - nwb_output_path = Path("/shared/catalystneuro/Buzsaki/TingleyD/nwb") -nwb_output_path.mkdir(exist_ok=True) - -subject_list = ["CGM1", "CGM3"] # This list will change based on what has finished transfering to the Hub +subject_list = [ + "CGM3" +] # [1,2,3,4,30,31,32,36,37,39]] # This list will change based on what has finished transfering to the Hub session_path_list = [ session_path for subject_path in data_path.iterdir() @@ -39,6 +38,15 @@ for session_path in subject_path.iterdir() if session_path.is_dir() ] + + +if stub_test: + nwb_output_path = data_path / "nwb_stub" +else: + nwb_output_path = data_path / f"nwb_{subject_list[0]}" +nwb_output_path.mkdir(exist_ok=True) + + if stub_test: nwbfile_list = [nwb_output_path / f"{session.stem}_stub.nwb" for session in session_path_list] else: @@ -50,6 +58,7 @@ def convert_session(session_path, nwbfile_path): """Run coonversion.""" + simplefilter("ignore") conversion_options = dict() session_id = session_path.name @@ -69,7 +78,8 @@ def convert_session(session_path, nwbfile_path): # raw_file_path = session_path / f"{session_id}.dat" if (session_path / f"{session_id}.dat").is_file() else ecephys_start_time = get_session_datetime(session_id=session_id) ecephys_stop_time = ecephys_start_time + timedelta( - seconds=NeuroscopeRecordingExtractor(file_path=lfp_file_path).get_num_frames() / 1250.0 + seconds=NeuroscopeRecordingExtractor(file_path=lfp_file_path, xml_file_path=xml_file_path).get_num_frames() + / 1250.0 ) source_data = dict( Glucose=dict( @@ -108,15 +118,19 @@ def convert_session(session_path, nwbfile_path): converter = TingleyMetabolicConverter(source_data=source_data) metadata = converter.get_metadata() metadata = dict_deep_update(metadata, global_metadata) + session_description = "Consult Supplementary Table 1 from the publication for more information about this session." 
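    # A worked example of the timing logic in this function (all values below are
    # illustrative, not taken from any real session): the NWB session clock starts at
    # the first glucose sample, the ecephys clock starts at the datetime parsed from
    # the session_id, and every ecephys object is shifted by the difference.
    #
    #     from datetime import datetime, timedelta
    #     glucose_start = datetime(2017, 9, 28, 10, 0)   # Glucose interface session_start_time
    #     ecephys_start = datetime(2017, 9, 28, 11, 30)  # get_session_datetime(session_id)
    #     n_lfp_frames = 4_500_000                       # frame count of the 1,250 Hz .lfp file
    #     ecephys_stop = ecephys_start + timedelta(seconds=n_lfp_frames / 1250.0)  # one hour later
    #     offset = (ecephys_start - glucose_start).total_seconds()                 # 5400.0 s
    #
    # That offset is what is passed below as starting_time / ecephys_start_time.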
metadata["NWBFile"].update( # session_description=subject_info_table.get( # metadata["Subject"]["subject_id"], # "Consult Supplementary Table 1 from the publication for more information about this session.", # ), - experiment_description=subject_info_table.get( - metadata["Subject"]["subject_id"], - "Consult Supplementary Table 1 from the publication for more information about this session.", - ), + # experiment_description=subject_info_table.get( + # metadata["Subject"]["subject_id"], + # "Consult Supplementary Table 1 from the publication for more information about this session.", + # ), + # Since no mapping of subject_ids to ST1, just leave this for all. + session_description=session_description, + experiment_description=session_description, ) if metadata["Ecephys"]["Device"][0]["name"] == "Device_ecephys": del metadata["Ecephys"]["Device"][0] @@ -126,11 +140,18 @@ def convert_session(session_path, nwbfile_path): ecephys_start_time_increment = ( ecephys_start_time - converter.data_interface_objects["Glucose"].session_start_time ).total_seconds() - conversion_options.update(NeuroscopeLFP=dict(stub_test=stub_test, starting_time=ecephys_start_time_increment)) + conversion_options.update( + NeuroscopeLFP=dict( + stub_test=stub_test, starting_time=ecephys_start_time_increment, iterator_opts=dict(buffer_gb=buffer_gb) + ) + ) if raw_file_path.is_file(): conversion_options.update( NeuroscopeRecording=dict( - stub_test=stub_test, starting_time=ecephys_start_time_increment, es_key="ElectricalSeries_raw" + stub_test=stub_test, + starting_time=ecephys_start_time_increment, + es_key="ElectricalSeries_raw", + iterator_opts=dict(buffer_gb=buffer_gb), ) ) if aux_file_path.is_file() and rhd_file_path.is_file(): @@ -155,6 +176,7 @@ def convert_session(session_path, nwbfile_path): simplefilter("ignore") convert_session(session_path=session_path, nwbfile_path=nwbfile_path) else: + simplefilter("ignore") with ProcessPoolExecutor(max_workers=n_jobs) as executor: futures = [] for session_path, nwbfile_path in zip(session_path_list, nwbfile_list): diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py index 891d729..a42188b 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py @@ -22,50 +22,57 @@ def run_conversion(self, nwbfile: NWBFile, metadata, stub_test: bool = False, ec for mat_file_path in self.source_data["mat_file_paths"]: table_name = mat_file_path.suffixes[-3].lstrip(".").title() - mat_file = loadmat(file_name=mat_file_path) + try: + mat_file = loadmat(file_name=mat_file_path) + mat_file_is_scipy_readable = True + except NotImplementedError: + mat_file_is_scipy_readable = False - mat_data = mat_file["ripples"] - start_and_stop_times = mat_data["timestamps"][0][0][:stub_events] - durations = [x[0] for x in mat_data["data"][0][0]["duration"][0][0]][:stub_events] - peaks = [x[0] for x in mat_data["peaks"][0][0]][:stub_events] - peak_normed_powers = [x[0] for x in mat_data["peakNormedPower"][0][0]][:stub_events] - peak_frequencies = [x[0] for x in mat_data["data"][0][0]["peakFrequency"][0][0]][:stub_events] - peak_amplitudes = [x[0] for x in mat_data["data"][0][0]["peakAmplitude"][0][0]][:stub_events] - ripples = mat_data["maps"][0][0]["ripples"][0][0][:stub_events] - frequencies = mat_data["maps"][0][0]["frequency"][0][0][:stub_events] - phases = 
mat_data["maps"][0][0]["phase"][0][0][:stub_events] - amplitudes = mat_data["maps"][0][0]["amplitude"][0][0][:stub_events] + if mat_file_is_scipy_readable: + mat_data = mat_file["ripples"] + start_and_stop_times = mat_data["timestamps"][0][0][:stub_events] + durations = [x[0] for x in mat_data["data"][0][0]["duration"][0][0]][:stub_events] + peaks = [x[0] for x in mat_data["peaks"][0][0]][:stub_events] + peak_normed_powers = [x[0] for x in mat_data["peakNormedPower"][0][0]][:stub_events] + peak_frequencies = [x[0] for x in mat_data["data"][0][0]["peakFrequency"][0][0]][:stub_events] + peak_amplitudes = [x[0] for x in mat_data["data"][0][0]["peakAmplitude"][0][0]][:stub_events] + ripples = mat_data["maps"][0][0]["ripples"][0][0][:stub_events] + frequencies = mat_data["maps"][0][0]["frequency"][0][0][:stub_events] + phases = mat_data["maps"][0][0]["phase"][0][0][:stub_events] + amplitudes = mat_data["maps"][0][0]["amplitude"][0][0][:stub_events] - descriptions = dict( - duration="Duration of the ripple event.", - peak="Peak of the ripple.", - peak_normed_power="Normed power of the peak.", - peak_frequency="Peak frequency of the ripple.", - peak_amplitude="Peak amplitude of the ripple.", - ) - indexed_descriptions = dict( - ripple="Extracted ripple data.", - frequency="Frequency of each point on the ripple.", - phase="Phase of each point on the ripple.", - amplitude="Amplitude of each point on the ripple.", - ) - - table = TimeIntervals(name=table_name, description=f"Identified {table_name} events and their metrics.") - for start_time, stop_time in start_and_stop_times: - table.add_row(start_time=ecephys_start_time + start_time, stop_time=ecephys_start_time + stop_time) - for column_name, column_data in zip( - list(descriptions), [durations, peaks, peak_normed_powers, peak_frequencies, peak_amplitudes] - ): - table.add_column( - name=column_name, - description=descriptions[column_name], - data=H5DataIO(column_data, compression="gzip"), + descriptions = dict( + duration="Duration of the ripple event.", + peak="Peak of the ripple.", + peak_normed_power="Normed power of the peak.", + peak_frequency="Peak frequency of the ripple.", + peak_amplitude="Peak amplitude of the ripple.", ) - for column_name, column_data in zip(list(indexed_descriptions), [ripples, frequencies, phases, amplitudes]): - table.add_column( - name=column_name, - description=indexed_descriptions[column_name], - index=list(range(column_data.shape[0])), - data=H5DataIO(column_data, compression="gzip"), + indexed_descriptions = dict( + ripple="Extracted ripple data.", + frequency="Frequency of each point on the ripple.", + phase="Phase of each point on the ripple.", + amplitude="Amplitude of each point on the ripple.", ) - processing_module.add(table) + + table = TimeIntervals(name=table_name, description=f"Identified {table_name} events and their metrics.") + for start_time, stop_time in start_and_stop_times: + table.add_row(start_time=ecephys_start_time + start_time, stop_time=ecephys_start_time + stop_time) + for column_name, column_data in zip( + list(descriptions), [durations, peaks, peak_normed_powers, peak_frequencies, peak_amplitudes] + ): + table.add_column( + name=column_name, + description=descriptions[column_name], + data=H5DataIO(column_data, compression="gzip"), + ) + for column_name, column_data in zip( + list(indexed_descriptions), [ripples, frequencies, phases, amplitudes] + ): + table.add_column( + name=column_name, + description=indexed_descriptions[column_name], + index=list(range(column_data.shape[0])), + 
data=H5DataIO(column_data, compression="gzip"), + ) + processing_module.add(table) From 7b30104b92b1b179e5e38141999a873b4f1604bc Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sun, 24 Apr 2022 11:28:54 -0400 Subject: [PATCH 17/40] add prints --- .../common_interfaces/sleepstatesinterface.py | 56 +++++++++---------- .../tingleymetabolicripplesinterface.py | 1 + 2 files changed, 28 insertions(+), 29 deletions(-) diff --git a/buzsaki_lab_to_nwb/common_interfaces/sleepstatesinterface.py b/buzsaki_lab_to_nwb/common_interfaces/sleepstatesinterface.py index b7f6ff0..18d9b8e 100644 --- a/buzsaki_lab_to_nwb/common_interfaces/sleepstatesinterface.py +++ b/buzsaki_lab_to_nwb/common_interfaces/sleepstatesinterface.py @@ -1,6 +1,4 @@ """Authors: Heberto Mayorquin and Cody Baker.""" -from pathlib import Path - from scipy.io import loadmat from pynwb import NWBFile from pynwb.file import TimeIntervals @@ -20,31 +18,31 @@ def run_conversion(self, nwbfile: NWBFile, metadata, ecephys_start_time: float = nwbfile=nwbfile, name="behavior", description="Contains behavioral data concerning classified states." ) - if Path(self.source_data["mat_file_path"]).exists(): - try: - mat_file = loadmat(file_name=self.source_data["mat_file_path"]) - mat_file_is_scipy_readable = True - except NotImplementedError: - mat_file_is_scipy_readable = False - - if mat_file_is_scipy_readable: # To-Do, re-do indexing for an hdfstorage reader - sleep_state_dic = mat_file["SleepState"]["ints"][0][0] - state_label_names = dict(WAKEstate="Awake", NREMstate="Non-REM", REMstate="REM", MAstate="MA") - table = TimeIntervals(name="sleep_states", description="Sleep state of the animal.") - table.add_column(name="label", description="Sleep state.") - - data = [] - for sleep_state in set(mat_file["SleepState"]["ints"][0][0].dtype.names): - values = sleep_state_dic[sleep_state][0][0] - if len(values) != 0 and isinstance(values[0], int): - values = [values] - for start_time, stop_time in values: - data.append( - dict( - start_time=ecephys_start_time + float(start_time), - stop_time=ecephys_start_time + float(stop_time), - label=state_label_names[sleep_state], - ) + try: + mat_file = loadmat(file_name=self.source_data["mat_file_path"]) + mat_file_is_scipy_readable = True + except NotImplementedError: + mat_file_is_scipy_readable = False + print(f"SleepStatesInterface is unable to convert {self.source_data['mat_file_path']} due to HDF5 version!") + + if mat_file_is_scipy_readable: # To-Do, re-do indexing for an hdfstorage reader + sleep_state_dic = mat_file["SleepState"]["ints"][0][0] + state_label_names = dict(WAKEstate="Awake", NREMstate="Non-REM", REMstate="REM", MAstate="MA") + table = TimeIntervals(name="sleep_states", description="Sleep state of the animal.") + table.add_column(name="label", description="Sleep state.") + + data = [] + for sleep_state in set(mat_file["SleepState"]["ints"][0][0].dtype.names): + values = sleep_state_dic[sleep_state][0][0] + if len(values) != 0 and isinstance(values[0], int): + values = [values] + for start_time, stop_time in values: + data.append( + dict( + start_time=ecephys_start_time + float(start_time), + stop_time=ecephys_start_time + float(stop_time), + label=state_label_names[sleep_state], ) - [table.add_row(**row) for row in sorted(data, key=lambda x: x["start_time"])] - processing_module.add(table) + ) + [table.add_row(**row) for row in sorted(data, key=lambda x: x["start_time"])] + processing_module.add(table) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py 
b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py index a42188b..fa4c5a4 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py @@ -27,6 +27,7 @@ def run_conversion(self, nwbfile: NWBFile, metadata, stub_test: bool = False, ec mat_file_is_scipy_readable = True except NotImplementedError: mat_file_is_scipy_readable = False + print(f"RippleInterface is unable to convert {self.source_data['mat_file_path']} due to HDF5 version!") if mat_file_is_scipy_readable: mat_data = mat_file["ripples"] From 095f5d9377ca662428243699a13c46183baffdf3 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sun, 24 Apr 2022 12:37:41 -0400 Subject: [PATCH 18/40] fix env name --- tingley_metabolic_environment.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tingley_metabolic_environment.yml b/tingley_metabolic_environment.yml index 97a64dc..0c1e7ef 100644 --- a/tingley_metabolic_environment.yml +++ b/tingley_metabolic_environment.yml @@ -1,4 +1,4 @@ -name: buzsaki_tingley_long_term +name: buzsaki_tingley_metabolic channels: - defaults - anaconda @@ -9,4 +9,4 @@ dependencies: - git - pip: - -e . - - -r buzsaki_lab_to_nwb/tingley_long_term/tingley_long_term_requirements.txt + - -r buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_requirements.txt From c39210a836690feb8b53363e809baf0801154a44 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sun, 24 Apr 2022 12:49:36 -0400 Subject: [PATCH 19/40] add final path for simultaneous upload --- .../tingley_metabolic/convert_tingley_metabolic.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py b/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py index ffad29b..3324066 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py @@ -1,4 +1,5 @@ """Run entire conversion.""" +import os from pathlib import Path from concurrent.futures import ProcessPoolExecutor, as_completed from datetime import timedelta @@ -169,6 +170,7 @@ def convert_session(session_path, nwbfile_path): conversion_options=conversion_options, overwrite=True, ) + os.system(f"mv {nwbfile_path} {nwb_final_output_path / nwbfile_path.name}") if n_jobs == 1: From 053e9addcdea0e41a0fe6b51d98c67f5c20bf8be Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sun, 24 Apr 2022 12:56:26 -0400 Subject: [PATCH 20/40] swap to pathlib --- .../tingley_metabolic/convert_tingley_metabolic.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py b/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py index 3324066..35508e0 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py @@ -1,5 +1,4 @@ """Run entire conversion.""" -import os from pathlib import Path from concurrent.futures import ProcessPoolExecutor, as_completed from datetime import timedelta @@ -44,8 +43,10 @@ if stub_test: nwb_output_path = data_path / "nwb_stub" else: - nwb_output_path = data_path / f"nwb_{subject_list[0]}" + nwb_output_path = data_path / f"nwb_{subject_list[0]}_running" + nwb_final_output_path = data_path / f"nwb_{subject_list[0]}" nwb_output_path.mkdir(exist_ok=True) +nwb_final_output_path.mkdir(exist_ok=True) if stub_test: @@ -170,7 +171,7 
@@ def convert_session(session_path, nwbfile_path): conversion_options=conversion_options, overwrite=True, ) - os.system(f"mv {nwbfile_path} {nwb_final_output_path / nwbfile_path.name}") + nwbfile_path.rename(nwb_final_output_path / nwbfile_path.name) if n_jobs == 1: From fab505f3d38ef0887ccb7de3e6183794df6663d3 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Wed, 4 May 2022 13:48:12 -0400 Subject: [PATCH 21/40] adding full automation --- .../convert_tingley_metabolic.py | 15 +- .../fully_automated_conversion.py | 225 ++++++++++++++++++ 2 files changed, 233 insertions(+), 7 deletions(-) create mode 100644 buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py b/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py index 35508e0..99e9b99 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py @@ -10,26 +10,26 @@ from buzsaki_lab_to_nwb.tingley_metabolic import TingleyMetabolicConverter, get_session_datetime -n_jobs = 10 +n_jobs = 1 progress_bar_options = dict(desc="Running conversion...", position=0, leave=False) -stub_test = False +stub_test = True conversion_factor = 0.195 # Intan -buffer_gb = 10 +buffer_gb = 1 # note that on DANDIHub, max number of actual I/O operations on processes seems limited to 8-10, # so total mem isn't technically buffer_gb * n_jobs data_path = Path("/shared/catalystneuro/TingleyD/") home_path = Path("/home/jovyan/") -# data_path = Path("E:/BuzsakiData/TingleyD") -# home_path = Path("E:/BuzsakiData/TingleyD/") +data_path = Path("E:/BuzsakiData/TingleyD") +home_path = Path("E:/BuzsakiData/TingleyD/") metadata_path = Path(__file__).parent / "tingley_metabolic_metadata.yml" subject_info_path = Path(__file__).parent / "tingley_metabolic_subject_info.yml" subject_list = [ - "CGM3" + "CGM4" ] # [1,2,3,4,30,31,32,36,37,39]] # This list will change based on what has finished transfering to the Hub session_path_list = [ session_path @@ -41,7 +41,8 @@ if stub_test: - nwb_output_path = data_path / "nwb_stub" + nwb_output_path = data_path / "nwb_{subject_list[0]}_running_stub" + nwb_final_output_path = data_path / f"nwb_{subject_list[0]}_stub" else: nwb_output_path = data_path / f"nwb_{subject_list[0]}_running" nwb_final_output_path = data_path / f"nwb_{subject_list[0]}" diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py new file mode 100644 index 0000000..41bcad4 --- /dev/null +++ b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py @@ -0,0 +1,225 @@ +"""Run entire conversion.""" +from pathlib import Path +from datetime import timedelta +from warnings import simplefilter + +from tqdm import tqdm +from nwb_conversion_tools.tools.data_transfers import ( + dandi_upload, + estimate_total_conversion_runtime, + estimate_s3_conversion_cost, + get_globus_dataset_content_sizes, + transfer_globus_content, +) +from nwb_conversion_tools.utils import load_dict_from_file, dict_deep_update +from spikeextractors import NeuroscopeRecordingExtractor + +from buzsaki_lab_to_nwb.tingley_metabolic import TingleyMetabolicConverter, get_session_datetime + +buzsaki_globus_endpoint_id = "188a6110-96db-11eb-b7a9-f57b2d55370d" +hub_globus_endpoint_id = "52de6745-40b8-4d2c-9a0b-610874c564f5" +dandiset_id = "000233" + +base_buzsaki_path = Path("TingleyD/Tingley2021_ripple_glucose_paper/") +subject 
= "CGM36" +all_content = get_globus_dataset_content_sizes( + globus_endpoint_id=buzsaki_globus_endpoint_id, path=(base_buzsaki_path / subject).as_posix() +) +sessions = list(set([Path(x).parent.name for x in all_content]) - set([""])) + +session_idx = 1 +session_id = sessions[session_idx] +assert f"{session_id}/{session_id}.lfp" in all_content, "Skip session_idx {session_idx} - bad session!" +content_to_attempt_transfer = [ + f"{session_id}/{session_id}.xml", + # f"{session_id}/{session_id}.dat", + f"{session_id}/{session_id}.lfp", + f"{session_id}/auxiliary.dat", + f"{session_id}/info.rhd", + f"{session_id}/{session_id}.SleepState.states.mat", + f"{session_id}/", +] +content_to_attempt_transfer.extend([x for x in all_content if Path(x).suffix == ".csv"]) +# Ripple files are a little trickier, can have multiple text forms +content_to_attempt_transfer.extend( + [ + x + for x in all_content + if Path(x).parent.name == session_id + for suffix in Path(x).suffixes + if "ripples" in suffix.lower() + ] +) +content_to_transfer = [x for x in content_to_attempt_transfer if x in all_content] + +content_to_transfer_size = sum([all_content[x] for x in content_to_transfer]) +total_time = estimate_total_conversion_runtime(total_mb=content_to_transfer_size / 1e6) +total_cost = estimate_s3_conversion_cost(total_mb=content_to_transfer_size / 1e6) +y_n = input( + f"Converting session {session_id} will cost an estimated ${total_cost} and take {total_time/3600} hours. " + "Continue? (y/n)" +) +assert y_n.lower() == "y" + + +progress_bar_options = dict(desc="Running conversion...", position=0, leave=False) +stub_test = False +conversion_factor = 0.195 # Intan +buffer_gb = 50 +# note that on DANDIHub, max number of actual I/O operations on processes seems limited to 8-10, +# so total mem isn't technically buffer_gb * n_jobs + +data_path = Path("/shared/catalystneuro/TingleyD/") +home_path = Path("/home/jovyan/") + +data_path = Path("E:/BuzsakiData/TingleyD") +home_path = Path("E:/BuzsakiData/TingleyD/") + +metadata_path = Path(__file__).parent / "tingley_metabolic_metadata.yml" +subject_info_path = Path(__file__).parent / "tingley_metabolic_subject_info.yml" + + +subject_list = [ + "CGM4" +] # [1,2,3,4,30,31,32,36,37,39]] # This list will change based on what has finished transfering to the Hub +session_path_list = [ + session_path + for subject_path in data_path.iterdir() + if subject_path.is_dir() and subject_path.stem in subject_list + for session_path in subject_path.iterdir() + if session_path.is_dir() +] + + +if stub_test: + nwb_output_path = data_path / "nwb_{subject_list[0]}_running_stub" + nwb_final_output_path = data_path / f"nwb_{subject_list[0]}_stub" +else: + nwb_output_path = data_path / f"nwb_{subject_list[0]}_running" + nwb_final_output_path = data_path / f"nwb_{subject_list[0]}" +nwb_output_path.mkdir(exist_ok=True) +nwb_final_output_path.mkdir(exist_ok=True) + + +if stub_test: + nwbfile_list = [nwb_output_path / f"{session.stem}_stub.nwb" for session in session_path_list] +else: + nwbfile_list = [nwb_output_path / f"{session.stem}.nwb" for session in session_path_list] + +global_metadata = load_dict_from_file(metadata_path) +subject_info_table = load_dict_from_file(subject_info_path) + +for session_path, nwbfile_path in tqdm(zip(session_path_list, nwbfile_list), **progress_bar_options): + simplefilter("ignore") + conversion_options = dict() + session_id = session_path.name + + xml_file_path = session_path / f"{session_id}.xml" + raw_file_path = session_path / f"{session_id}.dat" + lfp_file_path 
= session_path / f"{session_id}.lfp" + + aux_file_path = session_path / "auxiliary.dat" + rhd_file_path = session_path / "info.rhd" + sleep_mat_file_path = session_path / f"{session_id}.SleepState.states.mat" + ripple_mat_file_paths = [x for x in session_path.iterdir() for suffix in x.suffixes if "ripples" in suffix.lower()] + + # I know I'll need this for other sessions, just not yet + # if not raw_file_path.is_file() and (session_path / f"{session_id}.dat_orig").is_file: + # raw_file_path = session_path / f"{session_id}.dat_orig" + + # raw_file_path = session_path / f"{session_id}.dat" if (session_path / f"{session_id}.dat").is_file() else + ecephys_start_time = get_session_datetime(session_id=session_id) + ecephys_stop_time = ecephys_start_time + timedelta( + seconds=NeuroscopeRecordingExtractor(file_path=lfp_file_path, xml_file_path=xml_file_path).get_num_frames() + / 1250.0 + ) + source_data = dict( + Glucose=dict( + session_path=str(session_path), + ecephys_start_time=str(ecephys_start_time), + ecephys_stop_time=str(ecephys_stop_time), + ), + NeuroscopeLFP=dict( + file_path=str(lfp_file_path), + gain=conversion_factor, + xml_file_path=str(xml_file_path), + spikeextractors_backend=True, + ), + ) + + if raw_file_path.is_file(): + source_data.update( + NeuroscopeRecording=dict( + file_path=str(raw_file_path), + gain=conversion_factor, + xml_file_path=str(xml_file_path), + spikeextractors_backend=True, + ) + ) + conversion_options.update(NeuroscopeRecording=dict(stub_test=stub_test)) + + if aux_file_path.is_file() and rhd_file_path.is_file(): + source_data.update(Accelerometer=dict(dat_file_path=str(aux_file_path), rhd_file_path=str(rhd_file_path))) + + if sleep_mat_file_path.is_file(): + source_data.update(SleepStates=dict(mat_file_path=str(sleep_mat_file_path))) + + if any(ripple_mat_file_paths): + source_data.update(Ripples=dict(mat_file_paths=ripple_mat_file_paths)) + + converter = TingleyMetabolicConverter(source_data=source_data) + metadata = converter.get_metadata() + metadata = dict_deep_update(metadata, global_metadata) + session_description = "Consult Supplementary Table 1 from the publication for more information about this session." + metadata["NWBFile"].update( + # session_description=subject_info_table.get( + # metadata["Subject"]["subject_id"], + # "Consult Supplementary Table 1 from the publication for more information about this session.", + # ), + # experiment_description=subject_info_table.get( + # metadata["Subject"]["subject_id"], + # "Consult Supplementary Table 1 from the publication for more information about this session.", + # ), + # Since no mapping of subject_ids to ST1, just leave this for all. 
+ session_description=session_description, + experiment_description=session_description, + ) + if metadata["Ecephys"]["Device"][0]["name"] == "Device_ecephys": + del metadata["Ecephys"]["Device"][0] + for electrode_group_metadata in metadata["Ecephys"]["ElectrodeGroup"]: + electrode_group_metadata.update(device=metadata["Ecephys"]["Device"][0]["name"]) + + ecephys_start_time_increment = ( + ecephys_start_time - converter.data_interface_objects["Glucose"].session_start_time + ).total_seconds() + conversion_options.update( + NeuroscopeLFP=dict( + stub_test=stub_test, starting_time=ecephys_start_time_increment, iterator_opts=dict(buffer_gb=buffer_gb) + ) + ) + if raw_file_path.is_file(): + conversion_options.update( + NeuroscopeRecording=dict( + stub_test=stub_test, + starting_time=ecephys_start_time_increment, + es_key="ElectricalSeries_raw", + iterator_opts=dict(buffer_gb=buffer_gb), + ) + ) + if aux_file_path.is_file() and rhd_file_path.is_file(): + conversion_options.update( + Accelerometer=dict(stub_test=stub_test, ecephys_start_time=ecephys_start_time_increment) + ) + if sleep_mat_file_path.is_file(): + conversion_options.update(SleepStates=dict(ecephys_start_time=ecephys_start_time_increment)) + if any(ripple_mat_file_paths): + conversion_options.update(Ripples=dict(stub_test=stub_test, ecephys_start_time=ecephys_start_time_increment)) + + converter.run_conversion( + nwbfile_path=str(nwbfile_path), + metadata=metadata, + conversion_options=conversion_options, + overwrite=True, + ) + nwbfile_path.rename(nwb_final_output_path / nwbfile_path.name) + dandi_upload(dandiset_id=dandiset_id, nwb_folder_path=nwb_final_output_path) From 40ea98703b7a98079d684b498c641d9a56641841 Mon Sep 17 00:00:00 2001 From: CodyCBakerPhD Date: Wed, 4 May 2022 18:56:42 +0000 Subject: [PATCH 22/40] debugs --- .../fully_automated_conversion.py | 265 ++++++++---------- 1 file changed, 122 insertions(+), 143 deletions(-) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py index 41bcad4..63c7e6d 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py @@ -2,8 +2,9 @@ from pathlib import Path from datetime import timedelta from warnings import simplefilter +from shutil import rmtree +from natsort import natsorted -from tqdm import tqdm from nwb_conversion_tools.tools.data_transfers import ( dandi_upload, estimate_total_conversion_runtime, @@ -17,22 +18,22 @@ from buzsaki_lab_to_nwb.tingley_metabolic import TingleyMetabolicConverter, get_session_datetime buzsaki_globus_endpoint_id = "188a6110-96db-11eb-b7a9-f57b2d55370d" -hub_globus_endpoint_id = "52de6745-40b8-4d2c-9a0b-610874c564f5" +hub_globus_endpoint_id = "2b9b4d14-82a8-11ec-9f34-ed182a728dff" dandiset_id = "000233" -base_buzsaki_path = Path("TingleyD/Tingley2021_ripple_glucose_paper/") -subject = "CGM36" +base_buzsaki_path = Path("/TingleyD/Tingley2021_ripple_glucose_paper/") +subject_id = "CGM36" all_content = get_globus_dataset_content_sizes( - globus_endpoint_id=buzsaki_globus_endpoint_id, path=(base_buzsaki_path / subject).as_posix() + globus_endpoint_id=buzsaki_globus_endpoint_id, path=(base_buzsaki_path / subject_id).as_posix() ) -sessions = list(set([Path(x).parent.name for x in all_content]) - set([""])) +sessions = natsorted(list(set([Path(x).parent.name for x in all_content]) - set([""]))) -session_idx = 1 +session_idx = 0 session_id = sessions[session_idx] assert 
f"{session_id}/{session_id}.lfp" in all_content, "Skip session_idx {session_idx} - bad session!" content_to_attempt_transfer = [ f"{session_id}/{session_id}.xml", - # f"{session_id}/{session_id}.dat", + f"{session_id}/{session_id}.dat", f"{session_id}/{session_id}.lfp", f"{session_id}/auxiliary.dat", f"{session_id}/info.rhd", @@ -62,164 +63,142 @@ assert y_n.lower() == "y" -progress_bar_options = dict(desc="Running conversion...", position=0, leave=False) stub_test = False conversion_factor = 0.195 # Intan buffer_gb = 50 -# note that on DANDIHub, max number of actual I/O operations on processes seems limited to 8-10, -# so total mem isn't technically buffer_gb * n_jobs data_path = Path("/shared/catalystneuro/TingleyD/") home_path = Path("/home/jovyan/") -data_path = Path("E:/BuzsakiData/TingleyD") -home_path = Path("E:/BuzsakiData/TingleyD/") +# data_path = Path("E:/BuzsakiData/TingleyD") +# home_path = Path("E:/BuzsakiData/TingleyD/") metadata_path = Path(__file__).parent / "tingley_metabolic_metadata.yml" subject_info_path = Path(__file__).parent / "tingley_metabolic_subject_info.yml" -subject_list = [ - "CGM4" -] # [1,2,3,4,30,31,32,36,37,39]] # This list will change based on what has finished transfering to the Hub -session_path_list = [ - session_path - for subject_path in data_path.iterdir() - if subject_path.is_dir() and subject_path.stem in subject_list - for session_path in subject_path.iterdir() - if session_path.is_dir() -] +nwb_output_path = data_path / f"nwb_{session_id}" +nwb_output_path.mkdir(exist_ok=True) +nwbfile_path = nwb_output_path / f"{session_id}.nwb" +session_path = data_path / f"{session_id}" +session_path.mkdir(exist_ok=True) + +transfer_globus_content( + source_endpoint_id=buzsaki_globus_endpoint_id, + source_files=[ + [base_buzsaki_path / subject_id / x for x in content_to_transfer if ".csv" in x], + [base_buzsaki_path / subject_id / x for x in content_to_transfer if ".csv" not in x], + ], + destination_endpoint_id=hub_globus_endpoint_id, + destination_folder=session_path, + progress_update_rate=30.0, + progress_update_timeout=total_time * 2, +) +global_metadata = load_dict_from_file(metadata_path) +subject_info_table = load_dict_from_file(subject_info_path) -if stub_test: - nwb_output_path = data_path / "nwb_{subject_list[0]}_running_stub" - nwb_final_output_path = data_path / f"nwb_{subject_list[0]}_stub" -else: - nwb_output_path = data_path / f"nwb_{subject_list[0]}_running" - nwb_final_output_path = data_path / f"nwb_{subject_list[0]}" -nwb_output_path.mkdir(exist_ok=True) -nwb_final_output_path.mkdir(exist_ok=True) +simplefilter("ignore") +conversion_options = dict() +xml_file_path = session_path / f"{session_id}.xml" +raw_file_path = session_path / f"{session_id}.dat" +lfp_file_path = session_path / f"{session_id}.lfp" -if stub_test: - nwbfile_list = [nwb_output_path / f"{session.stem}_stub.nwb" for session in session_path_list] -else: - nwbfile_list = [nwb_output_path / f"{session.stem}.nwb" for session in session_path_list] +aux_file_path = session_path / "auxiliary.dat" +rhd_file_path = session_path / "info.rhd" +sleep_mat_file_path = session_path / f"{session_id}.SleepState.states.mat" +ripple_mat_file_paths = [x for x in session_path.iterdir() for suffix in x.suffixes if "ripples" in suffix.lower()] -global_metadata = load_dict_from_file(metadata_path) -subject_info_table = load_dict_from_file(subject_info_path) +ecephys_start_time = get_session_datetime(session_id=session_id) +ecephys_stop_time = ecephys_start_time + timedelta( + 
seconds=NeuroscopeRecordingExtractor(file_path=lfp_file_path, xml_file_path=xml_file_path).get_num_frames() / 1250.0 +) +source_data = dict( + Glucose=dict( + session_path=str(session_path), + ecephys_start_time=str(ecephys_start_time), + ecephys_stop_time=str(ecephys_stop_time), + ), + NeuroscopeLFP=dict( + file_path=str(lfp_file_path), + gain=conversion_factor, + xml_file_path=str(xml_file_path), + spikeextractors_backend=True, + ), +) -for session_path, nwbfile_path in tqdm(zip(session_path_list, nwbfile_list), **progress_bar_options): - simplefilter("ignore") - conversion_options = dict() - session_id = session_path.name - - xml_file_path = session_path / f"{session_id}.xml" - raw_file_path = session_path / f"{session_id}.dat" - lfp_file_path = session_path / f"{session_id}.lfp" - - aux_file_path = session_path / "auxiliary.dat" - rhd_file_path = session_path / "info.rhd" - sleep_mat_file_path = session_path / f"{session_id}.SleepState.states.mat" - ripple_mat_file_paths = [x for x in session_path.iterdir() for suffix in x.suffixes if "ripples" in suffix.lower()] - - # I know I'll need this for other sessions, just not yet - # if not raw_file_path.is_file() and (session_path / f"{session_id}.dat_orig").is_file: - # raw_file_path = session_path / f"{session_id}.dat_orig" - - # raw_file_path = session_path / f"{session_id}.dat" if (session_path / f"{session_id}.dat").is_file() else - ecephys_start_time = get_session_datetime(session_id=session_id) - ecephys_stop_time = ecephys_start_time + timedelta( - seconds=NeuroscopeRecordingExtractor(file_path=lfp_file_path, xml_file_path=xml_file_path).get_num_frames() - / 1250.0 - ) - source_data = dict( - Glucose=dict( - session_path=str(session_path), - ecephys_start_time=str(ecephys_start_time), - ecephys_stop_time=str(ecephys_stop_time), - ), - NeuroscopeLFP=dict( - file_path=str(lfp_file_path), +if raw_file_path.is_file(): + source_data.update( + NeuroscopeRecording=dict( + file_path=str(raw_file_path), gain=conversion_factor, xml_file_path=str(xml_file_path), spikeextractors_backend=True, - ), - ) - - if raw_file_path.is_file(): - source_data.update( - NeuroscopeRecording=dict( - file_path=str(raw_file_path), - gain=conversion_factor, - xml_file_path=str(xml_file_path), - spikeextractors_backend=True, - ) ) - conversion_options.update(NeuroscopeRecording=dict(stub_test=stub_test)) - - if aux_file_path.is_file() and rhd_file_path.is_file(): - source_data.update(Accelerometer=dict(dat_file_path=str(aux_file_path), rhd_file_path=str(rhd_file_path))) - - if sleep_mat_file_path.is_file(): - source_data.update(SleepStates=dict(mat_file_path=str(sleep_mat_file_path))) - - if any(ripple_mat_file_paths): - source_data.update(Ripples=dict(mat_file_paths=ripple_mat_file_paths)) - - converter = TingleyMetabolicConverter(source_data=source_data) - metadata = converter.get_metadata() - metadata = dict_deep_update(metadata, global_metadata) - session_description = "Consult Supplementary Table 1 from the publication for more information about this session." - metadata["NWBFile"].update( - # session_description=subject_info_table.get( - # metadata["Subject"]["subject_id"], - # "Consult Supplementary Table 1 from the publication for more information about this session.", - # ), - # experiment_description=subject_info_table.get( - # metadata["Subject"]["subject_id"], - # "Consult Supplementary Table 1 from the publication for more information about this session.", - # ), - # Since no mapping of subject_ids to ST1, just leave this for all. 
- session_description=session_description, - experiment_description=session_description, ) - if metadata["Ecephys"]["Device"][0]["name"] == "Device_ecephys": - del metadata["Ecephys"]["Device"][0] - for electrode_group_metadata in metadata["Ecephys"]["ElectrodeGroup"]: - electrode_group_metadata.update(device=metadata["Ecephys"]["Device"][0]["name"]) - - ecephys_start_time_increment = ( - ecephys_start_time - converter.data_interface_objects["Glucose"].session_start_time - ).total_seconds() - conversion_options.update( - NeuroscopeLFP=dict( - stub_test=stub_test, starting_time=ecephys_start_time_increment, iterator_opts=dict(buffer_gb=buffer_gb) - ) + conversion_options.update(NeuroscopeRecording=dict(stub_test=stub_test)) + +if aux_file_path.is_file() and rhd_file_path.is_file(): + source_data.update(Accelerometer=dict(dat_file_path=str(aux_file_path), rhd_file_path=str(rhd_file_path))) + +if sleep_mat_file_path.is_file(): + source_data.update(SleepStates=dict(mat_file_path=str(sleep_mat_file_path))) + +if any(ripple_mat_file_paths): + source_data.update(Ripples=dict(mat_file_paths=ripple_mat_file_paths)) + +converter = TingleyMetabolicConverter(source_data=source_data) +metadata = converter.get_metadata() +metadata = dict_deep_update(metadata, global_metadata) +session_description = "Consult Supplementary Table 1 from the publication for more information about this session." +metadata["NWBFile"].update( + # session_description=subject_info_table.get( + # metadata["Subject"]["subject_id"], + # "Consult Supplementary Table 1 from the publication for more information about this session.", + # ), + # experiment_description=subject_info_table.get( + # metadata["Subject"]["subject_id"], + # "Consult Supplementary Table 1 from the publication for more information about this session.", + # ), + # Since no mapping of subject_ids to ST1, just leave this for all. 
+ session_description=session_description, + experiment_description=session_description, +) +if metadata["Ecephys"]["Device"][0]["name"] == "Device_ecephys": + del metadata["Ecephys"]["Device"][0] +for electrode_group_metadata in metadata["Ecephys"]["ElectrodeGroup"]: + electrode_group_metadata.update(device=metadata["Ecephys"]["Device"][0]["name"]) + +ecephys_start_time_increment = ( + ecephys_start_time - converter.data_interface_objects["Glucose"].session_start_time +).total_seconds() +conversion_options.update( + NeuroscopeLFP=dict( + stub_test=stub_test, starting_time=ecephys_start_time_increment, iterator_opts=dict(buffer_gb=buffer_gb) ) - if raw_file_path.is_file(): - conversion_options.update( - NeuroscopeRecording=dict( - stub_test=stub_test, - starting_time=ecephys_start_time_increment, - es_key="ElectricalSeries_raw", - iterator_opts=dict(buffer_gb=buffer_gb), - ) - ) - if aux_file_path.is_file() and rhd_file_path.is_file(): - conversion_options.update( - Accelerometer=dict(stub_test=stub_test, ecephys_start_time=ecephys_start_time_increment) +) +if raw_file_path.is_file(): + conversion_options.update( + NeuroscopeRecording=dict( + stub_test=stub_test, + starting_time=ecephys_start_time_increment, + es_key="ElectricalSeries_raw", + iterator_opts=dict(buffer_gb=buffer_gb), ) - if sleep_mat_file_path.is_file(): - conversion_options.update(SleepStates=dict(ecephys_start_time=ecephys_start_time_increment)) - if any(ripple_mat_file_paths): - conversion_options.update(Ripples=dict(stub_test=stub_test, ecephys_start_time=ecephys_start_time_increment)) - - converter.run_conversion( - nwbfile_path=str(nwbfile_path), - metadata=metadata, - conversion_options=conversion_options, - overwrite=True, ) - nwbfile_path.rename(nwb_final_output_path / nwbfile_path.name) - dandi_upload(dandiset_id=dandiset_id, nwb_folder_path=nwb_final_output_path) +if aux_file_path.is_file() and rhd_file_path.is_file(): + conversion_options.update(Accelerometer=dict(stub_test=stub_test, ecephys_start_time=ecephys_start_time_increment)) +if sleep_mat_file_path.is_file(): + conversion_options.update(SleepStates=dict(ecephys_start_time=ecephys_start_time_increment)) +if any(ripple_mat_file_paths): + conversion_options.update(Ripples=dict(stub_test=stub_test, ecephys_start_time=ecephys_start_time_increment)) + +converter.run_conversion( + nwbfile_path=str(nwbfile_path), + metadata=metadata, + conversion_options=conversion_options, + overwrite=True, +) +rmtree(session_path) +dandi_upload(dandiset_id=dandiset_id, nwb_folder_path=nwb_output_path) From 9251cc8fb24a19234aa518385d944935dff75415 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Wed, 4 May 2022 15:11:18 -0400 Subject: [PATCH 23/40] debug --- .../fully_automated_conversion.py | 23 ++++++++++--------- .../tingley_metabolic_utils.py | 6 +++-- .../tingleymetabolicglucoseinterface.py | 1 + 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py index 63c7e6d..133dc2b 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py @@ -18,9 +18,20 @@ from buzsaki_lab_to_nwb.tingley_metabolic import TingleyMetabolicConverter, get_session_datetime buzsaki_globus_endpoint_id = "188a6110-96db-11eb-b7a9-f57b2d55370d" -hub_globus_endpoint_id = "2b9b4d14-82a8-11ec-9f34-ed182a728dff" +# hub_globus_endpoint_id = 
"2b9b4d14-82a8-11ec-9f34-ed182a728dff" +hub_globus_endpoint_id = "3d82aa0a-bc1d-11ec-8f83-e31722b18688" dandiset_id = "000233" +stub_test = False +conversion_factor = 0.195 # Intany +buffer_gb = 50 + +data_path = Path("/shared/catalystneuro/TingleyD/") +home_path = Path("/home/jovyan/") + +data_path = Path("C:/Users/Raven/Documents/TingleyD/") + + base_buzsaki_path = Path("/TingleyD/Tingley2021_ripple_glucose_paper/") subject_id = "CGM36" all_content = get_globus_dataset_content_sizes( @@ -63,16 +74,6 @@ assert y_n.lower() == "y" -stub_test = False -conversion_factor = 0.195 # Intan -buffer_gb = 50 - -data_path = Path("/shared/catalystneuro/TingleyD/") -home_path = Path("/home/jovyan/") - -# data_path = Path("E:/BuzsakiData/TingleyD") -# home_path = Path("E:/BuzsakiData/TingleyD/") - metadata_path = Path(__file__).parent / "tingley_metabolic_metadata.yml" subject_info_path = Path(__file__).parent / "tingley_metabolic_subject_info.yml" diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_utils.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_utils.py index 915ce43..a766d87 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_utils.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_utils.py @@ -9,8 +9,10 @@ def load_subject_glucose_series(session_path) -> (List[datetime], List[float]): """Given the subject_id string and the ecephys session_path, load all glucose series data for further parsing.""" - subject_path = Path(session_path).parent - all_csv = [x for x in subject_path.iterdir() if ".csv" in x.suffixes] + all_csv = [x for x in Path(session_path).iterdir() if ".csv" in x.suffixes] + if not all_csv: + subject_path = Path(session_path).parent + all_csv = [x for x in subject_path.iterdir() if ".csv" in x.suffixes] timestamps = [] isig = [] diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicglucoseinterface.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicglucoseinterface.py index f32ed49..d1b2884 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicglucoseinterface.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicglucoseinterface.py @@ -13,6 +13,7 @@ class TingleyMetabolicGlucoseInterface(BaseDataInterface): def __init__(self, session_path: FilePathType, ecephys_start_time: str, ecephys_stop_time: str): glucose_timestamps, glucose_isig = load_subject_glucose_series(session_path=session_path) + print(glucose_timestamps) self.session_start_time = glucose_timestamps[0] glucose_timestamps_floats_from_datetime = [ (glucose_timestamp - self.session_start_time).total_seconds() for glucose_timestamp in glucose_timestamps From 7097944745593e1152824458f83e0c7a870a2cbe Mon Sep 17 00:00:00 2001 From: CodyCBakerPhD Date: Wed, 4 May 2022 19:47:07 +0000 Subject: [PATCH 24/40] finally working --- .../fully_automated_conversion.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py index 133dc2b..3933911 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py @@ -4,6 +4,7 @@ from warnings import simplefilter from shutil import rmtree from natsort import natsorted +from time import sleep from nwb_conversion_tools.tools.data_transfers import ( dandi_upload, @@ -18,8 +19,8 @@ from buzsaki_lab_to_nwb.tingley_metabolic import 
TingleyMetabolicConverter, get_session_datetime buzsaki_globus_endpoint_id = "188a6110-96db-11eb-b7a9-f57b2d55370d" -# hub_globus_endpoint_id = "2b9b4d14-82a8-11ec-9f34-ed182a728dff" -hub_globus_endpoint_id = "3d82aa0a-bc1d-11ec-8f83-e31722b18688" +hub_globus_endpoint_id = "2b9b4d14-82a8-11ec-9f34-ed182a728dff" +#hub_globus_endpoint_id = "3d82aa0a-bc1d-11ec-8f83-e31722b18688" dandiset_id = "000233" stub_test = False @@ -29,7 +30,7 @@ data_path = Path("/shared/catalystneuro/TingleyD/") home_path = Path("/home/jovyan/") -data_path = Path("C:/Users/Raven/Documents/TingleyD/") +#data_path = Path("C:/Users/Raven/Documents/TingleyD/") base_buzsaki_path = Path("/TingleyD/Tingley2021_ripple_glucose_paper/") @@ -39,7 +40,7 @@ ) sessions = natsorted(list(set([Path(x).parent.name for x in all_content]) - set([""]))) -session_idx = 0 +session_idx = 2 session_id = sessions[session_idx] assert f"{session_id}/{session_id}.lfp" in all_content, "Skip session_idx {session_idx} - bad session!" content_to_attempt_transfer = [ @@ -201,5 +202,10 @@ conversion_options=conversion_options, overwrite=True, ) -rmtree(session_path) +for j in range(1, 4): + try: + rmtree(session_path) + except OSError: + print(f"Attempt #{j} to remove sesion path...") + sleep(5) dandi_upload(dandiset_id=dandiset_id, nwb_folder_path=nwb_output_path) From 9e14355c203005f6eb410573e71ac244ce292762 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Wed, 4 May 2022 19:47:50 +0000 Subject: [PATCH 25/40] Automated changes --- .../tingley_metabolic/fully_automated_conversion.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py index 3933911..ab943ad 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py @@ -20,7 +20,7 @@ buzsaki_globus_endpoint_id = "188a6110-96db-11eb-b7a9-f57b2d55370d" hub_globus_endpoint_id = "2b9b4d14-82a8-11ec-9f34-ed182a728dff" -#hub_globus_endpoint_id = "3d82aa0a-bc1d-11ec-8f83-e31722b18688" +# hub_globus_endpoint_id = "3d82aa0a-bc1d-11ec-8f83-e31722b18688" dandiset_id = "000233" stub_test = False @@ -30,7 +30,7 @@ data_path = Path("/shared/catalystneuro/TingleyD/") home_path = Path("/home/jovyan/") -#data_path = Path("C:/Users/Raven/Documents/TingleyD/") +# data_path = Path("C:/Users/Raven/Documents/TingleyD/") base_buzsaki_path = Path("/TingleyD/Tingley2021_ripple_glucose_paper/") From 427f9d1ad81f8523bdf348f4b6e4280c1804eb80 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Wed, 4 May 2022 15:58:00 -0400 Subject: [PATCH 26/40] subject looping with prompt --- .../fully_automated_conversion.py | 323 +++++++++--------- 1 file changed, 167 insertions(+), 156 deletions(-) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py index ab943ad..9bced5c 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py @@ -4,7 +4,6 @@ from warnings import simplefilter from shutil import rmtree from natsort import natsorted -from time import sleep from nwb_conversion_tools.tools.data_transfers import ( dandi_upload, @@ -40,172 +39,184 @@ ) sessions = natsorted(list(set([Path(x).parent.name for x in all_content]) - set([""]))) -session_idx = 2 -session_id = sessions[session_idx] -assert 
f"{session_id}/{session_id}.lfp" in all_content, "Skip session_idx {session_idx} - bad session!" -content_to_attempt_transfer = [ - f"{session_id}/{session_id}.xml", - f"{session_id}/{session_id}.dat", - f"{session_id}/{session_id}.lfp", - f"{session_id}/auxiliary.dat", - f"{session_id}/info.rhd", - f"{session_id}/{session_id}.SleepState.states.mat", - f"{session_id}/", -] -content_to_attempt_transfer.extend([x for x in all_content if Path(x).suffix == ".csv"]) -# Ripple files are a little trickier, can have multiple text forms -content_to_attempt_transfer.extend( - [ - x - for x in all_content - if Path(x).parent.name == session_id - for suffix in Path(x).suffixes - if "ripples" in suffix.lower() +session_idxs = set(range(len(sessions))) - set([0, 2]) +for session_idx in session_idxs: + session_id = sessions[session_idx] + # assert f"{session_id}/{session_id}.lfp" in all_content, "Skip session_idx {session_idx} - bad session!" + if f"{session_id}/{session_id}.lfp" not in all_content: + y_n = input( + f"Skipping session_id {session_id} because there was no LFP (and hence likely a bad session). " + "Continue? (y/n) " + ) + assert y_n.lower() == "y" + continue + content_to_attempt_transfer = [ + f"{session_id}/{session_id}.xml", + f"{session_id}/{session_id}.dat", + f"{session_id}/{session_id}.lfp", + f"{session_id}/auxiliary.dat", + f"{session_id}/info.rhd", + f"{session_id}/{session_id}.SleepState.states.mat", + f"{session_id}/", ] -) -content_to_transfer = [x for x in content_to_attempt_transfer if x in all_content] - -content_to_transfer_size = sum([all_content[x] for x in content_to_transfer]) -total_time = estimate_total_conversion_runtime(total_mb=content_to_transfer_size / 1e6) -total_cost = estimate_s3_conversion_cost(total_mb=content_to_transfer_size / 1e6) -y_n = input( - f"Converting session {session_id} will cost an estimated ${total_cost} and take {total_time/3600} hours. " - "Continue? 
(y/n)" -) -assert y_n.lower() == "y" - - -metadata_path = Path(__file__).parent / "tingley_metabolic_metadata.yml" -subject_info_path = Path(__file__).parent / "tingley_metabolic_subject_info.yml" - - -nwb_output_path = data_path / f"nwb_{session_id}" -nwb_output_path.mkdir(exist_ok=True) -nwbfile_path = nwb_output_path / f"{session_id}.nwb" -session_path = data_path / f"{session_id}" -session_path.mkdir(exist_ok=True) - -transfer_globus_content( - source_endpoint_id=buzsaki_globus_endpoint_id, - source_files=[ - [base_buzsaki_path / subject_id / x for x in content_to_transfer if ".csv" in x], - [base_buzsaki_path / subject_id / x for x in content_to_transfer if ".csv" not in x], - ], - destination_endpoint_id=hub_globus_endpoint_id, - destination_folder=session_path, - progress_update_rate=30.0, - progress_update_timeout=total_time * 2, -) + content_to_attempt_transfer.extend([x for x in all_content if Path(x).suffix == ".csv"]) + # Ripple files are a little trickier, can have multiple text forms + content_to_attempt_transfer.extend( + [ + x + for x in all_content + if Path(x).parent.name == session_id + for suffix in Path(x).suffixes + if "ripples" in suffix.lower() + ] + ) + content_to_transfer = [x for x in content_to_attempt_transfer if x in all_content] + + content_to_transfer_size = sum([all_content[x] for x in content_to_transfer]) + total_time = estimate_total_conversion_runtime(total_mb=content_to_transfer_size / 1e6) + total_cost = estimate_s3_conversion_cost(total_mb=content_to_transfer_size / 1e6) + y_n = input( + f"Converting session {session_id} will cost an estimated ${total_cost} and take {total_time/3600} hours. " + "Continue? (y/n)" + ) + assert y_n.lower() == "y" + + metadata_path = Path(__file__).parent / "tingley_metabolic_metadata.yml" + subject_info_path = Path(__file__).parent / "tingley_metabolic_subject_info.yml" + + nwb_output_path = data_path / f"nwb_{session_id}" + nwb_output_path.mkdir(exist_ok=True) + nwbfile_path = nwb_output_path / f"{session_id}.nwb" + session_path = data_path / f"{session_id}" + session_path.mkdir(exist_ok=True) + + transfer_globus_content( + source_endpoint_id=buzsaki_globus_endpoint_id, + source_files=[ + [base_buzsaki_path / subject_id / x for x in content_to_transfer if ".csv" in x], + [base_buzsaki_path / subject_id / x for x in content_to_transfer if ".csv" not in x], + ], + destination_endpoint_id=hub_globus_endpoint_id, + destination_folder=session_path, + progress_update_rate=30.0, + progress_update_timeout=total_time * 2, + ) -global_metadata = load_dict_from_file(metadata_path) -subject_info_table = load_dict_from_file(subject_info_path) + global_metadata = load_dict_from_file(metadata_path) + subject_info_table = load_dict_from_file(subject_info_path) -simplefilter("ignore") -conversion_options = dict() + simplefilter("ignore") + conversion_options = dict() -xml_file_path = session_path / f"{session_id}.xml" -raw_file_path = session_path / f"{session_id}.dat" -lfp_file_path = session_path / f"{session_id}.lfp" + xml_file_path = session_path / f"{session_id}.xml" + raw_file_path = session_path / f"{session_id}.dat" + lfp_file_path = session_path / f"{session_id}.lfp" -aux_file_path = session_path / "auxiliary.dat" -rhd_file_path = session_path / "info.rhd" -sleep_mat_file_path = session_path / f"{session_id}.SleepState.states.mat" -ripple_mat_file_paths = [x for x in session_path.iterdir() for suffix in x.suffixes if "ripples" in suffix.lower()] + aux_file_path = session_path / "auxiliary.dat" + rhd_file_path = session_path / 
"info.rhd" + sleep_mat_file_path = session_path / f"{session_id}.SleepState.states.mat" + ripple_mat_file_paths = [x for x in session_path.iterdir() for suffix in x.suffixes if "ripples" in suffix.lower()] -ecephys_start_time = get_session_datetime(session_id=session_id) -ecephys_stop_time = ecephys_start_time + timedelta( - seconds=NeuroscopeRecordingExtractor(file_path=lfp_file_path, xml_file_path=xml_file_path).get_num_frames() / 1250.0 -) -source_data = dict( - Glucose=dict( - session_path=str(session_path), - ecephys_start_time=str(ecephys_start_time), - ecephys_stop_time=str(ecephys_stop_time), - ), - NeuroscopeLFP=dict( - file_path=str(lfp_file_path), - gain=conversion_factor, - xml_file_path=str(xml_file_path), - spikeextractors_backend=True, - ), -) - -if raw_file_path.is_file(): - source_data.update( - NeuroscopeRecording=dict( - file_path=str(raw_file_path), + ecephys_start_time = get_session_datetime(session_id=session_id) + ecephys_stop_time = ecephys_start_time + timedelta( + seconds=NeuroscopeRecordingExtractor(file_path=lfp_file_path, xml_file_path=xml_file_path).get_num_frames() + / 1250.0 + ) + source_data = dict( + Glucose=dict( + session_path=str(session_path), + ecephys_start_time=str(ecephys_start_time), + ecephys_stop_time=str(ecephys_stop_time), + ), + NeuroscopeLFP=dict( + file_path=str(lfp_file_path), gain=conversion_factor, xml_file_path=str(xml_file_path), spikeextractors_backend=True, - ) + ), ) - conversion_options.update(NeuroscopeRecording=dict(stub_test=stub_test)) - -if aux_file_path.is_file() and rhd_file_path.is_file(): - source_data.update(Accelerometer=dict(dat_file_path=str(aux_file_path), rhd_file_path=str(rhd_file_path))) - -if sleep_mat_file_path.is_file(): - source_data.update(SleepStates=dict(mat_file_path=str(sleep_mat_file_path))) - -if any(ripple_mat_file_paths): - source_data.update(Ripples=dict(mat_file_paths=ripple_mat_file_paths)) - -converter = TingleyMetabolicConverter(source_data=source_data) -metadata = converter.get_metadata() -metadata = dict_deep_update(metadata, global_metadata) -session_description = "Consult Supplementary Table 1 from the publication for more information about this session." -metadata["NWBFile"].update( - # session_description=subject_info_table.get( - # metadata["Subject"]["subject_id"], - # "Consult Supplementary Table 1 from the publication for more information about this session.", - # ), - # experiment_description=subject_info_table.get( - # metadata["Subject"]["subject_id"], - # "Consult Supplementary Table 1 from the publication for more information about this session.", - # ), - # Since no mapping of subject_ids to ST1, just leave this for all. 
- session_description=session_description, - experiment_description=session_description, -) -if metadata["Ecephys"]["Device"][0]["name"] == "Device_ecephys": - del metadata["Ecephys"]["Device"][0] -for electrode_group_metadata in metadata["Ecephys"]["ElectrodeGroup"]: - electrode_group_metadata.update(device=metadata["Ecephys"]["Device"][0]["name"]) - -ecephys_start_time_increment = ( - ecephys_start_time - converter.data_interface_objects["Glucose"].session_start_time -).total_seconds() -conversion_options.update( - NeuroscopeLFP=dict( - stub_test=stub_test, starting_time=ecephys_start_time_increment, iterator_opts=dict(buffer_gb=buffer_gb) + + if raw_file_path.is_file(): + source_data.update( + NeuroscopeRecording=dict( + file_path=str(raw_file_path), + gain=conversion_factor, + xml_file_path=str(xml_file_path), + spikeextractors_backend=True, + ) + ) + conversion_options.update(NeuroscopeRecording=dict(stub_test=stub_test)) + + if aux_file_path.is_file() and rhd_file_path.is_file(): + source_data.update(Accelerometer=dict(dat_file_path=str(aux_file_path), rhd_file_path=str(rhd_file_path))) + + if sleep_mat_file_path.is_file(): + source_data.update(SleepStates=dict(mat_file_path=str(sleep_mat_file_path))) + + if any(ripple_mat_file_paths): + source_data.update(Ripples=dict(mat_file_paths=ripple_mat_file_paths)) + + converter = TingleyMetabolicConverter(source_data=source_data) + metadata = converter.get_metadata() + metadata = dict_deep_update(metadata, global_metadata) + session_description = "Consult Supplementary Table 1 from the publication for more information about this session." + metadata["NWBFile"].update( + # session_description=subject_info_table.get( + # metadata["Subject"]["subject_id"], + # "Consult Supplementary Table 1 from the publication for more information about this session.", + # ), + # experiment_description=subject_info_table.get( + # metadata["Subject"]["subject_id"], + # "Consult Supplementary Table 1 from the publication for more information about this session.", + # ), + # Since no mapping of subject_ids to ST1, just leave this for all. 
+ session_description=session_description, + experiment_description=session_description, ) -) -if raw_file_path.is_file(): + if metadata["Ecephys"]["Device"][0]["name"] == "Device_ecephys": + del metadata["Ecephys"]["Device"][0] + for electrode_group_metadata in metadata["Ecephys"]["ElectrodeGroup"]: + electrode_group_metadata.update(device=metadata["Ecephys"]["Device"][0]["name"]) + + ecephys_start_time_increment = ( + ecephys_start_time - converter.data_interface_objects["Glucose"].session_start_time + ).total_seconds() conversion_options.update( - NeuroscopeRecording=dict( - stub_test=stub_test, - starting_time=ecephys_start_time_increment, - es_key="ElectricalSeries_raw", - iterator_opts=dict(buffer_gb=buffer_gb), + NeuroscopeLFP=dict( + stub_test=stub_test, starting_time=ecephys_start_time_increment, iterator_opts=dict(buffer_gb=buffer_gb) ) ) -if aux_file_path.is_file() and rhd_file_path.is_file(): - conversion_options.update(Accelerometer=dict(stub_test=stub_test, ecephys_start_time=ecephys_start_time_increment)) -if sleep_mat_file_path.is_file(): - conversion_options.update(SleepStates=dict(ecephys_start_time=ecephys_start_time_increment)) -if any(ripple_mat_file_paths): - conversion_options.update(Ripples=dict(stub_test=stub_test, ecephys_start_time=ecephys_start_time_increment)) - -converter.run_conversion( - nwbfile_path=str(nwbfile_path), - metadata=metadata, - conversion_options=conversion_options, - overwrite=True, -) -for j in range(1, 4): - try: - rmtree(session_path) - except OSError: - print(f"Attempt #{j} to remove sesion path...") - sleep(5) -dandi_upload(dandiset_id=dandiset_id, nwb_folder_path=nwb_output_path) + if raw_file_path.is_file(): + conversion_options.update( + NeuroscopeRecording=dict( + stub_test=stub_test, + starting_time=ecephys_start_time_increment, + es_key="ElectricalSeries_raw", + iterator_opts=dict(buffer_gb=buffer_gb), + ) + ) + if aux_file_path.is_file() and rhd_file_path.is_file(): + conversion_options.update( + Accelerometer=dict(stub_test=stub_test, ecephys_start_time=ecephys_start_time_increment) + ) + if sleep_mat_file_path.is_file(): + conversion_options.update(SleepStates=dict(ecephys_start_time=ecephys_start_time_increment)) + if any(ripple_mat_file_paths): + conversion_options.update(Ripples=dict(stub_test=stub_test, ecephys_start_time=ecephys_start_time_increment)) + + converter.run_conversion( + nwbfile_path=str(nwbfile_path), + metadata=metadata, + conversion_options=conversion_options, + overwrite=True, + ) + for j in range(1, 4): + try: + rmtree(session_path) + except OSError: + if len(list(session_path.iterdir())) > 0: + print(f"shutil.rmtree failed to clean directory for session {session_id}") + dandi_upload(dandiset_id=dandiset_id, nwb_folder_path=nwb_output_path) + + y_n = input("Continue with dataset conversion? 
(y/n) ") + assert y_n.lower() == "y" From 17af8534eadeb8f6d47d45f1dc72e448ac818a19 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Wed, 4 May 2022 17:42:42 -0400 Subject: [PATCH 27/40] some extra polish --- .../tingley_metabolic/fully_automated_conversion.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py index 9bced5c..0b2ecc1 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py @@ -145,7 +145,6 @@ spikeextractors_backend=True, ) ) - conversion_options.update(NeuroscopeRecording=dict(stub_test=stub_test)) if aux_file_path.is_file() and rhd_file_path.is_file(): source_data.update(Accelerometer=dict(dat_file_path=str(aux_file_path), rhd_file_path=str(rhd_file_path))) @@ -183,7 +182,9 @@ ).total_seconds() conversion_options.update( NeuroscopeLFP=dict( - stub_test=stub_test, starting_time=ecephys_start_time_increment, iterator_opts=dict(buffer_gb=buffer_gb) + stub_test=stub_test, + starting_time=ecephys_start_time_increment, + iterator_opts=dict(buffer_gb=buffer_gb, display_progress=True), ) ) if raw_file_path.is_file(): @@ -192,7 +193,7 @@ stub_test=stub_test, starting_time=ecephys_start_time_increment, es_key="ElectricalSeries_raw", - iterator_opts=dict(buffer_gb=buffer_gb), + iterator_opts=dict(buffer_gb=buffer_gb, display_progress=True), ) ) if aux_file_path.is_file() and rhd_file_path.is_file(): @@ -218,5 +219,5 @@ print(f"shutil.rmtree failed to clean directory for session {session_id}") dandi_upload(dandiset_id=dandiset_id, nwb_folder_path=nwb_output_path) - y_n = input("Continue with dataset conversion? (y/n) ") + y_n = input("\nContinue with dataset conversion? (y/n) ") assert y_n.lower() == "y" From e5be7ab4c481efd8080494f77f528c2ad7d179cc Mon Sep 17 00:00:00 2001 From: CodyCBakerPhD Date: Wed, 4 May 2022 21:46:12 +0000 Subject: [PATCH 28/40] further polish --- .../fully_automated_conversion.py | 24 ++++++++++++------- .../tingleymetabolicglucoseinterface.py | 1 - 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py index 0b2ecc1..fb40db4 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py @@ -44,10 +44,12 @@ session_id = sessions[session_idx] # assert f"{session_id}/{session_id}.lfp" in all_content, "Skip session_idx {session_idx} - bad session!" if f"{session_id}/{session_id}.lfp" not in all_content: - y_n = input( - f"Skipping session_id {session_id} because there was no LFP (and hence likely a bad session). " - "Continue? (y/n) " - ) + y_n = "" + while y_n.lower() != "y" or y_n.lower() != "n": + y_n = input( + f"Skipping session_id {session_id} because there was no LFP (and hence likely a bad session). " + "Continue? (y/n) " + ) assert y_n.lower() == "y" continue content_to_attempt_transfer = [ @@ -75,10 +77,12 @@ content_to_transfer_size = sum([all_content[x] for x in content_to_transfer]) total_time = estimate_total_conversion_runtime(total_mb=content_to_transfer_size / 1e6) total_cost = estimate_s3_conversion_cost(total_mb=content_to_transfer_size / 1e6) - y_n = input( - f"Converting session {session_id} will cost an estimated ${total_cost} and take {total_time/3600} hours. 
" - "Continue? (y/n)" - ) + y_n = "" + while y_n.lower() != "y" or y_n.lower() != "n": + y_n = input( + f"Converting session {session_id} will cost an estimated ${total_cost} and take {total_time/3600} hours. " + "Continue? (y/n)" + ) assert y_n.lower() == "y" metadata_path = Path(__file__).parent / "tingley_metabolic_metadata.yml" @@ -219,5 +223,7 @@ print(f"shutil.rmtree failed to clean directory for session {session_id}") dandi_upload(dandiset_id=dandiset_id, nwb_folder_path=nwb_output_path) - y_n = input("\nContinue with dataset conversion? (y/n) ") + y_n = "" + while y_n.lower() != "y" or y_n.lower() != "n": + y_n = input("\nContinue with dataset conversion? (y/n)\n") assert y_n.lower() == "y" diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicglucoseinterface.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicglucoseinterface.py index d1b2884..f32ed49 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicglucoseinterface.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicglucoseinterface.py @@ -13,7 +13,6 @@ class TingleyMetabolicGlucoseInterface(BaseDataInterface): def __init__(self, session_path: FilePathType, ecephys_start_time: str, ecephys_stop_time: str): glucose_timestamps, glucose_isig = load_subject_glucose_series(session_path=session_path) - print(glucose_timestamps) self.session_start_time = glucose_timestamps[0] glucose_timestamps_floats_from_datetime = [ (glucose_timestamp - self.session_start_time).total_seconds() for glucose_timestamp in glucose_timestamps From 67c0d6e6ee3430e11f5e2970560dee8d76f59b8e Mon Sep 17 00:00:00 2001 From: CodyCBakerPhD Date: Fri, 6 May 2022 14:10:31 +0000 Subject: [PATCH 29/40] saving state --- .../fully_automated_conversion.py | 359 ++++++++++-------- 1 file changed, 199 insertions(+), 160 deletions(-) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py index fb40db4..b408410 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py @@ -1,9 +1,13 @@ """Run entire conversion.""" +import os +import traceback from pathlib import Path from datetime import timedelta from warnings import simplefilter + from shutil import rmtree from natsort import natsorted +from nwbinspector.tools import get_s3_urls_and_dandi_paths from nwb_conversion_tools.tools.data_transfers import ( dandi_upload, @@ -33,197 +37,232 @@ base_buzsaki_path = Path("/TingleyD/Tingley2021_ripple_glucose_paper/") -subject_id = "CGM36" +subject_id = "CGM37" all_content = get_globus_dataset_content_sizes( globus_endpoint_id=buzsaki_globus_endpoint_id, path=(base_buzsaki_path / subject_id).as_posix() ) -sessions = natsorted(list(set([Path(x).parent.name for x in all_content]) - set([""]))) +dandi_content = list(get_s3_urls_and_dandi_paths(dandiset_id=dandiset_id).values()) +dandi_session_datetimes = ["_".join(x.split("/")[1].split("_")[1].split("-")[-2:]) for x in dandi_content] # probably a better way to do this, just brute forcing for now +sessions = natsorted( + list( + set([Path(x).parent.name for x in all_content]) + - set( + [ + "CGM36_0um_0um_210301_174112", # bad .rhd header + "CGM36_0um_0um_210302_082521", # bad .rhd header + "CGM36_0um_0um_210302_090220", # bad .rhd header + "CGM37_1210um_634um_210124_090816", # done + "CGM37_848um_634um_210120_153950", # other MATLAB format + "CGM37_848um_634um_210121_085445", # done + 
"CGM37_848um_634um_210121_101547", # other MATLAB format + "CGM37_848um_634um_210121_101926", # done + "CGM37_1210um_634um_210122_075004", # other MATLAB format + ] + ) + ) +) -session_idxs = set(range(len(sessions))) - set([0, 2]) +session_idxs = set(range(len(sessions))) # - set([15]) for session_idx in session_idxs: - session_id = sessions[session_idx] - # assert f"{session_id}/{session_id}.lfp" in all_content, "Skip session_idx {session_idx} - bad session!" - if f"{session_id}/{session_id}.lfp" not in all_content: + assert os.environ.get("DANDI_API_KEY"), "Set your DANDI_API_KEY!" + try: + session_id = sessions[session_idx] + # assert f"{session_id}/{session_id}.lfp" in all_content, "Skip session_idx {session_idx} - bad session!" + if f"{session_id}/{session_id}.lfp" not in all_content: + print( + f"\nSkipping session_id {session_id} because there was no LFP (and hence likely a bad session). " + ) + continue + if any([x in session_id for x in dandi_session_datetimes]): + print( + f"\nSkipping session_id {session_id} because it is already on DANDI." + ) + continue + + content_to_attempt_transfer = [ + f"{session_id}/{session_id}.xml", + f"{session_id}/{session_id}.dat", + f"{session_id}/{session_id}.lfp", + f"{session_id}/auxiliary.dat", + f"{session_id}/info.rhd", + f"{session_id}/{session_id}.SleepState.states.mat", + f"{session_id}/", + ] + content_to_attempt_transfer.extend([x for x in all_content if Path(x).suffix == ".csv"]) + # Ripple files are a little trickier, can have multiple text forms + content_to_attempt_transfer.extend( + [ + x + for x in all_content + if Path(x).parent.name == session_id + for suffix in Path(x).suffixes + if "ripples" in suffix.lower() + ] + ) + content_to_transfer = [x for x in content_to_attempt_transfer if x in all_content] + + content_to_transfer_size = sum([all_content[x] for x in content_to_transfer]) + total_time = estimate_total_conversion_runtime(total_mb=content_to_transfer_size / 1e6, transfer_rate_mb=5.0) + total_cost = estimate_s3_conversion_cost(total_mb=content_to_transfer_size / 1e6) y_n = "" - while y_n.lower() != "y" or y_n.lower() != "n": + while not (y_n.lower() == "y" or y_n.lower() == "n"): y_n = input( - f"Skipping session_id {session_id} because there was no LFP (and hence likely a bad session). " - "Continue? (y/n) " + f"\nConverting session {session_id} will cost an estimated ${total_cost} and take {total_time/3600} hours. " + "Continue? 
(y/n): " ) assert y_n.lower() == "y" - continue - content_to_attempt_transfer = [ - f"{session_id}/{session_id}.xml", - f"{session_id}/{session_id}.dat", - f"{session_id}/{session_id}.lfp", - f"{session_id}/auxiliary.dat", - f"{session_id}/info.rhd", - f"{session_id}/{session_id}.SleepState.states.mat", - f"{session_id}/", - ] - content_to_attempt_transfer.extend([x for x in all_content if Path(x).suffix == ".csv"]) - # Ripple files are a little trickier, can have multiple text forms - content_to_attempt_transfer.extend( - [ - x - for x in all_content - if Path(x).parent.name == session_id - for suffix in Path(x).suffixes - if "ripples" in suffix.lower() - ] - ) - content_to_transfer = [x for x in content_to_attempt_transfer if x in all_content] - - content_to_transfer_size = sum([all_content[x] for x in content_to_transfer]) - total_time = estimate_total_conversion_runtime(total_mb=content_to_transfer_size / 1e6) - total_cost = estimate_s3_conversion_cost(total_mb=content_to_transfer_size / 1e6) - y_n = "" - while y_n.lower() != "y" or y_n.lower() != "n": - y_n = input( - f"Converting session {session_id} will cost an estimated ${total_cost} and take {total_time/3600} hours. " - "Continue? (y/n)" - ) - assert y_n.lower() == "y" - - metadata_path = Path(__file__).parent / "tingley_metabolic_metadata.yml" - subject_info_path = Path(__file__).parent / "tingley_metabolic_subject_info.yml" - - nwb_output_path = data_path / f"nwb_{session_id}" - nwb_output_path.mkdir(exist_ok=True) - nwbfile_path = nwb_output_path / f"{session_id}.nwb" - session_path = data_path / f"{session_id}" - session_path.mkdir(exist_ok=True) - - transfer_globus_content( - source_endpoint_id=buzsaki_globus_endpoint_id, - source_files=[ - [base_buzsaki_path / subject_id / x for x in content_to_transfer if ".csv" in x], - [base_buzsaki_path / subject_id / x for x in content_to_transfer if ".csv" not in x], - ], - destination_endpoint_id=hub_globus_endpoint_id, - destination_folder=session_path, - progress_update_rate=30.0, - progress_update_timeout=total_time * 2, - ) - global_metadata = load_dict_from_file(metadata_path) - subject_info_table = load_dict_from_file(subject_info_path) + metadata_path = Path(__file__).parent / "tingley_metabolic_metadata.yml" + subject_info_path = Path(__file__).parent / "tingley_metabolic_subject_info.yml" - simplefilter("ignore") - conversion_options = dict() + nwb_output_path = data_path / f"nwb_{session_id}" + nwb_output_path.mkdir(exist_ok=True) + nwbfile_path = nwb_output_path / f"{session_id}.nwb" + session_path = data_path / f"{session_id}" + session_path.mkdir(exist_ok=True) - xml_file_path = session_path / f"{session_id}.xml" - raw_file_path = session_path / f"{session_id}.dat" - lfp_file_path = session_path / f"{session_id}.lfp" + transfer_globus_content( + source_endpoint_id=buzsaki_globus_endpoint_id, + source_files=[ + [base_buzsaki_path / subject_id / x for x in content_to_transfer if ".csv" in x], + [base_buzsaki_path / subject_id / x for x in content_to_transfer if ".csv" not in x], + ], + destination_endpoint_id=hub_globus_endpoint_id, + destination_folder=session_path, + progress_update_rate=30.0, + progress_update_timeout=total_time * 10, + ) - aux_file_path = session_path / "auxiliary.dat" - rhd_file_path = session_path / "info.rhd" - sleep_mat_file_path = session_path / f"{session_id}.SleepState.states.mat" - ripple_mat_file_paths = [x for x in session_path.iterdir() for suffix in x.suffixes if "ripples" in suffix.lower()] + global_metadata = 
load_dict_from_file(metadata_path) + subject_info_table = load_dict_from_file(subject_info_path) - ecephys_start_time = get_session_datetime(session_id=session_id) - ecephys_stop_time = ecephys_start_time + timedelta( - seconds=NeuroscopeRecordingExtractor(file_path=lfp_file_path, xml_file_path=xml_file_path).get_num_frames() - / 1250.0 - ) - source_data = dict( - Glucose=dict( - session_path=str(session_path), - ecephys_start_time=str(ecephys_start_time), - ecephys_stop_time=str(ecephys_stop_time), - ), - NeuroscopeLFP=dict( - file_path=str(lfp_file_path), - gain=conversion_factor, - xml_file_path=str(xml_file_path), - spikeextractors_backend=True, - ), - ) + simplefilter("ignore") + conversion_options = dict() + + xml_file_path = session_path / f"{session_id}.xml" + raw_file_path = session_path / f"{session_id}.dat" + lfp_file_path = session_path / f"{session_id}.lfp" - if raw_file_path.is_file(): - source_data.update( - NeuroscopeRecording=dict( - file_path=str(raw_file_path), + aux_file_path = session_path / "auxiliary.dat" + rhd_file_path = session_path / "info.rhd" + sleep_mat_file_path = session_path / f"{session_id}.SleepState.states.mat" + ripple_mat_file_paths = [x for x in session_path.iterdir() for suffix in x.suffixes if "ripples" in suffix.lower()] + + ecephys_start_time = get_session_datetime(session_id=session_id) + ecephys_stop_time = ecephys_start_time + timedelta( + seconds=NeuroscopeRecordingExtractor(file_path=lfp_file_path, xml_file_path=xml_file_path).get_num_frames() + / 1250.0 + ) + source_data = dict( + Glucose=dict( + session_path=str(session_path), + ecephys_start_time=str(ecephys_start_time), + ecephys_stop_time=str(ecephys_stop_time), + ), + NeuroscopeLFP=dict( + file_path=str(lfp_file_path), gain=conversion_factor, xml_file_path=str(xml_file_path), spikeextractors_backend=True, - ) + ), ) - if aux_file_path.is_file() and rhd_file_path.is_file(): - source_data.update(Accelerometer=dict(dat_file_path=str(aux_file_path), rhd_file_path=str(rhd_file_path))) - - if sleep_mat_file_path.is_file(): - source_data.update(SleepStates=dict(mat_file_path=str(sleep_mat_file_path))) - - if any(ripple_mat_file_paths): - source_data.update(Ripples=dict(mat_file_paths=ripple_mat_file_paths)) - - converter = TingleyMetabolicConverter(source_data=source_data) - metadata = converter.get_metadata() - metadata = dict_deep_update(metadata, global_metadata) - session_description = "Consult Supplementary Table 1 from the publication for more information about this session." - metadata["NWBFile"].update( - # session_description=subject_info_table.get( - # metadata["Subject"]["subject_id"], - # "Consult Supplementary Table 1 from the publication for more information about this session.", - # ), - # experiment_description=subject_info_table.get( - # metadata["Subject"]["subject_id"], - # "Consult Supplementary Table 1 from the publication for more information about this session.", - # ), - # Since no mapping of subject_ids to ST1, just leave this for all. 
- session_description=session_description, - experiment_description=session_description, - ) - if metadata["Ecephys"]["Device"][0]["name"] == "Device_ecephys": - del metadata["Ecephys"]["Device"][0] - for electrode_group_metadata in metadata["Ecephys"]["ElectrodeGroup"]: - electrode_group_metadata.update(device=metadata["Ecephys"]["Device"][0]["name"]) - - ecephys_start_time_increment = ( - ecephys_start_time - converter.data_interface_objects["Glucose"].session_start_time - ).total_seconds() - conversion_options.update( - NeuroscopeLFP=dict( - stub_test=stub_test, - starting_time=ecephys_start_time_increment, - iterator_opts=dict(buffer_gb=buffer_gb, display_progress=True), + if raw_file_path.is_file(): + source_data.update( + NeuroscopeRecording=dict( + file_path=str(raw_file_path), + gain=conversion_factor, + xml_file_path=str(xml_file_path), + spikeextractors_backend=True, + ) + ) + + if aux_file_path.is_file() and rhd_file_path.is_file(): + source_data.update(Accelerometer=dict(dat_file_path=str(aux_file_path), rhd_file_path=str(rhd_file_path))) + + if sleep_mat_file_path.is_file(): + source_data.update(SleepStates=dict(mat_file_path=str(sleep_mat_file_path))) + + if any(ripple_mat_file_paths): + source_data.update(Ripples=dict(mat_file_paths=ripple_mat_file_paths)) + + converter = TingleyMetabolicConverter(source_data=source_data) + metadata = converter.get_metadata() + metadata = dict_deep_update(metadata, global_metadata) + session_description = "Consult Supplementary Table 1 from the publication for more information about this session." + metadata["NWBFile"].update( + # session_description=subject_info_table.get( + # metadata["Subject"]["subject_id"], + # "Consult Supplementary Table 1 from the publication for more information about this session.", + # ), + # experiment_description=subject_info_table.get( + # metadata["Subject"]["subject_id"], + # "Consult Supplementary Table 1 from the publication for more information about this session.", + # ), + # Since no mapping of subject_ids to ST1, just leave this for all. 
+ session_description=session_description, + experiment_description=session_description, ) - ) - if raw_file_path.is_file(): + if metadata["Ecephys"]["Device"][0]["name"] == "Device_ecephys": + del metadata["Ecephys"]["Device"][0] + for electrode_group_metadata in metadata["Ecephys"]["ElectrodeGroup"]: + electrode_group_metadata.update(device=metadata["Ecephys"]["Device"][0]["name"]) + + ecephys_start_time_increment = ( + ecephys_start_time - converter.data_interface_objects["Glucose"].session_start_time + ).total_seconds() conversion_options.update( - NeuroscopeRecording=dict( + NeuroscopeLFP=dict( stub_test=stub_test, starting_time=ecephys_start_time_increment, - es_key="ElectricalSeries_raw", iterator_opts=dict(buffer_gb=buffer_gb, display_progress=True), ) ) - if aux_file_path.is_file() and rhd_file_path.is_file(): - conversion_options.update( - Accelerometer=dict(stub_test=stub_test, ecephys_start_time=ecephys_start_time_increment) + if raw_file_path.is_file(): + conversion_options.update( + NeuroscopeRecording=dict( + stub_test=stub_test, + starting_time=ecephys_start_time_increment, + es_key="ElectricalSeries_raw", + iterator_opts=dict(buffer_gb=buffer_gb, display_progress=True), + ) + ) + if aux_file_path.is_file() and rhd_file_path.is_file(): + conversion_options.update( + Accelerometer=dict(stub_test=stub_test, ecephys_start_time=ecephys_start_time_increment) + ) + if sleep_mat_file_path.is_file(): + conversion_options.update(SleepStates=dict(ecephys_start_time=ecephys_start_time_increment)) + if any(ripple_mat_file_paths): + conversion_options.update(Ripples=dict(stub_test=stub_test, ecephys_start_time=ecephys_start_time_increment)) + + converter.run_conversion( + nwbfile_path=str(nwbfile_path), + metadata=metadata, + conversion_options=conversion_options, + overwrite=True, ) - if sleep_mat_file_path.is_file(): - conversion_options.update(SleepStates=dict(ecephys_start_time=ecephys_start_time_increment)) - if any(ripple_mat_file_paths): - conversion_options.update(Ripples=dict(stub_test=stub_test, ecephys_start_time=ecephys_start_time_increment)) - - converter.run_conversion( - nwbfile_path=str(nwbfile_path), - metadata=metadata, - conversion_options=conversion_options, - overwrite=True, - ) - for j in range(1, 4): + del converter try: rmtree(session_path) except OSError: if len(list(session_path.iterdir())) > 0: print(f"shutil.rmtree failed to clean directory for session {session_id}") - dandi_upload(dandiset_id=dandiset_id, nwb_folder_path=nwb_output_path) + dandi_upload(dandiset_id=dandiset_id, nwb_folder_path=nwb_output_path) - y_n = "" - while y_n.lower() != "y" or y_n.lower() != "n": - y_n = input("\nContinue with dataset conversion? (y/n)\n") - assert y_n.lower() == "y" + y_n = "" + while not (y_n.lower() == "y" or y_n.lower() == "n"): + y_n = input("\nContinue with dataset conversion? (y/n): ") + assert y_n.lower() == "y" + except Exception as ex: + # Clean up data files in event of any error + try: + rmtree(session_path, ignore_errors=True) + rmtree(nwb_output_path, ignore_errors=True) + rmtree(nwb_output_path.parent / dandiset_id, ignore_errors=True) + except Exception: + a = 1 + y_n = "" + while not (y_n.lower() == "y" or y_n.lower() == "n"): + y_n = input(f"Could not convert session {session_id} due to {type(ex)}: {str(ex)}\n{traceback.format_exc()}\nWould you like to continue? 
(y/n): ") + assert y_n.lower() == "y" \ No newline at end of file From 040a891eb4d27a7dd78d78d6df84fb308537175d Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 6 May 2022 14:11:27 +0000 Subject: [PATCH 30/40] Automated changes --- .../fully_automated_conversion.py | 34 +++++++++++-------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py index b408410..f36a563 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py @@ -42,7 +42,9 @@ globus_endpoint_id=buzsaki_globus_endpoint_id, path=(base_buzsaki_path / subject_id).as_posix() ) dandi_content = list(get_s3_urls_and_dandi_paths(dandiset_id=dandiset_id).values()) -dandi_session_datetimes = ["_".join(x.split("/")[1].split("_")[1].split("-")[-2:]) for x in dandi_content] # probably a better way to do this, just brute forcing for now +dandi_session_datetimes = [ + "_".join(x.split("/")[1].split("_")[1].split("-")[-2:]) for x in dandi_content +] # probably a better way to do this, just brute forcing for now sessions = natsorted( list( set([Path(x).parent.name for x in all_content]) @@ -62,23 +64,19 @@ ) ) -session_idxs = set(range(len(sessions))) # - set([15]) +session_idxs = set(range(len(sessions))) # - set([15]) for session_idx in session_idxs: assert os.environ.get("DANDI_API_KEY"), "Set your DANDI_API_KEY!" try: session_id = sessions[session_idx] # assert f"{session_id}/{session_id}.lfp" in all_content, "Skip session_idx {session_idx} - bad session!" if f"{session_id}/{session_id}.lfp" not in all_content: - print( - f"\nSkipping session_id {session_id} because there was no LFP (and hence likely a bad session). " - ) + print(f"\nSkipping session_id {session_id} because there was no LFP (and hence likely a bad session). ") continue if any([x in session_id for x in dandi_session_datetimes]): - print( - f"\nSkipping session_id {session_id} because it is already on DANDI." - ) + print(f"\nSkipping session_id {session_id} because it is already on DANDI.") continue - + content_to_attempt_transfer = [ f"{session_id}/{session_id}.xml", f"{session_id}/{session_id}.dat", @@ -146,7 +144,9 @@ aux_file_path = session_path / "auxiliary.dat" rhd_file_path = session_path / "info.rhd" sleep_mat_file_path = session_path / f"{session_id}.SleepState.states.mat" - ripple_mat_file_paths = [x for x in session_path.iterdir() for suffix in x.suffixes if "ripples" in suffix.lower()] + ripple_mat_file_paths = [ + x for x in session_path.iterdir() for suffix in x.suffixes if "ripples" in suffix.lower() + ] ecephys_start_time = get_session_datetime(session_id=session_id) ecephys_stop_time = ecephys_start_time + timedelta( @@ -189,7 +189,9 @@ converter = TingleyMetabolicConverter(source_data=source_data) metadata = converter.get_metadata() metadata = dict_deep_update(metadata, global_metadata) - session_description = "Consult Supplementary Table 1 from the publication for more information about this session." + session_description = ( + "Consult Supplementary Table 1 from the publication for more information about this session." 
+ ) metadata["NWBFile"].update( # session_description=subject_info_table.get( # metadata["Subject"]["subject_id"], @@ -234,7 +236,9 @@ if sleep_mat_file_path.is_file(): conversion_options.update(SleepStates=dict(ecephys_start_time=ecephys_start_time_increment)) if any(ripple_mat_file_paths): - conversion_options.update(Ripples=dict(stub_test=stub_test, ecephys_start_time=ecephys_start_time_increment)) + conversion_options.update( + Ripples=dict(stub_test=stub_test, ecephys_start_time=ecephys_start_time_increment) + ) converter.run_conversion( nwbfile_path=str(nwbfile_path), @@ -264,5 +268,7 @@ a = 1 y_n = "" while not (y_n.lower() == "y" or y_n.lower() == "n"): - y_n = input(f"Could not convert session {session_id} due to {type(ex)}: {str(ex)}\n{traceback.format_exc()}\nWould you like to continue? (y/n): ") - assert y_n.lower() == "y" \ No newline at end of file + y_n = input( + f"Could not convert session {session_id} due to {type(ex)}: {str(ex)}\n{traceback.format_exc()}\nWould you like to continue? (y/n): " + ) + assert y_n.lower() == "y" From f974430b770cf4138e55d941247bf72dbcd9077e Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 6 May 2022 10:15:15 -0400 Subject: [PATCH 31/40] small bug --- .../tingley_metabolic/tingleymetabolicripplesinterface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py index fa4c5a4..dcb500b 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py @@ -27,7 +27,7 @@ def run_conversion(self, nwbfile: NWBFile, metadata, stub_test: bool = False, ec mat_file_is_scipy_readable = True except NotImplementedError: mat_file_is_scipy_readable = False - print(f"RippleInterface is unable to convert {self.source_data['mat_file_path']} due to HDF5 version!") + print(f"RippleInterface is unable to convert {mat_file_path} due to HDF5 version!") if mat_file_is_scipy_readable: mat_data = mat_file["ripples"] From a32d70427425ae59a1481559d4e88934bd762156 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 6 May 2022 10:18:23 -0400 Subject: [PATCH 32/40] force safe pyintan --- .../tingleymetabolicaccelerometerinterface.py | 67 +++++++++++-------- 1 file changed, 39 insertions(+), 28 deletions(-) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicaccelerometerinterface.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicaccelerometerinterface.py index 6a3423c..15e87bd 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicaccelerometerinterface.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicaccelerometerinterface.py @@ -23,33 +23,44 @@ def __init__(self, dat_file_path: FilePathType, rhd_file_path: FilePathType): 20kHz by duplicating the data value at every 4th index. I can only assume this was done for easier side-by-side analysis of the raw data (which was acquired at 20kHz). 
""" - rhd_info = read_rhd(filename=rhd_file_path) - first_aux_entry = next( - header_info_entry for header_info_entry in rhd_info[1] if "AUX1" in header_info_entry["native_channel_name"] - ) - first_aux_sub_entry = next( - header_info_entry for header_info_entry in rhd_info[2] if "AUX1" in header_info_entry[0] - ) - - # Manually confirmed that all aux channels have same properties - self.conversion = first_aux_entry["gain"] # offset confirmed to be 0, units confirmed to be Volts - self.sampling_frequency = first_aux_entry["sampling_rate"] - dtype = first_aux_sub_entry[1] - numchan = sum("AUX" in header_info_entry["native_channel_name"] for header_info_entry in rhd_info[1]) - - # Manually confirmed result is still memmap after slicing - self.memmap = read_binary(file=dat_file_path, numchan=numchan, dtype=dtype)[:3, ::4] + try: + rhd_info = read_rhd(filename=rhd_file_path) + self.readable = True + except: # strange error with pyintan + self.readable = False + + if self.readable: + first_aux_entry = next( + header_info_entry + for header_info_entry in rhd_info[1] + if "AUX1" in header_info_entry["native_channel_name"] + ) + first_aux_sub_entry = next( + header_info_entry for header_info_entry in rhd_info[2] if "AUX1" in header_info_entry[0] + ) + + # Manually confirmed that all aux channels have same properties + self.conversion = first_aux_entry["gain"] # offset confirmed to be 0, units confirmed to be Volts + self.sampling_frequency = first_aux_entry["sampling_rate"] + dtype = first_aux_sub_entry[1] + numchan = sum("AUX" in header_info_entry["native_channel_name"] for header_info_entry in rhd_info[1]) + + # Manually confirmed result is still memmap after slicing + self.memmap = read_binary(file=dat_file_path, numchan=numchan, dtype=dtype)[:3, ::4] def run_conversion(self, nwbfile, metadata, stub_test: bool = False, ecephys_start_time: float = 0.0): - stub_frames = 200 if stub_test else None - nwbfile.add_acquisition( - TimeSeries( - name="Accelerometer", - description="Raw data from accelerometer sensors.", - unit="Volts", - data=H5DataIO(self.memmap.T[:stub_frames, :], compression="gzip"), # should not need iterative write - conversion=self.conversion, - rate=self.sampling_frequency, - starting_time=ecephys_start_time, - ), - ) + if self.readable: + stub_frames = 200 if stub_test else None + nwbfile.add_acquisition( + TimeSeries( + name="Accelerometer", + description="Raw data from accelerometer sensors.", + unit="Volts", + data=H5DataIO( + self.memmap.T[:stub_frames, :], compression="gzip" + ), # should not need iterative write + conversion=self.conversion, + rate=self.sampling_frequency, + starting_time=ecephys_start_time, + ), + ) From 0e6c9d5cf0643f09e78ff46738d4e4f83ba0acf5 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sat, 7 May 2022 21:49:55 -0400 Subject: [PATCH 33/40] debug --- .../fully_automated_conversion.py | 30 +++++-------------- 1 file changed, 7 insertions(+), 23 deletions(-) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py index f36a563..e817ad9 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py @@ -37,38 +37,22 @@ base_buzsaki_path = Path("/TingleyD/Tingley2021_ripple_glucose_paper/") -subject_id = "CGM37" +subject_id = "CGM36" all_content = get_globus_dataset_content_sizes( globus_endpoint_id=buzsaki_globus_endpoint_id, path=(base_buzsaki_path / subject_id).as_posix() ) 
dandi_content = list(get_s3_urls_and_dandi_paths(dandiset_id=dandiset_id).values()) dandi_session_datetimes = [ - "_".join(x.split("/")[1].split("_")[1].split("-")[-2:]) for x in dandi_content + "_".join(x.split("/")[1].split("_")[3:5]) for x in dandi_content ] # probably a better way to do this, just brute forcing for now -sessions = natsorted( - list( - set([Path(x).parent.name for x in all_content]) - - set( - [ - "CGM36_0um_0um_210301_174112", # bad .rhd header - "CGM36_0um_0um_210302_082521", # bad .rhd header - "CGM36_0um_0um_210302_090220", # bad .rhd header - "CGM37_1210um_634um_210124_090816", # done - "CGM37_848um_634um_210120_153950", # other MATLAB format - "CGM37_848um_634um_210121_085445", # done - "CGM37_848um_634um_210121_101547", # other MATLAB format - "CGM37_848um_634um_210121_101926", # done - "CGM37_1210um_634um_210122_075004", # other MATLAB format - ] - ) - ) -) +sessions = set([Path(x).parent.name for x in all_content]) - set([""]) # "" for .csv +unconverted_sessions = natsorted( + [session_id for session_id in sessions if "_".join(session_id.split("_")[-2:]) not in dandi_session_datetimes] +) # natsorted for consistency on each run -session_idxs = set(range(len(sessions))) # - set([15]) -for session_idx in session_idxs: +for session_id in unconverted_sessions: assert os.environ.get("DANDI_API_KEY"), "Set your DANDI_API_KEY!" try: - session_id = sessions[session_idx] # assert f"{session_id}/{session_id}.lfp" in all_content, "Skip session_idx {session_idx} - bad session!" if f"{session_id}/{session_id}.lfp" not in all_content: print(f"\nSkipping session_id {session_id} because there was no LFP (and hence likely a bad session). ") From 3602c5181ba8f1a5f90d20a3fd1b94c1ce8c2ae5 Mon Sep 17 00:00:00 2001 From: CodyCBakerPhD Date: Sun, 8 May 2022 02:02:07 +0000 Subject: [PATCH 34/40] some other approachces --- .../fully_automated_conversion.py | 14 - .../fully_automated_single_session.py | 244 ++++++++++++++++++ .../tingley_metabolic/run_single_sess_iter.py | 4 + 3 files changed, 248 insertions(+), 14 deletions(-) create mode 100644 buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_single_session.py create mode 100644 buzsaki_lab_to_nwb/tingley_metabolic/run_single_sess_iter.py diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py index f36a563..c59e5a6 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py @@ -48,19 +48,6 @@ sessions = natsorted( list( set([Path(x).parent.name for x in all_content]) - - set( - [ - "CGM36_0um_0um_210301_174112", # bad .rhd header - "CGM36_0um_0um_210302_082521", # bad .rhd header - "CGM36_0um_0um_210302_090220", # bad .rhd header - "CGM37_1210um_634um_210124_090816", # done - "CGM37_848um_634um_210120_153950", # other MATLAB format - "CGM37_848um_634um_210121_085445", # done - "CGM37_848um_634um_210121_101547", # other MATLAB format - "CGM37_848um_634um_210121_101926", # done - "CGM37_1210um_634um_210122_075004", # other MATLAB format - ] - ) ) ) @@ -246,7 +233,6 @@ conversion_options=conversion_options, overwrite=True, ) - del converter try: rmtree(session_path) except OSError: diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_single_session.py b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_single_session.py new file mode 100644 index 0000000..55d5f26 --- /dev/null +++ 
b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_single_session.py @@ -0,0 +1,244 @@ +"""Run entire conversion.""" +import os +import traceback +from pathlib import Path +from datetime import timedelta +from warnings import simplefilter + +from shutil import rmtree +from natsort import natsorted +from nwbinspector.tools import get_s3_urls_and_dandi_paths + +from nwb_conversion_tools.tools.data_transfers import ( + dandi_upload, + estimate_total_conversion_runtime, + estimate_s3_conversion_cost, + get_globus_dataset_content_sizes, + transfer_globus_content, +) +from nwb_conversion_tools.utils import load_dict_from_file, dict_deep_update +from spikeextractors import NeuroscopeRecordingExtractor + +from buzsaki_lab_to_nwb.tingley_metabolic import TingleyMetabolicConverter, get_session_datetime + +buzsaki_globus_endpoint_id = "188a6110-96db-11eb-b7a9-f57b2d55370d" +hub_globus_endpoint_id = "2b9b4d14-82a8-11ec-9f34-ed182a728dff" +# hub_globus_endpoint_id = "3d82aa0a-bc1d-11ec-8f83-e31722b18688" +dandiset_id = "000233" + +stub_test = False +conversion_factor = 0.195 # Intany +buffer_gb = 50 + +data_path = Path("/shared/catalystneuro/TingleyD/") +home_path = Path("/home/jovyan/") + +# data_path = Path("C:/Users/Raven/Documents/TingleyD/") + + +base_buzsaki_path = Path("/TingleyD/Tingley2021_ripple_glucose_paper/") +subject_id = "CGM37" +all_content = get_globus_dataset_content_sizes( + globus_endpoint_id=buzsaki_globus_endpoint_id, path=(base_buzsaki_path / subject_id).as_posix() +) +dandi_content = list(get_s3_urls_and_dandi_paths(dandiset_id=dandiset_id).values()) +dandi_session_datetimes = [ + "_".join(x.split("/")[1].split("_")[1].split("-")[-2:]) for x in dandi_content +] # probably a better way to do this, just brute forcing for now +sessions = natsorted( + list( + set([Path(x).parent.name for x in all_content]) + ) +) + + +session_idxs = set(range(len(sessions))) +for session_idx in session_idxs: + assert os.environ.get("DANDI_API_KEY"), "Set your DANDI_API_KEY!" + try: + session_id = sessions[session_idx] + # assert f"{session_id}/{session_id}.lfp" in all_content, "Skip session_idx {session_idx} - bad session!" + if f"{session_id}/{session_id}.lfp" not in all_content: + print(f"\nSkipping session_id {session_id} because there was no LFP (and hence likely a bad session). 
") + continue + if any([x in session_id for x in dandi_session_datetimes]): + print(f"\nSkipping session_id {session_id} because it is already on DANDI.") + continue + + content_to_attempt_transfer = [ + f"{session_id}/{session_id}.xml", + f"{session_id}/{session_id}.dat", + f"{session_id}/{session_id}.lfp", + f"{session_id}/auxiliary.dat", + f"{session_id}/info.rhd", + f"{session_id}/{session_id}.SleepState.states.mat", + f"{session_id}/", + ] + content_to_attempt_transfer.extend([x for x in all_content if Path(x).suffix == ".csv"]) + # Ripple files are a little trickier, can have multiple text forms + content_to_attempt_transfer.extend( + [ + x + for x in all_content + if Path(x).parent.name == session_id + for suffix in Path(x).suffixes + if "ripples" in suffix.lower() + ] + ) + content_to_transfer = [x for x in content_to_attempt_transfer if x in all_content] + + content_to_transfer_size = sum([all_content[x] for x in content_to_transfer]) + total_time = estimate_total_conversion_runtime(total_mb=content_to_transfer_size / 1e6, transfer_rate_mb=3.0) + total_cost = estimate_s3_conversion_cost(total_mb=content_to_transfer_size / 1e6, transfer_rate_mb=3.0) + print(f"Total cost of {session_id}: ${total_cost}, total time: {total_time / 3600} hr") + + metadata_path = Path(__file__).parent / "tingley_metabolic_metadata.yml" + subject_info_path = Path(__file__).parent / "tingley_metabolic_subject_info.yml" + + nwb_output_path = data_path / f"nwb_{session_id}" + nwb_output_path.mkdir(exist_ok=True) + nwbfile_path = nwb_output_path / f"{session_id}.nwb" + session_path = data_path / f"{session_id}" + session_path.mkdir(exist_ok=True) + + transfer_globus_content( + source_endpoint_id=buzsaki_globus_endpoint_id, + source_files=[ + [base_buzsaki_path / subject_id / x for x in content_to_transfer if ".csv" in x], + [base_buzsaki_path / subject_id / x for x in content_to_transfer if ".csv" not in x], + ], + destination_endpoint_id=hub_globus_endpoint_id, + destination_folder=session_path, + progress_update_rate=30.0, + progress_update_timeout=total_time * 10, + ) + + global_metadata = load_dict_from_file(metadata_path) + subject_info_table = load_dict_from_file(subject_info_path) + + simplefilter("ignore") + conversion_options = dict() + + xml_file_path = session_path / f"{session_id}.xml" + raw_file_path = session_path / f"{session_id}.dat" + lfp_file_path = session_path / f"{session_id}.lfp" + + aux_file_path = session_path / "auxiliary.dat" + rhd_file_path = session_path / "info.rhd" + sleep_mat_file_path = session_path / f"{session_id}.SleepState.states.mat" + ripple_mat_file_paths = [ + x for x in session_path.iterdir() for suffix in x.suffixes if "ripples" in suffix.lower() + ] + + ecephys_start_time = get_session_datetime(session_id=session_id) + ecephys_stop_time = ecephys_start_time + timedelta( + seconds=NeuroscopeRecordingExtractor(file_path=lfp_file_path, xml_file_path=xml_file_path).get_num_frames() + / 1250.0 + ) + source_data = dict( + Glucose=dict( + session_path=str(session_path), + ecephys_start_time=str(ecephys_start_time), + ecephys_stop_time=str(ecephys_stop_time), + ), + NeuroscopeLFP=dict( + file_path=str(lfp_file_path), + gain=conversion_factor, + xml_file_path=str(xml_file_path), + spikeextractors_backend=True, + ), + ) + + if raw_file_path.is_file(): + source_data.update( + NeuroscopeRecording=dict( + file_path=str(raw_file_path), + gain=conversion_factor, + xml_file_path=str(xml_file_path), + spikeextractors_backend=True, + ) + ) + + if aux_file_path.is_file() and 
rhd_file_path.is_file(): + source_data.update(Accelerometer=dict(dat_file_path=str(aux_file_path), rhd_file_path=str(rhd_file_path))) + + if sleep_mat_file_path.is_file(): + source_data.update(SleepStates=dict(mat_file_path=str(sleep_mat_file_path))) + + if any(ripple_mat_file_paths): + source_data.update(Ripples=dict(mat_file_paths=ripple_mat_file_paths)) + + converter = TingleyMetabolicConverter(source_data=source_data) + metadata = converter.get_metadata() + metadata = dict_deep_update(metadata, global_metadata) + session_description = ( + "Consult Supplementary Table 1 from the publication for more information about this session." + ) + metadata["NWBFile"].update( + # session_description=subject_info_table.get( + # metadata["Subject"]["subject_id"], + # "Consult Supplementary Table 1 from the publication for more information about this session.", + # ), + # experiment_description=subject_info_table.get( + # metadata["Subject"]["subject_id"], + # "Consult Supplementary Table 1 from the publication for more information about this session.", + # ), + # Since no mapping of subject_ids to ST1, just leave this for all. + session_description=session_description, + experiment_description=session_description, + ) + if metadata["Ecephys"]["Device"][0]["name"] == "Device_ecephys": + del metadata["Ecephys"]["Device"][0] + for electrode_group_metadata in metadata["Ecephys"]["ElectrodeGroup"]: + electrode_group_metadata.update(device=metadata["Ecephys"]["Device"][0]["name"]) + + ecephys_start_time_increment = ( + ecephys_start_time - converter.data_interface_objects["Glucose"].session_start_time + ).total_seconds() + conversion_options.update( + NeuroscopeLFP=dict( + stub_test=stub_test, + starting_time=ecephys_start_time_increment, + iterator_opts=dict(buffer_gb=buffer_gb, display_progress=True), + ) + ) + if raw_file_path.is_file(): + conversion_options.update( + NeuroscopeRecording=dict( + stub_test=stub_test, + starting_time=ecephys_start_time_increment, + es_key="ElectricalSeries_raw", + iterator_opts=dict(buffer_gb=buffer_gb, display_progress=True), + ) + ) + if aux_file_path.is_file() and rhd_file_path.is_file(): + conversion_options.update( + Accelerometer=dict(stub_test=stub_test, ecephys_start_time=ecephys_start_time_increment) + ) + if sleep_mat_file_path.is_file(): + conversion_options.update(SleepStates=dict(ecephys_start_time=ecephys_start_time_increment)) + if any(ripple_mat_file_paths): + conversion_options.update( + Ripples=dict(stub_test=stub_test, ecephys_start_time=ecephys_start_time_increment) + ) + + converter.run_conversion( + nwbfile_path=str(nwbfile_path), + metadata=metadata, + conversion_options=conversion_options, + overwrite=True, + ) + try: + rmtree(session_path) + except OSError: + if len(list(session_path.iterdir())) > 0: + print(f"shutil.rmtree failed to clean directory for session {session_id}") + except Exception as ex: + # Clean up data files in event of any error + try: + rmtree(session_path, ignore_errors=True) + rmtree(nwb_output_path, ignore_errors=True) + rmtree(nwb_output_path.parent / dandiset_id, ignore_errors=True) + except Exception: + a = 1 + assert False, "Ending session." 
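
Note on the workflow above: fully_automated_single_session.py loops over the sessions for the subject, skips any session whose datetime already appears on DANDI, and on any failure cleans up session_path, nwb_output_path, and the staged dandiset folder before terminating the whole process with `assert False, "Ending session."`. The small run_single_sess_iter.py script added in the following diff then simply re-launches it via `deploy_process`, so a crashed conversion cannot leak state into the next attempt. A minimal standard-library sketch of that re-launch pattern is shown here (illustrative only, not part of the patch; the script name and run count are assumptions that mirror the diff):

# Illustrative sketch, not part of the patch: re-launch a "fresh process per run"
# conversion script so every attempt starts from a clean interpreter state.
import subprocess

SCRIPT = "fully_automated_single_session.py"  # assumed to sit in the working directory
MAX_RUNS = 2  # mirrors the `for _ in range(2)` loop in run_single_sess_iter.py

for attempt in range(1, MAX_RUNS + 1):
    # Each child process re-checks DANDI, converts what it can, and exits,
    # so a failed session cannot poison the state of the next attempt.
    result = subprocess.run(["python", SCRIPT], capture_output=True, text=True)
    print(f"run {attempt}: exit code {result.returncode}")

Running each pass in its own process presumably also sidesteps leftover globals, open file handles, and partially transferred data from a failed conversion, which is why the retry loop lives in a separate wrapper rather than inside the conversion script itself.
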
diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/run_single_sess_iter.py b/buzsaki_lab_to_nwb/tingley_metabolic/run_single_sess_iter.py new file mode 100644 index 0000000..9f67996 --- /dev/null +++ b/buzsaki_lab_to_nwb/tingley_metabolic/run_single_sess_iter.py @@ -0,0 +1,4 @@ +from nwb_conversion_tools.tools.data_transfers import deploy_process + +for _ in range(2): + res = deploy_process(command="python fully_automated_single_session.py", catch_output=True) From ff7bde7a0888c22aadfb79280391db32738f5906 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Sun, 8 May 2022 02:05:00 +0000 Subject: [PATCH 35/40] Automated changes --- .../tingley_metabolic/fully_automated_single_session.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_single_session.py b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_single_session.py index 55d5f26..68f8f00 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_single_session.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_single_session.py @@ -45,11 +45,7 @@ dandi_session_datetimes = [ "_".join(x.split("/")[1].split("_")[1].split("-")[-2:]) for x in dandi_content ] # probably a better way to do this, just brute forcing for now -sessions = natsorted( - list( - set([Path(x).parent.name for x in all_content]) - ) -) +sessions = natsorted(list(set([Path(x).parent.name for x in all_content]))) session_idxs = set(range(len(sessions))) From 5007d92bb554e5fc1ea8695a307035a66f145ccf Mon Sep 17 00:00:00 2001 From: CodyCBakerPhD Date: Thu, 12 May 2022 16:10:42 +0000 Subject: [PATCH 36/40] still wip --- .../fully_automated_conversion.py | 3 - .../fully_automated_single_session.py | 15 +- .../fully_automated_single_session_2.py | 243 ++++++++++++++++++ .../tingley_metabolic/run_single_sess_iter.py | 4 - .../run_single_sess_iter_2.py | 7 + 5 files changed, 256 insertions(+), 16 deletions(-) create mode 100644 buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_single_session_2.py delete mode 100644 buzsaki_lab_to_nwb/tingley_metabolic/run_single_sess_iter.py create mode 100644 buzsaki_lab_to_nwb/tingley_metabolic/run_single_sess_iter_2.py diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py index 5457886..17dce16 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py @@ -57,9 +57,6 @@ if f"{session_id}/{session_id}.lfp" not in all_content: print(f"\nSkipping session_id {session_id} because there was no LFP (and hence likely a bad session). 
") continue - if any([x in session_id for x in dandi_session_datetimes]): - print(f"\nSkipping session_id {session_id} because it is already on DANDI.") - continue content_to_attempt_transfer = [ f"{session_id}/{session_id}.xml", diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_single_session.py b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_single_session.py index 55d5f26..ae162c9 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_single_session.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_single_session.py @@ -45,18 +45,14 @@ dandi_session_datetimes = [ "_".join(x.split("/")[1].split("_")[1].split("-")[-2:]) for x in dandi_content ] # probably a better way to do this, just brute forcing for now -sessions = natsorted( - list( - set([Path(x).parent.name for x in all_content]) - ) -) - +sessions = set([Path(x).parent.name for x in all_content]) - set([""]) # "" for .csv +unconverted_sessions = natsorted( + [session_id for session_id in sessions if "_".join(session_id.split("_")[-2:]) not in dandi_session_datetimes] +) # natsorted for consistency on each run -session_idxs = set(range(len(sessions))) -for session_idx in session_idxs: +for session_id in unconverted_sessions: assert os.environ.get("DANDI_API_KEY"), "Set your DANDI_API_KEY!" try: - session_id = sessions[session_idx] # assert f"{session_id}/{session_id}.lfp" in all_content, "Skip session_idx {session_idx} - bad session!" if f"{session_id}/{session_id}.lfp" not in all_content: print(f"\nSkipping session_id {session_id} because there was no LFP (and hence likely a bad session). ") @@ -233,6 +229,7 @@ except OSError: if len(list(session_path.iterdir())) > 0: print(f"shutil.rmtree failed to clean directory for session {session_id}") + dandi_upload(dandiset_id=dandiset_id, nwb_folder_path=nwb_output_path) except Exception as ex: # Clean up data files in event of any error try: diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_single_session_2.py b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_single_session_2.py new file mode 100644 index 0000000..66596d1 --- /dev/null +++ b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_single_session_2.py @@ -0,0 +1,243 @@ +"""Run entire conversion.""" +import os +import traceback +from pathlib import Path +from datetime import timedelta +from warnings import simplefilter +from time import sleep + +from shutil import rmtree +from natsort import natsorted +from nwbinspector.tools import get_s3_urls_and_dandi_paths + +from nwb_conversion_tools.tools.data_transfers import ( + dandi_upload, + estimate_total_conversion_runtime, + estimate_s3_conversion_cost, + get_globus_dataset_content_sizes, + transfer_globus_content, +) +from nwb_conversion_tools.utils import load_dict_from_file, dict_deep_update +from spikeextractors import NeuroscopeRecordingExtractor + +from buzsaki_lab_to_nwb.tingley_metabolic import TingleyMetabolicConverter, get_session_datetime + +buzsaki_globus_endpoint_id = "188a6110-96db-11eb-b7a9-f57b2d55370d" +hub_globus_endpoint_id = "2b9b4d14-82a8-11ec-9f34-ed182a728dff" +# hub_globus_endpoint_id = "3d82aa0a-bc1d-11ec-8f83-e31722b18688" +dandiset_id = "000233" + +stub_test = False +conversion_factor = 0.195 # Intan +buffer_gb = 3 +data_size_threshold = 5 * 1e9 # 5 GB + +data_path = Path("/shared/catalystneuro/TingleyD/") +home_path = Path("/home/jovyan/") + +# data_path = Path("C:/Users/Raven/Documents/TingleyD/") + + +base_buzsaki_path = Path("/TingleyD/Tingley2021_ripple_glucose_paper/") +subject_id = 
"CGM2" +sleep(5) +all_content = get_globus_dataset_content_sizes( + globus_endpoint_id=buzsaki_globus_endpoint_id, path=(base_buzsaki_path / subject_id).as_posix() +) +dandi_content = list(get_s3_urls_and_dandi_paths(dandiset_id=dandiset_id).values()) +dandi_session_datetimes = [ + "_".join(x.split("/")[1].split("_")[-3:-1]) for x in dandi_content +] # probably a better way to do this, just brute forcing for now +sessions = set([Path(x).parent.name for x in all_content]) - set([""]) # "" for .csv +unconverted_sessions = natsorted( + [session_id for session_id in sessions if "_".join(session_id.split("_")[-2:]) not in dandi_session_datetimes] +) # natsorted for consistency on each run + +for session_id in unconverted_sessions: + assert os.environ.get("DANDI_API_KEY"), "Set your DANDI_API_KEY!" + try: + # assert f"{session_id}/{session_id}.lfp" in all_content, "Skip session_idx {session_idx} - bad session!" + if f"{session_id}/{session_id}.lfp" not in all_content: + print(f"\nSkipping session_id {session_id} because there was no LFP (and hence likely a bad session). ") + continue + + content_to_attempt_transfer = [ + f"{session_id}/{session_id}.xml", + f"{session_id}/{session_id}.dat", + f"{session_id}/{session_id}.lfp", + f"{session_id}/auxiliary.dat", + f"{session_id}/info.rhd", + f"{session_id}/{session_id}.SleepState.states.mat", + f"{session_id}/", + ] + content_to_attempt_transfer.extend([x for x in all_content if Path(x).suffix == ".csv"]) + # Ripple files are a little trickier, can have multiple text forms + content_to_attempt_transfer.extend( + [ + x + for x in all_content + if Path(x).parent.name == session_id + for suffix in Path(x).suffixes + if "ripples" in suffix.lower() + ] + ) + content_to_transfer = [x for x in content_to_attempt_transfer if x in all_content] + + content_to_transfer_size = sum([all_content[x] for x in content_to_transfer]) + if content_to_transfer_size > data_size_threshold: + continue + total_time = estimate_total_conversion_runtime(total_mb=content_to_transfer_size / 1e6, transfer_rate_mb=3.0) + total_cost = estimate_s3_conversion_cost(total_mb=content_to_transfer_size / 1e6, transfer_rate_mb=3.0) + print(f"Total cost of {session_id}: ${total_cost}, total time: {total_time / 3600} hr") + + metadata_path = Path(__file__).parent / "tingley_metabolic_metadata.yml" + subject_info_path = Path(__file__).parent / "tingley_metabolic_subject_info.yml" + + nwb_output_path = data_path / f"nwb_{session_id}" + nwb_output_path.mkdir(exist_ok=True) + nwbfile_path = nwb_output_path / f"{session_id}.nwb" + session_path = data_path / f"{session_id}" + session_path.mkdir(exist_ok=True) + + transfer_globus_content( + source_endpoint_id=buzsaki_globus_endpoint_id, + source_files=[ + [base_buzsaki_path / subject_id / x for x in content_to_transfer if ".csv" in x], + [base_buzsaki_path / subject_id / x for x in content_to_transfer if ".csv" not in x], + ], + destination_endpoint_id=hub_globus_endpoint_id, + destination_folder=session_path, + progress_update_rate=total_time / 20, # every 5% or so + progress_update_timeout=max(total_time * 2, 5*60), + ) + + global_metadata = load_dict_from_file(metadata_path) + subject_info_table = load_dict_from_file(subject_info_path) + + simplefilter("ignore") + conversion_options = dict() + + xml_file_path = session_path / f"{session_id}.xml" + raw_file_path = session_path / f"{session_id}.dat" + lfp_file_path = session_path / f"{session_id}.lfp" + + aux_file_path = session_path / "auxiliary.dat" + rhd_file_path = session_path / "info.rhd" + 
sleep_mat_file_path = session_path / f"{session_id}.SleepState.states.mat" + ripple_mat_file_paths = [ + x for x in session_path.iterdir() for suffix in x.suffixes if "ripples" in suffix.lower() + ] + + ecephys_start_time = get_session_datetime(session_id=session_id) + ecephys_stop_time = ecephys_start_time + timedelta( + seconds=NeuroscopeRecordingExtractor(file_path=lfp_file_path, xml_file_path=xml_file_path).get_num_frames() + / 1250.0 + ) + source_data = dict( + Glucose=dict( + session_path=str(session_path), + ecephys_start_time=str(ecephys_start_time), + ecephys_stop_time=str(ecephys_stop_time), + ), + NeuroscopeLFP=dict( + file_path=str(lfp_file_path), + gain=conversion_factor, + xml_file_path=str(xml_file_path), + spikeextractors_backend=True, + ), + ) + + if raw_file_path.is_file(): + source_data.update( + NeuroscopeRecording=dict( + file_path=str(raw_file_path), + gain=conversion_factor, + xml_file_path=str(xml_file_path), + spikeextractors_backend=True, + ) + ) + + if aux_file_path.is_file() and rhd_file_path.is_file(): + source_data.update(Accelerometer=dict(dat_file_path=str(aux_file_path), rhd_file_path=str(rhd_file_path))) + + if sleep_mat_file_path.is_file(): + source_data.update(SleepStates=dict(mat_file_path=str(sleep_mat_file_path))) + + if any(ripple_mat_file_paths): + source_data.update(Ripples=dict(mat_file_paths=ripple_mat_file_paths)) + + converter = TingleyMetabolicConverter(source_data=source_data) + metadata = converter.get_metadata() + metadata = dict_deep_update(metadata, global_metadata) + session_description = ( + "Consult Supplementary Table 1 from the publication for more information about this session." + ) + metadata["NWBFile"].update( + # session_description=subject_info_table.get( + # metadata["Subject"]["subject_id"], + # "Consult Supplementary Table 1 from the publication for more information about this session.", + # ), + # experiment_description=subject_info_table.get( + # metadata["Subject"]["subject_id"], + # "Consult Supplementary Table 1 from the publication for more information about this session.", + # ), + # Since no mapping of subject_ids to ST1, just leave this for all. 
+ session_description=session_description, + experiment_description=session_description, + ) + if metadata["Ecephys"]["Device"][0]["name"] == "Device_ecephys": + del metadata["Ecephys"]["Device"][0] + for electrode_group_metadata in metadata["Ecephys"]["ElectrodeGroup"]: + electrode_group_metadata.update(device=metadata["Ecephys"]["Device"][0]["name"]) + + ecephys_start_time_increment = ( + ecephys_start_time - converter.data_interface_objects["Glucose"].session_start_time + ).total_seconds() + conversion_options.update( + NeuroscopeLFP=dict( + stub_test=stub_test, + starting_time=ecephys_start_time_increment, + iterator_opts=dict(buffer_gb=buffer_gb, display_progress=True), + ) + ) + if raw_file_path.is_file(): + conversion_options.update( + NeuroscopeRecording=dict( + stub_test=stub_test, + starting_time=ecephys_start_time_increment, + es_key="ElectricalSeries_raw", + iterator_opts=dict(buffer_gb=buffer_gb, display_progress=True), + ) + ) + if aux_file_path.is_file() and rhd_file_path.is_file(): + conversion_options.update( + Accelerometer=dict(stub_test=stub_test, ecephys_start_time=ecephys_start_time_increment) + ) + if sleep_mat_file_path.is_file(): + conversion_options.update(SleepStates=dict(ecephys_start_time=ecephys_start_time_increment)) + if any(ripple_mat_file_paths): + conversion_options.update( + Ripples=dict(stub_test=stub_test, ecephys_start_time=ecephys_start_time_increment) + ) + + converter.run_conversion( + nwbfile_path=str(nwbfile_path), + metadata=metadata, + conversion_options=conversion_options, + overwrite=True, + ) + try: + rmtree(session_path) + except OSError: + if len(list(session_path.iterdir())) > 0: + print(f"shutil.rmtree failed to clean directory for session {session_id}") + dandi_upload(dandiset_id=dandiset_id, nwb_folder_path=nwb_output_path, cleanup=True) + except Exception as ex: + # Clean up data files in event of any error + try: + rmtree(session_path, ignore_errors=True) + rmtree(nwb_output_path, ignore_errors=True) + rmtree(nwb_output_path.parent / dandiset_id, ignore_errors=True) + except Exception: + a = 1 + assert False, "Ending session." 
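The script above decides which sessions still need conversion by comparing each session folder's trailing datetime tokens against the datetimes already present in the DANDI asset paths. A minimal, self-contained sketch of that filter, using made-up file names and dates (not entries from the actual Globus manifest):

from pathlib import Path
from natsort import natsorted

all_content = {
    "CGM37_190918_120000/CGM37_190918_120000.lfp": 10,  # assumed example entry
    "glucose.csv": 1,  # subject-level CSV; its parent name is "" and is discarded below
}
dandi_session_datetimes = ["190917_090000"]  # assumed example value

sessions = set(Path(x).parent.name for x in all_content) - {""}
unconverted_sessions = natsorted(
    s for s in sessions if "_".join(s.split("_")[-2:]) not in dandi_session_datetimes
)
print(unconverted_sessions)  # ['CGM37_190918_120000'] -> not yet on DANDI, so convert it
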
diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/run_single_sess_iter.py b/buzsaki_lab_to_nwb/tingley_metabolic/run_single_sess_iter.py deleted file mode 100644 index 9f67996..0000000 --- a/buzsaki_lab_to_nwb/tingley_metabolic/run_single_sess_iter.py +++ /dev/null @@ -1,4 +0,0 @@ -from nwb_conversion_tools.tools.data_transfers import deploy_process - -for _ in range(2): - res = deploy_process(command="python fully_automated_single_session.py", catch_output=True) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/run_single_sess_iter_2.py b/buzsaki_lab_to_nwb/tingley_metabolic/run_single_sess_iter_2.py new file mode 100644 index 0000000..deb08a3 --- /dev/null +++ b/buzsaki_lab_to_nwb/tingley_metabolic/run_single_sess_iter_2.py @@ -0,0 +1,7 @@ +import os + +for _ in range(30): + try: + os.system("python fully_automated_single_session_2.py") + except KeyboardInterrupt: + break \ No newline at end of file From 21c2a73af50d0f1a57b928e9059678a42f47038d Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Thu, 12 May 2022 16:12:26 +0000 Subject: [PATCH 37/40] Automated changes --- .../tingley_metabolic/fully_automated_single_session_2.py | 2 +- buzsaki_lab_to_nwb/tingley_metabolic/run_single_sess_iter_2.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_single_session_2.py b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_single_session_2.py index 66596d1..06294b2 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_single_session_2.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_single_session_2.py @@ -108,7 +108,7 @@ destination_endpoint_id=hub_globus_endpoint_id, destination_folder=session_path, progress_update_rate=total_time / 20, # every 5% or so - progress_update_timeout=max(total_time * 2, 5*60), + progress_update_timeout=max(total_time * 2, 5 * 60), ) global_metadata = load_dict_from_file(metadata_path) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/run_single_sess_iter_2.py b/buzsaki_lab_to_nwb/tingley_metabolic/run_single_sess_iter_2.py index deb08a3..2e31417 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/run_single_sess_iter_2.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/run_single_sess_iter_2.py @@ -4,4 +4,4 @@ try: os.system("python fully_automated_single_session_2.py") except KeyboardInterrupt: - break \ No newline at end of file + break From c0a1ce8241c6282a69c446fd4f922db2b9a759d6 Mon Sep 17 00:00:00 2001 From: CodyCBakerPhD Date: Mon, 16 May 2022 02:02:05 +0000 Subject: [PATCH 38/40] use concurrents --- .../fully_automated_conversion.py | 254 ------------------ ...py => fully_automated_multipar_session.py} | 179 ++++++++---- .../fully_automated_single_session.py | 241 ----------------- .../run_single_sess_iter_2.py | 7 - .../tingleymetabolicripplesinterface.py | 121 +++++---- 5 files changed, 196 insertions(+), 606 deletions(-) delete mode 100644 buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py rename buzsaki_lab_to_nwb/tingley_metabolic/{fully_automated_single_session_2.py => fully_automated_multipar_session.py} (56%) delete mode 100644 buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_single_session.py delete mode 100644 buzsaki_lab_to_nwb/tingley_metabolic/run_single_sess_iter_2.py diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py deleted file mode 100644 index 17dce16..0000000 --- a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_conversion.py 
+++ /dev/null @@ -1,254 +0,0 @@ -"""Run entire conversion.""" -import os -import traceback -from pathlib import Path -from datetime import timedelta -from warnings import simplefilter - -from shutil import rmtree -from natsort import natsorted -from nwbinspector.tools import get_s3_urls_and_dandi_paths - -from nwb_conversion_tools.tools.data_transfers import ( - dandi_upload, - estimate_total_conversion_runtime, - estimate_s3_conversion_cost, - get_globus_dataset_content_sizes, - transfer_globus_content, -) -from nwb_conversion_tools.utils import load_dict_from_file, dict_deep_update -from spikeextractors import NeuroscopeRecordingExtractor - -from buzsaki_lab_to_nwb.tingley_metabolic import TingleyMetabolicConverter, get_session_datetime - -buzsaki_globus_endpoint_id = "188a6110-96db-11eb-b7a9-f57b2d55370d" -hub_globus_endpoint_id = "2b9b4d14-82a8-11ec-9f34-ed182a728dff" -# hub_globus_endpoint_id = "3d82aa0a-bc1d-11ec-8f83-e31722b18688" -dandiset_id = "000233" - -stub_test = False -conversion_factor = 0.195 # Intany -buffer_gb = 50 - -data_path = Path("/shared/catalystneuro/TingleyD/") -home_path = Path("/home/jovyan/") - -# data_path = Path("C:/Users/Raven/Documents/TingleyD/") - - -base_buzsaki_path = Path("/TingleyD/Tingley2021_ripple_glucose_paper/") -subject_id = "CGM36" -all_content = get_globus_dataset_content_sizes( - globus_endpoint_id=buzsaki_globus_endpoint_id, path=(base_buzsaki_path / subject_id).as_posix() -) -dandi_content = list(get_s3_urls_and_dandi_paths(dandiset_id=dandiset_id).values()) -dandi_session_datetimes = [ - "_".join(x.split("/")[1].split("_")[3:5]) for x in dandi_content -] # probably a better way to do this, just brute forcing for now -sessions = set([Path(x).parent.name for x in all_content]) - set([""]) # "" for .csv -unconverted_sessions = natsorted( - [session_id for session_id in sessions if "_".join(session_id.split("_")[-2:]) not in dandi_session_datetimes] -) # natsorted for consistency on each run - -for session_id in unconverted_sessions: - assert os.environ.get("DANDI_API_KEY"), "Set your DANDI_API_KEY!" - try: - # assert f"{session_id}/{session_id}.lfp" in all_content, "Skip session_idx {session_idx} - bad session!" - if f"{session_id}/{session_id}.lfp" not in all_content: - print(f"\nSkipping session_id {session_id} because there was no LFP (and hence likely a bad session). ") - continue - - content_to_attempt_transfer = [ - f"{session_id}/{session_id}.xml", - f"{session_id}/{session_id}.dat", - f"{session_id}/{session_id}.lfp", - f"{session_id}/auxiliary.dat", - f"{session_id}/info.rhd", - f"{session_id}/{session_id}.SleepState.states.mat", - f"{session_id}/", - ] - content_to_attempt_transfer.extend([x for x in all_content if Path(x).suffix == ".csv"]) - # Ripple files are a little trickier, can have multiple text forms - content_to_attempt_transfer.extend( - [ - x - for x in all_content - if Path(x).parent.name == session_id - for suffix in Path(x).suffixes - if "ripples" in suffix.lower() - ] - ) - content_to_transfer = [x for x in content_to_attempt_transfer if x in all_content] - - content_to_transfer_size = sum([all_content[x] for x in content_to_transfer]) - total_time = estimate_total_conversion_runtime(total_mb=content_to_transfer_size / 1e6, transfer_rate_mb=5.0) - total_cost = estimate_s3_conversion_cost(total_mb=content_to_transfer_size / 1e6) - y_n = "" - while not (y_n.lower() == "y" or y_n.lower() == "n"): - y_n = input( - f"\nConverting session {session_id} will cost an estimated ${total_cost} and take {total_time/3600} hours. 
" - "Continue? (y/n): " - ) - assert y_n.lower() == "y" - - metadata_path = Path(__file__).parent / "tingley_metabolic_metadata.yml" - subject_info_path = Path(__file__).parent / "tingley_metabolic_subject_info.yml" - - nwb_output_path = data_path / f"nwb_{session_id}" - nwb_output_path.mkdir(exist_ok=True) - nwbfile_path = nwb_output_path / f"{session_id}.nwb" - session_path = data_path / f"{session_id}" - session_path.mkdir(exist_ok=True) - - transfer_globus_content( - source_endpoint_id=buzsaki_globus_endpoint_id, - source_files=[ - [base_buzsaki_path / subject_id / x for x in content_to_transfer if ".csv" in x], - [base_buzsaki_path / subject_id / x for x in content_to_transfer if ".csv" not in x], - ], - destination_endpoint_id=hub_globus_endpoint_id, - destination_folder=session_path, - progress_update_rate=30.0, - progress_update_timeout=total_time * 10, - ) - - global_metadata = load_dict_from_file(metadata_path) - subject_info_table = load_dict_from_file(subject_info_path) - - simplefilter("ignore") - conversion_options = dict() - - xml_file_path = session_path / f"{session_id}.xml" - raw_file_path = session_path / f"{session_id}.dat" - lfp_file_path = session_path / f"{session_id}.lfp" - - aux_file_path = session_path / "auxiliary.dat" - rhd_file_path = session_path / "info.rhd" - sleep_mat_file_path = session_path / f"{session_id}.SleepState.states.mat" - ripple_mat_file_paths = [ - x for x in session_path.iterdir() for suffix in x.suffixes if "ripples" in suffix.lower() - ] - - ecephys_start_time = get_session_datetime(session_id=session_id) - ecephys_stop_time = ecephys_start_time + timedelta( - seconds=NeuroscopeRecordingExtractor(file_path=lfp_file_path, xml_file_path=xml_file_path).get_num_frames() - / 1250.0 - ) - source_data = dict( - Glucose=dict( - session_path=str(session_path), - ecephys_start_time=str(ecephys_start_time), - ecephys_stop_time=str(ecephys_stop_time), - ), - NeuroscopeLFP=dict( - file_path=str(lfp_file_path), - gain=conversion_factor, - xml_file_path=str(xml_file_path), - spikeextractors_backend=True, - ), - ) - - if raw_file_path.is_file(): - source_data.update( - NeuroscopeRecording=dict( - file_path=str(raw_file_path), - gain=conversion_factor, - xml_file_path=str(xml_file_path), - spikeextractors_backend=True, - ) - ) - - if aux_file_path.is_file() and rhd_file_path.is_file(): - source_data.update(Accelerometer=dict(dat_file_path=str(aux_file_path), rhd_file_path=str(rhd_file_path))) - - if sleep_mat_file_path.is_file(): - source_data.update(SleepStates=dict(mat_file_path=str(sleep_mat_file_path))) - - if any(ripple_mat_file_paths): - source_data.update(Ripples=dict(mat_file_paths=ripple_mat_file_paths)) - - converter = TingleyMetabolicConverter(source_data=source_data) - metadata = converter.get_metadata() - metadata = dict_deep_update(metadata, global_metadata) - session_description = ( - "Consult Supplementary Table 1 from the publication for more information about this session." - ) - metadata["NWBFile"].update( - # session_description=subject_info_table.get( - # metadata["Subject"]["subject_id"], - # "Consult Supplementary Table 1 from the publication for more information about this session.", - # ), - # experiment_description=subject_info_table.get( - # metadata["Subject"]["subject_id"], - # "Consult Supplementary Table 1 from the publication for more information about this session.", - # ), - # Since no mapping of subject_ids to ST1, just leave this for all. 
- session_description=session_description, - experiment_description=session_description, - ) - if metadata["Ecephys"]["Device"][0]["name"] == "Device_ecephys": - del metadata["Ecephys"]["Device"][0] - for electrode_group_metadata in metadata["Ecephys"]["ElectrodeGroup"]: - electrode_group_metadata.update(device=metadata["Ecephys"]["Device"][0]["name"]) - - ecephys_start_time_increment = ( - ecephys_start_time - converter.data_interface_objects["Glucose"].session_start_time - ).total_seconds() - conversion_options.update( - NeuroscopeLFP=dict( - stub_test=stub_test, - starting_time=ecephys_start_time_increment, - iterator_opts=dict(buffer_gb=buffer_gb, display_progress=True), - ) - ) - if raw_file_path.is_file(): - conversion_options.update( - NeuroscopeRecording=dict( - stub_test=stub_test, - starting_time=ecephys_start_time_increment, - es_key="ElectricalSeries_raw", - iterator_opts=dict(buffer_gb=buffer_gb, display_progress=True), - ) - ) - if aux_file_path.is_file() and rhd_file_path.is_file(): - conversion_options.update( - Accelerometer=dict(stub_test=stub_test, ecephys_start_time=ecephys_start_time_increment) - ) - if sleep_mat_file_path.is_file(): - conversion_options.update(SleepStates=dict(ecephys_start_time=ecephys_start_time_increment)) - if any(ripple_mat_file_paths): - conversion_options.update( - Ripples=dict(stub_test=stub_test, ecephys_start_time=ecephys_start_time_increment) - ) - - converter.run_conversion( - nwbfile_path=str(nwbfile_path), - metadata=metadata, - conversion_options=conversion_options, - overwrite=True, - ) - try: - rmtree(session_path) - except OSError: - if len(list(session_path.iterdir())) > 0: - print(f"shutil.rmtree failed to clean directory for session {session_id}") - dandi_upload(dandiset_id=dandiset_id, nwb_folder_path=nwb_output_path) - - y_n = "" - while not (y_n.lower() == "y" or y_n.lower() == "n"): - y_n = input("\nContinue with dataset conversion? (y/n): ") - assert y_n.lower() == "y" - except Exception as ex: - # Clean up data files in event of any error - try: - rmtree(session_path, ignore_errors=True) - rmtree(nwb_output_path, ignore_errors=True) - rmtree(nwb_output_path.parent / dandiset_id, ignore_errors=True) - except Exception: - a = 1 - y_n = "" - while not (y_n.lower() == "y" or y_n.lower() == "n"): - y_n = input( - f"Could not convert session {session_id} due to {type(ex)}: {str(ex)}\n{traceback.format_exc()}\nWould you like to continue? 
(y/n): " - ) - assert y_n.lower() == "y" diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_single_session_2.py b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_multipar_session.py similarity index 56% rename from buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_single_session_2.py rename to buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_multipar_session.py index 66596d1..4a51903 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_single_session_2.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_multipar_session.py @@ -1,17 +1,18 @@ """Run entire conversion.""" import os -import traceback +import json from pathlib import Path from datetime import timedelta from warnings import simplefilter -from time import sleep +from concurrent.futures import ProcessPoolExecutor, as_completed +from tqdm import tqdm from shutil import rmtree from natsort import natsorted from nwbinspector.tools import get_s3_urls_and_dandi_paths from nwb_conversion_tools.tools.data_transfers import ( - dandi_upload, + automatic_dandi_upload, estimate_total_conversion_runtime, estimate_s3_conversion_cost, get_globus_dataset_content_sizes, @@ -22,45 +23,55 @@ from buzsaki_lab_to_nwb.tingley_metabolic import TingleyMetabolicConverter, get_session_datetime +assert os.environ.get("DANDI_API_KEY"), "Set your DANDI_API_KEY!" + buzsaki_globus_endpoint_id = "188a6110-96db-11eb-b7a9-f57b2d55370d" hub_globus_endpoint_id = "2b9b4d14-82a8-11ec-9f34-ed182a728dff" -# hub_globus_endpoint_id = "3d82aa0a-bc1d-11ec-8f83-e31722b18688" dandiset_id = "000233" stub_test = False conversion_factor = 0.195 # Intan buffer_gb = 3 -data_size_threshold = 5 * 1e9 # 5 GB +n_jobs = 3 +data_size_threshold = 5 * 1e9 # GB data_path = Path("/shared/catalystneuro/TingleyD/") home_path = Path("/home/jovyan/") -# data_path = Path("C:/Users/Raven/Documents/TingleyD/") - base_buzsaki_path = Path("/TingleyD/Tingley2021_ripple_glucose_paper/") -subject_id = "CGM2" -sleep(5) -all_content = get_globus_dataset_content_sizes( - globus_endpoint_id=buzsaki_globus_endpoint_id, path=(base_buzsaki_path / subject_id).as_posix() -) -dandi_content = list(get_s3_urls_and_dandi_paths(dandiset_id=dandiset_id).values()) -dandi_session_datetimes = [ - "_".join(x.split("/")[1].split("_")[-3:-1]) for x in dandi_content -] # probably a better way to do this, just brute forcing for now -sessions = set([Path(x).parent.name for x in all_content]) - set([""]) # "" for .csv -unconverted_sessions = natsorted( - [session_id for session_id in sessions if "_".join(session_id.split("_")[-2:]) not in dandi_session_datetimes] -) # natsorted for consistency on each run - -for session_id in unconverted_sessions: - assert os.environ.get("DANDI_API_KEY"), "Set your DANDI_API_KEY!" +subject_ids = iter( + ["CGM58", "CGM60", "A63", "Bruce", "DT12", "dt15", "flex1", "ros", "Vanessa"] +) # 47 and 50 have malformed csv? + + +def _transfer_and_convert(subject_id): try: - # assert f"{session_id}/{session_id}.lfp" in all_content, "Skip session_idx {session_idx} - bad session!" - if f"{session_id}/{session_id}.lfp" not in all_content: - print(f"\nSkipping session_id {session_id} because there was no LFP (and hence likely a bad session). 
") - continue + content_cache_file_path = Path(f"/shared/catalystneuro/TingleyD/cache/cache_content_{subject_id}") + if not content_cache_file_path.exists(): + all_content = get_globus_dataset_content_sizes( + globus_endpoint_id=buzsaki_globus_endpoint_id, path=(base_buzsaki_path / subject_id).as_posix() + ) + with open(content_cache_file_path, mode="w") as fp: + json.dump(all_content, fp) + else: + with open(content_cache_file_path, mode="r") as fp: + all_content = json.load(fp) + dandi_content = list(get_s3_urls_and_dandi_paths(dandiset_id=dandiset_id).values()) + dandi_session_datetimes = [ + "_".join(x.split("/")[1].split("_")[-3:-1]) for x in dandi_content + ] # probably a better way to do this, just brute forcing for now + sessions = set([Path(x).parent.name for x in all_content]) - set([""]) # "" for .csv + unconverted_sessions = natsorted( + [ + session_id + for session_id in sessions + if "_".join(session_id.split("_")[-2:]) not in dandi_session_datetimes + ] + ) # natsorted for consistency on each run + gen = iter(unconverted_sessions) + session_id = next(gen) content_to_attempt_transfer = [ f"{session_id}/{session_id}.xml", f"{session_id}/{session_id}.dat", @@ -84,8 +95,70 @@ content_to_transfer = [x for x in content_to_attempt_transfer if x in all_content] content_to_transfer_size = sum([all_content[x] for x in content_to_transfer]) - if content_to_transfer_size > data_size_threshold: - continue + j = 0 + stop = False + while ( + f"{session_id}/{session_id}.lfp" not in all_content or content_to_transfer_size > data_size_threshold + ) and j <= len(unconverted_sessions): + j += 1 + try: + session_id = next(gen) + content_to_attempt_transfer = [ + f"{session_id}/{session_id}.xml", + f"{session_id}/{session_id}.dat", + f"{session_id}/{session_id}.lfp", + f"{session_id}/auxiliary.dat", + f"{session_id}/info.rhd", + f"{session_id}/{session_id}.SleepState.states.mat", + f"{session_id}/", + ] + content_to_attempt_transfer.extend([x for x in all_content if Path(x).suffix == ".csv"]) + # Ripple files are a little trickier, can have multiple text forms + content_to_attempt_transfer.extend( + [ + x + for x in all_content + if Path(x).parent.name == session_id + for suffix in Path(x).suffixes + if "ripples" in suffix.lower() + ] + ) + content_to_transfer = [x for x in content_to_attempt_transfer if x in all_content] + + content_to_transfer_size = sum([all_content[x] for x in content_to_transfer]) + except StopIteration: + try: + subject_id = next(subject_ids) + content_cache_file_path = Path(f"/shared/catalystneuro/TingleyD/cache/cache_content_{subject_id}") + if not content_cache_file_path.exists(): + all_content = get_globus_dataset_content_sizes( + globus_endpoint_id=buzsaki_globus_endpoint_id, + path=(base_buzsaki_path / subject_id).as_posix(), + ) + with open(content_cache_file_path, mode="w") as fp: + json.dump(all_content, fp) + else: + with open(content_cache_file_path, mode="r") as fp: + all_content = json.load(fp) + dandi_content = list(get_s3_urls_and_dandi_paths(dandiset_id=dandiset_id).values()) + dandi_session_datetimes = [ + "_".join(x.split("/")[1].split("_")[-3:-1]) for x in dandi_content + ] # probably a better way to do this, just brute forcing for now + sessions = set([Path(x).parent.name for x in all_content]) - set([""]) # "" for .csv + unconverted_sessions = natsorted( + [ + session_id + for session_id in sessions + if "_".join(session_id.split("_")[-2:]) not in dandi_session_datetimes + ] + ) # natsorted for consistency on each run + j = 0 + except StopIteration: + 
stop = True + print("\nAll remaining sessions missing LFP or too large.") + if j == len(unconverted_sessions) or stop: + assert False + total_time = estimate_total_conversion_runtime(total_mb=content_to_transfer_size / 1e6, transfer_rate_mb=3.0) total_cost = estimate_s3_conversion_cost(total_mb=content_to_transfer_size / 1e6, transfer_rate_mb=3.0) print(f"Total cost of {session_id}: ${total_cost}, total time: {total_time / 3600} hr") @@ -108,7 +181,7 @@ destination_endpoint_id=hub_globus_endpoint_id, destination_folder=session_path, progress_update_rate=total_time / 20, # every 5% or so - progress_update_timeout=max(total_time * 2, 5*60), + progress_update_timeout=max(total_time * 2, 5 * 60), ) global_metadata = load_dict_from_file(metadata_path) @@ -226,18 +299,34 @@ conversion_options=conversion_options, overwrite=True, ) - try: - rmtree(session_path) - except OSError: - if len(list(session_path.iterdir())) > 0: - print(f"shutil.rmtree failed to clean directory for session {session_id}") - dandi_upload(dandiset_id=dandiset_id, nwb_folder_path=nwb_output_path, cleanup=True) - except Exception as ex: - # Clean up data files in event of any error - try: - rmtree(session_path, ignore_errors=True) - rmtree(nwb_output_path, ignore_errors=True) - rmtree(nwb_output_path.parent / dandiset_id, ignore_errors=True) - except Exception: - a = 1 - assert False, "Ending session." + return True, session_path, nwb_output_path + except Exception: + return False, False, False + + +def _transfer_convert_and_upload(subject_id): + try: + with ProcessPoolExecutor(max_workers=1) as executor: + future = executor.submit(_transfer_and_convert, subject_id=subject_id) + success, session_path, nwb_folder_path = future.result() + if success: + try: + rmtree(session_path, ignore_errors=True) + automatic_dandi_upload(dandiset_id=dandiset_id, nwb_folder_path=nwb_folder_path) + finally: + rmtree(nwb_folder_path, ignore_errors=True) + rmtree(nwb_folder_path.parent / dandiset_id, ignore_errors=True) + finally: # try to cleanup again + rmtree(session_path, ignore_errors=True) + rmtree(nwb_folder_path, ignore_errors=True) + rmtree(nwb_folder_path.parent / dandiset_id, ignore_errors=True) + + +futures = [] +n_jobs = None if n_jobs == -1 else n_jobs # concurrents uses None instead of -1 for 'auto' mode +with ProcessPoolExecutor(max_workers=n_jobs) as executor: + for subject_id in subject_ids: + futures.append(executor.submit(_transfer_convert_and_upload, subject_id=subject_id)) +nwbfiles_iterable = tqdm(as_completed(futures), desc="Converting subjects...") +for future in nwbfiles_iterable: + _ = future.result() diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_single_session.py b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_single_session.py deleted file mode 100644 index ae162c9..0000000 --- a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_single_session.py +++ /dev/null @@ -1,241 +0,0 @@ -"""Run entire conversion.""" -import os -import traceback -from pathlib import Path -from datetime import timedelta -from warnings import simplefilter - -from shutil import rmtree -from natsort import natsorted -from nwbinspector.tools import get_s3_urls_and_dandi_paths - -from nwb_conversion_tools.tools.data_transfers import ( - dandi_upload, - estimate_total_conversion_runtime, - estimate_s3_conversion_cost, - get_globus_dataset_content_sizes, - transfer_globus_content, -) -from nwb_conversion_tools.utils import load_dict_from_file, dict_deep_update -from spikeextractors import NeuroscopeRecordingExtractor - 
-from buzsaki_lab_to_nwb.tingley_metabolic import TingleyMetabolicConverter, get_session_datetime - -buzsaki_globus_endpoint_id = "188a6110-96db-11eb-b7a9-f57b2d55370d" -hub_globus_endpoint_id = "2b9b4d14-82a8-11ec-9f34-ed182a728dff" -# hub_globus_endpoint_id = "3d82aa0a-bc1d-11ec-8f83-e31722b18688" -dandiset_id = "000233" - -stub_test = False -conversion_factor = 0.195 # Intany -buffer_gb = 50 - -data_path = Path("/shared/catalystneuro/TingleyD/") -home_path = Path("/home/jovyan/") - -# data_path = Path("C:/Users/Raven/Documents/TingleyD/") - - -base_buzsaki_path = Path("/TingleyD/Tingley2021_ripple_glucose_paper/") -subject_id = "CGM37" -all_content = get_globus_dataset_content_sizes( - globus_endpoint_id=buzsaki_globus_endpoint_id, path=(base_buzsaki_path / subject_id).as_posix() -) -dandi_content = list(get_s3_urls_and_dandi_paths(dandiset_id=dandiset_id).values()) -dandi_session_datetimes = [ - "_".join(x.split("/")[1].split("_")[1].split("-")[-2:]) for x in dandi_content -] # probably a better way to do this, just brute forcing for now -sessions = set([Path(x).parent.name for x in all_content]) - set([""]) # "" for .csv -unconverted_sessions = natsorted( - [session_id for session_id in sessions if "_".join(session_id.split("_")[-2:]) not in dandi_session_datetimes] -) # natsorted for consistency on each run - -for session_id in unconverted_sessions: - assert os.environ.get("DANDI_API_KEY"), "Set your DANDI_API_KEY!" - try: - # assert f"{session_id}/{session_id}.lfp" in all_content, "Skip session_idx {session_idx} - bad session!" - if f"{session_id}/{session_id}.lfp" not in all_content: - print(f"\nSkipping session_id {session_id} because there was no LFP (and hence likely a bad session). ") - continue - if any([x in session_id for x in dandi_session_datetimes]): - print(f"\nSkipping session_id {session_id} because it is already on DANDI.") - continue - - content_to_attempt_transfer = [ - f"{session_id}/{session_id}.xml", - f"{session_id}/{session_id}.dat", - f"{session_id}/{session_id}.lfp", - f"{session_id}/auxiliary.dat", - f"{session_id}/info.rhd", - f"{session_id}/{session_id}.SleepState.states.mat", - f"{session_id}/", - ] - content_to_attempt_transfer.extend([x for x in all_content if Path(x).suffix == ".csv"]) - # Ripple files are a little trickier, can have multiple text forms - content_to_attempt_transfer.extend( - [ - x - for x in all_content - if Path(x).parent.name == session_id - for suffix in Path(x).suffixes - if "ripples" in suffix.lower() - ] - ) - content_to_transfer = [x for x in content_to_attempt_transfer if x in all_content] - - content_to_transfer_size = sum([all_content[x] for x in content_to_transfer]) - total_time = estimate_total_conversion_runtime(total_mb=content_to_transfer_size / 1e6, transfer_rate_mb=3.0) - total_cost = estimate_s3_conversion_cost(total_mb=content_to_transfer_size / 1e6, transfer_rate_mb=3.0) - print(f"Total cost of {session_id}: ${total_cost}, total time: {total_time / 3600} hr") - - metadata_path = Path(__file__).parent / "tingley_metabolic_metadata.yml" - subject_info_path = Path(__file__).parent / "tingley_metabolic_subject_info.yml" - - nwb_output_path = data_path / f"nwb_{session_id}" - nwb_output_path.mkdir(exist_ok=True) - nwbfile_path = nwb_output_path / f"{session_id}.nwb" - session_path = data_path / f"{session_id}" - session_path.mkdir(exist_ok=True) - - transfer_globus_content( - source_endpoint_id=buzsaki_globus_endpoint_id, - source_files=[ - [base_buzsaki_path / subject_id / x for x in content_to_transfer if ".csv" in 
x], - [base_buzsaki_path / subject_id / x for x in content_to_transfer if ".csv" not in x], - ], - destination_endpoint_id=hub_globus_endpoint_id, - destination_folder=session_path, - progress_update_rate=30.0, - progress_update_timeout=total_time * 10, - ) - - global_metadata = load_dict_from_file(metadata_path) - subject_info_table = load_dict_from_file(subject_info_path) - - simplefilter("ignore") - conversion_options = dict() - - xml_file_path = session_path / f"{session_id}.xml" - raw_file_path = session_path / f"{session_id}.dat" - lfp_file_path = session_path / f"{session_id}.lfp" - - aux_file_path = session_path / "auxiliary.dat" - rhd_file_path = session_path / "info.rhd" - sleep_mat_file_path = session_path / f"{session_id}.SleepState.states.mat" - ripple_mat_file_paths = [ - x for x in session_path.iterdir() for suffix in x.suffixes if "ripples" in suffix.lower() - ] - - ecephys_start_time = get_session_datetime(session_id=session_id) - ecephys_stop_time = ecephys_start_time + timedelta( - seconds=NeuroscopeRecordingExtractor(file_path=lfp_file_path, xml_file_path=xml_file_path).get_num_frames() - / 1250.0 - ) - source_data = dict( - Glucose=dict( - session_path=str(session_path), - ecephys_start_time=str(ecephys_start_time), - ecephys_stop_time=str(ecephys_stop_time), - ), - NeuroscopeLFP=dict( - file_path=str(lfp_file_path), - gain=conversion_factor, - xml_file_path=str(xml_file_path), - spikeextractors_backend=True, - ), - ) - - if raw_file_path.is_file(): - source_data.update( - NeuroscopeRecording=dict( - file_path=str(raw_file_path), - gain=conversion_factor, - xml_file_path=str(xml_file_path), - spikeextractors_backend=True, - ) - ) - - if aux_file_path.is_file() and rhd_file_path.is_file(): - source_data.update(Accelerometer=dict(dat_file_path=str(aux_file_path), rhd_file_path=str(rhd_file_path))) - - if sleep_mat_file_path.is_file(): - source_data.update(SleepStates=dict(mat_file_path=str(sleep_mat_file_path))) - - if any(ripple_mat_file_paths): - source_data.update(Ripples=dict(mat_file_paths=ripple_mat_file_paths)) - - converter = TingleyMetabolicConverter(source_data=source_data) - metadata = converter.get_metadata() - metadata = dict_deep_update(metadata, global_metadata) - session_description = ( - "Consult Supplementary Table 1 from the publication for more information about this session." - ) - metadata["NWBFile"].update( - # session_description=subject_info_table.get( - # metadata["Subject"]["subject_id"], - # "Consult Supplementary Table 1 from the publication for more information about this session.", - # ), - # experiment_description=subject_info_table.get( - # metadata["Subject"]["subject_id"], - # "Consult Supplementary Table 1 from the publication for more information about this session.", - # ), - # Since no mapping of subject_ids to ST1, just leave this for all. 
- session_description=session_description, - experiment_description=session_description, - ) - if metadata["Ecephys"]["Device"][0]["name"] == "Device_ecephys": - del metadata["Ecephys"]["Device"][0] - for electrode_group_metadata in metadata["Ecephys"]["ElectrodeGroup"]: - electrode_group_metadata.update(device=metadata["Ecephys"]["Device"][0]["name"]) - - ecephys_start_time_increment = ( - ecephys_start_time - converter.data_interface_objects["Glucose"].session_start_time - ).total_seconds() - conversion_options.update( - NeuroscopeLFP=dict( - stub_test=stub_test, - starting_time=ecephys_start_time_increment, - iterator_opts=dict(buffer_gb=buffer_gb, display_progress=True), - ) - ) - if raw_file_path.is_file(): - conversion_options.update( - NeuroscopeRecording=dict( - stub_test=stub_test, - starting_time=ecephys_start_time_increment, - es_key="ElectricalSeries_raw", - iterator_opts=dict(buffer_gb=buffer_gb, display_progress=True), - ) - ) - if aux_file_path.is_file() and rhd_file_path.is_file(): - conversion_options.update( - Accelerometer=dict(stub_test=stub_test, ecephys_start_time=ecephys_start_time_increment) - ) - if sleep_mat_file_path.is_file(): - conversion_options.update(SleepStates=dict(ecephys_start_time=ecephys_start_time_increment)) - if any(ripple_mat_file_paths): - conversion_options.update( - Ripples=dict(stub_test=stub_test, ecephys_start_time=ecephys_start_time_increment) - ) - - converter.run_conversion( - nwbfile_path=str(nwbfile_path), - metadata=metadata, - conversion_options=conversion_options, - overwrite=True, - ) - try: - rmtree(session_path) - except OSError: - if len(list(session_path.iterdir())) > 0: - print(f"shutil.rmtree failed to clean directory for session {session_id}") - dandi_upload(dandiset_id=dandiset_id, nwb_folder_path=nwb_output_path) - except Exception as ex: - # Clean up data files in event of any error - try: - rmtree(session_path, ignore_errors=True) - rmtree(nwb_output_path, ignore_errors=True) - rmtree(nwb_output_path.parent / dandiset_id, ignore_errors=True) - except Exception: - a = 1 - assert False, "Ending session." 
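The fully_automated_multipar_session.py script renamed earlier in this patch replaces the sequential per-session loop with process-based parallelism over subjects. A minimal sketch of that scheduling pattern, with a placeholder worker function and example subject IDs rather than the repository's actual helpers:

from concurrent.futures import ProcessPoolExecutor, as_completed

def transfer_convert_and_upload(subject_id: str) -> str:
    # Placeholder for the real per-subject pipeline: Globus transfer -> NWB conversion
    # -> DANDI upload -> cleanup of the temporary session folder.
    return subject_id

if __name__ == "__main__":
    subject_ids = ["CGM31", "CGM32", "CGM36"]  # assumed example values
    n_jobs = 2
    with ProcessPoolExecutor(max_workers=n_jobs) as executor:
        # One worker process per subject; results are handled as each subject finishes.
        futures = [executor.submit(transfer_convert_and_upload, s) for s in subject_ids]
        for future in as_completed(futures):
            print(f"finished subject {future.result()}")
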
diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/run_single_sess_iter_2.py b/buzsaki_lab_to_nwb/tingley_metabolic/run_single_sess_iter_2.py deleted file mode 100644 index deb08a3..0000000 --- a/buzsaki_lab_to_nwb/tingley_metabolic/run_single_sess_iter_2.py +++ /dev/null @@ -1,7 +0,0 @@ -import os - -for _ in range(30): - try: - os.system("python fully_automated_single_session_2.py") - except KeyboardInterrupt: - break \ No newline at end of file diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py index dcb500b..3af3bb3 100644 --- a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py +++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py @@ -13,67 +13,70 @@ def __init__(self, mat_file_paths: list): super().__init__(mat_file_paths=mat_file_paths) def run_conversion(self, nwbfile: NWBFile, metadata, stub_test: bool = False, ecephys_start_time: float = 0.0): - stub_events = 5 if stub_test else None - processing_module = get_module( - nwbfile=nwbfile, - name="ecephys", - description="Intermediate data from extracellular electrophysiology recordings, e.g., LFP.", - ) + try: + stub_events = 5 if stub_test else None + processing_module = get_module( + nwbfile=nwbfile, + name="ecephys", + description="Intermediate data from extracellular electrophysiology recordings, e.g., LFP.", + ) - for mat_file_path in self.source_data["mat_file_paths"]: - table_name = mat_file_path.suffixes[-3].lstrip(".").title() - try: - mat_file = loadmat(file_name=mat_file_path) - mat_file_is_scipy_readable = True - except NotImplementedError: - mat_file_is_scipy_readable = False - print(f"RippleInterface is unable to convert {mat_file_path} due to HDF5 version!") + for mat_file_path in self.source_data["mat_file_paths"]: + table_name = mat_file_path.suffixes[-3].lstrip(".").title() + try: + mat_file = loadmat(file_name=mat_file_path) + mat_file_is_scipy_readable = True + except NotImplementedError: + mat_file_is_scipy_readable = False + print(f"RippleInterface is unable to convert {mat_file_path} due to HDF5 version!") - if mat_file_is_scipy_readable: - mat_data = mat_file["ripples"] - start_and_stop_times = mat_data["timestamps"][0][0][:stub_events] - durations = [x[0] for x in mat_data["data"][0][0]["duration"][0][0]][:stub_events] - peaks = [x[0] for x in mat_data["peaks"][0][0]][:stub_events] - peak_normed_powers = [x[0] for x in mat_data["peakNormedPower"][0][0]][:stub_events] - peak_frequencies = [x[0] for x in mat_data["data"][0][0]["peakFrequency"][0][0]][:stub_events] - peak_amplitudes = [x[0] for x in mat_data["data"][0][0]["peakAmplitude"][0][0]][:stub_events] - ripples = mat_data["maps"][0][0]["ripples"][0][0][:stub_events] - frequencies = mat_data["maps"][0][0]["frequency"][0][0][:stub_events] - phases = mat_data["maps"][0][0]["phase"][0][0][:stub_events] - amplitudes = mat_data["maps"][0][0]["amplitude"][0][0][:stub_events] + if mat_file_is_scipy_readable: + mat_data = mat_file["ripples"] + start_and_stop_times = mat_data["timestamps"][0][0][:stub_events] + durations = [x[0] for x in mat_data["data"][0][0]["duration"][0][0]][:stub_events] + peaks = [x[0] for x in mat_data["peaks"][0][0]][:stub_events] + peak_normed_powers = [x[0] for x in mat_data["peakNormedPower"][0][0]][:stub_events] + peak_frequencies = [x[0] for x in mat_data["data"][0][0]["peakFrequency"][0][0]][:stub_events] + peak_amplitudes = [x[0] for x in 
mat_data["data"][0][0]["peakAmplitude"][0][0]][:stub_events] + ripples = mat_data["maps"][0][0]["ripples"][0][0][:stub_events] + frequencies = mat_data["maps"][0][0]["frequency"][0][0][:stub_events] + phases = mat_data["maps"][0][0]["phase"][0][0][:stub_events] + amplitudes = mat_data["maps"][0][0]["amplitude"][0][0][:stub_events] - descriptions = dict( - duration="Duration of the ripple event.", - peak="Peak of the ripple.", - peak_normed_power="Normed power of the peak.", - peak_frequency="Peak frequency of the ripple.", - peak_amplitude="Peak amplitude of the ripple.", - ) - indexed_descriptions = dict( - ripple="Extracted ripple data.", - frequency="Frequency of each point on the ripple.", - phase="Phase of each point on the ripple.", - amplitude="Amplitude of each point on the ripple.", - ) - - table = TimeIntervals(name=table_name, description=f"Identified {table_name} events and their metrics.") - for start_time, stop_time in start_and_stop_times: - table.add_row(start_time=ecephys_start_time + start_time, stop_time=ecephys_start_time + stop_time) - for column_name, column_data in zip( - list(descriptions), [durations, peaks, peak_normed_powers, peak_frequencies, peak_amplitudes] - ): - table.add_column( - name=column_name, - description=descriptions[column_name], - data=H5DataIO(column_data, compression="gzip"), + descriptions = dict( + duration="Duration of the ripple event.", + peak="Peak of the ripple.", + peak_normed_power="Normed power of the peak.", + peak_frequency="Peak frequency of the ripple.", + peak_amplitude="Peak amplitude of the ripple.", ) - for column_name, column_data in zip( - list(indexed_descriptions), [ripples, frequencies, phases, amplitudes] - ): - table.add_column( - name=column_name, - description=indexed_descriptions[column_name], - index=list(range(column_data.shape[0])), - data=H5DataIO(column_data, compression="gzip"), + indexed_descriptions = dict( + ripple="Extracted ripple data.", + frequency="Frequency of each point on the ripple.", + phase="Phase of each point on the ripple.", + amplitude="Amplitude of each point on the ripple.", ) - processing_module.add(table) + + table = TimeIntervals(name=table_name, description=f"Identified {table_name} events and their metrics.") + for start_time, stop_time in start_and_stop_times: + table.add_row(start_time=ecephys_start_time + start_time, stop_time=ecephys_start_time + stop_time) + for column_name, column_data in zip( + list(descriptions), [durations, peaks, peak_normed_powers, peak_frequencies, peak_amplitudes] + ): + table.add_column( + name=column_name, + description=descriptions[column_name], + data=H5DataIO(column_data, compression="gzip"), + ) + for column_name, column_data in zip( + list(indexed_descriptions), [ripples, frequencies, phases, amplitudes] + ): + table.add_column( + name=column_name, + description=indexed_descriptions[column_name], + index=list(range(column_data.shape[0])), + data=H5DataIO(column_data, compression="gzip"), + ) + processing_module.add(table) + except Exception as ex: + print("Unable to convert Ripples!") \ No newline at end of file From fcf021f53443ff4f444df46b788501074e3f0825 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Mon, 16 May 2022 02:04:08 +0000 Subject: [PATCH 39/40] Automated changes --- .../tingleymetabolicripplesinterface.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py index 
3af3bb3..5928237 100644
--- a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py
+++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py
@@ -57,9 +57,13 @@ def run_conversion(self, nwbfile: NWBFile, metadata, stub_test: bool = False, ec
                 amplitude="Amplitude of each point on the ripple.",
             )

-            table = TimeIntervals(name=table_name, description=f"Identified {table_name} events and their metrics.")
+            table = TimeIntervals(
+                name=table_name, description=f"Identified {table_name} events and their metrics."
+            )
             for start_time, stop_time in start_and_stop_times:
-                table.add_row(start_time=ecephys_start_time + start_time, stop_time=ecephys_start_time + stop_time)
+                table.add_row(
+                    start_time=ecephys_start_time + start_time, stop_time=ecephys_start_time + stop_time
+                )
             for column_name, column_data in zip(
                 list(descriptions), [durations, peaks, peak_normed_powers, peak_frequencies, peak_amplitudes]
             ):
@@ -79,4 +83,4 @@ def run_conversion(self, nwbfile: NWBFile, metadata, stub_test: bool = False, ec
                 )
             processing_module.add(table)
         except Exception as ex:
-            print("Unable to convert Ripples!")
\ No newline at end of file
+            print("Unable to convert Ripples!")

From dd2d0ff8c66eba9595872f1d2f3c7bb915da23da Mon Sep 17 00:00:00 2001
From: CodyCBakerPhD
Date: Tue, 17 May 2022 14:36:27 +0000
Subject: [PATCH 40/40] update

---
 .../fully_automated_multipar_session.py       | 239 ++++++++----------
 .../tingleymetabolicaccelerometerinterface.py |   3 +-
 2 files changed, 108 insertions(+), 134 deletions(-)

diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_multipar_session.py b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_multipar_session.py
index 4a51903..0a09848 100644
--- a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_multipar_session.py
+++ b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_multipar_session.py
@@ -1,14 +1,17 @@
 """Run entire conversion."""
 import os
 import json
+import traceback
 from pathlib import Path
 from datetime import timedelta
 from warnings import simplefilter
 from concurrent.futures import ProcessPoolExecutor, as_completed
-from tqdm import tqdm
+from collections import defaultdict
+from time import sleep
 from shutil import rmtree

 from natsort import natsorted
+from tqdm import tqdm

 from nwbinspector.tools import get_s3_urls_and_dandi_paths
 from nwb_conversion_tools.tools.data_transfers import (
@@ -33,35 +36,43 @@
 conversion_factor = 0.195  # Intan
 buffer_gb = 3
 n_jobs = 3
-data_size_threshold = 5 * 1e9  # GB
+data_size_threshold = 45 * 1e9  # bytes (45 GB)

-data_path = Path("/shared/catalystneuro/TingleyD/")
-home_path = Path("/home/jovyan/")
+cache_path = Path("/shared/catalystneuro/TingleyD/cache")
+cache_path.mkdir(exist_ok=True)
 base_buzsaki_path = Path("/TingleyD/Tingley2021_ripple_glucose_paper/")

-subject_ids = iter(
-    ["CGM58", "CGM60", "A63", "Bruce", "DT12", "dt15", "flex1", "ros", "Vanessa"]
-)  # 47 and 50 have malformed csv?
-
-
-def _transfer_and_convert(subject_id):
+subject_ids = ["bruce", "dt15", "flex1", "ros", "Vanessa"]
+subject_ids.extend(
+    [f"CGM{x}" for x in [1, 2, 3, 4, 30, 31, 32, 36, 37, 39, 40, 41, 46, 48, 49, 51, 52, 55, 57, 58, 60]]
+)  # 47 and 50 have malformed csv? Something is also up with A63 and DT12
+
+content_cache_file_path = cache_path / "cache_full_manifest.json"
+if not content_cache_file_path.exists():
+    print("No cache found! Fetching manifest from Globus.")
+    all_content = get_globus_dataset_content_sizes(
+        globus_endpoint_id=buzsaki_globus_endpoint_id, path=base_buzsaki_path.as_posix()
+    )
+    contents_per_subject = defaultdict(dict)
+    for file, size in all_content.items():
+        subject_id = file.split("/")[0]
+        contents_per_subject[subject_id].update({file: size})
+    with open(content_cache_file_path, mode="w") as fp:
+        json.dump(contents_per_subject, fp)
+else:
+    print("Cache found! Loading manifest.")
+    with open(content_cache_file_path, mode="r") as fp:
+        contents_per_subject = json.load(fp)
+
+
+def _transfer_and_convert(subject_id, subject_contents):
     try:
-        content_cache_file_path = Path(f"/shared/catalystneuro/TingleyD/cache/cache_content_{subject_id}")
-        if not content_cache_file_path.exists():
-            all_content = get_globus_dataset_content_sizes(
-                globus_endpoint_id=buzsaki_globus_endpoint_id, path=(base_buzsaki_path / subject_id).as_posix()
-            )
-            with open(content_cache_file_path, mode="w") as fp:
-                json.dump(all_content, fp)
-        else:
-            with open(content_cache_file_path, mode="r") as fp:
-                all_content = json.load(fp)
         dandi_content = list(get_s3_urls_and_dandi_paths(dandiset_id=dandiset_id).values())
         dandi_session_datetimes = [
             "_".join(x.split("/")[1].split("_")[-3:-1]) for x in dandi_content
         ]  # probably a better way to do this, just brute forcing for now
-        sessions = set([Path(x).parent.name for x in all_content]) - set([""])  # "" for .csv
+        sessions = set([Path(x).parent.name for x in subject_contents]) - set([subject_id])  # subject_id for .csv
         unconverted_sessions = natsorted(
             [
                 session_id
@@ -70,122 +81,81 @@ def _transfer_and_convert(subject_id):
             ]
         )  # natsorted for consistency on each run

-        gen = iter(unconverted_sessions)
-        session_id = next(gen)
-        content_to_attempt_transfer = [
-            f"{session_id}/{session_id}.xml",
-            f"{session_id}/{session_id}.dat",
-            f"{session_id}/{session_id}.lfp",
-            f"{session_id}/auxiliary.dat",
-            f"{session_id}/info.rhd",
-            f"{session_id}/{session_id}.SleepState.states.mat",
-            f"{session_id}/",
-        ]
-        content_to_attempt_transfer.extend([x for x in all_content if Path(x).suffix == ".csv"])
-        # Ripple files are a little trickier, can have multiple text forms
-        content_to_attempt_transfer.extend(
-            [
-                x
-                for x in all_content
-                if Path(x).parent.name == session_id
-                for suffix in Path(x).suffixes
-                if "ripples" in suffix.lower()
+        j = 1
+        for j, session_id in enumerate(unconverted_sessions, start=1):
+            if f"{subject_id}/{session_id}/{session_id}.lfp" not in subject_contents:
+                print(f"Session {session_id} has no LFP! Skipping.", flush=True)
+                continue
+            if subject_id not in session_id:
+                print(f"Session {session_id} is likely an analysis folder!", flush=True)
+                continue
+
+            content_to_attempt_transfer = [
+                f"{subject_id}/{session_id}/{session_id}.xml",
+                f"{subject_id}/{session_id}/{session_id}.dat",
+                f"{subject_id}/{session_id}/{session_id}.lfp",
+                f"{subject_id}/{session_id}/auxiliary.dat",
+                f"{subject_id}/{session_id}/info.rhd",
+                f"{subject_id}/{session_id}/{session_id}.SleepState.states.mat",
             ]
-        content_to_transfer = [x for x in content_to_attempt_transfer if x in all_content]
-
-        content_to_transfer_size = sum([all_content[x] for x in content_to_transfer])
-        j = 0
-        stop = False
-        while (
-            f"{session_id}/{session_id}.lfp" not in all_content or content_to_transfer_size > data_size_threshold
-        ) and j <= len(unconverted_sessions):
-            j += 1
-            try:
-                session_id = next(gen)
-                content_to_attempt_transfer = [
-                    f"{session_id}/{session_id}.xml",
-                    f"{session_id}/{session_id}.dat",
-                    f"{session_id}/{session_id}.lfp",
-                    f"{session_id}/auxiliary.dat",
-                    f"{session_id}/info.rhd",
-                    f"{session_id}/{session_id}.SleepState.states.mat",
-                    f"{session_id}/",
+            content_to_attempt_transfer.extend([x for x in subject_contents if Path(x).suffix == ".csv"])
+            # Ripple files are a little trickier, can have multiple text forms
+            content_to_attempt_transfer.extend(
+                [
+                    x
+                    for x in subject_contents
+                    if Path(x).parent.name == session_id
+                    for suffix in Path(x).suffixes
+                    if "ripples" in suffix.lower()
                 ]
-                content_to_attempt_transfer.extend([x for x in all_content if Path(x).suffix == ".csv"])
-                # Ripple files are a little trickier, can have multiple text forms
-                content_to_attempt_transfer.extend(
-                    [
-                        x
-                        for x in all_content
-                        if Path(x).parent.name == session_id
-                        for suffix in Path(x).suffixes
-                        if "ripples" in suffix.lower()
-                    ]
+            )
+            content_to_transfer = [x for x in content_to_attempt_transfer if x in subject_contents]
+
+            content_to_transfer_size = sum([subject_contents[x] for x in content_to_transfer])
+            if content_to_transfer_size > data_size_threshold:
+                print(
+                    f"Session {session_id} with size ({content_to_transfer_size / 1e9} GB) is larger than specified "
+                    f"threshold ({data_size_threshold / 1e9} GB)! Skipping.",
+                    flush=True,
                 )
-                content_to_transfer = [x for x in content_to_attempt_transfer if x in all_content]
-
-                content_to_transfer_size = sum([all_content[x] for x in content_to_transfer])
-            except StopIteration:
-                try:
-                    subject_id = next(subject_ids)
-                    content_cache_file_path = Path(f"/shared/catalystneuro/TingleyD/cache/cache_content_{subject_id}")
-                    if not content_cache_file_path.exists():
-                        all_content = get_globus_dataset_content_sizes(
-                            globus_endpoint_id=buzsaki_globus_endpoint_id,
-                            path=(base_buzsaki_path / subject_id).as_posix(),
-                        )
-                        with open(content_cache_file_path, mode="w") as fp:
-                            json.dump(all_content, fp)
-                    else:
-                        with open(content_cache_file_path, mode="r") as fp:
-                            all_content = json.load(fp)
-                    dandi_content = list(get_s3_urls_and_dandi_paths(dandiset_id=dandiset_id).values())
-                    dandi_session_datetimes = [
-                        "_".join(x.split("/")[1].split("_")[-3:-1]) for x in dandi_content
-                    ]  # probably a better way to do this, just brute forcing for now
-                    sessions = set([Path(x).parent.name for x in all_content]) - set([""])  # "" for .csv
-                    unconverted_sessions = natsorted(
-                        [
-                            session_id
-                            for session_id in sessions
-                            if "_".join(session_id.split("_")[-2:]) not in dandi_session_datetimes
-                        ]
-                    )  # natsorted for consistency on each run
-                    j = 0
-                except StopIteration:
-                    stop = True
-                    print("\nAll remaining sessions missing LFP or too large.")
-        if j == len(unconverted_sessions) or stop:
-            assert False
-
-        total_time = estimate_total_conversion_runtime(total_mb=content_to_transfer_size / 1e6, transfer_rate_mb=3.0)
-        total_cost = estimate_s3_conversion_cost(total_mb=content_to_transfer_size / 1e6, transfer_rate_mb=3.0)
-        print(f"Total cost of {session_id}: ${total_cost}, total time: {total_time / 3600} hr")
+                continue
+            break  # Good session or end of all unconverted sessions
+        if j >= len(unconverted_sessions):
+            assert False, f"End of valid sessions for subject {subject_id}."
+
+        total_time = estimate_total_conversion_runtime(
+            total_mb=content_to_transfer_size / 1e6, transfer_rate_mb=3.0, upload_rate_mb=150
+        )
+        total_cost = estimate_s3_conversion_cost(
+            total_mb=content_to_transfer_size / 1e6, transfer_rate_mb=3.0, upload_rate_mb=150
+        )
+        print(
+            f"\nTotal cost of {session_id} with size {content_to_transfer_size / 1e9} GB: "
+            f"${total_cost}, total time: {total_time / 3600} hr",
+            flush=True,
+        )

         metadata_path = Path(__file__).parent / "tingley_metabolic_metadata.yml"
-        subject_info_path = Path(__file__).parent / "tingley_metabolic_subject_info.yml"
-        nwb_output_path = data_path / f"nwb_{session_id}"
+        nwb_output_path = cache_path / f"nwb_{session_id}"
         nwb_output_path.mkdir(exist_ok=True)
         nwbfile_path = nwb_output_path / f"{session_id}.nwb"
-        session_path = data_path / f"{session_id}"
+        session_path = cache_path / f"{session_id}"
         session_path.mkdir(exist_ok=True)

         transfer_globus_content(
             source_endpoint_id=buzsaki_globus_endpoint_id,
             source_files=[
-                [base_buzsaki_path / subject_id / x for x in content_to_transfer if ".csv" in x],
-                [base_buzsaki_path / subject_id / x for x in content_to_transfer if ".csv" not in x],
+                [base_buzsaki_path / subject_id / x.split("/")[-1] for x in content_to_transfer if ".csv" in x],
+                [base_buzsaki_path / x for x in content_to_transfer if ".csv" not in x],
             ],
             destination_endpoint_id=hub_globus_endpoint_id,
             destination_folder=session_path,
-            progress_update_rate=total_time / 20,  # every 5% or so
+            progress_update_rate=min(total_time / 20, 120),  # every 5% or every 2 minutes
             progress_update_timeout=max(total_time * 2, 5 * 60),
         )
         global_metadata = load_dict_from_file(metadata_path)
-        subject_info_table = load_dict_from_file(subject_info_path)

         simplefilter("ignore")
         conversion_options = dict()
@@ -246,15 +216,6 @@ def _transfer_and_convert(subject_id):
             "Consult Supplementary Table 1 from the publication for more information about this session."
         )
         metadata["NWBFile"].update(
-            # session_description=subject_info_table.get(
-            #     metadata["Subject"]["subject_id"],
-            #     "Consult Supplementary Table 1 from the publication for more information about this session.",
-            # ),
-            # experiment_description=subject_info_table.get(
-            #     metadata["Subject"]["subject_id"],
-            #     "Consult Supplementary Table 1 from the publication for more information about this session.",
-            # ),
-            # Since no mapping of subject_ids to ST1, just leave this for all.
             session_description=session_description,
             experiment_description=session_description,
         )
@@ -300,33 +261,45 @@ def _transfer_and_convert(subject_id):
             overwrite=True,
         )
         return True, session_path, nwb_output_path
-    except Exception:
-        return False, False, False
+    except Exception as ex:
+        return False, f"{type(ex)}: - {str(ex)}\n\n{traceback.format_exc()}", False


-def _transfer_convert_and_upload(subject_id):
+def _transfer_convert_and_upload(subject_id, subject_contents):
     try:
         with ProcessPoolExecutor(max_workers=1) as executor:
-            future = executor.submit(_transfer_and_convert, subject_id=subject_id)
+            future = executor.submit(_transfer_and_convert, subject_id=subject_id, subject_contents=subject_contents)
             success, session_path, nwb_folder_path = future.result()
         if success:
             try:
                 rmtree(session_path, ignore_errors=True)
+            finally:
+                rmtree(session_path, ignore_errors=True)
+            try:
                 automatic_dandi_upload(dandiset_id=dandiset_id, nwb_folder_path=nwb_folder_path)
             finally:
                 rmtree(nwb_folder_path, ignore_errors=True)
                 rmtree(nwb_folder_path.parent / dandiset_id, ignore_errors=True)
+        else:
+            print(session_path)
+            return session_path
     finally:  # try to cleanup again
         rmtree(session_path, ignore_errors=True)
         rmtree(nwb_folder_path, ignore_errors=True)
-        rmtree(nwb_folder_path.parent / dandiset_id, ignore_errors=True)


 futures = []
 n_jobs = None if n_jobs == -1 else n_jobs  # concurrents uses None instead of -1 for 'auto' mode
 with ProcessPoolExecutor(max_workers=n_jobs) as executor:
     for subject_id in subject_ids:
-        futures.append(executor.submit(_transfer_convert_and_upload, subject_id=subject_id))
+        futures.append(
+            executor.submit(
+                _transfer_convert_and_upload, subject_id=subject_id, subject_contents=contents_per_subject[subject_id]
+            )
+        )
     nwbfiles_iterable = tqdm(as_completed(futures), desc="Converting subjects...")
     for future in nwbfiles_iterable:
-        _ = future.result()
+        sleep(10)
+        error = future.result()
+        if error is not None:
+            print(error)
diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicaccelerometerinterface.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicaccelerometerinterface.py
index 15e87bd..f30c253 100644
--- a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicaccelerometerinterface.py
+++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicaccelerometerinterface.py
@@ -1,5 +1,6 @@
 """Authors: Cody Baker."""
 from nwb_conversion_tools.basedatainterface import BaseDataInterface
+from nwb_conversion_tools.tools.hdmf import SliceableDataChunkIterator
 from nwb_conversion_tools.utils import FilePathType
 from pynwb import TimeSeries, H5DataIO
 from spikeextractors.extraction_tools import read_binary
@@ -57,7 +58,7 @@ def run_conversion(self, nwbfile, metadata, stub_test: bool = False, ecephys_sta
             description="Raw data from accelerometer sensors.",
             unit="Volts",
             data=H5DataIO(
-                self.memmap.T[:stub_frames, :], compression="gzip"
+                SliceableDataChunkIterator(data=self.memmap.T[:stub_frames, :]), compression="gzip"
             ),  # should not need iterative write
             conversion=self.conversion,
             rate=self.sampling_frequency,
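
The final hunk above replaces an eager in-memory slice with a chunked iterator wrapped in H5DataIO. A minimal sketch of that pattern, separate from the patch and assuming a hypothetical accelerometer binary (the file name, dtype, shape, conversion factor, and sampling rate below are illustrative, not taken from the repository):

    import numpy as np
    from nwb_conversion_tools.tools.hdmf import SliceableDataChunkIterator
    from pynwb import TimeSeries, H5DataIO

    # Hypothetical memory-mapped accelerometer data: three int16 channels, flattened on disk.
    memmap = np.memmap("auxiliary.dat", dtype="int16", mode="r").reshape(-1, 3)

    accelerometer_series = TimeSeries(
        name="Accelerometer",
        description="Raw data from accelerometer sensors.",
        unit="Volts",
        # Wrap the memmap in a chunked iterator so H5DataIO streams it to the NWB file
        # with gzip compression instead of materializing the full trace in memory.
        data=H5DataIO(SliceableDataChunkIterator(data=memmap), compression="gzip"),
        conversion=0.195e-6,  # assumed Intan-style scaling to Volts
        rate=20000.0,  # assumed sampling rate in Hz
    )

Because the iterator hands HDMF one chunk at a time, peak memory stays near the chunk size regardless of recording length, which is the motivation for swapping it in for the direct memmap slice.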