diff --git a/buzsaki_lab_to_nwb/mpgdatainterface.py b/buzsaki_lab_to_nwb/common_interfaces/mpgdatainterface.py
similarity index 100%
rename from buzsaki_lab_to_nwb/mpgdatainterface.py
rename to buzsaki_lab_to_nwb/common_interfaces/mpgdatainterface.py
diff --git a/buzsaki_lab_to_nwb/common_interfaces/sleepstatesinterface.py b/buzsaki_lab_to_nwb/common_interfaces/sleepstatesinterface.py
new file mode 100644
index 00000000..18d9b8ee
--- /dev/null
+++ b/buzsaki_lab_to_nwb/common_interfaces/sleepstatesinterface.py
@@ -0,0 +1,49 @@
+"""Authors: Heberto Mayorquin and Cody Baker."""
+from scipy.io import loadmat
+from pynwb import NWBFile
+from pynwb.file import TimeIntervals
+from nwb_conversion_tools.basedatainterface import BaseDataInterface
+from nwb_conversion_tools.utils import FilePathType
+from nwb_conversion_tools.tools.nwb_helpers import get_module
+
+
+class SleepStatesInterface(BaseDataInterface):
+    """Data interface for handling sleepStates.mat files found across multiple projects."""
+
+    def __init__(self, mat_file_path: FilePathType):
+        super().__init__(mat_file_path=mat_file_path)
+
+    def run_conversion(self, nwbfile: NWBFile, metadata, ecephys_start_time: float = 0.0):
+        processing_module = get_module(
+            nwbfile=nwbfile, name="behavior", description="Contains behavioral data concerning classified states."
+        )
+
+        try:
+            mat_file = loadmat(file_name=self.source_data["mat_file_path"])
+            mat_file_is_scipy_readable = True
+        except NotImplementedError:
+            mat_file_is_scipy_readable = False
+            print(f"SleepStatesInterface is unable to convert {self.source_data['mat_file_path']} due to HDF5 version!")
+
+        if mat_file_is_scipy_readable:  # TODO: re-do indexing for an hdfstorage reader
+            sleep_state_dic = mat_file["SleepState"]["ints"][0][0]
+            state_label_names = dict(WAKEstate="Awake", NREMstate="Non-REM", REMstate="REM", MAstate="MA")
+            table = TimeIntervals(name="sleep_states", description="Sleep state of the animal.")
+            table.add_column(name="label", description="Sleep state.")
+
+            data = []
+            for sleep_state in set(sleep_state_dic.dtype.names):
+                values = sleep_state_dic[sleep_state][0][0]
+                if len(values) != 0 and isinstance(values[0], int):
+                    values = [values]
+                for start_time, stop_time in values:
+                    data.append(
+                        dict(
+                            start_time=ecephys_start_time + float(start_time),
+                            stop_time=ecephys_start_time + float(stop_time),
+                            label=state_label_names[sleep_state],
+                        )
+                    )
+            for row in sorted(data, key=lambda x: x["start_time"]):
+                table.add_row(**row)
+            processing_module.add(table)
diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/__init__.py b/buzsaki_lab_to_nwb/tingley_metabolic/__init__.py
new file mode 100644
index 00000000..f7fbc3c8
--- /dev/null
+++ b/buzsaki_lab_to_nwb/tingley_metabolic/__init__.py
@@ -0,0 +1,2 @@
+from .tingleymetabolicnwbconverter import TingleyMetabolicConverter
+from .tingley_metabolic_utils import get_session_datetime
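Because the `[0][0]` indexing above can look opaque: `scipy.io.loadmat` wraps every MATLAB struct field in 1x1 object arrays, so each level needs unwrapping. A minimal sketch under that assumption (the file path is hypothetical):

    from scipy.io import loadmat

    mat_file = loadmat(file_name="CGM4_190701_103000.SleepState.states.mat")  # hypothetical path
    sleep_state_ints = mat_file["SleepState"]["ints"][0][0]  # unwrap the 1x1 struct wrappers
    for state_name in sleep_state_ints.dtype.names:  # e.g., "WAKEstate", "NREMstate", "REMstate", "MAstate"
        intervals = sleep_state_ints[state_name][0][0]  # N x 2 array of (start, stop) in seconds
        print(state_name, intervals.shape)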
diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py b/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py
new file mode 100644
index 00000000..99e9b998
--- /dev/null
+++ b/buzsaki_lab_to_nwb/tingley_metabolic/convert_tingley_metabolic.py
@@ -0,0 +1,190 @@
+"""Run entire conversion."""
+from pathlib import Path
+from concurrent.futures import ProcessPoolExecutor, as_completed
+from datetime import timedelta
+from warnings import simplefilter
+
+from tqdm import tqdm
+from nwb_conversion_tools.utils import load_dict_from_file, dict_deep_update
+from spikeextractors import NeuroscopeRecordingExtractor
+
+from buzsaki_lab_to_nwb.tingley_metabolic import TingleyMetabolicConverter, get_session_datetime
+
+n_jobs = 1
+progress_bar_options = dict(desc="Running conversion...", position=0, leave=False)
+stub_test = True
+conversion_factor = 0.195  # Intan
+buffer_gb = 1
+# note that on DANDIHub, the max number of actual I/O operations on processes seems limited to 8-10,
+# so total memory usage isn't technically buffer_gb * n_jobs
+
+data_path = Path("/shared/catalystneuro/TingleyD/")  # DANDI Hub paths (overridden below for local runs)
+home_path = Path("/home/jovyan/")
+
+data_path = Path("E:/BuzsakiData/TingleyD")
+home_path = Path("E:/BuzsakiData/TingleyD/")
+
+metadata_path = Path(__file__).parent / "tingley_metabolic_metadata.yml"
+subject_info_path = Path(__file__).parent / "tingley_metabolic_subject_info.yml"
+
+
+subject_list = [
+    "CGM4"
+]  # [1,2,3,4,30,31,32,36,37,39]  # This list will change based on what has finished transferring to the Hub
+session_path_list = [
+    session_path
+    for subject_path in data_path.iterdir()
+    if subject_path.is_dir() and subject_path.stem in subject_list
+    for session_path in subject_path.iterdir()
+    if session_path.is_dir()
+]
+
+
+if stub_test:
+    nwb_output_path = data_path / f"nwb_{subject_list[0]}_running_stub"
+    nwb_final_output_path = data_path / f"nwb_{subject_list[0]}_stub"
+else:
+    nwb_output_path = data_path / f"nwb_{subject_list[0]}_running"
+    nwb_final_output_path = data_path / f"nwb_{subject_list[0]}"
+nwb_output_path.mkdir(exist_ok=True)
+nwb_final_output_path.mkdir(exist_ok=True)
+
+
+if stub_test:
+    nwbfile_list = [nwb_output_path / f"{session.stem}_stub.nwb" for session in session_path_list]
+else:
+    nwbfile_list = [nwb_output_path / f"{session.stem}.nwb" for session in session_path_list]
+
+global_metadata = load_dict_from_file(metadata_path)
+subject_info_table = load_dict_from_file(subject_info_path)
+
+
+def convert_session(session_path, nwbfile_path):
+    """Run conversion."""
+    simplefilter("ignore")
+    conversion_options = dict()
+    session_id = session_path.name
+
+    xml_file_path = session_path / f"{session_id}.xml"
+    raw_file_path = session_path / f"{session_id}.dat"
+    lfp_file_path = session_path / f"{session_id}.lfp"
+
+    aux_file_path = session_path / "auxiliary.dat"
+    rhd_file_path = session_path / "info.rhd"
+    sleep_mat_file_path = session_path / f"{session_id}.SleepState.states.mat"
+    ripple_mat_file_paths = [x for x in session_path.iterdir() for suffix in x.suffixes if "ripples" in suffix.lower()]
+
+    # I know I'll need this for other sessions, just not yet
+    # if not raw_file_path.is_file() and (session_path / f"{session_id}.dat_orig").is_file:
+    #     raw_file_path = session_path / f"{session_id}.dat_orig"
+
+    # raw_file_path = session_path / f"{session_id}.dat" if (session_path / f"{session_id}.dat").is_file() else
+    ecephys_start_time = get_session_datetime(session_id=session_id)
+    ecephys_stop_time = ecephys_start_time + timedelta(
+        seconds=NeuroscopeRecordingExtractor(file_path=lfp_file_path, xml_file_path=xml_file_path).get_num_frames()
+        / 1250.0
+    )
+    source_data = dict(
+        Glucose=dict(
+            session_path=str(session_path),
+            ecephys_start_time=str(ecephys_start_time),
+            ecephys_stop_time=str(ecephys_stop_time),
+        ),
+        NeuroscopeLFP=dict(
+            file_path=str(lfp_file_path),
+            gain=conversion_factor,
+            xml_file_path=str(xml_file_path),
+            spikeextractors_backend=True,
+        ),
+    )
+
+    if raw_file_path.is_file():
+        source_data.update(
+            NeuroscopeRecording=dict(
+                file_path=str(raw_file_path),
+                gain=conversion_factor,
+                xml_file_path=str(xml_file_path),
+                spikeextractors_backend=True,
+            )
+        )
+        conversion_options.update(NeuroscopeRecording=dict(stub_test=stub_test))
+
+    if aux_file_path.is_file() and rhd_file_path.is_file():
+        source_data.update(Accelerometer=dict(dat_file_path=str(aux_file_path), rhd_file_path=str(rhd_file_path)))
+
+    if sleep_mat_file_path.is_file():
+        source_data.update(SleepStates=dict(mat_file_path=str(sleep_mat_file_path)))
+
+    if any(ripple_mat_file_paths):
+        source_data.update(Ripples=dict(mat_file_paths=ripple_mat_file_paths))
+
+    converter = TingleyMetabolicConverter(source_data=source_data)
+    metadata = converter.get_metadata()
+    metadata = dict_deep_update(metadata, global_metadata)
+    session_description = "Consult Supplementary Table 1 from the publication for more information about this session."
+    metadata["NWBFile"].update(
+        # session_description=subject_info_table.get(
+        #     metadata["Subject"]["subject_id"],
+        #     "Consult Supplementary Table 1 from the publication for more information about this session.",
+        # ),
+        # experiment_description=subject_info_table.get(
+        #     metadata["Subject"]["subject_id"],
+        #     "Consult Supplementary Table 1 from the publication for more information about this session.",
+        # ),
+        # Since there is no mapping of subject_ids to ST1, just leave this for all.
+        session_description=session_description,
+        experiment_description=session_description,
+    )
+    if metadata["Ecephys"]["Device"][0]["name"] == "Device_ecephys":
+        del metadata["Ecephys"]["Device"][0]
+    for electrode_group_metadata in metadata["Ecephys"]["ElectrodeGroup"]:
+        electrode_group_metadata.update(device=metadata["Ecephys"]["Device"][0]["name"])
+
+    ecephys_start_time_increment = (
+        ecephys_start_time - converter.data_interface_objects["Glucose"].session_start_time
+    ).total_seconds()
+    conversion_options.update(
+        NeuroscopeLFP=dict(
+            stub_test=stub_test, starting_time=ecephys_start_time_increment, iterator_opts=dict(buffer_gb=buffer_gb)
+        )
+    )
+    if raw_file_path.is_file():
+        conversion_options.update(
+            NeuroscopeRecording=dict(
+                stub_test=stub_test,
+                starting_time=ecephys_start_time_increment,
+                es_key="ElectricalSeries_raw",
+                iterator_opts=dict(buffer_gb=buffer_gb),
+            )
+        )
+    if aux_file_path.is_file() and rhd_file_path.is_file():
+        conversion_options.update(
+            Accelerometer=dict(stub_test=stub_test, ecephys_start_time=ecephys_start_time_increment)
+        )
+    if sleep_mat_file_path.is_file():
+        conversion_options.update(SleepStates=dict(ecephys_start_time=ecephys_start_time_increment))
+    if any(ripple_mat_file_paths):
+        conversion_options.update(Ripples=dict(stub_test=stub_test, ecephys_start_time=ecephys_start_time_increment))
+
+    converter.run_conversion(
+        nwbfile_path=str(nwbfile_path),
+        metadata=metadata,
+        conversion_options=conversion_options,
+        overwrite=True,
+    )
+    nwbfile_path.rename(nwb_final_output_path / nwbfile_path.name)
+
+
+if n_jobs == 1:
+    for session_path, nwbfile_path in tqdm(zip(session_path_list, nwbfile_list), **progress_bar_options):
+        simplefilter("ignore")
+        convert_session(session_path=session_path, nwbfile_path=nwbfile_path)
+else:
+    simplefilter("ignore")
+    with ProcessPoolExecutor(max_workers=n_jobs) as executor:
+        futures = []
+        for session_path, nwbfile_path in zip(session_path_list, nwbfile_list):
+            futures.append(executor.submit(convert_session, session_path=session_path, nwbfile_path=nwbfile_path))
+        completed_futures = tqdm(as_completed(futures), total=len(session_path_list), **progress_bar_options)
+        for future in completed_futures:
+            pass  # a no-op loop so tqdm can display progress as futures complete
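A note on the timing logic above, since it recurs in the next script: the NWB session clock is anchored to the first glucose sample, so each ecephys stream is shifted by its offset from that clock via `starting_time`. A minimal sketch with hypothetical datetimes:

    from datetime import datetime

    glucose_session_start = datetime(2019, 7, 1, 9, 0, 0)   # first glucose sample (hypothetical)
    ecephys_start_time = datetime(2019, 7, 1, 10, 30, 0)    # parsed from the session_id (hypothetical)
    ecephys_start_time_increment = (ecephys_start_time - glucose_session_start).total_seconds()
    assert ecephys_start_time_increment == 5400.0  # passed as `starting_time` to place ecephys on the glucose clock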
diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_multipar_session.py b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_multipar_session.py
new file mode 100644
index 00000000..0a098482
--- /dev/null
+++ b/buzsaki_lab_to_nwb/tingley_metabolic/fully_automated_multipar_session.py
@@ -0,0 +1,301 @@
+"""Run entire conversion."""
+import os
+import json
+import traceback
+from pathlib import Path
+from datetime import timedelta
+from warnings import simplefilter
+from concurrent.futures import ProcessPoolExecutor, as_completed
+from collections import defaultdict
+from time import sleep
+
+from shutil import rmtree
+from natsort import natsorted
+from tqdm import tqdm
+from nwbinspector.tools import get_s3_urls_and_dandi_paths
+
+from nwb_conversion_tools.tools.data_transfers import (
+    automatic_dandi_upload,
+    estimate_total_conversion_runtime,
+    estimate_s3_conversion_cost,
+    get_globus_dataset_content_sizes,
+    transfer_globus_content,
+)
+from nwb_conversion_tools.utils import load_dict_from_file, dict_deep_update
+from spikeextractors import NeuroscopeRecordingExtractor
+
+from buzsaki_lab_to_nwb.tingley_metabolic import TingleyMetabolicConverter, get_session_datetime
+
+assert os.environ.get("DANDI_API_KEY"), "Set your DANDI_API_KEY!"
+
+buzsaki_globus_endpoint_id = "188a6110-96db-11eb-b7a9-f57b2d55370d"
+hub_globus_endpoint_id = "2b9b4d14-82a8-11ec-9f34-ed182a728dff"
+dandiset_id = "000233"
+
+stub_test = False
+conversion_factor = 0.195  # Intan
+buffer_gb = 3
+n_jobs = 3
+data_size_threshold = 45 * 1e9  # 45 GB, expressed in bytes
+
+cache_path = Path("/shared/catalystneuro/TingleyD/cache")
+cache_path.mkdir(exist_ok=True)
+
+
+base_buzsaki_path = Path("/TingleyD/Tingley2021_ripple_glucose_paper/")
+subject_ids = ["bruce", "dt15", "flex1", "ros", "Vanessa"]
+subject_ids.extend(
+    [f"CGM{x}" for x in [1, 2, 3, 4, 30, 31, 32, 36, 37, 39, 40, 41, 46, 48, 49, 51, 52, 55, 57, 58, 60]]
+)  # 47 and 50 have malformed csv? Something is also up with A63 and DT12
+
+content_cache_file_path = cache_path / "cache_full_manifest.json"
+if not content_cache_file_path.exists():
+    print("No cache found! Fetching manifest from Globus.")
+    all_content = get_globus_dataset_content_sizes(
+        globus_endpoint_id=buzsaki_globus_endpoint_id, path=base_buzsaki_path.as_posix()
+    )
+    contents_per_subject = defaultdict(dict)
+    for file, size in all_content.items():
+        subject_id = file.split("/")[0]
+        contents_per_subject[subject_id].update({file: size})
+    with open(content_cache_file_path, mode="w") as fp:
+        json.dump(contents_per_subject, fp)
+else:
+    print("Cache found! Loading manifest.")
+    with open(content_cache_file_path, mode="r") as fp:
+        contents_per_subject = json.load(fp)
+
+
+def _transfer_and_convert(subject_id, subject_contents):
+    try:
+        dandi_content = list(get_s3_urls_and_dandi_paths(dandiset_id=dandiset_id).values())
+        dandi_session_datetimes = [
+            "_".join(x.split("/")[1].split("_")[-3:-1]) for x in dandi_content
+        ]  # probably a better way to do this, just brute forcing for now
+        sessions = set([Path(x).parent.name for x in subject_contents]) - set([subject_id])  # the subject-level folder holds the .csv files
+        unconverted_sessions = natsorted(
+            [
+                session_id
+                for session_id in sessions
+                if "_".join(session_id.split("_")[-2:]) not in dandi_session_datetimes
+            ]
+        )  # natsorted for consistency on each run
+
+        for session_id in unconverted_sessions:
+            if f"{subject_id}/{session_id}/{session_id}.lfp" not in subject_contents:
+                print(f"Session {session_id} has no LFP! Skipping.", flush=True)
+                continue
+            if subject_id not in session_id:
+                print(f"Session {session_id} is likely an analysis folder!", flush=True)
+                continue
+
+            content_to_attempt_transfer = [
+                f"{subject_id}/{session_id}/{session_id}.xml",
+                f"{subject_id}/{session_id}/{session_id}.dat",
+                f"{subject_id}/{session_id}/{session_id}.lfp",
+                f"{subject_id}/{session_id}/auxiliary.dat",
+                f"{subject_id}/{session_id}/info.rhd",
+                f"{subject_id}/{session_id}/{session_id}.SleepState.states.mat",
+            ]
+            content_to_attempt_transfer.extend([x for x in subject_contents if Path(x).suffix == ".csv"])
+            # Ripple files are a little trickier; they can take multiple text forms
+            content_to_attempt_transfer.extend(
+                [
+                    x
+                    for x in subject_contents
+                    if Path(x).parent.name == session_id
+                    for suffix in Path(x).suffixes
+                    if "ripples" in suffix.lower()
+                ]
+            )
+            content_to_transfer = [x for x in content_to_attempt_transfer if x in subject_contents]
+
+            content_to_transfer_size = sum([subject_contents[x] for x in content_to_transfer])
+            if content_to_transfer_size > data_size_threshold:
+                print(
+                    f"Session {session_id} with size ({content_to_transfer_size / 1e9} GB) is larger than the specified "
+                    f"threshold ({data_size_threshold / 1e9} GB)! Skipping.",
+                    flush=True,
+                )
+                continue
+            break  # found a good session
+        else:
+            raise ValueError(f"No valid sessions remain for subject {subject_id}.")
+
+        total_time = estimate_total_conversion_runtime(
+            total_mb=content_to_transfer_size / 1e6, transfer_rate_mb=3.0, upload_rate_mb=150
+        )
+        total_cost = estimate_s3_conversion_cost(
+            total_mb=content_to_transfer_size / 1e6, transfer_rate_mb=3.0, upload_rate_mb=150
+        )
+        print(
+            f"\nTotal cost of {session_id} with size {content_to_transfer_size / 1e9} GB: "
+            f"${total_cost}, total time: {total_time / 3600} hr",
+            flush=True,
+        )
+
+        metadata_path = Path(__file__).parent / "tingley_metabolic_metadata.yml"
+
+        nwb_output_path = cache_path / f"nwb_{session_id}"
+        nwb_output_path.mkdir(exist_ok=True)
+        nwbfile_path = nwb_output_path / f"{session_id}.nwb"
+        session_path = cache_path / f"{session_id}"
+        session_path.mkdir(exist_ok=True)
+
+        transfer_globus_content(
+            source_endpoint_id=buzsaki_globus_endpoint_id,
+            source_files=[
+                [base_buzsaki_path / subject_id / x.split("/")[-1] for x in content_to_transfer if ".csv" in x],
+                [base_buzsaki_path / x for x in content_to_transfer if ".csv" not in x],
+            ],
+            destination_endpoint_id=hub_globus_endpoint_id,
+            destination_folder=session_path,
+            progress_update_rate=min(total_time / 20, 120),  # every ~5% of estimated time, or every 2 minutes
+            progress_update_timeout=max(total_time * 2, 5 * 60),
+        )
+
+        global_metadata = load_dict_from_file(metadata_path)
+
+        simplefilter("ignore")
+        conversion_options = dict()
+
+        xml_file_path = session_path / f"{session_id}.xml"
+        raw_file_path = session_path / f"{session_id}.dat"
+        lfp_file_path = session_path / f"{session_id}.lfp"
+
+        aux_file_path = session_path / "auxiliary.dat"
+        rhd_file_path = session_path / "info.rhd"
+        sleep_mat_file_path = session_path / f"{session_id}.SleepState.states.mat"
+        ripple_mat_file_paths = [
+            x for x in session_path.iterdir() for suffix in x.suffixes if "ripples" in suffix.lower()
+        ]
+
+        ecephys_start_time = get_session_datetime(session_id=session_id)
+        ecephys_stop_time = ecephys_start_time + timedelta(
+            seconds=NeuroscopeRecordingExtractor(file_path=lfp_file_path, xml_file_path=xml_file_path).get_num_frames()
+            / 1250.0
+        )
+        source_data = dict(
+            Glucose=dict(
+                session_path=str(session_path),
+                ecephys_start_time=str(ecephys_start_time),
+                ecephys_stop_time=str(ecephys_stop_time),
+            ),
+            NeuroscopeLFP=dict(
+                file_path=str(lfp_file_path),
+                gain=conversion_factor,
+                xml_file_path=str(xml_file_path),
+                spikeextractors_backend=True,
+            ),
+        )
+
+        if raw_file_path.is_file():
+            source_data.update(
+                NeuroscopeRecording=dict(
+                    file_path=str(raw_file_path),
+                    gain=conversion_factor,
+                    xml_file_path=str(xml_file_path),
+                    spikeextractors_backend=True,
+                )
+            )
+
+        if aux_file_path.is_file() and rhd_file_path.is_file():
+            source_data.update(Accelerometer=dict(dat_file_path=str(aux_file_path), rhd_file_path=str(rhd_file_path)))
+
+        if sleep_mat_file_path.is_file():
+            source_data.update(SleepStates=dict(mat_file_path=str(sleep_mat_file_path)))
+
+        if any(ripple_mat_file_paths):
+            source_data.update(Ripples=dict(mat_file_paths=ripple_mat_file_paths))
+
+        converter = TingleyMetabolicConverter(source_data=source_data)
+        metadata = converter.get_metadata()
+        metadata = dict_deep_update(metadata, global_metadata)
+        session_description = (
+            "Consult Supplementary Table 1 from the publication for more information about this session."
+        )
+        metadata["NWBFile"].update(
+            session_description=session_description,
+            experiment_description=session_description,
+        )
+        if metadata["Ecephys"]["Device"][0]["name"] == "Device_ecephys":
+            del metadata["Ecephys"]["Device"][0]
+        for electrode_group_metadata in metadata["Ecephys"]["ElectrodeGroup"]:
+            electrode_group_metadata.update(device=metadata["Ecephys"]["Device"][0]["name"])
+
+        ecephys_start_time_increment = (
+            ecephys_start_time - converter.data_interface_objects["Glucose"].session_start_time
+        ).total_seconds()
+        conversion_options.update(
+            NeuroscopeLFP=dict(
+                stub_test=stub_test,
+                starting_time=ecephys_start_time_increment,
+                iterator_opts=dict(buffer_gb=buffer_gb, display_progress=True),
+            )
+        )
+        if raw_file_path.is_file():
+            conversion_options.update(
+                NeuroscopeRecording=dict(
+                    stub_test=stub_test,
+                    starting_time=ecephys_start_time_increment,
+                    es_key="ElectricalSeries_raw",
+                    iterator_opts=dict(buffer_gb=buffer_gb, display_progress=True),
+                )
+            )
+        if aux_file_path.is_file() and rhd_file_path.is_file():
+            conversion_options.update(
+                Accelerometer=dict(stub_test=stub_test, ecephys_start_time=ecephys_start_time_increment)
+            )
+        if sleep_mat_file_path.is_file():
+            conversion_options.update(SleepStates=dict(ecephys_start_time=ecephys_start_time_increment))
+        if any(ripple_mat_file_paths):
+            conversion_options.update(
+                Ripples=dict(stub_test=stub_test, ecephys_start_time=ecephys_start_time_increment)
+            )
+
+        converter.run_conversion(
+            nwbfile_path=str(nwbfile_path),
+            metadata=metadata,
+            conversion_options=conversion_options,
+            overwrite=True,
+        )
+        return True, session_path, nwb_output_path
+    except Exception as ex:
+        return False, f"{type(ex).__name__}: {str(ex)}\n\n{traceback.format_exc()}", False
+
+
+def _transfer_convert_and_upload(subject_id, subject_contents):
+    try:
+        with ProcessPoolExecutor(max_workers=1) as executor:
+            future = executor.submit(_transfer_and_convert, subject_id=subject_id, subject_contents=subject_contents)
+            success, session_path, nwb_folder_path = future.result()
+        if success:
+            rmtree(session_path, ignore_errors=True)
+            try:
+                automatic_dandi_upload(dandiset_id=dandiset_id, nwb_folder_path=nwb_folder_path)
+            finally:
+                rmtree(nwb_folder_path, ignore_errors=True)
+                rmtree(nwb_folder_path.parent / dandiset_id, ignore_errors=True)
+        else:
+            print(session_path)  # on failure, this holds the formatted traceback
+            return session_path
+    finally:  # try to clean up again in case anything above was interrupted
+        rmtree(session_path, ignore_errors=True)
+        rmtree(nwb_folder_path, ignore_errors=True)
+
+
+futures = []
+n_jobs = None if n_jobs == -1 else n_jobs  # concurrent.futures uses None instead of -1 for 'auto' mode
+with ProcessPoolExecutor(max_workers=n_jobs) as executor:
+    for subject_id in subject_ids:
+        futures.append(
+            executor.submit(
+                _transfer_convert_and_upload, subject_id=subject_id, subject_contents=contents_per_subject[subject_id]
+            )
+        )
+nwbfiles_iterable = tqdm(as_completed(futures), desc="Converting subjects...")
+for future in nwbfiles_iterable:
+    sleep(10)
+    error = future.result()
+    if error is not None:
+        print(error)
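One design choice worth spelling out: `_transfer_convert_and_upload` runs the real work inside a single-worker `ProcessPoolExecutor` even though it is itself already a worker. A plausible reading is crash isolation: if the grandchild dies hard (segfault, OOM kill), the parent receives a `BrokenProcessPool` instead of dying with it. A minimal sketch of the same pattern, with hypothetical names:

    from concurrent.futures import ProcessPoolExecutor
    from concurrent.futures.process import BrokenProcessPool

    def risky_work(x):  # stand-in for _transfer_and_convert
        return x * 2

    def isolated(x):
        try:
            with ProcessPoolExecutor(max_workers=1) as executor:
                return executor.submit(risky_work, x).result()
        except BrokenProcessPool:
            return None  # the child crashed, but this worker survives to report it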
diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_metadata.yml b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_metadata.yml
new file mode 100644
index 00000000..78a83925
--- /dev/null
+++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_metadata.yml
@@ -0,0 +1,38 @@
+NWBFile:
+  related_publications: "https://doi.org/10.1038/s41586-021-03811-w"
+  lab: Buzsáki
+  institution: NYU
+  experimenter:
+    - Tingley, David
+    - McClain, Kathryn
+    - Kaya, Ekin
+    - Carpenter, Jordan
+    - Buzsáki, György
+  keywords:
+    - glucose
+    - ecephys
+    - pharmacology
+Subject:
+  description: >
+    A total of 45 adult Long Evans rats were used in this study. Five were used for the hippocampal and lateral septal
+    in vivo recordings. Ten rats were used for chronic electrophysiological recordings paired with glucose
+    monitoring. Eight rats were used for optogenetic induction of SPW-Rs and glucose monitoring. Six rats were used for
+    the DREADD experiment, and seven rats were used as controls for this experiment. Three additional rats were used
+    for simultaneous dorsal and ventral hippocampus recordings. Six additional rats were used for the posterior
+    parietal cortex (PPC) optogenetic stimulation control experiments. Sample sizes were selected to match cohort
+    sizes where applicable.
+  species: Rattus norvegicus
+  strain: Long Evans
+  sex: U
+Ecephys:
+  Device:
+    - name: IntanDevice
+      description: >
+        Recordings were conducted using the Intan RHD2000 interface board, sampled at 20 kHz. Amplification and
+        digitization were done on the head stage. Data were visualized with Neurosuite software. All
+        local field potential (LFP) analyses (ripple detection, state scoring and so on) were conducted on
+        the 1,250-Hz down-sampled signal.
+    - name: Medtronic iPro2 CGM
+      description: Continuous Glucose Monitoring (CGM) system used to track subject glucose levels.
+  ElectricalSeries_lfp:
+    name: ElectricalSeriesLFP
\ No newline at end of file
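For readers unfamiliar with how this YAML reaches the NWB file: both conversion scripts load it with `load_dict_from_file` and merge it over the interface-derived defaults via `dict_deep_update`. A small sketch with toy dictionaries (not the real metadata):

    from nwb_conversion_tools.utils import dict_deep_update

    defaults = {"NWBFile": {"session_description": "no description", "lab": "unknown"}}
    overrides = {"NWBFile": {"lab": "Buzsáki"}}  # e.g., loaded from the YAML above
    merged = dict_deep_update(defaults, overrides)
    # nested keys are merged rather than replaced wholesale:
    # {"NWBFile": {"session_description": "no description", "lab": "Buzsáki"}}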
diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_requirements.txt b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_requirements.txt
new file mode 100644
index 00000000..da610852
--- /dev/null
+++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_requirements.txt
@@ -0,0 +1,6 @@
+mat4py==0.5.0
+mat73==0.52
+hdf5storage>=0.1.18
+pyintan>=0.3.0
+pandas>=1.4.2
+nwb-conversion-tools @ git+https://github.com/catalystneuro/nwb-conversion-tools@5e39ca55266b8f7be48380c67471100a98413277
diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_subject_info.yml b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_subject_info.yml
new file mode 100644
index 00000000..74f6c846
--- /dev/null
+++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_subject_info.yml
@@ -0,0 +1,45 @@
+CGM1: "Experiment condition 'ripple/glucose recording' with surgery condition 'rCA1'."
+CGM2: "Experiment condition 'ripple/glucose recording' with surgery condition 'rCA1'."
+CGM3: "Experiment condition 'ripple/glucose recording' with surgery condition 'rCA1'."
+CGM4: "Experiment condition 'ripple/glucose recording' with surgery condition 'rCA1'."
+CGM5: "Experiment condition 'ripple/glucose recording' with surgery condition 'rCA1'."
+CGM6: "Experiment condition 'ripple/glucose recording' with surgery condition 'bilat rCA1'."
+CGM7: "Experiment condition 'ripple/glucose recording' with surgery condition 'rHypothalamus & rCA1'."
+CGM8: "Experiment condition 'ripple/glucose recording' with surgery condition 'rHypothalamus & rCA1'."
+CGM9: "Experiment condition 'ripple/glucose recording' with surgery condition 'flex probe in rCA1'."
+CGM10: "Experiment condition 'Opto stim' with surgery condition 'bilat CA3 CaMKII-ChR2'."
+CGM11: "Experiment condition 'Opto stim' with surgery condition 'bilat CA3 CaMKII-ChR2'."
+CGM12: "Experiment condition 'Opto stim' with surgery condition 'bilat CA3 CaMKII-ChR2'."
+CGM13: "Experiment condition 'Opto stim' with surgery condition 'bilat CA3 CaMKII-ChR2'."
+CGM14: "Experiment condition 'Opto stim' with surgery condition 'bilat CA3 CaMKII-ChR2'."
+CGM15: "Experiment condition 'Opto stim' with surgery condition 'bilat CA3 CaMKII-ChR2'."
+CGM16: "Experiment condition 'Opto stim' with surgery condition 'bilat CA3 CaMKII-ChR2'."
+CGM17: "Experiment condition 'dorsal/ventral' with no surgery condition."
+CGM18: "Experiment condition 'DREADDS' with surgery condition 'LS injected'."
+CGM19: "Experiment condition 'DREADDS' with surgery condition 'LS injected'."
+CGM20: "Experiment condition 'DREADDS' with surgery condition 'LS injected'."
+CGM21: "Experiment condition 'PPC opto stim' with surgery condition 'PPC injected'."
+CGM22: "Experiment condition 'PPC opto stim' with surgery condition 'PPC injected'."
+CGM23: "Experiment condition 'dorsal/ventral' with surgery condition 'dorsal/ventral probe implant'."
+CGM24: "Experiment condition 'DREADDS' with surgery condition 'LS injected'."
+CGM25: "Experiment condition 'DREADDS' with surgery condition 'LS injected'."
+CGM26: "Experiment condition 'DREADDS' with surgery condition 'LS injected'."
+CGM27: "Experiment condition 'DREADDS' with surgery condition 'LS injected'."
+CGM28: "Experiment condition 'PPC opto stim' with surgery condition 'PPC injected'."
+CGM29: "Experiment condition 'PPC opto stim' with surgery condition 'PPC injected'."
+CGM30: "Experiment condition 'DREADDS' with surgery condition 'MS injected'."
+CGM31: "Experiment condition 'dorsal/ventral' with surgery condition 'dorsal/ventral probe implant'."
+CGM32: "Experiment condition 'DREADDS' with surgery condition 'MS injected'."
+CGM33: "Experiment condition 'dorsal/ventral' with surgery condition 'dorsal/ventral probe implant'."
+CGM34: "Experiment condition 'DREADDS' with surgery condition 'PPC injected'."
+CGM35: "Experiment condition 'DREADDS' with surgery condition 'PPC injected'."
+CGM36: "Experiment condition 'DREADDS' with surgery condition 'PPC injected'."
+CGM37: "Experiment condition 'DREADDS' with surgery condition 'PPC injected'."
+CGM38: "Experiment condition 'PPC opto stim' with surgery condition 'PPC injected'."
+CGM39: "Experiment condition 'PPC opto stim' with surgery condition 'PPC injected'."
+CGM40: "Experiment condition 'DREADDS' with surgery condition 'MS injected'."
+CGM41: "Experiment condition 'DREADDS' with surgery condition 'PPC injected'."
+CGM42: "Experiment condition 'DREADDS' with surgery condition 'PPC injected'."
+CGM43: "Experiment condition 'DREADDS' with surgery condition 'PPC injected'."
+CGM44: "Experiment condition 'DREADDS' with surgery condition 'PPC injected'."
+CGM45: "Experiment condition 'ripple/glucose recording' with surgery condition 'PPC injected'."
diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_utils.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_utils.py
new file mode 100644
index 00000000..a766d87f
--- /dev/null
+++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_utils.py
@@ -0,0 +1,44 @@
+"""Author: Cody Baker."""
+from typing import List, Tuple
+from pathlib import Path
+from datetime import datetime
+
+import numpy as np
+from pandas import read_csv, to_datetime
+
+
+def load_subject_glucose_series(session_path) -> Tuple[List[datetime], List[float]]:
+    """Given the ecephys session_path, load all of the subject's glucose series data for further parsing."""
+    all_csv = [x for x in Path(session_path).iterdir() if ".csv" in x.suffixes]
+    if not all_csv:
+        subject_path = Path(session_path).parent
+        all_csv = [x for x in subject_path.iterdir() if ".csv" in x.suffixes]
+
+    timestamps = []
+    isig = []
+    for file_path in all_csv:
+        sub_timestamps, sub_isig = read_glucose_csv(file_path=file_path)
+        timestamps.extend(sub_timestamps)
+        isig.extend(sub_isig)
+    return timestamps, isig
+
+
+def read_glucose_csv(file_path: Path) -> Tuple[List[datetime], List[float]]:
+    """Parse a single glucose data file."""
+    all_data = read_csv(filepath_or_buffer=file_path, skiprows=11)
+
+    isig = all_data["ISIG Value"]
+    exclude_col = all_data["Excluded"]
+    exclude_col = exclude_col.fillna(False)
+    exclude = exclude_col.astype(bool) | np.isnan(isig) | (isig == -9999)
+    valid_isig = isig[~exclude]
+    valid_timestamps = [
+        x.to_pydatetime() for x in to_datetime(all_data["Timestamp"][~exclude], infer_datetime_format=True)
+    ]
+
+    return valid_timestamps, list(valid_isig)
+
+
+def get_session_datetime(session_id: str):
+    """Auxiliary function for parsing the datetime part of a session ID."""
+    return datetime.strptime("_".join(session_id.split("_")[-2:]), "%y%m%d_%H%M%S")
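Usage sketch for the helper above; the session-ID format is inferred from the `strptime` pattern `%y%m%d_%H%M%S` (the ID itself is hypothetical):

    from buzsaki_lab_to_nwb.tingley_metabolic import get_session_datetime

    get_session_datetime(session_id="CGM4_190701_103000")
    # -> datetime.datetime(2019, 7, 1, 10, 30)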
diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicaccelerometerinterface.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicaccelerometerinterface.py
new file mode 100644
index 00000000..f30c253f
--- /dev/null
+++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicaccelerometerinterface.py
@@ -0,0 +1,67 @@
+"""Authors: Cody Baker."""
+from nwb_conversion_tools.basedatainterface import BaseDataInterface
+from nwb_conversion_tools.tools.hdmf import SliceableDataChunkIterator
+from nwb_conversion_tools.utils import FilePathType
+from pynwb import TimeSeries, H5DataIO
+from spikeextractors.extraction_tools import read_binary
+from pyintan.intan import read_rhd
+
+
+class TingleyMetabolicAccelerometerInterface(BaseDataInterface):
+    """Aux data interface for the Tingley metabolic project."""
+
+    def __init__(self, dat_file_path: FilePathType, rhd_file_path: FilePathType):
+        """
+        Process accelerometer data stored in a unique, ad-hoc format in an 'auxiliary.dat' file.
+
+        The data is stored in the Neuroscope .dat binary blob format, but with no accompanying .xml header.
+        Instead, the header is the original Intan .rhd file.
+
+        A few details to note:
+        i) Regardless of how many AUX channels are plugged in (which is read from the .rhd file), only the first 3
+           carry actual data (the values for all other channels at all times are -1).
+        ii) Even though the .rhd specifies that the accelerometer data is acquired at 5 kHz, the .dat stores it at
+            20 kHz by duplicating each data value at every 4th index. This was presumably done for easier
+            side-by-side analysis with the raw data (which was acquired at 20 kHz).
+        """
+        try:
+            rhd_info = read_rhd(filename=rhd_file_path)
+            self.readable = True
+        except Exception:  # pyintan can raise non-standard errors on some headers
+            self.readable = False
+
+        if self.readable:
+            first_aux_entry = next(
+                header_info_entry
+                for header_info_entry in rhd_info[1]
+                if "AUX1" in header_info_entry["native_channel_name"]
+            )
+            first_aux_sub_entry = next(
+                header_info_entry for header_info_entry in rhd_info[2] if "AUX1" in header_info_entry[0]
+            )
+
+            # Manually confirmed that all aux channels have the same properties
+            self.conversion = first_aux_entry["gain"]  # offset confirmed to be 0, units confirmed to be Volts
+            self.sampling_frequency = first_aux_entry["sampling_rate"]
+            dtype = first_aux_sub_entry[1]
+            numchan = sum("AUX" in header_info_entry["native_channel_name"] for header_info_entry in rhd_info[1])
+
+            # Manually confirmed the result is still a memmap after slicing
+            self.memmap = read_binary(file=dat_file_path, numchan=numchan, dtype=dtype)[:3, ::4]
+
+    def run_conversion(self, nwbfile, metadata, stub_test: bool = False, ecephys_start_time: float = 0.0):
+        if self.readable:
+            stub_frames = 200 if stub_test else None
+            nwbfile.add_acquisition(
+                TimeSeries(
+                    name="Accelerometer",
+                    description="Raw data from accelerometer sensors.",
+                    unit="Volts",
+                    data=H5DataIO(
+                        SliceableDataChunkIterator(data=self.memmap.T[:stub_frames, :]), compression="gzip"
+                    ),  # iterative write keeps memory bounded
+                    conversion=self.conversion,
+                    rate=self.sampling_frequency,
+                    starting_time=ecephys_start_time,
+                ),
+            )
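The `[:3, ::4]` slice at the end of `__init__` implements point (ii) of the docstring. A toy numpy illustration of why taking every 4th sample recovers the native 5 kHz series:

    import numpy as np

    channel = np.repeat(np.arange(5), repeats=4)  # one 5 kHz channel duplicated up to 20 kHz
    memmap_like = np.tile(channel, (8, 1))        # 8 aux slots in the .dat file
    recovered = memmap_like[:3, ::4]              # first 3 (real) channels, every 4th sample
    assert np.array_equal(recovered[0], np.arange(5))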
diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicglucoseinterface.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicglucoseinterface.py
new file mode 100644
index 00000000..f32ed497
--- /dev/null
+++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicglucoseinterface.py
@@ -0,0 +1,32 @@
+"""Authors: Cody Baker."""
+
+from nwb_conversion_tools.basedatainterface import BaseDataInterface
+from nwb_conversion_tools.utils import FilePathType
+from pynwb import TimeSeries, H5DataIO
+
+from .tingley_metabolic_utils import load_subject_glucose_series  # , segment_glucose_series
+
+
+class TingleyMetabolicGlucoseInterface(BaseDataInterface):
+    """Glucose data interface for the Tingley metabolic project."""
+
+    def __init__(self, session_path: FilePathType, ecephys_start_time: str, ecephys_stop_time: str):
+        glucose_timestamps, glucose_isig = load_subject_glucose_series(session_path=session_path)
+        self.session_start_time = glucose_timestamps[0]
+        glucose_timestamps_floats_from_datetime = [
+            (glucose_timestamp - self.session_start_time).total_seconds() for glucose_timestamp in glucose_timestamps
+        ]
+        self.glucose_timestamps = glucose_timestamps_floats_from_datetime
+        self.glucose_isig = glucose_isig
+
+    def run_conversion(self, nwbfile, metadata):
+        nwbfile.add_acquisition(
+            TimeSeries(
+                name="GlucoseLevel",
+                description="Raw current from Medtronic iPro2 ISIG tracking.",
+                unit="nA",
+                data=H5DataIO(self.glucose_isig, compression="gzip"),  # small enough to not need iterative write
+                conversion=1.0,
+                timestamps=H5DataIO(self.glucose_timestamps, compression="gzip"),
+            ),
+        )
diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicnwbconverter.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicnwbconverter.py
new file mode 100644
index 00000000..9553ccfe
--- /dev/null
+++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicnwbconverter.py
@@ -0,0 +1,63 @@
+"""Authors: Cody Baker."""
+from pathlib import Path
+
+from nwb_conversion_tools import (
+    NWBConverter,
+    NeuroscopeRecordingInterface,
+    NeuroscopeLFPInterface,
+)
+
+from .tingleymetabolicaccelerometerinterface import TingleyMetabolicAccelerometerInterface
+from .tingleymetabolicglucoseinterface import TingleyMetabolicGlucoseInterface
+from .tingleymetabolicripplesinterface import TingleyMetabolicRipplesInterface
+from ..common_interfaces.sleepstatesinterface import SleepStatesInterface
+
+
+DEVICE_INFO = dict(
+    cambridge=dict(
+        name="Cambridge probe (1 x 64)",
+        description=(
+            "Silicon probe from Cambridge Neurotech. Electrophysiological data were "
+            "acquired using an Intan RHD2000 system (Intan Technologies LLC) digitized at a 20 kHz rate."
+        ),
+    ),
+    neuronexus_4_8=dict(
+        name="Neuronexus probe (1 x 32)",
+        description=(
+            "Silicon probe from NeuroNexus. Electrophysiological data were "
+            "acquired using an Intan RHD2000 system (Intan Technologies LLC) digitized at a 20 kHz rate."
+        ),
+    ),
+)
+
+
+class TingleyMetabolicConverter(NWBConverter):
+    """Primary conversion class for the Tingley Metabolic data project."""
+
+    data_interface_classes = dict(
+        NeuroscopeRecording=NeuroscopeRecordingInterface,
+        NeuroscopeLFP=NeuroscopeLFPInterface,
+        Accelerometer=TingleyMetabolicAccelerometerInterface,
+        Glucose=TingleyMetabolicGlucoseInterface,
+        SleepStates=SleepStatesInterface,
+        Ripples=TingleyMetabolicRipplesInterface,
+    )
+
+    def get_metadata(self):
+        lfp_file_path = Path(self.data_interface_objects["NeuroscopeLFP"].source_data["file_path"])
+
+        session_path = lfp_file_path.parent
+        session_id = session_path.stem
+
+        session_id_split = session_id.split("_")[:-2]
+        subject_id = session_id_split[0]
+
+        metadata = super().get_metadata()
+        metadata["NWBFile"].update(
+            session_id=session_id,
+            session_start_time=str(self.data_interface_objects["Glucose"].session_start_time),
+        )
+        metadata.update(Subject=dict(subject_id=subject_id))
+        if "NeuroscopeRecording" in self.data_interface_objects:
+            metadata["Ecephys"].update(ElectricalSeries_raw=dict(name="ElectricalSeriesRaw"))
+        return metadata
diff --git a/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py
new file mode 100644
index 00000000..5928237a
--- /dev/null
+++ b/buzsaki_lab_to_nwb/tingley_metabolic/tingleymetabolicripplesinterface.py
@@ -0,0 +1,86 @@
+"""Authors: Heberto Mayorquin and Cody Baker."""
+from scipy.io import loadmat
+from pynwb import NWBFile, H5DataIO
+from pynwb.file import TimeIntervals
+from nwb_conversion_tools.basedatainterface import BaseDataInterface
+from nwb_conversion_tools.tools.nwb_helpers import get_module
+
+
+class TingleyMetabolicRipplesInterface(BaseDataInterface):
+    """Data interface for handling ripples.mat files for the Tingley metabolic project."""
+
+    def __init__(self, mat_file_paths: list):
+        super().__init__(mat_file_paths=mat_file_paths)
+
+    def run_conversion(self, nwbfile: NWBFile, metadata, stub_test: bool = False, ecephys_start_time: float = 0.0):
+        try:
+            stub_events = 5 if stub_test else None
+            processing_module = get_module(
+                nwbfile=nwbfile,
+                name="ecephys",
+                description="Intermediate data from extracellular electrophysiology recordings, e.g., LFP.",
+            )
+
+            for mat_file_path in self.source_data["mat_file_paths"]:
+                table_name = mat_file_path.suffixes[-3].lstrip(".").title()
+                try:
+                    mat_file = loadmat(file_name=mat_file_path)
+                    mat_file_is_scipy_readable = True
+                except NotImplementedError:
+                    mat_file_is_scipy_readable = False
+                    print(f"RippleInterface is unable to convert {mat_file_path} due to HDF5 version!")
+
+                if mat_file_is_scipy_readable:
+                    mat_data = mat_file["ripples"]
+                    start_and_stop_times = mat_data["timestamps"][0][0][:stub_events]
+                    durations = [x[0] for x in mat_data["data"][0][0]["duration"][0][0]][:stub_events]
+                    peaks = [x[0] for x in mat_data["peaks"][0][0]][:stub_events]
+                    peak_normed_powers = [x[0] for x in mat_data["peakNormedPower"][0][0]][:stub_events]
+                    peak_frequencies = [x[0] for x in mat_data["data"][0][0]["peakFrequency"][0][0]][:stub_events]
+                    peak_amplitudes = [x[0] for x in mat_data["data"][0][0]["peakAmplitude"][0][0]][:stub_events]
+                    ripples = mat_data["maps"][0][0]["ripples"][0][0][:stub_events]
+                    frequencies = mat_data["maps"][0][0]["frequency"][0][0][:stub_events]
+                    phases = mat_data["maps"][0][0]["phase"][0][0][:stub_events]
+                    amplitudes = mat_data["maps"][0][0]["amplitude"][0][0][:stub_events]
+
+                    descriptions = dict(
+                        duration="Duration of the ripple event.",
+                        peak="Peak of the ripple.",
+                        peak_normed_power="Normed power of the peak.",
+                        peak_frequency="Peak frequency of the ripple.",
+                        peak_amplitude="Peak amplitude of the ripple.",
+                    )
+                    indexed_descriptions = dict(
+                        ripple="Extracted ripple data.",
+                        frequency="Frequency of each point on the ripple.",
+                        phase="Phase of each point on the ripple.",
+                        amplitude="Amplitude of each point on the ripple.",
+                    )
+
+                    table = TimeIntervals(
+                        name=table_name, description=f"Identified {table_name} events and their metrics."
+                    )
+                    for start_time, stop_time in start_and_stop_times:
+                        table.add_row(
+                            start_time=ecephys_start_time + start_time, stop_time=ecephys_start_time + stop_time
+                        )
+                    for column_name, column_data in zip(
+                        list(descriptions), [durations, peaks, peak_normed_powers, peak_frequencies, peak_amplitudes]
+                    ):
+                        table.add_column(
+                            name=column_name,
+                            description=descriptions[column_name],
+                            data=H5DataIO(column_data, compression="gzip"),
+                        )
+                    for column_name, column_data in zip(
+                        list(indexed_descriptions), [ripples, frequencies, phases, amplitudes]
+                    ):
+                        table.add_column(
+                            name=column_name,
+                            description=indexed_descriptions[column_name],
+                            index=list(range(column_data.shape[0])),
+                            data=H5DataIO(column_data, compression="gzip"),
+                        )
+                    processing_module.add(table)
+        except Exception as ex:
+            print(f"Unable to convert Ripples! {type(ex).__name__}: {ex}")
diff --git a/tingley_metabolic_environment.yml b/tingley_metabolic_environment.yml
new file mode 100644
index 00000000..0c1e7ef8
--- /dev/null
+++ b/tingley_metabolic_environment.yml
@@ -0,0 +1,12 @@
+name: buzsaki_tingley_metabolic
+channels:
+- defaults
+- anaconda
+- conda-forge
+dependencies:
+- python==3.9
+- pip
+- git
+- pip:
+  - -e .
+  - -r buzsaki_lab_to_nwb/tingley_metabolic/tingley_metabolic_requirements.txt
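Finally, a minimal end-to-end sketch of using the converter defined in this diff, mirroring the per-session logic of convert_tingley_metabolic.py (all paths are hypothetical):

    from buzsaki_lab_to_nwb.tingley_metabolic import TingleyMetabolicConverter

    session = "CGM4/CGM4_190701_103000"  # hypothetical session folder
    source_data = dict(
        Glucose=dict(
            session_path=session,
            ecephys_start_time="2019-07-01 10:30:00",
            ecephys_stop_time="2019-07-01 12:30:00",
        ),
        NeuroscopeLFP=dict(
            file_path=f"{session}/CGM4_190701_103000.lfp",
            gain=0.195,  # Intan conversion factor used throughout this project
            xml_file_path=f"{session}/CGM4_190701_103000.xml",
            spikeextractors_backend=True,
        ),
    )
    converter = TingleyMetabolicConverter(source_data=source_data)
    metadata = converter.get_metadata()
    converter.run_conversion(nwbfile_path="CGM4_190701_103000.nwb", metadata=metadata, overwrite=True)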