-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add classes to read and write folders and the beginning of a possible pipeline
- Loading branch information
1 parent
98b6d27
commit 9959217
Showing
7 changed files
with
122 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
from pathlib import Path | ||
from typing import List | ||
|
||
|
||
class ReadAllPathsInFolder:
    """Index the dataset folders found directly under a raw-data folder.

    Attributes
    ----------
    filetypes_of_interest : List[str]
        File extensions (without the leading dot) to look for.
    datasets_paths : List[Path]
        Directories found in the first layer of ``raw_data_folder``, sorted.
    dataset_names : List[str]
        Final path component of each entry in ``datasets_paths``.
    """

    def __init__(
        self, raw_data_folder: Path, filetypes_of_interest: List[str]
    ):
        self.filetypes_of_interest = filetypes_of_interest
        self.datasets_paths = self.get_folders_first_layer(raw_data_folder)
        self.dataset_names = [
            dataset_path.name for dataset_path in self.datasets_paths
        ]

    def get_folders_first_layer(self, file_path: Path) -> List[Path]:
        """Return the directories directly inside ``file_path``, sorted.

        Plain files sitting next to the dataset folders are skipped so they
        are not mistaken for datasets. Sorting makes the result (and any
        numbering derived from it) independent of the glob order.
        """
        return sorted(
            entry for entry in file_path.glob("*") if entry.is_dir()
        )

    def _paths_with_extension(self, folder: Path, filetype: str) -> List[Path]:
        """Return every path under ``folder`` matching ``*.<filetype>``."""
        return list(folder.rglob(f"*.{filetype}"))

    def get_files_paths(self, folder: Path) -> List[Path]:
        """Return the paths under ``folder`` for all filetypes of interest.

        The search is recursive and the results are grouped in the order of
        ``self.filetypes_of_interest``.
        """
        return [
            file
            for filetype in self.filetypes_of_interest
            for file in self._paths_with_extension(folder, filetype)
        ]

    def total_objects_by_filetype(self, folder: Path) -> dict:
        """Map each filetype of interest to its match count under ``folder``.

        Each filetype is counted on its own; counting the combined result of
        ``get_files_paths`` would report the same grand total for every key.
        """
        return {
            filetype: len(self._paths_with_extension(folder, filetype))
            for filetype in self.filetypes_of_interest
        }

    def max_session_number(self, filetype="tif", max_allowed=5) -> int:
        """Return the largest per-dataset count of ``filetype``, capped.

        Parameters
        ----------
        filetype : str
            Extension to count; a filetype that is not in
            ``self.filetypes_of_interest`` counts as 0.
        max_allowed : int
            Upper bound applied to the returned value.

        Returns
        -------
        int
            ``min(largest count, max_allowed)``, or 0 capped by
            ``max_allowed`` when there are no datasets.
        """
        counts_per_dataset = [
            self.total_objects_by_filetype(dataset_path).get(filetype, 0)
            for dataset_path in self.datasets_paths
        ]

        # default=0 keeps an empty raw-data folder from raising ValueError.
        return min(max(counts_per_dataset, default=0), max_allowed)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
from pathlib import Path | ||
from typing import List | ||
|
||
from datashuttle.configs.config_class import Configs | ||
from datashuttle.utils import folders | ||
|
||
|
||
class DatashuttleWrapper:
    """Build a datashuttle ``Configs`` for an output folder and use it to
    create the derivatives folder tree."""

    def __init__(self, output_path: Path) -> None:
        # Meant to be executed on the cluster, where the central storage is
        # directly reachable.
        settings = {
            "local_path": output_path,
            "central_path": "",
            "connection_method": "local_filesystem",
        }
        self.datashuttle_cfg = Configs(
            project_name=output_path.name,
            file_path=output_path,
            input_dict=settings,
        )

    def create_folders(self, dataset_names: List[str], session_number) -> None:
        """Create one subject folder per dataset, each with numbered sessions.

        Subjects are named ``sub-<index>_<dataset name>`` and sessions
        ``ses-<index>``, with both indices starting at 0. ``session_number``
        is the number of sessions created for every subject.
        """
        subject_names = [
            f"sub-{index}_{name}" for index, name in enumerate(dataset_names)
        ]
        session_names = [f"ses-{index}" for index in range(session_number)]

        folders.create_folder_trees(
            cfg=self.datashuttle_cfg,
            top_level_folder="derivatives",
            sub_names=subject_names,
            ses_names=session_names,
            datatype="funcimg",
        )
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
import argparse | ||
from pathlib import Path | ||
|
||
from calcium_imaging_automation.core.reader import ReadAllPathsInFolder | ||
from calcium_imaging_automation.core.writer import DatashuttleWrapper | ||
|
||
|
||
def main(raw_data_path: Path, output_path: Path, filetypes_of_interest: list):
    """
    Draft of the pipeline: index the raw data folder, then create the
    matching output folder structure.
    """
    dataset_reader = ReadAllPathsInFolder(raw_data_path, filetypes_of_interest)
    folder_writer = DatashuttleWrapper(output_path)

    # One session folder per tif, up to the reader's default cap.
    folder_writer.create_folders(
        dataset_reader.dataset_names,
        session_number=dataset_reader.max_session_number(filetype="tif"),
    )

    # [Placeholder for data processing]
|
||
|
||
def parse_args(argv=None):
    """Parse the command-line arguments of the example script.

    Parameters
    ----------
    argv : list of str, optional
        Arguments to parse; ``None`` makes argparse read ``sys.argv[1:]``.

    Returns
    -------
    argparse.Namespace
        With ``raw_data_path`` and ``output_path`` as ``Path`` objects and
        ``filetypes`` as a list of strings.
    """
    parser = argparse.ArgumentParser(
        description="Example usage of the pipeline manager."
    )

    parser.add_argument(
        "raw_data_path", type=Path, help="Path to the raw data."
    )
    parser.add_argument(
        "output_path", type=Path, help="Path to the output data."
    )
    parser.add_argument(
        "--filetypes",
        # argparse applies ``type`` to each token; ``list`` would split
        # "tif" into ["t", "i", "f"], so every token must stay a string.
        type=str,
        nargs="+",
        help="Filetypes of interest.",
        default=["tif", "bin"],
    )

    return parser.parse_args(argv)


if __name__ == "__main__":
    args = parse_args()
    main(args.raw_data_path, args.output_path, args.filetypes)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
#! /bin/bash
# Run the example pipeline script.
#
# Usage: <this script> [RAW_DATA_PATH] [OUTPUT_PATH]
#
# Both arguments are optional; when omitted, the original author's local
# paths are used so existing invocations keep working unchanged.
raw_data_path="${1:-/Users/lauraporta/local_data/rotation/}"
output_path="${2:-/Users/lauraporta/local_data/test/}"

# Quote the paths so directories containing spaces are passed intact.
python ./examples/example_usage.py \
    "$raw_data_path" \
    "$output_path"