Skip to content

Commit

Permalink
Add classes to read and write folders and the beginning of a possible…
Browse files Browse the repository at this point in the history
… pipeline
  • Loading branch information
lauraporta committed Oct 7, 2024
1 parent 98b6d27 commit 9959217
Show file tree
Hide file tree
Showing 7 changed files with 122 additions and 0 deletions.
4 changes: 4 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@ include LICENSE
include README.md
exclude .pre-commit-config.yaml

recursive-include calcium_imaging_automation *.py
recursive-include examples *.py
recursive-include examples *.sh

recursive-exclude * __pycache__
recursive-exclude * *.py[co]
recursive-exclude docs *
Expand Down
Empty file.
37 changes: 37 additions & 0 deletions calcium_imaging_automation/core/reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from pathlib import Path
from typing import List


class ReadAllPathsInFolder:
    """Index the first-level entries of a raw-data folder and count files.

    On construction, every entry found directly inside ``raw_data_folder``
    is stored in ``datasets_paths`` and its name in ``dataset_names``. The
    remaining methods search a given folder recursively for files whose
    extension is listed in ``filetypes_of_interest``.
    """

    def __init__(
        self, raw_data_folder: Path, filetypes_of_interest: List[str]
    ):
        """Store the filetypes and list the entries of ``raw_data_folder``.

        Parameters
        ----------
        raw_data_folder
            Folder whose direct children are treated as datasets.
        filetypes_of_interest
            File extensions without the leading dot, e.g. ``["tif", "bin"]``.
        """
        self.filetypes_of_interest = filetypes_of_interest
        self.datasets_paths = self.get_folders_first_layer(raw_data_folder)
        self.dataset_names = [
            dataset_path.name for dataset_path in self.datasets_paths
        ]

    def get_folders_first_layer(self, file_path: Path) -> List[Path]:
        """Return every entry located directly inside ``file_path``.

        The ``"*"`` pattern is not recursive and does not filter by entry
        type, so plain files in ``file_path`` are returned as well.
        """
        return list(file_path.glob("*"))

    def get_files_paths(self, folder: Path) -> List[Path]:
        """Return all files under ``folder`` matching any filetype of interest.

        The search is recursive; results are grouped in the order of
        ``filetypes_of_interest``.
        """
        return [
            file
            for filetype in self.filetypes_of_interest
            for file in folder.rglob(f"*.{filetype}")
        ]

    def total_objects_by_filetype(self, folder: Path) -> dict:
        """Return a ``{filetype: count}`` mapping for files under ``folder``.

        Each filetype is counted on its own. Using the combined total from
        ``get_files_paths`` for every key would report the same number for
        all filetypes.
        """
        return {
            filetype: sum(1 for _ in folder.rglob(f"*.{filetype}"))
            for filetype in self.filetypes_of_interest
        }

    def max_session_number(self, filetype="tif", max_allowed=5) -> int:
        """Return the largest per-dataset count of ``filetype``, capped.

        Parameters
        ----------
        filetype
            Extension to count. A filetype that is not part of
            ``filetypes_of_interest`` counts as 0.
        max_allowed
            Upper bound applied to the returned value.

        Returns
        -------
        int
            ``min(largest count, max_allowed)``; 0 when there are no
            datasets at all.
        """
        total_tif_number = [
            self.total_objects_by_filetype(dataset_path).get(filetype, 0)
            for dataset_path in self.datasets_paths
        ]

        # ``default=0`` keeps an empty raw-data folder from raising ValueError.
        return min(max(total_tif_number, default=0), max_allowed)
32 changes: 32 additions & 0 deletions calcium_imaging_automation/core/writer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from pathlib import Path
from typing import List

from datashuttle.configs.config_class import Configs
from datashuttle.utils import folders


class DatashuttleWrapper:
    """Thin wrapper that builds a datashuttle config and creates folders."""

    def __init__(self, output_path: Path) -> None:
        """Build the datashuttle ``Configs`` object rooted at ``output_path``.

        The project name is taken from the last component of
        ``output_path``.
        """
        # This is supposed to run in the cluster and have direct access
        # to the central storages
        config = Configs(
            project_name=output_path.name,
            file_path=output_path,
            input_dict={
                "local_path": output_path,
                "central_path": "",
                "connection_method": "local_filesystem",
            },
        )
        self.datashuttle_cfg = config

    def create_folders(self, dataset_names: List[str], session_number) -> None:
        """Create subject/session folder trees under ``derivatives``.

        One subject name is built per dataset as ``sub-<index>_<name>``
        and one session name per index in ``range(session_number)`` as
        ``ses-<index>``; both lists are passed on to
        ``folders.create_folder_trees`` with the ``funcimg`` datatype.
        """
        subject_names = []
        for index, dataset_name in enumerate(dataset_names):
            subject_names.append(f"sub-{index}_{dataset_name}")

        session_names = []
        for index in range(session_number):
            session_names.append(f"ses-{index}")

        folders.create_folder_trees(
            cfg=self.datashuttle_cfg,
            top_level_folder="derivatives",
            sub_names=subject_names,
            ses_names=session_names,
            datatype="funcimg",
        )
Empty file added examples/__init__.py
Empty file.
45 changes: 45 additions & 0 deletions examples/example_usage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import argparse
from pathlib import Path

from calcium_imaging_automation.core.reader import ReadAllPathsInFolder
from calcium_imaging_automation.core.writer import DatashuttleWrapper


def main(raw_data_path: Path, output_path: Path, filetypes_of_interest: list):
    """
    Draft pipeline entry point: index the raw data, then create the
    output folders.

    The entries of ``raw_data_path`` are listed by the reader, and the
    writer creates one subject folder per entry, with as many sessions as
    the reader's ``max_session_number`` reports for ``tif`` files.
    """
    folder_reader = ReadAllPathsInFolder(raw_data_path, filetypes_of_interest)

    folder_writer = DatashuttleWrapper(output_path)
    session_count = folder_reader.max_session_number(filetype="tif")
    folder_writer.create_folders(
        folder_reader.dataset_names, session_number=session_count
    )

    # [Placeholder for data processing]


if __name__ == "__main__":
    # Command-line entry point: parse the two folder paths and the optional
    # list of file extensions, then hand them to ``main``.
    parser = argparse.ArgumentParser(
        description="Example usage of the pipeline manager."
    )

    parser.add_argument(
        "raw_data_path", type=Path, help="Path to the raw data."
    )
    parser.add_argument(
        "output_path", type=Path, help="Path to the output data."
    )
    parser.add_argument(
        "--filetypes",
        # ``type`` is applied to each token individually; ``nargs="+"``
        # already collects the tokens into a list. Using ``type=list``
        # here would split every extension into single characters.
        type=str,
        nargs="+",
        help="Filetypes of interest.",
        default=["tif", "bin"],
    )

    args = parser.parse_args()
    raw_data_path = args.raw_data_path
    output_path = args.output_path
    file_types = args.filetypes

    main(raw_data_path, output_path, file_types)
4 changes: 4 additions & 0 deletions examples/example_usage.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#! /bin/bash
# Run the example pipeline script. The first positional argument is the
# raw data folder and the second is the output folder. The paths below
# are machine-specific; replace them with your own before running.
python ./examples/example_usage.py \
/Users/lauraporta/local_data/rotation/ \
/Users/lauraporta/local_data/test/

0 comments on commit 9959217

Please sign in to comment.