Make via snakemake what all the other classes were doing
lauraporta committed Dec 10, 2024
1 parent 76ff8b7 commit 93cfd20
Showing 7 changed files with 54 additions and 314 deletions.
11 changes: 3 additions & 8 deletions README.md
@@ -13,13 +13,8 @@ To extract dataset names
 snakemake --cores 1 setup_output.txt
 ```
 
-
-To run preprocessing with slurm, use the following command for one dataset:
-```bash
-snakemake --executor slurm --jobs 20 --latency-wait 10 preprocess_output_0.txt
-```
-For an array of datasets:
+Run all jobs in the pipeline:
 ```bash
-snakemake --executor slurm --jobs 20 --latency-wait 10 preprocess_output_{0..N}.txt
+snakemake --executor slurm --jobs 20 --latency-wait 10 all
 ```
-Replace N with the number of datasets you have in the `datasets.csv` file.
+Add `-np --printshellcmds` for a dry run with commands printed to the terminal.
21 changes: 0 additions & 21 deletions _datasets.csv

This file was deleted.

130 changes: 0 additions & 130 deletions calcium_imaging_automation/core/reader.py

This file was deleted.

16 changes: 7 additions & 9 deletions calcium_imaging_automation/core/rules/preprocess.py
@@ -4,19 +4,17 @@
 from derotation.derotate_batch import derotate
 from snakemake.script import snakemake
 
-try:
-    # Input arguments
-    read_dataset_path = Path(snakemake.input[0])
-    write_dataset_path = Path(snakemake.input[1])
-    output = snakemake.output[0]
-
-    output_path_dataset = write_dataset_path / "ses-0/funcimg/"
-
+# Input arguments
+read_dataset_path = Path(snakemake.input[0])
+output_tif = Path(snakemake.output[0])
+
+output_path_dataset = output_tif.parent.parent
+try:
     data = derotate(read_dataset_path, output_path_dataset)
     metric_measured = stability_of_most_detected_blob(data)
-    with open(output, "w") as f:
+    with open(output_path_dataset / "metric.txt", "w") as f:
         f.write(f"dataset: {read_dataset_path.stem} metric: {metric_measured}")
 except Exception as e:
     print(e.args)
-    with open(output, "w") as f:
+    with open(output_path_dataset / "error.txt", "w") as f:
         f.write(str(e.args))
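The rewritten script no longer receives a separate output argument; it derives the session folder from the single tif path Snakemake passes in. A minimal sketch of that path arithmetic, using a hypothetical dataset path that follows the layout declared in the Snakefile:

```python
from pathlib import Path

# Hypothetical output target, following the Snakefile's
# sub-{index}_{dataset}/ses-0/funcimg/derotation/ layout.
output_tif = Path(
    "/ceph/margrie/laura/cimaut/derivatives/"
    "sub-0_20240101rot/ses-0/funcimg/derotation/derotated_full.tif"
)

# Two .parent hops climb from derotation/derotated_full.tif up to funcimg/,
# which is where metric.txt (or error.txt on failure) is written.
output_path_dataset = output_tif.parent.parent
print(output_path_dataset)               # .../sub-0_20240101rot/ses-0/funcimg
print(output_path_dataset / "metric.txt")
```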
56 changes: 0 additions & 56 deletions calcium_imaging_automation/core/rules/setup.py

This file was deleted.

58 changes: 0 additions & 58 deletions calcium_imaging_automation/core/writer.py

This file was deleted.

76 changes: 44 additions & 32 deletions workflow/Snakefile
@@ -1,35 +1,47 @@
-rule setup:
-    input:
-        "/nfs/winstor/margrie/SimonWeiler/RawData/Invivo_imaging/3photon_rotation/shared/",
-        "/ceph/margrie/laura/cimaut/",
-    params:
-        folder_read_pattern="2*",
-        file_read_pattern=["rotation_00001.tif", "*.bin"],
-    output: "datasets.csv"
-    run:
-        "calcium_imaging_automation/core/rules/setup.py"
+# Base paths
+raw_data_base = "/nfs/winstor/margrie/SimonWeiler/RawData/Invivo_imaging/3photon_rotation/shared/"
+processed_data_base = "/ceph/margrie/laura/cimaut/derivatives"
 
-# import pandas as pd
+# Dynamically discover folders matching the "2*" pattern
+datasets = glob_wildcards(f"{raw_data_base}{{dataset}}").dataset
+datasets = [ds for ds in datasets if ds.startswith("2")]
+datasets = [ds.split("/")[0] for ds in datasets]
+datasets = list(set(datasets))
+datasets.sort()
 
-# paths = pd.read_csv("datasets.csv")
+# for the output paths
+datasets_no_underscore = [ds.replace("_", "") for ds in datasets]
 
-# rule all:
-#     input:
-#         expand("preprocess_output_{index}.txt", index=paths["index"])
+# Final state of the pipeline
+# Are all the output files present?
+rule all:
+    input:
+        expand(
+            [
+                f"{processed_data_base}/sub-{{index}}_{{datasets_no_underscore}}/ses-0/funcimg/derotation/derotated_full.tif",
+                f"{processed_data_base}/sub-{{index}}_{{datasets_no_underscore}}/ses-0/funcimg/derotation/derotated_full.csv",
+            ],
+            zip,
+            index=range(len(datasets)),
+            datasets_no_underscore=datasets_no_underscore,
+        )
 
-# rule preprocess:
-#     input:
-#         lambda wildcards: paths.loc[int(wildcards.index), "read_dataset_path"],
-#         lambda wildcards: paths.loc[int(wildcards.index), "write_dataset_path"],
-#     output:
-#         "preprocess_output_{index}.txt"
-#     params:
-#         index=lambda wildcards: wildcards.index
-#     resources:
-#         partition="fast",
-#         mem_mb=16000,
-#         cpu_per_task=1,
-#         tasks=1,
-#         nodes=1,
-#     script:
-#         "calcium_imaging_automation/core/rules/preprocess.py"
+rule preprocess:
+    input:
+        raw=lambda wildcards: f"{raw_data_base}{datasets[int(wildcards.index)]}/",
+        # Dynamically match input files using patterns
+        # bin=lambda wildcards: f"{raw_data_base}{datasets[int(wildcards.index)]}/aux_stim/*rotation_*001.bin",
+        # tif=lambda wildcards: f"{raw_data_base}{datasets[int(wildcards.index)]}/imaging/rotation_*001.tif",
+    output:
+        tiff=f"{processed_data_base}/sub-{{index}}_{{datasets_no_underscore}}/ses-0/funcimg/derotation/derotated_full.tif",
+        csv=f"{processed_data_base}/sub-{{index}}_{{datasets_no_underscore}}/ses-0/funcimg/derotation/derotated_full.csv",
+    params:
+        index=lambda wildcards: wildcards.index
+    resources:
+        partition="fast",
+        mem_mb=16000,
+        cpu_per_task=1,
+        tasks=1,
+        nodes=1,
+    script:
+        "../calcium_imaging_automation/core/rules/preprocess.py"
