From 826792b9f2d5e2aa6f4b4b65810f2deaba811bba Mon Sep 17 00:00:00 2001 From: lauraporta Date: Wed, 11 Dec 2024 15:58:35 +0000 Subject: [PATCH] Generate report with datavzrd --- .gitignore | 3 ++ MANIFEST.in | 1 + README.md | 16 ++++++--- .../core/rules/preprocess.py | 13 +++++--- .../core/rules/summarize_data.py | 28 ++++++++++++++++ pyproject.toml | 2 ++ workflow/Snakefile | 33 ++++++++++++++++--- workflow/resources/datavzrd_config.yaml | 22 +++++++++++++ 8 files changed, 105 insertions(+), 13 deletions(-) create mode 100644 calcium_imaging_automation/core/rules/summarize_data.py create mode 100644 workflow/resources/datavzrd_config.yaml diff --git a/.gitignore b/.gitignore index ce92f74..6c02c04 100644 --- a/.gitignore +++ b/.gitignore @@ -87,3 +87,6 @@ examples/*.sh # snakemake .snakemake/* + +# datavzrd +workflow/results/ diff --git a/MANIFEST.in b/MANIFEST.in index bb8163b..c76a641 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -5,6 +5,7 @@ exclude .pre-commit-config.yaml recursive-include calcium_imaging_automation *.py recursive-include examples *.py +recursive-include workflow *.yaml recursive-exclude * __pycache__ recursive-exclude * *.py[co] diff --git a/README.md b/README.md index 4c81b3d..1d80ec8 100644 --- a/README.md +++ b/README.md @@ -8,13 +8,19 @@ With support for local or cluster-based parallelization, CIMAT provides visualiz ### Run workflow with Snakemake -To extract dataset names -```bash -snakemake --cores 1 setup_output.txt -``` - Run all jobs in the pipeline: ```bash snakemake --executor slurm --jobs 20 --latency-wait 10 all ``` Add `-np --printshellcmds` for a dry run with commands printed to the terminal. + +### View the interactive report with datavzrd +Build the CSV: +```bash +snakemake --cores 1 workflow/results/data/summary.csv +``` +Create the report: +```bash +datavzrd workflow/resources/datavzrd_config.yaml --output workflow/results/datavzrd +``` +Then open the report (`index.html`) in a browser. 
diff --git a/calcium_imaging_automation/core/rules/preprocess.py b/calcium_imaging_automation/core/rules/preprocess.py index 66ddb5c..27434fb 100644 --- a/calcium_imaging_automation/core/rules/preprocess.py +++ b/calcium_imaging_automation/core/rules/preprocess.py @@ -1,3 +1,4 @@ +import traceback from pathlib import Path from derotation.analysis.metrics import stability_of_most_detected_blob @@ -13,8 +14,12 @@ data = derotate(read_dataset_path, output_path_dataset) metric_measured = stability_of_most_detected_blob(data) with open(output_path_dataset / "metric.txt", "w") as f: - f.write(f"dataset: {read_dataset_path.stem} metric: {metric_measured}") -except Exception as e: - print(e.args) + f.write(f"stability_of_most_detected_blob: {metric_measured}") + # make empty error file with open(output_path_dataset / "error.txt", "w") as f: - f.write(str(e.args)) + f.write("") +except Exception: + with open(output_path_dataset / "error.txt", "w") as f: + f.write(traceback.format_exc()) + with open(output_path_dataset / "metric.txt", "w") as f: + f.write(f"dataset: {read_dataset_path.stem} metric: NaN") diff --git a/calcium_imaging_automation/core/rules/summarize_data.py b/calcium_imaging_automation/core/rules/summarize_data.py new file mode 100644 index 0000000..566d3c9 --- /dev/null +++ b/calcium_imaging_automation/core/rules/summarize_data.py @@ -0,0 +1,28 @@ +from pathlib import Path + +import pandas as pd +from snakemake.script import snakemake + +# Retrieve parameters and inputs from Snakemake +datasets = snakemake.params.datasets +processed_data_base = snakemake.params.base_path + +data = [] +for idx, dataset in enumerate(datasets): + metric_file = Path( + f"{processed_data_base}/sub-{idx}_{dataset}/ses-0/funcimg/metric.txt" + ) + error_file = Path( + f"{processed_data_base}/sub-{idx}_{dataset}/ses-0/funcimg/error.txt" + ) + + # Read metric and error values + metric = metric_file.read_text().strip() if metric_file.exists() else "N/A" + error = 
error_file.read_text().strip() if error_file.exists() else "N/A" + + # Append results + data.append({"Dataset": dataset, "Metric": metric, "Error": error}) + +# Create a DataFrame and write to CSV +df = pd.DataFrame(data) +df.to_csv(snakemake.output[0], index=False) diff --git a/pyproject.toml b/pyproject.toml index c3013a9..4043067 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,8 +8,10 @@ dynamic = ["version"] dependencies = [ "numpy", + "pandas", "snakemake", "snakemake-executor-plugin-slurm", + "datavzrd", ] license = {text = "BSD-3-Clause"} diff --git a/workflow/Snakefile b/workflow/Snakefile index 27bc8b3..677d5cd 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -12,6 +12,7 @@ datasets.sort() # for the output datasets_no_underscore = [ds.replace("_", "") for ds in datasets] +# ----------------------------------------------------- # Final state of the pipeline # Are all the outputs files present? rule all: @@ -20,19 +21,22 @@ rule all: [ f"{processed_data_base}/sub-{{index}}_{{datasets_no_underscore}}/ses-0/funcimg/derotation/derotated_full.tif", f"{processed_data_base}/sub-{{index}}_{{datasets_no_underscore}}/ses-0/funcimg/derotation/derotated_full.csv", + f"{processed_data_base}/sub-{{index}}_{{datasets_no_underscore}}/ses-0/funcimg/metric.txt", + f"{processed_data_base}/sub-{{index}}_{{datasets_no_underscore}}/ses-0/funcimg/error.txt", ], zip, index=range(len(datasets)), datasets_no_underscore=datasets_no_underscore, - ) + ), +# ----------------------------------------------------- +# Preprocess rule preprocess: input: raw=lambda wildcards: f"{raw_data_base}{datasets[int(wildcards.index)]}/", - # Dynamically match input files using patterns - # bin=lambda wildcards: f"{raw_data_base}{datasets[int(wildcards.index)]}/aux_stim/*rotation_*001.bin", - # tif=lambda wildcards: f"{raw_data_base}{datasets[int(wildcards.index)]}/imaging/rotation_*001.tif", output: + 
report(f"{processed_data_base}/sub-{{index}}_{{datasets_no_underscore}}/ses-0/funcimg/metric.txt"), + report(f"{processed_data_base}/sub-{{index}}_{{datasets_no_underscore}}/ses-0/funcimg/error.txt"), tiff=f"{processed_data_base}/sub-{{index}}_{{datasets_no_underscore}}/ses-0/funcimg/derotation/derotated_full.tif", csv=f"{processed_data_base}/sub-{{index}}_{{datasets_no_underscore}}/ses-0/funcimg/derotation/derotated_full.csv", params: @@ -45,3 +49,24 @@ rule preprocess: nodes=1, script: "../calcium_imaging_automation/core/rules/preprocess.py" + +# ----------------------------------------------------- +# Summarize data for datavzrd report +rule summarize_data: + input: + expand( + [ + f"{processed_data_base}/sub-{{index}}_{{dataset}}/ses-0/funcimg/metric.txt", + f"{processed_data_base}/sub-{{index}}_{{dataset}}/ses-0/funcimg/error.txt", + ], + zip, + index=range(len(datasets)), + dataset=datasets_no_underscore, + ) + output: + "workflow/results/data/summary.csv" + params: + datasets=datasets_no_underscore, + base_path=processed_data_base + script: + "../calcium_imaging_automation/core/rules/summarize_data.py" diff --git a/workflow/resources/datavzrd_config.yaml b/workflow/resources/datavzrd_config.yaml new file mode 100644 index 0000000..36388eb --- /dev/null +++ b/workflow/resources/datavzrd_config.yaml @@ -0,0 +1,22 @@ +datasets: + summary: + path: workflow/results/data/summary.csv + separator: "," + +views: + summary_view: + dataset: summary + render-table: + columns: + Dataset: + plot: + ticks: + scale: linear + Metric: + plot: + ticks: + scale: linear + Error: + plot: + ticks: + scale: linear