From 826792b9f2d5e2aa6f4b4b65810f2deaba811bba Mon Sep 17 00:00:00 2001 From: lauraporta Date: Wed, 11 Dec 2024 15:58:35 +0000 Subject: [PATCH] Generate report with datavzrd --- .gitignore | 3 ++ MANIFEST.in | 1 + README.md | 16 ++++++--- .../core/rules/preprocess.py | 13 +++++--- .../core/rules/summarize_data.py | 28 ++++++++++++++++ pyproject.toml | 2 ++ workflow/Snakefile | 33 ++++++++++++++++--- workflow/resources/datavzrd_config.yaml | 22 +++++++++++++ 8 files changed, 105 insertions(+), 13 deletions(-) create mode 100644 calcium_imaging_automation/core/rules/summarize_data.py create mode 100644 workflow/resources/datavzrd_config.yaml diff --git a/.gitignore b/.gitignore index ce92f74..6c02c04 100644 --- a/.gitignore +++ b/.gitignore @@ -87,3 +87,6 @@ examples/*.sh # snakemake .snakemake/* + +# datavzrd +workflow/results/ diff --git a/MANIFEST.in b/MANIFEST.in index bb8163b..c76a641 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -5,6 +5,7 @@ exclude .pre-commit-config.yaml recursive-include calcium_imaging_automation *.py recursive-include examples *.py +recursive-include workflow *.yaml recursive-exclude * __pycache__ recursive-exclude * *.py[co] diff --git a/README.md b/README.md index 4c81b3d..1d80ec8 100644 --- a/README.md +++ b/README.md @@ -8,13 +8,19 @@ With support for local or cluster-based parallelization, CIMAT provides visualiz ### Run workflow with Snakemake -To extract dataset names -```bash -snakemake --cores 1 setup_output.txt -``` - Run all jobs in the pipeline: ```bash snakemake --executor slurm --jobs 20 --latency-wait 10 all ``` Add `-np --printshellcmds` for a dry run with commands printed to the terminal. + +### View the interactive report with datavzrd +Build the CSV: +```bash +snakemake --cores 1 workflow/results/data/summary.csv +``` +Create the report: +```bash +datavzrd workflow/resources/datavzrd_config.yaml --output workflow/results/datavzrd +``` +Then open the report (`index.html`) in a browser. 
diff --git a/calcium_imaging_automation/core/rules/preprocess.py b/calcium_imaging_automation/core/rules/preprocess.py index 66ddb5c..27434fb 100644 --- a/calcium_imaging_automation/core/rules/preprocess.py +++ b/calcium_imaging_automation/core/rules/preprocess.py @@ -1,3 +1,4 @@ +import traceback from pathlib import Path from derotation.analysis.metrics import stability_of_most_detected_blob @@ -13,8 +14,12 @@ data = derotate(read_dataset_path, output_path_dataset) metric_measured = stability_of_most_detected_blob(data) with open(output_path_dataset / "metric.txt", "w") as f: - f.write(f"dataset: {read_dataset_path.stem} metric: {metric_measured}") -except Exception as e: - print(e.args) + f.write(f"stability_of_most_detected_blob: {metric_measured}") + # make empty error file with open(output_path_dataset / "error.txt", "w") as f: - f.write(str(e.args)) + f.write("") +except Exception: + with open(output_path_dataset / "error.txt", "w") as f: + f.write(traceback.format_exc()) + with open(output_path_dataset / "metric.txt", "w") as f: + f.write(f"dataset: {read_dataset_path.stem} metric: NaN") diff --git a/calcium_imaging_automation/core/rules/summarize_data.py b/calcium_imaging_automation/core/rules/summarize_data.py new file mode 100644 index 0000000..566d3c9 --- /dev/null +++ b/calcium_imaging_automation/core/rules/summarize_data.py @@ -0,0 +1,28 @@ +from pathlib import Path + +import pandas as pd +from snakemake.script import snakemake + +# Retrieve parameters and inputs from Snakemake +datasets = snakemake.params.datasets +processed_data_base = snakemake.params.base_path + +data = [] +for idx, dataset in enumerate(datasets): + metric_file = Path( + f"{processed_data_base}/sub-{idx}_{dataset}/ses-0/funcimg/metric.txt" + ) + error_file = Path( + f"{processed_data_base}/sub-{idx}_{dataset}/ses-0/funcimg/error.txt" + ) + + # Read metric and error values + metric = metric_file.read_text().strip() if metric_file.exists() else "N/A" + error = 
error_file.read_text().strip() if error_file.exists() else "N/A" + + # Append results + data.append({"Dataset": dataset, "Metric": metric, "Error": error}) + +# Create a DataFrame and write to CSV +df = pd.DataFrame(data) +df.to_csv(snakemake.output[0], index=False) diff --git a/pyproject.toml b/pyproject.toml index c3013a9..4043067 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,8 +8,10 @@ dynamic = ["version"] dependencies = [ "numpy", + "pandas", "snakemake", "snakemake-executor-plugin-slurm", + "datavzrd", ] license = {text = "BSD-3-Clause"} diff --git a/workflow/Snakefile b/workflow/Snakefile index 27bc8b3..677d5cd 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -12,6 +12,7 @@ datasets.sort() # for the output datasets_no_underscore = [ds.replace("_", "") for ds in datasets] +# ----------------------------------------------------- # Final state of the pipeline # Are all the outputs files present? rule all: @@ -20,19 +21,22 @@ rule all: [ f"{processed_data_base}/sub-{{index}}_{{datasets_no_underscore}}/ses-0/funcimg/derotation/derotated_full.tif", f"{processed_data_base}/sub-{{index}}_{{datasets_no_underscore}}/ses-0/funcimg/derotation/derotated_full.csv", + f"{processed_data_base}/sub-{{index}}_{{datasets_no_underscore}}/ses-0/funcimg/metric.txt", + f"{processed_data_base}/sub-{{index}}_{{datasets_no_underscore}}/ses-0/funcimg/error.txt", ], zip, index=range(len(datasets)), datasets_no_underscore=datasets_no_underscore, - ) + ), +# ----------------------------------------------------- +# Preprocess rule preprocess: input: raw=lambda wildcards: f"{raw_data_base}{datasets[int(wildcards.index)]}/", - # Dynamically match input files using patterns - # bin=lambda wildcards: f"{raw_data_base}{datasets[int(wildcards.index)]}/aux_stim/*rotation_*001.bin", - # tif=lambda wildcards: f"{raw_data_base}{datasets[int(wildcards.index)]}/imaging/rotation_*001.tif", output: + 
report(f"{processed_data_base}/sub-{{index}}_{{datasets_no_underscore}}/ses-0/funcimg/metric.txt"), + report(f"{processed_data_base}/sub-{{index}}_{{datasets_no_underscore}}/ses-0/funcimg/error.txt"), tiff=f"{processed_data_base}/sub-{{index}}_{{datasets_no_underscore}}/ses-0/funcimg/derotation/derotated_full.tif", csv=f"{processed_data_base}/sub-{{index}}_{{datasets_no_underscore}}/ses-0/funcimg/derotation/derotated_full.csv", params: @@ -45,3 +49,24 @@ rule preprocess: nodes=1, script: "../calcium_imaging_automation/core/rules/preprocess.py" + +# ----------------------------------------------------- +# Summarize data for datavzrd report +rule summarize_data: + input: + expand( + [ + f"{processed_data_base}/sub-{{index}}_{{dataset}}/ses-0/funcimg/metric.txt", + f"{processed_data_base}/sub-{{index}}_{{dataset}}/ses-0/funcimg/error.txt", + ], + zip, + index=range(len(datasets)), + dataset=datasets_no_underscore, + ) + output: + "workflow/results/data/summary.csv" + params: + datasets=datasets_no_underscore, + base_path=processed_data_base + script: + "../calcium_imaging_automation/core/rules/summarize_data.py" diff --git a/workflow/resources/datavzrd_config.yaml b/workflow/resources/datavzrd_config.yaml new file mode 100644 index 0000000..36388eb --- /dev/null +++ b/workflow/resources/datavzrd_config.yaml @@ -0,0 +1,22 @@ +datasets: + summary: + path: workflow/results/data/summary.csv + separator: "," + +views: + summary_view: + dataset: summary + render-table: + columns: + Dataset: + plot: + ticks: + scale: linear + Metric: + plot: + ticks: + scale: linear + Error: + plot: + ticks: + scale: linear