From 789147db1bfda1af2d49f65e729c869299f77535 Mon Sep 17 00:00:00 2001
From: lauraporta <ucqflpo@ucl.ac.uk>
Date: Mon, 11 Nov 2024 18:20:40 +0000
Subject: [PATCH] Store mlflow folder differently, link artifacts

---
 calcium_imaging_automation/core/writer.py |  9 ++++--
 examples/example_usage.py                 | 35 ++++++++++++++++-------
 2 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/calcium_imaging_automation/core/writer.py b/calcium_imaging_automation/core/writer.py
index 120deca..2fbbc5c 100644
--- a/calcium_imaging_automation/core/writer.py
+++ b/calcium_imaging_automation/core/writer.py
@@ -47,10 +47,15 @@ def save_image(
         dataset_name: str,
         session_number: int,
         filename: str,
-    ) -> None:
+    ) -> Path:
         path = self.get_dataset_path(dataset_name)
         image = Image.fromarray(image).convert("L")
+        image_path = (
+            path / f"ses-{session_number}" / f"{filename}-{run_id}.png"
+        )
         image.save(
-            path / f"ses-{session_number}" / f"{filename}-{run_id}.png",
+            image_path,
             mode="PNG",
         )
+
+        return image_path
diff --git a/examples/example_usage.py b/examples/example_usage.py
index 02ada0b..3789fa9 100644
--- a/examples/example_usage.py
+++ b/examples/example_usage.py
@@ -17,13 +17,9 @@ def main(
     folder_read_pattern: str,
     file_read_pattern: List[str],
 ):
-    """
-    Draft usage of the pipeline, now consisting of read and write operations.
-    """
     # --- Setup experiment-wide logging to file ---
     (output_path / "logs").mkdir(exist_ok=True)
     logging.basicConfig(
-        # Save also time and date
         filename=str(
             output_path
             / "logs"
@@ -33,6 +29,11 @@ def main(
         format="%(asctime)s - %(message)s",
     )
 
+    # --- Set up MLflow tracking ---
+    mlflow_tracking_dir = output_path / "derivatives" / "mlflow"
+    mlflow.set_tracking_uri(str(mlflow_tracking_dir))
+    mlflow.set_experiment("calcium_imaging_pipeline")
+
     # --- Read folders and files ---
     reader = ReadAllPathsInFolder(
         raw_data_path,
@@ -51,10 +52,9 @@ def main(
 
     for dataset in reader.datasets_paths:
         dataset_name = dataset.stem
-        for session in range(1, number_of_tiffs + 1):
-            with (
-                mlflow.start_run()
-            ):  # Start a new MLflow run for each dataset-session
+        for session in range(0, number_of_tiffs):
+            # Start a new MLflow run for each dataset-session
+            with mlflow.start_run():
                 # Log session-specific parameters
                 mlflow.log_param("dataset_name", dataset_name)
                 mlflow.log_param("session_number", session)
@@ -75,7 +75,7 @@ def main(
                 mlflow.log_metric("metric_measured", metric_measured)
 
                 # Save image in session folder
-                writer.save_image(
+                image_path = writer.save_image(
                     image=data,
                     run_id=session,
                     dataset_name=dataset_name,
@@ -83,7 +83,22 @@ def main(
                     filename="image",
                 )
 
-                # Log that the run is complete for this session
+                # Log the image as an artifact in MLflow
+                mlflow.log_artifact(
+                    image_path,
+                    artifact_path=f"{dataset_name}/session_{session}",
+                )
+
+                logging.info(
+                    f"MLflow run_id: {mlflow.active_run().info.run_id}"
+                )
+                logging.info(
+                    "MLflow experiment_id: "
+                    + f"{mlflow.active_run().info.experiment_id}"
+                )
+                logging.info(
+                    f"MLflow tracking_uri: {mlflow.get_tracking_uri()}"
+                )
                 logging.info(
                     f"Completed MLflow run for dataset {dataset_name} "
                     + f"session {session}"