From 789147db1bfda1af2d49f65e729c869299f77535 Mon Sep 17 00:00:00 2001 From: lauraporta Date: Mon, 11 Nov 2024 18:20:40 +0000 Subject: [PATCH] Store mlflow folder differently, link artifacts --- calcium_imaging_automation/core/writer.py | 9 ++++-- examples/example_usage.py | 35 ++++++++++++++++------- 2 files changed, 32 insertions(+), 12 deletions(-) diff --git a/calcium_imaging_automation/core/writer.py b/calcium_imaging_automation/core/writer.py index 120deca..2fbbc5c 100644 --- a/calcium_imaging_automation/core/writer.py +++ b/calcium_imaging_automation/core/writer.py @@ -47,10 +47,15 @@ def save_image( dataset_name: str, session_number: int, filename: str, - ) -> None: + ) -> Path: path = self.get_dataset_path(dataset_name) image = Image.fromarray(image).convert("L") + image_path = ( + path / f"ses-{session_number}" / f"{filename}-{run_id}.png" + ) image.save( - path / f"ses-{session_number}" / f"{filename}-{run_id}.png", + image_path, mode="PNG", ) + + return image_path diff --git a/examples/example_usage.py b/examples/example_usage.py index 02ada0b..3789fa9 100644 --- a/examples/example_usage.py +++ b/examples/example_usage.py @@ -17,13 +17,9 @@ def main( folder_read_pattern: str, file_read_pattern: List[str], ): - """ - Draft usage of the pipeline, now consisting of read and write operations. - """ # --- Setup experiment-wide logging to file --- (output_path / "logs").mkdir(exist_ok=True) logging.basicConfig( - # Save also time and date filename=str( output_path / "logs" @@ -33,6 +29,11 @@ def main( format="%(asctime)s - %(message)s", ) + # --- Setup MLflow tracking --- + mlflow_tracking_dir = output_path / "derivatives" / "mlflow" + mlflow.set_tracking_uri(str(mlflow_tracking_dir)) + mlflow.set_experiment("calcium_imaging_pipeline") + # --- Read folders and files --- reader = ReadAllPathsInFolder( raw_data_path, @@ -51,10 +52,9 @@ def main( for dataset in reader.datasets_paths: dataset_name = dataset.stem - for session in range(1, number_of_tiffs + 1): - with ( - mlflow.start_run() - ): # Start a new MLflow run for each dataset-session + for session in range(0, number_of_tiffs): + # Start a new MLflow run for each dataset-session + with mlflow.start_run(): # Log session-specific parameters mlflow.log_param("dataset_name", dataset_name) mlflow.log_param("session_number", session) @@ -75,7 +75,7 @@ def main( mlflow.log_metric("metric_measured", metric_measured) # Save image in session folder - writer.save_image( + image_path = writer.save_image( image=data, run_id=session, dataset_name=dataset_name, @@ -83,7 +83,22 @@ def main( filename="image", ) - # Log that the run is complete for this session + # Log the image as an artifact in MLflow + mlflow.log_artifact( + image_path, + artifact_path=f"{dataset_name}/session_{session}", + ) + + logging.info( + f"MLflow run_id: {mlflow.active_run().info.run_id}" + ) + logging.info( + "MLflow experiment_id: " + + f"{mlflow.active_run().info.experiment_id}" + ) + logging.info( + f"MLflow tracking_uri: {mlflow.get_tracking_uri()}" + ) logging.info( f"Completed MLflow run for dataset {dataset_name} " + f"session {session}"