From b580fbc78d5d5d15da7d15a0916f2136244a3428 Mon Sep 17 00:00:00 2001 From: lauraporta Date: Tue, 12 Nov 2024 16:46:21 +0000 Subject: [PATCH] =?UTF-8?q?WIP:=20nested=20runs,=20=F0=9F=90=9B=20on=20art?= =?UTF-8?q?ifacts=20saving?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- calcium_imaging_automation/core/writer.py | 3 +- examples/example_usage.py | 101 ++++++++++++---------- 2 files changed, 56 insertions(+), 48 deletions(-) diff --git a/calcium_imaging_automation/core/writer.py b/calcium_imaging_automation/core/writer.py index 2fbbc5c..885d5f7 100644 --- a/calcium_imaging_automation/core/writer.py +++ b/calcium_imaging_automation/core/writer.py @@ -43,7 +43,6 @@ def get_dataset_path(self, dataset_name: str) -> Path: def save_image( self, image: np.ndarray, - run_id: int, dataset_name: str, session_number: int, filename: str, @@ -51,7 +50,7 @@ def save_image( path = self.get_dataset_path(dataset_name) image = Image.fromarray(image).convert("L") image_path = ( - path / f"ses-{session_number}" / f"{filename}-{run_id}.png" + path / f"ses-{session_number}" / f"{filename}.png" ) image.save( image_path, diff --git a/examples/example_usage.py b/examples/example_usage.py index fada0f3..a85b436 100644 --- a/examples/example_usage.py +++ b/examples/example_usage.py @@ -3,6 +3,7 @@ import logging from pathlib import Path from typing import List +import setuptools_scm import mlflow import numpy as np @@ -16,6 +17,7 @@ def main( output_path: Path, folder_read_pattern: str, file_read_pattern: List[str], + experiment_name: str = "pipeline_test", ): # --- Setup experiment-wide logging to file --- (output_path / "logs").mkdir(exist_ok=True) @@ -30,9 +32,9 @@ def main( ) # --- Setup MLflow tracking --- - mlflow_tracking_dir = output_path / "derivatives" / "mlflow" + mlflow_tracking_dir = output_path / "mlflow" mlflow.set_tracking_uri(str(mlflow_tracking_dir)) - mlflow.set_experiment("calcium_imaging_pipeline") + mlflow.set_experiment(experiment_name) # --- Read folders and files --- reader = ReadAquiredData( @@ -53,8 +55,11 @@ def main( for dataset in reader.datasets_paths: dataset_name = dataset.stem for session in range(0, number_of_tiffs): - # Start a new MLflow run for each dataset-session - with mlflow.start_run(): + # Generate mock data + data = np.random.rand(100, 100) + + # Start a new MLflow experiment for each dataset-session + with mlflow.start_run() as parent_run: # Log session-specific parameters mlflow.log_param("dataset_name", dataset_name) mlflow.log_param("session_number", session) @@ -62,48 +67,49 @@ def main( mlflow.log_param("output_path", str(output_path)) mlflow.log_param("folder_read_pattern", folder_read_pattern) mlflow.log_param("file_read_pattern", file_read_pattern) + mlflow.log_param("local_changes_hash", setuptools_scm.get_version()) logging.info( - f"Processing dataset {dataset_name} session {session}..." - ) - - # Mock processing - data = np.random.rand(100, 100) - metric_measured = np.random.rand() - - # Log metric with MLflow - mlflow.log_metric("metric_measured", metric_measured) - - # Save image in session folder - image_path = writer.save_image( - image=data, - run_id=session, - dataset_name=dataset_name, - session_number=session, - filename="image", + f"Starting MLflow experiment for dataset {dataset_name} session {session}..." ) - # Log the image as an artifact in MLflow - mlflow.log_artifact( - image_path, - artifact_path=f"{dataset_name}/session_{session}", - ) + # Mock processing for different runs within the experiment + for i in range(1, 11): # 10 runs with varying parameters + # Start a child run under the main dataset-session run + with mlflow.start_run(nested=True): + + # Mock metric calculation + metric_measured = np.mean(data) * i + + # Log parameters and metrics specific to this run + mlflow.log_param("data_size", f"{i * 10}x100") + mlflow.log_param("run_iteration", i) + mlflow.log_param("run_id", mlflow.active_run().info.run_id) + mlflow.log_metric("metric_measured", metric_measured) + + # Log the generated data as an artifact if desired + # Here, simulate an image or data file save path + image_path = writer.save_image( + image=data, + dataset_name=dataset_name, + session_number=session, + filename=f"image_run_{i}", + ) + + mlflow.log_artifact( + image_path, + artifact_path=f"{dataset_name}/session_{session}/run_{i}", + ) + + logging.info( + f"Completed MLflow run iteration {i} for dataset {dataset_name} session {session}" + ) logging.info( - f"MLflow run_id: {mlflow.active_run().info.run_id}" - ) - logging.info( - "MLflow experiment_id: " - + f"{mlflow.active_run().info.experiment_id}" - ) - logging.info( - f"MLflow tracking_uri: {mlflow.get_tracking_uri()}" - ) - logging.info( - f"Completed MLflow run for dataset {dataset_name} " - + f"session {session}" + f"Completed MLflow experiment for dataset {dataset_name} session {session}" ) + logging.info("Pipeline finished.") @@ -130,16 +136,19 @@ def main( help="List of glob patterns for reading files.", action="append", ) + parser.add_argument( + "--experiment_name", + type=str, + help="Name of the experiment.", + default="pipeline_test", + ) args = parser.parse_args() - raw_data_path = args.raw_data_path - output_path = args.output_path - folder_read_pattern = args.folder_read_pattern - file_read_pattern = args.file_read_pattern main( - raw_data_path, - output_path, - folder_read_pattern, - file_read_pattern, + args.raw_data_path, + args.output_path, + args.folder_read_pattern, + args.file_read_pattern, + args.experiment_name, )