From 8199358e767ce4582c67e9d95874b62fc4f663d4 Mon Sep 17 00:00:00 2001 From: Markus Semmler Date: Sat, 21 Oct 2023 04:30:08 +0200 Subject: [PATCH] Fix bugs in pipeline. --- scripts/render_plots.py | 31 +++++++++---------------------- src/re_classwise_shapley/log.py | 19 +++++++++++++++++++ src/re_classwise_shapley/utils.py | 8 +++++++- 3 files changed, 35 insertions(+), 23 deletions(-) diff --git a/scripts/render_plots.py b/scripts/render_plots.py index dd4a84a2..f2b3569d 100644 --- a/scripts/render_plots.py +++ b/scripts/render_plots.py @@ -13,7 +13,6 @@ also stored in mlflow. The id of the mlflow experiment is given by the schema `experiment_name.model_name`. """ -import math as m import os import os.path from datetime import datetime @@ -22,9 +21,15 @@ import mlflow import numpy as np from dotenv import load_dotenv +from matplotlib import pyplot as plt from re_classwise_shapley.io import Accessor -from re_classwise_shapley.log import log_datasets, log_figure, setup_logger +from re_classwise_shapley.log import ( + get_or_create_mlflow_experiment, + log_datasets, + log_figure, + setup_logger, +) from re_classwise_shapley.plotting import ( plot_curves, plot_histogram, @@ -41,25 +46,6 @@ logger = setup_logger("render_plots") -def get_or_create_mlflow_experiment(experiment_name: str) -> str: - """ - Get or create a mlflow experiment. If the experiment does not exist, it will be - created. - - Args: - experiment_name: Name of the experiment. - - Returns: - Identifier of the experiment. - """ - experiment = mlflow.get_experiment_by_name(experiment_name) - if not experiment: - experiment_id = mlflow.create_experiment(experiment_name) - else: - experiment_id = experiment.experiment_id - return experiment_id - - @click.command() @click.option("--experiment-name", type=str, required=True) @click.option("--model-name", type=str, required=True) @@ -77,7 +63,6 @@ def render_plots(experiment_name: str, model_name: str): def _render_plots(experiment_name: str, model_name: str): - load_dotenv() logger.info("Starting plotting of data valuation experiment") output_folder = Accessor.PLOT_PATH / experiment_name / model_name mlflow_id = f"{experiment_name}.{model_name}" @@ -110,6 +95,7 @@ def _render_plots(experiment_name: str, model_name: str): ) ) + plt.switch_backend("agg") valuation_results = Accessor.valuation_results( experiment_name, model_name, @@ -169,4 +155,5 @@ def _render_plots(experiment_name: str, model_name: str): if __name__ == "__main__": + load_dotenv() render_plots() diff --git a/src/re_classwise_shapley/log.py b/src/re_classwise_shapley/log.py index 37567e22..cb8762be 100644 --- a/src/re_classwise_shapley/log.py +++ b/src/re_classwise_shapley/log.py @@ -105,3 +105,22 @@ def dataset_to_dataframe(dataset: Dataset) -> pd.DataFrame: columns=dataset.feature_names + dataset.target_names, ) return df + + +def get_or_create_mlflow_experiment(experiment_name: str) -> str: + """ + Get or create a mlflow experiment. If the experiment does not exist, it will be + created. + + Args: + experiment_name: Name of the experiment. + + Returns: + Identifier of the experiment. + """ + experiment = mlflow.get_experiment_by_name(experiment_name) + if not experiment: + experiment_id = mlflow.create_experiment(experiment_name) + else: + experiment_id = experiment.experiment_id + return experiment_id diff --git a/src/re_classwise_shapley/utils.py b/src/re_classwise_shapley/utils.py index d061442b..73f4009e 100644 --- a/src/re_classwise_shapley/utils.py +++ b/src/re_classwise_shapley/utils.py @@ -12,7 +12,13 @@ logger = setup_logger() -__all__ = ["flatten_dict", "pipeline_seed", "load_params_fast", "n_threaded"] +__all__ = [ + "flatten_dict", + "pipeline_seed", + "load_params_fast", + "n_threaded", + "linear_dataframe_to_table", +] def pipeline_seed(initial_seed: Seed, pipeline_step: int) -> int: