Plugins updated for new metadata column names

LewisResearchGroup · Jul 12, 2023 · 7cbe79c · 7cbe79c
1 parent 3f9150b
commit 7cbe79c
Show file tree

Hide file tree

Showing 8 changed files with 563 additions and 391 deletions.
diff --git a/ms_mint/Mint.py b/ms_mint/Mint.py
@@ -392,7 +392,7 @@ def crosstab(self, values: str = "peak_max", index: str = None, column: str = No
 
         cells of the returned table.
         """
-
+        
         df_meta = pd.merge(self.meta, self.results, left_index=True, right_on='ms_file_label')
 
         if index is None:
@@ -552,6 +552,8 @@ def load_metadata(self, fn=None):
             self.meta = pd.read_csv(fn, index_col=0)
         elif str(fn).endswith('.parquet'):
             self.meta = pd.read_parquet(fn)
+        if 'ms_file_label' in self.meta.columns:
+            self.meta = self.meta.set_index('ms_file_label')
         return self
 
     def save_metadata(self, fn=None):

diff --git a/ms_mint/io.py b/ms_mint/io.py
@@ -75,12 +75,7 @@ def ms_file_to_df(fn, read_only: bool = False):
         )
         # Set datatypes
         set_dtypes(df)
-
-    # assert df.scan_id.dtype in [np.int32, np.int64], df.scan_id.dtype
-    # assert df.intensity.dtype == np.int64, df.intensity.dtype
-    # assert df.mz.dtype == np.float64, df.mz.dtype
-    # assert df.scan_time.dtype == np.float64, df.scan_time.dtype
-
+    print(df.columns)
     return df
 
 

diff --git a/ms_mint/pca.py b/ms_mint/pca.py
@@ -100,28 +100,64 @@ def __init__(self, pca):
         """
         self.pca = pca
 
-    def cumulative_variance(self, height=4, aspect=2):
+
+    def cumulative_variance(self, interactive=False, **kwargs):
+        if interactive:
+            return self.cumulative_variance_px(**kwargs)
+        else:
+            return self.cumulative_variance_sns(**kwargs) 
+
+
+    def cumulative_variance_px(self, **kwargs):
+        """
+        After running mint.pca() this function can be used to plot the cumulative variance of the
+        principal components.
+
+        :return: Returns a plotly express figure.
+        :rtype: plotly.graph_objs._figure.Figure
+        """
+        n_components = self.pca.results["n_components"]
+        cum_expl_var = self.pca.results["cum_expl_var"]
+        df = pd.DataFrame({'Principal Component': np.arange(n_components) + 1, 'Explained variance [%]': cum_expl_var})
+        fig = px.bar(df, x='Principal Component', y='Explained variance [%]', 
+                     title="Cumulative explained variance",
+                     labels={'Principal Component':'Principal Component', 'Explained variance [%]':'Explained variance [%]'},
+                     **kwargs)
+        fig.update_layout(autosize=True, showlegend=False)
+        return fig
+
+    def cumulative_variance_sns(self, **kwargs):
         """
         After running mint.pca() this function can be used to plot the cumulative variance of the
         principal components.
 
         :return: Returns a matplotlib figure.
         :rtype: matplotlib.figure.Figure
         """
+        # Set default values for aspect and height
+        aspect = kwargs.get('aspect', 1)
+        height = kwargs.get('height', 5)
+
         n_components = self.pca.results["n_components"]
-        fig = plt.figure(figsize=(height * aspect, height))
         cum_expl_var = self.pca.results["cum_expl_var"]
-        plt.bar(
+
+        # Calculate figure width from the height and aspect ratio
+        width = height * aspect
+
+        fig, ax = plt.subplots(figsize=(width, height))
+        ax.bar(
             np.arange(n_components) + 1,
             cum_expl_var,
             facecolor="grey",
             edgecolor="none",
         )
-        plt.xlabel("Principal Component")
-        plt.ylabel("Explained variance [%]")
-        plt.title("Cumulative explained variance")
-        plt.grid()
-        plt.xticks(range(1, len(cum_expl_var) + 1))
+        ax.set_xlabel("Principal Component")
+        ax.set_ylabel("Explained variance [%]")
+        ax.set_title("Cumulative explained variance")
+        #ax.grid()
+        ax.spines['top'].set_visible(False)
+        ax.spines['right'].set_visible(False)        
+        ax.set_xticks(range(1, len(cum_expl_var) + 1))
         return fig
 
     def _prepare_data(self, n_components=3, hue=None):
@@ -155,7 +191,7 @@ def pairplot(
         df = self._prepare_data(n_components=n_components, hue=hue)
 
         if isinstance(hue, list):
-            hue = 'Label'
+            hue = 'label'
 
         if interactive:
             return self.pairplot_plotly(df, color_col=hue, **kwargs)

diff --git a/ms_mint/plotting.py b/ms_mint/plotting.py
diff --git a/ms_mint/targets.py b/ms_mint/targets.py
@@ -59,6 +59,7 @@ def standardize_targets(targets, ms_mode="neutral"):
     targets = targets.rename(columns=DEPRECATED_LABELS)
     if targets.index.name == "peak_label":
         targets = targets.reset_index()
+
     assert pd.value_counts(targets.columns).max() == 1, pd.value_counts(targets.columns)
     cols = targets.columns
     if "formula" in targets.columns and not "mz_mean" in targets.columns:
@@ -85,8 +86,11 @@ def standardize_targets(targets, ms_mode="neutral"):
             targets[c] = None
             targets[c] = targets[c].astype(float)
     del c
+
     if "peak_label" not in cols:
+        logging.warning(f'"peak_label" not in cols, assigning new labels:\n{targets}')
         targets["peak_label"] = [f"C_{i}" for i in range(len(targets))]
+
     targets["intensity_threshold"] = targets["intensity_threshold"].fillna(0)
     targets["peak_label"] = targets["peak_label"].astype(str)
 

diff --git a/notebooks/Example-plotting.ipynb b/notebooks/Example-plotting.ipynb