Skip to content

Commit

Permalink
Plugins updated for new metadata column names
Browse files Browse the repository at this point in the history
  • Loading branch information
sorenwacker committed Jul 12, 2023
1 parent 3f9150b commit 7cbe79c
Show file tree
Hide file tree
Showing 8 changed files with 563 additions and 391 deletions.
4 changes: 3 additions & 1 deletion ms_mint/Mint.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ def crosstab(self, values: str = "peak_max", index: str = None, column: str = No
cells of the returned table.
"""

df_meta = pd.merge(self.meta, self.results, left_index=True, right_on='ms_file_label')

if index is None:
Expand Down Expand Up @@ -552,6 +552,8 @@ def load_metadata(self, fn=None):
self.meta = pd.read_csv(fn, index_col=0)
elif str(fn).endswith('.parquet'):
self.meta = pd.read_parquet(fn)
if 'ms_file_label' in self.meta.columns:
self.meta = self.meta.set_index('ms_file_label')
return self

def save_metadata(self, fn=None):
Expand Down
7 changes: 1 addition & 6 deletions ms_mint/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,7 @@ def ms_file_to_df(fn, read_only: bool = False):
)
# Set datatypes
set_dtypes(df)

# assert df.scan_id.dtype in [np.int32, np.int64], df.scan_id.dtype
# assert df.intensity.dtype == np.int64, df.intensity.dtype
# assert df.mz.dtype == np.float64, df.mz.dtype
# assert df.scan_time.dtype == np.float64, df.scan_time.dtype

print(df.columns)
return df


Expand Down
54 changes: 45 additions & 9 deletions ms_mint/pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,28 +100,64 @@ def __init__(self, pca):
"""
self.pca = pca

def cumulative_variance(self, height=4, aspect=2):

def cumulative_variance(self, interactive=False, **kwargs):
    """
    Plot the cumulative explained variance of the principal components.

    Thin dispatcher: picks one of the two plotting methods of this object and forwards
    every remaining keyword argument to it unchanged.

    :param interactive: If truthy, use ``cumulative_variance_px``; otherwise use
        ``cumulative_variance_sns``.
    :param kwargs: Keyword arguments passed through to the selected method.
    :return: Whatever the selected plotting method returns.
    """
    # Only the chosen backend attribute is looked up, exactly one call is made.
    plot_func = self.cumulative_variance_px if interactive else self.cumulative_variance_sns
    return plot_func(**kwargs)


def cumulative_variance_px(self, **kwargs):
    """
    After running mint.pca() this function can be used to plot the cumulative variance of the
    principal components.

    :param kwargs: Extra keyword arguments forwarded to ``px.bar``. ``title`` and ``labels``
        may be given here to override the defaults set by this method.
    :return: Returns a plotly express figure.
    :rtype: plotly.graph_objs._figure.Figure
    """
    x_col = 'Principal Component'
    y_col = 'Explained variance [%]'

    n_components = self.pca.results["n_components"]
    cum_expl_var = self.pca.results["cum_expl_var"]
    # Components are numbered starting at 1 on the x-axis.
    df = pd.DataFrame({x_col: np.arange(n_components) + 1, y_col: cum_expl_var})

    # Merge the defaults with the caller's options instead of passing both as keywords:
    # px.bar(..., title=..., **kwargs) raises TypeError ("multiple values for keyword
    # argument") as soon as kwargs also contains `title` or `labels`.
    options = {
        'title': "Cumulative explained variance",
        'labels': {x_col: x_col, y_col: y_col},
        **kwargs,
    }
    fig = px.bar(df, x=x_col, y=y_col, **options)
    fig.update_layout(autosize=True, showlegend=False)
    return fig

def cumulative_variance_sns(self, **kwargs):
    """
    After running mint.pca() this function can be used to plot the cumulative variance of the
    principal components.

    :param aspect: Keyword argument; width of the figure as a multiple of its height
        (default 1).
    :param height: Keyword argument; figure height handed to matplotlib's ``figsize``
        (default 5).
    :return: Returns a matplotlib figure.
    :rtype: matplotlib.figure.Figure
    """
    # Only `aspect` and `height` are read; any other keyword argument is ignored.
    aspect = kwargs.get('aspect', 1)
    height = kwargs.get('height', 5)

    n_components = self.pca.results["n_components"]
    cum_expl_var = self.pca.results["cum_expl_var"]

    # Figure width follows from the requested height and aspect ratio.
    width = height * aspect

    # Draw on an explicit Axes object rather than through pyplot's implicit current axes.
    fig, ax = plt.subplots(figsize=(width, height))
    ax.bar(
        np.arange(n_components) + 1,
        cum_expl_var,
        facecolor="grey",
        edgecolor="none",
    )
    ax.set_xlabel("Principal Component")
    ax.set_ylabel("Explained variance [%]")
    ax.set_title("Cumulative explained variance")
    # Hide the top and right frame lines.
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    # One tick per principal component, numbered from 1.
    ax.set_xticks(range(1, len(cum_expl_var) + 1))
    return fig

def _prepare_data(self, n_components=3, hue=None):
Expand Down Expand Up @@ -155,7 +191,7 @@ def pairplot(
df = self._prepare_data(n_components=n_components, hue=hue)

if isinstance(hue, list):
hue = 'Label'
hue = 'label'

if interactive:
return self.pairplot_plotly(df, color_col=hue, **kwargs)
Expand Down
2 changes: 0 additions & 2 deletions ms_mint/plotting.py

This file was deleted.

4 changes: 4 additions & 0 deletions ms_mint/targets.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def standardize_targets(targets, ms_mode="neutral"):
targets = targets.rename(columns=DEPRECATED_LABELS)
if targets.index.name == "peak_label":
targets = targets.reset_index()

assert pd.value_counts(targets.columns).max() == 1, pd.value_counts(targets.columns)
cols = targets.columns
if "formula" in targets.columns and not "mz_mean" in targets.columns:
Expand All @@ -85,8 +86,11 @@ def standardize_targets(targets, ms_mode="neutral"):
targets[c] = None
targets[c] = targets[c].astype(float)
del c

if "peak_label" not in cols:
logging.warning(f'"peak_label" not in cols, assigning new labels:\n{targets}')
targets["peak_label"] = [f"C_{i}" for i in range(len(targets))]

targets["intensity_threshold"] = targets["intensity_threshold"].fillna(0)
targets["peak_label"] = targets["peak_label"].astype(str)

Expand Down
183 changes: 160 additions & 23 deletions notebooks/Example-plotting.ipynb

Large diffs are not rendered by default.

Loading

0 comments on commit 7cbe79c

Please sign in to comment.