Skip to content

Commit

Permalink
add predict_proba functionality
Browse files Browse the repository at this point in the history
  • Loading branch information
diegomarvid committed Jul 17, 2024
1 parent e8b4acf commit 8837191
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 3 deletions.
28 changes: 27 additions & 1 deletion ml_garden/core/data_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import logging
import os
import sys
from typing import Any, Optional, Union
from typing import Any, Dict, Optional, Union

import dill as pickle
import pandas as pd
Expand Down Expand Up @@ -675,6 +675,32 @@ def predictions(self, value: pd.Series):
"""
self["predictions"] = value

@property
def predict_proba(self) -> pd.DataFrame:
    """
    Return the class-probability predictions held by this container.

    The value is read from the container under the ``"predict_proba"`` key.

    Returns
    -------
    pd.DataFrame
        The stored prediction probabilities. For binary and multiclass
        classification this is expected to hold one column per class.
    """
    stored_probabilities = self["predict_proba"]
    return stored_probabilities

@predict_proba.setter
def predict_proba(self, value: pd.DataFrame):
    """
    Store class-probability predictions in this container.

    The value is written to the container under the ``"predict_proba"`` key.

    Parameters
    ----------
    value : pd.DataFrame
        The prediction probabilities to keep. Expected to be a DataFrame
        with one column per class.
    """
    self["predict_proba"] = value

@property
def explainer(self) -> BaseExplainer:
"""
Expand Down
16 changes: 16 additions & 0 deletions ml_garden/core/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,19 @@ def fit(
@abstractmethod
def predict(self, X: pd.DataFrame) -> pd.Series:
"""Abstract method for making predictions."""

def predict_proba(self, X: pd.DataFrame) -> pd.DataFrame:
    """
    Predict class probabilities with the trained model.

    This is an optional, non-abstract hook: models that can produce class
    probabilities override it. The base implementation raises instead of
    silently returning ``None``, so that a caller expecting a DataFrame
    gets an explicit error naming the model rather than a later, unrelated
    ``AttributeError``.

    Parameters
    ----------
    X : pd.DataFrame
        Features to make probability predictions on.

    Returns
    -------
    pd.DataFrame
        Predicted class probabilities for the input features.

    Raises
    ------
    NotImplementedError
        Always, unless the method is overridden by a subclass.
    """
    raise NotImplementedError(
        f"{type(self).__name__} does not implement predict_proba."
    )
11 changes: 11 additions & 0 deletions ml_garden/core/steps/fit_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import numpy as np
import optuna
import pandas as pd
from sklearn.metrics import (
accuracy_score,
f1_score,
Expand Down Expand Up @@ -300,4 +301,14 @@ def predict(self, data: DataContainer) -> DataContainer:
self.logger.info(f"Predicting with {self.model_class.__name__} model")
data.flow[data.prediction_column] = data.model.predict(data.X_prediction)
data.predictions = data.flow[data.prediction_column]

# If the task is classification, also get the prediction probabilities
if data.task == Task.CLASSIFICATION:
proba_df = data.model.predict_proba(data.X_prediction)
proba_df.columns = [f"proba_{col}" for col in proba_df.columns]

# Concatenate the probabilities DataFrame with the existing DataFrame
data.flow = pd.concat([data.flow, proba_df], axis=1)
data.predict_proba = proba_df

return data
8 changes: 7 additions & 1 deletion ml_garden/implementation/tabular/autogluon/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,10 @@ def fit(self, X: pd.DataFrame, y: pd.Series, eval_set=None, verbose=True) -> Non

def predict(self, X: pd.DataFrame) -> pd.Series:
    """
    Predict targets for ``X`` with the wrapped model.

    Parameters
    ----------
    X : pd.DataFrame
        Features to make predictions on.

    Returns
    -------
    pd.Series
        The model's predictions, built with ``X.index`` as the index so
        they line up row-for-row with the input frame.
    """
    predictions = self.model.predict(X)
    # Return exactly once: a stale early `return predictions` previously
    # made the index-aligned Series construction unreachable.
    return pd.Series(predictions, index=X.index)

def predict_proba(self, X: pd.DataFrame) -> pd.DataFrame:
    """
    Predict class probabilities for ``X`` with the wrapped model.

    Parameters
    ----------
    X : pd.DataFrame
        Features to make probability predictions on.

    Returns
    -------
    pd.DataFrame
        The model's probability output, built with ``X.index`` as the index.

    Raises
    ------
    ValueError
        If the wrapped model's ``problem_type`` is ``"regression"``.
    """
    predictor = self.model
    # Guard first: probabilities are not requested from a regression model.
    if predictor.problem_type == "regression":
        raise ValueError("predict_proba is not available for regression tasks.")
    return pd.DataFrame(predictor.predict_proba(X), index=X.index)
4 changes: 3 additions & 1 deletion ml_garden/implementation/tabular/xgboost/model.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
from typing import List

import pandas as pd
import xgboost as xgb
Expand Down Expand Up @@ -75,4 +76,5 @@ def predict_proba(self, X: pd.DataFrame) -> pd.DataFrame:
pd.DataFrame
Predicted class probabilities for the input features.
"""
return self.model.predict_proba(X)
proba = self.model.predict_proba(X)
return pd.DataFrame(proba, columns=self.model.classes_)

0 comments on commit 8837191

Please sign in to comment.