fix

chenyangkang · Nov 20, 2024 · 48d62c8 · 48d62c8
1 parent 6d50366
commit 48d62c8
Show file tree

Hide file tree

Showing 5 changed files with 175 additions and 35 deletions.
diff --git a/stemflow/model/AdaSTEM.py b/stemflow/model/AdaSTEM.py
@@ -112,8 +112,7 @@ def __init__(
         completely_random_rotation: bool = False,
         lazy_loading: bool = False,
         lazy_loading_dir: Union[str, None] = None,
-        min_class_sample: int = 1,
-        logit_agg: bool = False
+        min_class_sample: int = 1
     ):
         """Make an AdaSTEM object
 
@@ -187,8 +186,6 @@ def __init__(
                 If lazy_loading, the directory of the model to temporary save to. Default to None, where a random number will be generated as folder name.
             min_class_sample:
                 Minimum umber of samples needed to train the classifier in each stixel. If the sample does not satisfy, fit a dummy one. This parameter does not influence regression tasks.
-            logit_agg:
-                Whether to use logit aggregation for the classification task. If True, the model is averaging the probability prediction estimated by all ensembles in logit scale, and then back-tranform it to probability scale. It's recommened to be combinedly used with the CalibratedClassifierCV class in sklearn as a wrapper of the classifier to estimate the calibrated probability. If False, the output is the essentially the proportion of "1s" acorss the related ensembles; e.g., if 100 stixels covers this spatiotemporal points, and 90% of them predict that it is a "1", then the ouput probability is 0.9; Therefore it would be a probability estimated by the spatiotemporal neiborhood.
         Raises:
             AttributeError: Base model do not have method 'fit' or 'predict'
             AttributeError: task not in one of ['regression', 'classification', 'hurdle']
@@ -272,7 +269,6 @@ def __init__(
         # X. miscellaneous
         self.lazy_loading = lazy_loading
         self.lazy_loading_dir = lazy_loading_dir
-        self.logit_agg=logit_agg
 
         if not verbosity == 0:
             self.verbosity = 1
@@ -805,6 +801,7 @@ def predict_proba(
         n_jobs: Union[None, int] = None,
         aggregation: str = "mean",
         return_by_separate_ensembles: bool = False,
+        logit_agg: bool = False,
         **base_model_prediction_param
     ) -> Union[np.ndarray, Tuple[np.ndarray]]:
         """Predict probability
@@ -826,7 +823,8 @@ def predict_proba(
                 'mean' or 'median' for aggregation method across ensembles.
             return_by_separate_ensembles (bool, optional):
                 Experimental function. return not by aggregation, but by separate ensembles.
-
+            logit_agg:
+                Whether to use logit aggregation for the classification task. If True, the model averages the probability predictions estimated by all ensembles on the logit scale and then back-transforms the result to the probability scale. It is recommended to use this together with the CalibratedClassifierCV class in sklearn as a wrapper of the classifier, so that calibrated probabilities are estimated. If False, the output is essentially the proportion of "1s" across the related ensembles; e.g., if 100 stixels cover this spatiotemporal point and 90% of them predict that it is a "1", then the output probability is 0.9. It is therefore a probability estimated by the spatiotemporal neighborhood.
         Raises:
             TypeError:
                 X_test is not of type pd.core.frame.DataFrame.
@@ -857,7 +855,7 @@ def predict_proba(
             return new_res.values
 
         # Transform to logit space if classification:
-        if self.task=='classification' and self.logit_agg:
+        if self.task=='classification' and logit_agg:
             for col_index in range(res.shape[1]):
                 prob = np.clip(res.iloc[:,col_index], 1e-6, 1 - 1e-6)
                 res.iloc[:,col_index] = np.log(prob / (1-prob)) # logit space
@@ -924,6 +922,7 @@ def predict(
         n_jobs: Union[None, int] = 1,
         aggregation: str = "mean",
         return_by_separate_ensembles: bool = False,
+        logit_agg: bool = False,
         **base_model_prediction_param
     ) -> Union[np.ndarray, Tuple[np.ndarray]]:
         pass
@@ -1343,8 +1342,7 @@ def __init__(
         completely_random_rotation=False,
         lazy_loading = False,
         lazy_loading_dir = None,
-        min_class_sample = 1,
-        logit_agg=False
+        min_class_sample = 1
     ):
         super().__init__(
             base_model=base_model,
@@ -1377,8 +1375,7 @@ def __init__(
             completely_random_rotation=completely_random_rotation,
             lazy_loading=lazy_loading,
             lazy_loading_dir=lazy_loading_dir,
-            min_class_sample=min_class_sample,
-            logit_agg=logit_agg
+            min_class_sample=min_class_sample
         )
 
         self._estimator_type = 'classifier'
@@ -1392,6 +1389,7 @@ def predict(
         n_jobs: Union[int, None] = 1,
         aggregation: str = "mean",
         return_by_separate_ensembles: bool = False,
+        logit_agg: bool = False,
         **base_model_prediction_param
     ) -> Union[np.ndarray, Tuple[np.ndarray]]:
         """A rewrite of predict_proba adapted for Classifier
@@ -1419,7 +1417,8 @@ def predict(
                 Experimental function. return not by aggregation, but by separate ensembles.
             base_model_prediction_param:
                 Additional parameter passed to base_model.predict_proba or base_model.predict
-
+            logit_agg:
+                Whether to use logit aggregation for the classification task. If True, the model averages the probability predictions estimated by all ensembles on the logit scale and then back-transforms the result to the probability scale. It is recommended to use this together with the CalibratedClassifierCV class in sklearn as a wrapper of the classifier, so that calibrated probabilities are estimated. If False, the output is essentially the proportion of "1s" across the related ensembles; e.g., if 100 stixels cover this spatiotemporal point and 90% of them predict that it is a "1", then the output probability is 0.9. It is therefore a probability estimated by the spatiotemporal neighborhood.
         Raises:
             TypeError:
                 X_test is not of type pd.core.frame.DataFrame.
@@ -1441,6 +1440,7 @@ def predict(
                 n_jobs=n_jobs,
                 aggregation=aggregation,
                 return_by_separate_ensembles=return_by_separate_ensembles,
+                logit_agg=logit_agg,
                 **base_model_prediction_param
             )
             mean = mean[:,1].flatten()
@@ -1456,6 +1456,7 @@ def predict(
                 n_jobs=n_jobs,
                 aggregation=aggregation,
                 return_by_separate_ensembles=return_by_separate_ensembles,
+                logit_agg=logit_agg,
                 **base_model_prediction_param
             )
             mean = mean[:,1].flatten()

diff --git a/stemflow/model/STEM.py b/stemflow/model/STEM.py
@@ -49,8 +49,7 @@ def __init__(
         completely_random_rotation: bool = False,
         lazy_loading: bool = False,
         lazy_loading_dir: Union[str, None] = None,
-        min_class_sample: int = 1,
-        logit_agg: bool = False
+        min_class_sample: int = 1
     ):
         """Make a STEM object
 
@@ -122,9 +121,7 @@ def __init__(
                 If lazy_loading, the directory of the model to temporary save to. Default to None, where a random number will be generated as folder name.
             min_class_sample:
                 Minimum umber of samples needed to train the classifier in each stixel. If the sample does not satisfy, fit a dummy one. This parameter does not influence regression tasks.
-            logit_agg:
-                Whether to use logit aggregation for the classification task. If True, the model is averaging the probability prediction estimated by all ensembles in logit scale, and then back-tranform it to probability scale. It's recommened to be combinedly used with the CalibratedClassifierCV class in sklearn as a wrapper of the classifier to estimate the calibrated probability. If False, the output is the essentially the proportion of "1s" acorss the related ensembles; e.g., if 100 stixels covers this spatiotemporal points, and 90% of them predict that it is a "1", then the ouput probability is 0.9; Therefore it would be a probability estimated by the spatiotemporal neiborhood.
-                
+
         Raises:
             AttributeError: Base model do not have method 'fit' or 'predict'
             AttributeError: task not in one of ['regression', 'classification', 'hurdle']
@@ -182,8 +179,7 @@ def __init__(
             completely_random_rotation=completely_random_rotation,
             lazy_loading=lazy_loading,
             lazy_loading_dir=lazy_loading_dir,
-            min_class_sample=min_class_sample,
-            logit_agg=logit_agg
+            min_class_sample=min_class_sample
         )
 
         self.grid_len = grid_len
@@ -249,8 +245,7 @@ def __init__(
         completely_random_rotation: bool = False,
         lazy_loading: bool = False,
         lazy_loading_dir: Union[str, None] = None,
-        min_class_sample: int = 1,
-        logit_agg: bool = False
+        min_class_sample: int = 1
     ):
         super().__init__(
             base_model=base_model,
@@ -283,8 +278,7 @@ def __init__(
             completely_random_rotation=completely_random_rotation,
             lazy_loading=lazy_loading,
             lazy_loading_dir=lazy_loading_dir,
-            min_class_sample=min_class_sample,
-            logit_agg=logit_agg
+            min_class_sample=min_class_sample
         )
 
         self.grid_len = grid_len

diff --git a/stemflow/model/SphereAdaSTEM.py b/stemflow/model/SphereAdaSTEM.py
@@ -86,8 +86,7 @@ def __init__(
         radius: float = 6371.0,
         lazy_loading: bool = False,
         lazy_loading_dir: Union[str, None] = None,
-        min_class_sample: int = 1,
-        logit_agg: bool = False
+        min_class_sample: int = 1
     ):
         """Make a Spherical AdaSTEM object
 
@@ -161,9 +160,7 @@ def __init__(
                 If lazy_loading, the directory of the model to temporary save to. Default to None, where a random number will be generated as folder name.
             min_class_sample:
                 Minimum umber of samples needed to train the classifier in each stixel. If the sample does not satisfy, fit a dummy one. This parameter does not influence regression tasks.
-            logit_agg:
-                Whether to use logit aggregation for the classification task. If True, the model is averaging the probability prediction estimated by all ensembles in logit scale, and then back-tranform it to probability scale. It's recommened to be combinedly used with the CalibratedClassifierCV class in sklearn as a wrapper of the classifier to estimate the calibrated probability. If False, the output is the essentially the proportion of "1s" acorss the related ensembles; e.g., if 100 stixels covers this spatiotemporal points, and 90% of them predict that it is a "1", then the ouput probability is 0.9; Therefore it would be a probability estimated by the spatiotemporal neiborhood.
-
+                
         Raises:
             AttributeError: Base model do not have method 'fit' or 'predict'
             AttributeError: task not in one of ['regression', 'classification', 'hurdle']
@@ -220,8 +217,7 @@ def __init__(
             plot_empty=plot_empty,
             lazy_loading=lazy_loading,
             lazy_loading_dir=lazy_loading_dir,
-            min_class_sample=min_class_sample,
-            logit_agg=logit_agg
+            min_class_sample=min_class_sample
         )
 
         if not self.Spatio1 == "longitude":
@@ -558,8 +554,7 @@ def __init__(
         plot_empty=False,
         lazy_loading=False,
         lazy_loading_dir=None,
-        min_class_sample: int = 1,
-        logit_agg: bool = False
+        min_class_sample: int = 1
     ):
         super().__init__(
             base_model=base_model,
@@ -591,8 +586,7 @@ def __init__(
             plot_empty=plot_empty,
             lazy_loading=lazy_loading,
             lazy_loading_dir=lazy_loading_dir,
-            min_class_sample=min_class_sample,
-            logit_agg=logit_agg
+            min_class_sample=min_class_sample
         )
 
         self.predict = MethodType(AdaSTEMClassifier.predict, self)

diff --git a/tests/make_models.py b/tests/make_models.py
@@ -18,7 +18,7 @@
 from stemflow.model.SphereAdaSTEM import SphereAdaSTEM, SphereAdaSTEMClassifier, SphereAdaSTEMRegressor
 from stemflow.model.STEM import STEM, STEMClassifier, STEMRegressor
 from stemflow.model_selection import ST_train_test_split
-# from sklearn.calibration import CalibratedClassifierCV
+from sklearn.calibration import CalibratedClassifierCV
 # CalibratedClassifierCV(XGBClassifier(tree_method="hist", random_state=42, verbosity=0, n_jobs=1), cv=3, n_jobs=1, ensemble=True)
 
 fold_ = 2
@@ -290,3 +290,79 @@ def make_AdaSTEMRegressor_Hurdle_for_AdaSTEM(fold_=2, min_req=1, **kwargs):
     )
 
     return model
+
+
+def make_STEMClassifier_caliP(fold_=2, min_req=1, **kwargs):
+    model = STEMClassifier(
+        base_model=CalibratedClassifierCV(XGBClassifier(tree_method="hist", random_state=42, verbosity=0, n_jobs=1), cv=3, n_jobs=1, ensemble=True),
+        save_gridding_plot=True,
+        ensemble_fold=fold_,
+        min_ensemble_required=min_req,
+        grid_len=30,
+        temporal_start=1,
+        temporal_end=366,
+        temporal_step=40,
+        temporal_bin_interval=80,
+        points_lower_threshold=30,
+        Spatio1="longitude",
+        Spatio2="latitude",
+        Temporal1="DOY",
+        temporal_bin_start_jitter="adaptive",
+        spatio_bin_jitter_magnitude="adaptive",
+        use_temporal_to_train=True,
+        n_jobs=1,
+        min_class_sample=3,
+        **kwargs
+    )
+
+    return model
+
+def make_SphereAdaClassifier_caliP(fold_=2, min_req=1, **kwargs):
+    model = SphereAdaSTEMClassifier(
+        base_model=CalibratedClassifierCV(XGBClassifier(tree_method="hist", random_state=42, verbosity=0, n_jobs=1), cv=3, n_jobs=1, ensemble=True),
+        save_gridding_plot=True,
+        ensemble_fold=fold_,
+        min_ensemble_required=min_req,
+        grid_len_upper_threshold=8000,
+        grid_len_lower_threshold=500,
+        temporal_start=1,
+        temporal_end=366,
+        temporal_step=40,
+        temporal_bin_interval=80,
+        points_lower_threshold=30,
+        Spatio1="longitude",
+        Spatio2="latitude",
+        Temporal1="DOY",
+        temporal_bin_start_jitter="adaptive",
+        spatio_bin_jitter_magnitude="adaptive",
+        use_temporal_to_train=True,
+        n_jobs=1,
+        min_class_sample=3,
+        **kwargs
+    )
+    return model
+
+def make_AdaSTEMClassifier_caliP(fold_=2, min_req=1, **kwargs):
+    model = AdaSTEMClassifier(
+        base_model=CalibratedClassifierCV(XGBClassifier(tree_method="hist", random_state=42, verbosity=0, n_jobs=1), cv=3, n_jobs=1, ensemble=True),
+        save_gridding_plot=True,
+        ensemble_fold=fold_,
+        min_ensemble_required=min_req,
+        grid_len_upper_threshold=50,
+        grid_len_lower_threshold=20,
+        temporal_start=1,
+        temporal_end=366,
+        temporal_step=40,
+        temporal_bin_interval=80,
+        points_lower_threshold=30,
+        Spatio1="longitude",
+        Spatio2="latitude",
+        Temporal1="DOY",
+        temporal_bin_start_jitter="adaptive",
+        spatio_bin_jitter_magnitude="adaptive",
+        use_temporal_to_train=True,
+        n_jobs=1, 
+        min_class_sample=3,
+        **kwargs
+    )
+    return model
diff --git a/tests/test_model_prediction_logit_aggregation.py b/tests/test_model_prediction_logit_aggregation.py
@@ -0,0 +1,75 @@
+import numpy as np
+import pandas as pd
+
+from stemflow.model.AdaSTEM import AdaSTEM
+from stemflow.model_selection import ST_train_test_split
+
+from .make_models import (
+    make_AdaSTEMClassifier_caliP,
+    make_SphereAdaClassifier_caliP,
+    make_STEMClassifier_caliP,
+)
+from .set_up_data import set_up_data
+
+x_names, (X, y) = set_up_data()
+X_train, X_test, y_train, y_test = ST_train_test_split(
+    X, y, Spatio_blocks_count=100, Temporal_blocks_count=100, random_state=42, test_size=0.3
+)
+
+
+def test_STEMClassifier_caliP():
+    model = make_STEMClassifier_caliP()
+    model = model.fit(X_train, np.where(y_train > 0, 1, 0))
+
+    pred_mean, pred_std = model.predict(X_test.reset_index(drop=True), return_std=True, verbosity=1, n_jobs=1, logit_agg=True)
+    assert np.sum(~np.isnan(pred_mean)) > 0
+    assert np.sum(~np.isnan(pred_std)) > 0
+
+    pred = model.predict(X_test)
+    assert len(pred) == len(X_test)
+    assert np.sum(np.isnan(pred)) / len(pred) <= 0.5
+
+    pred_df = pd.DataFrame(
+        {"y_true": y_test.flatten(), "y_pred": np.where(pred.flatten() < 0, 0, pred.flatten())}
+    ).dropna()
+    assert len(pred_df) > 0
+
+    eval = AdaSTEM.eval_STEM_res("classification", pred_df.y_true, pred_df.y_pred)
+
+def test_AdaSTEMClassifier_caliP():
+    model = make_AdaSTEMClassifier_caliP()
+    model = model.fit(X_train, np.where(y_train > 0, 1, 0))
+
+    pred_mean, pred_std = model.predict(X_test.reset_index(drop=True), return_std=True, verbosity=1, n_jobs=1, logit_agg=True)
+    assert np.sum(~np.isnan(pred_mean)) > 0
+    assert np.sum(~np.isnan(pred_std)) > 0
+
+    pred = model.predict(X_test)
+    assert len(pred) == len(X_test)
+    assert np.sum(np.isnan(pred)) / len(pred) <= 0.5
+
+    pred_df = pd.DataFrame(
+        {"y_true": y_test.flatten(), "y_pred": np.where(pred.flatten() < 0, 0, pred.flatten())}
+    ).dropna()
+    assert len(pred_df) > 0
+
+    eval = AdaSTEM.eval_STEM_res("classification", pred_df.y_true, pred_df.y_pred)
+
+def test_SphereAdaClassifier_caliP():
+    model = make_SphereAdaClassifier_caliP()
+    model = model.fit(X_train, np.where(y_train > 0, 1, 0))
+
+    pred_mean, pred_std = model.predict(X_test.reset_index(drop=True), return_std=True, verbosity=1, n_jobs=1, logit_agg=True)
+    assert np.sum(~np.isnan(pred_mean)) > 0
+    assert np.sum(~np.isnan(pred_std)) > 0
+
+    pred = model.predict(X_test)
+    assert len(pred) == len(X_test)
+    assert np.sum(np.isnan(pred)) / len(pred) <= 0.5
+
+    pred_df = pd.DataFrame(
+        {"y_true": y_test.flatten(), "y_pred": np.where(pred.flatten() < 0, 0, pred.flatten())}
+    ).dropna()
+    assert len(pred_df) > 0
+
+    eval = AdaSTEM.eval_STEM_res("classification", pred_df.y_true, pred_df.y_pred)